In [1]:
import numpy as np
import sklearn.ensemble
from anchor import utils
import xaibenchmark as xb
from xaibenchmark import load_adult as la
from xaibenchmark import preprocessing
from xaibenchmark.comparator import ExplainerComparator

# Seed NumPy's global random generator up front so that later steps drawing
# from it behave the same on every Restart & Run All.
np.random.seed(1)

In [2]:
# Prerequisite: the file adult/adult.data must exist inside `dataset_folder`.
dataset_folder = '../data/'

# Load the 'adult' dataset through anchor's helper, with class balancing and
# discretization both switched on.
adult_dataset = utils.load_dataset(
    'adult',
    dataset_folder=dataset_folder,
    balance=True,
    discretize=True,
)

In [3]:
# Random forest trained on the training split of the anchor-loaded dataset;
# this is the model handed to the Anchors explainer further down.
# `fit` returns the estimator itself, so the chained assignment binds the
# fitted model and the trailing `rf` shows its repr as the cell output.
rf = sklearn.ensemble.RandomForestClassifier(n_estimators=50, n_jobs=5).fit(
    adult_dataset.train,
    adult_dataset.labels_train,
)
rf

RandomForestClassifier(n_estimators=50, n_jobs=5)

In [4]:
# List the attribute names stored on the loaded dataset object.
vars(adult_dataset).keys()

dict_keys(['labels', 'class_names', 'class_target', 'ordinal_features', 'categorical_features', 'categorical_names', 'feature_names', 'data', 'train', 'labels_train', 'validation', 'labels_validation', 'test', 'labels_test', 'test_idx', 'validation_idx', 'train_idx'])

In [5]:
# Display the feature names carried by the dataset object.
adult_dataset.feature_names

['Age',
 'Workclass',
 'Education',
 'Marital Status',
 'Occupation',
 'Relationship',
 'Race',
 'Sex',
 'Capital Gain',
 'Capital Loss',
 'Hours per week',
 'Country']

In [6]:
# Peek at the training matrix that `rf` was fitted on (NumPy abbreviates the
# printout of large arrays, so only the corner rows/columns are shown).
adult_dataset.train

array([[ 3.,  6., 15., ...,  0.,  1., 39.],
       [ 2.,  7., 10., ...,  0.,  0., 22.],
       [ 3.,  4., 15., ...,  0.,  0., 39.],
       ...,
       [ 1.,  6.,  5., ...,  0.,  1., 39.],
       [ 3.,  4., 11., ...,  0.,  0., 39.],
       [ 2.,  4., 15., ...,  0.,  0.,  8.]])

In [7]:
# Load the 'adult' data a second time, now via xaibenchmark's CSV loader; this
# object (not `adult_dataset`) is what the LIME side of the notebook works on.
data = la.load_csv_data('adult', root_path=dataset_folder)

# Preprocess the features for LIME, passing the categorical feature list and
# the column keys of the loaded table.
lime_training_set = preprocessing.lime_preprocess_dataset(
    data.data,
    data.categorical_features,
    data.data.keys(),
)

# Flatten the target to one dimension before handing it to scikit-learn.
lime_targets = data.target.to_numpy().reshape(-1)

# Separate random forest for LIME; `fit` stays the last expression so the
# fitted estimator is displayed as the cell output.
lime_ml_model = sklearn.ensemble.RandomForestClassifier(n_estimators=100)
lime_ml_model.fit(lime_training_set, lime_targets)

RandomForestClassifier()

--------------
### Import Explainers

In [8]:
from xaibenchmark.explainers import AnchorsExplainer, LimeExplainer

### Usage of the implemented explainers

In [9]:
# Instantiate both explainers that will be compared.
# Anchors wraps the forest trained on the anchor-loaded dataset; it is given
# the shared `dataset_folder` rather than a second hard-coded copy of the path,
# so the data location is configured in exactly one place.
exp1 = AnchorsExplainer(rf, dataset_folder, 'adult')
# LIME wraps the forest trained on the LIME-preprocessed data and is told not
# to discretize continuous features.
exp2 = LimeExplainer(data, lime_ml_model, discretize_continuous=False)

In [10]:
# Re-load the 'adult' data used for picking the instances to explain below.
# Reads from the shared `dataset_folder` instead of repeating the path literal,
# so this load cannot drift from the one used to train the LIME model.
# NOTE(review): `data` was already loaded with the same arguments earlier in
# the notebook; this reload looks redundant — confirm before removing.
data = la.load_csv_data('adult', root_path=dataset_folder)

In [11]:
# Explain a single instance with Anchors on its own, before running the full
# comparison: pick the row at position 70 (kept two-dimensional by the
# list-style indexer) and request an anchor with threshold 0.99.
instance_to_explain = data.data.iloc[[70]]
explanation = exp1.explain_instance(
    instance_to_explain,
    "test",
    threshold=0.99,
)
print("Current explanation:", explanation.names())

Current explanation: ['Education = 5th-6th', 'Age <= 28.00', 'Capital Gain = 0', 'Relationship = Own-child']


In [12]:
# Build the comparator and register each explainer under the label that the
# metric cells below use to refer to it.
comp = ExplainerComparator()
for explainer, label in ((exp1, 'ANCHORS'), (exp2, 'LIME')):
    comp.add_explainer(explainer, label)

In [None]:
# Hand the comparator the rows at positions 1, 2 and 3000 of the loaded data.
# Presumably each registered explainer is run on every one of these rows —
# TODO confirm against ExplainerComparator.explain_instances.
comp.explain_instances(data.data.iloc[[1, 2, 3000]])

In [None]:
# Show the comparator's metrics in 'bar' form; no explainer or index is given,
# so no per-explainer / per-instance selection is requested here.
comp.print_metrics(plot='bar')

In [None]:
# Same metrics call as the bar view, rendered in 'table' form instead.
comp.print_metrics(plot='table')

In [None]:
# Per-explainer metrics in 'bar' form, one call per registered label.
# NOTE(review): index=1 presumably selects one of the instances passed to
# explain_instances — confirm how the comparator numbers them.
comp.print_metrics(explainer="ANCHORS", index=1, plot='bar')
comp.print_metrics(explainer="LIME", index=1, plot='bar')

In [None]:
# Per-explainer metrics in 'table' form, one call per registered label.
# NOTE(review): index=1 presumably selects one of the instances passed to
# explain_instances — confirm how the comparator numbers them.
comp.print_metrics(explainer="ANCHORS", index=1, plot='table')
comp.print_metrics(explainer="LIME", index=1, plot='table')