In [1]:
import numpy as np
import sklearn.ensemble
from anchor import utils
import xaibenchmark as xb
from xaibenchmark import load_adult as la
from xaibenchmark import preprocessing
from xaibenchmark.comparator import ExplainerComparator

# Seed NumPy's global RNG so code that draws from it gives the same
# numbers on every fresh run of the notebook.
SEED = 1
np.random.seed(SEED)

In [2]:
# Folder holding the raw data; adult/adult.data must exist inside it.
dataset_folder = '../data/'
# Load the Adult dataset through anchor's helper, with balancing and
# discretization switched on.
adult_dataset = utils.load_dataset(
    'adult',
    dataset_folder=dataset_folder,
    discretize=True,
    balance=True,
)

In [3]:
# Random forest trained on the anchor-formatted training split; this is the
# model later handed to the Anchors explainer.
rf = sklearn.ensemble.RandomForestClassifier(n_jobs=5, n_estimators=50)
rf.fit(X=adult_dataset.train, y=adult_dataset.labels_train)

RandomForestClassifier(n_estimators=50, n_jobs=5)

In [4]:
# List the attributes available on the loaded dataset object.
vars(adult_dataset).keys()

dict_keys(['labels', 'class_names', 'class_target', 'ordinal_features', 'categorical_features', 'categorical_names', 'feature_names', 'data', 'train', 'labels_train', 'validation', 'labels_validation', 'test', 'labels_test', 'test_idx', 'validation_idx', 'train_idx'])

In [5]:
# Display the feature names stored on the loaded dataset.
adult_dataset.feature_names

['Age',
 'Workclass',
 'Education',
 'Marital Status',
 'Occupation',
 'Relationship',
 'Race',
 'Sex',
 'Capital Gain',
 'Capital Loss',
 'Hours per week',
 'Country']

In [6]:
# Display the training split; NumPy abbreviates the printed array.
adult_dataset.train

array([[ 3.,  6., 15., ...,  0.,  1., 39.],
       [ 2.,  7., 10., ...,  0.,  0., 22.],
       [ 3.,  4., 15., ...,  0.,  0., 39.],
       ...,
       [ 1.,  6.,  5., ...,  0.,  1., 39.],
       [ 3.,  4., 11., ...,  0.,  0., 39.],
       [ 2.,  4., 15., ...,  0.,  0.,  8.]])

In [7]:
# Load the Adult data through xaibenchmark and preprocess it for LIME.
data = la.load_csv_data('adult', root_path=dataset_folder)
lime_training_set = preprocessing.lime_preprocess_dataset(
    data.data,
    data.categorical_features,
    data.data.keys(),
)
# Flatten the target into a 1-D label vector before fitting.
lime_labels = data.target.to_numpy().reshape(-1)
# Separate random forest, trained on the LIME-preprocessed data.
lime_ml_model = sklearn.ensemble.RandomForestClassifier(n_estimators=100)
lime_ml_model.fit(lime_training_set, lime_labels)

RandomForestClassifier()

--------------
### Import explainers

In [8]:
from xaibenchmark.explainers import AnchorsExplainer, LimeExplainer

### Usage of the implemented explainers

In [9]:
# Instantiate both explainers that will be compared.
# Anchors receives the random forest `rf` plus the dataset location and name;
# the location reuses `dataset_folder` so the path is configured in one place.
exp1 = AnchorsExplainer(rf, dataset_folder, 'adult')
# LIME receives the xaibenchmark dataset and the model trained on its
# LIME-preprocessed form; continuous features are left undiscretized.
exp2 = LimeExplainer(data, lime_ml_model, discretize_continuous=False)

In [10]:
# Reload the Adult data from the shared `dataset_folder` setting rather than
# a second hard-coded copy of the path.
# NOTE(review): `data` was already loaded with the same arguments earlier in
# the notebook; this reload looks redundant unless an explainer mutates the
# object -- confirm before removing.
data = la.load_csv_data('adult', root_path=dataset_folder)

In [11]:
# Explain a single row (position 70) with the Anchors explainer and print
# the names of the resulting explanation.
anchors_instance = data.data.iloc[[70]]
explanation = exp1.explain_instance(anchors_instance, "test", threshold=0.65)
anchor_names = explanation.names()
print("Current explanation:", anchor_names)

Current explanation: ['Education = 5th-6th']


In [12]:
# Build the comparator and register each explainer under a display label.
comp = ExplainerComparator()
for explainer, label in ((exp1, 'ANCHORS'), (exp2, 'LIME')):
    comp.add_explainer(explainer, label)

In [13]:
# Ask the comparator to explain one single-row instance (row position 5000).
compared_instance = data.data.iloc[[5000]]
comp.explain_instance(compared_instance)

In [14]:
# Print the metrics the comparator reports for each registered explainer.
comp.print_metrics()

ANCHORS : {('area', 0.0625), ('balance_model', 0.06451612903225806), ('precision', 0.78369384359401), ('accuracy', 0.9354838709677419), ('coverage', 0.0028), ('inverse_coverage', 357.14285714285717), ('balance_data', 0.0967741935483871), ('area_norm', 0.7937005259840998), ('balance_explanation', 0)}
LIME : {('area_norm', 19.537233873073223), ('accuracy', 0.7801956741104725), ('area', 2.589477453233665e+139), ('coverage', 4.053974932076651e-09), ('balance', 0.0194796648093713), ('furthest_distance', 0.003246795437737045), ('inverse_coverage', 246671480.89338318)}
