In [85]:
from __future__ import print_function
import numpy as np
np.random.seed(1)
import sys
import sklearn
import sklearn.ensemble
from anchor import utils
from anchor import anchor_tabular
import lime.lime_tabular
import pandas as pd

In [35]:
# make sure you have adult/adult.data inside dataset_folder
dataset_folder = '../data/'
dataset = utils.load_dataset('adult', balance=True, dataset_folder=dataset_folder, discretize=True)

[1 1 0 ... 1 0 1]


In [18]:
rf = sklearn.ensemble.RandomForestClassifier(n_estimators=50, n_jobs=5)
rf.fit(dataset.train, dataset.labels_train)
print('Train', sklearn.metrics.accuracy_score(dataset.labels_train, rf.predict(dataset.train)))
print('Test', sklearn.metrics.accuracy_score(dataset.labels_test, rf.predict(dataset.test)))

Train 0.9350338780390594
Test 0.8489483747609943


--------------
# Usage

In [19]:
import xaibenchmark as xb

In [263]:
class AnchorsExplainer(xb.Explainer):
    
    def __init__(self, dataset, pathToData):
        
        self.explainer = anchor_tabular.AnchorTabularExplainer(
            dataset.class_names,
            dataset.feature_names,
            dataset.train,
            dataset.categorical_names)
        self.dataset = dataset   
        self.data = pd.read_csv(pathToData, sep=',')
        
        
        
    def explain_instance(self, instance, predictor, threshold=0.95):
        self.explanation = self.explainer.explain_instance(instance, predictor, threshold=threshold)
        self.instance = instance   
        return self.explanation
    
    @xb.metric
    def coverage(self):
        if hasattr(self, 'explanation'):
            return self.explanation.coverage()
    
    @xb.metric
    def precision(self):
        if hasattr(self, 'explanation'):
            return self.explanation.precision()
        
    @xb.metric
    def balanceTrain(self):
        return np.count_nonzero(self.dataset.labels_train) / len(self.dataset.labels_train)
    
    @xb.metric
    def balanceDev(self):
        return np.count_nonzero(self.dataset.labels_validation) / len(self.dataset.labels_validation)
    
    @xb.metric
    def balanceTest(self):
        return np.count_nonzero(self.dataset.labels_test) / len(self.dataset.labels_test)
    
    @xb.metric
    def balanceExplanation(self):
        if hasattr(self, 'explanation'):
            labels = ["age", "workclass", "fnlwgt", "education", "education-num", "marital-status", "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss", "hours-per-week", "country", "label"]
            correct_relevant_examples = 0
            correct_targets = 0
            print(self.explanation.names())
            for i in range(0,len(self.data)):
                correctFeatures = True
                for feature in self.explanation.names():
                    if("Age" in feature or "Capital Gain" in feature or "Hours per week" in feature):
                        continue
                        #todo
                    else:
                        feature2 = feature.replace(" ", "-")
                        feature_name, value = feature2.split("-=-")
                        index = labels.index(feature_name.lower())
                        if (str(self.data.iat[i,index]).strip() != str(value).strip()): 
                            #print("First index: ", str(self.data.iat[i,index]).strip(), "Second Index" , str(value).strip(), "Data ", self.data.iloc[[i]])
                            correctFeatures = False
                if correctFeatures:
                    if(self.data.iat[i,14].strip() == ">50K"):
                        correct_targets += 1
                    correct_relevant_examples += 1
        if correct_targets == 0: return 0
        return correct_targets / correct_relevant_examples
        
    @xb.utility
    def get_neighborhood_instances(self): 
        if hasattr(self, 'explanation'):
            fit_anchor = np.where(np.all(self.dataset.train[:, self.explanation.features()] == self.instance[self.explanation.features()], axis=1))[0]
            return self.dataset.train[fit_anchor]
        return []
    
    @xb.utility
    def get_explained_instance(self):
        return self.instance
    
    @xb.utility
    def distance(self, x, y):
        return np.linalg.norm(x-y)
    
    
        

In [265]:
# instantiate anchors explainer
exp = AnchorsExplainer(dataset ,"../data/adult/adult.data")
explanation = exp.explain_instance(dataset.test[4], rf.predict, threshold=0.95)
exp.balanceExplanation()

['Marital Status = Married-civ-spouse', 'Relationship = Husband', 'Occupation = Exec-managerial', '37.00 < Age <= 48.00', 'Education = Masters']


0.8825396825396825

In [None]:
dataset.categorical_features

In [81]:
dataset.feature_names

['Age',
 'Workclass',
 'Education',
 'Marital Status',
 'Occupation',
 'Relationship',
 'Race',
 'Sex',
 'Capital Gain',
 'Capital Loss',
 'Hours per week',
 'Country']

In [57]:
# get all currently defined metrics
exp.metrics()
print(exp.balanceTrain())

0.5006775607811877


In [58]:
# report all current metrics
exp.report()

{('balanceDev', 0.4968112244897959),
 ('balanceTest', 0.49776927979604846),
 ('balanceTrain', 0.5006775607811877),
 ('coverage', nan),
 ('precision', nan)}

In [59]:
exp.get_neighborhood_instances()

[]

In [102]:
# explain a single instance (needed to compute coverage)
explanation = exp.explain_instance(dataset.test[0], rf.predict, threshold=0.95)

In [61]:
# get all currently defined metrics
exp.metrics()

['balanceDev', 'balanceTest', 'balanceTrain', 'coverage', 'precision']

In [62]:
# report all current metrics
exp.report()

{('balanceDev', 0.4968112244897959),
 ('balanceTest', 0.49776927979604846),
 ('balanceTrain', 0.5006775607811877),
 ('coverage', 0.017),
 ('precision', 0.9710144927536232)}

In [63]:
# infer other possible metrics
exp.infer_metrics()

inferred metrics: {'furthest_distance', 'balanceTrain', 'balanceDev', 'coverage', 'precision', 'balanceTest', 'inverse_coverage'}


In [64]:
# get all currently defined metrics
exp.metrics()

['balanceDev',
 'balanceTest',
 'balanceTrain',
 'coverage',
 'furthest_distance',
 'inverse_coverage',
 'precision']

In [65]:
# report all current metrics
exp.report()

{('balanceDev', 0.4968112244897959),
 ('balanceTest', 0.49776927979604846),
 ('balanceTrain', 0.5006775607811877),
 ('coverage', 0.017),
 ('furthest_distance', 39.102429592034305),
 ('inverse_coverage', 58.8235294117647),
 ('precision', 0.9710144927536232)}