In [1]:
import numpy as np
import sklearn.ensemble
from anchor import utils
import xaibenchmark as xb
from xaibenchmark import load_adult as la
from xaibenchmark import preprocessing
from xaibenchmark.comparator import ExplainerComparator

# Seed NumPy's global random number generator so that repeated runs of the
# notebook draw the same sequence of random numbers.
np.random.seed(1)

### Adult Dataset

In [2]:
# Prerequisite: adult/adult.data must exist inside `dataset_folder`.
dataset_folder = '../data/'

# Load the Adult dataset through anchor's helper, balanced and discretized.
adult_dataset = utils.load_dataset(
    'adult',
    balance=True,
    dataset_folder=dataset_folder,
    discretize=True,
)

In [3]:
# Model to be explained by Anchors: a 50-tree random forest fitted on the
# training split of the discretized Adult data.
adult_forest_params = {'n_estimators': 50, 'n_jobs': 5}
rf = sklearn.ensemble.RandomForestClassifier(**adult_forest_params)
rf.fit(adult_dataset.train, adult_dataset.labels_train)

RandomForestClassifier(n_estimators=50, n_jobs=5)

In [4]:
# List the attribute names stored on the loaded dataset object.
vars(adult_dataset).keys()

dict_keys(['labels', 'class_names', 'class_target', 'ordinal_features', 'categorical_features', 'categorical_names', 'feature_names', 'data', 'train', 'labels_train', 'validation', 'labels_validation', 'test', 'labels_test', 'test_idx', 'validation_idx', 'train_idx'])

In [5]:
# Show the feature (column) names of the Adult dataset.
adult_dataset.feature_names

['Age',
 'Workclass',
 'Education',
 'Marital Status',
 'Occupation',
 'Relationship',
 'Race',
 'Sex',
 'Capital Gain',
 'Capital Loss',
 'Hours per week',
 'Country']

In [6]:
# Peek at the numerically encoded training matrix (NumPy elides the middle
# rows and columns in the displayed repr).
adult_dataset.train

array([[ 3.,  6., 15., ...,  0.,  1., 39.],
       [ 2.,  7., 10., ...,  0.,  0., 22.],
       [ 3.,  4., 15., ...,  0.,  0., 39.],
       ...,
       [ 1.,  6.,  5., ...,  0.,  1., 39.],
       [ 3.,  4., 11., ...,  0.,  0., 39.],
       [ 2.,  4., 15., ...,  0.,  0.,  8.]])

In [7]:
# Load the Adult CSV through xaibenchmark, encode it with the LIME-specific
# preprocessing, and fit a separate 100-tree random forest on that encoding.
data = la.load_csv_data('adult', root_path=dataset_folder)

lime_columns = data.data.keys()
lime_training_set = preprocessing.lime_preprocess_dataset(
    data.data,
    data.categorical_features,
    lime_columns,
)

lime_targets = data.target.to_numpy().reshape(-1)  # flatten the targets to 1-D
lime_ml_model = sklearn.ensemble.RandomForestClassifier(n_estimators=100)
lime_ml_model.fit(lime_training_set, lime_targets)

RandomForestClassifier()

--------------
### Import Explainers

In [8]:
from xaibenchmark.explainers import AnchorsExplainer, LimeExplainer

### Usage of the implemented explainers

In [9]:
# instantiate anchors explainer
exp1 = AnchorsExplainer(rf, '../data/', 'adult', min_precision=0.95)
exp3 = AnchorsExplainer(rf, '../data/', 'adult', min_precision=0.6)
exp2 = LimeExplainer(data, lime_ml_model, discretize_continuous=False)

In [10]:
# Reload the Adult CSV from `dataset_folder` (same location as every other
# Adult load in this notebook, rather than a separate path literal).
# NOTE(review): `data` was already loaded with identical arguments when the
# LIME model was trained; presumably this reload is meant to provide a fresh
# copy for the cells below -- confirm, otherwise this cell is redundant.
data = la.load_csv_data('adult', root_path=dataset_folder)

In [11]:
# Explain one Adult row (position 70, kept as a one-row frame) with exp1,
# the Anchors explainer created with min_precision=0.95.
instance_70 = data.data.iloc[[70]]
explanation = exp1.explain_instance(instance_70, "test")
print("Current explanation:", explanation.names())

Current explanation: ['Education = 10th', 'Country = El-Salvador', 'Capital Gain = 0', 'Marital Status = Divorced']


In [12]:
# Register each explainer with the comparator under a display label.
comp = ExplainerComparator()
for _label, _explainer in (
    ('ANCHORS 0.95', exp1),
    ('ANCHORS 0.6', exp3),
    ('LIME', exp2),
):
    comp.add_explainer(_explainer, _label)

In [23]:
# Hand four Adult rows (selected by position) to the comparator.
rows_to_explain = [250, 2500, 2201, 2205]
comp.explain_instances(data.data.iloc[rows_to_explain])

In [24]:
# Display the comparator's metrics using its 'bar' plot mode.
comp.print_metrics(plot='bar')

In [25]:
# Display the comparator's metrics using its 'table' mode.
comp.print_metrics(plot='table')

In [26]:
# Per-instance metrics: indices 0-3 for the 0.95-precision Anchors explainer,
# followed by index 2 for LIME.
for _instance_index in range(4):
    comp.print_metrics(explainer="ANCHORS 0.95", index=_instance_index)
comp.print_metrics(explainer="LIME", index=2)

### Diabetes Dataset

In [17]:
# Prerequisite: diabetes/diabetic_data.csv must exist inside `dataset_folder`;
# without it this load fails with FileNotFoundError and the remaining diabetes
# cells cannot run.
# Uses `dataset_folder` like the Adult load instead of a repeated path literal.
diabetes_dataset = utils.load_dataset('diabetes', balance=True, dataset_folder=dataset_folder)

FileNotFoundError: [Errno 2] No such file or directory: '../data/diabetes/diabetic_data.csv'

In [None]:
# Random forest for the diabetes data, using the same hyper-parameters as the
# Adult model (50 trees, 5 parallel jobs).
diabetes_forest_params = {'n_estimators': 50, 'n_jobs': 5}
rf_db = sklearn.ensemble.RandomForestClassifier(**diabetes_forest_params)
rf_db.fit(diabetes_dataset.train, diabetes_dataset.labels_train)

In [None]:
from xaibenchmark.explainers import AnchorsExplainer

# Anchors explainer for the diabetes model, reading the data from
# `dataset_folder` like the rest of the notebook instead of a path literal.
# NOTE(review): this rebinds `exp1`, which the Adult section uses for its
# 0.95-precision Anchors explainer. The comparator already holds a reference to
# that object, but re-running the Adult registration cell after this one would
# register the diabetes explainer instead -- consider a dedicated name.
exp1 = AnchorsExplainer(rf_db, dataset_folder, 'diabetes')

### DLIME

In [None]:
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Pre-split breast-cancer arrays stored as .npy files.
train, test, labels_train, labels_test = (
    np.load("../data/X_train.npy"),
    np.load("../data/X_test.npy"),
    np.load("../data/y_train.npy"),
    np.load("../data/y_test.npy"),
)

# The 30 feature names are the 10 base measurements, each reported three times:
# as "mean <m>", "<m> error" and "worst <m>", in that order.
_base_measurements = [
    'radius', 'texture', 'perimeter', 'area', 'smoothness',
    'compactness', 'concavity', 'concave points', 'symmetry',
    'fractal dimension',
]
feature_names = (
    ['mean ' + m for m in _base_measurements]
    + [m + ' error' for m in _base_measurements]
    + ['worst ' + m for m in _base_measurements]
)
target_names = ['malignant', 'benign']

# Small, fixed-seed random forest used as the model to explain.
# NOTE(review): this rebinds `rf`, which the Adult section passes to its
# Anchors explainers; re-running those cells after this one would pick up the
# breast-cancer model instead.
rf = RandomForestClassifier(
    n_estimators=10,
    random_state=0,
    max_depth=5,
    max_features=5,
)
rf.fit(train, labels_train)

In [None]:
from xaibenchmark.explainers import DLimeTabularExplainer

# Keyword settings for the DLIME explainer; it is constructed from the
# training array loaded in the previous cell.
dlime_settings = {
    "mode": "classification",
    "feature_names": feature_names,
    "class_names": target_names,
    "discretize_continuous": True,
    "verbose": False,
}
explainer = DLimeTabularExplainer(train, **dlime_settings)

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import AgglomerativeClustering

# NOTE(review): this rebinds `data`, which earlier cells use for the Adult
# dataset; re-run the Adult loading cell before re-running those cells.
data = load_breast_cancer()
X = data.data

# Cluster the full breast-cancer feature matrix and append every row's cluster
# label as an extra trailing "membership" column.
clustering = AgglomerativeClustering().fit(X)
names = list(feature_names)+["membership"]
clustered_data = np.column_stack([X, clustering.labels_])
clabel = clustering.labels_  # one cluster label per row of X

# Nearest neighbour of every test row. The search must run over the same rows
# the clustering was fitted on (X), because `indices` is used to index `clabel`
# in the explanation loop. Fitting the search on `train` (as before) returns
# row positions within `train`, which only line up with `clabel` if `train` is
# a same-order prefix of X -- otherwise the wrong cluster is looked up.
nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(X)
distances, indices = nbrs.kneighbors(test)

In [None]:
from sklearn.linear_model import LinearRegression

# Produce and print one DLIME explanation for every row of `test`, using the
# rows of `clustered_data` that share the cluster label looked up for that row.
for x in range(0, test.shape[0]):
    # NOTE(review): these four lists are re-created for every test row but are
    # never appended to or read in the code visible here.
    use_case_one_features = []
    use_case_two_features = []
    use_case_three_features = []
    use_case_four_features = []
    # NOTE(review): the body ends with an unconditional `break`, so only the
    # first of the 10 iterations ever executes.
    for i in range(0, 10):
        # indices[x] holds the single nearest-neighbour index (n_neighbors=1),
        # so p_label is a length-1 array holding that neighbour's cluster label.
        p_label = clabel[indices[x]]
        # Keep the rows whose column 30 equals that label; column 30 is the
        # appended cluster-label column (assumes 30 feature columns before it).
        N = clustered_data[clustered_data[:, 30] == p_label]
        # Drop the label column again so only the feature columns remain.
        subset = np.delete(N, 30, axis=1)

        # Explain test[x] against rf's class probabilities, passing the
        # same-cluster rows as `clustered_data` and a linear regressor.
        exp_dlime = explainer.explain_instance(test[x],
                                             rf.predict_proba,
                                             num_features=10,
                                             model_regressor=LinearRegression(),
                                             clustered_data = subset,
                                             regressor = 'linear', labels=(0,1))
        print(exp_dlime)
        break