In [1]:
import lore
from datamanager import *

from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score, accuracy_score

from util import record2str

ipynb to py in pycharm:
- jupyter nbconvert --to script new_run.ipynb

## Dataset

In [2]:
## Iris Dataset
dataset_name = 'dataset/iris.csv'
dataset = prepare_iris_dataset(dataset_name)

## wine
# dataset_name = 'dataset/wine.csv'
# dataset = prepare_wine_dataset(dataset_name)

##############################################
#           Categorical dataset              #
##############################################
## german: (0 = Good, 1 = Bad)
# dataset_name = 'dataset/german_credit.csv'
# dataset = prepare_german_dataset(dataset_name)

## adult: ['<=50K', '>50K']
# dataset_name = 'dataset/adult.csv'
# dataset = prepare_adult_dataset(dataset_name)

## compas-scores-two-years: ['High', 'Low', 'Medium']
# dataset_name = 'dataset/compas-scores-two-years.csv'
# dataset = prepare_compass_dataset(dataset_name)

dataframe = dataset[0]
class_name = dataset[1]
dataset_fin = prepare_dataset(dataframe, class_name)

In [3]:
df = dataset_fin[0] #dataframe with unique numeric class values(0, 1, ...)
feature_names = dataset_fin[1]
class_values = dataset_fin[2]
numeric_columns = dataset_fin[3]
rdf = dataset_fin[4] #real dataframe
real_feature_names = dataset_fin[5]
features_map = dataset_fin[6] #map each class name to its unique numeric value

In [4]:
rdf.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


In [5]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,4.9,3.0,1.4,0.2,0
1,4.7,3.2,1.3,0.2,0
2,4.6,3.1,1.5,0.2,0
3,5.0,3.6,1.4,0.2,0
4,5.4,3.9,1.7,0.4,0


## Black box classifier

In [6]:
X = df.loc[:, df.columns != class_name].values
y = df[class_name].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)
blackbox = RandomForestClassifier()
blackbox.fit(X_train, y_train)

RandomForestClassifier()

In [7]:
y_pred = blackbox.predict(X_test)
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

Accuracy: 0.911


## select an instance _x_

In [42]:
i = 10
x = X_test[i]
y_val = blackbox.predict(x.reshape(1,-1))[0]

print(class_values)
class_prob = blackbox.predict_proba(x.reshape(1,-1))[0]
print(class_prob)

y_val_name = class_values[y_val]
print('blackbox(x) = { %s }' % y_val_name)


['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
[0.   0.97 0.03]
blackbox(x) = { Iris-versicolor }


In [9]:
print('x = %s' % record2str(x, feature_names, numeric_columns))

x = { sepal_length = 6.0, sepal_width = 2.9, petal_length = 4.5, petal_width = 1.5 }


# LORE explainer (explaining an instance x)

In [10]:
lore_obj = lore.LORE(X_test, blackbox, feature_names, class_name, class_values,
                 numeric_columns, features_map, neigh_type='ngmusx', verbose=False)

In [11]:
# just to check
Z = lore_obj.neighgen_fn(x)
print('Z is:',Z)
Z.shape

Z is: [[6.         2.9        4.5        1.5       ]
 [6.04689511 2.80635709 4.52816521 1.44658174]
 [6.03025244 2.83347291 4.4589409  1.43571066]
 ...
 [6.24790993 2.93188763 4.63349514 1.71025555]
 [6.19210268 2.90627439 4.52734356 1.65123054]
 [6.06124547 2.90299125 4.58190509 1.74307727]]


(1010, 4)

In [12]:
explanation = lore_obj.explain_instance(x, samples=1000, nbr_runs=10, verbose=True)

print(explanation)

generating neighborhood - ngmusx
synthetic neighborhood class counts {'Iris-versicolor': 673, 'Iris-virginica': 337}
r = { petal_width <= 1.70 } --> { class: Iris-versicolor }
c = { { petal_width > 1.70, sepal_length > 6.15 } }


In [13]:
print('x = %s' % record2str(x, feature_names, numeric_columns))

x = { sepal_length = 6.0, sepal_width = 2.9, petal_length = 4.5, petal_width = 1.5 }


## check the borderline

In [74]:
temp_x = x.copy()

In [4]:
features_map

defaultdict(dict,
            {0: {'sepal_length': 0},
             1: {'sepal_width': 1},
             2: {'petal_length': 2},
             3: {'petal_width': 3}})

* First, I changed the petal width by increasing it by 1 to be more than 1.70

In [75]:
# c = petal_width > 1.70
temp_x[3] = temp_x[3]+1
print('x = %s' % record2str(temp_x, feature_names, numeric_columns))

x = { sepal_length = 6.0, sepal_width = 2.9, petal_length = 4.5, petal_width = 2.5 }


* Now we check the probability of each classes and we see that they changed

In [76]:
print(class_values)
print(blackbox.predict_proba(temp_x.reshape(1,-1))[0])
print(class_values[blackbox.predict(temp_x.reshape(1,-1))[0]])

['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
[0.   0.55 0.45]
Iris-versicolor


In [77]:
explanation_temp = lore_obj.explain_instance(temp_x, samples=1000, nbr_runs=10, verbose=True)

print(explanation_temp)

generating neighborhood - ngmusx
synthetic neighborhood class counts {'Iris-versicolor': 714, 'Iris-virginica': 296}
r = { sepal_length <= 6.05, sepal_width > 2.82 } --> { class: Iris-versicolor }
c = { { sepal_length > 6.05, sepal_width <= 2.85 } }


* Then, I changed the sepal length value to be more than 6.15

In [78]:
# c = sepal_length > 6.15
temp_x[0] = 6.5
print('x = %s' % record2str(temp_x, feature_names, numeric_columns))

x = { sepal_length = 6.5, sepal_width = 2.9, petal_length = 4.5, petal_width = 2.5 }


In [79]:
print(class_values)
print(blackbox.predict_proba(temp_x.reshape(1,-1))[0])
print(class_values[blackbox.predict(temp_x.reshape(1,-1))[0]])

['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
[0.   0.43 0.57]
Iris-virginica


* We can see here the class has been changed from Iris-versicolor to Iris-virginica

In [80]:
explanation_temp2 = lore_obj.explain_instance(temp_x, samples=1000, nbr_runs=10, verbose=True)

print(explanation_temp2)

generating neighborhood - ngmusx
synthetic neighborhood class counts {'Iris-versicolor': 469, 'Iris-virginica': 541}
r = { sepal_width <= 3.10, petal_length > 4.41 } --> { class: Iris-virginica }
c = { { sepal_width > 3.10 } }
