# Testing LORE

In [1]:
import warnings
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score

from lore_explainer.util import record2str, neuclidean
from lore_explainer.datamanager import prepare_adult_dataset, prepare_dataset
from lore_explainer.lorem import LOREM

# Load the Adult dataset and run it through prepare_dataset.
# FutureWarnings are silenced only inside this block; the previous warning
# filters are restored when the context manager exits.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', FutureWarning)

    df, class_name = prepare_adult_dataset('rule_loaders/adult.csv')
    (
        df,
        feature_names,
        class_values,
        numeric_columns,
        df_orig,
        real_feature_names,
        features_map,
    ) = prepare_dataset(df, class_name)

In [2]:
test_size = 0.30
random_state = 42

# Split the features/labels from `df` and the features from `df_orig` in ONE
# call. train_test_split applies the same shuffled, stratified row indices to
# every array it receives, so X_test and X_test_orig are guaranteed to contain
# the same records in the same order. Previously this relied on two separate
# calls being kept in sync by hand (same test_size, random_state and stratify).
# The train part of the df_orig split is not needed and is discarded into `_`.
X_train, X_test, Y_train, Y_test, _, X_test_orig = train_test_split(
    df[feature_names].values,
    df[class_name].values,
    df_orig[real_feature_names].values,
    test_size=test_size,
    random_state=random_state,
    stratify=df[class_name].values,
)

### Train a black box classifier

In [3]:
# Black-box model whose predictions are explained further down.
bb = RandomForestClassifier(
    n_estimators=100,
    random_state=random_state,
)
# Alternative black box (MLPClassifier is not imported in this notebook; it
# would have to be imported before this line can be enabled):
# bb = MLPClassifier(random_state=random_state)
bb.fit(X_train, Y_train)

In [30]:
def bb_predict(X):
    """Return the black-box predictions for the records in ``X``.

    Thin wrapper that forwards ``X`` unchanged to ``bb.predict`` (``bb`` is
    the module-level fitted classifier) and returns its result as-is.
    """
    predicted_labels = bb.predict(X)
    return predicted_labels

def bb_predict_proba(X):
    """Return the black-box class probabilities for the records in ``X``.

    Thin wrapper that forwards ``X`` unchanged to ``bb.predict_proba``
    (``bb`` is the module-level fitted classifier) and returns its result
    as-is.
    """
    class_probabilities = bb.predict_proba(X)
    return class_probabilities

In [31]:
# Score the black box on the held-out test split.
Y_pred = bb_predict(X_test)

# One print call, one metric per line.
print(
    'Accuracy %.3f' % accuracy_score(Y_test, Y_pred),
    'F1-measure %.3f' % f1_score(Y_test, Y_pred),
    sep='\n',
)

Accuracy 0.846
F1-measure 0.664


### Select a record to explain

In [42]:
# Position (within the test split) of the record to explain.
id_to_exp = 3
x_to_exp = X_test[id_to_exp]

print('x = %s' % record2str(x_to_exp, feature_names, numeric_columns), "\n")

# The record is a single row, so add a leading axis to pass it to the black
# box as a one-row batch, then take the only prediction back out.
bb_outcome = bb_predict(x_to_exp[np.newaxis, :])[0]
# Look up the class name for the predicted value.
bb_outcome_str = class_values[bb_outcome]

print('bb(x) = { %s }' % bb_outcome_str, "\n")

x = { age = 36, capital-gain = 0, capital-loss = 0, hours-per-week = 48, workclass = Private, education = Some-college, marital-status = Married-civ-spouse, occupation = Machine-op-inspct, relationship = Husband, race = White, sex = Male, native-country = United-States } 

bb(x) = { <=50K } 



## LORE

In [70]:
# Build the LORE explainer from the df_orig test records, the black-box
# prediction callables and the dataset metadata returned by prepare_dataset.
lore_explainer = LOREM(
    X_test_orig,
    bb_predict,
    feature_names,
    class_name,
    class_values,
    numeric_columns,
    features_map,
    neigh_type='geneticp',
    categorical_use_prob=True,
    continuous_fun_estimation=False,
    size=1000,
    ocr=0.1,
    random_state=random_state,
    ngen=10,
    bb_predict_proba=bb_predict_proba,
    verbose=True,
)

calculating feature values


In [71]:
# Explain the selected record. RuntimeWarnings are silenced only inside this
# block; the previous warning filters are restored on exit.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', RuntimeWarning)

    exp = lore_explainer.explain_instance(
        x_to_exp,
        samples=50,
        use_weights=True,
        metric=neuclidean,
    )

generating neighborhood - geneticp
gen	nevals	avg     	min     	max     
0  	25    	0.496654	0.496654	0.496654
1  	18    	0.725575	0.496654	0.993298
2  	21    	0.90626 	0.496654	0.993298
3  	21    	0.976459	0.74657 	0.993274
4  	24    	0.984228	0.790591	0.993283
5  	22    	0.980551	0.745841	0.993286
6  	21    	0.963646	0.746116	0.993283
7  	22    	0.943535	0.596852	0.993283
8  	23    	0.949296	0.393308	0.993283
9  	17    	0.962851	0.468206	0.993283
10 	21    	0.972992	0.739712	0.993283
gen	nevals	avg	min	max
0  	25    	0.5	0.5	0.5
1  	21    	0.47388	0.254707	0.503609
2  	21    	0.479889	0.253899	0.504757
3  	21    	0.496015	0.355478	0.509871
4  	17    	0.506034	0.499312	0.564051
5  	22    	0.503747	0.358823	0.626791
6  	24    	0.503148	0.305765	0.626791
7  	24    	0.496712	0.261455	0.53167 
8  	22    	0.495427	0.299858	0.53167 
9  	20    	0.490874	0.255094	0.572539
10 	20    	0.517108	0.498894	0.572539
synthetic neighborhood class counts {'<=50K': 32, '>50K': 27}
learning local decisio

In [165]:
# Counterfactual rules: they describe how slight changes in the input features could lead to a different decision.
# Each rule in self.crules corresponds to a different counterfactual scenario.
# Note: these rules are not used by GlocalX.
print(exp.cstr())

{ { age > 38.00 } --> { class: >50K }, { occupation = Prof-specialty } --> { class: >50K } }


In [161]:
# Main rule: the rule used to explain the decision for this instance.
# Note: exp.rstr(), or simply print(exp), gives the same result.
print(exp.rule)

{ relationship != Not-in-family, age <= 38.00, age > 32.50, occupation != Prof-specialty } --> { class: <=50K }


## SHAP

In [None]:
# SHAP explanation (disabled: uncomment the lines below to run; requires the `shap` package)

# import shap

# shap.initjs()

# f = lambda x: bb_predict_proba(x)[:, 1]
# med = np.median(X_test, axis=0).reshape((1, X_test.shape[1]))

# shap_explainer = shap.KernelExplainer(f, med)

# shap_values_single = shap_explainer.shap_values(x_to_exp, nsamples=1000)

# shap.force_plot(shap_explainer.expected_value, shap_values_single, features=x_to_exp, feature_names=feature_names)

# shap_values = shap_explainer.shap_values(X_test[0:100,:], nsamples=1000)

# shap.force_plot(shap_explainer.expected_value, shap_values, X_test[0:100,:], feature_names=feature_names)

# shap.summary_plot(shap_values, X_test[0:100,:], feature_names=feature_names)

## LIME

In [None]:
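# LIME explanation (disabled: uncomment the lines below to run; requires the `lime` package)
# Note: the code below rebinds `exp`, replacing the LORE explanation object created earlier.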

# from lime.lime_tabular import LimeTabularExplainer

# lime_explainer = LimeTabularExplainer(X_test, feature_names=feature_names,
#                                       class_names=class_values, discretize_continuous=False)

# exp = lime_explainer.explain_instance(x_to_exp, bb_predict_proba)

# exp.show_in_notebook()