In [None]:
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier as RFC

from aif360.datasets import MEPSDataset19

import aix360
import shap

In [None]:
# Instantiate the MEPSDataset19 class imported from aif360.datasets, using its
# default constructor arguments. The resulting object supplies the
# `.features`, `.labels` and `.feature_names` attributes used in later cells.
med_data19 = MEPSDataset19()

## Data

In [None]:
# Partition features and labels into train/test sets. The split is seeded
# (random_state=0) and stratified on the labels themselves.
X_train, X_test, y_train, y_test = train_test_split(
    med_data19.features,
    med_data19.labels,
    stratify=med_data19.labels,
    random_state=0,
)

In [None]:
# Wrap the training features in a DataFrame, labelling the columns with the
# dataset's feature names in a single constructor call.
X_train = pd.DataFrame(X_train, columns=med_data19.feature_names)
# Preview the first rows as the cell's output.
X_train.head()

In [None]:
# Same treatment for the held-out features: a DataFrame whose columns carry
# the dataset's feature names.
X_test = pd.DataFrame(X_test, columns=med_data19.feature_names)

## Build a model that needs explaining

In [None]:
## p 225
# Random forest to be explained: 200 trees, each limited to depth 4.
# random_state is pinned (to the same seed used for the train/test split) so
# that the fitted forest -- and every accuracy figure and SHAP value derived
# from it -- is identical on each Restart & Run All.
rf = RFC(n_estimators=200, max_depth=4, random_state=0)
# ravel() flattens the label array to 1-D before it is handed to fit().
rf.fit(X_train, y_train.ravel())

In [None]:
# Score the fitted forest on each partition, then report both on one line.
train_accuracy = accuracy_score(y_train, rf.predict(X_train))
test_accuracy = accuracy_score(y_test, rf.predict(X_test))
print('Train accuracy: %0.2f      Test accuracy: %0.2f'
      % (train_accuracy, test_accuracy))

## SHAP
### p 223

In [None]:
## not shown in book
# One-off setup call into the shap package. Presumably this loads the
# JavaScript that shap's interactive plots need in the notebook front end --
# TODO confirm against the shap documentation.
shap.initjs()

In [None]:
## p 225
def rf_prob_1(x):
    """Return the forest's predicted probability of class 1 for each row of `x`.

    Parameters
    ----------
    x : 2-D array-like or DataFrame
        Rows to score; passed to ``rf.predict_proba``.

    Returns
    -------
    1-D array
        Column 1 of ``rf.predict_proba(x)``, one value per input row.

    Notes
    -----
    ``rf`` is looked up at call time, so it must already be fitted when this
    function is invoked.
    """
    # If the forest recorded column names when it was fitted and the caller
    # hands over a bare 2-D array, re-attach those names so the model is
    # queried with the same labelled layout it was trained on. This only
    # relabels columns; the values are untouched.
    fitted_names = getattr(rf, 'feature_names_in_', None)
    if (fitted_names is not None
            and not isinstance(x, pd.DataFrame)
            and getattr(x, 'ndim', None) == 2):
        x = pd.DataFrame(x, columns=fitted_names)
    return rf.predict_proba(x)[:, 1]

In [None]:
## p 226
# Kernel explainer for the class-1 probability function. The second argument
# is a 100-row sample of the training frame; the link is set to 'logit'.
ke = shap.KernelExplainer(
    rf_prob_1,
    shap.sample(X_train, 100),
    link='logit',
)

In [None]:
# SHAP values for the first 100 rows of the test frame, with nsamples=100
# passed through to the explainer.
shap_values = ke.shap_values(X_test.iloc[:100], nsamples=100)

In [None]:
## p 227
# Summary plot of the SHAP values against the same 100 test rows they were
# computed for.
shap.summary_plot(shap_values, features=X_test.iloc[:100])

In [None]:
## p 228
# Dependence plot for the 'AGE' column over the same 100 test rows.
shap.dependence_plot('AGE', shap_values, features=X_test.iloc[:100])