In [None]:
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from aix360.algorithms.rbm import FeatureBinarizer, LogisticRuleRegression

from aif360.datasets import MEPSDataset19

import aix360

## Getting some data
### p 208

### Bring in the dataset

In [None]:
## p 208
# Instantiate aif360's MEPSDataset19. The cells below read its .features,
# .labels, .feature_names and .label_names attributes.
med_data19 = MEPSDataset19()

#### Some brief exploration (not shown in the book)

In [None]:
# Dimensions of the feature array; its columns are later labelled with
# med_data19.feature_names.
med_data19.features.shape

In [None]:
# Dimensions of the label array that is used as the prediction target below.
med_data19.labels.shape

In [None]:
# Names attached to the label column(s) -- presumably the outcome variable
# being predicted; confirm against the aif360 dataset documentation.
med_data19.label_names

In [None]:
# Show which class the loaded dataset object is an instance of.
type(med_data19)

In [None]:
# Preview only the first few feature rows instead of echoing the whole array.
med_data19.features[:5]

In [None]:
# Preview only the first few labels instead of echoing the whole array.
med_data19.labels[:5]

#### Back to loading the data and continuing

In [None]:
## p 208
# Split features and labels into train/test partitions. Stratifying on the
# labels keeps the class proportions the same in both partitions, and the
# fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    med_data19.features,
    med_data19.labels,
    stratify=med_data19.labels,
    random_state=0,
)

In [None]:
# Size of the training partition produced by the split.
X_train.shape

In [None]:
# Wrap the training features in a DataFrame whose columns carry the dataset's
# feature names, then preview the first rows.
X_train = pd.DataFrame(X_train, columns=med_data19.feature_names)
X_train.head()

In [None]:
# Same treatment for the test features: a DataFrame labelled with the
# dataset's feature names.
X_test = pd.DataFrame(X_test, columns=med_data19.feature_names)

In [None]:
# Preview the first rows of the labelled test features.
X_test.head()

In [None]:
# Keep only the first label column so y_train is a flat vector.
# The ndim guard makes the cell safe to re-run: once y_train is already 1-D,
# indexing it with [:, 0] would raise IndexError.
if y_train.ndim > 1:
    y_train = y_train[:, 0]

In [None]:
# Keep only the first label column so y_test is a flat vector.
# The ndim guard makes the cell safe to re-run: once y_test is already 1-D,
# indexing it with [:, 0] would raise IndexError.
if y_test.ndim > 1:
    y_test = y_test[:, 0]

## Building interpretable models

In [None]:
## preparation: we need to binarize inputs
## p 209
# returnOrd=True makes fit_transform/transform hand back two objects (they are
# unpacked as a pair wherever feat_bin is applied).
# negations=True presumably also emits the negated form of each binary
# feature -- confirm against the aix360 FeatureBinarizer documentation.
feat_bin = FeatureBinarizer(negations=True, returnOrd=True)

In [None]:
# Fit the binarizer on the training features only, then apply the same fitted
# transformation to the test features.
# NOTE: X_train and X_test are rebound to the binarized output, so this cell
# is not idempotent -- re-running it would feed already-binarized frames back
# into the binarizer. Re-run from the train/test split cell instead.
# NOTE(review): the second return value is presumably the standardized ordinal
# features (per returnOrd=True) -- confirm against the aix360 docs. The two
# names use different conventions (X_train_std vs X_testStd).
X_train, X_train_std = feat_bin.fit_transform(X_train)
X_test, X_testStd    = feat_bin.transform(X_test)

In [None]:
# Preview what the binarizer produced for the AGE feature.
# NOTE(review): presumably the binarized frame has multi-level columns keyed by
# the original feature name, so this selects every AGE-derived column -- confirm.
X_train['AGE'].head()

## LIME 
### p 219

In [None]:
from sklearn.ensemble import RandomForestClassifier as RFC
from aix360.algorithms.lime.lime_wrapper import LimeTabularExplainer

In [None]:
# Un-binarized, un-split feature matrix as a DataFrame labelled with the
# dataset's feature names.
orig_inputs = pd.DataFrame(med_data19.features, columns=med_data19.feature_names)

In [None]:
## p 221
# Raw (un-binarized, un-split) inputs and labels used for the LIME section:
# a DataFrame labelled with the dataset's feature names, plus the label array
# exactly as the dataset stores it.
orig_target = med_data19.labels
orig_inputs = pd.DataFrame(med_data19.features, columns=med_data19.feature_names)

In [None]:
# Fit a 500-tree random forest on the complete (unsplit) dataset.
# random_state pins the forest's internal randomness so that Restart & Run All
# rebuilds the same model; without it every run yields a different forest.
# The labels are flattened with ravel() because fit expects a 1-D target.
rf = RFC(n_estimators=500, random_state=0)
rf.fit(orig_inputs, orig_target.ravel())

In [None]:
# Accuracy measured on the same rows the forest was fitted on, i.e. training
# accuracy rather than a held-out estimate.
train_predictions = rf.predict(orig_inputs)
accuracy_score(orig_target.ravel(), train_predictions)

In [None]:
## p 221
# Column positions that LIME should treat as categorical rather than
# continuous: column 1 plus columns 5..137.
# NOTE(review): these indices are hard-coded for this dataset's column layout --
# confirm they still match orig_inputs.columns if the dataset changes.
cat_idxs = [1] + list(range(5, 138))

# class_names must contain ONE display name per class, ordered by class index.
# Passing the full per-row label array (as before) made LIME look up
# "class names" from the labels of the first few rows. np.unique returns the
# distinct label values in sorted order, which is the order sklearn
# classifiers use for the columns of predict_proba.
ltf = LimeTabularExplainer(orig_inputs.values,
                           feature_names = list(orig_inputs.columns),
                           class_names = [str(c) for c in np.unique(orig_target)],
                           categorical_features = cat_idxs,
                           discretize_continuous = True
                          )

In [None]:
# Explain the forest's prediction for one randomly chosen record; the chosen
# index is printed so the result can be traced back to a row.
i = np.random.randint(0, orig_inputs.values.shape[0])
print("i = %d" % i)
exp = ltf.explain_instance(orig_inputs.values[i], rf.predict_proba, num_features=5, top_labels=1)
# With top_labels=1 the explanation only contains the single class the forest
# ranks highest for this record. Hard-coding label 0 raised KeyError whenever
# that class was 1, so ask the explanation which label it actually holds.
print(exp.as_list(label = exp.available_labels()[0]))

In [None]:
# Class probabilities the forest assigns to record i.
# The row is passed as a one-row DataFrame (same column names the forest was
# fitted with) instead of a bare ndarray, which avoids sklearn's
# "X does not have valid feature names" warning.
rf.predict_proba(orig_inputs.iloc[[i]])

In [None]:
## p 221
# Plot the explanation for whichever class it was computed for. The explanation
# was built with top_labels=1, so only the top-ranked class is present and a
# hard-coded label 0 fails with KeyError when that class is 1.
fig = exp.as_pyplot_figure(label=exp.available_labels()[0])

In [None]:
### true label of record i (the one explained above), for comparison with the
### forest's predicted probabilities; another data point is explained next
orig_target[i]

In [None]:
## p 222
# Explain a fixed record so the result can be compared across runs.
i = 1001
exp2 = ltf.explain_instance(orig_inputs.values[i], rf.predict_proba, num_features=5, top_labels=1)
# top_labels=1 keeps only the class the forest ranks highest for this record,
# which is not guaranteed to equal the true label stored in orig_target[i].
# Indexing by the true label raised KeyError whenever the two differed, so the
# label is taken from the explanation itself.
exp2.as_list(label=exp2.available_labels()[0])

In [None]:
# Plot the explanation for the class it was actually computed for. Calling
# as_pyplot_figure() without a label falls back to label 1, which raises
# KeyError when the single explained class (top_labels=1) is 0.
fig = exp2.as_pyplot_figure(label=exp2.available_labels()[0])