In [None]:
import numpy as np
import pandas as pd


from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier as RFC

from aif360.datasets import MEPSDataset19

import aix360
from aix360.algorithms.protodash import ProtodashExplainer

In [None]:
# Instantiate aif360's MEPSDataset19. Its features, labels, feature_names and
# label_names attributes are read by the cells below.
med_data19 = MEPSDataset19()

In [None]:
# Hold out a test set. Stratifying on the labels keeps the class balance the
# same in both splits; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    med_data19.features,
    med_data19.labels,
    stratify=med_data19.labels,
    random_state=0,
)

In [None]:
# Display the label name(s) stored on the dataset object.
med_data19.label_names

In [None]:
# Wrap the training features in a DataFrame whose columns carry the dataset's
# feature names, then preview the first rows.
X_train = pd.DataFrame(X_train, columns=med_data19.feature_names)
X_train.head()

In [None]:
# Same treatment for the held-out features: a DataFrame labelled with the
# dataset's feature names.
X_test = pd.DataFrame(X_test, columns=med_data19.feature_names)

## ProtoDash
### p 229

### Preparing the data (p. 230)

In [None]:
## p 230
# Stack train and test rows into one matrix and record each feature's
# minimum and maximum over all of those rows.
X_scale = np.vstack((X_train, X_test))
Xmin, Xmax = X_scale.min(axis=0), X_scale.max(axis=0)

In [None]:
# Min-max scale every feature to [0, 1], then shift it to [-0.5, 0.5].
# The stacked matrix is rebuilt here instead of being read back from X_scale,
# so re-running this cell does not rescale data that is already scaled.
X_unscaled = np.vstack((X_train, X_test))
# A constant feature has Xmax == Xmin; divide by 1 there so the result is a
# finite -0.5 rather than 0/0 = NaN.
feature_range = np.where(Xmax > Xmin, Xmax - Xmin, 1.0)
X_scale = (X_unscaled - Xmin) / feature_range - 0.5

In [None]:
# The stacked matrix was built as (train rows, then test rows), so the first
# len(X_train) rows are the training part and the remainder is the test part.
X_scale_train, X_scale_test = X_scale[:len(X_train)], X_scale[len(X_train):]

## Train a model

In [None]:
## p 230
# Random forest of 200 trees, each limited to depth 4. random_state is fixed
# (matching the seeded train/test split) so the fitted model, its predictions,
# and therefore the prototype search further down are the same on every run.
rf = RFC(n_estimators=200, max_depth=4, random_state=0)
# y_train is flattened with ravel() before being passed as the target vector.
rf.fit(X_scale_train, y_train.ravel())

In [None]:
# Accuracy on the rows the forest was fitted on versus the held-out rows.
train_accuracy = accuracy_score(y_train, rf.predict(X_scale_train))
test_accuracy = accuracy_score(y_test, rf.predict(X_scale_test))
print('Train accuracy: %0.2f      Test accuracy: %0.2f' % (train_accuracy, test_accuracy))

## Select prototypes similar to a specific data point

In [None]:
## p 231

## score the test rows so they can be grouped by the class the model assigns
predicted_vals = rf.predict(X_scale_test)
results_df = pd.DataFrame(
    np.hstack([X_scale_test, predicted_vals[:, np.newaxis]]),
    columns=list(X_test.columns) + ["Class"],
)

## candidate pool: rows predicted as UTILIZATION == 0.0 (a binary 0/1 variable)
base_dataset = results_df.loc[results_df["Class"] == 0.0].to_numpy()

## the row to explain, kept two-dimensional with shape (1, n_columns)
selected_example_idx = 5
data_to_explain = base_dataset[selected_example_idx].reshape(1, -1)

## drop that row from the pool so it is not among the candidates
base_dataset = np.delete(base_dataset, selected_example_idx, axis=0)

## number of prototypes to request
num_prototypes = 5

In [None]:
## p 231
# Run ProtoDash: ask for num_prototypes rows of base_dataset that best
# represent data_to_explain.
# NOTE(review): W is used below as the per-prototype weights and S as row
# indices; S presumably indexes base_dataset (the pool passed here) -- confirm
# against the aix360 documentation. The third return value is discarded.
exp = ProtodashExplainer()
(W, S, _) = exp.explain(data_to_explain, base_dataset, m = num_prototypes)

In [None]:
# Display W, the first value returned by exp.explain.
W

In [None]:
# Display S, the second value returned by exp.explain.
S

In [None]:
## p 232
# S holds row positions in base_dataset (the pool handed to exp.explain), so
# the prototypes must be read from base_dataset. Indexing results_df with S
# would return unrelated rows, because results_df still contains the rows with
# Class != 0 and the explained example, both removed before the search.
dfs = pd.DataFrame(
    base_dataset[S, :-1].astype('double'),  # all columns except the trailing "Class"
    columns=X_test.columns,
)
# Every row of base_dataset was predicted as class 0, hence the constant.
# Adding the column by name avoids relying on a hard-coded position (138).
dfs["Utilization"] = 0

In [None]:
# Normalised importance weights: round W to 4 decimals, then divide by the
# sum of the rounded values so the weights add up to 1.
rounded_weights = np.around(W, 4)
dfs["Weight"] = rounded_weights / np.sum(rounded_weights)

In [None]:
# Put the explained example (a single-row 2-D array) into a DataFrame with
# default integer column labels.
x_row = pd.DataFrame(data_to_explain)

In [None]:
# Add a column labelled 139 holding the constant 100; after x_row's columns are
# renamed to dfs.columns this column lines up with "Weight".
# NOTE(review): assumes x_row has exactly 139 columns (138 features + Class)
# before this line, and 100 looks like a sentinel that makes the explained row
# sort first when ordering by Weight descending -- confirm.
x_row[139] = 100

In [None]:
# Display the column labels of dfs (they are copied onto x_row next).
dfs.columns

In [None]:
# Replace x_row's integer column labels with dfs's labels so the two frames
# align column-for-column when combined; both must have the same column count.
x_row.columns = dfs.columns

In [None]:
# Add the explained example as an extra row beneath the prototypes.
# DataFrame.append was removed in pandas 2.0; pd.concat with its defaults
# (original index labels kept, columns not sorted) is the equivalent.
# NOTE: this rebinds dfs, so re-running the cell adds the explained row again.
dfs = pd.concat([dfs, x_row])

In [None]:
## p 232
## order rows from the largest Weight to the smallest
dfs = dfs.sort_values("Weight", ascending=False)

In [None]:
# Write the table transposed (one column per row of dfs) to
# "protodash_results.csv" in the current working directory.
dfs.transpose().to_csv( "protodash_results.csv")

In [None]:
# Display the same transposed table inline.
dfs.transpose()