In [1]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from IPython.display import HTML, display
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split

from captum_explainers import explainer_attributes

# NOTE(review): `nn_model` and `get_correct_predictions` are used below but are
# not imported explicitly anywhere in this notebook, so they presumably come
# from this wildcard import -- consider importing them by name.
from src.baseline_experiments import *

from mountaineer import Mountaineer
from gale import create_mapper, bootstrap_mapper_params

  from .autonotebook import tqdm as notebook_tqdm


## Import Diabetes dataset

In [2]:
# Seed NumPy and PyTorch up front so later stochastic steps are repeatable.
np.random.seed(42)
torch.manual_seed(42)

# Read the raw table and discard the "p_id" column before building the features.
df = pd.read_csv("./dataset/diabetes.csv").drop(columns=["p_id"])

# Separate the "diabetes" label column from the remaining feature columns.
y = df["diabetes"]
X = df.drop(columns=["diabetes"])

## Train a neural network model with two hidden layers

In [3]:
# Training settings consumed when the data loader and the model are created.
learning_rate = 4e-3
num_epochs = 80
batch_size = 16

# NOTE(review): N and shap_sample_size are not referenced in the visible cells;
# presumably sampling sizes for the explainers -- confirm before changing.
N = 100
shap_sample_size = 10

# Imputation type; possible values: 'blur', 'mean', 'zero'.
imputation_typ = "blur"


In [4]:
# Hold out 33% of the rows for testing; the split itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Distinct labels in the training split and how often each occurs.
unique, counts = np.unique(y_train, return_counts=True)

# Single-output binary setup: the targets become a float column vector.
averaging = 'binary'
output_dim = 1
y_train_tens = torch.tensor(y_train.to_numpy()).view(-1, 1).float()

# Float feature tensors for both splits.
X_train_tens = torch.tensor(X_train.to_numpy()).float()
X_test_tens = torch.tensor(X_test.to_numpy()).float()

device = torch.device("cpu")

# Training batches are served in a fixed order (shuffling is disabled).
dataset_tens = torch.utils.data.TensorDataset(X_train_tens, y_train_tens)
train_iter = torch.utils.data.DataLoader(
    dataset_tens,
    batch_size=batch_size,
    shuffle=False,
)

print("Try to load the model..")
# NOTE(review): presumably returns a stored model when one exists and trains
# one otherwise (the recorded output shows a training run) -- confirm in nn_model.
model = nn_model.get_model(
    device,
    train_iter,
    X_train.shape[1],
    output_dim,
    averaging,
    learning_rate,
    num_epochs,
)

Try to load the model..
Train model
Epoch 020: | Loss: 0.33728 | Acc: 84.038 | F1: 74.58514
Epoch 040: | Loss: 0.26124 | Acc: 89.538 | F1: 84.58061
Epoch 060: | Loss: 0.16321 | Acc: 93.500 | F1: 90.22665
Epoch 080: | Loss: 0.17705 | Acc: 93.885 | F1: 90.55816
Set to eval


In [5]:
# Float tensor of the test features (same construction as right after the split).
X_test_tens = torch.tensor(X_test.to_numpy()).float()

# Features and labels are passed with a fresh 0..n-1 index. Judging by the
# recorded output, the helper returns the correctly predicted test samples.
X_test_c, Y_test_c = get_correct_predictions(
    model,
    X_test.reset_index(drop=True),
    X_test_tens,
    y_test.reset_index(drop=True),
    averaging,
)

# float32 tensor view of the returned feature frame.
X_test_c_tens = torch.from_numpy(X_test_c.to_numpy(dtype=np.float32))

X_test of correct predictions shape: (145, 8)
Y_test of correct predictions shape: (145,)
Label in Y_test of correct predictions ratio: 
 [[  0 104]
 [  1  41]]


In [6]:
# Apply a sigmoid to the model outputs for the whole test set, then detach
# from the autograd graph so the result can be converted to a NumPy array.
predictions = (
    torch.sigmoid(model(X_test_tens))
    .detach()
    .numpy()
)

## Compute explanations

In [7]:
# Attributions for the full test set; the result is later iterated by key,
# one entry per explanation method.
exp_dict = explainer_attributes(
    model,
    X_test_tens,
    n_perturb=500,
)

In [8]:
# Lens passed to the mapper routines: each prediction row squeezed, then stacked.
function = np.array(list(map(np.squeeze, predictions)))

# When True, a mapper is also built on the raw test features ('Original').
original_mapper = True

In [17]:
# Location of the cached, bootstrapped mapper parameters.
params_str = "dataset/diabetes/diabetes_params_mapper.p"
params_path = Path(params_str)

if params_path.is_file():
    # NOTE(security): unpickling can execute arbitrary code -- only load cache
    # files that were produced by this notebook.
    # NOTE: the cache is loaded as-is; if it was written with a different set
    # of explainers or with `original_mapper` disabled, delete the file to
    # force a recomputation.
    with params_path.open('rb') as cache_file:
        params_boots_fix = pickle.load(cache_file)

else:
    # Candidate grid handed to the bootstrap parameter search.
    resolutions = [5, 10, 15, 20]
    gains = [0.3, 0.35, 0.4]
    distances = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]

    params_boots_fix = {}

    if original_mapper:
        params_boots_fix['Original'] = bootstrap_mapper_params(
            X_test_tens.numpy(), function, resolutions, gains,
            distances=distances, ci=0.95)

    for exp in exp_dict.keys():
        params_boots_fix[exp] = bootstrap_mapper_params(
            exp_dict[exp], function, resolutions, gains,
            distances=distances, ci=0.95)

    # Create the cache directory if needed so the dump cannot fail after the
    # search has already been run, and close the file deterministically.
    params_path.parent.mkdir(parents=True, exist_ok=True)
    with params_path.open("wb") as cache_file:
        pickle.dump(params_boots_fix, cache_file)

for mode, params in params_boots_fix.items():
    print(f"Params {mode}: {params}")

Params Original: {'stability': 0.01248645782470681, 'components': 3, 'resolution': 5, 'gain': 0.3, 'distance_threshold': 0.4}
Params Vanilla Gradient: {'stability': 0.008554775267839463, 'components': 3, 'resolution': 10, 'gain': 0.35, 'distance_threshold': 0.15}
Params Gradient x Input: {'stability': 0.014622390270233154, 'components': 3, 'resolution': 10, 'gain': 0.35, 'distance_threshold': 0.2}
Params Occlusion: {'stability': 0.028735458850860596, 'components': 4, 'resolution': 5, 'gain': 0.3, 'distance_threshold': 0.4}
Params Guided Backprop: {'stability': 0.011977085843682386, 'components': 3, 'resolution': 5, 'gain': 0.3, 'distance_threshold': 0.4}
Params LIME: {'stability': 0.014517366886139027, 'components': 3, 'resolution': 5, 'gain': 0.35, 'distance_threshold': 0.4}
Params KernelSHAP: {'stability': 0.013882311061024666, 'components': 2, 'resolution': 5, 'gain': 0.3, 'distance_threshold': 0.4}
Params SmoothGrad: {'stability': 0.012235535308718546, 'components': 3, 'resolution'

In [18]:
def _build_mapper(points, params):
    """Create one mapper over `points` with the shared lens `function`.

    `params` is one entry of `params_boots_fix`; its 'resolution', 'gain' and
    'distance_threshold' values are forwarded to `create_mapper`.
    """
    return create_mapper(
        points,
        function,
        resolution=params['resolution'],
        gain=params['gain'],
        dist_thresh=params['distance_threshold'],
    )


mappers = {}

# Optional mapper on the raw test features, inserted first.
if original_mapper:
    mappers['Original'] = _build_mapper(X_test_tens.numpy(), params_boots_fix['Original'])

# One mapper per explanation method, in the order of `exp_dict`.
for exp in exp_dict.keys():
    mappers[exp] = _build_mapper(exp_dict[exp], params_boots_fix[exp])

In [19]:
# List of mapper outputs (minimum 2), in the insertion order of `mappers`.
mapper_outputs = list(mappers.values())

# Point clouds in that same order: the raw test features first (only when the
# 'Original' mapper is enabled), followed by one entry per explanation method.
explanation_vectors = [X_test_tens.numpy()] if original_mapper else []
explanation_vectors.extend(exp_dict.values())

# Converted to plain nested lists.
explanation_list = [vectors.tolist() for vectors in explanation_vectors]

expl_labels = list(mappers)
class_labels = {1: 'Diabetic', 0: "Non Diabetic"}
predicted_prob = np.array([np.squeeze(row) for row in predictions])

# Values the view should be coloured by, by default; here this is simply the
# lens, i.e. the prediction probabilities.
color_values = [function]

# Column names of the feature dataframe.
column_names = np.array(X.columns)

In [20]:
# `display` and `HTML` are imported in the notebook's import cell rather than
# here, so all dependencies are visible in one place.
# Widen the notebook container so the visualization has room to render.
display(HTML("<style>.container { width:1920px !important; }</style>"))

# Visualize: test features and labels, prediction probabilities, the
# per-method explanation lists with their mappers, and the display labels.
mnt = Mountaineer()
mnt.visualize(
    X_test.to_numpy(),
    y_test.to_numpy(),
    predicted_prob,
    explanation_list,
    mapper_outputs,
    column_names,
    expl_labels,
    class_labels,
)