In [8]:
import numpy as np
import pandas as pd
import pickle
from pathlib import Path

import torch.nn as nn

from captum_explainers import explainer_attributes

from src.baseline_experiments import *

from mountaineer import Mountaineer
from gale import create_mapper, bootstrap_mapper_params

## Load data and explanations

In [15]:
class FFNNClassification(nn.Module):
    def __init__(self, input_dim, n_nodes_per_layer=8):
        super().__init__()
        self.n_nodes_per_layer = n_nodes_per_layer
        
        self.linear1 = nn.Linear(input_dim, self.n_nodes_per_layer)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(self.n_nodes_per_layer, self.n_nodes_per_layer)
        self.relu2 = nn.ReLU()
        self.linear3 = nn.Linear(self.n_nodes_per_layer, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):       
        out = self.linear1(x)
        out = self.relu1(out)
        out = self.linear2(out)
        out = self.relu2(out)
        out = self.linear3(out)
        out = self.sigmoid(out)
        
        return out

In [16]:
###1. load data and model
data_name = 'heloc'
task = 'classif'
model_name = 'ffnnA'
subset = False

test_size = 0.20
test_str = int(test_size*100)

if subset:
    filepath = f'dataset/{data_name}/{data_name}-clean-test{test_str}-normalized-subset.csv' # subset of test points
else:
    filepath = f'dataset/{data_name}/{data_name}-clean-test{test_str}-normalized.csv' #all test set points
data = np.loadtxt(filepath, delimiter=',', dtype=np.float64, skiprows=1)
data = torch.from_numpy(data).float()
X = data[:, 0:-1]
y = data[:, -1]

columns_df = pd.read_csv(filepath, nrows=1)
col_names = columns_df.columns

model_f = FFNNClassification(input_dim=X.shape[1])
checkpoint = torch.load(f'dataset/{data_name}/{task}_{model_name}_{data_name}_ckpt.pth', map_location=torch.device('cpu'))
model_f.load_state_dict(checkpoint['model'])

<All keys matched successfully>

In [17]:
if subset:
    exp_dic_str = "dataset/heloc/heloc_exp_dict_subset.p"
else:
    exp_dic_str = "dataset/heloc/heloc_exp_dict.p"

if Path(exp_dic_str).is_file():
    exp_dict = pickle.load(open(exp_dic_str, 'rb'))

else:
    exp_dict = explainer_attributes(model_f, X, n_perturb = 500)
    pickle.dump(exp_dict, open(exp_dic_str, "wb"))

## Creating the mapper outputs

In [18]:
predictions = model_f.forward(X)
function = np.array([np.squeeze(i.detach().numpy()) for i in predictions])

X_np = X.detach().numpy()

original_mapper = False

In [29]:
if subset:
    heloc_params_str = "dataset/heloc/heloc_params_mapper_subset.p"
else:
    heloc_params_str = "dataset/heloc/heloc_params_mapper.p"

if Path(heloc_params_str).is_file():
    params_boots_fix = pickle.load(open(heloc_params_str, 'rb'))

else:
    resolutions=[15,20,25,30]
    gains=[0.3, 0.35, 0.4]
    distances=None

    params_boots_fix = {}

    if original_mapper:
        params_boots_fix['Original'] =  bootstrap_mapper_params(X_np, predictions, resolutions, gains, distances, ci=0.95)


    for exp in exp_dict.keys():
        params_boots_fix[exp] = bootstrap_mapper_params(exp_dict[exp], predictions, resolutions, gains, distances, ci=0.95)

    pickle.dump(params_boots_fix, open(heloc_params_str, "wb"))

for mode in params_boots_fix.keys():
    print(f"Params {mode}: {params_boots_fix[mode]}")

Params Vanilla Gradient: {'stability': 0.006432756781578064, 'components': 3, 'resolution': 15, 'gain': 0.3, 'distance_threshold': 0.3}
Params Gradient x Input: {'stability': 0.020712420344352722, 'components': 6, 'resolution': 15, 'gain': 0.4, 'distance_threshold': 0.3}
Params Occlusion: {'stability': 0.008117839694023105, 'components': 3, 'resolution': 30, 'gain': 0.4, 'distance_threshold': 0.3}
Params Guided Backprop: {'stability': 0.09285593032836925, 'components': 9, 'resolution': 15, 'gain': 0.4, 'distance_threshold': 0.15}
Params LIME: {'stability': 0.0120716392993927, 'components': 5, 'resolution': 30, 'gain': 0.3, 'distance_threshold': 0.3}
Params KernelSHAP: {'stability': 0.012655198574066162, 'components': 4, 'resolution': 20, 'gain': 0.4, 'distance_threshold': 0.3}
Params SmoothGrad: {'stability': 0.005123555660247803, 'components': 1, 'resolution': 15, 'gain': 0.3, 'distance_threshold': 0.3}
Params Integrated Gradients: {'stability': 0.020418405532836914, 'components': 4, 

In [25]:
mappers = {}

if original_mapper:
    mappers['Original'] = create_mapper(X_np, predictions.detach().numpy(), resolution=params_boots_fix['Original']['resolution'], 
                                        gain=params_boots_fix['Original']['gain'],
                                        dist_thresh=params_boots_fix['Original']['distance_threshold'])

for exp in exp_dict.keys():
    mappers[exp] = create_mapper(exp_dict[exp], predictions.detach().numpy(), resolution=params_boots_fix[exp]['resolution'], 
                                  gain=params_boots_fix[exp]['gain'],
                                  dist_thresh=params_boots_fix[exp]['distance_threshold'])

## Visualize

In [26]:
#list of mapper outputs - minimum 2
mapper_outputs=[mappers[mode] for mode in mappers.keys()]

explanation_vectors=[]
if original_mapper:
    explanation_vectors.append(X_np)
for exp in exp_dict.keys():
    explanation_vectors.append(exp_dict[exp])

explanation_list=[]
for expl in explanation_vectors:
    explanation_list.append(expl.tolist())

expl_labels = list(mappers.keys())
class_labels = {1:'Diabetic', 0:"Non Diabetic"}
predicted_prob = np.array([np.squeeze(i.detach().numpy()) for i in predictions])

color_values = [function]

#column names of the dataframe
column_names = np.array(col_names[:-1])

In [27]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:1920px !important; }</style>"))

#visualize
test = Mountaineer()

test.visualize(X_np, y.numpy(), predicted_prob, explanation_list, mapper_outputs, column_names, 
              expl_labels, class_labels, kamada_layout=True)

In [28]:
type(xe

SyntaxError: unexpected EOF while parsing (514434981.py, line 1)

In [None]:
predictions