In [1]:
import pickle
import time
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from captum_explainers import explainer_attributes

# The star import stays ahead of the explicit imports below, as in the
# original cell, so any name it exports is still overridden by them.
from src.baseline_experiments import *

from mountaineer import Mountaineer
from gale import create_mapper, bootstrap_mapper_params

  from .autonotebook import tqdm as notebook_tqdm


## Load data and explanations

In [2]:
class FFNNClassification(nn.Module):
    """Feed-forward binary classifier: Linear-ReLU-Linear-ReLU-Linear-Sigmoid.

    Args:
        input_dim: number of input features per sample.
        n_nodes_per_layer: width of both hidden layers (default 8).

    The forward pass returns one sigmoid-activated value per input row.
    """

    def __init__(self, input_dim, n_nodes_per_layer=8):
        super().__init__()
        self.n_nodes_per_layer = n_nodes_per_layer
        width = self.n_nodes_per_layer

        # Attribute names and creation order are kept as-is: they define the
        # state_dict keys that saved checkpoints are matched against.
        self.linear1 = nn.Linear(input_dim, width)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(width, width)
        self.relu2 = nn.ReLU()
        self.linear3 = nn.Linear(width, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the three linear layers and return the sigmoid output."""
        hidden = self.relu1(self.linear1(x))
        hidden = self.relu2(self.linear2(hidden))
        return self.sigmoid(self.linear3(hidden))

In [3]:
### 1. Load data and model
data_name = 'heloc'
task = 'classif'
model_name = 'ffnnA'
subset = False  # True -> read the "-subset" file (subset of test points)

test_size = 0.20
# round() instead of int(): int() truncates, so a float product such as
# 0.29 * 100 == 28.999999999999996 would silently become 28.
test_str = round(test_size * 100)

# Same two file names as before; only the optional "-subset" suffix differs.
subset_suffix = '-subset' if subset else ''
filepath = f'dataset/{data_name}/{data_name}-clean-test{test_str}-normalized{subset_suffix}.csv'

# The first CSV row is the header (skipped here, read separately below).
data = np.loadtxt(filepath, delimiter=',', dtype=np.float64, skiprows=1)
data = torch.from_numpy(data).float()
X = data[:, 0:-1]  # every column except the last
y = data[:, -1]    # last column

# Re-read just the top of the file to recover the column names.
columns_df = pd.read_csv(filepath, nrows=1)
col_names = columns_df.columns

model_f = FFNNClassification(input_dim=X.shape[1])
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints from a trusted source.
checkpoint = torch.load(f'dataset/{data_name}/{task}_{model_name}_{data_name}_ckpt.pth', map_location=torch.device('cpu'))
model_f.load_state_dict(checkpoint['model'])

<All keys matched successfully>

In [4]:
# Explanations are cached on disk: load them when the pickle exists,
# otherwise compute them once and store them for later runs.
if subset:
    exp_dic_str = "dataset/heloc/heloc_exp_dict_subset.p"
else:
    exp_dic_str = "dataset/heloc/heloc_exp_dict.p"

if Path(exp_dic_str).is_file():
    # NOTE(review): pickle.load can execute arbitrary code -- only use cache
    # files produced by this project.
    # `with` closes the handle deterministically (the bare open() leaked it).
    with open(exp_dic_str, 'rb') as exp_file:
        exp_dict = pickle.load(exp_file)

else:
    exp_dict = explainer_attributes(model_f, X, n_perturb = 500)
    with open(exp_dic_str, "wb") as exp_file:
        pickle.dump(exp_dict, exp_file)

## Creating the mapper outputs

In [5]:
# Call the module itself rather than .forward() so nn.Module.__call__ runs
# (it dispatches any registered hooks). Only the values are needed here, so
# skip building the autograd graph.
with torch.no_grad():
    predictions = model_f(X)

# Flatten the per-row outputs to a 1-D array in one vectorized step instead
# of converting row by row; np.array() copies so the result does not share
# memory with the tensor.
function = np.array(predictions.detach().numpy().reshape(-1))

X_np = X.detach().numpy()

original_mapper = False

In [6]:
# Candidate mapper hyper-parameters searched by bootstrap_mapper_params.
gains = [0.1, 0.2, 0.3, 0.4, 0.5]
resolutions = list(range(10, 31))
distances = [0.1, 0.2, 0.3, 0.4, 0.5]

heloc_params_str = "dataset/heloc-gale/heloc_params_mapper.p"
params_path = Path(heloc_params_str)

best_params = {}
mappers = {}
exec_times = {}  # exec_times[explainer][step] -> elapsed seconds

# Re-use previously bootstrapped parameters when a cache file exists.
# NOTE(review): pickle.load can execute arbitrary code -- trusted files only.
if params_path.is_file():
    with open(params_path, 'rb') as params_file:
        best_params = pickle.load(params_file)

params_updated = False

for exp in exp_dict.keys():
    exec_times[exp] = {}

    # Decide per explainer rather than per file: a cache that lacks this
    # explainer no longer raises KeyError below, it is simply completed.
    if exp not in best_params:

        start_time = time.time()

        best_params[exp] = bootstrap_mapper_params(exp_dict[exp], function,
                            resolutions=resolutions,
                            gains=gains,
                            distances=distances,
                            n=100,
                            n_jobs=-1)

        exec_times[exp]['Bootstrap parameters'] = time.time()-start_time
        params_updated = True
        print(f'Time: {exec_times[exp]["Bootstrap parameters"]}')

    print(f'Stability for {exp}: {best_params[exp]["stability"]:.5f} | Resolution: {best_params[exp]["resolution"]}')

    start_time = time.time()
    mappers[exp] = create_mapper(exp_dict[exp], function,
                                 resolution=best_params[exp]['resolution'],
                                 gain=best_params[exp]['gain'],
                                 dist_thresh=best_params[exp]['distance_threshold'])
    # exec_times is always a dict here, so the former "is None" guard was dead.
    exec_times[exp]['Build mapper'] = time.time()-start_time

# Persist only when something new was computed. Create the directory first so
# a missing folder cannot throw away the (long) bootstrap results.
if params_updated:
    params_path.parent.mkdir(parents=True, exist_ok=True)
    with open(params_path, "wb") as params_file:
        pickle.dump(best_params, params_file)

Time: 263.57509326934814
Stability for Vanilla Gradient: 0.00509 | Resolution: 15
Time: 387.3916165828705
Stability for Gradient x Input: 0.00555 | Resolution: 10
Time: 794.0999248027802
Stability for Occlusion: 0.00509 | Resolution: 15
Time: 236.69469046592712
Stability for Guided Backprop: 0.00509 | Resolution: 15
Time: 1308.5162737369537
Stability for LIME: 0.00509 | Resolution: 15
Time: 1448.2249104976654
Stability for KernelSHAP: 0.00509 | Resolution: 15
Time: 257.4611392021179
Stability for SmoothGrad: 0.00476 | Resolution: 13
Time: 553.4122791290283
Stability for Integrated Gradients: 0.00509 | Resolution: 15


In [7]:
# Display the elapsed times (in seconds) recorded per explainer and step.
exec_times

{'Vanilla Gradient': {'Bootstrap parameters': 263.57509326934814,
  'Build mapper': 0.030805110931396484},
 'Gradient x Input': {'Bootstrap parameters': 387.3916165828705,
  'Build mapper': 0.02549433708190918},
 'Occlusion': {'Bootstrap parameters': 794.0999248027802,
  'Build mapper': 0.025959253311157227},
 'Guided Backprop': {'Bootstrap parameters': 236.69469046592712,
  'Build mapper': 0.026262760162353516},
 'LIME': {'Bootstrap parameters': 1308.5162737369537,
  'Build mapper': 0.026154279708862305},
 'KernelSHAP': {'Bootstrap parameters': 1448.2249104976654,
  'Build mapper': 0.02621769905090332},
 'SmoothGrad': {'Bootstrap parameters': 257.4611392021179,
  'Build mapper': 0.019977807998657227},
 'Integrated Gradients': {'Bootstrap parameters': 553.4122791290283,
  'Build mapper': 0.026558399200439453}}

In [8]:
exec_time_str = "dataset/heloc-gale/heloc_exec_time.p"
exec_time_path = Path(exec_time_str)

# Timings are complete only when the bootstrap step actually ran for every
# explainer; a run that re-used cached parameters records 'Build mapper' only.
has_bootstrap_timings = bool(exec_times) and all(
    'Bootstrap parameters' in timings for timings in exec_times.values()
)

if has_bootstrap_timings or (exec_times is not None and not exec_time_path.is_file()):
    # Fresh measurements (or nothing saved yet): write them out.
    exec_time_path.parent.mkdir(parents=True, exist_ok=True)
    with open(exec_time_path, "wb") as exec_time_file:
        pickle.dump(exec_times, exec_time_file)
else:
    # Keep the previously saved full timings instead of overwriting them with
    # partial ones. The loaded value is now assigned; it used to be discarded.
    # NOTE(review): pickle.load can execute arbitrary code -- trusted files only.
    with open(exec_time_path, 'rb') as exec_time_file:
        exec_times = pickle.load(exec_time_file)

exec_times

{'Vanilla Gradient': {'Bootstrap parameters': 263.57509326934814,
  'Build mapper': 0.030805110931396484},
 'Gradient x Input': {'Bootstrap parameters': 387.3916165828705,
  'Build mapper': 0.02549433708190918},
 'Occlusion': {'Bootstrap parameters': 794.0999248027802,
  'Build mapper': 0.025959253311157227},
 'Guided Backprop': {'Bootstrap parameters': 236.69469046592712,
  'Build mapper': 0.026262760162353516},
 'LIME': {'Bootstrap parameters': 1308.5162737369537,
  'Build mapper': 0.026154279708862305},
 'KernelSHAP': {'Bootstrap parameters': 1448.2249104976654,
  'Build mapper': 0.02621769905090332},
 'SmoothGrad': {'Bootstrap parameters': 257.4611392021179,
  'Build mapper': 0.019977807998657227},
 'Integrated Gradients': {'Bootstrap parameters': 553.4122791290283,
  'Build mapper': 0.026558399200439453}}

## Visualize

In [9]:
#list of mapper outputs - minimum 2
exp_list = ['LIME', 'KernelSHAP', 'Occlusion', 'Integrated Gradients',
           'Gradient x Input', 'Vanilla Gradient', 'SmoothGrad']

mapper_outputs=[mappers[mode] for mode in exp_list]

explanation_vectors=[]
for exp in exp_list:
    explanation_vectors.append(exp_dict[exp])

explanation_list=[]
for expl in explanation_vectors:
    explanation_list.append(expl.tolist())

expl_labels = exp_list
class_labels = {1:'Payed', 0:"Not Payed"}
predicted_prob = np.array([np.squeeze(i.detach().numpy()) for i in predictions])

color_values = [function]

#column names of the dataframe
column_names = np.array(col_names[:-1])

In [10]:
from IPython.display import display, HTML
# Inject CSS so the notebook's .container element spans the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

#visualize
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'dict' object has no attribute 'node_info_'". Presumably
# the entries of mapper_outputs (built by gale.create_mapper) are dicts while
# the installed Mountaineer expects objects exposing `node_info_` -- confirm
# that the mountaineer and gale versions are compatible before re-running.
test = Mountaineer()

test.visualize(X_np, y.numpy(), predicted_prob, explanation_list, mapper_outputs, column_names, 
              expl_labels, class_labels, kamada_layout=True)

AttributeError: 'dict' object has no attribute 'node_info_'