In [1]:
import pickle
import time
from pathlib import Path

import numpy as np
import pandas as pd
import torch

from dataset.adult.ann_model import ANN_softmax

from captum_explainers import explainer_attributes

from src.baseline_experiments import *

from mountaineer import Mountaineer
from gale import create_mapper, bootstrap_mapper_params
from IPython.display import display, HTML

  from .autonotebook import tqdm as notebook_tqdm


## Load data and explanations

In [2]:
### 1. Load data and model
data_name = 'adult'

# Options shared by every np.loadtxt call: comma-separated float64 values,
# with the header row skipped.
loadtxt_opts = dict(delimiter=',', dtype=np.float64, skiprows=1)

# Training split: features are converted to a float tensor, labels stay NumPy.
data = np.loadtxt(f'dataset/{data_name}/X_train_prep.csv', **loadtxt_opts)
X_train = torch.from_numpy(data).float()
y_train = np.loadtxt(f'dataset/{data_name}/y_train.csv', **loadtxt_opts)

# Test split, loaded the same way (`data` now refers to the raw test matrix).
data = np.loadtxt(f'dataset/{data_name}/X_test_prep.csv', **loadtxt_opts)
X_test = torch.from_numpy(data).float()
y_test = np.loadtxt(f'dataset/{data_name}/y_test.csv', **loadtxt_opts)

# The feature names live in the CSV header that np.loadtxt skipped above.
columns_df = pd.read_csv(f'dataset/{data_name}/X_test_prep.csv', nrows=1)
col_names = columns_df.columns

# Summary of split sizes and class balance.
n_train, n_test = X_train.shape[0], X_test.shape[0]
n_features = X_train.shape[1]
train_class0, train_class1 = np.count_nonzero(y_train == 0), np.count_nonzero(y_train == 1)
test_class0, test_class1 = np.count_nonzero(y_test == 0), np.count_nonzero(y_test == 1)

print(f"Number of samples: Train {n_train} | Test {n_test}")
print(f"Number of features: {n_features}")
print(f"Train Class 0: {train_class0} | Class 1: {train_class1}")
print(f"Test Class 0: {test_class0} | Class 1: {test_class1}")

Number of samples: Train 36177 | Test 9045
Number of features: 13
Train Class 0: 27211 | Class 1: 8966
Test Class 0: 6803 | Class 1: 2242


In [3]:
# Rebuild the network with the feature width of the training data and restore
# the trained weights from disk.
model = ANN_softmax(input_layer=X_train.shape[1],
                    hidden_layer_1=100,
                    num_of_classes=2)
# map_location='cpu' keeps the checkpoint loadable on machines that do not
# have the device it was saved from.
model.load_state_dict(torch.load('dataset/adult/adult_lr_0.002_acc_0.83.pt',
                                 map_location='cpu'))
model.eval()

# X_test is already a tensor, so copy it with clone().detach() rather than
# torch.tensor(), which warns when given a tensor.
X_test_tensor = X_test.clone().detach().float()
proba = model.predict_proba(X_test_tensor)
# One squeezed entry per sample; the indexing below assumes two columns
# (class 0, class 1) — TODO confirm against ANN_softmax.predict_proba.
class_proba = np.array([np.squeeze(i) for i in proba])

# Predict class 1 only when its score is strictly larger than class 0's.
predicted_labels = (class_proba[:, 0] < class_proba[:, 1]).astype(int)
predictions = predicted_labels.tolist()  # kept as a plain list of ints, as before

# `function` is the class-1 score of every test sample; it is what the later
# cells pass to the mapper construction and to the visualization.
function = class_proba[:, 1].copy()

X_np = X_test_tensor.detach().numpy()

print(f"Model Accuracy: {np.mean(predicted_labels == y_test):.3f}")

Model Accuracy: 0.831


In [4]:
# Cache file for the dictionary returned by explainer_attributes.
exp_dic_str = f"dataset/{data_name}/{data_name}_exp_dict.p"

if Path(exp_dic_str).is_file():
    # NOTE(security): unpickling can execute arbitrary code; only load cache
    # files that were produced by this notebook.
    with open(exp_dic_str, 'rb') as exp_file:
        exp_dict = pickle.load(exp_file)

else:
    # Cache miss: compute the explanations once and store them for later runs.
    exp_dict = explainer_attributes(model, X_test, n_perturb = 500)
    with open(exp_dic_str, "wb") as exp_file:
        pickle.dump(exp_dict, exp_file)

## Creating the mapper outputs

In [5]:
# From here on, cached artefacts are read from / written to paths built with
# this new prefix.
data_name = "adult-gale"

# Timing records, filled in while the mappers are created.
exec_times = dict()

In [6]:
# Hyper-parameter grid handed to bootstrap_mapper_params.
gains = [0.1, 0.2, 0.3, 0.4, 0.5]
resolutions = list(range(10, 31))
distances = [0.1, 0.2, 0.3, 0.4, 0.5]

# The variable name is kept as-is so any other cell that refers to it keeps
# working; the path itself is derived from the current `data_name`.
heloc_params_str = f"dataset/{data_name}/{data_name}_params_mapper.p"
params_path = Path(heloc_params_str)

best_params = {}
mappers = {}

# Decide once whether a parameter cache exists instead of re-checking the
# filesystem on every loop iteration.
cache_found = params_path.is_file()
if cache_found:
    # NOTE(security): unpickling can execute arbitrary code; only load cache
    # files that were produced by this notebook.
    with params_path.open('rb') as params_file:
        best_params = pickle.load(params_file)

# Write the cache when it did not exist or when new entries were added to it.
needs_save = not cache_found

for exp in exp_dict.keys():
    exec_times[exp] = {}

    # Bootstrap only the explainers missing from the cache, so a partial cache
    # no longer raises KeyError below.
    if exp not in best_params:

        start_time = time.perf_counter()

        best_params[exp] = bootstrap_mapper_params(exp_dict[exp], function,
                            resolutions=resolutions,
                            gains=gains,
                            distances=distances,
                            n=100,
                            n_jobs=-1)

        exec_times[exp]['Bootstrap parameters'] = time.perf_counter() - start_time
        print(f'Time: {exec_times[exp]["Bootstrap parameters"]}')
        needs_save = True

    print(f'Stability for {exp}: {best_params[exp]["stability"]:.5f} | Resolution: {best_params[exp]["resolution"]}')

    # Build the mapper for this explainer with its selected parameters.
    start_time = time.perf_counter()
    mappers[exp] = create_mapper(exp_dict[exp], function,
                                 resolution=best_params[exp]['resolution'],
                                 gain=best_params[exp]['gain'],
                                 dist_thresh=best_params[exp]['distance_threshold'])
    exec_times[exp]['Build mapper'] = time.perf_counter() - start_time

if needs_save:
    # The cache directory may not exist yet on a first run.
    params_path.parent.mkdir(parents=True, exist_ok=True)
    with params_path.open('wb') as params_file:
        pickle.dump(best_params, params_file)

Stability for Vanilla Gradient: 0.00103 | Resolution: 30
Stability for Gradient x Input: 0.00113 | Resolution: 30
Stability for Occlusion: 0.00249 | Resolution: 24
Stability for Guided Backprop: 0.00103 | Resolution: 30
Stability for LIME: 0.00092 | Resolution: 29
Stability for KernelSHAP: 0.00092 | Resolution: 29
Stability for SmoothGrad: 0.00103 | Resolution: 30
Stability for Integrated Gradients: 0.00096 | Resolution: 28


In [7]:
# File that stores the timing records gathered while building the mappers.
exec_time_str = f"dataset/{data_name}/{data_name}_exec_time.p"

if exec_times is not None:
    # Persist the timings measured in this session.
    with open(exec_time_str, "wb") as exec_time_file:
        pickle.dump(exec_times, exec_time_file)
else:
    # No timings in memory: restore the stored ones. The loaded value is now
    # assigned; previously it was read and thrown away.
    with open(exec_time_str, 'rb') as exec_time_file:
        exec_times = pickle.load(exec_time_file)

exec_times

{'Vanilla Gradient': {'Build mapper': 0.34166836738586426},
 'Gradient x Input': {'Build mapper': 0.34038400650024414},
 'Occlusion': {'Build mapper': 0.3626291751861572},
 'Guided Backprop': {'Build mapper': 0.2835366725921631},
 'LIME': {'Build mapper': 0.27878546714782715},
 'KernelSHAP': {'Build mapper': 0.28390073776245117},
 'SmoothGrad': {'Build mapper': 0.28180646896362305},
 'Integrated Gradients': {'Build mapper': 0.2749288082122803}}

## Visualize

In [8]:
# Explainers to show, in display order.
exp_list = [
    'LIME', 'KernelSHAP', 'Occlusion', 'Integrated Gradients',
    'Gradient x Input', 'Vanilla Gradient', 'SmoothGrad',
]

# List of mapper outputs, one per selected explainer - minimum 2.
mapper_outputs = [mappers[name] for name in exp_list]

# Explanation arrays in the same order, plus a plain nested-list copy of each.
explanation_vectors = [exp_dict[name] for name in exp_list]
explanation_list = [vectors.tolist() for vectors in explanation_vectors]

# Labels: one per explainer, and a readable name for each class id.
expl_labels = list(exp_list)
class_labels = {1: '>50K', 0: "<=50K"}

color_values = [function]

# Column names of the dataframe, with '-' replaced by '_'.
sanitized_names = col_names.str.replace('-', '_')
column_names = np.array(sanitized_names)

In [None]:
# `display` and `HTML` are imported in the notebook's top import cell.
# Widen the notebook container so the visualization has room to render.
display(HTML("<style>.container { width:1920px !important; }</style>"))

# Visualize: hand the test data, labels, class-1 scores, explanations and
# mapper outputs to Mountaineer.
test = Mountaineer()

test.visualize(X_np, y_test, function, explanation_list, mapper_outputs, column_names,
               expl_labels, class_labels, kamada_layout=False)