In [18]:
# Utils
import torch
import numpy as np
from openxai.experiment_utils import print_summary, load_config, fill_param_dict
from openxai.explainers.perturbation_methods import get_perturb_method

# ML models
from openxai.model import train_model

# Data loaders
from openxai.dataloader import ReturnLoaders, ReturnTrainTestX

# Explanation models
from openxai.explainer import Explainer

# Evaluation methods
from openxai.evaluator import Evaluator

In [19]:
# Choose the model and the data set you wish to generate explanations for.
# n_test_samples is used as the batch size of the data loaders, so it sets how
# many test inputs are pulled from the first test batch and explained.
# NOTE(review): the data set list in the inline comment below does not include
# 'spambase', yet this notebook loads and trains on it; the list looks outdated.
# NOTE(review): the training cell further down reassigns model_name to "lr",
# so the 'ann' choice made here is not the model that ends up being explained.
n_test_samples = 10
data_name = 'spambase' # listed options: ['adult', 'compas', 'gaussian', 'german', 'gmsc', 'heart', 'heloc', 'pima'] ('spambase' is missing from this list)
model_name = 'ann'    # must be one of ['lr', 'ann']

In [20]:
# Build the train/test loaders. The batch size equals the number of samples to
# explain, so the first test batch is the set of inputs used by later cells.
trainloader, testloader = ReturnLoaders(batch_size=n_test_samples, data_name=data_name)
test_batches = iter(testloader)
inputs, labels = next(test_batches)
labels = labels.type(torch.int64)  # labels are cast to int64

# Full train/test FloatTensors together with the feature metadata
X_train, X_test, feature_metadata = ReturnTrainTestX(
    data_name,
    float_tensor=True,
    return_feature_metadata=True,
)

In [21]:
import torch
import os
from openxai.model import train_model
from openxai.experiment_utils import load_config

# Train one model with the hyperparameters from experiment_config.json and save
# its state_dict under models/<model.name>/. The file name encodes the data set,
# model, scaler, every training hyperparameter, the best test accuracy and the
# epoch at which it was reached.
if __name__ == "__main__":
    # Guard kept from the original training script; inside a notebook kernel
    # __name__ is "__main__", so this body always runs.
    config = load_config('experiment_config.json')
    # Not used further in this cell; kept because they stay notebook globals.
    model_names, data_names = config['model_names'], config['data_names']
    train_config = config['training']
    epochs, learning_rate = train_config['epochs'], train_config['learning_rate']
    scaler, seed, warmup = train_config['scaler'], train_config['seed'], train_config['warmup']

    # NOTE(review): these two assignments overwrite the notebook-level choices
    # made in the configuration cell (model_name was 'ann' there), so every
    # later cell works with the model trained here.
    model_name = "lr"
    data_name = "spambase"
    print(f'Training {model_name} on {data_name} dataset')

    # Train Model
    # NOTE(review): the recorded run of this cell reports a test-set class-1
    # proportion of 0.0000 and a test accuracy far below the train accuracy;
    # worth confirming that the spambase test labels load correctly.
    data_config = train_config[data_name]
    batch, pcw, mpb = data_config['batch_size'], data_config['pos_class_weight'], data_config['mean_pred_bound']
    model, best_acc, best_epoch = train_model(model_name, data_name, learning_rate, epochs, batch,
                                              scaler=scaler, seed=seed, pos_class_weight=pcw,
                                              mean_prediction_bound=mpb, warmup=warmup, verbose=False)

    # Save Model
    params = {'ep': epochs, 'lr': learning_rate, 'batch': batch, 'seed': seed, 'pcw': pcw,
              'mpb': mpb, 'wu': warmup, 'acc': str(round(best_acc*100, 2)), 'at_ep': best_epoch}
    params_str = '_'.join([f'{k}_{v}' for k, v in params.items()])
    model_file_name = f'{data_name}_{model_name}_{scaler}_{params_str}.pt'
    model_folder_name = f'models/{model.name}/'
    # exist_ok=True makes re-running the cell safe and removes the
    # check-then-create race of the exists()/makedirs() pair.
    os.makedirs(model_folder_name, exist_ok=True)
    model_path = model_folder_name + model_file_name
    torch.save(model.state_dict(), model_path)
    print(f'File saved to {model_path}')

Training lr on spambase dataset
6 20.93 Best Seen Test Acc (Mean Pred = 0.79)
7 23.46 Best Seen Test Acc (Mean Pred = 0.77)
8 25.42 Best Seen Test Acc (Mean Pred = 0.75)
9 27.3 Best Seen Test Acc (Mean Pred = 0.73)
10 28.96 Best Seen Test Acc (Mean Pred = 0.71)
11 30.41 Best Seen Test Acc (Mean Pred = 0.7)
12 32.8 Best Seen Test Acc (Mean Pred = 0.67)
13 33.74 Best Seen Test Acc (Mean Pred = 0.66)
14 37.8 Best Seen Test Acc (Mean Pred = 0.62)
15 40.12 Best Seen Test Acc (Mean Pred = 0.6)
16 41.13 Best Seen Test Acc (Mean Pred = 0.59)
17 42.0 Best Seen Test Acc (Mean Pred = 0.58)
18 43.01 Best Seen Test Acc (Mean Pred = 0.57)
19 44.9 Best Seen Test Acc (Mean Pred = 0.55)
Proportion of Class 1:
	Test Preds:	0.5510
	Test Set:	0.0000
Test Accuracy: 0.4490
Train Accuracy: 0.8699
File saved to models/LogisticRegression/spambase_lr_minmax_ep_20_lr_0.001_batch_32_seed_0_pcw_0.55_mpb_0.15_wu_5_acc_44.9_at_ep_19.pt


In [22]:
# Explanation method to run
method = 'lime'

# An empty dict means "use the explainer's default parameters"
param_dict = {}

# LIME/IG additionally need the training data, so let the helper fill it in
param_dict = fill_param_dict(method, {}, X_train)

# Summarise the parameters: values exposing `.shape` are shown by their shape
# only, everything else is printed as-is.
params_preview = []
for param_name, param_value in param_dict.items():
    if hasattr(param_value, 'shape'):
        params_preview.append(f'{param_name}: array of size {param_value.shape}')
    else:
        params_preview.append(f'{param_name}: {param_value}')

preview_header = f'{method.upper()} Parameters\n\n'
print(preview_header + '\n'.join(params_preview))
print('Remaining parameters are set to their default values')

LIME Parameters

data: array of size torch.Size([3220, 57])
Remaining parameters are set to their default values


In [23]:
# Predict a class for every selected test input (argmax over dimension 1 of the
# model output), then explain those predictions with the chosen method.
model_out = model(inputs.float())
preds = model_out.argmax(1)

lime = Explainer(method, model, param_dict)
explanations = lime.get_explanations(inputs.float(), preds)
lime_exps = explanations.detach().numpy()

# Show the attribution vector of the first explained input
print(lime_exps[0])

[ 0.17815466 -0.22081512  0.2289229   0.17844062  0.32499242  0.3181003
  0.41927537  0.35710293  0.2693681   0.23881984  0.33944052 -0.12253211
  0.21121967  0.08442789  0.27314356  0.42983812  0.33861926  0.34939814
  0.34912372  0.35127473  0.35858402  0.27702907  0.38679504  0.3436327
 -0.4400574  -0.40359542 -0.4264144  -0.2833649  -0.27130625 -0.35710195
 -0.2935212  -0.27716857 -0.35203025 -0.22850059 -0.30423355 -0.24596947
 -0.32401863 -0.05644296 -0.35636088 -0.05011095 -0.23789021 -0.37307262
 -0.32942083 -0.27428567 -0.27408767 -0.2209086  -0.18799941 -0.37666097
 -0.12133754 -0.18904619 -0.23224598  0.4233583   0.36191273  0.18612024
  0.20406231  0.3286438   0.2951493 ]


In [24]:
from openxai.evaluator import ground_truth_metrics, prediction_metrics, stability_metrics
# List the metric names exposed by openxai.evaluator, one family per line
metric_families = (
    ('Ground truth metrics: ', ground_truth_metrics),
    ('Prediction metrics: ', prediction_metrics),
    ('Stability metrics: ', stability_metrics),
)
for family_label, family_metrics in metric_families:
    print(family_label, family_metrics)

Ground truth metrics:  ['PRA', 'RC', 'FA', 'RA', 'SA', 'SRA']
Prediction metrics:  ['PGU', 'PGI']
Stability metrics:  ['RIS', 'RRS', 'ROS']


In [25]:
# Ground-truth metric to evaluate: one of ['PRA', 'RC', 'FA', 'RA', 'SA', 'SRA']
metric = 'PRA'

# Start from the ground-truth evaluator settings stored in the experiment config.
# NOTE(review): this rebinds `param_dict`, which held the explainer parameters
# until now; re-running the explanation cell after this one would hand these
# evaluator parameters to Explainer.
evaluator_config = load_config('experiment_config.json')['evaluators']
param_dict = evaluator_config['ground_truth_metrics']
param_dict['explanations'] = lime_exps

if metric in ('FA', 'RA', 'SA', 'SRA'):
    # flips ground truth according to prediction
    param_dict['predictions'] = preds
elif metric in ('PRA', 'RC'):
    # 'k' and 'AUC' are not needed for PRA/RC
    for unneeded_key in ('k', 'AUC'):
        del param_dict[unneeded_key]

# Print final parameters (values exposing `.shape` are shown by shape only)
params_preview = []
for param_name, param_value in param_dict.items():
    if hasattr(param_value, 'shape'):
        params_preview.append(f'{param_name}: array of size {param_value.shape}')
    else:
        params_preview.append(f'{param_name}: {param_value}')

preview_header = f'{metric.upper()} Parameters\n\n'
print(preview_header + '\n'.join(params_preview))

PRA Parameters

explanations: array of size (10, 57)


In [26]:
# Score the explanations with the chosen metric. `evaluate` returns a pair:
# a collection of scores (presumably one per explained input) and their mean.
evaluator = Evaluator(model, metric)
evaluation = evaluator.evaluate(**param_dict)
score, mean_score = evaluation

In [27]:
# Standard error of the mean: standard deviation of the scores over sqrt(n)
score_std = np.std(score)
std_err = score_std / np.sqrt(len(score))
print(f"{metric}: {mean_score:.2f}\u00B1{std_err:.2f}")

# Stability metrics are additionally reported on a log scale.
# NOTE(review): the value after the ± sign is log(std_err), not a standard
# error of log(mean); confirm this is the intended reporting convention.
if metric in stability_metrics:
    log_mu = np.log(mean_score)
    log_std = np.log(std_err)
    print(f"log({metric}): {log_mu:.2f}\u00B1{log_std:.2f}")

PRA: 0.90±0.02
