In [1]:
# Utils
import torch
import numpy as np
from openxai.experiment_utils import print_summary, load_config, fill_param_dict
from openxai.explainers.perturbation_methods import get_perturb_method

# ML models
from openxai.model import train_model

# Data loaders
from openxai.dataloader import ReturnLoaders, ReturnTrainTestX

# Explanation models
from openxai.explainer import Explainer

# Evaluation methods
from openxai.evaluator import Evaluator

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Choose the model and the data set you wish to generate explanations for
n_test_samples = 10  # also passed as the test-loader batch size, so the first test batch holds exactly these points
data_name = 'spambase' # e.g. one of ['adult', 'compas', 'gaussian', 'german', 'gmsc', 'heart', 'heloc', 'pima', 'spambase']
model_name = 'ann'    # must be one of ['lr', 'ann']
# NOTE(review): the training cell further down reassigns model_name to "lr" (and data_name to
# "spambase"), so the model chosen here is not the one that ends up being trained and explained.

In [3]:
# Get training and test loaders (the batch size is set to the number of test points to explain)
trainloader, testloader = ReturnLoaders(data_name=data_name, batch_size=n_test_samples)
# Only the first test batch is used: these are the points explained and evaluated below
inputs, labels = next(iter(testloader))
labels = labels.type(torch.int64)  # cast the labels to 64-bit integers

# Get full train/test FloatTensors and feature metadata
X_train, X_test, feature_metadata = ReturnTrainTestX(data_name, float_tensor=True, return_feature_metadata=True)

In [5]:
# Train a model with the hyperparameters from experiment_config.json and save its weights.
# `load_config`, `train_model` and `torch` are already imported in the first cell; only `os`
# is needed in addition. No `if __name__ == "__main__"` guard: in a notebook it is always true.
import os

# Shared training hyperparameters
config = load_config('experiment_config.json')
train_config = config['training']
epochs, learning_rate = train_config['epochs'], train_config['learning_rate']
scaler, seed, warmup = train_config['scaler'], train_config['seed'], train_config['warmup']

# NOTE: these assignments replace whatever model/data set an earlier cell selected.
# A linear model ("lr") is pinned here because the ground-truth metric PRA evaluated
# later rejects non-linear models.
model_name = "lr"
data_name = "spambase"
print(f'Training {model_name} on {data_name} dataset')

# Train Model (per-data-set settings are stored under the data set's name in the training config)
data_config = train_config[data_name]
batch, pcw, mpb = data_config['batch_size'], data_config['pos_class_weight'], data_config['mean_pred_bound']
model, best_acc, best_epoch = train_model(
    model_name, data_name, learning_rate, epochs, batch,
    scaler=scaler, seed=seed, pos_class_weight=pcw,
    mean_prediction_bound=mpb, warmup=warmup, verbose=False,
)

# Save Model: the file name encodes every hyperparameter plus the best accuracy and its epoch
params = {'ep': epochs, 'lr': learning_rate, 'batch': batch, 'seed': seed, 'pcw': pcw,
          'mpb': mpb, 'wu': warmup, 'acc': str(round(best_acc*100, 2)), 'at_ep': best_epoch}
params_str = '_'.join([f'{k}_{v}' for k, v in params.items()])
model_file_name = f'{data_name}_{model_name}_{scaler}_{params_str}.pt'
model_folder_name = f'models/{model.name}/'
# exist_ok avoids the check-then-create race and makes the cell safe to re-run
os.makedirs(model_folder_name, exist_ok=True)
model_path = model_folder_name + model_file_name
torch.save(model.state_dict(), model_path)
print(f'File saved to {model_path}')

Training ann on spambase dataset
6 69.51 Best Seen Test Acc (Mean Pred = 0.3)
7 78.2 Best Seen Test Acc (Mean Pred = 0.22)
Proportion of Class 1:
	Test Preds:	0.2180
	Test Set:	0.0000
Test Accuracy: 0.7820
Train Accuracy: 0.9432
File saved to models/ArtificialNeuralNetwork/spambase_ann_minmax_ep_20_lr_0.001_batch_32_seed_0_pcw_0.55_mpb_0.15_wu_5_acc_78.2_at_ep_7.pt


In [6]:
# Choose explainer
method = 'lime'

# Build the explainer parameters. The empty dict requests the default parameters;
# X_train is handed over as well because LIME/IG need the training data
# (it shows up as the `data` entry in the preview printed below).
param_dict = fill_param_dict(method, {}, X_train)

# Preview the parameters, printing only the shape of array-like values
params_preview = [f'{k}: array of size {v.shape}' if hasattr(v, 'shape') else f'{k}: {v}' for k, v in param_dict.items()]
print(f'{method.upper()} Parameters\n\n' +'\n'.join(params_preview))
print('Remaining parameters are set to their default values')

LIME Parameters

data: array of size torch.Size([3220, 57])
Remaining parameters are set to their default values


In [7]:
# Compute explanations
# Predicted class per test point (argmax over dimension 1 of the model output);
# the explanations are requested for these predictions, not for the true labels.
preds = model(inputs.float()).argmax(1)
# NOTE(review): param_dict must hold the explainer parameters at this point. The metric-config
# cell further down rebinds the same name, so re-run the explainer-parameter cell before
# re-executing this one.
lime = Explainer(method, model, param_dict)
lime_exps = lime.get_explanations(inputs.float(), preds).detach().numpy()
print(lime_exps[0])  # explanation for the first test point only

[ 0.05141808  0.03493157  0.16536704 -0.26003003 -0.09243292  0.00152439
 -0.7897435  -0.45802286 -0.18733783 -0.32081455 -0.12736195  0.12263069
  0.03992029 -0.17215899 -0.5311434  -0.84255403 -0.11506901 -0.19265045
  0.04535412 -0.45205206 -0.18451896 -0.5715987  -0.84387714 -0.8328402
  0.94846255  0.47635293  1.2275392   0.25440198  0.4573849   0.3408685
  0.8538546   0.30144352  0.11264968  0.4697578   0.26006842 -0.15625691
 -0.03723517  0.03762506  0.16297458  0.06101436  0.6319182   0.7767345
  0.06834373  0.6997992   0.33897105  0.26266935  0.23242605  0.61182725
 -0.0302032   0.08369949  0.13386141 -0.7826016  -1.0388671  -0.364889
 -0.09028549 -0.27863923 -0.24770932]


In [8]:
# Show the metric names exposed by openxai.evaluator, grouped by category.
# NOTE(review): this import sits mid-notebook; `stability_metrics` is read again by the final
# summary cell, which therefore depends on this cell having been run.
from openxai.evaluator import ground_truth_metrics, prediction_metrics, stability_metrics
print('Ground truth metrics: ', ground_truth_metrics)
print('Prediction metrics: ', prediction_metrics)
print('Stability metrics: ', stability_metrics)

Ground truth metrics:  ['PRA', 'RC', 'FA', 'RA', 'SA', 'SRA']
Prediction metrics:  ['PGU', 'PGI']
Stability metrics:  ['RIS', 'RRS', 'ROS']


In [9]:
# Choose one of ['PRA', 'RC', 'FA', 'RA', 'SA', 'SRA']
metric = 'PRA'

# Load config (re-read from disk on every run, so the edits below never accumulate)
param_dict = load_config('experiment_config.json')['evaluators']['ground_truth_metrics']
param_dict['explanations'] = lime_exps
if metric in ['FA', 'RA', 'SA', 'SRA']:
    param_dict['predictions'] = preds  # flips ground truth according to prediction
elif metric in ['PRA', 'RC']:
    # not needed for PRA/RC; pop() with a default tolerates a config that lacks these entries,
    # whereas `del` would raise KeyError
    for unused_key in ('k', 'AUC'):
        param_dict.pop(unused_key, None)

# Print final parameters, showing only the shape of array-like values
params_preview = [f'{k}: array of size {v.shape}' if hasattr(v, 'shape') else f'{k}: {v}' for k, v in param_dict.items()]
print(f'{metric.upper()} Parameters\n\n' +'\n'.join(params_preview))

PRA Parameters

explanations: array of size (10, 57)


In [10]:
# Evaluate the metric across the test inputs/explanations
# NOTE(review): the ValueError recorded under this cell ("PRA is incompatible with non-linear
# models") is inconsistent with the PRA value printed by the final cell, so the saved outputs
# come from different runs. Restart the kernel and run all cells to refresh them.
evaluator = Evaluator(model, metric)
# Unpacked as (scores, mean of the scores); presumably one score per explained input — the
# summary cell takes len() and np.std() of `score`
score, mean_score = evaluator.evaluate(**param_dict)

ValueError: The metric PRA is incompatible with non-linear models.

In [27]:
# Report the metric as mean ± standard error over the evaluated test points
std_err = np.std(score) / np.sqrt(len(score))
print(f"{metric}: {mean_score:.2f}\u00B1{std_err:.2f}")
if metric in stability_metrics:
    # Stability metrics are additionally reported on a log scale. The uncertainty of
    # log(mean) is propagated to first order (delta method): SE[log m] ~= SE[m] / m.
    # log(std_err) is not an error bar: it is typically negative and would print as "±-3.91".
    log_mu = np.log(mean_score)
    log_std = std_err / mean_score
    print(f"log({metric}): {log_mu:.2f}\u00B1{log_std:.2f}")

PRA: 0.90±0.02
