## Load libraries and set up directory paths and iterables

In [None]:
# Import libraries
import os
from modules.utils import load_object, load_dataset, save_object
from modules.explanations import sample_shap_explanations

In [None]:
# Define directory paths for models, datasets, and SHAP explanations
models_dir = '../models'
datasets_dir = '../data/processed/train'
explanations_dir = '../shap-explanations'

# Define iterables for looping over datasets and model types.
# Each dataset is represented by a subdirectory of models_dir, so keep only
# directories: stray files (e.g. .DS_Store, .gitkeep) would otherwise be
# treated as dataset names and break the path construction later on.
dataset_names = sorted(
    entry for entry in os.listdir(models_dir)
    if os.path.isdir(os.path.join(models_dir, entry))
)
# Model type identifiers as they appear in the saved model file names
model_types = ['logreg', 'dt', 'rf', 'xgb', 'svc']

## Compute global SHAP values

In [None]:
# NOTE: Takes many hours to run (>12 hours for certain models); it was originally performed per-dataset on Google Colab

# Compute SHAP explanations for every (dataset, model type) combination
for name in dataset_names:
    print(f'Computing SHAP explanations for {name}')

    # File names use underscores where the dataset directory name uses hyphens;
    # compute the shared prefix once instead of on every path construction
    file_prefix = name.replace('-', '_')

    # Directory to save SHAP explanations for the current dataset
    explanations_subdir = os.path.join(explanations_dir, name)

    # Create the directory if it does not exist. exist_ok=True avoids the
    # check-then-create race, so the call is safe even if the directory
    # appears between the check and the creation.
    if not os.path.isdir(explanations_subdir):
        print(f'Creating directory for SHAP explanations at {explanations_subdir}')
    os.makedirs(explanations_subdir, exist_ok=True)

    # Load the training dataset (loaded once and reused for all model types)
    training_set_path = os.path.join(datasets_dir, f'{file_prefix}-train.csv.gz')
    print(f'Loading dataset at {training_set_path}')
    training_set = load_dataset(training_set_path)

    # Compute SHAP explanations for each model type
    for type_ in model_types:
        # Load the trained model
        # NOTE(review): the '.pickle.xz' suffix suggests this unpickles the file;
        # only load model files from a trusted source -- confirm in load_object
        model_path = os.path.join(models_dir, name, f'{file_prefix}-{type_}-full.pickle.xz')
        print(f'Loading model at {model_path}')
        model = load_object(model_path)

        # Path to save the computed SHAP explanation (no file extension given here)
        explanation_path = os.path.join(
            explanations_subdir, f'{file_prefix}-{type_}-sampling-global'
        )

        # Compute SHAP explanations, using the model's predict method as the
        # evaluation function and the training set as the data to explain
        shap_explanation = sample_shap_explanations(
            model_eval=model.predict,
            data=training_set,
            verbose=True,
        )

        # Save SHAP explanation with LZMA compression
        save_object(shap_explanation, explanation_path, compression='lzma')