In [17]:
# 5_shap_explainability.ipynb

# Step 1: Import libraries
import pandas as pd
import numpy as np
import shap
import joblib
import matplotlib.pyplot as plt
import warnings

# Install a global "ignore" filter: every warning raised after this point is
# suppressed (presumably to keep the notebook output readable — note this
# also hides deprecation and convergence warnings from the libraries below).
warnings.filterwarnings("ignore")

# Step 2: Load cleaned test data
# Read the CSV and cast every column to float in a single expression;
# astype(float) raises if a column cannot be converted to a float dtype.
X_test = pd.read_csv("X_test_cleaned.csv").astype(float)

# Step 3: Define a CPU-safe SHAP explanation function
def explain_model_cpu(model_path, model_name):
    """Compute SHAP values for a saved model and write two plots to disk.

    The model is loaded from ``model_path`` with joblib and its ``predict``
    method is handed to ``shap.Explainer``. The module-level ``X_test`` frame
    is used both as the explainer's background data and as the data being
    explained. Two PNG files are written using relative paths:

    * ``{model_name}_shap_summary.png`` - summary plot over all of ``X_test``
    * ``{model_name}_shap_waterfall.png`` - waterfall plot for row 0 only

    Args:
        model_path: Path of the joblib-serialised model; the loaded object
            must expose a ``predict`` method.
        model_name: Label used in the progress messages, the plot titles and
            the output file names.

    Returns:
        None. The results are the saved image files and the printed
        progress messages.
    """
    print(f"Explaining {model_name} using CPU-safe SHAP...")

    def _title_save_close(title, out_path):
        # Finalise whichever figure SHAP just drew: add the title, tighten
        # the layout, write the PNG and close the figure so the next plot
        # starts on a fresh one.
        plt.title(title)
        plt.tight_layout()
        plt.savefig(out_path)
        plt.close()

    estimator = joblib.load(model_path)

    # Only the predict callable is passed, so SHAP chooses the algorithm
    # itself (the recorded run shows it selecting PermutationExplainer).
    # Build the explainer and evaluate it on the full test set in one go.
    explanation = shap.Explainer(estimator.predict, X_test)(X_test)

    # Summary plot across every sample; show=False keeps the figure open so
    # it can be titled and saved instead of displayed.
    shap.summary_plot(explanation, X_test, show=False)
    _title_save_close(
        f"{model_name} - SHAP Summary",
        f"{model_name}_shap_summary.png",
    )

    # Waterfall plot for the first sample only.
    shap.plots.waterfall(explanation[0], show=False)
    _title_save_close(
        f"{model_name} - SHAP Waterfall (Sample 0)",
        f"{model_name}_shap_waterfall.png",
    )

    print(f"{model_name} SHAP plots saved.\n")

# Step 4: Explain all models
# Each entry pairs a pickled model file with its display name; the models
# are processed in the order listed.
for pickle_path, label in (
    ("xgboost_model.pkl", "XGBoost"),
    ("lasso_model.pkl", "Lasso"),
    ("ridge_model.pkl", "Ridge"),
):
    explain_model_cpu(pickle_path, label)


Explaining XGBoost using CPU-safe SHAP...


PermutationExplainer explainer: 1460it [01:34, 14.04it/s]                       


XGBoost SHAP plots saved.

Explaining Lasso using CPU-safe SHAP...


PermutationExplainer explainer: 1460it [00:48, 23.85it/s]                       


Lasso SHAP plots saved.

Explaining Ridge using CPU-safe SHAP...


PermutationExplainer explainer: 1460it [00:48, 23.71it/s]                       


Ridge SHAP plots saved.

