# SHAP Explanations for Model Interpretability

## Overview
This notebook demonstrates SHAP (SHapley Additive exPlanations) for interpreting model predictions.
We visualize feature importance and per-instance explanations.

## Objectives
1. **Feature Importance**: Identify top contributing features
2. **Per-Instance Explanations**: Understand why specific predictions were made
3. **SHAP Visualizations**: Waterfall, summary, and feature plots


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from pathlib import Path
import sys

# Add src to path
sys.path.insert(0, str(Path.cwd().parent))

from src.data.loader import DataLoader
from src.explainability.shap import (
    compute_and_save_shap,
    get_top_features_shap,
    sample_background_data,
    SHAP_AVAILABLE
)
from src.explainability.permutation import compute_permutation_importance_oof

# Optional dependency: the `shap` package itself is only needed for plotting.
# When it is missing, flip the availability flag so later cells skip SHAP work.
try:
    import shap
except ImportError:
    print("⚠️ SHAP not installed. Install with: pip install shap")
    SHAP_AVAILABLE = False
else:
    print(f"✅ SHAP version: {shap.__version__}")

# Plot appearance: stock matplotlib style with seaborn's "husl" colour cycle.
plt.style.use('default')
sns.set_palette("husl")

# NOTE: this silences *every* warning for the remainder of the session.
warnings.filterwarnings('ignore')

print("✅ Libraries imported successfully!")


## Permutation Importance

Compute global feature importance using the permutation method.


In [None]:
# Example: Load OOF predictions and compute permutation importance
# In practice, load from CV run: oof_df = pd.read_csv("../artifacts/cv/run_name/oof_predictions.csv")

# For demo, create synthetic data in which features, labels and predictions
# are tied together through one shared logit. (Sampling the labels
# independently of X would leave nothing for an importance measure to find.)
rng = np.random.default_rng(42)
n = 100
n_features = 10
X = rng.normal(size=(n, n_features))

# Shared signal: every feature contributes equally to the logit.
logits = X.sum(axis=1)

# Labels are Bernoulli draws with success probability sigmoid(logit).
y_true = rng.binomial(1, 1 / (1 + np.exp(-logits)))

# Simulated out-of-fold probabilities: the same logit plus Gaussian noise,
# standing in for an imperfect model.
y_proba = 1 / (1 + np.exp(-(logits + rng.normal(0, 0.5, n))))

# Derive names from X so the feature count is defined in exactly one place.
feature_names = [f"feature_{i}" for i in range(X.shape[1])]

# Cheap sanity checks: all three inputs must describe the same n rows.
assert X.shape == (n, len(feature_names))
assert y_true.shape == y_proba.shape == (n,)

perm_importance = compute_permutation_importance_oof(
    X, y_true, y_proba,
    feature_names=feature_names,
    n_repeats=5,
    random_state=42
)

print("📊 Top 10 Features by Permutation Importance:")
print(perm_importance.head(10))


## SHAP Explanations

Compute SHAP values for local explanations.


In [None]:
if SHAP_AVAILABLE:
    # Define prediction function (in practice, use your trained model)
    def predict_fn(X_input):
        """Toy stand-in for a trained model.

        Args:
            X_input: array of feature rows; it is summed along axis 1, so it
                must have at least two dimensions.

        Returns:
            The logistic sigmoid of each row's feature sum.
        """
        logits = X_input.sum(axis=1)
        return 1 / (1 + np.exp(-logits))

    # Make sure the destination directory exists so the cell also runs on a
    # fresh checkout. Whether compute_and_save_shap creates missing parent
    # directories itself is not visible from this notebook; creating the
    # directory here is harmless either way.
    shap_output_path = "../artifacts/shap_values.csv"
    Path(shap_output_path).parent.mkdir(parents=True, exist_ok=True)

    # Compute SHAP values
    shap_results = compute_and_save_shap(
        X[:50],  # Explain first 50 instances
        predict_fn,
        output_path=shap_output_path,
        n_background=20,
        feature_names=feature_names,
        random_state=42
    )

    print(f"✅ SHAP values saved to: {shap_results['csv_path']}")

    # Get top features
    top_shap = get_top_features_shap(
        shap_results['shap_values'],
        feature_names,
        top_k=10
    )
    print("\n📊 Top 10 Features by Mean |SHAP|:")
    print(top_shap)
else:
    print("⚠️ SHAP not available. Install with: pip install shap")
