# Task 3: Model Explainability with SHAP

## Objective

Interpret the best fraud detection model using built-in feature importance and SHAP.



## 1. Load Model, Preprocessor & Data

In [None]:

# Fraud Detection Explainability Analysis
import joblib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from src.explainability import (
    plot_builtin_feature_importance,
    compute_shap_values,
    plot_shap_summary,
    plot_shap_force,
    get_test_case_indices
)
# Deserialize the already-fitted estimator and its preprocessing pipeline.
model = joblib.load("../models/best_rf_model.joblib")
preprocessor = joblib.load("../models/preprocessor.joblib")

# Read the test split (adjust the path to your layout). The "class" column is
# the label; every other column is passed through the preprocessor.
df_test = pd.read_csv("../data/processed/fraud_test.csv")
y_test = df_test["class"].values
X_test = preprocessor.transform(df_test.drop(columns=["class"]))

# Use the preprocessor's own output names when it exposes them; otherwise
# fall back to positional placeholders f_0, f_1, ... (one per output column).
try:
    feature_names = preprocessor.get_feature_names_out()
except AttributeError:
    feature_names = [f"f_{col}" for col in range(X_test.shape[1])]

print("Test shape:", X_test.shape)

## 2. Built-in Feature Importance

In [None]:

# 2. Built-in Feature Importance
# Plot the fitted model's own feature-importance ranking, labelled with the
# transformed feature names. NOTE(review): top_n=10 presumably limits the
# chart to the ten highest-ranked features — confirm in src.explainability.
plot_builtin_feature_importance(model, feature_names, top_n=10)
# Display the current matplotlib figure.
plt.show()

## 3. SHAP Global Importance (Summary Plot)

In [None]:

# 3. SHAP Global Explanation (Summary Plot)
# Computing SHAP values can be slow, so cap the explained set at the first
# N rows of the test matrix when it holds N rows or more.
N = 500
if X_test.shape[0] >= N:
    X_shap = X_test[:N]
else:
    X_shap = X_test

# The explainer and SHAP values are kept for the later cells that reuse them.
explainer, shap_values = compute_shap_values(model, X_shap, model_type="tree")

# Global view: SHAP summary plot over the explained rows.
plot_shap_summary(shap_values, X_shap, feature_names=feature_names)
plt.show()

## 4. SHAP Local Explanation (Force Plots)

In [None]:

# 4. SHAP Local Explanation (Force Plots)
import matplotlib.pyplot as plt
import shap

# Predictions for the whole test set (kept in full in y_pred).
y_pred = model.predict(X_test)

# shap_values was computed for X_shap only, i.e. the leading rows of X_test,
# and each force plot below receives shap_values / X_shap together with a
# sample index. The example cases are therefore picked from that same leading
# slice: an index taken from the full test set could lie beyond the explained
# rows and either fail or point at the wrong sample.
n_explained = X_shap.shape[0]
case_indices = get_test_case_indices(y_test[:n_explained], y_pred[:n_explained])

# One force plot per outcome type; a type whose entry is None (no example
# available among the explained rows) is skipped.
case_labels = (
    ("TP", "True Positive"),
    ("FP", "False Positive"),
    ("FN", "False Negative"),
)
for case_key, case_label in case_labels:
    sample_idx = case_indices[case_key]
    if sample_idx is None:
        continue
    print(f"SHAP Force Plot: {case_label}")
    plot_shap_force(
        explainer,
        shap_values,
        X_shap,
        sample_idx=sample_idx,
        feature_names=feature_names,
    )






## 5. Interpretation

- Compare built-in feature importances and SHAP summary plot.
- List top 5 features ("drivers") of fraud predictions by SHAP.
- Explain any surprising findings.

*(You can use code to extract top SHAP features if you wish:)*

# Example: rank features by their mean absolute SHAP value and print the
# five largest, highest first.
# NOTE(review): assumes shap_values.values is 2-D (samples x features), so
# that averaging over axis 0 leaves one score per feature — confirm against
# what compute_shap_values returns for this model.
mean_abs_shap = np.mean(np.abs(shap_values.values), axis=0)
ascending_order = np.argsort(mean_abs_shap)
top5_idx = ascending_order[::-1][:5]
for i in top5_idx:
    print(f"{feature_names[i]}: {mean_abs_shap[i]:.4f}")






## 6. Business Recommendations

- List at least 3 actionable recommendations, each referencing specific SHAP findings.
  - Example: "Transactions within 2 hours of signup (as shown by high SHAP impact of 'time_since_signup') should get additional OTP verification."
- Connect SHAP insights and plots directly to real-world policy or intervention ideas.

