### Load Data and Trained Model

In [1]:
import pandas as pd
import joblib

# Locations of the processed credit-card test split (features, then labels)
features_csv = "../data/processed/X_test_final_credit.csv"
labels_csv = "../data/processed/y_test_final_credit.csv"

# Read the features first and the labels second, each into its own DataFrame
X_test, y_test = (pd.read_csv(csv_path) for csv_path in (features_csv, labels_csv))

# Restore the trained Random Forest model from disk.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
model = joblib.load("../models/rf_fraud_model.pkl")


### Initialize SHAP Explainer

In [2]:
# Dimensions of the test feature table, as (rows, columns).
X_test.shape

(56746, 30)

In [3]:
import shap

# Draw a reproducible random subset of 50 rows from the test set to explain
X_sample = X_test.sample(n=50, random_state=42)

# Initialize SHAP TreeExplainer around the trained model
explainer = shap.TreeExplainer(model)

# Compute SHAP values on the sample
shap_values = explainer.shap_values(X_sample)

# Older SHAP releases return a list holding one (rows, features) array per
# class, whereas SHAP >= 0.45 returns a single (rows, features, classes) array.
# Normalise to the per-class list so that `shap_values[1]` always selects the
# positive (fraud) class rather than the second sampled row.
if not isinstance(shap_values, list) and getattr(shap_values, "ndim", 0) == 3:
    shap_values = [shap_values[:, :, k] for k in range(shap_values.shape[2])]


In [7]:
# List the feature names carried by the sampled rows.
print(X_sample.columns)

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount'],
      dtype='object')


In [8]:
# Check which container type holds the sample.
type(X_sample)


pandas.core.frame.DataFrame

In [5]:
# Check the concrete class of the loaded model.
type(model)

sklearn.ensemble._forest.RandomForestClassifier

#### Global Interpretation – SHAP Summary Plot

In [None]:
# Index 1 picks out the positive class (fraud class = 1) in both the SHAP
# values and the explainer's expected (base) values.
fraud_shap = shap_values[1]
fraud_base_value = explainer.expected_value[1]

# Global view: summary plot of the fraud-class SHAP values over the sample
shap.summary_plot(fraud_shap, X_sample)

# Local view: force plot for one specific prediction (position 0 of the sample).
# initjs() loads the JavaScript that the interactive force plot needs.
shap.initjs()
shap.force_plot(fraud_base_value, fraud_shap[0], X_sample.iloc[0])



In [None]:
# Multiple local explanations (first 10 instances of the explained sample).
# The feature rows must be the same rows the SHAP values were computed for, so
# they are taken from X_sample rather than from X_test (whose first 10 rows are
# unrelated to the randomly drawn sample).
# The stacked multi-row force plot is rendered with SHAP's interactive
# JavaScript view (enabled by the earlier shap.initjs() call); the
# matplotlib=True backend only supports a single row and raises
# NotImplementedError when given several.
shap.force_plot(
    explainer.expected_value[1],
    shap_values[1][:10],
    X_sample.iloc[:10],
)
