In [None]:
# Import required libraries
import pandas as pd
import joblib
import shap
import lime
import lime.lime_tabular
import matplotlib.pyplot as plt

# Load the preprocessed fraud data and the trained Random Forest model
fraud_data = pd.read_csv('../data/Fraud_Data.csv')
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model artifacts that come from a trusted source.
model = joblib.load('../models/random_forest_fraud_model.pkl')

# Drop non-numeric or irrelevant columns
fraud_data_encoded = fraud_data.drop(columns=['signup_time', 'purchase_time', 'device_id', 'user_id'])

# Convert categorical variables to one-hot encoding (must match training data format).
# dtype=float keeps the dummy columns numeric: pandas >= 2.0 emits bool dummies
# by default, which turns X.values into an object array once mixed with the
# numeric columns and trips up the SHAP/LIME plotting code.
fraud_data_encoded = pd.get_dummies(
    fraud_data_encoded,
    columns=['source', 'browser', 'sex'],
    drop_first=True,
    dtype=float,
)

# Separate features and target
X = fraud_data_encoded.drop(columns=['class'])
y = fraud_data_encoded['class']

# Explain a small subsample so the SHAP computation stays fast.
# The sample size is capped at the number of available rows (X.sample raises
# when asked for more rows than exist), and the seed is fixed so the same rows
# are explained on every run.
SHAP_SAMPLE_SIZE = 100
SHAP_SAMPLE_SEED = 42
X_sample = X.sample(n=min(SHAP_SAMPLE_SIZE, len(X)), random_state=SHAP_SAMPLE_SEED)

# 1. SHAP Model Explainability
## Initialize the SHAP explainer with the Random Forest model
explainer = shap.TreeExplainer(model)

## Generate SHAP values with additivity check disabled
shap_values = explainer.shap_values(X_sample, check_additivity=False)

## Select the SHAP values and base value for the fraud class (1)
# The return layout depends on the installed SHAP version:
#   * older releases: a list with one (n_samples, n_features) array per class
#   * newer releases: one array shaped (n_samples, n_features, n_classes)
# Indexing the newer layout with [1] would pick *sample* 1 rather than
# *class* 1, so the class axis is selected explicitly for each layout.
FRAUD_CLASS = 1
if isinstance(shap_values, list):
    fraud_shap_values = shap_values[FRAUD_CLASS]
    fraud_base_value = explainer.expected_value[FRAUD_CLASS]
elif getattr(shap_values, "ndim", 2) == 3:
    fraud_shap_values = shap_values[:, :, FRAUD_CLASS]
    fraud_base_value = explainer.expected_value[FRAUD_CLASS]
else:
    # Single-output layout: values are already (n_samples, n_features).
    fraud_shap_values = shap_values
    fraud_base_value = explainer.expected_value

# Inspect the structure of shap_values to ensure compatibility
print("SHAP values structure:", type(shap_values), fraud_shap_values.shape)

### SHAP Summary Plot
# Mean absolute SHAP value per feature for the fraud class
shap.summary_plot(fraud_shap_values, X_sample, plot_type="bar", show=True)

### SHAP Force Plot
# Choose an instance to explain (positional row within X_sample)
index = 10
shap.initjs()  # Load JS for visualizations
shap.force_plot(fraud_base_value, fraud_shap_values[index], X_sample.iloc[index], matplotlib=True)

### SHAP Dependence Plot
# Select a feature to see its dependence with the prediction
shap.dependence_plot("purchase_value", fraud_shap_values, X_sample)

# 2. LIME Model Explainability
## Initialize LIME explainer
# The training matrix is cast to float explicitly: if the one-hot columns are
# bool, X.values would be an object array, which LIME's statistics and
# discretizer are not meant to consume. A fixed random_state makes the
# perturbation sampling -- and therefore the explanation -- reproducible.
LIME_RANDOM_STATE = 42
lime_explainer = lime.lime_tabular.LimeTabularExplainer(
    X.to_numpy(dtype=float),
    feature_names=list(X.columns),
    class_names=['Not Fraud', 'Fraud'],
    mode='classification',
    random_state=LIME_RANDOM_STATE,
)

## Choose an instance to explain
# NOTE: this is positional row 10 of the full feature matrix X, which is not
# necessarily the same record as row 10 of the SHAP subsample.
instance_index = 10
instance_row = X.iloc[instance_index].to_numpy(dtype=float)
# 2-D (1, n_features) view of the same row, kept under its original name for
# any later cells that rely on it; explain_instance itself needs the 1-D row.
instance = instance_row.reshape(1, -1)
lime_exp = lime_explainer.explain_instance(instance_row, model.predict_proba)

### LIME Explanation Plot
lime_exp.show_in_notebook(show_table=True)
