In [1]:
# Testing SHAP and LIME Explanations

# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import shap
from lime import lime_tabular
import joblib
from tensorflow.keras.models import load_model
import os
from data_loader import KerasBinaryClassifier, load_transaction_data, preprocess_data, split_data, train_random_forest
from model_explainer import explain_prediction, explain_model, explain_with_lime, plot_lime_explanation
from IPython.display import HTML

# Serialized models live in this directory (a relative path, so it is
# resolved against the notebook's working directory).
models_dir = 'models'
rf_path, nn_path, ensemble_path = (
    os.path.join(models_dir, filename)
    for filename in ('rf_model.joblib', 'nn_model.h5', 'ensemble_model.joblib')
)

# Random Forest, restored with joblib
rf_model = joblib.load(rf_path)

# Neural Network: restore the Keras model, then wrap it in the project's
# KerasBinaryClassifier, passing the Random Forest's n_features_in_ as n_features
keras_model = load_model(nn_path)
nn_model = KerasBinaryClassifier(
    model=keras_model,
    n_features=rf_model.n_features_in_,
)

# Ensemble, restored with joblib
ensemble_model = joblib.load(ensemble_path)

print("Models loaded successfully!")





Models loaded successfully!


In [2]:
# Create a sample transaction: one hand-written row whose values are listed
# in the same order as feature_names.
feature_names = ['step', 'customer', 'age', 'gender', 'merchant', 'category', 'amount']
sample_transaction = np.array([1, 1, 35, 1, 1, 1, 100.0])  # Example values

# Normalize the transaction with min-max scaling, where min and max are taken
# across this row's own seven values (so amount becomes 1.0 and every 1 becomes 0.0).
# NOTE(review): this scales features against each other rather than per feature;
# confirm it matches how the models' training data was scaled -- TODO verify
# against preprocess_data.
sample_transaction = (sample_transaction - sample_transaction.min()) / (sample_transaction.max() - sample_transaction.min())
# Reshape to a single-row 2-D array: (1, number of features).
sample_transaction = sample_transaction.reshape(1, -1)

print("Sample transaction created:")
# NOTE: `name` and `value` stay bound in the kernel after this loop finishes
# (the last pair is 'amount' and its scaled value).
for name, value in zip(feature_names, sample_transaction[0]):
    print(f"{name}: {value:.4f}")

# Models to explain, keyed by display name. Nothing is plotted in this cell;
# the dict is iterated by later cells.
models = {
    'Random Forest': rf_model,
    'Neural Network': nn_model,
    'Ensemble': ensemble_model
}



Sample transaction created:
step: 0.0000
customer: 0.0000
age: 0.3434
gender: 0.0000
merchant: 0.0000
category: 0.0000
amount: 1.0000


In [3]:
# Load and preprocess data
# preprocess_data returns the processed frame together with a column mapping;
# the mapping is not used by any other cell shown in this notebook.
df = load_transaction_data()
df_processed, column_mapping = preprocess_data(df)

# Split the data
X_train, X_test, y_train, y_test = split_data(df_processed)

# Train a Random Forest model
# NOTE(review): this rebinds `rf_model`, replacing the model loaded from disk in
# the first cell. The `models` dict built earlier still holds the loaded model,
# so `rf_model` and models['Random Forest'] are different objects from here on.
rf_model = train_random_forest(X_train.values, y_train.values)

# Explain a single transaction
transaction = X_test.values[0]  # First test transaction
# NOTE(review): this rebinds `feature_names` (previously the seven hand-written
# names), so later cells that pair it with `sample_transaction` use this list.
# If X_test really contained a 'fraud' column, `transaction` above would still
# include that value while this list drops the name -- presumably split_data
# already removes the target; TODO confirm.
feature_names = [col for col in X_test.columns if col != 'fraud']

In [4]:
# Force plot for `rf_model` on the single test transaction picked above.
# The model label is written out instead of being read from `name`: in this cell
# `name` is only a leftover loop variable from the sample-transaction cell, which
# is why the message used to read "Generating force plot for amount...".
print("\nGenerating force plot for Random Forest...")
force_plot = explain_prediction(
    rf_model,
    transaction,
    feature_names,
    plot_type='force'
)

# Put SHAP's JavaScript (shap.getjs()) in the <head> of the same HTML document
# as the plot markup, then render that document as the cell's output.
shap_html = f"<head>{shap.getjs()}</head><body style='background-color: #f0f0f0;'>{force_plot.html()}</body>"
HTML(shap_html)



Generating force plot for amount...


  saveable.load_own_variables(weights_store.get(inner_path))
2025-04-24 17:45:18.863 
  command:

    streamlit run C:\Users\luqma\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]


Exception: An error occurred while generating SHAP explanation: too many indices for array: array is 1-dimensional, but 2 were indexed

In [12]:
# Waterfall plot for `rf_model` on the same test transaction.
# The label is written out: `name` is a stale loop variable in this cell, and the
# previous message also said "force plot" although a waterfall plot is requested.
print("\nGenerating waterfall plot for Random Forest...")
waterfall_plot = explain_prediction(
    rf_model,
    transaction,
    feature_names,
    plot_type='waterfall'
)

print("Waterfall: ", waterfall_plot)

# plt.show() does not take a figure; its documented parameter is the keyword-only
# `block`. Passing the figure positionally was being interpreted as an unrelated
# option, so call it with no arguments to render the open figures.
plt.show()





Generating force plot for amount...
model_id:  1425970237312
model_id in _explainer_cache:  1425970237312
shap_values:  [[[ 0.02316019 -0.02316019]
  [ 0.03884477 -0.03884477]
  [ 0.00696927 -0.00696927]
  [ 0.00977026 -0.00977026]
  [ 0.          0.        ]
  [ 0.14340712 -0.14340712]
  [ 0.          0.        ]
  [ 0.10086992 -0.10086992]
  [ 0.17680426 -0.17680426]]]
Waterfall plot generated
Waterfall:  Figure(1000x600)


<Figure size 1000x600 with 0 Axes>

In [13]:
# Bar-type explanation of `sample_transaction`, one figure per entry in `models`.
# NOTE(review): `feature_names` must have one name per column of
# `sample_transaction` for the plot labels to line up -- TODO confirm which
# definition of `feature_names` is live when this cell runs.
for name, model in models.items():
    print(f"\nGenerating bar plot for {name}...")
    bar_plot = explain_prediction(model, sample_transaction, feature_names, plot_type='bar')
    plt.show()




Generating bar plot for Random Forest...
model_id:  1426181249776
cache_path:  models\explainers_cache.pkl




disk_cache:  {'random_forest': <shap.explainers._tree.TreeExplainer object at 0x0000014C059F1970>, 'neural_network': <shap.explainers._kernel.KernelExplainer object at 0x0000014C25791730>, 'ensemble': <shap.explainers._kernel.KernelExplainer object at 0x0000014C0F1B7FB0>}


Exception: An error occurred while generating SHAP explanation: Additivity check failed in TreeExplainer! Please ensure the data matrix you passed to the explainer is the same shape that the model was trained on. If your data shape is correct then please report this on GitHub. Consider retrying with the feature_perturbation='interventional' option. This check failed because for one of the samples the sum of the SHAP values was 0.744396, while the model output was 1.000000. If this difference is acceptable you can set check_additivity=False to disable this check.

In [None]:
# Generate summary plot for each model
# Background data is synthetic: 100 rows of uniform values in [0, 1), one column
# per entry in feature_names. It is drawn from a seeded generator so that a
# rerun of the notebook produces the same plots (np.random.rand with no seed
# gave different data on every run).
background_rng = np.random.default_rng(42)
background_data = background_rng.random((100, len(feature_names)))

for name, model in models.items():
    print(f"\nGenerating summary plot for {name}...")
    summary_plot = explain_model(
        model,
        background_data,
        feature_names
    )
    plt.show()

# LIME explanation of `sample_transaction`, repeated for every entry in `models`
for name, model in models.items():
    print(f"\nGenerating LIME explanation for {name}...")
    lime_explanation = explain_with_lime(
        model, sample_transaction, feature_names,
        num_features=10, num_samples=5000,
    )

    # Chart first, then the explanation's as_list() output as text
    plot_lime_explanation(lime_explanation)
    print("\nLIME Explanation:")
    print(lime_explanation.as_list())

    # Entry 0 of predict_proba is reported as "Not Fraud", entry 1 as "Fraud"
    print("\nPrediction Probabilities:")
    for label, class_idx in (("Not Fraud", 0), ("Fraud", 1)):
        print(f"{label}: {lime_explanation.predict_proba[class_idx]:.4f}")