In [None]:
import shap
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model

In [None]:
# Step 1: Load processed data
# Mount Google Drive (Colab-only API) so the processed CSV stored under MyDrive
# can be read from the runtime's filesystem.
from google.colab import drive
drive.mount('/content/drive')

# Absolute path of the cleaned dataset inside the mounted Drive.
data_path = '/content/drive/MyDrive/projects/ae-vae-anomaly-detection/data/processed/cleaned.csv'

# Read the full cleaned table and report its size.
cleaned_df = pd.read_csv(data_path)
print(f'Loaded cleaned data: {cleaned_df.shape[0]} rows, {cleaned_df.shape[1]} columns')

# Draw a reproducible 10% random subsample (fixed seed). The split in Step 2
# works on `df`, not on the full `cleaned_df`.
# NOTE(review): presumably this should mirror the sampling used when the
# autoencoder was trained — confirm against the training notebook.
df = cleaned_df.sample(frac=0.1, random_state=42)
print(f'Sampled data: {df.shape[0]} rows, {df.shape[1]} columns')

In [None]:
# Step 2: Split data
# Target is the 'label' column; the feature matrix is every other column.
y = df['label']
X = df.drop(columns=['label'])

# Hold out 20% of the rows as a test set. The split is stratified on the label
# so both parts keep the same class proportions, and seeded for repeatability.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Keep only the training rows whose label is 0 (presumably the non-anomalous
# class: Step 4 picks label 1 rows as the anomalies to explain).
X_train = X_train_full.loc[y_train_full.eq(0)]

In [None]:
# Step 3: Load trained AE model
# `load_model` is already imported in the notebook's first cell, so it is not
# imported a second time here; only the loss class used below is brought in.
from tensorflow.keras.losses import MeanSquaredError

# Map the name 'mse' to a Keras loss object while deserialising the saved model.
# NOTE(review): presumably the model was compiled with loss='mse' and loading
# fails to resolve that name without this mapping — confirm.
# NOTE(review): the file is read from /content rather than from the mounted
# Drive used in Step 1 — verify it is available on a fresh runtime.
best_model = load_model('/content/best_ae.h5', custom_objects={'mse': MeanSquaredError()})

In [None]:
# Step 4: Prepare SHAP explanation
# Two samples are drawn with a fixed seed:
#   * `background` - up to 100 rows of X_train, the reference data for the explainer
#   * `X_explain`  - up to 20 test rows with label 1, the rows to be explained
# DataFrame.sample raises ValueError when asked for more rows than exist
# (sampling without replacement), so the requested sizes are capped at the
# number of rows actually available. When enough rows exist, the samples are
# identical to an uncapped sample(100) / sample(20).
anomalies_test = X_test[y_test == 1]
if X_train.empty or anomalies_test.empty:
    # Fail early with a clear message instead of an obscure error downstream.
    raise ValueError(
        'SHAP preparation needs at least one background row in X_train and '
        'at least one row with label == 1 in the test split'
    )

background = X_train.sample(n=min(100, len(X_train)), random_state=42)
X_explain = anomalies_test.sample(n=min(20, len(anomalies_test)), random_state=42)

def reconstruction_error(X, model=None):
    """Return the per-row mean squared reconstruction error of an autoencoder.

    This is the scalar-output function handed to ``shap.KernelExplainer``.

    Parameters
    ----------
    X : array-like of shape (n_rows, n_features)
        Numeric input rows (NumPy array or DataFrame). Converted to a float
        NumPy array so the result is always an ndarray, whatever the input type.
    model : object with ``predict(X, verbose=0)``, optional
        Model used to reconstruct ``X``. Defaults to the notebook-level
        ``best_model`` so existing single-argument calls keep working.

    Returns
    -------
    numpy.ndarray of shape (n_rows,)
        Mean over features of the squared difference between each row and its
        reconstruction.
    """
    if model is None:
        # Resolved at call time, so the function can be defined before the
        # model cell has been (re-)run.
        model = best_model

    X_arr = np.asarray(X, dtype=float)
    # verbose=0: the explainer calls this function repeatedly; the default
    # verbosity would print a progress bar on every call.
    preds = model.predict(X_arr, verbose=0)
    return np.mean(np.square(X_arr - preds), axis=1)

# Wrap the reconstruction-error function in a Kernel SHAP explainer that uses
# `background` as its reference data, then compute SHAP values for X_explain.
explainer = shap.KernelExplainer(model=reconstruction_error, data=background)
shap_values = explainer.shap_values(X=X_explain)

In [None]:
# Step 5: SHAP Summary Plot
# Bar-type summary of the SHAP values computed for the explained rows.
shap.summary_plot(
    shap_values,
    features=X_explain,
    plot_type="bar",
)

In [None]:
# Step 6: SHAP Value Plot for Single Anomaly
# Position (within X_explain) of the row to visualise.
idx = 0
shap.initjs()

# Force plot for that single row, rendered through matplotlib: the explainer's
# expected value is the base, the row's SHAP values are the contributions.
shap.force_plot(
    base_value=explainer.expected_value,
    shap_values=shap_values[idx],
    features=X_explain.iloc[idx],
    matplotlib=True,
)
