# Hybrid CNN-LSTM Explainability Comparison (SHAP vs LIME vs FG)

This Colab-ready notebook:
- Mounts Google Drive
- Loads preprocessed CICIDS2017 data from `MyDrive/Deep Learning Project/AI Agentic/data/processed/`
- Trains (or reloads) the best model (Hybrid CNN-LSTM) on a stratified subset of 500,000 training samples
- Reports test accuracy
- Compares SHAP, LIME, and Feature Gradients (FG) on a random sample of test rows
- Saves the comparison results to CSV
- Saves the trained model so that retraining is avoided on future runs

In [2]:
import os
import time
import sys
import random
import warnings
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Silence library warnings and seed every RNG the notebook relies on
# (NumPy, the stdlib `random` module, TensorFlow) with the same value.
warnings.filterwarnings('ignore')
for seed_fn in (np.random.seed, random.seed, tf.random.set_seed):
    seed_fn(42)

# Google Drive is only mounted when the Colab runtime module is loaded.
IS_COLAB = 'google.colab' in sys.modules
if IS_COLAB:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)

# Project root on Drive; every other location is derived from it.
DRIVE_BASE = '/content/drive/MyDrive/Deep Learning Project/AI Agentic'
DATA_DIR = os.path.join(DRIVE_BASE, 'data', 'processed')
MODEL_DIR = os.path.join(DRIVE_BASE, 'saved_models')
RESULT_DIR = os.path.join(DRIVE_BASE, 'experiments', 'results')
NOTEBOOK_RESULT_DIR = os.path.join(DRIVE_BASE, 'notebooks', 'Explainability_Comparison', 'results')

# Output folders are created up front; the data folder must already exist.
for out_dir in (MODEL_DIR, RESULT_DIR, NOTEBOOK_RESULT_DIR):
    os.makedirs(out_dir, exist_ok=True)

MODEL_PATH = os.path.join(MODEL_DIR, 'hybrid_cnn_lstm_500k.keras')
COMPARISON_CSV = os.path.join(RESULT_DIR, 'explainability_comparison_hybrid_500k.csv')
PER_SAMPLE_CSV = os.path.join(NOTEBOOK_RESULT_DIR, 'explainability_per_sample_hybrid_500k.csv')

# Fail fast with one message listing every preprocessed array that is absent.
required_files = ['X_train.npy', 'X_test.npy', 'y_train.npy', 'y_test.npy']
missing = []
for fname in required_files:
    if not os.path.exists(os.path.join(DATA_DIR, fname)):
        missing.append(fname)
if missing:
    raise FileNotFoundError(f'Missing files in {DATA_DIR}: {missing}')

for label, location in (
    ('DATA_DIR:', DATA_DIR),
    ('MODEL_PATH:', MODEL_PATH),
    ('COMPARISON_CSV:', COMPARISON_CSV),
):
    print(label, location)


Mounted at /content/drive
DATA_DIR: /content/drive/MyDrive/Deep Learning Project/AI Agentic/data/processed
MODEL_PATH: /content/drive/MyDrive/Deep Learning Project/AI Agentic/saved_models/hybrid_cnn_lstm_500k.keras
COMPARISON_CSV: /content/drive/MyDrive/Deep Learning Project/AI Agentic/experiments/results/explainability_comparison_hybrid_500k.csv


In [3]:
# Colab dependency setup: installs the SHAP and LIME explainability packages.
# Safe to rerun.
# NOTE(review): `--upgrade` with no version pins pulls whichever releases are
# newest at run time, so explainer return formats may differ between runs.
%pip install -q --upgrade shap lime


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/275.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for lime (setup.py) ... [?25l[?25hdone


In [4]:
# Read the four preprocessed arrays from DATA_DIR in a fixed order.
X_train, X_test, y_train, y_test = (
    np.load(os.path.join(DATA_DIR, fname))
    for fname in ('X_train.npy', 'X_test.npy', 'y_train.npy', 'y_test.npy')
)

# Report shape and dtype of each array as a quick sanity check.
for label, arr in (
    ('X_train:', X_train),
    ('X_test :', X_test),
    ('y_train:', y_train),
    ('y_test :', y_test),
):
    print(label, arr.shape, arr.dtype)


X_train: (2262300, 78, 1) float32
X_test : (565576, 78, 1) float32
y_train: (2262300,) int32
y_test : (565576,) int32


In [5]:
# Work on a 2-D (samples, features) view for indexing/splitting, then restore
# the trailing channel axis that the Conv1D/LSTM model expects.
if X_train.ndim == 3 and X_train.shape[-1] == 1:
    X_train_2d = X_train[:, :, 0]
    X_test_2d = X_test[:, :, 0]
else:
    X_train_2d = X_train
    X_test_2d = X_test

# Cap the training subset at 500k rows (or the whole set when it is smaller).
N_TRAIN = min(500_000, X_train_2d.shape[0])
if N_TRAIN < X_train_2d.shape[0]:
    # Stratified draw so the subset keeps the class proportions of y_train.
    X_sub, _, y_sub, _ = train_test_split(
        X_train_2d, y_train,
        train_size=N_TRAIN,
        random_state=42,
        stratify=y_train
    )
else:
    # train_test_split raises ValueError when train_size equals the number of
    # samples (the complementary split would be empty), so use every row.
    # The rows are still shuffled with a fixed seed so that a later
    # `validation_split` does not simply take the tail of the original file order.
    shuffle_idx = np.random.default_rng(42).permutation(N_TRAIN)
    X_sub = X_train_2d[shuffle_idx]
    y_sub = y_train[shuffle_idx]

X_sub_3d = X_sub.reshape(X_sub.shape[0], X_sub.shape[1], 1).astype(np.float32)
X_test_3d = X_test_2d.reshape(X_test_2d.shape[0], X_test_2d.shape[1], 1).astype(np.float32)

# Class count comes from the full training labels, not just the subset.
num_classes = int(len(np.unique(y_train)))
input_shape = (X_sub_3d.shape[1], 1)
# Placeholder feature names (feature_0, feature_1, ...) used by LIME.
feature_names = [f'feature_{i}' for i in range(X_sub.shape[1])]

print('Training subset:', X_sub_3d.shape, y_sub.shape)
print('Test set:', X_test_3d.shape, y_test.shape)
print('Input shape:', input_shape, 'Classes:', num_classes)


Training subset: (500000, 78, 1) (500000,)
Test set: (565576, 78, 1) (565576,)
Input shape: (78, 1) Classes: 15


In [6]:
def build_hybrid_model(input_shape, num_classes):
    """Build and compile the hybrid CNN-LSTM classifier.

    Architecture: two Conv1D(kernel 3, ReLU, 'same') -> BatchNormalization ->
    MaxPooling1D(2) stages with 64 and 128 filters, followed by LSTM(128),
    Dropout(0.3) and a softmax Dense layer with `num_classes` units.

    Args:
        input_shape: Shape of one sample, passed to `keras.layers.Input`.
        num_classes: Number of output classes.

    Returns:
        A `keras.Model` compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    layers = keras.layers
    inputs = layers.Input(shape=input_shape)

    # Convolutional feature extractor: each stage halves the sequence length.
    features = inputs
    for n_filters in (64, 128):
        features = layers.Conv1D(n_filters, 3, activation='relu', padding='same')(features)
        features = layers.BatchNormalization()(features)
        features = layers.MaxPooling1D(2)(features)

    # Recurrent summary of the pooled sequence, regularised before the head.
    features = layers.LSTM(128)(features)
    features = layers.Dropout(0.3)(features)
    outputs = layers.Dense(num_classes, activation='softmax')(features)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Train only when no saved model exists at MODEL_PATH; otherwise reuse it.
if not os.path.exists(MODEL_PATH):
    print('Training Hybrid CNN-LSTM on 500k samples...')
    model = build_hybrid_model(input_shape=input_shape, num_classes=num_classes)

    # Early stopping and LR decay both follow val_loss; the checkpoint keeps
    # the epoch with the best val_accuracy on disk while training runs.
    early_stop = keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=4, restore_best_weights=True
    )
    lr_decay = keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6
    )
    checkpoint = keras.callbacks.ModelCheckpoint(
        MODEL_PATH, monitor='val_accuracy', save_best_only=True, verbose=1
    )
    callbacks = [early_stop, lr_decay, checkpoint]

    history = model.fit(
        X_sub_3d, y_sub,
        validation_split=0.1,
        epochs=20,
        batch_size=256,
        callbacks=callbacks,
        verbose=1
    )

    # The final in-memory model overwrites whatever the checkpoint last wrote.
    model.save(MODEL_PATH)
    print('Model saved at', MODEL_PATH)
else:
    print('Loading cached model from', MODEL_PATH)
    model = keras.models.load_model(MODEL_PATH)


Training Hybrid CNN-LSTM on 500k samples...
Epoch 1/20
[1m1758/1758[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.9443 - loss: 0.1907
Epoch 1: val_accuracy improved from -inf to 0.96484, saving model to /content/drive/MyDrive/Deep Learning Project/AI Agentic/saved_models/hybrid_cnn_lstm_500k.keras
[1m1758/1758[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 16ms/step - accuracy: 0.9443 - loss: 0.1906 - val_accuracy: 0.9648 - val_loss: 0.0971 - learning_rate: 0.0010
Epoch 2/20
[1m1758/1758[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9772 - loss: 0.0558
Epoch 2: val_accuracy improved from 0.96484 to 0.97418, saving model to /content/drive/MyDrive/Deep Learning Project/AI Agentic/saved_models/hybrid_cnn_lstm_500k.keras
[1m1758/1758[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 12ms/step - accuracy: 0.9772 - loss: 0.0558 - val_accuracy: 0.9742 - val_loss: 0.0948 - learning_rate: 0.0010
Epoch 3/20
[1m1755

In [7]:
# Score the model on the whole test set: class probabilities -> hard labels
# -> accuracy against the ground-truth labels.
test_probs = model.predict(X_test_3d, verbose=0)
test_preds = test_probs.argmax(axis=1)
test_accuracy = accuracy_score(y_test, test_preds)

print(f'Hybrid CNN-LSTM test accuracy: {test_accuracy:.6f}')


Hybrid CNN-LSTM test accuracy: 0.983716


In [8]:
# Explainability comparison setup
n_test, n_features = X_test_3d.shape[0], X_test_3d.shape[1]
N_EXPLAIN = min(200, n_test)   # number of test rows to explain
TOP_K = min(10, n_features)    # number of top features masked per sample

# Draw the evaluation rows once, without replacement, from a seeded generator.
rng = np.random.default_rng(42)
idx = rng.choice(n_test, size=N_EXPLAIN, replace=False)
X_eval, y_eval = X_test_3d[idx], y_test[idx]

# LIME expects 2D input, so drop the trailing channel axis.
X_sub_2d = X_sub_3d[..., 0]
X_eval_2d = X_eval[..., 0]

print('Explain subset:', X_eval.shape)


Explain subset: (200, 78, 1)


In [9]:
def _pred_confidence(x_3d):
    p = model.predict(x_3d[np.newaxis, ...], verbose=0)[0]
    c = int(np.argmax(p))
    return c, float(p[c]), p

def _mask_topk_features(sample_3d, topk_idx):
    masked = sample_3d.copy()
    masked[topk_idx, 0] = 0.0
    return masked

def _stability_score(importances, sample_3d, method_callable, noise_std=0.01, repeats=3):
    base = np.abs(importances).ravel()
    if np.std(base) == 0:
        return 0.0
    corrs = []
    for _ in range(repeats):
        noise = np.random.normal(0, noise_std, size=sample_3d.shape).astype(np.float32)
        pert = sample_3d + noise
        imp2 = np.abs(method_callable(pert)).ravel()
        if np.std(imp2) == 0:
            corrs.append(0.0)
        else:
            corrs.append(float(np.corrcoef(base, imp2)[0, 1]))
    corrs = [0.0 if np.isnan(c) else c for c in corrs]
    return float(np.mean(corrs))

def _aggregate_metrics(rows, method_name):
    df = pd.DataFrame(rows)
    return {
        'method': method_name,
        'samples_evaluated': int(len(df)),
        'avg_runtime_sec': float(df['runtime_sec'].mean()),
        'avg_confidence_drop': float(df['confidence_drop'].mean()),
        'avg_stability': float(df['stability'].mean()),
        'avg_sparsity': float(df['sparsity'].mean())
    }, df


In [None]:
# =========================
# SHAP (Original - GradientExplainer)
# =========================

# Imported here rather than at the top because the package is installed by an
# earlier cell of this notebook.
import shap

print("Initializing SHAP GradientExplainer...")

# Background distribution: a small random draw (at most 200 rows, no repeats)
# from the training subset, taken with NumPy's global RNG.
n_candidates = X_sub_3d.shape[0]
background_n = min(200, n_candidates)
bg_idx = np.random.choice(n_candidates, background_n, replace=False)
background = X_sub_3d[bg_idx]

# GradientExplainer (standard for TF models)
shap_explainer = shap.GradientExplainer(model, background)

def shap_importance(sample_3d):
    """Absolute SHAP attribution per feature for the model's predicted class.

    Handles every layout `shap_explainer.shap_values` may return:
      * a list with one array per class (older SHAP releases),
      * a single array with a trailing class axis, i.e. one more dimension
        than the batched input (SHAP >= 0.45 for multi-output models),
      * a single array shaped like the batched input (single-output model).

    Previously the non-list case always used `shap_vals[0][:, 0]`, which for
    the trailing-class-axis layout produced a (features, classes) matrix
    instead of a per-feature vector and broke top-k selection downstream.

    Args:
        sample_3d: One sample without a batch axis, indexed as
            [feature, channel].

    Returns:
        1-D array with one non-negative importance per feature (channel 0).
    """
    sample = sample_3d[np.newaxis, ...]
    shap_vals = shap_explainer.shap_values(sample)

    pred_class, _, _ = _pred_confidence(sample_3d)

    if isinstance(shap_vals, list):
        # One attribution array per class: pick the predicted class.
        class_vals = np.asarray(shap_vals[pred_class])
    else:
        shap_arr = np.asarray(shap_vals)
        if shap_arr.ndim == sample.ndim + 1:
            # Extra trailing axis enumerates the classes.
            class_vals = shap_arr[..., pred_class]
        else:
            class_vals = shap_arr

    # Drop the batch axis, then keep channel 0 -> shape (n_features,).
    return np.abs(class_vals[0][:, 0])

shap_rows = []

print("Running SHAP explanations...")

for i, x in enumerate(X_eval):
    # Time only the attribution call itself.
    t0 = time.time()
    imp = shap_importance(x)
    runtime = time.time() - t0

    abs_imp = np.abs(imp)
    # Indices of the TOP_K largest absolute importances.
    topk = np.argsort(abs_imp)[-TOP_K:]

    # Model confidence before and after zeroing those features.
    c, conf, _ = _pred_confidence(x)
    masked = _mask_topk_features(x, topk)
    _, conf_masked, _ = _pred_confidence(masked)

    stability = _stability_score(imp, x, shap_importance)

    # Fraction of features strictly below the vector's own 75th percentile.
    # NOTE(review): measured against its own percentile, this is close to
    # constant for any non-degenerate vector, so it barely separates methods.
    sparsity = float(np.mean(abs_imp < np.percentile(abs_imp, 75)))

    row = {
        'method': 'SHAP',
        'sample_index': int(idx[i]),
        'pred_class': c,
        'runtime_sec': runtime,
        'confidence_drop': float(conf - conf_masked),
        'stability': stability,
        'sparsity': sparsity
    }
    shap_rows.append(row)

shap_summary, shap_per_sample = _aggregate_metrics(shap_rows, 'SHAP')

shap_summary

In [11]:
# LIME
from lime.lime_tabular import LimeTabularExplainer

# LIME derives its perturbation statistics from at most the first 10,000 rows
# of the 2-D training subset (slicing already clamps to the array length).
lime_training_rows = X_sub_2d[:10000]
lime_class_names = list(map(str, range(num_classes)))

lime_explainer = LimeTabularExplainer(
    training_data=lime_training_rows,
    feature_names=feature_names,
    class_names=lime_class_names,
    mode='classification',
    discretize_continuous=True,
    random_state=42
)

def lime_predict_fn(x2d):
    """Prediction adapter for LIME.

    Appends a trailing axis of size 1 to the 2-D batch, casts it to float32
    and returns the output of the global `model.predict` for that batch.
    """
    n_rows, n_cols = x2d.shape[0], x2d.shape[1]
    batch = x2d.reshape(n_rows, n_cols, 1).astype(np.float32)
    return model.predict(batch, verbose=0)

def lime_importance(sample_3d):
    """Signed LIME weight per feature for the model's predicted class.

    Args:
        sample_3d: One sample indexed as [feature, channel]; channel 0 is
            passed to LIME as the data row.

    Returns:
        float32 vector with one entry per feature. Entries are filled from
        the explanation's `local_exp` for the predicted class; features the
        explanation does not mention stay 0.
    """
    flat = sample_3d[:, 0]
    n_features = flat.shape[0]
    pred_class = _pred_confidence(sample_3d)[0]

    # Request a weight for every feature, for the predicted class only.
    explanation = lime_explainer.explain_instance(
        data_row=flat,
        predict_fn=lime_predict_fn,
        num_features=n_features,
        labels=[pred_class]
    )

    weights = np.zeros(n_features, dtype=np.float32)
    for feature_id, weight in explanation.local_exp[pred_class]:
        weights[int(feature_id)] = float(weight)
    return weights

lime_rows = []
for i, x in enumerate(X_eval):
    # Wall-clock cost of a single LIME explanation.
    t0 = time.time()
    imp = lime_importance(x)
    runtime = time.time() - t0

    magnitude = np.abs(imp)
    # Features ranked by absolute weight; the last TOP_K are the strongest.
    topk = np.argsort(magnitude)[-TOP_K:]

    # Confidence on the clean sample versus the sample with those features zeroed.
    c, conf, _ = _pred_confidence(x)
    masked = _mask_topk_features(x, topk)
    _, conf_masked, _ = _pred_confidence(masked)

    stability = _stability_score(imp, x, lime_importance)
    # Share of features strictly below the vector's own 75th percentile.
    # NOTE(review): this is close to constant by construction.
    sparsity = float(np.mean(magnitude < np.percentile(magnitude, 75)))

    row = {
        'method': 'LIME',
        'sample_index': int(idx[i]),
        'pred_class': c,
        'runtime_sec': runtime,
        'confidence_drop': float(conf - conf_masked),
        'stability': stability,
        'sparsity': sparsity
    }
    lime_rows.append(row)

lime_summary, lime_per_sample = _aggregate_metrics(lime_rows, 'LIME')
lime_summary


{'method': 'LIME',
 'samples_evaluated': 200,
 'avg_runtime_sec': 0.9337525010108948,
 'avg_confidence_drop': 0.012309952527284623,
 'avg_stability': 0.6225604153195117,
 'avg_sparsity': 0.7435897435897436}

In [12]:
# FG = Feature Gradients (saliency)
def fg_importance(sample_3d):
    """Absolute input gradient of the predicted-class output, per feature.

    The sample is batched, run through the global `model` in inference mode
    under a gradient tape, and the output of the arg-max class is
    differentiated with respect to the input.

    Returns:
        Array of absolute gradients taken from channel 0, one per feature.
    """
    batched = tf.convert_to_tensor(sample_3d[np.newaxis, ...], dtype=tf.float32)
    with tf.GradientTape() as tape:
        # The input is a plain tensor, so it has to be watched explicitly.
        tape.watch(batched)
        probs = model(batched, training=False)
        top_class = tf.argmax(probs[0])
        target = probs[:, top_class]
    saliency = tape.gradient(target, batched).numpy()[0]
    return np.abs(saliency[:, 0])

fg_rows = []
for i in range(len(X_eval)):
    x = X_eval[i]

    # Duration of one gradient-based attribution.
    t0 = time.time()
    imp = fg_importance(x)
    runtime = time.time() - t0

    imp_abs = np.abs(imp)
    # The TOP_K features with the largest absolute gradient.
    topk = np.argsort(imp_abs)[-TOP_K:]

    # Drop in predicted-class confidence once those features are zeroed.
    c, conf, _ = _pred_confidence(x)
    masked = _mask_topk_features(x, topk)
    _, conf_masked, _ = _pred_confidence(masked)
    confidence_drop = float(conf - conf_masked)

    stability = _stability_score(imp, x, fg_importance)
    # Share of features strictly below the vector's own 75th percentile.
    # NOTE(review): this is close to constant by construction.
    sparsity = float(np.mean(imp_abs < np.percentile(imp_abs, 75)))

    fg_rows.append({
        'method': 'FG',
        'sample_index': int(idx[i]),
        'pred_class': c,
        'runtime_sec': runtime,
        'confidence_drop': confidence_drop,
        'stability': stability,
        'sparsity': sparsity
    })

fg_summary, fg_per_sample = _aggregate_metrics(fg_rows, 'FG')
fg_summary


{'method': 'FG',
 'samples_evaluated': 200,
 'avg_runtime_sec': 0.04540868163108826,
 'avg_confidence_drop': 0.0008529290556907654,
 'avg_stability': 0.902665652259135,
 'avg_sparsity': 0.7435897435897436}

In [13]:
# One row per explainability method, each tagged with the shared test accuracy.
method_summaries = [shap_summary, lime_summary, fg_summary]
summary_df = pd.DataFrame(method_summaries).assign(model_test_accuracy=test_accuracy)

# Composite score: higher is better
def minmax(s):
    """Rescale a Series linearly so its minimum maps to 0 and its maximum to 1.

    When every value is identical (zero range) a Series of ones with the
    same index is returned instead of dividing by zero.
    """
    lowest, highest = s.min(), s.max()
    span = highest - lowest
    if span == 0:
        return pd.Series(np.ones_like(s, dtype=float), index=s.index)
    return (s - lowest) / span

# Normalise each criterion to [0, 1] across the methods. Runtime is inverted
# so that faster methods score higher.
score_conf = minmax(summary_df['avg_confidence_drop'])
score_stab = minmax(summary_df['avg_stability'])
score_time = 1.0 - minmax(summary_df['avg_runtime_sec'])

# Weighted blend: 45% confidence drop, 35% stability, 20% speed.
summary_df['composite_score'] = 0.45 * score_conf + 0.35 * score_stab + 0.20 * score_time

# Best method first, then attach a 1-based rank.
summary_df = (
    summary_df
    .sort_values('composite_score', ascending=False)
    .reset_index(drop=True)
)
summary_df['rank'] = np.arange(1, len(summary_df) + 1)

best_method = summary_df['method'].iloc[0]
print('Best explainability method:', best_method)
summary_df


Best explainability method: LIME


Unnamed: 0,method,samples_evaluated,avg_runtime_sec,avg_confidence_drop,avg_stability,avg_sparsity,model_test_accuracy,composite_score,rank
0,LIME,200,0.933753,0.01231,0.62256,0.74359,0.983716,0.691392,1
1,FG,200,0.045409,0.000853,0.902666,0.74359,0.983716,0.55,2
2,SHAP,200,0.43775,0.005265,0.0,0.0,0.983716,0.28498,3


In [14]:
# Stack the per-sample rows of all three methods into one long table.
per_sample_df = pd.concat(
    (shap_per_sample, lime_per_sample, fg_per_sample),
    ignore_index=True,
)

# Write the summary first, then the per-sample table, both without the index.
for frame, csv_path in ((summary_df, COMPARISON_CSV), (per_sample_df, PER_SAMPLE_CSV)):
    frame.to_csv(csv_path, index=False)

print('Saved summary CSV:', COMPARISON_CSV)
print('Saved per-sample CSV:', PER_SAMPLE_CSV)


Saved summary CSV: /content/drive/MyDrive/Deep Learning Project/AI Agentic/experiments/results/explainability_comparison_hybrid_500k.csv
Saved per-sample CSV: /content/drive/MyDrive/Deep Learning Project/AI Agentic/notebooks/Explainability_Comparison/results/explainability_per_sample_hybrid_500k.csv


In [15]:
# Display the method comparison table as the cell's output.
summary_df


Unnamed: 0,method,samples_evaluated,avg_runtime_sec,avg_confidence_drop,avg_stability,avg_sparsity,model_test_accuracy,composite_score,rank
0,LIME,200,0.933753,0.01231,0.62256,0.74359,0.983716,0.691392,1
1,FG,200,0.045409,0.000853,0.902666,0.74359,0.983716,0.55,2
2,SHAP,200,0.43775,0.005265,0.0,0.0,0.983716,0.28498,3
