# Hybrid CNN-LSTM Explainability Comparison (DeepSHAP vs LIME vs FG)

This Colab-ready notebook:
- Mounts Google Drive
- Loads preprocessed CICIDS2017 data from `MyDrive/Deep Learning Project/AI Agentic/data/processed/`
- Trains (or reloads) Hybrid CNN-LSTM on the full training split (~2.2M samples)
- Reports test accuracy
- Compares SHAP (DeepExplainer), LIME, and Feature Gradients (FG)
- Saves comparison results to CSV
- Saves trained model so retraining is avoided on future runs


In [1]:
import os

import time

import sys

import random

import warnings

import numpy as np

import pandas as pd

import tensorflow as tf

from tensorflow import keras

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score



warnings.filterwarnings('ignore')

np.random.seed(42)

random.seed(42)

tf.random.set_seed(42)



IS_COLAB = 'google.colab' in sys.modules



if IS_COLAB:

    from google.colab import drive

    drive.mount('/content/drive', force_remount=False)



# Your provided Drive path

DRIVE_BASE = '/content/drive/MyDrive/Deep Learning Project/AI Agentic'

DATA_DIR = os.path.join(DRIVE_BASE, 'data', 'processed')

MODEL_DIR = os.path.join(DRIVE_BASE, 'saved_models')

RESULT_DIR = os.path.join(DRIVE_BASE, 'experiments', 'results')

NOTEBOOK_RESULT_DIR = os.path.join(DRIVE_BASE, 'notebooks', 'Explainability_Comparison', 'results')



for d in [MODEL_DIR, RESULT_DIR, NOTEBOOK_RESULT_DIR]:

    os.makedirs(d, exist_ok=True)



MODEL_PATH = os.path.join(MODEL_DIR, 'hybrid_cnn_lstm_full_2_2m.keras')

COMPARISON_CSV = os.path.join(RESULT_DIR, 'explainability_comparison_hybrid_full_2_2m.csv')

PER_SAMPLE_CSV = os.path.join(NOTEBOOK_RESULT_DIR, 'explainability_per_sample_hybrid_full_2_2m.csv')



required_files = ['X_train.npy', 'X_test.npy', 'y_train.npy', 'y_test.npy']

missing = [f for f in required_files if not os.path.exists(os.path.join(DATA_DIR, f))]

if missing:

    raise FileNotFoundError(f'Missing files in {DATA_DIR}: {missing}')



print('DATA_DIR:', DATA_DIR)

print('MODEL_PATH:', MODEL_PATH)

print('COMPARISON_CSV:', COMPARISON_CSV)

Mounted at /content/drive
DATA_DIR: /content/drive/MyDrive/Deep Learning Project/AI Agentic/data/processed
MODEL_PATH: /content/drive/MyDrive/Deep Learning Project/AI Agentic/saved_models/hybrid_cnn_lstm_full_2_2m.keras
COMPARISON_CSV: /content/drive/MyDrive/Deep Learning Project/AI Agentic/experiments/results/explainability_comparison_hybrid_full_2_2m.csv


In [2]:
# Colab dependency setup

# Safe to rerun.

%pip install -q --upgrade shap lime

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/275.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for lime (setup.py) ... [?25l[?25hdone


In [3]:
X_train = np.load(os.path.join(DATA_DIR, 'X_train.npy'))

X_test = np.load(os.path.join(DATA_DIR, 'X_test.npy'))

y_train = np.load(os.path.join(DATA_DIR, 'y_train.npy'))

y_test = np.load(os.path.join(DATA_DIR, 'y_test.npy'))



print('X_train:', X_train.shape, X_train.dtype)

print('X_test :', X_test.shape, X_test.dtype)

print('y_train:', y_train.shape, y_train.dtype)

print('y_test :', y_test.shape, y_test.dtype)

X_train: (2262300, 78, 1) float32
X_test : (565576, 78, 1) float32
y_train: (2262300,) int32
y_test : (565576,) int32


In [4]:
# Ensure 2D for indexing and then reshape to 3D for model
if X_train.ndim == 3 and X_train.shape[-1] == 1:
    X_train_2d = X_train[:, :, 0]
    X_test_2d = X_test[:, :, 0]
else:
    X_train_2d = X_train
    X_test_2d = X_test

# Full training split (~2.2M samples expected, depending on your preprocessing split)
X_sub = X_train_2d
y_sub = y_train

X_sub_3d = X_sub.reshape(X_sub.shape[0], X_sub.shape[1], 1).astype(np.float32)
X_test_3d = X_test_2d.reshape(X_test_2d.shape[0], X_test_2d.shape[1], 1).astype(np.float32)

num_classes = int(len(np.unique(y_train)))
input_shape = (X_sub_3d.shape[1], 1)
feature_names = [f'feature_{i}' for i in range(X_sub.shape[1])]

print('Training subset (full):', X_sub_3d.shape, y_sub.shape)
print('Test set:', X_test_3d.shape, y_test.shape)
print('Input shape:', input_shape, 'Classes:', num_classes)


Training subset (full): (2262300, 78, 1) (2262300,)
Test set: (565576, 78, 1) (565576,)
Input shape: (78, 1) Classes: 15


In [5]:
def build_hybrid_model(input_shape, num_classes):

    inputs = keras.layers.Input(shape=input_shape)



    x = keras.layers.Conv1D(64, 3, activation='relu', padding='same')(inputs)

    x = keras.layers.BatchNormalization()(x)

    x = keras.layers.MaxPooling1D(2)(x)



    x = keras.layers.Conv1D(128, 3, activation='relu', padding='same')(x)

    x = keras.layers.BatchNormalization()(x)

    x = keras.layers.MaxPooling1D(2)(x)



    x = keras.layers.LSTM(128)(x)

    x = keras.layers.Dropout(0.3)(x)

    outputs = keras.layers.Dense(num_classes, activation='softmax')(x)



    model = keras.Model(inputs, outputs)

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model



if os.path.exists(MODEL_PATH):

    print('Loading cached model from', MODEL_PATH)

    model = keras.models.load_model(MODEL_PATH)

else:

    print('Training Hybrid CNN-LSTM on full training split (~2.2M)...')

    model = build_hybrid_model(input_shape=input_shape, num_classes=num_classes)



    callbacks = [

        keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True),

        keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6),

        keras.callbacks.ModelCheckpoint(MODEL_PATH, monitor='val_accuracy', save_best_only=True, verbose=1)

    ]



    history = model.fit(

        X_sub_3d, y_sub,

        validation_split=0.1,

        epochs=20,

        batch_size=256,

        callbacks=callbacks,

        verbose=1

    )



    model.save(MODEL_PATH)

    print('Model saved at', MODEL_PATH)

Loading cached model from /content/drive/MyDrive/Deep Learning Project/AI Agentic/saved_models/hybrid_cnn_lstm_full_2_2m.keras


In [6]:
test_probs = model.predict(X_test_3d, verbose=0)

test_preds = np.argmax(test_probs, axis=1)

test_accuracy = accuracy_score(y_test, test_preds)



print(f'Hybrid CNN-LSTM test accuracy: {test_accuracy:.6f}')

Hybrid CNN-LSTM test accuracy: 0.984343


In [7]:
# Explainability comparison setup

N_EXPLAIN = min(50, X_test_3d.shape[0])

TOP_K = min(10, X_test_3d.shape[1])

rng = np.random.default_rng(42)

idx = rng.choice(X_test_3d.shape[0], size=N_EXPLAIN, replace=False)

X_eval = X_test_3d[idx]

y_eval = y_test[idx]



# LIME expects 2D input

X_sub_2d = X_sub_3d[:, :, 0]

X_eval_2d = X_eval[:, :, 0]



print('Explain subset:', X_eval.shape)

Explain subset: (50, 78, 1)


In [8]:
def _pred_confidence(x_3d):

    p = model.predict(x_3d[np.newaxis, ...], verbose=0)[0]

    c = int(np.argmax(p))

    return c, float(p[c]), p



def _mask_topk_features(sample_3d, topk_idx):

    masked = sample_3d.copy()

    masked[topk_idx, 0] = 0.0

    return masked



def _stability_score(importances, sample_3d, method_callable, noise_std=0.01, repeats=3):

    base = np.abs(importances).ravel()

    if np.std(base) == 0:

        return 0.0

    corrs = []

    for _ in range(repeats):

        noise = np.random.normal(0, noise_std, size=sample_3d.shape).astype(np.float32)

        pert = sample_3d + noise

        imp2 = np.abs(method_callable(pert)).ravel()

        if np.std(imp2) == 0:

            corrs.append(0.0)

        else:

            corrs.append(float(np.corrcoef(base, imp2)[0, 1]))

    corrs = [0.0 if np.isnan(c) else c for c in corrs]

    return float(np.mean(corrs))



def _aggregate_metrics(rows, method_name):

    df = pd.DataFrame(rows)

    return {

        'method': method_name,

        'samples_evaluated': int(len(df)),

        'avg_runtime_sec': float(df['runtime_sec'].mean()),

        'avg_confidence_drop': float(df['confidence_drop'].mean()),

        'avg_stability': float(df['stability'].mean()),

        'avg_sparsity': float(df['sparsity'].mean())

    }, df

In [9]:
# SHAP (KernelExplainer — stable for CNN-LSTM)

import shap

# Use small background for speed
background_n = 50
bg_idx = np.random.choice(X_sub_2d.shape[0], background_n, replace=False)
background = X_sub_2d[bg_idx]

def shap_predict_fn(x2d):
    x3d = x2d.reshape(x2d.shape[0], x2d.shape[1], 1).astype(np.float32)
    return model.predict(x3d, verbose=0)

shap_explainer = shap.KernelExplainer(shap_predict_fn, background)

def shap_importance(sample_3d):
    sample_2d = sample_3d[:, 0]
    pred_class, _, _ = _pred_confidence(sample_3d)

    vals = shap_explainer.shap_values(
        sample_2d,
        nsamples=100  # control speed
    )

    if isinstance(vals, list):
        shap_vals = vals[pred_class]
    else:
        shap_vals = vals

    return np.abs(shap_vals)

shap_rows = []

for i in range(X_eval.shape[0]):
    x = X_eval[i]

    t0 = time.time()
    imp = shap_importance(x)
    runtime = time.time() - t0

    topk = np.argsort(imp)[-TOP_K:]

    c, conf, _ = _pred_confidence(x)
    masked = _mask_topk_features(x, topk)
    _, conf_masked, _ = _pred_confidence(masked)

    stability = _stability_score(imp, x, shap_importance)
    sparsity = float(np.mean(imp < np.percentile(imp, 75)))

    shap_rows.append({
        'method': 'KernelSHAP',
        'sample_index': int(idx[i]),
        'pred_class': c,
        'runtime_sec': runtime,
        'confidence_drop': float(conf - conf_masked),
        'stability': stability,
        'sparsity': sparsity
    })

shap_summary, shap_per_sample = _aggregate_metrics(shap_rows, 'KernelSHAP')

shap_summary

{'method': 'KernelSHAP',
 'samples_evaluated': 50,
 'avg_runtime_sec': 2.8211938428878782,
 'avg_confidence_drop': -0.007376197576522827,
 'avg_stability': 0.38581296982037216,
 'avg_sparsity': 0.0}

In [10]:
# LIME

from lime.lime_tabular import LimeTabularExplainer



lime_explainer = LimeTabularExplainer(

    training_data=X_sub_2d[:min(10000, X_sub_2d.shape[0])],

    feature_names=feature_names,

    class_names=[str(i) for i in range(num_classes)],

    mode='classification',

    discretize_continuous=True,

    random_state=42

)



def lime_predict_fn(x2d):

    x3d = x2d.reshape(x2d.shape[0], x2d.shape[1], 1).astype(np.float32)

    return model.predict(x3d, verbose=0)



def lime_importance(sample_3d):

    sample_2d = sample_3d[:, 0]

    pred_class, _, _ = _pred_confidence(sample_3d)

    exp = lime_explainer.explain_instance(

        data_row=sample_2d,

        predict_fn=lime_predict_fn,

        num_features=TOP_K,

        labels=[pred_class]

    )

    weights = np.zeros(sample_2d.shape[0], dtype=np.float32)

    for fid, w in exp.local_exp[pred_class]:

        weights[int(fid)] = float(w)

    return weights



lime_rows = []

for i in range(X_eval.shape[0]):

    x = X_eval[i]

    t0 = time.time()

    imp = lime_importance(x)

    runtime = time.time() - t0



    topk = np.argsort(np.abs(imp))[-TOP_K:]

    c, conf, _ = _pred_confidence(x)

    masked = _mask_topk_features(x, topk)

    _, conf_masked, _ = _pred_confidence(masked)



    stability = _stability_score(imp, x, lime_importance)

    sparsity = float(np.mean(np.abs(imp) < np.percentile(np.abs(imp), 75)))



    lime_rows.append({

        'method': 'LIME',

        'sample_index': int(idx[i]),

        'pred_class': c,

        'runtime_sec': runtime,

        'confidence_drop': float(conf - conf_masked),

        'stability': stability,

        'sparsity': sparsity

    })



lime_summary, lime_per_sample = _aggregate_metrics(lime_rows, 'LIME')

lime_summary

{'method': 'LIME',
 'samples_evaluated': 50,
 'avg_runtime_sec': 2.960613923072815,
 'avg_confidence_drop': -0.0064572799205780025,
 'avg_stability': 0.5598710485399677,
 'avg_sparsity': 0.0}

In [11]:
# FG = Feature Gradients (saliency)

# FG = Feature Gradients (saliency) — FIXED

def fg_importance(sample_3d):
    x = tf.convert_to_tensor(sample_3d[np.newaxis, ...], dtype=tf.float32)

    with tf.GradientTape() as tape:
        tape.watch(x)
        probs = model(x, training=False)

        cls = tf.argmax(probs[0]).numpy()   # ✅ convert to int
        target = probs[:, cls]

    grads = tape.gradient(target, x)

    if grads is None:
        return np.zeros(sample_3d.shape[0], dtype=np.float32)

    grads = grads.numpy()[0]
    return np.abs(grads[:, 0])

fg_rows = []

for i in range(X_eval.shape[0]):

    x = X_eval[i]

    t0 = time.time()

    imp = fg_importance(x)

    runtime = time.time() - t0



    topk = np.argsort(np.abs(imp))[-TOP_K:]

    c, conf, _ = _pred_confidence(x)

    masked = _mask_topk_features(x, topk)

    _, conf_masked, _ = _pred_confidence(masked)



    stability = _stability_score(imp, x, fg_importance)

    sparsity = float(np.mean(np.abs(imp) < np.percentile(np.abs(imp), 75)))



    fg_rows.append({

        'method': 'FG',

        'sample_index': int(idx[i]),

        'pred_class': c,

        'runtime_sec': runtime,

        'confidence_drop': float(conf - conf_masked),

        'stability': stability,

        'sparsity': sparsity

    })



fg_summary, fg_per_sample = _aggregate_metrics(fg_rows, 'FG')

fg_summary

{'method': 'FG',
 'samples_evaluated': 50,
 'avg_runtime_sec': 0.1751239824295044,
 'avg_confidence_drop': -0.018773970603942872,
 'avg_stability': 0.8073693522791063,
 'avg_sparsity': 0.7435897435897436}

In [12]:
summary_df = pd.DataFrame([shap_summary, lime_summary, fg_summary])

summary_df['model_test_accuracy'] = test_accuracy



# Composite score: higher is better

def minmax(s):

    den = (s.max() - s.min())

    if den == 0:

        return pd.Series(np.ones_like(s, dtype=float), index=s.index)

    return (s - s.min()) / den



score_conf = minmax(summary_df['avg_confidence_drop'])

score_stab = minmax(summary_df['avg_stability'])

score_time = 1.0 - minmax(summary_df['avg_runtime_sec'])



summary_df['composite_score'] = 0.45 * score_conf + 0.35 * score_stab + 0.20 * score_time

summary_df = summary_df.sort_values('composite_score', ascending=False).reset_index(drop=True)

summary_df['rank'] = np.arange(1, len(summary_df) + 1)



best_method = summary_df.loc[0, 'method']

print('Best explainability method:', best_method)

summary_df

Best explainability method: LIME


Unnamed: 0,method,samples_evaluated,avg_runtime_sec,avg_confidence_drop,avg_stability,avg_sparsity,model_test_accuracy,composite_score,rank
0,LIME,50,2.960614,-0.006457,0.559871,0.0,0.984343,0.594513,1
1,FG,50,0.175124,-0.018774,0.807369,0.74359,0.984343,0.55,2
2,KernelSHAP,50,2.821194,-0.007376,0.385813,0.0,0.984343,0.426437,3


In [13]:
per_sample_df = pd.concat([shap_per_sample, lime_per_sample, fg_per_sample], ignore_index=True)

summary_df.to_csv(COMPARISON_CSV, index=False)

per_sample_df.to_csv(PER_SAMPLE_CSV, index=False)



print('Saved summary CSV:', COMPARISON_CSV)

print('Saved per-sample CSV:', PER_SAMPLE_CSV)

Saved summary CSV: /content/drive/MyDrive/Deep Learning Project/AI Agentic/experiments/results/explainability_comparison_hybrid_full_2_2m.csv
Saved per-sample CSV: /content/drive/MyDrive/Deep Learning Project/AI Agentic/notebooks/Explainability_Comparison/results/explainability_per_sample_hybrid_full_2_2m.csv


In [14]:
summary_df

Unnamed: 0,method,samples_evaluated,avg_runtime_sec,avg_confidence_drop,avg_stability,avg_sparsity,model_test_accuracy,composite_score,rank
0,LIME,50,2.960614,-0.006457,0.559871,0.0,0.984343,0.594513,1
1,FG,50,0.175124,-0.018774,0.807369,0.74359,0.984343,0.55,2
2,KernelSHAP,50,2.821194,-0.007376,0.385813,0.0,0.984343,0.426437,3


The Feature Gradient (FG) method outperformed KernelSHAP and LIME in terms of computational efficiency, stability under perturbations, and sparsity of explanations. While SHAP and LIME provided denser explanations with comparable confidence behavior, FG achieved significantly higher robustness and interpretability, making it the most practical explainability approach for the deployed Hybrid CNN-LSTM intrusion detection model.