<a href="https://colab.research.google.com/github/NzimbaEnvoy/Fraud_Detection-Masters-Project-/blob/main/Credit_card_Fraud_Deep_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive so the
# notebook can read the CSV files and write the saved model there.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
#Importing Libraries
import os
import pandas as pd
import numpy as np
import time
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.metrics import Precision, Recall
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve, accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE, ADASYN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, LSTM, TimeDistributed, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE, ADASYN
from sklearn.utils.class_weight import compute_class_weight
import itertools
import shap


In [None]:
# Drive folder that holds the pre-split train/test CSV files
base_path = "/content/drive/MyDrive/Thesis Project"

# Resolve both CSV locations in one pass
train_path, test_path = (
    os.path.join(base_path, csv_name)
    for csv_name in ("train_data_Credit Card.csv", "test_data_Credit Card.csv")
)

# Read each split into its own DataFrame
train_df, test_df = pd.read_csv(train_path), pd.read_csv(test_path)

In [None]:
# Display the training DataFrame as a quick visual check of the load
train_df

In [None]:
# Separate the "Class" label column from the feature columns of each split
X_train, y_train = train_df.drop(columns="Class"), train_df["Class"]
X_test, y_test = test_df.drop(columns="Class"), test_df["Class"]


CNN BASELINE

In [None]:
# Conv1D expects (samples, steps, channels): add a trailing channel axis of size 1
X_train_cnn = np.expand_dims(X_train.values, axis=-1)
X_test_cnn = np.expand_dims(X_test.values, axis=-1)

Hyperparameter Tuning

In [None]:
import itertools
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, BatchNormalization, MaxPooling1D, AveragePooling1D, Dropout, Flatten, GlobalAveragePooling1D, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.metrics import accuracy_score

def build_cnn_model(filters, kernel_size, dropout_rate, dense_units, learning_rate,
                    blocks=2, pool_type='max', l2_reg=0.0, use_gap=False, dilation_rate=1,
                    input_length=None):
    """
    Build and compile a tunable 1D-CNN binary classifier.

    Each conv block is Conv1D -> BN -> Conv1D -> BN -> pooling(2) -> dropout.
    Block 1 uses `filters` then `filters * 2` channels, block 2 uses `filters * 4`,
    and every further block b (3, 4, ...) uses `filters * 2**b` channels.

    - filters: channel count of the very first convolution
    - kernel_size: kernel width shared by all convolutions
    - dropout_rate: dropout after each conv block; the dense head uses
      dropout_rate + 0.1, clamped to [0.0, 0.8]
    - dense_units: width of the hidden Dense layer in the head
    - learning_rate: Adam learning rate
    - blocks: number of conv blocks, any integer >= 2
    - pool_type: 'max' -> MaxPooling1D, anything else -> AveragePooling1D
    - l2_reg: L2 regularization strength for conv kernels (0 disables it)
    - use_gap: True -> GlobalAveragePooling1D, False -> Flatten
    - dilation_rate: dilation applied to every convolution
    - input_length: number of time steps (features) per sample; defaults to
      X_train_cnn.shape[1] when omitted

    Returns the compiled Sequential model (binary cross-entropy, accuracy metric).
    Raises ValueError if blocks < 2.

    NOTE: every block halves the sequence length, so `blocks` must stay small
    enough for the input length; otherwise Keras fails while building the model.
    """
    if blocks < 2:
        raise ValueError(f"blocks must be >= 2, got {blocks}")

    Pool = MaxPooling1D if pool_type == 'max' else AveragePooling1D
    reg = l2(l2_reg) if l2_reg > 0 else None
    head_dropout = min(max(dropout_rate + 0.1, 0.0), 0.8)
    if input_length is None:
        # Fall back to the notebook-level training tensor, as before
        input_length = X_train_cnn.shape[1]

    def conv_bn(n_filters, **extra):
        """One convolution followed by batch normalization."""
        return [
            Conv1D(filters=n_filters, kernel_size=kernel_size, activation='relu', padding='same',
                   dilation_rate=dilation_rate, kernel_regularizer=reg, **extra),
            BatchNormalization(),
        ]

    def conv_block(first_filters, second_filters, **first_extra):
        """Two conv+BN pairs, then pooling and dropout."""
        return (conv_bn(first_filters, **first_extra)
                + conv_bn(second_filters)
                + [Pool(pool_size=2), Dropout(dropout_rate)])

    # ---- Blocks 1 and 2 (always present) ----
    layers = conv_block(filters, filters * 2, input_shape=(input_length, 1))
    layers += conv_block(filters * 4, filters * 4)

    # ---- Blocks 3..N: channel count doubles with each extra block ----
    for block_no in range(3, blocks + 1):
        width = filters * 2 ** block_no
        layers += conv_block(width, width)

    # ---- Head ----
    layers.append(GlobalAveragePooling1D() if use_gap else Flatten())
    layers += [
        Dense(dense_units, activation='relu'),
        Dropout(head_dropout),
        Dense(1, activation='sigmoid'),
    ]

    model = Sequential(layers)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Expanded grid
param_grid = {
    'filters': [16, 32, 48, 64],
    'kernel_size': [3, 5],
    'dropout_rate': [0.2, 0.3, 0.4],
    'dense_units': [64, 128, 256],
    'learning_rate': [1e-4, 5e-4, 1e-3],
    'blocks': [2, 3, 4],
    'pool_type': ['max', 'avg'],
    'l2_reg': [0.0, 1e-4, 1e-3],
    'use_gap': [False, True],
    'dilation_rate': [1, 2, 3],

    # Training hyperparams (tuned in the loop)
    'batch_size': [64, 128, 256],
    # Upper bound on epochs; early stopping normally ends training sooner.
    # The loop reads params['epochs'], so the key must exist in the grid.
    'epochs': [30]
}

results = []

early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Loop over all combinations
keys = list(param_grid.keys())
for values in itertools.product(*param_grid.values()):
    params = dict(zip(keys, values))
    print(f"Testing: {params}")

    # Building with architecture params
    model = build_cnn_model(
        filters=params['filters'],
        kernel_size=params['kernel_size'],
        dropout_rate=params['dropout_rate'],
        dense_units=params['dense_units'],
        learning_rate=params['learning_rate'],
        blocks=params['blocks'],
        pool_type=params['pool_type'],
        l2_reg=params['l2_reg'],
        use_gap=params['use_gap'],
        dilation_rate=params['dilation_rate']
    )

    # Training with training params from grid
    history = model.fit(
        X_train_cnn, y_train,
        validation_split=0.2,
        epochs=params['epochs'],
        batch_size=params['batch_size'],
        verbose=0,
        callbacks=[early_stop]
    )

    # Evaluating on the validation split only. The test set must stay unseen
    # during model selection, otherwise the final test metrics are optimistic.
    # The score is the validation accuracy at the epoch with the lowest val_loss.
    best_epoch = int(np.argmin(history.history['val_loss']))
    acc = history.history['val_accuracy'][best_epoch]

    results.append((params, acc))
    print(f"Validation accuracy: {acc:.4f}")

# Picking the best
best_params, best_score = max(results, key=lambda x: x[1])
print("\nBest Parameters for CNN:")
print(best_params)
print(f"Best Validation Accuracy: {best_score:.4f}")


**MODELLING**

CNN BASELINE

In [None]:
# Stop when validation loss has not improved for 3 epochs and restore the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3,
                           restore_best_weights=True, verbose=1)

# Baseline 1D-CNN, assembled layer by layer
cnn_baseline = Sequential()

# Block 1: 32 -> 64 filters
cnn_baseline.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu',
                        input_shape=(X_train_cnn.shape[1], 1)))
cnn_baseline.add(BatchNormalization())
cnn_baseline.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))
cnn_baseline.add(BatchNormalization())
cnn_baseline.add(MaxPooling1D(pool_size=2))
cnn_baseline.add(Dropout(0.3))

# Block 2: 128 -> 128 filters
cnn_baseline.add(Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'))
cnn_baseline.add(BatchNormalization())
cnn_baseline.add(Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'))
cnn_baseline.add(BatchNormalization())
cnn_baseline.add(MaxPooling1D(pool_size=2))
cnn_baseline.add(Dropout(0.3))

# Classification head with a single sigmoid output
cnn_baseline.add(Flatten())
cnn_baseline.add(Dense(64, activation='relu'))
cnn_baseline.add(Dropout(0.4))
cnn_baseline.add(Dense(1, activation='sigmoid'))


In [None]:
# Attach optimizer, loss and the tracked metrics (accuracy, precision, recall)
cnn_baseline.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
)

# Fit with a 20% validation split; early stopping may end training before 30 epochs
cnn_baseline.fit(X_train_cnn, y_train,
                 validation_split=0.2, epochs=30, batch_size=64,
                 callbacks=[early_stop], verbose=1)

# Score the test set and turn probabilities into labels (cut-off: > 0.5)
y_pred_probs_baseline = cnn_baseline.predict(X_test_cnn).reshape(-1)
y_pred_baseline = (y_pred_probs_baseline > 0.5).astype(int)

# Confusion matrix from hard labels, AUC from raw probabilities
cm_baseline = confusion_matrix(y_test, y_pred_baseline)
roc_baseline = roc_auc_score(y_test, y_pred_probs_baseline)

print("Confusion Matrix:", cm_baseline, sep="\n")
print(f"AUC: {roc_baseline:.4f}")

In [None]:
# Persist the trained baseline to Drive (the folder is created if missing)
model_dir = "/content/drive/My Drive/Thesis Project"
os.makedirs(model_dir, exist_ok=True)

baseline_model_file = os.path.join(model_dir, "cnn_baseline_model.h5")
cnn_baseline.save(baseline_model_file)
print("Model saved to:", baseline_model_file)

REALTIME INFERENCES

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import seaborn as sns

#Loading the CNN model
model_path = "/content/drive/My Drive/Thesis Project/cnn_baseline_model.h5"
cnn_baseline = load_model(model_path)

#Converting test data to NumPy arrays
X_test_np = X_test.values
y_test_np = y_test.values

#Fraud-only pool: every test row whose true label is fraud (Class == 1)
fraud_idx = np.where(y_test_np == 1)[0]
fraud_pool_X = X_test_np[fraud_idx]
assert fraud_pool_X.shape[0] > 0, "No fraud cases found in test set!"

#Simulation settings
domain_name  = "credit-card"
lambda_fraud = 50.16      # mean number of fraud arrivals per simulated second
runs         = 10000      # number of simulated one-second windows
threshold    = 0.5        # probability cut-off for flagging a transaction
rng = np.random.default_rng(42)

# Score the fraud pool ONCE. Every run only resamples rows from this fixed
# pool, so calling predict() inside the loop would repeat identical work
# 10,000 times. The trailing channel axis is added explicitly so the input
# matches the (features, 1) shape the CNN was built with.
fraud_pool_probs = cnn_baseline.predict(fraud_pool_X[..., np.newaxis], verbose=0).flatten()
fraud_pool_hit = (fraud_pool_probs >= threshold).astype(int)

#Monte Carlo simulation
arrivals_F   = rng.poisson(lam=lambda_fraud, size=runs)
tp_vec       = np.zeros(runs, dtype=int)
det_rate_pct = np.full(runs, np.nan)

for r in range(runs):
    F = arrivals_F[r]
    if F == 0:
        # No fraud arrived in this window: detection rate stays NaN
        continue
    s_idx = rng.choice(fraud_pool_X.shape[0], size=F, replace=True)

    # Every sampled row is fraud, so each flagged row is a true positive
    TP = fraud_pool_hit[s_idx].sum()
    tp_vec[r]       = TP
    det_rate_pct[r] = 100 * TP / F

# Wrapping into DataFrame
mc_df = pd.DataFrame({
    "run": np.arange(1, runs+1),
    "fraud_arrivals": arrivals_F,
    "tp": tp_vec,
    "detection_pct": det_rate_pct
})

# Windows without arrivals carry no detection rate and are excluded from plots
plot_df = mc_df.dropna(subset=["detection_pct"])

In [None]:
# Summary of the per-run detection rate; percentiles are computed once and reused
det_pct = plot_df["detection_pct"]
det_pct_mean = det_pct.mean()
det_pct_p05 = np.percentile(det_pct, 5)
det_pct_p95 = np.percentile(det_pct, 95)

summary_row = {
    "model": "CNN (Baseline)",
    "threshold": threshold,
    "mean_detection_pct": det_pct_mean,
    "median_detection_pct": det_pct.median(),
    "p05_detection_pct": det_pct_p05,
    "p95_detection_pct": det_pct_p95,
    # Detection percentages rescaled by the mean arrival rate
    "mean_tp_per_sec": lambda_fraud * det_pct_mean / 100,
    "p05_tp_per_sec": lambda_fraud * det_pct_p05 / 100,
    "p95_tp_per_sec": lambda_fraud * det_pct_p95 / 100,
}
summary_table = pd.DataFrame([summary_row])

print(summary_table)

In [None]:
# Sanity check: how many simulated runs produced a usable detection rate
print(f"Total runs: {runs}")
print(f"Runs plotted (non-NA): {plot_df.shape[0]}")

In [None]:
# Histogram of the per-run detection rate
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(plot_df["detection_pct"], bins=40, kde=False, color="blue", alpha=0.6, ax=ax)
ax.set_title(f"Detection rate per second — Monte Carlo ({domain_name})")
ax.set_xlabel("Detection Rate per second (%)")
ax.set_ylabel("Count of runs")
plt.show()


In [None]:
# Empirical CDF of the per-run detection rate
fig, ax = plt.subplots(figsize=(8, 5))
sns.ecdfplot(plot_df["detection_pct"], color="blue", linewidth=1.5, ax=ax)
ax.set_title(f"ECDF of per-second detection — Monte Carlo ({domain_name})")
ax.set_xlabel("Detection Rate per second (%)")
ax.set_ylabel("ECDF")
plt.show()

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.metrics import roc_auc_score

# 1) Load model + test data
model_path = "/content/drive/My Drive/Thesis Project/cnn_baseline_model.h5"
cnn_baseline = load_model(model_path)

# Accept pandas objects (via .values) as well as anything array-like
X_test_np = np.asarray(getattr(X_test, "values", X_test))
y_test_np = np.asarray(getattr(y_test, "values", y_test))

# Labels must be integer 0/1 and line up row-for-row with the features
y_test_np = y_test_np.astype(int)
assert np.isin(y_test_np, (0, 1)).all(), "y_test must be binary {0,1}"
assert len(X_test_np) == len(y_test_np)

# 2) Simulation settings
lambda_per_sec = 50.16                        # mean arrivals per second
runs           = 10000                        # simulated windows per experiment
thr_cnn        = 0.50                         # decision threshold
rng            = np.random.default_rng(123)   # shared random generator


In [None]:
# 3) Probability helper
def predict_prob_keras(model, X_batch, batch_size=4096):
    """
    Returns p(y=1) for a batch using a Keras model, as a 1-D array.

    Works whether model outputs shape (n,1) or (n,).

    If `X_batch` is a NumPy array with exactly one axis fewer than
    `model.input_shape` and the model's last input axis has size 1
    (e.g. a (n, features) batch for a Conv1D model built with
    input_shape=(features, 1)), the missing channel axis is appended so
    the call does not depend on implicit rank expansion inside Keras.

    Parameters:
        model: object exposing `predict(X, batch_size=..., verbose=...)`;
            `input_shape` is read when available.
        X_batch: input batch.
        batch_size: batch size forwarded to `model.predict`.
    """
    expected_shape = getattr(model, "input_shape", None)
    if (isinstance(X_batch, np.ndarray)
            and isinstance(expected_shape, tuple)
            and len(expected_shape) == X_batch.ndim + 1
            and expected_shape[-1] == 1):
        X_batch = X_batch[..., np.newaxis]

    p = model.predict(X_batch, batch_size=batch_size, verbose=0)
    return np.asarray(p).reshape(-1)

In [None]:
# 4) Windowed Monte Carlo simulator
def simulate_realtime_fullpool_keras(model, threshold, lambda_rate_per_sec, runs,
                                     X_pool, y_pool, window_secs=1, random_state=None):
    """
    Windowed Monte Carlo simulation of a classifier on resampled pool rows.

    Each run is a time window of length `window_secs`.
    We draw arrivals ~ Poisson(lambda_rate_per_sec * window_secs),
    sample that many rows with replacement from the pool, threshold their
    predicted probabilities to get yhat, and compute per-window metrics.

    The pool is scored once up front and the per-window probabilities are
    looked up by index. This assumes the model's inference output for a row
    does not depend on which other rows are in the batch.

    Parameters:
        model: Keras-style model, scored through `predict_prob_keras`.
        threshold: probability cut-off; a row is flagged when p >= threshold.
        lambda_rate_per_sec: mean number of arrivals per second.
        runs: number of simulated windows.
        X_pool, y_pool: NumPy feature matrix and 0/1 labels with matching rows.
        window_secs: window length in seconds.
        random_state: optional seed or `numpy.random.Generator`. When None,
            the notebook-level generator `rng` is used, as before.

    Returns:
        dict with "mc_df" (one row per run) and "summary" (one-row DataFrame
        of means taken over runs, ignoring NaN entries).

    Raises:
        ValueError: if the pool is empty or X_pool / y_pool lengths differ.
    """
    N = X_pool.shape[0]
    if N == 0:
        raise ValueError("X_pool is empty; there is nothing to sample from.")
    if y_pool.shape[0] != N:
        raise ValueError("X_pool and y_pool must contain the same number of rows.")

    generator = rng if random_state is None else np.random.default_rng(random_state)

    arrivals = generator.poisson(lam=lambda_rate_per_sec * window_secs, size=runs)

    # Score every pool row once instead of re-predicting resampled rows per run
    p_pool = predict_prob_keras(model, X_pool)
    yhat_pool = (p_pool >= threshold).astype(np.int32)

    TP = np.zeros(runs, dtype=np.int32)
    FP = np.zeros(runs, dtype=np.int32)
    TN = np.zeros(runs, dtype=np.int32)
    FN = np.zeros(runs, dtype=np.int32)

    # Metrics stay NaN for windows where they are undefined
    precision = np.full(runs, np.nan, dtype=float)
    recall    = np.full(runs, np.nan, dtype=float)
    accuracy  = np.full(runs, np.nan, dtype=float)
    f1        = np.full(runs, np.nan, dtype=float)
    auc_vec   = np.full(runs, np.nan, dtype=float)

    for r in range(runs):
        F = arrivals[r]
        if F == 0:
            continue

        idx = generator.integers(0, N, size=F, dtype=np.int64)
        y_r = y_pool[idx]
        p_r = p_pool[idx]
        yhat = yhat_pool[idx]

        tp = np.sum((yhat == 1) & (y_r == 1))
        fp = np.sum((yhat == 1) & (y_r == 0))
        tn = np.sum((yhat == 0) & (y_r == 0))
        fn = np.sum((yhat == 0) & (y_r == 1))

        TP[r], FP[r], TN[r], FN[r] = tp, fp, tn, fn

        denom_p = tp + fp
        denom_r = tp + fn
        if denom_p > 0:
            precision[r] = tp / denom_p
        if denom_r > 0:
            recall[r] = tp / denom_r

        accuracy[r] = (tp + tn) / F
        if not np.isnan(precision[r]) and not np.isnan(recall[r]) and (precision[r] + recall[r]) > 0:
            f1[r] = 2 * precision[r] * recall[r] / (precision[r] + recall[r])

        # AUC only if both classes present in the window
        if (y_r.min() == 0) and (y_r.max() == 1):
            try:
                auc_vec[r] = roc_auc_score(y_r, p_r)
            except ValueError:
                # Best effort: scores the metric rejects leave AUC as NaN for
                # this window; any other error is allowed to surface.
                pass

    mc_df = pd.DataFrame({
        "run": np.arange(1, runs+1, dtype=np.int32),
        "window_secs": window_secs,
        "arrivals": arrivals,
        "TP": TP, "FP": FP, "TN": TN, "FN": FN,
        "precision": precision, "recall": recall, "accuracy": accuracy, "f1": f1,
        "auc": auc_vec
    })

    # Summary table
    summary = {
        "runs": runs,
        "window_secs": window_secs,
        "runs_with_tx": int(np.sum(arrivals > 0)),
        "mean_TP": float(np.nanmean(TP)),
        "mean_FP": float(np.nanmean(FP)),
        "mean_TN": float(np.nanmean(TN)),
        "mean_FN": float(np.nanmean(FN)),
        "mean_precision": float(np.nanmean(precision)),
        "mean_recall": float(np.nanmean(recall)),
        "mean_accuracy": float(np.nanmean(accuracy)),
        "mean_f1": float(np.nanmean(f1)),
        "mean_auc": float(np.nanmean(auc_vec)),
        "median_auc": float(np.nanmedian(auc_vec)),
        "n_auc_runs": int(np.sum(~np.isnan(auc_vec))),
    }
    summary_df = pd.DataFrame([summary])
    return {"mc_df": mc_df, "summary": summary_df}

In [None]:
# Run the CNN simulation for 1-second and then 10-second windows;
# everything except the window length is shared between the two calls
cnn_sim_kwargs = dict(
    model=cnn_baseline, threshold=thr_cnn, lambda_rate_per_sec=lambda_per_sec,
    runs=runs, X_pool=X_test_np, y_pool=y_test_np,
)
res_cnn_1s = simulate_realtime_fullpool_keras(window_secs=1, **cnn_sim_kwargs)
res_cnn_10s = simulate_realtime_fullpool_keras(window_secs=10, **cnn_sim_kwargs)

In [None]:
# Stack both summaries, label each with its window length, keep the headline metrics
summary_columns = [
    "model", "time_unit",
    "mean_accuracy", "mean_precision", "mean_recall", "mean_f1",
    "mean_auc",
]
labelled_summaries = [
    res["summary"].assign(model="CNN (Keras)", time_unit=unit, threshold=thr_cnn)
    for unit, res in (("1 sec", res_cnn_1s), ("10 sec", res_cnn_10s))
]
summary_table = pd.concat(labelled_summaries, ignore_index=True)[summary_columns]

print(summary_table.to_string(index=False))

SHAP ANALYSIS

In [None]:
# Column labels used to annotate the SHAP plot
feature_names = X_train.columns.tolist()

# Random subsets: 200 training rows as SHAP background, 100 test rows to explain
background = X_train_cnn[np.random.choice(len(X_train_cnn), size=200, replace=False)]
X_explain = X_test_cnn[np.random.choice(len(X_test_cnn), size=100, replace=False)]

# KernelExplainer takes 2-D input, so collapse everything after the sample axis
background_flat = background.reshape(len(background), -1)
X_explain_flat = X_explain.reshape(len(X_explain), -1)


def cnn_predict(x_flat):
    """Restore the (rows, features, 1) layout and return cnn_baseline's scores as a 1-D array."""
    n_features = X_train_cnn.shape[1]
    return cnn_baseline.predict(x_flat.reshape((-1, n_features, 1))).flatten()


# Build the explainer on the background sample, then attribute the rows to explain
explainer = shap.KernelExplainer(cnn_predict, background_flat)
shap_values = explainer.shap_values(X_explain_flat)


In [None]:
# --- 7. Plot with real feature names ---
plt.figure(figsize=(5, 5))  # width=5, height=5 inches
shap.summary_plot(
    shap_values,
    X_explain_flat,
    feature_names=feature_names,
    plot_type="dot",
    show=False  # Prevent SHAP from auto-displaying
)
plt.title("SHAP Summary Plot-CNN (Baseline)", fontsize=10)
plt.tight_layout()

# Save BEFORE plt.show(): once the figure has been shown it is released,
# and a later savefig() would write an empty canvas.
plt.savefig("SHAP Summary Plot-CNN (Baseline).png", dpi=300, bbox_inches='tight')
plt.show()


SMOTE + CNN

In [None]:
# Oversample the training split with SMOTE (fixed seed), then add the
# trailing channel axis required by Conv1D
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
X_train_smote_cnn = np.expand_dims(X_train_smote.values, axis=-1)

In [None]:
# Early stopping on validation loss (patience 3), restoring the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3,
                           restore_best_weights=True, verbose=1)

# Same two-block 1D-CNN as the baseline, sized from the SMOTE-resampled input
cnn_smote = Sequential(layers=[
    # Block 1: 32 -> 64 filters
    Conv1D(filters=32, kernel_size=3, padding='same', activation='relu',
           input_shape=(X_train_smote_cnn.shape[1], 1)),
    BatchNormalization(),
    Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(rate=0.3),

    # Block 2: 128 -> 128 filters
    Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'),
    BatchNormalization(),
    Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(rate=0.3),

    # Head: flatten, one hidden layer, sigmoid output
    Flatten(),
    Dense(units=64, activation='relu'),
    Dropout(rate=0.4),
    Dense(units=1, activation='sigmoid'),
])



In [None]:
# Same optimizer, loss and metrics as the baseline CNN
cnn_smote.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
)

# Train on the SMOTE-resampled data with a 20% validation split
cnn_smote.fit(X_train_smote_cnn, y_train_smote,
              validation_split=0.2, epochs=30, batch_size=64,
              callbacks=[early_stop], verbose=1)

# Test-set probabilities and hard labels (cut-off: > 0.5)
y_pred_probs_smote = cnn_smote.predict(X_test_cnn).reshape(-1)
y_pred_smote = (y_pred_probs_smote > 0.5).astype(int)

# Confusion matrix from labels, AUC from probabilities
cm_smote = confusion_matrix(y_test, y_pred_smote)
roc_smote = roc_auc_score(y_test, y_pred_probs_smote)

print("Confusion Matrix:", cm_smote, sep="\n")
print(f"AUC: {roc_smote:.4f}")

SHAP ANALYSIS FOR CNN SMOTE

In [None]:
# Column labels for the SHAP plot
feature_names = X_train.columns.tolist()

# Random subsets: 100 SMOTE-resampled training rows as background, 50 test rows to explain
background = X_train_smote_cnn[np.random.choice(len(X_train_smote_cnn), size=100, replace=False)]
X_explain = X_test_cnn[np.random.choice(len(X_test_cnn), size=50, replace=False)]

# KernelExplainer takes 2-D input, so collapse everything after the sample axis
background_flat = background.reshape(len(background), -1)
X_explain_flat = X_explain.reshape(len(X_explain), -1)


def cnn_smote_predict(x_flat):
    """Restore the (rows, features, 1) layout and return cnn_smote's scores as a 1-D array."""
    n_features = X_train_smote_cnn.shape[1]
    return cnn_smote.predict(x_flat.reshape((-1, n_features, 1))).flatten()


# Build the explainer on the background sample, then attribute the rows to explain
explainer_smote = shap.KernelExplainer(cnn_smote_predict, background_flat)
shap_values_smote = explainer_smote.shap_values(X_explain_flat, nsamples="auto")


In [None]:
# Beeswarm summary of the SHAP values for the SMOTE-trained CNN
plt.figure(figsize=(6, 4))
shap.summary_plot(shap_values_smote, X_explain_flat,
                  plot_type="dot", feature_names=feature_names,
                  show=False)  # keep the figure open so a title can be added
plt.gca().set_title("SHAP Summary Plot - CNN (SMOTE)", fontsize=10)
plt.gcf().tight_layout()
plt.show()

ADASYN + CNN

In [None]:
# Oversample the training split with ADASYN (fixed seed), then add the
# trailing channel axis required by Conv1D
adas = ADASYN(random_state=42)
X_train_adas, y_train_adas = adas.fit_resample(X_train, y_train)
X_train_adas_cnn = np.expand_dims(X_train_adas.values, axis=-1)

In [None]:
# Early stopping on validation loss (patience 3), restoring the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3,
                           restore_best_weights=True, verbose=1)

# Two-block 1D-CNN for the ADASYN-resampled data, assembled layer by layer
cnn_adas = Sequential()

# Block 1: 32 -> 64 filters
cnn_adas.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu',
                    input_shape=(X_train_adas_cnn.shape[1], 1)))
cnn_adas.add(BatchNormalization())
cnn_adas.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))
cnn_adas.add(BatchNormalization())
cnn_adas.add(MaxPooling1D(pool_size=2))
cnn_adas.add(Dropout(0.3))

# Block 2: 128 -> 128 filters
cnn_adas.add(Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'))
cnn_adas.add(BatchNormalization())
cnn_adas.add(Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'))
cnn_adas.add(BatchNormalization())
cnn_adas.add(MaxPooling1D(pool_size=2))
cnn_adas.add(Dropout(0.3))

# Head: flatten, one hidden layer, sigmoid output
cnn_adas.add(Flatten())
cnn_adas.add(Dense(64, activation='relu'))
cnn_adas.add(Dropout(0.4))
cnn_adas.add(Dense(1, activation='sigmoid'))

In [None]:
# Same optimizer, loss and metrics as the other CNN variants
cnn_adas.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
)

# Train on the ADASYN-resampled data with a 20% validation split
cnn_adas.fit(X_train_adas_cnn, y_train_adas,
             validation_split=0.2, epochs=30, batch_size=64,
             callbacks=[early_stop], verbose=1)

# Test-set probabilities and hard labels (cut-off: > 0.5)
y_pred_probs_adas = cnn_adas.predict(X_test_cnn).reshape(-1)
y_pred_adas = (y_pred_probs_adas > 0.5).astype(int)

# Confusion matrix from labels, AUC from probabilities
cm_adasyn = confusion_matrix(y_test, y_pred_adas)
roc_adasyn = roc_auc_score(y_test, y_pred_probs_adas)

print("Confusion Matrix:", cm_adasyn, sep="\n")
print(f"AUC: {roc_adasyn:.4f}")

SHAP ANALYSIS FOR ADASYN

In [None]:
# Column labels for the SHAP plot (taken from the un-reshaped DataFrame)
feature_names = X_train.columns.tolist()

# Random subsets: 100 ADASYN-resampled training rows as background, 50 test rows to explain
background = X_train_adas_cnn[np.random.choice(len(X_train_adas_cnn), size=100, replace=False)]
X_explain = X_test_cnn[np.random.choice(len(X_test_cnn), size=50, replace=False)]

# KernelExplainer takes 2-D input, so collapse everything after the sample axis
background_flat = background.reshape(len(background), -1)
X_explain_flat = X_explain.reshape(len(X_explain), -1)


def cnn_adas_predict(x_flat):
    """Restore the (rows, features, 1) layout and return cnn_adas's scores as a 1-D array."""
    n_features = X_train_adas_cnn.shape[1]
    return cnn_adas.predict(x_flat.reshape((-1, n_features, 1))).flatten()


# Build the explainer on the background sample, then attribute the rows to explain
explainer_adas = shap.KernelExplainer(cnn_adas_predict, background_flat)
shap_values_adas = explainer_adas.shap_values(X_explain_flat, nsamples="auto")


In [None]:
# Beeswarm summary of the SHAP values for the ADASYN-trained CNN
plt.figure(figsize=(6, 4))
shap.summary_plot(shap_values_adas, X_explain_flat,
                  plot_type="dot", feature_names=feature_names,
                  show=False)  # keep the figure open so a title can be added
plt.gca().set_title("SHAP Summary Plot - CNN (ADASYN)", fontsize=10)
plt.gcf().tight_layout()
plt.show()

Cost-Sensitive CNN

In [None]:
# 'balanced' class weights computed from the training labels, then mapped
# to the {label: weight} dictionary that Keras' class_weight argument takes
class_weights_array = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train,
)
class_weights_dict = {label: class_weights_array[label] for label in (0, 1)}

In [None]:
# Early stopping on validation loss (patience 3), restoring the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3,
                           restore_best_weights=True, verbose=1)

# Two-block 1D-CNN for the cost-sensitive (class-weighted) run
cnn_cost = Sequential(layers=[
    # Block 1: 32 -> 64 filters
    Conv1D(filters=32, kernel_size=3, padding='same', activation='relu',
           input_shape=(X_train_cnn.shape[1], 1)),
    BatchNormalization(),
    Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(rate=0.3),

    # Block 2: 128 -> 128 filters
    Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'),
    BatchNormalization(),
    Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(rate=0.3),

    # Head: flatten, one hidden layer, sigmoid output
    Flatten(),
    Dense(units=64, activation='relu'),
    Dropout(rate=0.4),
    Dense(units=1, activation='sigmoid'),
])

In [None]:
# Same optimizer, loss and metrics as the other CNN variants
cnn_cost.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
)

# Train on the original data; class_weight re-weights the loss per class
cnn_cost.fit(X_train_cnn, y_train,
             class_weight=class_weights_dict,
             validation_split=0.2, epochs=30, batch_size=64,
             callbacks=[early_stop], verbose=1)

# Test-set probabilities and hard labels (cut-off: > 0.5)
y_pred_probs_cost = cnn_cost.predict(X_test_cnn).reshape(-1)
y_pred_cost = (y_pred_probs_cost > 0.5).astype(int)

# Confusion matrix from labels, AUC from probabilities
cm_cost = confusion_matrix(y_test, y_pred_cost)
roc_cost = roc_auc_score(y_test, y_pred_probs_cost)

print("Confusion Matrix:", cm_cost, sep="\n")
print(f"AUC: {roc_cost:.4f}")

SHAP ANALYSIS FOR COST-SENSITIVE CNN

In [None]:
# Column labels for the SHAP plot
feature_names = X_train.columns.tolist()

# Random subsets: 100 training rows as background, 50 test rows to explain
background = X_train_cnn[np.random.choice(len(X_train_cnn), size=100, replace=False)]
X_explain = X_test_cnn[np.random.choice(len(X_test_cnn), size=50, replace=False)]

# KernelExplainer takes 2-D input, so collapse everything after the sample axis
background_flat = background.reshape(len(background), -1)
X_explain_flat = X_explain.reshape(len(X_explain), -1)


def cnn_cost_predict(x_flat):
    """Restore the (rows, features, 1) layout and return cnn_cost's scores as a 1-D array."""
    n_features = X_train_cnn.shape[1]
    return cnn_cost.predict(x_flat.reshape((-1, n_features, 1))).flatten()


# Build the explainer on the background sample, then attribute the rows to explain
explainer_cost = shap.KernelExplainer(cnn_cost_predict, background_flat)
shap_values_cost = explainer_cost.shap_values(X_explain_flat, nsamples="auto")


In [None]:
# Beeswarm summary of the SHAP values for the cost-sensitive CNN
plt.figure(figsize=(4, 10))
shap.summary_plot(shap_values_cost, X_explain_flat,
                  plot_type="dot", feature_names=feature_names,
                  show=False)  # keep the figure open so a title can be added
plt.gca().set_title("SHAP Summary Plot - CNN (Cost-Sensitive)", fontsize=10)
plt.gcf().tight_layout()
plt.show()


Plot ROC Curves for CNN

In [None]:
# False/true positive rates per strategy (the thresholds array is not needed)
fpr_base, tpr_base = roc_curve(y_test, y_pred_probs_baseline)[:2]
fpr_smote, tpr_smote = roc_curve(y_test, y_pred_probs_smote)[:2]
fpr_adas, tpr_adas = roc_curve(y_test, y_pred_probs_adas)[:2]
fpr_cost, tpr_cost = roc_curve(y_test, y_pred_probs_cost)[:2]

# One curve per strategy plus the diagonal chance line
fig, ax = plt.subplots(figsize=(8, 6))
for curve_label, curve_fpr, curve_tpr in (
    ("Baseline", fpr_base, tpr_base),
    ("SMOTE", fpr_smote, tpr_smote),
    ("ADASYN", fpr_adas, tpr_adas),
    ("Cost-sensitive", fpr_cost, tpr_cost),
):
    ax.plot(curve_fpr, curve_tpr, label=curve_label)
ax.plot([0, 1], [0, 1], 'k--')
ax.set_title("ROC Curves for CNN - Credit Card Fraud")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.legend()
ax.grid()
plt.show()

Evaluation Table

In [None]:
# Evaluation table: one row per training strategy, one column per metric
cnn_preds_by_method = {
    "Baseline": y_pred_baseline,
    "SMOTE": y_pred_smote,
    "ADASYN": y_pred_adas,
    "Cost-sensitive": y_pred_cost,
}
cnn_label_metrics = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1": f1_score,
}

results_cnn = pd.DataFrame({
    "Method": list(cnn_preds_by_method),
    # Label-based metrics are evaluated on the thresholded predictions
    **{
        metric_name: [metric_fn(y_test, y_hat) for y_hat in cnn_preds_by_method.values()]
        for metric_name, metric_fn in cnn_label_metrics.items()
    },
    # AUC values were computed earlier from the raw probabilities
    "AUC": [roc_baseline, roc_smote, roc_adasyn, roc_cost],
})

# View final table
print(results_cnn)


# **LSTM MODELS**

In [None]:
# LSTM layers expect (samples, timesteps, features): each column becomes
# one timestep carrying a single feature
X_train_lstm = np.expand_dims(X_train.values, axis=-1)
X_test_lstm = np.expand_dims(X_test.values, axis=-1)

Hyper-parameter Tuning

In [None]:
# Hyper-parameter Tuning

def build_lstm_model(units, dropout_rate, dense_units, learning_rate):
    """
    Build and compile a three-layer stacked LSTM binary classifier.

    - units: width of the first two LSTM layers; the third uses
      max(units // 2, 32)
    - dropout_rate: input dropout of every LSTM and of the last Dense block,
      clamped to [0.0, 0.8]; the first Dense block uses dropout_rate + 0.1
      with the same clamp
    - dense_units: width of the second Dense layer (the first is twice as wide)
    - learning_rate: Adam learning rate

    The input shape is taken from X_train_lstm. Returns the compiled model
    (binary cross-entropy, accuracy metric).
    """
    drop_between = max(min(dropout_rate, 0.8), 0.0)
    drop_head = max(min(dropout_rate + 0.1, 0.8), 0.0)
    last_units = max(units // 2, 32)

    # Dropout settings shared by all three recurrent layers
    lstm_dropout = dict(dropout=drop_between, recurrent_dropout=0.1)

    net = Sequential()

    # LSTM block 1 (returns the full sequence to feed the next LSTM)
    net.add(LSTM(units, return_sequences=True,
                 input_shape=(X_train_lstm.shape[1], 1), **lstm_dropout))
    net.add(BatchNormalization())

    # LSTM block 2
    net.add(LSTM(units, return_sequences=True, **lstm_dropout))
    net.add(BatchNormalization())

    # LSTM block 3 (final: returns only the last output)
    net.add(LSTM(last_units, return_sequences=False, **lstm_dropout))
    net.add(BatchNormalization())

    # Dense head (2 layers)
    net.add(Dense(dense_units * 2, activation='relu'))
    net.add(Dropout(drop_head))
    net.add(Dense(dense_units, activation='relu'))
    net.add(Dropout(drop_between))

    # Output
    net.add(Dense(1, activation='sigmoid'))

    net.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=learning_rate),
        metrics=['accuracy'],
    )
    return net

# Grid definition
param_grid_lstm = {
    'units': [32, 64, 128, 256],
    'dropout_rate': [0.1, 0.2, 0.3, 0.4],
    'dense_units': [32,64, 128,256],
    'learning_rate': [0.0001,0.0002,0.0003,0.0004,0.0005]
}

results_lstm = []

# Defined here so the search does not depend on a callback left over from
# whichever cell happened to run last.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

for params in itertools.product(*param_grid_lstm.values()):
    current_params = dict(zip(param_grid_lstm.keys(), params))
    print(f"Testing: {current_params}")

    model = build_lstm_model(**current_params)
    history = model.fit(
        X_train_lstm,
        y_train,
        validation_split=0.2,
        epochs=10,
        batch_size=32,
        verbose=0,
        callbacks=[early_stop]
    )

    # Score on the validation split only. The test set must stay unseen
    # during model selection, otherwise the final test metrics are optimistic.
    # The score is the validation accuracy at the epoch with the lowest val_loss.
    best_epoch = int(np.argmin(history.history['val_loss']))
    acc = history.history['val_accuracy'][best_epoch]

    results_lstm.append((current_params, acc))
    print(f"Validation accuracy: {acc:.4f}")


In [None]:
# Pick the (params, score) pair with the highest score; ties keep the earliest entry
best_params_lstm, best_score_lstm = max(results_lstm, key=lambda entry: entry[1])
print("\nBest Parameters for LSTM:", best_params_lstm, sep="\n")
print(f"Best Accuracy: {best_score_lstm:.4f}")

LSTM BASELINE

In [None]:
# Early stopping on validation loss (patience 3), restoring the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3,
                           restore_best_weights=True, verbose=1)

# Baseline stacked LSTM, assembled layer by layer
lstm_baseline = Sequential()

# LSTM block 1 (returns the full sequence for the next LSTM)
lstm_baseline.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.1,
                       input_shape=(X_train_lstm.shape[1], 1)))
lstm_baseline.add(BatchNormalization())

# LSTM block 2
lstm_baseline.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.1))
lstm_baseline.add(BatchNormalization())

# LSTM block 3 (returns only the last output)
lstm_baseline.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.1))
lstm_baseline.add(BatchNormalization())

# Dense head with a single sigmoid output
lstm_baseline.add(Dense(256, activation='relu'))
lstm_baseline.add(Dropout(0.4))
lstm_baseline.add(Dense(128, activation='relu'))
lstm_baseline.add(Dropout(0.3))
lstm_baseline.add(Dense(1, activation='sigmoid'))

In [None]:
# Attach optimizer, loss and the tracked metrics (accuracy, precision, recall)
lstm_baseline.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0005),
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
)

# Fit with a 20% validation split; early stopping may end training before 30 epochs
lstm_baseline.fit(X_train_lstm, y_train,
                  validation_split=0.2, epochs=30, batch_size=64,
                  callbacks=[early_stop], verbose=1)

# Test-set probabilities and hard labels (cut-off: > 0.5)
y_pred_probs_baseline_lstm = lstm_baseline.predict(X_test_lstm).reshape(-1)
y_pred_baseline_lstm = (y_pred_probs_baseline_lstm > 0.5).astype(int)

# Confusion matrix from labels, AUC from probabilities
cm_baseline_lstm = confusion_matrix(y_test, y_pred_baseline_lstm)
roc_baseline_lstm = roc_auc_score(y_test, y_pred_probs_baseline_lstm)

print("Confusion Matrix:", cm_baseline_lstm, sep="\n")
print(f"AUC: {roc_baseline_lstm:.4f}")

SMOTE + LSTM

In [None]:
# SMOTE-resampled features with a trailing axis of size 1 for the LSTM input
X_train_smote_lstm = np.expand_dims(X_train_smote.values, axis=-1)

In [None]:
# Halt training after 3 epochs without a val_loss improvement and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Same three-block LSTM + dense head as the other LSTM runs, sized from the SMOTE-resampled input
lstm_smote = Sequential()

# Recurrent block 1 - also declares the input shape
lstm_smote.add(
    LSTM(128, input_shape=(X_train_smote_lstm.shape[1], 1),
         return_sequences=True, dropout=0.3, recurrent_dropout=0.1)
)
lstm_smote.add(BatchNormalization())

# Recurrent block 2
lstm_smote.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.1))
lstm_smote.add(BatchNormalization())

# Recurrent block 3 - returns only the final state
lstm_smote.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.1))
lstm_smote.add(BatchNormalization())

# Dense head ending in a single sigmoid unit
lstm_smote.add(Dense(256, activation='relu'))
lstm_smote.add(Dropout(0.4))
lstm_smote.add(Dense(128, activation='relu'))
lstm_smote.add(Dropout(0.3))
lstm_smote.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile: Adam + binary cross-entropy, tracking accuracy, precision and recall
lstm_smote.compile(
    optimizer=Adam(learning_rate=0.0005),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
)

# Keras carves `validation_split` off the END of the arrays, before any shuffling.
# imblearn's SMOTE stacks its synthetic minority rows after the original rows, so an
# unshuffled 20% tail would hold nothing but synthetic fraud cases, and val_loss (which
# drives early stopping) would never see a legitimate transaction.
# NOTE(review): assumes X_train_smote / y_train_smote come straight from fit_resample
# without a later shuffle - confirm; the permutation is harmless either way.
# A fixed seed keeps the train/validation split reproducible across runs.
smote_lstm_order = np.random.default_rng(42).permutation(X_train_smote_lstm.shape[0])

lstm_smote.fit(
    X_train_smote_lstm[smote_lstm_order],
    np.asarray(y_train_smote)[smote_lstm_order],
    epochs=30,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

# Test-set probabilities, then hard labels at the 0.5 cut-off
y_pred_probs_smote_lstm = lstm_smote.predict(X_test_lstm).flatten()
y_pred_smote_lstm = (y_pred_probs_smote_lstm > 0.5).astype(int)

# Confusion matrix uses the hard labels; AUC uses the raw probabilities
cm_smote_lstm = confusion_matrix(y_test, y_pred_smote_lstm)
roc_smote_lstm = roc_auc_score(y_test, y_pred_probs_smote_lstm)

# Results
print("Confusion Matrix:")
print(cm_smote_lstm)
print(f"AUC: {roc_smote_lstm:.4f}")

ADASYN + LSTM

In [None]:
# Append a length-1 feature axis: (rows, columns) -> (rows, columns, 1)
X_train_adas_lstm = np.expand_dims(X_train_adas.values, axis=-1)

# Halt training after 3 epochs without a val_loss improvement and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Same three-block LSTM + dense head as the other LSTM runs, sized from the ADASYN-resampled input
lstm_adas = Sequential()

# Recurrent block 1 - also declares the input shape
lstm_adas.add(
    LSTM(128, input_shape=(X_train_adas_lstm.shape[1], 1),
         return_sequences=True, dropout=0.3, recurrent_dropout=0.1)
)
lstm_adas.add(BatchNormalization())

# Recurrent block 2
lstm_adas.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.1))
lstm_adas.add(BatchNormalization())

# Recurrent block 3 - returns only the final state
lstm_adas.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.1))
lstm_adas.add(BatchNormalization())

# Dense head ending in a single sigmoid unit
lstm_adas.add(Dense(256, activation='relu'))
lstm_adas.add(Dropout(0.4))
lstm_adas.add(Dense(128, activation='relu'))
lstm_adas.add(Dropout(0.3))
lstm_adas.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile: Adam + binary cross-entropy, tracking accuracy, precision and recall
lstm_adas.compile(
    optimizer=Adam(learning_rate=0.0005),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
)

# Keras carves `validation_split` off the END of the arrays, before any shuffling.
# imblearn's ADASYN stacks its synthetic minority rows after the original rows, so an
# unshuffled 20% tail would hold nothing but synthetic fraud cases, and val_loss (which
# drives early stopping) would never see a legitimate transaction.
# NOTE(review): assumes X_train_adas / y_train_adas come straight from fit_resample
# without a later shuffle - confirm; the permutation is harmless either way.
# A fixed seed keeps the train/validation split reproducible across runs.
adas_lstm_order = np.random.default_rng(42).permutation(X_train_adas_lstm.shape[0])

lstm_adas.fit(
    X_train_adas_lstm[adas_lstm_order],
    np.asarray(y_train_adas)[adas_lstm_order],
    epochs=30,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

# Test-set probabilities, then hard labels at the 0.5 cut-off
y_pred_probs_adas_lstm = lstm_adas.predict(X_test_lstm).flatten()
y_pred_adas_lstm = (y_pred_probs_adas_lstm > 0.5).astype(int)

# Confusion matrix uses the hard labels; AUC uses the raw probabilities
cm_adasyn_lstm = confusion_matrix(y_test, y_pred_adas_lstm)
roc_adasyn_lstm = roc_auc_score(y_test, y_pred_probs_adas_lstm)

# Results
print("Confusion Matrix:")
print(cm_adasyn_lstm)
print(f"AUC: {roc_adasyn_lstm:.4f}")

Cost-Sensitive LSTM

In [None]:
# 'balanced' weights, one per label, returned in the sorted order produced by np.unique
class_weights_array = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train,
)
# Keras expects a {label: weight} mapping; positions 0 and 1 are bound to labels 0 and 1
# (assumes the only labels in y_train are 0 and 1 - TODO confirm)
class_weights_dict = dict(zip((0, 1), (class_weights_array[0], class_weights_array[1])))

In [None]:
# Halt training after 3 epochs without a val_loss improvement and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Cost-sensitive run: identical architecture to the baseline LSTM; the class weighting
# is supplied later, at fit time.
lstm_cost = Sequential()

# Recurrent block 1 - also declares the input shape
lstm_cost.add(
    LSTM(128, input_shape=(X_train_lstm.shape[1], 1),
         return_sequences=True, dropout=0.3, recurrent_dropout=0.1)
)
lstm_cost.add(BatchNormalization())

# Recurrent block 2
lstm_cost.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.1))
lstm_cost.add(BatchNormalization())

# Recurrent block 3 - returns only the final state
lstm_cost.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.1))
lstm_cost.add(BatchNormalization())

# Dense head ending in a single sigmoid unit
lstm_cost.add(Dense(256, activation='relu'))
lstm_cost.add(Dropout(0.4))
lstm_cost.add(Dense(128, activation='relu'))
lstm_cost.add(Dropout(0.3))
lstm_cost.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile: Adam + binary cross-entropy, tracking accuracy, precision and recall
lstm_cost.compile(
    optimizer=Adam(learning_rate=0.0005),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
)

# Train on the original data, re-weighting each sample's loss by its class weight
lstm_cost.fit(
    x=X_train_lstm, y=y_train,
    batch_size=64, epochs=30,
    class_weight=class_weights_dict,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

# Test-set probabilities, then hard labels at the 0.5 cut-off
y_pred_probs_cost_lstm = lstm_cost.predict(X_test_lstm).reshape(-1)
y_pred_cost_lstm = np.greater(y_pred_probs_cost_lstm, 0.5).astype(int)

# Confusion matrix uses the hard labels; AUC uses the raw probabilities
cm_cost_lstm = confusion_matrix(y_test, y_pred_cost_lstm)
roc_cost_lstm = roc_auc_score(y_test, y_pred_probs_cost_lstm)

# Report
print("Confusion Matrix:", cm_cost_lstm, sep="\n")
print(f"AUC: {roc_cost_lstm:.4f}")

Plot ROC Curves for LSTM

In [None]:
# ROC points (false-positive rate, true-positive rate) for each LSTM variant; thresholds are discarded
fpr_base_lstm, tpr_base_lstm, _ = roc_curve(y_test, y_pred_probs_baseline_lstm)
fpr_smote_lstm, tpr_smote_lstm, _ = roc_curve(y_test, y_pred_probs_smote_lstm)
fpr_adas_lstm, tpr_adas_lstm, _ = roc_curve(y_test, y_pred_probs_adas_lstm)
fpr_cost_lstm, tpr_cost_lstm, _ = roc_curve(y_test, y_pred_probs_cost_lstm)

plt.figure(figsize=(8,6))

# One curve per training strategy, drawn in the same order as the legend
for lstm_curve_fpr, lstm_curve_tpr, lstm_curve_label in (
    (fpr_base_lstm, tpr_base_lstm, "Baseline"),
    (fpr_smote_lstm, tpr_smote_lstm, "SMOTE"),
    (fpr_adas_lstm, tpr_adas_lstm, "ADASYN"),
    (fpr_cost_lstm, tpr_cost_lstm, "Cost-sensitive"),
):
    plt.plot(lstm_curve_fpr, lstm_curve_tpr, label=lstm_curve_label)

# Dashed diagonal = chance-level classifier
plt.plot([0,1],[0,1],'k--')

plt.title("ROC Curves for LSTM - Credit Card Fraud")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
plt.grid()
plt.show()

Model Metrics Comparison Table

In [None]:
# Side-by-side test-set metrics for the four LSTM training strategies.
# Note: this rebinds `results_lstm`, which the tuning cell used as a list of
# (params, accuracy) tuples, so re-running the "best parameters" cell after this one will fail.
results_lstm = pd.DataFrame({
    "Method": ["Baseline", "SMOTE", "ADASYN", "Cost-sensitive"],
    # One column per threshold-based metric, each scored on the hard 0/1 predictions
    **{
        column: [
            scorer(y_test, hard_preds)
            for hard_preds in (
                y_pred_baseline_lstm,
                y_pred_smote_lstm,
                y_pred_adas_lstm,
                y_pred_cost_lstm,
            )
        ]
        for column, scorer in (
            ("Accuracy", accuracy_score),
            ("Precision", precision_score),
            ("Recall", recall_score),
            ("F1", f1_score),
        )
    },
    # AUC was already computed from the predicted probabilities in the cells above
    "AUC": [roc_baseline_lstm, roc_smote_lstm, roc_adasyn_lstm, roc_cost_lstm],
})

print(results_lstm)

# HYBRID MODEL(CNN+LSTM)

In [None]:
# Append a length-1 channel axis: (rows, columns) -> (rows, columns, 1), as Conv1D/LSTM need 3-D input
X_train_cnnlstm = np.expand_dims(X_train.values, axis=-1)
X_test_cnnlstm = np.expand_dims(X_test.values, axis=-1)

Hyper-Parameter Tuning

In [None]:
def build_cnn_lstm_model(filters, kernel_size, lstm_units, dropout_rate, dense_units, learning_rate):
    """Assemble and compile a Conv1D -> LSTM -> Dense binary classifier.

    The input length is read from the notebook-level ``X_train_cnnlstm`` (its second
    dimension); each timestep carries a single channel.

    Args:
        filters: Filter count of the first convolution; later convolutions use 2x and 4x this.
        kernel_size: Kernel width shared by all four convolutions.
        lstm_units: Units in the first LSTM; the second uses half as many, but never fewer than 32.
        dropout_rate: Dropout for the conv blocks and the LSTM inputs, clamped to [0.0, 0.8].
            The dense head uses ``dropout_rate + 0.1`` under the same clamp.
        dense_units: Width of the hidden dense layer.
        learning_rate: Learning rate handed to Adam.

    Returns:
        A compiled ``Sequential`` model (binary cross-entropy loss, accuracy metric).
    """

    def _clamp(rate):
        # Keep a dropout rate inside [0.0, 0.8]
        return min(max(rate, 0.0), 0.8)

    conv_lstm_dropout = _clamp(dropout_rate)
    dense_dropout = _clamp(dropout_rate + 0.1)
    second_lstm_units = max(lstm_units // 2, 32)

    net = Sequential()

    # Conv block 1: filters -> 2*filters, then halve the sequence length
    net.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu',
                   padding='same', input_shape=(X_train_cnnlstm.shape[1], 1)))
    net.add(BatchNormalization())
    net.add(Conv1D(filters=filters * 2, kernel_size=kernel_size, activation='relu', padding='same'))
    net.add(BatchNormalization())
    net.add(MaxPooling1D(pool_size=2))
    net.add(Dropout(conv_lstm_dropout))

    # Conv block 2: two 4*filters convolutions, then halve the sequence length again
    net.add(Conv1D(filters=filters * 4, kernel_size=kernel_size, activation='relu', padding='same'))
    net.add(BatchNormalization())
    net.add(Conv1D(filters=filters * 4, kernel_size=kernel_size, activation='relu', padding='same'))
    net.add(BatchNormalization())
    net.add(MaxPooling1D(pool_size=2))
    net.add(Dropout(conv_lstm_dropout))

    # Recurrent stack: the first LSTM keeps the sequence, the second returns only the final state
    net.add(LSTM(lstm_units, return_sequences=True, dropout=conv_lstm_dropout, recurrent_dropout=0.1))
    net.add(BatchNormalization())
    net.add(LSTM(second_lstm_units, return_sequences=False, dropout=conv_lstm_dropout, recurrent_dropout=0.1))
    net.add(BatchNormalization())

    # Dense head ending in a single sigmoid unit
    net.add(Dense(dense_units, activation='relu'))
    net.add(Dropout(dense_dropout))
    net.add(Dense(1, activation='sigmoid'))

    net.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Exhaustive grid: 3 * 3 * 3 * 4 * 4 * 5 = 2160 configurations, each trained for up to 30 epochs.
# NOTE(review): this is a very large search - consider trimming the lists below or sampling
# a random subset of combinations before running the notebook end-to-end.
param_grid_hybrid = {
    'filters': [32, 64, 128],
    'kernel_size': [3, 4, 5],
    'lstm_units': [32, 64, 128],
    'dropout_rate': [0.1, 0.2, 0.3, 0.4],
    'dense_units': [32, 64, 128, 256],
    'learning_rate': [0.0001,0.0002, 0.0003, 0.0004,0.0005]
}

# Collected as (params, validation accuracy) pairs, one per configuration
results_hybrid = []

for params in itertools.product(*param_grid_hybrid.values()):
    current_params = dict(zip(param_grid_hybrid.keys(), params))
    print(f"Testing: {current_params}")

    # Build a fresh callback per trial so the search does not depend on an `early_stop`
    # object left behind by whichever earlier cell happened to run last.
    trial_early_stop = EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True,
        verbose=1
    )

    model = build_cnn_lstm_model(**current_params)
    history = model.fit(
        X_train_cnnlstm,
        y_train,
        validation_split=0.2,
        epochs=30,
        batch_size=32,
        verbose=0,
        callbacks=[trial_early_stop]
    )

    # Rank configurations on the held-out validation split, never on the test set:
    # choosing hyper-parameters by test accuracy leaks the test data into model selection
    # and makes every later test-set metric optimistic.
    # The score is the validation accuracy at the epoch with the lowest val_loss, i.e. the
    # epoch early stopping treats as best.
    val_losses = history.history['val_loss']
    best_epoch = min(range(len(val_losses)), key=val_losses.__getitem__)
    acc = history.history['val_accuracy'][best_epoch]

    results_hybrid.append((current_params, acc))
    print(f"Validation accuracy: {acc:.4f}")

# Highest validation accuracy wins; on ties max() keeps the first configuration tried
best_params_hybrid, best_score_hybrid = max(results_hybrid, key=lambda x: x[1])
print("\nBest Parameters for CNN+LSTM:")
print(best_params_hybrid)
print(f"Best Validation Accuracy: {best_score_hybrid:.4f}")

Hybrid (CNN + LSTM) Baseline

In [None]:
# Halt training after 3 epochs without a val_loss improvement and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Hybrid baseline: two convolutional blocks -> two LSTMs -> dense head
cnn_lstm_baseline = Sequential()

# Conv block 1 (32 -> 64 filters) - also declares the input shape
cnn_lstm_baseline.add(
    Conv1D(32, 3, activation='relu', padding='same', input_shape=(X_train_cnnlstm.shape[1], 1))
)
cnn_lstm_baseline.add(BatchNormalization())
cnn_lstm_baseline.add(Conv1D(64, 3, activation='relu', padding='same'))
cnn_lstm_baseline.add(BatchNormalization())
cnn_lstm_baseline.add(MaxPooling1D(pool_size=2))
cnn_lstm_baseline.add(Dropout(0.3))

# Conv block 2 (128 -> 128 filters)
cnn_lstm_baseline.add(Conv1D(128, 3, activation='relu', padding='same'))
cnn_lstm_baseline.add(BatchNormalization())
cnn_lstm_baseline.add(Conv1D(128, 3, activation='relu', padding='same'))
cnn_lstm_baseline.add(BatchNormalization())
cnn_lstm_baseline.add(MaxPooling1D(pool_size=2))
cnn_lstm_baseline.add(Dropout(0.3))

# Recurrent stack - the second LSTM returns only the final state
cnn_lstm_baseline.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.1))
cnn_lstm_baseline.add(BatchNormalization())
cnn_lstm_baseline.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.1))
cnn_lstm_baseline.add(BatchNormalization())

# Dense head ending in a single sigmoid unit
cnn_lstm_baseline.add(Dense(128, activation='relu'))
cnn_lstm_baseline.add(Dropout(0.4))
cnn_lstm_baseline.add(Dense(64, activation='relu'))
cnn_lstm_baseline.add(Dropout(0.3))
cnn_lstm_baseline.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile: Adam + binary cross-entropy, tracking accuracy, precision and recall
cnn_lstm_baseline.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
)

# Train on the untouched (imbalanced) training data; the last 20% of rows is the validation split
cnn_lstm_baseline.fit(
    x=X_train_cnnlstm, y=y_train,
    batch_size=64, epochs=30,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

# Test-set probabilities, then hard labels at the 0.5 cut-off
y_pred_probs_baseline_cnnlstm = cnn_lstm_baseline.predict(X_test_cnnlstm).reshape(-1)
y_pred_baseline_cnnlstm = np.greater(y_pred_probs_baseline_cnnlstm, 0.5).astype(int)

# Confusion matrix uses the hard labels; AUC uses the raw probabilities
cm_baseline_cnnlstm = confusion_matrix(y_test, y_pred_baseline_cnnlstm)
roc_baseline_cnnlstm = roc_auc_score(y_test, y_pred_probs_baseline_cnnlstm)

# Report
print("Confusion Matrix:", cm_baseline_cnnlstm, sep="\n")
print(f"AUC: {roc_baseline_cnnlstm:.4f}")

SMOTE + CNN + LSTM

In [None]:
# Append a length-1 channel axis: (rows, columns) -> (rows, columns, 1)
X_train_smote_cnnlstm = np.expand_dims(X_train_smote.values, axis=-1)

In [None]:
# Halt training after 3 epochs without a val_loss improvement and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Hybrid CNN+LSTM for the SMOTE-resampled data: two conv blocks -> two LSTMs -> dense head
cnn_lstm_smote = Sequential()

# Conv block 1 (32 -> 64 filters) - also declares the input shape
cnn_lstm_smote.add(
    Conv1D(32, 3, activation='relu', padding='same', input_shape=(X_train_smote_cnnlstm.shape[1], 1))
)
cnn_lstm_smote.add(BatchNormalization())
cnn_lstm_smote.add(Conv1D(64, 3, activation='relu', padding='same'))
cnn_lstm_smote.add(BatchNormalization())
cnn_lstm_smote.add(MaxPooling1D(2))
cnn_lstm_smote.add(Dropout(0.3))

# Conv block 2 (128 -> 128 filters)
cnn_lstm_smote.add(Conv1D(128, 3, activation='relu', padding='same'))
cnn_lstm_smote.add(BatchNormalization())
cnn_lstm_smote.add(Conv1D(128, 3, activation='relu', padding='same'))
cnn_lstm_smote.add(BatchNormalization())
cnn_lstm_smote.add(MaxPooling1D(2))
cnn_lstm_smote.add(Dropout(0.3))

# Recurrent stack - the second LSTM returns only the final state
cnn_lstm_smote.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.1))
cnn_lstm_smote.add(BatchNormalization())
cnn_lstm_smote.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.1))
cnn_lstm_smote.add(BatchNormalization())

# Dense head ending in a single sigmoid unit
cnn_lstm_smote.add(Dense(128, activation='relu'))
cnn_lstm_smote.add(Dropout(0.4))
cnn_lstm_smote.add(Dense(64, activation='relu'))
cnn_lstm_smote.add(Dropout(0.3))
cnn_lstm_smote.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile: Adam + binary cross-entropy, tracking accuracy, precision and recall
cnn_lstm_smote.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
)

# Keras carves `validation_split` off the END of the arrays, before any shuffling.
# imblearn's SMOTE stacks its synthetic minority rows after the original rows, so an
# unshuffled 20% tail would hold nothing but synthetic fraud cases, and val_loss (which
# drives early stopping) would never see a legitimate transaction.
# NOTE(review): assumes X_train_smote / y_train_smote come straight from fit_resample
# without a later shuffle - confirm; the permutation is harmless either way.
# A fixed seed keeps the train/validation split reproducible across runs.
smote_cnnlstm_order = np.random.default_rng(42).permutation(X_train_smote_cnnlstm.shape[0])

# Fitting model
cnn_lstm_smote.fit(
    X_train_smote_cnnlstm[smote_cnnlstm_order],
    np.asarray(y_train_smote)[smote_cnnlstm_order],
    epochs=30,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

# Test-set probabilities, then hard labels at the 0.5 cut-off
y_pred_probs_smote_cnnlstm = cnn_lstm_smote.predict(X_test_cnnlstm).flatten()
y_pred_smote_cnnlstm = (y_pred_probs_smote_cnnlstm > 0.5).astype(int)

# Confusion matrix uses the hard labels; AUC uses the raw probabilities
cm_smote_cnnlstm = confusion_matrix(y_test, y_pred_smote_cnnlstm)
roc_smote_cnnlstm = roc_auc_score(y_test, y_pred_probs_smote_cnnlstm)

# Output
print("Confusion Matrix:")
print(cm_smote_cnnlstm)
print(f"AUC: {roc_smote_cnnlstm:.4f}")

ADASYN + CNN + LSTM

In [None]:
# Append a length-1 channel axis: (rows, columns) -> (rows, columns, 1)
X_train_adas_cnnlstm = np.expand_dims(X_train_adas.values, axis=-1)

In [None]:
# Halt training after 3 epochs without a val_loss improvement and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Hybrid CNN+LSTM for the ADASYN-resampled data: two conv blocks -> two LSTMs -> dense head
cnn_lstm_adas = Sequential()

# Conv block 1 (32 -> 64 filters) - also declares the input shape
cnn_lstm_adas.add(
    Conv1D(32, 3, activation='relu', padding='same', input_shape=(X_train_adas_cnnlstm.shape[1], 1))
)
cnn_lstm_adas.add(BatchNormalization())
cnn_lstm_adas.add(Conv1D(64, 3, activation='relu', padding='same'))
cnn_lstm_adas.add(BatchNormalization())
cnn_lstm_adas.add(MaxPooling1D(2))
cnn_lstm_adas.add(Dropout(0.3))

# Conv block 2 (128 -> 128 filters)
cnn_lstm_adas.add(Conv1D(128, 3, activation='relu', padding='same'))
cnn_lstm_adas.add(BatchNormalization())
cnn_lstm_adas.add(Conv1D(128, 3, activation='relu', padding='same'))
cnn_lstm_adas.add(BatchNormalization())
cnn_lstm_adas.add(MaxPooling1D(2))
cnn_lstm_adas.add(Dropout(0.3))

# Recurrent stack - the second LSTM returns only the final state
cnn_lstm_adas.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.1))
cnn_lstm_adas.add(BatchNormalization())
cnn_lstm_adas.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.1))
cnn_lstm_adas.add(BatchNormalization())

# Dense head ending in a single sigmoid unit
cnn_lstm_adas.add(Dense(128, activation='relu'))
cnn_lstm_adas.add(Dropout(0.4))
cnn_lstm_adas.add(Dense(64, activation='relu'))
cnn_lstm_adas.add(Dropout(0.3))
cnn_lstm_adas.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile: Adam + binary cross-entropy, tracking accuracy, precision and recall
cnn_lstm_adas.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
)

# Keras carves `validation_split` off the END of the arrays, before any shuffling.
# imblearn's ADASYN stacks its synthetic minority rows after the original rows, so an
# unshuffled 20% tail would hold nothing but synthetic fraud cases, and val_loss (which
# drives early stopping) would never see a legitimate transaction.
# NOTE(review): assumes X_train_adas / y_train_adas come straight from fit_resample
# without a later shuffle - confirm; the permutation is harmless either way.
# A fixed seed keeps the train/validation split reproducible across runs.
adas_cnnlstm_order = np.random.default_rng(42).permutation(X_train_adas_cnnlstm.shape[0])

# Fitting model
cnn_lstm_adas.fit(
    X_train_adas_cnnlstm[adas_cnnlstm_order],
    np.asarray(y_train_adas)[adas_cnnlstm_order],
    epochs=30,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

# Test-set probabilities, then hard labels at the 0.5 cut-off
y_pred_probs_adas_cnnlstm = cnn_lstm_adas.predict(X_test_cnnlstm).flatten()
y_pred_adas_cnnlstm = (y_pred_probs_adas_cnnlstm > 0.5).astype(int)

# Confusion matrix uses the hard labels; AUC uses the raw probabilities
cm_adasyn_cnnlstm = confusion_matrix(y_test, y_pred_adas_cnnlstm)
roc_adasyn_cnnlstm = roc_auc_score(y_test, y_pred_probs_adas_cnnlstm)

# Output
print("Confusion Matrix:")
print(cm_adasyn_cnnlstm)
print(f"AUC: {roc_adasyn_cnnlstm:.4f}")

Cost-Sensitive CNN + LSTM

In [None]:
# Halt training after 3 epochs without a val_loss improvement and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Cost-sensitive hybrid: same layer stack as the hybrid baseline; the class weighting
# is supplied later, at fit time.
cnn_lstm_cost = Sequential()

# Conv block 1 (32 -> 64 filters) - also declares the input shape
cnn_lstm_cost.add(
    Conv1D(32, 3, activation='relu', padding='same', input_shape=(X_train_cnnlstm.shape[1], 1))
)
cnn_lstm_cost.add(BatchNormalization())
cnn_lstm_cost.add(Conv1D(64, 3, activation='relu', padding='same'))
cnn_lstm_cost.add(BatchNormalization())
cnn_lstm_cost.add(MaxPooling1D(2))
cnn_lstm_cost.add(Dropout(0.3))

# Conv block 2 (128 -> 128 filters)
cnn_lstm_cost.add(Conv1D(128, 3, activation='relu', padding='same'))
cnn_lstm_cost.add(BatchNormalization())
cnn_lstm_cost.add(Conv1D(128, 3, activation='relu', padding='same'))
cnn_lstm_cost.add(BatchNormalization())
cnn_lstm_cost.add(MaxPooling1D(2))
cnn_lstm_cost.add(Dropout(0.3))

# Recurrent stack - the second LSTM returns only the final state
cnn_lstm_cost.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.1))
cnn_lstm_cost.add(BatchNormalization())
cnn_lstm_cost.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.1))
cnn_lstm_cost.add(BatchNormalization())

# Dense head ending in a single sigmoid unit
cnn_lstm_cost.add(Dense(128, activation='relu'))
cnn_lstm_cost.add(Dropout(0.4))
cnn_lstm_cost.add(Dense(64, activation='relu'))
cnn_lstm_cost.add(Dropout(0.3))
cnn_lstm_cost.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile: Adam + binary cross-entropy, tracking accuracy, precision and recall
cnn_lstm_cost.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
)

# Train on the original data, re-weighting each sample's loss by its class weight.
# NOTE(review): epochs=100 here, while the other LSTM and CNN+LSTM runs use 30 - confirm
# this larger budget is intentional, since all four variants are compared in one table.
cnn_lstm_cost.fit(
    x=X_train_cnnlstm, y=y_train,
    batch_size=64, epochs=100,
    class_weight=class_weights_dict,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

# Test-set probabilities, then hard labels at the 0.5 cut-off
y_pred_probs_cost_cnnlstm = cnn_lstm_cost.predict(X_test_cnnlstm).reshape(-1)
y_pred_cost_cnnlstm = np.greater(y_pred_probs_cost_cnnlstm, 0.5).astype(int)

# Confusion matrix uses the hard labels; AUC uses the raw probabilities
cm_cost_cnnlstm = confusion_matrix(y_test, y_pred_cost_cnnlstm)
roc_cost_cnnlstm = roc_auc_score(y_test, y_pred_probs_cost_cnnlstm)

# Report
print("Confusion Matrix:", cm_cost_cnnlstm, sep="\n")
print(f"AUC: {roc_cost_cnnlstm:.4f}")

Plot ROC Curves for CNN + LSTM

In [None]:
# ROC points (false-positive rate, true-positive rate) for each CNN+LSTM variant; thresholds are discarded
fpr_base_cnnlstm, tpr_base_cnnlstm, _ = roc_curve(y_test, y_pred_probs_baseline_cnnlstm)
fpr_smote_cnnlstm, tpr_smote_cnnlstm, _ = roc_curve(y_test, y_pred_probs_smote_cnnlstm)
fpr_adas_cnnlstm, tpr_adas_cnnlstm, _ = roc_curve(y_test, y_pred_probs_adas_cnnlstm)
fpr_cost_cnnlstm, tpr_cost_cnnlstm, _ = roc_curve(y_test, y_pred_probs_cost_cnnlstm)

plt.figure(figsize=(8,6))

# One curve per training strategy, drawn in the same order as the legend
for hybrid_curve_fpr, hybrid_curve_tpr, hybrid_curve_label in (
    (fpr_base_cnnlstm, tpr_base_cnnlstm, "Baseline"),
    (fpr_smote_cnnlstm, tpr_smote_cnnlstm, "SMOTE"),
    (fpr_adas_cnnlstm, tpr_adas_cnnlstm, "ADASYN"),
    (fpr_cost_cnnlstm, tpr_cost_cnnlstm, "Cost-sensitive"),
):
    plt.plot(hybrid_curve_fpr, hybrid_curve_tpr, label=hybrid_curve_label)

# Dashed diagonal = chance-level classifier
plt.plot([0,1],[0,1],'k--')

plt.title("ROC Curves for CNN + LSTM - Credit Card Fraud")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
plt.grid()
plt.show()

Model Metrics Comparison Table

In [None]:
# Side-by-side test-set metrics for the four CNN+LSTM training strategies
results_cnnlstm = pd.DataFrame({
    "Method": ["Baseline", "SMOTE", "ADASYN", "Cost-sensitive"],
    # One column per threshold-based metric, each scored on the hard 0/1 predictions
    **{
        column: [
            scorer(y_test, hard_preds)
            for hard_preds in (
                y_pred_baseline_cnnlstm,
                y_pred_smote_cnnlstm,
                y_pred_adas_cnnlstm,
                y_pred_cost_cnnlstm,
            )
        ]
        for column, scorer in (
            ("Accuracy", accuracy_score),
            ("Precision", precision_score),
            ("Recall", recall_score),
            ("F1", f1_score),
        )
    },
    # AUC was already computed from the predicted probabilities in the cells above
    "AUC": [roc_baseline_cnnlstm, roc_smote_cnnlstm, roc_adasyn_cnnlstm, roc_cost_cnnlstm],
})
print(results_cnnlstm)