# 02 — Model Training and Evaluation — CLDNN (Conv1D + LSTM)

This notebook trains a CLDNN model: a stack of Conv1D layers followed by a bidirectional LSTM and a dense softmax classifier.

In [None]:
# Imports, style, seed, GPU check, and paths
import os, pickle, numpy as np, random
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.keras import TqdmCallback
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Plot styling applied to every figure produced by this notebook.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_style('whitegrid')

# Seed each random number generator the notebook touches (NumPy, Python's
# `random`, TensorFlow) with the same value.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

# Report the accelerators TensorFlow can see; only list them when there are any.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print('GPU detected:', gpus)
else:
    print('No GPU detected by TensorFlow.')

# Project layout. The default is the Colab Google Drive mount used so far; set
# the AMC_PROJECT_ROOT environment variable to run from another location
# without editing this cell.
PROJECT_ROOT_PATH = os.environ.get(
    'AMC_PROJECT_ROOT',
    '/content/drive/MyDrive/DTVT_IUH_2025/AMC_RML2016_10b/',
)
PROCESSED_DATA_PATH = os.path.join(PROJECT_ROOT_PATH, 'data/')
PREPROCESSING_OBJECTS_PATH = os.path.join(PROJECT_ROOT_PATH, 'preprocessing_objects/')
MODEL_SAVE_PATH = os.path.join(PROJECT_ROOT_PATH, 'models/', 'CLDNN/')
MODEL_VISUALIZATIONS_PATH = os.path.join(MODEL_SAVE_PATH, 'visualizations/')

# Only the output directories are created; the data and preprocessing
# directories are read by the next cell and are expected to exist already.
for d in (MODEL_SAVE_PATH, MODEL_VISUALIZATIONS_PATH):
    os.makedirs(d, exist_ok=True)

In [None]:
# Load 1D data and preprocessing objects
# (np, os and pickle are already imported in the first cell.)
def _load_split(split):
    """Load the (X, y, snr) arrays of one split from PROCESSED_DATA_PATH.

    Args:
        split: split name used in the file names ('train', 'val' or 'test').

    Returns:
        Tuple ``(X, y, snr)`` read from ``X_<split>_1d.npy``, ``y_<split>.npy``
        and ``snr_<split>.npy``.

    Raises:
        AssertionError: if the three arrays do not have the same length.
    """
    X, y, snr = (
        np.load(os.path.join(PROCESSED_DATA_PATH, fname))
        for fname in (f'X_{split}_1d.npy', f'y_{split}.npy', f'snr_{split}.npy')
    )
    # Later cells index y_true / y_pred with positions taken from snr_test, so
    # misaligned arrays would silently corrupt every per-SNR metric.
    assert len(X) == len(y) == len(snr), (
        f'{split} split is misaligned: X={len(X)}, y={len(y)}, snr={len(snr)}'
    )
    return X, y, snr

X_train, y_train, snr_train = _load_split('train')
X_val, y_val, snr_val = _load_split('val')
X_test, y_test, snr_test = _load_split('test')

# NOTE: pickle.load can execute arbitrary code embedded in the file. Only load
# objects written by this project's own preprocessing notebook.
with open(os.path.join(PREPROCESSING_OBJECTS_PATH, 'label_encoder.pkl'), 'rb') as f:
    label_encoder = pickle.load(f)
with open(os.path.join(PREPROCESSING_OBJECTS_PATH, 'standard_scaler.pkl'), 'rb') as f:
    scaler = pickle.load(f)

# The class count is read from the second axis of y_train, and the per-sample
# input shape is X_train's shape without its leading (sample) axis.
num_classes = y_train.shape[1]
input_shape = X_train.shape[1:]
print('Shapes:', X_train.shape, X_val.shape, X_test.shape, '| Classes:', num_classes)

In [None]:
# Build CLDNN (Conv1D + LSTM)
def build_cldnn(input_shape, num_classes):
    """Assemble the CLDNN classifier (Conv1D stages -> BiLSTM -> softmax Dense).

    Three Conv1D + BatchNormalization stages (max-pooling after the second,
    dropout after the third) feed a bidirectional LSTM that returns only its
    final output; that output passes through dropout into a softmax layer.

    Args:
        input_shape: per-sample shape handed to ``keras.Input``.
        num_classes: number of units in the softmax output layer.

    Returns:
        An uncompiled ``keras.Model`` named ``'CLDNN_AMC'``.
    """
    def conv_bn(tensor, filters, kernel_size):
        # 'same'-padded ReLU convolution followed by batch normalization.
        tensor = layers.Conv1D(filters, kernel_size, padding='same', activation='relu')(tensor)
        return layers.BatchNormalization()(tensor)

    signal_in = keras.Input(shape=input_shape)

    # Convolutional front end.
    features = conv_bn(signal_in, 32, 5)
    features = conv_bn(features, 64, 5)
    features = layers.MaxPooling1D(2)(features)
    features = conv_bn(features, 128, 3)
    features = layers.Dropout(0.3)(features)

    # Recurrent summary of the convolutional feature sequence.
    recurrent = layers.Bidirectional(layers.LSTM(64, return_sequences=False))
    summary_vec = recurrent(features)
    summary_vec = layers.Dropout(0.3)(summary_vec)

    class_probs = layers.Dense(num_classes, activation='softmax')(summary_vec)
    return keras.Model(signal_in, class_probs, name='CLDNN_AMC')

# Instantiate the network for the loaded data's input shape and class count,
# then print its layer table.
model = build_cldnn(input_shape=input_shape, num_classes=num_classes)
model.summary()

In [None]:
# Compile, callbacks, and train with tqdm
optimizer = keras.optimizers.Adam(learning_rate=0.008)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# All three callbacks watch validation loss.
ckpt_path = os.path.join(MODEL_SAVE_PATH, 'best_model.keras')
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    ckpt_path,
    monitor='val_loss',
    save_best_only=True,      # keep only the best epoch on disk
    save_weights_only=False,  # save the full model, not just weights
)
reduce_lr_cb = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=5, verbose=1
)
early_stop_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
)
callbacks = [checkpoint_cb, reduce_lr_cb, early_stop_cb]

# Keras' own progress output is silenced (verbose=0); the tqdm callback
# reports progress instead.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=1024,
    callbacks=[TqdmCallback(verbose=0), *callbacks],
    verbose=0,
)

In [None]:
# Reload best model (safeguard)
# ckpt_path is the file the ModelCheckpoint callback (monitor='val_loss',
# save_best_only=True) wrote during training. Every evaluation cell below uses
# `best_model`, i.e. the model as stored on disk, rather than the in-memory
# `model` object.
best_model = keras.models.load_model(ckpt_path)

In [None]:
# Training-history plots: accuracy and loss curves over epochs
def plot_history(hist, prefix='cldnn'):
    """Plot, save and show the train/validation accuracy and loss curves.

    Args:
        hist: object with a ``history`` dict holding the keys 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' (as returned by ``model.fit``
            in this notebook).
        prefix: file-name prefix for the two PNGs written to
            MODEL_VISUALIZATIONS_PATH.
    """
    curves = hist.history
    # (train key, val key, train label, val label, title, y-label, file name)
    panels = (
        ('accuracy', 'val_accuracy', 'train_acc', 'val_acc',
         'Accuracy over Epochs', 'Accuracy', f'{prefix}_history_accuracy.png'),
        ('loss', 'val_loss', 'train_loss', 'val_loss',
         'Loss over Epochs', 'Loss', f'{prefix}_history_loss.png'),
    )
    for train_key, val_key, train_label, val_label, title, ylabel, filename in panels:
        plt.figure(figsize=(12,4))
        plt.plot(curves[train_key], label=train_label)
        plt.plot(curves[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.legend(loc='best')
        plt.tight_layout()
        plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, filename), dpi=300)
        plt.show()

plot_history(history, prefix='cldnn')

In [None]:
# Evaluation and reports
# Aggregate loss / accuracy of the checkpointed model on the test split.
test_loss, test_acc = best_model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {test_loss:.4f} | Test accuracy: {test_acc:.4f}')

# Collapse the per-class score vectors (ground truth and predictions) to
# integer class indices via argmax over axis 1.
y_true = y_test.argmax(axis=1)
y_pred_probs = best_model.predict(X_test, verbose=0)
y_pred = y_pred_probs.argmax(axis=1)

# Human-readable class names, in the label encoder's index order.
class_names = label_encoder.classes_.tolist()
report = classification_report(y_true, y_pred, target_names=class_names, digits=3)
print(report)

In [None]:
# Accuracy heatmap by (mod x SNR)
# Rows are classes, columns are the distinct SNR values found in snr_test.
snrs_unique = np.sort(np.unique(snr_test))
n_classes = len(class_names)
acc_matrix = np.zeros((n_classes, len(snrs_unique)))
for col, snr_value in enumerate(snrs_unique):
    rows_at_snr = np.where(snr_test == snr_value)[0]
    if len(rows_at_snr) == 0:
        continue
    true_at_snr = y_true[rows_at_snr]
    pred_at_snr = y_pred[rows_at_snr]
    for cls in range(n_classes):
        is_cls = (true_at_snr == cls)
        n_cls = np.sum(is_cls)
        if n_cls > 0:
            # Fraction of this class's samples (at this SNR) predicted correctly.
            acc_matrix[cls, col] = np.sum((pred_at_snr == cls) & is_cls) / n_cls
        else:
            # No samples of this class at this SNR: accuracy is undefined.
            acc_matrix[cls, col] = np.nan

plt.figure(figsize=(12,6))
sns.heatmap(
    acc_matrix,
    annot=True,
    fmt='.3f',
    cmap='magma',
    cbar=True,
    square=True,
    linewidths=0.5,
    linecolor='white',
    xticklabels=snrs_unique,
    yticklabels=class_names,
)
plt.title('Per-Class Accuracy vs SNR')
plt.xlabel('SNR (dB)')
plt.ylabel('Modulation')
plt.tight_layout()
plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'accuracy_heatmap.png'), dpi=300)
plt.show()

In [None]:
# Confusion matrices overall and per SNR with fixed colorbars
def _plot_confusion_pair(cm_counts, title_raw, title_norm, filename):
    """Draw raw-count and row-normalized confusion heatmaps side by side.

    The figure is saved under MODEL_VISUALIZATIONS_PATH, shown, and closed.

    Args:
        cm_counts: square integer confusion matrix (rows = true class,
            columns = predicted class), ordered like ``class_names``.
        title_raw: title of the left (raw counts) panel.
        title_norm: title of the right (row-normalized) panel.
        filename: PNG file name to write.

    Returns:
        The row-normalized matrix; rows without any true samples are NaN.
    """
    row_totals = cm_counts.sum(axis=1, keepdims=True)
    # A class with no true samples has an undefined (not zero) recall row.
    # Fill such rows with NaN explicitly instead of dividing 0/0, which would
    # emit a RuntimeWarning; seaborn leaves NaN cells blank.
    cm_normalized = np.divide(
        cm_counts,
        row_totals,
        out=np.full(cm_counts.shape, np.nan),
        where=row_totals > 0,
    )
    # Colorbar ceiling for raw counts = the largest class support; never below
    # 1 so that an all-zero matrix still gets a valid (vmin < vmax) range.
    vmax_counts = max(int(row_totals.max()), 1)

    fig, axes = plt.subplots(1, 2, figsize=(16,6))
    panels = (
        (axes[0], cm_counts, 'd', vmax_counts, title_raw),
        (axes[1], cm_normalized, '.3f', 1.0, title_norm),
    )
    for ax, matrix, fmt, vmax, title in panels:
        sns.heatmap(matrix, ax=ax, cmap='magma', annot=True, fmt=fmt, cbar=True,
                    square=True, linewidths=0.5, linecolor='white', vmin=0, vmax=vmax)
        ax.set_title(title)
        ax.set_xlabel('Predicted')
        ax.set_ylabel('True')
        ax.set_xticklabels(class_names, rotation=90)
        ax.set_yticklabels(class_names, rotation=0)
    plt.tight_layout()
    plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, filename), dpi=300)
    plt.show()
    # One figure is created per SNR below; release each one once it is saved.
    plt.close(fig)
    return cm_normalized

# Overall confusion matrix on the whole test set.
class_ids = np.arange(len(class_names))
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
cm_norm = _plot_confusion_pair(
    cm,
    'Confusion Matrix — Raw Counts',
    'Confusion Matrix — Normalized',
    'confusion_matrices_overall.png',
)

# One pair of confusion matrices per distinct SNR value in the test set.
snrs_unique = np.sort(np.unique(snr_test))
for s in snrs_unique:
    idx = np.where(snr_test == s)[0]
    cm_s = confusion_matrix(y_true[idx], y_pred[idx], labels=class_ids)
    _plot_confusion_pair(
        cm_s,
        f'Confusion (Raw) — SNR {s} dB',
        f'Confusion (Normalized) — SNR {s} dB',
        f'confusion_matrices_snr_{s}.png',
    )

In [None]:
# Accuracy vs SNR (overall, per class)
# y_true, y_pred and class_names come from the evaluation cell above; running
# inference on the full test set a second time here would only repeat that work.
snrs_unique = np.sort(np.unique(snr_test))
overall_acc = []
per_class_acc = {i: [] for i in range(len(class_names))}
for s in snrs_unique:
    idx = np.where(snr_test == s)[0]
    yt = y_true[idx]; yp = y_pred[idx]
    # Always append exactly one value per SNR so the y-lists stay the same
    # length as snrs_unique; an empty slice is recorded as NaN.
    overall_acc.append(np.mean(yt == yp) if len(idx) > 0 else np.nan)
    for i in range(len(class_names)):
        mask = (yt == i)
        denom = np.sum(mask)
        # Fraction of class-i samples at this SNR that were predicted as class i;
        # NaN when the class has no samples at this SNR.
        per_class_acc[i].append((np.sum((yp == i) & mask) / denom) if denom > 0 else np.nan)

# Overall accuracy curve.
plt.figure(figsize=(12,4))
plt.plot(snrs_unique, overall_acc, marker='o', linewidth=2)
plt.title('Overall Accuracy vs SNR')
plt.xlabel('SNR (dB)')
plt.ylabel('Accuracy')
plt.tight_layout()
plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'overall_accuracy_vs_snr.png'), dpi=300)
plt.show()

# One accuracy curve per class.
plt.figure(figsize=(12,4))
palette = sns.color_palette('tab10', n_colors=len(class_names))
for i, cname in enumerate(class_names):
    plt.plot(snrs_unique, per_class_acc[i], marker='o', label=cname, linewidth=2, color=palette[i % len(palette)])
plt.title('Per-Class Accuracy vs SNR')
plt.xlabel('SNR (dB)')
plt.ylabel('Accuracy')
plt.legend(loc='best', ncol=2)
plt.tight_layout()
plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'per_class_accuracy_vs_snr.png'), dpi=300)
plt.show()

## Results Summary

Artifacts and plots are saved. See the confusion matrices, accuracy curves, and heatmaps for performance diagnostics.

In [None]:
# Configure colormaps for heatmaps/matrices
# CMAP is passed as `cmap=CMAP` by the CNN1D-vs-CLDNN comparison heatmaps in the
# cells below. The heatmaps in the cells above pass 'magma' as a literal, so
# changing this value does not affect them.
CMAP = 'magma'  # options: 'magma', 'viridis', 'plasma', 'cividis'

## Final Results Summary

- Test accuracy/loss printed above.
- Confusion matrices (overall and per-SNR), heatmaps, and accuracy curves are saved.
- The best `.keras` model file is available under the model directory.

## Comparative Analysis: CNN1D vs CLDNN

This section loads the CNN1D best model and compares it to the current CLDNN on the same test set: overall accuracy, per-class accuracy vs SNR, and top confusion pairs.

In [None]:
# Load CNN1D model and compare against CLDNN
# (os, np, sns, plt and confusion_matrix are already imported in the first cell.)
cnn1d_path = os.path.join(PROJECT_ROOT_PATH, 'models', 'CNN1D', 'best_model.keras')
if os.path.exists(cnn1d_path):
    cnn1d_best = keras.models.load_model(cnn1d_path)
    y_pred_cnn = np.argmax(cnn1d_best.predict(X_test, verbose=0), axis=1)
    y_pred_cld = np.argmax(best_model.predict(X_test, verbose=0), axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Overall accuracy
    acc_cnn = float(np.mean(y_pred_cnn == y_true))
    acc_cld = float(np.mean(y_pred_cld == y_true))
    print(f'CNN1D Test Accuracy: {acc_cnn:.4f}')
    print(f'CLDNN Test Accuracy: {acc_cld:.4f}')

    # Per-class accuracy vs SNR
    snrs_unique = np.sort(np.unique(snr_test))
    class_ids = np.arange(len(class_names))

    def per_class_accuracy_by_snr(y_hat):
        """Return a (class x SNR) matrix of per-class accuracy for predictions ``y_hat``.

        Entry [i, j] is the fraction of test samples with true class i and SNR
        ``snrs_unique[j]`` that were predicted as class i; it is NaN when no
        such samples exist.
        """
        acc = np.full((len(class_ids), len(snrs_unique)), np.nan)
        for j, s in enumerate(snrs_unique):
            idx = np.where(snr_test == s)[0]
            yt, yp = y_true[idx], y_hat[idx]
            for i in class_ids:
                mask = (yt == i)
                denom = mask.sum()
                if denom > 0:
                    acc[i, j] = ((yp == i) & mask).sum() / denom
        return acc

    per_class_acc_cnn = per_class_accuracy_by_snr(y_pred_cnn)
    per_class_acc_cld = per_class_accuracy_by_snr(y_pred_cld)

    plt.figure(figsize=(12,6))
    diff = per_class_acc_cnn - per_class_acc_cld
    # The difference is signed, so anchor the colormap's midpoint at 0: cells
    # where CNN1D wins and cells where CLDNN wins then fall on opposite halves
    # of the color scale regardless of the data range.
    sns.heatmap(diff, annot=True, fmt='.3f', cmap=CMAP, center=0, square=True, linewidths=0.5, linecolor='white', xticklabels=snrs_unique, yticklabels=class_names)
    plt.title('Accuracy Difference (CNN1D - CLDNN) per Class vs SNR')
    plt.xlabel('SNR (dB)')
    plt.ylabel('Modulation')
    plt.tight_layout()
    plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'comparison_accuracy_diff_heatmap.png'), dpi=300)
    plt.show()

    # Top confusion pairs
    def top_pairs(cm, k=3):
        """Return the ``k`` largest off-diagonal confusion-matrix entries.

        Args:
            cm: square confusion matrix (rows = true, columns = predicted).
            k: maximum number of pairs to return.

        Returns:
            List of ``(count, true_class_name, predicted_class_name)`` tuples,
            sorted in descending order. Counts are plain Python ints so the
            printed list stays readable (NumPy scalars print as ``np.int64(...)``
            on NumPy 2).
        """
        pairs = [
            (int(cm[i, j]), class_names[i], class_names[j])
            for i in range(cm.shape[0])
            for j in range(cm.shape[1])
            if i != j and cm[i, j] > 0
        ]
        pairs.sort(reverse=True)
        return pairs[:k]

    cm_cnn = confusion_matrix(y_true, y_pred_cnn, labels=class_ids)
    cm_cld = confusion_matrix(y_true, y_pred_cld, labels=class_ids)
    print('Top Confusions — CNN1D:', top_pairs(cm_cnn))
    print('Top Confusions — CLDNN:', top_pairs(cm_cld))
else:
    print('CNN1D model file not found; run the CNN1D notebook first to enable comparison.')

In [None]:
# Save training history for comparison overlays
import json

history_file = os.path.join(MODEL_SAVE_PATH, 'history.json')
# Convert every logged value to a plain float so the dict is JSON-serializable.
hist_json = {
    metric: [float(value) for value in values]
    for metric, values in history.history.items()
}
with open(history_file, 'w') as f:
    json.dump(hist_json, f)
print('Saved training history to', history_file)

In [None]:
# Overlay training curves: CNN1D vs CLDNN
cnn_hist_path = os.path.join(PROJECT_ROOT_PATH, 'models', 'CNN1D', 'history.json')
cld_hist_path = os.path.join(MODEL_SAVE_PATH, 'history.json')
if not (os.path.exists(cnn_hist_path) and os.path.exists(cld_hist_path)):
    print('Overlay skipped: history.json missing. Run both model trainings to generate.')
else:
    import json
    with open(cnn_hist_path, 'r') as f:
        cnn_hist = json.load(f)
    with open(cld_hist_path, 'r') as f:
        cld_hist = json.load(f)

    # One figure per entry: (curves, title, y-label, output file), where each
    # curve is (history dict, metric key, legend label). A metric missing from
    # a history file is drawn as an empty line.
    overlays = (
        (
            (
                (cnn_hist, 'accuracy', 'CNN1D train_acc'),
                (cnn_hist, 'val_accuracy', 'CNN1D val_acc'),
                (cld_hist, 'accuracy', 'CLDNN train_acc'),
                (cld_hist, 'val_accuracy', 'CLDNN val_acc'),
            ),
            'Training Accuracy — CNN1D vs CLDNN', 'Accuracy', 'overlay_accuracy.png',
        ),
        (
            (
                (cnn_hist, 'loss', 'CNN1D train_loss'),
                (cnn_hist, 'val_loss', 'CNN1D val_loss'),
                (cld_hist, 'loss', 'CLDNN train_loss'),
                (cld_hist, 'val_loss', 'CLDNN val_loss'),
            ),
            'Training Loss — CNN1D vs CLDNN', 'Loss', 'overlay_loss.png',
        ),
    )
    for curves, title, ylabel, filename in overlays:
        plt.figure(figsize=(12,4))
        for source_hist, metric_key, legend_label in curves:
            plt.plot(source_hist.get(metric_key, []), label=legend_label, linewidth=2)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.legend(loc='best')
        plt.tight_layout()
        plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, filename), dpi=300)
        plt.show()

In [None]:
# Who-wins heatmap: sign(cnn - cld) over (class × SNR)
# The per-class accuracy arrays only exist if the comparison cell ran its
# CNN1D branch, so check for them before plotting.
have_comparison = 'per_class_acc_cnn' in locals() and 'per_class_acc_cld' in locals()
if not have_comparison:
    print('Who-wins heatmap skipped: per-class accuracy arrays not available. Run comparison cell first.')
else:
    # np.sign maps each difference to -1, 0 or +1 and leaves NaN as NaN.
    sign_mat = np.sign(per_class_acc_cnn - per_class_acc_cld)
    plt.figure(figsize=(12,6))
    ax = sns.heatmap(
        sign_mat,
        annot=False,
        cmap=CMAP,
        square=True,
        linewidths=0.5,
        linecolor='white',
        vmin=-1,
        vmax=1,
        xticklabels=snrs_unique,
        yticklabels=class_names,
        cbar=True,
    )
    plt.title('Who Wins per Class × SNR (sign: +1 CNN1D, -1 CLDNN, 0 tie)')
    plt.xlabel('SNR (dB)')
    plt.ylabel('Modulation')
    plt.tight_layout()
    plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'who_wins_heatmap.png'), dpi=300)
    plt.show()

In [None]:
# Overall Accuracy vs SNR — CNN1D vs CLDNN on one chart
# Both prediction vectors are created by the comparison cell; skip if absent.
have_predictions = 'y_pred_cnn' in locals() and 'y_pred_cld' in locals()
if not have_predictions:
    print('Overall accuracy comparison skipped: predictions for both models not available. Run the comparison cell first.')
else:
    snrs_plot = np.sort(np.unique(snr_test))
    overall_cnn, overall_cld = [], []
    for snr_value in snrs_plot:
        rows = np.where(snr_test == snr_value)[0]
        if len(rows) > 0:
            truth = y_true[rows]
            cnn_acc = np.mean(y_pred_cnn[rows] == truth)
            cld_acc = np.mean(y_pred_cld[rows] == truth)
        else:
            # No samples at this SNR: keep the x/y lists aligned with NaN.
            cnn_acc = cld_acc = np.nan
        overall_cnn.append(cnn_acc)
        overall_cld.append(cld_acc)

    plt.figure(figsize=(12,4))
    palette = sns.color_palette('tab10', n_colors=2)
    series = (
        (overall_cnn, 'o', 'CNN1D', palette[0]),
        (overall_cld, 's', 'CLDNN', palette[1]),
    )
    for accuracies, marker, model_label, color in series:
        plt.plot(snrs_plot, accuracies, marker=marker, linewidth=2, label=model_label, color=color)
    plt.title('Overall Accuracy vs SNR — CNN1D vs CLDNN')
    plt.xlabel('SNR (dB)')
    plt.ylabel('Accuracy')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'overall_accuracy_vs_snr_both_models.png'), dpi=300)
    plt.show()