# 02 — Model Training and Evaluation — CNN1D

This notebook loads the preprocessed data produced by `01_Data_Preparation_and_Visualization.ipynb` and trains a moderate-depth 1D CNN on sequences of shape (128, 2). It follows the project's plotting and evaluation standards.

In [None]:
# Imports, style, seed, GPU check, and paths
import os, pickle, numpy as np, random
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.keras import TqdmCallback
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

plt.style.use('seaborn-v0_8-whitegrid')
sns.set_style('whitegrid')

# Seed every random number generator the notebook uses: NumPy, Python's `random`, TensorFlow.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

gpus = tf.config.list_physical_devices('GPU')
print('GPU detected:' if gpus else 'No GPU detected by TensorFlow.', gpus)

# The project root defaults to the Colab/Google Drive location. Set the AMC_PROJECT_ROOT
# environment variable before starting the kernel to run against a different copy.
PROJECT_ROOT_PATH = os.environ.get('AMC_PROJECT_ROOT', '/content/drive/MyDrive/DTVT_IUH_2025/AMC_RML2016_10b/')
PROCESSED_DATA_PATH = os.path.join(PROJECT_ROOT_PATH, 'data/')
PREPROCESSING_OBJECTS_PATH = os.path.join(PROJECT_ROOT_PATH, 'preprocessing_objects/')
MODEL_SAVE_PATH = os.path.join(PROJECT_ROOT_PATH, 'models/', 'CNN1D/')
MODEL_VISUALIZATIONS_PATH = os.path.join(MODEL_SAVE_PATH, 'visualizations/')

# Mount Google Drive BEFORE creating any directory. Creating folders under /content/drive
# on an unmounted runtime leaves a non-empty mountpoint, which drive.mount() refuses to use,
# and the outputs would land on the ephemeral local disk instead of Drive.
# Calling drive.mount() again later in the notebook is harmless once the drive is mounted.
try:
    from google.colab import drive as _colab_drive  # type: ignore
except ImportError:
    _colab_drive = None  # not running in Colab: use the local filesystem as-is
if _colab_drive is not None:
    # A mount failure is allowed to propagate: continuing would create the directories
    # inside the unmounted mountpoint and block every later mount attempt.
    _colab_drive.mount('/content/drive')

for d in [MODEL_SAVE_PATH, MODEL_VISUALIZATIONS_PATH]:
    os.makedirs(d, exist_ok=True)

In [None]:
# Google Drive mount (for Colab) and data validation
try:
    from google.colab import drive  # type: ignore
except ImportError as e:
    # google.colab is only importable inside Colab; anywhere else use the local filesystem.
    print('Not running in Colab or mount skipped. Proceeding with local filesystem. Reason:', str(e))
    # If running locally, make sure PROJECT_ROOT_PATH points at the local copy of the project.
else:
    try:
        drive.mount('/content/drive')
        print('Google Drive mounted at /content/drive')
    except Exception as e:
        # Running in Colab but the mount itself failed. Report that accurately instead of
        # claiming we are outside Colab, and continue so the check below lists what is unreachable.
        print('Google Drive mount failed. Reason:', str(e))

# Ensure model directories exist (already created above); validate data/preprocessing files exist
required_npy = [
    'X_train_1d.npy', 'X_val_1d.npy', 'X_test_1d.npy',
    'y_train.npy', 'y_val.npy', 'y_test.npy',
    'snr_train.npy', 'snr_val.npy', 'snr_test.npy',
]
required_pkl = ['label_encoder.pkl', 'standard_scaler.pkl']

# Full path of every required artifact: arrays first, then pickled preprocessing objects.
required_paths = (
    [os.path.join(PROCESSED_DATA_PATH, fname) for fname in required_npy]
    + [os.path.join(PREPROCESSING_OBJECTS_PATH, fname) for fname in required_pkl]
)
missing = [path for path in required_paths if not os.path.exists(path)]

if missing:
    print('Missing required files:')
    for m in missing:
        print(' -', m)
    raise FileNotFoundError('Preprocessed data not found. Please run 01_Data_Preparation_and_Visualization.ipynb to generate datasets and preprocessing objects.')
else:
    print('All required data/preprocessing files found.')

In [None]:
# Load 1D data and preprocessing objects
def _load_array(filename):
    """Return the NumPy array stored as `filename` inside PROCESSED_DATA_PATH."""
    return np.load(os.path.join(PROCESSED_DATA_PATH, filename))


def _load_pickle(filename):
    """Unpickle `filename` from PREPROCESSING_OBJECTS_PATH, closing the file afterwards."""
    with open(os.path.join(PREPROCESSING_OBJECTS_PATH, filename), 'rb') as handle:
        return pickle.load(handle)


# Inputs, labels and per-sample SNR values for the train / validation / test splits.
X_train, X_val, X_test = map(_load_array, ('X_train_1d.npy', 'X_val_1d.npy', 'X_test_1d.npy'))
y_train, y_val, y_test = map(_load_array, ('y_train.npy', 'y_val.npy', 'y_test.npy'))
snr_train, snr_val, snr_test = map(_load_array, ('snr_train.npy', 'snr_val.npy', 'snr_test.npy'))

label_encoder = _load_pickle('label_encoder.pkl')
scaler = _load_pickle('standard_scaler.pkl')

# The second axis of the label array gives the class count; everything after the
# sample axis of X_train is the per-sample input shape.
input_shape = X_train.shape[1:]
num_classes = y_train.shape[1]
print('Shapes:', X_train.shape, X_val.shape, X_test.shape, '| Classes:', num_classes)

In [None]:
# Build a moderate 1D CNN
def build_cnn1d(input_shape, num_classes):
    """Assemble the 1D CNN classifier used for AMC.

    Three Conv1D + BatchNormalization stages (32 and 64 filters with kernel size 5,
    then 128 filters with kernel size 3), with a stride-2 max-pool after the second
    stage, followed by global average pooling, dropout (0.3) and a softmax layer.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled `keras.Model` named 'CNN1D_AMC'.
    """
    # One row per convolutional stage: (filters, kernel_size, max-pool afterwards?)
    conv_stages = (
        (32, 5, False),
        (64, 5, True),
        (128, 3, False),
    )

    inputs = keras.Input(shape=input_shape)
    features = inputs
    for n_filters, kernel_size, pool_after in conv_stages:
        features = layers.Conv1D(n_filters, kernel_size, padding='same', activation='relu')(features)
        features = layers.BatchNormalization()(features)
        if pool_after:
            features = layers.MaxPooling1D(2)(features)

    pooled = layers.GlobalAveragePooling1D()(features)
    dropped = layers.Dropout(0.3)(pooled)
    outputs = layers.Dense(num_classes, activation='softmax')(dropped)
    return keras.Model(inputs, outputs, name='CNN1D_AMC')


model = build_cnn1d(input_shape=input_shape, num_classes=num_classes)
model.summary()

In [None]:
# Compile, callbacks, and train with tqdm
# Training hyperparameters, gathered in one place.
LEARNING_RATE = 0.008
MAX_EPOCHS = 100
BATCH_SIZE = 1024
LR_DECAY_FACTOR = 0.5
LR_PATIENCE = 5
EARLY_STOP_PATIENCE = 10

optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# All three callbacks watch validation loss: save the best full model to disk,
# halve the learning rate on a plateau, and stop (restoring the best weights) if it stalls.
ckpt_path = os.path.join(MODEL_SAVE_PATH, 'best_model.keras')
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    ckpt_path, monitor='val_loss', save_best_only=True, save_weights_only=False
)
reduce_lr_cb = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=LR_DECAY_FACTOR, patience=LR_PATIENCE, verbose=1
)
early_stop_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=EARLY_STOP_PATIENCE, restore_best_weights=True, verbose=1
)
callbacks = [checkpoint_cb, reduce_lr_cb, early_stop_cb]

# Keras' own progress output is silenced (verbose=0); the tqdm callback reports progress instead.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=MAX_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[TqdmCallback(verbose=0), *callbacks],
    verbose=0,
)

In [None]:
# Reload best model (safeguard)
# `ckpt_path` is the file the ModelCheckpoint callback writes during training
# (monitor='val_loss', save_best_only=True), so `best_model` is read back from disk
# rather than taken from the in-memory `model`. All evaluation cells below use `best_model`.
best_model = keras.models.load_model(ckpt_path)

In [None]:
# Plot training history (save then show)
def plot_history(hist, prefix='cnn1d'):
    """Plot train/validation accuracy and loss curves, one figure per metric.

    Each figure is written to MODEL_VISUALIZATIONS_PATH as
    `<prefix>_history_<metric>.png` (300 dpi) and then displayed.

    Args:
        hist: Object whose `.history` dict holds the per-epoch series 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' (e.g. the value returned by `model.fit`).
        prefix: Filename prefix for the saved figures.
    """
    curves = hist.history
    # (history key, word used in title and y-label, suffix used in legend entries)
    metric_specs = (
        ('accuracy', 'Accuracy', 'acc'),
        ('loss', 'Loss', 'loss'),
    )
    for key, display_name, legend_suffix in metric_specs:
        plt.figure(figsize=(12, 4))
        plt.plot(curves[key], label=f'train_{legend_suffix}')
        plt.plot(curves[f'val_{key}'], label=f'val_{legend_suffix}')
        plt.title(f'{display_name} over Epochs')
        plt.xlabel('Epoch')
        plt.ylabel(display_name)
        plt.legend(loc='best')
        plt.tight_layout()
        # Save before showing so the file is written from the fully laid-out figure.
        plt.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, f'{prefix}_history_{key}.png'), dpi=300)
        plt.show()


plot_history(history, prefix='cnn1d')

In [None]:
# Evaluate on test set
# Uses `best_model` (the checkpoint reloaded from disk), not the in-memory `model`.
# The result is unpacked as (loss, accuracy), matching the single 'accuracy' metric
# the model was compiled with.
test_loss, test_acc = best_model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {test_loss:.4f} | Test accuracy: {test_acc:.4f}')

In [None]:
# Predictions and classification report
y_pred_probs = best_model.predict(X_test, verbose=0)
# Turn class-probability rows and one-hot label rows into integer class indices.
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)
class_names = label_encoder.classes_.tolist()
# Pass `labels` explicitly so the report always has one row per encoder class, in encoder
# order. Without it, a class absent from both y_true and y_pred makes the number of
# reported classes differ from len(target_names) and scikit-learn raises.
# zero_division=0 keeps the default value for undefined metrics but silences the warning.
report = classification_report(
    y_true,
    y_pred,
    labels=np.arange(len(class_names)),
    target_names=class_names,
    digits=3,
    zero_division=0,
)
print(report)

In [None]:
# Accuracy heatmap by (mod x SNR)
snrs_unique = np.sort(np.unique(snr_test))
n_classes = len(class_names)

# acc_matrix[c, k] = fraction of test samples of true class c at SNR snrs_unique[k]
# that were predicted as class c; NaN where that (class, SNR) pair has no samples.
acc_matrix = np.zeros((n_classes, len(snrs_unique)))
for col, snr_value in enumerate(snrs_unique):
    rows_at_snr = np.where(snr_test == snr_value)[0]
    if rows_at_snr.size == 0:
        continue
    true_at_snr = y_true[rows_at_snr]
    pred_at_snr = y_pred[rows_at_snr]
    for class_id in range(n_classes):
        is_class = true_at_snr == class_id
        support = np.sum(is_class)
        if support > 0:
            hits = np.sum(is_class & (pred_at_snr == class_id))
            acc_matrix[class_id, col] = hits / support
        else:
            acc_matrix[class_id, col] = np.nan

fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(
    acc_matrix,
    ax=ax,
    annot=True,
    fmt='.3f',
    cmap='magma',
    cbar=True,
    square=True,
    linewidths=0.5,
    linecolor='white',
    xticklabels=snrs_unique,
    yticklabels=class_names,
)
ax.set_title('Per-Class Accuracy vs SNR')
ax.set_xlabel('SNR (dB)')
ax.set_ylabel('Modulation')
fig.tight_layout()
fig.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'accuracy_heatmap.png'), dpi=300)
plt.show()

In [None]:
# Overall confusion matrices (raw and normalized)
class_ids = np.arange(len(class_names))
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

# Row totals = number of true samples per class.
per_class_total = cm.sum(axis=1)
vmax_raw = per_class_total.max() if per_class_total.size > 0 else None

# Row-normalize so each row shows how one true class is distributed over predictions.
# Rows with no true samples are left as NaN instead of triggering a 0/0 RuntimeWarning.
row_totals = per_class_total[:, np.newaxis]
cm_norm = np.divide(
    cm,
    row_totals,
    out=np.full(cm.shape, np.nan, dtype=float),
    where=row_totals > 0,
)

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
# (axis, matrix, annotation format, colour-scale min, colour-scale max, title)
panels = (
    (axes[0], cm, 'd', 0, vmax_raw, 'Confusion Matrix — Raw Counts'),
    (axes[1], cm_norm, '.3f', 0.0, 1.0, 'Confusion Matrix — Normalized'),
)
for ax, matrix, fmt, vmin, vmax, title in panels:
    # Tick labels are passed to seaborn directly so there is exactly one label per class.
    # Overwriting them afterwards with set_xticklabels() only works if seaborn's automatic
    # tick selection happened to keep every tick.
    sns.heatmap(
        matrix,
        ax=ax,
        cmap='magma',
        annot=True,
        fmt=fmt,
        cbar=True,
        square=True,
        linewidths=0.5,
        linecolor='white',
        vmin=vmin,
        vmax=vmax,
        xticklabels=class_names,
        yticklabels=class_names,
    )
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.tick_params(axis='x', labelrotation=90)
    ax.tick_params(axis='y', labelrotation=0)

fig.tight_layout()
fig.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'confusion_matrices_overall.png'), dpi=300)
plt.show()

In [None]:
# Confusion matrices per SNR with fixed colorbars
snrs_unique = np.sort(np.unique(snr_test))
class_ids = np.arange(len(class_names))

# Pass 1: build every per-SNR confusion matrix first, so one colour scale can be
# derived from all of them before anything is drawn.
snr_confusions = []
for s in snrs_unique:
    idx = np.where(snr_test == s)[0]
    if len(idx) == 0:
        continue
    snr_confusions.append((s, confusion_matrix(y_true[idx], y_pred[idx], labels=class_ids)))

# Largest number of true samples any class has at any single SNR. Used as the shared
# upper colour limit of every raw-count panel, so colours are comparable across SNR figures.
row_total_maxima = [cm_s.sum(axis=1).max() for _, cm_s in snr_confusions if cm_s.size > 0]
class_count_per_snr = max(row_total_maxima) if row_total_maxima else None

# Pass 2: one two-panel figure (raw counts, row-normalized) per SNR.
for s, cm_s in snr_confusions:
    per_class_total_s = cm_s.sum(axis=1, keepdims=True)
    # Classes with no samples at this SNR become NaN rows instead of a 0/0 RuntimeWarning.
    cm_s_norm = np.divide(
        cm_s,
        per_class_total_s,
        out=np.full(cm_s.shape, np.nan, dtype=float),
        where=per_class_total_s > 0,
    )

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    # (axis, matrix, annotation format, colour-scale min, colour-scale max, title)
    panels = (
        (axes[0], cm_s, 'd', 0, class_count_per_snr, f'Confusion (Raw) — SNR {s} dB'),
        (axes[1], cm_s_norm, '.3f', 0.0, 1.0, f'Confusion (Normalized) — SNR {s} dB'),
    )
    for ax, matrix, fmt, vmin, vmax, title in panels:
        # Tick labels go through seaborn so there is exactly one label per class.
        sns.heatmap(
            matrix,
            ax=ax,
            cmap='magma',
            annot=True,
            fmt=fmt,
            cbar=True,
            square=True,
            linewidths=0.5,
            linecolor='white',
            vmin=vmin,
            vmax=vmax,
            xticklabels=class_names,
            yticklabels=class_names,
        )
        ax.set_title(title)
        ax.set_xlabel('Predicted')
        ax.set_ylabel('True')
        ax.tick_params(axis='x', labelrotation=90)
        ax.tick_params(axis='y', labelrotation=0)

    fig.tight_layout()
    fig.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, f'confusion_matrices_snr_{s}.png'), dpi=300)
    plt.show()
    # Release the figure explicitly; one figure per SNR otherwise accumulates on
    # backends that do not close figures when they are shown.
    plt.close(fig)

In [None]:
# Accuracy vs. SNR (overall and per class)
snrs_unique = np.sort(np.unique(snr_test))
n_classes = len(class_names)

# overall_acc[k]: accuracy over all test samples at snrs_unique[k].
# per_class_acc[c][k]: accuracy restricted to true class c at that SNR (NaN if no samples).
overall_acc = []
per_class_acc = {class_id: [] for class_id in range(n_classes)}
for snr_value in snrs_unique:
    rows_at_snr = np.where(snr_test == snr_value)[0]
    if rows_at_snr.size == 0:
        continue
    true_at_snr = y_true[rows_at_snr]
    pred_at_snr = y_pred[rows_at_snr]
    overall_acc.append(np.mean(true_at_snr == pred_at_snr))
    for class_id, series in per_class_acc.items():
        is_class = true_at_snr == class_id
        support = np.sum(is_class)
        if support > 0:
            series.append(np.sum((pred_at_snr == class_id) & is_class) / support)
        else:
            series.append(np.nan)

# Figure 1: one curve for the whole test set.
fig_overall, ax_overall = plt.subplots(figsize=(12, 4))
ax_overall.plot(snrs_unique, overall_acc, marker='o', linewidth=2)
ax_overall.set_title('Overall Accuracy vs SNR')
ax_overall.set_xlabel('SNR (dB)')
ax_overall.set_ylabel('Accuracy')
fig_overall.tight_layout()
fig_overall.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'overall_accuracy_vs_snr.png'), dpi=300)
plt.show()

# Figure 2: one curve per modulation class.
fig_classes, ax_classes = plt.subplots(figsize=(12, 4))
palette = sns.color_palette('tab10', n_colors=n_classes)
for class_id, class_label in enumerate(class_names):
    ax_classes.plot(
        snrs_unique,
        per_class_acc[class_id],
        marker='o',
        label=class_label,
        linewidth=2,
        color=palette[class_id % len(palette)],
    )
ax_classes.set_title('Per-Class Accuracy vs SNR')
ax_classes.set_xlabel('SNR (dB)')
ax_classes.set_ylabel('Accuracy')
ax_classes.legend(loc='best', ncol=2)
fig_classes.tight_layout()
fig_classes.savefig(os.path.join(MODEL_VISUALIZATIONS_PATH, 'per_class_accuracy_vs_snr.png'), dpi=300)
plt.show()

In [None]:
# Top confusion pairs analysis and summary
# Work on a copy of the overall confusion matrix with the diagonal (correct predictions) zeroed.
off_diag = cm.copy()
np.fill_diagonal(off_diag, 0)

n_true, n_pred = off_diag.shape
# (count, true class, predicted class) for every non-zero misclassification cell,
# largest count first; ties fall back to tuple ordering on the class names.
pairs = sorted(
    (
        (off_diag[t, p], class_names[t], class_names[p])
        for t in range(n_true)
        for p in range(n_pred)
        if t != p and off_diag[t, p] > 0
    ),
    reverse=True,
)
top_pairs = pairs[:3]

print('Top Confusion Pairs:')
for rank, (count, true_name, pred_name) in enumerate(top_pairs, start=1):
    print(f"{rank}. {true_name} → {pred_name}: {count}")

## Results Summary

- Overall test accuracy is printed above.
- Training history, confusion matrices (overall and per SNR), the per-class accuracy heatmap, and the accuracy-vs-SNR plots are saved under the model's `visualizations/` directory.
- Top confusion pairs are printed for quick insight.

In [None]:
# Configure colormaps for heatmaps/matrices
# NOTE(review): CMAP is not referenced anywhere in the visible notebook. Every heatmap
# cell above passes cmap='magma' as a literal, so changing this value has no effect
# unless those calls are switched to use CMAP (and this cell is moved above them).
CMAP = 'magma'  # options: 'magma', 'viridis', 'plasma', 'cividis'

## Final Results Summary

- Overall test accuracy and loss have been computed above.
- The model evaluated above is the saved `best_model.keras` checkpoint (lowest validation loss), reloaded from disk; early stopping was also configured to restore the best-epoch weights.
- Top confusion pairs (highest off-diagonal counts) are printed above for diagnostic clarity.

All figures and artifacts are saved under the model directory (`models/CNN1D/`), with figures in its `visualizations/` subfolder.

In [None]:
# Save training history for comparison overlays
import json

history_path = os.path.join(MODEL_SAVE_PATH, 'history.json')

# Convert every per-epoch value to a plain Python float so the series are JSON-serialisable.
hist_json = {
    metric: [float(value) for value in values]
    for metric, values in history.history.items()
}
with open(history_path, 'w') as f:
    json.dump(hist_json, f)
print('Saved training history to', history_path)