# training-B.ipynb
1. This code trains three models: a 1D-CNN on raw ECG windows, and an MLP and a Random Forest on WPT (Wavelet Packet Transform) features
2. The datasets are the MIT-BIH Arrhythmia Database and additional ECG recordings stored in .bin format
3. Classes/labels: N, S, V, F, with or without Q
4. The data follows the dual-lead layout of the MIT-BIH Arrhythmia Database, so the single-lead ECG data from the .bin files is duplicated onto the second lead

## **LIBRARY IMPORTS**

In [None]:
# Import Libraries
import cudf
import os
import joblib
import pywt
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import wfdb  # For reading MIT-BIH data
import keras_tuner as kt
import seaborn as sns
import tensorflow as tf
import neurokit2 as nk

# Scikit-learn and Imbalanced-learn imports
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score,
    precision_recall_curve,
    auc,
    f1_score,
    precision_score,
    recall_score,
    accuracy_score
)
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.combine import SMOTEENN, SMOTETomek
from imblearn.pipeline import Pipeline as ImbPipeline
from scipy.stats import entropy
from collections import Counter
from scipy.signal import find_peaks, resample, butter, filtfilt, iirnotch, spectrogram
from sklearn.utils import class_weight

# Model imports
from sklearn.svm import SVC
from imblearn.ensemble import BalancedRandomForestClassifier
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv1D, BatchNormalization, Activation, MaxPooling1D, Dropout, Add, GlobalAveragePooling1D, Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from cuml.ensemble import RandomForestClassifier
from scikeras.wrappers import KerasClassifier
from sklearn.utils.class_weight import compute_class_weight

# Environment checks and runtime flags
# Smoke-test cuDF by building and printing a tiny Series
print(cudf.Series([1, 2, 3]))

# TensorFlow environment flags
# NOTE(review): tensorflow is already imported above, so these flags may be
# read too late to have any effect — confirm and move before the import if so.
os.environ.update({
    'TF_ENABLE_ONEDNN_OPTS': '0',
    'TF_CPP_MIN_LOG_LEVEL': '2',
})

# Report which GPUs (if any) TensorFlow can see
gpu_devices = tf.config.list_physical_devices('GPU')
if not gpu_devices:
    print("TensorFlow did not detect any GPUs. Training will run on the CPU.")
else:
    print(f"TensorFlow has detected {len(gpu_devices)} GPU(s):")
    for device in gpu_devices:
        print(f"- {device}")


## **DATA PREPARATION**

### DATA PREPARATION FUNCTIONS

In [None]:
# 1. CONFIGURATIONS & ADDITIONAL FUNCTIONS
# Maps MIT-BIH beat annotation symbols to the integer class ids used as labels.
# Beats whose symbol is not a key of this map are skipped during window extraction.
label_map = {
    'N': 0, '.': 0, 'L': 0, 'R': 0, 'e': 0, 'j': 0,  # Class 0: Normal Beats (N)
    'V': 1, 'E': 1,                                  # Class 1: Ventricular Ectopic (VEB)
    'S': 2, 'A': 2, 'a': 2, 'J': 2,                  # Class 2: Supraventricular Ectopic (SVEB)
    'F': 3                                           # Class 3: Fusion Beat (F)
}
# Directory containing the MIT-BIH record files
DB_PATH_MIT = '../data/raw/MIT-BIH/mit-bih-arrhythmia-database-1.0.0/mit-bih-arrhythmia-database-1.0.0/'
FS_MIT = 360     # Sampling rate (Hz) passed as the source rate when resampling MIT-BIH signals
FS_TARGET = 500  # Sampling rate (Hz) every signal is processed at
WINDOW_SIZE = int(FS_TARGET*0.8)  # 800 ms window -> 0.8 s * 500 Hz = 400 samples
# Split MIT-BIH records into training and testing sets to prevent patient data leakage
RECORDS_TRAIN = ['101', '106', '108', '109', '112', '114', '115', '116', '118', '119',
                 '122', '124', '201', '203', '205', '207', '208', '209', '215', '220',
                 '223', '230'] # DS1
RECORDS_TEST = ['100', '103', '105', '111', '113', '117', '121', '123', '200', '202',
                '210', '212', '213', '214', '219', '221', '222', '228', '231', '232',
                '233', '234'] # DS2
# Custom single-lead recordings; the keys here must match the keys of custom_file_labels
custom_file_paths = {
    'Arrhythmia': '../data/raw/Arrhythmia/ECG_WAVE.bin',
    'Normal': '../data/raw/Normal/ecg_normal.bin'
}
# One class id per custom file, applied to every beat taken from that file
custom_file_labels = {'Arrhythmia': 2, 'Normal': 0} # SVEB and Normal

# Wavelet Feature Configuration
WAVELET_TYPE = 'db4'
WAVELET_LEVEL = 4

# Define the output directory
output_dir_prepare_data = '../data/processed'
os.makedirs(output_dir_prepare_data, exist_ok=True) # This creates the directory if it doesn't exist

# Filtering ECG data on a certain frequencies
def preprocess_signal(signal, fs=500):
    """
    Denoise a raw ECG signal with three zero-phase (filtfilt) filter stages.

    Stages, applied in order:
      1. 2nd-order Butterworth high-pass at 0.6 Hz (baseline wander).
      2. IIR notch at 50 Hz with Q=30 (powerline interference).
      3. 4th-order Butterworth low-pass at 100 Hz (high-frequency noise).

    Args:
        signal: 1D sequence of ECG samples (converted to a numpy array).
        fs: Sampling rate in Hz; cut-offs are normalised by fs / 2.

    Returns:
        np.ndarray: The filtered signal.
    """
    nyquist = fs / 2.0

    # Each entry is a (numerator, denominator) coefficient pair
    filter_stages = (
        butter(2, 0.6 / nyquist, btype='high'),
        iirnotch(50 / nyquist, Q=30),
        butter(4, 100 / nyquist, btype='low'),
    )

    filtered = np.array(signal)
    for numerator, denominator in filter_stages:
        filtered = filtfilt(numerator, denominator, filtered)

    return filtered

# Extract features of wavelet
def extract_wavelet_features(window, wavelet='db4', level=4):
    """
    Summarise the discrete wavelet decomposition of one ECG beat window.

    For every coefficient array returned by ``pywt.wavedec`` five values are
    emitted, in this order: mean, standard deviation, variance, energy
    (sum of squares) and the entropy of the squared coefficients.

    Args:
        window (np.ndarray): 1D array holding a single ECG beat window.
        wavelet (str): Wavelet name passed to PyWavelets.
        level (int): Decomposition level.

    Returns:
        np.ndarray: 1D feature vector, five values per coefficient array.
    """
    feature_values = []
    for band in pywt.wavedec(window, wavelet, level=level):
        band_power = np.square(band)
        feature_values.extend((
            np.mean(band),
            np.std(band),
            np.var(band),
            np.sum(band_power),
            # Small epsilon keeps the entropy input strictly positive (avoids log(0))
            entropy(band_power + 1e-9),
        ))

    return np.array(feature_values)

# Wavelet Packet Transformation
def extract_wpt_features(window, wavelet='db4', level=4):
    """
    Summarise the Wavelet Packet Transform (WPT) of one ECG beat window.

    A wavelet packet tree is built with symmetric padding, and the nodes at
    depth ``level`` are visited in natural order. Each node contributes five
    values: mean, standard deviation, variance, energy (sum of squares) and
    the entropy of the squared coefficients.

    Args:
        window (np.ndarray): 1D array holding a single ECG beat window.
        wavelet (str): Wavelet name passed to PyWavelets.
        level (int): Depth of the packet tree whose nodes are used.

    Returns:
        np.ndarray: 1D feature vector, five values per node.
    """
    packet_tree = pywt.WaveletPacket(data=window, wavelet=wavelet, mode='symmetric', maxlevel=level)

    feature_values = []
    for node in packet_tree.get_level(level, order='natural'):
        band = node.data
        band_power = np.square(band)
        feature_values.extend((
            np.mean(band),
            np.std(band),
            np.var(band),
            np.sum(band_power),
            # Small epsilon keeps the entropy input strictly positive
            entropy(band_power + 1e-9),
        ))

    return np.array(feature_values)

In [None]:
# 2. DATA LOADING UTILITY OF MIT-BIH DATA
# This function efficiently loads the specified records and their annotations.
def load_mitbih_records(db_path, record_names):
    """
    Loads raw ECG signals and annotations for specified records from both leads.

    A record is only added to the output once BOTH its signal and its 'atr'
    annotation have been read successfully, so the three returned lists always
    have the same length and stay index-aligned. Records that fail to load are
    reported on stdout and skipped.

    Args:
        db_path (str): The path to the database directory.
        record_names (list): A list of record names as strings.

    Returns:
        A tuple containing three lists: (signals_leadA, signals_leadB, annotations).
        - signals_leadA: A list of raw ECG signal arrays for channel 0.
        - signals_leadB: A list of raw ECG signal arrays for channel 1.
        - annotations: A list of wfdb Annotation objects.
    """
    signals_leadA = []
    signals_leadB = []
    all_annotations = []
    print(f"Loading records: {', '.join(record_names)}...")
    for rec_name in record_names:
        record_path = f'{db_path}/{rec_name}'
        try:
            # Read both channels (0 and 1)
            record = wfdb.rdrecord(record_path, channels=[0, 1])
            # Annotations are the same for both leads
            annotation = wfdb.rdann(record_path, 'atr')
            lead_a = record.p_signal[:, 0].flatten()
            lead_b = record.p_signal[:, 1].flatten()
        except Exception as e:
            print(f"Error processing record {rec_name}: {e}")
            continue

        # Append only after every read succeeded so the lists cannot drift apart
        signals_leadA.append(lead_a)
        signals_leadB.append(lead_b)
        all_annotations.append(annotation)
    print("Loading complete.")
    return signals_leadA, signals_leadB, all_annotations

def prepare_wpt_and_raw_data(signals_A, signals_B, annotations, window_size, fs=360, target_fs=500,
                             wavelet='db4', level=4):
    """
    Processes dual-lead MIT-BIH data once to generate both WPT features and raw windows.

    Each record is resampled from ``fs`` to ``target_fs``, denoised with
    ``preprocess_signal``, and cut into one window per annotated beat whose
    symbol is a key of ``label_map``. A window starts ``window_size // 3``
    samples before the (rescaled) annotation sample and spans ``window_size``
    samples; beats whose window would fall outside the signal are skipped.

    Args:
        signals_A (list): Raw channel-0 signals, one array per record.
        signals_B (list): Raw channel-1 signals, one array per record.
        annotations (list): Annotation objects, index-aligned with the signals.
        window_size (int): Number of samples per beat window.
        fs (int): Original sampling rate of the signals in Hz.
        target_fs (int): Sampling rate the signals are resampled to in Hz.
        wavelet (str): Wavelet passed to ``extract_wpt_features``.
        level (int): WPT level passed to ``extract_wpt_features``.

    Returns:
        Tuple of three index-aligned numpy arrays:
        (wpt_features, raw_windows, labels). The WPT row is the lead-A features
        followed by the lead-B features; each raw window has the two leads
        stacked on the last axis.
    """
    all_wpt_features, all_raw_windows, all_labels = [], [], []
    samples_before = window_size // 3
    samples_after = window_size - samples_before
    rate_ratio = target_fs / fs

    for i, (raw_signal_A, raw_signal_B) in enumerate(zip(signals_A, signals_B)):
        ann = annotations[i]
        try:
            resampled_A = resample(raw_signal_A, int(len(raw_signal_A) * rate_ratio))
            cleaned_A = preprocess_signal(resampled_A, fs=target_fs)
            resampled_B = resample(raw_signal_B, int(len(raw_signal_B) * rate_ratio))
            cleaned_B = preprocess_signal(resampled_B, fs=target_fs)
            # Move the annotated beat positions onto the resampled time axis
            r_peaks_resampled = np.round(ann.sample * rate_ratio).astype(int)

            for r_peak_loc, symbol in zip(r_peaks_resampled, ann.symbol):
                if symbol not in label_map:
                    continue
                start, end = r_peak_loc - samples_before, r_peak_loc + samples_after
                # A slice [start:end] is full-length as long as end <= len(signal)
                if start < 0 or end > len(cleaned_A):
                    continue
                window_A, window_B = cleaned_A[start:end], cleaned_B[start:end]

                # Compute everything for this beat first, then append, so a
                # failure cannot leave the three output lists out of step.
                wpt_A = extract_wpt_features(window_A, wavelet=wavelet, level=level)
                wpt_B = extract_wpt_features(window_B, wavelet=wavelet, level=level)
                beat_features = np.concatenate((wpt_A, wpt_B))
                beat_window = np.stack((window_A, window_B), axis=-1)
                beat_label = label_map[symbol]

                all_wpt_features.append(beat_features)
                all_raw_windows.append(beat_window)
                all_labels.append(beat_label)
        except Exception as e:
            print(f"Could not process record {ann.record_name}: {e}")

    return np.array(all_wpt_features), np.array(all_raw_windows), np.array(all_labels)

In [None]:
# 3. DATA LOADING FOR .bin FILES
# A function to load ECG data
def load_ecg_from_bin(file_path, dtype=np.int16):
    """
    Read a raw ECG recording from a flat binary file.

    Args:
        file_path (str): Path to the .bin file.
        dtype (numpy.dtype): Sample type stored in the file.

    Return:
        numpy.ndarray: The samples as a 1D array, or None when the file
        could not be read (the error is printed).
    """
    try:
        samples = np.fromfile(file_path, dtype=dtype)
    except IOError as read_error:
        print(f"An error has occurred while reading: {read_error}")
        return None

    print(f"Completed reading {len(samples)} samples from {file_path}")
    return samples

# A function to detect R-peaks for labelling
def detect_r_peaks_robust(signal, fs):
    """
    Locate R-peaks with NeuroKit2's ``ecg_peaks``.

    Args:
        signal: 1D ECG signal.
        fs: Sampling rate of ``signal`` in Hz.

    Returns:
        The 'ECG_R_Peaks' entry reported by NeuroKit2, or an empty numpy
        array when detection fails for any reason (the error is printed).
    """
    try:
        _, peak_info = nk.ecg_peaks(signal, sampling_rate=fs)
        peak_indices = peak_info['ECG_R_Peaks']
        print(f"Detected {len(peak_indices)} R-peaks.")
    except Exception as detection_error:
        print(f"Could not detect R-peaks: {detection_error}")
        return np.array([])
    return peak_indices

def prepare_wpt_and_raw_data_from_bin(signal, r_peaks, window_size, label, wavelet='db4', level=4, target_fs=500):
    """
    Processes a single-lead .bin file to generate both WPT features and raw windows.
    Duplicates features to match the dual-lead format.

    The signal is denoised with ``preprocess_signal`` (no resampling is done
    here) and one window is cut per R-peak: it starts ``window_size // 3``
    samples before the peak and spans ``window_size`` samples. Peaks whose
    window would fall outside the signal are skipped.

    Args:
        signal: 1D single-lead ECG signal.
        r_peaks: Iterable of R-peak sample indices into ``signal``.
        window_size (int): Number of samples per beat window.
        label (int): Class id assigned to every window of this recording.
        wavelet (str): Wavelet passed to ``extract_wpt_features``.
        level (int): WPT level passed to ``extract_wpt_features``.
        target_fs (int): Sampling rate in Hz handed to ``preprocess_signal``.

    Returns:
        Tuple of three index-aligned numpy arrays:
        (wpt_features, raw_windows, labels). Each WPT row is the feature
        vector repeated twice and each raw window holds the same samples in
        both channels of the last axis.
    """
    all_wpt_features, all_raw_windows, all_labels = [], [], []
    samples_before = window_size // 3
    samples_after = window_size - samples_before
    cleaned_signal = preprocess_signal(signal, fs=target_fs)
    signal_length = len(cleaned_signal)

    for r_peak_loc in r_peaks:
        start, end = r_peak_loc - samples_before, r_peak_loc + samples_after
        # A slice [start:end] is full-length as long as end <= len(signal)
        if start < 0 or end > signal_length:
            continue
        window = cleaned_signal[start:end]

        # Compute both representations before appending so the output lists
        # always stay the same length.
        wpt_features = extract_wpt_features(window, wavelet=wavelet, level=level)
        duplicated_features = np.concatenate((wpt_features, wpt_features))
        duplicated_window = np.stack((window, window), axis=-1)

        all_wpt_features.append(duplicated_features)
        all_raw_windows.append(duplicated_window)
        all_labels.append(label)

    return np.array(all_wpt_features), np.array(all_raw_windows), np.array(all_labels)

### DATA PREPARATION EXECUTION

In [None]:
if __name__ == '__main__':
    # End-to-end data preparation: builds the train/validation/test arrays
    # consumed by the MLP, 1D-CNN and RandomForest cells further down.

    # --- [Step 1] Process Multi-Lead MIT-BIH Data ---
    print("--- [Step 1] Processing Multi-Lead MIT-BIH Data ---")
    print("\nProcessing MIT-BIH Training set (DS1)...")
    train_signals_A, train_signals_B, train_anns = load_mitbih_records(DB_PATH_MIT, RECORDS_TRAIN)
    X_train_wpt, X_train_raw, y_train_mit = prepare_wpt_and_raw_data(
        train_signals_A, train_signals_B, train_anns, WINDOW_SIZE, fs=FS_MIT, target_fs=FS_TARGET
    )
    print(f"Combined MIT-BIH Training WPT feature matrix shape: {X_train_wpt.shape}")
    print(f"Combined MIT-BIH Training Raw window matrix shape: {X_train_raw.shape}")

    print("\nProcessing MIT-BIH Testing set (DS2)...")
    test_signals_A, test_signals_B, test_anns = load_mitbih_records(DB_PATH_MIT, RECORDS_TEST)
    X_test_wpt_mit, X_test_raw_mit, y_test_mitbih = prepare_wpt_and_raw_data(
        test_signals_A, test_signals_B, test_anns, WINDOW_SIZE, fs=FS_MIT, target_fs=FS_TARGET
    )
    print(f"Combined MIT-BIH Testing WPT feature matrix shape: {X_test_wpt_mit.shape}")
    print(f"Combined MIT-BIH Testing Raw window matrix shape: {X_test_raw_mit.shape}")

    # --- [Step 2] Process Single-Lead Custom Data ---
    # NOTE(review): the .bin signals are not resampled here, so they are
    # presumably already recorded at FS_TARGET — confirm.
    print("\n--- [Step 2] Processing Single-Lead Custom Data ---")
    X_test_custom_wpt_list, X_test_custom_raw_list, y_test_custom_list = [], [], []
    for name, path in custom_file_paths.items():
        label = custom_file_labels[name]
        signal = load_ecg_from_bin(path)
        if signal is None:
            continue
        r_peaks = detect_r_peaks_robust(signal, fs=FS_TARGET)
        wpt_single, raw_single, y_single = prepare_wpt_and_raw_data_from_bin(
            signal, r_peaks, WINDOW_SIZE, label, wavelet=WAVELET_TYPE, level=WAVELET_LEVEL, target_fs=FS_TARGET
        )
        if len(y_single) == 0:
            # An empty result has shape (0,) and cannot be stacked with the
            # 2D/3D arrays from the other files, so leave it out.
            print(f"No usable beat windows extracted from {path}; skipping this file.")
            continue
        X_test_custom_wpt_list.append(wpt_single)
        X_test_custom_raw_list.append(raw_single)
        y_test_custom_list.append(y_single)

    if not y_test_custom_list:
        # np.vstack on an empty list would raise a ValueError anyway; fail with
        # the same type but a message that names the actual cause.
        raise ValueError("No custom .bin data could be loaded; check custom_file_paths.")

    X_test_custom_wpt = np.vstack(X_test_custom_wpt_list)
    X_test_custom_raw = np.vstack(X_test_custom_raw_list)
    y_test_custom = np.concatenate(y_test_custom_list)
    print(f"Combined Custom Testing WPT feature matrix shape: {X_test_custom_wpt.shape}")
    print(f"Combined Custom Testing Raw window matrix shape: {X_test_custom_raw.shape}")

    # --- [Step 3] Finalize Datasets and Scale WPT data ---
    print("\n--- [Step 3] Scaling and Finalizing Data ---")
    X_train_wpt_base = X_train_wpt
    y_train = y_train_mit

    # The scaler is fitted on training features only and then reused for both test sets
    scaler = StandardScaler()
    X_train_wpt_scaled = scaler.fit_transform(X_train_wpt_base)
    print("Scaler trained on WPT training data.")

    X_test_wpt_mit_scaled = scaler.transform(X_test_wpt_mit)
    X_test_wpt_custom_scaled = scaler.transform(X_test_custom_wpt)

    # Combine test sets (MIT-BIH DS2 first, custom recordings after)
    X_test_wpt_final = np.concatenate((X_test_wpt_mit_scaled, X_test_wpt_custom_scaled), axis=0)
    X_test_raw_final = np.concatenate((X_test_raw_mit, X_test_custom_raw), axis=0)
    y_test_final = np.concatenate((y_test_mitbih, y_test_custom), axis=0)

    # --- [Step 4] Splitting & Hybrid Sampling for WPT data ---
    print("\n--- [Step 4] Finalizing Training Data (Split & Hybrid Sample) ---")
    # Split the WPT features, the raw windows and the labels in ONE call so all
    # three are guaranteed to share the same train/validation indices.
    (X_train_wpt_fold, X_val_wpt_fold,
     X_train_raw_fold, X_val_raw_fold,
     y_train_fold, y_val_fold) = train_test_split(
        X_train_wpt_scaled, X_train_raw, y_train,
        test_size=0.2, random_state=42, stratify=y_train
    )

    # Oversampling is applied to the WPT training split only; the validation
    # split and the raw windows used by the CNN are left untouched.
    print("Applying sampling to WPT training data...")
    print("Class distribution before sampling:", Counter(y_train_fold))
    sampler = SMOTE(random_state=42)
    X_train_wpt_resampled, y_train_wpt_resampled = sampler.fit_resample(X_train_wpt_fold, y_train_fold)
    print("Class distribution after sampling:", Counter(y_train_wpt_resampled))

    # --- [Step 5] Final Data Preparation for Models ---
    print("\n--- [Step 5] Preparing Final Datasets for Models ---")
    output_dim = len(np.unique(y_train))

    # Data for MLP (scaled + oversampled WPT features, one-hot labels)
    X_train_mlp, y_train_mlp = X_train_wpt_resampled, to_categorical(y_train_wpt_resampled, num_classes=output_dim)
    X_val_mlp, y_val_mlp = X_val_wpt_fold, to_categorical(y_val_fold, num_classes=output_dim)
    X_test_mlp, y_test_mlp = X_test_wpt_final, to_categorical(y_test_final, num_classes=output_dim)

    # Data for 1D-CNN (raw two-channel windows, not the WPT features)
    X_train_cnn, y_train_cnn = X_train_raw_fold, to_categorical(y_train_fold, num_classes=output_dim)
    X_val_cnn, y_val_cnn = X_val_raw_fold, to_categorical(y_val_fold, num_classes=output_dim)
    X_test_cnn, y_test_cnn = X_test_raw_final, to_categorical(y_test_final, num_classes=output_dim)

    # Data for RandomForest (same features as the MLP, integer labels)
    X_train_rf, y_train_rf = X_train_wpt_resampled, y_train_wpt_resampled
    X_val_rf, y_val_rf = X_val_wpt_fold, y_val_fold
    X_test_rf, y_test_rf = X_test_wpt_final, y_test_final

    print("\n" + "="*60)
    print("✅ DATA PREPARATION COMPLETE ✅")
    print(f"Shapes for MLP -> Train: {X_train_mlp.shape}, Val: {X_val_mlp.shape}, Test: {X_test_mlp.shape}")
    print(f"Shapes for 1D-CNN -> Train: {X_train_cnn.shape}, Val: {X_val_cnn.shape}, Test: {X_test_cnn.shape}")
    print(f"Shapes for RandomForest -> Train: {X_train_rf.shape}, Val: {X_val_rf.shape}, Test: {X_test_rf.shape}")
    print("="*60)

## **MACHINE LEARNING MODEL TRAINING & SAVING**

### MACHINE LEARNING MODEL FUNCTIONS

In [None]:
def create_mlp_model(input_dim, output_dim):
    """
    Creates and compiles a Keras MLP model.

    Architecture: two 512-unit ReLU layers (dropout 0.1 and 0.4) followed by
    a softmax output layer.

    Args:
        input_dim (int): Number of features per sample.
        output_dim (int): Number of classes (units of the softmax layer).

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 1e-4) and
        categorical cross-entropy, i.e. it expects one-hot encoded labels.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer rather than the
        # deprecated `input_dim=` keyword on the first Dense layer.
        Input(shape=(input_dim,)),
        Dense(512, activation='relu'),
        Dropout(0.1),
        Dense(512, activation='relu'),
        Dropout(0.4),
        Dense(output_dim, activation='softmax') # Softmax for multi-class classification
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='categorical_crossentropy', # Suitable for one-hot labels
        metrics=[
            'accuracy',
            # Precision/Recall are currently disabled for the MLP:
            # tf.keras.metrics.Precision(name='precision'),
            # tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.F1Score(average='weighted', name='f1_score'),
            tf.keras.metrics.SpecificityAtSensitivity(0.9, name='specificity')
        ]
    )
    return model
# 1st CNN model
def create_cnn_model(input_shape, output_dim):
    """
    Creates and compiles a Keras 1D-CNN model.

    Architecture: two Conv1D(512) + Dropout + MaxPooling1D stages, then
    Flatten, a 512-unit ReLU layer with dropout 0.4 and a softmax output.

    Args:
        input_shape (tuple): Shape of ONE sample, (steps, channels).
            The batch dimension is not included.
        output_dim (int): Number of classes (units of the softmax layer).

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 1e-4) and
        categorical cross-entropy, i.e. it expects one-hot encoded labels.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer rather than the
        # deprecated `input_shape=` keyword on the first Conv1D layer.
        Input(shape=input_shape),

        Conv1D(filters=512, kernel_size=6, activation='relu'),
        Dropout(0.1),
        MaxPooling1D(pool_size=2),

        Conv1D(filters=512, kernel_size=3, activation='relu'),
        Dropout(0.2),
        MaxPooling1D(pool_size=2),

        Flatten(),

        Dense(512, activation='relu'),
        Dropout(0.4),

        Dense(output_dim, activation='softmax')
    ])

    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.F1Score(average='weighted', name='f1_score'),
            tf.keras.metrics.SpecificityAtSensitivity(0.9, name='specificity')
        ]
    )
    return model
# 1D-CNN optimized based on paper
def create_cnn_model_optimized(input_shape, output_dim, hp=None):
    """
    Creates and compiles an optimized 1D-CNN model.

    Three hyperparameters are configurable: 'conv4_filters', 'dense_units'
    and 'learning_rate'. They are read from ``hp.values`` when ``hp`` is
    given; any key that is missing there (or all of them when ``hp`` is
    None) falls back to the defaults 100, 256 and 0.0001.

    Note that this function only READS values from ``hp``; it does not
    register a search space with it.

    Args:
        input_shape (tuple): Shape of ONE sample, (steps, channels).
        output_dim (int): Number of classes (units of the softmax layer).
        hp: Optional object exposing a ``values`` dict (e.g. a KerasTuner
            ``HyperParameters`` instance).

    Returns:
        A compiled functional ``Model`` using Adam and categorical
        cross-entropy, i.e. it expects one-hot encoded labels.
    """
    default_values = {
        'conv4_filters': 100,
        'dense_units': 256,
        'learning_rate': 0.0001,
    }
    # Supplied values win; defaults fill any gaps so a partially filled or
    # empty `hp` no longer raises KeyError.
    supplied_values = {} if hp is None else hp.values
    settings = {**default_values, **supplied_values}

    inputs = Input(shape=input_shape)
    # NOTE(review): these layers are named "*_freezed", but nothing in this
    # function sets trainable=False — presumably freezing happens elsewhere; verify.
    x = Conv1D(filters=64, kernel_size=3, activation='relu', padding='valid', name='conv1d_1_freezed')(inputs)
    x = MaxPooling1D(pool_size=2, name='maxpool1d_1_freezed')(x)
    x = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same', name='conv1d_2_freezed')(x)
    x = MaxPooling1D(pool_size=2, name='maxpool1d_2_freezed')(x)
    x = Conv1D(filters=4, kernel_size=3, activation='relu', padding='same', name='conv1d_3_freezed')(x)
    x = MaxPooling1D(pool_size=2, name='maxpool1d_3_freezed')(x)
    # ===============================================
    #           Trainable Layers
    # ===============================================
    x = Conv1D(filters=settings['conv4_filters'], kernel_size=3, activation='relu', padding='same', name='conv1d_4_trainable')(x)
    x = Flatten(name='flatten_layer')(x)
    x = Dense(units=settings['dense_units'], activation='relu', name='dense_1_trainable')(x)
    outputs = Dense(units=output_dim, activation='softmax', name='output_layer_trainable')(x)
    model = Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer=Adam(learning_rate=settings['learning_rate']),
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.F1Score(average='weighted', name='f1_score'),
            tf.keras.metrics.SpecificityAtSensitivity(0.9, name='specificity')
        ]
    )
    return model

# 1D-ResNet
# def create_cnn_model_optimized(input_shape, output_dim, hp=None):
#     """
#     Creates and compiles an optimized 1D-CNN model.
#     If 'hp' is provided, it builds a tunable model for KerasTuner.
#     Otherwise, it builds a model with default hyperparameters.
#     """
#     # Define a default hyperparameter object if none is passed
#     if hp is None:
#         hp = kt.HyperParameters()
#         # Set default values for when not tuning
#         hp.values['initial_filters'] = 384
#         hp.values['res_block_1_filters'] = 384
#         hp.values['res_block_2_filters'] = 384
#         hp.values['kernel_size_initial'] = 7
#         hp.values['kernel_size_res'] = 5
#         hp.values['dropout_1'] = 0.1
#         hp.values['dropout_2'] = 0.3
#         hp.values['dense_units'] = 512
#         hp.values['dense_dropout'] = 0.5
#         hp.values['learning_rate'] = 0.0001

#     def residual_block(x, filters, kernel_size):
#         y = Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(x)
#         y = BatchNormalization()(y)
#         y = Activation('relu')(y)
#         y = Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(y)
#         y = BatchNormalization()(y)
#         shortcut = Conv1D(filters=filters, kernel_size=1, padding='same')(x) if x.shape[-1] != filters else x
#         res_output = Add()([shortcut, y])
#         return Activation('relu')(res_output)

#     inputs = Input(shape=input_shape)
#     x = Conv1D(filters=hp.values['initial_filters'], kernel_size=hp.values['kernel_size_initial'], padding='same')(inputs)
#     x = BatchNormalization()(x)
#     x = Activation('relu')(x)
#     x = MaxPooling1D(pool_size=2)(x)
#     x = residual_block(x, filters=hp.values['res_block_1_filters'], kernel_size=hp.values['kernel_size_res'])
#     x = MaxPooling1D(pool_size=2)(x)
#     x = Dropout(hp.values['dropout_1'])(x)
#     x = residual_block(x, filters=hp.values['res_block_2_filters'], kernel_size=hp.values['kernel_size_res'])
#     x = MaxPooling1D(pool_size=2)(x)
#     x = Dropout(hp.values['dropout_2'])(x)
#     x = GlobalAveragePooling1D()(x)
#     x = Dense(hp.values['dense_units'], activation='relu')(x)
#     x = Dropout(hp.values['dense_dropout'])(x)
#     outputs = Dense(output_dim, activation='softmax')(x)
    
#     model = Model(inputs=inputs, outputs=outputs)
    
#     model.compile(
#         optimizer=Adam(learning_rate=hp.values['learning_rate']),
#         loss='categorical_crossentropy',
#         metrics=[
#             # 'accuracy',
#             tf.keras.metrics.Precision(name='precision'),
#             tf.keras.metrics.Recall(name='recall'),
#             # Note: F1Score might require a different setup in some TF versions.
#             # If it causes issues, consider a custom callback to calculate it.
#             tf.keras.metrics.F1Score(average='weighted', name='f1_score'),
#             tf.keras.metrics.SpecificityAtSensitivity(0.9, name='specificity')
#         ]
#     )
#     return model

# Function to create the RandomForest model
def create_rf_model():
    """Build an untrained cuML RandomForestClassifier with this notebook's fixed settings."""
    forest_settings = {
        'n_estimators': 200,
        'max_depth': 30,
        'random_state': 42,
    }
    return RandomForestClassifier(**forest_settings)

### MACHINE LEARNING MODEL TRAINING EXECUTION
1. Targeted metrics: Precision, Recall, F1-Score, and Specificity
2. After training and evaluation, every model is saved/exported to the `../models` folder

In [None]:
# Training Multiple Models: Single-fold validation
# Trains the 1D-CNN, RandomForest and MLP once on the prepared split,
# evaluates each on the combined test set, then saves every model.
input_shape_cnn = (X_train_cnn.shape[1], X_train_cnn.shape[2])
input_dim = X_train_mlp.shape[1]
output_dim = y_train_mlp.shape[1]

# Saving/exporting models
output_dir = '../models'
os.makedirs(output_dir, exist_ok=True)

print("--- Manually Calculating Class Weights for Cost-Sensitive Learning ---")
# 1. Count the number of samples in each class using np.bincount.
# `y_train_fold` should be a 1D array of integer class labels (e.g., [0, 1, 1, 2, 0]).
class_counts = np.bincount(y_train_fold)
# 2. Get the total number of samples and classes.
n_samples = len(y_train_fold)
n_classes = len(class_counts)
# 3. Calculate the weight for each class: n_samples / (n_classes * count).
manual_weights = n_samples / (n_classes * class_counts)
# 4. Create the dictionary that Keras expects, mapping class indices to weights.
class_weights_dict = {i: weight for i, weight in enumerate(manual_weights)}

print("Class Counts:", dict(enumerate(class_counts)))
print("Manually Calculated Weights:", class_weights_dict)

models = {
    "1D-CNN": create_cnn_model_optimized(input_shape_cnn, output_dim),
    "RandomForest": create_rf_model(),
    "MLP": create_mlp_model(input_dim, output_dim)
}

# Dictionary to store the final results
results = {}

# --- TRAINING AND EVALUATING EACH MODEL ---

for name, model in models.items():
    print(f"\n{'='*20} TRAINING MODEL: {name} {'='*20}")

    # 🧠 Training
    if name == "1D-CNN":
        model.fit(
            X_train_cnn, y_train_cnn,
            epochs=150,
            batch_size=100,
            verbose=1,
            validation_data=(X_val_cnn, y_val_cnn),
            class_weight=class_weights_dict
        )
    elif name == "MLP":
        # NOTE(review): X_train_mlp is the SMOTE-resampled set, while the class
        # weights come from the un-resampled y_train_fold, so minority classes
        # may be compensated twice — confirm this is intended.
        model.fit(
            X_train_mlp, y_train_mlp,
            epochs=100,
            batch_size=200,
            verbose=1,
            validation_data=(X_val_mlp, y_val_mlp),
            class_weight=class_weights_dict
        )
    else: # 📊 RandomForest
        model.fit(X_train_rf, y_train_rf)

    # ⚡ Prediction on the Test Set
    print(f"Evaluating model {name}...")
    if name in ["MLP", "1D-CNN"]:
        y_pred_raw = model.predict(X_test_mlp if name == "MLP" else X_test_cnn)
        y_pred = np.argmax(y_pred_raw, axis=1)
    else: # RandomForest
        y_pred = model.predict(X_test_rf)

    # Store prediction results and ground truth for final evaluation
    results[name] = {'y_pred': y_pred, 'y_true': y_test_final}

# --- PRINT ALL RESULTS SIMULTANEOUSLY ---
class_names = ['Normal (N)', 'Ventricular (V)', 'Supraventricular (S)', 'Fusion (F)']
# Fix the label set explicitly so the report rows and the confusion-matrix
# axes always line up with class_names, even if a class is missing from the
# test labels or predictions.
class_label_ids = list(range(len(class_names)))

print(f"\n{'='*25} FINAL EVALUATION RESULTS {'='*25}")

for name, result_data in results.items():
    y_true = result_data['y_true']
    y_pred = result_data['y_pred']
    # Look the model up by name: the `model` variable left over from the
    # training loop above would always point at the last model trained.
    model = models[name]

    print(f"\n\n{'~'*15} REPORT FOR MODEL: {name} {'~'*15}")

    # --- Classification Report ---
    print("\nClassification Report:")
    report = classification_report(
        y_true, y_pred, labels=class_label_ids, target_names=class_names, zero_division=0
    )
    print(report)

    # --- Confusion Matrix Visualization ---
    print("Confusion Matrix:")
    cm = confusion_matrix(y_true, y_pred, labels=class_label_ids)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Confusion Matrix for {name}', fontsize=16)
    plt.ylabel('True Label', fontsize=12)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.show()

    # SAVING MODEL AFTER TRAINING
    print(f"--- Saving model: {name} ---")
    if name in ["1D-CNN", "MLP"]:
        # TensorFlow/Keras models
        model_path = os.path.join(output_dir, f"model_{name.lower()}_saved")
        model.export(model_path) # Saving models
        print(f"✅ Model {name} has been saved on: {model_path}")
    else: # RandomForest/other non-Keras models
        model_path = os.path.join(output_dir, f"model_{name.lower()}.joblib")
        joblib.dump(model, model_path) # Saving models
        print(f"✅ Model {name} has been saved on: {model_path}")

In [None]:
# Training Multiple Models: Ten-fold cross validation
# StratifiedKFold is used below but the notebook's import cell only brings in
# RepeatedStratifiedKFold, so import it here to avoid a NameError.
from sklearn.model_selection import StratifiedKFold

# --- SETUP ---
# NOTE(review): X_cnn_full, X_mlp_full, X_rf_full and y_full_categorical are
# not defined in the cells shown above — presumably built elsewhere; verify.
input_shape_cnn = (X_cnn_full.shape[1], X_cnn_full.shape[2])
input_dim = X_mlp_full.shape[1]
output_dim = y_full_categorical.shape[1] # Assuming y is one-hot encoded for NN models

# Saving/exporting models
output_dir = '../models'
os.makedirs(output_dir, exist_ok=True)

# Dictionary to store cross-validation results (e.g., accuracies)
cv_results = {}
# Dictionary to store final predictions on the test set
final_results = {}
class_names = ['Normal (N)', 'Ventricular (V)', 'Supraventricular (S)', 'Fusion (F)']


# --- 10-FOLD CROSS-VALIDATION LOOP ---
n_splits = 10
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Define model creation functions in a dictionary
# This allows re-creating a fresh, untrained model for each fold
model_creators = {
    "1D-CNN": lambda: create_cnn_model_optimized(input_shape_cnn, output_dim),
    "RandomForest": lambda: create_rf_model(),
    "MLP": lambda: create_mlp_model(input_dim, output_dim)
}

# Use the non-categorical labels for splitting to ensure stratification
y_labels_for_split = np.argmax(y_full_categorical, axis=1)

# Run stratified k-fold cross-validation for every model in `model_creators`,
# printing per-fold validation accuracy and storing mean/std in `cv_results`.
for name, create_model in model_creators.items():
    print(f"\n{'='*25} RUNNING {n_splits}-FOLD CV FOR: {name} {'='*25}")

    fold_accuracies = []
    # The two neural networks are trained on one-hot labels and accept class
    # weights; every other model is fitted on 1D integer labels.
    is_keras_model = name in ["1D-CNN", "MLP"]

    # Determine the correct dataset for splitting
    if name == "1D-CNN":
        X_data = X_cnn_full
    elif name == "MLP":
        X_data = X_mlp_full
    else:  # RandomForest
        X_data = X_rf_full

    for fold, (train_index, val_index) in enumerate(skf.split(X_data, y_labels_for_split)):
        print(f"\n--- FOLD {fold + 1}/{n_splits} ---")

        # --- 1. Split Data for Current Fold ---
        X_train_fold, X_val_fold = X_data[train_index], X_data[val_index]

        # Integer labels of the training part; used for class weights for all
        # models, and as the training target for the non-Keras model.
        y_train_fold_labels = y_labels_for_split[train_index]
        if is_keras_model:
            y_train_fold, y_val_fold = y_full_categorical[train_index], y_full_categorical[val_index]
        else:  # RandomForest uses 1D labels
            y_train_fold, y_val_fold = y_train_fold_labels, y_labels_for_split[val_index]

        # --- 2. Calculate Class Weights for this Fold's Training Data ---
        # "Balanced" weighting: n_samples / (n_classes * count_per_class).
        # Only the Keras fit calls below consume these weights; the
        # RandomForest fit call does not receive them.
        class_counts = np.bincount(y_train_fold_labels)
        n_samples = len(y_train_fold_labels)
        n_classes = len(class_counts)
        manual_weights = n_samples / (n_classes * class_counts)
        class_weights_dict = {i: weight for i, weight in enumerate(manual_weights)}
        print("Fold Class Weights:", class_weights_dict)

        # --- 3. Create and Train a New Model Instance ---
        if is_keras_model:
            # Release Keras global state left behind by the previous fold's
            # model so memory does not keep growing across repeated builds.
            tf.keras.backend.clear_session()
        model = create_model()

        if name == "1D-CNN":
            model.fit(
                X_train_fold, y_train_fold,
                epochs=150, batch_size=100, verbose=0,
                validation_data=(X_val_fold, y_val_fold),
                class_weight=class_weights_dict
            )
        elif name == "MLP":
            model.fit(
                X_train_fold, y_train_fold,
                epochs=100, batch_size=200, verbose=0,
                validation_data=(X_val_fold, y_val_fold),
                class_weight=class_weights_dict
            )
        else:  # RandomForest
            model.fit(X_train_fold, y_train_fold)

        # --- 4. Evaluate on the Validation Set for this Fold ---
        if is_keras_model:
            # Network outputs are per-class scores; reduce both predictions
            # and one-hot targets to integer class indices.
            y_pred_raw = model.predict(X_val_fold)
            y_pred_fold = np.argmax(y_pred_raw, axis=1)
            y_true_fold = np.argmax(y_val_fold, axis=1)
        else:  # RandomForest
            y_pred_fold = model.predict(X_val_fold)
            y_true_fold = y_val_fold

        acc = accuracy_score(y_true_fold, y_pred_fold)
        fold_accuracies.append(acc)
        print(f"Fold {fold + 1} Validation Accuracy: {acc:.4f}")

    # --- 5. Calculate and Store Average CV Performance ---
    mean_accuracy = np.mean(fold_accuracies)
    std_accuracy = np.std(fold_accuracies)
    cv_results[name] = {'mean_accuracy': mean_accuracy, 'std_accuracy': std_accuracy}
    print(f"\nAverage CV Accuracy for {name}: {mean_accuracy:.4f} (+/- {std_accuracy:.4f})")


# --- FINAL TRAINING & EVALUATION ON TEST SET ---
# For every model: train a fresh instance on the full training data, predict
# on the hold-out test set, store the predictions in `final_results`, and
# write the trained model to `output_dir`.
print(f"\n\n{'='*25} FINAL MODEL TRAINING & EVALUATION {'='*25}")

# Class weights on the full dataset ("balanced" formula:
# n_samples / (n_classes * count_per_class)). They depend only on the labels,
# so they are computed once instead of once per model.
full_class_counts = np.bincount(y_labels_for_split)
full_n_samples = len(y_labels_for_split)
full_n_classes = len(full_class_counts)
full_manual_weights = full_n_samples / (full_n_classes * full_class_counts)
full_class_weights_dict = {i: weight for i, weight in enumerate(full_manual_weights)}

for name, create_model in model_creators.items():
    print(f"\n{'~'*20} TRAINING FINAL MODEL: {name} ON ALL DATA {'~'*20}")

    # --- 1. Create Final Model and Train on the FULL Training Dataset ---
    final_model = create_model()
    print("Final Model Class Weights:", full_class_weights_dict)

    if name == "1D-CNN":
        final_model.fit(
            X_cnn_full, y_full_categorical,
            epochs=150, batch_size=100, verbose=1,
            class_weight=full_class_weights_dict
        )
    elif name == "MLP":
        final_model.fit(
            X_mlp_full, y_full_categorical,
            epochs=100, batch_size=200, verbose=1,
            class_weight=full_class_weights_dict
        )
    else:  # RandomForest (fitted on 1D integer labels, no class weights passed)
        final_model.fit(X_rf_full, y_labels_for_split)

    # --- 2. Evaluate on the Hold-Out Test Set ---
    print(f"Evaluating final {name} model on the test set...")
    if name in ["MLP", "1D-CNN"]:
        X_test_data = X_test_mlp if name == "MLP" else X_test_cnn
        # Network outputs are per-class scores; take the arg-max class index.
        y_pred_raw = final_model.predict(X_test_data)
        y_pred = np.argmax(y_pred_raw, axis=1)
    else:  # RandomForest
        y_pred = final_model.predict(X_test_rf)

    # Store results for final report
    final_results[name] = {'y_pred': y_pred, 'y_true': y_test_final}

    # --- 3. Save the Final Model ---
    print(f"--- Saving final model: {name} ---")
    if name in ["1D-CNN", "MLP"]:
        model_path = os.path.join(output_dir, f"final_model_{name.lower()}_saved")
        # export() writes to the extension-less path, matching the per-model
        # saving code earlier in this notebook. Keras 3 `save()` rejects a
        # path without a `.keras`/`.h5` suffix, so `save(model_path)` would fail there.
        final_model.export(model_path)
    else:
        model_path = os.path.join(output_dir, f"final_model_{name.lower()}.joblib")
        joblib.dump(final_model, model_path)
    print(f"✅ Final model {name} saved to: {model_path}")

# --- PRINT ALL FINAL RESULTS ---

# Cross-validation summary: one line per model with the mean and standard
# deviation of its fold accuracies.
print(f"\n\n{'='*25} CROSS-VALIDATION SUMMARY {'='*25}")
for name in cv_results:
    metrics = cv_results[name]
    print(f"{name}: Mean Accuracy = {metrics['mean_accuracy']:.4f} (Std Dev = {metrics['std_accuracy']:.4f})")

# Test-set reports: classification report plus a confusion-matrix heatmap
# for every final model stored in `final_results`.
print(f"\n\n{'='*25} FINAL TEST SET EVALUATION REPORTS {'='*25}")
for name in final_results:
    result_data = final_results[name]
    y_true, y_pred = result_data['y_true'], result_data['y_pred']

    print(f"\n\n{'~'*15} REPORT FOR FINAL MODEL: {name} {'~'*15}")

    # Per-class precision / recall / F1, labelled with the display names
    print("\nClassification Report:")
    report = classification_report(
        y_true,
        y_pred,
        target_names=class_names,
        zero_division=0,
    )
    print(report)

    # Confusion matrix drawn as an annotated heatmap
    print("Confusion Matrix:")
    cm = confusion_matrix(y_true, y_pred)

    cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=cm_ax,
    )
    cm_ax.set_title(f'Final Confusion Matrix for {name}', fontsize=16)
    cm_ax.set_ylabel('True Label', fontsize=12)
    cm_ax.set_xlabel('Predicted Label', fontsize=12)
    plt.show()