In [1]:
import numpy as np
import pandas as pd
import wfdb
import os
import scipy.signal as sgn
from tqdm import tqdm
from keras import models, layers, optimizers, regularizers
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from scipy.signal import cheby1, filtfilt




In [2]:
# File paths
train_files = [f"a{str(i).zfill(2)}" for i in range(1, 21)] + [f"b{str(i).zfill(2)}" for i in range(1, 6)] + [f"c{str(i).zfill(2)}" for i in range(1, 11)]
test_files = [f"x{str(i).zfill(2)}" for i in range(1, 36)]
base_path = "apnea-ecg/1.0.0/"
train_paths = [os.path.join(base_path, file) for file in train_files]
test_paths = [os.path.join(base_path, file) for file in test_files]


In [13]:
# Chebyshev Filter
def apply_chebyshev_filter(signal, lowcut=0.5, highcut=40, fs=100, order=4):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = cheby1(order, 0.5, [low, high], btype='band')
    return filtfilt(b, a, signal)

# Function to load and preprocess the ECG signal
def load_ecg_and_segment(file, segment_duration=10, fs=100):
    if not os.path.exists(f"{file}.hea") or not os.path.exists(f"{file}.dat"):
        print(f"File not found: {file}.hea or {file}.dat")
        return [], []
    
    record = wfdb.rdrecord(file)
    annotation = wfdb.rdann(file, 'apn')

    signal = record.p_signal[:, 0]
    filtered_signal = apply_chebyshev_filter(signal)

    segments = []
    labels = []
    samples_per_segment = segment_duration * fs

    for i, samp in enumerate(annotation.sample):
        start = max(samp - samples_per_segment // 2, 0)
        end = start + samples_per_segment
        if end > len(filtered_signal):
            break
        segment = filtered_signal[start:end]
        segments.append(segment)
        labels.append(1 if annotation.symbol[i] == 'A' else 0)
        
    return np.array(segments), np.array(labels)

# Load and prepare the full dataset
def prepare_data(paths):
    data, labels = [], []
    for path in tqdm(paths):
        segments, seg_labels = load_ecg_and_segment(path)
        if segments.size > 0:  
            data.extend(segments)
            labels.extend(seg_labels)
    return np.array(data), np.array(labels)

In [14]:
from sklearn.utils import shuffle

# Load training and validation data, and shuffle
X_train, y_train = prepare_data(train_paths)
X_test, y_test = prepare_data(test_paths)

# Filter for balanced classes (6550 samples each for labels 0 and 1)
label_0_indices = np.where(y_train == 0)[0][:6550]
label_1_indices = np.where(y_train == 1)[0][:6550]
balanced_indices = np.concatenate([label_0_indices, label_1_indices])
X_train, y_train = X_train[balanced_indices], y_train[balanced_indices]

# Shuffle the balanced data
X_train, y_train = shuffle(X_train, y_train, random_state=42)

# Print the number of segments loaded for verification
print(f"Training segments: {len(X_train)}, Test segments: {len(X_test)}")

# Reshape data for model input format
X_train = X_train[..., np.newaxis]  # Adding channel dimension
X_test = X_test[..., np.newaxis]


print("Training data shape:", X_train.shape)
print("Training labels shape:", y_train.shape)
print("Test data shape:", X_test.shape)
print("Test labels shape:", y_test.shape)

100%|██████████████████████████████████████████████████████████████████████████████████| 35/35 [00:03<00:00, 10.13it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 35/35 [00:03<00:00,  9.86it/s]

Training segments: 13064, Test segments: 17268
Training data shape: (13064, 1000, 1)
Training labels shape: (13064,)
Test data shape: (17268, 1000, 1)
Test labels shape: (17268,)





In [19]:
from sklearn.model_selection import StratifiedKFold

# Splitting the training data into train and validation sets with StratifiedKFold
strat_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
train_indices, val_indices = next(strat_kfold.split(X_train, y_train))

# Creating training and validation sets
X_ctrain, X_val = X_train[train_indices], X_train[val_indices]
y_ctrain, y_val = y_train[train_indices], y_train[val_indices]

# Print shapes of the resulting sets for verification
print("Train data shape:", X_ctrain.shape)
print("Validation data shape:", X_val.shape)
print("Train labels shape:", y_ctrain.shape)
print("Validation labels shape:", y_val.shape)

Train data shape: (10451, 1000, 1)
Validation data shape: (2613, 1000, 1)
Train labels shape: (10451,)
Validation labels shape: (2613,)


In [25]:
from tensorflow.keras import models, layers, regularizers, optimizers

def build_cnn_lstm_model(input_shape):
    input_layer = layers.Input(shape=input_shape)
    x_bn = layers.BatchNormalization()(input_layer)
    
    branch1 = layers.Conv1D(24, kernel_size=125, strides=1, activation='relu', padding='same')(x_bn)
    branch1 = layers.MaxPooling1D(pool_size=2, strides=1, padding='same')(branch1)
    
    branch2 = layers.Conv1D(24, kernel_size=15, strides=1, activation='relu', padding='same')(x_bn)
    branch2 = layers.MaxPooling1D(pool_size=2, strides=1, padding='same')(branch2)
    
    branch3 = layers.Conv1D(24, kernel_size=5, strides=1, activation='relu', padding='same')(x_bn)
    branch3 = layers.MaxPooling1D(pool_size=2, strides=1, padding='same')(branch3)
    
    concatenated = layers.Concatenate(axis=-1)([branch1, branch2, branch3])
    x = layers.MaxPooling1D(pool_size=3, strides=1, padding='same')(concatenated)
    
    conv_adjusted = layers.Conv1D(24, kernel_size=3, strides=1, activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.01))(x)
    x = layers.BatchNormalization()(conv_adjusted)
    x = layers.Add()([x, conv_adjusted])
    
    x = layers.Dense(48, activation='leaky_relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = layers.Dropout(0.5)(x)
    
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Reshape((1, -1))(x)
    x = layers.LSTM(64)(x)
    
    output_layer = layers.Dense(2, activation='softmax')(x)
    model = models.Model(inputs=input_layer, outputs=output_layer)
    return model

In [26]:

# Compile the model with specified learning rate
input_shape = (1000, 1)  # Adjust as per 10-second segments at 100 Hz sampling rate
model = build_cnn_lstm_model(input_shape)
model.compile(optimizer=optimizers.Adam(learning_rate=0.001), 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

# Model summary for verification
model.summary()


Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 1000, 1)]            0         []                            
                                                                                                  
 batch_normalization_4 (Bat  (None, 1000, 1)              4         ['input_4[0][0]']             
 chNormalization)                                                                                 
                                                                                                  
 conv1d_12 (Conv1D)          (None, 1000, 24)             3024      ['batch_normalization_4[0][0]'
                                                                    ]                             
                                                                                            

In [27]:
# Callbacks
checkpoint_path = "best_model.h5"
checkpoint = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', save_best_only=True, mode='min', verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

# Class weights
class_weights = compute_class_weight(class_weight='balanced', classes=[0, 1], y=y_ctrain)
class_weights_dict = {0: class_weights[0], 1: class_weights[1]}

# Train the model
history = model.fit(X_ctrain, y_ctrain, 
                    validation_data=(X_val, y_val), 
                    epochs=30, 
                    batch_size=64,
                    class_weight=class_weights_dict,
                    callbacks=[checkpoint, reduce_lr, early_stopping]
                   )

Epoch 1/30
Epoch 1: val_loss improved from inf to 0.74089, saving model to C:/Users/abbas/BAU/11Fall 2024/FYP2/apnea-ecg/1.0.0/Model_CheckPoint_draft_3\best_model.h5
Epoch 2/30


  saving_api.save_model(


Epoch 2: val_loss improved from 0.74089 to 0.62962, saving model to C:/Users/abbas/BAU/11Fall 2024/FYP2/apnea-ecg/1.0.0/Model_CheckPoint_draft_3\best_model.h5
Epoch 3/30
Epoch 3: val_loss improved from 0.62962 to 0.56244, saving model to C:/Users/abbas/BAU/11Fall 2024/FYP2/apnea-ecg/1.0.0/Model_CheckPoint_draft_3\best_model.h5
Epoch 4/30
Epoch 4: val_loss improved from 0.56244 to 0.50084, saving model to C:/Users/abbas/BAU/11Fall 2024/FYP2/apnea-ecg/1.0.0/Model_CheckPoint_draft_3\best_model.h5
Epoch 5/30
Epoch 5: val_loss did not improve from 0.50084
Epoch 6/30
Epoch 6: val_loss did not improve from 0.50084
Epoch 7/30
Epoch 7: val_loss did not improve from 0.50084

Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 8/30
Epoch 8: val_loss improved from 0.50084 to 0.45194, saving model to C:/Users/abbas/BAU/11Fall 2024/FYP2/apnea-ecg/1.0.0/Model_CheckPoint_draft_3\best_model.h5
Epoch 9/30
Epoch 9: val_loss did not improve from 0.45194
Epoch 10/30
Epoch 10: 

In [28]:
from sklearn.metrics import confusion_matrix, cohen_kappa_score

# Evaluate on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Predict on the test set
y_test_pred = np.argmax(model.predict(X_test), axis=1)

# Compute confusion matrix and evaluation metrics
tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred).ravel()
TAC = (tp + tn) / (tp + tn + fp + fn) * 100
sensitivity = tp / (tp + fn) * 100
specificity = tn / (tn + fp) * 100
PPV = tp / (tp + fp) * 100
NPV = tn / (tn + fn) * 100
kappa = cohen_kappa_score(y_test, y_test_pred)

# Display metrics
print(f"Total Accuracy (TAC): {TAC:.2f}%")
print(f"Sensitivity (SE): {sensitivity:.2f}%")
print(f"Specificity (SP): {specificity:.2f}%")
print(f"Positive Predictive Value (PPV): {PPV:.2f}%")
print(f"Negative Predictive Value (NPV): {NPV:.2f}%")


Test Accuracy: 57.68%
Total Accuracy (TAC): 57.68%
Sensitivity (SE): 76.69%
Specificity (SP): 46.06%
Positive Predictive Value (PPV): 46.49%
Negative Predictive Value (NPV): 76.38%
