In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Conv1D, BatchNormalization, LSTM, GRU, Dense, Dropout, Flatten,
                                     Bidirectional, GlobalAveragePooling1D, Input, LayerNormalization, MultiHeadAttention)
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
class ECGClassifier:
    """Binary ECG classifier built from a Conv1D front-end and stacked bidirectional LSTMs.

    Constructing an instance immediately reads both CSV files, splits the rows
    into train/validation/test sets and standardizes the features. Call
    ``build_model()``, then ``train_model()``, then ``evaluate_model()``.

    Attributes set by the constructor / ``load_and_preprocess_data``:
        X_train, X_val, X_test: feature arrays shaped (samples, timesteps, 1).
        y_train, y_val, y_test: label arrays taken from the last CSV column.
        scaler: the ``StandardScaler`` fitted on the training split.
        model: the compiled Keras model, or ``None`` until ``build_model()``.
        history: the Keras ``History`` from ``fit``, or ``None`` until ``train_model()``.
    """

    def __init__(self, normal_csv, abnormal_csv, test_size=0.15, val_size=0.15, random_state=42):
        """Store the configuration and load/preprocess the data.

        Args:
            normal_csv: path to the CSV holding the normal recordings.
            abnormal_csv: path to the CSV holding the abnormal recordings.
            test_size: fraction of all rows held out for testing.
            val_size: fraction of all rows held out for validation.
            random_state: seed passed to both ``train_test_split`` calls.

        Raises:
            ValueError: if the split fractions are not in (0, 1) or do not
                leave any data for training.
        """
        # Fail fast, before any file I/O: the validation ratio computed later
        # is val_size / (1 - test_size), which is only meaningful for these ranges.
        if not (0 < test_size < 1 and 0 < val_size < 1 and test_size + val_size < 1):
            raise ValueError(
                "test_size and val_size must each be in (0, 1) and sum to less than 1; "
                f"got test_size={test_size!r}, val_size={val_size!r}."
            )

        self.normal_csv = normal_csv
        self.abnormal_csv = abnormal_csv
        self.test_size = test_size
        self.val_size = val_size
        self.random_state = random_state
        self.model = None
        self.history = None  # populated by train_model()
        self.scaler = None   # populated by load_and_preprocess_data()
        self.load_and_preprocess_data()

    def load_and_preprocess_data(self):
        """Loads the PTBDB dataset, normalizes ECG signals, and splits into train, validation, and test sets.

        Both CSV files are read without a header row; the last column is used
        as the label and every other column as a feature. The scaler is fitted
        on the training split only and then applied to validation and test.
        """
        # Load the datasets
        df_normal = pd.read_csv(self.normal_csv, header=None)
        df_abnormal = pd.read_csv(self.abnormal_csv, header=None)

        # Combine normal and abnormal ECGs (fresh index so row labels are unique)
        df = pd.concat([df_normal, df_abnormal], axis=0, ignore_index=True)

        # Features and labels
        X = df.iloc[:, :-1].values  # All columns except the last one
        y = df.iloc[:, -1].values   # The last column is the label

        # Split into train+val and test sets
        X_train_val, X_test, y_train_val, y_test = train_test_split(
            X, y, test_size=self.test_size, random_state=self.random_state, stratify=y
        )

        # Split train+val into train and validation sets.
        # val_size is a fraction of the full dataset, so rescale it to the
        # fraction of the data that remains after the test split.
        val_ratio = self.val_size / (1 - self.test_size)
        X_train, X_val, y_train, y_val = train_test_split(
            X_train_val, y_train_val, test_size=val_ratio, random_state=self.random_state, stratify=y_train_val
        )

        # Fit the scaler on training data only, then reuse it for val/test.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        X_test = scaler.transform(X_test)
        # Keep the fitted scaler so new data can be preprocessed identically.
        self.scaler = scaler

        # Reshape the data for Conv1D/LSTM input: (samples, timesteps, features=1)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

        # Assign to instance variables
        self.X_train, self.X_val, self.X_test = X_train, X_val, X_test
        self.y_train, self.y_val, self.y_test = y_train, y_val, y_test

    def build_model(self):
        """Builds and compiles the Conv1D + stacked BiLSTM model.

        Architecture: one Conv1D layer with batch normalization, four
        bidirectional LSTM layers, a Flatten, two Dense+Dropout blocks and a
        single sigmoid output unit. The model summary is printed and the
        compiled model is stored in ``self.model``.
        """
        inputs = Input(shape=(self.X_train.shape[1], 1))

        x = Conv1D(filters=8, kernel_size=3, activation='relu', padding='same')(inputs)
        x = BatchNormalization()(x)

        # NOTE(review): per the Keras LSTM documentation, recurrent_dropout > 0
        # rules out the cuDNN kernel; this is probably why epochs are slow.
        # Confirm before changing, since it alters regularization.
        x = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))(x)
        x = Bidirectional(LSTM(128, return_sequences=True, dropout=0.25, recurrent_dropout=0.2))(x)
        x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.2))(x)
        x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.2))(x)

        # All timesteps are kept (return_sequences=True) and flattened for the dense head.
        x = Flatten()(x)

        x = Dense(128, activation='relu')(x)
        x = Dropout(0.2)(x)
        x = Dense(64, activation='relu')(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        self.model = Model(inputs, outputs)
        self.model.summary()

        optimizer = Adam(learning_rate=1e-3)
        self.model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    def train_model(self, epochs=150, batch_size=64):
        """Fits the model on the training split, validating on the validation split.

        Args:
            epochs: maximum number of epochs (early stopping may end sooner).
            batch_size: mini-batch size passed to ``fit``.

        Raises:
            ValueError: if ``build_model()`` has not been called.

        The Keras ``History`` object is stored in ``self.history``.
        """
        if self.model is None:
            raise ValueError("Model has not been built. Call build_model() first.")

        # NOTE(review): both callbacks use patience=3, so early stopping will
        # normally end training in the same epoch in which the learning rate
        # would first be reduced. Consider a smaller LR patience; confirm intent.
        callbacks = [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-4)
        ]

        self.history = self.model.fit(
            self.X_train, self.y_train,
            epochs=epochs, batch_size=batch_size,
            validation_data=(self.X_val, self.y_val),
            callbacks=callbacks
        )

    def evaluate_model(self):
        """Evaluates the model on the test dataset and prints classification results.

        Prints the test accuracy, a classification report and the confusion
        matrix (predictions are thresholded at 0.5).

        Returns:
            The ``(test_loss, test_acc)`` pair from ``model.evaluate``.

        Raises:
            ValueError: if no model has been built yet.
        """
        if self.model is None:
            raise ValueError("Model has not been built. Call build_model() and train_model() first.")

        test_loss, test_acc = self.model.evaluate(self.X_test, self.y_test)
        print(f"\nTest Accuracy: {test_acc * 100:.2f}%")

        # Threshold the sigmoid outputs and flatten (n, 1) -> (n,) for sklearn.
        y_pred = (self.model.predict(self.X_test) > 0.5).astype("int32").ravel()

        print("\nClassification Report:")
        print(classification_report(self.y_test, y_pred, digits=4))

        print("\nConfusion Matrix:")
        # The matrix must be printed explicitly; a bare expression inside a
        # method is discarded and never reaches the notebook output.
        print(confusion_matrix(self.y_test, y_pred))

        return test_loss, test_acc

In [4]:
if __name__ == "__main__":
    # End-to-end run: load the two PTBDB CSVs, build the network, train, evaluate.
    classifier = ECGClassifier(
        "/kaggle/input/ptbdb-dataset/ptbdb_normal.csv",
        "/kaggle/input/ptbdb-dataset/ptbdb_abnormal.csv",
    )

    classifier.build_model()

    # Up to 30 epochs with mini-batches of 128 samples.
    classifier.train_model(batch_size=128, epochs=30)

    # Prints the test metrics; the returned (loss, accuracy) pair is not used here.
    classifier.evaluate_model()

Epoch 1/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 2s/step - accuracy: 0.7351 - loss: 0.4994 - val_accuracy: 0.7508 - val_loss: 0.4309 - learning_rate: 0.0010
Epoch 2/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 2s/step - accuracy: 0.8556 - loss: 0.3204 - val_accuracy: 0.7448 - val_loss: 0.5038 - learning_rate: 0.0010
Epoch 3/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 2s/step - accuracy: 0.8987 - loss: 0.2383 - val_accuracy: 0.7989 - val_loss: 0.3775 - learning_rate: 0.0010
Epoch 4/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 2s/step - accuracy: 0.9259 - loss: 0.1871 - val_accuracy: 0.8360 - val_loss: 0.3326 - learning_rate: 0.0010
Epoch 5/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 2s/step - accuracy: 0.9430 - loss: 0.1509 - val_accuracy: 0.9139 - val_loss: 0.1813 - learning_rate: 0.0010
Epoch 6/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0