# Task A: Deep Learning for ECG Heartbeat Classification

In this notebook, we evaluate all trained models on unseen data from `mitbih_test.csv`. We first compare the basic models, and then compare the hybrid models trained on the original and augmented datasets to see whether data augmentation helps our models achieve better classification accuracy.

In [None]:
import torch

# Report the PyTorch build and whether a CUDA device can be used.
print("PyTorch version:", torch.__version__)

cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)

if not cuda_ok:
    print("No CUDA devices found.")
else:
    # Describe the device that CUDA tensors will land on by default.
    device_index = torch.cuda.current_device()
    print("Current CUDA device index:", device_index)
    print("CUDA device name:", torch.cuda.get_device_name(device_index))

# Every model and batch below is moved to this device (GPU when present, else CPU).
DEVICE = torch.device("cuda" if cuda_ok else "cpu")

In [None]:
import os

import pandas as pd
import numpy as np
from torch import nn, optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F

from common_utils import get_dataloader, set_and_get_seed

from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

# Seed through the project helper and keep the value it returns.
# NOTE(review): which RNGs set_and_get_seed() seeds is not visible here — confirm in common_utils.
SEED = set_and_get_seed()

In [None]:
# Read the held-out test split; the CSV has no header row, so columns get integer names.
test_csv_path = "Heartbeat_Dataset/mitbih_test.csv"
test_df = pd.read_csv(test_csv_path, header=None)
print(test_df.shape)

In [None]:
# Preview the first rows; the last column is treated as the class label in the cells below.
test_df.head()

# Evaluation of basic models

In this section, we evaluate 3 basic models, namely GRU, CNN and Transformer.

In [None]:
# Map the numeric class ids stored in the last CSV column to the class letters
# used in the reports and plots of this notebook.
labels = {
    0.0: "N",
    1.0: "S",
    2.0: "V",
    3.0: "F",
    4.0: "Q"
}

# Replace the label column by name instead of writing through `.iloc[:, -1] = ...`:
# setting strings into the existing float64 column in place is deprecated in
# pandas 2.x, whereas a column assignment simply swaps in the new object column.
label_col = test_df.columns[-1]
test_df[label_col] = test_df[label_col].replace(labels)

In [None]:
# Split the frame into the signal columns (all but the last) and the label column (the last).
n_features = test_df.shape[1] - 1
x_data = test_df.iloc[:, :n_features]
y_label = test_df.iloc[:, n_features]

In [None]:
# Number of test samples per class label.
y_label.value_counts()

In [None]:
# LabelEncoder numbers the classes in sorted order of their names, so the letter
# labels become F=0, N=1, Q=2, S=3, V=4.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y_label)

# Insert a singleton axis at position 1: (samples, features) -> (samples, 1, features).
X = x_data
X_test = np.expand_dims(X, axis=1)
print(X_test.shape)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y, dtype=torch.long)

# Fixed order (no shuffling) so predictions line up with the rows of the CSV.
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

## CNN model

In [None]:
from common_utils import CNN1D

# Load model
model_path = "./model/cnn_model.pth"
num_classes = len(label_encoder.classes_)
loaded_CNNmodel = CNN1D(num_classes).to(DEVICE)
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on a
# GPU can still be restored when the notebook falls back to CPU.
loaded_CNNmodel.load_state_dict(torch.load(model_path, map_location=DEVICE, weights_only=True))
# Inference only: switch layers such as dropout / batch-norm to evaluation behaviour.
loaded_CNNmodel.eval()
print("Model loaded successfully.")

In [None]:
from common_utils import evaluateCNN_model

# Score the loaded CNN on the test features / letter labels prepared above.
# NOTE(review): what evaluateCNN_model prints or returns is defined in common_utils — confirm there.
test_accuracy = evaluateCNN_model(
    x_data,
    y_label,
    num_classes,
    loaded_CNNmodel,
    batch_size=32,
    device=DEVICE,
)

## Transformer model

In [None]:
# Loading transformer modules and constants
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

# Data-loading settings handed to the LitMITBIH datamodule.
batch_size = 128
num_workers = 0

# Hyper-parameters handed positionally to LitTransformer.
# NOTE(review): presumably these have to match the values used when the checkpoint
# was trained — confirm against the training notebook.
input_size = 200  # also passed to LitMITBIH as `length`
num_classes = 5
num_heads = 5
depth = 6
max_epochs = 22
lr = 1e-4
dropout = 0.0

In [None]:
from transformer_eval import LitTransformer, LitMITBIH

# Use the same directory name (including letter case) as the CSV reads elsewhere in
# this notebook; the previous lower-case spelling only resolves on case-insensitive
# filesystems.
path = "./Heartbeat_Dataset"

# Model shell built from the constants cell.
Transformer_model = LitTransformer(input_size, num_classes, num_heads, depth, max_epochs, lr, dropout)

# Datamodule that serves the test split to trainer.test().
datamodule = LitMITBIH(path, batch_size, num_workers, length=input_size)
datamodule.setup()

In [None]:
save_path = "./model/"
ckpt_name = "ecg-transformer"
ckpt_path = os.path.join(save_path, "checkpoints", ckpt_name + ".ckpt")

# Same checkpoint callback configuration as used for training (best `val_acc` kept).
model_checkpoint = ModelCheckpoint(
    dirpath=os.path.join(save_path, "checkpoints"),
    filename=ckpt_name,
    save_top_k=1,
    verbose=True,
    monitor='val_acc',
    mode='max',
)

# Follow the notebook-wide device choice instead of hard-coding the GPU, so the
# evaluation also runs on machines without CUDA.
trainer = Trainer(accelerator="gpu" if torch.cuda.is_available() else "cpu",
                  devices=1,
                  max_epochs=max_epochs,
                  logger=False,
                  callbacks=[model_checkpoint]
                )

print(f"Loading checkpoint: {ckpt_name}.ckpt")
# load_from_checkpoint is a classmethod: call it on the class (recent Lightning
# releases refuse to run it on an instance) and remap the weights onto DEVICE.
Transformer_model = LitTransformer.load_from_checkpoint(ckpt_path, map_location=DEVICE)

trainer.test(Transformer_model, datamodule=datamodule)

## RNN model

In [None]:
from common_utils import GRUModel

model_path = "./model/gru.pth"
inputSize = 1
hiddenSize = 64
numClasses = 5
numLayers = 1

# Loss
criterion = nn.CrossEntropyLoss()

# Load model
rnn_model = GRUModel(inputSize, hiddenSize, numLayers, numClasses).to(DEVICE)
# map_location keeps the load working when the weights were saved on a GPU but
# the notebook is running on CPU.
rnn_model.load_state_dict(torch.load(model_path, map_location=DEVICE, weights_only=True))
# Inference only: make sure train-time layer behaviour (e.g. dropout) is switched off.
rnn_model.eval()
print("Model loaded successfully.")

# Letter -> numeric id, the inverse of the `labels` mapping applied to test_df earlier.
reverse_labels = {v: k for k, v in labels.items()}

# Test dataset for RNN
RNN_X_test = test_df.iloc[:, :-1].values
RNN_y_test = test_df.iloc[:, -1].replace(reverse_labels).astype(float).values  # Convert labels back to numbers

# Add a trailing axis of size 1: (samples, timesteps) -> (samples, timesteps, 1).
RNN_X_test = np.reshape(RNN_X_test, (RNN_X_test.shape[0], RNN_X_test.shape[1], 1))
print(RNN_X_test.shape)

RNN_test_loader = get_dataloader(RNN_X_test, RNN_y_test, False)

test_loss = 0.0
correct = 0.0
y_pred, y_true = [], []

# Get test results
with torch.no_grad():  # No gradients are needed for evaluation
    for X_batch, y_batch in RNN_test_loader:
        X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
        outputs = rnn_model(X_batch)
        loss = criterion(outputs, y_batch.long())
        # Weight the batch-mean loss by batch size so the final average is per sample.
        test_loss += loss.item() * X_batch.size(0)

        # Get predictions
        _, predictions = torch.max(outputs, 1)
        correct += (predictions == y_batch).sum().item()

        y_pred.append(predictions.float().detach().cpu())
        y_true.append(y_batch.cpu())

# Calculate average test loss and accuracy
test_loss /= len(RNN_test_loader.dataset)
test_acc = correct / len(RNN_test_loader.dataset)

preds = torch.cat(y_pred).numpy()
truths = torch.cat(y_true).numpy()

# Get classification report and confusion matrix.
# The id list gets its own name so the `labels` dict above is not overwritten
# (rebinding it made this cell fail at `labels.items()` when run a second time).
classes = ['N', 'S', 'V', 'F', 'Q']
class_ids = [0.0, 1.0, 2.0, 3.0, 4.0]
report = pd.DataFrame(classification_report(truths, preds, labels=class_ids, target_names=classes, output_dict=True)).transpose()
# Pin the matrix to the five classes so its rows/columns always follow `classes`.
cm = confusion_matrix(truths, preds, labels=class_ids)

# Print test loss and accuracy
print(f'Test loss: {test_loss:.4f}, Test accuracy: {test_acc * 100:.2f}%')

In [None]:
# Show the per-class classification report computed in the previous cell.
report

In [None]:
# Display confusion matrix with class letters on the axes instead of bare indices.
# `classes` is listed in the same order as the sorted numeric ids (0..4) that
# confusion_matrix uses for its rows and columns.
disp = ConfusionMatrixDisplay(cm, display_labels=classes)
disp.plot()

These are the results obtained from the different models:
| Model        | Test accuracy |
|--------------|---------------|
| CNN          | 0.9823        |
| Transformers | 0.9779        |
| RNN          | 0.8279        |
| RNN(LSTM)    | 0.8278        |
| RNN(GRU)     | 0.9740        |

# Hybrid models

Based on the performance of the individual models, we decided to explore hybrid architectures that combine the strengths of different models. Consequently, we developed CNN-Transformer and CNN-GRU hybrid models. In the following section, we evaluate the hybrid models trained on the original and the augmented datasets.

## CNN-Transformer Model

In [None]:
from common_utils import cnn_transformer_evaluate
from CNN_Transformer_hybrid import CNNTransformerHybrid

### Evaluation of model trained on original train data

In [None]:
model_path = "./model/cnn_transformer_model.pth"

# Build the hybrid network; the same instance is reused for every checkpoint below.
CNN_transformer_model = CNNTransformerHybrid(
    input_dim=187,
    num_classes=5,
    num_heads=8,
    num_layers=6
).to(DEVICE)

# map_location remaps the stored tensors onto DEVICE so a GPU-saved checkpoint
# also loads when the notebook runs on CPU.
CNN_transformer_model.load_state_dict(torch.load(model_path, map_location=DEVICE, weights_only=True))
print("Model loaded successfully.")

In [None]:
criterion = torch.nn.CrossEntropyLoss()

# Score the checkpoint loaded in the previous cell on the test loader.
eval_result = cnn_transformer_evaluate(CNN_transformer_model, test_loader, criterion, DEVICE)
epoch_loss, epoch_acc, all_preds, all_labels = eval_result

print(f"Loading model from: {model_path}")
print(f"Test Loss: {epoch_loss:.4f} | Test Accuracy: {epoch_acc:.4f}")

### Evaluation of model trained on signal transformation data

In [None]:
model_path = "./model/cnn_transformer_model_augment.pth"

# Overwrite the weights of the existing model instance with this checkpoint;
# map_location keeps the load working on a CPU-only machine.
CNN_transformer_model.load_state_dict(torch.load(model_path, map_location=DEVICE, weights_only=True))
print("Model loaded successfully.")

# Keep these results under their own names: they are reused for the confusion
# matrix and classification report further down.
cnn_transformer_augment_epoch_loss, cnn_transformer_augment_epoch_acc, cnn_transformer_augment_all_preds, cnn_transformer_augment_all_labels = cnn_transformer_evaluate(CNN_transformer_model, test_loader, criterion, DEVICE)
print(f"Loading model from: {model_path}")
print(f"Test Loss: {cnn_transformer_augment_epoch_loss:.4f} | Test Accuracy: {cnn_transformer_augment_epoch_acc:.4f}")

### Evaluation of model trained on SMOTE train data

In [None]:
model_path = "./model/cnn_transformer_model_smote.pth"

# Overwrite the weights of the existing model instance with this checkpoint;
# map_location keeps the load working on a CPU-only machine.
CNN_transformer_model.load_state_dict(torch.load(model_path, map_location=DEVICE, weights_only=True))
print("Model loaded successfully.")

cnn_transformer_smote_epoch_loss, cnn_transformer_smote_epoch_acc, cnn_transformer_smote_all_preds, cnn_transformer_smote_all_labels = cnn_transformer_evaluate(CNN_transformer_model, test_loader, criterion, DEVICE)
print(f"Loading model from: {model_path}")
print(f"Test Loss: {cnn_transformer_smote_epoch_loss:.4f} | Test Accuracy: {cnn_transformer_smote_epoch_acc:.4f}")


### Observations about the best performing CNN-Transformer model

In [None]:
# The labels in this test loader were produced by LabelEncoder, which numbers the
# classes alphabetically (F=0, N=1, Q=2, S=3, V=4). List the ids in N, S, V, F, Q
# order so the rows/columns match the display labels.
class_order = [1, 3, 4, 0, 2]
class_names = ['N', 'S', 'V', 'F', 'Q']

cm = confusion_matrix(
    cnn_transformer_augment_all_labels,
    cnn_transformer_augment_all_preds,
    labels=class_order,
)

# Plot confusion matrix with integer cell counts
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(values_format="d")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Per-class metrics for the model trained on signal-transformation data.
# Label ids are given in N, S, V, F, Q order to line up with target_names.
report = classification_report(
    cnn_transformer_augment_all_labels,
    cnn_transformer_augment_all_preds,
    labels=[1, 3, 4, 0, 2],
    target_names=['N', 'S', 'V', 'F', 'Q'],
    output_dict=True,
)

# One row per class / average, one column per metric.
report_df = pd.DataFrame(report).T
report_df

| Model           | Dataset                | Test Loss | Test Accuracy |
| -------------   | ---------------------- | --------- | ------------- |
| CNN-Transformer | Original               | 0.0666    | 98.50%        |
|                 | SMOTE                  | 0.0920    | 98.28%        |
|                 | Signal Transformation  | 0.0671    | 98.55%        |

## CNN-GRU model

### Evaluation of model trained on original train data

In [None]:
def test(model, test_loader, device=DEVICE):
    """Run `model` over `test_loader` and collect predictions and metrics.

    Args:
        model: network to evaluate; it is moved to `device` and put in eval mode.
        test_loader: iterable of `(inputs, targets)` batches exposing `.dataset`.
        device: device on which the model and the batches are placed.

    Returns:
        Tuple `(preds, truths, test_loss, test_acc)`: predicted class ids (float
        array), the target labels, the per-sample average cross-entropy loss and
        the fraction of correct predictions.
    """
    model.to(device)
    model.eval()

    loss_fn = nn.CrossEntropyLoss()
    total_loss = 0.0
    num_correct = 0
    pred_chunks, true_chunks = [], []

    with torch.no_grad():  # inference only, no gradients required
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            batch_loss = loss_fn(logits, targets.long())
            # The criterion returns a batch mean; scale it back to a batch sum.
            total_loss += batch_loss.item() * inputs.size(0)

            predicted = torch.max(logits, 1).indices
            num_correct += (predicted == targets).sum().item()

            pred_chunks.append(predicted.float().detach().cpu())
            true_chunks.append(targets.cpu())

    # Average over the number of samples in the dataset
    n_samples = len(test_loader.dataset)
    avg_loss = total_loss / n_samples
    accuracy = num_correct / n_samples

    preds = torch.cat(pred_chunks).numpy()
    truths = torch.cat(true_chunks).numpy()
    return preds, truths, avg_loss, accuracy

In [None]:
# Re-read the test CSV so the labels are the numeric ids stored in the file
test_data = pd.read_csv('Heartbeat_Dataset/mitbih_test.csv', header=None)
signal_values = test_data.iloc[:, :-1].to_numpy()
y_test = test_data.iloc[:, -1].to_numpy()

# Insert a singleton axis at position 1: (samples, features) -> (samples, 1, features)
X_test = np.expand_dims(signal_values, axis=1)

# Replaces the earlier `test_loader`; built through the project helper in test mode.
test_loader = get_dataloader(X_test, y_test, is_train=False)

In [None]:
from common_utils import CNN_GRU

# Load model
cnn_gru_model = CNN_GRU()
model_path = './model/cnn_gru_original.pth'
# map_location keeps the load working when the weights were saved on a GPU but
# the notebook is running on CPU.
cnn_gru_model.load_state_dict(torch.load(model_path, map_location=DEVICE, weights_only=True))

# Test model.
# test() returns (preds, truths, loss, acc); unpack in that order so the report and
# confusion matrix below receive y_true and y_pred the right way round.
preds, truths, test_loss, test_acc = test(cnn_gru_model, test_loader)

# Get classification report and confusion matrix
classes_list = ['N', 'S', 'V', 'F', 'Q']
labels_list = [0.0, 1.0, 2.0, 3.0, 4.0]
report = pd.DataFrame(classification_report(truths, preds, labels=labels_list, target_names=classes_list, output_dict=True)).transpose()
cm = confusion_matrix(truths, preds)

# Print test loss and accuracy
print(f'Test loss: {test_loss:.4f}, Test accuracy: {test_acc * 100:.2f}%')

In [None]:
# Display the per-class classification report computed in the previous cell
report

In [None]:
# Display confusion matrix with class letters on the axes instead of bare indices.
# `classes_list` follows the same order as the sorted numeric ids (0..4) used by
# confusion_matrix for its rows and columns.
disp = ConfusionMatrixDisplay(cm, display_labels=classes_list)
disp.plot()

### Evaluation of model trained on SMOTE train data

In [None]:
from common_utils import CNN_GRU

# Load model
cnn_gru_model = CNN_GRU()
model_path = './model/cnn_gru_smote.pth'
# map_location keeps the load working when the weights were saved on a GPU but
# the notebook is running on CPU.
cnn_gru_model.load_state_dict(torch.load(model_path, map_location=DEVICE, weights_only=True))

# Test model.
# test() returns (preds, truths, loss, acc); unpack in that order so the report and
# confusion matrix below receive y_true and y_pred the right way round.
preds, truths, test_loss, test_acc = test(cnn_gru_model, test_loader)

# Get classification report and confusion matrix
report = pd.DataFrame(classification_report(truths, preds, labels=labels_list, target_names=classes_list, output_dict=True)).transpose()
cm = confusion_matrix(truths, preds)

# Print test loss and accuracy
print(f'Test loss: {test_loss:.4f}, Test accuracy: {test_acc* 100:.2f}%')

In [None]:
# Display the per-class classification report computed in the previous cell
report

In [None]:
# Display confusion matrix with class letters on the axes instead of bare indices.
# `classes_list` follows the same order as the sorted numeric ids (0..4) used by
# confusion_matrix for its rows and columns.
disp = ConfusionMatrixDisplay(cm, display_labels=classes_list)
disp.plot()

### Evaluation of model trained on Signal Transformation train data

In [None]:
from common_utils import CNN_GRU

# Load model
cnn_gru_model = CNN_GRU()
model_path = './model/cnn_gru_st.pth'
# map_location keeps the load working when the weights were saved on a GPU but
# the notebook is running on CPU.
cnn_gru_model.load_state_dict(torch.load(model_path, map_location=DEVICE, weights_only=True))

# Test model.
# test() returns (preds, truths, loss, acc); unpack in that order so the report and
# confusion matrix below receive y_true and y_pred the right way round.
preds, truths, test_loss, test_acc = test(cnn_gru_model, test_loader)

# Get classification report and confusion matrix
report = pd.DataFrame(classification_report(truths, preds, labels=labels_list, target_names=classes_list, output_dict=True)).transpose()
cm = confusion_matrix(truths, preds)

# Print test loss and accuracy
print(f'Test loss: {test_loss:.4f}, Test accuracy: {test_acc * 100:.2f}%')

In [None]:
# Display the per-class classification report computed in the previous cell
report

In [None]:
# Display confusion matrix with class letters on the axes instead of bare indices.
# `classes_list` follows the same order as the sorted numeric ids (0..4) used by
# confusion_matrix for its rows and columns.
disp = ConfusionMatrixDisplay(cm, display_labels=classes_list)
disp.plot()

| Model           | Dataset                | Test Loss | Test Accuracy |
| -------------   | ---------------------- | --------- | ------------- |
| CNN-Transformer | Original               | 0.0666    | 98.50%        |
|                 | SMOTE                  | 0.0920    | 98.28%        |
|                 | Signal Transformation  | 0.0671    | 98.55%        |
| CNN-GRU         | Original               | 0.0718    | 98.70%        |
|                 | SMOTE                  | 0.1385    | 98.68%        |
|                 | Signal Transformation  | 0.0808    | 98.78%        |