# Task A: Deep Learning for ECG Heartbeat Classification

We shall evaluate all models on unseen data by using the models which are only trained on the mitbih_train.csv and we do testing on mitbih_test.csv here

In [1]:
import torch
print("PyTorch version:", torch.__version__)

# Check if CUDA is available
print("CUDA available:", torch.cuda.is_available())

# Get current CUDA device index (if available)
if torch.cuda.is_available():
    print("Current CUDA device index:", torch.cuda.current_device())
    print("CUDA device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print("No CUDA devices found.")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

PyTorch version: 2.5.0+cu118
CUDA available: True
Current CUDA device index: 0
CUDA device name: NVIDIA GeForce RTX 3070


In [2]:
import os

import pandas as pd
import numpy as np
from torch import nn, optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F



In [3]:

dataframes = {}
directory_path = 'Heartbeat_Dataset'
all_files = os.listdir(directory_path)

for file in os.listdir(directory_path):
    if file.endswith('.csv'):
        file_path = os.path.join(directory_path, file)
        # Remove the .csv extension for the DataFrame name
        df_name = os.path.splitext(file)[0]
        dataframes[df_name] = pd.read_csv(file_path, header=None)


In [4]:
print(dataframes.keys())


dict_keys(['mitbih_test', 'mitbih_train', 'ptbdb_abnormal', 'ptbdb_normal'])


In [5]:

test_df = dataframes['mitbih_test']

print(test_df.shape)

(21892, 188)


# Data Augmentation
 here we map numerical values  of its categories to string labels 

In [6]:
# x_data = data_df.iloc[:, 2:]
# y_label = data_df[['type']]


labels = {
    0.0: "N",
    1.0: "S",
    2.0: "V",
    3.0: "F",
    4.0: "Q"
}

test_df.iloc[:, -1] = test_df.iloc[:, -1].replace(labels)

# Now get the value counts for the renamed last column
train_counts = test_df.iloc[:, -1].value_counts()

print(train_counts)

187
N    18118
Q     1608
V     1448
S      556
F      162
Name: count, dtype: int64


1        N
2        N
3        N
4        N
        ..
21887    Q
21888    Q
21889    Q
21890    Q
21891    Q
Name: 187, Length: 21892, dtype: object' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  test_df.iloc[:, -1] = test_df.iloc[:, -1].replace(labels)


In [7]:
x_data = test_df.iloc[:,:187]
y_label = test_df.iloc[:,-1]

In [8]:
y_label.value_counts()

187
N    18118
Q     1608
V     1448
S      556
F      162
Name: count, dtype: int64

In [9]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y_label) 

X = x_data
X_test = np.expand_dims(X, axis=1)  
print(X_test.shape) 
X_test_tensor = torch.tensor(X_test).float()    
y_test_tensor = torch.tensor(y).long()        

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

(21892, 1, 187)


# Loading CNN model

In [10]:

from common_utils import CNN1D

 

model_path = "./model/cnn_model.pth"
num_classes = len(label_encoder.classes_)
loaded_CNNmodel = CNN1D(num_classes).to(device)  
loaded_CNNmodel.load_state_dict(torch.load(model_path, weights_only=True))
print("Model loaded successfully.")

Model loaded successfully.


In [11]:
from common_utils import evaluateCNN_model

test_accuracy = evaluateCNN_model(x_data, y_label, num_classes, loaded_CNNmodel, batch_size=32, device='cuda' if torch.cuda.is_available() else 'cpu')


Test Accuracy for CNN model: 0.9823


# Loading Transformer model

In [12]:
# Loading transformer modules and constants
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint


batch_size = 128
num_workers = 0

input_size = 200
num_classes = 5
num_heads = 5
depth = 6
max_epochs = 22
lr = 1e-4
dropout = 0.0

In [13]:
path = "./heartbeat_Dataset"
from transformer_eval import LitTransformer, LitMITBIH
Transformer_model = LitTransformer(input_size, num_classes, num_heads, depth, max_epochs, lr, dropout)
datamodule = LitMITBIH( path, batch_size, num_workers, length=input_size)
datamodule.setup()


In [14]:
save_path = "./model/"
ckpt_name = "ecg-transformer"

model_checkpoint = ModelCheckpoint(
    dirpath=os.path.join(save_path, "checkpoints"),
    filename=ckpt_name,
    save_top_k=1,
    verbose=True,
    monitor='val_acc',
    # monitor='test_acc',
    mode='max',
)
trainer = Trainer(accelerator="gpu", devices=1,

                    max_epochs=max_epochs,
                    logger=False,
                    callbacks=[model_checkpoint]
                )

print(f"Loading checkpoint: {ckpt_name}.ckpt")
Transformer_model = Transformer_model.load_from_checkpoint(
    os.path.join(save_path, "checkpoints", ckpt_name+".ckpt")
)
trainer.test(Transformer_model, datamodule=datamodule)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  return torch.load(f, map_location=map_location)


Loading checkpoint: ecg-transformer.ckpt


You are using a CUDA device ('NVIDIA GeForce RTX 3070') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

[{'test_acc': 97.78797149658203, 'test_loss': 0.11676190793514252}]

# Loading RNN model

In [15]:
from common_utils import GRUModel

model_path = "./model/gru_rnn_model.pth"
inputSize = 1
hiddenSize = 64
numClasses = 5
numLayers = 1

rnn_model = GRUModel(inputSize, hiddenSize, numLayers, numClasses).to(device)
rnn_model.load_state_dict(torch.load(model_path, weights_only=True))
print("Model loaded successfully.")

Model loaded successfully.


In [21]:
# RNN_X_test = np.expand_dims(X, axis=-1) 
# RNN_X_test_tensor = torch.tensor(RNN_X_test).float()  

# # Create DataLoader with updated shape
# RNN_test_dataset = TensorDataset(RNN_X_test_tensor, y_test_tensor)
# RNN_test_loader = DataLoader(RNN_test_dataset, batch_size=32, shuffle=False)


reverse_labels = {v: k for k, v in labels.items()}

RNN_X_test = test_df.iloc[:, :-1].values  
RNN_y_test = test_df.iloc[:, -1].replace(reverse_labels).astype(float).values  # Convert labels back to numbers

RNN_X_test = np.expand_dims(RNN_X_test, axis=1)    

print(f"Shape of X: {RNN_X_test.shape}")
print(f"Shape of y: {RNN_y_test}")

from common_utils import get_dataloader
RNN_test_loader = get_dataloader(RNN_X_test, RNN_y_test, False, batchSize=32)

Shape of X: (21892, 1, 187)
Shape of y: [0. 0. 0. ... 4. 4. 4.]


  RNN_y_test = test_df.iloc[:, -1].replace(reverse_labels).astype(float).values  # Convert labels back to numbers


In [22]:
from common_utils import evaluate

RNNLoss, RNNacc = evaluate(rnn_model, RNN_test_loader, device, criterion=nn.CrossEntropyLoss())
print(f"Test Loss: {RNNLoss:.4f} | Test Accuracy: {RNNacc:.4f}")

RuntimeError: input.size(-1) must be equal to input_size. Expected 1, got 187

These are the results obtained from the different models
| Model        | Test accuracy |
|--------------|---------------|
| CNN          | 0.9823        |
| Transformers | 0.9779        |
| RNN          | 0.8279        |
| RNN(LSTM)    | 0.8278        |
| RNN(GRU)     | 0.9740        |

# Model evaluation of hybrid models

Based on the performance of individual models, we decided to explore hybrid architectures to leverage the strengths of different models. Consequently, we developed Transformer-CNN and CNN-GRU hybrid models. These hybrid models will be evaluated to assess their effectiveness 
 


## CNN-Transformer hhybrid model

In [23]:
from common_utils import cnn_transformer_evaluate
from CNN_Transformer_hybrid import CNNTransformerHybrid


In [24]:
model_path = "./model/cnn_transformer_model.pth"

CNN_transformer_model = CNNTransformerHybrid(
    input_dim=187, 
    num_classes=5,  
    num_heads=8, 
    num_layers=6  
).to(device)

CNN_transformer_model.load_state_dict(torch.load(model_path, weights_only=True))
print("Model loaded successfully.")

Model loaded successfully.




In [25]:
# label_encoder = LabelEncoder()
# y = label_encoder.fit_transform(y_label.values.ravel())



In [26]:
criterion = torch.nn.CrossEntropyLoss()  

epoch_loss, epoch_acc, all_preds, all_labels = cnn_transformer_evaluate(CNN_transformer_model, test_loader, criterion, device)
print(f"Loading model from: {model_path}")
print(f"Test Loss: {epoch_loss:.4f} | Test Accuracy: {epoch_acc:.4f}")


Loading model from: ./model/cnn_transformer_model.pth
Test Loss: 0.0666 | Test Accuracy: 0.9850


### model run on augmented dataset

In [27]:
model_path = "./model/cnn_transformer_model_augment.pth"

CNN_transformer_model.load_state_dict(torch.load(model_path, weights_only=True))
print("Model loaded successfully.")

epoch_loss, epoch_acc, all_preds, all_labels = cnn_transformer_evaluate(CNN_transformer_model, test_loader, criterion, device)
print(f"Loading model from: {model_path}")
print(f"Test Loss: {epoch_loss:.4f} | Test Accuracy: {epoch_acc:.4f}")


Model loaded successfully.
Loading model from: ./model/cnn_transformer_model_augment.pth
Test Loss: 0.0671 | Test Accuracy: 0.9855


### model run on smote dataset

In [28]:

model_path = "./model/cnn_transformer_model_smote.pth"

CNN_transformer_model.load_state_dict(torch.load(model_path, weights_only=True))
print("Model loaded successfully.")

epoch_loss, epoch_acc, all_preds, all_labels = cnn_transformer_evaluate(CNN_transformer_model, test_loader, criterion, device)
print(f"Loading model from: {model_path}")
print(f"Test Loss: {epoch_loss:.4f} | Test Accuracy: {epoch_acc:.4f}")


Model loaded successfully.
Loading model from: ./model/cnn_transformer_model_smote.pth
Test Loss: 0.1010 | Test Accuracy: 0.9832


Test Loss: 0.0666 | Test Accuracy: 0.9850
Test Loss: 0.0714 | Test Accuracy: 0.9857
Test Loss: 0.1000 | Test Accuracy: 0.9744

## CNN-GRU model

In [29]:
from common_utils import CNN_GRU
CNN_GRU_model = CNN_GRU()

In [30]:
model_path = './model/CNN_GRU_model.pth'
CNN_GRU_model.load_state_dict(torch.load(model_path, weights_only=True))
CNNGRULoss, CNNGRUacc = evaluate(CNN_GRU_model, test_loader, device, criterion=nn.CrossEntropyLoss())
print(f"Test Loss: {CNNGRULoss:.4f} | Test Accuracy: {CNNGRUacc:.4f}")

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same