# Task A: Deep Learning for ECG Heartbeat Classification

We shall evaluate all models on unseen data by training only on the mitbih_train.csv and testing on mitbih_test.csv here

In [3]:
import torch
print("PyTorch version:", torch.__version__)

# Check if CUDA is available
print("CUDA available:", torch.cuda.is_available())

# Get current CUDA device index (if available)
if torch.cuda.is_available():
    print("Current CUDA device index:", torch.cuda.current_device())
    print("CUDA device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print("No CUDA devices found.")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

PyTorch version: 2.5.0+cu118
CUDA available: True
Current CUDA device index: 0
CUDA device name: NVIDIA GeForce RTX 3070


In [4]:
import os

import pandas as pd
import numpy as np
from torch import nn, optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [5]:

dataframes = {}
directory_path = 'Heartbeat_Dataset'
all_files = os.listdir(directory_path)

for file in os.listdir(directory_path):
    if file.endswith('.csv'):
        file_path = os.path.join(directory_path, file)
        # Remove the .csv extension for the DataFrame name
        df_name = os.path.splitext(file)[0]
        dataframes[df_name] = pd.read_csv(file_path, header=None)


In [6]:
print(dataframes.keys())


dict_keys(['mitbih_test', 'mitbih_train', 'ptbdb_abnormal', 'ptbdb_normal'])


In [7]:

test_df = dataframes['mitbih_test']

print(test_df.shape)

(21892, 188)


# Data Augmentation
 here we map numerical values  of its categories to string labels 

In [8]:
# x_data = data_df.iloc[:, 2:]
# y_label = data_df[['type']]


labels = {
    0.0: "N",
    1.0: "S",
    2.0: "V",
    3.0: "F",
    4.0: "Q"
}

test_df.iloc[:, -1] = test_df.iloc[:, -1].replace(labels)

# Now get the value counts for the renamed last column
train_counts = test_df.iloc[:, -1].value_counts()

# Print the counts
print(train_counts)

187
N    18118
Q     1608
V     1448
S      556
F      162
Name: count, dtype: int64


1        N
2        N
3        N
4        N
        ..
21887    Q
21888    Q
21889    Q
21890    Q
21891    Q
Name: 187, Length: 21892, dtype: object' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  test_df.iloc[:, -1] = test_df.iloc[:, -1].replace(labels)


In [9]:
x_data = test_df.iloc[:,:187]
y_label = test_df.iloc[:,-1]

In [10]:
y_label.value_counts()

187
N    18118
Q     1608
V     1448
S      556
F      162
Name: count, dtype: int64

# Loading CNN model

In [11]:
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from common_utils import CNN1D


label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y_label)  # Encode labels without replacement

model_path = "./model/cnn_model.pth"
num_classes = len(label_encoder.classes_)
loaded_CNNmodel = CNN1D(num_classes).to(device)  # Reinitialize model with the same architecture
loaded_CNNmodel.load_state_dict(torch.load(model_path, weights_only=True))
print("Model loaded successfully.")

Model loaded successfully.


In [12]:
from common_utils import evaluateCNN_model

test_accuracy = evaluateCNN_model(x_data, y_label, num_classes, loaded_CNNmodel, batch_size=32, device='cuda' if torch.cuda.is_available() else 'cpu')


Test Accuracy for CNN model: 0.9823


# Loading Transformer model

In [13]:
# Loading transformer modules and constants
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint


batch_size = 128
num_workers = 0

input_size = 200
num_classes = 5
num_heads = 5
depth = 6
max_epochs = 22
lr = 1e-4
dropout = 0.0

In [14]:
path = "./heartbeat_Dataset"
from common_utils import LitTransformer, LitMITBIH
model = LitTransformer(input_size, num_classes, num_heads, depth, max_epochs, lr, dropout)
datamodule = LitMITBIH( path, batch_size, num_workers, length=input_size)
datamodule.setup()


In [15]:
save_path = "./model/"
ckpt_name = "ecg-transformer"

model_checkpoint = ModelCheckpoint(
    dirpath=os.path.join(save_path, "checkpoints"),
    filename=ckpt_name,
    save_top_k=1,
    verbose=True,
    monitor='val_acc',
    # monitor='test_acc',
    mode='max',
)
trainer = Trainer(accelerator="gpu", devices=1,

                    max_epochs=max_epochs,
                    logger=False,
                    callbacks=[model_checkpoint]
                )

print(f"Loading checkpoint: {ckpt_name}.ckpt")
model = model.load_from_checkpoint(
    os.path.join(save_path, "checkpoints", ckpt_name+".ckpt")
)
trainer.test(model, datamodule=datamodule)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  return torch.load(f, map_location=map_location)


Loading checkpoint: ecg-transformer.ckpt


You are using a CUDA device ('NVIDIA GeForce RTX 3070') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

[{'test_acc': 97.78797149658203, 'test_loss': 0.11676190793514252}]

# Loading RNN model

In [16]:
model_path = "./model/simple_rnn_model.pth"

rnn_model = RNNModel(inputSize, hiddenSize, numLayers, numClasses)
rnn_model.load_state_dict(torch.load(model_path, weights_only=True))
print("Model loaded successfully.")

NameError: name 'RNNModel' is not defined

These are the results obtained from the different models
| Model        | Test accuracy |
|--------------|---------------|
| CNN          | 0.9823        |
| Transformers | 0.9779        |
| RNN          | 0.8279        |
| RNN(LSTM)    | 0.8278        |
| RNN(GRU)     | 0.9740        |