Load original labels and save encoded labels

In [7]:
import numpy as np
from sklearn.preprocessing import LabelEncoder

def encode_and_save_labels(
    input_npz="../src/Preprocessed_data/ecg_data.npz",
    encoded_labels_path="ecg_labels_encoded.npy",
    original_labels_path="ecg_original_labels.npy"
):
    # Load original labels from your npz data file
    data = np.load(input_npz)
    labels = data['labels']  # shape: (num_beats,)

    # Encode string labels to integers
    le = LabelEncoder()
    labels_encoded = le.fit_transform(labels)

    # Save directly in current folder
    np.save(encoded_labels_path, labels_encoded)
    np.save(original_labels_path, labels)

    print(f"Encoded labels saved to: {encoded_labels_path}")
    print(f"Original labels saved to: {original_labels_path}")

if __name__ == "__main__":
    encode_and_save_labels()


Encoded labels saved to: ecg_labels_encoded.npy
Original labels saved to: ecg_original_labels.npy


Imports & load data (sanity checks)

In [8]:
import os
import numpy as np
from collections import Counter

npz_path = "../src/Preprocessed_data/ecg_data.npz"  # change if needed
encoded_labels_path = "ecg_labels_encoded.npy"      # should be in Train_classifier

# load npz and labels
data = np.load(npz_path)
beats = data['beats']              # expected shapes: (N, 90) or (N, 90, 1) or (N, 1, 90)
labels_encoded = np.load(encoded_labels_path)  # integers

print("raw beats shape:", beats.shape)
print("labels shape:", labels_encoded.shape)
print("label counts:", Counter(labels_encoded))

# Normalize shape -> (N, 1, L) for PyTorch conv1d convention
if beats.ndim == 2:
    # (N, L) -> (N, 1, L)
    beats = beats[:, np.newaxis, :]
elif beats.ndim == 3 and beats.shape[-1] == 1:
    # most common: (N, 90, 1) -> (N, 1, 90)
    beats = np.transpose(beats, (0, 2, 1))
# if it's already (N,1,L) we do nothing

print("after reshape beats shape:", beats.shape)
assert beats.ndim == 3 and beats.shape[1] == 1, "beats must be (N,1,L) after reshape"

raw beats shape: (109487, 90, 1)
labels shape: (109487,)
label counts: Counter({np.int64(1): 90625, np.int64(2): 8043, np.int64(4): 7235, np.int64(3): 2781, np.int64(0): 803})
after reshape beats shape: (109487, 1, 90)


Train / validation / test split (stratified)

In [9]:
# Cell 2: stratified train/val/test split
from sklearn.model_selection import train_test_split

X = beats
y = labels_encoded

# Keep a held-out test set: 15% of data
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.15, stratify=y, random_state=42
)

# Split trainval into train + val so val is ~15% of whole dataset as well
val_fraction_of_trainval = 0.15 / 0.85  # ≈ 0.17647
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=val_fraction_of_trainval, stratify=y_trainval, random_state=42
)

print("train:", X_train.shape, y_train.shape)
print("val:  ", X_val.shape, y_val.shape)
print("test: ", X_test.shape, y_test.shape)
print("train label counts:", Counter(y_train))
print("val   label counts:", Counter(y_val))
print("test  label counts:", Counter(y_test))


train: (76640, 1, 90) (76640,)
val:   (16423, 1, 90) (16423,)
test:  (16424, 1, 90) (16424,)
train label counts: Counter({np.int64(1): 63436, np.int64(2): 5630, np.int64(4): 5065, np.int64(3): 1947, np.int64(0): 562})
val   label counts: Counter({np.int64(1): 13594, np.int64(2): 1206, np.int64(4): 1085, np.int64(3): 417, np.int64(0): 121})
test  label counts: Counter({np.int64(1): 13595, np.int64(2): 1207, np.int64(4): 1085, np.int64(3): 417, np.int64(0): 120})


Prepare Tensors

In [10]:
import torch

# Convert numpy to torch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)


Compute Class Weights

In [18]:
import numpy as np
import torch

# Compute inverse frequency weights for CrossEntropyLoss
class_counts = np.bincount(y_train)
class_weights = 1. / class_counts
class_weights = class_weights / class_weights.sum()  # normalize if needed

print("Class counts:", class_counts)
print("Class weights:", class_weights)

# Convert to torch tensor for the loss function
weights_tensor = torch.tensor(class_weights, dtype=torch.float32)


Class counts: [  562 63436  5630  1947  5065]
Class weights: [0.66300315 0.00587376 0.06618255 0.19137533 0.07356521]


Create WeightedRandomSampler

In [19]:
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler

# Convert to torch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create datasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Sample weights for each instance in training data
sample_weights = class_weights[y_train]  # numpy indexing
sampler = WeightedRandomSampler(
    weights=torch.tensor(sample_weights, dtype=torch.float32),
    num_samples=len(sample_weights),
    replacement=True
)

# DataLoaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


Define Model

In [20]:
import torch.nn as nn

class ECGClassifier(nn.Module):
    def __init__(self, num_classes):
        super(ECGClassifier, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(16)
        self.pool = nn.MaxPool1d(2)
        self.relu = nn.ReLU()

        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(32)

        self.fc1 = nn.Linear(32 * 22, 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.pool(self.relu(self.bn1(self.conv1(x))))
        x = self.pool(self.relu(self.bn2(self.conv2(x))))
        x = x.view(x.size(0), -1)  # flatten
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x


Loss & Optimizer

In [21]:
model = ECGClassifier(num_classes=len(class_counts))

# Weighted CrossEntropyLoss
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


Training Loop with Metrics

In [23]:
from sklearn.metrics import classification_report

def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                outputs = model(X_batch)
                preds = torch.argmax(outputs, dim=1)
                y_true.extend(y_batch.numpy())
                y_pred.extend(preds.numpy())

        print(f"\nEpoch {epoch+1}/{epochs}")
        print(f"Train Loss: {train_loss/len(train_loader):.4f}")
        print(classification_report(y_true, y_pred, digits=4))


Train & Test

In [24]:
# Train the model
train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10)

# Test evaluation
model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        preds = torch.argmax(outputs, dim=1)
        y_true.extend(y_batch.numpy())
        y_pred.extend(preds.numpy())

print("\nFinal Test Performance:")
print(classification_report(y_true, y_pred, digits=4))



Epoch 1/10
Train Loss: 0.1730
              precision    recall  f1-score   support

           0     0.0394    0.9339    0.0757       121
           1     0.9958    0.0524    0.0995     13594
           2     0.8755    0.9793    0.9245      1206
           3     0.0404    0.9640    0.0776       417
           4     0.6006    0.8581    0.7066      1085

    accuracy                         0.2033     16423
   macro avg     0.5104    0.7575    0.3768     16423
weighted avg     0.9296    0.2033    0.1995     16423


Epoch 2/10
Train Loss: 0.0750
              precision    recall  f1-score   support

           0     0.0460    0.9256    0.0877       121
           1     0.9979    0.2085    0.3449     13594
           2     0.8936    0.9892    0.9390      1206
           3     0.0517    0.9616    0.0981       417
           4     0.4968    0.9419    0.6505      1085

    accuracy                         0.3387     16423
   macro avg     0.4972    0.8054    0.4240     16423
weighted avg   