In [None]:
import copy

import wfdb
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import balanced_accuracy_score, recall_score
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import KFold
from torch.utils.data import TensorDataset, DataLoader, SubsetRandomSampler
from sklearn.metrics import classification_report
from torchvision import transforms
import torch
import torch.nn as nn

2025-05-12 16:38:30.370461: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-05-12 16:38:30.370528: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-05-12 16:38:30.372067: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-12 16:38:30.380118: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
import dataset_manager

In [3]:
# Load the samples (X) and their labels (y) through the project-local
# dataset_manager helper.
# NOTE(review): the cells that follow treat X as a 2-D array and y as an array
# of single-character class symbols — confirm against dataset_manager.load().
X, y = dataset_manager.load()

Loading cached data...


In [4]:
# Class distribution of the raw labels: one "<label> <count>" line per class.
categories, counts = np.unique(y, return_counts=True)
for cat, count in zip(categories, counts):
    print(f"{cat} {count}")

A 2546
E 106
F 802
L 8071
N 75011
R 7255
S 2
V 7129
a 150
e 16
j 229


In [5]:
# Inspect the dataset dimensions: (number of samples, length of each sample).
X.shape

(101317, 360)

In [6]:
# Drop every class that has fewer than 100 samples: build a boolean mask that
# is True for samples whose label belongs to a sufficiently populated class,
# then apply it to both the samples and the labels.
indexes_filter = np.isin(y, categories[counts >= 100])
X, y = X[indexes_filter], y[indexes_filter]

In [7]:
# Class distribution after the rare classes were removed.
categories, counts = np.unique(y, return_counts=True)
for cat, count in zip(categories, counts):
    print(f"{cat} {count}")

A 2546
E 106
F 802
L 8071
N 75011
R 7255
V 7129
a 150
j 229


In [8]:
# Downsample the majority class 'N' so it no longer dwarfs the other classes.
N_CLASS_TARGET = 8000   # number of 'N' samples to keep
DOWNSAMPLE_SEED = 42    # fixed seed: the same samples are dropped on every run

indices_N = np.where(y == 'N')[0]

# Never request a negative number of removals: if 'N' already has at most
# N_CLASS_TARGET samples (e.g. the cell is re-run) nothing is dropped.
n_excess = max(len(indices_N) - N_CLASS_TARGET, 0)

downsample_rng = np.random.default_rng(DOWNSAMPLE_SEED)
indices_to_remove = downsample_rng.choice(indices_N, size=n_excess, replace=False)
X = np.delete(X, indices_to_remove, axis=0)
y = np.delete(y, indices_to_remove, axis=0)

In [9]:
# Class distribution after class 'N' was downsampled.
categories, counts = np.unique(y, return_counts=True)
for cat, count in zip(categories, counts):
    print(f"{cat} {count}")

A 2546
E 106
F 802
L 8071
N 8000
R 7255
V 7129
a 150
j 229


In [10]:
# Encode the class symbols twice: first as integer ids (LabelEncoder), then
# as one-hot rows built from those ids.
label_encoder = LabelEncoder()
label_encoder.fit(y)
integer_encoded = label_encoder.transform(y)
one_hot = to_categorical(integer_encoded)

In [11]:
# Show the class symbols in encoder order: position i here is the class that
# integer id i (and column i of `one_hot`) stands for.
label_encoder.classes_

array(['A', 'E', 'F', 'L', 'N', 'R', 'V', 'a', 'j'], dtype='<U1')

# Training

In [15]:


# Reshape X for the PyTorch 1-D CNN: (N, L) -> (N, 1, L), i.e. insert a
# single-channel axis. Guarded on ndim so that re-running this cell is a no-op
# instead of raising a reshape error on the already 3-D array.
if X.ndim == 2:
    X = X.reshape(X.shape[0], 1, X.shape[1])


In [16]:
def training(model, _x, _y, n_epochs = 50, device = "cuda", sample_weight=None):
    """Train ``model`` on array-like data by wrapping it in a DataLoader.

    Args:
        model: network handed on to ``training_np``.
        _x: input samples; converted to a float32 tensor.
        _y: targets; converted to an int64 (``torch.long``) tensor.
        n_epochs: number of training epochs.
        device: device the tensors are moved to and the model is trained on.
        sample_weight: optional loss weights, forwarded unchanged.

    Returns:
        None. The model is updated in place by ``training_np``.
    """
    # The whole training set is materialised on the target device up front.
    features = torch.tensor(_x, dtype=torch.float32).to(device)
    targets = torch.tensor(_y, dtype=torch.long).to(device)

    # Shuffled mini-batches of 32 samples.
    loader = DataLoader(
        TensorDataset(features, targets),
        batch_size=32,
        shuffle=True,
    )

    training_np(model, loader, n_epochs, device, sample_weight)

    

def training_np(model, train_loader, n_epochs = 50, device = "cuda", sample_weight=None):
    """Train ``model`` in place with Adam (lr=0.001) and binary cross-entropy.

    Args:
        model: ``nn.Module`` whose outputs are fed directly to ``nn.BCELoss``.
            NOTE(review): BCELoss requires probabilities in [0, 1], so the
            model presumably ends with a sigmoid/softmax — confirm.
        train_loader: iterable yielding ``(inputs, labels)`` batches; labels
            are cast to float before the loss is computed.
        n_epochs: number of passes over ``train_loader``.
        device: device the model and every batch are moved to.
        sample_weight: optional weights multiplied element-wise into the
            unreduced loss before averaging. They must broadcast against the
            loss tensor of shape (batch, n_outputs), e.g. one weight per class.

    Returns:
        None. Prints the sample-weighted average loss after every epoch.
    """
    model.to(device)

    # When weights are given the loss must stay unreduced: multiplying an
    # already averaged scalar by the weights would only rescale it by the mean
    # weight instead of weighting each element.
    reduction = "none" if sample_weight is not None else "mean"
    criterion = nn.BCELoss(reduction=reduction)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    if sample_weight is not None:
        # float32 keeps the weighted loss in the same dtype as the model output.
        sample_weight = torch.as_tensor(sample_weight, dtype=torch.float32).to(device)

    for epoch in range(n_epochs):
        model.train()
        running_loss = 0.0
        total = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels.float())
            if sample_weight is not None:
                loss = (loss * sample_weight).mean()
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch figure is a
            # per-sample average even when the last batch is smaller.
            running_loss += loss.item() * inputs.size(0)
            total += labels.size(0)

        # An empty loader yields no samples; report NaN rather than dividing by zero.
        avg_loss = running_loss / total if total else float("nan")
        print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}")

In [17]:
# Candidate architectures to train and evaluate.
# NOTE(review): every entry is currently commented out, so `models` is empty
# and the cross-validation loop over `models` performs no work. ECGLSTM,
# ECGCNN and ECGHYBRID are not defined in the visible cells — restore their
# definitions/imports before re-enabling these lines.
models = [
    # ECGLSTM(input_length=X.shape[2], output=len(categories)),
    # ECGCNN(input_length=X.shape[2], output=len(categories)),
    # ECGHYBRID(input_length=X.shape[2], output=len(categories))
]

# Number of training epochs used for each cross-validation fold.
EPOCHS = 5

# Results store: model name -> {class symbol -> per-class recall}.
performances = {}

In [18]:
# Cross-validate every model with 2 folds and store the fold-averaged
# per-class recall in `performances[model name][class symbol]`.

# Fall back to the CPU when no GPU is available instead of hard-coding "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"
class_ids = np.arange(len(label_encoder.classes_))

for model in models:
    print(f"Training {model.name()} model:")
    performances[model.name()] = {}
    # Fixed random_state so the fold assignment is the same on every run.
    kf = KFold(2, shuffle=True, random_state=42)
    count = 0

    for train_index, val_index in kf.split(X):
        count += 1
        X_train, X_val = X[train_index, :, :], X[val_index, :, :]
        y_train, y_val = one_hot[train_index], one_hot[val_index]

        # Train a fresh copy for each fold. Reusing one instance would let a
        # later fold validate on samples the model already saw while training
        # on an earlier fold, inflating the scores.
        fold_model = copy.deepcopy(model)
        training(fold_model, X_train, y_train, n_epochs=EPOCHS, device=device)

        # Validation inputs only; the labels are compared after prediction.
        val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32))
        val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

        # Evaluation
        fold_model.eval()
        predictions = []
        with torch.no_grad():
            for (X_batch,) in val_loader:
                predictions.append(fold_model(X_batch.to(device)).cpu())

        # Output concatenation
        test_output = torch.cat(predictions).numpy()

        y_true_labels = np.argmax(y_val, axis=1)
        y_pred_labels = np.argmax(test_output, axis=1)

        # One-vs-rest recall of every class in a single call; entry i belongs
        # to label_encoder.classes_[i].
        recalls = recall_score(
            y_true_labels, y_pred_labels, labels=class_ids, average=None
        )
        model_scores = performances[model.name()]
        for category, recall in zip(label_encoder.classes_, recalls):
            model_scores[category] = model_scores.get(category, 0) + recall

    # Turn the per-fold sums into averages.
    for category in performances[model.name()]:
        performances[model.name()][category] = performances[model.name()][category] / count
    print("")

In [19]:
# Print the fold-averaged per-class recall of every evaluated model.
# The loop variable is deliberately not called `model`: that name is used by
# the training cell for the network object and must not be rebound to a string.
for model_name, perf in performances.items():
    print(model_name,"performances:")
    for category, score in perf.items():
        print("\t",category, score)
    print("")