In [1]:
import pandas as pd
import numpy as np
import os
import sys
import torch as torch


In [2]:
# Make the project root (one directory above notebooks/) importable so the
# `src` package imported below can be resolved.
PROJECT_ROOT = os.path.abspath(os.pardir)
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.append(PROJECT_ROOT)

# Echo the resolved location as a quick visual sanity check
print("[✓] Project at:", PROJECT_ROOT)

from src.data.load_data import load_train_data, EDGCDataset, load_test_data, EDGCTestDataset
from src.data.stratified_split import stratified_split_pad_torch, pad_test_torch
from src.models.model_trainer import Trainer

[✓] Project at: c:\Users\toby_\Documents\TU_Berlin\Semestre 3\AMLS\AMLS_packed


In [3]:
# Load the raw training sequences together with their labels.
X_train, y_train = load_train_data()

# Length of every sequence divided by 300
# (presumably the sampling rate in Hz, giving seconds — TODO confirm).
durations = np.fromiter((len(seq) / 300 for seq in X_train), dtype=float)

# How many training examples carry each distinct label value in y_train[0].
cls_count = y_train[0].groupby(y_train[0]).count()

[✓] Loaded X_train with 6179 sequences
[✓] Loaded y_train with shape (6179, 1)


In [4]:
# Stratified train/validation split of the loaded training data.
# NOTE(review): this rebinds X_train / y_train from the loading cell, so
# re-running this cell on its own would split the already-split data.
(
    X_train,
    X_val,
    lengths_train,
    lengths_val,
    y_train,
    y_val,
) = stratified_split_pad_torch(X_train, y_train)

print(X_train.shape, y_train.shape)

# One mini-batch size shared by the train, validation and test loaders.
BATCH_SIZE = 32

# Training / validation datasets and loaders (only the training loader shuffles).
train_dataset = EDGCDataset(X_train, lengths_train, y_train)
val_dataset = EDGCDataset(X_val, lengths_val, y_val)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True
)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE)

# Test data: load, pad, wrap in a dataset and expose through a loader.
X_test, lengths_test = pad_test_torch(load_test_data())
test_dataset = EDGCTestDataset(X_test, lengths_test)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

torch.Size([5564, 18286]) torch.Size([5564, 1])
[✓] Loaded X_test with 2649 sequences


In [5]:
from src.models.model_1 import ECGNet
from src.models.hyperparamter_tunning import hyperparameter_search

## Let's use data augmentation to improve model training, starting with the CNN-LSTM architecture.

##### This cell runs the search loop that chooses the best hyperparameter combination when training with augmented data. The function `hyperparameter_search` has the parameter `augmented_data`, which selects the data pipeline used for loading and processing the data. When this parameter is `False`, the pipeline uses only the raw matrix `X_train`. With `augmented_data = True`, the pipeline applies time stretch, time shift, added noise, amplitude scaling and random crop.

In [7]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Candidate values handed to the search for each tuned hyperparameter.
# "lst_hidden_size" is spelled exactly as the ECGNet keyword argument.
param_grid = {
    "lst_hidden_size": [32, 64, 128],
    "learning_rate": [0.01, 0.001, 0.0005],
    "dropout": [0.1, 0.2, 0.5],
}

# Settings held constant for every configuration in the search.
fixed = {
    "num_classes": 4,
    "signal_length": X_train.shape[1],  # second dimension of the training tensor
    "n_fft": 512,
    "hop_length": 256,
    "conv1_padding": 1,
    "conv2_padding": 1,
    "conv1_kernel": 3,
    "conv2_kernel": 3,
    "lstm_num_layers": 1,
    "conv1_channels": 32,
    "conv2_channels": 32,
}

# Search over the grid with the augmented-data pipeline switched on.
results = hyperparameter_search(
    ECGNet,
    param_grid,
    fixed,
    device=device,
    epochs=7,
    train_loader=train_loader,
    val_loader=val_loader,
    augmented_data=True,
)


🔧 Training with config: {'num_classes': 4, 'signal_length': 18286, 'n_fft': 512, 'hop_length': 256, 'conv1_padding': 1, 'conv2_padding': 1, 'conv1_kernel': 3, 'conv2_kernel': 3, 'lstm_num_layers': 1, 'conv1_channels': 32, 'conv2_channels': 32, 'lst_hidden_size': 32, 'learning_rate': 0.01, 'dropout': 0.1}
Epoch 1/7 | Train Loss: 1.0292 | Train F1: 0.1876 | Val Loss: 1.0027 | Val F1: 0.1856
Epoch 2/7 | Train Loss: 1.0173 | Train F1: 0.1856 | Val Loss: 1.0097 | Val F1: 0.1856
Epoch 3/7 | Train Loss: 1.0164 | Train F1: 0.1853 | Val Loss: 1.0110 | Val F1: 0.1856
Epoch 4/7 | Train Loss: 1.0150 | Train F1: 0.1856 | Val Loss: 1.0059 | Val F1: 0.1856
Epoch 5/7 | Train Loss: 1.0131 | Train F1: 0.1853 | Val Loss: 1.0083 | Val F1: 0.1856
Epoch 6/7 | Train Loss: 1.0150 | Train F1: 0.1853 | Val Loss: 1.0042 | Val F1: 0.1856
Epoch 7/7 | Train Loss: 1.0169 | Train F1: 0.1853 | Val Loss: 1.0052 | Val F1: 0.1856

🔧 Training with config: {'num_classes': 4, 'signal_length': 18286, 'n_fft': 512, 'hop_leng

## We take the best hyperparameter selection found for the CNN-LSTM model and train it with data augmentation

In [6]:
# Train the CNN-LSTM (ECGNet) model with augmentation enabled, report the
# validation metrics, and save the test-set predictions to disk.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = ECGNet(
    num_classes=4,
    n_fft=512,
    hop_length=256,
    conv1_padding=1,
    conv2_padding=1,
    conv1_kernel=3,
    conv2_kernel=3,
    lstm_num_layers=1,
    conv1_channels=32,
    conv2_channels=32,
    lst_hidden_size=128,
    dropout=0.1,
    signal_length=X_train.shape[1],
    device=device,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

criterion = torch.nn.CrossEntropyLoss()

trainer = Trainer(model, optimizer, criterion, augment_data=True, device=device)

history = trainer.fit(train_loader, val_loader, epochs=50)

# Final loss / F1 on both splits after training has finished.
train_loss, train_f1 = trainer.evaluate(train_loader)

val_loss, val_f1 = trainer.evaluate(val_loader)


cm, report = trainer.detailed_metrics(val_loader, class_names=["class_0", "class_1", "class_2", "class_3"])
print(report)

model.eval()  # evaluation mode

all_preds = []

# Predict the test set batch by batch without tracking gradients.
with torch.no_grad():
    for X_batch, lengths_batch in test_loader:
        X_batch = X_batch.to(device)
        lengths_batch = lengths_batch.to(device)

        outputs = model(X_batch, lengths_batch)
        preds = torch.argmax(outputs, dim=1)  # highest-scoring class per sample
        all_preds.extend(preds.cpu().numpy())

df = pd.DataFrame({'predicted_label': all_preds})

# Use a model-specific file name: the TCN cell further down writes its own
# predictions to 'augment.csv', which would silently overwrite these.
df.to_csv('augment_cnn_lstm.csv', index=False)


Epoch 1/50 - Train Loss: 0.9759 - Train F1: 0.2054 - Val Loss: 0.9094 - Val F1: 0.2011
Epoch 2/50 - Train Loss: 0.8775 - Train F1: 0.2868 - Val Loss: 0.7988 - Val F1: 0.2897
Epoch 3/50 - Train Loss: 0.7906 - Train F1: 0.3879 - Val Loss: 0.7371 - Val F1: 0.5188
Epoch 4/50 - Train Loss: 0.7445 - Train F1: 0.4803 - Val Loss: 0.7088 - Val F1: 0.4938
Epoch 5/50 - Train Loss: 0.7256 - Train F1: 0.5288 - Val Loss: 0.6912 - Val F1: 0.5521
Epoch 6/50 - Train Loss: 0.6868 - Train F1: 0.5708 - Val Loss: 0.7062 - Val F1: 0.5532
Epoch 7/50 - Train Loss: 0.6688 - Train F1: 0.6013 - Val Loss: 0.6405 - Val F1: 0.5611
Epoch 8/50 - Train Loss: 0.6454 - Train F1: 0.6093 - Val Loss: 0.6347 - Val F1: 0.5831
Epoch 9/50 - Train Loss: 0.6270 - Train F1: 0.6138 - Val Loss: 0.6132 - Val F1: 0.6343
Epoch 10/50 - Train Loss: 0.6102 - Train F1: 0.6456 - Val Loss: 0.6282 - Val F1: 0.6356
Epoch 11/50 - Train Loss: 0.5871 - Train F1: 0.6744 - Val Loss: 0.5763 - Val F1: 0.6977
Epoch 12/50 - Train Loss: 0.5794 - Train 

## Let's use data augmentation to improve model training, now with the TCN classifier.
##### This cell runs the search loop that chooses the best hyperparameter combination when training with augmented data. The function `hyperparameter_search` has the parameter `augmented_data`, which selects the data pipeline used for loading and processing the data. When this parameter is `False`, the pipeline uses only the raw matrix `X_train`. With `augmented_data = True`, the pipeline applies time stretch, time shift, added noise, amplitude scaling and random crop.

In [14]:
from src.models.model_2 import TCN_STFT_Classifier


# Candidate values handed to the search for each tuned TCN hyperparameter.
# NOTE(review): both hidden_channels candidates have 4 entries while
# num_levels also takes the value 3, so len(hidden_channels) == num_levels
# does not hold for every combination. The recorded run with num_levels=3
# trains without error, so the classifier presumably tolerates the extra
# entry — confirm against TCN_STFT_Classifier.
param_grid = {
    'hidden_channels': [[64, 128, 128, 128], [128, 128, 128, 128]],
    'dropout': [0.1, 0.2, 0.3],
    'kernel_size': [3, 5],
    'num_levels': [3, 4],
}

# Settings held constant for every configuration in the search.
# kernel_size is deliberately not listed here: it is tuned through param_grid,
# and defining it in both dictionaries made it unclear which value applies.
fixed = {
    "num_classes": 4,
    "n_fft": 256,
    "hop_length": 128,
    "learning_rate": 0.001,
}

# Search over the grid with the augmented-data pipeline switched on.
results = hyperparameter_search(
    TCN_STFT_Classifier,
    param_grid,
    fixed,
    device=device,
    epochs=5,
    train_loader=train_loader,
    val_loader=val_loader,
    augmented_data=True,
)


🔧 Training with config: {'num_classes': 4, 'n_fft': 256, 'hop_length': 128, 'kernel_size': 3, 'learning_rate': 0.001, 'hidden_channels': [64, 128, 128, 128], 'dropout': 0.1, 'num_levels': 3}
Epoch 1/5 | Train Loss: 0.9341 | Train F1: 0.2667 | Val Loss: 0.8246 | Val F1: 0.3874
Epoch 2/5 | Train Loss: 0.7934 | Train F1: 0.3967 | Val Loss: 0.7065 | Val F1: 0.4543
Epoch 3/5 | Train Loss: 0.7096 | Train F1: 0.4864 | Val Loss: 0.6723 | Val F1: 0.5496
Epoch 4/5 | Train Loss: 0.6553 | Train F1: 0.5339 | Val Loss: 0.6016 | Val F1: 0.6278
Epoch 5/5 | Train Loss: 0.6234 | Train F1: 0.5856 | Val Loss: 0.6080 | Val F1: 0.6162

🔧 Training with config: {'num_classes': 4, 'n_fft': 256, 'hop_length': 128, 'kernel_size': 3, 'learning_rate': 0.001, 'hidden_channels': [64, 128, 128, 128], 'dropout': 0.1, 'num_levels': 4}
Epoch 1/5 | Train Loss: 0.9239 | Train F1: 0.2720 | Val Loss: 0.8040 | Val F1: 0.3220
Epoch 2/5 | Train Loss: 0.7996 | Train F1: 0.4021 | Val Loss: 0.7239 | Val F1: 0.4130
Epoch 3/5 | Tr

## We take the best hyperparameter selection found for the TCN classifier and train it with data augmentation

In [7]:
from src.models.model_2 import TCN_STFT_Classifier
from src.models.model_trainer import Trainer
# Pick the compute device: CUDA when present, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# TCN classifier built with the hyperparameters chosen for the final run.
# NOTE(review): hidden_channels has 4 entries while num_levels is 3 —
# confirm how TCN_STFT_Classifier treats the extra entry.
model = TCN_STFT_Classifier(
    num_classes=4,
    hop_length=128,
    n_fft=256,
    kernel_size=5,
    hidden_channels=[128, 128, 128, 128],
    dropout=0.1,
    num_levels=3,
    device=device,
).to(device)

# Optimiser, loss and trainer (augmentation switched on).
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()
trainer = Trainer(model, optimizer, criterion, device=device, augment_data=True)

history = trainer.fit(train_loader, val_loader, epochs=50)

# Final loss / F1 on both splits after training has finished.
train_loss, train_f1 = trainer.evaluate(train_loader)
val_loss, val_f1 = trainer.evaluate(val_loader)

cm, report = trainer.detailed_metrics(
    val_loader,
    class_names=["class_0", "class_1", "class_2", "class_3"],
)
print(report)

model.eval()  # evaluation mode

# Collect the predicted class index for every test sample, batch by batch,
# without tracking gradients.
all_preds = []
with torch.no_grad():
    for batch_signals, batch_lengths in test_loader:
        outputs = model(batch_signals.to(device), batch_lengths.to(device))
        preds = outputs.argmax(dim=1)  # highest-scoring class per sample
        all_preds.extend(preds.cpu().numpy())

# Persist the predictions as a single-column CSV.
df = pd.DataFrame({'predicted_label': all_preds})
df.to_csv('augment.csv', index=False)

  WeightNorm.apply(module, name, dim)


Epoch 1/50 - Train Loss: 0.9650 - Train F1: 0.2736 - Val Loss: 0.8357 - Val F1: 0.4054
Epoch 2/50 - Train Loss: 0.8224 - Train F1: 0.3944 - Val Loss: 0.7376 - Val F1: 0.4935
Epoch 3/50 - Train Loss: 0.7503 - Train F1: 0.4549 - Val Loss: 0.6875 - Val F1: 0.4449
Epoch 4/50 - Train Loss: 0.6898 - Train F1: 0.5139 - Val Loss: 0.6970 - Val F1: 0.5146
Epoch 5/50 - Train Loss: 0.6438 - Train F1: 0.5835 - Val Loss: 0.6304 - Val F1: 0.5737
Epoch 6/50 - Train Loss: 0.6245 - Train F1: 0.5828 - Val Loss: 0.6476 - Val F1: 0.4823
Epoch 7/50 - Train Loss: 0.5934 - Train F1: 0.6272 - Val Loss: 0.5826 - Val F1: 0.6639
Epoch 8/50 - Train Loss: 0.5788 - Train F1: 0.6337 - Val Loss: 0.6112 - Val F1: 0.6324
Epoch 9/50 - Train Loss: 0.5585 - Train F1: 0.6468 - Val Loss: 0.5903 - Val F1: 0.6286
Epoch 10/50 - Train Loss: 0.5346 - Train F1: 0.6806 - Val Loss: 0.5894 - Val F1: 0.6740
Epoch 11/50 - Train Loss: 0.5312 - Train F1: 0.6820 - Val Loss: 0.5735 - Val F1: 0.6734
Epoch 12/50 - Train Loss: 0.5180 - Train 