In [12]:
import pandas as pd
import numpy as np
import os
import sys
import torch as torch
import pandas as pd

In [13]:
# Add the project root path if not already present
PROJECT_ROOT = os.path.abspath("..")  # move up one level from notebooks/
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Visual confirmation
print("[✓] Project at:", PROJECT_ROOT)

from src.data.load_data import load_train_data, EDGCDataset
from src.data.stratified_split import stratified_split_pad_torch
from src.models.model_trainer import Trainer

[✓] Project at: c:\Users\toby_\Documents\TU_Berlin\Semestre 3\AMLS\AMLS_packed


In [14]:
X_train, y_train = load_train_data()

durations = np.array([len(x) / 300 for x in X_train])

cls_count = y_train[0].groupby(y_train[0]).count()

[✓] Loaded X_train with 6179 sequences
[✓] Loaded y_train with shape (6179, 1)


In [15]:
X_train, X_val, lengths_train, lengths_val, y_train, y_val = stratified_split_pad_torch(
    X_train, y_train
)

print(X_train.shape, y_train.shape)




train_dataset = EDGCDataset(X_train, lengths_train, y_train)
val_dataset = EDGCDataset(X_val, lengths_val, y_val)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32)

torch.Size([5564, 18286]) torch.Size([5564, 1])


# Model Definition

## ECGNet Model Architecture

The `ECGNet` class implements a neural network for ECG signal classification. The architecture consists of:

- **Spectrogram transformation**: Converts raw ECG signals into spectrograms using Short-Time Fourier Transform (STFT).
- **Convolutional layers**: Two sequential 2D convolutional layers with ReLU activations and max pooling, which extract spatial features from the spectrogram.
- **Recurrent layer**: An LSTM layer processes the sequence of features, capturing temporal dependencies.
- **Fully connected layer**: The final linear layer maps the LSTM output to the target number of classes.

The model expects ECG signals and their lengths as input, and produces class logits for classification tasks.

The `ECGNet` class is implemented on the module /models/model_1.py  

In [16]:
from src.models.model_1 import ECGNet
from src.models.hyperparamter_tunning import hyperparameter_search

In [17]:
print(f"X_train: {X_train.shape}")
print(f"lengths_train: {lengths_train.shape}")
print(f"y_train: {y_train.shape}")

X_train: torch.Size([5564, 18286])
lengths_train: torch.Size([5564])
y_train: torch.Size([5564, 1])


### 🔍 Hyperparameter Search and Model Selection

In this section, we perform **hyperparameter tuning** to improve the performance of our machine learning models.

When training neural networks, model performance is highly sensitive to hyperparameters such as the number of layers, hidden units, learning rate, dropout rate, and more. Manually selecting these values is inefficient and often suboptimal. Therefore, we use a **grid search strategy** to systematically explore combinations of hyperparameter values.

---

#### ✅ Why Hyperparameter Tuning?

- Different combinations can lead to **very different results**, even with the same architecture.
- Some configurations may **overfit** or **underfit**, while others may **generalize better**.
- Automated tuning helps us identify the **best performing model** on the validation set without manual trial-and-error.

---

#### ⚙️ How it Works

1. We define a **search space**, i.e., a dictionary of hyperparameter values to test.
2. We generate all **combinations** using Cartesian product (`itertools.product`).
3. For each combination:
   - Initialize the model with the current hyperparameters.
   - Train it for a fixed number of epochs on the training set.
   - Evaluate its performance on the validation set using **F1-score** and **loss**.
4. Store and compare results, selecting the configuration that performs **best on validation data**.

This process is repeated for each of the two model architectures used in this project:

- A **CNN-LSTM hybrid** based on time-frequency STFT features.
- A **Temporal Convolutional Network (TCN)** using a stacked 1D convolutional architecture.

---

By applying this strategy, we ensure our final models are both **well-tuned** and **generalizable**, which is essential for real-world performance.


In [12]:
# Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

param_grid = {
    "lst_hidden_size": [32, 64, 128],
    
    "learning_rate": [.01, 0.001, 0.0005],
    
    "dropout": [0.1, 0.2, .5],
}
fixed = {
    "num_classes": 4,
    "signal_length": X_train.shape[1],
    "n_fft": 512,
    "hop_length": 256,
    "conv1_padding": 1,
    "conv2_padding": 1,
    "conv1_kernel": 3,
    "conv2_kernel": 3,
    "lstm_num_layers": 1,
    "conv1_channels": 32,
    "conv2_channels": 32
}

results = hyperparameter_search(
    ECGNet,
    param_grid,
    fixed,
    device=device,
    epochs=7,
    train_loader=train_loader,
    val_loader=val_loader,
)


🔧 Training with config: {'num_classes': 4, 'signal_length': 18286, 'n_fft': 512, 'hop_length': 256, 'conv1_padding': 1, 'conv2_padding': 1, 'conv1_kernel': 3, 'conv2_kernel': 3, 'lstm_num_layers': 1, 'conv1_channels': 32, 'conv2_channels': 32, 'lst_hidden_size': 32, 'learning_rate': 0.01, 'dropout': 0.1}
Epoch 1/7 | Train Loss: 1.0238 | Train F1: 0.1853 | Val Loss: 1.0021 | Val F1: 0.1856
Epoch 2/7 | Train Loss: 1.0173 | Train F1: 0.1853 | Val Loss: 1.0039 | Val F1: 0.1856
Epoch 3/7 | Train Loss: 1.0159 | Train F1: 0.1853 | Val Loss: 1.0058 | Val F1: 0.1856
Epoch 4/7 | Train Loss: 1.0139 | Train F1: 0.1863 | Val Loss: 1.0019 | Val F1: 0.1856
Epoch 5/7 | Train Loss: 1.0156 | Train F1: 0.1853 | Val Loss: 1.0137 | Val F1: 0.1856
Epoch 6/7 | Train Loss: 1.0143 | Train F1: 0.1856 | Val Loss: 0.9956 | Val F1: 0.1856
Epoch 7/7 | Train Loss: 1.0069 | Train F1: 0.1853 | Val Loss: 0.9901 | Val F1: 0.1856

🔧 Training with config: {'num_classes': 4, 'signal_length': 18286, 'n_fft': 512, 'hop_leng

### 🧪 Best Hyperparameter Configuration for CNN-LSTM model

After performing an extensive grid search over several key hyperparameters of the `CNN-LSTM` architecture, the best-performing configuration (based on validation F1-score) was identified as:

- `dropout = .1`
- `lst_hidden_size = 128`
- `learning_rate = 0.001`

This combination yielded the highest balance between training and validation performance, indicating that the model can generalize well without overfitting.

---

### 🚀 Final Training Setup

With the optimal hyperparameters selected, we now proceed to **train the final version of CNN-LSTM** using a larger number of epochs (50) to fully exploit the model's capacity:


In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = ECGNet(
    num_classes=4,
    n_fft=512,
    hop_length=256,
    conv1_padding=1,
    conv2_padding=1,
    conv1_kernel=3,
    conv2_kernel=3,
    lstm_num_layers=1,
    conv1_channels=32,
    conv2_channels=32,
    lst_hidden_size=128,
    dropout=0.1,
    signal_length=X_train.shape[1],
    device=device,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

criterion = torch.nn.CrossEntropyLoss()

trainer = Trainer(model, optimizer, criterion, augment_data=False, device=device)

history = trainer.fit(train_loader, val_loader, epochs=50)

train_loss, train_f1 = trainer.evaluate(train_loader)

val_loss, val_f1 = trainer.evaluate(val_loader)

cm, report = trainer.detailed_metrics(val_loader, class_names=["class_0", "class_1", "class_2", "class_3"])
print(report)

model.eval()  

all_preds = []

with torch.no_grad():
    for X_batch, lengths_batch in test_loader:
        X_batch = X_batch.to(device)
        lengths_batch = lengths_batch.to(device)
        
        outputs = model(X_batch, lengths_batch)
        preds = torch.argmax(outputs, dim=1)  
        all_preds.extend(preds.cpu().numpy())
        


# Guardar como base.csv (puedes cambiar el nombre a 'augment.csv' o 'reduced.csv')
df = pd.DataFrame({'predicted_label': all_preds})

df.to_csv('base.csv', index=False)

Epoch 1/50 - Train Loss: 0.9872 - Train F1: 0.1904 - Val Loss: 0.9549 - Val F1: 0.1856
Epoch 2/50 - Train Loss: 0.9387 - Train F1: 0.2269 - Val Loss: 0.9151 - Val F1: 0.3043
Epoch 3/50 - Train Loss: 0.8768 - Train F1: 0.2833 - Val Loss: 0.8154 - Val F1: 0.3088
Epoch 4/50 - Train Loss: 0.7838 - Train F1: 0.4139 - Val Loss: 0.7662 - Val F1: 0.3663
Epoch 5/50 - Train Loss: 0.7129 - Train F1: 0.5202 - Val Loss: 0.6318 - Val F1: 0.5549
Epoch 6/50 - Train Loss: 0.6683 - Train F1: 0.5611 - Val Loss: 0.6131 - Val F1: 0.5908
Epoch 7/50 - Train Loss: 0.6334 - Train F1: 0.6164 - Val Loss: 0.5845 - Val F1: 0.6763
Epoch 8/50 - Train Loss: 0.5986 - Train F1: 0.6598 - Val Loss: 0.5708 - Val F1: 0.6665
Epoch 9/50 - Train Loss: 0.5900 - Train F1: 0.6433 - Val Loss: 0.5662 - Val F1: 0.6876
Epoch 10/50 - Train Loss: 0.5613 - Train F1: 0.6833 - Val Loss: 0.5501 - Val F1: 0.7191
Epoch 11/50 - Train Loss: 0.5487 - Train F1: 0.7017 - Val Loss: 0.5361 - Val F1: 0.7054
Epoch 12/50 - Train Loss: 0.5321 - Train 

### 🔍 Hyperparameter Search and Model Selection (TCN)

In this section, we perform **hyperparameter tuning** for the `TCN_STFT_Classifier` model, which combines a Temporal Convolutional Network (TCN). The goal is to identify the best-performing configuration of this architecture on the validation set.

---

#### ⚙️ How it Works

1. We define a **grid of hyperparameters** to test, including:
   - `dropout` rates
   - TCN `hidden_channels` per layer
   - `kernel size`
   - `num levels`
2. Using `itertools.product`, we generate all possible **combinations** from this grid.
3. For each combination:
   - Instantiate the model with the current hyperparameters.
   - Train it for a fixed number of epochs (e.g., 10) on the training set.
   - Evaluate its performance using **F1-score** and **loss** on the validation set.
4. Track the best configuration based on **validation F1-score**.

---

This tuning process ensures that our TCN model, augmented with time-frequency features from STFT, is both **optimized** and **robust** to overfitting or underfitting. It complements the tuning process performed on our first model (`ECGNet`), allowing us to compare both architectures fairly under their best conditions.


In [13]:
from src.models.model_2 import TCN_STFT_Classifier


param_grid = {
    # Configuraciones donde len(hidden_channels) == num_levels
    
        'hidden_channels': [[64,128,128,128],[128,128,128,128]],
    'dropout': [0.1, 0.2, 0.3],
    'kernel_size': [3, 5],
    'num_levels': [3,4]
}


fixed = {
    "num_classes": 4,
    "n_fft": 256,
    "hop_length": 128,
    "kernel_size": 3,
    "learning_rate" : .001,
}

results = hyperparameter_search(
    TCN_STFT_Classifier,
    param_grid,
    fixed,
    device=device,
    epochs=5,
    train_loader=train_loader,
    val_loader=val_loader,
)


🔧 Training with config: {'num_classes': 4, 'n_fft': 256, 'hop_length': 128, 'kernel_size': 3, 'hidden_channels': [64, 128, 128], 'dropout': 0.1, 'num_levels': 3}
Learning rate: 0.001


  WeightNorm.apply(module, name, dim)


Epoch 1/5 | Train Loss: 0.8957 | Train F1: 0.3041 | Val Loss: 0.7980 | Val F1: 0.4038
Epoch 2/5 | Train Loss: 0.7564 | Train F1: 0.4473 | Val Loss: 0.7375 | Val F1: 0.4446
Epoch 3/5 | Train Loss: 0.6787 | Train F1: 0.5305 | Val Loss: 0.6653 | Val F1: 0.6193
Epoch 4/5 | Train Loss: 0.6359 | Train F1: 0.5815 | Val Loss: 0.6691 | Val F1: 0.5076
Epoch 5/5 | Train Loss: 0.6122 | Train F1: 0.5919 | Val Loss: 0.6083 | Val F1: 0.5734

🔧 Training with config: {'num_classes': 4, 'n_fft': 256, 'hop_length': 128, 'kernel_size': 3, 'hidden_channels': [64, 128, 128, 128], 'dropout': 0.1, 'num_levels': 4}
Learning rate: 0.001


  WeightNorm.apply(module, name, dim)


Epoch 1/5 | Train Loss: 0.9070 | Train F1: 0.2869 | Val Loss: 0.8204 | Val F1: 0.3681
Epoch 2/5 | Train Loss: 0.7834 | Train F1: 0.4224 | Val Loss: 0.7378 | Val F1: 0.4814
Epoch 3/5 | Train Loss: 0.7238 | Train F1: 0.4984 | Val Loss: 0.6812 | Val F1: 0.5096
Epoch 4/5 | Train Loss: 0.6698 | Train F1: 0.5475 | Val Loss: 0.6347 | Val F1: 0.4769
Epoch 5/5 | Train Loss: 0.6413 | Train F1: 0.5732 | Val Loss: 0.6350 | Val F1: 0.5744

🔧 Training with config: {'num_classes': 4, 'n_fft': 256, 'hop_length': 128, 'kernel_size': 5, 'hidden_channels': [64, 128, 128], 'dropout': 0.1, 'num_levels': 3}
Learning rate: 0.001
Epoch 1/5 | Train Loss: 0.9172 | Train F1: 0.2912 | Val Loss: 0.8757 | Val F1: 0.3684
Epoch 2/5 | Train Loss: 0.8034 | Train F1: 0.4009 | Val Loss: 0.7960 | Val F1: 0.4224
Epoch 3/5 | Train Loss: 0.7169 | Train F1: 0.4890 | Val Loss: 0.6728 | Val F1: 0.5141
Epoch 4/5 | Train Loss: 0.6523 | Train F1: 0.5715 | Val Loss: 0.6141 | Val F1: 0.6033
Epoch 5/5 | Train Loss: 0.6229 | Train F1:

### 🏁 Final Model Selection: (TCN)

After conducting an extensive grid search over multiple combinations of hyperparameters, we identified the **best-performing configuration** for the `(TCN)` model.

---

#### 🔧 Best Hyperparameters Found

- `kernel size = 3`
- `num levels = 3`
- `dropout = 128`
- `hidden channels = [64, 128, 128]`

These hyperparameters achieved the **highest F1-score** on the validation set among all tested configurations.

---

#### 🚀 Final Training Phase

Using this optimal setup, we now train the final version of the `TCN` model using **50 epochs** to allow the model to fully converge and leverage the learned configuration. This final model is expected to yield improved generalization performance and serve as a strong baseline for comparison against the first architecture (`CNN_LSTM`).

In [None]:
from src.models.model_2 import TCN_STFT_Classifier
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = TCN_STFT_Classifier(
    num_classes=4,
    hop_length = 128,
    n_fft = 256,
    kernel_size = 3, 
    hidden_channels=  [64, 128, 128],
    dropout = 0.2,
    num_levels = 3,
    device=device,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

criterion = torch.nn.CrossEntropyLoss()

trainer = Trainer(model, optimizer, criterion, augment_data=False, device=device)

history = trainer.fit(train_loader, val_loader, epochs=50)

train_loss, train_f1 = trainer.evaluate(train_loader)

val_loss, val_f1 = trainer.evaluate(val_loader)

cm, report = trainer.detailed_metrics(val_loader, class_names=["class_0", "class_1", "class_2", "class_3"])
print(report)

model.eval()  

all_preds = []

with torch.no_grad():
    for X_batch, lengths_batch in test_loader:
        X_batch = X_batch.to(device)
        lengths_batch = lengths_batch.to(device)
        
        outputs = model(X_batch, lengths_batch)
        preds = torch.argmax(outputs, dim=1)  
        all_preds.extend(preds.cpu().numpy())
        


# Guardar como base.csv (puedes cambiar el nombre a 'augment.csv' o 'reduced.csv')
df = pd.DataFrame({'predicted_label': all_preds})

df.to_csv('base.csv', index=False)

  WeightNorm.apply(module, name, dim)


Epoch 1/50 - Train Loss: 0.9455 - Train F1: 0.2654 - Val Loss: 0.7854 - Val F1: 0.3621
Epoch 2/50 - Train Loss: 0.7761 - Train F1: 0.4506 - Val Loss: 0.7217 - Val F1: 0.5176
Epoch 3/50 - Train Loss: 0.7156 - Train F1: 0.4853 - Val Loss: 0.6634 - Val F1: 0.5686
Epoch 4/50 - Train Loss: 0.6795 - Train F1: 0.5269 - Val Loss: 0.6553 - Val F1: 0.5142
Epoch 5/50 - Train Loss: 0.6405 - Train F1: 0.5714 - Val Loss: 0.6434 - Val F1: 0.4684
Epoch 6/50 - Train Loss: 0.6368 - Train F1: 0.5711 - Val Loss: 0.6879 - Val F1: 0.5538
Epoch 7/50 - Train Loss: 0.6140 - Train F1: 0.5873 - Val Loss: 0.6863 - Val F1: 0.4769
Epoch 8/50 - Train Loss: 0.5865 - Train F1: 0.6157 - Val Loss: 0.6351 - Val F1: 0.4949
Epoch 9/50 - Train Loss: 0.5819 - Train F1: 0.6471 - Val Loss: 0.5858 - Val F1: 0.6015
Epoch 10/50 - Train Loss: 0.5536 - Train F1: 0.6607 - Val Loss: 0.5457 - Val F1: 0.6634
Epoch 11/50 - Train Loss: 0.5548 - Train F1: 0.6483 - Val Loss: 0.5744 - Val F1: 0.5991
Epoch 12/50 - Train Loss: 0.5391 - Train 