In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedGroupKFold
from tqdm.auto import tqdm 
import wandb
from torch.optim.lr_scheduler import CosineAnnealingLR
import sys
from pathlib import Path




# Prepend the directory three levels above the current working directory to
# the module search path so that the `src.*` imports below resolve.
# NOTE(review): presumably that directory is the repository root; this depends
# on the notebook's working directory at launch -- confirm.
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "..", "..", "..")))
from src.utils.k_folds_creator import KFoldCreator
from src.utils.utils import get_models_save_path
from src.utils.constants import Constants 
from src.datasets.eeg_dataset import EEGDataset
from src.models.tcn import TCNModel
# Authenticate with Weights & Biases before any run is created.
wandb.login()




2025-10-21 21:21:10,022 :: root :: INFO :: Initialising Utils
2025-10-21 21:21:10,024 :: root :: INFO :: Initialising Datasets
2025-10-21 21:21:10,049 :: root :: INFO :: Initialising Models
[34m[1mwandb[0m: Currently logged in as: [33mmaikotrede[0m ([33mhms-hslu-aicomp-hs25[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:

class CFG:
    # Experiment configuration for the raw-EEG TCN baseline.
    #
    # Input sequence length (for the values set below):
    #   - Recordings are sampled at 200 Hz and each clip lasts 50 seconds.
    #   - Downsampling is enabled with a factor of 3, so the rate derived
    #     right after this class is 200 // 3 = 66 Hz and the derived
    #     sequence length is 66 * 50 = 3300 timesteps.
    #   - A factor of 2 (100 Hz, 5000 timesteps) would be preferable but did
    #     not fit in GPU memory.
    #
    # Receptive field (RF) of the TCN:
    #   - RF = 1 + sum((kernel_size - 1) * dilation) over all layers.
    #   - NOTE(review): assumes one dilated convolution per level with
    #     dilations 1, 2, 4, ..., 128 for the 8 levels -- confirm against
    #     TCNModel.
    #   - With kernel_size = 21: RF = 1 + 20 * (1 + 2 + ... + 128)
    #                               = 1 + 20 * 255 = 5101 timesteps.

    # --- Model ---
    model_name = 'TCN'
    # One entry per TCN level: 64, 128, 128, 256, 256, 512, 512, 512.
    num_tcn_channels = [64] + [128] * 2 + [256] * 2 + [512] * 3
    kernel_size = 21
    dropout = 0.35
    target_size = 6
    num_channels = 20  # number of input EEG channels

    # --- Signal ---
    original_sampling_rate = 200  # Hz
    sequence_duration = 50
    enable_downsampling = True
    # 2 would be optimal, but 3 is used so that training fits in memory.
    downsample_factor = 3

    # --- Training loop ---
    epochs = 15
    batch_size = 32
    lr = 1e-4
    num_workers = 0

    # --- Data and cross-validation ---
    data_path = '../../../data/'
    n_splits = 5
    seed = 42

# Derived settings, attached to CFG after the class body.
# The rate uses integer division, so any remainder is discarded
# (e.g. 200 // 3 gives 66).
CFG.sampling_rate = (
    CFG.original_sampling_rate // CFG.downsample_factor
    if CFG.enable_downsampling
    else CFG.original_sampling_rate
)

# Number of timesteps per clip at the derived rate.
CFG.sequence_length = CFG.sampling_rate * CFG.sequence_duration

def set_seed(seed):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's built-in ``random`` module, NumPy's legacy global RNG and
    PyTorch (CPU, plus every visible CUDA device when CUDA is available).

    Args:
        seed: Integer seed shared by all generators.
    """
    # Function-scope import: the notebook's import cell does not import `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # Seed all GPUs, not only the current one.
        torch.cuda.manual_seed_all(seed)

# Seed the random number generators once, using the configured seed.
set_seed(CFG.seed)
# Names of the target columns, taken from the project's constants.
TARGETS = Constants.TARGETS



In [3]:


def get_dataloaders(df, fold_id):
    """Build the training and validation loaders for one cross-validation fold.

    Args:
        df: DataFrame with a 'fold' column assigning every row to a fold.
        fold_id: Fold held out for validation; all other folds are used for
            training.

    Returns:
        ``(train_loader, valid_loader)``. The training loader shuffles and
        drops the last incomplete batch; the validation loader does neither.
    """
    # A factor of 1 is passed to the dataset when downsampling is disabled.
    stride = 1
    if CFG.enable_downsampling:
        stride = CFG.downsample_factor

    fold_column = df['fold']
    splits = {
        'train': df.loc[fold_column != fold_id].reset_index(drop=True),
        'valid': df.loc[fold_column == fold_id].reset_index(drop=True),
    }

    # Both datasets are created before either loader, train first.
    datasets = {
        mode: EEGDataset(
            df=split_df,
            data_path=CFG.data_path,
            mode=mode,
            downsample_factor=stride,
        )
        for mode, split_df in splits.items()
    }

    def _make_loader(mode, is_train):
        # `is_train` switches on both shuffling and dropping the last batch.
        return DataLoader(
            datasets[mode],
            batch_size=CFG.batch_size,
            shuffle=is_train,
            num_workers=CFG.num_workers,
            pin_memory=True,
            drop_last=is_train,
        )

    train_loader = _make_loader('train', True)
    valid_loader = _make_loader('valid', False)
    return train_loader, valid_loader


In [4]:
def _run_epoch(model, loader, loss_fn, device, optimizer=None, desc=""):
    """Run one pass over `loader`; train when `optimizer` is given, else evaluate.

    Returns the sample-weighted mean loss over the samples that were actually
    iterated (NaN when the loader yields no batch).
    """
    is_train = optimizer is not None
    model.train(is_train)

    total_loss = 0.0
    samples_seen = 0
    with torch.set_grad_enabled(is_train):
        for signals, labels in tqdm(loader, desc=desc):
            signals, labels = signals.to(device), labels.to(device)
            if is_train:
                optimizer.zero_grad()
            outputs = model(signals)
            log_probs = F.log_softmax(outputs, dim=1)
            loss = loss_fn(log_probs, labels)
            if is_train:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
            batch_len = signals.size(0)
            total_loss += loss.item() * batch_len
            samples_seen += batch_len

    # Normalise by the samples seen rather than len(loader.dataset): with
    # drop_last=True the trailing partial batch is never iterated, so dividing
    # by the dataset length would under-report the loss.
    return total_loss / samples_seen if samples_seen else float('nan')


def _predict_oof(model, loader, device, desc):
    """Return (softmax probabilities, labels) as NumPy arrays for `loader`."""
    model.eval()
    batch_probs = []
    batch_labels = []
    with torch.no_grad():
        for signals, labels in tqdm(loader, desc=desc):
            outputs = model(signals.to(device))
            batch_probs.append(F.softmax(outputs, dim=1).cpu())
            batch_labels.append(labels.cpu())
    return torch.cat(batch_probs).numpy(), torch.cat(batch_labels).numpy()


def run_training(df, DATA_PREPARATION_VOTE_METHOD):
    """Train one TCN per fold and collect out-of-fold (OOF) predictions.

    For every fold in ``range(CFG.n_splits)`` a fresh model is trained for
    ``CFG.epochs`` epochs inside its own Weights & Biases run. The weights
    with the lowest validation loss are saved, reloaded, and used to predict
    the held-out fold.

    Args:
        df: DataFrame with a 'fold' column; passed to ``get_dataloaders``.
        DATA_PREPARATION_VOTE_METHOD: Name of the sub-directory (below
            ``get_models_save_path() / "TCNModel"``) that receives the
            per-fold checkpoints.

    Returns:
        ``(final_oof_preds, final_oof_labels)``: NumPy arrays holding the
        predicted probabilities and the labels of all folds, concatenated in
        fold order.

    Raises:
        ValueError: If no fold produced OOF predictions.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    all_oof_preds = []
    all_oof_labels = []

    for fold in range(CFG.n_splits):
        print(f"\n========== FOLD {fold} ==========")

        config = {
            "architecture": CFG.model_name, "tcn_channels": CFG.num_tcn_channels,
            "kernel_size": CFG.kernel_size, "dropout": CFG.dropout,
            "fold": fold, "features": "raw_eeg", "sequence_duration": f"{CFG.sequence_duration}s",
            "optimizer": "AdamW", "learning_rate": CFG.lr, "batch_size": CFG.batch_size,
            "epochs": CFG.epochs, "seed": CFG.seed, "Scheduler": "CosineAnnealingLR"
        }

        wandb.init(
            project="hms-aicomp-tcn",
            name=f"tcn-raw-eeg-fold{fold}",
            tags=['tcn-baseline', f'fold{fold}'],
            config=config
        )

        try:
            model = TCNModel(
                num_inputs=CFG.num_channels,
                num_outputs=CFG.target_size,
                channel_sizes=CFG.num_tcn_channels,
                kernel_size=CFG.kernel_size,
                dropout=CFG.dropout,
                causal=False,
                use_skip_connections=True
            )
            model.to(device)

            optimizer = torch.optim.AdamW(model.parameters(), lr=CFG.lr)
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.epochs)
            loss_fn = nn.KLDivLoss(reduction='batchmean')
            train_loader, valid_loader = get_dataloaders(df, fold)

            best_val_loss = float('inf')
            # Tracks whether THIS run wrote the checkpoint, so that a file left
            # behind by an earlier run is never mistaken for the current best.
            checkpoint_saved = False
            best_model_path = get_models_save_path() / "TCNModel" / DATA_PREPARATION_VOTE_METHOD / f'best_model_fold{fold}.pth'
            best_model_path.parent.mkdir(parents=True, exist_ok=True)

            for epoch in range(CFG.epochs):
                print(f"   --- Epoch {epoch+1}/{CFG.epochs} ---")

                train_loss = _run_epoch(
                    model, train_loader, loss_fn, device,
                    optimizer=optimizer, desc="Training"
                )
                valid_loss = _run_epoch(
                    model, valid_loader, loss_fn, device, desc="Validation"
                )

                # Read before scheduler.step(): the rate used during this epoch.
                epoch_lr = optimizer.param_groups[0]['lr']
                print(f"   Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Valid Loss = {valid_loss:.4f}, LR = {epoch_lr:.6f}")
                wandb.log({
                    "epoch": epoch + 1, "train/epoch_loss": train_loss, "val/loss": valid_loss,
                    "val/kl_div": valid_loss, "train/epoch_lr": epoch_lr
                })

                # A NaN validation loss never compares as smaller, so it never
                # overwrites the checkpoint.
                if valid_loss < best_val_loss:
                    best_val_loss = valid_loss
                    torch.save(model.state_dict(), best_model_path)
                    checkpoint_saved = True
                    print(f"   New best model saved with validation loss: {best_val_loss:.4f}")
                scheduler.step()

            print(f"   --- Generating OOF predictions for fold {fold} ---")
            has_checkpoint = checkpoint_saved and best_model_path.exists()
            if has_checkpoint:
                # map_location keeps loading independent of the device the
                # checkpoint was written from.
                model.load_state_dict(torch.load(best_model_path, map_location=device))
                fold_preds, fold_labels = _predict_oof(
                    model, valid_loader, device, desc=f"OOF Prediction Fold {fold}"
                )
                all_oof_preds.append(fold_preds)
                all_oof_labels.append(fold_labels)

                print(f"   Finished OOF predictions for fold {fold}")
            else:
                print(f"   WARNING: No model checkpoint from this run at {best_model_path}. Skipping OOF for this fold.")

            wandb.summary['best_val_kl_div'] = best_val_loss
            if has_checkpoint:
                # Upload only a checkpoint that exists and belongs to this run.
                artifact = wandb.Artifact(f'model-fold{fold}', type='model')
                artifact.add_file(str(best_model_path))
                wandb.log_artifact(artifact)
        except BaseException:
            # Close the run as failed (also on an interrupt) instead of leaving
            # it open for the next fold or notebook cell.
            wandb.finish(exit_code=1)
            raise
        else:
            wandb.finish()

    if not all_oof_preds:
        raise ValueError("No fold produced out-of-fold predictions; nothing to concatenate.")

    final_oof_preds = np.concatenate(all_oof_preds)
    final_oof_labels = np.concatenate(all_oof_labels)

    return final_oof_preds, final_oof_labels

In [None]:
if __name__ == '__main__':
    # Passed to run_training, which uses it as the checkpoint sub-directory name.
    DATA_PREPARATION_VOTE_METHOD = "sum_and_normalize"

    print("Preparing data and creating folds...")
    # Join with Path so the result does not depend on a trailing slash in
    # CFG.data_path.
    df = pd.read_csv(Path(CFG.data_path) / 'processed_data_sum_votes_window.csv')

    # Class-name -> index mapping; not used in this cell, kept in case a later
    # cell reads it.
    label_map = {t: i for i, t in enumerate(TARGETS)}
    # Hard label per row: the target column with the largest value. Used only
    # to stratify the folds.
    df['expert_consensus'] = df[TARGETS].idxmax(axis=1)

    print('Train shape:', df.shape)
    print('Targets', list(TARGETS))

    fold_creator = KFoldCreator(n_splits=CFG.n_splits, seed=CFG.seed)
    df = fold_creator.create_folds(df, stratify_col='expert_consensus', group_col='patient_id')

    print("Folds created. Value counts per fold:")
    print(df['fold'].value_counts())

    oof_preds, oof_labels = run_training(df, DATA_PREPARATION_VOTE_METHOD)

    print("\nCalculating final OOF score...")
    oof_preds_tensor = torch.tensor(oof_preds, dtype=torch.float32)
    oof_labels_tensor = torch.tensor(oof_labels, dtype=torch.float32)

    # Clamp before the log: a softmax output that underflowed to exactly 0
    # would give log(0) = -inf, and 0 * -inf turns the whole score into NaN
    # wherever the target is 0.
    log_oof_preds_tensor = torch.log(torch.clamp(oof_preds_tensor, min=1e-15))

    # KLDivLoss expects log-probabilities as input and probabilities as target.
    kl_loss_fn = nn.KLDivLoss(reduction='batchmean')
    overall_oof_score = kl_loss_fn(log_oof_preds_tensor, oof_labels_tensor).item()

    print(f"\nOverall OOF KL Score: {overall_oof_score:.4f}")

Preparing data and creating folds...
Train shape: (17089, 12)
Targets ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
Folds created. Value counts per fold:
fold
0    3741
1    3703
2    3527
4    3081
3    3037
Name: count, dtype: int64
Using device: cuda



   --- Epoch 1/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2292, Valid Loss = 1.2785, LR = 0.000100
   New best model saved with validation loss: 1.2785
   --- Epoch 2/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.0880, Valid Loss = 1.1291, LR = 0.000099
   New best model saved with validation loss: 1.1291
   --- Epoch 3/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.9278, Valid Loss = 0.9857, LR = 0.000096
   New best model saved with validation loss: 0.9857
   --- Epoch 4/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.8243, Valid Loss = 0.8823, LR = 0.000090
   New best model saved with validation loss: 0.8823
   --- Epoch 5/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.7603, Valid Loss = 0.8838, LR = 0.000083
   --- Epoch 6/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.7061, Valid Loss = 0.8719, LR = 0.000075
   New best model saved with validation loss: 0.8719
   --- Epoch 7/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.6492, Valid Loss = 0.8375, LR = 0.000065
   New best model saved with validation loss: 0.8375
   --- Epoch 8/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5970, Valid Loss = 0.8693, LR = 0.000055
   --- Epoch 9/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.5311, Valid Loss = 0.8896, LR = 0.000045
   --- Epoch 10/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.4771, Valid Loss = 0.9112, LR = 0.000035
   --- Epoch 11/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.4231, Valid Loss = 0.8901, LR = 0.000025
   --- Epoch 12/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.3798, Valid Loss = 0.9244, LR = 0.000017
   --- Epoch 13/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.3467, Valid Loss = 0.9297, LR = 0.000010
   --- Epoch 14/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.3275, Valid Loss = 0.9644, LR = 0.000004
   --- Epoch 15/15 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.3160, Valid Loss = 0.9491, LR = 0.000001
   --- Generating OOF predictions for fold {fold} ---


OOF Prediction Fold 0:   0%|          | 0/117 [00:00<?, ?it/s]

   Finished OOF predictions for fold 0


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train/epoch_loss,█▇▆▅▄▄▄▃▃▂▂▁▁▁▁
train/epoch_lr,███▇▇▆▆▅▄▃▃▂▂▁▁
val/kl_div,█▆▃▂▂▂▁▂▂▂▂▂▂▃▃
val/loss,█▆▃▂▂▂▁▂▂▂▂▂▂▃▃

0,1
best_val_kl_div,0.83752
epoch,15.0
train/epoch_loss,0.31596
train/epoch_lr,0.0
val/kl_div,0.94905
val/loss,0.94905





   --- Epoch 1/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2293, Valid Loss = 1.2170, LR = 0.000100
   New best model saved with validation loss: 1.2170
   --- Epoch 2/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.0564, Valid Loss = 1.0640, LR = 0.000099
   New best model saved with validation loss: 1.0640
   --- Epoch 3/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.8867, Valid Loss = 1.0158, LR = 0.000096
   New best model saved with validation loss: 1.0158
   --- Epoch 4/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.7943, Valid Loss = 0.8965, LR = 0.000090
   New best model saved with validation loss: 0.8965
   --- Epoch 5/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.7316, Valid Loss = 0.8743, LR = 0.000083
   New best model saved with validation loss: 0.8743
   --- Epoch 6/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.6815, Valid Loss = 0.9081, LR = 0.000075
   --- Epoch 7/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.6191, Valid Loss = 0.8572, LR = 0.000065
   New best model saved with validation loss: 0.8572
   --- Epoch 8/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5555, Valid Loss = 0.8685, LR = 0.000055
   --- Epoch 9/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.4896, Valid Loss = 0.8758, LR = 0.000045
   --- Epoch 10/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.4293, Valid Loss = 0.8782, LR = 0.000035
   --- Epoch 11/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.3760, Valid Loss = 0.9049, LR = 0.000025
   --- Epoch 12/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.3284, Valid Loss = 1.0107, LR = 0.000017
   --- Epoch 13/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.2972, Valid Loss = 0.9450, LR = 0.000010
   --- Epoch 14/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.2754, Valid Loss = 0.9515, LR = 0.000004
   --- Epoch 15/15 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.2670, Valid Loss = 0.9739, LR = 0.000001
   --- Generating OOF predictions for fold {fold} ---


OOF Prediction Fold 1:   0%|          | 0/116 [00:00<?, ?it/s]

   Finished OOF predictions for fold 1


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train/epoch_loss,█▇▆▅▄▄▄▃▃▂▂▁▁▁▁
train/epoch_lr,███▇▇▆▆▅▄▃▃▂▂▁▁
val/kl_div,█▅▄▂▁▂▁▁▁▁▂▄▃▃▃
val/loss,█▅▄▂▁▂▁▁▁▁▂▄▃▃▃

0,1
best_val_kl_div,0.85724
epoch,15.0
train/epoch_loss,0.26699
train/epoch_lr,0.0
val/kl_div,0.97392
val/loss,0.97392





   --- Epoch 1/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2680, Valid Loss = 1.2180, LR = 0.000100
   New best model saved with validation loss: 1.2180
   --- Epoch 2/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.1066, Valid Loss = 1.0233, LR = 0.000099
   New best model saved with validation loss: 1.0233
   --- Epoch 3/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.8885, Valid Loss = 0.9770, LR = 0.000096
   New best model saved with validation loss: 0.9770
   --- Epoch 4/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.7986, Valid Loss = 0.8690, LR = 0.000090
   New best model saved with validation loss: 0.8690
   --- Epoch 5/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.7309, Valid Loss = 0.9065, LR = 0.000083
   --- Epoch 6/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.6642, Valid Loss = 0.8560, LR = 0.000075
   New best model saved with validation loss: 0.8560
   --- Epoch 7/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.5999, Valid Loss = 0.8660, LR = 0.000065
   --- Epoch 8/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5357, Valid Loss = 0.8936, LR = 0.000055
   --- Epoch 9/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.4632, Valid Loss = 0.8776, LR = 0.000045
   --- Epoch 10/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.4027, Valid Loss = 0.9013, LR = 0.000035
   --- Epoch 11/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.3531, Valid Loss = 0.8982, LR = 0.000025
   --- Epoch 12/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.3090, Valid Loss = 0.9328, LR = 0.000017
   --- Epoch 13/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.2810, Valid Loss = 0.9628, LR = 0.000010
   --- Epoch 14/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.2640, Valid Loss = 0.9698, LR = 0.000004
   --- Epoch 15/15 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.2536, Valid Loss = 0.9530, LR = 0.000001
   --- Generating OOF predictions for fold {fold} ---


OOF Prediction Fold 2:   0%|          | 0/111 [00:00<?, ?it/s]

   Finished OOF predictions for fold 2


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train/epoch_loss,█▇▅▅▄▄▃▃▂▂▂▁▁▁▁
train/epoch_lr,███▇▇▆▆▅▄▃▃▂▂▁▁
val/kl_div,█▄▃▁▂▁▁▂▁▂▂▂▃▃▃
val/loss,█▄▃▁▂▁▁▂▁▂▂▂▃▃▃

0,1
best_val_kl_div,0.85602
epoch,15.0
train/epoch_loss,0.25364
train/epoch_lr,0.0
val/kl_div,0.95304
val/loss,0.95304





   --- Epoch 1/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2230, Valid Loss = 1.3005, LR = 0.000100
   New best model saved with validation loss: 1.3005
   --- Epoch 2/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.0301, Valid Loss = 1.0525, LR = 0.000099
   New best model saved with validation loss: 1.0525
   --- Epoch 3/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.8631, Valid Loss = 0.9498, LR = 0.000096
   New best model saved with validation loss: 0.9498
   --- Epoch 4/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.7699, Valid Loss = 0.9050, LR = 0.000090
   New best model saved with validation loss: 0.9050
   --- Epoch 5/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.7021, Valid Loss = 0.8906, LR = 0.000083
   New best model saved with validation loss: 0.8906
   --- Epoch 6/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.6438, Valid Loss = 0.8789, LR = 0.000075
   New best model saved with validation loss: 0.8789
   --- Epoch 7/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.5768, Valid Loss = 0.9529, LR = 0.000065
   --- Epoch 8/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5103, Valid Loss = 0.9965, LR = 0.000055
   --- Epoch 9/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.4427, Valid Loss = 0.9444, LR = 0.000045
   --- Epoch 10/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.3820, Valid Loss = 0.9476, LR = 0.000035
   --- Epoch 11/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.3260, Valid Loss = 1.0258, LR = 0.000025
   --- Epoch 12/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.2834, Valid Loss = 1.0319, LR = 0.000017
   --- Epoch 13/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.2542, Valid Loss = 1.0275, LR = 0.000010
   --- Epoch 14/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.2375, Valid Loss = 1.0656, LR = 0.000004
   --- Epoch 15/15 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.2276, Valid Loss = 1.0342, LR = 0.000001
   --- Generating OOF predictions for fold {fold} ---


OOF Prediction Fold 3:   0%|          | 0/95 [00:00<?, ?it/s]

   Finished OOF predictions for fold 3


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train/epoch_loss,█▇▅▅▄▄▃▃▃▂▂▁▁▁▁
train/epoch_lr,███▇▇▆▆▅▄▃▃▂▂▁▁
val/kl_div,█▄▂▁▁▁▂▃▂▂▃▄▃▄▄
val/loss,█▄▂▁▁▁▂▃▂▂▃▄▃▄▄

0,1
best_val_kl_div,0.87892
epoch,15.0
train/epoch_loss,0.22756
train/epoch_lr,0.0
val/kl_div,1.03421
val/loss,1.03421





   --- Epoch 1/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2358, Valid Loss = 1.2410, LR = 0.000100
   New best model saved with validation loss: 1.2410
   --- Epoch 2/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.0328, Valid Loss = 1.0655, LR = 0.000099
   New best model saved with validation loss: 1.0655
   --- Epoch 3/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.8499, Valid Loss = 1.0598, LR = 0.000096
   New best model saved with validation loss: 1.0598
   --- Epoch 4/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.7650, Valid Loss = 1.0050, LR = 0.000090
   New best model saved with validation loss: 1.0050
   --- Epoch 5/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.7028, Valid Loss = 0.9831, LR = 0.000083
   New best model saved with validation loss: 0.9831
   --- Epoch 6/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.6475, Valid Loss = 0.9696, LR = 0.000075
   New best model saved with validation loss: 0.9696
   --- Epoch 7/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.5958, Valid Loss = 0.9528, LR = 0.000065
   New best model saved with validation loss: 0.9528
   --- Epoch 8/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5242, Valid Loss = 0.9923, LR = 0.000055
   --- Epoch 9/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.4607, Valid Loss = 1.0141, LR = 0.000045
   --- Epoch 10/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.4014, Valid Loss = 1.0387, LR = 0.000035
   --- Epoch 11/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.3477, Valid Loss = 1.0572, LR = 0.000025
   --- Epoch 12/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.3043, Valid Loss = 1.1388, LR = 0.000017
   --- Epoch 13/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.2725, Valid Loss = 1.1474, LR = 0.000010
   --- Epoch 14/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.2586, Valid Loss = 1.1364, LR = 0.000004
   --- Epoch 15/15 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.2500, Valid Loss = 1.1380, LR = 0.000001
   --- Generating OOF predictions for fold {fold} ---


OOF Prediction Fold 4:   0%|          | 0/97 [00:00<?, ?it/s]

   Finished OOF predictions for fold 4


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train/epoch_loss,█▇▅▅▄▄▃▃▂▂▂▁▁▁▁
train/epoch_lr,███▇▇▆▆▅▄▃▃▂▂▁▁
val/kl_div,█▄▄▂▂▁▁▂▂▃▄▆▆▅▅
val/loss,█▄▄▂▂▁▁▂▂▃▄▆▆▅▅

0,1
best_val_kl_div,0.95284
epoch,15.0
train/epoch_loss,0.24998
train/epoch_lr,0.0
val/kl_div,1.13796
val/loss,1.13796



Calculating final OOF score...

Overall OOF KL Score (comparable to XGBoost): 0.8738


In [6]:
# Re-print the overall score; `overall_oof_score` is assigned by the training
# cell above, so that cell must have been run first.
print(f"\nOverall OOF KL Score: {overall_oof_score:.4f}")


Overall OOF KL Score: 0.8738
