In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm 
import wandb
from torch.optim.lr_scheduler import CosineAnnealingLR
import sys
from torch.amp import autocast, GradScaler

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "..", "..", "..")))
from src.utils.k_folds_creator import KFoldCreator
from src.utils.utils import get_models_save_path, set_seeds
from src.utils.constants import Constants 
from src.datasets.eeg_dataset_montage import EEGDatasetMontage
from src.models.gru_convolution_attention import NodeAttentionModel

wandb.login()

2025-11-29 09:24:28,519 :: root :: INFO :: Initialising Utils
2025-11-29 09:24:28,905 :: root :: INFO :: Initialising Datasets
2025-11-29 09:24:28,913 :: root :: INFO :: Initialising Models


Skipping module cbramod_dataset due to missing dependency: No module named 'mne'
Skipping module cbramod_model due to missing dependency: No module named 'einops'


[34m[1mwandb[0m: Currently logged in as: [33mmaikotrede[0m ([33mhms-hslu-aicomp-hs25[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
class CFG:
    """Static experiment configuration read by the cells below.

    Attributes are plain class-level constants; nothing is instantiated.
    """

    # --- Reproducibility / cross-validation / data location ---
    seed = 42                     # passed to set_seeds() and KFoldCreator
    n_splits = 5                  # number of CV folds iterated in run_training
    data_path = '../../../data/'  # prefix for the CSV and handed to the dataset

    # --- Input signal description ---
    sampling_rate = 200           # Hz
    sequence_duration = 50        # logged to W&B with an "s" suffix (seconds)
    downsample_factor = 1         # forwarded to EEGDatasetMontage
    num_channels = 19             # forwarded to the model as num_nodes

    # --- Model architecture ---
    model_name = 'GRUConvNodeAttentionModel'  # logged as "architecture"
    hidden_units = 256            # forwarded as hidden_size
    num_layers = 1
    num_cnn_blocks = 4            # overridden per experiment by the driver cell
    target_size = 6               # forwarded as num_classes
    dropout = 0.4
    # NOTE(review): in the visible code this flag only appears in W&B run
    # names/tags; it is not passed to the model constructor.
    use_attention = True

    # --- Optimisation / early stopping ---
    batch_size = 32
    num_workers = 8
    epochs = 50                   # also used as T_max of the cosine schedule
    lr = 1e-4
    patience = 10                 # epochs without improvement before stopping
    min_delta = 0.001             # minimum drop in val loss to count as improvement

    # --- Augmentation ---
    use_mixup = True
    mixup_alpha = 0.5             # Beta(alpha, alpha) parameter (1.0 = uniform on [0, 1])
    
    

# Samples per sequence = duration * sampling rate (50 * 200 = 10000).
# NOTE(review): not referenced again in the visible cells.
CFG.sequence_length = CFG.sequence_duration * CFG.sampling_rate 

# Project helper; presumably seeds the Python/NumPy/torch RNGs — verify in src.utils.utils.
set_seeds(CFG.seed)

# Target column names; used below to select label columns and to name the OOF columns.
TARGETS = Constants.TARGETS

In [3]:
def mixup_data(x, y, alpha=1.0, device=None):
    """Blend every sample in a batch with a randomly chosen partner (mixup).

    One mixing coefficient ``lam`` is drawn from ``Beta(alpha, alpha)`` with
    NumPy's global RNG and applied to the whole batch; partners are chosen by
    a random permutation of the batch dimension.

    Args:
        x: Input batch; dimension 0 indexes the samples.
        y: Targets with the same leading dimension as ``x``.
        alpha: Beta distribution parameter. When ``alpha <= 0`` the
            coefficient is fixed to 1, so the inputs are returned unmixed.
        device: Ignored. Kept only so existing call sites that pass it keep
            working; the permutation always follows the tensors' own devices.

    Returns:
        Tuple ``(mixed_x, mixed_y)`` with the same shapes as ``x`` and ``y``.
    """
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size(0)
    # Drawn on the CPU (same RNG stream as before), then moved to wherever the
    # data lives. Using the tensors' own device avoids the failure that
    # occurred when the old 'cuda' default was applied to CPU tensors.
    index = torch.randperm(batch_size)
    x_index = index.to(x.device)
    y_index = index.to(y.device)

    mixed_x = lam * x + (1 - lam) * x[x_index]
    mixed_y = lam * y + (1 - lam) * y[y_index]

    return mixed_x, mixed_y

In [4]:
def get_dataloaders(df, fold_id):
    """Build the train/validation loaders for one cross-validation fold.

    Rows whose ``fold`` column equals ``fold_id`` form the validation set and
    all remaining rows form the training set. Both frames get a fresh 0..n-1
    index before being handed to ``EEGDatasetMontage``.

    Args:
        df: DataFrame containing a ``fold`` column.
        fold_id: Fold held out for validation.

    Returns:
        Tuple ``(train_loader, valid_loader)``.
    """
    fold_column = df['fold']
    valid_df = df[fold_column == fold_id].reset_index(drop=True)
    train_df = df[fold_column != fold_id].reset_index(drop=True)

    # Only the training dataset is built with an augmentation list.
    train_dataset = EEGDatasetMontage(
        df=train_df,
        data_path=CFG.data_path,
        mode='train',
        downsample_factor=CFG.downsample_factor,
        augmentations=["channel_mask"],
    )
    valid_dataset = EEGDatasetMontage(
        df=valid_df,
        data_path=CFG.data_path,
        mode='valid',
        downsample_factor=CFG.downsample_factor,
    )

    # Settings shared by both loaders; persistent workers require num_workers > 0.
    common_kwargs = dict(
        batch_size=CFG.batch_size,
        num_workers=CFG.num_workers,
        pin_memory=True,
        persistent_workers=CFG.num_workers > 0,
    )

    # Training: shuffled, incomplete last batch dropped.
    train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, **common_kwargs)
    # Validation: fixed order, every sample kept.
    valid_loader = DataLoader(valid_dataset, shuffle=False, drop_last=False, **common_kwargs)

    return train_loader, valid_loader

In [5]:
def _train_one_epoch(model, loader, optimizer, scaler, loss_fn, device, autocast_enabled):
    """Run one optimisation pass over ``loader``; return the mean per-sample loss."""
    model.train()
    loss_sum = 0.0
    samples_seen = 0
    for signals, labels in tqdm(loader, desc="Training"):
        signals, labels = signals.to(device), labels.to(device)
        if CFG.use_mixup:
            signals, labels = mixup_data(signals, labels, alpha=CFG.mixup_alpha, device=device)
        optimizer.zero_grad()

        with autocast(enabled=autocast_enabled, device_type=device.type):
            outputs = model(signals)
            log_probs = F.log_softmax(outputs, dim=1)
            loss = loss_fn(log_probs, labels)

        scaler.scale(loss).backward()
        # Gradients must be unscaled before clipping so max_norm applies to true values.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()  # single host sync per step
        batch_size = signals.size(0)
        loss_sum += batch_loss * batch_size
        samples_seen += batch_size
        wandb.log({"train/loss": batch_loss})

    # The train loader drops the last incomplete batch, so normalise by the
    # samples actually processed rather than by the dataset size.
    return loss_sum / max(samples_seen, 1)


def _validate(model, loader, loss_fn, device, autocast_enabled):
    """Return the mean per-sample loss of ``model`` over ``loader`` (no gradients)."""
    model.eval()
    loss_sum = 0.0
    with torch.no_grad():
        for signals, labels in tqdm(loader, desc="Validation"):
            signals, labels = signals.to(device), labels.to(device)

            with autocast(enabled=autocast_enabled, device_type=device.type):
                outputs = model(signals)
                log_probs = F.log_softmax(outputs, dim=1)
                loss = loss_fn(log_probs, labels)

            loss_sum += loss.item() * signals.size(0)

    return loss_sum / len(loader.dataset)


def _predict_probs(model, loader, device, autocast_enabled, desc):
    """Return ``(probabilities, labels)`` as NumPy arrays for every batch in ``loader``."""
    model.eval()
    batch_probs = []
    batch_labels = []
    with torch.no_grad():
        for signals, labels in tqdm(loader, desc=desc):
            signals = signals.to(device)

            with autocast(enabled=autocast_enabled, device_type=device.type):
                outputs = model(signals)
                probs = F.softmax(outputs, dim=1)

            batch_probs.append(probs.float().cpu().numpy())
            batch_labels.append(labels.cpu().numpy())

    return np.concatenate(batch_probs), np.concatenate(batch_labels)


def run_training(df, DATA_PREPARATION_VOTE_METHOD):
    """Train one model per CV fold and score the pooled out-of-fold predictions.

    For each fold a fresh ``NodeAttentionModel`` is trained with AdamW, cosine
    LR annealing, mixed precision (on CUDA only) and early stopping on the
    validation KL divergence. The best checkpoint of each fold is saved,
    reloaded to produce out-of-fold (OOF) probabilities and logged to W&B as
    an artifact.

    Args:
        df: DataFrame with a ``fold`` column and the ``TARGETS`` columns.
        DATA_PREPARATION_VOTE_METHOD: Name used as a sub-directory of the
            checkpoint path.

    Returns:
        Tuple ``(overall_cv_score, fold_scores)``: the KL divergence over all
        collected OOF predictions (NaN if no fold produced a checkpoint) and
        the list of best validation losses, one per fold.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    autocast_enabled = (device.type == 'cuda')

    oof_preds = []
    oof_labels = []
    oof_indices = []

    fold_scores = []

    for fold in range(CFG.n_splits):
        print(f"\n========== FOLD {fold} ==========")

        experiment_group = f"montages_block_{CFG.num_cnn_blocks}_attention_{CFG.use_attention}_attentionpool4headsMP_RESNET_AUG(MU+CM))"
        experiment_name = f"{experiment_group}_fold{fold}"

        config = {
            "architecture": CFG.model_name, "hidden_units": CFG.hidden_units,
            "num_layers": CFG.num_layers,
            "fold": fold, "features": "raw_eeg",
            "sequence_duration": f"{CFG.sequence_duration}s",
            "optimizer": "AdamW", "learning_rate": CFG.lr,
            "batch_size": CFG.batch_size,
            "epochs": CFG.epochs, "seed": CFG.seed,
            "Scheduler": "CosineAnnealingLR",

            "num_cnn_blocks": CFG.num_cnn_blocks
        }

        wandb.init(
            project="hms-aicomp-gru-conv",
            name=experiment_name,
            group=experiment_group,
            tags=['gru-conv', f'fold{fold}', f'montages_block_{CFG.num_cnn_blocks}_attention_{CFG.use_attention}'],
            config=config
        )

        # Everything between init and finish is guarded so a failure in one
        # fold does not leave a dangling W&B run behind.
        try:
            model = NodeAttentionModel(
                num_nodes=CFG.num_channels,
                node_embed_size=256,
                hidden_size=CFG.hidden_units,
                num_layers=CFG.num_layers,
                num_classes=CFG.target_size,
                num_cnn_blocks=CFG.num_cnn_blocks,
                dropout=CFG.dropout,
                use_inception=True
            )
            model.to(device)

            optimizer = torch.optim.AdamW(model.parameters(), lr=CFG.lr, weight_decay=1e-2)
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.epochs)
            loss_fn = nn.KLDivLoss(reduction='batchmean')
            train_loader, valid_loader = get_dataloaders(df, fold)

            scaler = GradScaler(enabled=autocast_enabled)

            best_val_loss = float('inf')
            best_model_path = None
            patience_counter = 0

            for epoch in range(CFG.epochs):
                print(f"   --- Epoch {epoch+1}/{CFG.epochs} ---")

                train_loss = _train_one_epoch(
                    model, train_loader, optimizer, scaler, loss_fn, device, autocast_enabled
                )
                valid_loss = _validate(model, valid_loader, loss_fn, device, autocast_enabled)

                epoch_lr = optimizer.param_groups[0]['lr']
                print(f"   Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Valid Loss = {valid_loss:.4f}, LR = {epoch_lr:.6f}")
                wandb.log({
                    "epoch": epoch + 1, "train/epoch_loss": train_loss, "val/loss": valid_loss,
                    "val/kl_div": valid_loss, "train/epoch_lr": epoch_lr
                })

                # Only a drop of more than min_delta counts as an improvement.
                if valid_loss < best_val_loss - CFG.min_delta:
                    best_val_loss = valid_loss
                    patience_counter = 0

                    best_model_path = get_models_save_path() / "GRUConvNodeAttentionModel" / DATA_PREPARATION_VOTE_METHOD / f'best_model_fold{fold}.pth'
                    best_model_path.parent.mkdir(parents=True, exist_ok=True)
                    torch.save(model.state_dict(), best_model_path)
                    print(f"   New best model saved with validation loss: {best_val_loss:.4f}")

                else:
                    patience_counter += 1
                    print(f"   No improvement. Patience: {patience_counter}/{CFG.patience}")

                if patience_counter >= CFG.patience:
                    print(f"   Early stopping triggered after {patience_counter} epochs without improvement.")
                    wandb.log({"early_stopped_epoch": epoch + 1})
                    break
                scheduler.step()

            fold_scores.append(best_val_loss)
            wandb.summary['best_val_kl_div'] = best_val_loss

            if best_model_path:
                print(f"Loading best model from fold {fold} to generate OOF predictions...")
                # map_location keeps the reload working if the checkpoint's
                # device differs from the current one.
                model.load_state_dict(torch.load(best_model_path, map_location=device))

                fold_preds, fold_labels = _predict_probs(
                    model, valid_loader, device, autocast_enabled,
                    desc=f"Generating OOF for Fold {fold}"
                )
                oof_preds.append(fold_preds)
                oof_labels.append(fold_labels)
                # The validation frame is df[df['fold'] == fold] in original row
                # order and the loader is not shuffled, so these are the df
                # index labels of the predictions (assumes the dataset yields
                # one item per row, in order — TODO confirm).
                oof_indices.append(df.index[df['fold'] == fold].to_numpy())

                artifact = wandb.Artifact(f'model-fold{fold}', type='model')
                artifact.add_file(best_model_path)
                wandb.log_artifact(artifact)
                print(f"\nLogged artifact for fold {fold} with best validation loss: {best_val_loss:.4f}")
            else:
                print("\nNo best model was saved during training for this fold.")
        except BaseException:
            # Mark the run as failed instead of leaving it open, then re-raise.
            wandb.finish(exit_code=1)
            raise
        else:
            wandb.finish()

    print("\nCalculating final OOF CV score from all collected predictions...")

    if not oof_preds:
        print("No OOF predictions were collected; overall CV score is undefined.")
        return float('nan'), fold_scores

    # Label each OOF row with the df index it was predicted for, so folds
    # without a checkpoint simply drop out instead of breaking construction.
    oof_index = np.concatenate(oof_indices)
    oof_df = pd.DataFrame(np.concatenate(oof_preds), index=oof_index, columns=TARGETS)
    true_df = pd.DataFrame(np.concatenate(oof_labels), index=oof_index, columns=TARGETS)

    oof_df = oof_df.sort_index()
    true_df = true_df.sort_index()

    oof_tensor = torch.tensor(oof_df.values, dtype=torch.float32)
    true_tensor = torch.tensor(true_df.values, dtype=torch.float32)

    kl_loss = nn.KLDivLoss(reduction="batchmean")
    # Clamp before the log so a probability that underflowed to 0 cannot turn
    # the whole score into inf/NaN.
    overall_cv_score = kl_loss(torch.log(oof_tensor.clamp_min(1e-15)), true_tensor).item()

    return overall_cv_score, fold_scores

# Used by run_training as a sub-directory of the checkpoint path.
# NOTE(review): the CSV filename below repeats this token as a separate hard-coded literal.
DATA_PREPARATION_VOTE_METHOD = "max_vote_window"

print("Preparing data and creating folds...")
df = pd.read_csv(CFG.data_path + 'processed_data_max_vote_window.csv') 

# Maps each target name to its position in TARGETS.
# NOTE(review): not referenced again in the visible cells.
label_map = {t: i for i, t in enumerate(TARGETS)}
# Name of the target column with the largest value in each row; used only for stratification below.
df['expert_consensus'] = df[TARGETS].idxmax(axis=1)

print('Train shape:', df.shape)
print('Targets', list(TARGETS))

# Project helper that adds the 'fold' column read by get_dataloaders/run_training.
# Presumably stratified by consensus label and grouped by patient — verify in KFoldCreator.
fold_creator = KFoldCreator(n_splits=CFG.n_splits, seed=CFG.seed)
df = fold_creator.create_folds(df, stratify_col='expert_consensus', group_col='patient_id')

print("Folds created. Value counts per fold:")
print(df['fold'].value_counts())

Preparing data and creating folds...
Train shape: (17089, 15)
Targets ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
Folds created. Value counts per fold:
fold
0    4067
1    3658
2    3381
4    3358
3    2625
Name: count, dtype: int64


In [6]:
# Values of num_cnn_blocks to evaluate; each entry triggers a full cross-validated training run.
experiments_to_run = [3]  

# num_cnn_blocks -> {'overall_cv_score', 'mean_fold_score'}
experiment_results = {}

print(f"Starting systematic evaluation for num_cnn_blocks: {experiments_to_run}...")

for num_blocks in experiments_to_run:
    print("\n" + "="*60)
    print(f"      STARTING EXPERIMENT: {num_blocks} CNN BLOCKS")
    print("="*60)

    # Overrides the class-level default; run_training reads it from CFG.
    CFG.num_cnn_blocks = num_blocks

    # NOTE(review): the checkpoint path built in run_training depends only on the
    # vote method and fold, so with several block counts the .pth files on disk
    # would be overwritten by the latest experiment.
    overall_cv_score, all_fold_scores = run_training(df, DATA_PREPARATION_VOTE_METHOD)
    
    # Unweighted mean of the per-fold best validation losses.
    mean_fold_score = np.mean(all_fold_scores)
    
    experiment_results[num_blocks] = {
        'overall_cv_score': overall_cv_score,
        'mean_fold_score': mean_fold_score
    }
    
    print("\n" + "="*60)
    print(f"      FINISHED EXPERIMENT: {num_blocks} CNN BLOCKS")
    print(f"      OOF KL Score (all folds): {overall_cv_score:.4f}")
    print(f"      Mean of fold scores: {mean_fold_score:.4f}")
    print("="*60)


# One summary row per experiment: block count | pooled OOF score | mean fold score.
for num_blocks, results in experiment_results.items():
    print(f"{num_blocks:<12} | {results['overall_cv_score']:<18.4f} | {results['mean_fold_score']:<18.4f}")

print("="*50)
print("All experiments complete. Check wandb for detailed charts.")

Starting systematic evaluation for num_cnn_blocks: [3]...

      STARTING EXPERIMENT: 3 CNN BLOCKS
Using device: cuda



   --- Epoch 1/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 0.9163, Valid Loss = 1.0686, LR = 0.000100
   New best model saved with validation loss: 1.0686
   --- Epoch 2/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 0.7423, Valid Loss = 0.8731, LR = 0.000100
   New best model saved with validation loss: 0.8731
   --- Epoch 3/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.6692, Valid Loss = 0.8872, LR = 0.000100
   No improvement. Patience: 1/10
   --- Epoch 4/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.6254, Valid Loss = 0.8100, LR = 0.000099
   New best model saved with validation loss: 0.8100
   --- Epoch 5/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.5984, Valid Loss = 0.7300, LR = 0.000098
   New best model saved with validation loss: 0.7300
   --- Epoch 6/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.5845, Valid Loss = 0.7767, LR = 0.000098
   No improvement. Patience: 1/10
   --- Epoch 7/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.5718, Valid Loss = 0.7439, LR = 0.000096
   No improvement. Patience: 2/10
   --- Epoch 8/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5636, Valid Loss = 0.6919, LR = 0.000095
   New best model saved with validation loss: 0.6919
   --- Epoch 9/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.5551, Valid Loss = 0.7359, LR = 0.000094
   No improvement. Patience: 1/10
   --- Epoch 10/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.5483, Valid Loss = 0.6954, LR = 0.000092
   No improvement. Patience: 2/10
   --- Epoch 11/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.5344, Valid Loss = 0.6911, LR = 0.000090
   No improvement. Patience: 3/10
   --- Epoch 12/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.5280, Valid Loss = 0.6932, LR = 0.000089
   No improvement. Patience: 4/10
   --- Epoch 13/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.5191, Valid Loss = 0.6701, LR = 0.000086
   New best model saved with validation loss: 0.6701
   --- Epoch 14/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.5156, Valid Loss = 0.6694, LR = 0.000084
   No improvement. Patience: 1/10
   --- Epoch 15/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.5088, Valid Loss = 0.6707, LR = 0.000082
   No improvement. Patience: 2/10
   --- Epoch 16/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.5032, Valid Loss = 0.6640, LR = 0.000079
   New best model saved with validation loss: 0.6640
   --- Epoch 17/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.5067, Valid Loss = 0.6615, LR = 0.000077
   New best model saved with validation loss: 0.6615
   --- Epoch 18/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.4923, Valid Loss = 0.6734, LR = 0.000074
   No improvement. Patience: 1/10
   --- Epoch 19/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.4912, Valid Loss = 0.6877, LR = 0.000071
   No improvement. Patience: 2/10
   --- Epoch 20/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4844, Valid Loss = 0.6646, LR = 0.000068
   No improvement. Patience: 3/10
   --- Epoch 21/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.4798, Valid Loss = 0.6195, LR = 0.000065
   New best model saved with validation loss: 0.6195
   --- Epoch 22/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.4715, Valid Loss = 0.6450, LR = 0.000062
   No improvement. Patience: 1/10
   --- Epoch 23/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.4701, Valid Loss = 0.6369, LR = 0.000059
   No improvement. Patience: 2/10
   --- Epoch 24/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.4619, Valid Loss = 0.6724, LR = 0.000056
   No improvement. Patience: 3/10
   --- Epoch 25/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.4645, Valid Loss = 0.6319, LR = 0.000053
   No improvement. Patience: 4/10
   --- Epoch 26/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.4601, Valid Loss = 0.6342, LR = 0.000050
   No improvement. Patience: 5/10
   --- Epoch 27/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.4502, Valid Loss = 0.6416, LR = 0.000047
   No improvement. Patience: 6/10
   --- Epoch 28/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.4418, Valid Loss = 0.6411, LR = 0.000044
   No improvement. Patience: 7/10
   --- Epoch 29/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.4486, Valid Loss = 0.6560, LR = 0.000041
   No improvement. Patience: 8/10
   --- Epoch 30/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.4414, Valid Loss = 0.6425, LR = 0.000038
   No improvement. Patience: 9/10
   --- Epoch 31/50 ---


Training:   0%|          | 0/406 [00:00<?, ?it/s]

Validation:   0%|          | 0/128 [00:00<?, ?it/s]

   Epoch 31: Train Loss = 0.4278, Valid Loss = 0.6330, LR = 0.000035
   No improvement. Patience: 10/10
   Early stopping triggered after 10 epochs without improvement.
Loading best model from fold 0 to generate OOF predictions...


Generating OOF for Fold 0:   0%|          | 0/128 [00:00<?, ?it/s]


Logged artifact for fold 0 with best validation loss: 0.6195


0,1
early_stopped_epoch,▁
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
train/epoch_loss,█▆▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁
train/epoch_lr,███████▇▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁
train/loss,█▆▅▇█▆▆▄▅▆▃▆▆▆▅▃▅▂▃▃▂▄▅▃▃▁▄▃▄▄▄▁▃▃▃▃▂▂▂▄
val/kl_div,█▅▅▄▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▂▁▁
val/loss,█▅▅▄▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▂▁▁

0,1
best_val_kl_div,0.61954
early_stopped_epoch,31.0
epoch,31.0
train/epoch_loss,0.42784
train/epoch_lr,3e-05
train/loss,0.3987
val/kl_div,0.63299
val/loss,0.63299





   --- Epoch 1/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 0.9657, Valid Loss = 0.9498, LR = 0.000100
   New best model saved with validation loss: 0.9498
   --- Epoch 2/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 0.7924, Valid Loss = 0.7728, LR = 0.000100
   New best model saved with validation loss: 0.7728
   --- Epoch 3/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.6937, Valid Loss = 0.7362, LR = 0.000100
   New best model saved with validation loss: 0.7362
   --- Epoch 4/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.6444, Valid Loss = 0.6732, LR = 0.000099
   New best model saved with validation loss: 0.6732
   --- Epoch 5/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.6176, Valid Loss = 0.6495, LR = 0.000098
   New best model saved with validation loss: 0.6495
   --- Epoch 6/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.5979, Valid Loss = 0.6301, LR = 0.000098
   New best model saved with validation loss: 0.6301
   --- Epoch 7/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.5791, Valid Loss = 0.6167, LR = 0.000096
   New best model saved with validation loss: 0.6167
   --- Epoch 8/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5626, Valid Loss = 0.6387, LR = 0.000095
   No improvement. Patience: 1/10
   --- Epoch 9/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.5567, Valid Loss = 0.6229, LR = 0.000094
   No improvement. Patience: 2/10
   --- Epoch 10/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.5560, Valid Loss = 0.6001, LR = 0.000092
   New best model saved with validation loss: 0.6001
   --- Epoch 11/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.5384, Valid Loss = 0.6097, LR = 0.000090
   No improvement. Patience: 1/10
   --- Epoch 12/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.5350, Valid Loss = 0.5993, LR = 0.000089
   No improvement. Patience: 2/10
   --- Epoch 13/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.5258, Valid Loss = 0.5945, LR = 0.000086
   New best model saved with validation loss: 0.5945
   --- Epoch 14/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.5218, Valid Loss = 0.5930, LR = 0.000084
   New best model saved with validation loss: 0.5930
   --- Epoch 15/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.5143, Valid Loss = 0.5946, LR = 0.000082
   No improvement. Patience: 1/10
   --- Epoch 16/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.5058, Valid Loss = 0.6011, LR = 0.000079
   No improvement. Patience: 2/10
   --- Epoch 17/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.5054, Valid Loss = 0.5863, LR = 0.000077
   New best model saved with validation loss: 0.5863
   --- Epoch 18/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.5027, Valid Loss = 0.5831, LR = 0.000074
   New best model saved with validation loss: 0.5831
   --- Epoch 19/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.4912, Valid Loss = 0.5666, LR = 0.000071
   New best model saved with validation loss: 0.5666
   --- Epoch 20/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4890, Valid Loss = 0.5784, LR = 0.000068
   No improvement. Patience: 1/10
   --- Epoch 21/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.4792, Valid Loss = 0.5757, LR = 0.000065
   No improvement. Patience: 2/10
   --- Epoch 22/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.4741, Valid Loss = 0.5878, LR = 0.000062
   No improvement. Patience: 3/10
   --- Epoch 23/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.4710, Valid Loss = 0.6137, LR = 0.000059
   No improvement. Patience: 4/10
   --- Epoch 24/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.4639, Valid Loss = 0.5794, LR = 0.000056
   No improvement. Patience: 5/10
   --- Epoch 25/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.4609, Valid Loss = 0.5702, LR = 0.000053
   No improvement. Patience: 6/10
   --- Epoch 26/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.4615, Valid Loss = 0.6046, LR = 0.000050
   No improvement. Patience: 7/10
   --- Epoch 27/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.4523, Valid Loss = 0.5683, LR = 0.000047
   No improvement. Patience: 8/10
   --- Epoch 28/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.4570, Valid Loss = 0.5490, LR = 0.000044
   New best model saved with validation loss: 0.5490
   --- Epoch 29/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.4539, Valid Loss = 0.5836, LR = 0.000041
   No improvement. Patience: 1/10
   --- Epoch 30/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.4465, Valid Loss = 0.5619, LR = 0.000038
   No improvement. Patience: 2/10
   --- Epoch 31/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 31: Train Loss = 0.4414, Valid Loss = 0.5737, LR = 0.000035
   No improvement. Patience: 3/10
   --- Epoch 32/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 32: Train Loss = 0.4362, Valid Loss = 0.5694, LR = 0.000032
   No improvement. Patience: 4/10
   --- Epoch 33/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 33: Train Loss = 0.4326, Valid Loss = 0.5552, LR = 0.000029
   No improvement. Patience: 5/10
   --- Epoch 34/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 34: Train Loss = 0.4254, Valid Loss = 0.5759, LR = 0.000026
   No improvement. Patience: 6/10
   --- Epoch 35/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 35: Train Loss = 0.4297, Valid Loss = 0.5549, LR = 0.000023
   No improvement. Patience: 7/10
   --- Epoch 36/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 36: Train Loss = 0.4274, Valid Loss = 0.5497, LR = 0.000021
   No improvement. Patience: 8/10
   --- Epoch 37/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 37: Train Loss = 0.4220, Valid Loss = 0.5407, LR = 0.000018
   New best model saved with validation loss: 0.5407
   --- Epoch 38/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 38: Train Loss = 0.4222, Valid Loss = 0.5539, LR = 0.000016
   No improvement. Patience: 1/10
   --- Epoch 39/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 39: Train Loss = 0.4173, Valid Loss = 0.5473, LR = 0.000014
   No improvement. Patience: 2/10
   --- Epoch 40/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 40: Train Loss = 0.4096, Valid Loss = 0.5424, LR = 0.000011
   No improvement. Patience: 3/10
   --- Epoch 41/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 41: Train Loss = 0.4144, Valid Loss = 0.5485, LR = 0.000010
   No improvement. Patience: 4/10
   --- Epoch 42/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 42: Train Loss = 0.4140, Valid Loss = 0.5524, LR = 0.000008
   No improvement. Patience: 5/10
   --- Epoch 43/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 43: Train Loss = 0.4106, Valid Loss = 0.5541, LR = 0.000006
   No improvement. Patience: 6/10
   --- Epoch 44/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 44: Train Loss = 0.4097, Valid Loss = 0.5539, LR = 0.000005
   No improvement. Patience: 7/10
   --- Epoch 45/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 45: Train Loss = 0.4085, Valid Loss = 0.5504, LR = 0.000004
   No improvement. Patience: 8/10
   --- Epoch 46/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 46: Train Loss = 0.4125, Valid Loss = 0.5523, LR = 0.000002
   No improvement. Patience: 9/10
   --- Epoch 47/50 ---


Training:   0%|          | 0/419 [00:00<?, ?it/s]

Validation:   0%|          | 0/115 [00:00<?, ?it/s]

   Epoch 47: Train Loss = 0.4094, Valid Loss = 0.5553, LR = 0.000002
   No improvement. Patience: 10/10
   Early stopping triggered after 10 epochs without improvement.
Loading best model from fold 1 to generate OOF predictions...


Generating OOF for Fold 1:   0%|          | 0/115 [00:00<?, ?it/s]


Logged artifact for fold 1 with best validation loss: 0.5407


0,1
early_stopped_epoch,▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/epoch_loss,█▆▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/epoch_lr,████████▇▇▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁
train/loss,█▇▆▇▅▆▃▃▅▄▅▃▅▇▃▃▂▄▂▃▄▁▅▆▄▆▅▃▄▃▁▂▂▁▆▂▂▃▃▄
val/kl_div,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▁▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁
val/loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▁▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁

0,1
best_val_kl_div,0.54072
early_stopped_epoch,47.0
epoch,47.0
train/epoch_loss,0.40941
train/epoch_lr,0.0
train/loss,0.38375
val/kl_div,0.55526
val/loss,0.55526





   --- Epoch 1/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 0.9575, Valid Loss = 0.9639, LR = 0.000100
   New best model saved with validation loss: 0.9639
   --- Epoch 2/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 0.7624, Valid Loss = 0.7141, LR = 0.000100
   New best model saved with validation loss: 0.7141
   --- Epoch 3/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.6791, Valid Loss = 0.7656, LR = 0.000100
   No improvement. Patience: 1/10
   --- Epoch 4/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.6396, Valid Loss = 0.6497, LR = 0.000099
   New best model saved with validation loss: 0.6497
   --- Epoch 5/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.6113, Valid Loss = 0.6913, LR = 0.000098
   No improvement. Patience: 1/10
   --- Epoch 6/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.6069, Valid Loss = 0.5922, LR = 0.000098
   New best model saved with validation loss: 0.5922
   --- Epoch 7/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.5842, Valid Loss = 0.6081, LR = 0.000096
   No improvement. Patience: 1/10
   --- Epoch 8/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5731, Valid Loss = 0.5955, LR = 0.000095
   No improvement. Patience: 2/10
   --- Epoch 9/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.5673, Valid Loss = 0.5734, LR = 0.000094
   New best model saved with validation loss: 0.5734
   --- Epoch 10/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.5573, Valid Loss = 0.5501, LR = 0.000092
   New best model saved with validation loss: 0.5501
   --- Epoch 11/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.5498, Valid Loss = 0.5860, LR = 0.000090
   No improvement. Patience: 1/10
   --- Epoch 12/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.5421, Valid Loss = 0.5510, LR = 0.000089
   No improvement. Patience: 2/10
   --- Epoch 13/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.5360, Valid Loss = 0.5338, LR = 0.000086
   New best model saved with validation loss: 0.5338
   --- Epoch 14/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.5262, Valid Loss = 0.5677, LR = 0.000084
   No improvement. Patience: 1/10
   --- Epoch 15/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.5214, Valid Loss = 0.5519, LR = 0.000082
   No improvement. Patience: 2/10
   --- Epoch 16/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.5174, Valid Loss = 0.5579, LR = 0.000079
   No improvement. Patience: 3/10
   --- Epoch 17/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.5095, Valid Loss = 0.5609, LR = 0.000077
   No improvement. Patience: 4/10
   --- Epoch 18/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.5031, Valid Loss = 0.5374, LR = 0.000074
   No improvement. Patience: 5/10
   --- Epoch 19/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.4992, Valid Loss = 0.5445, LR = 0.000071
   No improvement. Patience: 6/10
   --- Epoch 20/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4946, Valid Loss = 0.5237, LR = 0.000068
   New best model saved with validation loss: 0.5237
   --- Epoch 21/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.4937, Valid Loss = 0.5758, LR = 0.000065
   No improvement. Patience: 1/10
   --- Epoch 22/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.4780, Valid Loss = 0.5454, LR = 0.000062
   No improvement. Patience: 2/10
   --- Epoch 23/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.4820, Valid Loss = 0.5595, LR = 0.000059
   No improvement. Patience: 3/10
   --- Epoch 24/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.4765, Valid Loss = 0.5470, LR = 0.000056
   No improvement. Patience: 4/10
   --- Epoch 25/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.4673, Valid Loss = 0.5261, LR = 0.000053
   No improvement. Patience: 5/10
   --- Epoch 26/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.4638, Valid Loss = 0.5419, LR = 0.000050
   No improvement. Patience: 6/10
   --- Epoch 27/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.4622, Valid Loss = 0.5386, LR = 0.000047
   No improvement. Patience: 7/10
   --- Epoch 28/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.4541, Valid Loss = 0.5303, LR = 0.000044
   No improvement. Patience: 8/10
   --- Epoch 29/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.4537, Valid Loss = 0.5340, LR = 0.000041
   No improvement. Patience: 9/10
   --- Epoch 30/50 ---


Training:   0%|          | 0/428 [00:00<?, ?it/s]

Validation:   0%|          | 0/106 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.4531, Valid Loss = 0.5367, LR = 0.000038
   No improvement. Patience: 10/10
   Early stopping triggered after 10 epochs without improvement.
Loading best model from fold 2 to generate OOF predictions...


Generating OOF for Fold 2:   0%|          | 0/106 [00:00<?, ?it/s]


Logged artifact for fold 2 with best validation loss: 0.5237


0,1
early_stopped_epoch,▁
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
train/epoch_loss,█▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
train/epoch_lr,███████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁
train/loss,█▆▆▄▄▆▃▃▄▄▄▅▄▄▄▄▃▆▂▅▃▃▃▂▃▂▂▅▂▃▂▂▂▃▅▃▂▃▅▁
val/kl_div,█▄▅▃▄▂▂▂▂▁▂▁▁▂▁▂▂▁▁▁▂▁▂▁▁▁▁▁▁▁
val/loss,█▄▅▃▄▂▂▂▂▁▂▁▁▂▁▂▂▁▁▁▂▁▂▁▁▁▁▁▁▁

0,1
best_val_kl_div,0.52374
early_stopped_epoch,30.0
epoch,30.0
train/epoch_loss,0.4531
train/epoch_lr,4e-05
train/loss,0.54885
val/kl_div,0.53669
val/loss,0.53669





   --- Epoch 1/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 0.9025, Valid Loss = 0.9132, LR = 0.000100
   New best model saved with validation loss: 0.9132
   --- Epoch 2/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 0.7229, Valid Loss = 0.7851, LR = 0.000100
   New best model saved with validation loss: 0.7851
   --- Epoch 3/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.6575, Valid Loss = 0.7821, LR = 0.000100
   New best model saved with validation loss: 0.7821
   --- Epoch 4/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.6248, Valid Loss = 0.7461, LR = 0.000099
   New best model saved with validation loss: 0.7461
   --- Epoch 5/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.5963, Valid Loss = 0.6774, LR = 0.000098
   New best model saved with validation loss: 0.6774
   --- Epoch 6/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.5821, Valid Loss = 0.6453, LR = 0.000098
   New best model saved with validation loss: 0.6453
   --- Epoch 7/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.5707, Valid Loss = 0.6524, LR = 0.000096
   No improvement. Patience: 1/10
   --- Epoch 8/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5592, Valid Loss = 0.6507, LR = 0.000095
   No improvement. Patience: 2/10
   --- Epoch 9/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.5471, Valid Loss = 0.6373, LR = 0.000094
   New best model saved with validation loss: 0.6373
   --- Epoch 10/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.5418, Valid Loss = 0.6201, LR = 0.000092
   New best model saved with validation loss: 0.6201
   --- Epoch 11/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.5318, Valid Loss = 0.6279, LR = 0.000090
   No improvement. Patience: 1/10
   --- Epoch 12/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.5298, Valid Loss = 0.6185, LR = 0.000089
   New best model saved with validation loss: 0.6185
   --- Epoch 13/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.5211, Valid Loss = 0.6381, LR = 0.000086
   No improvement. Patience: 1/10
   --- Epoch 14/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.5137, Valid Loss = 0.6447, LR = 0.000084
   No improvement. Patience: 2/10
   --- Epoch 15/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.5068, Valid Loss = 0.6026, LR = 0.000082
   New best model saved with validation loss: 0.6026
   --- Epoch 16/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.5017, Valid Loss = 0.6449, LR = 0.000079
   No improvement. Patience: 1/10
   --- Epoch 17/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.4893, Valid Loss = 0.6084, LR = 0.000077
   No improvement. Patience: 2/10
   --- Epoch 18/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.4882, Valid Loss = 0.6074, LR = 0.000074
   No improvement. Patience: 3/10
   --- Epoch 19/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.4859, Valid Loss = 0.5906, LR = 0.000071
   New best model saved with validation loss: 0.5906
   --- Epoch 20/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4801, Valid Loss = 0.6028, LR = 0.000068
   No improvement. Patience: 1/10
   --- Epoch 21/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.4737, Valid Loss = 0.5826, LR = 0.000065
   New best model saved with validation loss: 0.5826
   --- Epoch 22/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.4698, Valid Loss = 0.5975, LR = 0.000062
   No improvement. Patience: 1/10
   --- Epoch 23/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.4676, Valid Loss = 0.5928, LR = 0.000059
   No improvement. Patience: 2/10
   --- Epoch 24/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.4615, Valid Loss = 0.5790, LR = 0.000056
   New best model saved with validation loss: 0.5790
   --- Epoch 25/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.4552, Valid Loss = 0.5839, LR = 0.000053
   No improvement. Patience: 1/10
   --- Epoch 26/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.4473, Valid Loss = 0.6023, LR = 0.000050
   No improvement. Patience: 2/10
   --- Epoch 27/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.4472, Valid Loss = 0.6137, LR = 0.000047
   No improvement. Patience: 3/10
   --- Epoch 28/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.4433, Valid Loss = 0.5738, LR = 0.000044
   New best model saved with validation loss: 0.5738
   --- Epoch 29/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.4396, Valid Loss = 0.5818, LR = 0.000041
   No improvement. Patience: 1/10
   --- Epoch 30/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.4339, Valid Loss = 0.5699, LR = 0.000038
   New best model saved with validation loss: 0.5699
   --- Epoch 31/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 31: Train Loss = 0.4305, Valid Loss = 0.5972, LR = 0.000035
   No improvement. Patience: 1/10
   --- Epoch 32/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 32: Train Loss = 0.4270, Valid Loss = 0.5941, LR = 0.000032
   No improvement. Patience: 2/10
   --- Epoch 33/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 33: Train Loss = 0.4255, Valid Loss = 0.6057, LR = 0.000029
   No improvement. Patience: 3/10
   --- Epoch 34/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 34: Train Loss = 0.4193, Valid Loss = 0.5980, LR = 0.000026
   No improvement. Patience: 4/10
   --- Epoch 35/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 35: Train Loss = 0.4213, Valid Loss = 0.5875, LR = 0.000023
   No improvement. Patience: 5/10
   --- Epoch 36/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 36: Train Loss = 0.4139, Valid Loss = 0.5837, LR = 0.000021
   No improvement. Patience: 6/10
   --- Epoch 37/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 37: Train Loss = 0.4118, Valid Loss = 0.5810, LR = 0.000018
   No improvement. Patience: 7/10
   --- Epoch 38/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 38: Train Loss = 0.4105, Valid Loss = 0.5950, LR = 0.000016
   No improvement. Patience: 8/10
   --- Epoch 39/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 39: Train Loss = 0.4045, Valid Loss = 0.5856, LR = 0.000014
   No improvement. Patience: 9/10
   --- Epoch 40/50 ---


Training:   0%|          | 0/452 [00:00<?, ?it/s]

Validation:   0%|          | 0/83 [00:00<?, ?it/s]

   Epoch 40: Train Loss = 0.4074, Valid Loss = 0.5890, LR = 0.000011
   No improvement. Patience: 10/10
   Early stopping triggered after 10 epochs without improvement.
Loading best model from fold 3 to generate OOF predictions...


Generating OOF for Fold 3:   0%|          | 0/83 [00:00<?, ?it/s]


Logged artifact for fold 3 with best validation loss: 0.5699


0,1
early_stopped_epoch,▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/epoch_loss,█▅▅▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
train/epoch_lr,█████████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▄▅▄▄▄▄▄▂▃▃▂▄▂▃▄▃▂▃▃▅▄▂▁▂▃▂▃▂▁▂▂▂▄▁▂▃▂▁▂
val/kl_div,█▅▅▅▃▃▃▃▂▂▂▂▂▃▂▃▂▂▁▂▁▂▁▁▁▂▂▁▁▁▂▁▂▂▁▁▁▂▁▁
val/loss,█▅▅▅▃▃▃▃▂▂▂▂▂▃▂▃▂▂▁▂▁▂▁▁▁▂▂▁▁▁▂▁▂▂▁▁▁▂▁▁

0,1
best_val_kl_div,0.56988
early_stopped_epoch,40.0
epoch,40.0
train/epoch_loss,0.40742
train/epoch_lr,1e-05
train/loss,0.39544
val/kl_div,0.589
val/loss,0.589





   --- Epoch 1/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 0.9525, Valid Loss = 0.9854, LR = 0.000100
   New best model saved with validation loss: 0.9854
   --- Epoch 2/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 0.7526, Valid Loss = 0.7649, LR = 0.000100
   New best model saved with validation loss: 0.7649
   --- Epoch 3/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 0.6615, Valid Loss = 0.7213, LR = 0.000100
   New best model saved with validation loss: 0.7213
   --- Epoch 4/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 0.6242, Valid Loss = 0.7710, LR = 0.000099
   No improvement. Patience: 1/10
   --- Epoch 5/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 0.6052, Valid Loss = 0.6541, LR = 0.000098
   New best model saved with validation loss: 0.6541
   --- Epoch 6/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 0.5888, Valid Loss = 0.6757, LR = 0.000098
   No improvement. Patience: 1/10
   --- Epoch 7/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 0.5701, Valid Loss = 0.6409, LR = 0.000096
   New best model saved with validation loss: 0.6409
   --- Epoch 8/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.5575, Valid Loss = 0.6170, LR = 0.000095
   New best model saved with validation loss: 0.6170
   --- Epoch 9/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.5468, Valid Loss = 0.6344, LR = 0.000094
   No improvement. Patience: 1/10
   --- Epoch 10/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.5447, Valid Loss = 0.6351, LR = 0.000092
   No improvement. Patience: 2/10
   --- Epoch 11/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.5359, Valid Loss = 0.6718, LR = 0.000090
   No improvement. Patience: 3/10
   --- Epoch 12/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.5283, Valid Loss = 0.6085, LR = 0.000089
   New best model saved with validation loss: 0.6085
   --- Epoch 13/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.5236, Valid Loss = 0.6277, LR = 0.000086
   No improvement. Patience: 1/10
   --- Epoch 14/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.5153, Valid Loss = 0.6302, LR = 0.000084
   No improvement. Patience: 2/10
   --- Epoch 15/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.5128, Valid Loss = 0.6072, LR = 0.000082
   New best model saved with validation loss: 0.6072
   --- Epoch 16/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.5060, Valid Loss = 0.6116, LR = 0.000079
   No improvement. Patience: 1/10
   --- Epoch 17/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.4974, Valid Loss = 0.6171, LR = 0.000077
   No improvement. Patience: 2/10
   --- Epoch 18/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.4889, Valid Loss = 0.6075, LR = 0.000074
   No improvement. Patience: 3/10
   --- Epoch 19/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.4848, Valid Loss = 0.6135, LR = 0.000071
   No improvement. Patience: 4/10
   --- Epoch 20/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4802, Valid Loss = 0.6427, LR = 0.000068
   No improvement. Patience: 5/10
   --- Epoch 21/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.4816, Valid Loss = 0.6123, LR = 0.000065
   No improvement. Patience: 6/10
   --- Epoch 22/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.4728, Valid Loss = 0.6165, LR = 0.000062
   No improvement. Patience: 7/10
   --- Epoch 23/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.4681, Valid Loss = 0.6134, LR = 0.000059
   No improvement. Patience: 8/10
   --- Epoch 24/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.4672, Valid Loss = 0.6113, LR = 0.000056
   No improvement. Patience: 9/10
   --- Epoch 25/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.4614, Valid Loss = 0.5800, LR = 0.000053
   New best model saved with validation loss: 0.5800
   --- Epoch 26/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.4564, Valid Loss = 0.6065, LR = 0.000050
   No improvement. Patience: 1/10
   --- Epoch 27/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.4486, Valid Loss = 0.6152, LR = 0.000047
   No improvement. Patience: 2/10
   --- Epoch 28/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.4458, Valid Loss = 0.5786, LR = 0.000044
   New best model saved with validation loss: 0.5786
   --- Epoch 29/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.4418, Valid Loss = 0.5807, LR = 0.000041
   No improvement. Patience: 1/10
   --- Epoch 30/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.4394, Valid Loss = 0.5780, LR = 0.000038
   No improvement. Patience: 2/10
   --- Epoch 31/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 31: Train Loss = 0.4372, Valid Loss = 0.5820, LR = 0.000035
   No improvement. Patience: 3/10
   --- Epoch 32/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 32: Train Loss = 0.4342, Valid Loss = 0.5764, LR = 0.000032
   New best model saved with validation loss: 0.5764
   --- Epoch 33/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 33: Train Loss = 0.4279, Valid Loss = 0.5835, LR = 0.000029
   No improvement. Patience: 1/10
   --- Epoch 34/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 34: Train Loss = 0.4229, Valid Loss = 0.5866, LR = 0.000026
   No improvement. Patience: 2/10
   --- Epoch 35/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 35: Train Loss = 0.4233, Valid Loss = 0.5865, LR = 0.000023
   No improvement. Patience: 3/10
   --- Epoch 36/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 36: Train Loss = 0.4249, Valid Loss = 0.5784, LR = 0.000021
   No improvement. Patience: 4/10
   --- Epoch 37/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 37: Train Loss = 0.4191, Valid Loss = 0.5812, LR = 0.000018
   No improvement. Patience: 5/10
   --- Epoch 38/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 38: Train Loss = 0.4152, Valid Loss = 0.5949, LR = 0.000016
   No improvement. Patience: 6/10
   --- Epoch 39/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 39: Train Loss = 0.4151, Valid Loss = 0.5806, LR = 0.000014
   No improvement. Patience: 7/10
   --- Epoch 40/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 40: Train Loss = 0.4083, Valid Loss = 0.5760, LR = 0.000011
   No improvement. Patience: 8/10
   --- Epoch 41/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 41: Train Loss = 0.4038, Valid Loss = 0.5791, LR = 0.000010
   No improvement. Patience: 9/10
   --- Epoch 42/50 ---


Training:   0%|          | 0/429 [00:00<?, ?it/s]

Validation:   0%|          | 0/105 [00:00<?, ?it/s]

   Epoch 42: Train Loss = 0.4099, Valid Loss = 0.5767, LR = 0.000008
   No improvement. Patience: 10/10
   Early stopping triggered after 10 epochs without improvement.
Loading best model from fold 4 to generate OOF predictions...


Generating OOF for Fold 4:   0%|          | 0/105 [00:00<?, ?it/s]


Logged artifact for fold 4 with best validation loss: 0.5764


0,1
early_stopped_epoch,▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
train/epoch_loss,█▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/epoch_lr,█████████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,▆▅█▄▄▂▆▆▂▄▅▁▁▂▂▂▃▃▃▃▃▄▃▅▄▂▂▃▂▁▂▁▄▁▂▁▂▁▂▁
val/kl_div,█▄▃▄▂▃▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/loss,█▄▃▄▂▃▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_val_kl_div,0.57639
early_stopped_epoch,42.0
epoch,42.0
train/epoch_loss,0.40988
train/epoch_lr,1e-05
train/loss,0.57576
val/kl_div,0.57672
val/loss,0.57672



Calculating final OOF CV score from all collected predictions...

      FINISHED EXPERIMENT: 3 CNN BLOCKS
      OOF KL Score (all folds): 0.5676
      Mean of fold scores: 0.5661
3            | 0.5676             | 0.5661            
All experiments complete. Check wandb for detailed charts.


In [7]:
# Print the two cross-validation summary numbers for the finished run.
# NOTE(review): `overall_cv_score` and `all_fold_scores` are not defined in this
# cell; they are read from kernel state left behind by an earlier cell, so this
# cell raises NameError on a fresh kernel unless that cell has run first.
# NOTE(review): the values printed here match the "3 CNN BLOCKS" experiment
# summary in the preceding output, so they presumably describe only the last
# experiment of the sweep -- confirm before quoting them as the overall result.

# OOF score: labelled as computed once over the predictions of all folds.
print(f"OOF KL Score (calculated across all folds): {overall_cv_score:.4f}")
# Unweighted arithmetic mean of the entries in `all_fold_scores`. This can
# differ from the OOF score above when folds contribute different numbers of
# validation samples (the log shows differing validation batch counts per fold).
print(f"Mean of individual fold scores: {np.mean(all_fold_scores):.4f}")

OOF KL Score (calculated across all folds): 0.5676
Mean of individual fold scores: 0.5661
