In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedGroupKFold
from tqdm.auto import tqdm 
import wandb
from torch.optim.lr_scheduler import CosineAnnealingLR
import sys
from pathlib import Path

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "..", "..", "..")))
from src.utils.k_folds_creator import KFoldCreator
from src.utils.utils import get_models_save_path
from src.utils.constants import Constants 
from src.datasets.eeg_dataset import EEGDataset
from src.models.gru import GRUModel

wandb.login()

2025-10-16 09:18:36,737 :: root :: INFO :: Initialising Utils
2025-10-16 09:18:36,739 :: root :: INFO :: Initialising Datasets
2025-10-16 09:18:36,764 :: root :: INFO :: Initialising Models
[34m[1mwandb[0m: Currently logged in as: [33mmaikotrede[0m ([33mhms-hslu-aicomp-hs25[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
class CFG:
    seed = 42
    n_splits = 5
    data_path = '../../../data/'

    model_name = 'GRU'
    hidden_units = 128
    num_layers = 2
    target_size = 6 
    
    sampling_rate = 200 # Hz
    sequence_duration = 50 

    num_channels = 20 
    
    batch_size = 32
    num_workers = 0
    epochs = 30
    lr = 1e-3

CFG.sequence_length = CFG.sequence_duration * CFG.sampling_rate 

def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

set_seed(CFG.seed)

TARGETS = Constants.TARGETS


In [3]:
def get_dataloaders(df, fold_id):
    train_df = df[df['fold'] != fold_id].reset_index(drop=True)
    valid_df = df[df['fold'] == fold_id].reset_index(drop=True)

    train_dataset = EEGDataset(df=train_df, data_path=CFG.data_path, mode='train')

    valid_dataset = EEGDataset(df=valid_df, data_path=CFG.data_path, mode='valid')

    train_loader = DataLoader(
        train_dataset, batch_size=CFG.batch_size, shuffle=True,
        num_workers=CFG.num_workers, pin_memory=True, drop_last=True
    )
    
    valid_loader = DataLoader(
        valid_dataset, batch_size=CFG.batch_size, shuffle=False,
        num_workers=CFG.num_workers, pin_memory=True, drop_last=False
    )
    
    return train_loader, valid_loader

In [None]:


def run_training(df, DATA_PREPARATION_VOTE_METHOD):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    fold_scores = []

    for fold in range(CFG.n_splits):
        print(f"\n========== FOLD {fold} ==========")

        config = {
            "architecture": CFG.model_name, "hidden_units": CFG.hidden_units, "num_layers": CFG.num_layers,
            "fold": fold, "features": "raw_eeg", "sequence_duration": f"{CFG.sequence_duration}s",
            "optimizer": "AdamW", "learning_rate": CFG.lr, "batch_size": CFG.batch_size,
            "epochs": CFG.epochs, "seed": CFG.seed, "Scheduler": "CosineAnnealingLR" 
        }

        wandb.init(
            project="hms-aicomp-gru",
            name=f"gru-raw-eeg-fold{fold}", 
            tags=['gru-baseline', f'fold{fold}'],
            config=config
        )

        model = GRUModel(
            input_size=CFG.num_channels,
            hidden_size=CFG.hidden_units,
            num_layers=CFG.num_layers,
            num_classes=CFG.target_size
        )
        model.to(device)

        optimizer = torch.optim.AdamW(model.parameters(), lr=CFG.lr)
        scheduler = CosineAnnealingLR(optimizer, T_max=CFG.epochs)
        loss_fn = nn.KLDivLoss(reduction='batchmean')
        train_loader, valid_loader = get_dataloaders(df, fold)

        best_val_loss = float('inf')
        best_model_path = None

        for epoch in range(CFG.epochs):
            print(f"   --- Epoch {epoch+1}/{CFG.epochs} ---")
            
            model.train()
            train_loss = 0
            for signals, labels in tqdm(train_loader, desc="Training"):
                signals, labels = signals.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = model(signals)
                log_probs = F.log_softmax(outputs, dim=1)
                loss = loss_fn(log_probs, labels)
                loss.backward()

                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

                train_loss += loss.item() * signals.size(0)
                wandb.log({"train/loss": loss.item()})
            train_loss /= len(train_loader.dataset)

            model.eval()
            valid_loss = 0
            with torch.no_grad():
                for i, (signals, labels) in enumerate(tqdm(valid_loader, desc="Validation")):
                    signals, labels = signals.to(device), labels.to(device)
                    outputs = model(signals)
                    log_probs = F.log_softmax(outputs, dim=1)
                    loss = loss_fn(log_probs, labels)
                    valid_loss += loss.item() * signals.size(0)


            valid_loss /= len(valid_loader.dataset)
            
            epoch_lr = optimizer.param_groups[0]['lr']
            print(f"   Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Valid Loss = {valid_loss:.4f}, LR = {epoch_lr:.6f}")
            wandb.log({
                "epoch": epoch + 1, "train/epoch_loss": train_loss, "val/loss": valid_loss,
                "val/kl_div": valid_loss, "train/epoch_lr": epoch_lr
            })

            if valid_loss < best_val_loss:
                best_val_loss = valid_loss
                best_model_path = get_models_save_path() / "GRUModel" / DATA_PREPARATION_VOTE_METHOD / f'best_model_fold{fold}.pth'
                best_model_path.parent.mkdir(parents=True, exist_ok=True)
                torch.save(model.state_dict(), best_model_path)
                print(f"   New best model saved with validation loss: {best_val_loss:.4f}")
            scheduler.step()
        
        fold_scores.append(best_val_loss)
        wandb.summary['best_val_kl_div'] = best_val_loss
        if best_model_path:
            artifact = wandb.Artifact(f'model-fold{fold}', type='model')
            artifact.add_file(best_model_path)
            wandb.log_artifact(artifact)
            print(f"\nLogged artifact for fold {fold} with best validation loss: {best_val_loss:.4f}")
        else:
            print("\nNo best model was saved during training for this fold.")
        wandb.finish()
    return fold_scores

DATA_PREPARATION_VOTE_METHOD = "sum_and_normalize"

print("Preparing data and creating folds...")
df = pd.read_csv(CFG.data_path + 'processed_data_sum_votes_window.csv') 

label_map = {t: i for i, t in enumerate(TARGETS)}
df['expert_consensus'] = df[TARGETS].idxmax(axis=1)

print('Train shape:', df.shape)
print('Targets', list(TARGETS))

fold_creator = KFoldCreator(n_splits=CFG.n_splits, seed=CFG.seed)
df = fold_creator.create_folds(df, stratify_col='expert_consensus', group_col='patient_id')

print("Folds created. Value counts per fold:")
print(df['fold'].value_counts())



Preparing data and creating folds...
Train shape: (17089, 12)
Targets ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
Folds created. Value counts per fold:
fold
0    3741
1    3703
2    3527
4    3081
3    3037
Name: count, dtype: int64


In [5]:
df

Unnamed: 0,eeg_id,spectrogram_id,min_offset,max_offset,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,fold
0,568657,789577333,0.0,16.0,20654,other_vote,0.0,0.000000,0.25,0.000000,0.166667,0.583333,0
1,582999,1552638400,0.0,38.0,20230,lpd_vote,0.0,0.857143,0.00,0.071429,0.000000,0.071429,0
2,642382,14960202,1008.0,1032.0,5955,other_vote,0.0,0.000000,0.00,0.000000,0.000000,1.000000,2
3,751790,618728447,908.0,908.0,38549,gpd_vote,0.0,0.000000,1.00,0.000000,0.000000,0.000000,3
4,778705,52296320,0.0,0.0,40955,other_vote,0.0,0.000000,0.00,0.000000,0.000000,1.000000,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17084,4293354003,1188113564,0.0,0.0,16610,grda_vote,0.0,0.000000,0.00,0.000000,0.500000,0.500000,2
17085,4293843368,1549502620,0.0,0.0,15065,grda_vote,0.0,0.000000,0.00,0.000000,0.500000,0.500000,2
17086,4294455489,2105480289,0.0,0.0,56,other_vote,0.0,0.000000,0.00,0.000000,0.000000,1.000000,4
17087,4294858825,657299228,0.0,12.0,4312,other_vote,0.0,0.000000,0.00,0.000000,0.066667,0.933333,0


In [6]:
if __name__ == '__main__':
    all_fold_scores = run_training(df, DATA_PREPARATION_VOTE_METHOD)
    cv_score = np.mean(all_fold_scores)
    print(f"\nCross-Validation Score (Mean KL Divergence across folds): {cv_score:.4f}")

Using device: cuda



   --- Epoch 1/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2492, Valid Loss = 1.3072, LR = 0.001000
   New best model saved with validation loss: 1.3072
   --- Epoch 2/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.2237, Valid Loss = 1.3090, LR = 0.000997
   --- Epoch 3/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 1.2065, Valid Loss = 1.3252, LR = 0.000989
   --- Epoch 4/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 1.1910, Valid Loss = 1.3061, LR = 0.000976
   New best model saved with validation loss: 1.3061
   --- Epoch 5/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 1.1667, Valid Loss = 1.3047, LR = 0.000957
   New best model saved with validation loss: 1.3047
   --- Epoch 6/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 1.1359, Valid Loss = 1.2781, LR = 0.000933
   New best model saved with validation loss: 1.2781
   --- Epoch 7/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 1.0917, Valid Loss = 1.3273, LR = 0.000905
   --- Epoch 8/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 1.0431, Valid Loss = 1.2780, LR = 0.000872
   New best model saved with validation loss: 1.2780
   --- Epoch 9/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.9737, Valid Loss = 1.2446, LR = 0.000835
   New best model saved with validation loss: 1.2446
   --- Epoch 10/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.9124, Valid Loss = 1.2339, LR = 0.000794
   New best model saved with validation loss: 1.2339
   --- Epoch 11/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.8597, Valid Loss = 1.2483, LR = 0.000750
   --- Epoch 12/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.8096, Valid Loss = 1.1889, LR = 0.000703
   New best model saved with validation loss: 1.1889
   --- Epoch 13/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.7602, Valid Loss = 1.1886, LR = 0.000655
   New best model saved with validation loss: 1.1886
   --- Epoch 14/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.7169, Valid Loss = 1.1966, LR = 0.000604
   --- Epoch 15/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.6675, Valid Loss = 1.1629, LR = 0.000552
   New best model saved with validation loss: 1.1629
   --- Epoch 16/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.6301, Valid Loss = 1.2936, LR = 0.000500
   --- Epoch 17/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.5852, Valid Loss = 1.2757, LR = 0.000448
   --- Epoch 18/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.5564, Valid Loss = 1.2544, LR = 0.000396
   --- Epoch 19/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.5214, Valid Loss = 1.2552, LR = 0.000345
   --- Epoch 20/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4858, Valid Loss = 1.3322, LR = 0.000297
   --- Epoch 21/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.4556, Valid Loss = 1.3168, LR = 0.000250
   --- Epoch 22/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.4332, Valid Loss = 1.3680, LR = 0.000206
   --- Epoch 23/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.4124, Valid Loss = 1.3692, LR = 0.000165
   --- Epoch 24/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.3941, Valid Loss = 1.3929, LR = 0.000128
   --- Epoch 25/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.3807, Valid Loss = 1.4224, LR = 0.000095
   --- Epoch 26/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.3706, Valid Loss = 1.4062, LR = 0.000067
   --- Epoch 27/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.3606, Valid Loss = 1.4206, LR = 0.000043
   --- Epoch 28/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.3539, Valid Loss = 1.4306, LR = 0.000024
   --- Epoch 29/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.3484, Valid Loss = 1.4329, LR = 0.000011
   --- Epoch 30/30 ---


Training:   0%|          | 0/417 [00:00<?, ?it/s]

Validation:   0%|          | 0/117 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.3474, Valid Loss = 1.4345, LR = 0.000003

Logged artifact for fold 0 with best validation loss: 1.1629


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
gradients/gru_layer_0_norm,▁▁▁▂▁▂▂▃▂▃▃▂█▃▃▅▃▄▅▅▄▄▄▅▅▄▅▄▅▄▄▇▄▄▃▄▄▆▃▅
gradients/total_norm,▁▂▂▁▂▁▂▂▂▃▃▂▅▃▄▆▅▅▄▆▆▄▆▆▅▄▆▅█▇▅▆▇▆▄▆▆█▅▅
train/epoch_loss,████▇▇▇▆▆▅▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
train/epoch_lr,██████▇▇▇▇▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
train/loss,███▇█▇▇▆▇▇▅▇▆▅▅▂▄▆▄▄▃▄▃▂▂▂▂▂▄▂▂▂▁▁▁▁▁▂▁▁
val/kl_div,▅▅▅▅▅▄▅▄▃▃▃▂▂▂▁▄▄▃▃▅▅▆▆▇█▇████
val/loss,▅▅▅▅▅▄▅▄▃▃▃▂▂▂▁▄▄▃▃▅▅▆▆▇█▇████

0,1
best_val_kl_div,1.16293
epoch,30.0
gradients/gru_layer_0_norm,1.31256
gradients/total_norm,3.11618
train/epoch_loss,0.34742
train/epoch_lr,0.0
train/loss,0.45641
val/kl_div,1.43448
val/loss,1.43448





   --- Epoch 1/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2600, Valid Loss = 1.2701, LR = 0.001000
   New best model saved with validation loss: 1.2701
   --- Epoch 2/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.2347, Valid Loss = 1.2786, LR = 0.000997
   --- Epoch 3/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 1.2188, Valid Loss = 1.2817, LR = 0.000989
   --- Epoch 4/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 1.2038, Valid Loss = 1.2848, LR = 0.000976
   --- Epoch 5/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 1.1832, Valid Loss = 1.2927, LR = 0.000957
   --- Epoch 6/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 1.1485, Valid Loss = 1.2780, LR = 0.000933
   --- Epoch 7/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 1.1053, Valid Loss = 1.2358, LR = 0.000905
   New best model saved with validation loss: 1.2358
   --- Epoch 8/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 1.0282, Valid Loss = 1.1723, LR = 0.000872
   New best model saved with validation loss: 1.1723
   --- Epoch 9/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.9388, Valid Loss = 1.0984, LR = 0.000835
   New best model saved with validation loss: 1.0984
   --- Epoch 10/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.8482, Valid Loss = 1.1026, LR = 0.000794
   --- Epoch 11/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.7905, Valid Loss = 1.0821, LR = 0.000750
   New best model saved with validation loss: 1.0821
   --- Epoch 12/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.7365, Valid Loss = 1.0657, LR = 0.000703
   New best model saved with validation loss: 1.0657
   --- Epoch 13/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.6881, Valid Loss = 1.0652, LR = 0.000655
   New best model saved with validation loss: 1.0652
   --- Epoch 14/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.6490, Valid Loss = 1.0768, LR = 0.000604
   --- Epoch 15/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.6030, Valid Loss = 1.0606, LR = 0.000552
   New best model saved with validation loss: 1.0606
   --- Epoch 16/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.5632, Valid Loss = 1.1014, LR = 0.000500
   --- Epoch 17/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.5259, Valid Loss = 1.1283, LR = 0.000448
   --- Epoch 18/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.4891, Valid Loss = 1.1752, LR = 0.000396
   --- Epoch 19/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.4548, Valid Loss = 1.1790, LR = 0.000345
   --- Epoch 20/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4211, Valid Loss = 1.2073, LR = 0.000297
   --- Epoch 21/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.3975, Valid Loss = 1.2231, LR = 0.000250
   --- Epoch 22/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.3730, Valid Loss = 1.2713, LR = 0.000206
   --- Epoch 23/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.3566, Valid Loss = 1.2295, LR = 0.000165
   --- Epoch 24/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.3396, Valid Loss = 1.2593, LR = 0.000128
   --- Epoch 25/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.3288, Valid Loss = 1.3109, LR = 0.000095
   --- Epoch 26/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.3122, Valid Loss = 1.2945, LR = 0.000067
   --- Epoch 27/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.3053, Valid Loss = 1.3021, LR = 0.000043
   --- Epoch 28/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.3012, Valid Loss = 1.3040, LR = 0.000024
   --- Epoch 29/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.2970, Valid Loss = 1.3111, LR = 0.000011
   --- Epoch 30/30 ---


Training:   0%|          | 0/418 [00:00<?, ?it/s]

Validation:   0%|          | 0/116 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.2995, Valid Loss = 1.3090, LR = 0.000003

Logged artifact for fold 1 with best validation loss: 1.0606


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
gradients/gru_layer_0_norm,▁▁▁▂▁▁▂▂▂▂▃▃▅▄▃▃▆▃▄▄▄▃▄▄▄▄▅▄▃▅▅▄▄▅▅▄▄▄█▄
gradients/total_norm,▂▂▁▁▁▂▂▂▂▂▃▄▄▅▃▄▃▅▄▄▄▅▆▄▅█▆▆▆█▅▄▇█▅▅▅▄▃▇
train/epoch_loss,████▇▇▇▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
train/epoch_lr,██████▇▇▇▇▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
train/loss,▆▇▇▇▇▇▇▆▆█▅▇▆▆▆▅▄▄▃▅▃▄▄▃▃▃▃▂▂▃▂▃▃▂▂▃▁▂▁▂
val/kl_div,▇▇▇▇▇▇▆▄▂▂▂▁▁▁▁▂▃▄▄▅▆▇▆▇██████
val/loss,▇▇▇▇▇▇▆▄▂▂▂▁▁▁▁▂▃▄▄▅▆▇▆▇██████

0,1
best_val_kl_div,1.06057
epoch,30.0
gradients/gru_layer_0_norm,0.74158
gradients/total_norm,1.62277
train/epoch_loss,0.29954
train/epoch_lr,0.0
train/loss,0.22007
val/kl_div,1.30899
val/loss,1.30899





   --- Epoch 1/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2821, Valid Loss = 1.1774, LR = 0.001000
   New best model saved with validation loss: 1.1774
   --- Epoch 2/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.2556, Valid Loss = 1.2021, LR = 0.000997
   --- Epoch 3/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 1.2358, Valid Loss = 1.1797, LR = 0.000989
   --- Epoch 4/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 1.2145, Valid Loss = 1.1722, LR = 0.000976
   New best model saved with validation loss: 1.1722
   --- Epoch 5/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 1.1864, Valid Loss = 1.1677, LR = 0.000957
   New best model saved with validation loss: 1.1677
   --- Epoch 6/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 1.1549, Valid Loss = 1.1693, LR = 0.000933
   --- Epoch 7/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 1.1066, Valid Loss = 1.1508, LR = 0.000905
   New best model saved with validation loss: 1.1508
   --- Epoch 8/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 1.0304, Valid Loss = 1.1219, LR = 0.000872
   New best model saved with validation loss: 1.1219
   --- Epoch 9/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.9441, Valid Loss = 1.0711, LR = 0.000835
   New best model saved with validation loss: 1.0711
   --- Epoch 10/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.8538, Valid Loss = 1.0754, LR = 0.000794
   --- Epoch 11/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.7931, Valid Loss = 1.0912, LR = 0.000750
   --- Epoch 12/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.7338, Valid Loss = 1.0431, LR = 0.000703
   New best model saved with validation loss: 1.0431
   --- Epoch 13/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.6868, Valid Loss = 1.0329, LR = 0.000655
   New best model saved with validation loss: 1.0329
   --- Epoch 14/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.6442, Valid Loss = 1.0983, LR = 0.000604
   --- Epoch 15/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.5982, Valid Loss = 1.0304, LR = 0.000552
   New best model saved with validation loss: 1.0304
   --- Epoch 16/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.5554, Valid Loss = 1.0704, LR = 0.000500
   --- Epoch 17/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.5149, Valid Loss = 1.0901, LR = 0.000448
   --- Epoch 18/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.4791, Valid Loss = 1.1197, LR = 0.000396
   --- Epoch 19/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.4486, Valid Loss = 1.1150, LR = 0.000345
   --- Epoch 20/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4167, Valid Loss = 1.1526, LR = 0.000297
   --- Epoch 21/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.3848, Valid Loss = 1.1938, LR = 0.000250
   --- Epoch 22/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.3713, Valid Loss = 1.2079, LR = 0.000206
   --- Epoch 23/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.3485, Valid Loss = 1.2293, LR = 0.000165
   --- Epoch 24/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.3327, Valid Loss = 1.2703, LR = 0.000128
   --- Epoch 25/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.3203, Valid Loss = 1.2440, LR = 0.000095
   --- Epoch 26/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.3106, Valid Loss = 1.2731, LR = 0.000067
   --- Epoch 27/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.3042, Valid Loss = 1.3012, LR = 0.000043
   --- Epoch 28/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.2977, Valid Loss = 1.2999, LR = 0.000024
   --- Epoch 29/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.2963, Valid Loss = 1.3071, LR = 0.000011
   --- Epoch 30/30 ---


Training:   0%|          | 0/423 [00:00<?, ?it/s]

Validation:   0%|          | 0/111 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.2896, Valid Loss = 1.3032, LR = 0.000003

Logged artifact for fold 2 with best validation loss: 1.0304


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
gradients/gru_layer_0_norm,▁▁▂▂▂▂▃▂▃▆▅▃▅▅▄▄▅▄▄▇▅█▆▄▅▇▄▄▄▇▅▄▄▆▄▅▅█▆▇
gradients/total_norm,▂▁▂▁▁▂▂▂▂▂▄▄▄▄▃▄▃▃▄▃▃▃▃▄▃▄▄▅▄▅▄▃▅▃█▃▂▃▄▄
train/epoch_loss,████▇▇▇▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
train/epoch_lr,██████▇▇▇▇▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
train/loss,███▇▇▆▆▇▅▇▆▆▇▄▅▆▃▄▄▅▃▄▃▃▃▃▁▂▂▂▃▂▂▂▂▁▂▁▂▁
val/kl_div,▅▅▅▅▄▅▄▃▂▂▃▁▁▃▁▂▃▃▃▄▅▅▆▇▆▇████
val/loss,▅▅▅▅▄▅▄▃▂▂▃▁▁▃▁▂▃▃▃▄▅▅▆▇▆▇████

0,1
best_val_kl_div,1.03035
epoch,30.0
gradients/gru_layer_0_norm,0.87028
gradients/total_norm,1.98319
train/epoch_loss,0.28961
train/epoch_lr,0.0
train/loss,0.41114
val/kl_div,1.30319
val/loss,1.30319





   --- Epoch 1/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2606, Valid Loss = 1.3059, LR = 0.001000
   New best model saved with validation loss: 1.3059
   --- Epoch 2/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.2337, Valid Loss = 1.2906, LR = 0.000997
   New best model saved with validation loss: 1.2906
   --- Epoch 3/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 1.2173, Valid Loss = 1.2667, LR = 0.000989
   New best model saved with validation loss: 1.2667
   --- Epoch 4/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 1.2004, Valid Loss = 1.2839, LR = 0.000976
   --- Epoch 5/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 1.1764, Valid Loss = 1.2945, LR = 0.000957
   --- Epoch 6/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 1.1437, Valid Loss = 1.3166, LR = 0.000933
   --- Epoch 7/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 1.1065, Valid Loss = 1.2871, LR = 0.000905
   --- Epoch 8/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 1.0424, Valid Loss = 1.2567, LR = 0.000872
   New best model saved with validation loss: 1.2567
   --- Epoch 9/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.9595, Valid Loss = 1.2457, LR = 0.000835
   New best model saved with validation loss: 1.2457
   --- Epoch 10/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.8786, Valid Loss = 1.2491, LR = 0.000794
   --- Epoch 11/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.8208, Valid Loss = 1.2098, LR = 0.000750
   New best model saved with validation loss: 1.2098
   --- Epoch 12/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.7600, Valid Loss = 1.1252, LR = 0.000703
   New best model saved with validation loss: 1.1252
   --- Epoch 13/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.7110, Valid Loss = 1.1184, LR = 0.000655
   New best model saved with validation loss: 1.1184
   --- Epoch 14/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.6620, Valid Loss = 1.1228, LR = 0.000604
   --- Epoch 15/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.6099, Valid Loss = 1.1134, LR = 0.000552
   New best model saved with validation loss: 1.1134
   --- Epoch 16/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.5696, Valid Loss = 1.1082, LR = 0.000500
   New best model saved with validation loss: 1.1082
   --- Epoch 17/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.5279, Valid Loss = 1.1415, LR = 0.000448
   --- Epoch 18/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.4871, Valid Loss = 1.1652, LR = 0.000396
   --- Epoch 19/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.4570, Valid Loss = 1.2098, LR = 0.000345
   --- Epoch 20/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4272, Valid Loss = 1.2393, LR = 0.000297
   --- Epoch 21/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.4023, Valid Loss = 1.2177, LR = 0.000250
   --- Epoch 22/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.3739, Valid Loss = 1.2889, LR = 0.000206
   --- Epoch 23/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.3606, Valid Loss = 1.3087, LR = 0.000165
   --- Epoch 24/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.3410, Valid Loss = 1.3493, LR = 0.000128
   --- Epoch 25/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.3281, Valid Loss = 1.3616, LR = 0.000095
   --- Epoch 26/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.3158, Valid Loss = 1.3361, LR = 0.000067
   --- Epoch 27/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.3119, Valid Loss = 1.3494, LR = 0.000043
   --- Epoch 28/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.3035, Valid Loss = 1.3643, LR = 0.000024
   --- Epoch 29/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.2997, Valid Loss = 1.3568, LR = 0.000011
   --- Epoch 30/30 ---


Training:   0%|          | 0/439 [00:00<?, ?it/s]

Validation:   0%|          | 0/95 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.2973, Valid Loss = 1.3608, LR = 0.000003

Logged artifact for fold 3 with best validation loss: 1.1082


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
gradients/gru_layer_0_norm,▁▁▁▂▂▂▂▂▂▂▃▅▃▅▄▅▃▄▄▃▄▅▆▄█▇▅▃▄▄▆▇▇▄▅▃▄▅▆▆
gradients/total_norm,▁▁▁▁▁▂▃▂▃▂▂▂▃▃▃▃▂▂▃▃▃▄▃█▃▃▄▄▃▂▂▃▂▃▄▂▃▃▃▃
train/epoch_loss,████▇▇▇▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
train/epoch_lr,██████▇▇▇▇▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
train/loss,█▇▆▇▆▇█▇▇▆▄▅▆▄▆▄▂▄▄▄▂▃▃▁▂▂▂▃▂▁▁▂▂▁▁▁▂▁▁▁
val/kl_div,▆▆▅▆▆▇▆▅▅▅▄▁▁▁▁▁▂▃▄▅▄▆▆██▇████
val/loss,▆▆▅▆▆▇▆▅▅▅▄▁▁▁▁▁▂▃▄▅▄▆▆██▇████

0,1
best_val_kl_div,1.10818
epoch,30.0
gradients/gru_layer_0_norm,0.97533
gradients/total_norm,1.99172
train/epoch_loss,0.29728
train/epoch_lr,0.0
train/loss,0.23411
val/kl_div,1.36081
val/loss,1.36081





   --- Epoch 1/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 1: Train Loss = 1.2652, Valid Loss = 1.2622, LR = 0.001000
   New best model saved with validation loss: 1.2622
   --- Epoch 2/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 2: Train Loss = 1.2343, Valid Loss = 1.2542, LR = 0.000997
   New best model saved with validation loss: 1.2542
   --- Epoch 3/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 3: Train Loss = 1.2206, Valid Loss = 1.2514, LR = 0.000989
   New best model saved with validation loss: 1.2514
   --- Epoch 4/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 4: Train Loss = 1.2021, Valid Loss = 1.2662, LR = 0.000976
   --- Epoch 5/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 5: Train Loss = 1.1737, Valid Loss = 1.2839, LR = 0.000957
   --- Epoch 6/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 6: Train Loss = 1.1288, Valid Loss = 1.3126, LR = 0.000933
   --- Epoch 7/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 7: Train Loss = 1.0683, Valid Loss = 1.3146, LR = 0.000905
   --- Epoch 8/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 8: Train Loss = 0.9933, Valid Loss = 1.2546, LR = 0.000872
   --- Epoch 9/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 9: Train Loss = 0.9071, Valid Loss = 1.2612, LR = 0.000835
   --- Epoch 10/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 10: Train Loss = 0.8363, Valid Loss = 1.2367, LR = 0.000794
   New best model saved with validation loss: 1.2367
   --- Epoch 11/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 11: Train Loss = 0.7798, Valid Loss = 1.3153, LR = 0.000750
   --- Epoch 12/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 12: Train Loss = 0.7331, Valid Loss = 1.2398, LR = 0.000703
   --- Epoch 13/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 13: Train Loss = 0.6887, Valid Loss = 1.2721, LR = 0.000655
   --- Epoch 14/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 14: Train Loss = 0.6498, Valid Loss = 1.2843, LR = 0.000604
   --- Epoch 15/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 15: Train Loss = 0.6109, Valid Loss = 1.3528, LR = 0.000552
   --- Epoch 16/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 16: Train Loss = 0.5748, Valid Loss = 1.3517, LR = 0.000500
   --- Epoch 17/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 17: Train Loss = 0.5366, Valid Loss = 1.3767, LR = 0.000448
   --- Epoch 18/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 18: Train Loss = 0.5017, Valid Loss = 1.3662, LR = 0.000396
   --- Epoch 19/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 19: Train Loss = 0.4710, Valid Loss = 1.4181, LR = 0.000345
   --- Epoch 20/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 20: Train Loss = 0.4383, Valid Loss = 1.4339, LR = 0.000297
   --- Epoch 21/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 21: Train Loss = 0.4128, Valid Loss = 1.4759, LR = 0.000250
   --- Epoch 22/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 22: Train Loss = 0.3884, Valid Loss = 1.5335, LR = 0.000206
   --- Epoch 23/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 23: Train Loss = 0.3694, Valid Loss = 1.5853, LR = 0.000165
   --- Epoch 24/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 24: Train Loss = 0.3524, Valid Loss = 1.5709, LR = 0.000128
   --- Epoch 25/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 25: Train Loss = 0.3412, Valid Loss = 1.6184, LR = 0.000095
   --- Epoch 26/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 26: Train Loss = 0.3281, Valid Loss = 1.6352, LR = 0.000067
   --- Epoch 27/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 27: Train Loss = 0.3188, Valid Loss = 1.6260, LR = 0.000043
   --- Epoch 28/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 28: Train Loss = 0.3125, Valid Loss = 1.6446, LR = 0.000024
   --- Epoch 29/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 29: Train Loss = 0.3058, Valid Loss = 1.6556, LR = 0.000011
   --- Epoch 30/30 ---


Training:   0%|          | 0/437 [00:00<?, ?it/s]

Validation:   0%|          | 0/97 [00:00<?, ?it/s]

   Epoch 30: Train Loss = 0.3071, Valid Loss = 1.6521, LR = 0.000003

Logged artifact for fold 4 with best validation loss: 1.2367


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
gradients/gru_layer_0_norm,▁▁▂▃▄▂▃▂▄▂▄▃█▃▅▇▃▆▄▅▄▆▄▆▅▅▅▆▇█▇▄█▅▇▅▆▅▅▆
gradients/total_norm,▁▁▁▁▂▃▆▃▃▃▄▇▄▃▄▅▃▅█▄▄▄▄▆▇█▃▅▇▅▄▅▄▆▃▄▅▄▄▄
train/epoch_loss,████▇▇▇▆▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
train/epoch_lr,██████▇▇▇▇▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
train/loss,█▇██▇▇▆▆▆▆▆▄▄▅▄▃▃▃▄▃▃▃▂▃▃▃▂▂▂▂▁▂▂▂▂▁▂▂▁▂
val/kl_div,▁▁▁▁▂▂▂▁▁▁▂▁▂▂▃▃▃▃▄▄▅▆▇▇▇█████
val/loss,▁▁▁▁▂▂▂▁▁▁▂▁▂▂▃▃▃▃▄▄▅▆▇▇▇█████

0,1
best_val_kl_div,1.23668
epoch,30.0
gradients/gru_layer_0_norm,0.62049
gradients/total_norm,1.54394
train/epoch_loss,0.30707
train/epoch_lr,0.0
train/loss,0.28277
val/kl_div,1.65209
val/loss,1.65209



Cross-Validation Score (Mean KL Divergence across folds): 1.1197


In [7]:
cv_score = np.mean(all_fold_scores)
print(f"\nCross-Validation Score (Mean KL Divergence across folds): {cv_score:.4f}")


Cross-Validation Score (Mean KL Divergence across folds): 1.1197
