In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import ConcatDataset, DataLoader, SubsetRandomSampler
from torchvision.datasets import ImageFolder
from torchvision import transforms
from skopt import gp_minimize, load
from skopt.space import Real
from skopt.callbacks import CheckpointSaver
from sklearn.model_selection import KFold
import time
import re
import wandb

wandb.login()

from tqdm import tqdm


[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from C:\Users\JMN\_netrc.
[34m[1mwandb[0m: Currently logged in as: [33m2121jmmn[0m ([33m2121jmmn-danmarks-tekniske-universitet-dtu[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:

# ==========================================
# Checkpoint Configuration Variables
# ==========================================
CHECKPOINT_BASE_NAME = '3d_cv_optimization'
USE_CHECKPOINT = True   # Set to True to resume from a checkpoint, False to start new
DESIRED_CHECKPOINT_ID = None  # Set to None for latest, or an integer for a specific checkpoint ID

# Local checkpoint directory (also holds the cached normalization statistics)
DRIVE_DIR = r"c:\Users\JMN\Documents\Privat\Uddannelse\ActiveML\mini-projekt\BO_Checkpoints"
os.makedirs(DRIVE_DIR, exist_ok=True)

# ==========================================
# 1. Data Preprocessing & Loading
# ==========================================
# Local dataset path; expected to contain "Training" and "Testing" class-folder trees
dataset_path = r"c:\Users\JMN\Documents\Privat\Uddannelse\ActiveML\mini-projekt\dataset"
print(f"Contents of {dataset_path}: {os.listdir(dataset_path)}")

# --- Compute (or load cached) dataset-specific normalization statistics ---
# The statistics are per-channel mean/std over Training + Testing combined, after
# resizing to 128x128. They are cached as JSON so later runs skip the full pass.
import json
NORM_STATS_FILE = os.path.join(DRIVE_DIR, "dataset_norm_stats.json")

if os.path.exists(NORM_STATS_FILE):
    with open(NORM_STATS_FILE, "r") as f:
        _stats = json.load(f)
    DATASET_MEAN = _stats["mean"]
    DATASET_STD  = _stats["std"]
    print(f"Loaded cached normalization stats from {NORM_STATS_FILE}")
else:
    print("Computing dataset-specific normalization statistics (first run)...")
    _tmp_transform = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ])
    _tmp_train = ImageFolder(os.path.join(dataset_path, "Training"), transform=_tmp_transform)
    _tmp_test  = ImageFolder(os.path.join(dataset_path, "Testing"),  transform=_tmp_transform)
    _tmp_all   = ConcatDataset([_tmp_train, _tmp_test])
    _tmp_loader = DataLoader(_tmp_all, batch_size=256, shuffle=False, num_workers=0)

    # Running per-channel sums of x and x^2. float64 accumulators keep the
    # E[x^2] - E[x]^2 subtraction below from losing precision over many pixels.
    _mean = torch.zeros(3, dtype=torch.float64)
    _std  = torch.zeros(3, dtype=torch.float64)
    _n_pixels = 0
    for imgs, _ in tqdm(_tmp_loader, desc="Norm stats", leave=False):
        b, c, h, w = imgs.shape
        _n_pixels += b * h * w
        imgs = imgs.to(torch.float64)
        _mean += imgs.sum(dim=[0, 2, 3])
        _std  += (imgs ** 2).sum(dim=[0, 2, 3])

    _channel_mean = _mean / _n_pixels
    # Rounding can push the variance of a (near-)constant channel slightly below
    # zero, which would turn the std into NaN; clamp before taking the root.
    _channel_var = (_std / _n_pixels - _channel_mean ** 2).clamp_min(0.0)
    DATASET_MEAN = _channel_mean.tolist()
    DATASET_STD  = _channel_var.sqrt().tolist()
    del _tmp_transform, _tmp_train, _tmp_test, _tmp_all, _tmp_loader, _mean, _std, _n_pixels
    del _channel_mean, _channel_var

    # Save for future runs
    with open(NORM_STATS_FILE, "w") as f:
        json.dump({"mean": DATASET_MEAN, "std": DATASET_STD}, f, indent=2)
    print(f"Saved normalization stats to {NORM_STATS_FILE}")

print(f"Dataset mean: {DATASET_MEAN}")
print(f"Dataset std:  {DATASET_STD}")

# --- Final transform with computed statistics ---
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=DATASET_MEAN, std=DATASET_STD)
])

# Training and Testing folders are merged into one pool; the cross-validation
# splits are drawn from this concatenated dataset.
training_dataset = ImageFolder(os.path.join(dataset_path, "Training"), transform=transform)
testing_dataset  = ImageFolder(os.path.join(dataset_path, "Testing"),  transform=transform)
dataset = ConcatDataset([training_dataset, testing_dataset])
print(f"Total dataset size: {len(dataset)} images")

# ==========================================
# 2. Model Definition — SimpleTumorCNN
# ==========================================
class SimpleTumorCNN(nn.Module):
    """
    Small convolutional classifier (roughly 24k learnable parameters).

    The feature extractor stacks three identical stages
    (3x3 conv -> BatchNorm -> ReLU -> 2x2 max-pool) that widen the channels
    3 -> 16 -> 32 -> 64, followed by global average pooling. The head is
    Flatten -> Dropout -> Linear(64, num_classes).

    Args:
        num_classes: number of output logits.
        dropout_rate: dropout probability applied before the linear layer.
    """
    def __init__(self, num_classes=4, dropout_rate=0.1):
        super().__init__()

        # Channel widths at the input of stage 1 and the output of each stage.
        channel_plan = (3, 16, 32, 64)

        stages = []
        for in_ch, out_ch in zip(channel_plan[:-1], channel_plan[1:]):
            stages.extend([
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ])
        # Global pooling collapses whatever spatial size remains to 1x1.
        stages.append(nn.AdaptiveAvgPool2d((1, 1)))
        self.features = nn.Sequential(*stages)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(dropout_rate),
            nn.Linear(channel_plan[-1], num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        return self.classifier(self.features(x))

# Sanity check: build a throwaway model, report its learnable parameter count, discard it
_tmp_model = SimpleTumorCNN(num_classes=4, dropout_rate=0.1)
_param_count = sum(weight.numel() for weight in _tmp_model.parameters())
print(f"SimpleTumorCNN parameter count: {_param_count:,}")
del _tmp_model

# Shared loss used for both training and validation
criterion = nn.CrossEntropyLoss()

# ==========================================
# 3. Training Params & BO Configuration
# ==========================================
SEED = 42          # Seeds the K-fold split and the optimizer
CALLS = 100        # Total number of BO trials
EPOCHS = 50        # Training epochs per fold within a trial
N_FOLDS = 3        # Number of cross-validation folds
BATCH_SIZE = 32
NUM_WORKERS = 3    # DataLoader worker processes

# Three tunable hyperparameters, in the order train_model unpacks them:
# learning rate and weight decay are searched on a log scale, dropout linearly.
search_space = [
    Real(low=1e-4, high=1e-1, prior='log-uniform', name='learning_rate'),
    Real(low=1e-5, high=1e-2, prior='log-uniform', name='weight_decay'),
    Real(low=0.0,  high=0.5,  prior='uniform',     name='dropout'),
]

# Module-level state shared with train_model: the running trial counter and the
# checkpoint ID (overwritten by the main block; also used as the WandB group suffix)
checkpoint_id_for_this_run = 0
current_call = 0

def get_checkpoint_id(base_name, find_latest=False):
    """
    Pick a checkpoint ID based on the files already present in DRIVE_DIR.

    Files are recognised by the exact name pattern ``<base_name>_<digits>.pkl``.

    Args:
        base_name: checkpoint file prefix (matched literally).
        find_latest: when True, return the highest existing ID (for resuming);
            when False, return an ID that is not used by any existing file.

    Returns:
        With ``find_latest=True``: the largest existing ID, or None if there is
        no matching file. Otherwise: the smallest non-negative integer that no
        existing checkpoint uses (0 when the directory has no matches).
    """
    pattern = re.compile(rf'^{re.escape(base_name)}_(\d+)\.pkl$')

    # A set, not a list: zero-padded names such as "_0.pkl" and "_00.pkl" parse
    # to the same integer, and duplicates must not shift the gap search below
    # onto an ID that is already taken.
    used_ids = set()
    for f_name in os.listdir(DRIVE_DIR):
        match = pattern.match(f_name)
        if match:
            used_ids.add(int(match.group(1)))

    if find_latest:
        return max(used_ids) if used_ids else None

    # Reuse the first gap in the sequence, or append after the last ID.
    candidate = 0
    while candidate in used_ids:
        candidate += 1
    return candidate


# ==========================================
# 4. Objective Function (3-Fold CV)
# ==========================================
def train_model(params):
    """
    Objective function for Bayesian Optimization.

    Trains a fresh SimpleTumorCNN on each of the N_FOLDS cross-validation splits
    of the module-level ``dataset`` for EPOCHS epochs, evaluates it once on the
    held-out fold, and logs per-epoch / per-fold / per-trial metrics to a
    dedicated WandB run.

    Args:
        params: sequence ``[learning_rate, weight_decay, dropout]`` in the same
            order as ``search_space``.

    Returns:
        float: validation cross-entropy averaged over the folds (lower is better).

    Side effects:
        Increments the global ``current_call`` trial counter and opens/closes one
        WandB run. The run is closed even if training raises, so a failed trial
        does not leave a dangling run behind.
    """
    global current_call, checkpoint_id_for_this_run
    current_call += 1

    # Plain Python floats keep the WandB config and the log lines clean
    learning_rate = float(params[0])
    weight_decay  = float(params[1])
    dropout       = float(params[2])

    # Clear GPU memory from previous trial
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Initialize WandB for this trial
    run = wandb.init(
        entity="2121jmmn-danmarks-tekniske-universitet-dtu",
        group=f"{CHECKPOINT_BASE_NAME}_{checkpoint_id_for_this_run}",
        name=f"trial_{current_call}",
        config={
            "learning_rate": learning_rate,
            "weight_decay": weight_decay,
            "dropout": dropout,
            "batch_size": BATCH_SIZE,
            "epochs": EPOCHS,
            "n_folds": N_FOLDS,
            "optimizer": "AdamW",
            "trial": current_call,
        }
    )

    # Stays non-zero unless the whole trial completes, so an aborted trial is
    # reported to WandB as failed rather than finished.
    exit_code = 1
    try:
        print(f"\n{'='*60}")
        print(f"  Trial {current_call}/{CALLS}")
        print(f"  lr={learning_rate:.6f}  wd={weight_decay:.6f}  dropout={dropout:.4f}")
        print(f"{'='*60}")

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {device}")

        # --- 3-Fold Cross-Validation ---
        # Fixed random_state: every trial is scored on the same splits.
        kfold = KFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
        fold_losses = []
        fold_accuracies = []

        workers = NUM_WORKERS
        loader_kwargs = dict(
            batch_size=BATCH_SIZE,
            num_workers=workers,
            # DataLoader rejects persistent_workers=True when num_workers == 0
            persistent_workers=workers > 0,
        )

        for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(range(len(dataset)))):
            print(f"\n  --- Fold {fold_idx + 1}/{N_FOLDS} ---")

            # Samplers restrict each loader to this fold's indices
            train_loader = DataLoader(dataset, sampler=SubsetRandomSampler(train_idx), **loader_kwargs)
            val_loader   = DataLoader(dataset, sampler=SubsetRandomSampler(val_idx), **loader_kwargs)

            # Fresh model & optimizer per fold
            model = SimpleTumorCNN(num_classes=4, dropout_rate=dropout).to(device)
            optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

            # --- Training loop ---
            for epoch in range(EPOCHS):
                model.train()
                running_loss = 0.0
                data_time = 0.0      # seconds spent waiting on the loader
                compute_time = 0.0   # seconds spent in transfer/forward/backward/step
                data_pct = 0.0       # defined up front so logging works for an empty loader

                pbar = tqdm(train_loader, desc=f"  Fold {fold_idx+1} Epoch {epoch+1}/{EPOCHS}", leave=False)
                end = time.time()

                for _batch_idx, (inputs, labels) in enumerate(pbar):
                    data_time += time.time() - end

                    comp_start = time.time()
                    inputs, labels = inputs.to(device), labels.to(device)

                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    batch_loss = loss.item()
                    running_loss += batch_loss
                    compute_time += time.time() - comp_start

                    total_time = data_time + compute_time
                    data_pct = 100 * data_time / total_time if total_time > 0 else 0

                    # Rough whole-epoch duration: elapsed time plus the remaining
                    # batches at the average pace so far.
                    progress = pbar.format_dict
                    elapsed = progress.get('elapsed', 0)
                    remaining = (progress.get('total', 1) - progress.get('n', 0)) \
                                * elapsed \
                                / max(progress.get('n', 1), 1)
                    epoch_total = elapsed + remaining
                    et_min, et_sec = divmod(int(epoch_total), 60)

                    pbar.set_postfix({
                        'loss': f'{batch_loss:.4f}',
                        'epoch_est': f'{et_min:02d}:{et_sec:02d}',
                        'data%': f'{data_pct:.0f}%'
                    })
                    end = time.time()

                avg_train_loss = running_loss / max(len(train_loader), 1)
                wandb.log({
                    "fold": fold_idx + 1,
                    "epoch": epoch + 1,
                    "train_loss": avg_train_loss,
                    "data_loading_pct": data_pct,
                })

            # --- Validation for this fold (once, after the final epoch) ---
            model.eval()
            val_loss_sum = 0.0
            correct = 0
            total = 0
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    n_batch = labels.size(0)
                    # criterion returns the batch mean; weight it by the batch size so
                    # a short final batch does not count as much as a full one.
                    val_loss_sum += criterion(outputs, labels).item() * n_batch
                    predicted = outputs.argmax(dim=1)
                    total += n_batch
                    correct += (predicted == labels).sum().item()

            avg_fold_val_loss = val_loss_sum / total
            fold_accuracy = 100 * correct / total
            fold_losses.append(avg_fold_val_loss)
            fold_accuracies.append(fold_accuracy)

            wandb.log({
                "fold": fold_idx + 1,
                "fold_val_loss": avg_fold_val_loss,
                "fold_val_accuracy": fold_accuracy,
            })
            print(f"  Fold {fold_idx+1} — Val Loss: {avg_fold_val_loss:.4f}, Accuracy: {fold_accuracy:.2f}%")

            # Cleanup per fold
            del model, optimizer, train_loader, val_loader
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        # --- Average across folds ---
        # NOTE(review): a diverged trial would make this NaN/inf and the value is
        # returned to the optimizer unchanged — confirm whether a finite penalty is wanted.
        mean_val_loss = float(np.mean(fold_losses))
        mean_accuracy = float(np.mean(fold_accuracies))

        wandb.log({
            "mean_cv_val_loss": mean_val_loss,
            "mean_cv_val_accuracy": mean_accuracy,
        })

        print(f"\n  Trial {current_call} finished — Mean CV Loss: {mean_val_loss:.4f}, Mean Accuracy: {mean_accuracy:.2f}%")
        exit_code = 0
    finally:
        run.finish(exit_code=exit_code)

    return mean_val_loss


# ==========================================
# 5. Checkpoint Logic & Bayesian Optimization
# ==========================================
def _checkpoint_path(checkpoint_id):
    """Return the checkpoint pickle path for ``checkpoint_id`` under DRIVE_DIR."""
    return f'{DRIVE_DIR}/{CHECKPOINT_BASE_NAME}_{checkpoint_id}.pkl'


def _try_load_checkpoint(path):
    """Load a skopt result from ``path``; print a warning and return None on any failure."""
    try:
        # skopt's load() unpickles the file: only point this at checkpoints you wrote yourself.
        loaded = load(path)
        # Touch the fields used for resuming so a malformed result is rejected here.
        len(loaded.x_iters)
        loaded.func_vals
        return loaded
    except Exception as e:
        print(f"WARNING: Could not load checkpoint {path}: {e}. Starting new.")
        return None


if __name__ == '__main__':
    x0 = None                  # previously evaluated points (None for a fresh run)
    y0 = None                  # objective values matching x0
    res = None                 # reset explicitly so a stale result from an earlier run is never reported
    res_loaded = None
    current_call = 0
    checkpoint_id_for_this_run = None
    checkpoint_file = None     # stays None until a checkpoint is successfully resumed

    new_run_message = "Starting new optimization with checkpoint ID {}."

    if USE_CHECKPOINT:
        if DESIRED_CHECKPOINT_ID is not None:
            resume_id, resume_kind, id_label = DESIRED_CHECKPOINT_ID, "specific", "ID"
        else:
            resume_id = get_checkpoint_id(CHECKPOINT_BASE_NAME, find_latest=True)
            resume_kind, id_label = "latest", "latest ID"

        if resume_id is None:
            print("No existing checkpoints found. Starting new optimization.")
        else:
            candidate_file = _checkpoint_path(resume_id)
            if not os.path.exists(candidate_file):
                print(f"ERROR: Checkpoint file {candidate_file} not found. Starting new optimization.")
            else:
                print(f"Attempting to load {resume_kind} checkpoint from {candidate_file}...")
                res_loaded = _try_load_checkpoint(candidate_file)
                if res_loaded is not None:
                    x0 = res_loaded.x_iters
                    y0 = res_loaded.func_vals
                    current_call = len(x0)
                    checkpoint_id_for_this_run = resume_id
                    checkpoint_file = candidate_file
                    print(f"Resuming from {current_call} previous calls from {id_label} {checkpoint_id_for_this_run}.")
    else:
        print("USE_CHECKPOINT is False. Starting a brand new optimization.")
        new_run_message = "New optimization will use checkpoint ID {}."

    # Every path that did not resume (disabled, nothing found, missing file,
    # unreadable file) starts a new run under a currently unused checkpoint ID.
    if checkpoint_file is None:
        checkpoint_id_for_this_run = get_checkpoint_id(CHECKPOINT_BASE_NAME, find_latest=False)
        checkpoint_file = _checkpoint_path(checkpoint_id_for_this_run)
        print(new_run_message.format(checkpoint_id_for_this_run))

    checkpoint_callback = CheckpointSaver(checkpoint_file)
    remaining_calls = max(0, CALLS - current_call)

    print(f"Starting optimization with {remaining_calls} remaining calls (Total CALLS: {CALLS})...")
    start_time = time.time()

    if remaining_calls > 0:
        # Resume-aware initial random points: 20 total, minus already-evaluated points.
        # Capped at remaining_calls because gp_minimize rejects n_calls < n_initial_points.
        required_random = min(max(0, 20 - current_call), remaining_calls)

        res = gp_minimize(
            train_model,
            search_space,                      # 3D: [lr, weight_decay, dropout]
            acq_func="EI",                     # Expected Improvement
            xi=0.1,                           # Exploration bias
            n_calls=remaining_calls,
            n_initial_points=required_random,
            noise="gaussian",
            # Offset the seed by the number of restored calls: with the unchanged seed a
            # resumed run would replay the same initial random draws and re-evaluate
            # points that are already in x0. A fresh run (current_call == 0) still uses SEED.
            random_state=SEED + current_call,
            callback=[checkpoint_callback],
            x0=x0,
            y0=y0,
        )
    else:
        print(f"All {CALLS} calls already completed based on loaded checkpoint.")
        if x0 is not None and y0 is not None:
            best_idx = int(np.argmin(y0))
            best_lr, best_wd, best_dropout = x0[best_idx][:3]
            best_loss = y0[best_idx]

            class MockResult:
                """Minimal stand-in exposing the ``x`` / ``fun`` fields read below."""
                def __init__(self, x, fun):
                    self.x = x
                    self.fun = fun

            res = MockResult([best_lr, best_wd, best_dropout], best_loss)
            print(f"Best from checkpoint — LR: {res.x[0]:.6f}, WD: {res.x[1]:.6f}, "
                  f"Dropout: {res.x[2]:.4f}, Loss: {res.fun:.4f}")
        else:
            print("No results to display as no checkpoint was loaded and no new calls were made.")

    end_time = time.time()
    print(f"\nOptimization finished in {(end_time - start_time)/60:.2f} minutes.")
    if res is not None:
        print(f"Best LR: {res.x[0]:.6f}, Best Weight Decay: {res.x[1]:.6f}, "
              f"Best Dropout: {res.x[2]:.4f}, Best Loss: {res.fun:.4f}")



Contents of c:\Users\JMN\Documents\Privat\Uddannelse\ActiveML\mini-projekt\dataset: ['Testing', 'Training']
Loaded cached normalization stats from c:\Users\JMN\Documents\Privat\Uddannelse\ActiveML\mini-projekt\BO_Checkpoints\dataset_norm_stats.json
Dataset mean: [0.18654859066009521, 0.18655261397361755, 0.18659797310829163]
Dataset std:  [0.19559581577777863, 0.19559480249881744, 0.1956312358379364]
Total dataset size: 7200 images
SimpleTumorCNN parameter count: 24,068
No existing checkpoints found. Starting new optimization.
Starting new optimization with checkpoint ID 0.
Starting optimization with 100 remaining calls (Total CALLS: 100)...


0,1
data_loading_pct,█▁▁▁▁▂▂▂▂▂▂
epoch,▁▂▂▃▄▅▅▆▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▅▄▃▃▂▂▂▁▁

0,1
data_loading_pct,6.04214
epoch,11.0
fold,1.0
train_loss,0.64313



  Trial 1/100
  lr=0.024526  wd=0.000036  dropout=0.3898
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.3204, Accuracy: 88.67%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.3000, Accuracy: 90.62%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.4169, Accuracy: 86.04%

  Trial 1 finished — Mean CV Loss: 0.3458, Mean Accuracy: 88.44%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▅▅▆▆█▂▂▃▃▄▄▅▅▅▇▇▇█▁▂▂▂▂▂▃▄▄▄▄▅▆▆▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅████████████
fold_val_accuracy,▅█▁
fold_val_loss,▂▁█
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▅▅▅▃▃▂▂▂▂▂▁▁▁▁▆▅▅▃▃▂▂▁▂▁▁▁▁█▆▅▅▄▄▄▄▃▂▂▂▁

0,1
data_loading_pct,3.62312
epoch,50.0
fold,3.0
fold_val_accuracy,86.04167
fold_val_loss,0.41686
mean_cv_val_accuracy,88.44444
mean_cv_val_loss,0.34576
train_loss,0.30448



  Trial 2/100
  lr=0.006174  wd=0.000218  dropout=0.0500
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.3261, Accuracy: 88.42%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.3950, Accuracy: 85.83%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.4163, Accuracy: 85.25%

  Trial 2 finished — Mean CV Loss: 0.3792, Mean Accuracy: 86.50%


0,1
data_loading_pct,▃▂▄▃▂▁▃▂▃▂▁▂▂▇▇▇▇▆▇▇▇▇▆▇▇▇▇▇█▇▇▇▇▇█▇▇▇██
epoch,▂▂▂▂▃▃▄▅▅▅▆▇▇█▁▂▂▂▃▃▅▆▆▆▆▇█▁▁▂▂▄▅▅▅▅▆▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅██████████████
fold_val_accuracy,█▂▁
fold_val_loss,▁▆█
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▆▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁▁▁▅▄▄▃▃▄▂▁▂█▅▄▃▃▂▂▂▂▁▁▁

0,1
data_loading_pct,4.51807
epoch,50.0
fold,3.0
fold_val_accuracy,85.25
fold_val_loss,0.41633
mean_cv_val_accuracy,86.5
mean_cv_val_loss,0.37915
train_loss,0.24959



  Trial 3/100
  lr=0.002386  wd=0.000100  dropout=0.0714
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.5021, Accuracy: 80.38%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.5455, Accuracy: 77.67%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.6381, Accuracy: 73.79%

  Trial 3 finished — Mean CV Loss: 0.5619, Mean Accuracy: 77.28%


0,1
data_loading_pct,▆▅▆▇▅▅▅▇▆▅▅▄▆▇▆▅▅▆▅▆▄▅▆█▇▄▄▄▄▅▆▆▆▇▄▃▆▅▆▁
epoch,▁▁▁▂▃▃▄▄▄▄▆▆▆▆▁▃▃▃▃▄▅▅▆▆▇▇▇███▂▂▂▃▃▄▅▅▅▇
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅████████████
fold_val_accuracy,█▅▁
fold_val_loss,▁▃█
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▇▆▅▄▃▃▃▃▃▂▁▁█▆▆▅▄▄▄▃▃▂▂▂▁▁▁▅▄▄▄▃▃▂▂▂▂▂▂▁

0,1
data_loading_pct,4.27829
epoch,50.0
fold,3.0
fold_val_accuracy,73.79167
fold_val_loss,0.63813
mean_cv_val_accuracy,77.27778
mean_cv_val_loss,0.56193
train_loss,0.30125



  Trial 4/100
  lr=0.008967  wd=0.000015  dropout=0.3610
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.4481, Accuracy: 84.00%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.4136, Accuracy: 85.17%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.4035, Accuracy: 86.58%

  Trial 4 finished — Mean CV Loss: 0.4218, Mean Accuracy: 85.25%


0,1
data_loading_pct,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▃▃▅▅▆▇▇▇█▁▂▂▂▃▃▄▅▅▅▆███▂▂▃▃▃▄▅▆▆▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█████████████
fold_val_accuracy,▁▄█
fold_val_loss,█▃▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▇▄▄▄▄▃▃▂▂▂▂▂▂▁▁▇▅▅▄▃▂▃▂▂▂▂▁▁█▆▃▃▃▃▃▂▂▂▂▁

0,1
data_loading_pct,3.67188
epoch,50.0
fold,3.0
fold_val_accuracy,86.58333
fold_val_loss,0.40353
mean_cv_val_accuracy,85.25
mean_cv_val_loss,0.42176
train_loss,0.34222



  Trial 5/100
  lr=0.065412  wd=0.000010  dropout=0.4961
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.4418, Accuracy: 85.12%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.5270, Accuracy: 81.50%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.3921, Accuracy: 86.92%

  Trial 5 finished — Mean CV Loss: 0.4536, Mean Accuracy: 84.51%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▃▅▅▅▆█▁▂▃▃▄▄▄▄▅▆▆▆▆▇▇█▂▃▃▅▅▅▆▆▆▆▇▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅█████████████
fold_val_accuracy,▆▁█
fold_val_loss,▄█▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▇▆▆▅▅▄▄▄▃▃▂▂▂▂▂▆▅▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁█▅▅▄▃▂▂▂

0,1
data_loading_pct,3.37889
epoch,50.0
fold,3.0
fold_val_accuracy,86.91667
fold_val_loss,0.39211
mean_cv_val_accuracy,84.51389
mean_cv_val_loss,0.45364
train_loss,0.51504



  Trial 6/100
  lr=0.007119  wd=0.000684  dropout=0.0035
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.2486, Accuracy: 91.17%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.3341, Accuracy: 88.79%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.3733, Accuracy: 87.62%

  Trial 6 finished — Mean CV Loss: 0.3187, Mean Accuracy: 89.19%


0,1
data_loading_pct,▃▁▁▁▂▁▁▅▄▃▆▅▄▃▄▄▅▃▄▅▄▅▂▁▄▁▆▅▄█▂▃▃▆▂▂▄▃▂▄
epoch,▁▂▂▂▃▃▄▅▅▅▆▆▆▇▇██▁▁▂▃▄▅▅▅▆▆▇▇█▁▁▂▂▂▅▇▇▇█
fold,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅███████████████████
fold_val_accuracy,█▃▁
fold_val_loss,▁▆█
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▆▆▅▄▄▃▃▃▃▂▂▂▁▁▁▆▅▅▄▃▂▂▂▂▂▂▁▁█▇▄▄▄▄▄▃▃▂▂▁

0,1
data_loading_pct,3.24704
epoch,50.0
fold,3.0
fold_val_accuracy,87.625
fold_val_loss,0.37333
mean_cv_val_accuracy,89.19444
mean_cv_val_loss,0.31869
train_loss,0.2197



  Trial 7/100
  lr=0.000117  wd=0.000375  dropout=0.1999
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.5062, Accuracy: 82.79%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.5073, Accuracy: 82.08%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.5256, Accuracy: 81.79%

  Trial 7 finished — Mean CV Loss: 0.5130, Mean Accuracy: 82.22%


0,1
data_loading_pct,▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▃▄▄▅▅▅▇██▁▂▃▃▄▄▅▅▅▆▆▆▇█▁▁▂▂▃▃▃▄▅▆▆▆█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅███████████
fold_val_accuracy,█▃▁
fold_val_loss,▁▁█
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▅▅▃▃▃▂▂▂▂▂▁█▆▆▅▃▃▃▂▂▂▂▁▁▁▁▆▆▅▄▃▃▃▃▂▂▂▂▂▁

0,1
data_loading_pct,3.6607
epoch,50.0
fold,3.0
fold_val_accuracy,81.79167
fold_val_loss,0.52559
mean_cv_val_accuracy,82.22222
mean_cv_val_loss,0.51302
train_loss,0.56706



  Trial 8/100
  lr=0.000138  wd=0.008342  dropout=0.1164
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.4610, Accuracy: 83.04%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.4778, Accuracy: 83.33%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.4939, Accuracy: 82.88%

  Trial 8 finished — Mean CV Loss: 0.4776, Mean Accuracy: 83.08%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▂▃▃▃▄▄▅▆▇▇██▂▂▂▃▃▃▅▅▆▇▇▁▁▁▂▃▄▄▄▄▅▅▆▆▆▇
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅███████████
fold_val_accuracy,▄█▁
fold_val_loss,▁▅█
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▆▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▅▄▃▂▂▂▂▂▁▁▁█▄▃▃▃▃▂▂▂▂▂▁

0,1
data_loading_pct,3.38938
epoch,50.0
fold,3.0
fold_val_accuracy,82.875
fold_val_loss,0.49394
mean_cv_val_accuracy,83.08333
mean_cv_val_loss,0.47757
train_loss,0.53516



  Trial 9/100
  lr=0.000187  wd=0.000716  dropout=0.1912
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.4984, Accuracy: 81.75%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.7448, Accuracy: 68.08%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.5035, Accuracy: 83.12%

  Trial 9 finished — Mean CV Loss: 0.5822, Mean Accuracy: 77.65%


0,1
data_loading_pct,█▇▄▆▅▆▂▃▅▇▄▄▅▇▇▄▂▄▅▄▄▇▆▅▃▃▂▂▃▁▂▁▃▄▂▂▃▃▂▃
epoch,▁▁▂▂▂▃▄▅▅▅▆▇▇▇▁▂▄▄▅▅▆▇▇▇▇█▁▂▂▂▃▄▄▄▄▅▅▆▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅███████████████
fold_val_accuracy,▇▁█
fold_val_loss,▁█▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▇▄▄▃▃▂▂▂▂▂▂▁▁▁█▄▄▄▃▂▂▂▂▂▁▁▁▁▄▄▃▃▃▃▃▂▂▂▁▁

0,1
data_loading_pct,2.89792
epoch,50.0
fold,3.0
fold_val_accuracy,83.125
fold_val_loss,0.50351
mean_cv_val_accuracy,77.65278
mean_cv_val_loss,0.58224
train_loss,0.51809



  Trial 10/100
  lr=0.089062  wd=0.000251  dropout=0.4300
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.4296, Accuracy: 84.96%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.5090, Accuracy: 84.00%

  --- Fold 3/3 ---


                                                                                                                

  Fold 3 — Val Loss: 0.4835, Accuracy: 82.54%

  Trial 10 finished — Mean CV Loss: 0.4740, Mean Accuracy: 83.83%


0,1
data_loading_pct,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▃▄▄▅▅▅▆▆▇▇█▂▂▂▃▄▅▅▆▆▇▁▂▂▂▃▄▅▅▅▆▆▇▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅███████████████
fold_val_accuracy,█▅▁
fold_val_loss,▁█▆
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▅▅▄▄▄▃▃▃▃▂▅▅▅▅▄▃▃▃▃▂▂▁▁▁▁█▆▅▅▅▄▄▄▃▃▃▂▃▂▂

0,1
data_loading_pct,3.89553
epoch,50.0
fold,3.0
fold_val_accuracy,82.54167
fold_val_loss,0.48352
mean_cv_val_accuracy,83.83333
mean_cv_val_loss,0.47403
train_loss,0.60275



  Trial 11/100
  lr=0.010988  wd=0.000225  dropout=0.0066
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.4473, Accuracy: 87.08%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.5921, Accuracy: 84.75%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.5681, Accuracy: 83.50%

  Trial 11 finished — Mean CV Loss: 0.5358, Mean Accuracy: 85.11%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▃▄▄▅▆▆▇▇▇▇█▁▂▂▂▃▃▄▅▆▇▇█▂▂▃▃▄▄▅▆▆▆▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅██████████████
fold_val_accuracy,█▃▁
fold_val_loss,▁█▇
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,█▆▆▅▅▃▃▂▂▂▁▂▁▁▁▅▄▄▃▃▂▂▂▂▂█▆▆▅▅▃▃▂▂▂▂▂▁▁▁

0,1
data_loading_pct,3.8655
epoch,50.0
fold,3.0
fold_val_accuracy,83.5
fold_val_loss,0.56806
mean_cv_val_accuracy,85.11111
mean_cv_val_loss,0.53581
train_loss,0.18679



  Trial 12/100
  lr=0.067082  wd=0.000490  dropout=0.1927
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.5352, Accuracy: 80.88%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.3793, Accuracy: 86.79%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.2730, Accuracy: 90.92%

  Trial 12 finished — Mean CV Loss: 0.3958, Mean Accuracy: 86.19%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▂▂▂▄▄▆▇▇▇█▁▁▁▂▂▂▂▂▃▃▄▅▆▇▇█▂▂▃▄▄▄▅▆▇▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅████████████
fold_val_accuracy,▁▅█
fold_val_loss,█▄▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▅▅▅▄▄▃▂▂▂▂▁▁▁▅▄▄▄▃▃▂▂▂▂▂▁▁▁█▆▄▃▃▃▃▃▂▂▂▂▁

0,1
data_loading_pct,3.77695
epoch,50.0
fold,3.0
fold_val_accuracy,90.91667
fold_val_loss,0.273
mean_cv_val_accuracy,86.19444
mean_cv_val_loss,0.39583
train_loss,0.3189



  Trial 13/100
  lr=0.000112  wd=0.000049  dropout=0.1205
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.5010, Accuracy: 81.83%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.5244, Accuracy: 81.42%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.4794, Accuracy: 83.88%

  Trial 13 finished — Mean CV Loss: 0.5016, Mean Accuracy: 82.38%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▃▃▄▅▅▅▆▆▇▇██▂▂▂▃▅▅▆▆▇█▁▁▂▃▄▄▄▄▅▆▆▇██
fold,▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅███████████████
fold_val_accuracy,▂▁█
fold_val_loss,▄█▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,█▇▅▄▄▂▂▂▂▂▂▂▂▂█▅▄▄▃▃▂▂▂▂▂▁▁▁▁▅▄▄▄▃▃▃▂▂▂▂

0,1
data_loading_pct,3.83235
epoch,50.0
fold,3.0
fold_val_accuracy,83.875
fold_val_loss,0.47939
mean_cv_val_accuracy,82.375
mean_cv_val_loss,0.50158
train_loss,0.54205



  Trial 14/100
  lr=0.011215  wd=0.000676  dropout=0.4166
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.3722, Accuracy: 87.21%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.4073, Accuracy: 86.17%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.3694, Accuracy: 86.67%

  Trial 14 finished — Mean CV Loss: 0.3830, Mean Accuracy: 86.68%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▃▄▄▄▅▅▆▆▆▇█▁▂▂▃▄▇▇███▁▁▂▂▂▃▃▃▄▄▅▆▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅███████████████
fold_val_accuracy,█▁▄
fold_val_loss,▂█▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▆▅▄▄▄▃▃▃▂▂▂▂▁▁▆▅▄▃▃▃▂▂▁█▇▅▅▅▅▄▄▃▃▃▃▂▂▂▂▂

0,1
data_loading_pct,3.82122
epoch,50.0
fold,3.0
fold_val_accuracy,86.66667
fold_val_loss,0.36945
mean_cv_val_accuracy,86.68056
mean_cv_val_loss,0.383
train_loss,0.34092



  Trial 15/100
  lr=0.000331  wd=0.000149  dropout=0.0911
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.4713, Accuracy: 78.92%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.4882, Accuracy: 82.42%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.6065, Accuracy: 77.42%

  Trial 15 finished — Mean CV Loss: 0.5220, Mean Accuracy: 79.58%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▂▂▃▄▄▅▆▇▇██▁▂▃▃▃▃▄▄▄▅▅▅▆▆▇█▁▂▃▃▄▄▆▆▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅██████████
fold_val_accuracy,▃█▁
fold_val_loss,▁▂█
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▅▃▃▃▃▂▂▂▂▁▁█▆▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁█▆▄▃▃▃▂▁▁▁▁▁

0,1
data_loading_pct,3.51546
epoch,50.0
fold,3.0
fold_val_accuracy,77.41667
fold_val_loss,0.60647
mean_cv_val_accuracy,79.58333
mean_cv_val_loss,0.52199
train_loss,0.42505



  Trial 16/100
  lr=0.018454  wd=0.000189  dropout=0.1040
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.3743, Accuracy: 85.12%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.2794, Accuracy: 90.42%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.2515, Accuracy: 91.58%

  Trial 16 finished — Mean CV Loss: 0.3018, Mean Accuracy: 89.04%


0,1
data_loading_pct,▂▄▅▄▂▃▅▄▄▂▄▂▂▄▃▅▂▃▂▅▅▃▅▂▄▁▄▅▄▄▃▂▅▂▃▁▅█▆▃
epoch,▁▁▁▂▂▄▄▅▆▆▇▇█▁▂▂▄▄▄▄▆▆▇▇▇██▁▂▂▂▂▃▄▄▆▆▆▆█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅██████████
fold_val_accuracy,▁▇█
fold_val_loss,█▃▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▇▆▄▄▄▃▃▂▂▂▁▁█▅▅▃▃▃▂▂▂▂▁▁▇▆▅▄▄▄▃▃▃▃▂▂▂▁▂▁

0,1
data_loading_pct,3.49579
epoch,50.0
fold,3.0
fold_val_accuracy,91.58333
fold_val_loss,0.25155
mean_cv_val_accuracy,89.04167
mean_cv_val_loss,0.30175
train_loss,0.19016



  Trial 17/100
  lr=0.005048  wd=0.000012  dropout=0.4211
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.3847, Accuracy: 87.75%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.4292, Accuracy: 81.67%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.4452, Accuracy: 82.46%

  Trial 17 finished — Mean CV Loss: 0.4197, Mean Accuracy: 83.96%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▃▄▄▄▄▅▆▆▇▇▁▂▄▄▅▆▆▇▇▇▁▁▁▂▂▃▄▄▅▅▆▆▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█████████
fold_val_accuracy,█▁▂
fold_val_loss,▁▆█
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▇▅▄▄▄▃▃▃▃▃▂▃▂▂▂▁█▆▅▃▂▂▂▁▂▁▁▆▆▄▄▄▃▃▃▂▂▂▂▁

0,1
data_loading_pct,3.57228
epoch,50.0
fold,3.0
fold_val_accuracy,82.45833
fold_val_loss,0.44517
mean_cv_val_accuracy,83.95833
mean_cv_val_loss,0.41968
train_loss,0.39171



  Trial 18/100
  lr=0.002235  wd=0.000153  dropout=0.4633
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.5774, Accuracy: 75.79%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.5739, Accuracy: 79.29%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.3879, Accuracy: 86.58%

  Trial 18 finished — Mean CV Loss: 0.5131, Mean Accuracy: 80.56%


0,1
data_loading_pct,▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▃▃▄▄▅▆▆▇█▁▂▂▂▂▄▄▅▅▆▇██▁▂▃▄▄▅▅▆▆▆▆▇▇▇███
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅█████████████
fold_val_accuracy,▁▃█
fold_val_loss,██▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▇▆▆▅▄▄▃▃▃▃▂▂▂▂▂▁▁▇▆▅▄▄▃▃▃▂▂▂▁█▄▄▃▃▃▂▂▁▁▁

0,1
data_loading_pct,3.54266
epoch,50.0
fold,3.0
fold_val_accuracy,86.58333
fold_val_loss,0.38786
mean_cv_val_accuracy,80.55556
mean_cv_val_loss,0.51305
train_loss,0.44973



  Trial 19/100
  lr=0.015199  wd=0.000095  dropout=0.2852
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.3045, Accuracy: 89.00%

  --- Fold 2/3 ---


                                                                                                                      

  Fold 2 — Val Loss: 0.6303, Accuracy: 81.71%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.2614, Accuracy: 91.12%

  Trial 19 finished — Mean CV Loss: 0.3987, Mean Accuracy: 87.28%


0,1
data_loading_pct,▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▂▂▂▃▃▄▄▅▅▅▅▅▆▇▇█▁▁▁▂▂▄▄▅▅▆▇▇█▁▂▂▂▃▅▅▅▆▆▇
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅███████████████
fold_val_accuracy,▆▁█
fold_val_loss,▂█▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,█▅▅▅▄▃▂▂▁█▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▅▅▄▄▄▃▃▃▃▂▂▂▂▂▁

0,1
data_loading_pct,3.62174
epoch,50.0
fold,3.0
fold_val_accuracy,91.125
fold_val_loss,0.26136
mean_cv_val_accuracy,87.27778
mean_cv_val_loss,0.39872
train_loss,0.27247



  Trial 20/100
  lr=0.003652  wd=0.007647  dropout=0.4223
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.3985, Accuracy: 85.62%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.4128, Accuracy: 84.21%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.3598, Accuracy: 86.21%

  Trial 20 finished — Mean CV Loss: 0.3904, Mean Accuracy: 85.35%


0,1
data_loading_pct,▄▁█▁▃▆▃▅▃▂▂▇▂▄▃▄▆▅▆▇▆▅▅▇▆▅▄▇▆▆▆▅▅▅▆▆▆▅▆▅
epoch,▁▁▁▂▃▄▅▆▇▇▁▁▁▂▂▃▃▄▄▄▅▆▆▇█▁▂▃▃▄▄▅▅▆▆▆▇▇▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅██████████
fold_val_accuracy,▆▁█
fold_val_loss,▆█▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,█▇▇▆▅▄▄▃▃▃▂▂▂▂▁▆▆▆▄▃▃▂▂▂▂▂▁▁▅▅▃▄▃▃▃▂▂▁▁▁

0,1
data_loading_pct,3.59087
epoch,50.0
fold,3.0
fold_val_accuracy,86.20833
fold_val_loss,0.3598
mean_cv_val_accuracy,85.34722
mean_cv_val_loss,0.39038
train_loss,0.40173



  Trial 21/100
  lr=0.032796  wd=0.009879  dropout=0.3764
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.8352, Accuracy: 68.04%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 1.0706, Accuracy: 62.08%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.4261, Accuracy: 84.46%

  Trial 21 finished — Mean CV Loss: 0.7773, Mean Accuracy: 71.53%


0,1
data_loading_pct,▄▄▅▃█▄▃▇▆▄▆▆▄▅▄▆▃▄▃▂▅▁▄▄▃▂▄▄▆▂▂▅▆▆▅▄▃▅▅▁
epoch,▁▁▂▂▃▄▅▅▆▆▆▇▇▁▂▄▄▄▅▆▆▆▇▇█▁▂▂▂▃▃▄▅▅▅▆▇▇██
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅███████████
fold_val_accuracy,▃▁█
fold_val_loss,▅█▁
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,▇▆▅▅▄▃▃▃▃▃▃▂▃▂▂▇▆▅▅▄▃▃▂▂▂▂▁▁▁▁█▆▅▄▄▃▃▃▂▂

0,1
data_loading_pct,4.65226
epoch,50.0
fold,3.0
fold_val_accuracy,84.45833
fold_val_loss,0.42608
mean_cv_val_accuracy,71.52778
mean_cv_val_loss,0.77727
train_loss,0.47789



  Trial 22/100
  lr=0.025017  wd=0.000162  dropout=0.5000
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.3100, Accuracy: 88.62%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.4235, Accuracy: 84.21%

  --- Fold 3/3 ---


                                                                                                               

  Fold 3 — Val Loss: 0.3778, Accuracy: 87.46%

  Trial 22 finished — Mean CV Loss: 0.3705, Mean Accuracy: 86.76%


0,1
data_loading_pct,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▂▂▃▄▄▅▅▅▆▆▇▇█▂▃▃▃▃▄▅▅▅▆▆▆▆██▁▂▂▃▃▄▅▆▆█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅██████████████
fold_val_accuracy,█▁▆
fold_val_loss,▁█▅
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,█▆▆▅▄▂▂▂▂▂▁▇▆▆▅▄▄▃▃▃▂▂▂▂▂▂▁▇▅▅▃▃▃▃▃▃▂▂▂▁

0,1
data_loading_pct,4.33073
epoch,50.0
fold,3.0
fold_val_accuracy,87.45833
fold_val_loss,0.3778
mean_cv_val_accuracy,86.76389
mean_cv_val_loss,0.37045
train_loss,0.37834



  Trial 23/100
  lr=0.008014  wd=0.002893  dropout=0.5000
Using device: cpu

  --- Fold 1/3 ---


                                                                                                               

  Fold 1 — Val Loss: 0.3865, Accuracy: 85.67%

  --- Fold 2/3 ---


                                                                                                               

  Fold 2 — Val Loss: 0.4054, Accuracy: 86.04%

  --- Fold 3/3 ---


                                                                                                                  

  Fold 3 — Val Loss: 0.3930, Accuracy: 85.38%

  Trial 23 finished — Mean CV Loss: 0.3949, Mean Accuracy: 85.69%


0,1
data_loading_pct,▆▆█▇▆█▅█▇▆▆▆▅▅▅▇▆▅▇▆▅▅▆▃▆▆▆▅▅▅▇▅▇▇█▁▂▄▂▃
epoch,▁▁▂▂▂▃▃▄▄▄▅▇▇▇█▂▂▂▄▄▄▄▅▅▅▆▆▇██▂▂▂▃▄▅▅▆▇█
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅███████████
fold_val_accuracy,▄█▁
fold_val_loss,▁█▃
mean_cv_val_accuracy,▁
mean_cv_val_loss,▁
train_loss,█▆▅▅▄▄▄▄▄▃▂▂▂▂▂▁▂▅▄▄▄▃▃▃▂▂▂▂▂▂▁▄▄▃▂▁▁▁▁▁

0,1
data_loading_pct,3.31323
epoch,50.0
fold,3.0
fold_val_accuracy,85.375
fold_val_loss,0.39295
mean_cv_val_accuracy,85.69444
mean_cv_val_loss,0.39493
train_loss,0.38771



  Trial 24/100
  lr=0.005188  wd=0.001243  dropout=0.0000
Using device: cpu

  --- Fold 1/3 ---


  Fold 1 Epoch 21/50:  19%|█▉        | 29/150 [00:02<00:10, 11.96it/s, loss=0.3313, epoch_est=00:14, data%=8%] 