# Feature Extraction

We have extracted features from the frames using pre-trained VideoMAE model. We have saved these as .npy files which can be used for all the tasks.

In [2]:
import os, random, warnings
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import VideoMAEFeatureExtractor, VideoMAEModel
from tqdm import tqdm
from PIL import Image
import pandas as pd

class Config:
    FRAMES_ROOT  = "frames"
    METADATA_CSV = "FinalDataset.csv"
    MODEL_NAME   = "MCG-NJU/videomae-base"
    NUM_FRAMES   = 700
    CHUNK_SIZE   = 16
    OVERLAP      = 2
    FEATURES_DIR = "videomae_features_new"
    DEVICE       = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    CLIP_BATCH   = 4

random.seed(42); np.random.seed(42); torch.manual_seed(42)

class VideoDataset(Dataset):
    def __init__(self, df, fe, skip_existing=True):
        self.vids = df["video_id"].tolist()  # List of videos
        self.fe   = fe                       # Feature Extractor
        self.skip = skip_existing            # Skip existing videos
        os.makedirs(Config.FEATURES_DIR, exist_ok=True)

    def __len__(self): 
        return len(self.vids)

    def __getitem__(self, idx):
        vid = self.vids[idx]   # VideoID
        out_path = os.path.join(Config.FEATURES_DIR, f"{vid}_temporal.npy")  # Output path
        if self.skip and os.path.exists(out_path):   # Skip if the path already exists
            return None

        fdir   = os.path.join(Config.FRAMES_ROOT, vid)  # Video directory - Each video directory has 700 frames.
        fpaths = sorted([os.path.join(fdir, f)
                         for f in os.listdir(fdir)
                         if f.lower().endswith((".jpg"))])[:Config.NUM_FRAMES]  # Frame paths of the particular video

        frames = []
        for fp in fpaths:
            try:
                frames.append(Image.open(fp).convert("RGB"))# Open the image and convert it to RGB format - Convert method - Returns a converted copy of this image
            except Exception as e:
                warnings.warn(f"Skipped corrupt frame {fp}: {e}")  # Skips if it's not able to open or convert the image with a warning

        # Sliding window logic
        stride  = Config.CHUNK_SIZE - Config.OVERLAP  # Stride = 14
        clips   = []
        for i in range(0, len(frames), stride):  # Moves with a stride of 12, so the last two frames of the first one are added to the next clip
            clip = frames[i:i + Config.CHUNK_SIZE] # Get a overlapping clip of 16 frames
            if len(clip) < Config.CHUNK_SIZE:
                clip += [frames[-1]] * (Config.CHUNK_SIZE - len(clip))  # If the clip (mostly last) is less than 16 frames, pad it with a few frames from the previous chunk
            clips.append(clip) # Make a list of clips

        # Feature Extractor: Expects as input images/arrays
        # Output: [batch_size, num_frames, number_of_channels, height, width]
        tensors = []
        for clip in clips:
            pv = self.fe(clip, return_tensors="pt").pixel_values  # The feature extractor returns tensors of shape (1,16,3,224,224)
            tensors.append(pv)                      # Save tensors in a list
        pixel_values = torch.cat(tensors, dim=0)               # Concatenate all tensors along the batch dimension: (num_clips,16,3,224,224)
        return {"video_id": vid, "pixel_values": pixel_values, "n_frames": len(frames)}

# Take a sequence of token embeddings and convert them into frame-level embeddings
def tokens_to_frames(tok: torch.Tensor, n_frames=16):
    tok = tok[1:]                        # VideoMAE adds a [CLS] token at the beginning: drop CLS
    tokens_per_frame = tok.size(0) // n_frames  # Get the number of tokens per frame
    tok = tok[:tokens_per_frame * n_frames]  # Ensures the token count is exactly divisible by n_frames
    frames = tok.reshape(n_frames, tokens_per_frame, -1).mean(1) # Reshape and take mean across tokens for each frame
    return frames                        # (16,768) - 16 frame-level embeddings, each of size 768

def extract_temporal(skip_existing=True):
    df  = pd.read_csv(Config.METADATA_CSV)
    fe  = VideoMAEFeatureExtractor.from_pretrained(Config.MODEL_NAME) # Define the feature extractor
    model = VideoMAEModel.from_pretrained(Config.MODEL_NAME).to(Config.DEVICE).eval() # Pretrained videomae model

    ds = VideoDataset(df, fe, skip_existing)
    for sample in tqdm(ds, desc="videos"):
        if sample is None:
            continue

        vid    = sample["video_id"]
        inp    = sample["pixel_values"].to(Config.DEVICE)    # Input: (N,16,3,224,224)
        nfrm   = sample["n_frames"]
        stride = Config.CHUNK_SIZE - Config.OVERLAP
        feat_dim = 768

        clip_feats = []
        for i in range(0, inp.size(0), Config.CLIP_BATCH): # Process all clips (inp.size(0) is the number of clips per video)
            batch = inp[i:i+Config.CLIP_BATCH]               # keep (B,16,3,H,W)
            with torch.no_grad():                            # We're freezing the model weights
                out = model(batch).last_hidden_state         # Run batch through VideoMAE model: (B, seq+1, 768)
            for j in range(out.size(0)):
                clip_feats.append(tokens_to_frames(out[j], Config.CHUNK_SIZE).cpu()) # Convert tokens to frame-level features and save them in a list
        clip_feats = torch.stack(clip_feats)             # Combine features to form one tensor per video - (num_clips,16,768)

        blended = np.zeros((nfrm, feat_dim), dtype=np.float32) # Will store the final per-frame feature vectors.
        counts  = np.zeros(nfrm,      dtype=np.float32)   # Tracks how many times each frame is "touched" (due to overlap).

        w = np.ones(Config.CHUNK_SIZE, dtype=np.float32) # Will track weights used to blend clip features into final frame-wise features
        # If there is overlap, we need a smooth transition, so we give lower weights to the frames at the edges
        if Config.OVERLAP:
            w[:Config.OVERLAP]  = np.linspace(0, 1, Config.OVERLAP, endpoint=False) # First overlap - weights between 0 to 1
            w[-Config.OVERLAP:] = np.linspace(1, 0, Config.OVERLAP, endpoint=False) # Last overlap - weights between 1 to 0

        for k, clip in enumerate(clip_feats):  # (num_clips, 16, 768)
            start = k * stride   # first frame index
            end   = min(start + Config.CHUNK_SIZE, nfrm)  # last frame index
            w_use = w[:end - start]   # Extract relevant portion of the weight array
            blended[start:end] += clip[:end - start].numpy() * w_use[:, None] # Multiply the clips frame features with corresponding weights
            counts[start:end]  += w_use # Track the sum of weights for each frame so that we can normalize later

        blended[counts > 0] /= counts[counts > 0, None] # Weighted average

        np.save(os.path.join(Config.FEATURES_DIR, f"{vid}_temporal.npy"), blended) # Save the features

extract_temporal(skip_existing=True)
print("✓  Temporal features saved")

# Import libraries and set the seed

In [3]:
# ----------------------------- imports & config -----------------------------
import os
from pathlib import Path
import copy
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import optuna
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

SEED = 42
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 5
N_EPOCHS = 40
PATIENCE = 10
N_TRIALS = 30
FEATURE_DIR = "videomae_features_new"
MODEL_DIR = Path("models_final") 
MODEL_DIR.mkdir(exist_ok=True)
RESULTS_DIR = Path("results_final") 
RESULTS_DIR.mkdir(exist_ok=True)
PIN = torch.cuda.is_available()
NUM_WORKERS = 0

def set_seed(seed: int = SEED):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed()

# Dataset Class

In [4]:
class TemporalFeatureDataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, feature_dir: str, target_col: str = "log_view_count"):
        self.df = dataframe.reset_index(drop=True)
        self.feature_dir = Path(feature_dir)  # Base directory containing .npy feature files
        self.target_col = target_col

    def __len__(self): return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]      # select the idx-th row
        vid = row["video_id"]        # fetch the video ID
        x = np.load(self.feature_dir / f"{vid}_temporal.npy")   # load features: expected shape [T, D]
        y = row[self.target_col]     # fetch the target for this sample
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32) # Convert to Pytorch tensors

def safe_pearsonr(preds: np.ndarray, targets: np.ndarray, eps: float = 1e-8) -> float:
    preds, targets = preds.squeeze(), targets.squeeze()   # Squeeze removes all dimensions of size 1
    cov = ((preds - preds.mean()) * (targets - targets.mean())).mean()  # Calculate the covariance
    return float(cov / ((preds.std() + eps) * (targets.std() + eps)))  # Calculate and return the pearson r - a small epsilon is added to avoid division by zero

def log_to_views(arr: np.ndarray) -> np.ndarray:
    return np.expm1(arr)        # Log view count is converted back to view -> Inverse of log1p

# Prediction Model - Transformer

In [5]:
class TransformerRegressor(nn.Module):
    def __init__(self, d_model, nhead, num_layers):
        super().__init__()
        # One encoder block (self-attention + FFN + residual + norm)
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead) # By default, batch_first=False (expects [T, B, D]).
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers) # Stack 'num_layers' encoder blocks
        self.regressor = nn.Linear(d_model, 1)  # Linear regression head mapping pooled [B, D] -> scalar

    def forward(self, x):  # x: (B, T, D) -> (5, 700, 758)
        x = x.permute(1, 0, 2)  # Permute as by default the transformer expects [T, B, D]
        x = self.transformer(x)  # (T, B, D)
        x = x.mean(dim=0)  # global average pooling over time
        return self.regressor(x).squeeze(-1) # Regress to a single scalar

# Training Loop

In [6]:
def train_one_epoch(model, loader, criterion, optimizer, clip_norm: float = 1.0):
    model.train()         # Put the model in training mode
    total_huber, preds, tgts = 0.0, [], []
    for x, y in loader:
        x, y = x.to(DEVICE), y.to(DEVICE)  # Move the inputs and targets to the device (GPU)
        y_hat = model(x)     # Forward pass - Get the predictions
        loss = criterion(y_hat, y)    # Compute training loss (Huber)
        optimizer.zero_grad()       # Reset gradients 
        loss.backward()             # backpropagate to compute grads
        if clip_norm is not None:     # optionally clip gradient norm to stabilize training
            nn.utils.clip_grad_norm_(model.parameters(), clip_norm)
        optimizer.step()             # update parameters
        total_huber += loss.item() * x.size(0)   # accumulate sum of loss over samples in this batch
        preds.append(y_hat.detach().cpu().numpy())   # Collect predictions on cpu for epoch metrics
        tgts.append(y.cpu().numpy())   # Collect targets on CPU
    preds = np.concatenate(preds)  # Concatenate per batch arrays into [N] arrays for whole epoch metrics
    tgts = np.concatenate(tgts)  
    mse = float(np.mean((preds - tgts) ** 2)) # Epoch MSE
    pc  = safe_pearsonr(preds, tgts)    # Epoch PC
    hub = total_huber / len(loader.dataset)   # Average huber over all samples in the epoch
    return hub, mse, pc

@torch.no_grad()     # Disable autograd for evaluation
def evaluate(model, loader):
    model.eval()    # Set the model in eval mode
    preds, tgts = [], []
    for x, y in loader:
        x, y = x.to(DEVICE), y.to(DEVICE)   # Move to device
        preds.append(model(x).cpu().numpy())  # Forward pass -> Collect predictions
        tgts.append(y.cpu().numpy())      # Collect targets
    preds = np.concatenate(preds)     # [N] predictions
    tgts = np.concatenate(tgts)       # [N] targets
    mse = float(np.mean((preds - tgts) ** 2))   # MSE over full split
    pc  = safe_pearsonr(preds, tgts)      # PC over full split
    return mse, pc, preds, tgts

def train_and_evaluate(model, train_loader, val_loader, lr, weight_decay,
                       n_epochs=N_EPOCHS, patience=PATIENCE, verbose=True):
    crit = nn.HuberLoss(delta=1.0)         # Training criteria - Robust to outliers  
    opt  = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)   # Adam optimizer

    best_val = float("inf") 
    best_pc = None
    best_state = None; bad = 0

    # Epoch Loop
    for ep in range(1, n_epochs + 1):
        tr_hub, tr_mse, tr_pc = train_one_epoch(model, train_loader, crit, opt)  # one training epoch
        val_mse, val_pc, _, _ = evaluate(model, val_loader)    # evaluation on validation split
        if verbose:
            print(f"Epoch {ep:02d}: Train Huber={tr_hub:.4f}, Train MSE={tr_mse:.4f}, PC={tr_pc:.4f} | "
                  f"Val MSE={val_mse:.4f}, PC={val_pc:.4f}")
        if val_mse < best_val:   # If there is improvement in the validation MSE, save that as the best MSE and PC
            best_val, best_pc = float(val_mse), float(val_pc)  
            best_state = copy.deepcopy(model.state_dict())
            bad = 0
        else:
            bad += 1
            if bad >= patience:
                if verbose: print("Early stopping"); break

    model.load_state_dict(best_state)   # restore best checkpoint before returning
    return model, best_val, best_pc

# Optuna for Hyperparameter Tuning

In [7]:
# ------------------------------ optuna objective ----------------------------
def objective_builder(train_df, val_df, input_dim):
    def objective(trial):
        # loaders (trial-specific RNG for shuffling)
        print(f"\n[trial {trial.number}] build loaders...", flush=True)
        g = torch.Generator().manual_seed(SEED + trial.number)  # per-trial determenistic shuffle
        train_loader = DataLoader(
            TemporalFeatureDataset(train_df, FEATURE_DIR), # dataset for training
            batch_size=BATCH_SIZE, shuffle=True, generator=g, 
            num_workers=NUM_WORKERS, pin_memory=PIN       # pin mem if on CUDA for faster host->device copies
        )
        val_loader = DataLoader(
            TemporalFeatureDataset(val_df, FEATURE_DIR),
            batch_size=BATCH_SIZE, shuffle=False,    # No shuffle in case of validation
            num_workers=NUM_WORKERS, pin_memory=PIN
        )

        # hyperparams
        nhead   = trial.suggest_categorical("nhead", [4, 8])  # Suggests either 4 or 8
        num_layers      = trial.suggest_int("num_layers", 1, 4)   # Suggests any integer between 1 and 4
        lr              = trial.suggest_float("lr", 1e-4, 1e-2, log=True)   # Suggests any positive float between the range sampled log uniformly
        weight_decay    = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True) # Any positive float - also log uniform

        model = TransformerRegressor(d_model=768, nhead=nhead, num_layers=num_layers).to(DEVICE) # Create an instance of the model
        model, val_mse, val_pc = train_and_evaluate(model, train_loader, val_loader, lr, weight_decay, verbose=True) # Train and evaluate
        
        trial.set_user_attr("val_pc", float(val_pc)) # store Pearson on val
        trial.set_user_attr("state_dict", copy.deepcopy(model.state_dict())) # Stores a full copy of weights on every trial
        return float(val_mse) # Optuna will minimize this objective 
    return objective

In [8]:
# One function to run the full experiment
def run_split(name: str, train_ids: list, val_ids: list, input_dim: int = 768):
    print(f"\n=== Running split: {name} ===")
    df = pd.read_csv("FinalDataset.csv") # load target dataframe with log_view_count and video_id
    
    train_df = df[df.video_id.isin(train_ids)].reset_index(drop=True)  # Prepare the train DF
    val_df   = df[df.video_id.isin(val_ids)].reset_index(drop=True)    # Prepare the validation DF

    # run Optuna
    study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=SEED))  # TPESampler with a fixed seed for reproducible sampling order
    study.optimize(objective_builder(train_df, val_df, input_dim), n_trials=N_TRIALS)
    best = study.best_params    # Extract the trial's params and score
    print("Best hyper-parameters:", best)
    print("Best Val MSE (Optuna):", study.best_trial.value)

    # rebuild best model and load the exact best-epoch weights saved in the trial
    model = TransformerRegressor(d_model=768, nhead=best["nhead"], num_layers=best["num_layers"]).to(DEVICE)
    model.load_state_dict(study.best_trial.user_attrs["state_dict"])  # Restore the best state dict

    # eval loaders (no shuffle)
    train_loader = DataLoader(TemporalFeatureDataset(train_df, FEATURE_DIR),
                              batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=PIN)
    val_loader   = DataLoader(TemporalFeatureDataset(val_df, FEATURE_DIR),
                              batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=PIN)

    tr_mse, tr_pc, _, _ = evaluate(model, train_loader)
    vl_mse, vl_pc, vl_preds, vl_tgts = evaluate(model, val_loader)
    print(f"Reloaded eval — Train MSE={tr_mse:.4f}, PC={tr_pc:.4f} | Val MSE={vl_mse:.4f}, PC={vl_pc:.4f} "
          f"| ΔMSE={abs(vl_mse - study.best_trial.value):.6f}")   # Check that reloaded model gives the same MSE

    # save bundle + study
    bundle_path = MODEL_DIR / f"video_reg_{name}.pt"
    torch.save({
        "state_dict": study.best_trial.user_attrs["state_dict"],   # Exact best weights
        "best_params": best,                     # Optuna best parameters
        "val_mse": study.best_trial.value,   # Best val MSE
        "val_pc": study.best_trial.user_attrs["val_pc"],  # Best val PC
    }, bundle_path)

    with open(MODEL_DIR / f"video_reg_{name}.pkl", "wb") as f:
        pickle.dump(study, f)      # Save the entire study

    # Save the validation predictions for late-fusion
    out = pd.DataFrame({
        "video_id": val_df["video_id"],
        "y_true" : vl_tgts.astype(float),
        "y_pred"  : vl_preds.astype(float),
    })
    fname = RESULTS_DIR / f"video_reg_val_{name}.csv"
    out.to_csv(fname, index=False)
    print(f"Saved VAL preds → {fname}")

    return {
        "model": model, "train_df": train_df, "val_df": val_df,
        "vl_preds": vl_preds, "vl_tgts": vl_tgts, "vl_mse": vl_mse, "vl_pc": vl_pc,
        "bundle_path": bundle_path
    }

# Experiments

## Person-Independent Split

In [7]:
df = pd.read_csv('FinalDataset.csv')

train_ids_ind = set(os.listdir('Person-Independent_Split/train'))
val_ids_ind = set(os.listdir('Person-Independent_Split/val'))

res_ind = run_split("ind", train_ids_ind, val_ids_ind, input_dim=768)

[I 2025-08-17 13:22:54,914] A new study created in memory with name: no-name-8e426c5e-188e-4f76-ab28-8e0b8ceef538



=== Running split: ind ===

[trial 0] build loaders...




Epoch 01: Train Huber=3.0442, Train MSE=19.4923, PC=0.1382 | Val MSE=16.1234, PC=-0.0613
Epoch 02: Train Huber=2.5197, Train MSE=13.9490, PC=0.3691 | Val MSE=24.5051, PC=0.0154
Epoch 03: Train Huber=2.4539, Train MSE=15.2144, PC=0.3670 | Val MSE=17.1605, PC=0.0925
Epoch 04: Train Huber=2.3317, Train MSE=14.0142, PC=0.4078 | Val MSE=19.6433, PC=0.1440
Epoch 05: Train Huber=2.3208, Train MSE=13.8958, PC=0.4347 | Val MSE=23.6013, PC=-0.0262
Epoch 06: Train Huber=2.3411, Train MSE=14.2941, PC=0.4217 | Val MSE=18.8402, PC=0.0941
Epoch 07: Train Huber=2.2435, Train MSE=13.4543, PC=0.4481 | Val MSE=15.1642, PC=0.1950
Epoch 08: Train Huber=2.2803, Train MSE=13.2981, PC=0.4486 | Val MSE=20.0946, PC=0.0837
Epoch 09: Train Huber=2.2356, Train MSE=13.0736, PC=0.4523 | Val MSE=17.9258, PC=0.1207
Epoch 10: Train Huber=2.1923, Train MSE=12.8365, PC=0.4626 | Val MSE=21.5928, PC=0.0228
Epoch 11: Train Huber=2.2290, Train MSE=13.0291, PC=0.4563 | Val MSE=19.8727, PC=0.0958
Epoch 12: Train Huber=2.2213, 

[I 2025-08-17 13:24:40,272] Trial 0 finished with value: 15.164175033569336 and parameters: {'hidden_dim': 232, 'nhead': 4, 'num_layers': 3, 'lr': 0.0002051338263087451, 'weight_decay': 2.9375384576328313e-06}. Best is trial 0 with value: 15.164175033569336.


Epoch 17: Train Huber=2.0956, Train MSE=12.0955, PC=0.5047 | Val MSE=22.2655, PC=0.0084
Early stopping

[trial 1] build loaders...




Epoch 01: Train Huber=3.0669, Train MSE=19.2967, PC=0.1630 | Val MSE=30.2277, PC=-0.0345
Epoch 02: Train Huber=2.5683, Train MSE=15.3208, PC=0.3233 | Val MSE=22.3615, PC=-0.0946
Epoch 03: Train Huber=2.4101, Train MSE=14.1539, PC=0.4053 | Val MSE=25.6819, PC=-0.0332
Epoch 04: Train Huber=2.3112, Train MSE=13.7409, PC=0.4340 | Val MSE=16.0933, PC=0.1309
Epoch 05: Train Huber=2.3096, Train MSE=13.6079, PC=0.4303 | Val MSE=16.4088, PC=0.1126
Epoch 06: Train Huber=2.2902, Train MSE=13.1402, PC=0.4443 | Val MSE=24.2518, PC=0.0241
Epoch 07: Train Huber=2.3149, Train MSE=14.0750, PC=0.4324 | Val MSE=21.8683, PC=0.0094
Epoch 08: Train Huber=2.2534, Train MSE=12.9104, PC=0.4641 | Val MSE=20.4334, PC=0.0582
Epoch 09: Train Huber=2.1952, Train MSE=13.0907, PC=0.4665 | Val MSE=19.2623, PC=0.0932
Epoch 10: Train Huber=2.1599, Train MSE=12.8418, PC=0.4800 | Val MSE=23.4825, PC=-0.0041
Epoch 11: Train Huber=2.2382, Train MSE=13.5311, PC=0.4403 | Val MSE=18.9698, PC=0.0983
Epoch 12: Train Huber=2.2079

[I 2025-08-17 13:27:07,614] Trial 1 finished with value: 15.375410079956055 and parameters: {'hidden_dim': 90, 'nhead': 4, 'num_layers': 3, 'lr': 0.00010994335574766199, 'weight_decay': 0.0008123245085588687}. Best is trial 0 with value: 15.164175033569336.


Epoch 24: Train Huber=2.2171, Train MSE=12.8059, PC=0.4818 | Val MSE=21.9207, PC=0.0545
Early stopping

[trial 2] build loaders...
Epoch 01: Train Huber=3.2111, Train MSE=21.6119, PC=0.0369 | Val MSE=16.5383, PC=0.0832
Epoch 02: Train Huber=2.6944, Train MSE=16.4813, PC=0.2726 | Val MSE=29.7748, PC=-0.0046
Epoch 03: Train Huber=2.3199, Train MSE=13.9648, PC=0.4327 | Val MSE=23.4330, PC=0.0600
Epoch 04: Train Huber=2.2544, Train MSE=13.3800, PC=0.4569 | Val MSE=20.6921, PC=0.0753
Epoch 05: Train Huber=2.2644, Train MSE=13.4510, PC=0.4589 | Val MSE=17.4829, PC=0.1216
Epoch 06: Train Huber=2.2579, Train MSE=13.3007, PC=0.4583 | Val MSE=15.4804, PC=0.1155
Epoch 07: Train Huber=2.2057, Train MSE=12.9881, PC=0.4693 | Val MSE=21.8720, PC=0.0509
Epoch 08: Train Huber=2.1266, Train MSE=12.7280, PC=0.4972 | Val MSE=22.9650, PC=0.1011
Epoch 09: Train Huber=2.1409, Train MSE=12.7215, PC=0.4867 | Val MSE=25.3808, PC=0.0292
Epoch 10: Train Huber=1.9876, Train MSE=11.8494, PC=0.5508 | Val MSE=20.0217

[I 2025-08-17 13:27:48,292] Trial 2 finished with value: 15.480372428894043 and parameters: {'hidden_dim': 437, 'nhead': 4, 'num_layers': 1, 'lr': 0.0004059611610484307, 'weight_decay': 3.752055855124284e-05}. Best is trial 0 with value: 15.164175033569336.


Epoch 16: Train Huber=1.6157, Train MSE=9.2176, PC=0.6633 | Val MSE=20.8301, PC=0.1861
Early stopping

[trial 3] build loaders...
Epoch 01: Train Huber=3.0155, Train MSE=18.7328, PC=0.1304 | Val MSE=19.6372, PC=0.0429
Epoch 02: Train Huber=2.5382, Train MSE=14.5121, PC=0.3518 | Val MSE=23.1805, PC=0.0420
Epoch 03: Train Huber=2.6296, Train MSE=15.8888, PC=0.3515 | Val MSE=18.3852, PC=0.0776
Epoch 04: Train Huber=2.4286, Train MSE=14.4113, PC=0.3909 | Val MSE=20.7766, PC=0.0696
Epoch 05: Train Huber=2.2439, Train MSE=13.3259, PC=0.4556 | Val MSE=20.4818, PC=0.1003
Epoch 06: Train Huber=2.2380, Train MSE=13.2374, PC=0.4614 | Val MSE=24.6716, PC=0.0929
Epoch 07: Train Huber=2.2011, Train MSE=13.3763, PC=0.4622 | Val MSE=23.4465, PC=0.0920
Epoch 08: Train Huber=2.1175, Train MSE=12.3370, PC=0.5050 | Val MSE=19.4172, PC=0.1037
Epoch 09: Train Huber=2.0917, Train MSE=12.5536, PC=0.5109 | Val MSE=20.6075, PC=0.0560
Epoch 10: Train Huber=1.9021, Train MSE=11.4916, PC=0.5634 | Val MSE=22.9255, 

[I 2025-08-17 13:28:22,129] Trial 3 finished with value: 18.385221481323242 and parameters: {'hidden_dim': 257, 'nhead': 8, 'num_layers': 1, 'lr': 0.0003839629299804173, 'weight_decay': 1.2562773503807034e-05}. Best is trial 0 with value: 15.164175033569336.


Epoch 13: Train Huber=1.6056, Train MSE=9.5415, PC=0.6539 | Val MSE=24.2842, PC=0.0798
Early stopping

[trial 4] build loaders...
Epoch 01: Train Huber=3.1498, Train MSE=19.3276, PC=0.0327 | Val MSE=18.6776, PC=0.0182
Epoch 02: Train Huber=3.0807, Train MSE=17.4048, PC=-0.0262 | Val MSE=13.8355, PC=0.1530
Epoch 03: Train Huber=3.1124, Train MSE=18.1523, PC=-0.0726 | Val MSE=17.1056, PC=0.2164
Epoch 04: Train Huber=3.1029, Train MSE=18.1885, PC=-0.0591 | Val MSE=43.2808, PC=-0.0079
Epoch 05: Train Huber=3.0762, Train MSE=19.2126, PC=0.0214 | Val MSE=20.9907, PC=-0.0620
Epoch 06: Train Huber=3.0513, Train MSE=16.7584, PC=-0.0189 | Val MSE=15.8820, PC=-0.0330
Epoch 07: Train Huber=3.0359, Train MSE=17.2521, PC=0.0724 | Val MSE=23.1535, PC=0.0570
Epoch 08: Train Huber=3.0904, Train MSE=17.3188, PC=-0.0921 | Val MSE=13.6287, PC=0.1784
Epoch 09: Train Huber=3.0474, Train MSE=17.1013, PC=0.0260 | Val MSE=13.8121, PC=0.0197
Epoch 10: Train Huber=3.0736, Train MSE=17.3623, PC=-0.0329 | Val MSE=

[I 2025-08-17 13:30:13,315] Trial 4 finished with value: 13.628695487976074 and parameters: {'hidden_dim': 268, 'nhead': 4, 'num_layers': 3, 'lr': 0.0015304852121831463, 'weight_decay': 1.3783237455007196e-06}. Best is trial 4 with value: 13.628695487976074.


Epoch 18: Train Huber=3.0333, Train MSE=16.8243, PC=-0.0918 | Val MSE=22.1780, PC=0.1772
Early stopping

[trial 5] build loaders...
Epoch 01: Train Huber=3.7315, Train MSE=27.1772, PC=-0.0262 | Val MSE=67.5396, PC=0.0000
Epoch 02: Train Huber=3.4745, Train MSE=23.3755, PC=0.0085 | Val MSE=17.3546, PC=-0.1723
Epoch 03: Train Huber=3.0994, Train MSE=18.3225, PC=-0.0029 | Val MSE=13.6868, PC=-0.0201
Epoch 04: Train Huber=3.2694, Train MSE=19.1343, PC=-0.0646 | Val MSE=13.8302, PC=0.1595
Epoch 05: Train Huber=3.1669, Train MSE=18.3230, PC=-0.0518 | Val MSE=17.6626, PC=-0.0480
Epoch 06: Train Huber=3.1581, Train MSE=18.4496, PC=0.0712 | Val MSE=17.3357, PC=-0.0000
Epoch 07: Train Huber=3.2608, Train MSE=19.7532, PC=-0.0351 | Val MSE=16.9982, PC=0.0000
Epoch 08: Train Huber=3.1640, Train MSE=19.8153, PC=-0.0123 | Val MSE=24.6789, PC=0.0000
Epoch 09: Train Huber=3.4046, Train MSE=22.4796, PC=0.0169 | Val MSE=56.5736, PC=-0.0000
Epoch 10: Train Huber=3.5630, Train MSE=24.3774, PC=0.0124 | Val 

[I 2025-08-17 13:31:54,712] Trial 5 finished with value: 13.686786651611328 and parameters: {'hidden_dim': 336, 'nhead': 4, 'num_layers': 4, 'lr': 0.00853618986286683, 'weight_decay': 0.0002661901888489054}. Best is trial 4 with value: 13.628695487976074.


Epoch 13: Train Huber=3.2581, Train MSE=19.2748, PC=-0.0235 | Val MSE=15.7330, PC=0.0000
Early stopping

[trial 6] build loaders...
Epoch 01: Train Huber=2.8463, Train MSE=17.3737, PC=0.2425 | Val MSE=19.7158, PC=0.0546
Epoch 02: Train Huber=2.7031, Train MSE=16.3315, PC=0.2869 | Val MSE=19.5499, PC=0.0864
Epoch 03: Train Huber=2.5512, Train MSE=15.1042, PC=0.3483 | Val MSE=22.9696, PC=0.0140
Epoch 04: Train Huber=2.3100, Train MSE=13.5097, PC=0.4333 | Val MSE=20.3778, PC=0.0853
Epoch 05: Train Huber=2.2492, Train MSE=13.2943, PC=0.4665 | Val MSE=21.9847, PC=0.0159
Epoch 06: Train Huber=2.2119, Train MSE=13.3127, PC=0.4587 | Val MSE=20.0398, PC=0.1023
Epoch 07: Train Huber=2.1757, Train MSE=12.7649, PC=0.4795 | Val MSE=20.8744, PC=0.0641
Epoch 08: Train Huber=2.2554, Train MSE=13.3734, PC=0.4418 | Val MSE=21.5150, PC=0.1279
Epoch 09: Train Huber=2.1805, Train MSE=13.0352, PC=0.4780 | Val MSE=19.9992, PC=0.1188
Epoch 10: Train Huber=2.0595, Train MSE=12.2643, PC=0.5114 | Val MSE=22.2953

[I 2025-08-17 13:32:48,517] Trial 6 finished with value: 19.549907684326172 and parameters: {'hidden_dim': 200, 'nhead': 8, 'num_layers': 2, 'lr': 0.00017541893487450815, 'weight_decay': 3.058656666978529e-05}. Best is trial 4 with value: 13.628695487976074.


Epoch 12: Train Huber=2.0875, Train MSE=12.3067, PC=0.5108 | Val MSE=21.3264, PC=0.1678
Early stopping

[trial 7] build loaders...
Epoch 01: Train Huber=3.4564, Train MSE=23.0399, PC=-0.0094 | Val MSE=21.5938, PC=0.0445
Epoch 02: Train Huber=2.9862, Train MSE=16.3942, PC=0.0355 | Val MSE=14.0103, PC=0.0691
Epoch 03: Train Huber=3.0560, Train MSE=17.2718, PC=0.0045 | Val MSE=19.8886, PC=-0.0176
Epoch 04: Train Huber=3.0042, Train MSE=18.3172, PC=0.0520 | Val MSE=21.8308, PC=-0.0602
Epoch 05: Train Huber=2.9076, Train MSE=17.2200, PC=0.0876 | Val MSE=23.5255, PC=-0.0706
Epoch 06: Train Huber=2.7636, Train MSE=16.3074, PC=0.1958 | Val MSE=25.8353, PC=0.0061
Epoch 07: Train Huber=2.8814, Train MSE=16.2233, PC=0.1573 | Val MSE=17.4625, PC=-0.0318
Epoch 08: Train Huber=2.5506, Train MSE=14.0877, PC=0.3730 | Val MSE=23.8501, PC=0.1188
Epoch 09: Train Huber=2.7526, Train MSE=15.7520, PC=0.2154 | Val MSE=16.9447, PC=0.0996
Epoch 10: Train Huber=3.0580, Train MSE=18.4978, PC=-0.0077 | Val MSE=15

[I 2025-08-17 13:34:01,283] Trial 7 finished with value: 14.010329246520996 and parameters: {'hidden_dim': 79, 'nhead': 4, 'num_layers': 3, 'lr': 0.0004201672054372534, 'weight_decay': 3.632486956676606e-05}. Best is trial 4 with value: 13.628695487976074.


Epoch 12: Train Huber=2.9914, Train MSE=16.0923, PC=-0.0541 | Val MSE=18.4854, PC=-0.1334
Early stopping

[trial 8] build loaders...
Epoch 01: Train Huber=3.6378, Train MSE=25.5704, PC=-0.0237 | Val MSE=16.9148, PC=0.0259
Epoch 02: Train Huber=3.5115, Train MSE=26.4435, PC=-0.0143 | Val MSE=13.6887, PC=0.0205
Epoch 03: Train Huber=3.0397, Train MSE=17.2740, PC=0.0666 | Val MSE=19.5614, PC=-0.0025
Epoch 04: Train Huber=3.4544, Train MSE=22.2550, PC=0.0207 | Val MSE=23.0876, PC=-0.0159
Epoch 05: Train Huber=3.0238, Train MSE=17.5266, PC=0.0806 | Val MSE=26.3983, PC=-0.0612
Epoch 06: Train Huber=3.2062, Train MSE=20.3763, PC=0.1240 | Val MSE=23.7588, PC=0.0564
Epoch 07: Train Huber=3.2482, Train MSE=20.0275, PC=-0.1244 | Val MSE=25.2083, PC=-0.0867
Epoch 08: Train Huber=3.3688, Train MSE=22.5855, PC=0.0186 | Val MSE=23.8503, PC=-0.0000
Epoch 09: Train Huber=3.1186, Train MSE=17.7757, PC=0.0031 | Val MSE=13.6578, PC=-0.0347
Epoch 10: Train Huber=3.2772, Train MSE=19.7725, PC=-0.0917 | Val 

[I 2025-08-17 13:36:33,634] Trial 8 finished with value: 13.657798767089844 and parameters: {'hidden_dim': 309, 'nhead': 8, 'num_layers': 4, 'lr': 0.007568292060167619, 'weight_decay': 0.00048359527764659497}. Best is trial 4 with value: 13.628695487976074.


Epoch 19: Train Huber=3.0975, Train MSE=17.3688, PC=-0.1044 | Val MSE=18.2528, PC=-0.0474
Early stopping

[trial 9] build loaders...
Epoch 01: Train Huber=3.1506, Train MSE=20.9318, PC=0.1196 | Val MSE=20.1378, PC=0.0606
Epoch 02: Train Huber=2.4159, Train MSE=13.9478, PC=0.3948 | Val MSE=21.9562, PC=0.0364
Epoch 03: Train Huber=2.3616, Train MSE=14.1237, PC=0.4065 | Val MSE=17.9060, PC=0.0839
Epoch 04: Train Huber=2.2801, Train MSE=13.1135, PC=0.4472 | Val MSE=20.2666, PC=0.0790
Epoch 05: Train Huber=2.2736, Train MSE=13.2240, PC=0.4427 | Val MSE=18.6829, PC=0.1125
Epoch 06: Train Huber=2.1616, Train MSE=12.6558, PC=0.4860 | Val MSE=20.8534, PC=0.0421
Epoch 07: Train Huber=2.1667, Train MSE=12.9949, PC=0.4807 | Val MSE=21.4492, PC=0.0524
Epoch 08: Train Huber=2.0686, Train MSE=12.7379, PC=0.4922 | Val MSE=24.0536, PC=0.0854
Epoch 09: Train Huber=2.1304, Train MSE=12.7512, PC=0.5058 | Val MSE=19.6216, PC=0.1280
Epoch 10: Train Huber=2.0561, Train MSE=12.2060, PC=0.5139 | Val MSE=18.294

[I 2025-08-17 13:38:14,392] Trial 9 finished with value: 14.838336944580078 and parameters: {'hidden_dim': 332, 'nhead': 4, 'num_layers': 1, 'lr': 0.00012315571723666037, 'weight_decay': 9.462175356461487e-06}. Best is trial 4 with value: 13.628695487976074.


Epoch 40: Train Huber=0.4239, Train MSE=2.0841, PC=0.9319 | Val MSE=18.7012, PC=0.2510

[trial 10] build loaders...
Epoch 01: Train Huber=3.4252, Train MSE=22.9648, PC=-0.0475 | Val MSE=13.6286, PC=0.0995
Epoch 02: Train Huber=3.0846, Train MSE=19.1446, PC=0.0395 | Val MSE=20.8692, PC=0.0973
Epoch 03: Train Huber=2.9612, Train MSE=16.8771, PC=0.0818 | Val MSE=14.8056, PC=-0.0141
Epoch 04: Train Huber=3.0159, Train MSE=16.8456, PC=0.0175 | Val MSE=38.0397, PC=-0.0900
Epoch 05: Train Huber=3.1163, Train MSE=17.8780, PC=-0.0828 | Val MSE=14.6271, PC=-0.0521
Epoch 06: Train Huber=3.2065, Train MSE=19.7012, PC=-0.0003 | Val MSE=20.2421, PC=-0.0428
Epoch 07: Train Huber=3.0094, Train MSE=16.6325, PC=0.0355 | Val MSE=14.2514, PC=0.0232
Epoch 08: Train Huber=3.1607, Train MSE=19.2402, PC=0.0421 | Val MSE=13.6365, PC=0.1466
Epoch 09: Train Huber=3.0447, Train MSE=16.7029, PC=-0.0318 | Val MSE=16.6432, PC=-0.0656
Epoch 10: Train Huber=3.0593, Train MSE=17.1222, PC=-0.0559 | Val MSE=13.9230, PC=0

[I 2025-08-17 13:39:03,135] Trial 10 finished with value: 13.628628730773926 and parameters: {'hidden_dim': 488, 'nhead': 8, 'num_layers': 2, 'lr': 0.0022005828785027445, 'weight_decay': 1.1054533004009806e-06}. Best is trial 10 with value: 13.628628730773926.


Epoch 11: Train Huber=3.0450, Train MSE=16.9713, PC=0.0571 | Val MSE=14.0251, PC=-0.0406
Early stopping

[trial 11] build loaders...
Epoch 01: Train Huber=3.2719, Train MSE=20.0911, PC=-0.0204 | Val MSE=14.4940, PC=-0.2928
Epoch 02: Train Huber=3.4111, Train MSE=21.2010, PC=-0.0304 | Val MSE=13.6591, PC=0.2552
Epoch 03: Train Huber=2.9756, Train MSE=16.0857, PC=0.0461 | Val MSE=17.4942, PC=-0.1281
Epoch 04: Train Huber=3.0048, Train MSE=16.5555, PC=0.0822 | Val MSE=19.8039, PC=-0.1431
Epoch 05: Train Huber=2.9760, Train MSE=16.0295, PC=0.0802 | Val MSE=22.3845, PC=-0.1082
Epoch 06: Train Huber=3.1942, Train MSE=19.8437, PC=0.0378 | Val MSE=16.2713, PC=-0.2513
Epoch 07: Train Huber=3.0333, Train MSE=17.2161, PC=0.0120 | Val MSE=13.7844, PC=-0.1190
Epoch 08: Train Huber=3.0288, Train MSE=17.1085, PC=-0.0339 | Val MSE=21.4479, PC=0.0398
Epoch 09: Train Huber=3.0067, Train MSE=16.5892, PC=0.0113 | Val MSE=15.4747, PC=0.1393
Epoch 10: Train Huber=3.0246, Train MSE=16.7941, PC=-0.0063 | Val 

[I 2025-08-17 13:39:57,269] Trial 11 finished with value: 13.6591157913208 and parameters: {'hidden_dim': 471, 'nhead': 8, 'num_layers': 2, 'lr': 0.0017937722033317504, 'weight_decay': 1.026076807471788e-06}. Best is trial 10 with value: 13.628628730773926.


Epoch 12: Train Huber=3.0939, Train MSE=17.9029, PC=-0.0631 | Val MSE=17.6320, PC=0.1841
Early stopping

[trial 12] build loaders...
Epoch 01: Train Huber=3.1729, Train MSE=19.1507, PC=0.0689 | Val MSE=15.1836, PC=0.0869
Epoch 02: Train Huber=3.0950, Train MSE=17.6680, PC=-0.0060 | Val MSE=19.1699, PC=-0.1279
Epoch 03: Train Huber=3.3153, Train MSE=20.9422, PC=-0.0178 | Val MSE=29.2143, PC=0.0347
Epoch 04: Train Huber=3.0517, Train MSE=18.0563, PC=-0.0116 | Val MSE=13.7593, PC=-0.0160
Epoch 05: Train Huber=3.1302, Train MSE=18.0481, PC=-0.0539 | Val MSE=26.5548, PC=0.0416
Epoch 06: Train Huber=3.2013, Train MSE=19.7334, PC=-0.0584 | Val MSE=20.7690, PC=-0.0519
Epoch 07: Train Huber=3.0202, Train MSE=17.5977, PC=0.0575 | Val MSE=34.3575, PC=-0.1805
Epoch 08: Train Huber=3.0407, Train MSE=17.0531, PC=-0.0273 | Val MSE=15.0238, PC=0.1059
Epoch 09: Train Huber=3.1703, Train MSE=18.3348, PC=0.0023 | Val MSE=15.3230, PC=0.0391
Epoch 10: Train Huber=3.2019, Train MSE=18.7255, PC=-0.0844 | Val

[I 2025-08-17 13:40:59,783] Trial 12 finished with value: 13.759349822998047 and parameters: {'hidden_dim': 402, 'nhead': 8, 'num_layers': 2, 'lr': 0.002148112566613969, 'weight_decay': 1.124576902080239e-06}. Best is trial 10 with value: 13.628628730773926.


Epoch 14: Train Huber=3.1102, Train MSE=17.4690, PC=-0.1025 | Val MSE=22.0629, PC=-0.1083
Early stopping

[trial 13] build loaders...
Epoch 01: Train Huber=3.2216, Train MSE=20.0198, PC=0.0325 | Val MSE=13.6528, PC=0.0108
Epoch 02: Train Huber=3.0538, Train MSE=18.8062, PC=0.0768 | Val MSE=17.9924, PC=-0.0016
Epoch 03: Train Huber=3.0455, Train MSE=16.6492, PC=0.0238 | Val MSE=33.1781, PC=-0.0498
Epoch 04: Train Huber=2.9344, Train MSE=16.8635, PC=0.1073 | Val MSE=15.1231, PC=0.0315
Epoch 05: Train Huber=3.5288, Train MSE=22.9259, PC=-0.1704 | Val MSE=15.8613, PC=-0.0211
Epoch 06: Train Huber=3.0657, Train MSE=17.2135, PC=-0.0877 | Val MSE=37.8780, PC=0.1104
Epoch 07: Train Huber=3.0571, Train MSE=17.2068, PC=-0.0201 | Val MSE=26.6252, PC=-0.0000
Epoch 08: Train Huber=3.0726, Train MSE=17.8652, PC=0.0525 | Val MSE=30.6251, PC=0.0557
Epoch 09: Train Huber=3.0212, Train MSE=16.9940, PC=-0.0208 | Val MSE=24.1833, PC=0.0483
Epoch 10: Train Huber=3.1135, Train MSE=18.0585, PC=-0.0659 | Val 

[I 2025-08-17 13:42:09,268] Trial 13 finished with value: 13.652800559997559 and parameters: {'hidden_dim': 171, 'nhead': 8, 'num_layers': 3, 'lr': 0.0030018887188126616, 'weight_decay': 3.112170374008624e-06}. Best is trial 10 with value: 13.628628730773926.


Epoch 11: Train Huber=3.0872, Train MSE=17.4574, PC=-0.0532 | Val MSE=37.3331, PC=-0.0186
Early stopping

[trial 14] build loaders...
Epoch 01: Train Huber=3.1354, Train MSE=19.7005, PC=0.1018 | Val MSE=13.7557, PC=0.1865
Epoch 02: Train Huber=2.8392, Train MSE=16.4132, PC=0.2314 | Val MSE=38.5722, PC=-0.0287
Epoch 03: Train Huber=2.9202, Train MSE=17.3500, PC=0.1223 | Val MSE=13.3547, PC=0.1905
Epoch 04: Train Huber=2.8405, Train MSE=16.4373, PC=0.2270 | Val MSE=18.7979, PC=-0.0982
Epoch 05: Train Huber=2.5330, Train MSE=15.1392, PC=0.3335 | Val MSE=21.5254, PC=-0.1456
Epoch 06: Train Huber=2.7767, Train MSE=16.3829, PC=0.2136 | Val MSE=24.9632, PC=0.0778
Epoch 07: Train Huber=2.6837, Train MSE=16.1012, PC=0.2906 | Val MSE=17.5832, PC=0.1261
Epoch 08: Train Huber=2.5211, Train MSE=14.5855, PC=0.3381 | Val MSE=16.9820, PC=0.0783
Epoch 09: Train Huber=2.4991, Train MSE=14.7815, PC=0.3610 | Val MSE=22.7718, PC=0.0345
Epoch 10: Train Huber=2.4132, Train MSE=13.7377, PC=0.4005 | Val MSE=21

[I 2025-08-17 13:43:06,120] Trial 14 finished with value: 13.354726791381836 and parameters: {'hidden_dim': 380, 'nhead': 4, 'num_layers': 2, 'lr': 0.0009819347379144987, 'weight_decay': 3.491097047653223e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 13: Train Huber=2.9310, Train MSE=17.1307, PC=0.1321 | Val MSE=20.7732, PC=0.1669
Early stopping

[trial 15] build loaders...
Epoch 01: Train Huber=3.0500, Train MSE=18.7131, PC=0.1825 | Val MSE=19.6628, PC=0.0937
Epoch 02: Train Huber=2.7687, Train MSE=16.5669, PC=0.2674 | Val MSE=18.4396, PC=0.1139
Epoch 03: Train Huber=2.4293, Train MSE=14.1193, PC=0.3922 | Val MSE=28.9399, PC=-0.0059
Epoch 04: Train Huber=2.4317, Train MSE=14.1691, PC=0.3974 | Val MSE=19.3167, PC=0.0922
Epoch 05: Train Huber=2.3592, Train MSE=13.7957, PC=0.4124 | Val MSE=21.9319, PC=0.0234
Epoch 06: Train Huber=2.2997, Train MSE=13.2682, PC=0.4490 | Val MSE=19.4931, PC=0.0813
Epoch 07: Train Huber=2.4869, Train MSE=15.0435, PC=0.3645 | Val MSE=24.5988, PC=-0.0371
Epoch 08: Train Huber=2.4981, Train MSE=14.8367, PC=0.3396 | Val MSE=25.8900, PC=-0.0360
Epoch 09: Train Huber=2.4000, Train MSE=14.4582, PC=0.3985 | Val MSE=21.2271, PC=-0.0527
Epoch 10: Train Huber=2.5089, Train MSE=14.3882, PC=0.3660 | Val MSE=26.

[I 2025-08-17 13:43:59,405] Trial 15 finished with value: 18.439605712890625 and parameters: {'hidden_dim': 505, 'nhead': 8, 'num_layers': 2, 'lr': 0.0008463435801653472, 'weight_decay': 3.6811934126159423e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 12: Train Huber=2.4446, Train MSE=14.2920, PC=0.3929 | Val MSE=24.8777, PC=-0.1277
Early stopping

[trial 16] build loaders...
Epoch 01: Train Huber=3.5380, Train MSE=24.5026, PC=-0.0760 | Val MSE=14.1119, PC=-0.0660
Epoch 02: Train Huber=3.1993, Train MSE=19.3044, PC=0.0097 | Val MSE=31.9984, PC=-0.0463
Epoch 03: Train Huber=3.0621, Train MSE=17.5713, PC=-0.0063 | Val MSE=27.0084, PC=-0.0194
Epoch 04: Train Huber=3.1172, Train MSE=18.3441, PC=0.0227 | Val MSE=14.4312, PC=0.0056
Epoch 05: Train Huber=3.1697, Train MSE=19.0381, PC=-0.0755 | Val MSE=29.7525, PC=0.0187
Epoch 06: Train Huber=3.1463, Train MSE=18.7847, PC=-0.0482 | Val MSE=24.1611, PC=0.0019
Epoch 07: Train Huber=3.0914, Train MSE=17.0827, PC=-0.0826 | Val MSE=13.7939, PC=0.0402
Epoch 08: Train Huber=3.0454, Train MSE=17.8094, PC=-0.0605 | Val MSE=18.2451, PC=0.0564
Epoch 09: Train Huber=3.2554, Train MSE=19.9013, PC=0.0073 | Val MSE=34.3469, PC=0.2299
Epoch 10: Train Huber=3.2508, Train MSE=19.7652, PC=-0.0099 | Val 

[I 2025-08-17 13:45:15,427] Trial 16 finished with value: 13.793856620788574 and parameters: {'hidden_dim': 392, 'nhead': 8, 'num_layers': 2, 'lr': 0.0038413104579816506, 'weight_decay': 8.64380195313648e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 17: Train Huber=3.0544, Train MSE=17.0872, PC=-0.1221 | Val MSE=16.0809, PC=-0.0531
Early stopping

[trial 17] build loaders...
Epoch 01: Train Huber=2.9573, Train MSE=19.0043, PC=0.1863 | Val MSE=22.6499, PC=-0.0138
Epoch 02: Train Huber=2.4203, Train MSE=14.0865, PC=0.4036 | Val MSE=24.9247, PC=-0.0104
Epoch 03: Train Huber=2.3473, Train MSE=13.9061, PC=0.4314 | Val MSE=17.3909, PC=0.1130
Epoch 04: Train Huber=2.3168, Train MSE=14.2119, PC=0.4098 | Val MSE=26.2729, PC=-0.0006
Epoch 05: Train Huber=2.3961, Train MSE=14.3666, PC=0.4240 | Val MSE=16.1472, PC=0.0930
Epoch 06: Train Huber=2.3667, Train MSE=14.4460, PC=0.4048 | Val MSE=20.6786, PC=0.1084
Epoch 07: Train Huber=2.2406, Train MSE=13.2088, PC=0.4601 | Val MSE=21.0802, PC=0.0014
Epoch 08: Train Huber=2.4858, Train MSE=14.3318, PC=0.3805 | Val MSE=21.7670, PC=0.0113
Epoch 09: Train Huber=2.2524, Train MSE=13.4932, PC=0.4619 | Val MSE=19.4125, PC=0.0062
Epoch 10: Train Huber=2.2978, Train MSE=13.7103, PC=0.4315 | Val MSE=19

[I 2025-08-17 13:45:53,188] Trial 17 finished with value: 16.147197723388672 and parameters: {'hidden_dim': 511, 'nhead': 4, 'num_layers': 1, 'lr': 0.0008399037432330655, 'weight_decay': 0.00016379288268615277}. Best is trial 14 with value: 13.354726791381836.


Epoch 15: Train Huber=1.9548, Train MSE=11.3106, PC=0.5587 | Val MSE=19.6310, PC=0.0864
Early stopping

[trial 18] build loaders...
Epoch 01: Train Huber=3.7787, Train MSE=27.2223, PC=-0.1400 | Val MSE=17.6274, PC=0.1330
Epoch 02: Train Huber=3.2670, Train MSE=19.1102, PC=-0.1290 | Val MSE=24.1338, PC=-0.1167
Epoch 03: Train Huber=3.1560, Train MSE=18.8448, PC=0.0437 | Val MSE=21.4756, PC=0.0825
Epoch 04: Train Huber=3.1945, Train MSE=18.7773, PC=-0.0628 | Val MSE=13.6316, PC=0.0950
Epoch 05: Train Huber=3.3673, Train MSE=21.0154, PC=-0.0616 | Val MSE=17.6867, PC=0.0970
Epoch 06: Train Huber=3.0114, Train MSE=16.7611, PC=0.0265 | Val MSE=25.3509, PC=0.0306
Epoch 07: Train Huber=3.1265, Train MSE=18.2578, PC=-0.0665 | Val MSE=16.2042, PC=0.0225
Epoch 08: Train Huber=2.9664, Train MSE=17.4517, PC=0.0980 | Val MSE=17.5000, PC=-0.1296
Epoch 09: Train Huber=3.1113, Train MSE=17.9243, PC=-0.0792 | Val MSE=19.2570, PC=-0.0851
Epoch 10: Train Huber=3.0288, Train MSE=16.6791, PC=-0.0031 | Val M

[I 2025-08-17 13:46:53,615] Trial 18 finished with value: 13.631623268127441 and parameters: {'hidden_dim': 385, 'nhead': 4, 'num_layers': 2, 'lr': 0.004702693051940083, 'weight_decay': 4.335386606184339e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 14: Train Huber=2.9433, Train MSE=16.9110, PC=0.1040 | Val MSE=14.9069, PC=-0.0282
Early stopping

[trial 19] build loaders...
Epoch 01: Train Huber=2.9475, Train MSE=17.6437, PC=0.2025 | Val MSE=15.5590, PC=0.1481
Epoch 02: Train Huber=2.7532, Train MSE=16.9437, PC=0.2623 | Val MSE=15.1385, PC=0.1817
Epoch 03: Train Huber=2.4286, Train MSE=14.2490, PC=0.3875 | Val MSE=22.4981, PC=0.0344
Epoch 04: Train Huber=2.3210, Train MSE=14.1381, PC=0.4286 | Val MSE=19.3570, PC=0.0424
Epoch 05: Train Huber=2.2204, Train MSE=12.5456, PC=0.4852 | Val MSE=19.5476, PC=0.0474
Epoch 06: Train Huber=2.4080, Train MSE=14.5590, PC=0.4029 | Val MSE=26.2228, PC=0.0085
Epoch 07: Train Huber=2.2494, Train MSE=13.4158, PC=0.4548 | Val MSE=24.2565, PC=-0.0052
Epoch 08: Train Huber=2.2774, Train MSE=14.1524, PC=0.4500 | Val MSE=22.2582, PC=0.0562
Epoch 09: Train Huber=2.2144, Train MSE=13.2661, PC=0.4846 | Val MSE=29.0604, PC=0.0373
Epoch 10: Train Huber=2.1122, Train MSE=12.5086, PC=0.5100 | Val MSE=18.38

[I 2025-08-17 13:47:24,990] Trial 19 finished with value: 15.138510704040527 and parameters: {'hidden_dim': 448, 'nhead': 8, 'num_layers': 1, 'lr': 0.0013266044654655276, 'weight_decay': 2.0395592719493216e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 12: Train Huber=2.1505, Train MSE=12.7734, PC=0.5050 | Val MSE=28.5897, PC=0.1190
Early stopping

[trial 20] build loaders...
Epoch 01: Train Huber=3.2273, Train MSE=21.1390, PC=0.0698 | Val MSE=20.0506, PC=0.1154
Epoch 02: Train Huber=2.9030, Train MSE=18.0201, PC=0.1320 | Val MSE=27.3716, PC=0.0048
Epoch 03: Train Huber=2.6918, Train MSE=16.2319, PC=0.2768 | Val MSE=14.4307, PC=0.0651
Epoch 04: Train Huber=2.6834, Train MSE=14.7793, PC=0.2879 | Val MSE=18.3716, PC=0.0573
Epoch 05: Train Huber=2.3265, Train MSE=13.4378, PC=0.4348 | Val MSE=19.5653, PC=0.0395
Epoch 06: Train Huber=2.3046, Train MSE=14.0457, PC=0.4142 | Val MSE=18.2749, PC=0.0454
Epoch 07: Train Huber=2.2491, Train MSE=13.0437, PC=0.4516 | Val MSE=22.9679, PC=-0.0024
Epoch 08: Train Huber=2.3085, Train MSE=13.2232, PC=0.4457 | Val MSE=21.8565, PC=0.0685
Epoch 09: Train Huber=2.1861, Train MSE=12.8224, PC=0.4764 | Val MSE=18.9548, PC=0.0954
Epoch 10: Train Huber=2.2776, Train MSE=13.3562, PC=0.4489 | Val MSE=19.389

[I 2025-08-17 13:48:23,138] Trial 20 finished with value: 14.430673599243164 and parameters: {'hidden_dim': 374, 'nhead': 8, 'num_layers': 2, 'lr': 0.0005926321440719593, 'weight_decay': 7.513270429331075e-05}. Best is trial 14 with value: 13.354726791381836.


Epoch 13: Train Huber=2.5011, Train MSE=15.3667, PC=0.3498 | Val MSE=17.1927, PC=0.1148
Early stopping

[trial 21] build loaders...
Epoch 01: Train Huber=3.2784, Train MSE=20.6399, PC=0.0328 | Val MSE=13.8308, PC=0.0348
Epoch 02: Train Huber=3.1005, Train MSE=17.4281, PC=-0.0851 | Val MSE=17.9344, PC=-0.0193
Epoch 03: Train Huber=3.0801, Train MSE=17.5218, PC=0.0088 | Val MSE=23.7764, PC=-0.0026
Epoch 04: Train Huber=2.9844, Train MSE=16.2502, PC=-0.0598 | Val MSE=24.7854, PC=0.0623
Epoch 05: Train Huber=3.1066, Train MSE=17.6012, PC=0.0876 | Val MSE=32.8422, PC=-0.0698
Epoch 06: Train Huber=3.0207, Train MSE=17.5763, PC=0.0816 | Val MSE=17.0682, PC=-0.0730
Epoch 07: Train Huber=2.9980, Train MSE=16.5813, PC=0.0613 | Val MSE=17.1570, PC=0.1236
Epoch 08: Train Huber=3.0835, Train MSE=17.1552, PC=-0.0397 | Val MSE=16.8988, PC=-0.0785
Epoch 09: Train Huber=3.1681, Train MSE=18.9637, PC=0.0196 | Val MSE=18.2318, PC=-0.0035
Epoch 10: Train Huber=3.0240, Train MSE=16.6064, PC=-0.0482 | Val M

[I 2025-08-17 13:49:30,938] Trial 21 finished with value: 13.830821990966797 and parameters: {'hidden_dim': 135, 'nhead': 4, 'num_layers': 3, 'lr': 0.0011406775530474218, 'weight_decay': 1.5773049626255062e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 11: Train Huber=3.0002, Train MSE=16.2622, PC=-0.0588 | Val MSE=14.5217, PC=-0.0472
Early stopping

[trial 22] build loaders...
Epoch 01: Train Huber=3.1874, Train MSE=19.4900, PC=0.0673 | Val MSE=38.0055, PC=0.0992
Epoch 02: Train Huber=3.0564, Train MSE=17.0615, PC=0.0698 | Val MSE=18.7337, PC=0.0402
Epoch 03: Train Huber=3.0792, Train MSE=17.4193, PC=-0.0746 | Val MSE=28.8249, PC=-0.0558
Epoch 04: Train Huber=3.1386, Train MSE=18.1047, PC=0.0084 | Val MSE=24.4131, PC=-0.0419
Epoch 05: Train Huber=3.0658, Train MSE=17.4729, PC=0.0100 | Val MSE=13.6307, PC=0.0112
Epoch 06: Train Huber=3.1669, Train MSE=18.0655, PC=-0.0838 | Val MSE=19.7399, PC=0.0567
Epoch 07: Train Huber=3.0806, Train MSE=17.5047, PC=-0.0604 | Val MSE=19.1363, PC=0.0481
Epoch 08: Train Huber=2.9803, Train MSE=16.1351, PC=-0.0269 | Val MSE=22.2327, PC=0.0058
Epoch 09: Train Huber=3.0883, Train MSE=17.6854, PC=-0.0029 | Val MSE=22.2559, PC=0.0498
Epoch 10: Train Huber=3.0979, Train MSE=18.0140, PC=-0.0784 | Val M

[I 2025-08-17 13:51:29,898] Trial 22 finished with value: 13.630701065063477 and parameters: {'hidden_dim': 292, 'nhead': 4, 'num_layers': 4, 'lr': 0.0021061180968532487, 'weight_decay': 5.807810520681872e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 15: Train Huber=2.9966, Train MSE=16.8149, PC=-0.0383 | Val MSE=23.1419, PC=0.0220
Early stopping

[trial 23] build loaders...
Epoch 01: Train Huber=3.3100, Train MSE=20.9173, PC=-0.0641 | Val MSE=17.1429, PC=0.0000
Epoch 02: Train Huber=3.0616, Train MSE=17.0661, PC=-0.0662 | Val MSE=16.5176, PC=-0.0976
Epoch 03: Train Huber=3.0382, Train MSE=17.5374, PC=0.0509 | Val MSE=16.4475, PC=0.1288
Epoch 04: Train Huber=3.0290, Train MSE=17.0645, PC=-0.0427 | Val MSE=15.6718, PC=-0.0123
Epoch 05: Train Huber=3.0073, Train MSE=16.4654, PC=-0.1159 | Val MSE=18.3377, PC=0.0520
Epoch 06: Train Huber=3.1372, Train MSE=17.5075, PC=0.0016 | Val MSE=14.1308, PC=-0.0388
Epoch 07: Train Huber=3.0462, Train MSE=17.3017, PC=0.0038 | Val MSE=13.7230, PC=-0.0019
Epoch 08: Train Huber=3.0989, Train MSE=17.6526, PC=-0.1025 | Val MSE=16.2333, PC=-0.0434
Epoch 09: Train Huber=3.0963, Train MSE=16.9628, PC=-0.1200 | Val MSE=16.1651, PC=0.0645
Epoch 10: Train Huber=3.0403, Train MSE=17.2578, PC=-0.0671 | Va

[I 2025-08-17 13:53:16,674] Trial 23 finished with value: 13.722952842712402 and parameters: {'hidden_dim': 257, 'nhead': 4, 'num_layers': 3, 'lr': 0.0015481431970918748, 'weight_decay': 2.2181098757200457e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 17: Train Huber=3.0289, Train MSE=16.6366, PC=-0.1063 | Val MSE=16.9759, PC=0.0350
Early stopping

[trial 24] build loaders...
Epoch 01: Train Huber=2.9379, Train MSE=18.8401, PC=0.1953 | Val MSE=21.2901, PC=-0.0089
Epoch 02: Train Huber=2.4887, Train MSE=14.6057, PC=0.3613 | Val MSE=23.7954, PC=-0.0522
Epoch 03: Train Huber=2.4332, Train MSE=14.0633, PC=0.4051 | Val MSE=24.1750, PC=0.0695
Epoch 04: Train Huber=2.5224, Train MSE=15.2405, PC=0.3734 | Val MSE=26.0035, PC=0.0789
Epoch 05: Train Huber=2.4797, Train MSE=14.2661, PC=0.3741 | Val MSE=20.6349, PC=0.0588
Epoch 06: Train Huber=2.4811, Train MSE=14.4906, PC=0.3598 | Val MSE=24.0949, PC=-0.0276
Epoch 07: Train Huber=2.4454, Train MSE=14.6682, PC=0.3700 | Val MSE=16.7236, PC=0.1346
Epoch 08: Train Huber=2.5535, Train MSE=15.0592, PC=0.3394 | Val MSE=21.8757, PC=-0.0526
Epoch 09: Train Huber=2.6646, Train MSE=15.4598, PC=0.2630 | Val MSE=20.9127, PC=-0.0521
Epoch 10: Train Huber=2.8258, Train MSE=16.7407, PC=0.1951 | Val MSE=2

[I 2025-08-17 13:54:30,916] Trial 24 finished with value: 16.723600387573242 and parameters: {'hidden_dim': 440, 'nhead': 4, 'num_layers': 2, 'lr': 0.0007220274697181623, 'weight_decay': 1.468518897463753e-05}. Best is trial 14 with value: 13.354726791381836.


Epoch 17: Train Huber=2.8115, Train MSE=15.4010, PC=0.2264 | Val MSE=32.0297, PC=0.2821
Early stopping

[trial 25] build loaders...
Epoch 01: Train Huber=3.3270, Train MSE=20.8689, PC=-0.0701 | Val MSE=21.7572, PC=-0.0684
Epoch 02: Train Huber=3.1163, Train MSE=18.0386, PC=-0.0599 | Val MSE=13.9438, PC=-0.0162
Epoch 03: Train Huber=3.0519, Train MSE=18.3148, PC=0.0297 | Val MSE=38.2155, PC=0.0555
Epoch 04: Train Huber=3.1519, Train MSE=18.3576, PC=0.0532 | Val MSE=36.8137, PC=-0.0250
Epoch 05: Train Huber=3.1886, Train MSE=18.0821, PC=-0.0985 | Val MSE=17.2732, PC=-0.1194
Epoch 06: Train Huber=3.1237, Train MSE=18.3651, PC=0.0022 | Val MSE=18.2044, PC=-0.0222
Epoch 07: Train Huber=3.0407, Train MSE=16.7036, PC=-0.0498 | Val MSE=20.4018, PC=0.0632
Epoch 08: Train Huber=3.0303, Train MSE=16.6943, PC=0.0096 | Val MSE=14.1567, PC=-0.0549
Epoch 09: Train Huber=3.2246, Train MSE=20.0975, PC=0.0507 | Val MSE=33.6850, PC=0.1464
Epoch 10: Train Huber=3.1386, Train MSE=19.2297, PC=0.0183 | Val M

[I 2025-08-17 13:55:45,495] Trial 25 finished with value: 13.943769454956055 and parameters: {'hidden_dim': 343, 'nhead': 4, 'num_layers': 3, 'lr': 0.0028254389819034813, 'weight_decay': 1.4696469262281577e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 12: Train Huber=3.0393, Train MSE=16.5492, PC=0.0449 | Val MSE=22.5788, PC=0.0742
Early stopping

[trial 26] build loaders...
Epoch 01: Train Huber=3.5154, Train MSE=23.3264, PC=-0.0696 | Val MSE=46.1546, PC=0.0760
Epoch 02: Train Huber=3.7465, Train MSE=26.4754, PC=-0.0279 | Val MSE=16.8993, PC=0.1212
Epoch 03: Train Huber=3.3830, Train MSE=21.6449, PC=0.0055 | Val MSE=45.9202, PC=-0.0379
Epoch 04: Train Huber=3.4689, Train MSE=23.1899, PC=-0.0298 | Val MSE=50.8104, PC=0.1825
Epoch 05: Train Huber=3.4904, Train MSE=23.6780, PC=0.0082 | Val MSE=15.7205, PC=0.1190
Epoch 06: Train Huber=3.2102, Train MSE=19.4411, PC=0.0351 | Val MSE=14.1305, PC=-0.0856
Epoch 07: Train Huber=3.0130, Train MSE=16.5164, PC=-0.0127 | Val MSE=15.0590, PC=-0.0431
Epoch 08: Train Huber=3.1119, Train MSE=17.6932, PC=0.0232 | Val MSE=18.6064, PC=-0.0235
Epoch 09: Train Huber=3.0311, Train MSE=16.5280, PC=-0.0613 | Val MSE=18.8963, PC=-0.0285
Epoch 10: Train Huber=3.0241, Train MSE=16.7404, PC=0.0354 | Val M

[I 2025-08-17 13:56:54,184] Trial 26 finished with value: 14.130537986755371 and parameters: {'hidden_dim': 484, 'nhead': 4, 'num_layers': 2, 'lr': 0.005741185664628491, 'weight_decay': 4.516459445009431e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 16: Train Huber=2.9497, Train MSE=16.2609, PC=0.0747 | Val MSE=39.1188, PC=0.0567
Early stopping

[trial 27] build loaders...
Epoch 01: Train Huber=3.2948, Train MSE=21.2683, PC=0.0045 | Val MSE=14.2565, PC=0.0465
Epoch 02: Train Huber=3.0989, Train MSE=18.2424, PC=-0.0940 | Val MSE=13.7024, PC=0.0581
Epoch 03: Train Huber=3.1293, Train MSE=18.0427, PC=0.0112 | Val MSE=13.6313, PC=0.0854
Epoch 04: Train Huber=3.2687, Train MSE=18.9616, PC=-0.1123 | Val MSE=19.2277, PC=0.0048
Epoch 05: Train Huber=3.0470, Train MSE=17.5720, PC=-0.0383 | Val MSE=23.5259, PC=-0.0466
Epoch 06: Train Huber=3.0810, Train MSE=17.0363, PC=-0.0561 | Val MSE=23.3388, PC=0.0920
Epoch 07: Train Huber=3.0138, Train MSE=16.8710, PC=-0.0390 | Val MSE=17.7911, PC=0.2539
Epoch 08: Train Huber=3.0063, Train MSE=16.5269, PC=-0.1089 | Val MSE=17.7156, PC=-0.1211
Epoch 09: Train Huber=3.0272, Train MSE=16.5192, PC=-0.1346 | Val MSE=21.7491, PC=0.0959
Epoch 10: Train Huber=2.9930, Train MSE=16.1364, PC=-0.0172 | Val M

[I 2025-08-17 13:58:12,832] Trial 27 finished with value: 13.631325721740723 and parameters: {'hidden_dim': 410, 'nhead': 4, 'num_layers': 3, 'lr': 0.0011933618815879164, 'weight_decay': 2.154982034598333e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 13: Train Huber=3.0107, Train MSE=17.1503, PC=0.0214 | Val MSE=17.8903, PC=-0.0520
Early stopping

[trial 28] build loaders...
Epoch 01: Train Huber=3.4026, Train MSE=22.2245, PC=-0.0211 | Val MSE=20.5531, PC=0.0107
Epoch 02: Train Huber=3.0377, Train MSE=17.0082, PC=-0.0508 | Val MSE=28.1583, PC=0.0501
Epoch 03: Train Huber=3.0973, Train MSE=17.8484, PC=0.0472 | Val MSE=14.5627, PC=-0.0725
Epoch 04: Train Huber=3.0738, Train MSE=17.3741, PC=-0.0424 | Val MSE=15.8190, PC=0.0851
Epoch 05: Train Huber=3.0548, Train MSE=17.6489, PC=-0.0005 | Val MSE=29.5040, PC=-0.0213
Epoch 06: Train Huber=3.0797, Train MSE=18.3129, PC=0.0526 | Val MSE=17.8074, PC=-0.0428
Epoch 07: Train Huber=3.2949, Train MSE=21.1157, PC=-0.0617 | Val MSE=15.5608, PC=-0.0492
Epoch 08: Train Huber=2.9811, Train MSE=16.4921, PC=0.1156 | Val MSE=22.2835, PC=0.0225
Epoch 09: Train Huber=2.9483, Train MSE=16.5455, PC=0.0818 | Val MSE=13.9498, PC=0.0086
Epoch 10: Train Huber=2.9899, Train MSE=16.4488, PC=0.0291 | Val M

[I 2025-08-17 14:02:11,628] Trial 28 finished with value: 13.629014015197754 and parameters: {'hidden_dim': 263, 'nhead': 8, 'num_layers': 4, 'lr': 0.002851507546999549, 'weight_decay': 6.1768584114518685e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 29: Train Huber=3.0627, Train MSE=17.2630, PC=-0.1183 | Val MSE=19.3321, PC=0.0269
Early stopping

[trial 29] build loaders...
Epoch 01: Train Huber=3.0461, Train MSE=19.5714, PC=0.1678 | Val MSE=13.9921, PC=0.1163
Epoch 02: Train Huber=2.6120, Train MSE=15.3339, PC=0.3184 | Val MSE=18.7341, PC=0.0984
Epoch 03: Train Huber=2.3661, Train MSE=14.0815, PC=0.3952 | Val MSE=19.7116, PC=0.1010
Epoch 04: Train Huber=2.2605, Train MSE=13.7579, PC=0.4326 | Val MSE=21.6956, PC=0.1179
Epoch 05: Train Huber=2.4787, Train MSE=14.9964, PC=0.3603 | Val MSE=18.9520, PC=0.1985
Epoch 06: Train Huber=2.3573, Train MSE=14.3683, PC=0.3977 | Val MSE=25.6734, PC=-0.0344
Epoch 07: Train Huber=2.3485, Train MSE=13.4240, PC=0.4210 | Val MSE=16.8253, PC=0.1301
Epoch 08: Train Huber=2.2878, Train MSE=13.5647, PC=0.4217 | Val MSE=17.4668, PC=0.2113
Epoch 09: Train Huber=2.6627, Train MSE=15.9513, PC=0.2825 | Val MSE=24.4803, PC=-0.1127
Epoch 10: Train Huber=2.3887, Train MSE=13.8036, PC=0.3948 | Val MSE=17.3

[I 2025-08-17 14:02:59,671] Trial 29 finished with value: 13.992088317871094 and parameters: {'hidden_dim': 207, 'nhead': 4, 'num_layers': 2, 'lr': 0.0005114187209362978, 'weight_decay': 1.0027558934608288e-06}. Best is trial 14 with value: 13.354726791381836.


Epoch 11: Train Huber=2.2938, Train MSE=12.9832, PC=0.4552 | Val MSE=15.5455, PC=0.1387
Early stopping
Best hyper-parameters: {'hidden_dim': 380, 'nhead': 4, 'num_layers': 2, 'lr': 0.0009819347379144987, 'weight_decay': 3.491097047653223e-06}
Best Val MSE (Optuna): 13.354726791381836
Reloaded eval — Train MSE=15.5618, PC=0.3269 | Val MSE=13.3547, PC=0.1905 | ΔMSE=0.000000
Saved VAL preds → results_final\video_reg_val_ind.csv


In [9]:
import torch

blob = torch.load('models_final/video_reg_ind.pt', map_location=DEVICE)
best_params = blob["best_params"]

best_model = TransformerRegressor(
    d_model=768,
    nhead=best_params["nhead"],
    num_layers=best_params["num_layers"]).to(DEVICE)

best_model.load_state_dict(blob["state_dict"])
best_model.eval()

df = pd.read_csv('FinalDataset.csv')

train_ids_ind = set(os.listdir('Person-Independent_Split/train'))
val_ids_ind = set(os.listdir('Person-Independent_Split/val'))
train_df_ind = df[df.video_id.isin(train_ids_ind)].reset_index(drop=True)
val_df_ind   = df[df.video_id.isin(val_ids_ind)].reset_index(drop=True)

# Final evaluation metrics
g = torch.Generator().manual_seed(SEED)
train_loader = DataLoader(TemporalFeatureDataset(train_df_ind, 'videomae_features_new'), batch_size=BATCH_SIZE, shuffle=True, generator=g, 
                          num_workers=0, pin_memory=True)
val_loader = DataLoader(TemporalFeatureDataset(val_df_ind, 'videomae_features_new'),batch_size=BATCH_SIZE, shuffle=False, 
                        num_workers=0, pin_memory=True)
tr_mse, tr_pc, tr_preds, tr_tgts = evaluate(best_model, train_loader)
vl_mse, vl_pc, vl_preds, vl_tgts = evaluate(best_model, val_loader)
print(f"Best Val (Optuna): MSE={blob["val_mse"]:.4f}, PC={blob['val_pc']:.4f}")
print(f"Reloaded model eval: Val MSE={vl_mse:.4f}, PC={vl_pc:.4f}")

  blob = torch.load('models_final/video_reg_ind.pt', map_location=DEVICE)


Best Val (Optuna): MSE=13.3547, PC=0.1905
Reloaded model eval: Val MSE=13.3547, PC=0.1905


# Person-Dependent Split

In [24]:
df = pd.read_csv('FinalDataset.csv')

train_ids_dep = set(os.listdir('Person-Dependent_Split/train'))
val_ids_dep = set(os.listdir('Person-Dependent_Split/val'))

res_ind = run_split("dep", train_ids_dep, val_ids_dep, input_dim=768)

[I 2025-08-17 10:37:32,354] A new study created in memory with name: no-name-b64a9f5f-9674-4bca-88fd-399f0e21a148



=== Running split: dep ===

[trial 0] build loaders...




Epoch 01: Train Huber=3.0603, Train MSE=19.5646, PC=0.2249 | Val MSE=17.5592, PC=0.4534
Epoch 02: Train Huber=2.6461, Train MSE=15.5485, PC=0.4136 | Val MSE=18.2344, PC=0.4385
Epoch 03: Train Huber=2.4218, Train MSE=14.5399, PC=0.4530 | Val MSE=21.8907, PC=0.5580
Epoch 04: Train Huber=2.3868, Train MSE=14.1763, PC=0.4913 | Val MSE=18.4461, PC=0.4102
Epoch 05: Train Huber=2.4785, Train MSE=14.1192, PC=0.4708 | Val MSE=19.6633, PC=0.2445
Epoch 06: Train Huber=2.5270, Train MSE=15.1056, PC=0.4323 | Val MSE=16.6824, PC=0.4871
Epoch 07: Train Huber=2.3647, Train MSE=14.1475, PC=0.4836 | Val MSE=17.5346, PC=0.4226
Epoch 08: Train Huber=2.3848, Train MSE=13.9005, PC=0.4805 | Val MSE=18.4053, PC=0.3748
Epoch 09: Train Huber=2.4369, Train MSE=14.3565, PC=0.4601 | Val MSE=16.8220, PC=0.5139
Epoch 10: Train Huber=2.3026, Train MSE=13.5091, PC=0.5081 | Val MSE=16.4718, PC=0.5173
Epoch 11: Train Huber=2.2882, Train MSE=13.4011, PC=0.5208 | Val MSE=17.7211, PC=0.4849
Epoch 12: Train Huber=2.3194, Tr

[I 2025-08-17 10:41:01,620] Trial 0 finished with value: 14.763419151306152 and parameters: {'hidden_dim': 232, 'nhead': 4, 'num_layers': 3, 'lr': 0.0002051338263087451, 'weight_decay': 2.9375384576328313e-06}. Best is trial 0 with value: 14.763419151306152.


Epoch 34: Train Huber=2.4312, Train MSE=14.8752, PC=0.4434 | Val MSE=15.1751, PC=0.5919
Early stopping

[trial 1] build loaders...
Epoch 01: Train Huber=3.3753, Train MSE=22.4964, PC=0.0804 | Val MSE=17.4682, PC=0.4521
Epoch 02: Train Huber=2.7134, Train MSE=16.3182, PC=0.3937 | Val MSE=20.4041, PC=0.4901
Epoch 03: Train Huber=2.4465, Train MSE=14.3858, PC=0.4616 | Val MSE=17.7670, PC=0.4792
Epoch 04: Train Huber=2.4196, Train MSE=14.2995, PC=0.4614 | Val MSE=16.4940, PC=0.4605
Epoch 05: Train Huber=2.3974, Train MSE=14.2071, PC=0.4720 | Val MSE=15.8505, PC=0.5244
Epoch 06: Train Huber=2.3939, Train MSE=14.5130, PC=0.4671 | Val MSE=16.9554, PC=0.4731
Epoch 07: Train Huber=2.4253, Train MSE=14.6990, PC=0.4586 | Val MSE=16.4658, PC=0.5363
Epoch 08: Train Huber=2.3233, Train MSE=13.9707, PC=0.4915 | Val MSE=16.6698, PC=0.4390
Epoch 09: Train Huber=2.2640, Train MSE=13.2129, PC=0.5220 | Val MSE=15.8051, PC=0.5177
Epoch 10: Train Huber=2.2591, Train MSE=13.0928, PC=0.5257 | Val MSE=15.6982,

[I 2025-08-17 10:43:53,971] Trial 1 finished with value: 14.934013366699219 and parameters: {'hidden_dim': 90, 'nhead': 4, 'num_layers': 3, 'lr': 0.00010994335574766199, 'weight_decay': 0.0008123245085588687}. Best is trial 0 with value: 14.763419151306152.


Epoch 28: Train Huber=2.0049, Train MSE=11.4916, PC=0.6042 | Val MSE=15.9582, PC=0.5328
Early stopping

[trial 2] build loaders...
Epoch 01: Train Huber=3.2491, Train MSE=21.3413, PC=0.1730 | Val MSE=19.4101, PC=0.4014
Epoch 02: Train Huber=2.4727, Train MSE=14.8235, PC=0.4528 | Val MSE=16.7112, PC=0.5106
Epoch 03: Train Huber=2.4069, Train MSE=14.1276, PC=0.4703 | Val MSE=18.2304, PC=0.4325
Epoch 04: Train Huber=2.4089, Train MSE=14.3314, PC=0.4669 | Val MSE=16.5298, PC=0.4876
Epoch 05: Train Huber=2.3233, Train MSE=13.4339, PC=0.5100 | Val MSE=21.0677, PC=0.5710
Epoch 06: Train Huber=2.2992, Train MSE=13.4489, PC=0.5153 | Val MSE=13.6934, PC=0.5857
Epoch 07: Train Huber=2.3128, Train MSE=13.3452, PC=0.5134 | Val MSE=18.0311, PC=0.4350
Epoch 08: Train Huber=2.2140, Train MSE=13.1362, PC=0.5329 | Val MSE=15.6396, PC=0.5823
Epoch 09: Train Huber=2.1725, Train MSE=12.5384, PC=0.5523 | Val MSE=15.5253, PC=0.5839
Epoch 10: Train Huber=2.0846, Train MSE=12.1725, PC=0.5738 | Val MSE=15.5297,

[I 2025-08-17 10:44:34,560] Trial 2 finished with value: 13.69343376159668 and parameters: {'hidden_dim': 437, 'nhead': 4, 'num_layers': 1, 'lr': 0.0004059611610484307, 'weight_decay': 3.752055855124284e-05}. Best is trial 2 with value: 13.69343376159668.


Epoch 16: Train Huber=1.9406, Train MSE=11.1862, PC=0.6299 | Val MSE=13.7194, PC=0.5923
Early stopping

[trial 3] build loaders...
Epoch 01: Train Huber=2.9589, Train MSE=18.4349, PC=0.2750 | Val MSE=15.2676, PC=0.5087
Epoch 02: Train Huber=2.5344, Train MSE=15.1351, PC=0.4338 | Val MSE=16.3013, PC=0.5217
Epoch 03: Train Huber=2.3246, Train MSE=13.7652, PC=0.5012 | Val MSE=20.1225, PC=0.4826
Epoch 04: Train Huber=2.3885, Train MSE=14.0991, PC=0.4857 | Val MSE=17.3084, PC=0.4732
Epoch 05: Train Huber=2.3062, Train MSE=13.8813, PC=0.5015 | Val MSE=18.3372, PC=0.4983
Epoch 06: Train Huber=2.2913, Train MSE=13.3544, PC=0.5046 | Val MSE=13.9068, PC=0.5694
Epoch 07: Train Huber=2.2719, Train MSE=13.3491, PC=0.5314 | Val MSE=14.7076, PC=0.5746
Epoch 08: Train Huber=2.2494, Train MSE=13.0908, PC=0.5272 | Val MSE=14.8117, PC=0.5474
Epoch 09: Train Huber=2.1792, Train MSE=12.8873, PC=0.5503 | Val MSE=16.9382, PC=0.6029
Epoch 10: Train Huber=2.1435, Train MSE=13.0548, PC=0.5585 | Val MSE=17.8926,

[I 2025-08-17 10:45:16,058] Trial 3 finished with value: 13.906835556030273 and parameters: {'hidden_dim': 257, 'nhead': 8, 'num_layers': 1, 'lr': 0.0003839629299804173, 'weight_decay': 1.2562773503807034e-05}. Best is trial 2 with value: 13.69343376159668.


Epoch 16: Train Huber=1.7344, Train MSE=9.0716, PC=0.7023 | Val MSE=14.5939, PC=0.5614
Early stopping

[trial 4] build loaders...
Epoch 01: Train Huber=3.5816, Train MSE=23.6517, PC=0.0172 | Val MSE=21.8707, PC=-0.0210
Epoch 02: Train Huber=3.3334, Train MSE=20.2707, PC=-0.0866 | Val MSE=32.2241, PC=-0.1012
Epoch 03: Train Huber=3.3555, Train MSE=21.4541, PC=-0.0095 | Val MSE=21.4819, PC=-0.2251
Epoch 04: Train Huber=3.1546, Train MSE=18.1556, PC=0.0262 | Val MSE=25.4871, PC=-0.0782
Epoch 05: Train Huber=3.4088, Train MSE=20.4826, PC=-0.0599 | Val MSE=21.9032, PC=0.0000
Epoch 06: Train Huber=3.2062, Train MSE=18.8400, PC=-0.0546 | Val MSE=20.4523, PC=0.0010
Epoch 07: Train Huber=3.3587, Train MSE=20.7448, PC=-0.0032 | Val MSE=22.2152, PC=0.0000
Epoch 08: Train Huber=3.1694, Train MSE=18.5488, PC=0.0254 | Val MSE=25.3560, PC=0.0000
Epoch 09: Train Huber=3.1657, Train MSE=18.6518, PC=-0.0429 | Val MSE=23.4326, PC=-0.0000
Epoch 10: Train Huber=3.1692, Train MSE=18.5741, PC=0.0502 | Val MS

[I 2025-08-17 10:46:54,146] Trial 4 finished with value: 20.45233917236328 and parameters: {'hidden_dim': 268, 'nhead': 4, 'num_layers': 3, 'lr': 0.0015304852121831463, 'weight_decay': 1.3783237455007196e-06}. Best is trial 2 with value: 13.69343376159668.


Epoch 16: Train Huber=3.1401, Train MSE=18.0814, PC=-0.0039 | Val MSE=22.7503, PC=0.0234
Early stopping

[trial 5] build loaders...
Epoch 01: Train Huber=3.7302, Train MSE=27.5494, PC=0.0586 | Val MSE=20.8259, PC=0.3786
Epoch 02: Train Huber=3.9299, Train MSE=27.6686, PC=0.0160 | Val MSE=25.1870, PC=-0.0632
Epoch 03: Train Huber=3.4223, Train MSE=22.6848, PC=-0.0206 | Val MSE=45.5426, PC=-0.0274
Epoch 04: Train Huber=3.4746, Train MSE=22.4702, PC=-0.0588 | Val MSE=22.7333, PC=-0.0288
Epoch 05: Train Huber=3.3876, Train MSE=21.6046, PC=-0.0525 | Val MSE=20.5846, PC=-0.0301
Epoch 06: Train Huber=3.3079, Train MSE=20.1521, PC=0.0477 | Val MSE=31.9749, PC=-0.0102
Epoch 07: Train Huber=3.5069, Train MSE=23.6393, PC=-0.0403 | Val MSE=21.6230, PC=0.0000
Epoch 08: Train Huber=3.4652, Train MSE=22.7969, PC=-0.0624 | Val MSE=24.3050, PC=0.0000
Epoch 09: Train Huber=3.4640, Train MSE=22.4270, PC=-0.2289 | Val MSE=20.2760, PC=0.0000
Epoch 10: Train Huber=3.2261, Train MSE=19.2849, PC=0.0730 | Val 

[I 2025-08-17 10:49:56,549] Trial 5 finished with value: 20.270835876464844 and parameters: {'hidden_dim': 336, 'nhead': 4, 'num_layers': 4, 'lr': 0.00853618986286683, 'weight_decay': 0.0002661901888489054}. Best is trial 2 with value: 13.69343376159668.


Epoch 23: Train Huber=3.2540, Train MSE=19.9765, PC=0.0405 | Val MSE=20.4314, PC=0.0000
Early stopping

[trial 6] build loaders...
Epoch 01: Train Huber=3.0366, Train MSE=19.9418, PC=0.2520 | Val MSE=17.4077, PC=0.4492
Epoch 02: Train Huber=2.5866, Train MSE=15.4780, PC=0.4074 | Val MSE=18.2058, PC=0.5463
Epoch 03: Train Huber=2.3563, Train MSE=14.2915, PC=0.4824 | Val MSE=15.1378, PC=0.5541
Epoch 04: Train Huber=2.5474, Train MSE=15.9338, PC=0.4155 | Val MSE=19.2251, PC=0.5281
Epoch 05: Train Huber=2.3145, Train MSE=13.2780, PC=0.5128 | Val MSE=17.7538, PC=0.4937
Epoch 06: Train Huber=2.4212, Train MSE=14.2892, PC=0.4753 | Val MSE=15.5946, PC=0.5302
Epoch 07: Train Huber=2.3084, Train MSE=14.0105, PC=0.4988 | Val MSE=16.6910, PC=0.4873
Epoch 08: Train Huber=2.3378, Train MSE=13.3171, PC=0.5110 | Val MSE=13.7064, PC=0.5904
Epoch 09: Train Huber=2.2732, Train MSE=13.3665, PC=0.5141 | Val MSE=15.1296, PC=0.5730
Epoch 10: Train Huber=2.1768, Train MSE=12.7517, PC=0.5493 | Val MSE=14.8495,

[I 2025-08-17 10:52:20,447] Trial 6 finished with value: 13.296364784240723 and parameters: {'hidden_dim': 200, 'nhead': 8, 'num_layers': 2, 'lr': 0.00017541893487450815, 'weight_decay': 3.058656666978529e-05}. Best is trial 6 with value: 13.296364784240723.


Epoch 32: Train Huber=0.8646, Train MSE=3.9510, PC=0.8829 | Val MSE=16.9874, PC=0.5974
Early stopping

[trial 7] build loaders...
Epoch 01: Train Huber=3.1937, Train MSE=20.8671, PC=0.1952 | Val MSE=16.3136, PC=0.5364
Epoch 02: Train Huber=2.6710, Train MSE=16.7615, PC=0.3497 | Val MSE=14.4457, PC=0.5780
Epoch 03: Train Huber=2.3918, Train MSE=13.9986, PC=0.4779 | Val MSE=17.5478, PC=0.4721
Epoch 04: Train Huber=2.3964, Train MSE=14.3319, PC=0.4784 | Val MSE=18.6671, PC=0.4568
Epoch 05: Train Huber=2.4420, Train MSE=15.1286, PC=0.4471 | Val MSE=18.9888, PC=0.3462
Epoch 06: Train Huber=2.7639, Train MSE=16.5793, PC=0.3269 | Val MSE=19.8761, PC=0.5349
Epoch 07: Train Huber=2.5764, Train MSE=14.9664, PC=0.4148 | Val MSE=22.3091, PC=0.0885
Epoch 08: Train Huber=2.6132, Train MSE=15.7309, PC=0.3908 | Val MSE=18.1696, PC=0.4292
Epoch 09: Train Huber=2.3939, Train MSE=14.0310, PC=0.4763 | Val MSE=16.6085, PC=0.5218
Epoch 10: Train Huber=2.4301, Train MSE=14.8856, PC=0.4729 | Val MSE=19.6348, 

[I 2025-08-17 10:53:34,418] Trial 7 finished with value: 14.445747375488281 and parameters: {'hidden_dim': 79, 'nhead': 4, 'num_layers': 3, 'lr': 0.0004201672054372534, 'weight_decay': 3.632486956676606e-05}. Best is trial 6 with value: 13.296364784240723.


Epoch 12: Train Huber=2.3637, Train MSE=13.8502, PC=0.4809 | Val MSE=17.9482, PC=0.4153
Early stopping

[trial 8] build loaders...
Epoch 01: Train Huber=3.6416, Train MSE=24.8154, PC=-0.0460 | Val MSE=42.9097, PC=0.0398
Epoch 02: Train Huber=3.5660, Train MSE=22.9497, PC=0.0995 | Val MSE=25.4848, PC=-0.0678
Epoch 03: Train Huber=3.3737, Train MSE=21.2529, PC=-0.0115 | Val MSE=26.6536, PC=0.0552
Epoch 04: Train Huber=3.4680, Train MSE=22.2441, PC=-0.0386 | Val MSE=25.2889, PC=-0.0637
Epoch 05: Train Huber=3.2990, Train MSE=20.9621, PC=0.0157 | Val MSE=27.6779, PC=0.0092
Epoch 06: Train Huber=3.2218, Train MSE=20.2021, PC=-0.0399 | Val MSE=29.4856, PC=0.0000
Epoch 07: Train Huber=3.3160, Train MSE=20.3234, PC=-0.0387 | Val MSE=21.7306, PC=-0.0000
Epoch 08: Train Huber=3.2670, Train MSE=20.3885, PC=0.0347 | Val MSE=20.2888, PC=0.0615
Epoch 09: Train Huber=3.2068, Train MSE=18.8723, PC=-0.0291 | Val MSE=27.9724, PC=0.0000
Epoch 10: Train Huber=3.1838, Train MSE=18.6726, PC=0.0053 | Val MSE

[I 2025-08-17 10:56:33,449] Trial 8 finished with value: 20.27543830871582 and parameters: {'hidden_dim': 309, 'nhead': 8, 'num_layers': 4, 'lr': 0.007568292060167619, 'weight_decay': 0.00048359527764659497}. Best is trial 6 with value: 13.296364784240723.


Epoch 22: Train Huber=3.1676, Train MSE=18.3609, PC=-0.0546 | Val MSE=24.7973, PC=-0.0000
Early stopping

[trial 9] build loaders...
Epoch 01: Train Huber=3.2292, Train MSE=22.8786, PC=0.1075 | Val MSE=16.4633, PC=0.4741
Epoch 02: Train Huber=2.5368, Train MSE=15.2410, PC=0.4248 | Val MSE=15.8577, PC=0.5239
Epoch 03: Train Huber=2.4359, Train MSE=14.3073, PC=0.4580 | Val MSE=14.7193, PC=0.5235
Epoch 04: Train Huber=2.3784, Train MSE=14.0731, PC=0.4792 | Val MSE=15.1507, PC=0.5658
Epoch 05: Train Huber=2.2574, Train MSE=13.6328, PC=0.5101 | Val MSE=15.2840, PC=0.5059
Epoch 06: Train Huber=2.3791, Train MSE=13.8540, PC=0.4836 | Val MSE=15.2234, PC=0.5659
Epoch 07: Train Huber=2.2486, Train MSE=13.1584, PC=0.5318 | Val MSE=13.8917, PC=0.5974
Epoch 08: Train Huber=2.1501, Train MSE=12.5617, PC=0.5662 | Val MSE=14.7494, PC=0.6234
Epoch 09: Train Huber=2.1190, Train MSE=12.3774, PC=0.5692 | Val MSE=15.9524, PC=0.5191
Epoch 10: Train Huber=2.0728, Train MSE=11.6908, PC=0.5864 | Val MSE=14.199

[I 2025-08-17 10:58:01,775] Trial 9 finished with value: 11.634990692138672 and parameters: {'hidden_dim': 332, 'nhead': 4, 'num_layers': 1, 'lr': 0.00012315571723666037, 'weight_decay': 9.462175356461487e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 35: Train Huber=0.7285, Train MSE=3.0296, PC=0.9114 | Val MSE=14.1286, PC=0.6970
Early stopping

[trial 10] build loaders...
Epoch 01: Train Huber=3.6306, Train MSE=24.5395, PC=-0.0827 | Val MSE=22.2282, PC=-0.2067
Epoch 02: Train Huber=3.4179, Train MSE=22.1629, PC=-0.0275 | Val MSE=33.4494, PC=-0.2523
Epoch 03: Train Huber=3.3218, Train MSE=20.3584, PC=0.0471 | Val MSE=34.2782, PC=0.1217
Epoch 04: Train Huber=3.3579, Train MSE=20.9519, PC=-0.0535 | Val MSE=21.6174, PC=0.0120
Epoch 05: Train Huber=3.2731, Train MSE=20.3432, PC=0.0294 | Val MSE=20.5876, PC=0.0548
Epoch 06: Train Huber=3.3366, Train MSE=20.4306, PC=-0.0314 | Val MSE=21.8485, PC=-0.0419
Epoch 07: Train Huber=3.2011, Train MSE=18.8484, PC=0.0118 | Val MSE=27.3242, PC=-0.0246
Epoch 08: Train Huber=3.3637, Train MSE=20.2471, PC=-0.1141 | Val MSE=21.2888, PC=-0.2894
Epoch 09: Train Huber=3.1436, Train MSE=17.9377, PC=-0.0294 | Val MSE=21.6159, PC=-0.0165
Epoch 10: Train Huber=3.1833, Train MSE=18.5200, PC=-0.0291 | Val

[I 2025-08-17 10:59:45,492] Trial 10 finished with value: 20.27104377746582 and parameters: {'hidden_dim': 492, 'nhead': 8, 'num_layers': 2, 'lr': 0.0019036906911441944, 'weight_decay': 6.577496589979122e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 23: Train Huber=3.1671, Train MSE=18.7386, PC=0.0574 | Val MSE=26.0075, PC=0.0378
Early stopping

[trial 11] build loaders...
Epoch 01: Train Huber=2.9205, Train MSE=19.4492, PC=0.2437 | Val MSE=25.9493, PC=0.4079
Epoch 02: Train Huber=2.5648, Train MSE=15.5008, PC=0.4018 | Val MSE=16.5741, PC=0.4396
Epoch 03: Train Huber=2.3960, Train MSE=14.9505, PC=0.4564 | Val MSE=21.1240, PC=0.5222
Epoch 04: Train Huber=2.3631, Train MSE=14.0320, PC=0.4797 | Val MSE=15.9758, PC=0.5197
Epoch 05: Train Huber=2.4586, Train MSE=15.3187, PC=0.4439 | Val MSE=15.3036, PC=0.4978
Epoch 06: Train Huber=2.2665, Train MSE=13.8347, PC=0.5021 | Val MSE=16.9957, PC=0.6025
Epoch 07: Train Huber=2.3009, Train MSE=13.9611, PC=0.4962 | Val MSE=16.3907, PC=0.5033
Epoch 08: Train Huber=2.2979, Train MSE=13.8752, PC=0.5051 | Val MSE=19.5812, PC=0.5219
Epoch 09: Train Huber=2.2961, Train MSE=13.5396, PC=0.5070 | Val MSE=19.7668, PC=0.5628
Epoch 10: Train Huber=2.1666, Train MSE=12.8264, PC=0.5480 | Val MSE=14.8839

[I 2025-08-17 11:01:33,006] Trial 11 finished with value: 13.636795043945312 and parameters: {'hidden_dim': 180, 'nhead': 8, 'num_layers': 2, 'lr': 0.00011745878674220751, 'weight_decay': 8.533716477932941e-05}. Best is trial 9 with value: 11.634990692138672.


Epoch 23: Train Huber=1.4227, Train MSE=7.3380, PC=0.7678 | Val MSE=14.3326, PC=0.6152
Early stopping

[trial 12] build loaders...
Epoch 01: Train Huber=3.2128, Train MSE=20.9846, PC=0.2015 | Val MSE=21.3114, PC=0.4388
Epoch 02: Train Huber=2.5023, Train MSE=14.7806, PC=0.4246 | Val MSE=17.6935, PC=0.5036
Epoch 03: Train Huber=2.3682, Train MSE=14.1916, PC=0.4727 | Val MSE=16.1756, PC=0.5292
Epoch 04: Train Huber=2.4488, Train MSE=14.9996, PC=0.4401 | Val MSE=16.1647, PC=0.4929
Epoch 05: Train Huber=2.3433, Train MSE=13.6415, PC=0.4887 | Val MSE=16.6375, PC=0.4878
Epoch 06: Train Huber=2.2573, Train MSE=13.4329, PC=0.5121 | Val MSE=18.1006, PC=0.3968
Epoch 07: Train Huber=2.2258, Train MSE=13.1164, PC=0.5327 | Val MSE=16.3588, PC=0.5773
Epoch 08: Train Huber=2.4930, Train MSE=15.0001, PC=0.4391 | Val MSE=15.8041, PC=0.5289
Epoch 09: Train Huber=2.1809, Train MSE=12.8985, PC=0.5480 | Val MSE=19.0581, PC=0.5728
Epoch 10: Train Huber=2.1666, Train MSE=12.6172, PC=0.5539 | Val MSE=16.4606,

[I 2025-08-17 11:02:53,212] Trial 12 finished with value: 14.445280075073242 and parameters: {'hidden_dim': 375, 'nhead': 8, 'num_layers': 1, 'lr': 0.00023540312753194602, 'weight_decay': 1.0654865730022553e-05}. Best is trial 9 with value: 11.634990692138672.


Epoch 30: Train Huber=0.9202, Train MSE=4.3107, PC=0.8713 | Val MSE=19.4020, PC=0.5381
Early stopping

[trial 13] build loaders...
Epoch 01: Train Huber=3.5029, Train MSE=22.5413, PC=0.0405 | Val MSE=21.7911, PC=0.4078
Epoch 02: Train Huber=3.1736, Train MSE=18.3504, PC=0.0170 | Val MSE=28.1398, PC=0.4223
Epoch 03: Train Huber=3.3329, Train MSE=20.4189, PC=0.0522 | Val MSE=20.2769, PC=-0.2300
Epoch 04: Train Huber=3.2189, Train MSE=18.8134, PC=-0.0791 | Val MSE=20.6139, PC=0.3341
Epoch 05: Train Huber=3.1673, Train MSE=18.4056, PC=-0.0073 | Val MSE=19.1967, PC=0.2647
Epoch 06: Train Huber=2.7379, Train MSE=16.4348, PC=0.3446 | Val MSE=24.8927, PC=0.2904
Epoch 07: Train Huber=3.1155, Train MSE=20.8241, PC=0.1684 | Val MSE=24.7487, PC=0.0755
Epoch 08: Train Huber=3.1107, Train MSE=17.8204, PC=0.1040 | Val MSE=21.0458, PC=0.1398
Epoch 09: Train Huber=3.1180, Train MSE=18.2351, PC=0.0542 | Val MSE=22.6333, PC=0.2097
Epoch 10: Train Huber=3.1428, Train MSE=19.3810, PC=0.1354 | Val MSE=25.61

[I 2025-08-17 11:04:02,560] Trial 13 finished with value: 19.196727752685547 and parameters: {'hidden_dim': 171, 'nhead': 8, 'num_layers': 2, 'lr': 0.000855519010443308, 'weight_decay': 0.00010635074528464759}. Best is trial 9 with value: 11.634990692138672.


Epoch 15: Train Huber=3.1952, Train MSE=18.4873, PC=-0.0465 | Val MSE=21.5546, PC=0.4368
Early stopping

[trial 14] build loaders...
Epoch 01: Train Huber=2.8831, Train MSE=18.8493, PC=0.2680 | Val MSE=18.2630, PC=0.5049
Epoch 02: Train Huber=2.5447, Train MSE=15.3373, PC=0.4252 | Val MSE=16.2369, PC=0.4826
Epoch 03: Train Huber=2.4405, Train MSE=14.6571, PC=0.4687 | Val MSE=16.5708, PC=0.4271
Epoch 04: Train Huber=2.3690, Train MSE=13.7801, PC=0.4959 | Val MSE=15.8472, PC=0.5918
Epoch 05: Train Huber=2.3472, Train MSE=14.0705, PC=0.4790 | Val MSE=15.6814, PC=0.5365
Epoch 06: Train Huber=2.2400, Train MSE=13.5462, PC=0.5255 | Val MSE=15.2377, PC=0.5322
Epoch 07: Train Huber=2.2491, Train MSE=13.1230, PC=0.5269 | Val MSE=14.8306, PC=0.5525
Epoch 08: Train Huber=2.1610, Train MSE=12.4671, PC=0.5656 | Val MSE=15.4348, PC=0.5278
Epoch 09: Train Huber=2.0401, Train MSE=11.5246, PC=0.6008 | Val MSE=14.5536, PC=0.5870
Epoch 10: Train Huber=2.0614, Train MSE=11.8557, PC=0.5860 | Val MSE=15.875

[I 2025-08-17 11:04:53,245] Trial 14 finished with value: 14.553606986999512 and parameters: {'hidden_dim': 388, 'nhead': 4, 'num_layers': 1, 'lr': 0.00018255433393763343, 'weight_decay': 1.607230106754806e-05}. Best is trial 9 with value: 11.634990692138672.


Epoch 19: Train Huber=1.5315, Train MSE=7.7658, PC=0.7544 | Val MSE=16.8195, PC=0.5644
Early stopping

[trial 15] build loaders...
Epoch 01: Train Huber=2.9316, Train MSE=18.9959, PC=0.2717 | Val MSE=19.6157, PC=0.5741
Epoch 02: Train Huber=2.4844, Train MSE=15.0476, PC=0.4329 | Val MSE=17.2012, PC=0.4038
Epoch 03: Train Huber=2.6260, Train MSE=15.3679, PC=0.3891 | Val MSE=17.0573, PC=0.4186
Epoch 04: Train Huber=2.4728, Train MSE=14.8839, PC=0.4430 | Val MSE=18.4032, PC=0.4817
Epoch 05: Train Huber=2.6401, Train MSE=16.5339, PC=0.3772 | Val MSE=20.2402, PC=0.2843
Epoch 06: Train Huber=2.7904, Train MSE=16.5410, PC=0.3235 | Val MSE=15.2919, PC=0.5275
Epoch 07: Train Huber=2.5155, Train MSE=14.9593, PC=0.4240 | Val MSE=19.6219, PC=0.3827
Epoch 08: Train Huber=2.5924, Train MSE=15.1360, PC=0.4148 | Val MSE=15.3644, PC=0.5360
Epoch 09: Train Huber=2.6443, Train MSE=15.7956, PC=0.4048 | Val MSE=17.7742, PC=0.4679
Epoch 10: Train Huber=2.4231, Train MSE=13.9676, PC=0.4794 | Val MSE=18.9086,

[I 2025-08-17 11:06:37,610] Trial 15 finished with value: 14.382996559143066 and parameters: {'hidden_dim': 190, 'nhead': 8, 'num_layers': 2, 'lr': 0.0006514930374067479, 'weight_decay': 3.4269912905230855e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 23: Train Huber=2.3932, Train MSE=14.3336, PC=0.4785 | Val MSE=18.5163, PC=0.4293
Early stopping

[trial 16] build loaders...
Epoch 01: Train Huber=3.4526, Train MSE=22.5136, PC=-0.0405 | Val MSE=26.0503, PC=0.0471
Epoch 02: Train Huber=2.9971, Train MSE=17.5785, PC=0.2241 | Val MSE=19.5694, PC=0.5232
Epoch 03: Train Huber=2.6671, Train MSE=16.4747, PC=0.3824 | Val MSE=19.5329, PC=0.2658
Epoch 04: Train Huber=2.8508, Train MSE=18.1745, PC=0.2848 | Val MSE=19.0669, PC=0.2992
Epoch 05: Train Huber=2.8574, Train MSE=18.4032, PC=0.2953 | Val MSE=16.8673, PC=0.4919
Epoch 06: Train Huber=2.5784, Train MSE=15.3588, PC=0.4339 | Val MSE=18.6685, PC=0.4688
Epoch 07: Train Huber=2.7560, Train MSE=16.8922, PC=0.3509 | Val MSE=33.9102, PC=0.3955
Epoch 08: Train Huber=2.5463, Train MSE=15.7577, PC=0.4187 | Val MSE=15.0442, PC=0.5121
Epoch 09: Train Huber=2.5070, Train MSE=15.1274, PC=0.4228 | Val MSE=17.4509, PC=0.5085
Epoch 10: Train Huber=2.4546, Train MSE=14.1257, PC=0.4827 | Val MSE=16.769

[I 2025-08-17 11:07:24,190] Trial 16 finished with value: 15.044203758239746 and parameters: {'hidden_dim': 121, 'nhead': 8, 'num_layers': 1, 'lr': 0.002122329727228719, 'weight_decay': 0.00011462529646364284}. Best is trial 9 with value: 11.634990692138672.


Epoch 18: Train Huber=2.3107, Train MSE=13.8615, PC=0.4921 | Val MSE=16.0363, PC=0.4852
Early stopping

[trial 17] build loaders...
Epoch 01: Train Huber=3.5471, Train MSE=24.2508, PC=0.0361 | Val MSE=21.3228, PC=-0.2784
Epoch 02: Train Huber=3.6038, Train MSE=23.4597, PC=-0.0527 | Val MSE=31.6379, PC=0.4236
Epoch 03: Train Huber=3.1521, Train MSE=18.9067, PC=0.0821 | Val MSE=23.6396, PC=0.4220
Epoch 04: Train Huber=3.3900, Train MSE=20.6930, PC=-0.0987 | Val MSE=28.2133, PC=0.1896
Epoch 05: Train Huber=3.3508, Train MSE=20.8235, PC=-0.0694 | Val MSE=20.3615, PC=0.4091
Epoch 06: Train Huber=3.2785, Train MSE=19.5872, PC=-0.0389 | Val MSE=22.5069, PC=-0.2797
Epoch 07: Train Huber=3.3494, Train MSE=20.7803, PC=0.0030 | Val MSE=21.8319, PC=-0.2031
Epoch 08: Train Huber=3.2516, Train MSE=19.9950, PC=0.0283 | Val MSE=20.3727, PC=-0.4300
Epoch 09: Train Huber=3.2568, Train MSE=19.0901, PC=-0.0166 | Val MSE=22.5105, PC=0.0456
Epoch 10: Train Huber=3.2826, Train MSE=19.4265, PC=-0.0446 | Val M

[I 2025-08-17 11:08:29,195] Trial 17 finished with value: 20.36153793334961 and parameters: {'hidden_dim': 353, 'nhead': 4, 'num_layers': 2, 'lr': 0.004131731287812954, 'weight_decay': 2.4834734338553117e-05}. Best is trial 9 with value: 11.634990692138672.


Epoch 15: Train Huber=3.3262, Train MSE=21.2013, PC=-0.0111 | Val MSE=20.5058, PC=0.2121
Early stopping

[trial 18] build loaders...
Epoch 01: Train Huber=3.0507, Train MSE=20.6772, PC=0.2135 | Val MSE=16.5927, PC=0.4676
Epoch 02: Train Huber=2.5394, Train MSE=15.3256, PC=0.4106 | Val MSE=16.5058, PC=0.5040
Epoch 03: Train Huber=2.3650, Train MSE=14.0192, PC=0.4837 | Val MSE=16.1698, PC=0.5486
Epoch 04: Train Huber=2.3041, Train MSE=13.5541, PC=0.4992 | Val MSE=14.3429, PC=0.5696
Epoch 05: Train Huber=2.2530, Train MSE=13.5840, PC=0.5129 | Val MSE=14.2202, PC=0.5801
Epoch 06: Train Huber=2.2301, Train MSE=13.3107, PC=0.5244 | Val MSE=14.7705, PC=0.5472
Epoch 07: Train Huber=2.2086, Train MSE=12.9244, PC=0.5301 | Val MSE=15.3495, PC=0.5936
Epoch 08: Train Huber=2.2502, Train MSE=12.9020, PC=0.5414 | Val MSE=14.4310, PC=0.5676
Epoch 09: Train Huber=2.1146, Train MSE=12.4305, PC=0.5625 | Val MSE=14.0110, PC=0.6170
Epoch 10: Train Huber=2.0763, Train MSE=12.1059, PC=0.5758 | Val MSE=15.421

[I 2025-08-17 11:10:02,495] Trial 18 finished with value: 12.239497184753418 and parameters: {'hidden_dim': 220, 'nhead': 4, 'num_layers': 1, 'lr': 0.00015840461542924918, 'weight_decay': 6.06133022762108e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 37: Train Huber=0.5659, Train MSE=1.9638, PC=0.9428 | Val MSE=12.8961, PC=0.7031
Early stopping

[trial 19] build loaders...
Epoch 01: Train Huber=3.0104, Train MSE=18.8624, PC=0.2858 | Val MSE=16.8624, PC=0.4659
Epoch 02: Train Huber=2.5257, Train MSE=14.9619, PC=0.4336 | Val MSE=17.9919, PC=0.4978
Epoch 03: Train Huber=2.4654, Train MSE=14.3690, PC=0.4540 | Val MSE=16.1213, PC=0.5076
Epoch 04: Train Huber=2.4411, Train MSE=14.7246, PC=0.4595 | Val MSE=15.2955, PC=0.5382
Epoch 05: Train Huber=2.4046, Train MSE=14.6616, PC=0.4652 | Val MSE=15.2385, PC=0.5315
Epoch 06: Train Huber=2.3231, Train MSE=13.8995, PC=0.4878 | Val MSE=15.1209, PC=0.5896
Epoch 07: Train Huber=2.2596, Train MSE=13.2509, PC=0.5282 | Val MSE=14.1677, PC=0.5768
Epoch 08: Train Huber=2.1400, Train MSE=12.3661, PC=0.5583 | Val MSE=15.3004, PC=0.5254
Epoch 09: Train Huber=2.1011, Train MSE=12.0669, PC=0.5741 | Val MSE=12.5141, PC=0.6265
Epoch 10: Train Huber=2.2005, Train MSE=13.0393, PC=0.5348 | Val MSE=14.7182,

[I 2025-08-17 11:10:50,616] Trial 19 finished with value: 12.514138221740723 and parameters: {'hidden_dim': 432, 'nhead': 4, 'num_layers': 1, 'lr': 0.0001051909502813386, 'weight_decay': 1.1667255937334867e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 19: Train Huber=1.5337, Train MSE=8.2525, PC=0.7316 | Val MSE=14.4027, PC=0.6160
Early stopping

[trial 20] build loaders...
Epoch 01: Train Huber=3.4185, Train MSE=24.0281, PC=0.1181 | Val MSE=19.7371, PC=0.4760
Epoch 02: Train Huber=2.4768, Train MSE=15.0092, PC=0.4500 | Val MSE=16.3774, PC=0.5455
Epoch 03: Train Huber=2.3413, Train MSE=13.4889, PC=0.4963 | Val MSE=21.6152, PC=0.5063
Epoch 04: Train Huber=2.3683, Train MSE=13.6377, PC=0.4951 | Val MSE=17.0916, PC=0.4203
Epoch 05: Train Huber=2.2427, Train MSE=13.3175, PC=0.5106 | Val MSE=15.4316, PC=0.5617
Epoch 06: Train Huber=2.2305, Train MSE=13.0592, PC=0.5329 | Val MSE=15.9730, PC=0.5993
Epoch 07: Train Huber=2.2236, Train MSE=12.7966, PC=0.5419 | Val MSE=17.5736, PC=0.4222
Epoch 08: Train Huber=2.1181, Train MSE=12.1725, PC=0.5746 | Val MSE=15.6326, PC=0.5913
Epoch 09: Train Huber=2.0776, Train MSE=12.4130, PC=0.5662 | Val MSE=23.3123, PC=0.4170
Epoch 10: Train Huber=2.2528, Train MSE=13.0699, PC=0.5295 | Val MSE=15.6947,

[I 2025-08-17 11:12:33,109] Trial 20 finished with value: 14.440999031066895 and parameters: {'hidden_dim': 296, 'nhead': 4, 'num_layers': 1, 'lr': 0.0003171946683848896, 'weight_decay': 4.977730011140566e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 40: Train Huber=0.5237, Train MSE=2.2334, PC=0.9347 | Val MSE=16.7077, PC=0.5401

[trial 21] build loaders...
Epoch 01: Train Huber=3.0864, Train MSE=19.7476, PC=0.2106 | Val MSE=17.8412, PC=0.4523
Epoch 02: Train Huber=2.7105, Train MSE=16.6139, PC=0.3756 | Val MSE=17.0005, PC=0.4914
Epoch 03: Train Huber=2.4828, Train MSE=15.3952, PC=0.4278 | Val MSE=15.6576, PC=0.4850
Epoch 04: Train Huber=2.3495, Train MSE=14.1488, PC=0.4779 | Val MSE=15.0672, PC=0.5372
Epoch 05: Train Huber=2.3394, Train MSE=13.8781, PC=0.4802 | Val MSE=15.7242, PC=0.5421
Epoch 06: Train Huber=2.2559, Train MSE=13.6705, PC=0.5126 | Val MSE=15.5337, PC=0.5486
Epoch 07: Train Huber=2.1740, Train MSE=12.6784, PC=0.5434 | Val MSE=15.6216, PC=0.4981
Epoch 08: Train Huber=2.1863, Train MSE=13.0563, PC=0.5275 | Val MSE=15.9567, PC=0.5075
Epoch 09: Train Huber=2.1200, Train MSE=12.4544, PC=0.5636 | Val MSE=14.7437, PC=0.5625
Epoch 10: Train Huber=2.0393, Train MSE=11.5310, PC=0.6030 | Val MSE=14.1422, PC=0.5863
Epoc

[I 2025-08-17 11:13:35,816] Trial 21 finished with value: 12.956894874572754 and parameters: {'hidden_dim': 435, 'nhead': 4, 'num_layers': 1, 'lr': 0.00010255298588593511, 'weight_decay': 1.242853030798922e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 25: Train Huber=1.3074, Train MSE=6.5535, PC=0.7938 | Val MSE=14.8820, PC=0.5647
Early stopping

[trial 22] build loaders...
Epoch 01: Train Huber=3.1220, Train MSE=19.6394, PC=0.1726 | Val MSE=16.6799, PC=0.4746
Epoch 02: Train Huber=2.5287, Train MSE=15.2393, PC=0.4212 | Val MSE=17.0432, PC=0.5170
Epoch 03: Train Huber=2.4942, Train MSE=14.9857, PC=0.4342 | Val MSE=15.8332, PC=0.4926
Epoch 04: Train Huber=2.3618, Train MSE=13.9735, PC=0.4944 | Val MSE=15.9033, PC=0.5056
Epoch 05: Train Huber=2.3025, Train MSE=13.8999, PC=0.5013 | Val MSE=17.6119, PC=0.4049
Epoch 06: Train Huber=2.3411, Train MSE=13.9501, PC=0.4860 | Val MSE=16.0478, PC=0.5519
Epoch 07: Train Huber=2.2280, Train MSE=13.2240, PC=0.5237 | Val MSE=14.1841, PC=0.5843
Epoch 08: Train Huber=2.1613, Train MSE=12.3593, PC=0.5586 | Val MSE=15.3223, PC=0.5462
Epoch 09: Train Huber=2.1115, Train MSE=12.4358, PC=0.5608 | Val MSE=14.9380, PC=0.5579
Epoch 10: Train Huber=2.0643, Train MSE=11.9441, PC=0.5809 | Val MSE=15.9832,

[I 2025-08-17 11:14:45,774] Trial 22 finished with value: 12.461448669433594 and parameters: {'hidden_dim': 496, 'nhead': 4, 'num_layers': 1, 'lr': 0.00014214421526539866, 'weight_decay': 1.9739584183449646e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 28: Train Huber=0.9166, Train MSE=4.1307, PC=0.8764 | Val MSE=13.2697, PC=0.6484
Early stopping

[trial 23] build loaders...
Epoch 01: Train Huber=3.0089, Train MSE=18.8841, PC=0.2809 | Val MSE=20.1698, PC=0.4689
Epoch 02: Train Huber=2.4685, Train MSE=14.6421, PC=0.4510 | Val MSE=21.7529, PC=0.4988
Epoch 03: Train Huber=2.4459, Train MSE=13.9892, PC=0.4719 | Val MSE=16.4178, PC=0.4907
Epoch 04: Train Huber=2.4779, Train MSE=14.8724, PC=0.4697 | Val MSE=14.6350, PC=0.5373
Epoch 05: Train Huber=2.3483, Train MSE=14.2945, PC=0.4992 | Val MSE=18.6001, PC=0.3983
Epoch 06: Train Huber=2.2371, Train MSE=13.2746, PC=0.5285 | Val MSE=14.2581, PC=0.5724
Epoch 07: Train Huber=2.2135, Train MSE=12.8689, PC=0.5327 | Val MSE=14.8433, PC=0.5792
Epoch 08: Train Huber=2.1611, Train MSE=12.8143, PC=0.5468 | Val MSE=15.6089, PC=0.5503
Epoch 09: Train Huber=2.1683, Train MSE=12.7274, PC=0.5512 | Val MSE=14.9375, PC=0.5965
Epoch 10: Train Huber=2.0527, Train MSE=12.0736, PC=0.5753 | Val MSE=16.6292,

[I 2025-08-17 11:16:08,330] Trial 23 finished with value: 12.79366397857666 and parameters: {'hidden_dim': 496, 'nhead': 4, 'num_layers': 1, 'lr': 0.00016205757030618662, 'weight_decay': 2.8404114886555435e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 33: Train Huber=0.7483, Train MSE=3.3278, PC=0.9017 | Val MSE=14.0320, PC=0.6248
Early stopping

[trial 24] build loaders...
Epoch 01: Train Huber=3.0363, Train MSE=21.1587, PC=0.2290 | Val MSE=16.8147, PC=0.4720
Epoch 02: Train Huber=2.4296, Train MSE=14.1424, PC=0.4743 | Val MSE=16.5565, PC=0.5839
Epoch 03: Train Huber=2.4296, Train MSE=14.4993, PC=0.4610 | Val MSE=17.6311, PC=0.4699
Epoch 04: Train Huber=2.3718, Train MSE=13.9206, PC=0.4856 | Val MSE=15.7071, PC=0.5137
Epoch 05: Train Huber=2.2679, Train MSE=13.3611, PC=0.5168 | Val MSE=13.4467, PC=0.6093
Epoch 06: Train Huber=2.3164, Train MSE=13.6017, PC=0.5015 | Val MSE=15.3505, PC=0.5953
Epoch 07: Train Huber=2.1815, Train MSE=12.8316, PC=0.5461 | Val MSE=16.2957, PC=0.5427
Epoch 08: Train Huber=2.1985, Train MSE=12.6195, PC=0.5661 | Val MSE=17.5240, PC=0.4480
Epoch 09: Train Huber=2.0327, Train MSE=11.4818, PC=0.6031 | Val MSE=15.2565, PC=0.6004
Epoch 10: Train Huber=1.9302, Train MSE=10.5048, PC=0.6426 | Val MSE=24.8726,

[I 2025-08-17 11:16:46,110] Trial 24 finished with value: 13.446743965148926 and parameters: {'hidden_dim': 146, 'nhead': 4, 'num_layers': 1, 'lr': 0.0005264515322041251, 'weight_decay': 6.929716954568364e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 15: Train Huber=1.6320, Train MSE=8.6849, PC=0.7249 | Val MSE=17.4553, PC=0.6441
Early stopping

[trial 25] build loaders...
Epoch 01: Train Huber=3.0768, Train MSE=21.5846, PC=0.1694 | Val MSE=25.2963, PC=0.5103
Epoch 02: Train Huber=2.5197, Train MSE=14.9062, PC=0.4436 | Val MSE=15.8232, PC=0.5179
Epoch 03: Train Huber=2.3324, Train MSE=14.1239, PC=0.4812 | Val MSE=25.4030, PC=0.4640
Epoch 04: Train Huber=2.4966, Train MSE=15.7990, PC=0.4282 | Val MSE=16.7144, PC=0.4415
Epoch 05: Train Huber=2.2786, Train MSE=13.2358, PC=0.5281 | Val MSE=21.7341, PC=0.4161
Epoch 06: Train Huber=2.8434, Train MSE=17.2738, PC=0.3268 | Val MSE=19.4590, PC=0.4984
Epoch 07: Train Huber=2.4598, Train MSE=14.6953, PC=0.4459 | Val MSE=20.0694, PC=0.2122
Epoch 08: Train Huber=2.4919, Train MSE=14.3406, PC=0.4527 | Val MSE=17.7556, PC=0.5173
Epoch 09: Train Huber=2.3443, Train MSE=13.5924, PC=0.4970 | Val MSE=15.9099, PC=0.5524
Epoch 10: Train Huber=2.3006, Train MSE=13.6091, PC=0.5059 | Val MSE=17.7057,

[I 2025-08-17 11:17:37,946] Trial 25 finished with value: 15.823229789733887 and parameters: {'hidden_dim': 233, 'nhead': 4, 'num_layers': 2, 'lr': 0.000256416393409436, 'weight_decay': 2.2465741431510888e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 12: Train Huber=2.2157, Train MSE=13.0712, PC=0.5298 | Val MSE=17.9434, PC=0.5344
Early stopping

[trial 26] build loaders...
Epoch 01: Train Huber=3.1899, Train MSE=21.3262, PC=0.1614 | Val MSE=16.5972, PC=0.4511
Epoch 02: Train Huber=2.5196, Train MSE=15.4517, PC=0.4333 | Val MSE=17.8603, PC=0.5342
Epoch 03: Train Huber=2.4368, Train MSE=14.1584, PC=0.4603 | Val MSE=15.8840, PC=0.5092
Epoch 04: Train Huber=2.3321, Train MSE=13.9888, PC=0.4987 | Val MSE=17.1328, PC=0.5698
Epoch 05: Train Huber=2.2711, Train MSE=13.5485, PC=0.5158 | Val MSE=15.3106, PC=0.5218
Epoch 06: Train Huber=2.2595, Train MSE=13.7918, PC=0.5047 | Val MSE=17.4347, PC=0.4335
Epoch 07: Train Huber=2.2644, Train MSE=13.2291, PC=0.5224 | Val MSE=16.3764, PC=0.5322
Epoch 08: Train Huber=2.2766, Train MSE=13.3824, PC=0.5203 | Val MSE=14.7861, PC=0.5572
Epoch 09: Train Huber=2.1731, Train MSE=12.6888, PC=0.5429 | Val MSE=16.2618, PC=0.5919
Epoch 10: Train Huber=2.0674, Train MSE=11.9791, PC=0.5839 | Val MSE=17.0007

[I 2025-08-17 11:18:40,742] Trial 26 finished with value: 12.663272857666016 and parameters: {'hidden_dim': 458, 'nhead': 4, 'num_layers': 1, 'lr': 0.00014275820766386575, 'weight_decay': 5.662010884712578e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 25: Train Huber=1.1324, Train MSE=5.4114, PC=0.8364 | Val MSE=14.6076, PC=0.5887
Early stopping

[trial 27] build loaders...
Epoch 01: Train Huber=2.8087, Train MSE=18.3587, PC=0.2947 | Val MSE=18.9842, PC=0.2966
Epoch 02: Train Huber=2.6941, Train MSE=16.3365, PC=0.3502 | Val MSE=18.4711, PC=0.4411
Epoch 03: Train Huber=2.4234, Train MSE=14.3148, PC=0.4668 | Val MSE=16.6994, PC=0.4420
Epoch 04: Train Huber=2.4030, Train MSE=14.2148, PC=0.4715 | Val MSE=18.1873, PC=0.4502
Epoch 05: Train Huber=2.3158, Train MSE=13.4105, PC=0.5176 | Val MSE=15.1434, PC=0.5370
Epoch 06: Train Huber=2.3161, Train MSE=13.1244, PC=0.5159 | Val MSE=17.6307, PC=0.5846
Epoch 07: Train Huber=2.3837, Train MSE=13.8244, PC=0.4993 | Val MSE=15.1090, PC=0.5604
Epoch 08: Train Huber=2.2711, Train MSE=12.9600, PC=0.5336 | Val MSE=17.1754, PC=0.5644
Epoch 09: Train Huber=2.2830, Train MSE=13.3170, PC=0.5167 | Val MSE=15.1699, PC=0.5543
Epoch 10: Train Huber=2.2837, Train MSE=13.0391, PC=0.5281 | Val MSE=19.0687,

[I 2025-08-17 11:19:54,062] Trial 27 finished with value: 15.108980178833008 and parameters: {'hidden_dim': 397, 'nhead': 4, 'num_layers': 2, 'lr': 0.0002821486774575908, 'weight_decay': 1.9746619239000415e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 17: Train Huber=2.1644, Train MSE=13.0492, PC=0.5451 | Val MSE=19.6766, PC=0.3578
Early stopping

[trial 28] build loaders...
Epoch 01: Train Huber=3.0167, Train MSE=19.7080, PC=0.2305 | Val MSE=24.1116, PC=0.4492
Epoch 02: Train Huber=2.5503, Train MSE=14.9596, PC=0.4256 | Val MSE=16.5580, PC=0.4668
Epoch 03: Train Huber=2.4392, Train MSE=14.1511, PC=0.4715 | Val MSE=17.5622, PC=0.5271
Epoch 04: Train Huber=2.4847, Train MSE=15.2876, PC=0.4448 | Val MSE=17.7925, PC=0.5366
Epoch 05: Train Huber=2.3567, Train MSE=14.1490, PC=0.4864 | Val MSE=14.8978, PC=0.5568
Epoch 06: Train Huber=2.2135, Train MSE=13.1633, PC=0.5337 | Val MSE=15.5033, PC=0.5621
Epoch 07: Train Huber=2.1227, Train MSE=12.4955, PC=0.5640 | Val MSE=17.0904, PC=0.5906
Epoch 08: Train Huber=2.0921, Train MSE=11.9419, PC=0.5828 | Val MSE=15.4197, PC=0.5813
Epoch 09: Train Huber=2.0747, Train MSE=12.5318, PC=0.5667 | Val MSE=14.9015, PC=0.5736
Epoch 10: Train Huber=1.9529, Train MSE=10.8438, PC=0.6304 | Val MSE=15.4725

[I 2025-08-17 11:21:00,486] Trial 28 finished with value: 13.710877418518066 and parameters: {'hidden_dim': 322, 'nhead': 4, 'num_layers': 1, 'lr': 0.00014914799080664363, 'weight_decay': 9.383844544152492e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 26: Train Huber=1.1316, Train MSE=5.1059, PC=0.8447 | Val MSE=15.6164, PC=0.5404
Early stopping

[trial 29] build loaders...
Epoch 01: Train Huber=3.0568, Train MSE=20.7987, PC=0.2217 | Val MSE=17.0595, PC=0.4649
Epoch 02: Train Huber=2.5239, Train MSE=15.3189, PC=0.4401 | Val MSE=16.6978, PC=0.4831
Epoch 03: Train Huber=2.5911, Train MSE=16.2991, PC=0.3825 | Val MSE=17.6554, PC=0.4163
Epoch 04: Train Huber=2.4101, Train MSE=14.2364, PC=0.4795 | Val MSE=18.1350, PC=0.5582
Epoch 05: Train Huber=2.3859, Train MSE=14.5163, PC=0.4702 | Val MSE=15.7348, PC=0.4957
Epoch 06: Train Huber=2.4069, Train MSE=14.5794, PC=0.4708 | Val MSE=15.8341, PC=0.5695
Epoch 07: Train Huber=2.4214, Train MSE=14.7618, PC=0.4661 | Val MSE=20.3454, PC=0.5047
Epoch 08: Train Huber=2.2792, Train MSE=13.5419, PC=0.5100 | Val MSE=17.7422, PC=0.5395
Epoch 09: Train Huber=2.3559, Train MSE=14.0017, PC=0.4927 | Val MSE=16.2429, PC=0.5358
Epoch 10: Train Huber=2.2528, Train MSE=13.0708, PC=0.5483 | Val MSE=17.8376,

[I 2025-08-17 11:21:53,215] Trial 29 finished with value: 13.948163032531738 and parameters: {'hidden_dim': 241, 'nhead': 4, 'num_layers': 1, 'lr': 0.0009652973366171388, 'weight_decay': 3.846546389413561e-06}. Best is trial 9 with value: 11.634990692138672.


Epoch 21: Train Huber=1.4185, Train MSE=6.7159, PC=0.7923 | Val MSE=22.3458, PC=0.3216
Early stopping
Best hyper-parameters: {'hidden_dim': 332, 'nhead': 4, 'num_layers': 1, 'lr': 0.00012315571723666037, 'weight_decay': 9.462175356461487e-06}
Best Val MSE (Optuna): 11.634990692138672
Reloaded eval — Train MSE=4.7953, PC=0.8565 | Val MSE=11.6350, PC=0.6951 | ΔMSE=0.000000
Saved VAL preds → results_final\video_reg_val_dep.csv


In [49]:
import torch

blob = torch.load('models_final/video_reg_dep.pt', map_location=DEVICE)
best_params = blob["best_params"]

best_model = TransformerRegressor(
    d_model=768,
    nhead=best_params["nhead"],
    num_layers=best_params["num_layers"]).to(DEVICE)

best_model.load_state_dict(blob["state_dict"])
best_model.eval()

df = pd.read_csv('FinalDataset.csv')

train_ids_dep = set(os.listdir('Person-Dependent_Split/train'))
val_ids_dep = set(os.listdir('Person-Dependent_Split/val'))
train_df_dep = df[df.video_id.isin(train_ids_dep)].reset_index(drop=True)
val_df_dep   = df[df.video_id.isin(val_ids_dep)].reset_index(drop=True)

# Final evaluation metrics
g = torch.Generator().manual_seed(SEED)
train_loader = DataLoader(TemporalFeatureDataset(train_df_dep, 'videomae_features_new'), batch_size=BATCH_SIZE, shuffle=True, generator=g, 
                          num_workers=0, pin_memory=True)
val_loader = DataLoader(TemporalFeatureDataset(val_df_dep, 'videomae_features_new'),batch_size=BATCH_SIZE, shuffle=False, 
                        num_workers=0, pin_memory=True)
tr_mse, tr_pc, tr_preds, tr_tgts = evaluate(best_model, train_loader)
vl_mse, vl_pc, vl_preds, vl_tgts = evaluate(best_model, val_loader)
print(f"Best Val (Optuna): MSE={blob["val_mse"]:.4f}, PC={blob['val_pc']:.4f}")
print(f"Reloaded model eval: Val MSE={vl_mse:.4f}, PC={vl_pc:.4f}")

  blob = torch.load('models_final/video_reg_dep.pt', map_location=DEVICE)


Best Val (Optuna): MSE=11.6350, PC=0.6951
Reloaded model eval: Val MSE=11.6350, PC=0.6951


# Test Predictions

In [1]:
import os
import json
import math
import copy
import numpy as np
import pandas as pd
from pathlib import Path

# Ensure reproducibility
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # for deterministic CuBLAS paths

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import StratifiedGroupKFold, StratifiedKFold
import pickle

FEATURE_DIR = "videomae_features_new"
CSV_PATH    = "FinalDataset.csv" 

MODEL_DIR   = Path("models_final")  
MODEL_DIR.mkdir(exist_ok=True)
RESULTS_DIR = Path("results_final") 
RESULTS_DIR.mkdir(exist_ok=True)

BATCH_SIZE_DEFAULT = 8
N_EPOCHS    = 40
PATIENCE    = 8
INPUT_DIM   = 768
NUM_WORKERS = 0
DEVICE      = torch.device("cuda" if torch.cuda.is_available() else "cpu")
PIN         = torch.cuda.is_available()

LABEL_COL_MAP = {
    "ind": "view_range_enc_ind",
    "dep": "view_range_enc_dep",
}

# Set the seeds
def set_seed(seed=SEED):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark     = False

# Initialize each DataLoader worker with a different deterministic seed
def worker_init_fn(worker_id):
    s = SEED + worker_id
    np.random.seed(s)
    torch.manual_seed(s)

set_seed(SEED)

# Same as above
def safe_pearsonr(preds, targets, eps=1e-8):
    preds   = np.asarray(preds).squeeze()
    targets = np.asarray(targets).squeeze()
    cov = ((preds - preds.mean()) * (targets - targets.mean())).mean()
    return float(cov / ((preds.std() + eps) * (targets.std() + eps)))

def log_to_views(arr):
    return np.expm1(np.asarray(arr, dtype=float))

# Return set of file names without extension
def read_ids_from_dir(dir_path: str):
    assert os.path.isdir(dir_path), f"Missing directory: {dir_path}"
    return set(os.path.splitext(fn)[0] for fn in os.listdir(dir_path))

def get_dfs_for_split(split_tag: str):
    """
    Returns trainval_df and test_df for the split by reading id-lists from folders:
      Person-<Split>_Split/{train,val,test}
    Uses CSV with original encoded labels (no recomputation).
    """
    df = pd.read_csv(CSV_PATH)
    base = f"Person-{'Independent' if split_tag=='ind' else 'Dependent'}_Split"
    train_ids = read_ids_from_dir(os.path.join(base, "train"))
    val_ids   = read_ids_from_dir(os.path.join(base, "val"))
    test_ids  = read_ids_from_dir(os.path.join(base, "test"))

    trainval_ids = train_ids | val_ids
    trainval_df  = df[df.video_id.isin(trainval_ids)].reset_index(drop=True)  # train+val
    test_df      = df[df.video_id.isin(test_ids)].reset_index(drop=True)      # test

    need = {"video_id","channel_id","log_view_count","view_count",
            "view_range_enc_ind","view_range_enc_dep"}
    assert need.issubset(trainval_df.columns), f"Missing in trainval: {need - set(trainval_df.columns)}"
    assert need.issubset(test_df.columns), f"Missing in test: {need - set(test_df.columns)}"
    return trainval_df, test_df

def build_internal_val_from_labels(trainval_df: pd.DataFrame, split_tag: str,
                                   val_rel=0.15, seed=SEED):
    """
    Build ONE internal validation split from train+val using the PRECOMPUTED
    encoded labels (no new bins). PI: StratifiedGroupKFold (group=channel_id).
    PD: StratifiedKFold. We take the first fold to approximate ~val_rel.
    """
    label_col = LABEL_COL_MAP[split_tag]
    assert label_col in trainval_df.columns, f"Missing {label_col}."
    assert "channel_id" in trainval_df.columns, "Missing channel_id."

    y = pd.Series(trainval_df[label_col]).astype(str)
    if not y.str.fullmatch(r"\d+").all():
        y = pd.Series(pd.Categorical(y).codes)
    else:
        y = y.astype(int)

    n_splits = max(2, int(round(1.0 / val_rel)))  # e.g., 0.15 -> ~7
    idx = np.arange(len(trainval_df))

    if split_tag == "ind":
        groups = trainval_df["channel_id"].values
        cv = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=seed)   # no channel overlap for ind
        tr_idx, va_idx = next(cv.split(idx, y, groups=groups)) 
    else:
        cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)    # channel overlap
        tr_idx, va_idx = next(cv.split(idx.reshape(-1,1), y))

    return tr_idx, va_idx

def load_best_reg_bundle(split_tag: str):
    """
    Loads:
      - state_dict (if you saved one) from models*/video_reg_{split}.pt OR video_trans_reg_{split}.pt
      - best_params from accompanying pkl (optional)
    """
    pt_path = MODEL_DIR / f"video_reg_{split_tag}.pt"

    state_obj = torch.load(pt_path, map_location=DEVICE)
    if isinstance(state_obj, dict) and "state_dict" in state_obj:
        state_dict = state_obj["state_dict"]
        best_params_from_pt = state_obj.get("best_params", None)
    else:
        state_dict = state_obj
        best_params_from_pt = None

    pkl_path = MODEL_DIR / f"video_reg_{split_tag}.pkl"
    
    best_params = None
    if pkl_path.exists():
        with open(pkl_path, "rb") as f:
            bundle = pickle.load(f)
    best_params = bundle.get("best_params") or bundle.get("params") or None

    if best_params is None:
        best_params = best_params_from_pt

    return state_dict, best_params

# ======================= Runner (train+val -> test) ===========
def train_on_trainval_then_test_reg(split_tag: str):
    """
    Regression pipeline:
    - Load best params/state (state used only as init; params drive the model)
    - Create one internal val from train+val via PRECOMPUTED labels (no new bins)
    - Train with Huber, early stop on Val MSE (log space)
    - Evaluate on TEST: MSE(log), Pearson r, and MSE(views)
    - Save predictions CSV (video_id, y_true, y_pred) + save final train+val-tuned model
    """
    set_seed(SEED)
    print(f"\n===== [{split_tag}] REG: Train+Val -> Test (labels from CSV) =====")

    # Dataframes
    trainval_df, test_df = get_dfs_for_split(split_tag)
    print(f"train+val: {len(trainval_df)} | test: {len(test_df)}")

    # Load best bundle (state dict + hyperparams)
    state_dict, best_params = load_best_reg_bundle(split_tag)
    batch_size = int(best_params.get("batch_size", BATCH_SIZE_DEFAULT))

    # Build internal split for early stopping
    tr_idx, va_idx = build_internal_val_from_labels(trainval_df, split_tag=split_tag, val_rel=0.15, seed=SEED)
    tr_in = trainval_df.iloc[tr_idx].reset_index(drop=True)
    va_in = trainval_df.iloc[va_idx].reset_index(drop=True)
    print(f"internal train: {len(tr_in)} | internal val: {len(va_in)}")

    # Model
    model = TransformerRegressor(
        d_model=int(best_params["d_model"]),
        nhead=int(best_params["nhead"]),
        num_layers=int(best_params["num_layers"])
    ).to(DEVICE)

    # Loaders
    g = torch.Generator().manual_seed(SEED)
    tr_loader = DataLoader(
        TemporalFeatureDataset(tr_in, feature_dir=FEATURE_DIR, target_col="log_view_count"),
        batch_size=batch_size, shuffle=True, generator=g,
        num_workers=NUM_WORKERS, pin_memory=PIN,
        worker_init_fn=worker_init_fn if NUM_WORKERS>0 else None
    )
    va_loader = DataLoader(
        TemporalFeatureDataset(va_in, feature_dir=FEATURE_DIR, target_col="log_view_count"),
        batch_size=batch_size, shuffle=False,
        num_workers=NUM_WORKERS, pin_memory=PIN,
        worker_init_fn=worker_init_fn if NUM_WORKERS>0 else None
    )
    te_loader = DataLoader(
        TemporalFeatureDataset(test_df, feature_dir=FEATURE_DIR, target_col="log_view_count"),
        batch_size=batch_size, shuffle=False,
        num_workers=NUM_WORKERS, pin_memory=PIN,
        worker_init_fn=worker_init_fn if NUM_WORKERS>0 else None
    )

    # Train with early stopping on val MSE (log)
    criterion = nn.HuberLoss(delta=1.0)   # Huber loss
    optimizer = optim.Adam(model.parameters(), lr=float(best_params["lr"]), weight_decay=float(best_params["weight_decay"])) # Get the best params

    best_val, best_state, best_pc, best_epoch, bad = float("inf"), None, None, None, 0
    for ep in range(1, N_EPOCHS + 1):
        tr_hub, tr_mse, tr_pc = train_one_epoch(model, tr_loader, criterion, optimizer)  # Train the model
        va_mse, va_pc, _, _   = evaluate(model, va_loader)   # Evaluate
        print(f"Epoch {ep:02d} | Train Huber={tr_hub:.4f} MSE={tr_mse:.4f} PC={tr_pc:.4f} || "
              f"Val MSE={va_mse:.4f} PC={va_pc:.4f}")
        if va_mse < best_val:
            best_val, best_pc = float(va_mse), float(va_pc)
            best_state = copy.deepcopy(model.state_dict())
            best_epoch = ep
            bad = 0
        else:
            bad += 1
            if bad >= PATIENCE:
                print("Early stopping.")
                break

    if best_state is not None:
        model.load_state_dict(best_state)
    print(f"[{split_tag}] Best epoch: {best_epoch} | Best Val MSE={best_val:.4f} | Val PC={best_pc:.4f}")

    # Test evaluation
    te_mse, te_pc, te_preds, te_tgts = evaluate(model, te_loader)   # Evaluate on test
    te_views_mse = float(np.mean((log_to_views(te_preds) - log_to_views(te_tgts))**2))  # Get the raw views

    print(f"[{split_tag}] TEST — MSE(log)={te_mse:.4f} | PC={te_pc:.4f} | MSE(views)={te_views_mse:.2f}")

    # Save predictions CSV (slim)
    out = test_df[["video_id"]].copy()
    out["y_true"] = test_df["log_view_count"].to_numpy()
    out["y_pred"] = np.asarray(te_preds).reshape(-1)
    save_csv = RESULTS_DIR / f"video_reg_test_{split_tag}.csv"
    out.to_csv(save_csv, index=False)
    print(f"[{split_tag}] Saved test predictions to: {save_csv}")

    # Save the train+val-tuned model bundle
    final_bundle = {
        "state_dict": model.state_dict(),
        "best_params": best_params,
        "internal_val_mse": float(best_val),
        "internal_val_pc":  float(best_pc),
        "seed": SEED,
    }
    final_path = MODEL_DIR / f"video_reg_final_{split_tag}.pt"
    torch.save(final_bundle, final_path)
    print(f"[{split_tag}] Saved final model bundle to: {final_path}")


    return {
        "model": model,
        "test_mse_log": te_mse,
        "test_pc": te_pc,
        "test_mse_views": te_views_mse,
        "preds_path": save_csv,
        "best_epoch": best_epoch
    }

# ======================= Run both splits =====================
if __name__ == "__main__":
    res_ind = train_on_trainval_then_test_reg("ind")
    res_dep = train_on_trainval_then_test_reg("dep")


===== [ind] REG: Train+Val -> Test (labels from CSV) =====
train+val: 350 | test: 150
internal train: 295 | internal val: 55


  state_obj = torch.load(pt_path, map_location=DEVICE)


[ind] Loaded init weights (strict=False).
Epoch 01 | Train Huber=3.1840 MSE=19.0002 PC=0.0163 || Val MSE=14.4978 PC=0.5847
Epoch 02 | Train Huber=2.7116 MSE=15.4706 PC=0.2398 || Val MSE=9.5465 PC=0.6348
Epoch 03 | Train Huber=2.3861 MSE=14.3495 PC=0.3510 || Val MSE=9.4672 PC=0.6280
Epoch 04 | Train Huber=2.3966 MSE=14.5108 PC=0.3724 || Val MSE=13.2801 PC=0.5669
Epoch 05 | Train Huber=2.3484 MSE=14.3251 PC=0.3838 || Val MSE=9.2055 PC=0.6272
Epoch 06 | Train Huber=2.1744 MSE=13.6041 PC=0.4307 || Val MSE=12.4418 PC=0.5989
Epoch 07 | Train Huber=2.1434 MSE=13.0432 PC=0.4721 || Val MSE=10.3460 PC=0.5998
Epoch 08 | Train Huber=2.0468 MSE=12.4753 PC=0.5089 || Val MSE=12.5429 PC=0.5426
Epoch 09 | Train Huber=2.0901 MSE=11.5850 PC=0.5304 || Val MSE=10.5114 PC=0.5850
Epoch 10 | Train Huber=2.0117 MSE=11.8975 PC=0.5578 || Val MSE=18.8625 PC=0.1444
Epoch 11 | Train Huber=1.9453 MSE=11.1193 PC=0.5646 || Val MSE=9.9445 PC=0.5935
Epoch 12 | Train Huber=2.0235 MSE=12.0965 PC=0.5234 || Val MSE=13.0835 

  state_obj = torch.load(pt_path, map_location=DEVICE)


Epoch 01 | Train Huber=4.0380 MSE=31.6314 PC=-0.0489 || Val MSE=16.7622 PC=0.4311
Epoch 02 | Train Huber=3.0549 MSE=17.9320 PC=0.3453 || Val MSE=15.5354 PC=0.3350
Epoch 03 | Train Huber=2.8352 MSE=16.2316 PC=0.4239 || Val MSE=14.5150 PC=0.4098
Epoch 04 | Train Huber=2.6522 MSE=14.9041 PC=0.4697 || Val MSE=14.4166 PC=0.4126
Epoch 05 | Train Huber=2.5648 MSE=14.7092 PC=0.4526 || Val MSE=14.0705 PC=0.4162
Epoch 06 | Train Huber=2.4978 MSE=14.0182 PC=0.4968 || Val MSE=14.2284 PC=0.3976
Epoch 07 | Train Huber=2.3367 MSE=13.1949 PC=0.5280 || Val MSE=15.4379 PC=0.3607
Epoch 08 | Train Huber=2.2855 MSE=12.8853 PC=0.5479 || Val MSE=15.5132 PC=0.3993
Epoch 09 | Train Huber=2.2530 MSE=12.8772 PC=0.5525 || Val MSE=16.2282 PC=0.3484
Epoch 10 | Train Huber=2.1904 MSE=12.4974 PC=0.5678 || Val MSE=16.9641 PC=0.3223
Epoch 11 | Train Huber=2.0198 MSE=11.3784 PC=0.6176 || Val MSE=17.1819 PC=0.3213
Epoch 12 | Train Huber=1.9836 MSE=11.4122 PC=0.6191 || Val MSE=19.9119 PC=0.3393
Epoch 13 | Train Huber=2.06