# Training & inference notebook
Credit to [Tarun Mishra](https://www.kaggle.com/tarundirector) – this code is heavily based on his [notebook](https://www.kaggle.com/code/tarundirector/sensor-pulse-viz-eda-for-bfrb-detection?scriptVersionId=243465321).

## Setup

### imports

In [1]:
import re
import os
import gc
import json 
import math
import shutil
import random
import warnings
from glob import glob
from os.path import join
from functools import partial
from datetime import datetime
from tqdm.notebook import tqdm
from collections import Counter
from operator import methodcaller
from os.path import join, realpath
from typing import Optional, Literal
from typing import Optional, Literal, Iterator
from itertools import pairwise, starmap, product

import torch
import optuna
import numpy as np
import pandas as pd
import polars as pl
from numpy import ndarray
from torch import nn, Tensor
from numpy.linalg import norm
import torch.nn.functional as F
from torch.optim import Optimizer
from pandas import DataFrame as DF
from optuna.trial import TrialState
from sklearn.metrics import f1_score
from kagglehub import competition_download
from torch.utils.data import TensorDataset
from scipy.spatial.transform import Rotation
import kaggle_evaluation.cmi_inference_server
from torch.utils.data import DataLoader as DL
from sklearn.model_selection import GroupKFold
from rich.progress import Progress, Task, track
from sklearn.model_selection import train_test_split
from numpy.lib.stride_tricks import sliding_window_view
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import ConstantLR, LRScheduler, _LRScheduler

### Configs

In [2]:
# Dataset
COMPETITION_HANDLE = "cmi-detect-behavior-with-sensor-data"
TARGET_NAMES = sorted([
    "Above ear - pull hair",
    "Cheek - pinch skin",
    "Eyebrow - pull hair",
    "Eyelash - pull hair",
    "Feel around in tray and pull out an object",
    "Forehead - pull hairline",
    "Forehead - scratch",
    "Neck - pinch skin",
    "Neck - scratch",
    "Text on phone",
    "Wave hello",
    "Write name in air",
    "Write name on leg",
    "Drink from bottle/cup",
    "Pinch knee/leg skin",
    "Pull air toward your face",
    "Scratch knee/leg skin",
    "Glasses on/off"
])
BFRB_GESTURES = [
    'Above ear - pull hair',
    'Forehead - pull hairline',
    'Forehead - scratch',
    'Eyebrow - pull hair',
    'Eyelash - pull hair',
    'Neck - pinch skin',
    'Neck - scratch',
    'Cheek - pinch skin'
]
BFRB_INDICES = [idx for idx, gesture in enumerate(TARGET_NAMES) if gesture in BFRB_GESTURES]
IMU_FEATS_PREFIXES = (
    "acc",
    "linear_acc",
    "rot",
    "angular",
    "euler",
    "quat_rot_mag",
    "delta_rot_mag",
)
QUATERNION_COLS = ['rot_w', 'rot_x', 'rot_y', 'rot_z']
GRAVITY_WORLD = np.array([0, 0, 9.81], "float32")
RAW_ACCELRATION_COLS = ["acc_x", "acc_y", "acc_z"]
LINEAR_ACC_COLS = ["linear_" + col for col in RAW_ACCELRATION_COLS] # Acceleration without gravity
COMPETITION_HANDLE = "cmi-detect-behavior-with-sensor-data"
CATEGORY_COLUMNS = [
    'row_id',
    'sequence_type',
    'sequence_id',
    'subject',
    'orientation',
    'behavior',
    'phase',
    'gesture',
]
META_DATA_COLUMNS = [
    'row_id',
    'sequence_type',
    'sequence_id',
    'sequence_counter',
    'subject',
    'orientation',
    'behavior',
    'phase',
    'gesture',
]
DATASET_DF_DTYPES = {
    "acc_x": "float32", "acc_y": "float32", "acc_z": "float32",
    "thm_1":"float32", "thm_2":"float32", "thm_3":"float32", "thm_4":"float32", "thm_5":"float32",
    "sequence_counter": "int32",
    **{col: "category" for col in CATEGORY_COLUMNS},
    **{f"tof_{i_1}_v{i_2}": "float32" for i_1, i_2 in product(range(1, 5), range(64))},
}
PREPROCESSED_DATASET_HANDLE = "mauroabidalcarrer/prepocessed-cmi-2025"
# The quantile of the sequences len used to pad/truncate during preprocessing
SEQUENCE_NORMED_LEN_QUANTILE = 0.95
# SAMPLING_FREQUENCY = 10 #Hz
VALIDATION_FRACTION = 0.2
EPSILON=1e-8
DELTA_ROTATION_ANGULAR_VELOCITY_COLS = ["angular_vel_x", "angular_vel_y", "angular_vel_z"]
DELTA_ROTATION_AXES_COLS = ["rotation_axis_x", "rotation_axis_y", "rotation_axis_z"]
EULER_ANGLES_COLS = ["euler_x", "euler_y", "euler_z"]
pad_trunc_mode_type = Literal["pre", "center", "post"]
SEQ_PAD_TRUNC_MODE: pad_trunc_mode_type = "center"
DEFAULT_VERSION_NOTES = "Preprocessed Child Mind Institue 2025 competition preprocessed dataset."
NB_COLS_PER_TOF_SENSOR = 64
TOF_PATCH_SIZE = 2
assert ((NB_COLS_PER_TOF_SENSOR // 2) % TOF_PATCH_SIZE) == 0, "tof side len should be dividable by TOF_PATCH_SIZE!"
TOF_AGG_FUNCTIONS = [
    "mean",
    "std",
    "median",
    "min",
    "max",
]
# Data augmentation
JITTER = 0.25
SCALING = 0.2
MIXUP = 0.3
# Training loop
NB_CROSS_VALIDATIONS = 5
TRAIN_BATCH_SIZE = 256
VALIDATION_BATCH_SIZE = 4 * TRAIN_BATCH_SIZE
PATIENCE = 8
# Optimizer
WEIGHT_DECAY = 3e-3
# Scheduler
TRAINING_EPOCHS = 25 # Including warmup epochs
WARMUP_EPOCHS = 3
WARMUP_LR_INIT = 1.822126131809773e-05
MAX_TO_MIN_LR_DIV_FACTOR = 100
LR_CYCLE_FACTOR = 0.5
CYCLE_LENGTH_FACTOR = 0.9
INIT_CYCLE_EPOCHS = 6
# MIN_LR = 3.810323058740104e-09
# MAX_LR = 1e-3
# Mock training loop
MOCK_TRAINING_EPOCHS = 20
MOCK_TRAINING_GAMMA = 1.01
CHANNELS_DIMENSION = 1
SEED = 42

### Seed everything

In [3]:
def seed_everything(seed=42):
    """Set all random seeds for reproducibility"""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    torch.use_deterministic_algorithms(True, warn_only=True)
seed_everything(seed=SEED)

### Supress performance warngings

In [4]:
warnings.filterwarnings(
    "ignore",
    message=(
        "DataFrame is highly fragmented.  This is usually the result of "
        "calling `frame.insert` many times.*"
    ),
    category=pd.errors.PerformanceWarning,
)

### device setup

In [5]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

## Dataset

### Preprocessing

In [6]:
def get_feature_cols(df:DF) -> list[str]:
    return sorted(list(set(df.columns) - set(META_DATA_COLUMNS) - set(TARGET_NAMES)))

# Missing ToF values are already imputed by -1 which is inconvinient since we want all missing values to be NaN.    
# So we replace them by NaN and then perform imputing.
def get_fillna_val_per_feature_col(df:DF) -> dict:
    return {col: 1.0 if col == 'rot_w' else 0 for col in get_feature_cols(df)}

def imputed_features(df:DF) -> DF:
    # Missing ToF values are already imputed by -1 which is inconvinient since we want all missing values to be NaN.    
    # So we replace them by NaN and then perform imputing.  
    tof_vals_to_nan = {col: -1.0 for col in df.columns if col.startswith("tof")}
    # fillna_val_per_col = {col: 1.0 if col == 'rot_w' else 0 for col in df.columns}

    df[get_feature_cols(df)] = (
        df
        .loc[:, get_feature_cols(df)]
        # df.replace with np.nan sets dtype to floar64 so we set it back to float32
        .replace(tof_vals_to_nan, value=np.nan)
        .astype("float32")
        .groupby(df["sequence_id"], observed=True, as_index=False)
        .ffill()
        .groupby(df["sequence_id"], observed=True, as_index=False)
        .bfill()
        # In case there are only nan in the column in the sequence
        .fillna(get_fillna_val_per_feature_col(df))
    )
    return df

def standardize_tof_cols_names(df: DF) -> DF:
    renamed_cols = {}
    pattern = re.compile(r"^(tof_\d_v)(\d)$")  # match 'tof_X_vY' where Y is a single digit

    for col in df.columns:
        match = pattern.match(col)
        if match:
            prefix, version = match.groups()
            new_col = f"{prefix}0{version}"
            renamed_cols[col] = new_col

    return df.rename(columns=renamed_cols)

def norm_quat_rotations(df:DF) -> DF:
    df[QUATERNION_COLS] /= np.linalg.norm(df[QUATERNION_COLS], axis=1, keepdims=True)
    return df

def add_linear_acc_cols(df:DF) -> DF:
    # Vectorized version of https://www.kaggle.com/code/wasupandceacar/lb-0-82-5fold-single-bert-model#Dataset `remove_gravity_from_acc`
    rotations:Rotation = Rotation.from_quat(df[QUATERNION_COLS])
    gravity_sensor_frame = rotations.apply(GRAVITY_WORLD, inverse=True).astype("float32")
    df[LINEAR_ACC_COLS] = df[RAW_ACCELRATION_COLS] - gravity_sensor_frame
    return df

def add_acc_magnitude(df:DF, acc_cols:list[str], acc_mag_col_name:str) -> DF:
    return df.assign(**{acc_mag_col_name: np.linalg.norm(df.loc[:, acc_cols], axis=1)})

def add_quat_angle_mag(df:DF) -> DF:
    return df.assign(quat_rot_mag=np.arccos(df["rot_w"]) * 2)

def add_angular_velocity_features(df:DF) -> DF:
    rotations = Rotation.from_quat(df[QUATERNION_COLS])
    delta_rotations = rotations[1:] * rotations[:-1].inv()
    delta_rot_velocity = delta_rotations.as_rotvec()
    # Add extra line to avoid shape mismatch
    delta_rot_velocity = np.vstack((np.zeros((1, 3)), delta_rot_velocity))
    delta_rot_magnitude = norm(delta_rot_velocity, axis=1, keepdims=True)
    delta_rot_axes = delta_rot_velocity / (delta_rot_magnitude + EPSILON)
    df[DELTA_ROTATION_ANGULAR_VELOCITY_COLS] = delta_rot_velocity
    df[DELTA_ROTATION_AXES_COLS] = delta_rot_axes
    df["delta_rot_mag"] = delta_rot_magnitude.squeeze()

    return df

def rot_euler_angles(df:DF) -> ndarray:
    df[EULER_ANGLES_COLS] = (
        Rotation
        .from_quat(df[QUATERNION_COLS])
        .as_euler("xyz")
        .squeeze()
    )
    return df

def agg_tof_patch(tof_views:np.ndarray, f_name:str) -> ndarray:
    views_agg_func = methodcaller(f_name, tof_views, axis=(1, 2))
    return (
        views_agg_func(np)
        .reshape(tof_views.shape[0], -1)
    )

def agg_tof_cols_per_sensor(df:DF) -> DF:
    """
    ## Description:
    Computes the sensor and patch sensor wise stats.
    ## Resturns:
    The dataframe with the added stats.
    """
    for tof_idx in tqdm(range(1, 6)):
        tof_name = f"tof_{tof_idx}"
        all_tof_cols = [f"{tof_name}_v{v_idx:02d}" for v_idx in range(64)]
        tof_feats = (
            df
            .loc[:, all_tof_cols]
            .values
            .reshape(-1, 8, 8)
        )
        agg_func = partial(df[all_tof_cols].agg, axis="columns")
        mk_fe_col_name = lambda f_name: tof_name + "_" + f_name
        engineered_feats = DF({mk_fe_col_name(f_name): agg_func(f_name) for f_name in TOF_AGG_FUNCTIONS})
        stats_cols_names = list(map(mk_fe_col_name, TOF_AGG_FUNCTIONS))
        # Patch Feature engineering
        tof_views:np.ndarray = sliding_window_view(tof_feats, (TOF_PATCH_SIZE, TOF_PATCH_SIZE), (1, 2))
        patch_fe = {}
        for f_name in TOF_AGG_FUNCTIONS:
            tof_patch_stats = agg_tof_patch(tof_views, f_name)
            for patch_idx in range(tof_patch_stats.shape[1]):
                key = mk_fe_col_name(f_name) + f"_{patch_idx:02d}"
                patch_fe[key] = tof_patch_stats[:, patch_idx]
        patch_df = DF(patch_fe)
        # concat results
        df = pd.concat(
            (
                df.drop(columns=filter(df.columns.__contains__, stats_cols_names)),
                engineered_feats,
                patch_df,
            ),
            axis="columns",
        )
    return df

def add_diff_features(df:DF) -> DF:
    return pd.concat(
        (
            df,
            (
                df
                .groupby("sequence_id", as_index=False, observed=True)
                [get_feature_cols(df)]
                .diff()
                .fillna(get_fillna_val_per_feature_col(df))
                .add_suffix("_diff")
            )
        ),
        axis="columns",
    )

def one_hot_encode_targets(df:DF) -> DF:
    one_hot_target = pd.get_dummies(df["gesture"])
    df[TARGET_NAMES] = one_hot_target[TARGET_NAMES]
    return df

def length_normed_sequence_feat_arr(
        sequence: DF,
        normed_sequence_len: int,
        SEQ_PAD_TRUNC_MODE:Literal["pre", "center", "post"]
    ) -> ndarray:
    features = (
        sequence
        .loc[:, get_feature_cols(sequence)]
        .values
    )
    len_diff = abs(normed_sequence_len - len(features))
    len_diff_h = len_diff // 2 # half len diff
    len_diff_r = len_diff % 2 # len diff remainder
    if len(features) < normed_sequence_len:
        padding_dict = {
            "pre": (len_diff, 0),
            "center": (len_diff_h + len_diff_r, len_diff_h),
            "post": (0, len_diff),
        }
        padded_features = np.pad(
            features,
            (padding_dict[SEQ_PAD_TRUNC_MODE], (0, 0)),
        )
        return padded_features
    elif len(features) > normed_sequence_len:
        truncating_dict = {
            "pre": slice(len_diff),
            "center": slice(len_diff_h, -len_diff_h),
            "post": slice(0, -len_diff),
        }
        return features[len_diff // 2:-len_diff // 2]
    else:
        return features

def df_to_ndarrays(df:DF, normed_sequence_len:int, seq_pad_trunc_mode:str) -> tuple[np.ndarray, np.ndarray]:
    sequence_it = df.groupby("sequence_id", observed=True, as_index=False)
    x = np.empty(
        shape=(len(sequence_it), normed_sequence_len, len(get_feature_cols(df))),
        dtype="float32"
    )
    y = np.empty(
        shape=(len(sequence_it), len(TARGET_NAMES)),
        dtype="float32"
    )
    for sequence_idx, (_, sequence) in tqdm(enumerate(sequence_it), total=len(sequence_it)):
        normed_seq_feat_arr = length_normed_sequence_feat_arr(sequence, normed_sequence_len, seq_pad_trunc_mode)
        x[sequence_idx] = normed_seq_feat_arr
        # Take the first value as they are(or at least should be) all the same in a single sequence
        y[sequence_idx] = sequence[TARGET_NAMES].iloc[0].values

    return x, y

def get_normed_seq_len(dataset:DF) -> int:
    return int(
        dataset
        .groupby("sequence_id", observed=True)
        .size()
        .quantile(SEQUENCE_NORMED_LEN_QUANTILE)
    )

def fold_dfs_to_ndarrays(train:DF, validation:DF, dataset_normed_seq_len:int, seq_pad_trunc_mode:str) -> tuple[ndarray, ndarray, ndarray, ndarray]:
    """
    Returns:
        (train X, train Y, validation X, validation Y)
    """
    # full_dataset_normed_seq_len = get_normed_seq_len(df)
    return (
        *df_to_ndarrays(train, dataset_normed_seq_len, seq_pad_trunc_mode),
        *df_to_ndarrays(validation, dataset_normed_seq_len, seq_pad_trunc_mode),
    )

In [7]:
def preprocess_competitino_dataset() -> DF:
    csv_path = competition_download(COMPETITION_HANDLE, path="train.csv")
    return (
        pd.read_csv(csv_path, dtype=DATASET_DF_DTYPES)
        .pipe(imputed_features)
        .pipe(standardize_tof_cols_names)
        .pipe(norm_quat_rotations)
        .pipe(add_linear_acc_cols)
        .pipe(add_acc_magnitude, RAW_ACCELRATION_COLS, "acc_mag")
        .pipe(add_acc_magnitude, LINEAR_ACC_COLS, "linear_acc_mag")
        .pipe(add_quat_angle_mag)
        .pipe(add_angular_velocity_features)
        .pipe(rot_euler_angles)
        .pipe(add_quat_angle_mag)
        .pipe(one_hot_encode_targets)
        .pipe(agg_tof_cols_per_sensor)
        .pipe(add_diff_features)
    )

def save_sequence_meta_data(df:DF) -> DF:
    seq_meta_data = (
        df
        .groupby("sequence_id", as_index=False, observed=True)
        [META_DATA_COLUMNS]
        .last()
    )
    seq_meta_data.to_parquet("preprocessed_dataset/sequences_meta_data.parquet")
    np.save(
        "preprocessed_dataset/auxialiary_Y.npy",
        pd.get_dummies(seq_meta_data["orientation"]).values,
    )

def save_df_meta_data(df:DF):
    full_dataset_meta_data = {
        "mean": df[get_feature_cols(df)].mean().astype("float32").to_dict(),
        "std": df[get_feature_cols(df)].std().astype("float32").to_dict(),
        "pad_seq_len": get_normed_seq_len(df),
        "feature_cols": get_feature_cols(df),
        "n_aux_classes": df["orientation"].nunique(),
    }
    with open("preprocessed_dataset/full_dataset_meta_data.json", "w") as fp:
        json.dump(full_dataset_meta_data, fp, indent=4)
    
def create_preprocessed_dataset():
    shutil.rmtree("preprocessed_dataset", ignore_errors=True)
    os.makedirs("preprocessed_dataset")
    df = preprocess_competitino_dataset()
    full_dataset_sequence_length_norm = get_normed_seq_len(df)
    full_x, full_y = df_to_ndarrays(df, full_dataset_sequence_length_norm, SEQ_PAD_TRUNC_MODE)
    np.save(join("preprocessed_dataset", "X.npy"), full_x, allow_pickle=False)
    np.save(join("preprocessed_dataset", "Y.npy"), full_y, allow_pickle=False)
    # Save meta data
    save_sequence_meta_data(df)
    save_df_meta_data(df)

In [8]:
# create_preprocessed_dataset()

### Dataset class

In [9]:
class CMIDataset(TensorDataset):
    def __init__(self):
        x = np.load(join("preprocessed_dataset", "X.npy")).swapaxes(1, 2)
        y = np.load(join("preprocessed_dataset", "Y.npy"))
        auxiliary_y = np.load(join("preprocessed_dataset", "auxialiary_Y.npy"))
        super().__init__(
            torch.from_numpy(x).to(device),
            torch.from_numpy(y).to(device),
            torch.from_numpy(auxiliary_y).to(device),
        )

In [10]:
auxiliary_y = np.load(join("preprocessed_dataset", "auxialiary_Y.npy"))
n_aux_classes = auxiliary_y.shape[1]

#### Meta data loading

In [11]:
meta_data_path = join(
    "preprocessed_dataset",
    "full_dataset_meta_data.json"
)
with open(meta_data_path, "r") as fp:
    meta_data = json.load(fp)
# Convert target names into a ndarray to index it batchwise.
def get_sensor_indices(sensor_prefix: str) -> list[int]:
    is_sensor_feat = methodcaller("startswith", sensor_prefix)
    return [feat_idx for feat_idx, feat in enumerate(meta_data["feature_cols"]) if is_sensor_feat(feat)]

tof_idx = get_sensor_indices("tof")
thm_idx = get_sensor_indices("thm")
imu_idx = list(filter(lambda idx: idx not in tof_idx + thm_idx, range(len(meta_data["feature_cols"]))))

## Model definition

In [12]:
class MultiScaleConvs(nn.Module):
    def __init__(self, in_channels:int, kernel_sizes:list[int]):
        super().__init__()
        def mk_conv_block(k_size) -> nn.Sequential:
            return nn.Sequential(
                nn.Conv1d(in_channels, in_channels, k_size, padding=k_size // 2, groups=in_channels),
                nn.BatchNorm1d(in_channels),
                nn.ReLU(),
            )
        self.convs = nn.ModuleList(map(mk_conv_block, kernel_sizes))

    def forward(self, x:Tensor) -> Tensor:
        yes = torch.cat([conv(x) for conv in self.convs] + [x], dim=1)
        # print("stem output shape:", yes.shape)
        return yes

class ImuFeatureExtractor(nn.Module):
    def __init__(self, in_channels:int, kernel_size:int=15):
        super().__init__()

        self.lpf = nn.Conv1d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            padding=kernel_size//2,
            groups=in_channels,
            bias=False,
        )
        nn.init.kaiming_uniform_(self.lpf.weight, a=math.sqrt(5))

    def forward(self, x:Tensor) -> Tensor:
        lpf_output = self.lpf(x)
        hpf_output = x - lpf_output
        return torch.cat((lpf_output, hpf_output, x), dim=1)  # (B, C_out, T)

class SqueezeExcitationBlock(nn.Module):
    # Copy/paste of https://www.kaggle.com/code/wasupandceacar/lb-0-82-5fold-single-bert-model#Model implementation
    def __init__(self, channels:int, reduction:int=8):
        super().__init__()
        self.fc1 = nn.Linear(channels, channels // reduction, bias=True)
        self.fc2 = nn.Linear(channels // reduction, channels, bias=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # x: (B, C, L)
        se = F.adaptive_avg_pool1d(x, 1).squeeze(-1)      # -> (B, C)
        se = F.relu(self.fc1(se), inplace=True)          # -> (B, C//r)
        se = self.sigmoid(self.fc2(se)).unsqueeze(-1)    # -> (B, C, 1)
        return x * se

class ResidualBlock(nn.Module):
    def __init__(self, in_chns:int, out_chns:int, dropout_ratio:float=0.3, se_reduction:int=8, kernel_size:int=3):
        super().__init__()
        self.blocks = nn.Sequential(
            nn.Conv1d(in_chns, out_chns, kernel_size=kernel_size, padding=kernel_size // 2, bias=False),
            nn.BatchNorm1d(out_chns),
            nn.ReLU(),
            nn.Conv1d(out_chns, out_chns, kernel_size=kernel_size, padding=kernel_size // 2, bias=False),
            nn.BatchNorm1d(out_chns),
            SqueezeExcitationBlock(out_chns, se_reduction),
        )
        self.head = nn.Sequential(nn.ReLU(), nn.Dropout(dropout_ratio))
        if in_chns == out_chns:
            self.skip_connection = nn.Identity() 
        else:
            # TODO: set bias to False ?
            self.skip_connection = nn.Sequential(
                nn.Conv1d(in_chns, out_chns, 1, bias=False),
                nn.BatchNorm1d(out_chns)
            )
            self.head.insert(1, nn.MaxPool1d(2))

    def forward(self, x:Tensor) -> Tensor:
        activaition_maps = self.skip_connection(x) + self.blocks(x)
        return self.head(activaition_maps)

class MBConvBlock(nn.Module):
    # From this schema: https://media.licdn.com/dms/image/v2/D5612AQFjbDOm5uyxdw/article-inline_image-shrink_1500_2232/article-inline_image-shrink_1500_2232/0/1683677500817?e=1758153600&v=beta&t=n48_UW5TZTyDPhRFlJXSidUQQPQpuC756M0kNeKmYTY
    def __init__(self, in_chns:int, out_chns:int, se_reduction:int=8, expansion_ratio:int=4, dropout_ratio:float=0.3):
        super().__init__()
        expanded_channels = in_chns * expansion_ratio
        self.blocks = nn.Sequential(
            nn.Conv1d(in_chns, expanded_channels, kernel_size=1, bias=False),
            nn.BatchNorm1d(expanded_channels),
            nn.ReLU(),
            nn.Conv1d(
                expanded_channels,
                expanded_channels,
                kernel_size=3,
                padding=1,
                groups=expanded_channels,
                bias=False,
            ),
            nn.BatchNorm1d(expanded_channels),
            nn.ReLU(),
            SqueezeExcitationBlock(expanded_channels, se_reduction),
            nn.Conv1d(expanded_channels, out_chns, kernel_size=1, bias=False)
        )
        self.head = nn.Sequential(
            nn.BatchNorm1d(out_chns)
            # nn.ReLU(),
            # nn.Dropout(dropout_ratio),
        )
        if in_chns == out_chns:
            self.skip_connection = nn.Identity() 
        else:
            # TODO: set bias to False ?
            self.skip_connection = nn.Sequential(
                nn.Conv1d(in_chns, out_chns, 1, bias=False),
                nn.BatchNorm1d(out_chns)
            )
            self.head.add_module("max_pool", nn.MaxPool1d(2))
            
    def forward(self, x:Tensor) -> Tensor:
        activaition_maps = self.skip_connection(x) + self.blocks(x)
        return self.head(activaition_maps)

class AdditiveAttentionLayer(nn.Module):
    # Copied (and slightly modified) from https://www.kaggle.com/code/myso1987/cmi3-pyroch-baseline-model-add-aug-folds
    def __init__(self, hidden_dim):
        super().__init__()
        self.attention = nn.Linear(hidden_dim, 1, bias=True)

    def forward(self, x: Tensor) -> Tensor:
        # x shape: (batch, channels, seq_len)
        x = x.swapaxes(1, 2)
        # x shape: (batch, seq_len, hidden_dim)
        scores = torch.tanh(self.attention(x))  # (batch, seq_len, 1)
        weights = F.softmax(scores.squeeze(-1), dim=1)  # (batch, seq_len)
        context = torch.sum(x * weights.unsqueeze(-1), dim=1)  # (batch, hidden_dim)
        return context

class AlexNet(nn.Sequential):
    def __init__(self, channels:list[int], dropout_ratio:float):
        def mk_conv_block(in_channels:int, out_channels:int) -> nn.Module:
            return nn.Sequential(
                nn.Conv1d(in_channels, out_channels, 3, padding=1, bias=False),
                nn.BatchNorm1d(out_channels),
                nn.MaxPool1d(2),
                nn.Dropout(dropout_ratio),
            )
        return super().__init__(*list(starmap(mk_conv_block, pairwise(channels))))

class CMIHARModule(nn.Module):
    def __init__(
            self,
            imu_idx:list[int],
            thm_idx:list[int],
            tof_idx:list[int],
            mlp_width:int,
            n_classes:int,
            n_aux_classes:Optional[int]=None,
            dataset_x:Optional[Tensor]=None,
            tof_dropout_ratio:float=0,
            thm_dropout_ratio:float=0,
        ):
        super().__init__()
        self.imu_idx = imu_idx
        self.tof_idx = tof_idx
        self.thm_idx = thm_idx
        if dataset_x is not None:
            x_mean = dataset_x.mean(dim=(0, 2), keepdim=True)
            x_std = dataset_x.std(dim=(0, 2), keepdim=True)
            self.register_buffer("x_mean", x_mean)
            self.register_buffer("x_std", x_std)
        else:
            x_stats_size = (1, len(meta_data["feature_cols"]), 1)
            self.register_buffer("x_mean", torch.empty(x_stats_size))
            self.register_buffer("x_std", torch.empty(x_stats_size))
        self.imu_branch = nn.Sequential(
            ResidualBlock(len(imu_idx), 219),
            ResidualBlock(219, 500),
        )
        self.tof_branch = AlexNet([len(tof_idx), 82, 500], tof_dropout_ratio)
        self.thm_branch = AlexNet([len(thm_idx), 82, 500], thm_dropout_ratio)
        self.rnn = nn.GRU(500 * 3, mlp_width // 2, bidirectional=True)
        self.attention = AdditiveAttentionLayer(mlp_width)
        self.meain_head = nn.Sequential(
            # Head
            nn.LazyLinear(mlp_width, bias=False),
            nn.BatchNorm1d(mlp_width),
            nn.ReLU(),
            nn.Linear(mlp_width, mlp_width // 2, bias=False),
            nn.BatchNorm1d(mlp_width // 2),
            nn.ReLU(),
            nn.Linear(mlp_width // 2, n_classes),
        )
        if n_aux_classes is not None:
            self.aux_head = nn.Sequential(
                # Head
                nn.LazyLinear(mlp_width, bias=False),
                nn.BatchNorm1d(mlp_width),
                nn.ReLU(),
                nn.Linear(mlp_width, mlp_width // 2, bias=False),
                nn.BatchNorm1d(mlp_width // 2),
                nn.ReLU(),
                nn.Linear(mlp_width // 2, n_aux_classes),
            )

    def forward(self, x:Tensor) -> Tensor:
        assert self.x_mean is not None and self.x_std is not None, f"Nor x_mean nor x_std should be None.\nx_std: {self.x_std}\nx_mean: {self.x_mean}"
        x = (x - self.x_mean) / self.x_std
        concatenated_activation_maps = torch.cat(
            (
                self.imu_branch(x[:, self.imu_idx]),
                self.thm_branch(x[:, self.thm_idx]),
                self.tof_branch(x[:, self.tof_idx]),
            ),
            dim=CHANNELS_DIMENSION,
        )
        lstm_output, _  = self.rnn(concatenated_activation_maps.swapaxes(1, 2))
        lstm_output = lstm_output.swapaxes(1, 2) # redundant
        attended = self.attention(lstm_output)
        if hasattr(self, "aux_head"):
            return self.meain_head(attended), self.aux_head(attended)
        return self.meain_head(attended)

### Create model function

In [13]:
def mk_model(
    dataset_x:Optional[Tensor]=None,
    n_aux_classes:Optional[int]=None,
) -> nn.Module:
    return (
        CMIHARModule(
            imu_idx=imu_idx,
            thm_idx=thm_idx,
            tof_idx=tof_idx,
            mlp_width=256,
            n_classes=18,
            dataset_x=dataset_x,
            n_aux_classes=n_aux_classes,
        )
        .to(device)
    )

display(mk_model(torch.arange(12).view(2, 2, -1).float()))
print("input channels:", len(meta_data["feature_cols"]))

CMIHARModule(
  (imu_branch): Sequential(
    (0): ResidualBlock(
      (blocks): Sequential(
        (0): Conv1d(46, 219, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
        (1): BatchNorm1d(219, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv1d(219, 219, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
        (4): BatchNorm1d(219, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): SqueezeExcitationBlock(
          (fc1): Linear(in_features=219, out_features=27, bias=True)
          (fc2): Linear(in_features=27, out_features=219, bias=True)
          (sigmoid): Sigmoid()
        )
      )
      (head): Sequential(
        (0): ReLU()
        (1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (2): Dropout(p=0.3, inplace=False)
      )
      (skip_connection): Sequential(
        (0): Conv1d(46, 219, kernel_size=(1,), stride=(1,), bias=False)
       

input channels: 946


## Training

In [14]:
class CosineAnnealingWarmupRestarts(_LRScheduler):
    def __init__(
        self,
        optimizer: Optimizer,
        warmup_steps: int,
        max_lr: float,
        min_lr: float,
        cycle_length: int,
        cycle_mult: float = 1.0,
        gamma: float = 1.0,
        last_epoch: int = -1,
    ) -> None:
        """
        Args:
            optimizer: Wrapped optimizer.
            warmup_steps: Number of steps for linear warmup.
            max_lr: Initial maximum learning rate.
            min_lr: Minimum learning rate after decay.
            cycle_length: Initial number of steps per cosine cycle.
            cycle_mult: Multiplicative factor for increasing cycle lengths.
            gamma: Multiplicative decay factor for max_lr after each cycle.
            last_epoch: The index of last epoch. Default: -1.
        """
        self.warmup_steps = warmup_steps
        self.max_lr = max_lr
        self.min_lr = min_lr
        self.cycle_length = cycle_length
        self.cycle_mult = cycle_mult
        self.gamma = gamma

        self.current_cycle = 0
        self.cycle_step = 0
        self.lr = max_lr

        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> list[float]:
        if self.last_epoch < self.warmup_steps:
            # Linear warmup
            scale = (self.last_epoch + 1) / self.warmup_steps
            return [self.min_lr + scale * (self.max_lr - self.min_lr) for _ in self.base_lrs]

        # Adjust for post-warmup step index
        t = self.cycle_step
        T = self.cycle_length

        cosine_decay = 0.5 * (1 + math.cos(math.pi * t / T))
        lr = self.min_lr + (self.max_lr - self.min_lr) * cosine_decay

        return [lr for _ in self.base_lrs]

    def step(self, epoch: Optional[int] = None) -> None:
        if self.last_epoch >= self.warmup_steps:
            self.cycle_step += 1
            if self.cycle_step >= self.cycle_length:
                self.current_cycle += 1
                self.cycle_step = 0
                self.cycle_length = max(int(self.cycle_length * self.cycle_mult), 1)
                self.max_lr *= self.gamma
        super().step(epoch)

In [15]:
def mixup_data(
    x:Tensor,
    y:Tensor,
    aux_y:Optional[Tensor],
    alpha=0.2
) -> tuple[Tensor, Tensor] | tuple[Tensor, Tensor, Tensor]:
    """
    Return mixed inputs and mixed targets (one-hot) for mixup.
    x: Tensor of shape (batch_size, features, seq_len)
    y: Tensor of shape (batch_size, num_classes)
    """
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1.0
    batch_size = x.size(0)
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    mixed_y = lam * y + (1 - lam) * y[index, :]
    if aux_y is not None:
        mixed_aux_y = lam * aux_y + (1 - lam) * aux_y[index, :]
        return mixed_x, mixed_y, mixed_aux_y
    else:
        return mixed_x, mixed_y

In [16]:
def train_model(
        model:nn.Module,
        train_loader:DL,
        criterion:callable,
        optimizer:torch.optim.Optimizer,
        scheduler:_LRScheduler,
    ) -> dict:
    "Train model on a single epoch"
    train_metrics = {}
    model.train()
    train_metrics["train_loss"] = 0.0
    total = 0
    for batch_x, batch_y, batch_aux_y in train_loader:
        batch_aux_y = batch_aux_y.clone()
        batch_x = batch_x.to(device).clone()
        add_noise = torch.randn_like(batch_x, device=device) * 0.04
        scale_noise = torch.rand_like(batch_x, device=device) * (1.1 - 0.9) + 0.9
        batch_x = (add_noise + batch_x) * scale_noise
        batch_x[:TRAIN_BATCH_SIZE // 2, tof_idx + thm_idx] = 0.0
        batch_y = batch_y.to(device)
        batch_x = batch_x.float()
        
        batch_x, batch_y, batch_aux_y = mixup_data(batch_x, batch_y, batch_aux_y)

        optimizer.zero_grad()
        outputs, aux_output = model(batch_x)
        loss = criterion(outputs, batch_y) + criterion(aux_output, batch_aux_y)
        loss.backward()
        optimizer.step()
        scheduler.step()

        train_metrics["train_loss"] += loss.item() * batch_x.size(0)
        total += batch_x.size(0)
    train_metrics["train_loss"] /= total

    return train_metrics

In [17]:
def evaluate_model(model:nn.Module, validation_loader:DL, criterion:callable) -> dict:
    model.eval()
    eval_metrics = {}
    eval_metrics["val_loss"] = 0.0
    total = 0
    all_true = []
    all_pred = []

    with torch.no_grad():
        for batch_x, batch_y, _ in validation_loader:
            batch_x = batch_x.to(device).clone()
            batch_y = batch_y.to(device)
            # batch_aux_y = batch_aux_y.to(device)
            batch_x[:VALIDATION_BATCH_SIZE // 2, tof_idx + thm_idx] = 0.0

            outputs, _ = model(batch_x)
            loss = criterion(outputs, batch_y)
            eval_metrics["val_loss"] += loss.item() * batch_x.size(0)
            total += batch_x.size(0)

            # Get predicted class indices
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            # Get true class indices from one-hot
            trues = torch.argmax(batch_y, dim=1).cpu().numpy()

            all_true.append(trues)
            all_pred.append(preds)

    eval_metrics["val_loss"] /= total
    all_true = np.concatenate(all_true)
    all_pred = np.concatenate(all_pred)

    # Compute competition metrics
    # Binary classification: BFRB (1) vs non-BFRB (0)
    binary_true = np.isin(all_true, BFRB_INDICES).astype(int)
    binary_pred = np.isin(all_pred, BFRB_INDICES).astype(int)
    eval_metrics["binary_f1"] = f1_score(binary_true, binary_pred)

    # Collapse non-BFRB gestures into a single class
    collapsed_true = np.where(
        np.isin(all_true, BFRB_INDICES),
        all_true,
        len(BFRB_GESTURES)  # Single non-BFRB class
    )
    collapsed_pred = np.where(
        np.isin(all_pred, BFRB_INDICES),
        all_pred,
        len(BFRB_GESTURES)  # Single non-BFRB class
    )

    # Macro F1 on collapsed classes
    eval_metrics["macro_f1"] = f1_score(collapsed_true, collapsed_pred, average='macro')
    eval_metrics["final_metric"] = (eval_metrics["binary_f1"] + eval_metrics["macro_f1"]) / 2

    return eval_metrics

In [18]:
def train_model_on_all_epochs(
        model:nn.Module,
        train_loader:DL,
        validation_loader:DL,
        criterion:callable,
        optimizer:torch.optim.Optimizer,
        scheduler:_LRScheduler,
        fold:int,
    ) -> DF:

    metrics:list[dict] = []
    # Early stopping
    best_metric = -np.inf
    best_binary_f1 = -np.inf
    best_macro_f1 = -np.inf
    epochs_no_improve = 0

    for epoch in range(1, TRAINING_EPOCHS + 1):
        train_metrics = train_model(model, train_loader, criterion, optimizer, scheduler)
        validation_metrics = evaluate_model(model, validation_loader, criterion)
        metrics.append({"fold": fold, "epoch": epoch} | train_metrics | validation_metrics)

        print(f"Epoch {epoch:02d}: Binary F1 = {validation_metrics['binary_f1']:.4f}, Macro F1 = {validation_metrics['macro_f1']:.4f}, Final Metric = {validation_metrics['final_metric']:.4f}")

        if validation_metrics["final_metric"] > best_metric:
            best_metric = validation_metrics["final_metric"]
            best_binary_f1 = validation_metrics["binary_f1"]
            best_macro_f1 = validation_metrics["macro_f1"]
            epochs_no_improve = 0
            best_model_state = model.state_dict()
            print(f"  New best metric! Saving model...")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= PATIENCE:
                print(f"Early stopping triggered at epoch {epoch}")
                model.load_state_dict(best_model_state)
                break

    torch.save(best_model_state, f"best_model_fold{fold}.pth")

    return DF.from_records(metrics).set_index(["fold", "epoch"])

In [19]:
def sgkf_from_tensor_dataset(
    dataset: TensorDataset,
    n_splits: int = 5,
    shuffle: bool = True,
) -> Iterator[tuple[Subset, Subset]]:
    # Load sequence meta data to get classes and groups parameters
    seq_meta = pd.read_parquet("preprocessed_dataset/sequences_meta_data.parquet")
    X, *_ = dataset.tensors
    sgkf = StratifiedGroupKFold(
        n_splits=n_splits,
        shuffle=shuffle,
    )

    for train_idx, val_idx in sgkf.split(X.cpu().numpy(), seq_meta["gesture"], seq_meta["subject"]):
        yield Subset(dataset, train_idx), Subset(dataset, val_idx)

In [20]:
def train_on_all_folds(lr_scheduler_kw:dict, optimizer_kw:dict) -> tuple[float, DF]:
    seed_everything(seed=SEED)

    metrics:DF = DF()
    full_dataset = CMIDataset()
    folds_it = sgkf_from_tensor_dataset(full_dataset, NB_CROSS_VALIDATIONS)

    for fold_idx, (train_dataset, validation_dataset) in enumerate(folds_it):
        seed_everything(seed=SEED + fold_idx)
        # Debugging
        print(f"\n{'='*50}")
        print("training:", fold_idx + 1)
        print(f"Fold {fold_idx + 1}/{NB_CROSS_VALIDATIONS}")
        criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)
        train_loader = DL(train_dataset, TRAIN_BATCH_SIZE, shuffle=True, drop_last=False)
        validation_loader = DL(validation_dataset, VALIDATION_BATCH_SIZE, shuffle=False, drop_last=False)
        print("train dataset indices:", len(train_dataset.indices))
        print("validation dataset indices:", len(validation_dataset.indices))
        all_train_x = train_dataset.dataset.tensors[0][train_dataset.indices]
        model = mk_model(all_train_x, meta_data["n_aux_classes"])

        # Optimizer et scheduler
        optimizer = torch.optim.AdamW(
            model.parameters(),
            WARMUP_LR_INIT,
            weight_decay=optimizer_kw["weight_decay"],
            betas=(optimizer_kw["beta_0"], optimizer_kw["beta_1"]),
        )
        steps_per_epoch = len(train_loader)
        scheduler = CosineAnnealingWarmupRestarts(
            optimizer,
            warmup_steps=lr_scheduler_kw["warmup_epochs"] * steps_per_epoch,
            cycle_mult=lr_scheduler_kw["cycle_mult"],
            max_lr=lr_scheduler_kw["max_lr"],
            min_lr=lr_scheduler_kw["max_lr"] / lr_scheduler_kw["max_to_min_div_factor"],
            cycle_length=lr_scheduler_kw["init_cycle_epochs"] * steps_per_epoch,
            gamma=lr_scheduler_kw["lr_cycle_factor"],
        ) 
        fold_metrics = train_model_on_all_epochs(
            model,
            train_loader,
            validation_loader,
            criterion,
            optimizer,
            scheduler,
            fold_idx,
        )
        # Free memory used by datasets and data loaders
        del train_dataset
        del validation_dataset
        del train_loader
        del validation_loader
        gc.collect()
        torch.cuda.empty_cache()

        best_fold_metrics = fold_metrics.loc[fold_metrics["final_metric"].idxmax()]
        final_fold_metrics = fold_metrics.iloc[-1]
        print(f"Best validation metrics - Binary F1: {best_fold_metrics['binary_f1']:.4f}, Macro F1: {best_fold_metrics['macro_f1']:.4f}, Final: {best_fold_metrics['final_metric']:.4f}")
        print(f"Final validation metrics - Binary F1: {final_fold_metrics['binary_f1']:.4f}, Macro F1: {final_fold_metrics['macro_f1']:.4f}, Final: {final_fold_metrics['final_metric']:.4f}")

        metrics = pd.concat((metrics, fold_metrics))

    print("\n" + "="*50)
    print("Cross-Validation Results")
    print("="*50)

    # Statistiques pour les meilleures métriques
    best_metrics:DF = (
        metrics
        .loc[:, ["binary_f1", "macro_f1", "final_metric"]]
        .groupby(level=0)
        .max()
    )

    print("\nBest Fold-wise Metrics:")
    display(best_metrics)
    
    print("\nGlobal Statistics (Best Metrics):")
    print(f"Mean Best Final Metric: {best_metrics['final_metric'].mean():.4f} ± {best_metrics['final_metric'].std():.4f}")
    print(f"Mean Best Binary F1: {best_metrics['binary_f1'].mean():.4f} ± {best_metrics['binary_f1'].std():.4f}")
    print(f"Mean Best Macro F1: {best_metrics['macro_f1'].mean():.4f} ± {best_metrics['macro_f1'].std():.4f}")
    
    return best_metrics["final_metric"].mean(), metrics

In [27]:
mean_best_cv_score, metrics = train_on_all_folds(
    lr_scheduler_kw={
        "warmup_epochs": 8,
        "cycle_mult": 0.7994284370327427,
        "max_lr": 0.005581907927062619,
        "max_to_min_div_factor": 275.0,
        "init_cycle_epochs": 5,
        "lr_cycle_factor": 0.5033112105827083,
    },
    optimizer_kw={
        "weight_decay": 0.0006702308864102119,
        "beta_0": 0.9089203414971434,
        "beta_1": 0.9969898035522793,
    }
)
print("mean best CV:", mean_best_cv_score)
display(metrics)


training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7809, Macro F1 = 0.2075, Final Metric = 0.4942
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9115, Macro F1 = 0.3174, Final Metric = 0.6145
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9632, Macro F1 = 0.4072, Final Metric = 0.6852
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9673, Macro F1 = 0.4301, Final Metric = 0.6987
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9177, Macro F1 = 0.4205, Final Metric = 0.6691
Epoch 06: Binary F1 = 0.9650, Macro F1 = 0.5193, Final Metric = 0.7422
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9595, Macro F1 = 0.4795, Final Metric = 0.7195
Epoch 08: Binary F1 = 0.9560, Macro F1 = 0.4625, Final Metric = 0.7092
Epoch 09: Binary F1 = 0.9774, Macro F1 = 0.5263, Final Metric = 0.7518
  New best metric! Saving model...
Epoch 10: Binary F1 = 0.9885, Macro F1 = 0.5786, Final Metric = 0.78

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.991123,0.669662,0.830139
1,0.968059,0.58992,0.77899
2,0.974815,0.605067,0.788758
3,0.945455,0.541383,0.741822
4,0.977409,0.572269,0.773951



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7827 ± 0.0318
Mean Best Binary F1: 0.9714 ± 0.0167
Mean Best Macro F1: 0.5957 ± 0.0477
mean best CV: 0.7827318390357785


Unnamed: 0_level_0,Unnamed: 1_level_0,train_loss,val_loss,binary_f1,macro_f1,final_metric
fold,epoch,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,3.855682,2.579842,0.780890,0.207507,0.494198
0,2,3.139727,2.067492,0.911458,0.317444,0.614451
0,3,2.791345,1.765862,0.963197,0.407174,0.685185
0,4,2.521093,1.760678,0.967300,0.430104,0.698702
0,5,2.689573,1.743890,0.917749,0.420530,0.669140
...,...,...,...,...,...,...
4,21,1.974361,1.416081,0.973223,0.566158,0.769690
4,22,1.941672,1.414669,0.972748,0.564224,0.768486
4,23,1.781919,1.401836,0.975452,0.563893,0.769672
4,24,1.891745,1.397817,0.975632,0.572269,0.773951


## Hyperparameter tuning

In [None]:
# def objective(trial: optuna.trial.Trial) -> float:
#     return train_on_all_folds(
#         lr_scheduler_kw={
#             "warmup_epochs": trial.suggest_int("warmup_epochs", 8, 12),
#             "cycle_mult": trial.suggest_float("cycle_mult", 0.5, 2),
#             "max_lr": trial.suggest_float("max_lr", 0.005581907927062619 / 3, 0.005581907927062619 * 3),
#             "max_to_min_div_factor": trial.suggest_float("max_to_min_div_factor", 100, 300, step=25),
#             "init_cycle_epochs": trial.suggest_int("init_cycle_epochs", 2, 10),
#             "lr_cycle_factor": trial.suggest_float("lr_cycle_factor", 0.3, 1),
#         },
#         optimizer_kw={
#             "weight_decay": trial.suggest_float("weight_decay", 5e-4, 1e-3),
#             "beta_0":trial.suggest_float("beta_0", 0.8, 0.999),
#             "beta_1":trial.suggest_float("beta_1", 0.99, 0.9999),
#         }
#     )[0]

def objective(trial: optuna.trial.Trial) -> float:
    return train_on_all_folds(
        lr_scheduler_kw={
            "warmup_epochs": trial.suggest_int("warmup_epochs", 8, 12),
            "cycle_mult": trial.suggest_float("cycle_mult", 0.5, 1.1),
            "max_lr": trial.suggest_float("max_lr", 0.005581907927062619 / 3, 0.005581907927062619 * 3),
            "max_to_min_div_factor": trial.suggest_float("max_to_min_div_factor", 100, 300, step=25),
            "init_cycle_epochs": trial.suggest_int("init_cycle_epochs", 2, 10),
            "lr_cycle_factor": trial.suggest_float("lr_cycle_factor", 0.3, 1),
        },
        optimizer_kw={
            "weight_decay": trial.suggest_float("weight_decay", 5e-4, 1e-3),
            "beta_0":trial.suggest_float("beta_0", 0.8, 0.999),
            "beta_1":trial.suggest_float("beta_1", 0.99, 0.9999),
        }
    )[0]

In [29]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=60 * 60 * 2)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2025-08-15 16:48:47,829] A new study created in memory with name: no-name-52a47584-3aa2-407d-8b90-cb76b38dedac



training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7451, Macro F1 = 0.1777, Final Metric = 0.4614
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8355, Macro F1 = 0.2548, Final Metric = 0.5452
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9393, Macro F1 = 0.3718, Final Metric = 0.6555
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9645, Macro F1 = 0.3712, Final Metric = 0.6679
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9572, Macro F1 = 0.4226, Final Metric = 0.6899
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9663, Macro F1 = 0.4814, Final Metric = 0.7238
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9650, Macro F1 = 0.4959, Final Metric = 0.7305
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9693, Macro F1 = 0.4956, Final Metric = 0.7324
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.9659, Macro F1 = 0.5014, Final Metric = 0.733

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.989594,0.663708,0.826394
1,0.968719,0.582551,0.772793
2,0.975442,0.621291,0.797838
3,0.954173,0.552133,0.751516
4,0.969035,0.57064,0.769823


[I 2025-08-15 16:52:37,240] Trial 0 finished with value: 0.7836728651738101 and parameters: {'warmup_epochs': 10, 'cycle_mult': 1.475328135004869, 'max_lr': 0.0028448434114718437, 'max_to_min_div_factor': 150.0, 'init_cycle_epochs': 8, 'lr_cycle_factor': 0.8687801038506187, 'weight_decay': 0.0006709409500262523, 'beta_0': 0.9309434756898641, 'beta_1': 0.9902424355163821}. Best is trial 0 with value: 0.7836728651738101.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7837 ± 0.0290
Mean Best Binary F1: 0.9714 ± 0.0128
Mean Best Macro F1: 0.5981 ± 0.0446

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8110, Macro F1 = 0.1675, Final Metric = 0.4892
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9154, Macro F1 = 0.2719, Final Metric = 0.5936
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9493, Macro F1 = 0.3699, Final Metric = 0.6596
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9620, Macro F1 = 0.3998, Final Metric = 0.6809
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9612, Macro F1 = 0.4734, Final Metric = 0.7173
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9802, Macro F1 = 0.5093, Final Metric = 0.7448
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9666, Macro F1 = 0.5181, Final Metric = 0.7423
Epoch 08: Binary F1 = 0.7975, Macro F1 = 0.4400, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.991701,0.65741,0.82428
1,0.966226,0.605025,0.784505
2,0.976285,0.637034,0.806358
3,0.952058,0.556307,0.753877
4,0.972998,0.572725,0.769924


[I 2025-08-15 16:56:14,039] Trial 1 finished with value: 0.7877889868546157 and parameters: {'warmup_epochs': 8, 'cycle_mult': 0.5492030865459527, 'max_lr': 0.002433535370414122, 'max_to_min_div_factor': 250.0, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.5977559552589569, 'weight_decay': 0.0006121785906188671, 'beta_0': 0.818848602815761, 'beta_1': 0.9906031399399764}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7878 ± 0.0281
Mean Best Binary F1: 0.9719 ± 0.0145
Mean Best Macro F1: 0.6057 ± 0.0424

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8474, Macro F1 = 0.2247, Final Metric = 0.5361
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9341, Macro F1 = 0.3275, Final Metric = 0.6308
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.7938, Macro F1 = 0.3170, Final Metric = 0.5554
Epoch 04: Binary F1 = 0.9551, Macro F1 = 0.3862, Final Metric = 0.6706
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9638, Macro F1 = 0.4613, Final Metric = 0.7126
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9609, Macro F1 = 0.4341, Final Metric = 0.6975
Epoch 07: Binary F1 = 0.9223, Macro F1 = 0.4280, Final Metric = 0.6752
Epoch 08: Binary F1 = 0.9606, Macro F1 = 0.4852, Final Metric = 0.7229
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.97

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.990615,0.663774,0.825866
1,0.965007,0.585382,0.775195
2,0.97123,0.602762,0.785974
3,0.950147,0.536058,0.739541
4,0.972031,0.570409,0.77122


[I 2025-08-15 17:00:01,898] Trial 2 finished with value: 0.7795589658840318 and parameters: {'warmup_epochs': 9, 'cycle_mult': 1.130967087914256, 'max_lr': 0.011680665054210514, 'max_to_min_div_factor': 200.0, 'init_cycle_epochs': 10, 'lr_cycle_factor': 0.9468657628874835, 'weight_decay': 0.0007946415936433313, 'beta_0': 0.8559480408583455, 'beta_1': 0.9928112881612698}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7796 ± 0.0311
Mean Best Binary F1: 0.9698 ± 0.0146
Mean Best Macro F1: 0.5917 ± 0.0472

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7032, Macro F1 = 0.2392, Final Metric = 0.4712
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9467, Macro F1 = 0.3104, Final Metric = 0.6286
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9163, Macro F1 = 0.3693, Final Metric = 0.6428
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9316, Macro F1 = 0.4471, Final Metric = 0.6894
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8876, Macro F1 = 0.4418, Final Metric = 0.6647
Epoch 06: Binary F1 = 0.9824, Macro F1 = 0.5337, Final Metric = 0.7580
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9533, Macro F1 = 0.4474, Final Metric = 0.7004
Epoch 08: Binary F1 = 0.9452, Macro F1 = 0.4759, Final Metric = 0.7105
Epoch 09: Binary F1 = 0.97

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.989594,0.656349,0.822971
1,0.962319,0.590244,0.772099
2,0.97265,0.611932,0.792291
3,0.949781,0.561195,0.752792
4,0.971429,0.57532,0.773374


[I 2025-08-15 17:03:49,542] Trial 3 finished with value: 0.7827055183746182 and parameters: {'warmup_epochs': 11, 'cycle_mult': 1.2658874082724718, 'max_lr': 0.016280674495039504, 'max_to_min_div_factor': 150.0, 'init_cycle_epochs': 10, 'lr_cycle_factor': 0.6235427109748884, 'weight_decay': 0.0008714657258603214, 'beta_0': 0.8355463734818672, 'beta_1': 0.9908350129142843}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7827 ± 0.0265
Mean Best Binary F1: 0.9692 ± 0.0146
Mean Best Macro F1: 0.5990 ± 0.0372

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8444, Macro F1 = 0.2265, Final Metric = 0.5354
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9148, Macro F1 = 0.3225, Final Metric = 0.6186
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9322, Macro F1 = 0.4016, Final Metric = 0.6669
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9520, Macro F1 = 0.3788, Final Metric = 0.6654
Epoch 05: Binary F1 = 0.9462, Macro F1 = 0.4504, Final Metric = 0.6983
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9676, Macro F1 = 0.4878, Final Metric = 0.7277
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9509, Macro F1 = 0.4660, Final Metric = 0.7085
Epoch 08: Binary F1 = 0.9442, Macro F1 = 0.4780, Final Metric = 0.7111
Epoch 09: Binary F1 = 0.96

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.992685,0.663758,0.827958
1,0.967359,0.593435,0.779408
2,0.976285,0.622954,0.79933
3,0.956183,0.554472,0.752088
4,0.974828,0.579027,0.776928


[I 2025-08-15 17:07:36,834] Trial 4 finished with value: 0.7871424800435258 and parameters: {'warmup_epochs': 9, 'cycle_mult': 0.575575018085766, 'max_lr': 0.007828902445837414, 'max_to_min_div_factor': 125.0, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.5210672788345871, 'weight_decay': 0.0007976414764158325, 'beta_0': 0.844751773752412, 'beta_1': 0.9947528526774767}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7871 ± 0.0283
Mean Best Binary F1: 0.9735 ± 0.0134
Mean Best Macro F1: 0.6027 ± 0.0422

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8912, Macro F1 = 0.2780, Final Metric = 0.5846
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9523, Macro F1 = 0.3104, Final Metric = 0.6314
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8750, Macro F1 = 0.3512, Final Metric = 0.6131
Epoch 04: Binary F1 = 0.9718, Macro F1 = 0.3960, Final Metric = 0.6839
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.7980, Macro F1 = 0.3764, Final Metric = 0.5872
Epoch 06: Binary F1 = 0.9689, Macro F1 = 0.4704, Final Metric = 0.7196
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9647, Macro F1 = 0.4304, Final Metric = 0.6976
Epoch 08: Binary F1 = 0.9534, Macro F1 = 0.4959, Final Metric = 0.7246
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.95

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.989551,0.65382,0.820125
1,0.966502,0.581666,0.773643
2,0.97474,0.622177,0.797481
3,0.949119,0.561519,0.752736
4,0.972274,0.557891,0.764489


[I 2025-08-15 17:11:25,973] Trial 5 finished with value: 0.7816948388046613 and parameters: {'warmup_epochs': 10, 'cycle_mult': 0.9597060278159361, 'max_lr': 0.01594289937275698, 'max_to_min_div_factor': 125.0, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.501284797626935, 'weight_decay': 0.0005043946569379963, 'beta_0': 0.8776598654880907, 'beta_1': 0.9911134911808652}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7817 ± 0.0270
Mean Best Binary F1: 0.9704 ± 0.0146
Mean Best Macro F1: 0.5954 ± 0.0415

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7761, Macro F1 = 0.1705, Final Metric = 0.4733
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8992, Macro F1 = 0.2946, Final Metric = 0.5969
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9346, Macro F1 = 0.3448, Final Metric = 0.6397
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9562, Macro F1 = 0.4465, Final Metric = 0.7014
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9369, Macro F1 = 0.3678, Final Metric = 0.6523
Epoch 06: Binary F1 = 0.9686, Macro F1 = 0.4401, Final Metric = 0.7044
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9777, Macro F1 = 0.4674, Final Metric = 0.7226
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9606, Macro F1 = 0.5000, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.988052,0.663652,0.825264
1,0.963843,0.575441,0.765765
2,0.971316,0.601629,0.784637
3,0.944472,0.533861,0.739166
4,0.9681,0.545973,0.755298


[I 2025-08-15 17:15:13,253] Trial 6 finished with value: 0.7740260611777074 and parameters: {'warmup_epochs': 8, 'cycle_mult': 0.9860726322064617, 'max_lr': 0.012878693026976432, 'max_to_min_div_factor': 275.0, 'init_cycle_epochs': 5, 'lr_cycle_factor': 0.6903525427933577, 'weight_decay': 0.0008198225930285267, 'beta_0': 0.9608605927585594, 'beta_1': 0.9924435859580583}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7740 ± 0.0331
Mean Best Binary F1: 0.9672 ± 0.0157
Mean Best Macro F1: 0.5841 ± 0.0517

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7927, Macro F1 = 0.2266, Final Metric = 0.5096
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9478, Macro F1 = 0.3147, Final Metric = 0.6312
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8600, Macro F1 = 0.2959, Final Metric = 0.5779
Epoch 04: Binary F1 = 0.9353, Macro F1 = 0.3912, Final Metric = 0.6633
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9547, Macro F1 = 0.3764, Final Metric = 0.6655
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9795, Macro F1 = 0.4392, Final Metric = 0.7093
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9559, Macro F1 = 0.4795, Final Metric = 0.7177
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9742, Macro F1 = 0.4993, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.990576,0.635634,0.813105
1,0.96592,0.580654,0.769629
2,0.973346,0.580457,0.77297
3,0.944499,0.532889,0.733078
4,0.97198,0.561461,0.763829


[I 2025-08-15 17:18:58,327] Trial 7 finished with value: 0.7705221725133526 and parameters: {'warmup_epochs': 9, 'cycle_mult': 0.918039144544482, 'max_lr': 0.01673095548901419, 'max_to_min_div_factor': 225.0, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.6481108423277886, 'weight_decay': 0.000679042932203987, 'beta_0': 0.9502250428469909, 'beta_1': 0.9931046474620946}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7705 ± 0.0286
Mean Best Binary F1: 0.9693 ± 0.0166
Mean Best Macro F1: 0.5782 ± 0.0376

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7607, Macro F1 = 0.1959, Final Metric = 0.4783
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9160, Macro F1 = 0.3140, Final Metric = 0.6150
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9469, Macro F1 = 0.3493, Final Metric = 0.6481
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9492, Macro F1 = 0.3952, Final Metric = 0.6722
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9660, Macro F1 = 0.4002, Final Metric = 0.6831
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9719, Macro F1 = 0.4461, Final Metric = 0.7090
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9402, Macro F1 = 0.4784, Final Metric = 0.7093
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.964

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.989562,0.642917,0.816239
1,0.960794,0.590081,0.774921
2,0.972112,0.595725,0.782378
3,0.944198,0.517785,0.730991
4,0.970441,0.552009,0.759297


[I 2025-08-15 17:22:44,189] Trial 8 finished with value: 0.7727654143913297 and parameters: {'warmup_epochs': 9, 'cycle_mult': 1.2686284653979996, 'max_lr': 0.015073762834754433, 'max_to_min_div_factor': 300.0, 'init_cycle_epochs': 10, 'lr_cycle_factor': 0.955181117708715, 'weight_decay': 0.0005055047254995542, 'beta_0': 0.963243438652944, 'beta_1': 0.9944311102916772}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7728 ± 0.0313
Mean Best Binary F1: 0.9674 ± 0.0166
Mean Best Macro F1: 0.5797 ± 0.0473

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.3784, Macro F1 = 0.1515, Final Metric = 0.2649
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.7574, Macro F1 = 0.1667, Final Metric = 0.4620
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8027, Macro F1 = 0.2902, Final Metric = 0.5464
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8393, Macro F1 = 0.2965, Final Metric = 0.5679
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9501, Macro F1 = 0.4108, Final Metric = 0.6804
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9477, Macro F1 = 0.4206, Final Metric = 0.6841
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9476, Macro F1 = 0.4573, Final Metric = 0.7024
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.974

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.984211,0.595074,0.788302
1,0.955875,0.5595,0.756203
2,0.970778,0.536203,0.751476
3,0.935357,0.498946,0.711973
4,0.961433,0.519339,0.740183


[I 2025-08-15 17:26:32,474] Trial 9 finished with value: 0.7496274907548572 and parameters: {'warmup_epochs': 8, 'cycle_mult': 1.7753793438083088, 'max_lr': 0.015400046384068306, 'max_to_min_div_factor': 250.0, 'init_cycle_epochs': 4, 'lr_cycle_factor': 0.31077427769761784, 'weight_decay': 0.000932627813219246, 'beta_0': 0.9895515712305799, 'beta_1': 0.9904943686707152}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7496 ± 0.0276
Mean Best Binary F1: 0.9615 ± 0.0181
Mean Best Macro F1: 0.5418 ± 0.0372

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.6855, Macro F1 = 0.1766, Final Metric = 0.4311
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9025, Macro F1 = 0.3159, Final Metric = 0.6092
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9358, Macro F1 = 0.3752, Final Metric = 0.6555
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9621, Macro F1 = 0.3811, Final Metric = 0.6716
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9572, Macro F1 = 0.4480, Final Metric = 0.7026
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9741, Macro F1 = 0.4338, Final Metric = 0.7040
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9693, Macro F1 = 0.5190, Final Metric = 0.7442
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.963

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.989551,0.654858,0.82194
1,0.961652,0.58896,0.775306
2,0.970356,0.617103,0.791843
3,0.953454,0.538561,0.742887
4,0.967565,0.56046,0.762373


[I 2025-08-15 17:30:17,402] Trial 10 finished with value: 0.7788698736692433 and parameters: {'warmup_epochs': 12, 'cycle_mult': 0.5844883808789315, 'max_lr': 0.002547637584478295, 'max_to_min_div_factor': 200.0, 'init_cycle_epochs': 2, 'lr_cycle_factor': 0.306931946078658, 'weight_decay': 0.0006292559854874491, 'beta_0': 0.8056756777889523, 'beta_1': 0.9981605870299527}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7789 ± 0.0300
Mean Best Binary F1: 0.9685 ± 0.0134
Mean Best Macro F1: 0.5920 ± 0.0459

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8545, Macro F1 = 0.2338, Final Metric = 0.5441
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9222, Macro F1 = 0.3176, Final Metric = 0.6199
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9364, Macro F1 = 0.3917, Final Metric = 0.6641
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9691, Macro F1 = 0.4323, Final Metric = 0.7007
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8868, Macro F1 = 0.4376, Final Metric = 0.6622
Epoch 06: Binary F1 = 0.9547, Macro F1 = 0.4782, Final Metric = 0.7165
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9695, Macro F1 = 0.4871, Final Metric = 0.7283
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9698, Macro F1 = 0.4857, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.990635,0.670224,0.829635
1,0.969936,0.591813,0.7787
2,0.977295,0.622462,0.798891
3,0.945598,0.552557,0.748747
4,0.97017,0.589026,0.778369


[I 2025-08-15 17:34:06,472] Trial 11 finished with value: 0.7868682716693807 and parameters: {'warmup_epochs': 8, 'cycle_mult': 0.5523787994317021, 'max_lr': 0.006026392980142377, 'max_to_min_div_factor': 100.0, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.4778405914763333, 'weight_decay': 0.00073558391148588, 'beta_0': 0.8059841606737487, 'beta_1': 0.9974612477518945}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7869 ± 0.0299
Mean Best Binary F1: 0.9707 ± 0.0164
Mean Best Macro F1: 0.6052 ± 0.0440

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8753, Macro F1 = 0.2294, Final Metric = 0.5524
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8468, Macro F1 = 0.2781, Final Metric = 0.5625
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9468, Macro F1 = 0.3487, Final Metric = 0.6477
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9360, Macro F1 = 0.4675, Final Metric = 0.7017
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9574, Macro F1 = 0.5016, Final Metric = 0.7295
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9612, Macro F1 = 0.4862, Final Metric = 0.7237
Epoch 07: Binary F1 = 0.9630, Macro F1 = 0.4695, Final Metric = 0.7162
Epoch 08: Binary F1 = 0.9216, Macro F1 = 0.5048, Final Metric = 0.7132
Epoch 09: Binary F1 = 0.96

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.990586,0.659333,0.824447
1,0.966502,0.597228,0.78081
2,0.974283,0.630326,0.800523
3,0.948343,0.557051,0.751064
4,0.971376,0.593348,0.781731


[I 2025-08-15 17:37:55,141] Trial 12 finished with value: 0.7877150324479375 and parameters: {'warmup_epochs': 9, 'cycle_mult': 0.6526378808539373, 'max_lr': 0.007508651406021236, 'max_to_min_div_factor': 175.0, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.5152219975812403, 'weight_decay': 0.0006023171640618408, 'beta_0': 0.838911146669458, 'beta_1': 0.9962261203074921}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7877 ± 0.0271
Mean Best Binary F1: 0.9702 ± 0.0152
Mean Best Macro F1: 0.6075 ± 0.0389

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7826, Macro F1 = 0.1867, Final Metric = 0.4846
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9099, Macro F1 = 0.3085, Final Metric = 0.6092
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9627, Macro F1 = 0.3706, Final Metric = 0.6667
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9660, Macro F1 = 0.4198, Final Metric = 0.6929
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9434, Macro F1 = 0.4239, Final Metric = 0.6836
Epoch 06: Binary F1 = 0.9729, Macro F1 = 0.4842, Final Metric = 0.7286
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9727, Macro F1 = 0.4888, Final Metric = 0.7307
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9767, Macro F1 = 0.4284, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.991113,0.655227,0.820829
1,0.967456,0.588597,0.776108
2,0.977517,0.618213,0.797754
3,0.949336,0.546381,0.747858
4,0.975229,0.584622,0.779926


[I 2025-08-15 17:41:39,015] Trial 13 finished with value: 0.7844948539471484 and parameters: {'warmup_epochs': 10, 'cycle_mult': 0.7958563505459075, 'max_lr': 0.0051578651745854435, 'max_to_min_div_factor': 175.0, 'init_cycle_epochs': 5, 'lr_cycle_factor': 0.7617046882235534, 'weight_decay': 0.0006065198068901362, 'beta_0': 0.9108799004726539, 'beta_1': 0.9966826814478927}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7845 ± 0.0271
Mean Best Binary F1: 0.9721 ± 0.0153
Mean Best Macro F1: 0.5986 ± 0.0407

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8315, Macro F1 = 0.2174, Final Metric = 0.5244
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9413, Macro F1 = 0.3142, Final Metric = 0.6278
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9194, Macro F1 = 0.3633, Final Metric = 0.6414
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9381, Macro F1 = 0.3246, Final Metric = 0.6314
Epoch 05: Binary F1 = 0.9192, Macro F1 = 0.4423, Final Metric = 0.6808
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9669, Macro F1 = 0.4158, Final Metric = 0.6913
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9708, Macro F1 = 0.4824, Final Metric = 0.7266
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9645, Macro F1 = 0.4470, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.99115,0.662222,0.825208
1,0.967933,0.586024,0.776978
2,0.974283,0.611567,0.792567
3,0.950787,0.554093,0.749348
4,0.978311,0.59168,0.784995


[I 2025-08-15 17:45:25,789] Trial 14 finished with value: 0.7858192388903277 and parameters: {'warmup_epochs': 8, 'cycle_mult': 0.6798171774034418, 'max_lr': 0.009639706620246711, 'max_to_min_div_factor': 250.0, 'init_cycle_epochs': 8, 'lr_cycle_factor': 0.41987809480591537, 'weight_decay': 0.0005952831340633004, 'beta_0': 0.8817379852267433, 'beta_1': 0.9959987547291734}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7858 ± 0.0274
Mean Best Binary F1: 0.9725 ± 0.0148
Mean Best Macro F1: 0.6011 ± 0.0399

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7936, Macro F1 = 0.2076, Final Metric = 0.5006
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9292, Macro F1 = 0.3206, Final Metric = 0.6249
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9431, Macro F1 = 0.3818, Final Metric = 0.6624
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9677, Macro F1 = 0.4211, Final Metric = 0.6944
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9534, Macro F1 = 0.4640, Final Metric = 0.7087
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9757, Macro F1 = 0.5224, Final Metric = 0.7490
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9573, Macro F1 = 0.4943, Final Metric = 0.7258
Epoch 08: Binary F1 = 0.9297, Macro F1 = 0.4489, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.991169,0.652412,0.8194
1,0.972127,0.599462,0.78455
2,0.980257,0.621408,0.800832
3,0.951825,0.543514,0.744575
4,0.97337,0.590627,0.781463


[I 2025-08-15 17:49:08,161] Trial 15 finished with value: 0.7861640285471683 and parameters: {'warmup_epochs': 9, 'cycle_mult': 1.895379324584644, 'max_lr': 0.004941049639043977, 'max_to_min_div_factor': 225.0, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.582203087410914, 'weight_decay': 0.0005645808210458157, 'beta_0': 0.8301033605094865, 'beta_1': 0.9953075087426161}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7862 ± 0.0277
Mean Best Binary F1: 0.9737 ± 0.0144
Mean Best Macro F1: 0.6015 ± 0.0402

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7463, Macro F1 = 0.2072, Final Metric = 0.4768
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9292, Macro F1 = 0.3228, Final Metric = 0.6260
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9567, Macro F1 = 0.3690, Final Metric = 0.6628
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9578, Macro F1 = 0.4117, Final Metric = 0.6848
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9124, Macro F1 = 0.4236, Final Metric = 0.6680
Epoch 06: Binary F1 = 0.9779, Macro F1 = 0.4997, Final Metric = 0.7388
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9563, Macro F1 = 0.4860, Final Metric = 0.7211
Epoch 08: Binary F1 = 0.9561, Macro F1 = 0.5018, Final Metric = 0.7290
Epoch 09: Binary F1 = 0.97

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.989022,0.661268,0.824635
1,0.970287,0.594462,0.780198
2,0.97446,0.61353,0.793438
3,0.949336,0.542876,0.746106
4,0.976103,0.569803,0.770708


[I 2025-08-15 17:52:49,264] Trial 16 finished with value: 0.7830170733281819 and parameters: {'warmup_epochs': 11, 'cycle_mult': 0.7740606931346845, 'max_lr': 0.007870689966573705, 'max_to_min_div_factor': 175.0, 'init_cycle_epochs': 5, 'lr_cycle_factor': 0.7687357087312408, 'weight_decay': 0.000692995833062656, 'beta_0': 0.8674514011862705, 'beta_1': 0.99397339241041}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7830 ± 0.0290
Mean Best Binary F1: 0.9718 ± 0.0144
Mean Best Macro F1: 0.5964 ± 0.0449

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8315, Macro F1 = 0.1680, Final Metric = 0.4998
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9234, Macro F1 = 0.3210, Final Metric = 0.6222
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9516, Macro F1 = 0.3598, Final Metric = 0.6557
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9622, Macro F1 = 0.4105, Final Metric = 0.6863
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9248, Macro F1 = 0.4701, Final Metric = 0.6974
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9780, Macro F1 = 0.5065, Final Metric = 0.7423
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9430, Macro F1 = 0.4528, Final Metric = 0.6979
Epoch 08: Binary F1 = 0.9126, Macro F1 = 0.4623, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.989045,0.661896,0.82547
1,0.966914,0.59331,0.778356
2,0.980431,0.616419,0.797054
3,0.950611,0.548406,0.746768
4,0.973443,0.593521,0.782264


[I 2025-08-15 17:56:30,152] Trial 17 finished with value: 0.7859827093791296 and parameters: {'warmup_epochs': 8, 'cycle_mult': 1.6284684949949821, 'max_lr': 0.003999907578012536, 'max_to_min_div_factor': 300.0, 'init_cycle_epochs': 8, 'lr_cycle_factor': 0.4129093876577258, 'weight_decay': 0.0005584826138081391, 'beta_0': 0.8214147588937345, 'beta_1': 0.9992385324831964}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7860 ± 0.0287
Mean Best Binary F1: 0.9721 ± 0.0146
Mean Best Macro F1: 0.6027 ± 0.0413

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8419, Macro F1 = 0.2034, Final Metric = 0.5227
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9275, Macro F1 = 0.3415, Final Metric = 0.6345
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9347, Macro F1 = 0.4016, Final Metric = 0.6682
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9639, Macro F1 = 0.4234, Final Metric = 0.6937
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9029, Macro F1 = 0.4239, Final Metric = 0.6634
Epoch 06: Binary F1 = 0.9709, Macro F1 = 0.4894, Final Metric = 0.7302
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9636, Macro F1 = 0.4438, Final Metric = 0.7037
Epoch 08: Binary F1 = 0.9716, Macro F1 = 0.4823, Final Metric = 0.7269
Epoch 09: Binary F1 = 0.94

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.98954,0.64644,0.816944
1,0.968566,0.608398,0.788197
2,0.975248,0.616467,0.794187
3,0.951028,0.554655,0.75213
4,0.974666,0.589343,0.782005


[I 2025-08-15 18:00:18,654] Trial 18 finished with value: 0.7866924095233288 and parameters: {'warmup_epochs': 9, 'cycle_mult': 0.7911538776493375, 'max_lr': 0.007053793087059459, 'max_to_min_div_factor': 250.0, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.5684235072702433, 'weight_decay': 0.0007398308175314775, 'beta_0': 0.8971292245029371, 'beta_1': 0.9918305428683825}. Best is trial 1 with value: 0.7877889868546157.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7867 ± 0.0234
Mean Best Binary F1: 0.9718 ± 0.0139
Mean Best Macro F1: 0.6031 ± 0.0340

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8254, Macro F1 = 0.2458, Final Metric = 0.5356
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9521, Macro F1 = 0.3444, Final Metric = 0.6482
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9554, Macro F1 = 0.3268, Final Metric = 0.6411
Epoch 04: Binary F1 = 0.9631, Macro F1 = 0.4385, Final Metric = 0.7008
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9365, Macro F1 = 0.4414, Final Metric = 0.6889
Epoch 06: Binary F1 = 0.9664, Macro F1 = 0.5139, Final Metric = 0.7402
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9333, Macro F1 = 0.4388, Final Metric = 0.6860
Epoch 08: Binary F1 = 0.9533, Macro F1 = 0.4440, Final Metric = 0.6986
Epoch 09: Binary F1 = 0.9645, Macro F1 = 0.4789, Final Metric

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.991649,0.667219,0.82918
1,0.968028,0.595277,0.781652
2,0.975658,0.6137,0.793148
3,0.957416,0.566997,0.762206
4,0.974828,0.575146,0.773799


[I 2025-08-15 18:04:06,676] Trial 19 finished with value: 0.7879970418306042 and parameters: {'warmup_epochs': 11, 'cycle_mult': 0.5048340943657956, 'max_lr': 0.010197597899100515, 'max_to_min_div_factor': 225.0, 'init_cycle_epochs': 9, 'lr_cycle_factor': 0.7574776616033075, 'weight_decay': 0.0009883454509466867, 'beta_0': 0.8001553403314247, 'beta_1': 0.9961422361586677}. Best is trial 19 with value: 0.7879970418306042.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7880 ± 0.0256
Mean Best Binary F1: 0.9735 ± 0.0125
Mean Best Macro F1: 0.6037 ± 0.0399

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8655, Macro F1 = 0.2121, Final Metric = 0.5388
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9406, Macro F1 = 0.3279, Final Metric = 0.6343
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9439, Macro F1 = 0.3868, Final Metric = 0.6653
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9701, Macro F1 = 0.4239, Final Metric = 0.6970
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8637, Macro F1 = 0.4141, Final Metric = 0.6389
Epoch 06: Binary F1 = 0.9764, Macro F1 = 0.4926, Final Metric = 0.7345
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9545, Macro F1 = 0.5166, Final Metric = 0.7356
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9624, Macro F1 = 0.4747, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.99013,0.657493,0.822315
1,0.967359,0.588457,0.777718
2,0.974914,0.616519,0.793289
3,0.956989,0.545644,0.748883
4,0.973096,0.582159,0.77615


[I 2025-08-15 18:07:56,037] Trial 20 finished with value: 0.7836710463494068 and parameters: {'warmup_epochs': 12, 'cycle_mult': 0.5069541951822095, 'max_lr': 0.010210759650853145, 'max_to_min_div_factor': 275.0, 'init_cycle_epochs': 9, 'lr_cycle_factor': 0.7352946119152342, 'weight_decay': 0.000995576523337893, 'beta_0': 0.8060038610814554, 'beta_1': 0.9991047301538153}. Best is trial 19 with value: 0.7879970418306042.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7837 ± 0.0269
Mean Best Binary F1: 0.9725 ± 0.0121
Mean Best Macro F1: 0.5981 ± 0.0417

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7335, Macro F1 = 0.2114, Final Metric = 0.4724
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9181, Macro F1 = 0.3374, Final Metric = 0.6277
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9542, Macro F1 = 0.4104, Final Metric = 0.6823
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9357, Macro F1 = 0.4190, Final Metric = 0.6773
Epoch 05: Binary F1 = 0.8349, Macro F1 = 0.3527, Final Metric = 0.5938
Epoch 06: Binary F1 = 0.9761, Macro F1 = 0.4311, Final Metric = 0.7036
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9521, Macro F1 = 0.4353, Final Metric = 0.6937
Epoch 08: Binary F1 = 0.9702, Macro F1 = 0.4621, Final Metric = 0.7162
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.97

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.986972,0.674259,0.829526
1,0.971036,0.605768,0.788402
2,0.976285,0.604711,0.790498
3,0.948768,0.555535,0.752152
4,0.975252,0.576891,0.775155


[I 2025-08-15 18:11:43,990] Trial 21 finished with value: 0.787146428231857 and parameters: {'warmup_epochs': 11, 'cycle_mult': 0.6898103066439115, 'max_lr': 0.011800215472423967, 'max_to_min_div_factor': 225.0, 'init_cycle_epochs': 9, 'lr_cycle_factor': 0.8530281419669524, 'weight_decay': 0.000899393300753312, 'beta_0': 0.8232528931963805, 'beta_1': 0.9960729087548634}. Best is trial 19 with value: 0.7879970418306042.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7871 ± 0.0282
Mean Best Binary F1: 0.9717 ± 0.0141
Mean Best Macro F1: 0.6034 ± 0.0448

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8792, Macro F1 = 0.2137, Final Metric = 0.5464
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9547, Macro F1 = 0.3792, Final Metric = 0.6669
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9376, Macro F1 = 0.3659, Final Metric = 0.6517
Epoch 04: Binary F1 = 0.9643, Macro F1 = 0.4280, Final Metric = 0.6962
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9280, Macro F1 = 0.4220, Final Metric = 0.6750
Epoch 06: Binary F1 = 0.9671, Macro F1 = 0.4462, Final Metric = 0.7066
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9641, Macro F1 = 0.4582, Final Metric = 0.7111
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9522, Macro F1 = 0.4924, Final Metric = 0.7223
  New best metric! Saving 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.991684,0.661066,0.825841
1,0.969638,0.589019,0.777038
2,0.975296,0.614674,0.793349
3,0.945278,0.547753,0.745107
4,0.973297,0.572807,0.773052


[I 2025-08-15 18:15:31,867] Trial 22 finished with value: 0.7828771246887568 and parameters: {'warmup_epochs': 11, 'cycle_mult': 0.5048169146279287, 'max_lr': 0.008496438804606083, 'max_to_min_div_factor': 175.0, 'init_cycle_epochs': 9, 'lr_cycle_factor': 0.6976622270360068, 'weight_decay': 0.0006457351952531516, 'beta_0': 0.8488883713784278, 'beta_1': 0.9973344573218299}. Best is trial 19 with value: 0.7879970418306042.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7829 ± 0.0296
Mean Best Binary F1: 0.9710 ± 0.0167
Mean Best Macro F1: 0.5971 ± 0.0433

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8851, Macro F1 = 0.2456, Final Metric = 0.5654
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8889, Macro F1 = 0.3206, Final Metric = 0.6048
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8172, Macro F1 = 0.3325, Final Metric = 0.5748
Epoch 04: Binary F1 = 0.9393, Macro F1 = 0.4780, Final Metric = 0.7086
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8773, Macro F1 = 0.4186, Final Metric = 0.6480
Epoch 06: Binary F1 = 0.9660, Macro F1 = 0.4981, Final Metric = 0.7320
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9524, Macro F1 = 0.4817, Final Metric = 0.7170
Epoch 08: Binary F1 = 0.9556, Macro F1 = 0.4529, Final Metric = 0.7042
Epoch 09: Binary F1 = 0.9781, Macro F1 = 0.4940, Final Metric

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.990089,0.656617,0.822045
1,0.962236,0.595077,0.777201
2,0.971571,0.607199,0.789385
3,0.951872,0.542439,0.745059
4,0.97235,0.564634,0.768492


[I 2025-08-15 18:19:19,336] Trial 23 finished with value: 0.7804365022353819 and parameters: {'warmup_epochs': 10, 'cycle_mult': 0.6652144903269536, 'max_lr': 0.010680581237887864, 'max_to_min_div_factor': 225.0, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.8356334792668797, 'weight_decay': 0.0005577329774151429, 'beta_0': 0.821143514797762, 'beta_1': 0.9954331527640121}. Best is trial 19 with value: 0.7879970418306042.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7804 ± 0.0284
Mean Best Binary F1: 0.9696 ± 0.0141
Mean Best Macro F1: 0.5932 ± 0.0436

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8682, Macro F1 = 0.2399, Final Metric = 0.5541
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9412, Macro F1 = 0.3382, Final Metric = 0.6397
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8880, Macro F1 = 0.3230, Final Metric = 0.6055
Epoch 04: Binary F1 = 0.9617, Macro F1 = 0.4076, Final Metric = 0.6847
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9159, Macro F1 = 0.4216, Final Metric = 0.6687
Epoch 06: Binary F1 = 0.9619, Macro F1 = 0.4935, Final Metric = 0.7277
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9574, Macro F1 = 0.4331, Final Metric = 0.6952
Epoch 08: Binary F1 = 0.9635, Macro F1 = 0.4603, Final Metric = 0.7119
Epoch 09: Binary F1 = 0.9673, Macro F1 = 0.5158, Final Metric

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.987927,0.664566,0.823097
1,0.971456,0.59549,0.780725
2,0.975223,0.623816,0.797722
3,0.951737,0.543859,0.742224
4,0.972553,0.55402,0.760642


[I 2025-08-15 18:23:07,788] Trial 24 finished with value: 0.7808817251387591 and parameters: {'warmup_epochs': 10, 'cycle_mult': 1.0682656065586027, 'max_lr': 0.01362166253725502, 'max_to_min_div_factor': 200.0, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.5740314996906133, 'weight_decay': 0.0009337061753150423, 'beta_0': 0.8000263624420145, 'beta_1': 0.9938084190631834}. Best is trial 19 with value: 0.7879970418306042.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7809 ± 0.0315
Mean Best Binary F1: 0.9718 ± 0.0130
Mean Best Macro F1: 0.5964 ± 0.0499

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8234, Macro F1 = 0.2095, Final Metric = 0.5164
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9283, Macro F1 = 0.3326, Final Metric = 0.6305
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9644, Macro F1 = 0.3734, Final Metric = 0.6689
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9502, Macro F1 = 0.3639, Final Metric = 0.6571
Epoch 05: Binary F1 = 0.9569, Macro F1 = 0.4476, Final Metric = 0.7023
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9762, Macro F1 = 0.4650, Final Metric = 0.7206
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9528, Macro F1 = 0.4490, Final Metric = 0.7009
Epoch 08: Binary F1 = 0.9267, Macro F1 = 0.4729, Final Metric = 0.6998
Epoch 09: Binary F1 = 0.95

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.990078,0.656755,0.822619
1,0.967173,0.599607,0.781779
2,0.980798,0.623206,0.801831
3,0.949119,0.565779,0.754755
4,0.974828,0.57333,0.772904


[I 2025-08-15 18:26:58,133] Trial 25 finished with value: 0.7867774457266783 and parameters: {'warmup_epochs': 12, 'cycle_mult': 0.8424498439048208, 'max_lr': 0.006576441807187855, 'max_to_min_div_factor': 275.0, 'init_cycle_epochs': 4, 'lr_cycle_factor': 0.4325644856698367, 'weight_decay': 0.0007162777656892126, 'beta_0': 0.859043932580318, 'beta_1': 0.9981505013601437}. Best is trial 19 with value: 0.7879970418306042.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7868 ± 0.0262
Mean Best Binary F1: 0.9724 ± 0.0155
Mean Best Macro F1: 0.6037 ± 0.0373

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8924, Macro F1 = 0.2245, Final Metric = 0.5585
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8806, Macro F1 = 0.3153, Final Metric = 0.5979
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9109, Macro F1 = 0.3503, Final Metric = 0.6306
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9582, Macro F1 = 0.4213, Final Metric = 0.6898
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8522, Macro F1 = 0.3873, Final Metric = 0.6197
Epoch 06: Binary F1 = 0.9600, Macro F1 = 0.4429, Final Metric = 0.7014
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9434, Macro F1 = 0.4717, Final Metric = 0.7076
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9671, Macro F1 = 0.4699, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.987964,0.647446,0.817168
1,0.965787,0.594404,0.778794
2,0.973214,0.612036,0.792625
3,0.955414,0.554878,0.75366
4,0.973346,0.576553,0.774949


[I 2025-08-15 18:30:43,359] Trial 26 finished with value: 0.7834392442737782 and parameters: {'warmup_epochs': 8, 'cycle_mult': 0.6662910839499138, 'max_lr': 0.009012757164597261, 'max_to_min_div_factor': 200.0, 'init_cycle_epochs': 8, 'lr_cycle_factor': 0.620424138612229, 'weight_decay': 0.0008594274767869819, 'beta_0': 0.8328097659711974, 'beta_1': 0.9962165256263968}. Best is trial 19 with value: 0.7879970418306042.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7834 ± 0.0235
Mean Best Binary F1: 0.9711 ± 0.0119
Mean Best Macro F1: 0.5971 ± 0.0352

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8059, Macro F1 = 0.1996, Final Metric = 0.5027
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9187, Macro F1 = 0.3221, Final Metric = 0.6204
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9525, Macro F1 = 0.3623, Final Metric = 0.6574
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9642, Macro F1 = 0.4227, Final Metric = 0.6935
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9626, Macro F1 = 0.4320, Final Metric = 0.6973
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9712, Macro F1 = 0.4789, Final Metric = 0.7250
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9438, Macro F1 = 0.4671, Final Metric = 0.7054
Epoch 08: Binary F1 = 0.9432, Macro F1 = 0.4420, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.990078,0.67466,0.831836
1,0.966093,0.594789,0.779846
2,0.975394,0.617422,0.796408
3,0.951819,0.558131,0.753506
4,0.975115,0.602359,0.788347


[I 2025-08-15 18:34:32,148] Trial 27 finished with value: 0.7899885046705655 and parameters: {'warmup_epochs': 11, 'cycle_mult': 0.8810668245305839, 'max_lr': 0.003979602517698732, 'max_to_min_div_factor': 250.0, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.3723735743970316, 'weight_decay': 0.0009904970031138828, 'beta_0': 0.8192696806499521, 'beta_1': 0.997040160907}. Best is trial 27 with value: 0.7899885046705655.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7900 ± 0.0284
Mean Best Binary F1: 0.9717 ± 0.0141
Mean Best Macro F1: 0.6095 ± 0.0425

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8120, Macro F1 = 0.1460, Final Metric = 0.4790
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8873, Macro F1 = 0.2508, Final Metric = 0.5691
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9197, Macro F1 = 0.2982, Final Metric = 0.6090
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9559, Macro F1 = 0.4058, Final Metric = 0.6808
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9410, Macro F1 = 0.4206, Final Metric = 0.6808
Epoch 06: Binary F1 = 0.9760, Macro F1 = 0.4363, Final Metric = 0.7062
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9642, Macro F1 = 0.5016, Final Metric = 0.7329
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9412, Macro F1 = 0.4741, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.992175,0.650543,0.820052
1,0.967423,0.590728,0.777393
2,0.975394,0.620589,0.797967
3,0.950852,0.542837,0.746757
4,0.968392,0.587695,0.778044


[I 2025-08-15 18:38:20,149] Trial 28 finished with value: 0.7840425925971788 and parameters: {'warmup_epochs': 11, 'cycle_mult': 1.044043162588411, 'max_lr': 0.0019061043892177637, 'max_to_min_div_factor': 250.0, 'init_cycle_epochs': 9, 'lr_cycle_factor': 0.3493012021780458, 'weight_decay': 0.0009944262809998378, 'beta_0': 0.8126240775208189, 'beta_1': 0.997258599268832}. Best is trial 27 with value: 0.7899885046705655.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7840 ± 0.0272
Mean Best Binary F1: 0.9708 ± 0.0149
Mean Best Macro F1: 0.5985 ± 0.0402

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7547, Macro F1 = 0.2022, Final Metric = 0.4784
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8251, Macro F1 = 0.2765, Final Metric = 0.5508
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9463, Macro F1 = 0.3824, Final Metric = 0.6643
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9656, Macro F1 = 0.3860, Final Metric = 0.6758
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9645, Macro F1 = 0.4080, Final Metric = 0.6863
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9649, Macro F1 = 0.5332, Final Metric = 0.7490
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9653, Macro F1 = 0.5167, Final Metric = 0.7410
Epoch 08: Binary F1 = 0.9741, Macro F1 = 0.4929, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.989562,0.668113,0.828384
1,0.964775,0.603472,0.783747
2,0.973294,0.616083,0.793642
3,0.950611,0.536205,0.742173
4,0.973346,0.586184,0.779056


[I 2025-08-15 18:42:09,241] Trial 29 finished with value: 0.785400468709808 and parameters: {'warmup_epochs': 11, 'cycle_mult': 1.4867498747524368, 'max_lr': 0.0036806844625005726, 'max_to_min_div_factor': 275.0, 'init_cycle_epochs': 8, 'lr_cycle_factor': 0.9992912681786859, 'weight_decay': 0.0009553005818870526, 'beta_0': 0.9262602706140088, 'beta_1': 0.9980864938027497}. Best is trial 27 with value: 0.7899885046705655.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7854 ± 0.0309
Mean Best Binary F1: 0.9703 ± 0.0142
Mean Best Macro F1: 0.6020 ± 0.0478

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.7098, Macro F1 = 0.1785, Final Metric = 0.4441
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9005, Macro F1 = 0.2891, Final Metric = 0.5948
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9500, Macro F1 = 0.3625, Final Metric = 0.6562
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9524, Macro F1 = 0.3955, Final Metric = 0.6740
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9575, Macro F1 = 0.4432, Final Metric = 0.7004
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.9712, Macro F1 = 0.5059, Final Metric = 0.7385
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9640, Macro F1 = 0.4797, Final Metric = 0.7219
Epoch 08: Binary F1 = 0.9664, Macro F1 = 0.4304, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.992183,0.664894,0.827497
1,0.971234,0.600927,0.784364
2,0.975992,0.619919,0.796167
3,0.950195,0.554251,0.750427
4,0.97619,0.571656,0.773815


[I 2025-08-15 18:45:58,640] Trial 30 finished with value: 0.7864539682804554 and parameters: {'warmup_epochs': 12, 'cycle_mult': 1.192553441571035, 'max_lr': 0.00362201643457426, 'max_to_min_div_factor': 250.0, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.8020570637409613, 'weight_decay': 0.0009661848286926198, 'beta_0': 0.892218544839655, 'beta_1': 0.990068393880571}. Best is trial 27 with value: 0.7899885046705655.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7865 ± 0.0285
Mean Best Binary F1: 0.9732 ± 0.0151
Mean Best Macro F1: 0.6023 ± 0.0432

training: 1
Fold 1/5
train dataset indices: 6623
validation dataset indices: 1528
Epoch 01: Binary F1 = 0.8233, Macro F1 = 0.1782, Final Metric = 0.5008
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.9269, Macro F1 = 0.3191, Final Metric = 0.6230
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9565, Macro F1 = 0.3851, Final Metric = 0.6708
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9654, Macro F1 = 0.4359, Final Metric = 0.7006
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.9329, Macro F1 = 0.4163, Final Metric = 0.6746
Epoch 06: Binary F1 = 0.9717, Macro F1 = 0.4518, Final Metric = 0.7118
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9574, Macro F1 = 0.5175, Final Metric = 0.7375
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9494, Macro F1 = 0.4658, Final Metric 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.991076,0.669766,0.8294
1,0.966469,0.600244,0.783019
2,0.980873,0.623902,0.801344
3,0.948199,0.557696,0.752948
4,0.973321,0.580699,0.77701


[I 2025-08-15 18:49:43,426] Trial 31 finished with value: 0.7887442069143467 and parameters: {'warmup_epochs': 10, 'cycle_mult': 0.875391516467918, 'max_lr': 0.004647759090091744, 'max_to_min_div_factor': 225.0, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.38359988178821347, 'weight_decay': 0.0006451967103574715, 'beta_0': 0.8403629586495359, 'beta_1': 0.9967460461755895}. Best is trial 27 with value: 0.7899885046705655.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7887 ± 0.0286
Mean Best Binary F1: 0.9720 ± 0.0161
Mean Best Macro F1: 0.6065 ± 0.0430
Study statistics: 
  Number of finished trials:  32
  Number of pruned trials:  0
  Number of complete trials:  32
Best trial:
  Value:  0.7899885046705655
  Params: 
    warmup_epochs: 11
    cycle_mult: 0.8810668245305839
    max_lr: 0.003979602517698732
    max_to_min_div_factor: 250.0
    init_cycle_epochs: 6
    lr_cycle_factor: 0.3723735743970316
    weight_decay: 0.0009904970031138828
    beta_0: 0.8192696806499521
    beta_1: 0.997040160907


## Submission

### Reloading best models

In [None]:
model_ensemble = []
for fold in range(NB_CROSS_VALIDATIONS):
    model = mk_model(n_aux_classes=meta_data["n_aux_classes"])
    checkpoint = torch.load(f"best_model_fold{fold}.pth", map_location=device, weights_only=True)
    model.load_state_dict(checkpoint)
    model.eval()
    model_ensemble.append(model)

### Define prediction function

In [25]:
def preprocess_sequence_at_inference(sequence_df:pl.DataFrame) -> ndarray:
    return (
        sequence_df                     
        .to_pandas()                            # Convert to pandas dataframe.
        .pipe(imputed_features)                 # Impute missing data.
        .pipe(standardize_tof_cols_names)
        .pipe(norm_quat_rotations)              # Norm quaternions
        .pipe(add_linear_acc_cols)              # Add gravity free acceleration.
        .pipe(add_acc_magnitude, RAW_ACCELRATION_COLS, "acc_mag")
        .pipe(add_acc_magnitude, LINEAR_ACC_COLS, "linear_acc_mag")
        .pipe(add_quat_angle_mag)
        .pipe(add_angular_velocity_features)
        .pipe(rot_euler_angles)                 # Add rotation acc expressed as euler angles.
        .pipe(agg_tof_cols_per_sensor)          # Aggregate ToF columns.
        .pipe(add_diff_features)                # 
        .loc[:, sorted(meta_data["feature_cols"])]      # Retain only the usefull columns a.k.a features.
        # .sub(meta_data["mean"])                 # Subtract features by their mean, std norm pt.1.
        # .div(meta_data["std"])                  # Divide by Standard deviation, std norm pt.2.
        .pipe(length_normed_sequence_feat_arr, meta_data["pad_seq_len"], SEQ_PAD_TRUNC_MODE)  # get feature ndarray of sequence.
        .T                                      # Transpose to swap channel and X dimensions.
    )

def predict(sequence: pl.DataFrame, _: pl.DataFrame) -> str:
    """
    Kaggle evaluation API will call this for each sequence.
    sequence: polars DataFrame for a single sequence
    demographics: unused in this model
    Returns: predicted gesture string
    """
    x_tensor = (
        torch.unsqueeze(Tensor(preprocess_sequence_at_inference(sequence)), dim=0)
        .float()
        .to(device)
    )
    print(x_tensor.shape)

    all_outputs = []
    with torch.no_grad():
        for model_idx, model in enumerate(model_ensemble): # Only take the first one bc it's the only one that takes in the correct input shape
            outputs, _ = model(x_tensor)
            all_outputs.append(outputs)

    avg_outputs = torch.mean(torch.stack(all_outputs), dim=0)
    pred_idx = torch.argmax(avg_outputs, dim=1).item()

    return str(TARGET_NAMES[pred_idx])

### Run inference server

In [26]:
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    competition_dataset_path = competition_download(COMPETITION_HANDLE)
    inference_server.run_local_gateway(
        data_paths=(
            join(competition_dataset_path, 'test.csv'),
            join(competition_dataset_path, 'test_demographics.csv'),
        )
    )
    inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

                This exceeds the startup time limit of 900 seconds that the gateway will enforce
                during the rerun on the hidden test set. Start the server before performing any time consuming steps.


  0%|          | 0/5 [00:00<?, ?it/s]

torch.Size([1, 946, 127])


  0%|          | 0/5 [00:00<?, ?it/s]

torch.Size([1, 946, 127])
