# Training & inference notebook
Credit to [Tarun Mishra](https://www.kaggle.com/tarundirector) – this code is heavily based on his [notebook](https://www.kaggle.com/code/tarundirector/sensor-pulse-viz-eda-for-bfrb-detection?scriptVersionId=243465321).

## Setup

### imports

In [11]:
import re
import os
import gc
import json 
import math
import shutil
import random
import warnings
from glob import glob
from os.path import join
from functools import partial
from datetime import datetime
from tqdm.notebook import tqdm
from collections import Counter
from operator import methodcaller
from os.path import join, realpath
from typing import Optional, Literal
from typing import Optional, Literal, Iterator
from itertools import pairwise, starmap, product

import torch
import optuna
import numpy as np
import pandas as pd
import polars as pl
from numpy import ndarray
from torch import nn, Tensor
from numpy.linalg import norm
import torch.nn.functional as F
from torch.optim import Optimizer
from pandas import DataFrame as DF
from optuna.trial import TrialState
from sklearn.metrics import f1_score
from kagglehub import competition_download
from torch.utils.data import TensorDataset
from scipy.spatial.transform import Rotation
import kaggle_evaluation.cmi_inference_server
from torch.utils.data import DataLoader as DL
from sklearn.model_selection import GroupKFold
from rich.progress import Progress, Task, track
from sklearn.model_selection import train_test_split
from numpy.lib.stride_tricks import sliding_window_view
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import ConstantLR, LRScheduler, _LRScheduler

### Configs

In [47]:
# Dataset
COMPETITION_HANDLE = "cmi-detect-behavior-with-sensor-data"
TARGET_NAMES = sorted([
    "Above ear - pull hair",
    "Cheek - pinch skin",
    "Eyebrow - pull hair",
    "Eyelash - pull hair",
    "Feel around in tray and pull out an object",
    "Forehead - pull hairline",
    "Forehead - scratch",
    "Neck - pinch skin",
    "Neck - scratch",
    "Text on phone",
    "Wave hello",
    "Write name in air",
    "Write name on leg",
    "Drink from bottle/cup",
    "Pinch knee/leg skin",
    "Pull air toward your face",
    "Scratch knee/leg skin",
    "Glasses on/off"
])
BFRB_GESTURES = [
    'Above ear - pull hair',
    'Forehead - pull hairline',
    'Forehead - scratch',
    'Eyebrow - pull hair',
    'Eyelash - pull hair',
    'Neck - pinch skin',
    'Neck - scratch',
    'Cheek - pinch skin'
]
BFRB_INDICES = [idx for idx, gesture in enumerate(TARGET_NAMES) if gesture in BFRB_GESTURES]
IMU_FEATS_PREFIXES = (
    "acc",
    "linear_acc",
    "rot",
    "angular",
    "euler",
    "quat_rot_mag",
    "delta_rot_mag",
)
QUATERNION_COLS = ['rot_w', 'rot_x', 'rot_y', 'rot_z']
GRAVITY_WORLD = np.array([0, 0, 9.81], "float32")
RAW_ACCELRATION_COLS = ["acc_x", "acc_y", "acc_z"]
LINEAR_ACC_COLS = ["linear_" + col for col in RAW_ACCELRATION_COLS] # Acceleration without gravity
COMPETITION_HANDLE = "cmi-detect-behavior-with-sensor-data"
CATEGORY_COLUMNS = [
    'row_id',
    'sequence_type',
    'sequence_id',
    'subject',
    'orientation',
    'behavior',
    'phase',
    'gesture',
]
META_DATA_COLUMNS = [
    'row_id',
    'sequence_type',
    'sequence_id',
    'sequence_counter',
    'subject',
    'orientation',
    'behavior',
    'phase',
    'gesture',
]
DATASET_DF_DTYPES = {
    "acc_x": "float32", "acc_y": "float32", "acc_z": "float32",
    "thm_1":"float32", "thm_2":"float32", "thm_3":"float32", "thm_4":"float32", "thm_5":"float32",
    "sequence_counter": "int32",
    **{col: "category" for col in CATEGORY_COLUMNS},
    **{f"tof_{i_1}_v{i_2}": "float32" for i_1, i_2 in product(range(1, 5), range(64))},
}
PREPROCESSED_DATASET_HANDLE = "mauroabidalcarrer/prepocessed-cmi-2025"
# The quantile of the sequences len used to pad/truncate during preprocessing
SEQUENCE_NORMED_LEN_QUANTILE = 0.95
# SAMPLING_FREQUENCY = 10 #Hz
VALIDATION_FRACTION = 0.2
EPSILON=1e-8
DELTA_ROTATION_ANGULAR_VELOCITY_COLS = ["angular_vel_x", "angular_vel_y", "angular_vel_z"]
DELTA_ROTATION_AXES_COLS = ["rotation_axis_x", "rotation_axis_y", "rotation_axis_z"]
EULER_ANGLES_COLS = ["euler_x", "euler_y", "euler_z"]
pad_trunc_mode_type = Literal["pre", "center", "post"]
SEQ_PAD_TRUNC_MODE: pad_trunc_mode_type = "center"
DEFAULT_VERSION_NOTES = "Preprocessed Child Mind Institue 2025 competition preprocessed dataset."
NB_COLS_PER_TOF_SENSOR = 64
TOF_PATCH_SIZE = 2
assert ((NB_COLS_PER_TOF_SENSOR // 2) % TOF_PATCH_SIZE) == 0, "tof side len should be dividable by TOF_PATCH_SIZE!"
TOF_AGG_FUNCTIONS = [
    "mean",
    "std",
    "median",
    "min",
    "max",
]
# Data augmentation
JITTER = 0.25
SCALING = 0.2
MIXUP = 0.3
# Training loop
NB_CROSS_VALIDATIONS = 10
TRAIN_BATCH_SIZE = 256
VALIDATION_BATCH_SIZE = 4 * TRAIN_BATCH_SIZE
PATIENCE = 8
# Optimizer
WEIGHT_DECAY = 3e-3
# Scheduler
TRAINING_EPOCHS = 35 # Including warmup epochs
WARMUP_EPOCHS = 3
WARMUP_LR_INIT = 1.822126131809773e-05
MAX_TO_MIN_LR_DIV_FACTOR = 100
LR_CYCLE_FACTOR = 0.5
CYCLE_LENGTH_FACTOR = 0.9
INIT_CYCLE_EPOCHS = 6
# Mock training loop
MOCK_TRAINING_EPOCHS = 20
MOCK_TRAINING_GAMMA = 1.01
CHANNELS_DIMENSION = 1
SEED = 42
FOLDS_VAL_SCORE_ORDER = [
    4,
    7,
    1,
    9,
    6,
    2,
    3,
    8,
    0,
    5,
]
# model
MODEL_NAME = "cmi-model"
MODEL_VARIATION = "single_model_architecture"

### Seed everything

In [13]:
def seed_everything(seed=42):
    """Set all random seeds for reproducibility"""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    torch.use_deterministic_algorithms(True, warn_only=True)
seed_everything(seed=SEED)

### Supress performance warngings

In [14]:
warnings.filterwarnings(
    "ignore",
    message=(
        "DataFrame is highly fragmented.  This is usually the result of "
        "calling `frame.insert` many times.*"
    ),
    category=pd.errors.PerformanceWarning,
)

### device setup

In [15]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

## Dataset

### Preprocessing

In [16]:
def get_feature_cols(df:DF) -> list[str]:
    return sorted(list(set(df.columns) - set(META_DATA_COLUMNS) - set(TARGET_NAMES)))

# Missing ToF values are already imputed by -1 which is inconvinient since we want all missing values to be NaN.    
# So we replace them by NaN and then perform imputing.
def get_fillna_val_per_feature_col(df:DF) -> dict:
    return {col: 1.0 if col == 'rot_w' else 0 for col in get_feature_cols(df)}

def imputed_features(df:DF) -> DF:
    # Missing ToF values are already imputed by -1 which is inconvinient since we want all missing values to be NaN.    
    # So we replace them by NaN and then perform imputing.  
    tof_vals_to_nan = {col: -1.0 for col in df.columns if col.startswith("tof")}
    # fillna_val_per_col = {col: 1.0 if col == 'rot_w' else 0 for col in df.columns}

    df[get_feature_cols(df)] = (
        df
        .loc[:, get_feature_cols(df)]
        # df.replace with np.nan sets dtype to floar64 so we set it back to float32
        .replace(tof_vals_to_nan, value=np.nan)
        .astype("float32")
        .groupby(df["sequence_id"], observed=True, as_index=False)
        .ffill()
        .groupby(df["sequence_id"], observed=True, as_index=False)
        .bfill()
        # In case there are only nan in the column in the sequence
        .fillna(get_fillna_val_per_feature_col(df))
    )
    return df

def standardize_tof_cols_names(df: DF) -> DF:
    renamed_cols = {}
    pattern = re.compile(r"^(tof_\d_v)(\d)$")  # match 'tof_X_vY' where Y is a single digit

    for col in df.columns:
        match = pattern.match(col)
        if match:
            prefix, version = match.groups()
            new_col = f"{prefix}0{version}"
            renamed_cols[col] = new_col

    return df.rename(columns=renamed_cols)

def norm_quat_rotations(df:DF) -> DF:
    df[QUATERNION_COLS] /= np.linalg.norm(df[QUATERNION_COLS], axis=1, keepdims=True)
    return df

def add_linear_acc_cols(df:DF) -> DF:
    # Vectorized version of https://www.kaggle.com/code/wasupandceacar/lb-0-82-5fold-single-bert-model#Dataset `remove_gravity_from_acc`
    rotations:Rotation = Rotation.from_quat(df[QUATERNION_COLS])
    gravity_sensor_frame = rotations.apply(GRAVITY_WORLD, inverse=True).astype("float32")
    df[LINEAR_ACC_COLS] = df[RAW_ACCELRATION_COLS] - gravity_sensor_frame
    return df

def add_acc_magnitude(df:DF, acc_cols:list[str], acc_mag_col_name:str) -> DF:
    return df.assign(**{acc_mag_col_name: np.linalg.norm(df.loc[:, acc_cols], axis=1)})

def add_quat_angle_mag(df:DF) -> DF:
    return df.assign(quat_rot_mag=np.arccos(df["rot_w"]) * 2)

def add_angular_velocity_features(df:DF) -> DF:
    rotations = Rotation.from_quat(df[QUATERNION_COLS])
    delta_rotations = rotations[1:] * rotations[:-1].inv()
    delta_rot_velocity = delta_rotations.as_rotvec()
    # Add extra line to avoid shape mismatch
    delta_rot_velocity = np.vstack((np.zeros((1, 3)), delta_rot_velocity))
    delta_rot_magnitude = norm(delta_rot_velocity, axis=1, keepdims=True)
    delta_rot_axes = delta_rot_velocity / (delta_rot_magnitude + EPSILON)
    df[DELTA_ROTATION_ANGULAR_VELOCITY_COLS] = delta_rot_velocity
    df[DELTA_ROTATION_AXES_COLS] = delta_rot_axes
    df["delta_rot_mag"] = delta_rot_magnitude.squeeze()

    return df

def rot_euler_angles(df:DF) -> ndarray:
    df[EULER_ANGLES_COLS] = (
        Rotation
        .from_quat(df[QUATERNION_COLS])
        .as_euler("xyz")
        .squeeze()
    )
    return df

def agg_tof_patch(tof_views:np.ndarray, f_name:str) -> ndarray:
    views_agg_func = methodcaller(f_name, tof_views, axis=(1, 2))
    return (
        views_agg_func(np)
        .reshape(tof_views.shape[0], -1)
    )

def agg_tof_cols_per_sensor(df:DF) -> DF:
    """
    ## Description:
    Computes the sensor and patch sensor wise stats.
    ## Resturns:
    The dataframe with the added stats.
    """
    for tof_idx in tqdm(range(1, 6)):
        tof_name = f"tof_{tof_idx}"
        all_tof_cols = [f"{tof_name}_v{v_idx:02d}" for v_idx in range(64)]
        tof_feats = (
            df
            .loc[:, all_tof_cols]
            .values
            .reshape(-1, 8, 8)
        )
        agg_func = partial(df[all_tof_cols].agg, axis="columns")
        mk_fe_col_name = lambda f_name: tof_name + "_" + f_name
        engineered_feats = DF({mk_fe_col_name(f_name): agg_func(f_name) for f_name in TOF_AGG_FUNCTIONS})
        stats_cols_names = list(map(mk_fe_col_name, TOF_AGG_FUNCTIONS))
        # Patch Feature engineering
        tof_views:np.ndarray = sliding_window_view(tof_feats, (TOF_PATCH_SIZE, TOF_PATCH_SIZE), (1, 2))
        patch_fe = {}
        for f_name in TOF_AGG_FUNCTIONS:
            tof_patch_stats = agg_tof_patch(tof_views, f_name)
            for patch_idx in range(tof_patch_stats.shape[1]):
                key = mk_fe_col_name(f_name) + f"_{patch_idx:02d}"
                patch_fe[key] = tof_patch_stats[:, patch_idx]
        patch_df = DF(patch_fe)
        # concat results
        df = pd.concat(
            (
                df.drop(columns=filter(df.columns.__contains__, stats_cols_names)),
                engineered_feats,
                patch_df,
            ),
            axis="columns",
        )
    return df

def add_diff_features(df:DF) -> DF:
    return pd.concat(
        (
            df,
            (
                df
                .groupby("sequence_id", as_index=False, observed=True)
                [get_feature_cols(df)]
                .diff()
                .fillna(get_fillna_val_per_feature_col(df))
                .add_suffix("_diff")
            )
        ),
        axis="columns",
    )

def one_hot_encode_targets(df:DF) -> DF:
    one_hot_target = pd.get_dummies(df["gesture"])
    df[TARGET_NAMES] = one_hot_target[TARGET_NAMES]
    return df

def length_normed_sequence_feat_arr(
        sequence: DF,
        normed_sequence_len: int,
        SEQ_PAD_TRUNC_MODE:Literal["pre", "center", "post"]
    ) -> ndarray:
    features = (
        sequence
        .loc[:, get_feature_cols(sequence)]
        .values
    )
    len_diff = abs(normed_sequence_len - len(features))
    len_diff_h = len_diff // 2 # half len diff
    len_diff_r = len_diff % 2 # len diff remainder
    if len(features) < normed_sequence_len:
        padding_dict = {
            "pre": (len_diff, 0),
            "center": (len_diff_h + len_diff_r, len_diff_h),
            "post": (0, len_diff),
        }
        padded_features = np.pad(
            features,
            (padding_dict[SEQ_PAD_TRUNC_MODE], (0, 0)),
        )
        return padded_features
    elif len(features) > normed_sequence_len:
        truncating_dict = {
            "pre": slice(len_diff),
            "center": slice(len_diff_h, -len_diff_h),
            "post": slice(0, -len_diff),
        }
        return features[len_diff // 2:-len_diff // 2]
    else:
        return features

def df_to_ndarrays(df:DF, normed_sequence_len:int, seq_pad_trunc_mode:str) -> tuple[np.ndarray, np.ndarray]:
    sequence_it = df.groupby("sequence_id", observed=True, as_index=False)
    x = np.empty(
        shape=(len(sequence_it), normed_sequence_len, len(get_feature_cols(df))),
        dtype="float32"
    )
    y = np.empty(
        shape=(len(sequence_it), len(TARGET_NAMES)),
        dtype="float32"
    )
    for sequence_idx, (_, sequence) in tqdm(enumerate(sequence_it), total=len(sequence_it)):
        normed_seq_feat_arr = length_normed_sequence_feat_arr(sequence, normed_sequence_len, seq_pad_trunc_mode)
        x[sequence_idx] = normed_seq_feat_arr
        # Take the first value as they are(or at least should be) all the same in a single sequence
        y[sequence_idx] = sequence[TARGET_NAMES].iloc[0].values

    return x, y

def get_normed_seq_len(dataset:DF) -> int:
    return int(
        dataset
        .groupby("sequence_id", observed=True)
        .size()
        .quantile(SEQUENCE_NORMED_LEN_QUANTILE)
    )

def fold_dfs_to_ndarrays(train:DF, validation:DF, dataset_normed_seq_len:int, seq_pad_trunc_mode:str) -> tuple[ndarray, ndarray, ndarray, ndarray]:
    """
    Returns:
        (train X, train Y, validation X, validation Y)
    """
    # full_dataset_normed_seq_len = get_normed_seq_len(df)
    return (
        *df_to_ndarrays(train, dataset_normed_seq_len, seq_pad_trunc_mode),
        *df_to_ndarrays(validation, dataset_normed_seq_len, seq_pad_trunc_mode),
    )

In [17]:
def preprocess_competitino_dataset() -> DF:
    csv_path = competition_download(COMPETITION_HANDLE, path="train.csv")
    return (
        pd.read_csv(csv_path, dtype=DATASET_DF_DTYPES)
        .pipe(imputed_features)
        .pipe(standardize_tof_cols_names)
        .pipe(norm_quat_rotations)
        .pipe(add_linear_acc_cols)
        .pipe(add_acc_magnitude, RAW_ACCELRATION_COLS, "acc_mag")
        .pipe(add_acc_magnitude, LINEAR_ACC_COLS, "linear_acc_mag")
        .pipe(add_quat_angle_mag)
        .pipe(add_angular_velocity_features)
        .pipe(rot_euler_angles)
        .pipe(add_quat_angle_mag)
        .pipe(one_hot_encode_targets)
        .pipe(agg_tof_cols_per_sensor)
        .pipe(add_diff_features)
    )

def save_sequence_meta_data(df:DF) -> DF:
    seq_meta_data = (
        df
        .groupby("sequence_id", as_index=False, observed=True)
        [META_DATA_COLUMNS]
        .last()
    )
    seq_meta_data.to_parquet("preprocessed_dataset/sequences_meta_data.parquet")
    np.save(
        "preprocessed_dataset/auxialiary_Y.npy",
        pd.get_dummies(seq_meta_data["orientation"]).values,
    )

def save_df_meta_data(df:DF):
    full_dataset_meta_data = {
        "mean": df[get_feature_cols(df)].mean().astype("float32").to_dict(),
        "std": df[get_feature_cols(df)].std().astype("float32").to_dict(),
        "pad_seq_len": get_normed_seq_len(df),
        "feature_cols": get_feature_cols(df),
        "n_aux_classes": df["orientation"].nunique(),
    }
    with open("preprocessed_dataset/full_dataset_meta_data.json", "w") as fp:
        json.dump(full_dataset_meta_data, fp, indent=4)
    
def create_preprocessed_dataset():
    shutil.rmtree("preprocessed_dataset", ignore_errors=True)
    os.makedirs("preprocessed_dataset")
    df = preprocess_competitino_dataset()
    full_dataset_sequence_length_norm = get_normed_seq_len(df)
    full_x, full_y = df_to_ndarrays(df, full_dataset_sequence_length_norm, SEQ_PAD_TRUNC_MODE)
    np.save(join("preprocessed_dataset", "X.npy"), full_x, allow_pickle=False)
    np.save(join("preprocessed_dataset", "Y.npy"), full_y, allow_pickle=False)
    # Save meta data
    save_sequence_meta_data(df)
    save_df_meta_data(df)

In [18]:
create_preprocessed_dataset()

Downloading from https://www.kaggle.com/api/v1/competitions/data/download/cmi-detect-behavior-with-sensor-data/train.csv...


100%|██████████| 1.04G/1.04G [00:30<00:00, 37.1MB/s]


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/8151 [00:00<?, ?it/s]

### Dataset class

In [19]:
class CMIDataset(TensorDataset):
    def __init__(self):
        x = np.load(join("preprocessed_dataset", "X.npy")).swapaxes(1, 2)
        y = np.load(join("preprocessed_dataset", "Y.npy"))
        auxiliary_y = np.load(join("preprocessed_dataset", "auxialiary_Y.npy"))
        super().__init__(
            torch.from_numpy(x).to(device),
            torch.from_numpy(y).to(device),
            torch.from_numpy(auxiliary_y).to(device),
        )

#### Meta data loading

In [20]:
meta_data_path = join(
    "preprocessed_dataset",
    "full_dataset_meta_data.json"
)
with open(meta_data_path, "r") as fp:
    meta_data = json.load(fp)
# Convert target names into a ndarray to index it batchwise.
def get_sensor_indices(sensor_prefix: str) -> list[int]:
    is_sensor_feat = methodcaller("startswith", sensor_prefix)
    return [feat_idx for feat_idx, feat in enumerate(meta_data["feature_cols"]) if is_sensor_feat(feat)]

tof_idx = get_sensor_indices("tof")
thm_idx = get_sensor_indices("thm")
imu_idx = list(filter(lambda idx: idx not in tof_idx + thm_idx, range(len(meta_data["feature_cols"]))))

## Model definition

In [39]:
class MultiScaleConvs(nn.Module):
    def __init__(self, in_channels:int, kernel_sizes:list[int]):
        super().__init__()
        def mk_conv_block(k_size) -> nn.Sequential:
            return nn.Sequential(
                nn.Conv1d(in_channels, in_channels, k_size, padding=k_size // 2, groups=in_channels),
                nn.BatchNorm1d(in_channels),
                nn.ReLU(),
            )
        self.convs = nn.ModuleList(map(mk_conv_block, kernel_sizes))

    def forward(self, x:Tensor) -> Tensor:
        yes = torch.cat([conv(x) for conv in self.convs] + [x], dim=1)
        # print("stem output shape:", yes.shape)
        return yes

class ImuFeatureExtractor(nn.Module):
    def __init__(self, in_channels:int, kernel_size:int=15):
        super().__init__()

        self.lpf = nn.Conv1d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            padding=kernel_size//2,
            groups=in_channels,
            bias=False,
        )
        nn.init.kaiming_uniform_(self.lpf.weight, a=math.sqrt(5))

    def forward(self, x:Tensor) -> Tensor:
        lpf_output = self.lpf(x)
        hpf_output = x - lpf_output
        return torch.cat((lpf_output, hpf_output, x), dim=1)  # (B, C_out, T)

class SqueezeExcitationBlock(nn.Module):
    # Copy/paste of https://www.kaggle.com/code/wasupandceacar/lb-0-82-5fold-single-bert-model#Model implementation
    def __init__(self, channels:int, reduction:int=8):
        super().__init__()
        self.fc1 = nn.Linear(channels, channels // reduction, bias=True)
        self.fc2 = nn.Linear(channels // reduction, channels, bias=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # x: (B, C, L)
        se = F.adaptive_avg_pool1d(x, 1).squeeze(-1)      # -> (B, C)
        se = F.relu(self.fc1(se), inplace=True)          # -> (B, C//r)
        se = self.sigmoid(self.fc2(se)).unsqueeze(-1)    # -> (B, C, 1)
        return x * se

class ResidualBlock(nn.Module):
    def __init__(self, in_chns:int, out_chns:int, dropout_ratio:float=0.3, se_reduction:int=8, kernel_size:int=3):
        super().__init__()
        self.blocks = nn.Sequential(
            nn.Conv1d(in_chns, out_chns, kernel_size=kernel_size, padding=kernel_size // 2, bias=False),
            nn.BatchNorm1d(out_chns),
            nn.ReLU(),
            nn.Conv1d(out_chns, out_chns, kernel_size=kernel_size, padding=kernel_size // 2, bias=False),
            nn.BatchNorm1d(out_chns),
            SqueezeExcitationBlock(out_chns, se_reduction),
        )
        self.head = nn.Sequential(nn.ReLU(), nn.Dropout(dropout_ratio))
        if in_chns == out_chns:
            self.skip_connection = nn.Identity() 
        else:
            # TODO: set bias to False ?
            self.skip_connection = nn.Sequential(
                nn.Conv1d(in_chns, out_chns, 1, bias=False),
                nn.BatchNorm1d(out_chns)
            )
            self.head.insert(1, nn.MaxPool1d(2))

    def forward(self, x:Tensor) -> Tensor:
        activaition_maps = self.skip_connection(x) + self.blocks(x)
        return self.head(activaition_maps)

class MBConvBlock(nn.Module):
    # From this schema: https://media.licdn.com/dms/image/v2/D5612AQFjbDOm5uyxdw/article-inline_image-shrink_1500_2232/article-inline_image-shrink_1500_2232/0/1683677500817?e=1758153600&v=beta&t=n48_UW5TZTyDPhRFlJXSidUQQPQpuC756M0kNeKmYTY
    def __init__(self, in_chns:int, out_chns:int, se_reduction:int=8, expansion_ratio:int=4, dropout_ratio:float=0.3):
        super().__init__()
        expanded_channels = in_chns * expansion_ratio
        self.blocks = nn.Sequential(
            nn.Conv1d(in_chns, expanded_channels, kernel_size=1, bias=False),
            nn.BatchNorm1d(expanded_channels),
            nn.ReLU(),
            nn.Conv1d(
                expanded_channels,
                expanded_channels,
                kernel_size=3,
                padding=1,
                groups=expanded_channels,
                bias=False,
            ),
            nn.BatchNorm1d(expanded_channels),
            nn.ReLU(),
            SqueezeExcitationBlock(expanded_channels, se_reduction),
            nn.Conv1d(expanded_channels, out_chns, kernel_size=1, bias=False)
        )
        self.head = nn.Sequential(
            nn.BatchNorm1d(out_chns)
            # nn.ReLU(),
            # nn.Dropout(dropout_ratio),
        )
        if in_chns == out_chns:
            self.skip_connection = nn.Identity() 
        else:
            # TODO: set bias to False ?
            self.skip_connection = nn.Sequential(
                nn.Conv1d(in_chns, out_chns, 1, bias=False),
                nn.BatchNorm1d(out_chns)
            )
            self.head.add_module("max_pool", nn.MaxPool1d(2))
            
    def forward(self, x:Tensor) -> Tensor:
        activaition_maps = self.skip_connection(x) + self.blocks(x)
        return self.head(activaition_maps)

class AdditiveAttentionLayer(nn.Module):
    # Copied (and slightly modified) from https://www.kaggle.com/code/myso1987/cmi3-pyroch-baseline-model-add-aug-folds
    def __init__(self, hidden_dim):
        super().__init__()
        self.attention = nn.Linear(hidden_dim, 1, bias=True)

    def forward(self, x: Tensor) -> Tensor:
        # x shape: (batch, channels, seq_len)
        x = x.swapaxes(1, 2)
        # x shape: (batch, seq_len, hidden_dim)
        scores = torch.tanh(self.attention(x))  # (batch, seq_len, 1)
        weights = F.softmax(scores.squeeze(-1), dim=1)  # (batch, seq_len)
        context = torch.sum(x * weights.unsqueeze(-1), dim=1)  # (batch, hidden_dim)
        return context

class AlexNet(nn.Sequential):
    def __init__(self, channels:list[int], dropout_ratio:float):
        def mk_conv_block(in_channels:int, out_channels:int) -> nn.Module:
            return nn.Sequential(
                nn.Conv1d(in_channels, out_channels, 3, padding=1, bias=False),
                nn.BatchNorm1d(out_channels),
                nn.MaxPool1d(2),
                nn.Dropout(dropout_ratio),
            )
        return super().__init__(*list(starmap(mk_conv_block, pairwise(channels))))

class CMIHARModule(nn.Module):
    def __init__(
            self,
            imu_idx:list[int],
            thm_idx:list[int],
            tof_idx:list[int],
            mlp_width:int,
            n_classes:int,
            n_aux_classes:Optional[int]=None,
            dataset_x:Optional[Tensor]=None,
            tof_dropout_ratio:float=0,
            thm_dropout_ratio:float=0,
            imu_dropout_ratio:float=0,
        ):
        super().__init__()
        self.imu_idx = imu_idx
        self.tof_idx = tof_idx
        self.thm_idx = thm_idx
        if dataset_x is not None:
            x_mean = dataset_x.mean(dim=(0, 2), keepdim=True)
            x_std = dataset_x.std(dim=(0, 2), keepdim=True)
            self.register_buffer("x_mean", x_mean)
            self.register_buffer("x_std", x_std)
        else:
            x_stats_size = (1, len(meta_data["feature_cols"]), 1)
            self.register_buffer("x_mean", torch.empty(x_stats_size))
            self.register_buffer("x_std", torch.empty(x_stats_size))
        self.imu_branch = nn.Sequential(
            ResidualBlock(len(imu_idx), 219, imu_dropout_ratio),
            ResidualBlock(219, 500, imu_dropout_ratio),
        )
        self.tof_branch = AlexNet([len(tof_idx), 82, 500], tof_dropout_ratio)
        self.thm_branch = AlexNet([len(thm_idx), 82, 500], thm_dropout_ratio)
        self.rnn = nn.GRU(500 * 3, mlp_width // 2, bidirectional=True)
        self.attention = AdditiveAttentionLayer(mlp_width)
        self.meain_head = nn.Sequential(
            # Head
            nn.LazyLinear(mlp_width, bias=False),
            nn.BatchNorm1d(mlp_width),
            nn.ReLU(),
            nn.Linear(mlp_width, mlp_width // 2, bias=False),
            nn.BatchNorm1d(mlp_width // 2),
            nn.ReLU(),
            nn.Linear(mlp_width // 2, n_classes),
        )
        if n_aux_classes is not None:
            self.aux_head = nn.Sequential(
                # Head
                nn.LazyLinear(mlp_width, bias=False),
                nn.BatchNorm1d(mlp_width),
                nn.ReLU(),
                nn.Linear(mlp_width, mlp_width // 2, bias=False),
                nn.BatchNorm1d(mlp_width // 2),
                nn.ReLU(),
                nn.Linear(mlp_width // 2, n_aux_classes),
            )

    def forward(self, x:Tensor) -> Tensor:
        assert self.x_mean is not None and self.x_std is not None, f"Nor x_mean nor x_std should be None.\nx_std: {self.x_std}\nx_mean: {self.x_mean}"
        x = (x - self.x_mean) / self.x_std
        concatenated_activation_maps = torch.cat(
            (
                self.imu_branch(x[:, self.imu_idx]),
                self.thm_branch(x[:, self.thm_idx]),
                self.tof_branch(x[:, self.tof_idx]),
            ),
            dim=CHANNELS_DIMENSION,
        )
        lstm_output, _  = self.rnn(concatenated_activation_maps.swapaxes(1, 2))
        lstm_output = lstm_output.swapaxes(1, 2) # redundant
        attended = self.attention(lstm_output)
        if hasattr(self, "aux_head"):
            return self.meain_head(attended), self.aux_head(attended)
        return self.meain_head(attended)

### Create model function

In [None]:
def mk_model(
    dataset_x:Optional[Tensor]=None,
    n_aux_classes:Optional[int]=None,
) -> nn.Module:
    return (
        CMIHARModule(
            imu_idx=imu_idx,
            thm_idx=thm_idx,
            tof_idx=tof_idx,
            mlp_width=256,
            n_classes=18,
            dataset_x=dataset_x,
            n_aux_classes=n_aux_classes,
            imu_dropout_ratio=0.2,
            tof_dropout_ratio=0.2,
            thm_dropout_ratio=0.2,

        )
        .to(device)
    )

display(mk_model(torch.arange(12).view(2, 2, -1).float()))
print("input channels:", len(meta_data["feature_cols"]))

CMIHARModule(
  (imu_branch): Sequential(
    (0): ResidualBlock(
      (blocks): Sequential(
        (0): Conv1d(46, 219, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
        (1): BatchNorm1d(219, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv1d(219, 219, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
        (4): BatchNorm1d(219, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): SqueezeExcitationBlock(
          (fc1): Linear(in_features=219, out_features=27, bias=True)
          (fc2): Linear(in_features=27, out_features=219, bias=True)
          (sigmoid): Sigmoid()
        )
      )
      (head): Sequential(
        (0): ReLU()
        (1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (2): Dropout(p=0.2, inplace=False)
      )
      (skip_connection): Sequential(
        (0): Conv1d(46, 219, kernel_size=(1,), stride=(1,), bias=False)
       

input channels: 946


## Training

In [23]:
class CosineAnnealingWarmupRestarts(_LRScheduler):
    def __init__(
        self,
        optimizer: Optimizer,
        warmup_steps: int,
        max_lr: float,
        min_lr: float,
        cycle_length: int,
        cycle_mult: float = 1.0,
        gamma: float = 1.0,
        last_epoch: int = -1,
    ) -> None:
        """
        Args:
            optimizer: Wrapped optimizer.
            warmup_steps: Number of steps for linear warmup.
            max_lr: Initial maximum learning rate.
            min_lr: Minimum learning rate after decay.
            cycle_length: Initial number of steps per cosine cycle.
            cycle_mult: Multiplicative factor for increasing cycle lengths.
            gamma: Multiplicative decay factor for max_lr after each cycle.
            last_epoch: The index of last epoch. Default: -1.
        """
        self.warmup_steps = warmup_steps
        self.max_lr = max_lr
        self.min_lr = min_lr
        self.cycle_length = cycle_length
        self.cycle_mult = cycle_mult
        self.gamma = gamma

        self.current_cycle = 0
        self.cycle_step = 0
        self.lr = max_lr

        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> list[float]:
        if self.last_epoch < self.warmup_steps:
            # Linear warmup
            scale = (self.last_epoch + 1) / self.warmup_steps
            return [self.min_lr + scale * (self.max_lr - self.min_lr) for _ in self.base_lrs]

        # Adjust for post-warmup step index
        t = self.cycle_step
        T = self.cycle_length

        cosine_decay = 0.5 * (1 + math.cos(math.pi * t / T))
        lr = self.min_lr + (self.max_lr - self.min_lr) * cosine_decay

        return [lr for _ in self.base_lrs]

    def step(self, epoch: Optional[int] = None) -> None:
        if self.last_epoch >= self.warmup_steps:
            self.cycle_step += 1
            if self.cycle_step >= self.cycle_length:
                self.current_cycle += 1
                self.cycle_step = 0
                self.cycle_length = max(int(self.cycle_length * self.cycle_mult), 1)
                self.max_lr *= self.gamma
        super().step(epoch)

In [24]:
def mixup_data(
    x:Tensor,
    y:Tensor,
    aux_y:Optional[Tensor],
    alpha=0.2
) -> tuple[Tensor, Tensor] | tuple[Tensor, Tensor, Tensor]:
    """
    Return mixed inputs and mixed targets (one-hot) for mixup.
    x: Tensor of shape (batch_size, features, seq_len)
    y: Tensor of shape (batch_size, num_classes)
    """
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1.0
    batch_size = x.size(0)
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    mixed_y = lam * y + (1 - lam) * y[index, :]
    if aux_y is not None:
        mixed_aux_y = lam * aux_y + (1 - lam) * aux_y[index, :]
        return mixed_x, mixed_y, mixed_aux_y
    else:
        return mixed_x, mixed_y

In [25]:
def train_model(
        model:nn.Module,
        train_loader:DL,
        criterion:callable,
        optimizer:torch.optim.Optimizer,
        scheduler:_LRScheduler,
        training_kw:dict,
    ) -> dict:
    "Train model on a single epoch"
    train_metrics = {}
    model.train()
    train_metrics["train_loss"] = 0.0
    total = 0
    for batch_x, batch_y, batch_aux_y in train_loader:
        batch_aux_y = batch_aux_y.clone()
        batch_x = batch_x.to(device).clone()
        add_noise = torch.randn_like(batch_x, device=device) * 0.04
        scale_noise = torch.rand_like(batch_x, device=device) * (1.1 - 0.9) + 0.9
        batch_x = (add_noise + batch_x) * scale_noise
        batch_x[:TRAIN_BATCH_SIZE // 2, tof_idx + thm_idx] = 0.0
        batch_y = batch_y.to(device)
        batch_x = batch_x.float()
        
        batch_x, batch_y, batch_aux_y = mixup_data(batch_x, batch_y, batch_aux_y)

        optimizer.zero_grad()
        outputs, aux_output = model(batch_x)
        loss = criterion(outputs, batch_y) + criterion(aux_output, batch_aux_y) * training_kw["aux_loss_weigth"]
        loss.backward()
        optimizer.step()
        scheduler.step()

        train_metrics["train_loss"] += loss.item() * batch_x.size(0)
        total += batch_x.size(0)
    train_metrics["train_loss"] /= total

    return train_metrics

In [26]:
def evaluate_model(model:nn.Module, validation_loader:DL, criterion:callable) -> dict:
    model.eval()
    eval_metrics = {}
    eval_metrics["val_loss"] = 0.0
    total = 0
    all_true = []
    all_pred = []

    with torch.no_grad():
        for batch_x, batch_y, _ in validation_loader:
            batch_x = batch_x.to(device).clone()
            batch_y = batch_y.to(device)
            # batch_aux_y = batch_aux_y.to(device)
            batch_x[:VALIDATION_BATCH_SIZE // 2, tof_idx + thm_idx] = 0.0

            outputs, _ = model(batch_x)
            loss = criterion(outputs, batch_y)
            eval_metrics["val_loss"] += loss.item() * batch_x.size(0)
            total += batch_x.size(0)

            # Get predicted class indices
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            # Get true class indices from one-hot
            trues = torch.argmax(batch_y, dim=1).cpu().numpy()

            all_true.append(trues)
            all_pred.append(preds)

    eval_metrics["val_loss"] /= total
    all_true = np.concatenate(all_true)
    all_pred = np.concatenate(all_pred)

    # Compute competition metrics
    # Binary classification: BFRB (1) vs non-BFRB (0)
    binary_true = np.isin(all_true, BFRB_INDICES).astype(int)
    binary_pred = np.isin(all_pred, BFRB_INDICES).astype(int)
    eval_metrics["binary_f1"] = f1_score(binary_true, binary_pred)

    # Collapse non-BFRB gestures into a single class
    collapsed_true = np.where(
        np.isin(all_true, BFRB_INDICES),
        all_true,
        len(BFRB_GESTURES)  # Single non-BFRB class
    )
    collapsed_pred = np.where(
        np.isin(all_pred, BFRB_INDICES),
        all_pred,
        len(BFRB_GESTURES)  # Single non-BFRB class
    )

    # Macro F1 on collapsed classes
    eval_metrics["macro_f1"] = f1_score(collapsed_true, collapsed_pred, average='macro')
    eval_metrics["final_metric"] = (eval_metrics["binary_f1"] + eval_metrics["macro_f1"]) / 2

    return eval_metrics

In [30]:
def train_model_on_all_epochs(
        model:nn.Module,
        train_loader:DL,
        validation_loader:DL,
        criterion:callable,
        optimizer:torch.optim.Optimizer,
        scheduler:_LRScheduler,
        fold:int,
        training_kw:dict,
    ) -> DF:

    metrics:list[dict] = []
    # Early stopping
    best_metric = -np.inf
    epochs_no_improve = 0

    for epoch in range(1, TRAINING_EPOCHS + 1):
        train_metrics = train_model(model, train_loader, criterion, optimizer, scheduler, training_kw)
        validation_metrics = evaluate_model(model, validation_loader, criterion)
        metrics.append({"fold": fold, "epoch": epoch} | train_metrics | validation_metrics)

        print(f"Epoch {epoch:02d}: Binary F1 = {validation_metrics['binary_f1']:.4f}, Macro F1 = {validation_metrics['macro_f1']:.4f}, Final Metric = {validation_metrics['final_metric']:.4f}")

        if validation_metrics["final_metric"] > best_metric:
            best_metric = validation_metrics["final_metric"]
            epochs_no_improve = 0
            best_model_state = model.state_dict()
            print(f"  New best metric! Saving model...")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= PATIENCE:
                print(f"Early stopping triggered at epoch {epoch}")
                model.load_state_dict(best_model_state)
                break
    os.makedirs("models", exist_ok=True)
    torch.save(best_model_state, f"models/model_fold_{fold}.pth")

    return DF.from_records(metrics).set_index(["fold", "epoch"])

In [28]:
def sgkf_from_tensor_dataset(
    dataset: TensorDataset,
    n_splits: int = 5,
    shuffle: bool = True,
) -> Iterator[tuple[Subset, Subset]]:
    # Load sequence meta data to get classes and groups parameters
    seq_meta = pd.read_parquet("preprocessed_dataset/sequences_meta_data.parquet")
    X, *_ = dataset.tensors
    sgkf = StratifiedGroupKFold(
        n_splits=n_splits,
        shuffle=shuffle,
    )

    fold_indices = list(sgkf.split(X.cpu().numpy(), seq_meta["gesture"], seq_meta["subject"]))
    fold_indices

    for fold_idx in FOLDS_VAL_SCORE_ORDER:
        seed_everything(seed=SEED + fold_idx)
        train_idx, val_idx = fold_indices[fold_idx]
        yield Subset(dataset, train_idx), Subset(dataset, val_idx)

In [29]:
def train_on_all_folds(
    lr_scheduler_kw:dict,
    optimizer_kw:dict,
    training_kw:dict,
    best_folds_scores:Optional[dict]=None,
) -> tuple[float, DF]:
    seed_everything(seed=SEED)
    metrics:DF = DF()
    full_dataset = CMIDataset()
    folds_it = sgkf_from_tensor_dataset(full_dataset, NB_CROSS_VALIDATIONS)
    fold_training_early_stopped = False
    for fold_idx, (train_dataset, validation_dataset) in enumerate(folds_it):
        # Debugging
        print(f"\n{'='*50}")
        print("training:", fold_idx + 1)
        print(f"Fold {fold_idx + 1}/{NB_CROSS_VALIDATIONS}")
        criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)
        train_loader = DL(train_dataset, TRAIN_BATCH_SIZE, shuffle=True, drop_last=False)
        validation_loader = DL(validation_dataset, VALIDATION_BATCH_SIZE, shuffle=False, drop_last=False)
        all_train_x = train_dataset.dataset.tensors[0][train_dataset.indices]
        model = mk_model(all_train_x, meta_data["n_aux_classes"])
        # Optimizer et scheduler
        optimizer = torch.optim.AdamW(
            model.parameters(),
            WARMUP_LR_INIT,
            weight_decay=optimizer_kw["weight_decay"],
            betas=(optimizer_kw["beta_0"], optimizer_kw["beta_1"]),
        )
        steps_per_epoch = len(train_loader)
        scheduler = CosineAnnealingWarmupRestarts(
            optimizer,
            warmup_steps=lr_scheduler_kw["warmup_epochs"] * steps_per_epoch,
            cycle_mult=lr_scheduler_kw["cycle_mult"],
            max_lr=lr_scheduler_kw["max_lr"],
            min_lr=lr_scheduler_kw["max_lr"] / lr_scheduler_kw["max_to_min_div_factor"],
            cycle_length=lr_scheduler_kw["init_cycle_epochs"] * steps_per_epoch,
            gamma=lr_scheduler_kw["lr_cycle_factor"],
        ) 
        fold_metrics = train_model_on_all_epochs(
            model,
            train_loader,
            validation_loader,
            criterion,
            optimizer,
            scheduler,
            fold_idx,
            training_kw,
        )

        best_fold_metrics = fold_metrics.loc[fold_metrics["final_metric"].idxmax()]
        final_fold_metrics = fold_metrics.iloc[-1]
        print(f"Best validation metrics - Binary F1: {best_fold_metrics['binary_f1']:.4f}, Macro F1: {best_fold_metrics['macro_f1']:.4f}, Final: {best_fold_metrics['final_metric']:.4f}")
        print(f"Final validation metrics - Binary F1: {final_fold_metrics['binary_f1']:.4f}, Macro F1: {final_fold_metrics['macro_f1']:.4f}, Final: {final_fold_metrics['final_metric']:.4f}")

        metrics = pd.concat((metrics, fold_metrics))

        if (
            best_folds_scores is not None
            and
            best_fold_metrics["final_metric"] < best_folds_scores[fold_idx]
        ):
            print(f"Fold wise early stopping triggered at fold {fold_idx} score: {best_fold_metrics['final_metric']}, best score: {best_folds_scores[fold_idx]}")
            fold_training_early_stopped = True
            break

    if not fold_training_early_stopped and best_folds_scores is not None:
        new_best_folds_scores = (
            metrics
            .groupby(level=0)
            .max()
            ["final_metric"]
            .to_dict()
        )
        print(
            "Found new best fold scores:",
            new_best_folds_scores,
            "Old best scores:",
            best_folds_scores,
            sep="\n"
        )
        best_folds_scores.update(new_best_folds_scores)
    print("\n" + "="*50)
    print("Cross-Validation Results")
    print("="*50)

    # Statistiques pour les meilleures métriques
    best_metrics:DF = (
        metrics
        .loc[:, ["binary_f1", "macro_f1", "final_metric"]]
        .groupby(level=0)
        .max()
    )

    print("\nBest Fold-wise Metrics:")
    display(best_metrics)
    print("\nGlobal Statistics (Best Metrics):")
    print(f"Mean Best Final Metric: {best_metrics['final_metric'].mean():.4f} ± {best_metrics['final_metric'].std():.4f}")
    print(f"Mean Best Binary F1: {best_metrics['binary_f1'].mean():.4f} ± {best_metrics['binary_f1'].std():.4f}")
    print(f"Mean Best Macro F1: {best_metrics['macro_f1'].mean():.4f} ± {best_metrics['macro_f1'].std():.4f}")

    return best_metrics["final_metric"].mean(), metrics

In [42]:
import kagglehub

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    mean_best_cv_score, metrics = train_on_all_folds(
        lr_scheduler_kw={
            'warmup_epochs': 15,
            'cycle_mult': 1.5,
            'max_lr': 0.004921271951375079,
            'init_cycle_epochs': 5,
            'lr_cycle_factor': 0.3,
            'max_to_min_div_factor': 250,
        },
        optimizer_kw={
            'weight_decay': 0.0005345701899759787, 
            'beta_0': 0.8767300066532935,
            'beta_1': 0.9935604207618539,
        },
        training_kw={'aux_loss_weigth': 0.30000000000000004},
    )
    user_input = input("Upload model ensemble?").lower()
    if user_input == "yes":
        kagglehub.model_upload(
            handle=join(
                kagglehub.whoami()["username"],
                MODEL_NAME,
                "pyTorch",
                MODEL_VARIATION,
            ),
            local_model_dir="models",
            version_notes=input("Please provide model version notes:")
        )
    elif user_input == "no":
        print("Model has not been uploaded to kaggle.")
    else:
        print("User input was not understood, model has not been uploaded to kaggle.")


training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7639, Macro F1 = 0.1905, Final Metric = 0.4772
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8563, Macro F1 = 0.2463, Final Metric = 0.5513
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8910, Macro F1 = 0.3188, Final Metric = 0.6049
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.9006, Macro F1 = 0.3570, Final Metric = 0.6288
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8661, Macro F1 = 0.3064, Final Metric = 0.5862
Epoch 06: Binary F1 = 0.8846, Macro F1 = 0.3690, Final Metric = 0.6268
Epoch 07: Binary F1 = 0.8958, Macro F1 = 0.4091, Final Metric = 0.6524
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8717, Macro F1 = 0.3919, Final Metric = 0.6318
Epoch 09: Binary F1 = 0.8778, Macro F1 = 0.4142, Final Metric = 0.6460
Epoch 10: Binary F1 = 0.8758, Macro F1 = 0.3747, Final Metric = 0.6253
Epoch 11: Binary F1 = 0.9084, Macro F1 = 0.3905, Final Metric = 0.6494
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.908382,0.426994,0.652413
1,0.964637,0.586566,0.769302
2,0.974359,0.610592,0.790925
3,0.981535,0.627425,0.80297
4,0.976699,0.627614,0.802157
5,0.989289,0.633634,0.810488
6,0.993197,0.643848,0.81705
7,0.977072,0.6539,0.812866
8,0.993737,0.676444,0.831946
9,0.990157,0.685202,0.837182



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7927 ± 0.0530
Mean Best Binary F1: 0.9749 ± 0.0252
Mean Best Macro F1: 0.6172 ± 0.0729
Kaggle credentials successfully validated.
Uploading Model https://www.kaggle.com/models/mauroabidalcarrer/CMI-model/pytorch/single_model_architecture ...
Starting upload for file models/model_fold_0.pth


Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 8.99MB/s]

Upload successful: models/model_fold_0.pth (13MB)
Starting upload for file models/model_fold_1.pth



Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 9.57MB/s]

Upload successful: models/model_fold_1.pth (13MB)
Starting upload for file models/model_fold_2.pth



Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 8.88MB/s]

Upload successful: models/model_fold_2.pth (13MB)
Starting upload for file models/model_fold_3.pth



Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 9.10MB/s]

Upload successful: models/model_fold_3.pth (13MB)
Starting upload for file models/model_fold_4.pth



Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 9.08MB/s]

Upload successful: models/model_fold_4.pth (13MB)
Starting upload for file models/model_fold_5.pth



Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 9.10MB/s]

Upload successful: models/model_fold_5.pth (13MB)
Starting upload for file models/model_fold_6.pth



Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 8.92MB/s]

Upload successful: models/model_fold_6.pth (13MB)
Starting upload for file models/model_fold_7.pth



Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 8.32MB/s]

Upload successful: models/model_fold_7.pth (13MB)
Starting upload for file models/model_fold_8.pth



Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 9.11MB/s]

Upload successful: models/model_fold_8.pth (13MB)
Starting upload for file models/model_fold_9.pth



Uploading: 100%|██████████| 13.6M/13.6M [00:01<00:00, 9.10MB/s]

Upload successful: models/model_fold_9.pth (13MB)





Your model instance version has been created.
Files are being processed...
See at: https://www.kaggle.com/models/mauroabidalcarrer/CMI-model/pytorch/single_model_architecture


## Hyperparameter tuning

In [None]:
def objective(trial: optuna.trial.Trial, best_folds_scores:dict) -> float:
    return train_on_all_folds(
        lr_scheduler_kw={
            "warmup_epochs": trial.suggest_int("warmup_epochs", 12, 15),
            "cycle_mult": trial.suggest_float("cycle_mult", 0.9, 1.6, step=0.1),
            "max_lr": trial.suggest_float("max_lr", 0.005581907927062619 / 1.5, 0.005581907927062619 * 1.5, step=0.0001),
            "max_to_min_div_factor": 250, #trial.suggest_float("max_to_min_div_factor", 100, 300, step=25),
            "init_cycle_epochs": trial.suggest_int("init_cycle_epochs", 2, 10, ),
            "lr_cycle_factor": trial.suggest_float("lr_cycle_factor", 0.25, 0.6, step=0.05),
        },
        optimizer_kw={
            "weight_decay": trial.suggest_float("weight_decay", 5e-4, 1e-3),
            "beta_0":trial.suggest_float("beta_0", 0.8, 0.999),
            "beta_1":trial.suggest_float("beta_1", 0.99, 0.9999),
        },
        training_kw={"aux_loss_weigth": trial.suggest_float("aux_loss_weigth", 0, 1, step=0.1)},
        best_folds_scores=best_folds_scores,
    )[0]

In [None]:
best_folds_scores = {fold_idx:0 for fold_idx in range(NB_CROSS_VALIDATIONS)}

study = optuna.create_study(direction="maximize")
study.optimize(partial(objective, best_folds_scores=best_folds_scores), n_trials=1000, timeout=60 * 60 * 8)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2025-08-16 14:06:15,718] A new study created in memory with name: no-name-07cb892d-9af7-4d27-84dd-b2f2cd15c05d



training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7218, Macro F1 = 0.1440, Final Metric = 0.4329
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8406, Macro F1 = 0.2022, Final Metric = 0.5214
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8420, Macro F1 = 0.3058, Final Metric = 0.5739
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8750, Macro F1 = 0.3434, Final Metric = 0.6092
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8586, Macro F1 = 0.3673, Final Metric = 0.6130
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8707, Macro F1 = 0.3792, Final Metric = 0.6249
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.8786, Macro F1 = 0.3777, Final Metric = 0.6281
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8957, Macro F1 = 0.3919, Final Metric = 0.6438
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8806, Macro F1 = 0.4008, Final Metric = 0.6407
Epoch 10: Binary F1 = 0.8875, Macro F1 = 0.4004, Final Met

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.919028,0.500331,0.705931
1,0.962302,0.590633,0.770266
2,0.970646,0.59811,0.781711
3,0.976517,0.595595,0.785937
4,0.979552,0.630882,0.803745
5,0.985366,0.636029,0.806707
6,0.991237,0.613595,0.797601
7,0.979842,0.639176,0.806337
8,0.994786,0.661161,0.826931
9,0.990177,0.662491,0.822821


[I 2025-08-16 14:17:00,599] Trial 0 finished with value: 0.790798707199152 and parameters: {'cycle_mult': 1.1, 'max_lr': 0.004565339422903338, 'init_cycle_epochs': 4, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0008004399626336614, 'beta_0': 0.9780353340881647, 'beta_1': 0.990909465320876, 'aux_loss_weigth': 0.09}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7908 ± 0.0346
Mean Best Binary F1: 0.9749 ± 0.0220
Mean Best Macro F1: 0.6128 ± 0.0472





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7816, Macro F1 = 0.1675, Final Metric = 0.4746
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8599, Macro F1 = 0.2433, Final Metric = 0.5516
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8963, Macro F1 = 0.3512, Final Metric = 0.6238
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8790, Macro F1 = 0.3672, Final Metric = 0.6231
Epoch 05: Binary F1 = 0.8828, Macro F1 = 0.3654, Final Metric = 0.6241
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8055, Macro F1 = 0.3123, Final Metric = 0.5589
Epoch 07: Binary F1 = 0.8942, Macro F1 = 0.3880, Final Metric = 0.6411
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8909, Macro F1 = 0.3861, Final Metric = 0.6385
Epoch 09: Binary F1 = 0.8766, Macro F1 = 0.4041, Final Metric = 0.6403
Epoch 10: Binary F1 = 0.8593, Macro F1 = 0.3954, Final Metric = 0.6273
Epoch 11: Binary F1 = 0.8713, Macro F1 = 0.4288, Final Metric = 0.6500
  New best metric! Sa

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.908718,0.501707,0.702602


[I 2025-08-16 14:18:15,282] Trial 1 finished with value: 0.7026015733077402 and parameters: {'cycle_mult': 0.8, 'max_lr': 0.006186948192983599, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.3, 'weight_decay': 0.0007389705795677398, 'beta_0': 0.8971688915880249, 'beta_1': 0.9985866634872135, 'aux_loss_weigth': 0.44999999999999996}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7026 ± nan
Mean Best Binary F1: 0.9087 ± nan
Mean Best Macro F1: 0.5017 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7386, Macro F1 = 0.1717, Final Metric = 0.4551
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8625, Macro F1 = 0.2421, Final Metric = 0.5523
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8673, Macro F1 = 0.2790, Final Metric = 0.5731
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8638, Macro F1 = 0.3460, Final Metric = 0.6049
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8891, Macro F1 = 0.3876, Final Metric = 0.6383
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8873, Macro F1 = 0.3925, Final Metric = 0.6399
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.8717, Macro F1 = 0.3905, Final Metric = 0.6311
Epoch 08: Binary F1 = 0.8903, Macro F1 = 0.4094, Final Metric = 0.6499
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8884, Macro F1 = 0.4062, Final Metric = 0.6473
Epoch 10: Binary F1 = 0.8590, Macro F1 = 0.4066, Final Metric = 0.6328
Epoch 11: Binary F1 = 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.915832,0.499249,0.700474


[I 2025-08-16 14:19:14,170] Trial 2 finished with value: 0.7004744689627171 and parameters: {'cycle_mult': 1.0, 'max_lr': 0.0077018154073807375, 'init_cycle_epochs': 10, 'lr_cycle_factor': 0.3, 'weight_decay': 0.0008572153354337126, 'beta_0': 0.9793172925559381, 'beta_1': 0.9948823326946068, 'aux_loss_weigth': 0.54}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7005 ± nan
Mean Best Binary F1: 0.9158 ± nan
Mean Best Macro F1: 0.4992 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7867, Macro F1 = 0.1974, Final Metric = 0.4920
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8218, Macro F1 = 0.2420, Final Metric = 0.5319
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8638, Macro F1 = 0.2905, Final Metric = 0.5772
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8453, Macro F1 = 0.3592, Final Metric = 0.6022
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8962, Macro F1 = 0.3639, Final Metric = 0.6301
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8096, Macro F1 = 0.3460, Final Metric = 0.5778
Epoch 07: Binary F1 = 0.8762, Macro F1 = 0.4146, Final Metric = 0.6454
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9115, Macro F1 = 0.4040, Final Metric = 0.6577
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8828, Macro F1 = 0.3774, Final Metric = 0.6301
Epoch 10: Binary F1 = 0.8610, Macro F1 = 0.3941, Final Metric = 0.6275
Epoch 11: Binary F1 = 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.911469,0.499612,0.699141


[I 2025-08-16 14:20:16,879] Trial 3 finished with value: 0.6991407677652848 and parameters: {'cycle_mult': 0.9, 'max_lr': 0.004519325914687909, 'init_cycle_epochs': 9, 'lr_cycle_factor': 0.35, 'weight_decay': 0.0005992297442468452, 'beta_0': 0.9203003451402638, 'beta_1': 0.9937104160805936, 'aux_loss_weigth': 0.24}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6991 ± nan
Mean Best Binary F1: 0.9115 ± nan
Mean Best Macro F1: 0.4996 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.8381, Macro F1 = 0.2020, Final Metric = 0.5200
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8800, Macro F1 = 0.2849, Final Metric = 0.5824
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8935, Macro F1 = 0.3647, Final Metric = 0.6291
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8494, Macro F1 = 0.3510, Final Metric = 0.6002
Epoch 05: Binary F1 = 0.8652, Macro F1 = 0.3244, Final Metric = 0.5948
Epoch 06: Binary F1 = 0.8757, Macro F1 = 0.3917, Final Metric = 0.6337
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9057, Macro F1 = 0.3936, Final Metric = 0.6496
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8999, Macro F1 = 0.4050, Final Metric = 0.6525
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8219, Macro F1 = 0.3753, Final Metric = 0.5986
Epoch 10: Binary F1 = 0.8939, Macro F1 = 0.3979, Final Metric = 0.6459
Epoch 11: Binary F1 = 0.8479, Macro F1 = 0.3490, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.909281,0.503573,0.704364


[I 2025-08-16 14:21:30,383] Trial 4 finished with value: 0.7043636767814081 and parameters: {'cycle_mult': 1.55, 'max_lr': 0.00827357222857069, 'init_cycle_epochs': 2, 'lr_cycle_factor': 0.6, 'weight_decay': 0.0008368788897687048, 'beta_0': 0.8065240077610794, 'beta_1': 0.9986400488504121, 'aux_loss_weigth': 0.0}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7044 ± nan
Mean Best Binary F1: 0.9093 ± nan
Mean Best Macro F1: 0.5036 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7439, Macro F1 = 0.1842, Final Metric = 0.4641
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8506, Macro F1 = 0.2725, Final Metric = 0.5616
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8697, Macro F1 = 0.3045, Final Metric = 0.5871
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8481, Macro F1 = 0.3473, Final Metric = 0.5977
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8288, Macro F1 = 0.3601, Final Metric = 0.5944
Epoch 06: Binary F1 = 0.8569, Macro F1 = 0.3352, Final Metric = 0.5960
Epoch 07: Binary F1 = 0.8759, Macro F1 = 0.3652, Final Metric = 0.6206
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9008, Macro F1 = 0.3763, Final Metric = 0.6386
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.9047, Macro F1 = 0.4271, Final Metric = 0.6659
  New best metric! Saving model...
Epoch 10: Binary F1 = 0.8727, Macro F1 = 0.4254, Final Metric = 0.6491
Epoch 11: Binary F1 = 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.904714,0.49101,0.695082


[I 2025-08-16 14:22:26,644] Trial 5 finished with value: 0.6950824774515098 and parameters: {'cycle_mult': 0.8500000000000001, 'max_lr': 0.0053375244051215, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.3, 'weight_decay': 0.0007842837916661569, 'beta_0': 0.9059627639628449, 'beta_1': 0.9992508503333548, 'aux_loss_weigth': 0.54}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6951 ± nan
Mean Best Binary F1: 0.9047 ± nan
Mean Best Macro F1: 0.4910 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7336, Macro F1 = 0.1715, Final Metric = 0.4525
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8592, Macro F1 = 0.2626, Final Metric = 0.5609
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8760, Macro F1 = 0.2992, Final Metric = 0.5876
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8158, Macro F1 = 0.3249, Final Metric = 0.5704
Epoch 05: Binary F1 = 0.8759, Macro F1 = 0.3371, Final Metric = 0.6065
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8593, Macro F1 = 0.3460, Final Metric = 0.6026
Epoch 07: Binary F1 = 0.9070, Macro F1 = 0.4007, Final Metric = 0.6538
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8796, Macro F1 = 0.4143, Final Metric = 0.6470
Epoch 09: Binary F1 = 0.8790, Macro F1 = 0.4062, Final Metric = 0.6426
Epoch 10: Binary F1 = 0.8780, Macro F1 = 0.3907, Final Metric = 0.6343
Epoch 11: Binary F1 = 0.9114, Macro F1 = 0.4149, Final Metric = 0.6632
  New best metric! Sa

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.911854,0.4863,0.696865


[I 2025-08-16 14:23:36,501] Trial 6 finished with value: 0.6968647511492845 and parameters: {'cycle_mult': 1.35, 'max_lr': 0.004309902030508373, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.3, 'weight_decay': 0.0005035527080196524, 'beta_0': 0.8657713577009972, 'beta_1': 0.9953750557862314, 'aux_loss_weigth': 0.57}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6969 ± nan
Mean Best Binary F1: 0.9119 ± nan
Mean Best Macro F1: 0.4863 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7525, Macro F1 = 0.1931, Final Metric = 0.4728
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8663, Macro F1 = 0.2924, Final Metric = 0.5793
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8638, Macro F1 = 0.3425, Final Metric = 0.6032
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8864, Macro F1 = 0.4006, Final Metric = 0.6435
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8633, Macro F1 = 0.3371, Final Metric = 0.6002
Epoch 06: Binary F1 = 0.8322, Macro F1 = 0.3241, Final Metric = 0.5782
Epoch 07: Binary F1 = 0.8877, Macro F1 = 0.3897, Final Metric = 0.6387
Epoch 08: Binary F1 = 0.8909, Macro F1 = 0.4006, Final Metric = 0.6458
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8818, Macro F1 = 0.3857, Final Metric = 0.6338
Epoch 10: Binary F1 = 0.9053, Macro F1 = 0.4002, Final Metric = 0.6528
  New best metric! Saving model...
Epoch 11: Binary F1 = 0.8753, Macro F1 = 0.4078, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.911885,0.47969,0.691829


[I 2025-08-16 14:24:36,982] Trial 7 finished with value: 0.6918285629977318 and parameters: {'cycle_mult': 1.1500000000000001, 'max_lr': 0.007784942665938542, 'init_cycle_epochs': 10, 'lr_cycle_factor': 0.5, 'weight_decay': 0.0005042631405773448, 'beta_0': 0.8939290240603287, 'beta_1': 0.9950989907739618, 'aux_loss_weigth': 0.48}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6918 ± nan
Mean Best Binary F1: 0.9119 ± nan
Mean Best Macro F1: 0.4797 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7824, Macro F1 = 0.1600, Final Metric = 0.4712
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8569, Macro F1 = 0.2533, Final Metric = 0.5551
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8715, Macro F1 = 0.2899, Final Metric = 0.5807
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8437, Macro F1 = 0.3429, Final Metric = 0.5933
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8875, Macro F1 = 0.3733, Final Metric = 0.6304
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8557, Macro F1 = 0.3735, Final Metric = 0.6146
Epoch 07: Binary F1 = 0.8911, Macro F1 = 0.3650, Final Metric = 0.6281
Epoch 08: Binary F1 = 0.8773, Macro F1 = 0.4176, Final Metric = 0.6474
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8822, Macro F1 = 0.3700, Final Metric = 0.6261
Epoch 10: Binary F1 = 0.8680, Macro F1 = 0.3949, Final Metric = 0.6315
Epoch 11: Binary F1 = 0.8469, Macro F1 = 0.4153, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.901961,0.497516,0.695743


[I 2025-08-16 14:25:51,060] Trial 8 finished with value: 0.6957434067804613 and parameters: {'cycle_mult': 0.9, 'max_lr': 0.00446159193645615, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.6, 'weight_decay': 0.0008700492322290636, 'beta_0': 0.9454110608473826, 'beta_1': 0.9955874178449956, 'aux_loss_weigth': 0.42}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6957 ± nan
Mean Best Binary F1: 0.9020 ± nan
Mean Best Macro F1: 0.4975 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7590, Macro F1 = 0.1717, Final Metric = 0.4653
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8803, Macro F1 = 0.2631, Final Metric = 0.5717
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8895, Macro F1 = 0.3621, Final Metric = 0.6258
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8137, Macro F1 = 0.3583, Final Metric = 0.5860
Epoch 05: Binary F1 = 0.8875, Macro F1 = 0.3332, Final Metric = 0.6104
Epoch 06: Binary F1 = 0.8381, Macro F1 = 0.3541, Final Metric = 0.5961
Epoch 07: Binary F1 = 0.8836, Macro F1 = 0.4021, Final Metric = 0.6428
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8599, Macro F1 = 0.4271, Final Metric = 0.6435
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8882, Macro F1 = 0.3999, Final Metric = 0.6440
  New best metric! Saving model...
Epoch 10: Binary F1 = 0.8821, Macro F1 = 0.3179, Final Metric = 0.6000
Epoch 11: Binary F1 = 0.8809, Macro F1 = 0.4024, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.893924,0.497458,0.692125


[I 2025-08-16 14:26:50,855] Trial 9 finished with value: 0.6921253471692167 and parameters: {'cycle_mult': 0.8500000000000001, 'max_lr': 0.007189985075018576, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.3, 'weight_decay': 0.0009325509162713781, 'beta_0': 0.8284219257384097, 'beta_1': 0.9954778704817149, 'aux_loss_weigth': 0.96}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6921 ± nan
Mean Best Binary F1: 0.8939 ± nan
Mean Best Macro F1: 0.4975 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.6956, Macro F1 = 0.1065, Final Metric = 0.4010
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.7785, Macro F1 = 0.1903, Final Metric = 0.4844
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8580, Macro F1 = 0.2471, Final Metric = 0.5526
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8709, Macro F1 = 0.2999, Final Metric = 0.5854
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8529, Macro F1 = 0.3257, Final Metric = 0.5893
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8601, Macro F1 = 0.3566, Final Metric = 0.6084
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.8812, Macro F1 = 0.3826, Final Metric = 0.6319
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8877, Macro F1 = 0.3899, Final Metric = 0.6388
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8811, Macro F1 = 0.3988, Final Metric = 0.6400
  New best metric! Saving model...
Epoch 10: Binary F1 = 0

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.910569,0.48197,0.691192


[I 2025-08-16 14:27:56,978] Trial 10 finished with value: 0.6911922406578338 and parameters: {'cycle_mult': 1.25, 'max_lr': 0.0037509001801456013, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.5, 'weight_decay': 0.0006832161176884816, 'beta_0': 0.9899601903608582, 'beta_1': 0.9912726633657085, 'aux_loss_weigth': 0.0}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6912 ± nan
Mean Best Binary F1: 0.9106 ± nan
Mean Best Macro F1: 0.4820 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7909, Macro F1 = 0.1885, Final Metric = 0.4897
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8282, Macro F1 = 0.2267, Final Metric = 0.5275
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9055, Macro F1 = 0.3378, Final Metric = 0.6216
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8863, Macro F1 = 0.3463, Final Metric = 0.6163
Epoch 05: Binary F1 = 0.8682, Macro F1 = 0.3595, Final Metric = 0.6139
Epoch 06: Binary F1 = 0.8445, Macro F1 = 0.3090, Final Metric = 0.5768
Epoch 07: Binary F1 = 0.9111, Macro F1 = 0.3873, Final Metric = 0.6492
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8697, Macro F1 = 0.4046, Final Metric = 0.6371
Epoch 09: Binary F1 = 0.8755, Macro F1 = 0.3910, Final Metric = 0.6333
Epoch 10: Binary F1 = 0.9094, Macro F1 = 0.4071, Final Metric = 0.6583
  New best metric! Saving model...
Epoch 11: Binary F1 = 0.8786, Macro F1 = 0.4101, Final Metric = 0.6444
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.913131,0.490224,0.696956


[I 2025-08-16 14:28:46,165] Trial 11 finished with value: 0.6969563974478186 and parameters: {'cycle_mult': 1.6, 'max_lr': 0.0064268285019057625, 'init_cycle_epochs': 2, 'lr_cycle_factor': 0.6, 'weight_decay': 0.0009991809758926212, 'beta_0': 0.8092400295046676, 'beta_1': 0.9910786913318904, 'aux_loss_weigth': 0.0}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6970 ± nan
Mean Best Binary F1: 0.9131 ± nan
Mean Best Macro F1: 0.4902 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.8105, Macro F1 = 0.1814, Final Metric = 0.4960
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8475, Macro F1 = 0.2532, Final Metric = 0.5504
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8633, Macro F1 = 0.3469, Final Metric = 0.6051
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8778, Macro F1 = 0.3556, Final Metric = 0.6167
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8814, Macro F1 = 0.3652, Final Metric = 0.6233
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8189, Macro F1 = 0.3350, Final Metric = 0.5770
Epoch 07: Binary F1 = 0.8974, Macro F1 = 0.4205, Final Metric = 0.6590
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8926, Macro F1 = 0.3740, Final Metric = 0.6333
Epoch 09: Binary F1 = 0.8627, Macro F1 = 0.3803, Final Metric = 0.6215
Epoch 10: Binary F1 = 0.8999, Macro F1 = 0.4538, Final Metric = 0.6769
  New best metric! Saving model...
Epoch 11: Binary F1 = 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.91129,0.480278,0.695034


[I 2025-08-16 14:29:52,815] Trial 12 finished with value: 0.695033712204743 and parameters: {'cycle_mult': 1.55, 'max_lr': 0.005255381579362428, 'init_cycle_epochs': 4, 'lr_cycle_factor': 0.5, 'weight_decay': 0.0007962078330161323, 'beta_0': 0.8491910279912853, 'beta_1': 0.9975592932453099, 'aux_loss_weigth': 0.21}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6950 ± nan
Mean Best Binary F1: 0.9113 ± nan
Mean Best Macro F1: 0.4803 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7525, Macro F1 = 0.1803, Final Metric = 0.4664
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8661, Macro F1 = 0.2595, Final Metric = 0.5628
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8493, Macro F1 = 0.2872, Final Metric = 0.5682
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8491, Macro F1 = 0.3701, Final Metric = 0.6096
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8947, Macro F1 = 0.3537, Final Metric = 0.6242
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8642, Macro F1 = 0.3721, Final Metric = 0.6182
Epoch 07: Binary F1 = 0.8914, Macro F1 = 0.3908, Final Metric = 0.6411
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8909, Macro F1 = 0.3741, Final Metric = 0.6325
Epoch 09: Binary F1 = 0.8984, Macro F1 = 0.3706, Final Metric = 0.6345
Epoch 10: Binary F1 = 0.8562, Macro F1 = 0.4057, Final Metric = 0.6310
Epoch 11: Binary F1 = 0.9010, Macro F1 = 0.4079, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.920082,0.500379,0.703122


[I 2025-08-16 14:31:05,652] Trial 13 finished with value: 0.7031222135204878 and parameters: {'cycle_mult': 1.4000000000000001, 'max_lr': 0.008306835422046816, 'init_cycle_epochs': 4, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0006841565472793117, 'beta_0': 0.9632005780872865, 'beta_1': 0.9926295032415311, 'aux_loss_weigth': 0.21}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7031 ± nan
Mean Best Binary F1: 0.9201 ± nan
Mean Best Macro F1: 0.5004 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7714, Macro F1 = 0.1747, Final Metric = 0.4730
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8533, Macro F1 = 0.2623, Final Metric = 0.5578
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8569, Macro F1 = 0.3231, Final Metric = 0.5900
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8099, Macro F1 = 0.3236, Final Metric = 0.5667
Epoch 05: Binary F1 = 0.8903, Macro F1 = 0.3608, Final Metric = 0.6255
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8728, Macro F1 = 0.3689, Final Metric = 0.6208
Epoch 07: Binary F1 = 0.9036, Macro F1 = 0.3674, Final Metric = 0.6355
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9035, Macro F1 = 0.3613, Final Metric = 0.6324
Epoch 09: Binary F1 = 0.8837, Macro F1 = 0.3436, Final Metric = 0.6136
Epoch 10: Binary F1 = 0.8732, Macro F1 = 0.4014, Final Metric = 0.6373
  New best metric! Saving model...
Epoch 11: Binary F1 = 0.8944, Macro F1 = 0.4119, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.911315,0.480923,0.696119


[I 2025-08-16 14:32:15,487] Trial 14 finished with value: 0.6961189200074941 and parameters: {'cycle_mult': 1.1, 'max_lr': 0.006893025879551263, 'init_cycle_epochs': 2, 'lr_cycle_factor': 0.45, 'weight_decay': 0.0008683193954856034, 'beta_0': 0.9338339786496943, 'beta_1': 0.9903029064387696, 'aux_loss_weigth': 0.12}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6961 ± nan
Mean Best Binary F1: 0.9113 ± nan
Mean Best Macro F1: 0.4809 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7632, Macro F1 = 0.1909, Final Metric = 0.4770
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8523, Macro F1 = 0.2601, Final Metric = 0.5562
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9016, Macro F1 = 0.2939, Final Metric = 0.5977
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8538, Macro F1 = 0.3098, Final Metric = 0.5818
Epoch 05: Binary F1 = 0.8840, Macro F1 = 0.3328, Final Metric = 0.6084
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8321, Macro F1 = 0.3094, Final Metric = 0.5707
Epoch 07: Binary F1 = 0.8846, Macro F1 = 0.3645, Final Metric = 0.6245
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8928, Macro F1 = 0.3964, Final Metric = 0.6446
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8816, Macro F1 = 0.3658, Final Metric = 0.6237
Epoch 10: Binary F1 = 0.8704, Macro F1 = 0.3926, Final Metric = 0.6315
Epoch 11: Binary F1 = 0.8993, Macro F1 = 0.4061, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.901575,0.492125,0.69622


[I 2025-08-16 14:33:29,389] Trial 15 finished with value: 0.6962198154109175 and parameters: {'cycle_mult': 1.4500000000000002, 'max_lr': 0.005415746690548205, 'init_cycle_epochs': 4, 'lr_cycle_factor': 0.4, 'weight_decay': 0.0007226747176513914, 'beta_0': 0.8621429827772944, 'beta_1': 0.9966916831829374, 'aux_loss_weigth': 0.75}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6962 ± nan
Mean Best Binary F1: 0.9016 ± nan
Mean Best Macro F1: 0.4921 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7741, Macro F1 = 0.2052, Final Metric = 0.4897
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8659, Macro F1 = 0.2381, Final Metric = 0.5520
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8382, Macro F1 = 0.3339, Final Metric = 0.5861
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8911, Macro F1 = 0.3528, Final Metric = 0.6220
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8869, Macro F1 = 0.3445, Final Metric = 0.6157
Epoch 06: Binary F1 = 0.8081, Macro F1 = 0.3539, Final Metric = 0.5810
Epoch 07: Binary F1 = 0.8896, Macro F1 = 0.4023, Final Metric = 0.6459
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9016, Macro F1 = 0.4156, Final Metric = 0.6586
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.9025, Macro F1 = 0.3514, Final Metric = 0.6270
Epoch 10: Binary F1 = 0.8898, Macro F1 = 0.4143, Final Metric = 0.6520
Epoch 11: Binary F1 = 0.9069, Macro F1 = 0.4202, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.918699,0.503134,0.706393
1,0.968,0.58883,0.774193
2,0.974257,0.607664,0.785387
3,0.979472,0.607921,0.792697
4,0.979631,0.63708,0.805738
5,0.986301,0.625754,0.805034


[I 2025-08-16 14:39:23,394] Trial 16 finished with value: 0.7782404180603795 and parameters: {'cycle_mult': 1.25, 'max_lr': 0.00578119421166697, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0008166882495606176, 'beta_0': 0.8020577438022727, 'beta_1': 0.9928583607035719, 'aux_loss_weigth': 0.32999999999999996}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7782 ± 0.0372
Mean Best Binary F1: 0.9677 ± 0.0248
Mean Best Macro F1: 0.5951 ± 0.0480





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7734, Macro F1 = 0.1749, Final Metric = 0.4742
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8569, Macro F1 = 0.2282, Final Metric = 0.5425
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8503, Macro F1 = 0.2818, Final Metric = 0.5660
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8829, Macro F1 = 0.3837, Final Metric = 0.6333
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8943, Macro F1 = 0.3808, Final Metric = 0.6376
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8403, Macro F1 = 0.3706, Final Metric = 0.6055
Epoch 07: Binary F1 = 0.8983, Macro F1 = 0.3984, Final Metric = 0.6484
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8780, Macro F1 = 0.3525, Final Metric = 0.6152
Epoch 09: Binary F1 = 0.8896, Macro F1 = 0.4012, Final Metric = 0.6454
Epoch 10: Binary F1 = 0.8761, Macro F1 = 0.3735, Final Metric = 0.6248
Epoch 11: Binary F1 = 0.8824, Macro F1 = 0.4165, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.910732,0.477073,0.685984


[I 2025-08-16 14:40:18,412] Trial 17 finished with value: 0.6859844405501928 and parameters: {'cycle_mult': 1.25, 'max_lr': 0.00569558461964787, 'init_cycle_epochs': 5, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0009489232313777552, 'beta_0': 0.9610621118635436, 'beta_1': 0.9926423710000554, 'aux_loss_weigth': 0.36}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6860 ± nan
Mean Best Binary F1: 0.9107 ± nan
Mean Best Macro F1: 0.4771 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7760, Macro F1 = 0.1767, Final Metric = 0.4764
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8552, Macro F1 = 0.2315, Final Metric = 0.5434
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8791, Macro F1 = 0.3212, Final Metric = 0.6001
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8706, Macro F1 = 0.3407, Final Metric = 0.6056
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8909, Macro F1 = 0.3710, Final Metric = 0.6310
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8866, Macro F1 = 0.3531, Final Metric = 0.6199
Epoch 07: Binary F1 = 0.8926, Macro F1 = 0.3995, Final Metric = 0.6460
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9109, Macro F1 = 0.4165, Final Metric = 0.6637
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8872, Macro F1 = 0.3805, Final Metric = 0.6339
Epoch 10: Binary F1 = 0.8859, Macro F1 = 0.4011, Final Metric = 0.6435
Epoch 11: Binary F1 = 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.910931,0.487947,0.69361


[I 2025-08-16 14:41:32,335] Trial 18 finished with value: 0.6936100326406713 and parameters: {'cycle_mult': 1.05, 'max_lr': 0.004851701530640543, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.45, 'weight_decay': 0.0006203177036362908, 'beta_0': 0.8765120611442218, 'beta_1': 0.9925155965933138, 'aux_loss_weigth': 0.3}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6936 ± nan
Mean Best Binary F1: 0.9109 ± nan
Mean Best Macro F1: 0.4879 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7850, Macro F1 = 0.1732, Final Metric = 0.4791
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8509, Macro F1 = 0.2376, Final Metric = 0.5442
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9063, Macro F1 = 0.3481, Final Metric = 0.6272
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8634, Macro F1 = 0.3221, Final Metric = 0.5927
Epoch 05: Binary F1 = 0.8487, Macro F1 = 0.3566, Final Metric = 0.6026
Epoch 06: Binary F1 = 0.8509, Macro F1 = 0.3244, Final Metric = 0.5876
Epoch 07: Binary F1 = 0.9022, Macro F1 = 0.4124, Final Metric = 0.6573
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9028, Macro F1 = 0.4259, Final Metric = 0.6643
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8565, Macro F1 = 0.4062, Final Metric = 0.6314
Epoch 10: Binary F1 = 0.8777, Macro F1 = 0.4134, Final Metric = 0.6456
Epoch 11: Binary F1 = 0.8968, Macro F1 = 0.4097, Final Metric = 0.6533
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.911315,0.488869,0.695024


[I 2025-08-16 14:42:27,579] Trial 19 finished with value: 0.6950241291294473 and parameters: {'cycle_mult': 1.2000000000000002, 'max_lr': 0.0037367215644999877, 'init_cycle_epochs': 5, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0008023560322343247, 'beta_0': 0.840011118350898, 'beta_1': 0.9934903077575484, 'aux_loss_weigth': 0.12}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6950 ± nan
Mean Best Binary F1: 0.9113 ± nan
Mean Best Macro F1: 0.4889 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7955, Macro F1 = 0.1849, Final Metric = 0.4902
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8594, Macro F1 = 0.2671, Final Metric = 0.5633
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8737, Macro F1 = 0.3025, Final Metric = 0.5881
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8300, Macro F1 = 0.3541, Final Metric = 0.5920
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8797, Macro F1 = 0.3954, Final Metric = 0.6376
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8364, Macro F1 = 0.3415, Final Metric = 0.5889
Epoch 07: Binary F1 = 0.8822, Macro F1 = 0.4299, Final Metric = 0.6560
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8623, Macro F1 = 0.3633, Final Metric = 0.6128
Epoch 09: Binary F1 = 0.8857, Macro F1 = 0.3755, Final Metric = 0.6306
Epoch 10: Binary F1 = 0.8855, Macro F1 = 0.3808, Final Metric = 0.6332
Epoch 11: Binary F1 = 0.8945, Macro F1 = 0.4133, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.894515,0.49318,0.691959


[I 2025-08-16 14:43:27,449] Trial 20 finished with value: 0.691959377414704 and parameters: {'cycle_mult': 1.3, 'max_lr': 0.005727906681553038, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.4, 'weight_decay': 0.0006738753046953709, 'beta_0': 0.9341777023097015, 'beta_1': 0.9900731818832431, 'aux_loss_weigth': 0.6599999999999999}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6920 ± nan
Mean Best Binary F1: 0.8945 ± nan
Mean Best Macro F1: 0.4932 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7764, Macro F1 = 0.2039, Final Metric = 0.4902
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8372, Macro F1 = 0.2593, Final Metric = 0.5482
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8919, Macro F1 = 0.3553, Final Metric = 0.6236
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8807, Macro F1 = 0.3332, Final Metric = 0.6069
Epoch 05: Binary F1 = 0.8884, Macro F1 = 0.3658, Final Metric = 0.6271
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.7618, Macro F1 = 0.3165, Final Metric = 0.5392
Epoch 07: Binary F1 = 0.9131, Macro F1 = 0.3612, Final Metric = 0.6372
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8777, Macro F1 = 0.4103, Final Metric = 0.6440
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8849, Macro F1 = 0.3739, Final Metric = 0.6294
Epoch 10: Binary F1 = 0.8884, Macro F1 = 0.4331, Final Metric = 0.6607
  New best metric! Saving model...
Epoch 11: Binary F1 = 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.913131,0.510346,0.70841
1,0.958,0.579049,0.767568


[I 2025-08-16 14:45:44,082] Trial 21 finished with value: 0.7379892793907907 and parameters: {'cycle_mult': 1.5, 'max_lr': 0.006459814939014678, 'init_cycle_epochs': 2, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0008442103184308215, 'beta_0': 0.8067830729629702, 'beta_1': 0.9998844447814769, 'aux_loss_weigth': 0.06}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7380 ± 0.0418
Mean Best Binary F1: 0.9356 ± 0.0317
Mean Best Macro F1: 0.5447 ± 0.0486





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.8272, Macro F1 = 0.1983, Final Metric = 0.5127
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8607, Macro F1 = 0.2257, Final Metric = 0.5432
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8904, Macro F1 = 0.3579, Final Metric = 0.6241
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8527, Macro F1 = 0.3297, Final Metric = 0.5912
Epoch 05: Binary F1 = 0.8687, Macro F1 = 0.3150, Final Metric = 0.5918
Epoch 06: Binary F1 = 0.8320, Macro F1 = 0.3022, Final Metric = 0.5671
Epoch 07: Binary F1 = 0.9104, Macro F1 = 0.4200, Final Metric = 0.6652
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8994, Macro F1 = 0.4068, Final Metric = 0.6531
Epoch 09: Binary F1 = 0.8490, Macro F1 = 0.3622, Final Metric = 0.6056
Epoch 10: Binary F1 = 0.8843, Macro F1 = 0.3909, Final Metric = 0.6376
Epoch 11: Binary F1 = 0.8896, Macro F1 = 0.3494, Final Metric = 0.6195
Epoch 12: Binary F1 = 0.8812, Macro F1 = 0.4171, Final M

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.910359,0.496801,0.693077


[I 2025-08-16 14:46:42,824] Trial 22 finished with value: 0.6930769327096481 and parameters: {'cycle_mult': 1.4500000000000002, 'max_lr': 0.006654045519687663, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0009092394207550153, 'beta_0': 0.8251018242707153, 'beta_1': 0.9912574908300706, 'aux_loss_weigth': 0.12}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6931 ± nan
Mean Best Binary F1: 0.9104 ± nan
Mean Best Macro F1: 0.4968 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7872, Macro F1 = 0.1940, Final Metric = 0.4906
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8596, Macro F1 = 0.2509, Final Metric = 0.5552
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9030, Macro F1 = 0.3288, Final Metric = 0.6159
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8912, Macro F1 = 0.3170, Final Metric = 0.6041
Epoch 05: Binary F1 = 0.8794, Macro F1 = 0.3241, Final Metric = 0.6018
Epoch 06: Binary F1 = 0.7995, Macro F1 = 0.3431, Final Metric = 0.5713
Epoch 07: Binary F1 = 0.8959, Macro F1 = 0.3718, Final Metric = 0.6339
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9093, Macro F1 = 0.4149, Final Metric = 0.6621
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8667, Macro F1 = 0.3865, Final Metric = 0.6266
Epoch 10: Binary F1 = 0.8855, Macro F1 = 0.4078, Final Metric = 0.6467
Epoch 11: Binary F1 = 0.9020, Macro F1 = 0.3914, Final Metric = 0.6467
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.909272,0.481045,0.690624


[I 2025-08-16 14:47:43,037] Trial 23 finished with value: 0.6906243176700498 and parameters: {'cycle_mult': 1.0, 'max_lr': 0.005944600741141472, 'init_cycle_epochs': 2, 'lr_cycle_factor': 0.5, 'weight_decay': 0.0007714411355774434, 'beta_0': 0.8168257294225333, 'beta_1': 0.9998899633328435, 'aux_loss_weigth': 0.09}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6906 ± nan
Mean Best Binary F1: 0.9093 ± nan
Mean Best Macro F1: 0.4810 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7823, Macro F1 = 0.1626, Final Metric = 0.4725
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8797, Macro F1 = 0.2609, Final Metric = 0.5703
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8941, Macro F1 = 0.3330, Final Metric = 0.6135
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8144, Macro F1 = 0.3260, Final Metric = 0.5702
Epoch 05: Binary F1 = 0.8889, Macro F1 = 0.3348, Final Metric = 0.6118
Epoch 06: Binary F1 = 0.8549, Macro F1 = 0.3148, Final Metric = 0.5849
Epoch 07: Binary F1 = 0.9079, Macro F1 = 0.3993, Final Metric = 0.6536
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9025, Macro F1 = 0.3740, Final Metric = 0.6382
Epoch 09: Binary F1 = 0.8923, Macro F1 = 0.4036, Final Metric = 0.6480
Epoch 10: Binary F1 = 0.9039, Macro F1 = 0.3948, Final Metric = 0.6494
Epoch 11: Binary F1 = 0.9070, Macro F1 = 0.4399, Final Metric = 0.6735
  New best metric! Saving model...
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.907856,0.495725,0.696202


[I 2025-08-16 14:48:57,309] Trial 24 finished with value: 0.6962015374308299 and parameters: {'cycle_mult': 1.1500000000000001, 'max_lr': 0.004950783831687027, 'init_cycle_epochs': 4, 'lr_cycle_factor': 0.6, 'weight_decay': 0.0008282912845469947, 'beta_0': 0.8027520965880166, 'beta_1': 0.9939690810978942, 'aux_loss_weigth': 0.32999999999999996}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6962 ± nan
Mean Best Binary F1: 0.9079 ± nan
Mean Best Macro F1: 0.4957 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7483, Macro F1 = 0.1832, Final Metric = 0.4658
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8614, Macro F1 = 0.2562, Final Metric = 0.5588
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9029, Macro F1 = 0.3558, Final Metric = 0.6294
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8197, Macro F1 = 0.3213, Final Metric = 0.5705
Epoch 05: Binary F1 = 0.8720, Macro F1 = 0.3056, Final Metric = 0.5888
Epoch 06: Binary F1 = 0.8222, Macro F1 = 0.3309, Final Metric = 0.5766
Epoch 07: Binary F1 = 0.9052, Macro F1 = 0.4278, Final Metric = 0.6665
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8951, Macro F1 = 0.3976, Final Metric = 0.6464
Epoch 09: Binary F1 = 0.8911, Macro F1 = 0.3811, Final Metric = 0.6361
Epoch 10: Binary F1 = 0.9167, Macro F1 = 0.4114, Final Metric = 0.6641
Epoch 11: Binary F1 = 0.9000, Macro F1 = 0.4101, Final Metric = 0.6551
Epoch 12: Binary F1 = 0.8905, Macro F1 = 0.3992, Final M

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.916748,0.493904,0.700988


[I 2025-08-16 14:50:05,167] Trial 25 finished with value: 0.7009882861608153 and parameters: {'cycle_mult': 1.35, 'max_lr': 0.006306668267065028, 'init_cycle_epochs': 5, 'lr_cycle_factor': 0.55, 'weight_decay': 0.000905621434723567, 'beta_0': 0.8390856531859597, 'beta_1': 0.9917824797310146, 'aux_loss_weigth': 0.21}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7010 ± nan
Mean Best Binary F1: 0.9167 ± nan
Mean Best Macro F1: 0.4939 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.8140, Macro F1 = 0.1972, Final Metric = 0.5056
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8501, Macro F1 = 0.2545, Final Metric = 0.5523
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8920, Macro F1 = 0.3443, Final Metric = 0.6181
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8389, Macro F1 = 0.3451, Final Metric = 0.5920
Epoch 05: Binary F1 = 0.8429, Macro F1 = 0.3113, Final Metric = 0.5771
Epoch 06: Binary F1 = 0.8465, Macro F1 = 0.3630, Final Metric = 0.6047
Epoch 07: Binary F1 = 0.9068, Macro F1 = 0.3884, Final Metric = 0.6476
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8933, Macro F1 = 0.3799, Final Metric = 0.6366
Epoch 09: Binary F1 = 0.9054, Macro F1 = 0.3833, Final Metric = 0.6443
Epoch 10: Binary F1 = 0.8983, Macro F1 = 0.4212, Final Metric = 0.6597
  New best metric! Saving model...
Epoch 11: Binary F1 = 0.9037, Macro F1 = 0.4065, Final Metric = 0.6551
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.911111,0.511612,0.705226


[I 2025-08-16 14:51:13,488] Trial 26 finished with value: 0.7052262842223385 and parameters: {'cycle_mult': 1.5, 'max_lr': 0.006988156697518414, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.45, 'weight_decay': 0.000748935560037522, 'beta_0': 0.8761909741681878, 'beta_1': 0.9973712530707102, 'aux_loss_weigth': 0.06}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7052 ± nan
Mean Best Binary F1: 0.9111 ± nan
Mean Best Macro F1: 0.5116 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7888, Macro F1 = 0.1718, Final Metric = 0.4803
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8569, Macro F1 = 0.2428, Final Metric = 0.5498
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8931, Macro F1 = 0.3448, Final Metric = 0.6190
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8703, Macro F1 = 0.3338, Final Metric = 0.6020
Epoch 05: Binary F1 = 0.8909, Macro F1 = 0.3153, Final Metric = 0.6031
Epoch 06: Binary F1 = 0.8171, Macro F1 = 0.3454, Final Metric = 0.5813
Epoch 07: Binary F1 = 0.8893, Macro F1 = 0.3786, Final Metric = 0.6340
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8836, Macro F1 = 0.4171, Final Metric = 0.6503
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8669, Macro F1 = 0.3382, Final Metric = 0.6025
Epoch 10: Binary F1 = 0.8959, Macro F1 = 0.4113, Final Metric = 0.6536
  New best metric! Saving model...
Epoch 11: Binary F1 = 0.9013, Macro F1 = 0.3873, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.904366,0.483277,0.689473


[I 2025-08-16 14:52:27,331] Trial 27 finished with value: 0.6894728416621666 and parameters: {'cycle_mult': 1.05, 'max_lr': 0.0073555109563227155, 'init_cycle_epochs': 2, 'lr_cycle_factor': 0.5, 'weight_decay': 0.0008236511502322843, 'beta_0': 0.8000484211723112, 'beta_1': 0.9920304620852897, 'aux_loss_weigth': 0.27}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6895 ± nan
Mean Best Binary F1: 0.9044 ± nan
Mean Best Macro F1: 0.4833 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7724, Macro F1 = 0.1537, Final Metric = 0.4630
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8182, Macro F1 = 0.2203, Final Metric = 0.5192
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8494, Macro F1 = 0.2584, Final Metric = 0.5539
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8506, Macro F1 = 0.2847, Final Metric = 0.5677
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8642, Macro F1 = 0.3187, Final Metric = 0.5914
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8774, Macro F1 = 0.3136, Final Metric = 0.5955
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.8565, Macro F1 = 0.3543, Final Metric = 0.6054
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8596, Macro F1 = 0.3847, Final Metric = 0.6221
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8733, Macro F1 = 0.3836, Final Metric = 0.6284
  New best metric! Saving model...
Epoch 10: Binary F1 = 0

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.901639,0.460383,0.675936


[I 2025-08-16 14:53:35,484] Trial 28 finished with value: 0.6759362938108758 and parameters: {'cycle_mult': 1.2000000000000002, 'max_lr': 0.004167047862280614, 'init_cycle_epochs': 4, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0009728654325085906, 'beta_0': 0.9956634123693521, 'beta_1': 0.9942437770250054, 'aux_loss_weigth': 0.39}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6759 ± nan
Mean Best Binary F1: 0.9016 ± nan
Mean Best Macro F1: 0.4604 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7875, Macro F1 = 0.2075, Final Metric = 0.4975
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8353, Macro F1 = 0.2862, Final Metric = 0.5608
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8836, Macro F1 = 0.3199, Final Metric = 0.6017
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8790, Macro F1 = 0.3472, Final Metric = 0.6131
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8462, Macro F1 = 0.3096, Final Metric = 0.5779
Epoch 06: Binary F1 = 0.8193, Macro F1 = 0.3536, Final Metric = 0.5865
Epoch 07: Binary F1 = 0.8905, Macro F1 = 0.3994, Final Metric = 0.6449
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8959, Macro F1 = 0.3487, Final Metric = 0.6223
Epoch 09: Binary F1 = 0.8811, Macro F1 = 0.3686, Final Metric = 0.6248
Epoch 10: Binary F1 = 0.8442, Macro F1 = 0.3693, Final Metric = 0.6068
Epoch 11: Binary F1 = 0.9071, Macro F1 = 0.4283, Final Metric = 0.6677
  New best metric! Sa

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.924471,0.498564,0.708165
1,0.966337,0.575228,0.767256


[I 2025-08-16 14:55:40,934] Trial 29 finished with value: 0.7377106502421807 and parameters: {'cycle_mult': 1.3, 'max_lr': 0.006224009354639957, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.25, 'weight_decay': 0.0007259148352704511, 'beta_0': 0.8926526459260292, 'beta_1': 0.9932277857384868, 'aux_loss_weigth': 0.18}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7377 ± 0.0418
Mean Best Binary F1: 0.9454 ± 0.0296
Mean Best Macro F1: 0.5369 ± 0.0542





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.8082, Macro F1 = 0.1857, Final Metric = 0.4969
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8503, Macro F1 = 0.2707, Final Metric = 0.5605
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.9004, Macro F1 = 0.3300, Final Metric = 0.6152
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8593, Macro F1 = 0.3201, Final Metric = 0.5897
Epoch 05: Binary F1 = 0.8803, Macro F1 = 0.3554, Final Metric = 0.6179
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8750, Macro F1 = 0.3643, Final Metric = 0.6196
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.9050, Macro F1 = 0.3866, Final Metric = 0.6458
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8935, Macro F1 = 0.3545, Final Metric = 0.6240
Epoch 09: Binary F1 = 0.8672, Macro F1 = 0.3921, Final Metric = 0.6297
Epoch 10: Binary F1 = 0.8709, Macro F1 = 0.4135, Final Metric = 0.6422
Epoch 11: Binary F1 = 0.8964, Macro F1 = 0.3902, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.91498,0.504861,0.704349


[I 2025-08-16 14:56:54,646] Trial 30 finished with value: 0.7043486604714587 and parameters: {'cycle_mult': 0.9500000000000001, 'max_lr': 0.004854210030301031, 'init_cycle_epochs': 5, 'lr_cycle_factor': 0.6, 'weight_decay': 0.0008780983755917026, 'beta_0': 0.8236341191438679, 'beta_1': 0.9906853275413146, 'aux_loss_weigth': 0.06}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7043 ± nan
Mean Best Binary F1: 0.9150 ± nan
Mean Best Macro F1: 0.5049 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7562, Macro F1 = 0.1795, Final Metric = 0.4679
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8425, Macro F1 = 0.2476, Final Metric = 0.5450
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8811, Macro F1 = 0.3194, Final Metric = 0.6002
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8277, Macro F1 = 0.3230, Final Metric = 0.5754
Epoch 05: Binary F1 = 0.8964, Macro F1 = 0.3773, Final Metric = 0.6368
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8293, Macro F1 = 0.3735, Final Metric = 0.6014
Epoch 07: Binary F1 = 0.9065, Macro F1 = 0.3926, Final Metric = 0.6495
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8923, Macro F1 = 0.3639, Final Metric = 0.6281
Epoch 09: Binary F1 = 0.8755, Macro F1 = 0.3915, Final Metric = 0.6335
Epoch 10: Binary F1 = 0.8873, Macro F1 = 0.3872, Final Metric = 0.6372
Epoch 11: Binary F1 = 0.8835, Macro F1 = 0.4272, Final Metric = 0.6554
  New best metric! Sa

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.912821,0.500519,0.702567


[I 2025-08-16 14:57:55,933] Trial 31 finished with value: 0.7025672222894406 and parameters: {'cycle_mult': 1.3, 'max_lr': 0.006059108831945029, 'init_cycle_epochs': 8, 'lr_cycle_factor': 0.25, 'weight_decay': 0.0007252920687180701, 'beta_0': 0.8978496093034802, 'beta_1': 0.9929838217609255, 'aux_loss_weigth': 0.18}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7026 ± nan
Mean Best Binary F1: 0.9128 ± nan
Mean Best Macro F1: 0.5005 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.8032, Macro F1 = 0.1728, Final Metric = 0.4880
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8660, Macro F1 = 0.2403, Final Metric = 0.5532
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8577, Macro F1 = 0.2931, Final Metric = 0.5754
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8652, Macro F1 = 0.3458, Final Metric = 0.6055
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8925, Macro F1 = 0.3929, Final Metric = 0.6427
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8658, Macro F1 = 0.3530, Final Metric = 0.6094
Epoch 07: Binary F1 = 0.8534, Macro F1 = 0.4105, Final Metric = 0.6319
Epoch 08: Binary F1 = 0.8905, Macro F1 = 0.3749, Final Metric = 0.6327
Epoch 09: Binary F1 = 0.9002, Macro F1 = 0.4020, Final Metric = 0.6511
  New best metric! Saving model...
Epoch 10: Binary F1 = 0.8562, Macro F1 = 0.3851, Final Metric = 0.6207
Epoch 11: Binary F1 = 0.8978, Macro F1 = 0.4530, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.918699,0.49019,0.70216


[I 2025-08-16 14:59:10,767] Trial 32 finished with value: 0.7021600152792876 and parameters: {'cycle_mult': 1.1500000000000001, 'max_lr': 0.006609007985393466, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.25, 'weight_decay': 0.0007532878184004685, 'beta_0': 0.9675660991243027, 'beta_1': 0.9944654747114536, 'aux_loss_weigth': 0.15}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7022 ± nan
Mean Best Binary F1: 0.9187 ± nan
Mean Best Macro F1: 0.4902 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7866, Macro F1 = 0.1822, Final Metric = 0.4844
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8569, Macro F1 = 0.2304, Final Metric = 0.5436
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8834, Macro F1 = 0.3481, Final Metric = 0.6157
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8615, Macro F1 = 0.3357, Final Metric = 0.5986
Epoch 05: Binary F1 = 0.8825, Macro F1 = 0.3763, Final Metric = 0.6294
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.7913, Macro F1 = 0.3406, Final Metric = 0.5660
Epoch 07: Binary F1 = 0.8966, Macro F1 = 0.3754, Final Metric = 0.6360
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8916, Macro F1 = 0.3781, Final Metric = 0.6348
Epoch 09: Binary F1 = 0.8842, Macro F1 = 0.4022, Final Metric = 0.6432
  New best metric! Saving model...
Epoch 10: Binary F1 = 0.8887, Macro F1 = 0.3722, Final Metric = 0.6305
Epoch 11: Binary F1 = 0.8882, Macro F1 = 0.4090, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.910952,0.494346,0.691266


[I 2025-08-16 15:00:09,730] Trial 33 finished with value: 0.6912657088820289 and parameters: {'cycle_mult': 1.4000000000000001, 'max_lr': 0.005855880314167729, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.35, 'weight_decay': 0.0008141580461874676, 'beta_0': 0.9127815795305407, 'beta_1': 0.9933833812393373, 'aux_loss_weigth': 0.27}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6913 ± nan
Mean Best Binary F1: 0.9110 ± nan
Mean Best Macro F1: 0.4943 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.8046, Macro F1 = 0.2047, Final Metric = 0.5047
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8569, Macro F1 = 0.2616, Final Metric = 0.5592
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8799, Macro F1 = 0.3364, Final Metric = 0.6082
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8363, Macro F1 = 0.3575, Final Metric = 0.5969
Epoch 05: Binary F1 = 0.8938, Macro F1 = 0.3616, Final Metric = 0.6277
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8066, Macro F1 = 0.3427, Final Metric = 0.5746
Epoch 07: Binary F1 = 0.8990, Macro F1 = 0.3977, Final Metric = 0.6483
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8837, Macro F1 = 0.4108, Final Metric = 0.6472
Epoch 09: Binary F1 = 0.8654, Macro F1 = 0.3921, Final Metric = 0.6288
Epoch 10: Binary F1 = 0.8976, Macro F1 = 0.4317, Final Metric = 0.6647
  New best metric! Saving model...
Epoch 11: Binary F1 = 0.9074, Macro F1 = 0.3858, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.910586,0.495152,0.702597


[I 2025-08-16 15:01:19,703] Trial 34 finished with value: 0.7025970004336928 and parameters: {'cycle_mult': 1.25, 'max_lr': 0.006204267626669194, 'init_cycle_epochs': 8, 'lr_cycle_factor': 0.35, 'weight_decay': 0.0007078142766392195, 'beta_0': 0.8521361624396132, 'beta_1': 0.9962222579554165, 'aux_loss_weigth': 0.06}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7026 ± nan
Mean Best Binary F1: 0.9106 ± nan
Mean Best Macro F1: 0.4952 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7914, Macro F1 = 0.1824, Final Metric = 0.4869
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8470, Macro F1 = 0.2934, Final Metric = 0.5702
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8907, Macro F1 = 0.3442, Final Metric = 0.6174
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8381, Macro F1 = 0.3437, Final Metric = 0.5909
Epoch 05: Binary F1 = 0.8841, Macro F1 = 0.3304, Final Metric = 0.6073
Epoch 06: Binary F1 = 0.8146, Macro F1 = 0.3141, Final Metric = 0.5643
Epoch 07: Binary F1 = 0.9100, Macro F1 = 0.4099, Final Metric = 0.6600
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9134, Macro F1 = 0.4295, Final Metric = 0.6715
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8936, Macro F1 = 0.3911, Final Metric = 0.6423
Epoch 10: Binary F1 = 0.8988, Macro F1 = 0.3880, Final Metric = 0.6434
Epoch 11: Binary F1 = 0.8830, Macro F1 = 0.3998, Final Metric = 0.6414
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.916917,0.489383,0.700076


[I 2025-08-16 15:02:29,639] Trial 35 finished with value: 0.7000762215113343 and parameters: {'cycle_mult': 1.1, 'max_lr': 0.005515937700171414, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.45, 'weight_decay': 0.0006237961184259126, 'beta_0': 0.888764100406536, 'beta_1': 0.9918841258733638, 'aux_loss_weigth': 0.3}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7001 ± nan
Mean Best Binary F1: 0.9169 ± nan
Mean Best Macro F1: 0.4894 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7983, Macro F1 = 0.1971, Final Metric = 0.4977
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8527, Macro F1 = 0.2426, Final Metric = 0.5476
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8527, Macro F1 = 0.3427, Final Metric = 0.5977
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8157, Macro F1 = 0.3491, Final Metric = 0.5824
Epoch 05: Binary F1 = 0.8670, Macro F1 = 0.3851, Final Metric = 0.6260
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8601, Macro F1 = 0.3526, Final Metric = 0.6064
Epoch 07: Binary F1 = 0.9028, Macro F1 = 0.4137, Final Metric = 0.6583
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9009, Macro F1 = 0.3764, Final Metric = 0.6387
Epoch 09: Binary F1 = 0.8904, Macro F1 = 0.3848, Final Metric = 0.6376
Epoch 10: Binary F1 = 0.8710, Macro F1 = 0.4068, Final Metric = 0.6389
Epoch 11: Binary F1 = 0.8898, Macro F1 = 0.4116, Final Metric = 0.6507
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.913481,0.490547,0.698607


[I 2025-08-16 15:03:43,191] Trial 36 finished with value: 0.6986065838506842 and parameters: {'cycle_mult': 1.3, 'max_lr': 0.005153037479818448, 'init_cycle_epochs': 8, 'lr_cycle_factor': 0.4, 'weight_decay': 0.0008543039366008189, 'beta_0': 0.9265819026200064, 'beta_1': 0.9983097962648242, 'aux_loss_weigth': 0.18}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6986 ± nan
Mean Best Binary F1: 0.9135 ± nan
Mean Best Macro F1: 0.4905 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7975, Macro F1 = 0.1930, Final Metric = 0.4953
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8219, Macro F1 = 0.2291, Final Metric = 0.5255
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8628, Macro F1 = 0.2887, Final Metric = 0.5758
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8728, Macro F1 = 0.3353, Final Metric = 0.6041
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8765, Macro F1 = 0.3808, Final Metric = 0.6287
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8631, Macro F1 = 0.3828, Final Metric = 0.6229
Epoch 07: Binary F1 = 0.8724, Macro F1 = 0.3721, Final Metric = 0.6223
Epoch 08: Binary F1 = 0.8668, Macro F1 = 0.4106, Final Metric = 0.6387
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.9063, Macro F1 = 0.3961, Final Metric = 0.6512
  New best metric! Saving model...
Epoch 10: Binary F1 = 0.8925, Macro F1 = 0.4300, Final Metric = 0.6613
  New best metric! Sav

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.917577,0.483649,0.694132


[I 2025-08-16 15:04:57,143] Trial 37 finished with value: 0.6941323699095607 and parameters: {'cycle_mult': 1.6, 'max_lr': 0.0066251956968984725, 'init_cycle_epochs': 9, 'lr_cycle_factor': 0.5, 'weight_decay': 0.0007656747544243087, 'beta_0': 0.9764016215931669, 'beta_1': 0.9947148372389127, 'aux_loss_weigth': 0.44999999999999996}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6941 ± nan
Mean Best Binary F1: 0.9176 ± nan
Mean Best Macro F1: 0.4836 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7879, Macro F1 = 0.2016, Final Metric = 0.4947
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8444, Macro F1 = 0.2702, Final Metric = 0.5573
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8831, Macro F1 = 0.3593, Final Metric = 0.6212
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8832, Macro F1 = 0.3462, Final Metric = 0.6147
Epoch 05: Binary F1 = 0.8729, Macro F1 = 0.3511, Final Metric = 0.6120
Epoch 06: Binary F1 = 0.8746, Macro F1 = 0.3518, Final Metric = 0.6132
Epoch 07: Binary F1 = 0.9173, Macro F1 = 0.3643, Final Metric = 0.6408
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9043, Macro F1 = 0.4034, Final Metric = 0.6539
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8644, Macro F1 = 0.3886, Final Metric = 0.6265
Epoch 10: Binary F1 = 0.9114, Macro F1 = 0.3886, Final Metric = 0.6500
Epoch 11: Binary F1 = 0.8856, Macro F1 = 0.4158, Final Metric = 0.6507
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.917263,0.486382,0.699512


[I 2025-08-16 15:05:49,867] Trial 38 finished with value: 0.6995115826691883 and parameters: {'cycle_mult': 1.35, 'max_lr': 0.007376159386535594, 'init_cycle_epochs': 2, 'lr_cycle_factor': 0.6, 'weight_decay': 0.0005585952153413778, 'beta_0': 0.816431772153366, 'beta_1': 0.9931107150849056, 'aux_loss_weigth': 0.0}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6995 ± nan
Mean Best Binary F1: 0.9173 ± nan
Mean Best Macro F1: 0.4864 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7799, Macro F1 = 0.1809, Final Metric = 0.4804
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8604, Macro F1 = 0.2256, Final Metric = 0.5430
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8789, Macro F1 = 0.3293, Final Metric = 0.6041
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8274, Macro F1 = 0.3531, Final Metric = 0.5903
Epoch 05: Binary F1 = 0.8742, Macro F1 = 0.3113, Final Metric = 0.5928
Epoch 06: Binary F1 = 0.8198, Macro F1 = 0.3478, Final Metric = 0.5838
Epoch 07: Binary F1 = 0.9024, Macro F1 = 0.4059, Final Metric = 0.6542
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8865, Macro F1 = 0.3892, Final Metric = 0.6378
Epoch 09: Binary F1 = 0.8984, Macro F1 = 0.3448, Final Metric = 0.6216
Epoch 10: Binary F1 = 0.8978, Macro F1 = 0.3770, Final Metric = 0.6374
Epoch 11: Binary F1 = 0.8815, Macro F1 = 0.3681, Final Metric = 0.6248
Epoch 12: Binary F1 = 0.9135, Macro F1 = 0.4398, Final M

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.91353,0.490629,0.69241


[I 2025-08-16 15:06:58,906] Trial 39 finished with value: 0.6924101622914822 and parameters: {'cycle_mult': 1.5, 'max_lr': 0.00781126247140095, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0007926816029623884, 'beta_0': 0.8349962895169518, 'beta_1': 0.9939524046118752, 'aux_loss_weigth': 0.99}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6924 ± nan
Mean Best Binary F1: 0.9135 ± nan
Mean Best Macro F1: 0.4906 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7827, Macro F1 = 0.1861, Final Metric = 0.4844
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8647, Macro F1 = 0.2419, Final Metric = 0.5533
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8457, Macro F1 = 0.3107, Final Metric = 0.5782
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8643, Macro F1 = 0.3739, Final Metric = 0.6191
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8953, Macro F1 = 0.3631, Final Metric = 0.6292
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8589, Macro F1 = 0.3747, Final Metric = 0.6168
Epoch 07: Binary F1 = 0.9076, Macro F1 = 0.3935, Final Metric = 0.6506
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8941, Macro F1 = 0.3819, Final Metric = 0.6380
Epoch 09: Binary F1 = 0.8903, Macro F1 = 0.3852, Final Metric = 0.6377
Epoch 10: Binary F1 = 0.8601, Macro F1 = 0.4042, Final Metric = 0.6322
Epoch 11: Binary F1 = 0.8896, Macro F1 = 0.4206, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.90853,0.5037,0.703296


[I 2025-08-16 15:07:57,308] Trial 40 finished with value: 0.7032960681243543 and parameters: {'cycle_mult': 1.1, 'max_lr': 0.006405369462403425, 'init_cycle_epochs': 9, 'lr_cycle_factor': 0.25, 'weight_decay': 0.0008436952641484049, 'beta_0': 0.9525150651361342, 'beta_1': 0.9922515112198153, 'aux_loss_weigth': 0.24}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7033 ± nan
Mean Best Binary F1: 0.9085 ± nan
Mean Best Macro F1: 0.5037 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7781, Macro F1 = 0.1923, Final Metric = 0.4852
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8455, Macro F1 = 0.2606, Final Metric = 0.5530
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8764, Macro F1 = 0.3552, Final Metric = 0.6158
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8337, Macro F1 = 0.3351, Final Metric = 0.5844
Epoch 05: Binary F1 = 0.8949, Macro F1 = 0.3522, Final Metric = 0.6236
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8464, Macro F1 = 0.3485, Final Metric = 0.5974
Epoch 07: Binary F1 = 0.8882, Macro F1 = 0.3921, Final Metric = 0.6402
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8475, Macro F1 = 0.3990, Final Metric = 0.6233
Epoch 09: Binary F1 = 0.9143, Macro F1 = 0.3749, Final Metric = 0.6446
  New best metric! Saving model...
Epoch 10: Binary F1 = 0.8931, Macro F1 = 0.3841, Final Metric = 0.6386
Epoch 11: Binary F1 = 0.8900, Macro F1 = 0.4326, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.914341,0.499619,0.702299


[I 2025-08-16 15:09:11,048] Trial 41 finished with value: 0.7022992699058888 and parameters: {'cycle_mult': 1.5, 'max_lr': 0.0069178306514896875, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.45, 'weight_decay': 0.0007387513086688898, 'beta_0': 0.8815269368868605, 'beta_1': 0.998263174163781, 'aux_loss_weigth': 0.06}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7023 ± nan
Mean Best Binary F1: 0.9143 ± nan
Mean Best Macro F1: 0.4996 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.8075, Macro F1 = 0.1886, Final Metric = 0.4981
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8618, Macro F1 = 0.2494, Final Metric = 0.5556
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8547, Macro F1 = 0.2973, Final Metric = 0.5760
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8335, Macro F1 = 0.3594, Final Metric = 0.5965
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8453, Macro F1 = 0.3712, Final Metric = 0.6083
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8653, Macro F1 = 0.3129, Final Metric = 0.5891
Epoch 07: Binary F1 = 0.8847, Macro F1 = 0.3945, Final Metric = 0.6396
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8550, Macro F1 = 0.3808, Final Metric = 0.6179
Epoch 09: Binary F1 = 0.9064, Macro F1 = 0.3844, Final Metric = 0.6454
  New best metric! Saving model...
Epoch 10: Binary F1 = 0.8919, Macro F1 = 0.4105, Final Metric = 0.6512
  New best metric! Sav

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.909274,0.488623,0.694827


[I 2025-08-16 15:10:02,583] Trial 42 finished with value: 0.6948267289584803 and parameters: {'cycle_mult': 1.5, 'max_lr': 0.006940430494016697, 'init_cycle_epochs': 3, 'lr_cycle_factor': 0.35, 'weight_decay': 0.0007742257695111025, 'beta_0': 0.9101915015711739, 'beta_1': 0.9974648073461954, 'aux_loss_weigth': 0.06}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6948 ± nan
Mean Best Binary F1: 0.9093 ± nan
Mean Best Macro F1: 0.4886 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7655, Macro F1 = 0.1852, Final Metric = 0.4753
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8335, Macro F1 = 0.2318, Final Metric = 0.5326
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8676, Macro F1 = 0.3089, Final Metric = 0.5882
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8866, Macro F1 = 0.3597, Final Metric = 0.6231
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8789, Macro F1 = 0.3621, Final Metric = 0.6205
Epoch 06: Binary F1 = 0.8277, Macro F1 = 0.3599, Final Metric = 0.5938
Epoch 07: Binary F1 = 0.8849, Macro F1 = 0.4220, Final Metric = 0.6534
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8912, Macro F1 = 0.3874, Final Metric = 0.6393
Epoch 09: Binary F1 = 0.8963, Macro F1 = 0.3872, Final Metric = 0.6417
Epoch 10: Binary F1 = 0.8734, Macro F1 = 0.4084, Final Metric = 0.6409
Epoch 11: Binary F1 = 0.8726, Macro F1 = 0.4287, Final Metric = 0.6507
Epoch 12: Binary F1 =

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.922156,0.500508,0.706015
1,0.960784,0.562348,0.756541


[I 2025-08-16 15:12:07,396] Trial 43 finished with value: 0.7312780071914924 and parameters: {'cycle_mult': 1.55, 'max_lr': 0.0041242662538763025, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.5, 'weight_decay': 0.0006561772642507304, 'beta_0': 0.8772052787875991, 'beta_1': 0.9989520782012677, 'aux_loss_weigth': 0.03}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7313 ± 0.0357
Mean Best Binary F1: 0.9415 ± 0.0273
Mean Best Macro F1: 0.5314 ± 0.0437





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.8045, Macro F1 = 0.1580, Final Metric = 0.4813
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8300, Macro F1 = 0.2435, Final Metric = 0.5367
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8840, Macro F1 = 0.3289, Final Metric = 0.6065
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8415, Macro F1 = 0.3355, Final Metric = 0.5885
Epoch 05: Binary F1 = 0.8757, Macro F1 = 0.3522, Final Metric = 0.6139
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8613, Macro F1 = 0.3755, Final Metric = 0.6184
  New best metric! Saving model...
Epoch 07: Binary F1 = 0.8812, Macro F1 = 0.3924, Final Metric = 0.6368
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.9102, Macro F1 = 0.3880, Final Metric = 0.6491
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8742, Macro F1 = 0.4259, Final Metric = 0.6500
  New best metric! Saving model...
Epoch 10: Binary F1 = 0.9004, Macro F1 = 0.4140, Final Met

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.921212,0.490668,0.70064


[I 2025-08-16 15:13:17,426] Trial 44 finished with value: 0.7006400470631886 and parameters: {'cycle_mult': 1.55, 'max_lr': 0.004282551316152239, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.5, 'weight_decay': 0.0006550928753974378, 'beta_0': 0.8634809007448842, 'beta_1': 0.9992401744240755, 'aux_loss_weigth': 0.0}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7006 ± nan
Mean Best Binary F1: 0.9212 ± nan
Mean Best Macro F1: 0.4907 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7346, Macro F1 = 0.1952, Final Metric = 0.4649
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8383, Macro F1 = 0.2128, Final Metric = 0.5255
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8664, Macro F1 = 0.3245, Final Metric = 0.5955
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8652, Macro F1 = 0.3673, Final Metric = 0.6162
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8767, Macro F1 = 0.3573, Final Metric = 0.6170
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8173, Macro F1 = 0.3264, Final Metric = 0.5718
Epoch 07: Binary F1 = 0.9108, Macro F1 = 0.3605, Final Metric = 0.6356
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8836, Macro F1 = 0.4143, Final Metric = 0.6490
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8879, Macro F1 = 0.4021, Final Metric = 0.6450
Epoch 10: Binary F1 = 0.8593, Macro F1 = 0.3822, Final Metric = 0.6207
Epoch 11: Binary F1 = 

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.913706,0.496707,0.703304


[I 2025-08-16 15:14:31,323] Trial 45 finished with value: 0.7033037334163093 and parameters: {'cycle_mult': 1.4000000000000001, 'max_lr': 0.004000413172018238, 'init_cycle_epochs': 8, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0008893457722863881, 'beta_0': 0.8880295071906952, 'beta_1': 0.9996531380987002, 'aux_loss_weigth': 0.15}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7033 ± nan
Mean Best Binary F1: 0.9137 ± nan
Mean Best Macro F1: 0.4967 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7174, Macro F1 = 0.1693, Final Metric = 0.4433
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8532, Macro F1 = 0.2604, Final Metric = 0.5568
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8788, Macro F1 = 0.2475, Final Metric = 0.5632
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8398, Macro F1 = 0.3237, Final Metric = 0.5818
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8574, Macro F1 = 0.3439, Final Metric = 0.6007
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8307, Macro F1 = 0.3677, Final Metric = 0.5992
Epoch 07: Binary F1 = 0.9052, Macro F1 = 0.4035, Final Metric = 0.6543
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8914, Macro F1 = 0.3917, Final Metric = 0.6416
Epoch 09: Binary F1 = 0.8698, Macro F1 = 0.3876, Final Metric = 0.6287
Epoch 10: Binary F1 = 0.8595, Macro F1 = 0.3975, Final Metric = 0.6285
Epoch 11: Binary F1 = 0.8910, Macro F1 = 0.4054, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.905155,0.48852,0.685008


[I 2025-08-16 15:15:41,974] Trial 46 finished with value: 0.685008368066111 and parameters: {'cycle_mult': 1.55, 'max_lr': 0.004565460783101369, 'init_cycle_epochs': 7, 'lr_cycle_factor': 0.5, 'weight_decay': 0.000569432289537198, 'beta_0': 0.8525189861411042, 'beta_1': 0.9989250669190842, 'aux_loss_weigth': 0.87}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6850 ± nan
Mean Best Binary F1: 0.9052 ± nan
Mean Best Macro F1: 0.4885 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.6968, Macro F1 = 0.1844, Final Metric = 0.4406
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8499, Macro F1 = 0.2494, Final Metric = 0.5496
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8838, Macro F1 = 0.2949, Final Metric = 0.5893
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8468, Macro F1 = 0.3036, Final Metric = 0.5752
Epoch 05: Binary F1 = 0.8565, Macro F1 = 0.3658, Final Metric = 0.6112
  New best metric! Saving model...
Epoch 06: Binary F1 = 0.8448, Macro F1 = 0.3667, Final Metric = 0.6058
Epoch 07: Binary F1 = 0.8978, Macro F1 = 0.4024, Final Metric = 0.6501
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8955, Macro F1 = 0.4090, Final Metric = 0.6522
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8760, Macro F1 = 0.4116, Final Metric = 0.6438
Epoch 10: Binary F1 = 0.8678, Macro F1 = 0.4043, Final Metric = 0.6360
Epoch 11: Binary F1 = 0.8850, Macro F1 = 0.4214, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.911315,0.493198,0.698788


[I 2025-08-16 15:16:45,945] Trial 47 finished with value: 0.698788327723804 and parameters: {'cycle_mult': 0.8, 'max_lr': 0.004597898322821663, 'init_cycle_epochs': 6, 'lr_cycle_factor': 0.6, 'weight_decay': 0.0007031170406447193, 'beta_0': 0.9044465062149821, 'beta_1': 0.9958239559291173, 'aux_loss_weigth': 0.51}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.6988 ± nan
Mean Best Binary F1: 0.9113 ± nan
Mean Best Macro F1: 0.4932 ± nan





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7671, Macro F1 = 0.1837, Final Metric = 0.4754
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8641, Macro F1 = 0.2505, Final Metric = 0.5573
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8806, Macro F1 = 0.3086, Final Metric = 0.5946
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8620, Macro F1 = 0.3510, Final Metric = 0.6065
  New best metric! Saving model...
Epoch 05: Binary F1 = 0.8724, Macro F1 = 0.3361, Final Metric = 0.6042
Epoch 06: Binary F1 = 0.8153, Macro F1 = 0.3289, Final Metric = 0.5721
Epoch 07: Binary F1 = 0.8875, Macro F1 = 0.3894, Final Metric = 0.6385
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8907, Macro F1 = 0.4030, Final Metric = 0.6469
  New best metric! Saving model...
Epoch 09: Binary F1 = 0.8976, Macro F1 = 0.3784, Final Metric = 0.6380
Epoch 10: Binary F1 = 0.8917, Macro F1 = 0.3747, Final Metric = 0.6332
Epoch 11: Binary F1 = 0.9022, Macro F1 = 0.3920, Final Me

Unnamed: 0_level_0,binary_f1,macro_f1,final_metric
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.916077,0.507222,0.71117
1,0.965585,0.584744,0.774655
2,0.974359,0.607163,0.789898
3,0.978474,0.619339,0.798842
4,0.975657,0.632348,0.803003


[I 2025-08-16 15:22:07,176] Trial 48 finished with value: 0.7755138782893267 and parameters: {'cycle_mult': 1.4500000000000002, 'max_lr': 0.0051168089055962235, 'init_cycle_epochs': 2, 'lr_cycle_factor': 0.55, 'weight_decay': 0.0006521570783434823, 'beta_0': 0.8083147656653696, 'beta_1': 0.9908159903029322, 'aux_loss_weigth': 0.15}. Best is trial 0 with value: 0.790798707199152.



Global Statistics (Best Metrics):
Mean Best Final Metric: 0.7755 ± 0.0376
Mean Best Binary F1: 0.9620 ± 0.0261
Mean Best Macro F1: 0.5902 ± 0.0496





training: 1
Fold 1/10
Epoch 01: Binary F1 = 0.7449, Macro F1 = 0.1874, Final Metric = 0.4662
  New best metric! Saving model...
Epoch 02: Binary F1 = 0.8685, Macro F1 = 0.2493, Final Metric = 0.5589
  New best metric! Saving model...
Epoch 03: Binary F1 = 0.8862, Macro F1 = 0.3585, Final Metric = 0.6223
  New best metric! Saving model...
Epoch 04: Binary F1 = 0.8556, Macro F1 = 0.3145, Final Metric = 0.5851
Epoch 05: Binary F1 = 0.8637, Macro F1 = 0.3028, Final Metric = 0.5833
Epoch 06: Binary F1 = 0.8335, Macro F1 = 0.3164, Final Metric = 0.5750
Epoch 07: Binary F1 = 0.9025, Macro F1 = 0.3873, Final Metric = 0.6449
  New best metric! Saving model...
Epoch 08: Binary F1 = 0.8747, Macro F1 = 0.4128, Final Metric = 0.6438
Epoch 09: Binary F1 = 0.9015, Macro F1 = 0.3718, Final Metric = 0.6366
Epoch 10: Binary F1 = 0.8893, Macro F1 = 0.3909, Final Metric = 0.6401
Epoch 11: Binary F1 = 0.9085, Macro F1 = 0.3966, Final Metric = 0.6526
  New best metric! Saving model...
Epoch 12: Binary F1 =

## Submission

### Reloading best models

In [None]:
def load_model_ensemble(parent_dir:str) -> list[nn.Module]:
    model_ensemble = []
    for fold in range(NB_CROSS_VALIDATIONS):
        model = mk_model(n_aux_classes=meta_data["n_aux_classes"])
        checkpoint = torch.load(
            join(
                parent_dir,
                f"model_fold_{fold}.pth"
            ),
            map_location=device,
            weights_only=True
        )
        model.load_state_dict(checkpoint)
        model.eval()
        model_ensemble.append(model)
    
    return model_ensemble
    
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    model_ensemble = load_model_ensemble("models")
else:
    models_dir = kagglehub.model_download(
        join(
            kagglehub.whoami()["username"],
            MODEL_NAME,
            "pyTorch",
            MODEL_VARIATION,
        )
    )
    model_ensemble = load_model_ensemble("models")

Kaggle credentials successfully validated.


Downloading 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_2.pth...
Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_1.pth...





Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_6.pth...


  0%|          | 0.00/12.9M [00:00<?, ?B/s][A

[A[A
[A

Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_3.pth...
Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_5.pth...





[A[A[A



[A[A[A[A

Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_4.pth...







[A[A[A[A[A

Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_0.pth...








[A[A[A[A[A[A

Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_7.pth...









[A[A[A[A[A[A[A
[A

[A[A


[A[A[A



[A[A[A[A


[A[A[A
[A

[A[A



[A[A[A[A
[A

[A[A


[A[A[A



[A[A[A[A




[A[A[A[A[A
[A





[A[A[A[A[A[A

[A[A






[A[A[A[A[A[A[A



[A[A[A[A


[A[A[A




[A[A[A[A[A
[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A



[A[A[A[A

[A[A




[A[A[A[A[A


[A[A[A
[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A



[A[A[A[A




[A[A[A[A[A

[A[A


[A[A[A





[A[A[A[A[A[A
[A






[A[A[A[A[A[A[A




[A[A[A[A[A



[A[A[A[A





[A[A[A[A[A[A

100%|██████████| 12.9M/12.9M [00:01<00:00, 7.64MB/s]



[A[A[A






[A[A[A[A[A[A[A
[A



[A[A[A[A





[A[A[A[A[A[A




100%|██████████| 12.9M/12.9M [00:01<00:00, 7.34MB/s]







[A[A[A[A[A[A[A




[A[A[A[A[A



[A[A[A[A





100%|██████████| 12.9M/12.9M [00:01<00:00, 8.54MB/s]

[A



[A[A[A[A






100%|██████████| 12.9M/12.9M [00:

Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_9.pth...




Downloading from https://www.kaggle.com/api/v1/models/mauroabidalcarrer/cmi-model/pyTorch/single_model_architecture/2/download/model_fold_8.pth...



[A
[A
[A
[A
100%|██████████| 12.9M/12.9M [00:01<00:00, 9.98MB/s]

[A
[A
100%|██████████| 12.9M/12.9M [00:01<00:00, 9.44MB/s]


### Define prediction function

In [49]:
def preprocess_sequence_at_inference(sequence_df:pl.DataFrame) -> ndarray:
    return (
        sequence_df                     
        .to_pandas()                            # Convert to pandas dataframe.
        .pipe(imputed_features)                 # Impute missing data.
        .pipe(standardize_tof_cols_names)
        .pipe(norm_quat_rotations)              # Norm quaternions
        .pipe(add_linear_acc_cols)              # Add gravity free acceleration.
        .pipe(add_acc_magnitude, RAW_ACCELRATION_COLS, "acc_mag")
        .pipe(add_acc_magnitude, LINEAR_ACC_COLS, "linear_acc_mag")
        .pipe(add_quat_angle_mag)
        .pipe(add_angular_velocity_features)
        .pipe(rot_euler_angles)                 # Add rotation acc expressed as euler angles.
        .pipe(agg_tof_cols_per_sensor)          # Aggregate ToF columns.
        .pipe(add_diff_features)                # 
        .loc[:, sorted(meta_data["feature_cols"])]      # Retain only the usefull columns a.k.a features.
        # .sub(meta_data["mean"])                 # Subtract features by their mean, std norm pt.1.
        # .div(meta_data["std"])                  # Divide by Standard deviation, std norm pt.2.
        .pipe(length_normed_sequence_feat_arr, meta_data["pad_seq_len"], SEQ_PAD_TRUNC_MODE)  # get feature ndarray of sequence.
        .T                                      # Transpose to swap channel and X dimensions.
    )

def predict(sequence: pl.DataFrame, _: pl.DataFrame) -> str:
    """
    Kaggle evaluation API will call this for each sequence.
    sequence: polars DataFrame for a single sequence
    demographics: unused in this model
    Returns: predicted gesture string
    """
    x_tensor = (
        torch.unsqueeze(Tensor(preprocess_sequence_at_inference(sequence)), dim=0)
        .float()
        .to(device)
    )
    print(x_tensor.shape)

    all_outputs = []
    with torch.no_grad():
        for model_idx, model in enumerate(model_ensemble): # Only take the first one bc it's the only one that takes in the correct input shape
            outputs, _ = model(x_tensor)
            all_outputs.append(outputs)

    avg_outputs = torch.mean(torch.stack(all_outputs), dim=0)
    pred_idx = torch.argmax(avg_outputs, dim=1).item()

    return str(TARGET_NAMES[pred_idx])

### Run inference server

In [None]:
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    competition_dataset_path = competition_down2load(COMPETITION_HANDLE)
    inference_server.run_local_gateway(
        data_paths=(
            join(competition_dataset_path, 'test.csv'),
            join(competition_dataset_path, 'test_demographics.csv'),
        )
    )
    inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

  0%|          | 0/5 [00:00<?, ?it/s]

torch.Size([1, 946, 127])


  0%|          | 0/5 [00:00<?, ?it/s]

torch.Size([1, 946, 127])
