In [4]:
import numpy as np
import pandas as pd
import torch
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import TensorDataset
from warnings import filterwarnings

filterwarnings('ignore')

## Prepare data:

In [5]:
def get_X_cat(df, cat_cols, transformers=None):
    """
    Apply a specific categorical data transformer or a LabelEncoder if None.
    """
    if transformers is None:
        transformers = [LabelEncoder().fit(df[col]) for col in cat_cols]
    return transformers, np.array(
        [transformer.transform(df[col]) for col, transformer in zip(cat_cols, transformers)]
    ).T


def preprocess_data(train, val):
    """
    Standardize numerical variables and transform (Label-encode) categoricals.
    Fill NA values with mean for numerical.
    Create torch dataloaders to prepare data for training and evaluation.
    """
    X_cat_train, X_cat_val, numerical, transformers = get_categoricals(train, val)
    scaler = StandardScaler()
    imp = SimpleImputer(missing_values=np.nan, strategy='mean', add_indicator=True)
    X_num_train = imp.fit_transform(train[numerical])
    X_num_train = scaler.fit_transform(X_num_train)
    X_num_val = imp.transform(val[numerical])
    X_num_val = scaler.transform(X_num_val)
    dl_train = init_dl(X_cat_train, X_num_train, train, training=True)
    dl_val = init_dl(X_cat_val, X_num_val, val)
    return X_cat_val, X_num_train, X_num_val, dl_train, dl_val, transformers


def get_categoricals(train, val):
    """
    Remove constant categorical columns and transform them using LabelEncoder.
    Return the label-transformers for each categorical column, categorical dataframes and numerical columns.
    """
    categorical_cols, numerical = get_feature_types(train)
    remove = []
    for col in categorical_cols:
        if train[col].nunique() == 1:
            remove.append(col)
        ind = ~val[col].isin(train[col])
        if ind.any():
            val.loc[ind, col] = np.nan
    categorical_cols = [col for col in categorical_cols if col not in remove]
    transformers, X_cat_train = get_X_cat(train, categorical_cols)
    _, X_cat_val = get_X_cat(val, categorical_cols, transformers)
    return X_cat_train, X_cat_val, numerical, transformers


def init_dl(X_cat, X_num, df, training=False):
    """
    Initialize data loaders with 4 dimensions : categorical dataframe, numerical dataframe and target values (efs and efs_time).
    Notice that efs_time is log-transformed.
    Fix batch size to 2048 and return dataloader for training or validation depending on training value.
    """
    ds_train = TensorDataset(
        torch.tensor(X_cat, dtype=torch.long),
        torch.tensor(X_num, dtype=torch.float32),
        torch.tensor(df.efs_time.values, dtype=torch.float32).log(),
        torch.tensor(df.efs.values, dtype=torch.long)
    )
    bs = 2048
    dl_train = torch.utils.data.DataLoader(ds_train, batch_size=bs, pin_memory=True, shuffle=training)
    return dl_train


def get_feature_types(train):
    """
    Utility function to return categorical and numerical column names.
    """
    categorical_cols = [col for i, col in enumerate(train.columns) if ((train[col].dtype == "object") | (2 < train[col].nunique() < 25))]
    RMV = ["ID", "efs", "efs_time", "y"]
    FEATURES = [c for c in train.columns if not c in RMV]
    numerical = [i for i in FEATURES if i not in categorical_cols]
    return categorical_cols, numerical


def add_features(df):
    """
    Create some new features to help the model focus on specific patterns.
    """
    # sex_match = df.sex_match.astype(str)
    # sex_match = sex_match.str.split("-").str[0] == sex_match.str.split("-").str[1]
    # df['sex_match_bool'] = sex_match
    # df.loc[df.sex_match.isna(), 'sex_match_bool'] = np.nan
    # df['big_age'] = df.age_at_hct > 16
    # df.loc[df.year_hct == 2019, 'year_hct'] = 2020
    df['is_cyto_score_same'] = (df['cyto_score'] == df['cyto_score_detail']).astype(int)
    # df['strange_age'] = df.age_at_hct == 0.044
    # df['age_bin'] = pd.cut(df.age_at_hct, [0, 0.0441, 16, 30, 50, 100])
    # df['age_ts'] = df.age_at_hct / df.donor_age
    df['year_hct'] -= 2000
    
    return df


def load_data():
    """
    Load data and add features.
    """
    test = pd.read_csv("./test.csv")
    test = add_features(test)
    # print("Test shape:", test.shape)
    train = pd.read_csv("./train.csv")
    train = add_features(train)
    # print("Train shape:", train.shape)
    return test, train


## Define models with pairwise ranking loss

The model is defined in 3 steps :
* Embedding class for categorical data
* MLP for numerical and categorical data
* Final model trained with pairwise ranking loss with selection of valid pairs

In [10]:
import functools
from typing import List

import pytorch_lightning as pl
import numpy as np
import torch
from lifelines.utils import concordance_index
from pytorch_lightning.cli import ReduceLROnPlateau
from pytorch_tabular.models.common.layers import ODST
from torch import nn
from pytorch_lightning.utilities import grad_norm


class CatEmbeddings(nn.Module):
    """
    Embedding module for the categorical dataframe.
    """
    def __init__(
        self,
        projection_dim: int,
        categorical_cardinality: List[int],
        embedding_dim: int
    ):
        """
        projection_dim: The dimension of the final output after projecting the concatenated embeddings into a lower-dimensional space.
        categorical_cardinality: A list where each element represents the number of unique categories (cardinality) in each categorical feature.
        embedding_dim: The size of the embedding space for each categorical feature.
        self.embeddings: list of embedding layers for each categorical feature.
        self.projection: sequential neural network that goes from the embedding to the output projection dimension with GELU activation.
        """
        super(CatEmbeddings, self).__init__()
        self.embeddings = nn.ModuleList([
            nn.Embedding(cardinality, embedding_dim)
            for cardinality in categorical_cardinality
        ])
        self.projection = nn.Sequential(
            nn.Linear(embedding_dim * len(categorical_cardinality), projection_dim),
            nn.GELU(),
            nn.Linear(projection_dim, projection_dim)
        )

    def forward(self, x_cat):
        """
        Apply the projection on concatened embeddings that contains all categorical features.
        """
        x_cat = [embedding(x_cat[:, i]) for i, embedding in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, dim=1)
        return self.projection(x_cat)


class NN(nn.Module):
    """
    Train a model on both categorical embeddings and numerical data.
    """
    def __init__(
            self,
            continuous_dim: int,
            categorical_cardinality: List[int],
            embedding_dim: int,
            projection_dim: int,
            hidden_dim: int,
            dropout: float = 0
    ):
        """
        continuous_dim: The number of continuous features.
        categorical_cardinality: A list of integers representing the number of unique categories in each categorical feature.
        embedding_dim: The dimensionality of the embedding space for each categorical feature.
        projection_dim: The size of the projected output space for the categorical embeddings.
        hidden_dim: The number of neurons in the hidden layer of the MLP.
        dropout: The dropout rate applied in the network.
        self.embeddings: previous embeddings for categorical data.
        self.mlp: defines an MLP model with an ODST layer followed by batch normalization and dropout.
        self.out: linear output layer that maps the output of the MLP to a single value
        self.dropout: defines dropout
        Weights initialization with xavier normal algorithm and biases with zeros.
        """
        super(NN, self).__init__()
        self.embeddings = CatEmbeddings(projection_dim, categorical_cardinality, embedding_dim)
        self.mlp = nn.Sequential(
            ODST(projection_dim + continuous_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(dropout)
        )
        self.out = nn.Linear(hidden_dim, 1)
        self.dropout = nn.Dropout(dropout)

        # initialize weights
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x_cat, x_cont):
        """
        Create embedding layers for categorical data, concatenate with continous variables.
        Add dropout and goes through MLP and return raw output and 1-dimensional output as well.
        """
        x = self.embeddings(x_cat)
        x = torch.cat([x, x_cont], dim=1)
        x = self.dropout(x)
        x = self.mlp(x)
        return self.out(x), x


@functools.lru_cache
def combinations(N):
    """
    calculates all possible 2-combinations (pairs) of a tensor of indices from 0 to N-1, 
    and caches the result using functools.lru_cache for optimization
    """
    ind = torch.arange(N)
    comb = torch.combinations(ind, r=2)
    return comb.cuda()


class LitNN(pl.LightningModule):
    """
    Main Model creation and losses definition to fully train the model.
    """
    def __init__(
            self,
            continuous_dim: int,
            categorical_cardinality: List[int],
            embedding_dim: int,
            projection_dim: int,
            hidden_dim: int,
            lr: float = 1e-3,
            dropout: float = 0.2,
            weight_decay: float = 1e-3,
            aux_weight: float = 0.1,
            margin: float = 0.5,
            race_index: int = 0
    ):
        """
        continuous_dim: The number of continuous input features.
        categorical_cardinality: A list of integers, where each element corresponds to the number of unique categories for each categorical feature.
        embedding_dim: The dimension of the embeddings for the categorical features.
        projection_dim: The dimension of the projected space after embedding concatenation.
        hidden_dim: The size of the hidden layers in the feedforward network (MLP).
        lr: The learning rate for the optimizer.
        dropout: Dropout probability to avoid overfitting.
        weight_decay: The L2 regularization term for the optimizer.
        aux_weight: Weight used for auxiliary tasks.
        margin: Margin used in some loss functions.
        race_index: An index that refer to race_group in the input data.
        """
        super(LitNN, self).__init__()
        self.save_hyperparameters()

        # Creates an instance of the NN model defined above
        self.model = NN(
            continuous_dim=self.hparams.continuous_dim,
            categorical_cardinality=self.hparams.categorical_cardinality,
            embedding_dim=self.hparams.embedding_dim,
            projection_dim=self.hparams.projection_dim,
            hidden_dim=self.hparams.hidden_dim,
            dropout=self.hparams.dropout
        )
        self.targets = []

        # Defines a small feedforward neural network that performs an auxiliary task with 1-dimensional output
        self.aux_cls = nn.Sequential(
            nn.Linear(self.hparams.hidden_dim, self.hparams.hidden_dim // 2),
            nn.GELU(),
            nn.Linear(self.hparams.hidden_dim // 2, self.hparams.hidden_dim // 4),
            nn.GELU(),
            nn.Linear(self.hparams.hidden_dim // 4, 1)
        )

    def on_before_optimizer_step(self, optimizer):
        """
        Compute the 2-norm for each layer
        If using mixed precision, the gradients are already unscaled here
        """
        norms = grad_norm(self.model, norm_type=2)
        self.log_dict(norms)

    def forward(self, x_cat, x_cont):
        """
        Forward pass that outputs the 1-dimensional prediction and the embeddings (raw output)
        """
        x, emb = self.model(x_cat, x_cont)
        return x.squeeze(1), emb

    def training_step(self, batch, batch_idx):
        """
        defines how the model processes each batch of data during training.
        A batch is a combination of : categorical data, continuous data, efs_time (y) and efs event.
        y_hat is the efs_time prediction on all data and aux_pred is auxiliary prediction on embeddings.
        Calculates loss and race_group loss on full data.
        Auxiliary loss is calculated with an event mask, ignoring efs=0 predictions and taking the average.
        Returns loss and aux_loss multiplied by weight defined above.
        """
        x_cat, x_cont, y, efs = batch
        y_hat, emb = self(x_cat, x_cont)
        aux_pred = self.aux_cls(emb).squeeze(1)
        loss, race_loss = self.get_full_loss(efs, x_cat, y, y_hat)
        aux_loss = nn.functional.mse_loss(aux_pred, y, reduction='none')
        aux_mask = efs == 1
        aux_loss = (aux_loss * aux_mask).sum() / aux_mask.sum()
        self.log("train_loss", loss, on_epoch=True, prog_bar=True, logger=True)
        self.log("race_loss", race_loss, on_epoch=True, prog_bar=True, logger=True, on_step=False)
        self.log("aux_loss", aux_loss, on_epoch=True, prog_bar=True, logger=True, on_step=False)
        return loss + aux_loss * self.hparams.aux_weight

    def get_full_loss(self, efs, x_cat, y, y_hat):
        """
        Output loss and race_group loss.
        """
        loss = self.calc_loss(y, y_hat, efs)
        race_loss = self.get_race_losses(efs, x_cat, y, y_hat)
        loss += 0.5 * race_loss
        return loss, race_loss

    def get_race_losses(self, efs, x_cat, y, y_hat):
        """
        Calculate loss for each race_group based on deviation/variance.
        """
        races = torch.unique(x_cat[:, self.hparams.race_index])
        race_losses = []
        for race in races:
            ind = x_cat[:, self.hparams.race_index] == race
            race_losses.append(self.calc_loss(y[ind], y_hat[ind], efs[ind]))
        race_loss = sum(race_losses) / len(race_losses)
        races_loss_std = sum((r - race_loss)**2 for r in race_losses) / len(race_losses)
        return torch.sqrt(races_loss_std)

    def calc_loss(self, y, y_hat, efs):
        """
        Most important part of the model : loss function used for training.
        We face survival data with event indicators along with time-to-event.

        This function computes the main loss by the following the steps :
        * create all data pairs with "combinations" function (= all "two subjects" combinations)
        * make sure that we have at least 1 event in each pair
        * convert y to +1 or -1 depending on the correct ranking
        * loss is computed using a margin-based hinge loss
        * mask is applied to ensure only valid pairs are being used (censored data can't be ranked with event in some cases)
        * average loss on all pairs is returned
        """
        N = y.shape[0]
        comb = combinations(N)
        comb = comb[(efs[comb[:, 0]] == 1) | (efs[comb[:, 1]] == 1)]
        pred_left = y_hat[comb[:, 0]]
        pred_right = y_hat[comb[:, 1]]
        y_left = y[comb[:, 0]]
        y_right = y[comb[:, 1]]
        y = 2 * (y_left > y_right).int() - 1
        loss = nn.functional.relu(-y * (pred_left - pred_right) + self.hparams.margin)
        mask = self.get_mask(comb, efs, y_left, y_right)
        loss = (loss.double() * (mask.double())).sum() / mask.sum()
        return loss

    def get_mask(self, comb, efs, y_left, y_right):
        """
        Defines all invalid comparisons :
        * Case 1: "Left outlived Right" but Right is censored
        * Case 2: "Right outlived Left" but Left is censored
        Masks for case 1 and case 2 are combined using |= operator and inverted using ~ to create a "valid pair mask"
        """
        left_outlived = y_left >= y_right
        left_1_right_0 = (efs[comb[:, 0]] == 1) & (efs[comb[:, 1]] == 0)
        mask2 = (left_outlived & left_1_right_0)
        right_outlived = y_right >= y_left
        right_1_left_0 = (efs[comb[:, 1]] == 1) & (efs[comb[:, 0]] == 0)
        mask2 |= (right_outlived & right_1_left_0)
        mask2 = ~mask2
        mask = mask2
        return mask

    def validation_step(self, batch, batch_idx):
        """
        This method defines how the model processes each batch during validation
        """
        x_cat, x_cont, y, efs = batch
        y_hat, emb = self(x_cat, x_cont)
        loss, race_loss = self.get_full_loss(efs, x_cat, y, y_hat)
        self.targets.append([y, y_hat.detach(), efs, x_cat[:, self.hparams.race_index]])
        self.log("val_loss", loss, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def on_validation_epoch_end(self):
        """
        At the end of the validation epoch, it computes and logs the concordance index
        """
        cindex, metric = self._calc_cindex()
        self.log("cindex_stratified", metric, on_epoch=True, prog_bar=True, logger=True)
        self.log("cindex_simple", cindex, on_epoch=True, prog_bar=True, logger=True)
        self.targets.clear()

    def _calc_cindex(self):
        """
        Calculate c-index accounting for each race_group or global.
        """
        y = torch.cat([t[0] for t in self.targets]).cpu().numpy()
        y_hat = torch.cat([t[1] for t in self.targets]).cpu().numpy()
        efs = torch.cat([t[2] for t in self.targets]).cpu().numpy()
        races = torch.cat([t[3] for t in self.targets]).cpu().numpy()
        metric = self._metric(efs, races, y, y_hat)
        cindex = concordance_index(y, y_hat, efs)
        return cindex, metric

    def _metric(self, efs, races, y, y_hat):
        """
        Calculate c-index accounting for each race_group
        """
        metric_list = []
        for race in np.unique(races):
            y_ = y[races == race]
            y_hat_ = y_hat[races == race]
            efs_ = efs[races == race]
            metric_list.append(concordance_index(y_, y_hat_, efs_))
        metric = float(np.mean(metric_list) - np.sqrt(np.var(metric_list)))
        return metric

    def test_step(self, batch, batch_idx):
        """
        Same as training step but to log test data
        """
        x_cat, x_cont, y, efs = batch
        y_hat, emb = self(x_cat, x_cont)
        loss, race_loss = self.get_full_loss(efs, x_cat, y, y_hat)
        self.targets.append([y, y_hat.detach(), efs, x_cat[:, self.hparams.race_index]])
        self.log("test_loss", loss)
        return loss

    def on_test_epoch_end(self) -> None:
        """
        At the end of the test epoch, calculates and logs the concordance index for the test set
        """
        cindex, metric = self._calc_cindex()
        self.log("test_cindex_stratified", metric, on_epoch=True, prog_bar=True, logger=True)
        self.log("test_cindex_simple", cindex, on_epoch=True, prog_bar=True, logger=True)
        self.targets.clear()


    def configure_optimizers(self):
        """
        configures the optimizer and learning rate scheduler:
        * Optimizer: Adam optimizer with weight decay (L2 regularization).
        * Scheduler: Cosine Annealing scheduler, which adjusts the learning rate according to a cosine curve.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=self.hparams.weight_decay)
        scheduler_config = {
            "scheduler": torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=45,
                eta_min=6e-3
            ),
            "interval": "epoch",
            "frequency": 1,
            "strict": False,
        }

        return {"optimizer": optimizer, "lr_scheduler": scheduler_config}

In [11]:
import json
import pytorch_lightning as pl
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import torch
from pytorch_lightning.callbacks import LearningRateMonitor, TQDMProgressBar
from pytorch_lightning.callbacks import StochasticWeightAveraging, EarlyStopping
from sklearn.model_selection import StratifiedKFold

pl.seed_everything(42)

def main(hparams):
    """
    Main function to train the model.
    The steps are as following :
    * load data and fill efs and efs time for test data with 1
    * initialize pred array with 0
    * get categorical and numerical columns
    * split the train data on the stratified criterion : race_group * newborns yes/no
    * preprocess the fold data (create dataloaders)
    * train the model and create final submission output
    """
    test, train_original = load_data()
    test['efs_time'] = 1
    test['efs'] = 1
    oof_nn_pairwise = np.zeros(len(train_original))
    test_pred = np.zeros(test.shape[0])
    categorical_cols, numerical = get_feature_types(train_original)
    kf = StratifiedKFold(n_splits=5, shuffle=True, )
    for i, (train_index, test_index) in enumerate(
        kf.split(
            train_original, train_original.race_group.astype(str)
        )
    ):
        tt = train_original.copy()
        train = tt.iloc[train_index]
        val = tt.iloc[test_index]
        X_cat_val, X_num_train, X_num_val, dl_train, dl_val, transformers = preprocess_data(train, val)
        model = train_final(X_num_train, dl_train, dl_val, transformers, categorical_cols=categorical_cols)
        oof_pred, _ = model.cuda().eval()(
            torch.tensor(X_cat_val, dtype=torch.long).cuda(),
            torch.tensor(X_num_val, dtype=torch.float32).cuda()
        )
        oof_nn_pairwise[test_index] = oof_pred.detach().cpu().numpy()
        # Create submission
        train = tt.iloc[train_index]
        X_cat_val, X_num_train, X_num_val, dl_train, dl_val, transformers = preprocess_data(train, test)
        pred, _ = model.cuda().eval()(
            torch.tensor(X_cat_val, dtype=torch.long).cuda(),
            torch.tensor(X_num_val, dtype=torch.float32).cuda()
        )
        test_pred += pred.detach().cpu().numpy()
        
    return -test_pred, -oof_nn_pairwise

def train_final(X_num_train, dl_train, dl_val, transformers, hparams=None, categorical_cols=None):
    """
    Defines model hyperparameters and fit the model.
    """
    if hparams is None:
        hparams = {
            "embedding_dim": 16,
            "projection_dim": 112,
            "hidden_dim": 56,
            "lr": 0.06464861983337984,
            "dropout": 0.05463240181423116,
            "aux_weight": 0.26545778308743806,
            "margin": 0.2588153271003354,
            "weight_decay": 0.0002773544957610778
        }
    model = LitNN(
        continuous_dim=X_num_train.shape[1],
        categorical_cardinality=[len(t.classes_) for t in transformers],
        race_index=categorical_cols.index("race_group"),
        **hparams
    )
    checkpoint_callback = pl.callbacks.ModelCheckpoint(monitor="val_loss", save_top_k=1)
    trainer = pl.Trainer(
        accelerator='cuda',
        max_epochs=100,
        log_every_n_steps=10,
        callbacks=[
            checkpoint_callback,
            LearningRateMonitor(logging_interval='epoch'),
            TQDMProgressBar(),
            StochasticWeightAveraging(swa_lrs=1e-5, swa_epoch_start=40, annealing_epochs=15),
            EarlyStopping(monitor='train_loss', patience=20, mode='min')
        ],
    )
    trainer.fit(model, dl_train)
    trainer.test(model, dl_val)
    return model.eval()

def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    
    del solution[row_id_column_name]
    del submission[row_id_column_name]
    
    event_label = 'efs'
    interval_label = 'efs_time'
    prediction_label = 'prediction'
    for col in submission.columns:
        if not pd.api.types.is_numeric_dtype(submission[col]):
            raise Exception(f'Submission column {col} must be a number')
    # Merging solution and submission dfs on ID
    merged_df = pd.concat([solution, submission], axis=1)
    merged_df.reset_index(inplace=True)
    merged_df_race_dict = dict(merged_df.groupby(['race_group']).groups)
    metric_list = []
    for race in merged_df_race_dict.keys():
        # Retrieving values from y_test based on index
        indices = sorted(merged_df_race_dict[race])
        merged_df_race = merged_df.iloc[indices]
        # Calculate the concordance index
        c_index_race = concordance_index(
                        merged_df_race[interval_label],
                        -merged_df_race[prediction_label],
                        merged_df_race[event_label])
        metric_list.append(c_index_race)
    return float(np.mean(metric_list)-np.sqrt(np.var(metric_list)))

Seed set to 42


In [12]:
hparams = None
pairwise_ranking_pred, pairwise_ranking_oof = main(hparams)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 2.0 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
161 K     Total params
0.645     Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 2.0 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
161 K     Total params
0.646     Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 2.0 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
161 K     Total params
0.645     Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 2.0 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
161 K     Total params
0.645     Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 2.0 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
161 K     Total params
0.645     Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

In [14]:
def stratified_cindex_validation_data(pairwise_ranking_oof):
    test, train = load_data()

    y_true = train[["ID","efs","efs_time","race_group"]].copy()
    y_pred = train[["ID"]].copy()
    y_pred["prediction"] = pairwise_ranking_oof
    m = score(y_true.copy(), y_pred.copy(), "ID")
    print(f"\nPairwise ranking NN CV =", m)
    return m

def optimize_params(hparams):
    _, pairwise_ranking_oof = main(hparams)
    m = stratified_cindex_validation_data(pairwise_ranking_oof)
    return m

In [14]:
import optuna
from IPython.display import clear_output

def objective(trial):
    # Define hyperparameter search space
    hparams = {
        "embedding_dim": trial.suggest_int("embedding_dim", 8, 32),
        "projection_dim": trial.suggest_int("projection_dim", 64, 256),
        "hidden_dim": trial.suggest_int("hidden_dim", 32, 128),
        "aux_weight": trial.suggest_float("aux_weight", 0.1, 0.5),
        "margin": trial.suggest_float("margin", 0.1, 1.0),
        "weight_decay": trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True),
        "lr": trial.suggest_float("lr", 1e-4, 1e-2, log=True),
        "dropout": trial.suggest_float("dropout", 0.0, 0.5),
    }
    
    # Modify your main function to accept hparams and return c-index
    stratified_cindex = optimize_params(hparams)
    return stratified_cindex

# Run optimization
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50,
               show_progress_bar=True,  # Shows a progress bar instead of many print lines
                callbacks=[
                    lambda study, trial: print(f"\nTrial {trial.number}: {trial.value}")
                ])

print(f"Best parameters: {study.best_params}")
print(f"Best stratified c-index: {study.best_value}")

[I 2025-02-26 18:16:02,044] A new study created in memory with name: no-name-5382b2eb-f38a-4da4-b91b-ea0c900c752f


  0%|          | 0/50 [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6800724940282228
[I 2025-02-26 18:23:54,887] Trial 0 finished with value: 0.6800724940282228 and parameters: {'embedding_dim': 30, 'projection_dim': 240, 'hidden_dim': 105, 'aux_weight': 0.15712834848045049, 'margin': 0.9608176809795179, 'weight_decay': 0.004223741171036505, 'lr': 0.00029586763869969074, 'dropout': 0.11247727607431562}. Best is trial 0 with value: 0.6800724940282228.

Trial 0: 0.6800724940282228


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6798099569893358
[I 2025-02-26 18:32:07,086] Trial 1 finished with value: 0.6798099569893358 and parameters: {'embedding_dim': 14, 'projection_dim': 143, 'hidden_dim': 68, 'aux_weight': 0.4391691564893384, 'margin': 0.1866688480933515, 'weight_decay': 0.0005986552495872608, 'lr': 0.001845939288242923, 'dropout': 0.022812781390487702}. Best is trial 0 with value: 0.6800724940282228.

Trial 1: 0.6798099569893358


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6797258351708004
[I 2025-02-26 18:40:00,232] Trial 2 finished with value: 0.6797258351708004 and parameters: {'embedding_dim': 11, 'projection_dim': 231, 'hidden_dim': 95, 'aux_weight': 0.352199678633626, 'margin': 0.49678653309322707, 'weight_decay': 0.008558028637928502, 'lr': 0.00016271661902044854, 'dropout': 0.40565773746469413}. Best is trial 0 with value: 0.6800724940282228.

Trial 2: 0.6797258351708004


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6777834718385501
[I 2025-02-26 18:47:09,780] Trial 3 finished with value: 0.6777834718385501 and parameters: {'embedding_dim': 10, 'projection_dim': 176, 'hidden_dim': 87, 'aux_weight': 0.1778971719999119, 'margin': 0.8988689148196094, 'weight_decay': 4.5413464627185146e-06, 'lr': 0.00018296743004270202, 'dropout': 0.2120331127484864}. Best is trial 0 with value: 0.6800724940282228.

Trial 3: 0.6777834718385501


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6793054106797468
[I 2025-02-26 18:54:35,348] Trial 4 finished with value: 0.6793054106797468 and parameters: {'embedding_dim': 14, 'projection_dim': 228, 'hidden_dim': 126, 'aux_weight': 0.31037764504720555, 'margin': 0.5817526642641889, 'weight_decay': 2.8851839068274573e-05, 'lr': 0.00249408116493866, 'dropout': 0.22615817477104672}. Best is trial 0 with value: 0.6800724940282228.

Trial 4: 0.6793054106797468


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6787955228493681
[I 2025-02-26 19:03:01,720] Trial 5 finished with value: 0.6787955228493681 and parameters: {'embedding_dim': 13, 'projection_dim': 159, 'hidden_dim': 36, 'aux_weight': 0.319599886562191, 'margin': 0.8379096635940826, 'weight_decay': 8.86392029564351e-06, 'lr': 0.003231973127641365, 'dropout': 0.27567448667095584}. Best is trial 0 with value: 0.6800724940282228.

Trial 5: 0.6787955228493681


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6789060990269473
[I 2025-02-26 19:10:21,936] Trial 6 finished with value: 0.6789060990269473 and parameters: {'embedding_dim': 26, 'projection_dim': 237, 'hidden_dim': 67, 'aux_weight': 0.3635562177512952, 'margin': 0.8903441498913324, 'weight_decay': 3.9441330964928366e-05, 'lr': 0.00020217997868899137, 'dropout': 0.12970407412674878}. Best is trial 0 with value: 0.6800724940282228.

Trial 6: 0.6789060990269473


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.678980568902933
[I 2025-02-26 19:18:22,283] Trial 7 finished with value: 0.678980568902933 and parameters: {'embedding_dim': 11, 'projection_dim': 67, 'hidden_dim': 99, 'aux_weight': 0.4074467663169409, 'margin': 0.22018982159102723, 'weight_decay': 0.0008998708087363969, 'lr': 0.0003731822194356349, 'dropout': 0.2520245054721797}. Best is trial 0 with value: 0.6800724940282228.

Trial 7: 0.678980568902933


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6788312321231451
[I 2025-02-26 19:26:05,920] Trial 8 finished with value: 0.6788312321231451 and parameters: {'embedding_dim': 17, 'projection_dim': 218, 'hidden_dim': 54, 'aux_weight': 0.18991817698751579, 'margin': 0.6821247162886557, 'weight_decay': 0.0001557774257257226, 'lr': 0.002734738988830341, 'dropout': 0.2003056312223025}. Best is trial 0 with value: 0.6800724940282228.

Trial 8: 0.6788312321231451


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6808045089032465
[I 2025-02-26 19:34:38,338] Trial 9 finished with value: 0.6808045089032465 and parameters: {'embedding_dim': 13, 'projection_dim': 198, 'hidden_dim': 115, 'aux_weight': 0.4290855914439119, 'margin': 0.8852731846052492, 'weight_decay': 0.00029090127727058067, 'lr': 0.0060136681355904, 'dropout': 0.15291353050658352}. Best is trial 9 with value: 0.6808045089032465.

Trial 9: 0.6808045089032465


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6788171097665676
[I 2025-02-26 19:42:18,416] Trial 10 finished with value: 0.6788171097665676 and parameters: {'embedding_dim': 22, 'projection_dim': 112, 'hidden_dim': 127, 'aux_weight': 0.4921601287320309, 'margin': 0.42539856787780767, 'weight_decay': 1.63809515332366e-06, 'lr': 0.008177169215006233, 'dropout': 0.45378052683237735}. Best is trial 9 with value: 0.6808045089032465.

Trial 10: 0.6788171097665676


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6788282767983042
[I 2025-02-26 19:50:35,981] Trial 11 finished with value: 0.6788282767983042 and parameters: {'embedding_dim': 32, 'projection_dim': 187, 'hidden_dim': 110, 'aux_weight': 0.10365088890897545, 'margin': 0.9604399446124344, 'weight_decay': 0.005997995537046484, 'lr': 0.0006919358236842963, 'dropout': 0.06242235285432793}. Best is trial 9 with value: 0.6808045089032465.

Trial 11: 0.6788282767983042


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6776870189812133
[I 2025-02-26 19:58:53,556] Trial 12 finished with value: 0.6776870189812133 and parameters: {'embedding_dim': 32, 'projection_dim': 255, 'hidden_dim': 110, 'aux_weight': 0.2416444297874673, 'margin': 0.7562774717181354, 'weight_decay': 0.0012736898499406321, 'lr': 0.007494562144012775, 'dropout': 0.11634853400492429}. Best is trial 9 with value: 0.6808045089032465.

Trial 12: 0.6776870189812133


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6805045234157342
[I 2025-02-26 20:06:42,022] Trial 13 finished with value: 0.6805045234157342 and parameters: {'embedding_dim': 25, 'projection_dim': 200, 'hidden_dim': 109, 'aux_weight': 0.2508786425704015, 'margin': 0.7286449119683158, 'weight_decay': 0.000215335025044513, 'lr': 0.0006553545011199341, 'dropout': 0.12978527572271203}. Best is trial 9 with value: 0.6808045089032465.

Trial 13: 0.6805045234157342


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6779287732382342
[I 2025-02-26 20:14:40,035] Trial 14 finished with value: 0.6779287732382342 and parameters: {'embedding_dim': 21, 'projection_dim': 196, 'hidden_dim': 117, 'aux_weight': 0.24336496857498324, 'margin': 0.6986161958310311, 'weight_decay': 0.00018867226039255368, 'lr': 0.0009290109113197278, 'dropout': 0.33051904718074876}. Best is trial 9 with value: 0.6808045089032465.

Trial 14: 0.6779287732382342


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6811979879773888
[I 2025-02-26 20:22:43,990] Trial 15 finished with value: 0.6811979879773888 and parameters: {'embedding_dim': 25, 'projection_dim': 206, 'hidden_dim': 83, 'aux_weight': 0.4978234572218914, 'margin': 0.7890235475620722, 'weight_decay': 0.00034140596768033444, 'lr': 0.0005283367382461206, 'dropout': 0.17065510518579707}. Best is trial 15 with value: 0.6811979879773888.

Trial 15: 0.6811979879773888


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6792521284193168
[I 2025-02-26 20:30:32,449] Trial 16 finished with value: 0.6792521284193168 and parameters: {'embedding_dim': 17, 'projection_dim': 137, 'hidden_dim': 81, 'aux_weight': 0.4840594456947372, 'margin': 0.8073708397568709, 'weight_decay': 4.5731431969886056e-05, 'lr': 0.001350668515888514, 'dropout': 0.3330903392156518}. Best is trial 15 with value: 0.6811979879773888.

Trial 16: 0.6792521284193168


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6805593979165128
[I 2025-02-26 20:38:43,177] Trial 17 finished with value: 0.6805593979165128 and parameters: {'embedding_dim': 26, 'projection_dim': 206, 'hidden_dim': 61, 'aux_weight': 0.4314171369249499, 'margin': 0.5971506443551877, 'weight_decay': 0.00046122912916684715, 'lr': 0.004466250635601773, 'dropout': 0.1907964238666164}. Best is trial 15 with value: 0.6811979879773888.

Trial 17: 0.6805593979165128


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6800622828992401
[I 2025-02-26 20:47:09,198] Trial 18 finished with value: 0.6800622828992401 and parameters: {'embedding_dim': 18, 'projection_dim': 170, 'hidden_dim': 40, 'aux_weight': 0.45989292461829423, 'margin': 0.34828039416440326, 'weight_decay': 0.002011008012633458, 'lr': 0.0004458881426621057, 'dropout': 0.014471195089843736}. Best is trial 15 with value: 0.6811979879773888.

Trial 18: 0.6800622828992401


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6816997272770401
[I 2025-02-26 20:54:59,490] Trial 19 finished with value: 0.6816997272770401 and parameters: {'embedding_dim': 8, 'projection_dim': 107, 'hidden_dim': 89, 'aux_weight': 0.3907146473659543, 'margin': 0.9819812451631065, 'weight_decay': 0.00035651832909954314, 'lr': 0.0046512245656074366, 'dropout': 0.1657096285613812}. Best is trial 19 with value: 0.6816997272770401.

Trial 19: 0.6816997272770401


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6791234534935909
[I 2025-02-26 21:02:48,711] Trial 20 finished with value: 0.6791234534935909 and parameters: {'embedding_dim': 8, 'projection_dim': 92, 'hidden_dim': 77, 'aux_weight': 0.3868838261894879, 'margin': 0.9891718174736244, 'weight_decay': 7.706931887542731e-05, 'lr': 0.00011996077287696694, 'dropout': 0.30041332413755273}. Best is trial 19 with value: 0.6816997272770401.

Trial 20: 0.6791234534935909


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6818122984239037
[I 2025-02-26 21:09:57,546] Trial 21 finished with value: 0.6818122984239037 and parameters: {'embedding_dim': 8, 'projection_dim': 129, 'hidden_dim': 92, 'aux_weight': 0.4095237944795065, 'margin': 0.8532839214126697, 'weight_decay': 0.0003721117926250429, 'lr': 0.005048346217566541, 'dropout': 0.16621343699947616}. Best is trial 21 with value: 0.6818122984239037.

Trial 21: 0.6818122984239037


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6806754171692383
[I 2025-02-26 21:17:29,898] Trial 22 finished with value: 0.6806754171692383 and parameters: {'embedding_dim': 8, 'projection_dim': 115, 'hidden_dim': 90, 'aux_weight': 0.3862279168953995, 'margin': 0.8163260241317305, 'weight_decay': 0.0019893328353369154, 'lr': 0.0046577173585763895, 'dropout': 0.08178906096375926}. Best is trial 21 with value: 0.6818122984239037.

Trial 22: 0.6806754171692383


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6820244091207405
[I 2025-02-26 21:24:23,482] Trial 23 finished with value: 0.6820244091207405 and parameters: {'embedding_dim': 23, 'projection_dim': 118, 'hidden_dim': 78, 'aux_weight': 0.4641625901175085, 'margin': 0.6260627526465252, 'weight_decay': 0.00010135585004468713, 'lr': 0.0015653665132187551, 'dropout': 0.17485849482784396}. Best is trial 23 with value: 0.6820244091207405.

Trial 23: 0.6820244091207405


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6795683442188706
[I 2025-02-26 21:31:38,114] Trial 24 finished with value: 0.6795683442188706 and parameters: {'embedding_dim': 23, 'projection_dim': 107, 'hidden_dim': 73, 'aux_weight': 0.3372957168660773, 'margin': 0.6346699462318074, 'weight_decay': 0.00011939701399957672, 'lr': 0.001544242530239019, 'dropout': 0.07086397067775192}. Best is trial 23 with value: 0.6820244091207405.

Trial 24: 0.6795683442188706


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6806474204538481
[I 2025-02-26 21:39:02,921] Trial 25 finished with value: 0.6806474204538481 and parameters: {'embedding_dim': 20, 'projection_dim': 133, 'hidden_dim': 98, 'aux_weight': 0.4573176762958452, 'margin': 0.4945620190463286, 'weight_decay': 1.5966058016800212e-05, 'lr': 0.009878871531586743, 'dropout': 0.16512406826864715}. Best is trial 23 with value: 0.6820244091207405.

Trial 25: 0.6806474204538481


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6791245835065122
[I 2025-02-26 21:46:12,923] Trial 26 finished with value: 0.6791245835065122 and parameters: {'embedding_dim': 28, 'projection_dim': 82, 'hidden_dim': 91, 'aux_weight': 0.40118287926168994, 'margin': 0.2982901744073447, 'weight_decay': 8.355018596905676e-05, 'lr': 0.004208085688901418, 'dropout': 0.22824195135963268}. Best is trial 23 with value: 0.6820244091207405.

Trial 26: 0.6791245835065122


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6792202340777307
[I 2025-02-26 21:54:00,293] Trial 27 finished with value: 0.6792202340777307 and parameters: {'embedding_dim': 9, 'projection_dim': 125, 'hidden_dim': 51, 'aux_weight': 0.2719007655715617, 'margin': 0.9174556795024477, 'weight_decay': 0.0006690796887720597, 'lr': 0.002029346970991433, 'dropout': 0.2594541447722841}. Best is trial 23 with value: 0.6820244091207405.

Trial 27: 0.6792202340777307


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6794035462376096
[I 2025-02-26 22:02:03,100] Trial 28 finished with value: 0.6794035462376096 and parameters: {'embedding_dim': 18, 'projection_dim': 95, 'hidden_dim': 72, 'aux_weight': 0.4591637583102422, 'margin': 0.11611675630614082, 'weight_decay': 6.949866133631621e-05, 'lr': 0.003394061217180032, 'dropout': 0.17444734293525901}. Best is trial 23 with value: 0.6820244091207405.

Trial 28: 0.6794035462376096


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6799252075074557
[I 2025-02-26 22:09:58,145] Trial 29 finished with value: 0.6799252075074557 and parameters: {'embedding_dim': 15, 'projection_dim': 151, 'hidden_dim': 102, 'aux_weight': 0.37349418171073334, 'margin': 0.963974714178753, 'weight_decay': 0.0024172455681115623, 'lr': 0.0012803334779399344, 'dropout': 0.10250735408236412}. Best is trial 23 with value: 0.6820244091207405.

Trial 29: 0.6799252075074557


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6786431962636943
[I 2025-02-26 22:18:13,558] Trial 30 finished with value: 0.6786431962636943 and parameters: {'embedding_dim': 29, 'projection_dim': 124, 'hidden_dim': 85, 'aux_weight': 0.40897299515865104, 'margin': 0.6563106574158867, 'weight_decay': 1.7708330711900645e-05, 'lr': 0.005458459544047035, 'dropout': 0.05401342423850615}. Best is trial 23 with value: 0.6820244091207405.

Trial 30: 0.6786431962636943


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.680651313441425
[I 2025-02-26 22:26:11,306] Trial 31 finished with value: 0.680651313441425 and parameters: {'embedding_dim': 24, 'projection_dim': 92, 'hidden_dim': 81, 'aux_weight': 0.4862964045318025, 'margin': 0.764032757211492, 'weight_decay': 0.0003379121251720542, 'lr': 0.0009018974055440278, 'dropout': 0.1451091106410867}. Best is trial 23 with value: 0.6820244091207405.

Trial 31: 0.680651313441425


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6800872327898841
[I 2025-02-26 22:33:38,465] Trial 32 finished with value: 0.6800872327898841 and parameters: {'embedding_dim': 27, 'projection_dim': 148, 'hidden_dim': 94, 'aux_weight': 0.49766587645589927, 'margin': 0.8448695435072816, 'weight_decay': 0.00039605517924119207, 'lr': 0.0005220291010234726, 'dropout': 0.18248290139200732}. Best is trial 23 with value: 0.6820244091207405.

Trial 32: 0.6800872327898841


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6804709274392098
[I 2025-02-26 22:41:42,380] Trial 33 finished with value: 0.6804709274392098 and parameters: {'embedding_dim': 20, 'projection_dim': 105, 'hidden_dim': 84, 'aux_weight': 0.44346954828417223, 'margin': 0.8033763722205869, 'weight_decay': 0.0011072033383749326, 'lr': 0.00033678023673380005, 'dropout': 0.0921002218191494}. Best is trial 23 with value: 0.6820244091207405.

Trial 33: 0.6804709274392098


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6807748917048189
[I 2025-02-26 22:50:20,503] Trial 34 finished with value: 0.6807748917048189 and parameters: {'embedding_dim': 11, 'projection_dim': 124, 'hidden_dim': 63, 'aux_weight': 0.42276992681212094, 'margin': 0.5381410963906207, 'weight_decay': 0.0006377322240224892, 'lr': 0.002037888033261566, 'dropout': 0.21611484784593177}. Best is trial 23 with value: 0.6820244091207405.

Trial 34: 0.6807748917048189


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6809859640794077
[I 2025-02-26 22:58:36,635] Trial 35 finished with value: 0.6809859640794077 and parameters: {'embedding_dim': 23, 'projection_dim': 72, 'hidden_dim': 76, 'aux_weight': 0.4703926588605066, 'margin': 0.9292939403071698, 'weight_decay': 0.00024566568765422545, 'lr': 0.00027747034651176246, 'dropout': 0.15856793945592892}. Best is trial 23 with value: 0.6820244091207405.

Trial 35: 0.6809859640794077


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6802703803975462
[I 2025-02-26 23:06:20,510] Trial 36 finished with value: 0.6802703803975462 and parameters: {'embedding_dim': 10, 'projection_dim': 167, 'hidden_dim': 89, 'aux_weight': 0.3443824887683466, 'margin': 0.9970171902599937, 'weight_decay': 0.00013713272424810145, 'lr': 0.006586301856236868, 'dropout': 0.29103813594287475}. Best is trial 23 with value: 0.6820244091207405.

Trial 36: 0.6802703803975462


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6812687404473781
[I 2025-02-26 23:14:12,782] Trial 37 finished with value: 0.6812687404473781 and parameters: {'embedding_dim': 30, 'projection_dim': 181, 'hidden_dim': 94, 'aux_weight': 0.28524021387388265, 'margin': 0.8492838639862331, 'weight_decay': 0.0005037664508981855, 'lr': 0.002917940383016524, 'dropout': 0.23451703245537156}. Best is trial 23 with value: 0.6820244091207405.

Trial 37: 0.6812687404473781


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6800782653361446
[I 2025-02-26 23:22:10,691] Trial 38 finished with value: 0.6800782653361446 and parameters: {'embedding_dim': 30, 'projection_dim': 152, 'hidden_dim': 105, 'aux_weight': 0.2893947808513633, 'margin': 0.8745698609810394, 'weight_decay': 4.8669213649108236e-05, 'lr': 0.0032680497990978125, 'dropout': 0.23923359103045627}. Best is trial 23 with value: 0.6820244091207405.

Trial 38: 0.6800782653361446


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6817841235297588
[I 2025-02-26 23:29:52,312] Trial 39 finished with value: 0.6817841235297588 and parameters: {'embedding_dim': 12, 'projection_dim': 135, 'hidden_dim': 95, 'aux_weight': 0.3190744138687067, 'margin': 0.8598081395110753, 'weight_decay': 0.0034744550765317367, 'lr': 0.0024656582315228143, 'dropout': 0.3861699100193599}. Best is trial 23 with value: 0.6820244091207405.

Trial 39: 0.6817841235297588


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6782121755491153
[I 2025-02-26 23:38:02,223] Trial 40 finished with value: 0.6782121755491153 and parameters: {'embedding_dim': 12, 'projection_dim': 137, 'hidden_dim': 101, 'aux_weight': 0.31580131242491716, 'margin': 0.9315726203080299, 'weight_decay': 0.008802080276238015, 'lr': 0.002326241865969883, 'dropout': 0.3956861211625785}. Best is trial 23 with value: 0.6820244091207405.

Trial 40: 0.6782121755491153


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6799447133386853
[I 2025-02-26 23:45:48,054] Trial 41 finished with value: 0.6799447133386853 and parameters: {'embedding_dim': 9, 'projection_dim': 177, 'hidden_dim': 94, 'aux_weight': 0.3286932390734366, 'margin': 0.8736837733688049, 'weight_decay': 0.0041604346319168125, 'lr': 0.002803207897400107, 'dropout': 0.47180595836383726}. Best is trial 23 with value: 0.6820244091207405.

Trial 41: 0.6799447133386853


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6796632090803645
[I 2025-02-26 23:53:52,038] Trial 42 finished with value: 0.6796632090803645 and parameters: {'embedding_dim': 15, 'projection_dim': 160, 'hidden_dim': 96, 'aux_weight': 0.2943778913491925, 'margin': 0.8423756124612997, 'weight_decay': 0.003428861943223065, 'lr': 0.0016104072780315423, 'dropout': 0.35683075093765193}. Best is trial 23 with value: 0.6820244091207405.

Trial 42: 0.6796632090803645


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6803338302096832
[I 2025-02-27 00:02:05,843] Trial 43 finished with value: 0.6803338302096832 and parameters: {'embedding_dim': 13, 'projection_dim': 102, 'hidden_dim': 90, 'aux_weight': 0.3625128771975467, 'margin': 0.7234823646377225, 'weight_decay': 0.0008618357830522933, 'lr': 0.0039528053416944195, 'dropout': 0.2021132188735787}. Best is trial 23 with value: 0.6820244091207405.

Trial 43: 0.6803338302096832


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6786416619698328
[I 2025-02-27 00:09:21,960] Trial 44 finished with value: 0.6786416619698328 and parameters: {'embedding_dim': 10, 'projection_dim': 114, 'hidden_dim': 87, 'aux_weight': 0.22392091180746468, 'margin': 0.95536532015765, 'weight_decay': 0.0013182573012286267, 'lr': 0.005367627926694362, 'dropout': 0.2378188072993046}. Best is trial 23 with value: 0.6820244091207405.

Trial 44: 0.6786416619698328


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6802097342767687
[I 2025-02-27 00:17:12,358] Trial 45 finished with value: 0.6802097342767687 and parameters: {'embedding_dim': 8, 'projection_dim': 130, 'hidden_dim': 68, 'aux_weight': 0.26644481753814253, 'margin': 0.8949338209743152, 'weight_decay': 2.7461358042496947e-05, 'lr': 0.002959123984139793, 'dropout': 0.2755847220477451}. Best is trial 23 with value: 0.6820244091207405.

Trial 45: 0.6802097342767687


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6804503800155126
[I 2025-02-27 00:25:47,370] Trial 46 finished with value: 0.6804503800155126 and parameters: {'embedding_dim': 12, 'projection_dim': 144, 'hidden_dim': 120, 'aux_weight': 0.21361287800559542, 'margin': 0.4402107830003529, 'weight_decay': 4.054117208169494e-06, 'lr': 0.0023554940499411667, 'dropout': 0.13252871265173866}. Best is trial 23 with value: 0.6820244091207405.

Trial 46: 0.6804503800155126


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6790533084603543
[I 2025-02-27 00:34:21,428] Trial 47 finished with value: 0.6790533084603543 and parameters: {'embedding_dim': 31, 'projection_dim': 182, 'hidden_dim': 104, 'aux_weight': 0.14426067175796767, 'margin': 0.7562881497747161, 'weight_decay': 0.0004913075487814468, 'lr': 0.0012252585405370755, 'dropout': 0.25695246720933707}. Best is trial 23 with value: 0.6820244091207405.

Trial 47: 0.6790533084603543


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.6788902167468903
[I 2025-02-27 00:41:34,121] Trial 48 finished with value: 0.6788902167468903 and parameters: {'embedding_dim': 16, 'projection_dim': 117, 'hidden_dim': 77, 'aux_weight': 0.4137741483326797, 'margin': 0.846186307049143, 'weight_decay': 0.00016853920882978796, 'lr': 0.0035504392111308223, 'dropout': 0.42699918454352775}. Best is trial 23 with value: 0.6820244091207405.

Trial 48: 0.6788902167468903


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | NN         | 159 K  | train
1 | aux_cls | Sequential | 1.6 K  | train
-----------------------------------------------
160 K     Trainable params
769       Non-trainable params
160 K     Total params
0.644     Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Swapping scheduler `CosineAnnealingLR` for `SWALR`
`Trainer.fit` stopped: `max_epochs=101` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]


Pairwise ranking NN CV = 0.678968439470664
[I 2025-02-27 00:49:15,041] Trial 49 finished with value: 0.678968439470664 and parameters: {'embedding_dim': 9, 'projection_dim': 157, 'hidden_dim': 94, 'aux_weight': 0.35169402928985105, 'margin': 0.6040526473187442, 'weight_decay': 0.00010661818166918106, 'lr': 0.007900703345730654, 'dropout': 0.21117476472323005}. Best is trial 23 with value: 0.6820244091207405.

Trial 49: 0.678968439470664
Best parameters: {'embedding_dim': 23, 'projection_dim': 118, 'hidden_dim': 78, 'aux_weight': 0.4641625901175085, 'margin': 0.6260627526465252, 'weight_decay': 0.00010135585004468713, 'lr': 0.0015653665132187551, 'dropout': 0.17485849482784396}
Best stratified c-index: 0.6820244091207405


In [15]:
print(f"Best stratified c-index: {study.best_value}")

Best stratified c-index: 0.6820244091207405


In [38]:
sub_data = pd.read_csv("./sample_submission.csv")
sub_data['prediction'] = pairwise_ranking_pred
sub_data.to_csv('submission.csv', index=False)
sub_data

Unnamed: 0,ID,prediction
0,28800,-2.241742
1,28801,0.139749
2,28802,-3.001781
