In [1]:
!pip install scanpy
!pip install leidenalg
!pip install optuna

Collecting scanpy
  Obtaining dependency information for scanpy from https://files.pythonhosted.org/packages/3b/72/436046ca332b933ca7d09cd45b86154232203e068e8307a102d5349e9444/scanpy-1.9.8-py3-none-any.whl.metadata
  Downloading scanpy-1.9.8-py3-none-any.whl.metadata (6.0 kB)
Collecting anndata>=0.7.4 (from scanpy)
  Obtaining dependency information for anndata>=0.7.4 from https://files.pythonhosted.org/packages/42/c7/23c2b5d3dd764650d235b574fdcac8d7252e6ae2f90273f65e95b9ca4e34/anndata-0.10.5.post1-py3-none-any.whl.metadata
  Downloading anndata-0.10.5.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting natsort (from scanpy)
  Obtaining dependency information for natsort from https://files.pythonhosted.org/packages/ef/82/7a9d0550484a62c6da82858ee9419f3dd1ccc9aa1c26a1e43da3ecd20b0d/natsort-8.4.0-py3-none-any.whl.metadata
  Downloading natsort-8.4.0-py3-none-any.whl.metadata (21 kB)
Collecting seaborn>=0.13.0 (from scanpy)
  Obtaining dependency information for seaborn>=0.13.0 from https

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import os
import random
import pickle
import sklearn
from scipy.spatial.distance import cdist
from scipy.spatial.distance import pdist, squareform
from typing import List
## Imports for plotting
import matplotlib.pyplot as plt
import seaborn as sns

## PyTorch
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

import pytorch_lightning as pl
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from collections import defaultdict
import optuna
import gc

In [None]:
import warnings
from sklearn.exceptions import DataConversionWarning

# Suppress DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')
SEED = 111
random.seed(SEED)
np.random.seed(SEED)

scanpy==1.9.8 anndata==0.10.5.post1 umap==0.5.5 numpy==1.24.3 scipy==1.11.4 pandas==2.0.3 scikit-learn==1.2.2 statsmodels==0.14.0 igraph==0.11.3 pynndescent==0.5.11


In [None]:
#dataset='Stephenson'
dataset='COMBAT'

## Data preprocessing

In [4]:
if dataset=='Stephenson':
    # Stephenson Dataset 
    adata = sc.read_h5ad("/kaggle/input/stephenson-et-al-covid-19-single-cell-dataset/Stephenson_processed.h5ad")
    donors = adata.obs['sample_id'].unique()

    # Splitting data into train-test-validation (cant perform cross-validation!)
    test_set= adata[adata.obs['Site']=='Sanger']
    # train_val_set= adata[~(adata.obs['Site']=='Cambridge')]
    train_set= adata[adata.obs['Site']=='Ncl']
    validation_set= adata[adata.obs['Site']=='Cambridge']
    test_patients_ids=test_set.obs['sample_id'].unique()
    layers = ['X_pca', 'X_scVI', 'X_scANVI']
else: 
    # COMBAT dataset 
    
    adata = sc.read_h5ad("/kaggle/input/a-blood-atlas-of-covid-19-combat-preprocessed/combat_processed.h5ad")
    donors = adata.obs['scRNASeq_sample_ID'].unique()

    # Splitting data into test and remaining data for cross-validation
    train_val_set_ids, test_ids = train_test_split(
        adata.obs['Pool_ID'].unique(), 
        test_size=0.2,  # 20% of data for testing
        random_state=42  # For reproducibility
    )

    train_val_set = adata[adata.obs['Pool_ID'].isin(train_val_set_ids)]
    test_set = adata[adata.obs['Pool_ID'].isin(test_ids)]
    test_patients_ids=test_set.obs['scRNASeq_sample_ID'].unique()
    train_val_unique_pool_ids = train_val_set.obs['Pool_ID'].unique()
    layers = ['X_pca', 'X_scVI_Pool_ID', 'X_scANVI_Pool_ID']

AnnData object with n_obs × n_vars = 783704 × 3000
    obs: 'Annotation_cluster_id', 'Annotation_cluster_name', 'Annotation_minor_subset', 'Annotation_major_subset', 'Annotation_cell_type', 'GEX_region', 'QC_ngenes', 'QC_total_UMI', 'QC_pct_mitochondrial', 'QC_scrub_doublet_scores', 'TCR_chain_composition', 'TCR_clone_ID', 'TCR_clone_count', 'TCR_clone_proportion', 'TCR_contains_unproductive', 'TCR_doublet', 'TCR_chain_TRA', 'TCR_v_gene_TRA', 'TCR_d_gene_TRA', 'TCR_j_gene_TRA', 'TCR_c_gene_TRA', 'TCR_productive_TRA', 'TCR_cdr3_TRA', 'TCR_umis_TRA', 'TCR_chain_TRA2', 'TCR_v_gene_TRA2', 'TCR_d_gene_TRA2', 'TCR_j_gene_TRA2', 'TCR_c_gene_TRA2', 'TCR_productive_TRA2', 'TCR_cdr3_TRA2', 'TCR_umis_TRA2', 'TCR_chain_TRB', 'TCR_v_gene_TRB', 'TCR_d_gene_TRB', 'TCR_j_gene_TRB', 'TCR_c_gene_TRB', 'TCR_productive_TRB', 'TCR_chain_TRB2', 'TCR_v_gene_TRB2', 'TCR_d_gene_TRB2', 'TCR_j_gene_TRB2', 'TCR_c_gene_TRB2', 'TCR_productive_TRB2', 'TCR_cdr3_TRB2', 'TCR_umis_TRB2', 'BCR_umis_HC', 'BCR_contig_qc_HC

In [5]:
# nr of patients in the dataset
len(donors)

140

## Contrastive learning

In this part of the notebook, we build a neutron network by adjusting the hyperparameters such as depth (adjustable by **num_layers_values**), number of neurons in a layer (**hidden_size_values**), match size (**batch_sizes**). We also select a different number of cells for each patient to build the pseudo-bulk, which is also an adjustable parameter.

In [6]:
# In this notebook, we use data loaders with heavier computational processing. It is recommended to use as many
# workers as possible in a data loader, which corresponds to the number of CPU cores
NUM_WORKERS = os.cpu_count()
  
# Setting the seed
pl.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)
print("Number of workers:", NUM_WORKERS)

Device: cpu
Number of workers: 4


In [11]:
from scipy.sparse import issparse
class SamplesDataset(Dataset):
    def __init__(self, data, unique_categories: List[str], sample_col: str, layer: str):
        """
        A dataset that holds sample cells based on unique categories.

        Args:
            data (YourDataType): The dataset containing the data.
            unique_categories (List[str]): List of unique category values.
            sample_col (str): The column indicating sample information.
            layer (str, optional): The data layer to be used. Defaults to "X_pca".
        """
        self.data = data
        self.unique_categories = unique_categories
        self.sample_cells = []
        self.sample_col = sample_col
        self.layer = layer
        
        for sample_id in self.unique_categories:
            sample = self.data.obsm[layer][self.data.obs[sample_col] == sample_id]
            # Convert sparse matrix to dense, if necessary
            if issparse(sample):
                sample = sample.toarray()  # Convert sparse to dense
            if len(sample) > 0:
                self.sample_cells.append(sample)

    def __len__(self):
        return len(self.sample_cells)

    def __getitem__(self, idx):
        return self.sample_cells[idx]
    

class TransformedPairDataset(Dataset):
    """Create two different subsamples based on one sample from the original dataset."""

    def __init__(self, dataset: Dataset, subset_size: int = 1000, validation = False):
        """
        Initialize a dataset with transformed pairs.
        Args:
            dataset (Dataset): The input dataset.
            subset_size (int, optional): Size of the transformed subset. Defaults to 1000.
        """
        self.dataset = dataset  # The input dataset
        self.subset_size = subset_size  # Size of the transformed subset
        self.validation = validation

    def __len__(self):
        """
        Get the length of the dataset.
        Returns:
            int: The length of the dataset.
        """
        return len(self.dataset)

    def __getitem__(self, idx):
        """
        Get a pair of transformed subsamples based on the original sample.
        Args:
            idx (int): Index of the original sample.
        Returns:
            tuple: A tuple containing two transformed subsamples.
        """
        sample = self.dataset[idx]  # Get the original sample
        
        ##########################
        # Randomly select indices for the transformed subsamples
        if self.validation:
            actual_subset_size = min(sample.shape[0], self.subset_size)
            random_indices_1 = np.random.choice(sample.shape[0], size=actual_subset_size)
            random_indices_2 = np.random.choice(sample.shape[0], size=actual_subset_size)
        else:
            random_indices_1 = np.random.choice(sample.shape[0], size=self.subset_size)
            random_indices_2 = np.random.choice(sample.shape[0], size=self.subset_size)
        ##########################
        
        # Compute the mean of the selected indices to create transformed subsamples
        random_sample_1 = sample[random_indices_1].mean(axis=0)
        random_sample_2 = sample[random_indices_2].mean(axis=0)
        
        return random_sample_1, random_sample_2


class DynamicNetwork(nn.Module):
    def __init__(self, n_input_features: int, n_output_features: int, num_layers: int, hidden_size: int):
        """
        Initialize a dynamic neural network.

        Args:
            n_input_features (int): Number of input features.
            n_output_features (int): Number of output features.
            num_layers (int): Number of hidden layers.
            hidden_size (int): Size of the hidden layers.
        """
        super().__init__()

        layers = []
        for _ in range(num_layers):
            
            # Add a linear layer followed by a ReLU activation
            layers.extend([
                nn.Linear(n_input_features, hidden_size),
                nn.ReLU()
            ])
            n_input_features = hidden_size  # Update the number of input features for the next layer

        # Add the final linear layer
        layers.append(nn.Linear(hidden_size, n_output_features))

        # Create a sequential module with the defined layers
        self.fc = nn.Sequential(*layers) # скелет

    def forward(self, x):
        """
        Forward pass through the neural network.
        Args:
            x (tensor): Input data.
        Returns:
            tensor: Output of the network.
        """
        return self.fc(x)
    
# # network with dropout
# class DynamicNetwork(nn.Module):
#     def __init__(self, n_input_features: int, n_output_features: int, num_layers: int, hidden_size: int):
#         """
#         Initialize a dynamic neural network.

#         Args:
#             n_input_features (int): Number of input features.
#             n_output_features (int): Number of output features.
#             num_layers (int): Number of hidden layers.
#             hidden_size (int): Size of the hidden layers.
#         """
#         super().__init__()

#         layers = []
#         for _ in range(num_layers):
#             # Add a linear layer followed by a ReLU activation and then Dropout
#             layers.extend([
#                 nn.Linear(n_input_features, hidden_size),
#                 nn.BatchNorm1d(hidden_size),
#                 nn.ReLU(),
#                 nn.Dropout(0.1)  # Dropout layer with a dropout probability of 0.2
#             ])
#             n_input_features = hidden_size  # Update the number of input features for the next layer

#         # Add the final linear layer without dropout right before output
#         layers.append(nn.Linear(hidden_size, n_output_features))

#         # Create a sequential module with the defined layers
#         self.fc = nn.Sequential(*layers)

#     def forward(self, x):
#         """
#         Forward pass through the neural network.
#         Args:
#             x (tensor): Input data.
#         Returns:
#             tensor: Output of the network.
#         """
#         return self.fc(x)

class InfoNCECauchy(nn.Module):
    def __init__(self, temperature):
        super().__init__()
        self.temperature = temperature

    def forward(self, features):
        
        # Calculate the batch size by dividing the total feature size by 2
        batch_size = features.size(0) // 2
        
        # Split the input features into two parts: a and b
        a = features[:batch_size]
        b = features[batch_size:]
        
        # Compute the pairwise similarities for same-domain (a-a) and different-domain (a-b, b-b) pairs
        sim_aa = 1 / (torch.cdist(a, a) * self.temperature).square().add(1)
        sim_bb = 1 / (torch.cdist(b, b) * self.temperature).square().add(1)
        sim_ab = 1 / (torch.cdist(a, b) * self.temperature).square().add(1)

        # Calculate the tempered alignment term using the logarithm of the diagonal of sim_ab
        
        tempered_alignment = torch.diag(sim_ab).log_().mean()
        # tempered_alignment = torch.diagonal_copy(sim_ab).log_().mean()

        # Exclude self inner product by masking the diagonal elements
        self_mask = torch.eye(batch_size, dtype=bool, device=sim_aa.device)
        sim_aa.masked_fill_(self_mask, 0.0)
        sim_bb.masked_fill_(self_mask, 0.0)
        
        # Calculate the logsumexp terms for both combinations of similarities
        logsumexp_1 = torch.hstack((sim_ab.T, sim_bb)).sum(1).log_().mean()
        logsumexp_2 = torch.hstack((sim_aa, sim_ab)).sum(1).log_().mean()
        
        # Compute the raw uniformity as the sum of logsumexp terms
        raw_uniformity = logsumexp_1 + logsumexp_2
        
        # Calculate the final loss as the negative difference between tempered alignment and raw uniformity
        loss = -(tempered_alignment - raw_uniformity / 2)
        return loss

In [15]:
!mkdir Models
!mkdir Representations

In [16]:
model_folder = 'Models/'
representation_folder = 'Representations/'

In [18]:
# optuna objective for COMBAT dataset
k_folds = 5
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)
num_epochs = 1000
def objective(trial):
    # Sample hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay",1e-7,1e-4 ,log=True)
    temperature=trial.suggest_float("temperature",0.01,1)
    batch_size = trial.suggest_int("batch_size", 4,16)
    num_layers = trial.suggest_int("num_layers", 1,4)
    hidden_size = trial.suggest_int("hidden_size", 30,400)
    cell_subset_size = trial.suggest_int("cell_subset_size", 100,6000)
    
    
    # Early stopping parameters
    patience = 10
    best_loss = np.inf
    epochs_no_improve = 0
    
    fold_validation_losses = []

    # Training loop
    for fold, (train_idx, val_idx) in enumerate(kf.split(train_val_unique_pool_ids)):
        train_pool_ids = train_val_unique_pool_ids[train_idx]
        val_pool_ids = train_val_unique_pool_ids[val_idx]
        train_set = train_val_set[train_val_set.obs['Pool_ID'].isin(train_pool_ids)]
        validation_set = train_val_set[train_val_set.obs['Pool_ID'].isin(val_pool_ids)]
        
        train_dataset = SamplesDataset(train_set, donors, sample_col="scRNASeq_sample_ID",layer=layer)
        validation_dataset = SamplesDataset(validation_set, donors, sample_col="scRNASeq_sample_ID",layer=layer)

        train_pairs_dataset = TransformedPairDataset(train_dataset, subset_size=cell_subset_size)
        train_dataloader = DataLoader(train_pairs_dataset, batch_size=batch_size, shuffle=True)

        validation_pairs_dataset = TransformedPairDataset(validation_dataset, subset_size=cell_subset_size, validation=True)
        validation_dataloader = DataLoader(validation_pairs_dataset, batch_size=batch_size, shuffle=True)

        # Model and optimizer for this fold
        network = DynamicNetwork(n_input_features=adata.obsm[layer].shape[1], n_output_features=30, num_layers=num_layers, hidden_size=hidden_size)
        network.to(device)
        optimizer = optim.Adam(network.parameters(), lr=learning_rate, weight_decay=weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, threshold=0.01)
        criterion = InfoNCECauchy(temperature)
        epoch_train_losses = [] # for each epoch
        epoch_validation_losses = [] # for each epoch       

        for epoch in range(num_epochs):
            training_loss = 0  # for each batch
            network.train()
            for batch_idx, (batch) in enumerate(train_dataloader):
                optimizer.zero_grad()
                data1, data2 = batch
                
                samples = torch.vstack((data1, data2)).to(device)
                features = network(samples)
                loss = criterion(features)
                loss.backward()
                optimizer.step()
                
                training_loss += loss.item()
            
            validation_loss = 0
            network.eval()
            with torch.no_grad():  # No gradient computation for validation pass
                for batch_idx, (batch) in enumerate(validation_dataloader):
                    data1, data2 = batch
                    samples = torch.vstack((data1, data2)).to(device)
                    features = network(samples)
                    loss = criterion(features)
                    validation_loss += loss.item()

            avg_training_loss = training_loss / len(train_dataloader)
            avg_validation_loss = validation_loss / len(validation_dataloader)
            epoch_train_losses.append(avg_training_loss)
            epoch_validation_losses.append(avg_validation_loss)
            
            # Early stopping check
            if avg_validation_loss < best_loss:
                best_loss = avg_validation_loss
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= patience:
                    print(f"Early stopping triggered at epoch {epoch + 1}")
                    break
            scheduler.step(avg_validation_loss)
        
        fold_validation_losses.append(np.mean(epoch_validation_losses))
        del train_dataset, validation_dataset, train_pairs_dataset, train_dataloader, validation_pairs_dataset, validation_dataloader, network, optimizer, scheduler
        torch.cuda.empty_cache()  # Clear CUDA cache
        gc.collect()
        
    average_validation_loss = np.mean(fold_validation_losses)
    return average_validation_loss

# Create a study object and specify the optimization direction
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)  # Run 100 trials

# Print the best hyperparameters
print('Best hyperparameters:', study.best_params)

# Convert all trials into a DataFrame
df_trials = study.trials_dataframe()

# Assuming 'minimize' direction; for 'maximize', use ascending=False
df_sorted = df_trials.sort_values(by="value", ascending=True)

# Select top N trials - Change N to your desired number of top trials
N = 5
top_n_trials = df_sorted.head(N)
# After reviewing the top N trials, you can proceed with reinitializing and retraining your model
# with the best hyperparameters if you decide that's the best course of action
best_trial = study.best_trial
best_params = best_trial.params
# After the study, identify and save the best trial
best_trial = study.best_trial
# Reinitialize and retrain the model with best hyperparameters
best_params = best_trial.params
# #########################################

[I 2024-03-04 22:41:14,651] A new study created in memory with name: no-name-5d7a5779-1932-45c7-b720-6349f8113202


Early stopping triggered at epoch 55
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:42:00,504] Trial 0 finished with value: 1.1281214681132272 and parameters: {'learning_rate': 0.0036471112054958735, 'weight_decay': 1.4361184324857036e-06, 'temperature': 0.7322186984904779, 'batch_size': 9, 'num_layers': 2, 'hidden_size': 63, 'cell_subset_size': 2145}. Best is trial 0 with value: 1.1281214681132272.


Early stopping triggered at epoch 48
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:42:37,972] Trial 1 finished with value: 0.26811918130765355 and parameters: {'learning_rate': 0.00918464301599831, 'weight_decay': 4.005625708860381e-07, 'temperature': 0.6926466819344572, 'batch_size': 6, 'num_layers': 1, 'hidden_size': 110, 'cell_subset_size': 1652}. Best is trial 1 with value: 0.26811918130765355.


Early stopping triggered at epoch 34
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:43:16,639] Trial 2 finished with value: 1.7450984387070523 and parameters: {'learning_rate': 0.00016412562689514706, 'weight_decay': 1.5037619612333564e-06, 'temperature': 0.62848784231472, 'batch_size': 6, 'num_layers': 4, 'hidden_size': 367, 'cell_subset_size': 3192}. Best is trial 1 with value: 0.26811918130765355.


Early stopping triggered at epoch 18
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:43:50,783] Trial 3 finished with value: 0.08148123732319584 and parameters: {'learning_rate': 0.06673363984784171, 'weight_decay': 3.929959093674805e-06, 'temperature': 0.5350428360346935, 'batch_size': 10, 'num_layers': 1, 'hidden_size': 198, 'cell_subset_size': 2237}. Best is trial 3 with value: 0.08148123732319584.


Early stopping triggered at epoch 21
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:44:25,673] Trial 4 finished with value: 0.12037784550465694 and parameters: {'learning_rate': 0.02525407469606807, 'weight_decay': 3.75983999087417e-07, 'temperature': 0.34267091395523763, 'batch_size': 4, 'num_layers': 2, 'hidden_size': 309, 'cell_subset_size': 1121}. Best is trial 3 with value: 0.08148123732319584.


Early stopping triggered at epoch 36
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:45:01,924] Trial 5 finished with value: 1.9456845505370033 and parameters: {'learning_rate': 0.002888920873268753, 'weight_decay': 2.9524497929003683e-05, 'temperature': 0.3018813615257709, 'batch_size': 15, 'num_layers': 3, 'hidden_size': 197, 'cell_subset_size': 2222}. Best is trial 3 with value: 0.08148123732319584.


Early stopping triggered at epoch 51
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:45:42,742] Trial 6 finished with value: 1.8211081329221819 and parameters: {'learning_rate': 0.00010325448304785051, 'weight_decay': 3.021418316832195e-06, 'temperature': 0.14667360748490502, 'batch_size': 5, 'num_layers': 3, 'hidden_size': 321, 'cell_subset_size': 1968}. Best is trial 3 with value: 0.08148123732319584.


Early stopping triggered at epoch 47
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:46:24,114] Trial 7 finished with value: 2.0378535000250695 and parameters: {'learning_rate': 0.00017287792924758368, 'weight_decay': 4.091334564689119e-05, 'temperature': 0.7490122978816659, 'batch_size': 8, 'num_layers': 2, 'hidden_size': 241, 'cell_subset_size': 5137}. Best is trial 3 with value: 0.08148123732319584.


Early stopping triggered at epoch 85
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:47:04,578] Trial 8 finished with value: 1.7794791991774943 and parameters: {'learning_rate': 0.0006364485631276072, 'weight_decay': 7.481051720850334e-07, 'temperature': 0.15345539347338824, 'batch_size': 13, 'num_layers': 2, 'hidden_size': 152, 'cell_subset_size': 1486}. Best is trial 3 with value: 0.08148123732319584.


Early stopping triggered at epoch 83
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:47:45,705] Trial 9 finished with value: 2.27132720923328 and parameters: {'learning_rate': 0.0006951233845308689, 'weight_decay': 2.4301114294929033e-07, 'temperature': 0.2642118764706883, 'batch_size': 10, 'num_layers': 2, 'hidden_size': 47, 'cell_subset_size': 2518}. Best is trial 3 with value: 0.08148123732319584.


Early stopping triggered at epoch 18
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:48:20,800] Trial 10 finished with value: 0.050046466898035116 and parameters: {'learning_rate': 0.09348428695946973, 'weight_decay': 9.567726612219439e-06, 'temperature': 0.9966805049303413, 'batch_size': 12, 'num_layers': 1, 'hidden_size': 240, 'cell_subset_size': 3890}. Best is trial 10 with value: 0.050046466898035116.


Early stopping triggered at epoch 21
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:48:58,199] Trial 11 finished with value: 0.06180847864302378 and parameters: {'learning_rate': 0.08913073232972729, 'weight_decay': 1.0686586245835531e-05, 'temperature': 0.9714149137145911, 'batch_size': 12, 'num_layers': 1, 'hidden_size': 247, 'cell_subset_size': 4172}. Best is trial 10 with value: 0.050046466898035116.


Early stopping triggered at epoch 29
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:49:37,303] Trial 12 finished with value: 0.04721635895213862 and parameters: {'learning_rate': 0.09879514181486068, 'weight_decay': 9.896740999809667e-06, 'temperature': 0.9780917055982087, 'batch_size': 13, 'num_layers': 1, 'hidden_size': 266, 'cell_subset_size': 4196}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 28
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:50:14,141] Trial 13 finished with value: 0.13630555399826597 and parameters: {'learning_rate': 0.0286493062974668, 'weight_decay': 1.1283390553831651e-05, 'temperature': 0.994130860986733, 'batch_size': 16, 'num_layers': 1, 'hidden_size': 293, 'cell_subset_size': 3925}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 25
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:50:50,420] Trial 14 finished with value: 0.06399410351117452 and parameters: {'learning_rate': 0.027129814856764155, 'weight_decay': 8.053096272353932e-05, 'temperature': 0.8694613894708785, 'batch_size': 13, 'num_layers': 1, 'hidden_size': 398, 'cell_subset_size': 5757}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 23
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 17
Early stopping triggered at epoch 1


[I 2024-03-04 22:51:28,711] Trial 15 finished with value: 0.24309456116708036 and parameters: {'learning_rate': 0.009435586536269564, 'weight_decay': 9.716345713705855e-06, 'temperature': 0.8597700363910165, 'batch_size': 12, 'num_layers': 4, 'hidden_size': 268, 'cell_subset_size': 4477}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 22
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:52:02,929] Trial 16 finished with value: 0.4499957702376626 and parameters: {'learning_rate': 0.09772952153752387, 'weight_decay': 5.046926835318114e-06, 'temperature': 0.8280683784198031, 'batch_size': 15, 'num_layers': 1, 'hidden_size': 172, 'cell_subset_size': 396}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 18
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:52:38,559] Trial 17 finished with value: 0.09337697735539188 and parameters: {'learning_rate': 0.038434682958609745, 'weight_decay': 2.4165091191375557e-05, 'temperature': 0.46623825236161803, 'batch_size': 12, 'num_layers': 3, 'hidden_size': 338, 'cell_subset_size': 3451}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 54
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:53:19,248] Trial 18 finished with value: 2.7438451536275723 and parameters: {'learning_rate': 0.00967568203103852, 'weight_decay': 1.1208707622636684e-07, 'temperature': 0.028234893114311765, 'batch_size': 14, 'num_layers': 1, 'hidden_size': 132, 'cell_subset_size': 5441}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 13
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:53:53,798] Trial 19 finished with value: 0.048002888606144836 and parameters: {'learning_rate': 0.05297126258470591, 'weight_decay': 9.853277664006084e-05, 'temperature': 0.9266480172015658, 'batch_size': 11, 'num_layers': 2, 'hidden_size': 230, 'cell_subset_size': 4842}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 30
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:54:31,421] Trial 20 finished with value: 0.23785195236404738 and parameters: {'learning_rate': 0.003935201852486402, 'weight_decay': 9.695814949372025e-05, 'temperature': 0.8764167923973665, 'batch_size': 8, 'num_layers': 2, 'hidden_size': 218, 'cell_subset_size': 4715}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 19
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:55:06,603] Trial 21 finished with value: 0.04942706969746372 and parameters: {'learning_rate': 0.04993910494741539, 'weight_decay': 1.807363162065634e-05, 'temperature': 0.9903099482702621, 'batch_size': 11, 'num_layers': 1, 'hidden_size': 273, 'cell_subset_size': 3593}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 23
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:55:43,518] Trial 22 finished with value: 0.060461448586505394 and parameters: {'learning_rate': 0.04742381765740066, 'weight_decay': 5.5045197033903366e-05, 'temperature': 0.9349081963180874, 'batch_size': 11, 'num_layers': 2, 'hidden_size': 268, 'cell_subset_size': 4947}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 38
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:56:20,163] Trial 23 finished with value: 0.12625900093923534 and parameters: {'learning_rate': 0.016418954163732277, 'weight_decay': 2.0425686037689265e-05, 'temperature': 0.7952073542782712, 'batch_size': 10, 'num_layers': 1, 'hidden_size': 274, 'cell_subset_size': 2825}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 24
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:56:56,433] Trial 24 finished with value: 0.17927414973576866 and parameters: {'learning_rate': 0.05073157948730621, 'weight_decay': 1.9302516288824846e-05, 'temperature': 0.6332430493306467, 'batch_size': 14, 'num_layers': 3, 'hidden_size': 339, 'cell_subset_size': 3548}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 31
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:57:33,425] Trial 25 finished with value: 0.10987580668541694 and parameters: {'learning_rate': 0.014922054385485045, 'weight_decay': 5.20463991247247e-05, 'temperature': 0.912285384489153, 'batch_size': 11, 'num_layers': 2, 'hidden_size': 211, 'cell_subset_size': 4322}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 20
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:58:09,312] Trial 26 finished with value: 0.07130466654896736 and parameters: {'learning_rate': 0.017212971409452688, 'weight_decay': 5.3882816256778376e-06, 'temperature': 0.7920873663679148, 'batch_size': 8, 'num_layers': 1, 'hidden_size': 299, 'cell_subset_size': 5994}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 57
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:58:50,087] Trial 27 finished with value: 1.1878126693312188 and parameters: {'learning_rate': 0.0017344163443388197, 'weight_decay': 2.3307516809027138e-06, 'temperature': 0.919412360767027, 'batch_size': 11, 'num_layers': 1, 'hidden_size': 181, 'cell_subset_size': 5276}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 23
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 22:59:24,968] Trial 28 finished with value: 0.059364365149235376 and parameters: {'learning_rate': 0.05458240384067068, 'weight_decay': 3.501630941896645e-05, 'temperature': 0.6664386162888403, 'batch_size': 13, 'num_layers': 2, 'hidden_size': 230, 'cell_subset_size': 2951}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 25
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:00:01,857] Trial 29 finished with value: 0.14733468821644782 and parameters: {'learning_rate': 0.005635367505949122, 'weight_decay': 6.941507413773244e-06, 'temperature': 0.5694778008981042, 'batch_size': 9, 'num_layers': 2, 'hidden_size': 288, 'cell_subset_size': 4730}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 35
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:00:39,924] Trial 30 finished with value: 0.10066054480416434 and parameters: {'learning_rate': 0.029186180763200974, 'weight_decay': 1.5311298921898366e-05, 'temperature': 0.4344495672850527, 'batch_size': 9, 'num_layers': 3, 'hidden_size': 85, 'cell_subset_size': 3692}. Best is trial 12 with value: 0.04721635895213862.


Early stopping triggered at epoch 23
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:01:15,124] Trial 31 finished with value: 0.039195928366287894 and parameters: {'learning_rate': 0.08966026268995395, 'weight_decay': 7.561993091529313e-06, 'temperature': 0.9930746844816444, 'batch_size': 12, 'num_layers': 1, 'hidden_size': 255, 'cell_subset_size': 3919}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 29
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:01:51,899] Trial 32 finished with value: 0.039533343397337814 and parameters: {'learning_rate': 0.06495115583492657, 'weight_decay': 1.5396883974374368e-05, 'temperature': 0.734414972518975, 'batch_size': 11, 'num_layers': 1, 'hidden_size': 256, 'cell_subset_size': 4186}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 19
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:02:27,309] Trial 33 finished with value: 0.0551784919019331 and parameters: {'learning_rate': 0.07119976042969488, 'weight_decay': 1.6617612532743147e-06, 'temperature': 0.750014003689442, 'batch_size': 13, 'num_layers': 1, 'hidden_size': 261, 'cell_subset_size': 4141}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 35
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:03:04,131] Trial 34 finished with value: 0.1025350248245966 and parameters: {'learning_rate': 0.036608762532662285, 'weight_decay': 6.365740371009903e-06, 'temperature': 0.7135416683302329, 'batch_size': 10, 'num_layers': 1, 'hidden_size': 228, 'cell_subset_size': 3263}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 24
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:03:39,770] Trial 35 finished with value: 0.09593905607859295 and parameters: {'learning_rate': 0.06965848469151797, 'weight_decay': 9.989164841841557e-07, 'temperature': 0.927530317527165, 'batch_size': 14, 'num_layers': 1, 'hidden_size': 330, 'cell_subset_size': 4419}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 26
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:04:17,154] Trial 36 finished with value: 0.14291027876046986 and parameters: {'learning_rate': 0.020203374320144386, 'weight_decay': 3.5039645636658506e-06, 'temperature': 0.8110922164581021, 'batch_size': 7, 'num_layers': 4, 'hidden_size': 362, 'cell_subset_size': 4753}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 28
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:04:54,384] Trial 37 finished with value: 0.05222228197824388 and parameters: {'learning_rate': 0.06725904276970852, 'weight_decay': 6.723486136929008e-05, 'temperature': 0.8847335625349407, 'batch_size': 12, 'num_layers': 2, 'hidden_size': 196, 'cell_subset_size': 5531}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 19
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:05:29,670] Trial 38 finished with value: 0.1403066735518606 and parameters: {'learning_rate': 0.037077911837420825, 'weight_decay': 1.2719572897262861e-05, 'temperature': 0.601538786480267, 'batch_size': 15, 'num_layers': 1, 'hidden_size': 251, 'cell_subset_size': 3968}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 21
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:06:05,654] Trial 39 finished with value: 0.08783584322248186 and parameters: {'learning_rate': 0.095774817101301, 'weight_decay': 2.4294120791235663e-06, 'temperature': 0.7547373392107961, 'batch_size': 10, 'num_layers': 2, 'hidden_size': 309, 'cell_subset_size': 5095}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 56
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:06:44,662] Trial 40 finished with value: 0.21496309474820183 and parameters: {'learning_rate': 0.013025475089082106, 'weight_decay': 3.289265305530264e-05, 'temperature': 0.9465300075202954, 'batch_size': 11, 'num_layers': 1, 'hidden_size': 161, 'cell_subset_size': 3099}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 15
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:07:19,107] Trial 41 finished with value: 0.06418427891201443 and parameters: {'learning_rate': 0.05025880612530433, 'weight_decay': 1.7282953457260154e-05, 'temperature': 0.9978204515476214, 'batch_size': 11, 'num_layers': 1, 'hidden_size': 275, 'cell_subset_size': 3718}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 36
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:07:55,287] Trial 42 finished with value: 0.0598589261372884 and parameters: {'learning_rate': 0.06492242683561923, 'weight_decay': 7.449889442619422e-06, 'temperature': 0.84218436627068, 'batch_size': 13, 'num_layers': 1, 'hidden_size': 207, 'cell_subset_size': 2704}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 31
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:08:31,008] Trial 43 finished with value: 0.07588590421984273 and parameters: {'learning_rate': 0.04004598299912218, 'weight_decay': 2.5552580284210245e-05, 'temperature': 0.9480370250389712, 'batch_size': 12, 'num_layers': 1, 'hidden_size': 253, 'cell_subset_size': 3298}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 32
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:09:08,620] Trial 44 finished with value: 0.06894901990890503 and parameters: {'learning_rate': 0.0244502761523261, 'weight_decay': 4.356591692789458e-05, 'temperature': 0.8938162491574742, 'batch_size': 9, 'num_layers': 1, 'hidden_size': 281, 'cell_subset_size': 4497}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 41
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:09:47,141] Trial 45 finished with value: 0.8586211417385232 and parameters: {'learning_rate': 0.0013616083001040215, 'weight_decay': 4.265238081045819e-06, 'temperature': 0.998997221312186, 'batch_size': 11, 'num_layers': 2, 'hidden_size': 312, 'cell_subset_size': 4125}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 55
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:10:27,056] Trial 46 finished with value: 0.06469578410639906 and parameters: {'learning_rate': 0.07453205178405521, 'weight_decay': 7.5302091974685995e-06, 'temperature': 0.6853316561255834, 'batch_size': 10, 'num_layers': 1, 'hidden_size': 230, 'cell_subset_size': 3770}. Best is trial 31 with value: 0.039195928366287894.


Early stopping triggered at epoch 38
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:11:06,888] Trial 47 finished with value: 0.03318614153037394 and parameters: {'learning_rate': 0.05534607518509512, 'weight_decay': 1.504458250647822e-05, 'temperature': 0.9552119667714386, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 187, 'cell_subset_size': 4872}. Best is trial 47 with value: 0.03318614153037394.


Early stopping triggered at epoch 28
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 19
Early stopping triggered at epoch 1


[I 2024-03-04 23:11:47,522] Trial 48 finished with value: 0.021833613332437007 and parameters: {'learning_rate': 0.09585055274993697, 'weight_decay': 9.463958100775882e-06, 'temperature': 0.8364018549200548, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 136, 'cell_subset_size': 5002}. Best is trial 48 with value: 0.021833613332437007.


Early stopping triggered at epoch 23
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:12:25,288] Trial 49 finished with value: 0.02360083568170204 and parameters: {'learning_rate': 0.09821917473071282, 'weight_decay': 7.822876319951038e-06, 'temperature': 0.7830574132564483, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 118, 'cell_subset_size': 5536}. Best is trial 48 with value: 0.021833613332437007.


Early stopping triggered at epoch 88
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:13:14,602] Trial 50 finished with value: 1.1816590047598658 and parameters: {'learning_rate': 0.0003834068892182148, 'weight_decay': 1.3245638843005849e-05, 'temperature': 0.7676771770277082, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 118, 'cell_subset_size': 5649}. Best is trial 48 with value: 0.021833613332437007.


Early stopping triggered at epoch 28
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:13:52,339] Trial 51 finished with value: 0.033844462746665596 and parameters: {'learning_rate': 0.09835063276626778, 'weight_decay': 8.533990727707893e-06, 'temperature': 0.8286652991600257, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 100, 'cell_subset_size': 5372}. Best is trial 48 with value: 0.021833613332437007.


Early stopping triggered at epoch 29
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:14:29,384] Trial 52 finished with value: 0.04792550007502237 and parameters: {'learning_rate': 0.07907881334097203, 'weight_decay': 9.356105290443463e-06, 'temperature': 0.8503417997033205, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 93, 'cell_subset_size': 5237}. Best is trial 48 with value: 0.021833613332437007.


Early stopping triggered at epoch 39
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:15:09,797] Trial 53 finished with value: 0.016907496941395296 and parameters: {'learning_rate': 0.09881731201158052, 'weight_decay': 5.308831706373277e-06, 'temperature': 0.8328467821557369, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 137, 'cell_subset_size': 5987}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 14
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:15:45,158] Trial 54 finished with value: 0.02841300169626872 and parameters: {'learning_rate': 0.09832053927894312, 'weight_decay': 4.3990582230953765e-06, 'temperature': 0.8112792971674498, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 144, 'cell_subset_size': 5866}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 33
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:16:24,059] Trial 55 finished with value: 0.03613344503171516 and parameters: {'learning_rate': 0.09867271038189752, 'weight_decay': 4.560607440903489e-06, 'temperature': 0.8083610462687074, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 143, 'cell_subset_size': 5974}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 22
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:17:00,957] Trial 56 finished with value: 0.06218378301951792 and parameters: {'learning_rate': 0.03256374118376797, 'weight_decay': 2.8283052372855936e-06, 'temperature': 0.7119286763258824, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 30, 'cell_subset_size': 5760}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 27
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:17:38,057] Trial 57 finished with value: 0.04182838475262678 and parameters: {'learning_rate': 0.05883222639589471, 'weight_decay': 2.0308264654711492e-06, 'temperature': 0.8424451461575236, 'batch_size': 6, 'num_layers': 1, 'hidden_size': 113, 'cell_subset_size': 5453}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 55
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:18:20,320] Trial 58 finished with value: 0.09876871796268406 and parameters: {'learning_rate': 0.023937369531461847, 'weight_decay': 3.867414741555739e-06, 'temperature': 0.6610815635992525, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 84, 'cell_subset_size': 5027}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 29
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:18:57,544] Trial 59 finished with value: 0.05789516865522012 and parameters: {'learning_rate': 0.04632928991722156, 'weight_decay': 5.262930737277704e-06, 'temperature': 0.7828970250346192, 'batch_size': 6, 'num_layers': 1, 'hidden_size': 128, 'cell_subset_size': 5812}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 36
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:19:36,540] Trial 60 finished with value: 0.028644933019365583 and parameters: {'learning_rate': 0.08070391614784374, 'weight_decay': 9.314036843162045e-06, 'temperature': 0.880931556252346, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 69, 'cell_subset_size': 5347}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 42
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:20:16,585] Trial 61 finished with value: 0.024498108049639228 and parameters: {'learning_rate': 0.07936930845067139, 'weight_decay': 9.841725980894088e-06, 'temperature': 0.8768462897480261, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 65, 'cell_subset_size': 5357}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 31
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 18


[I 2024-03-04 23:20:58,663] Trial 62 finished with value: 0.02146347365315853 and parameters: {'learning_rate': 0.07915864325964297, 'weight_decay': 1.033821230912988e-05, 'temperature': 0.8790622754496535, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 75, 'cell_subset_size': 5735}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 28
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:21:36,616] Trial 63 finished with value: 0.04865677156010452 and parameters: {'learning_rate': 0.0780856665383601, 'weight_decay': 1.136130177597502e-05, 'temperature': 0.88866321260932, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 65, 'cell_subset_size': 5599}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 17
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:22:12,818] Trial 64 finished with value: 0.0322756256375994 and parameters: {'learning_rate': 0.07768941720919782, 'weight_decay': 5.8667605277516544e-06, 'temperature': 0.3938489796242774, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 65, 'cell_subset_size': 5882}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 30
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:22:51,320] Trial 65 finished with value: 0.037148746490478515 and parameters: {'learning_rate': 0.03793497241190391, 'weight_decay': 3.446063319763239e-06, 'temperature': 0.8647860115763839, 'batch_size': 6, 'num_layers': 1, 'hidden_size': 73, 'cell_subset_size': 5243}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 31
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:23:30,740] Trial 66 finished with value: 0.06280920306841532 and parameters: {'learning_rate': 0.08146420200611496, 'weight_decay': 1.0456767587115301e-05, 'temperature': 0.512213285224941, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 44, 'cell_subset_size': 5481}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 26
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:24:08,231] Trial 67 finished with value: 0.07202846281803571 and parameters: {'learning_rate': 0.057708084097645225, 'weight_decay': 5.743958804483041e-06, 'temperature': 0.23024281284052944, 'batch_size': 7, 'num_layers': 1, 'hidden_size': 145, 'cell_subset_size': 5674}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 29
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:24:44,593] Trial 68 finished with value: 0.06125644879975343 and parameters: {'learning_rate': 0.04351816434516019, 'weight_decay': 8.316875430985491e-06, 'temperature': 0.7813818767791125, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 102, 'cell_subset_size': 1830}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 17
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:25:20,896] Trial 69 finished with value: 0.36381249743349414 and parameters: {'learning_rate': 0.006133855984765212, 'weight_decay': 1.1137403383001618e-06, 'temperature': 0.9038729700180639, 'batch_size': 5, 'num_layers': 4, 'hidden_size': 45, 'cell_subset_size': 5170}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 25
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:25:58,265] Trial 70 finished with value: 0.05697164605458578 and parameters: {'learning_rate': 0.031029484144635227, 'weight_decay': 2.3102568997386138e-05, 'temperature': 0.7210387932272637, 'batch_size': 6, 'num_layers': 2, 'hidden_size': 128, 'cell_subset_size': 5981}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 21
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:26:34,687] Trial 71 finished with value: 0.03321659329797135 and parameters: {'learning_rate': 0.07802793678380099, 'weight_decay': 6.149772177329912e-06, 'temperature': 0.3875070411892301, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 70, 'cell_subset_size': 5358}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 41
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:27:14,864] Trial 72 finished with value: 0.040416095169579114 and parameters: {'learning_rate': 0.06183715127531925, 'weight_decay': 4.701074859152205e-06, 'temperature': 0.33935440237823494, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 59, 'cell_subset_size': 5794}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 19
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:27:50,816] Trial 73 finished with value: 0.037327108885112564 and parameters: {'learning_rate': 0.09886231114384819, 'weight_decay': 2.956308078825108e-06, 'temperature': 0.8180030380477565, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 82, 'cell_subset_size': 5799}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 22
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:28:24,886] Trial 74 finished with value: 0.0893382843438681 and parameters: {'learning_rate': 0.0794422577269068, 'weight_decay': 6.049318174271272e-06, 'temperature': 0.4547078885307267, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 167, 'cell_subset_size': 1097}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 29
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:29:03,289] Trial 75 finished with value: 0.04356939259714681 and parameters: {'learning_rate': 0.06675124376788891, 'weight_decay': 1.247211657876555e-05, 'temperature': 0.3948841431818165, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 58, 'cell_subset_size': 5911}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 45
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:29:43,715] Trial 76 finished with value: 0.11004202649825148 and parameters: {'learning_rate': 0.04448906072599521, 'weight_decay': 9.435822417129463e-06, 'temperature': 0.8657235671631605, 'batch_size': 7, 'num_layers': 1, 'hidden_size': 32, 'cell_subset_size': 5564}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 15
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:30:20,469] Trial 77 finished with value: 0.10335648592975404 and parameters: {'learning_rate': 0.08364323940659633, 'weight_decay': 4.6442851494439673e-07, 'temperature': 0.031924265756807235, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 119, 'cell_subset_size': 5051}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 22
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 11
Early stopping triggered at epoch 1


[I 2024-03-04 23:31:00,085] Trial 78 finished with value: 0.08945050889795478 and parameters: {'learning_rate': 0.052600521600326605, 'weight_decay': 6.905919941390915e-06, 'temperature': 0.9141418795190289, 'batch_size': 4, 'num_layers': 3, 'hidden_size': 101, 'cell_subset_size': 4646}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 40
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:31:40,552] Trial 79 finished with value: 0.06301198669274648 and parameters: {'learning_rate': 0.06339832571282668, 'weight_decay': 3.9900206522443214e-06, 'temperature': 0.25674848776353454, 'batch_size': 6, 'num_layers': 1, 'hidden_size': 155, 'cell_subset_size': 5666}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 86
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:32:29,838] Trial 80 finished with value: 1.3519352668617848 and parameters: {'learning_rate': 0.00018523052113139733, 'weight_decay': 1.3797556670781274e-05, 'temperature': 0.5531536086980688, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 135, 'cell_subset_size': 5366}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 33
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:33:08,393] Trial 81 finished with value: 0.0265711556781422 and parameters: {'learning_rate': 0.05569728243391608, 'weight_decay': 1.4813028572590447e-05, 'temperature': 0.951433407417296, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 181, 'cell_subset_size': 4944}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 36
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:33:48,705] Trial 82 finished with value: 0.028500254672986485 and parameters: {'learning_rate': 0.08313841549305716, 'weight_decay': 1.0627701877164883e-05, 'temperature': 0.8736869912676357, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 142, 'cell_subset_size': 4941}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 22
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:34:26,730] Trial 83 finished with value: 0.025091149770852295 and parameters: {'learning_rate': 0.09942282471881689, 'weight_decay': 1.983870567600305e-05, 'temperature': 0.9635153904918757, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 175, 'cell_subset_size': 4955}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 32
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 17
Early stopping triggered at epoch 1


[I 2024-03-04 23:35:09,145] Trial 84 finished with value: 0.023441276553214763 and parameters: {'learning_rate': 0.0906761736889273, 'weight_decay': 1.7931200136761586e-05, 'temperature': 0.9686827084743693, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 179, 'cell_subset_size': 4950}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 22
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:35:46,676] Trial 85 finished with value: 0.026966246185880716 and parameters: {'learning_rate': 0.09886987830212321, 'weight_decay': 2.963421847056225e-05, 'temperature': 0.9491356432038869, 'batch_size': 6, 'num_layers': 1, 'hidden_size': 183, 'cell_subset_size': 4682}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 11
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:36:21,899] Trial 86 finished with value: 0.03909006335518577 and parameters: {'learning_rate': 0.05960923151280789, 'weight_decay': 2.01857858237268e-05, 'temperature': 0.9782746470617898, 'batch_size': 6, 'num_layers': 1, 'hidden_size': 186, 'cell_subset_size': 4635}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 40
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:37:01,563] Trial 87 finished with value: 0.052020161747932434 and parameters: {'learning_rate': 0.0492357850599255, 'weight_decay': 3.088409437478018e-05, 'temperature': 0.9700323322323345, 'batch_size': 5, 'num_layers': 2, 'hidden_size': 172, 'cell_subset_size': 4845}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 68
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:37:44,660] Trial 88 finished with value: 0.5179095901329728 and parameters: {'learning_rate': 0.002539576480025484, 'weight_decay': 2.4971918181650945e-05, 'temperature': 0.9313924546435737, 'batch_size': 6, 'num_layers': 1, 'hidden_size': 199, 'cell_subset_size': 4468}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 26
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:38:22,129] Trial 89 finished with value: 0.030736843057167833 and parameters: {'learning_rate': 0.06881016389239039, 'weight_decay': 4.085928061835764e-05, 'temperature': 0.9444441313975697, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 178, 'cell_subset_size': 5120}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 15
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:38:57,383] Trial 90 finished with value: 0.07564822157224019 and parameters: {'learning_rate': 0.034342355343739, 'weight_decay': 1.8713300978080273e-05, 'temperature': 0.9590908752265054, 'batch_size': 7, 'num_layers': 1, 'hidden_size': 157, 'cell_subset_size': 4999}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 16
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:39:34,212] Trial 91 finished with value: 0.03707477102677027 and parameters: {'learning_rate': 0.09859547069289838, 'weight_decay': 1.5325282142025343e-05, 'temperature': 0.9026323153824357, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 166, 'cell_subset_size': 5520}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 35
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:40:13,843] Trial 92 finished with value: 0.023273576668330603 and parameters: {'learning_rate': 0.08652288070002222, 'weight_decay': 2.2516127644904462e-05, 'temperature': 0.8419313213239065, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 152, 'cell_subset_size': 5240}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 27
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:40:51,725] Trial 93 finished with value: 0.02960212234361672 and parameters: {'learning_rate': 0.04299722212770513, 'weight_decay': 2.7102762486917805e-05, 'temperature': 0.8412602294380432, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 194, 'cell_subset_size': 4803}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 20
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:41:28,032] Trial 94 finished with value: 0.018883691004344394 and parameters: {'learning_rate': 0.06780693627275794, 'weight_decay': 3.4491636771333e-05, 'temperature': 0.9264903384222846, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 175, 'cell_subset_size': 5224}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 21
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 20
Early stopping triggered at epoch 1


[I 2024-03-04 23:42:08,907] Trial 95 finished with value: 0.02701171756652342 and parameters: {'learning_rate': 0.066485566499155, 'weight_decay': 3.779871984456658e-05, 'temperature': 0.9199595029397202, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 218, 'cell_subset_size': 5189}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 20
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:42:45,502] Trial 96 finished with value: 0.030809908083506997 and parameters: {'learning_rate': 0.055720421545989426, 'weight_decay': 2.1946165395219967e-05, 'temperature': 0.9728653161198266, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 152, 'cell_subset_size': 4943}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 16
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:43:20,960] Trial 97 finished with value: 0.05533595929543178 and parameters: {'learning_rate': 0.07127909700847335, 'weight_decay': 5.141226121062406e-05, 'temperature': 0.8961830807337682, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 174, 'cell_subset_size': 4294}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 17
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:43:57,008] Trial 98 finished with value: 0.024564752558700175 and parameters: {'learning_rate': 0.08720309705852915, 'weight_decay': 1.628781865561087e-05, 'temperature': 0.7619204481768905, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 206, 'cell_subset_size': 5232}. Best is trial 53 with value: 0.016907496941395296.


Early stopping triggered at epoch 25
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1
Early stopping triggered at epoch 1


[I 2024-03-04 23:44:34,457] Trial 99 finished with value: 0.025827082089015418 and parameters: {'learning_rate': 0.08608301172151576, 'weight_decay': 1.8151073864430793e-05, 'temperature': 0.7956591522884167, 'batch_size': 4, 'num_layers': 1, 'hidden_size': 122, 'cell_subset_size': 5307}. Best is trial 53 with value: 0.016907496941395296.


Best hyperparameters: {'learning_rate': 0.09881731201158052, 'weight_decay': 5.308831706373277e-06, 'temperature': 0.8328467821557369, 'batch_size': 5, 'num_layers': 1, 'hidden_size': 137, 'cell_subset_size': 5987}


In [None]:
with open('optuna_top_n_trial.pkl', 'wb') as file:
    pickle.dump(top_n_trials, file)

In [None]:
best_hyperparameters=best_params
best_hyperparameters

In [None]:
# training and creating representations 
layer_losses_dict={}
for layer in layers:
    if layer=='X_pca':
        best_hyp_list=X_pca_trials
    if layer=='X_scANVI_Pool_ID':
        best_hyp_list=X_scANVI_Pool_ID_trials
    if layer=='X_scVI_Pool_ID':
        best_hyp_list=X_scVI_Pool_ID_trials
    
    if dataset=='COMBAT':
        # whole data
        data_mean_for_all_cells= SamplesDataset(adata, donors, sample_col="scRNASeq_sample_ID",layer=layer)
        #test data only!
        #data_mean_for_all_cells= SamplesDataset(test_set, donors, sample_col="scRNASeq_sample_ID",layer=layer)
    else:
        # whole data
        data_mean_for_all_cells= SamplesDataset(adata, donors, sample_col="sample_id",layer=layer)
        #test data only!
        #data_mean_for_all_cells= SamplesDataset(test_set, donors, sample_col="sample_id",layer=layer)


    means_for_all_cells = []
    # Iterate through the dataset to calculate the mean of each data sample along the rows (axis=0)
    for data in data_mean_for_all_cells:
        means_for_all_cells.append(data.mean(axis=0))
    # Convert the list of means into a NumPy array and then into a PyTorch tensor
    means_for_all_cells = torch.tensor(np.array(means_for_all_cells)).to(device)


    # iterate over best hypterparameter sets from optuna
    #for optuna_trial, row in top_n_trials.iterrows():
    # grid search but with cell subset size and kfold cross val
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    learning_rate = best_hyp_list.get('learning_rate')
    batch_size = best_hyp_list.get('batch_size')
    num_epochs = 200
    num_layers=best_hyp_list.get('num_layers')
    hidden_size = best_hyp_list.get('hidden_size')
    cell_subset_size =best_hyp_list.get('cell_subset_size')
    weight_decay =best_hyp_list.get('weight_decay')
    temperature =best_hyp_list.get('temperature')
    losses_dict = {}
    fold_train_loss=[]
    fold_validation_loss=[]
    
    if dataset=='COMBAT':
        # Iterate over folds
        for fold, (train_idx, val_idx) in enumerate(kf.split(train_val_unique_pool_ids)):
            # Initialize the network and optimizer once for each hyperparameter combination
            network = DynamicNetwork(n_input_features=adata.obsm[layer].shape[1], 
                                     n_output_features=30, num_layers=num_layers, hidden_size=hidden_size)
            network.to(device)
            optimizer = torch.optim.Adam(network.parameters(), lr=learning_rate, weight_decay=weight_decay)
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
            criterion = InfoNCECauchy(temperature)
            # Splitting the dataset and creating dataloaders for this fold
            train_pool_ids = train_val_unique_pool_ids[train_idx]
            val_pool_ids = train_val_unique_pool_ids[val_idx]
            train_set = train_val_set[train_val_set.obs['Pool_ID'].isin(train_pool_ids)]
            validation_set = train_val_set[train_val_set.obs['Pool_ID'].isin(val_pool_ids)]

            train_dataset = SamplesDataset(train_set, donors, sample_col="scRNASeq_sample_ID",layer=layer)
            validation_dataset = SamplesDataset(validation_set, donors, sample_col="scRNASeq_sample_ID",layer=layer)

            train_pairs_dataset = TransformedPairDataset(train_dataset, subset_size=cell_subset_size)
            validation_pairs_dataset = TransformedPairDataset(validation_dataset, subset_size=cell_subset_size, validation=True)

            train_dataloader = DataLoader(train_pairs_dataset, batch_size=batch_size, shuffle=True)
            validation_dataloader = DataLoader(validation_pairs_dataset, batch_size=batch_size, shuffle=True)
    else: 
        pair_dataset=TransformedPairDataset(dataset, subset_size=cell_subset_size)
        fold = 99
        network = DynamicNetwork(n_input_features=adata.obsm[layer].shape[1], 
                             n_output_features=30, num_layers=num_layers, hidden_size=hidden_size)
        network.to(device)
        optimizer = torch.optim.Adam(network.parameters(), lr=learning_rate, weight_decay=weight_decay)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
        criterion = InfoNCECauchy(temperature)
        dataloader=DataLoader(pair_dataset, batch_size=batch_size, shuffle=True)


    # Initialize lists to store losses for this fold
    epoch_train_losses = []
    epoch_validation_losses = []

    # Training loop
    for epoch in range(num_epochs):
        training_loss = 0
        network.train()
        for batch_idx, (batch) in enumerate(train_dataloader):
            optimizer.zero_grad()
            data1, data2 = batch
            samples = torch.vstack((data1, data2)).to(device)
            features = network(samples)
            loss = criterion(features)
            loss.backward()
            optimizer.step()
            training_loss += loss.item()
        validation_loss = 0
        network.eval()
        with torch.no_grad():
            for batch_idx, (batch) in enumerate(validation_dataloader):
                data1, data2 = batch
                samples = torch.vstack((data1, data2)).to(device)
                features = network(samples)
                loss = criterion(features)
                validation_loss += loss.item()

        avg_training_loss = training_loss / len(train_dataloader)
        avg_validation_loss = validation_loss / len(validation_dataloader)
        epoch_train_losses.append(avg_training_loss)
        epoch_validation_losses.append(avg_validation_loss)
        # Update scheduler
        scheduler.step()

    # Append this fold loss to the fold lists
    fold_train_loss.append(epoch_train_losses)
    fold_validation_loss.append(epoch_validation_losses)

    # Compute average losses across folds
    avg_train_losses = [sum(x)/len(x) for x in zip(*fold_train_loss)]
    avg_validation_losses = [sum(x)/len(x) for x in zip(*fold_validation_loss)]

    # Store the averaged losses in the losses_dict
    config_key = (cell_subset_size,fold, batch_size, num_layers, hidden_size, learning_rate, weight_decay)
    losses_dict[config_key] = (avg_train_losses, avg_validation_losses)
    # Save the model and representation for this hyperparameter combination
    model_name = f"{layer}_model_{cell_subset_size}_{fold}_{batch_size}_{num_layers}_{hidden_size}_{num_epochs}_{learning_rate}_{weight_decay}.pt"

    save_path = os.path.join(model_folder, model_name)
    torch.save(network.state_dict(), save_path)

    representation = []
    with torch.no_grad():
        representation = network(means_for_all_cells).cpu().detach().numpy()

    # Save the representation as a CSV file
    representation_df = pd.DataFrame(representation)
    representation_name = f"{layer}_representation_{cell_subset_size}_{fold}_{batch_size}_{num_layers}_{hidden_size}_{num_epochs}_{learning_rate}_{weight_decay}.csv"
    representation_path = os.path.join(representation_folder, representation_name)
    representation_df.to_csv(representation_path, index=False)
    layer_losses_dict[layer]=losses_dict
    with open(f"losses_dict.pkl", 'wb') as file:
        pickle.dump(layer_losses_dict, file)
    output = {}
    for key, subdict in layer_losses_dict.items():
        output[key] = {}
        for subkey, (array1, array2) in subdict.items():
            avg1 =  sum(array1) / len(array1)
            avg2 =  sum(array2) / len(array2)
            output[key][subkey] = (avg1, avg2)
            print(output[key][subkey])
    with open(f"avg_losses_dict.pkl", 'wb') as file:
        pickle.dump(output, file)
