In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [50]:
from custom_nets.resnet import ResNet, train_model, evaluate
import optuna

## Pre-processing

This is all some quite elaborate steps to load in the data. It definetely does not need to be as difficult as this. The main goal is to have a train, val and test set of features and labels. As long as you have that all correctly defined it should all work a-ok.

In [51]:
microbiome = pd.read_csv('../../data/raw/curated_metagenomics/relative_abundance.csv',index_col=0).transpose()
metadata = pd.read_csv('../../data/raw/curated_metagenomics/metadata.csv',index_col='sample_id',low_memory=False)

For this example we will try to classify disease from healthy based on microbiome. Disease is classified as diseased (according to the original data) and BMI<16 | BMI=>30. These are the boundaries of severe underweight and obesity.

In [52]:
#get stool samples
metadata = metadata.loc[metadata.body_site == 'stool',:]

#Add obesity disease tags to disease BMI
to_change = metadata.BMI>=30
metadata.loc[to_change,'disease'] = 'obesity'

to_change = metadata.BMI<16
metadata.loc[to_change,'disease'] = 'severe_underweight'

# Remove all disease NaNs
metadata = metadata.loc[metadata.disease==metadata.disease,:]

#Take only the adults
to_keep = metadata.age_category != 'newborn'
metadata = metadata.loc[to_keep,:]

# Get the overlapping set of samples between metadata and microbiome data
overlapping_samples = list(set(metadata.index) & set(microbiome.index))
microbiome= microbiome.loc[overlapping_samples,:]
metadata = metadata.loc[overlapping_samples,:]


base_metadata = metadata.loc[metadata.study_name != 'HMP_2019_ibdmdb',:]
base_microbiome = microbiome.loc[base_metadata.index,:]

target_metadata = metadata.loc[metadata.study_name == 'HMP_2019_ibdmdb',:]
target_microbiome = microbiome.loc[target_metadata.index,:]

Here the class labels and feature names are defined.

In [53]:
y = np.asarray( base_metadata.disease != 'healthy',dtype=int)
feature_names = microbiome.columns

## Split data

Here we split the data into the train val and test sets. Since the curated set is rather big, we stick to 2000 test samples and 1000 validation samples. This can of course be tuned.

After splitting the data is transformed to tensors which are moved to the device, the train code assumes all tensors and moves are already moved to the GPU, this is beneficial as it speeds up the loading of the data a lot. But, it is something to be mindful about.

In [54]:
X_train, X_val, y_train, y_val = train_test_split(base_microbiome,y, test_size=2000)

#Transfer to tensors and bring to device
X_train = torch.tensor(np.asarray(X_train), dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).to(device)

X_val = torch.tensor(np.asarray(X_val), dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.float32).to(device)

In [55]:
train_loader = DataLoader( TensorDataset(X_train,y_train), batch_size=256, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val,y_val), batch_size=256, shuffle=True,)

dataloaders = {
    'train' : train_loader,
    'val' : val_loader,
}

dataset_sizes = {
    'train' : len(y_train),
    'val' : len(y_val)
}

## Define the objective function for optuna

This is where we define the objective function to be optimized by optuna. This code is also adapted from the sample example as the ResNet code. The trial variables are all optimized in order to maximize validation AUC.


In [56]:
def objective(trial):
    """Define the objective function"""
    residual_dropout_check = trial.suggest_categorical("residual_dropout_check", [True,False])
    residual_dropout = trial.suggest_float('residual_dropout',0,0.5)

    weight_decay_check = trial.suggest_categorical("weight_decay_check", [True,False])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    lr = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)

    if residual_dropout_check:
        params = {
            'd_numerical': len(feature_names),
            'd': trial.suggest_int('d', 64, 512),
            'd_hidden_factor' : trial.suggest_int('d_hidden_factor', 1, 4),
            'n_layers' : trial.suggest_int('n_layers', 1, 8),
            'hidden_dropout' : trial.suggest_float('hidden_dropout',0,0.5),
            'residual_dropout' : residual_dropout,
            'd_out':1
        }
    else:
        params = {
            'd_numerical': len(feature_names),
            'd': trial.suggest_int('d', 64, 512),
            'd_hidden_factor' : trial.suggest_int('d_hidden_factor', 1, 4),
            'n_layers' : trial.suggest_int('n_layers', 1, 8),
            'hidden_dropout' : trial.suggest_float('hidden_dropout',0,0.5),
            'residual_dropout' : 0,
            'd_out':1
        }
    
    model = ResNet(**params)
    model = model.to(device)
    criterion = torch.nn.BCEWithLogitsLoss()
    if weight_decay_check:
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    else:
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr)


    model,results_dict = train_model(model, dataloaders, criterion, optimizer, dataset_sizes, phases= ['train','val'])

    return results_dict['best_val_auc']

Optimize the parameters with optuna, the more trials the better. This is of course a trade-off with computational time.

In [57]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=500)

[32m[I 2023-05-08 12:16:13,777][0m A new study created in memory with name: no-name-d8340243-c59f-43ed-a9a6-8c8a3b24da65[0m
 10%|▉         | 98/1000 [00:24<03:48,  3.95it/s]
[32m[I 2023-05-08 12:16:38,596][0m Trial 0 finished with value: 0.7127220630645752 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.04175273632842147, 'weight_decay_check': False, 'weight_decay': 0.00019875907965767722, 'learning_rate': 0.0007761607804378421, 'd': 223, 'd_hidden_factor': 4, 'n_layers': 7, 'hidden_dropout': 0.17815968283737493}. Best is trial 0 with value: 0.7127220630645752.[0m


early stopping...


 10%|▉         | 95/1000 [00:17<02:45,  5.47it/s]
[32m[I 2023-05-08 12:16:55,969][0m Trial 1 finished with value: 0.7261648774147034 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.03191365987222772, 'weight_decay_check': False, 'weight_decay': 4.432823531787194e-05, 'learning_rate': 0.00012212505001069657, 'd': 438, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.18517956292272086}. Best is trial 1 with value: 0.7261648774147034.[0m


early stopping...


  8%|▊         | 77/1000 [00:15<03:10,  4.85it/s]
[32m[I 2023-05-08 12:17:11,852][0m Trial 2 finished with value: 0.7206567525863647 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0932470177459912, 'weight_decay_check': False, 'weight_decay': 3.3199484289991568e-06, 'learning_rate': 0.0002535753813506154, 'd': 458, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.03955745677796951}. Best is trial 1 with value: 0.7261648774147034.[0m


early stopping...


  7%|▋         | 68/1000 [00:16<03:42,  4.18it/s]
[32m[I 2023-05-08 12:17:28,129][0m Trial 3 finished with value: 0.7135445475578308 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.3424368957690487, 'weight_decay_check': True, 'weight_decay': 0.000222061584430163, 'learning_rate': 0.002761189856366083, 'd': 129, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.3294250951101268}. Best is trial 1 with value: 0.7261648774147034.[0m


early stopping...


  8%|▊         | 78/1000 [00:16<03:16,  4.69it/s]
[32m[I 2023-05-08 12:17:44,780][0m Trial 4 finished with value: 0.7028762102127075 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.4502059825243959, 'weight_decay_check': False, 'weight_decay': 0.00010287708647863993, 'learning_rate': 0.0065923095975545885, 'd': 420, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.09878515597232906}. Best is trial 1 with value: 0.7261648774147034.[0m


early stopping...


  6%|▌         | 56/1000 [00:08<02:28,  6.37it/s]
[32m[I 2023-05-08 12:17:53,580][0m Trial 5 finished with value: 0.7117321491241455 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.16999732847897564, 'weight_decay_check': False, 'weight_decay': 3.0353896691455658e-05, 'learning_rate': 0.0016296371844104183, 'd': 145, 'd_hidden_factor': 3, 'n_layers': 3, 'hidden_dropout': 0.031590279793642984}. Best is trial 1 with value: 0.7261648774147034.[0m


early stopping...


 10%|█         | 100/1000 [00:28<04:20,  3.45it/s]
[32m[I 2023-05-08 12:18:22,567][0m Trial 6 finished with value: 0.701957106590271 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.18908845424706255, 'weight_decay_check': False, 'weight_decay': 0.0006204435451834113, 'learning_rate': 0.0011939484024556196, 'd': 434, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.01182199120268923}. Best is trial 1 with value: 0.7261648774147034.[0m


early stopping...


  7%|▋         | 67/1000 [00:12<02:48,  5.52it/s]
[32m[I 2023-05-08 12:18:34,727][0m Trial 7 finished with value: 0.7244147062301636 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.4093455177547336, 'weight_decay_check': True, 'weight_decay': 4.837201591787351e-05, 'learning_rate': 0.0003256166313118961, 'd': 457, 'd_hidden_factor': 3, 'n_layers': 3, 'hidden_dropout': 0.17806414147216448}. Best is trial 1 with value: 0.7261648774147034.[0m


early stopping...


  6%|▌         | 59/1000 [00:15<04:01,  3.89it/s]
[32m[I 2023-05-08 12:18:49,931][0m Trial 8 finished with value: 0.717130720615387 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.006493037669066837, 'weight_decay_check': False, 'weight_decay': 0.00043260952429649324, 'learning_rate': 0.00041600970357661267, 'd': 489, 'd_hidden_factor': 3, 'n_layers': 5, 'hidden_dropout': 0.30620445303444266}. Best is trial 1 with value: 0.7261648774147034.[0m


early stopping...


 11%|█         | 106/1000 [00:27<03:53,  3.83it/s]
[32m[I 2023-05-08 12:19:17,594][0m Trial 9 finished with value: 0.7259393930435181 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.3092036930070632, 'weight_decay_check': True, 'weight_decay': 0.0007818552822116465, 'learning_rate': 0.0012534855584600997, 'd': 170, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.4568237587916389}. Best is trial 1 with value: 0.7261648774147034.[0m


early stopping...


 10%|▉         | 98/1000 [00:11<01:46,  8.48it/s]
[32m[I 2023-05-08 12:19:29,179][0m Trial 10 finished with value: 0.7306185960769653 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.11908345874322424, 'weight_decay_check': True, 'weight_decay': 1.2514162794695123e-05, 'learning_rate': 0.00013427266896041826, 'd': 322, 'd_hidden_factor': 2, 'n_layers': 1, 'hidden_dropout': 0.22223125192027104}. Best is trial 10 with value: 0.7306185960769653.[0m


early stopping...


 11%|█         | 112/1000 [00:13<01:45,  8.41it/s]
[32m[I 2023-05-08 12:19:42,524][0m Trial 11 finished with value: 0.7319950461387634 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.10877779970816032, 'weight_decay_check': True, 'weight_decay': 1.1333208803976601e-05, 'learning_rate': 0.00010988640480042723, 'd': 326, 'd_hidden_factor': 2, 'n_layers': 1, 'hidden_dropout': 0.22136894971939589}. Best is trial 11 with value: 0.7319950461387634.[0m


early stopping...


  9%|▉         | 91/1000 [00:10<01:48,  8.34it/s]
[32m[I 2023-05-08 12:19:53,456][0m Trial 12 finished with value: 0.7401680946350098 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.14052949391159336, 'weight_decay_check': True, 'weight_decay': 1.1126487673688453e-05, 'learning_rate': 0.00010303207164730228, 'd': 325, 'd_hidden_factor': 2, 'n_layers': 1, 'hidden_dropout': 0.2694821923959966}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


  8%|▊         | 84/1000 [00:09<01:48,  8.45it/s]
[32m[I 2023-05-08 12:20:03,418][0m Trial 13 finished with value: 0.7348940372467041 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.2378342902067368, 'weight_decay_check': True, 'weight_decay': 6.16037028279846e-06, 'learning_rate': 0.00010253492919932331, 'd': 334, 'd_hidden_factor': 2, 'n_layers': 1, 'hidden_dropout': 0.28272455098017557}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


  9%|▉         | 88/1000 [00:12<02:06,  7.22it/s]
[32m[I 2023-05-08 12:20:15,629][0m Trial 14 finished with value: 0.7301074862480164 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.2369478228387123, 'weight_decay_check': True, 'weight_decay': 1.0603121982895457e-06, 'learning_rate': 0.00020505374459125578, 'd': 273, 'd_hidden_factor': 2, 'n_layers': 2, 'hidden_dropout': 0.3084697966357775}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


  8%|▊         | 81/1000 [00:11<02:08,  7.14it/s]
[32m[I 2023-05-08 12:20:26,999][0m Trial 15 finished with value: 0.7358947992324829 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.26419852427715507, 'weight_decay_check': True, 'weight_decay': 8.78178547987337e-06, 'learning_rate': 0.00010167559110284761, 'd': 362, 'd_hidden_factor': 2, 'n_layers': 2, 'hidden_dropout': 0.3855045401870477}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


  6%|▋         | 65/1000 [00:09<02:13,  7.02it/s]
[32m[I 2023-05-08 12:20:36,285][0m Trial 16 finished with value: 0.7239809632301331 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.2917156418223129, 'weight_decay_check': True, 'weight_decay': 1.8113598823104622e-05, 'learning_rate': 0.0004914215046250176, 'd': 379, 'd_hidden_factor': 2, 'n_layers': 2, 'hidden_dropout': 0.3867808902511405}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


 11%|█         | 112/1000 [00:16<02:14,  6.59it/s]
[32m[I 2023-05-08 12:20:53,299][0m Trial 17 finished with value: 0.7263430953025818 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.17887049029854896, 'weight_decay_check': True, 'weight_decay': 4.318281778101168e-06, 'learning_rate': 0.00021232198437066258, 'd': 67, 'd_hidden_factor': 4, 'n_layers': 3, 'hidden_dropout': 0.37719724030254753}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


  7%|▋         | 71/1000 [00:09<02:10,  7.12it/s]
[32m[I 2023-05-08 12:21:03,305][0m Trial 18 finished with value: 0.7325490713119507 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.36431177349377986, 'weight_decay_check': True, 'weight_decay': 7.311961614963367e-06, 'learning_rate': 0.00018614723213656998, 'd': 275, 'd_hidden_factor': 3, 'n_layers': 2, 'hidden_dropout': 0.4953489778211827}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


  6%|▌         | 62/1000 [00:11<02:53,  5.42it/s]
[32m[I 2023-05-08 12:21:14,778][0m Trial 19 finished with value: 0.7226624488830566 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.2547156023442313, 'weight_decay_check': True, 'weight_decay': 2.0613982198002486e-05, 'learning_rate': 0.0005195623550334237, 'd': 376, 'd_hidden_factor': 2, 'n_layers': 4, 'hidden_dropout': 0.37972431960649644}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


  6%|▌         | 61/1000 [00:08<02:06,  7.43it/s]
[32m[I 2023-05-08 12:21:23,017][0m Trial 20 finished with value: 0.7310416102409363 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.4948363928323944, 'weight_decay_check': True, 'weight_decay': 2.406620407888622e-06, 'learning_rate': 0.000303126439373493, 'd': 236, 'd_hidden_factor': 3, 'n_layers': 2, 'hidden_dropout': 0.26305618473339987}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


  9%|▉         | 90/1000 [00:10<01:47,  8.46it/s]
[32m[I 2023-05-08 12:21:33,686][0m Trial 21 finished with value: 0.732703685760498 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.2348178835305228, 'weight_decay_check': True, 'weight_decay': 6.8068576308494375e-06, 'learning_rate': 0.00010223719182829822, 'd': 351, 'd_hidden_factor': 2, 'n_layers': 1, 'hidden_dropout': 0.27341560757378486}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


 10%|█         | 102/1000 [00:12<01:47,  8.36it/s]
[32m[I 2023-05-08 12:21:45,906][0m Trial 22 finished with value: 0.7297531962394714 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.2822553215960321, 'weight_decay_check': True, 'weight_decay': 7.2915550495734655e-06, 'learning_rate': 0.0001555692567426894, 'd': 315, 'd_hidden_factor': 2, 'n_layers': 1, 'hidden_dropout': 0.35158564247789503}. Best is trial 12 with value: 0.7401680946350098.[0m


early stopping...


  8%|▊         | 80/1000 [00:09<01:49,  8.38it/s]
[32m[I 2023-05-08 12:21:55,479][0m Trial 23 finished with value: 0.74402916431427 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.2093664628305559, 'weight_decay_check': True, 'weight_decay': 1.9833718716131727e-06, 'learning_rate': 0.00016729970528646192, 'd': 378, 'd_hidden_factor': 2, 'n_layers': 1, 'hidden_dropout': 0.408261819840367}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  7%|▋         | 69/1000 [00:09<02:11,  7.06it/s]
[32m[I 2023-05-08 12:22:05,290][0m Trial 24 finished with value: 0.7383728623390198 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.19944601414289442, 'weight_decay_check': True, 'weight_decay': 1.5234999256266495e-06, 'learning_rate': 0.00016316432159424837, 'd': 390, 'd_hidden_factor': 2, 'n_layers': 2, 'hidden_dropout': 0.42465388681305183}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


 13%|█▎        | 131/1000 [00:22<02:27,  5.89it/s]
[32m[I 2023-05-08 12:22:27,555][0m Trial 25 finished with value: 0.7360665202140808 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.1932086747359639, 'weight_decay_check': True, 'weight_decay': 1.6958677091955014e-06, 'learning_rate': 0.00015742266175982622, 'd': 405, 'd_hidden_factor': 3, 'n_layers': 3, 'hidden_dropout': 0.4367156068749793}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  7%|▋         | 71/1000 [00:08<01:47,  8.64it/s]
[32m[I 2023-05-08 12:22:35,800][0m Trial 26 finished with value: 0.7277002334594727 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.14021364514745258, 'weight_decay_check': True, 'weight_decay': 1.6896742373722146e-06, 'learning_rate': 0.00023231604162211674, 'd': 508, 'd_hidden_factor': 2, 'n_layers': 1, 'hidden_dropout': 0.42462325687405533}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  8%|▊         | 84/1000 [00:11<02:08,  7.15it/s]
[32m[I 2023-05-08 12:22:47,586][0m Trial 27 finished with value: 0.7346922159194946 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.1466259468136747, 'weight_decay_check': True, 'weight_decay': 1.0483046308831168e-06, 'learning_rate': 0.00015890171423418338, 'd': 397, 'd_hidden_factor': 2, 'n_layers': 2, 'hidden_dropout': 0.499402471979962}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  6%|▌         | 60/1000 [00:11<02:55,  5.36it/s]
[32m[I 2023-05-08 12:22:58,807][0m Trial 28 finished with value: 0.7357401251792908 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.22640224512338242, 'weight_decay_check': True, 'weight_decay': 3.576506286235947e-06, 'learning_rate': 0.0003360598800839839, 'd': 290, 'd_hidden_factor': 3, 'n_layers': 4, 'hidden_dropout': 0.41344153106754783}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


 12%|█▏        | 117/1000 [00:13<01:44,  8.42it/s]
[32m[I 2023-05-08 12:23:12,731][0m Trial 29 finished with value: 0.7320530414581299 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.0637435552000491, 'weight_decay_check': True, 'weight_decay': 2.3187298830050085e-06, 'learning_rate': 0.00016735067291920398, 'd': 234, 'd_hidden_factor': 4, 'n_layers': 1, 'hidden_dropout': 0.3420558472389346}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  7%|▋         | 68/1000 [00:16<03:46,  4.12it/s]
[32m[I 2023-05-08 12:23:29,279][0m Trial 30 finished with value: 0.7261326313018799 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.20441817108201205, 'weight_decay_check': True, 'weight_decay': 4.062462282216484e-06, 'learning_rate': 0.0002579143364367364, 'd': 302, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4657495578064583}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  6%|▋         | 63/1000 [00:10<02:41,  5.81it/s]
[32m[I 2023-05-08 12:23:40,155][0m Trial 31 finished with value: 0.726407527923584 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.15717725798170407, 'weight_decay_check': True, 'weight_decay': 1.9676570402891997e-06, 'learning_rate': 0.00014336840350536554, 'd': 402, 'd_hidden_factor': 3, 'n_layers': 3, 'hidden_dropout': 0.4442414126613795}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  6%|▋         | 63/1000 [00:11<02:46,  5.63it/s]
[32m[I 2023-05-08 12:23:51,381][0m Trial 32 finished with value: 0.7299679517745972 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.18497605040462964, 'weight_decay_check': True, 'weight_decay': 1.52533202976542e-06, 'learning_rate': 0.00015086955644536007, 'd': 408, 'd_hidden_factor': 4, 'n_layers': 3, 'hidden_dropout': 0.41984731361838246}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


 10%|▉         | 95/1000 [00:14<02:18,  6.54it/s]
[32m[I 2023-05-08 12:24:05,942][0m Trial 33 finished with value: 0.7276057600975037 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.20668796143330037, 'weight_decay_check': True, 'weight_decay': 2.686892879813491e-06, 'learning_rate': 0.0002293040418018153, 'd': 469, 'd_hidden_factor': 3, 'n_layers': 2, 'hidden_dropout': 0.46372895309419515}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  7%|▋         | 74/1000 [00:13<02:49,  5.48it/s]
[32m[I 2023-05-08 12:24:19,495][0m Trial 34 finished with value: 0.7304446697235107 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.1303804847731916, 'weight_decay_check': False, 'weight_decay': 1.5671672524097848e-06, 'learning_rate': 0.0001367317535919714, 'd': 348, 'd_hidden_factor': 2, 'n_layers': 4, 'hidden_dropout': 0.3492677070142649}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  6%|▌         | 57/1000 [00:08<02:17,  6.87it/s]
[32m[I 2023-05-08 12:24:27,821][0m Trial 35 finished with value: 0.7191234827041626 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.09544600387751846, 'weight_decay_check': True, 'weight_decay': 3.924509716396087e-06, 'learning_rate': 0.0007397671253092827, 'd': 370, 'd_hidden_factor': 3, 'n_layers': 2, 'hidden_dropout': 0.4169359311670256}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  7%|▋         | 72/1000 [00:12<02:36,  5.93it/s]
[32m[I 2023-05-08 12:24:39,994][0m Trial 36 finished with value: 0.7271376252174377 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.16304056432267477, 'weight_decay_check': False, 'weight_decay': 1.0469908760330546e-06, 'learning_rate': 0.00018188440475245553, 'd': 438, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.44115518017802796}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


  8%|▊         | 81/1000 [00:09<01:50,  8.35it/s]
[32m[I 2023-05-08 12:24:49,723][0m Trial 37 finished with value: 0.7347824573516846 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.20948616495658007, 'weight_decay_check': True, 'weight_decay': 2.7140288089284206e-06, 'learning_rate': 0.00026318674194917354, 'd': 431, 'd_hidden_factor': 2, 'n_layers': 1, 'hidden_dropout': 0.32146180421540405}. Best is trial 23 with value: 0.74402916431427.[0m


early stopping...


 10%|█         | 101/1000 [00:15<02:18,  6.51it/s]
[32m[I 2023-05-08 12:25:05,263][0m Trial 38 finished with value: 0.7486675381660461 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.06944318775083826, 'weight_decay_check': False, 'weight_decay': 4.8166797389757286e-06, 'learning_rate': 0.00012098231698413304, 'd': 260, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.39984533490044816}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 84/1000 [00:11<02:02,  7.51it/s]
[32m[I 2023-05-08 12:25:16,477][0m Trial 39 finished with value: 0.7344560027122498 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.06707654656115346, 'weight_decay_check': False, 'weight_decay': 5.3452386277124976e-06, 'learning_rate': 0.00012500511259164163, 'd': 200, 'd_hidden_factor': 1, 'n_layers': 2, 'hidden_dropout': 0.3563314545029269}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 13%|█▎        | 128/1000 [00:23<02:39,  5.47it/s]
[32m[I 2023-05-08 12:25:39,886][0m Trial 40 finished with value: 0.7410012483596802 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.0826229187369496, 'weight_decay_check': False, 'weight_decay': 5.004948276137832e-06, 'learning_rate': 0.00012343630646134682, 'd': 265, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.40656037907241366}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█▏        | 113/1000 [00:20<02:40,  5.52it/s]
[32m[I 2023-05-08 12:26:00,376][0m Trial 41 finished with value: 0.7447420954704285 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.0790138167265689, 'weight_decay_check': False, 'weight_decay': 5.24573817645222e-06, 'learning_rate': 0.00012276697532316862, 'd': 259, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.40093503760089294}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 81/1000 [00:16<03:02,  5.04it/s]
[32m[I 2023-05-08 12:26:16,482][0m Trial 42 finished with value: 0.7265105843544006 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.03861136450792224, 'weight_decay_check': False, 'weight_decay': 4.9329916733966255e-06, 'learning_rate': 0.00012413451044794655, 'd': 263, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.3294784907223337}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 123/1000 [00:25<03:04,  4.75it/s]
[32m[I 2023-05-08 12:26:42,429][0m Trial 43 finished with value: 0.7352634072303772 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.08023665844274909, 'weight_decay_check': False, 'weight_decay': 3.3796387537089315e-06, 'learning_rate': 0.00012231740366508133, 'd': 210, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.3959628445176928}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 117/1000 [00:20<02:35,  5.68it/s]
[32m[I 2023-05-08 12:27:03,057][0m Trial 44 finished with value: 0.7312090992927551 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.10626859894655921, 'weight_decay_check': False, 'weight_decay': 5.444378361242369e-06, 'learning_rate': 0.00019007737230597465, 'd': 249, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.3702519881901713}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█▏        | 114/1000 [00:22<02:54,  5.08it/s]
[32m[I 2023-05-08 12:27:25,510][0m Trial 45 finished with value: 0.7411860227584839 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.010485401905779207, 'weight_decay_check': False, 'weight_decay': 1.0572383869319095e-05, 'learning_rate': 0.00011743627504934013, 'd': 176, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.4042129517718678}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 15%|█▍        | 146/1000 [00:28<02:46,  5.13it/s]
[32m[I 2023-05-08 12:27:54,023][0m Trial 46 finished with value: 0.7414994835853577 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.009099611956206876, 'weight_decay_check': False, 'weight_decay': 8.669699040274304e-06, 'learning_rate': 0.00012906871602560012, 'd': 175, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.40800739674399683}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 105/1000 [00:20<02:55,  5.09it/s]
[32m[I 2023-05-08 12:28:14,687][0m Trial 47 finished with value: 0.730650782585144 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.012682641542712494, 'weight_decay_check': False, 'weight_decay': 1.4842085756029815e-05, 'learning_rate': 0.0002096736727485965, 'd': 166, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.3948351132876182}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 115/1000 [00:25<03:14,  4.54it/s]
[32m[I 2023-05-08 12:28:40,050][0m Trial 48 finished with value: 0.7451200485229492 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.0005889471036048671, 'weight_decay_check': False, 'weight_decay': 8.91936691711362e-06, 'learning_rate': 0.00010227463292389674, 'd': 112, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.36113004042529495}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 15%|█▍        | 149/1000 [00:32<03:05,  4.58it/s]
[32m[I 2023-05-08 12:29:12,590][0m Trial 49 finished with value: 0.7352548837661743 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.04924328817585369, 'weight_decay_check': False, 'weight_decay': 9.967627977864536e-06, 'learning_rate': 0.0002861020028701855, 'd': 77, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.37123870904777534}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 101/1000 [00:22<03:22,  4.44it/s]
[32m[I 2023-05-08 12:29:35,351][0m Trial 50 finished with value: 0.7327380776405334 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.025541281605102273, 'weight_decay_check': False, 'weight_decay': 7.820324496838342e-06, 'learning_rate': 0.00010146020951431912, 'd': 99, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.30449434305817946}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 13%|█▎        | 132/1000 [00:26<02:51,  5.06it/s]
[32m[I 2023-05-08 12:30:01,459][0m Trial 51 finished with value: 0.7388453483581543 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.0012943189173149619, 'weight_decay_check': False, 'weight_decay': 9.580784984397872e-06, 'learning_rate': 0.00012800190606758612, 'd': 128, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.3991127533884796}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 15%|█▌        | 154/1000 [00:39<03:37,  3.89it/s]
[32m[I 2023-05-08 12:30:41,058][0m Trial 52 finished with value: 0.7313637137413025 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.029913782753951637, 'weight_decay_check': False, 'weight_decay': 1.2721253671350668e-05, 'learning_rate': 0.00011952933993780679, 'd': 137, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.3608341004685992}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 100/1000 [00:19<02:56,  5.11it/s]
[32m[I 2023-05-08 12:31:00,661][0m Trial 53 finished with value: 0.7363070249557495 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.05235676030074928, 'weight_decay_check': False, 'weight_decay': 2.36757419531143e-05, 'learning_rate': 0.00018386553956771263, 'd': 170, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.39366792534006345}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 98/1000 [00:22<03:31,  4.27it/s]
[32m[I 2023-05-08 12:31:23,639][0m Trial 54 finished with value: 0.7351903915405273 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.0001867006280247968, 'weight_decay_check': False, 'weight_decay': 1.5435259883692613e-05, 'learning_rate': 0.00014165739811004577, 'd': 101, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.3733797216839023}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 96/1000 [00:19<03:02,  4.95it/s]
[32m[I 2023-05-08 12:31:43,075][0m Trial 55 finished with value: 0.731282114982605 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.02533955315475006, 'weight_decay_check': False, 'weight_decay': 9.566261577194403e-06, 'learning_rate': 0.00010525338670848765, 'd': 187, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.47722092929995363}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 80/1000 [00:17<03:26,  4.46it/s]
[32m[I 2023-05-08 12:32:01,061][0m Trial 56 finished with value: 0.7382612228393555 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.04575444147640408, 'weight_decay_check': False, 'weight_decay': 6.266034069441802e-06, 'learning_rate': 0.0002198535690398172, 'd': 153, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.4450674003413478}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 95/1000 [00:16<02:40,  5.64it/s]
[32m[I 2023-05-08 12:32:17,927][0m Trial 57 finished with value: 0.7394121885299683 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.01715954047309251, 'weight_decay_check': False, 'weight_decay': 4.019600680939076e-05, 'learning_rate': 0.00018541753826335493, 'd': 216, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4310742355086082}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 92/1000 [00:18<03:00,  5.04it/s]
[32m[I 2023-05-08 12:32:36,215][0m Trial 58 finished with value: 0.7303115129470825 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.06312634929369576, 'weight_decay_check': False, 'weight_decay': 8.151714869958584e-06, 'learning_rate': 0.00014238062153133375, 'd': 124, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.4054679433004702}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 14%|█▍        | 145/1000 [00:38<03:48,  3.74it/s]
[32m[I 2023-05-08 12:33:15,043][0m Trial 59 finished with value: 0.7348855137825012 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.03532046018862166, 'weight_decay_check': False, 'weight_decay': 3.2487810575152607e-06, 'learning_rate': 0.00010038761529024162, 'd': 111, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.3833086800425443}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 13%|█▎        | 128/1000 [00:28<03:14,  4.48it/s]
[32m[I 2023-05-08 12:33:43,627][0m Trial 60 finished with value: 0.734215497970581 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.019637459441699506, 'weight_decay_check': False, 'weight_decay': 1.1616530136919752e-05, 'learning_rate': 0.00012030020563462463, 'd': 190, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.4587739953884713}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 93/1000 [00:17<02:50,  5.34it/s]
[32m[I 2023-05-08 12:34:01,090][0m Trial 61 finished with value: 0.725801944732666 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.08245725627331064, 'weight_decay_check': False, 'weight_decay': 6.18285544702339e-06, 'learning_rate': 0.00011748415229680012, 'd': 254, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4080271666977665}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 79/1000 [00:14<02:48,  5.46it/s]
[32m[I 2023-05-08 12:34:15,591][0m Trial 62 finished with value: 0.7328025102615356 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.08445297522868353, 'weight_decay_check': False, 'weight_decay': 4.964789274425533e-06, 'learning_rate': 0.00015953687084025462, 'd': 303, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.40632453358779985}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 80/1000 [00:16<03:10,  4.83it/s]
[32m[I 2023-05-08 12:34:32,182][0m Trial 63 finished with value: 0.7325104475021362 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.12269720352928133, 'weight_decay_check': False, 'weight_decay': 4.978238207058557e-06, 'learning_rate': 0.00013271117399863387, 'd': 286, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.42948346530571085}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 69/1000 [00:12<02:46,  5.58it/s]
[32m[I 2023-05-08 12:34:44,587][0m Trial 64 finished with value: 0.7256430387496948 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.10357964364799169, 'weight_decay_check': False, 'weight_decay': 7.976359554035574e-06, 'learning_rate': 0.00017265295986940932, 'd': 236, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.3663480556756171}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 104/1000 [00:20<02:59,  5.00it/s]
[32m[I 2023-05-08 12:35:05,416][0m Trial 65 finished with value: 0.7344946265220642 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.07145882754453904, 'weight_decay_check': False, 'weight_decay': 4.105248506490547e-06, 'learning_rate': 0.00011490105900753824, 'd': 225, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.3448935575475749}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 17%|█▋        | 171/1000 [00:27<02:12,  6.26it/s]
[32m[I 2023-05-08 12:35:32,764][0m Trial 66 finished with value: 0.7381710410118103 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.04998643933435077, 'weight_decay_check': False, 'weight_decay': 3.1388541905884395e-06, 'learning_rate': 0.00014018179640560454, 'd': 157, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.38699710862020265}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 99/1000 [00:18<02:46,  5.41it/s]
[32m[I 2023-05-08 12:35:51,092][0m Trial 67 finished with value: 0.7315484285354614 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.0003662840204065095, 'weight_decay_check': False, 'weight_decay': 6.543628164249699e-06, 'learning_rate': 0.00020181159217645147, 'd': 273, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4480471850825238}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 72/1000 [00:19<04:15,  3.63it/s]
[32m[I 2023-05-08 12:36:10,991][0m Trial 68 finished with value: 0.7349241375923157 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.11293817996082034, 'weight_decay_check': False, 'weight_decay': 1.3231823225612102e-05, 'learning_rate': 0.0001646815283034392, 'd': 248, 'd_hidden_factor': 2, 'n_layers': 8, 'hidden_dropout': 0.41538788340980876}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 76/1000 [00:15<03:07,  4.94it/s]
[32m[I 2023-05-08 12:36:26,407][0m Trial 69 finished with value: 0.7425817847251892 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.057992216758520045, 'weight_decay_check': False, 'weight_decay': 1.8377989430527493e-05, 'learning_rate': 0.00024524364113275406, 'd': 179, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.4207476864756371}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▎        | 125/1000 [00:25<03:00,  4.86it/s]
[32m[I 2023-05-08 12:36:52,182][0m Trial 70 finished with value: 0.7269314527511597 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.03652159595223862, 'weight_decay_check': False, 'weight_decay': 1.7933400600579228e-05, 'learning_rate': 0.0002358366673060231, 'd': 180, 'd_hidden_factor': 2, 'n_layers': 5, 'hidden_dropout': 0.43002139949884544}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 84/1000 [00:18<03:23,  4.50it/s]
[32m[I 2023-05-08 12:37:10,901][0m Trial 71 finished with value: 0.7399920225143433 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.09111313504346706, 'weight_decay_check': False, 'weight_decay': 9.326923093365518e-06, 'learning_rate': 0.00011582622999768326, 'd': 197, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.3838690538486739}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 111/1000 [00:22<02:59,  4.95it/s]
[32m[I 2023-05-08 12:37:33,372][0m Trial 72 finished with value: 0.7266780734062195 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.06048932463756634, 'weight_decay_check': False, 'weight_decay': 2.4178770541860777e-05, 'learning_rate': 0.0001482424864543238, 'd': 147, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.41568514673908646}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 79/1000 [00:14<02:48,  5.46it/s]
[32m[I 2023-05-08 12:37:47,867][0m Trial 73 finished with value: 0.7343271374702454 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.011653249231761937, 'weight_decay_check': False, 'weight_decay': 1.1180306024844644e-05, 'learning_rate': 0.00011062530937047708, 'd': 211, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4017339771150094}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 13%|█▎        | 132/1000 [00:21<02:23,  6.06it/s]
[32m[I 2023-05-08 12:38:09,663][0m Trial 74 finished with value: 0.7279386520385742 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.07274287341666559, 'weight_decay_check': False, 'weight_decay': 7.093663425576622e-06, 'learning_rate': 0.00017215416857708848, 'd': 81, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.43573051214465547}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 111/1000 [00:21<02:50,  5.21it/s]
[32m[I 2023-05-08 12:38:30,998][0m Trial 75 finished with value: 0.7287696599960327 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.04020063058406026, 'weight_decay_check': False, 'weight_decay': 4.584633509285634e-06, 'learning_rate': 0.00013057573382507586, 'd': 285, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.3594554758239285}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 89/1000 [00:18<03:09,  4.80it/s]
[32m[I 2023-05-08 12:38:49,576][0m Trial 76 finished with value: 0.740301251411438 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.019246167631774988, 'weight_decay_check': False, 'weight_decay': 6.0237021929980885e-06, 'learning_rate': 0.00010108204318387768, 'd': 263, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.46813247491338156}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 116/1000 [00:27<03:31,  4.18it/s]
[32m[I 2023-05-08 12:39:17,388][0m Trial 77 finished with value: 0.737226128578186 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.09674611863751426, 'weight_decay_check': False, 'weight_decay': 3.878729094763418e-06, 'learning_rate': 0.0001524461939462288, 'd': 307, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.4839619175513848}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 84/1000 [00:14<02:33,  5.96it/s]
[32m[I 2023-05-08 12:39:31,525][0m Trial 78 finished with value: 0.746825098991394 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.057142584999236794, 'weight_decay_check': False, 'weight_decay': 1.5775803796710097e-05, 'learning_rate': 0.00020236934171533145, 'd': 347, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4195607862861634}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 90/1000 [00:15<02:32,  5.98it/s]
[32m[I 2023-05-08 12:39:46,617][0m Trial 79 finished with value: 0.7343571782112122 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.05611057549621991, 'weight_decay_check': False, 'weight_decay': 1.6897011008623444e-05, 'learning_rate': 0.0002011998429825019, 'd': 351, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4527695119045489}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 71/1000 [00:12<02:41,  5.75it/s]
[32m[I 2023-05-08 12:39:59,011][0m Trial 80 finished with value: 0.7397686839103699 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.032438381576554084, 'weight_decay_check': False, 'weight_decay': 1.3923723465583502e-05, 'learning_rate': 0.00032888443462804985, 'd': 382, 'd_hidden_factor': 2, 'n_layers': 3, 'hidden_dropout': 0.42128000313008934}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█▏        | 114/1000 [00:20<02:42,  5.45it/s]
[32m[I 2023-05-08 12:40:19,947][0m Trial 81 finished with value: 0.7376513481140137 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.07249753196552121, 'weight_decay_check': False, 'weight_decay': 1.0989429347658196e-05, 'learning_rate': 0.0001377635275052678, 'd': 231, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.38755782939778394}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 69/1000 [00:14<03:17,  4.72it/s]
[32m[I 2023-05-08 12:40:34,600][0m Trial 82 finished with value: 0.7230661511421204 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.04668296158166967, 'weight_decay_check': False, 'weight_decay': 8.408918279978068e-06, 'learning_rate': 0.0002578850729056799, 'd': 331, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.40750935731696414}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 89/1000 [00:14<02:27,  6.16it/s]
[32m[I 2023-05-08 12:40:49,068][0m Trial 83 finished with value: 0.7413148283958435 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.011776919862549987, 'weight_decay_check': False, 'weight_decay': 1.950744056782918e-05, 'learning_rate': 0.0001673315149967112, 'd': 174, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.43864104855843494}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 123/1000 [00:19<02:20,  6.22it/s]
[32m[I 2023-05-08 12:41:08,860][0m Trial 84 finished with value: 0.7373248934745789 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0076233472856766255, 'weight_decay_check': False, 'weight_decay': 2.0109215179235246e-05, 'learning_rate': 0.00017588072568300688, 'd': 118, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.44347405212282887}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 74/1000 [00:10<02:12,  6.99it/s]
[32m[I 2023-05-08 12:41:19,482][0m Trial 85 finished with value: 0.7299593091011047 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.024610679313681386, 'weight_decay_check': False, 'weight_decay': 1.6108826201682395e-05, 'learning_rate': 0.00022461213824841945, 'd': 177, 'd_hidden_factor': 1, 'n_layers': 2, 'hidden_dropout': 0.4263541490416397}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 68/1000 [00:11<02:36,  5.97it/s]
[32m[I 2023-05-08 12:41:30,915][0m Trial 86 finished with value: 0.7453175783157349 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.05936579784514807, 'weight_decay_check': False, 'weight_decay': 2.895977503275869e-05, 'learning_rate': 0.00020199190511264182, 'd': 364, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.37254411262259124}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 83/1000 [00:14<02:34,  5.92it/s]
[32m[I 2023-05-08 12:41:44,968][0m Trial 87 finished with value: 0.7332104444503784 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.05309592536106789, 'weight_decay_check': False, 'weight_decay': 2.8977945475537147e-05, 'learning_rate': 0.0002948425030257873, 'd': 418, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.3681602753562131}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 96/1000 [00:15<02:26,  6.16it/s]
[32m[I 2023-05-08 12:42:00,579][0m Trial 88 finished with value: 0.7244576811790466 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.13210367123362915, 'weight_decay_check': False, 'weight_decay': 3.302865971102478e-05, 'learning_rate': 0.00019722871556068322, 'd': 339, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.3765805803265795}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 68/1000 [00:09<02:13,  6.99it/s]
[32m[I 2023-05-08 12:42:10,345][0m Trial 89 finished with value: 0.7327810525894165 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.06161640819620082, 'weight_decay_check': False, 'weight_decay': 2.0975102754490438e-05, 'learning_rate': 0.00023800892092628508, 'd': 364, 'd_hidden_factor': 1, 'n_layers': 2, 'hidden_dropout': 0.45242798281118723}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  6%|▌         | 61/1000 [00:10<02:34,  6.07it/s]
[32m[I 2023-05-08 12:42:20,432][0m Trial 90 finished with value: 0.7264547348022461 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.03745367570614352, 'weight_decay_check': False, 'weight_decay': 1.247192297865798e-05, 'learning_rate': 0.0003628254615344644, 'd': 386, 'd_hidden_factor': 2, 'n_layers': 3, 'hidden_dropout': 0.3907910345087777}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 13%|█▎        | 133/1000 [00:20<02:13,  6.48it/s]
[32m[I 2023-05-08 12:42:40,984][0m Trial 91 finished with value: 0.7276444435119629 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.011344909355717917, 'weight_decay_check': False, 'weight_decay': 1.5485096766939332e-05, 'learning_rate': 0.00015701894322834586, 'd': 161, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.3949898423055739}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 97/1000 [00:13<02:02,  7.36it/s]
[32m[I 2023-05-08 12:42:54,199][0m Trial 92 finished with value: 0.7378832101821899 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.026976086286959924, 'weight_decay_check': False, 'weight_decay': 1.0391082020528643e-05, 'learning_rate': 0.0001103517679324778, 'd': 136, 'd_hidden_factor': 1, 'n_layers': 2, 'hidden_dropout': 0.4181613113202988}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 81/1000 [00:19<03:44,  4.10it/s]
[32m[I 2023-05-08 12:43:13,995][0m Trial 93 finished with value: 0.7404859066009521 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.013092126388703138, 'weight_decay_check': False, 'weight_decay': 1.8596940417092065e-05, 'learning_rate': 0.00013037168095509302, 'd': 317, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.37772485441732606}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 108/1000 [00:21<02:53,  5.13it/s]
[32m[I 2023-05-08 12:43:35,078][0m Trial 94 finished with value: 0.7358882427215576 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.042690912410326326, 'weight_decay_check': False, 'weight_decay': 1.3963689729448599e-05, 'learning_rate': 0.00018336627412287082, 'd': 200, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.4322345266093755}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  6%|▋         | 64/1000 [00:10<02:28,  6.31it/s]
[32m[I 2023-05-08 12:43:45,256][0m Trial 95 finished with value: 0.7323215007781982 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.02368098946797524, 'weight_decay_check': True, 'weight_decay': 9.210648567352854e-06, 'learning_rate': 0.00014665610741018312, 'd': 344, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4013558181666945}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 88/1000 [00:15<02:44,  5.53it/s]
[32m[I 2023-05-08 12:44:01,205][0m Trial 96 finished with value: 0.7356628179550171 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.07422942386701673, 'weight_decay_check': False, 'weight_decay': 1.2223649066965898e-05, 'learning_rate': 0.00020834933991400044, 'd': 354, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4390603900622954}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 70/1000 [00:08<01:53,  8.22it/s]
[32m[I 2023-05-08 12:44:09,749][0m Trial 97 finished with value: 0.7338547706604004 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0007487766651656964, 'weight_decay_check': False, 'weight_decay': 7.309946530861361e-06, 'learning_rate': 0.00016559241487982724, 'd': 368, 'd_hidden_factor': 1, 'n_layers': 1, 'hidden_dropout': 0.3379781142787588}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 96/1000 [00:20<03:12,  4.68it/s]
[32m[I 2023-05-08 12:44:30,277][0m Trial 98 finished with value: 0.7322356104850769 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.05957291419104122, 'weight_decay_check': False, 'weight_decay': 2.4072945637426422e-05, 'learning_rate': 0.00011088722887161267, 'd': 142, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.35258846221758194}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 109/1000 [00:21<02:59,  4.96it/s]
[32m[I 2023-05-08 12:44:52,278][0m Trial 99 finished with value: 0.7266201376914978 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.09743085981273847, 'weight_decay_check': True, 'weight_decay': 5.403685062937649e-05, 'learning_rate': 0.00013365548910936346, 'd': 171, 'd_hidden_factor': 4, 'n_layers': 5, 'hidden_dropout': 0.41678380628418715}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 69/1000 [00:12<02:47,  5.57it/s]
[32m[I 2023-05-08 12:45:04,701][0m Trial 100 finished with value: 0.7255571484565735 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.033335606428191625, 'weight_decay_check': False, 'weight_decay': 8.543399134056021e-06, 'learning_rate': 0.0002757169285181193, 'd': 221, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.3662585303613677}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 82/1000 [00:15<02:50,  5.38it/s]
[32m[I 2023-05-08 12:45:19,968][0m Trial 101 finished with value: 0.7328497171401978 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.08717446990450124, 'weight_decay_check': False, 'weight_decay': 5.499694056871689e-06, 'learning_rate': 0.00012263448995071088, 'd': 261, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.40516496096917526}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 79/1000 [00:13<02:31,  6.07it/s]
[32m[I 2023-05-08 12:45:33,025][0m Trial 102 finished with value: 0.7350229024887085 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.07850815136269948, 'weight_decay_check': False, 'weight_decay': 4.378376367703094e-06, 'learning_rate': 0.00014787448288848288, 'd': 296, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.3938681179316287}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 78/1000 [00:14<02:49,  5.44it/s]
[32m[I 2023-05-08 12:45:47,392][0m Trial 103 finished with value: 0.7318834066390991 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.04865823451179033, 'weight_decay_check': False, 'weight_decay': 7.139641417193106e-06, 'learning_rate': 0.00011317348391771732, 'd': 244, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.42450824732136816}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 88/1000 [00:17<03:05,  4.92it/s]
[32m[I 2023-05-08 12:46:05,315][0m Trial 104 finished with value: 0.7340565919876099 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.11639148586733815, 'weight_decay_check': False, 'weight_decay': 5.661928385190984e-06, 'learning_rate': 0.0001232130900414778, 'd': 186, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.4107589816350465}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 14%|█▎        | 135/1000 [00:24<02:35,  5.55it/s]
[32m[I 2023-05-08 12:46:29,663][0m Trial 105 finished with value: 0.7316106557846069 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.06674764667060562, 'weight_decay_check': False, 'weight_decay': 2.9099233342353244e-06, 'learning_rate': 0.00016871601701320306, 'd': 100, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.37966480534191116}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 82/1000 [00:13<02:32,  6.00it/s]
[32m[I 2023-05-08 12:46:43,365][0m Trial 106 finished with value: 0.7323945164680481 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.018622291789178683, 'weight_decay_check': False, 'weight_decay': 1.0154275385284041e-05, 'learning_rate': 0.00010024441241240047, 'd': 396, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.43555216151379633}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 83/1000 [00:16<03:04,  4.96it/s]
[32m[I 2023-05-08 12:47:00,127][0m Trial 107 finished with value: 0.7279622554779053 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.08943200772033505, 'weight_decay_check': False, 'weight_decay': 2.4926650792964094e-06, 'learning_rate': 0.0001902067215141312, 'd': 209, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.44920290403356034}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 70/1000 [00:16<03:40,  4.22it/s]
[32m[I 2023-05-08 12:47:16,744][0m Trial 108 finished with value: 0.7286837100982666 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.10890322664981152, 'weight_decay_check': False, 'weight_decay': 2.117756169367402e-06, 'learning_rate': 0.00013454085068761028, 'd': 324, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.40050671524852705}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 116/1000 [00:15<01:59,  7.41it/s]
[32m[I 2023-05-08 12:47:32,437][0m Trial 109 finished with value: 0.7277023792266846 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.04425961686621614, 'weight_decay_check': True, 'weight_decay': 3.501429277882585e-06, 'learning_rate': 0.00015468588754572748, 'd': 82, 'd_hidden_factor': 1, 'n_layers': 2, 'hidden_dropout': 0.38748006143936287}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 13%|█▎        | 131/1000 [00:24<02:45,  5.25it/s]
[32m[I 2023-05-08 12:47:57,436][0m Trial 110 finished with value: 0.734870433807373 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.055538282555893714, 'weight_decay_check': False, 'weight_decay': 6.134696070956664e-06, 'learning_rate': 0.00010880514336501298, 'd': 452, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.36049362534165597}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 75/1000 [00:20<04:10,  3.70it/s]
[32m[I 2023-05-08 12:48:17,759][0m Trial 111 finished with value: 0.7351904511451721 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.027779554339069847, 'weight_decay_check': False, 'weight_decay': 1.4286875900386323e-05, 'learning_rate': 0.00012977055850178684, 'd': 356, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.375730258625017}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 105/1000 [00:27<03:58,  3.76it/s]
[32m[I 2023-05-08 12:48:45,753][0m Trial 112 finished with value: 0.7407177686691284 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.01038689422522227, 'weight_decay_check': False, 'weight_decay': 1.8708846099732836e-05, 'learning_rate': 0.00012515545769812243, 'd': 314, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4107898711676095}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 105/1000 [00:23<03:18,  4.52it/s]
[32m[I 2023-05-08 12:49:09,027][0m Trial 113 finished with value: 0.7386090755462646 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.008467636510958266, 'weight_decay_check': False, 'weight_decay': 1.628207943082328e-05, 'learning_rate': 0.0001419449454762346, 'd': 272, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.41062246343721215}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▊         | 86/1000 [00:21<03:45,  4.05it/s]
[32m[I 2023-05-08 12:49:30,289][0m Trial 114 finished with value: 0.7430843114852905 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.015665202102797947, 'weight_decay_check': False, 'weight_decay': 1.1938959641933211e-05, 'learning_rate': 0.00011977265096102295, 'd': 334, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4266195678112439}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 85/1000 [00:20<03:38,  4.19it/s]
[32m[I 2023-05-08 12:49:50,597][0m Trial 115 finished with value: 0.7392833232879639 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.035338511180720344, 'weight_decay_check': False, 'weight_decay': 1.1861968422813343e-05, 'learning_rate': 0.00017962071855913813, 'd': 148, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.42575026649654707}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 18%|█▊        | 180/1000 [00:34<02:38,  5.19it/s]
[32m[I 2023-05-08 12:50:25,325][0m Trial 116 finished with value: 0.736850380897522 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.018857277998601298, 'weight_decay_check': False, 'weight_decay': 7.983617657650235e-06, 'learning_rate': 0.00011684552766166761, 'd': 64, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.4376566285049196}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 82/1000 [00:13<02:32,  6.01it/s]
[32m[I 2023-05-08 12:50:38,994][0m Trial 117 finished with value: 0.7383857369422913 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.07804792581025985, 'weight_decay_check': False, 'weight_decay': 9.24337978227626e-06, 'learning_rate': 0.00021342563864127896, 'd': 336, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4699289907002486}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 91/1000 [00:24<04:08,  3.66it/s]
[32m[I 2023-05-08 12:51:03,895][0m Trial 118 finished with value: 0.7333822846412659 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0006468628556670179, 'weight_decay_check': False, 'weight_decay': 1.0748500609066771e-05, 'learning_rate': 0.00015108936969922035, 'd': 375, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.39839860068947264}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 97/1000 [00:21<03:20,  4.50it/s]
[32m[I 2023-05-08 12:51:25,499][0m Trial 119 finished with value: 0.7390214204788208 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.06454210759621974, 'weight_decay_check': True, 'weight_decay': 4.768159022597984e-06, 'learning_rate': 0.00016818102356728012, 'd': 290, 'd_hidden_factor': 3, 'n_layers': 5, 'hidden_dropout': 0.41797833118720606}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 121/1000 [00:18<02:17,  6.41it/s]
[32m[I 2023-05-08 12:51:44,395][0m Trial 120 finished with value: 0.745710551738739 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.05146008080084584, 'weight_decay_check': False, 'weight_decay': 3.907671611717773e-06, 'learning_rate': 0.00010869181873496343, 'd': 193, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4504003320080312}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 89/1000 [00:14<02:23,  6.34it/s]
[32m[I 2023-05-08 12:51:58,460][0m Trial 121 finished with value: 0.7400779128074646 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.04314364161879866, 'weight_decay_check': False, 'weight_decay': 3.654408493190036e-06, 'learning_rate': 0.00010928519537693766, 'd': 183, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4594672424550605}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 88/1000 [00:13<02:21,  6.43it/s]
[32m[I 2023-05-08 12:52:12,182][0m Trial 122 finished with value: 0.7342240810394287 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.05453630550100729, 'weight_decay_check': False, 'weight_decay': 6.820523751678344e-06, 'learning_rate': 0.000100881074814359, 'd': 200, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.45385013393838275}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 122/1000 [00:28<03:24,  4.29it/s]
[32m[I 2023-05-08 12:52:40,687][0m Trial 123 finished with value: 0.7312692403793335 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.02480809189003955, 'weight_decay_check': False, 'weight_decay': 5.144824342036111e-06, 'learning_rate': 0.00012194105840949667, 'd': 164, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.44317811532811857}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 77/1000 [00:12<02:27,  6.24it/s]
[32m[I 2023-05-08 12:52:53,058][0m Trial 124 finished with value: 0.7325834631919861 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0333431849963892, 'weight_decay_check': False, 'weight_decay': 2.9797801391230547e-06, 'learning_rate': 0.00014216983456038118, 'd': 280, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4260214216428973}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 124/1000 [00:16<01:59,  7.32it/s]
[32m[I 2023-05-08 12:53:10,032][0m Trial 125 finished with value: 0.7383191585540771 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.07013088892101098, 'weight_decay_check': False, 'weight_decay': 6.394851426850101e-06, 'learning_rate': 0.00011619820703533565, 'd': 174, 'd_hidden_factor': 1, 'n_layers': 2, 'hidden_dropout': 0.3921402624281922}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█▏        | 113/1000 [00:20<02:43,  5.43it/s]
[32m[I 2023-05-08 12:53:30,888][0m Trial 126 finished with value: 0.7453175783157349 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.1020068216626472, 'weight_decay_check': False, 'weight_decay': 4.110052695357838e-06, 'learning_rate': 0.00013335061169925317, 'd': 240, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4327333202034131}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 97/1000 [00:15<02:21,  6.39it/s]
[32m[I 2023-05-08 12:53:46,108][0m Trial 127 finished with value: 0.7340995073318481 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.17378003712066, 'weight_decay_check': False, 'weight_decay': 4.2073411848079464e-06, 'learning_rate': 0.00013557745717464515, 'd': 194, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4880131943287218}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 117/1000 [00:21<02:41,  5.47it/s]
[32m[I 2023-05-08 12:54:07,527][0m Trial 128 finished with value: 0.7328841090202332 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.1534747484001163, 'weight_decay_check': False, 'weight_decay': 3.507768194774729e-06, 'learning_rate': 0.00024313663853974142, 'd': 213, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4710396974165982}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 96/1000 [00:15<02:26,  6.17it/s]
[32m[I 2023-05-08 12:54:23,113][0m Trial 129 finished with value: 0.7303329706192017 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.1008854989310677, 'weight_decay_check': False, 'weight_decay': 1.3148751267447195e-05, 'learning_rate': 0.0001626131930907397, 'd': 243, 'd_hidden_factor': 2, 'n_layers': 3, 'hidden_dropout': 0.44123491855963093}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 96/1000 [00:13<02:09,  6.98it/s]
[32m[I 2023-05-08 12:54:36,907][0m Trial 130 finished with value: 0.738342821598053 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.01790819119329104, 'weight_decay_check': True, 'weight_decay': 1.8749518708992673e-06, 'learning_rate': 0.00019910462281256316, 'd': 255, 'd_hidden_factor': 1, 'n_layers': 2, 'hidden_dropout': 0.46067577525161657}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 84/1000 [00:15<02:46,  5.49it/s]
[32m[I 2023-05-08 12:54:52,235][0m Trial 131 finished with value: 0.7307538986206055 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.08234579303809239, 'weight_decay_check': False, 'weight_decay': 4.321426327650111e-06, 'learning_rate': 0.0001317797282592412, 'd': 228, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.43056041411663787}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 16%|█▌        | 162/1000 [00:29<02:31,  5.52it/s]
[32m[I 2023-05-08 12:55:21,605][0m Trial 132 finished with value: 0.7258492112159729 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.0920795084283609, 'weight_decay_check': False, 'weight_decay': 2.678720994530702e-06, 'learning_rate': 0.00010814842989878315, 'd': 153, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.41771072143415233}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 74/1000 [00:13<02:53,  5.35it/s]
[32m[I 2023-05-08 12:55:35,476][0m Trial 133 finished with value: 0.7318748235702515 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.049603481570719685, 'weight_decay_check': False, 'weight_decay': 4.9882516420064915e-06, 'learning_rate': 0.00015166816434660562, 'd': 258, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.40276182566382007}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▊         | 86/1000 [00:17<03:07,  4.88it/s]
[32m[I 2023-05-08 12:55:53,150][0m Trial 134 finished with value: 0.7297145128250122 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.06466788913297963, 'weight_decay_check': False, 'weight_decay': 3.8109176846183065e-06, 'learning_rate': 0.0001000225812193273, 'd': 268, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.3822443874541081}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 103/1000 [00:12<01:50,  8.14it/s]
[32m[I 2023-05-08 12:56:05,841][0m Trial 135 finished with value: 0.7393757104873657 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.040241914315842835, 'weight_decay_check': False, 'weight_decay': 5.7691760277159946e-06, 'learning_rate': 0.00012054021137874953, 'd': 239, 'd_hidden_factor': 1, 'n_layers': 1, 'hidden_dropout': 0.40861862781325814}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 98/1000 [00:17<02:37,  5.72it/s]
[32m[I 2023-05-08 12:56:23,018][0m Trial 136 finished with value: 0.7444543242454529 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.10497314265695035, 'weight_decay_check': False, 'weight_decay': 7.845292240268858e-06, 'learning_rate': 0.00018033910131850793, 'd': 206, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4214417337048046}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 85/1000 [00:16<03:00,  5.07it/s]
[32m[I 2023-05-08 12:56:39,820][0m Trial 137 finished with value: 0.735078752040863 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.1275708641905311, 'weight_decay_check': False, 'weight_decay': 7.832300812709454e-06, 'learning_rate': 0.00022627988907890505, 'd': 190, 'd_hidden_factor': 1, 'n_layers': 5, 'hidden_dropout': 0.448159949832374}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 111/1000 [00:19<02:35,  5.71it/s]
[32m[I 2023-05-08 12:56:59,288][0m Trial 138 finished with value: 0.7382354736328125 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.10989565297801152, 'weight_decay_check': False, 'weight_decay': 8.86233463397341e-06, 'learning_rate': 0.00018328104121944974, 'd': 220, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.42386750746584273}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 100/1000 [00:15<02:21,  6.35it/s]
[32m[I 2023-05-08 12:57:15,069][0m Trial 139 finished with value: 0.7424550652503967 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.010654410260717541, 'weight_decay_check': False, 'weight_decay': 2.1620584215174067e-05, 'learning_rate': 0.0002650945130962673, 'd': 204, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.43040993110473746}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 96/1000 [00:15<02:22,  6.36it/s]
[32m[I 2023-05-08 12:57:30,187][0m Trial 140 finished with value: 0.7204506397247314 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.10227469701183331, 'weight_decay_check': False, 'weight_decay': 2.2169616643487073e-05, 'learning_rate': 0.0002543590382406742, 'd': 203, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4345423476449316}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 69/1000 [00:10<02:24,  6.44it/s]
[32m[I 2023-05-08 12:57:40,933][0m Trial 141 finished with value: 0.7314410209655762 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.011028637657047168, 'weight_decay_check': False, 'weight_decay': 2.6115896221610793e-05, 'learning_rate': 0.0002746862686907915, 'd': 176, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4193819757382324}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 77/1000 [00:12<02:25,  6.34it/s]
[32m[I 2023-05-08 12:57:53,106][0m Trial 142 finished with value: 0.7326263785362244 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.027207189806268725, 'weight_decay_check': False, 'weight_decay': 1.931948599897482e-05, 'learning_rate': 0.00021572969605677294, 'd': 162, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.45648538634399294}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█▏        | 113/1000 [00:17<02:20,  6.33it/s]
[32m[I 2023-05-08 12:58:11,004][0m Trial 143 finished with value: 0.7413427233695984 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0008905851436119376, 'weight_decay_check': False, 'weight_decay': 1.6589460370076018e-05, 'learning_rate': 0.0001720719508278105, 'd': 181, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.39720150202679816}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 73/1000 [00:11<02:24,  6.42it/s]
[32m[I 2023-05-08 12:58:22,412][0m Trial 144 finished with value: 0.7373979687690735 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.01566855389318449, 'weight_decay_check': False, 'weight_decay': 1.6717076251262543e-05, 'learning_rate': 0.0001810657134623017, 'd': 210, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.3943302533691409}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 103/1000 [00:16<02:23,  6.23it/s]
[32m[I 2023-05-08 12:58:38,969][0m Trial 145 finished with value: 0.7309213876724243 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.00211901003530221, 'weight_decay_check': False, 'weight_decay': 1.999815320794187e-05, 'learning_rate': 0.00020235403221467108, 'd': 190, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4379717276050716}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 76/1000 [00:12<02:30,  6.14it/s]
[32m[I 2023-05-08 12:58:51,393][0m Trial 146 finished with value: 0.7370500564575195 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.05597303103439116, 'weight_decay_check': False, 'weight_decay': 1.444827701180788e-05, 'learning_rate': 0.0002479726120475611, 'd': 362, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4277919107032008}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 102/1000 [00:16<02:23,  6.24it/s]
[32m[I 2023-05-08 12:59:07,781][0m Trial 147 finished with value: 0.7441966533660889 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.22487357580235331, 'weight_decay_check': False, 'weight_decay': 1.2316651410127915e-06, 'learning_rate': 0.00016288671373739498, 'd': 227, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.41236589834650395}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  6%|▌         | 62/1000 [00:10<02:33,  6.13it/s]
[32m[I 2023-05-08 12:59:17,942][0m Trial 148 finished with value: 0.7301010489463806 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.24771035293437876, 'weight_decay_check': True, 'weight_decay': 1.2785878646303682e-06, 'learning_rate': 0.0003104479416436742, 'd': 229, 'd_hidden_factor': 3, 'n_layers': 3, 'hidden_dropout': 0.38598360997978626}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 110/1000 [00:19<02:39,  5.59it/s]
[32m[I 2023-05-08 12:59:37,643][0m Trial 149 finished with value: 0.7406319379806519 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.20520912002089425, 'weight_decay_check': False, 'weight_decay': 1.221461897609562e-06, 'learning_rate': 0.00015642249132158967, 'd': 247, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4129632041982712}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 90/1000 [00:14<02:27,  6.17it/s]
[32m[I 2023-05-08 12:59:52,270][0m Trial 150 finished with value: 0.7362082600593567 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.18346272982719397, 'weight_decay_check': False, 'weight_decay': 1.4372543210985635e-06, 'learning_rate': 0.00014516193415979906, 'd': 220, 'd_hidden_factor': 4, 'n_layers': 3, 'hidden_dropout': 0.36710791729348957}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 71/1000 [00:11<02:28,  6.27it/s]
[32m[I 2023-05-08 13:00:03,624][0m Trial 151 finished with value: 0.7337602376937866 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.12180980067164254, 'weight_decay_check': False, 'weight_decay': 2.231895045752804e-06, 'learning_rate': 0.00016875315471853668, 'd': 183, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.40146884890106993}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█▏        | 113/1000 [00:17<02:15,  6.52it/s]
[32m[I 2023-05-08 13:00:20,984][0m Trial 152 finished with value: 0.7381688952445984 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.2315202979820594, 'weight_decay_check': False, 'weight_decay': 2.805241761409646e-05, 'learning_rate': 0.00022714991684824034, 'd': 203, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.44735744213596024}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 109/1000 [00:17<02:27,  6.06it/s]
[32m[I 2023-05-08 13:00:39,008][0m Trial 153 finished with value: 0.7455323338508606 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.07416491403387383, 'weight_decay_check': False, 'weight_decay': 1.9882042984767648e-06, 'learning_rate': 0.0001888307934029683, 'd': 377, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4154099359883766}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 72/1000 [00:10<02:14,  6.91it/s]
[32m[I 2023-05-08 13:00:49,472][0m Trial 154 finished with value: 0.7347995638847351 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0765782664916586, 'weight_decay_check': False, 'weight_decay': 1.575205741250939e-06, 'learning_rate': 0.00019704845558940346, 'd': 376, 'd_hidden_factor': 1, 'n_layers': 2, 'hidden_dropout': 0.4195180318562366}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 110/1000 [00:17<02:25,  6.13it/s]
[32m[I 2023-05-08 13:01:07,448][0m Trial 155 finished with value: 0.733637809753418 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.06861253179226683, 'weight_decay_check': False, 'weight_decay': 2.0860526071198005e-06, 'learning_rate': 0.00017676241401656445, 'd': 395, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.39539646221128866}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  6%|▋         | 63/1000 [00:11<02:54,  5.36it/s]
[32m[I 2023-05-08 13:01:19,244][0m Trial 156 finished with value: 0.7338418364524841 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.08860676664023194, 'weight_decay_check': False, 'weight_decay': 1.916064532269951e-06, 'learning_rate': 0.00019185538118117893, 'd': 411, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.4098588548321287}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 107/1000 [00:15<02:09,  6.89it/s]
[32m[I 2023-05-08 13:01:34,803][0m Trial 157 finished with value: 0.7376041412353516 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.2776070103363339, 'weight_decay_check': False, 'weight_decay': 2.665274219979295e-06, 'learning_rate': 0.00013566981584593618, 'd': 384, 'd_hidden_factor': 1, 'n_layers': 2, 'hidden_dropout': 0.37980678788874345}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 91/1000 [00:14<02:24,  6.29it/s]
[32m[I 2023-05-08 13:01:49,297][0m Trial 158 finished with value: 0.7291433215141296 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.21990055182072668, 'weight_decay_check': False, 'weight_decay': 2.463505010715103e-06, 'learning_rate': 0.00015708970215265445, 'd': 235, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4237573812720435}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 78/1000 [00:12<02:31,  6.08it/s]
[32m[I 2023-05-08 13:02:02,172][0m Trial 159 finished with value: 0.7363328337669373 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.04547873849441363, 'weight_decay_check': False, 'weight_decay': 2.9117427693200006e-06, 'learning_rate': 0.00021208077381462193, 'd': 369, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4318665069315329}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  7%|▋         | 74/1000 [00:13<02:47,  5.54it/s]
[32m[I 2023-05-08 13:02:15,572][0m Trial 160 finished with value: 0.7343228459358215 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.17038245470641766, 'weight_decay_check': True, 'weight_decay': 1.033536271102889e-06, 'learning_rate': 0.0002664261921395082, 'd': 343, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.40419049223208875}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 105/1000 [00:16<02:22,  6.28it/s]
[32m[I 2023-05-08 13:02:32,328][0m Trial 161 finished with value: 0.7413899898529053 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.1413027631224772, 'weight_decay_check': False, 'weight_decay': 2.334539633466037e-05, 'learning_rate': 0.00016598737788949062, 'd': 197, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.41371845912300953}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 84/1000 [00:13<02:22,  6.42it/s]
[32m[I 2023-05-08 13:02:45,447][0m Trial 162 finished with value: 0.7294009923934937 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.18764537819520716, 'weight_decay_check': False, 'weight_decay': 1.7305873041929496e-06, 'learning_rate': 0.000776000102059697, 'd': 224, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4154359605656669}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 90/1000 [00:14<02:30,  6.06it/s]
[32m[I 2023-05-08 13:03:00,328][0m Trial 163 finished with value: 0.735546886920929 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.14743389334816454, 'weight_decay_check': False, 'weight_decay': 2.4321826299721422e-05, 'learning_rate': 0.00014269422735353364, 'd': 360, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.3867149291071996}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 117/1000 [00:18<02:21,  6.26it/s]
[32m[I 2023-05-08 13:03:19,066][0m Trial 164 finished with value: 0.7383965253829956 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.13674727672318285, 'weight_decay_check': False, 'weight_decay': 3.0408516584722547e-05, 'learning_rate': 0.0003641107915362561, 'd': 197, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.41162973224803673}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 12%|█▏        | 117/1000 [00:18<02:19,  6.32it/s]
[32m[I 2023-05-08 13:03:37,611][0m Trial 165 finished with value: 0.7175688147544861 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0843785661164837, 'weight_decay_check': False, 'weight_decay': 3.3500985014921586e-06, 'learning_rate': 0.0005071460790325807, 'd': 207, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4297071309279889}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 91/1000 [00:19<03:16,  4.63it/s]
[32m[I 2023-05-08 13:03:57,316][0m Trial 166 finished with value: 0.7277432084083557 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.21860725787640178, 'weight_decay_check': False, 'weight_decay': 3.397203105027948e-05, 'learning_rate': 0.0001282548742397646, 'd': 216, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.39664073391476423}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 104/1000 [00:16<02:26,  6.12it/s]
[32m[I 2023-05-08 13:04:14,338][0m Trial 167 finished with value: 0.7339921593666077 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.09194244250378766, 'weight_decay_check': False, 'weight_decay': 2.1627943826927086e-05, 'learning_rate': 0.0001722459658405482, 'd': 388, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.353979053395372}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 93/1000 [00:16<02:44,  5.53it/s]
[32m[I 2023-05-08 13:04:31,212][0m Trial 168 finished with value: 0.7284389734268188 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.10687147947582021, 'weight_decay_check': False, 'weight_decay': 1.7539079588950515e-05, 'learning_rate': 0.00019593022911660224, 'd': 351, 'd_hidden_factor': 1, 'n_layers': 4, 'hidden_dropout': 0.28882503017854005}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 82/1000 [00:13<02:27,  6.24it/s]
[32m[I 2023-05-08 13:04:44,389][0m Trial 169 finished with value: 0.7323987483978271 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.06996784490546275, 'weight_decay_check': False, 'weight_decay': 1.2183570795248295e-05, 'learning_rate': 0.0002382576193623696, 'd': 190, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4432427617928128}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  6%|▋         | 63/1000 [00:10<02:31,  6.20it/s]
[32m[I 2023-05-08 13:04:54,587][0m Trial 170 finished with value: 0.7238048315048218 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.11583254294699454, 'weight_decay_check': False, 'weight_decay': 1.5450000650479768e-05, 'learning_rate': 0.000637974915721982, 'd': 255, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.3698389289750755}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  8%|▊         | 83/1000 [00:13<02:26,  6.28it/s]
[32m[I 2023-05-08 13:05:07,838][0m Trial 171 finished with value: 0.7360837459564209 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.1606891059716367, 'weight_decay_check': False, 'weight_decay': 2.1658437084989067e-05, 'learning_rate': 0.00016263202757230106, 'd': 183, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.42278470715091965}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


  9%|▉         | 93/1000 [00:14<02:23,  6.34it/s]
[32m[I 2023-05-08 13:05:22,547][0m Trial 172 finished with value: 0.7302041053771973 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.006629432214404141, 'weight_decay_check': False, 'weight_decay': 1.028804724903323e-05, 'learning_rate': 0.0001121298630397116, 'd': 194, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.44117763649988995}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|▉         | 95/1000 [00:14<02:22,  6.37it/s]
[32m[I 2023-05-08 13:05:37,503][0m Trial 173 finished with value: 0.7339320182800293 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.029472286391624858, 'weight_decay_check': False, 'weight_decay': 1.781491917387171e-06, 'learning_rate': 0.00017987433331931668, 'd': 165, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.4634113226548127}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 10%|█         | 105/1000 [00:16<02:21,  6.35it/s]
[32m[I 2023-05-08 13:05:54,084][0m Trial 174 finished with value: 0.729620099067688 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.02080679685766859, 'weight_decay_check': False, 'weight_decay': 2.2955631032490393e-06, 'learning_rate': 0.00014689525020997134, 'd': 171, 'd_hidden_factor': 1, 'n_layers': 3, 'hidden_dropout': 0.40516531575428105}. Best is trial 38 with value: 0.7486675381660461.[0m


early stopping...


 11%|█         | 112/1000 [00:24<03:14,  4.57it/s]
[32m[I 2023-05-08 13:06:18,623][0m Trial 175 finished with value: 0.7496123909950256 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.05878606732944105, 'weight_decay_check': False, 'weight_decay': 2.5801326785758673e-05, 'learning_rate': 0.00012631098223906615, 'd': 207, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.43325771345552005}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  6%|▋         | 63/1000 [00:13<03:24,  4.58it/s]
[32m[I 2023-05-08 13:06:32,428][0m Trial 176 finished with value: 0.7226753234863281 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.06046512205895482, 'weight_decay_check': False, 'weight_decay': 1.263884572741292e-06, 'learning_rate': 0.00012633198992917136, 'd': 202, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.24221847854309747}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  9%|▉         | 93/1000 [00:22<03:36,  4.19it/s]
[32m[I 2023-05-08 13:06:54,646][0m Trial 177 finished with value: 0.7407393455505371 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.057665931708229666, 'weight_decay_check': False, 'weight_decay': 2.5229227377165268e-05, 'learning_rate': 0.00011179501210550118, 'd': 227, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4153527584973709}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 11%|█         | 109/1000 [00:26<03:40,  4.04it/s]
[32m[I 2023-05-08 13:07:21,676][0m Trial 178 finished with value: 0.7463977336883545 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0733916325805359, 'weight_decay_check': True, 'weight_decay': 6.951212069458189e-06, 'learning_rate': 0.00013666033451139808, 'd': 211, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.43347880569244124}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  7%|▋         | 68/1000 [00:16<03:47,  4.10it/s]
[32m[I 2023-05-08 13:07:38,299][0m Trial 179 finished with value: 0.7160269618034363 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.07764669733707137, 'weight_decay_check': True, 'weight_decay': 6.925689789829472e-06, 'learning_rate': 0.0010327521818996397, 'd': 215, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.43518927809273317}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  9%|▉         | 92/1000 [00:22<03:40,  4.11it/s]
[32m[I 2023-05-08 13:08:00,705][0m Trial 180 finished with value: 0.7447721362113953 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.10111719644649551, 'weight_decay_check': True, 'weight_decay': 5.41552345849644e-06, 'learning_rate': 0.0001333000998072351, 'd': 240, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4538841473317638}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  8%|▊         | 80/1000 [00:19<03:43,  4.11it/s]
[32m[I 2023-05-08 13:08:20,198][0m Trial 181 finished with value: 0.7326822280883789 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.0930591896963701, 'weight_decay_check': True, 'weight_decay': 5.9303312485116965e-06, 'learning_rate': 0.00013428871014932944, 'd': 246, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4738594726095751}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  8%|▊         | 85/1000 [00:18<03:16,  4.66it/s]
[32m[I 2023-05-08 13:08:38,492][0m Trial 182 finished with value: 0.7405374646186829 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.08062828565977123, 'weight_decay_check': True, 'weight_decay': 4.396320057008362e-06, 'learning_rate': 0.00012056104207589112, 'd': 232, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4533301225255044}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 12%|█▏        | 124/1000 [00:29<03:27,  4.21it/s]
[32m[I 2023-05-08 13:09:07,973][0m Trial 183 finished with value: 0.7365432381629944 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.09978958240306326, 'weight_decay_check': True, 'weight_decay': 5.058274497310479e-06, 'learning_rate': 0.00014172109026442585, 'd': 241, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4268548493203708}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 13%|█▎        | 126/1000 [00:29<03:25,  4.25it/s]
[32m[I 2023-05-08 13:09:37,637][0m Trial 184 finished with value: 0.7331503629684448 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.06859419586762658, 'weight_decay_check': True, 'weight_decay': 7.0212811357359275e-06, 'learning_rate': 0.00010897828773978421, 'd': 205, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4537746594168931}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 11%|█▏        | 114/1000 [00:27<03:30,  4.20it/s]
[32m[I 2023-05-08 13:10:04,791][0m Trial 185 finished with value: 0.7262872457504272 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.13758133413059082, 'weight_decay_check': True, 'weight_decay': 8.29651363645488e-06, 'learning_rate': 0.00015446776111081991, 'd': 211, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4356883920697527}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  8%|▊         | 79/1000 [00:18<03:31,  4.35it/s]
[32m[I 2023-05-08 13:10:22,989][0m Trial 186 finished with value: 0.7320551872253418 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.11835385471949207, 'weight_decay_check': True, 'weight_decay': 5.584229692408481e-06, 'learning_rate': 0.00012886099667178998, 'd': 278, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.4206434134674962}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 10%|█         | 101/1000 [00:24<03:39,  4.10it/s]
[32m[I 2023-05-08 13:10:47,652][0m Trial 187 finished with value: 0.7301225066184998 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.048326724484013456, 'weight_decay_check': True, 'weight_decay': 4.358870727357091e-06, 'learning_rate': 0.00011823328098192386, 'd': 219, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.4645953210538801}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  7%|▋         | 70/1000 [00:15<03:32,  4.38it/s]
[32m[I 2023-05-08 13:11:03,692][0m Trial 188 finished with value: 0.7061918377876282 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.1923201034291779, 'weight_decay_check': True, 'weight_decay': 1.4900957617307543e-06, 'learning_rate': 0.0016916143829381926, 'd': 371, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.4830160070659423}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  8%|▊         | 78/1000 [00:19<03:49,  4.02it/s]
[32m[I 2023-05-08 13:11:23,148][0m Trial 189 finished with value: 0.734335720539093 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.061379513158393274, 'weight_decay_check': True, 'weight_decay': 6.226939526710456e-06, 'learning_rate': 0.00013954408374522093, 'd': 236, 'd_hidden_factor': 1, 'n_layers': 7, 'hidden_dropout': 0.44683159347223955}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  8%|▊         | 75/1000 [00:20<04:12,  3.66it/s]
[32m[I 2023-05-08 13:11:43,681][0m Trial 190 finished with value: 0.7481994032859802 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.07606271867513749, 'weight_decay_check': False, 'weight_decay': 3.7430335292340182e-06, 'learning_rate': 0.00010507080924459215, 'd': 268, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.41013348276491973}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 10%|█         | 102/1000 [00:27<04:04,  3.67it/s]
[32m[I 2023-05-08 13:12:11,554][0m Trial 191 finished with value: 0.7363972663879395 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.07447284345119101, 'weight_decay_check': False, 'weight_decay': 3.886390930992537e-06, 'learning_rate': 0.00011872062032355393, 'd': 252, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.4096896255793521}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 11%|█         | 111/1000 [00:29<03:58,  3.73it/s]
[32m[I 2023-05-08 13:12:41,382][0m Trial 192 finished with value: 0.7422295808792114 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.08673270799980251, 'weight_decay_check': False, 'weight_decay': 3.1338048958901393e-06, 'learning_rate': 0.00010415811918724336, 'd': 267, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.43099434427653194}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  9%|▉         | 91/1000 [00:20<03:25,  4.43it/s]
[32m[I 2023-05-08 13:13:01,970][0m Trial 193 finished with value: 0.7351345419883728 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.08594822716454112, 'weight_decay_check': False, 'weight_decay': 3.1863390065332235e-06, 'learning_rate': 0.00010210587540127228, 'd': 269, 'd_hidden_factor': 1, 'n_layers': 6, 'hidden_dropout': 0.42860264468220827}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 13%|█▎        | 133/1000 [00:35<03:53,  3.71it/s]
[32m[I 2023-05-08 13:13:37,847][0m Trial 194 finished with value: 0.7253080606460571 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.05268602376066678, 'weight_decay_check': False, 'weight_decay': 4.753183097161921e-06, 'learning_rate': 0.0001079615157705312, 'd': 264, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.43692316711796586}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  9%|▊         | 87/1000 [00:23<04:08,  3.67it/s]
[32m[I 2023-05-08 13:14:01,572][0m Trial 195 finished with value: 0.728649377822876 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.09734246471519006, 'weight_decay_check': False, 'weight_decay': 3.566818405053959e-06, 'learning_rate': 0.0001064035109919612, 'd': 277, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.42320474150988957}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  7%|▋         | 67/1000 [00:18<04:19,  3.59it/s]
[32m[I 2023-05-08 13:14:20,264][0m Trial 196 finished with value: 0.7211936116218567 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.24126928467668335, 'weight_decay_check': False, 'weight_decay': 3.1856468550088742e-06, 'learning_rate': 0.00012689459519178264, 'd': 259, 'd_hidden_factor': 2, 'n_layers': 8, 'hidden_dropout': 0.32498737768466723}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 16%|█▋        | 165/1000 [00:48<04:07,  3.38it/s]
[32m[I 2023-05-08 13:15:09,136][0m Trial 197 finished with value: 0.7407286167144775 and parameters: {'residual_dropout_check': True, 'residual_dropout': 0.07460738414227133, 'weight_decay_check': True, 'weight_decay': 4.028443283641536e-06, 'learning_rate': 0.00011615502984009498, 'd': 291, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.40298389159814135}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


  8%|▊         | 79/1000 [00:22<04:27,  3.44it/s]
[32m[I 2023-05-08 13:15:32,147][0m Trial 198 finished with value: 0.7273995876312256 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.08543908441124179, 'weight_decay_check': False, 'weight_decay': 2.5876150873893474e-06, 'learning_rate': 0.00010110731238980266, 'd': 251, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.4504102327369312}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


 10%|█         | 104/1000 [00:28<04:04,  3.66it/s]
[32m[I 2023-05-08 13:16:00,600][0m Trial 199 finished with value: 0.7290316224098206 and parameters: {'residual_dropout_check': False, 'residual_dropout': 0.04062223757756008, 'weight_decay_check': False, 'weight_decay': 5.509534095655811e-06, 'learning_rate': 0.00012951481498739054, 'd': 331, 'd_hidden_factor': 1, 'n_layers': 8, 'hidden_dropout': 0.392373746388397}. Best is trial 175 with value: 0.7496123909950256.[0m


early stopping...


## Retrain the model with the best params
You might recognize this from the objective function, essentially this is the same code, but it will pick the best params found by optuna.

In [58]:
best_params = study.best_params

params = {
    'd_numerical':len(feature_names),
    'd':best_params['d'],
    'd_hidden_factor' : best_params['d_hidden_factor'],
    'n_layers' : best_params['n_layers'],
    'hidden_dropout' : best_params['hidden_dropout'],
    'residual_dropout' : best_params['residual_dropout'],
    'd_out' : 1,
}

model = ResNet(**params)
model = model.to(device)
criterion = torch.nn.BCEWithLogitsLoss()

if best_params['weight_decay_check']:
    optimizer = torch.optim.AdamW(model.parameters(), lr=best_params['learning_rate'], weight_decay=best_params['weight_decay'])
else:
    optimizer = torch.optim.AdamW(model.parameters(), lr=best_params['learning_rate'])    

model,results_dict = train_model(model, dataloaders, criterion, optimizer, dataset_sizes, phases= ['train','val'])

 13%|█▎        | 128/1000 [00:29<03:18,  4.39it/s]

early stopping...





In [65]:
import pickle
with open('../../data/raw/curated_metagenomics/resnet_params_T2D.pkl', 'wb') as fp:
    pickle.dump(params, fp)

In [66]:
torch.save(model.state_dict(), '../../data/raw/curated_metagenomics/resnet_curated_T2D.pt')