# Tuning Script

In [4]:
# Script-specific settings: change these per tuning script.
# TUNER, MODEL and PREPROCESSING are combined into the tuning-result and
# saved-model file names used later in this notebook.

MODEL = 'tfc'                  # also decides which model package is installed on Colab
PREPROCESSING = 'normalise'    # selects the data sub-folder and the feature-ordering pickle
# NOTE(review): this label says 'yangzhoub' while the tuner class imported in this
# notebook is JiaoCheng_10CV - confirm the label is intentional before renaming,
# because existing result files are looked up under this name.
TUNER = 'yangzhoub'
OPTIMISED_METRIC = 'f1'        # passed to the tuner as the metric to optimise

---

In [5]:
# Detect whether this notebook is running on Google Colab.
# COLAB_ENVIRONMENT is True only when google.colab can be imported AND Google
# Drive was mounted successfully; in every other case it stays False.

COLAB_ENVIRONMENT = False

try:
    from google.colab import drive
except ImportError:
    # google.colab cannot be imported here, so treat this as a local run.
    pass
else:
    try:
        drive.mount('/content/drive')
        COLAB_ENVIRONMENT = True
    except Exception as mount_error:
        # Keep the original best-effort behaviour (fall back to the local
        # configuration) but report the failure instead of hiding it.
        # KeyboardInterrupt is deliberately not caught, so cancelling the
        # mount prompt stops the cell.
        print(f'Google Drive could not be mounted: {mount_error!r}')

Mounted at /content/drive


In [6]:
import sys
import os
import pickle

if COLAB_ENVIRONMENT:
    home_directory = './drive/MyDrive/LAB/COMP90089__GroupWork__Py/' # my home directory is stored in ./LAB of google drive
    if MODEL == 'ebr':
        !pip install interpret==0.5.0
    elif MODEL == 'cbr':
        !pip install catboost
    elif MODEL in ['tfc', 'mlpc']:
        !pip install PyTorch2Sklearn
    !pip install JXAutoML
else:
    home_directory = '../../'

from JXAutoML.JiaoCheng_10CV import JiaoCheng_10CV as Tuner

import pandas as pd

Collecting PyTorch2Sklearn
  Downloading PyTorch2Sklearn-0.2.4-py3-none-any.whl.metadata (33 kB)
Downloading PyTorch2Sklearn-0.2.4-py3-none-any.whl (19 kB)
Installing collected packages: PyTorch2Sklearn
Successfully installed PyTorch2Sklearn-0.2.4
Collecting JXAutoML
  Downloading JXAutoML-0.4.4-py3-none-any.whl.metadata (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.2/67.2 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading JXAutoML-0.4.4-py3-none-any.whl (52 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.1/52.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: JXAutoML
Successfully installed JXAutoML-0.4.4


In [7]:
# Settings shared by every tuning script - do not change per model.

# number of cross-validation folds; one train/val CSV pair is read per fold
FOLDS = 10

# used as the model's `random_state`
SEED = 42

# handed to the tuner together with the model class
TASK_TYPE = 'Classification'

# when True, the feature-importance ordering is registered with the tuner
# so that the feature set is tuned like any other hyperparameter
TUNE_FEATURES_AS_HYPERPARAMETERS = True

---

In [8]:
# model to use to tune
from PyTorch2Sklearn.Transformer import Transformer as clf
from PyTorch2Sklearn.environment import *
from PyTorch2Sklearn.utils.data import TabularDataFactory, TabularDataset


# Candidate values the tuner may try for each tuneable hyperparameter.
# Keep the key order stable: the tuner's log refers to hyperparameters by
# index (e.g. "nhead (index: 5)"), which appears to follow this order.
parameter_choices = dict(
    hidden_dim=(16, 32, 64, 256),
    num_transformer_layers=(1, 2, 3),
    num_mlp_layers=(1, 2, 3),
    dropout=(0, 0.05, 0.1, 0.2),
    batch_size=(32, 128, 512),
    nhead=(2, 4, 8, 16),
    epochs=(5, 10, 20, 30),
    lr=(1e-5, 1e-4, 1e-3, 1e-2, 1e-1),
    batchnorm=(False, True),
    grad_clip=(False, True),
)

# Fixed (non-tuned) arguments; these are handed to the tuner via
# set_non_tuneable_hyperparameters and stay the same for every combination.
non_tunable_hyperparameters_dict = dict(
    random_state=SEED,
    loss=nn.CrossEntropyLoss(),  # `nn` comes from the PyTorch2Sklearn.environment star import
    rootpath='./',
    TabularDataFactory=TabularDataFactory,
    TabularDataset=TabularDataset,
    name='Transformer',
    output_dim=2,
    mode='Classification',
    verbose=True,
    share_embedding_mlp=False,
    use_cls=False,
)

# Order in which hyperparameters are tuned one after another (JiaoCheng specific).
# 'features' comes first; every key of parameter_choices appears exactly once.
tuning_order = [
    'features',
    'num_transformer_layers',
    'num_mlp_layers',
    'nhead',
    'hidden_dim',
    'dropout',
    'batchnorm',
    'grad_clip',
    'lr',
    'epochs',
    'batch_size',
]

# Starting value of every hyperparameter for the first tuning iteration
# (JiaoCheng specific). Each value is one of the options in parameter_choices;
# 'features' is given as 0.
default_hyperparameter_values = dict(
    features=0,
    num_transformer_layers=1,
    num_mlp_layers=1,
    nhead=4,
    hidden_dim=16,
    dropout=0.1,
    batchnorm=False,
    grad_clip=False,
    lr=1e-2,
    epochs=10,
    batch_size=128,
)

---

Read in and Prepare Data

In [9]:
# Import data and store as lists (one entry per cross-validation fold)

# Columns that are never used as model inputs: the target itself plus the
# identifier columns.
TARGET_COLUMN = 'mortality_status'
ID_COLUMNS = ['subject_id', 'hadm_id', 'stay_id']


def load_fold(split, fold):
    """Read one fold's CSV and separate the model inputs from the target.

    Parameters
    ----------
    split : str
        File-name prefix of the split to read: 'train' or 'val'.
    fold : int
        Fold number used in the file name.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` is the DataFrame without the target and
        identifier columns and ``y`` is the target column.

    Raises
    ------
    FileNotFoundError
        If the CSV for this split/fold does not exist.
    KeyError
        If the target or any identifier column is missing from the CSV.
    """
    fold_data = pd.read_csv(
        f'{home_directory}data/curated/modelling/{PREPROCESSING}/{split}_{fold}.csv')
    x = fold_data.drop([TARGET_COLUMN] + ID_COLUMNS, axis=1)
    y = fold_data[TARGET_COLUMN]
    return x, y


train_x_list = []
train_y_list = []
val_x_list = []
val_y_list = []

for fold in range(FOLDS):
    train_x, train_y = load_fold('train', fold)
    val_x, val_y = load_fold('val', fold)

    train_x_list.append(train_x)
    train_y_list.append(train_y)
    val_x_list.append(val_x)
    val_y_list.append(val_y)

In [10]:
# Load the pickled feature-importance ordering that matches PREPROCESSING.
# NOTE(security): pickle.load can execute arbitrary code while loading, so
# only point this at files produced by this project.

with open(f'{home_directory}models/xgb_feature_importance_ordering_{PREPROCESSING}.pickle', 'rb') as ordering_file:
    feature_importance_ordering = pickle.load(ordering_file)

Set parameters

In [11]:
# Build and configure the tuner. The print('---') calls only separate the
# status messages that each tuner method prints.

# Output locations, built once so the "read previous results" and "save
# results" steps cannot drift apart.
tuning_result_path = f'{home_directory}models/tuning/{TUNER}_{MODEL}_{PREPROCESSING}.csv'
best_model_path = f'{home_directory}models/tmp_models/{TUNER}_{MODEL}_{PREPROCESSING}'

# initialisation
tuner = Tuner()

print('---')

# define what model we are tuning
tuner.read_in_model(clf, TASK_TYPE, optimised_metric=OPTIMISED_METRIC, pytorch_model=True)

print('---')

# read in the data for training and validation
tuner.read_in_data(train_x_list, train_y_list, val_x_list, val_y_list)

print('---')

# set what hp values to tune
tuner.set_hyperparameters(parameter_choices)
# WARNING: this may take a while if no. tuneable hyperparameters are large

print('---')

# set up hp values that need to be changed from default but NOT to be tuned
tuner.set_non_tuneable_hyperparameters(non_tunable_hyperparameters_dict)

print('---')

# set up feature importance ordering

if TUNE_FEATURES_AS_HYPERPARAMETERS:
    tuner.set_features(feature_importance_ordering)
    # WARNING: this may take a while if no. tuneable hyperparameters are large

print('---')

# set up the order of hyperparameters when iteratively tuning using JiaoCheng
tuner.set_tuning_order(tuning_order)

print('---')

# set up the default hp values for first iteration of tuning JiaoCheng
tuner.set_hyperparameter_default_values(default_hyperparameter_values)

print('---')

# Continue from previous results when a result file already exists.
# Only a missing file is treated as "start fresh". If an existing file cannot
# be read, the error is allowed to surface: the same path is registered as the
# save target below, so silently continuing would presumably overwrite the
# earlier results.
if os.path.isfile(tuning_result_path):
    tuner.read_in_tuning_result_df(tuning_result_path)
else:
    print('No previous tuning data read in')

print('---')

# set up where to save the tuning result csv
tuner.set_tuning_result_saving_address(tuning_result_path)

print('---')

# set up where to save the current best model
tuner.set_best_model_saving_address(best_model_path)

JiaoCheng Initialised
---
Successfully read in model <class 'PyTorch2Sklearn.Transformer.Transformer'>, which is a Classification model optimising for f1
---
Read in Train X data list
Read in Train y data list
Read in Val X data list
Read in Val y data list
---
Successfully recorded hyperparameter choices
---
Successfully recorded non_tuneable_hyperparameter choices
---
Successfully recorded tuneable feature combination choices and updated relevant internal structures
---
---
---
Successfully read in tuning result of 103 rows, for 103.0 combos
---
Successfully set tuning output address
---
Successfully set best model output address


In [12]:
# ignore warnings

import warnings
warnings.filterwarnings("ignore")

In [13]:
# begin tuning ... (or continue training)
# Runs the tuner configured in the previous cells. Results read in earlier are
# reported in the log as "Already Trained and Tested" instead of being re-run.

tuner.tune()


Default combo: [0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 0] 


ROUND 1

Round 1 
Hyperparameter: features (index: 10) 

As new Best Combo (0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 0) was read in, best_clf is set to None
Already Trained and Tested combination (val score of 0.0):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 0}
            Current best combo (with val score 0.0):
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 0} 
        Has trained 103 of 6635520 combinations so far
Already Trained and Tested combination (val score of 0.0):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 

100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:06<00:00,  1.44it/s]
100%|██████████| 10/10 [00:06<00:00,  1.45it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:06<00:00,  1.57it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]
100%|██████████| 10/10 [00:06<00:00,  1.58it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]


Trained and Tested combination 104 of 6635520, taking 7.62 seconds to get val score of 0.2724: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 32}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.57it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.53it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.52it/s]


Trained and Tested combination 105 of 6635520, taking 6.64 seconds to get val score of 0.2058: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 33}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:07<00:00,  1.43it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:06<00:00,  1.54it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]


Trained and Tested combination 106 of 6635520, taking 7.8 seconds to get val score of 0.1405: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 34}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]
100%|██████████| 10/10 [00:07<00:00,  1.43it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:06<00:00,  1.51it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]


Trained and Tested combination 107 of 6635520, taking 8.13 seconds to get val score of 0.3631: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 35}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.44it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.50it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.44it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:06<00:00,  1.50it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:06<00:00,  1.51it/s]


Trained and Tested combination 108 of 6635520, taking 6.65 seconds to get val score of 0.1667: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 36}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.46it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:06<00:00,  1.43it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:07<00:00,  1.34it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]


Trained and Tested combination 109 of 6635520, taking 7.27 seconds to get val score of 0.2085: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 37}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:06<00:00,  1.45it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]


Trained and Tested combination 110 of 6635520, taking 8.12 seconds to get val score of 0.2259: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 38}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:07<00:00,  1.25it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:07<00:00,  1.43it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]
100%|██████████| 10/10 [00:07<00:00,  1.38it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]


Trained and Tested combination 111 of 6635520, taking 8.52 seconds to get val score of 0.1326: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 39}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:08<00:00,  1.25it/s]
100%|██████████| 10/10 [00:06<00:00,  1.44it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:07<00:00,  1.42it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
100%|██████████| 10/10 [00:08<00:00,  1.16it/s]


Trained and Tested combination 112 of 6635520, taking 8.66 seconds to get val score of 0.2006: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 40}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:07<00:00,  1.25it/s]
100%|██████████| 10/10 [00:08<00:00,  1.19it/s]
100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:07<00:00,  1.38it/s]
100%|██████████| 10/10 [00:08<00:00,  1.25it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]


Trained and Tested combination 113 of 6635520, taking 8.52 seconds to get val score of 0.2418: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 41}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:08<00:00,  1.19it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]


Trained and Tested combination 114 of 6635520, taking 8.04 seconds to get val score of 0.2301: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 42}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.16it/s]
100%|██████████| 10/10 [00:07<00:00,  1.37it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:08<00:00,  1.25it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:08<00:00,  1.15it/s]
100%|██████████| 10/10 [00:07<00:00,  1.38it/s]


Trained and Tested combination 115 of 6635520, taking 7.3 seconds to get val score of 0.1876: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 43}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.16it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:07<00:00,  1.34it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:08<00:00,  1.11it/s]
100%|██████████| 10/10 [00:08<00:00,  1.12it/s]


Trained and Tested combination 116 of 6635520, taking 8.94 seconds to get val score of 0.238: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 44}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:07<00:00,  1.33it/s]
100%|██████████| 10/10 [00:08<00:00,  1.14it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:06<00:00,  1.46it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]


Trained and Tested combination 117 of 6635520, taking 8.14 seconds to get val score of 0.2685: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 45}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]
100%|██████████| 10/10 [00:07<00:00,  1.41it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:07<00:00,  1.42it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]


Trained and Tested combination 118 of 6635520, taking 8.19 seconds to get val score of 0.179: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 46}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:07<00:00,  1.42it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:07<00:00,  1.41it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:08<00:00,  1.25it/s]
100%|██████████| 10/10 [00:07<00:00,  1.42it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:08<00:00,  1.25it/s]


Trained and Tested combination 119 of 6635520, taking 8.08 seconds to get val score of 0.1913: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 47}, 
            Current best combo with val score 0.507: 
                    {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 0, 0, 0, 1, 1, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 2 
Hyperparameter: num_transformer_layers (index: 1) 

Already Trained and Tested combination (val score of 0.507):
            {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip

100%|██████████| 10/10 [00:02<00:00,  4.56it/s]
100%|██████████| 10/10 [00:02<00:00,  4.58it/s]
100%|██████████| 10/10 [00:03<00:00,  3.21it/s]
100%|██████████| 10/10 [00:02<00:00,  4.61it/s]
100%|██████████| 10/10 [00:02<00:00,  4.60it/s]
100%|██████████| 10/10 [00:02<00:00,  4.44it/s]
100%|██████████| 10/10 [00:02<00:00,  4.32it/s]
100%|██████████| 10/10 [00:02<00:00,  3.43it/s]
100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:02<00:00,  4.49it/s]


Trained and Tested combination 120 of 6635520, taking 2.24 seconds to get val score of 0.5184: 
                {'hidden_dim': 32, 'num_transformer_layers': 2, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5184: 
                    {'hidden_dim': 32, 'num_transformer_layers': 2, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  3.52it/s]
100%|██████████| 10/10 [00:03<00:00,  3.16it/s]
100%|██████████| 10/10 [00:03<00:00,  3.07it/s]
100%|██████████| 10/10 [00:02<00:00,  3.73it/s]
100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:02<00:00,  3.74it/s]
100%|██████████| 10/10 [00:03<00:00,  2.70it/s]
100%|██████████| 10/10 [00:02<00:00,  3.76it/s]
100%|██████████| 10/10 [00:02<00:00,  3.84it/s]
100%|██████████| 10/10 [00:02<00:00,  3.78it/s]


Trained and Tested combination 121 of 6635520, taking 2.66 seconds to get val score of 0.5372: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5372: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 1, 1, 2, 0, 1, 8) , UPDATED SINCE LAST HYPERPARAMETER


Round 2 
Hyperparameter: num_mlp_layers (index: 2) 

Already Trained and Tested combination (val score of 0.5372):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'f

100%|██████████| 10/10 [00:03<00:00,  2.94it/s]
100%|██████████| 10/10 [00:02<00:00,  3.36it/s]
100%|██████████| 10/10 [00:02<00:00,  3.66it/s]
100%|██████████| 10/10 [00:02<00:00,  3.69it/s]
100%|██████████| 10/10 [00:02<00:00,  3.45it/s]
100%|██████████| 10/10 [00:03<00:00,  2.93it/s]
100%|██████████| 10/10 [00:02<00:00,  3.75it/s]
100%|██████████| 10/10 [00:02<00:00,  3.78it/s]
100%|██████████| 10/10 [00:02<00:00,  3.84it/s]
100%|██████████| 10/10 [00:03<00:00,  2.81it/s]


Trained and Tested combination 122 of 6635520, taking 3.58 seconds to get val score of 0.5372: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 2, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5372: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  3.64it/s]
100%|██████████| 10/10 [00:02<00:00,  3.70it/s]
100%|██████████| 10/10 [00:02<00:00,  3.69it/s]
100%|██████████| 10/10 [00:03<00:00,  3.11it/s]
100%|██████████| 10/10 [00:03<00:00,  3.04it/s]
100%|██████████| 10/10 [00:02<00:00,  3.73it/s]
100%|██████████| 10/10 [00:02<00:00,  3.75it/s]
100%|██████████| 10/10 [00:02<00:00,  3.65it/s]
100%|██████████| 10/10 [00:03<00:00,  2.76it/s]
100%|██████████| 10/10 [00:02<00:00,  3.74it/s]


Trained and Tested combination 123 of 6635520, taking 2.69 seconds to get val score of 0.4875: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 3, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5372: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 1, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 2 
Hyperparameter: nhead (index: 5) 



100%|██████████| 10/10 [00:02<00:00,  3.77it/s]
100%|██████████| 10/10 [00:02<00:00,  3.78it/s]
100%|██████████| 10/10 [00:03<00:00,  3.04it/s]
100%|██████████| 10/10 [00:02<00:00,  3.37it/s]
100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:02<00:00,  3.72it/s]
100%|██████████| 10/10 [00:02<00:00,  3.60it/s]
100%|██████████| 10/10 [00:03<00:00,  2.79it/s]
100%|██████████| 10/10 [00:02<00:00,  3.82it/s]
100%|██████████| 10/10 [00:02<00:00,  3.83it/s]


Trained and Tested combination 124 of 6635520, taking 2.63 seconds to get val score of 0.5421: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 2, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5421: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 2, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 
Already Trained and Tested combination (val score of 0.5372):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}
            Current best combo (with val score 0.5421):
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropo

100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:03<00:00,  2.89it/s]
100%|██████████| 10/10 [00:02<00:00,  3.47it/s]
100%|██████████| 10/10 [00:02<00:00,  3.82it/s]
100%|██████████| 10/10 [00:02<00:00,  3.71it/s]
100%|██████████| 10/10 [00:02<00:00,  3.44it/s]
100%|██████████| 10/10 [00:03<00:00,  2.96it/s]
100%|██████████| 10/10 [00:02<00:00,  3.73it/s]
100%|██████████| 10/10 [00:02<00:00,  3.75it/s]
100%|██████████| 10/10 [00:02<00:00,  3.80it/s]


Trained and Tested combination 125 of 6635520, taking 2.65 seconds to get val score of 0.5476: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:03<00:00,  2.67it/s]
100%|██████████| 10/10 [00:02<00:00,  3.72it/s]
100%|██████████| 10/10 [00:02<00:00,  3.73it/s]
100%|██████████| 10/10 [00:02<00:00,  3.76it/s]
100%|██████████| 10/10 [00:03<00:00,  3.12it/s]
100%|██████████| 10/10 [00:03<00:00,  3.24it/s]
100%|██████████| 10/10 [00:02<00:00,  3.80it/s]
100%|██████████| 10/10 [00:02<00:00,  3.75it/s]
100%|██████████| 10/10 [00:02<00:00,  3.65it/s]
100%|██████████| 10/10 [00:03<00:00,  2.80it/s]


Trained and Tested combination 126 of 6635520, taking 3.6 seconds to get val score of 0.5415: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 16, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 8) , UPDATED SINCE LAST HYPERPARAMETER


Round 2 
Hyperparameter: hidden_dim (index: 0) 



100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:02<00:00,  3.75it/s]
100%|██████████| 10/10 [00:02<00:00,  3.78it/s]
100%|██████████| 10/10 [00:03<00:00,  2.90it/s]
100%|██████████| 10/10 [00:02<00:00,  3.42it/s]
100%|██████████| 10/10 [00:02<00:00,  3.75it/s]
100%|██████████| 10/10 [00:02<00:00,  3.77it/s]
100%|██████████| 10/10 [00:03<00:00,  3.28it/s]
100%|██████████| 10/10 [00:03<00:00,  2.88it/s]
100%|██████████| 10/10 [00:02<00:00,  3.62it/s]


Trained and Tested combination 127 of 6635520, taking 2.78 seconds to get val score of 0.5313: 
                {'hidden_dim': 16, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 
Already Trained and Tested combination (val score of 0.5476):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}
            Current best combo (with val score 0.5476):
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropo

100%|██████████| 10/10 [00:02<00:00,  3.57it/s]
100%|██████████| 10/10 [00:02<00:00,  3.51it/s]
100%|██████████| 10/10 [00:03<00:00,  2.67it/s]
100%|██████████| 10/10 [00:02<00:00,  3.62it/s]
100%|██████████| 10/10 [00:02<00:00,  3.67it/s]
100%|██████████| 10/10 [00:02<00:00,  3.71it/s]
100%|██████████| 10/10 [00:03<00:00,  2.79it/s]
100%|██████████| 10/10 [00:02<00:00,  3.54it/s]
100%|██████████| 10/10 [00:02<00:00,  3.69it/s]
100%|██████████| 10/10 [00:02<00:00,  3.69it/s]


Trained and Tested combination 128 of 6635520, taking 2.73 seconds to get val score of 0.4805: 
                {'hidden_dim': 64, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:03<00:00,  2.93it/s]
100%|██████████| 10/10 [00:03<00:00,  3.00it/s]
100%|██████████| 10/10 [00:02<00:00,  3.44it/s]
100%|██████████| 10/10 [00:02<00:00,  3.49it/s]
100%|██████████| 10/10 [00:03<00:00,  3.11it/s]
100%|██████████| 10/10 [00:03<00:00,  2.95it/s]
100%|██████████| 10/10 [00:02<00:00,  3.54it/s]
100%|██████████| 10/10 [00:02<00:00,  3.55it/s]
100%|██████████| 10/10 [00:03<00:00,  3.31it/s]
100%|██████████| 10/10 [00:03<00:00,  2.83it/s]


Trained and Tested combination 129 of 6635520, taking 3.55 seconds to get val score of 0.4146: 
                {'hidden_dim': 256, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 2 
Hyperparameter: dropout (index: 3) 

Already Trained and Tested combination (val score of 0.5476):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'fea

100%|██████████| 10/10 [00:02<00:00,  3.39it/s]
100%|██████████| 10/10 [00:02<00:00,  3.42it/s]
100%|██████████| 10/10 [00:03<00:00,  3.20it/s]
100%|██████████| 10/10 [00:03<00:00,  2.76it/s]
100%|██████████| 10/10 [00:02<00:00,  3.47it/s]
100%|██████████| 10/10 [00:02<00:00,  3.48it/s]
100%|██████████| 10/10 [00:03<00:00,  3.33it/s]
100%|██████████| 10/10 [00:03<00:00,  2.67it/s]
100%|██████████| 10/10 [00:02<00:00,  3.37it/s]
100%|██████████| 10/10 [00:02<00:00,  3.48it/s]


Trained and Tested combination 130 of 6635520, taking 2.89 seconds to get val score of 0.5237: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0.05, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:03<00:00,  3.32it/s]
100%|██████████| 10/10 [00:03<00:00,  2.55it/s]
100%|██████████| 10/10 [00:02<00:00,  3.48it/s]
100%|██████████| 10/10 [00:02<00:00,  3.49it/s]
100%|██████████| 10/10 [00:02<00:00,  3.46it/s]
100%|██████████| 10/10 [00:03<00:00,  2.57it/s]
100%|██████████| 10/10 [00:02<00:00,  3.50it/s]
100%|██████████| 10/10 [00:02<00:00,  3.47it/s]
100%|██████████| 10/10 [00:02<00:00,  3.51it/s]
100%|██████████| 10/10 [00:03<00:00,  2.57it/s]


Trained and Tested combination 131 of 6635520, taking 3.91 seconds to get val score of 0.5131: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  3.44it/s]
100%|██████████| 10/10 [00:02<00:00,  3.50it/s]
100%|██████████| 10/10 [00:02<00:00,  3.50it/s]
100%|██████████| 10/10 [00:03<00:00,  2.69it/s]
100%|██████████| 10/10 [00:02<00:00,  3.34it/s]
100%|██████████| 10/10 [00:02<00:00,  3.48it/s]
100%|██████████| 10/10 [00:02<00:00,  3.50it/s]
100%|██████████| 10/10 [00:03<00:00,  2.92it/s]
100%|██████████| 10/10 [00:03<00:00,  3.03it/s]
100%|██████████| 10/10 [00:02<00:00,  3.39it/s]


Trained and Tested combination 132 of 6635520, taking 2.97 seconds to get val score of 0.493: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0.2, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 2 
Hyperparameter: batchnorm (index: 8) 

Already Trained and Tested combination (val score of 0.5476):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'f

100%|██████████| 10/10 [00:03<00:00,  3.17it/s]
100%|██████████| 10/10 [00:03<00:00,  2.63it/s]
100%|██████████| 10/10 [00:03<00:00,  2.98it/s]
100%|██████████| 10/10 [00:03<00:00,  3.22it/s]
100%|██████████| 10/10 [00:03<00:00,  3.24it/s]
100%|██████████| 10/10 [00:04<00:00,  2.35it/s]
100%|██████████| 10/10 [00:03<00:00,  3.23it/s]
100%|██████████| 10/10 [00:03<00:00,  3.18it/s]
100%|██████████| 10/10 [00:03<00:00,  3.18it/s]
100%|██████████| 10/10 [00:04<00:00,  2.41it/s]


Trained and Tested combination 133 of 6635520, taking 4.18 seconds to get val score of 0.4864: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': True, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 2 
Hyperparameter: grad_clip (index: 9) 



100%|██████████| 10/10 [00:02<00:00,  4.01it/s]
100%|██████████| 10/10 [00:02<00:00,  4.00it/s]
100%|██████████| 10/10 [00:02<00:00,  3.98it/s]
100%|██████████| 10/10 [00:03<00:00,  3.32it/s]
100%|██████████| 10/10 [00:03<00:00,  3.31it/s]
100%|██████████| 10/10 [00:02<00:00,  4.00it/s]
100%|██████████| 10/10 [00:02<00:00,  4.08it/s]
100%|██████████| 10/10 [00:02<00:00,  3.93it/s]
100%|██████████| 10/10 [00:03<00:00,  2.96it/s]
100%|██████████| 10/10 [00:02<00:00,  3.74it/s]


Trained and Tested combination 134 of 6635520, taking 2.7 seconds to get val score of 0.5327: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': False, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 
Already Trained and Tested combination (val score of 0.5476):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}
            Current best combo (with val score 0.5476):
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropo

100%|██████████| 10/10 [00:02<00:00,  3.72it/s]
100%|██████████| 10/10 [00:02<00:00,  3.76it/s]
100%|██████████| 10/10 [00:02<00:00,  3.45it/s]
100%|██████████| 10/10 [00:03<00:00,  2.89it/s]
100%|██████████| 10/10 [00:02<00:00,  3.74it/s]
100%|██████████| 10/10 [00:02<00:00,  3.77it/s]
100%|██████████| 10/10 [00:02<00:00,  3.72it/s]
100%|██████████| 10/10 [00:03<00:00,  2.76it/s]
100%|██████████| 10/10 [00:02<00:00,  3.68it/s]
100%|██████████| 10/10 [00:02<00:00,  3.82it/s]


Trained and Tested combination 135 of 6635520, taking 2.63 seconds to get val score of 0.0: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 1e-05, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:03<00:00,  3.25it/s]
100%|██████████| 10/10 [00:03<00:00,  3.04it/s]
100%|██████████| 10/10 [00:02<00:00,  3.78it/s]
100%|██████████| 10/10 [00:02<00:00,  3.78it/s]
100%|██████████| 10/10 [00:02<00:00,  3.83it/s]
100%|██████████| 10/10 [00:03<00:00,  2.78it/s]
100%|██████████| 10/10 [00:02<00:00,  3.78it/s]
100%|██████████| 10/10 [00:02<00:00,  3.75it/s]
100%|██████████| 10/10 [00:02<00:00,  3.78it/s]


Trained and Tested combination 136 of 6635520, taking 2.66 seconds to get val score of 0.4814: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.0001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 
Already Trained and Tested combination (val score of 0.5476):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}
            Current best combo (with val score 0.5476):
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'drop

100%|██████████| 10/10 [00:03<00:00,  3.19it/s]
100%|██████████| 10/10 [00:03<00:00,  3.17it/s]
100%|██████████| 10/10 [00:02<00:00,  3.77it/s]
100%|██████████| 10/10 [00:02<00:00,  3.80it/s]
100%|██████████| 10/10 [00:02<00:00,  3.74it/s]
100%|██████████| 10/10 [00:03<00:00,  2.78it/s]
100%|██████████| 10/10 [00:02<00:00,  3.82it/s]
100%|██████████| 10/10 [00:02<00:00,  3.80it/s]
100%|██████████| 10/10 [00:02<00:00,  3.81it/s]
100%|██████████| 10/10 [00:03<00:00,  3.14it/s]


Trained and Tested combination 137 of 6635520, taking 3.2 seconds to get val score of 0.2927: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:03<00:00,  3.23it/s]
100%|██████████| 10/10 [00:02<00:00,  3.80it/s]
100%|██████████| 10/10 [00:02<00:00,  3.81it/s]
100%|██████████| 10/10 [00:02<00:00,  3.58it/s]
100%|██████████| 10/10 [00:03<00:00,  2.83it/s]
100%|██████████| 10/10 [00:02<00:00,  3.84it/s]
100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:02<00:00,  3.76it/s]
100%|██████████| 10/10 [00:03<00:00,  3.04it/s]
100%|██████████| 10/10 [00:02<00:00,  3.42it/s]


Trained and Tested combination 138 of 6635520, taking 2.94 seconds to get val score of 0.0: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.1, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 2 
Hyperparameter: epochs (index: 6) 



100%|██████████| 5/5 [00:01<00:00,  3.71it/s]
100%|██████████| 5/5 [00:01<00:00,  3.67it/s]
100%|██████████| 5/5 [00:01<00:00,  3.75it/s]
100%|██████████| 5/5 [00:01<00:00,  3.76it/s]
100%|██████████| 5/5 [00:01<00:00,  3.85it/s]
100%|██████████| 5/5 [00:01<00:00,  2.75it/s]
100%|██████████| 5/5 [00:01<00:00,  2.57it/s]
100%|██████████| 5/5 [00:01<00:00,  3.70it/s]
100%|██████████| 5/5 [00:01<00:00,  3.80it/s]
100%|██████████| 5/5 [00:01<00:00,  3.74it/s]


Trained and Tested combination 139 of 6635520, taking 1.35 seconds to get val score of 0.4753: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 5, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 
Already Trained and Tested combination (val score of 0.5476):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}
            Current best combo (with val score 0.5476):
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropou

100%|██████████| 20/20 [00:05<00:00,  3.52it/s]
100%|██████████| 20/20 [00:06<00:00,  3.21it/s]
100%|██████████| 20/20 [00:05<00:00,  3.76it/s]
100%|██████████| 20/20 [00:06<00:00,  3.10it/s]
100%|██████████| 20/20 [00:05<00:00,  3.77it/s]
100%|██████████| 20/20 [00:06<00:00,  3.09it/s]
100%|██████████| 20/20 [00:05<00:00,  3.76it/s]
100%|██████████| 20/20 [00:05<00:00,  3.46it/s]
100%|██████████| 20/20 [00:05<00:00,  3.37it/s]
100%|██████████| 20/20 [00:05<00:00,  3.78it/s]


Trained and Tested combination 140 of 6635520, taking 5.31 seconds to get val score of 0.5183: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 20, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 30/30 [00:09<00:00,  3.29it/s]
100%|██████████| 30/30 [00:08<00:00,  3.34it/s]
100%|██████████| 30/30 [00:07<00:00,  3.77it/s]
100%|██████████| 30/30 [00:09<00:00,  3.30it/s]
100%|██████████| 30/30 [00:08<00:00,  3.34it/s]
100%|██████████| 30/30 [00:07<00:00,  3.85it/s]
100%|██████████| 30/30 [00:08<00:00,  3.35it/s]
100%|██████████| 30/30 [00:08<00:00,  3.44it/s]
100%|██████████| 30/30 [00:07<00:00,  3.78it/s]
100%|██████████| 30/30 [00:08<00:00,  3.34it/s]


Trained and Tested combination 141 of 6635520, taking 8.99 seconds to get val score of 0.5145: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 30, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 2 
Hyperparameter: batch_size (index: 4) 



100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
100%|██████████| 10/10 [00:10<00:00,  1.03s/it]
100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
100%|██████████| 10/10 [00:10<00:00,  1.01s/it]
100%|██████████| 10/10 [00:10<00:00,  1.01s/it]
100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
100%|██████████| 10/10 [00:10<00:00,  1.06s/it]


Trained and Tested combination 142 of 6635520, taking 10.64 seconds to get val score of 0.5271: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 32, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 
Already Trained and Tested combination (val score of 0.5476):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}
            Current best combo (with val score 0.5476):
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropo

100%|██████████| 10/10 [00:02<00:00,  4.59it/s]
100%|██████████| 10/10 [00:01<00:00,  8.41it/s]
100%|██████████| 10/10 [00:01<00:00,  8.38it/s]
100%|██████████| 10/10 [00:01<00:00,  8.43it/s]
100%|██████████| 10/10 [00:01<00:00,  7.00it/s]
100%|██████████| 10/10 [00:01<00:00,  7.01it/s]
100%|██████████| 10/10 [00:01<00:00,  7.84it/s]
100%|██████████| 10/10 [00:01<00:00,  8.32it/s]
100%|██████████| 10/10 [00:01<00:00,  8.43it/s]
100%|██████████| 10/10 [00:01<00:00,  8.41it/s]


Trained and Tested combination 143 of 6635520, taking 1.2 seconds to get val score of 0.4871: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 512, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


ROUND 3

Round 3 
Hyperparameter: features (index: 10) 



100%|██████████| 10/10 [00:01<00:00,  5.11it/s]
100%|██████████| 10/10 [00:01<00:00,  5.10it/s]
100%|██████████| 10/10 [00:02<00:00,  4.24it/s]
100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:01<00:00,  5.14it/s]
100%|██████████| 10/10 [00:01<00:00,  5.12it/s]
100%|██████████| 10/10 [00:01<00:00,  5.26it/s]
100%|██████████| 10/10 [00:01<00:00,  5.19it/s]
100%|██████████| 10/10 [00:02<00:00,  4.30it/s]
100%|██████████| 10/10 [00:02<00:00,  3.87it/s]


Trained and Tested combination 144 of 6635520, taking 2.6 seconds to get val score of 0.0: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 0}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  4.70it/s]
100%|██████████| 10/10 [00:02<00:00,  4.69it/s]
100%|██████████| 10/10 [00:02<00:00,  4.68it/s]
100%|██████████| 10/10 [00:02<00:00,  4.68it/s]
100%|██████████| 10/10 [00:02<00:00,  3.68it/s]
100%|██████████| 10/10 [00:02<00:00,  3.89it/s]
100%|██████████| 10/10 [00:02<00:00,  4.65it/s]
100%|██████████| 10/10 [00:02<00:00,  4.75it/s]
100%|██████████| 10/10 [00:02<00:00,  4.65it/s]
100%|██████████| 10/10 [00:02<00:00,  4.50it/s]


Trained and Tested combination 145 of 6635520, taking 2.23 seconds to get val score of 0.0: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 1}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:03<00:00,  3.24it/s]
100%|██████████| 10/10 [00:02<00:00,  4.55it/s]
100%|██████████| 10/10 [00:02<00:00,  4.52it/s]
100%|██████████| 10/10 [00:02<00:00,  4.57it/s]
100%|██████████| 10/10 [00:02<00:00,  4.62it/s]
100%|██████████| 10/10 [00:02<00:00,  3.61it/s]
100%|██████████| 10/10 [00:02<00:00,  3.83it/s]
100%|██████████| 10/10 [00:02<00:00,  4.49it/s]
100%|██████████| 10/10 [00:02<00:00,  4.64it/s]
100%|██████████| 10/10 [00:02<00:00,  4.59it/s]


Trained and Tested combination 146 of 6635520, taking 2.19 seconds to get val score of 0.0: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 2}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  3.83it/s]
100%|██████████| 10/10 [00:02<00:00,  3.38it/s]
100%|██████████| 10/10 [00:02<00:00,  4.38it/s]
100%|██████████| 10/10 [00:02<00:00,  4.47it/s]
100%|██████████| 10/10 [00:02<00:00,  4.46it/s]
100%|██████████| 10/10 [00:02<00:00,  4.09it/s]
100%|██████████| 10/10 [00:03<00:00,  3.17it/s]
100%|██████████| 10/10 [00:02<00:00,  4.41it/s]
100%|██████████| 10/10 [00:02<00:00,  4.48it/s]
100%|██████████| 10/10 [00:02<00:00,  4.39it/s]


Trained and Tested combination 147 of 6635520, taking 2.3 seconds to get val score of 0.0: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 3}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  4.20it/s]
100%|██████████| 10/10 [00:03<00:00,  3.04it/s]
100%|██████████| 10/10 [00:02<00:00,  4.28it/s]
100%|██████████| 10/10 [00:02<00:00,  4.21it/s]
100%|██████████| 10/10 [00:02<00:00,  4.17it/s]
100%|██████████| 10/10 [00:02<00:00,  4.31it/s]
100%|██████████| 10/10 [00:03<00:00,  3.04it/s]
100%|██████████| 10/10 [00:02<00:00,  4.23it/s]
100%|██████████| 10/10 [00:02<00:00,  4.22it/s]
100%|██████████| 10/10 [00:02<00:00,  4.20it/s]


Trained and Tested combination 148 of 6635520, taking 2.4 seconds to get val score of 0.0: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 4}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  3.93it/s]
100%|██████████| 10/10 [00:03<00:00,  2.95it/s]
100%|██████████| 10/10 [00:02<00:00,  4.13it/s]
100%|██████████| 10/10 [00:02<00:00,  4.12it/s]
100%|██████████| 10/10 [00:02<00:00,  4.16it/s]
100%|██████████| 10/10 [00:02<00:00,  3.86it/s]
100%|██████████| 10/10 [00:03<00:00,  3.05it/s]
100%|██████████| 10/10 [00:02<00:00,  4.18it/s]
100%|██████████| 10/10 [00:02<00:00,  4.17it/s]
100%|██████████| 10/10 [00:02<00:00,  4.18it/s]


Trained and Tested combination 149 of 6635520, taking 2.41 seconds to get val score of 0.4797: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 5}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  3.47it/s]
100%|██████████| 10/10 [00:03<00:00,  3.18it/s]
100%|██████████| 10/10 [00:02<00:00,  4.03it/s]
100%|██████████| 10/10 [00:02<00:00,  4.08it/s]
100%|██████████| 10/10 [00:02<00:00,  4.01it/s]
100%|██████████| 10/10 [00:03<00:00,  3.29it/s]
100%|██████████| 10/10 [00:02<00:00,  3.39it/s]
100%|██████████| 10/10 [00:02<00:00,  4.04it/s]
100%|██████████| 10/10 [00:02<00:00,  4.10it/s]
100%|██████████| 10/10 [00:02<00:00,  4.08it/s]


Trained and Tested combination 150 of 6635520, taking 2.47 seconds to get val score of 0.5161: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 6}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:03<00:00,  2.87it/s]
100%|██████████| 10/10 [00:02<00:00,  3.66it/s]
100%|██████████| 10/10 [00:02<00:00,  3.87it/s]
100%|██████████| 10/10 [00:02<00:00,  3.87it/s]
100%|██████████| 10/10 [00:02<00:00,  3.54it/s]
100%|██████████| 10/10 [00:03<00:00,  3.01it/s]
100%|██████████| 10/10 [00:02<00:00,  3.89it/s]
100%|██████████| 10/10 [00:02<00:00,  3.89it/s]
100%|██████████| 10/10 [00:02<00:00,  3.90it/s]
100%|██████████| 10/10 [00:03<00:00,  3.02it/s]


Trained and Tested combination 151 of 6635520, taking 3.33 seconds to get val score of 0.4941: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 7}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 
Already Trained and Tested combination (val score of 0.5476):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}
            Current best combo (with val score 0.5476):
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropo

100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
100%|██████████| 10/10 [00:06<00:00,  1.66it/s]
100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
100%|██████████| 10/10 [00:06<00:00,  1.58it/s]
100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
100%|██████████| 10/10 [00:05<00:00,  1.72it/s]
100%|██████████| 10/10 [00:05<00:00,  1.76it/s]


Trained and Tested combination 152 of 6635520, taking 5.73 seconds to get val score of 0.4197: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 9}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:05<00:00,  1.81it/s]
100%|██████████| 10/10 [00:06<00:00,  1.63it/s]
100%|██████████| 10/10 [00:05<00:00,  1.94it/s]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
100%|██████████| 10/10 [00:06<00:00,  1.53it/s]
100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
100%|██████████| 10/10 [00:05<00:00,  1.70it/s]
100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
100%|██████████| 10/10 [00:05<00:00,  1.88it/s]


Trained and Tested combination 153 of 6635520, taking 5.34 seconds to get val score of 0.1765: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 10}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
100%|██████████| 10/10 [00:06<00:00,  1.53it/s]
100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
100%|██████████| 10/10 [00:06<00:00,  1.61it/s]
100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
100%|██████████| 10/10 [00:05<00:00,  1.72it/s]
100%|██████████| 10/10 [00:05<00:00,  1.71it/s]


Trained and Tested combination 154 of 6635520, taking 5.91 seconds to get val score of 0.0727: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 11}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
100%|██████████| 10/10 [00:06<00:00,  1.54it/s]
100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
100%|██████████| 10/10 [00:06<00:00,  1.54it/s]
100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]
100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
100%|██████████| 10/10 [00:06<00:00,  1.57it/s]
100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
100%|██████████| 10/10 [00:05<00:00,  1.72it/s]


Trained and Tested combination 155 of 6635520, taking 5.86 seconds to get val score of 0.0357: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 12}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:05<00:00,  1.67it/s]
100%|██████████| 10/10 [00:05<00:00,  1.77it/s]
100%|██████████| 10/10 [00:06<00:00,  1.58it/s]
100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
100%|██████████| 10/10 [00:06<00:00,  1.52it/s]
100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:05<00:00,  1.81it/s]


Trained and Tested combination 156 of 6635520, taking 5.56 seconds to get val score of 0.057: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 13}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.48it/s]
100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
100%|██████████| 10/10 [00:06<00:00,  1.51it/s]
100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:05<00:00,  1.77it/s]
100%|██████████| 10/10 [00:06<00:00,  1.60it/s]
100%|██████████| 10/10 [00:05<00:00,  1.75it/s]
100%|██████████| 10/10 [00:05<00:00,  1.69it/s]
100%|██████████| 10/10 [00:06<00:00,  1.62it/s]


Trained and Tested combination 157 of 6635520, taking 6.22 seconds to get val score of 0.0852: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 14}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:05<00:00,  1.74it/s]
100%|██████████| 10/10 [00:06<00:00,  1.53it/s]
100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
100%|██████████| 10/10 [00:06<00:00,  1.48it/s]
100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
100%|██████████| 10/10 [00:06<00:00,  1.50it/s]
100%|██████████| 10/10 [00:05<00:00,  1.79it/s]
100%|██████████| 10/10 [00:06<00:00,  1.48it/s]
100%|██████████| 10/10 [00:05<00:00,  1.81it/s]
100%|██████████| 10/10 [00:06<00:00,  1.50it/s]


Trained and Tested combination 158 of 6635520, taking 6.71 seconds to get val score of 0.0298: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 15}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
100%|██████████| 10/10 [00:06<00:00,  1.46it/s]
100%|██████████| 10/10 [00:05<00:00,  1.77it/s]
100%|██████████| 10/10 [00:06<00:00,  1.48it/s]
100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
100%|██████████| 10/10 [00:06<00:00,  1.51it/s]
100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
100%|██████████| 10/10 [00:06<00:00,  1.58it/s]


Trained and Tested combination 159 of 6635520, taking 6.34 seconds to get val score of 0.0927: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 16}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:05<00:00,  1.71it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]
100%|██████████| 10/10 [00:05<00:00,  1.68it/s]
100%|██████████| 10/10 [00:06<00:00,  1.58it/s]
100%|██████████| 10/10 [00:06<00:00,  1.65it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:06<00:00,  1.63it/s]
100%|██████████| 10/10 [00:06<00:00,  1.57it/s]
100%|██████████| 10/10 [00:05<00:00,  1.68it/s]


Trained and Tested combination 160 of 6635520, taking 5.97 seconds to get val score of 0.0814: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 17}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.50it/s]
100%|██████████| 10/10 [00:06<00:00,  1.65it/s]
100%|██████████| 10/10 [00:06<00:00,  1.52it/s]
100%|██████████| 10/10 [00:05<00:00,  1.68it/s]
100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:05<00:00,  1.71it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:05<00:00,  1.71it/s]
100%|██████████| 10/10 [00:06<00:00,  1.46it/s]
100%|██████████| 10/10 [00:05<00:00,  1.70it/s]


Trained and Tested combination 161 of 6635520, taking 5.92 seconds to get val score of 0.0227: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 18}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.42it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:06<00:00,  1.53it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]
100%|██████████| 10/10 [00:06<00:00,  1.58it/s]
100%|██████████| 10/10 [00:06<00:00,  1.54it/s]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:06<00:00,  1.57it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]


Trained and Tested combination 162 of 6635520, taking 6.48 seconds to get val score of 0.0067: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 19}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:06<00:00,  1.52it/s]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:06<00:00,  1.51it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:06<00:00,  1.60it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:06<00:00,  1.60it/s]
100%|██████████| 10/10 [00:06<00:00,  1.46it/s]


Trained and Tested combination 163 of 6635520, taking 6.88 seconds to get val score of 0.0067: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 20}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.62it/s]
100%|██████████| 10/10 [00:07<00:00,  1.38it/s]
100%|██████████| 10/10 [00:06<00:00,  1.64it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:06<00:00,  1.64it/s]
100%|██████████| 10/10 [00:07<00:00,  1.38it/s]
100%|██████████| 10/10 [00:06<00:00,  1.63it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:06<00:00,  1.66it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]


Trained and Tested combination 164 of 6635520, taking 7.22 seconds to get val score of 0.0232: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 21}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.62it/s]
100%|██████████| 10/10 [00:07<00:00,  1.37it/s]
100%|██████████| 10/10 [00:06<00:00,  1.62it/s]
100%|██████████| 10/10 [00:07<00:00,  1.37it/s]
100%|██████████| 10/10 [00:06<00:00,  1.62it/s]
100%|██████████| 10/10 [00:07<00:00,  1.38it/s]
100%|██████████| 10/10 [00:06<00:00,  1.61it/s]
100%|██████████| 10/10 [00:07<00:00,  1.42it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:06<00:00,  1.45it/s]


Trained and Tested combination 165 of 6635520, taking 6.95 seconds to get val score of 0.0033: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 22}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.44it/s]
100%|██████████| 10/10 [00:06<00:00,  1.52it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:06<00:00,  1.60it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:06<00:00,  1.60it/s]
100%|██████████| 10/10 [00:07<00:00,  1.37it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]


Trained and Tested combination 166 of 6635520, taking 6.34 seconds to get val score of 0.0245: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 23}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:06<00:00,  1.58it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:06<00:00,  1.53it/s]
100%|██████████| 10/10 [00:06<00:00,  1.43it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:06<00:00,  1.51it/s]
100%|██████████| 10/10 [00:07<00:00,  1.37it/s]
100%|██████████| 10/10 [00:06<00:00,  1.59it/s]
100%|██████████| 10/10 [00:07<00:00,  1.33it/s]


Trained and Tested combination 167 of 6635520, taking 7.55 seconds to get val score of 0.013: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 24}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.57it/s]
100%|██████████| 10/10 [00:07<00:00,  1.33it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:07<00:00,  1.42it/s]
100%|██████████| 10/10 [00:07<00:00,  1.34it/s]
100%|██████████| 10/10 [00:06<00:00,  1.58it/s]


Trained and Tested combination 168 of 6635520, taking 6.37 seconds to get val score of 0.0348: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 25}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:06<00:00,  1.53it/s]
100%|██████████| 10/10 [00:07<00:00,  1.38it/s]
100%|██████████| 10/10 [00:06<00:00,  1.43it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]


Trained and Tested combination 169 of 6635520, taking 7.6 seconds to get val score of 0.0667: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 26}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.52it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]
100%|██████████| 10/10 [00:06<00:00,  1.54it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]
100%|██████████| 10/10 [00:06<00:00,  1.43it/s]
100%|██████████| 10/10 [00:07<00:00,  1.41it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]


Trained and Tested combination 170 of 6635520, taking 6.5 seconds to get val score of 0.0761: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 27}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.28it/s]
100%|██████████| 10/10 [00:06<00:00,  1.53it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:06<00:00,  1.52it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:06<00:00,  1.44it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:07<00:00,  1.33it/s]
100%|██████████| 10/10 [00:06<00:00,  1.51it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]


Trained and Tested combination 171 of 6635520, taking 7.69 seconds to get val score of 0.059: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 28}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:06<00:00,  1.51it/s]
100%|██████████| 10/10 [00:07<00:00,  1.33it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:07<00:00,  1.28it/s]
100%|██████████| 10/10 [00:06<00:00,  1.52it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:06<00:00,  1.52it/s]


Trained and Tested combination 172 of 6635520, taking 6.61 seconds to get val score of 0.0259: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 29}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:07<00:00,  1.38it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:07<00:00,  1.28it/s]
100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:06<00:00,  1.50it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
100%|██████████| 10/10 [00:07<00:00,  1.37it/s]


Trained and Tested combination 173 of 6635520, taking 7.33 seconds to get val score of 0.081: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 30}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.48it/s]
100%|██████████| 10/10 [00:07<00:00,  1.28it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:06<00:00,  1.43it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:07<00:00,  1.28it/s]


Trained and Tested combination 174 of 6635520, taking 7.88 seconds to get val score of 0.0889: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 31}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.41it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:06<00:00,  1.46it/s]
100%|██████████| 10/10 [00:08<00:00,  1.25it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]


Trained and Tested combination 175 of 6635520, taking 7.96 seconds to get val score of 0.1649: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 32}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:06<00:00,  1.44it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:07<00:00,  1.37it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:06<00:00,  1.46it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]


Trained and Tested combination 176 of 6635520, taking 7.96 seconds to get val score of 0.1492: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 33}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.42it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:06<00:00,  1.44it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:07<00:00,  1.37it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]


Trained and Tested combination 177 of 6635520, taking 8.23 seconds to get val score of 0.1533: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 34}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:08<00:00,  1.19it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:07<00:00,  1.41it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]


Trained and Tested combination 178 of 6635520, taking 8.15 seconds to get val score of 0.2163: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 35}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:07<00:00,  1.33it/s]
100%|██████████| 10/10 [00:07<00:00,  1.25it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:07<00:00,  1.33it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]


Trained and Tested combination 179 of 6635520, taking 7.47 seconds to get val score of 0.1573: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 36}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]


Trained and Tested combination 180 of 6635520, taking 7.78 seconds to get val score of 0.133: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 37}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:07<00:00,  1.38it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:08<00:00,  1.19it/s]
100%|██████████| 10/10 [00:07<00:00,  1.28it/s]
100%|██████████| 10/10 [00:07<00:00,  1.31it/s]
100%|██████████| 10/10 [00:08<00:00,  1.19it/s]


Trained and Tested combination 181 of 6635520, taking 8.47 seconds to get val score of 0.1523: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 38}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:08<00:00,  1.19it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]
100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]


Trained and Tested combination 182 of 6635520, taking 7.96 seconds to get val score of 0.1021: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 39}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:08<00:00,  1.19it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:07<00:00,  1.36it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]


Trained and Tested combination 183 of 6635520, taking 7.76 seconds to get val score of 0.1755: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 40}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:08<00:00,  1.16it/s]
100%|██████████| 10/10 [00:07<00:00,  1.34it/s]
100%|██████████| 10/10 [00:08<00:00,  1.16it/s]


Trained and Tested combination 184 of 6635520, taking 8.64 seconds to get val score of 0.1709: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 41}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.14it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:08<00:00,  1.16it/s]
100%|██████████| 10/10 [00:08<00:00,  1.16it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:08<00:00,  1.15it/s]
100%|██████████| 10/10 [00:08<00:00,  1.16it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:08<00:00,  1.14it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]


Trained and Tested combination 185 of 6635520, taking 8.52 seconds to get val score of 0.1834: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 42}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:08<00:00,  1.15it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
100%|██████████| 10/10 [00:08<00:00,  1.14it/s]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:07<00:00,  1.28it/s]
100%|██████████| 10/10 [00:08<00:00,  1.14it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]


Trained and Tested combination 186 of 6635520, taking 7.93 seconds to get val score of 0.1782: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 43}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]


Trained and Tested combination 187 of 6635520, taking 8.92 seconds to get val score of 0.1516: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 44}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:09<00:00,  1.09it/s]
100%|██████████| 10/10 [00:08<00:00,  1.25it/s]
100%|██████████| 10/10 [00:08<00:00,  1.12it/s]
100%|██████████| 10/10 [00:08<00:00,  1.12it/s]
100%|██████████| 10/10 [00:07<00:00,  1.27it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:08<00:00,  1.12it/s]
100%|██████████| 10/10 [00:08<00:00,  1.20it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]
100%|██████████| 10/10 [00:08<00:00,  1.12it/s]


Trained and Tested combination 188 of 6635520, taking 8.99 seconds to get val score of 0.1394: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 45}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]
100%|██████████| 10/10 [00:08<00:00,  1.11it/s]
100%|██████████| 10/10 [00:09<00:00,  1.11it/s]
100%|██████████| 10/10 [00:07<00:00,  1.28it/s]
100%|██████████| 10/10 [00:09<00:00,  1.09it/s]
100%|██████████| 10/10 [00:09<00:00,  1.10it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:09<00:00,  1.10it/s]
100%|██████████| 10/10 [00:09<00:00,  1.11it/s]


Trained and Tested combination 189 of 6635520, taking 9.08 seconds to get val score of 0.1235: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 46}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:09<00:00,  1.08it/s]
100%|██████████| 10/10 [00:08<00:00,  1.14it/s]
100%|██████████| 10/10 [00:07<00:00,  1.25it/s]
100%|██████████| 10/10 [00:08<00:00,  1.11it/s]
100%|██████████| 10/10 [00:09<00:00,  1.10it/s]
100%|██████████| 10/10 [00:07<00:00,  1.26it/s]
100%|██████████| 10/10 [00:09<00:00,  1.10it/s]
100%|██████████| 10/10 [00:09<00:00,  1.08it/s]


Trained and Tested combination 190 of 6635520, taking 9.34 seconds to get val score of 0.1488: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 47}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 3 
Hyperparameter: num_transformer_layers (index: 1) 



100%|██████████| 10/10 [00:01<00:00,  5.48it/s]
100%|██████████| 10/10 [00:01<00:00,  5.35it/s]
100%|██████████| 10/10 [00:01<00:00,  5.52it/s]
100%|██████████| 10/10 [00:01<00:00,  5.50it/s]
100%|██████████| 10/10 [00:02<00:00,  4.07it/s]
100%|██████████| 10/10 [00:02<00:00,  4.98it/s]
100%|██████████| 10/10 [00:01<00:00,  5.59it/s]
100%|██████████| 10/10 [00:01<00:00,  5.62it/s]
100%|██████████| 10/10 [00:01<00:00,  5.50it/s]
100%|██████████| 10/10 [00:01<00:00,  5.53it/s]


Trained and Tested combination 191 of 6635520, taking 1.83 seconds to get val score of 0.5064: 
                {'hidden_dim': 32, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:02<00:00,  3.56it/s]
100%|██████████| 10/10 [00:02<00:00,  3.76it/s]
100%|██████████| 10/10 [00:02<00:00,  4.46it/s]
100%|██████████| 10/10 [00:02<00:00,  4.51it/s]
100%|██████████| 10/10 [00:02<00:00,  4.48it/s]
100%|██████████| 10/10 [00:02<00:00,  3.75it/s]
100%|██████████| 10/10 [00:02<00:00,  3.58it/s]
100%|██████████| 10/10 [00:02<00:00,  4.56it/s]
100%|██████████| 10/10 [00:02<00:00,  4.53it/s]
100%|██████████| 10/10 [00:02<00:00,  4.49it/s]


Trained and Tested combination 192 of 6635520, taking 2.24 seconds to get val score of 0.5275: 
                {'hidden_dim': 32, 'num_transformer_layers': 2, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 
Already Trained and Tested combination (val score of 0.5476):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}
            Current best combo (with val score 0.5476):
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropo

100%|██████████| 10/10 [00:02<00:00,  3.37it/s]
100%|██████████| 10/10 [00:03<00:00,  3.03it/s]
100%|██████████| 10/10 [00:02<00:00,  3.73it/s]
100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:02<00:00,  3.80it/s]
100%|██████████| 10/10 [00:03<00:00,  2.76it/s]
100%|██████████| 10/10 [00:02<00:00,  3.81it/s]
100%|██████████| 10/10 [00:02<00:00,  3.79it/s]
100%|██████████| 10/10 [00:02<00:00,  3.80it/s]
100%|██████████| 10/10 [00:03<00:00,  3.24it/s]


Trained and Tested combination 193 of 6635520, taking 3.1 seconds to get val score of 0.5476: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 2, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 


100%|██████████| 10/10 [00:03<00:00,  3.11it/s]
100%|██████████| 10/10 [00:02<00:00,  3.67it/s]
100%|██████████| 10/10 [00:02<00:00,  3.64it/s]
100%|██████████| 10/10 [00:02<00:00,  3.57it/s]
100%|██████████| 10/10 [00:03<00:00,  2.86it/s]
100%|██████████| 10/10 [00:02<00:00,  3.73it/s]
100%|██████████| 10/10 [00:02<00:00,  3.68it/s]
100%|██████████| 10/10 [00:02<00:00,  3.73it/s]
100%|██████████| 10/10 [00:03<00:00,  2.82it/s]
100%|██████████| 10/10 [00:02<00:00,  3.74it/s]


Trained and Tested combination 194 of 6635520, taking 2.69 seconds to get val score of 0.5014: 
                {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 3, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8}, 
            Current best combo with val score 0.5476: 
                    {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 8, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'features': 8} 

Best combo after this hyperparameter: (1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 8) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 3 
Hyperparameter: nhead (index: 5) 

Already Trained and Tested combination (val score of 0.5421):
            {'hidden_dim': 32, 'num_transformer_layers': 3, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 2, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': True, 'featur