# Tuning Script

In [1]:
# Parameters subject to change in every script - involved in naming
# (TUNER, MODEL and PREPROCESSING are interpolated into the tuning-result and
# best-model file paths further down; OPTIMISED_METRIC is handed to the tuner;
# MODEL also decides which package gets pip-installed on Colab)
# NOTE(review): TUNER is 'yangzhoub' but the tuner class imported in this
# notebook is JiaoCheng_10CV, so results are stored under the 'yangzhoub'
# prefix - confirm this is intended.

MODEL = 'tfc'
PREPROCESSING = 'smote_normalise'
TUNER = 'yangzhoub'
OPTIMISED_METRIC = 'f1'

---

In [2]:
# Detect whether this is a Google Colab environment and, if it is, mount
# Google Drive. COLAB_ENVIRONMENT ends up True only when both the import and
# the mount succeed.

COLAB_ENVIRONMENT = False

try:
    from google.colab import drive
except ImportError:
    # google.colab cannot be imported here: treat this as a non-Colab run
    pass
else:
    try:
        drive.mount('/content/drive')
        COLAB_ENVIRONMENT = True
    except Exception as mount_error:
        # Still best-effort (the flag stays False, as before), but report the
        # reason instead of silently hiding a failed mount.
        print(f'Google Drive could not be mounted: {mount_error!r}')

Mounted at /content/drive


In [3]:
import sys
import os
import pickle

if COLAB_ENVIRONMENT:
    home_directory = './drive/MyDrive/LAB/COMP90089__GroupWork__Py/' # my home directory is stored in ./LAB of google drive
    if MODEL == 'ebr':
        !pip install interpret==0.5.0
    elif MODEL == 'cbr':
        !pip install catboost
    elif MODEL in ['tfc', 'mlpc']:
        !pip install PyTorch2Sklearn
    !pip install JXAutoML
else:
    home_directory = '../../'

from JXAutoML.JiaoCheng_10CV import JiaoCheng_10CV as Tuner

import pandas as pd

Collecting PyTorch2Sklearn
  Downloading PyTorch2Sklearn-0.2.4-py3-none-any.whl.metadata (33 kB)
Downloading PyTorch2Sklearn-0.2.4-py3-none-any.whl (19 kB)
Installing collected packages: PyTorch2Sklearn
Successfully installed PyTorch2Sklearn-0.2.4
Collecting JXAutoML
  Downloading JXAutoML-0.4.4-py3-none-any.whl.metadata (67 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 67.2/67.2 kB 6.7 MB/s eta 0:00:00
Downloading JXAutoML-0.4.4-py3-none-any.whl (52 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 52.1/52.1 kB 5.0 MB/s eta 0:00:00
Installing collected packages: JXAutoML
Successfully installed JXAutoML-0.4.4


In [4]:
# Parameters that are same for every script

FOLDS = 10  # we have decided to do 10 fold validation; also the number of train_i/val_i csv pairs read in

SEED = 42  # handed to the model as its 'random_state'

TASK_TYPE = 'Classification'  # this task is a classification task

# we have decided to tune features as hp
# (when True, the feature importance ordering is registered with the tuner)
TUNE_FEATURES_AS_HYPERPARAMETERS = True

---

In [5]:
# model to use to tune
from PyTorch2Sklearn.Transformer import Transformer as clf
from PyTorch2Sklearn.environment import *
from PyTorch2Sklearn.utils.data import TabularDataFactory, TabularDataset


# Search space: the candidate values the tuner may pick for each tuneable
# hyperparameter. Key order is kept exactly as before; the hyperparameter
# indices printed in the tuning log follow this order.
parameter_choices = dict(
    hidden_dim=(16, 32, 64, 256),
    num_transformer_layers=(1, 2, 3),
    num_mlp_layers=(1, 2, 3),
    dropout=(0, 0.05, 0.1, 0.2),
    batch_size=(32, 128, 512),
    nhead=(2, 4, 8, 16),
    epochs=(5, 10, 20, 30),
    lr=(1e-5, 1e-4, 1e-3, 1e-2, 1e-1),
    batchnorm=(False, True),
    grad_clip=(False, True),
)

# Fixed model settings: passed to the tuner as non-tuneable hyperparameters,
# i.e. values that differ from the model defaults but are never searched over.
# NOTE(review): `nn` is not imported explicitly in this notebook; presumably it
# is provided by the wildcard import of PyTorch2Sklearn.environment - confirm.
non_tunable_hyperparameters_dict = dict(
    random_state=SEED,
    loss=nn.CrossEntropyLoss(),
    rootpath='./',
    TabularDataFactory=TabularDataFactory,
    TabularDataset=TabularDataset,
    name='Transformer',
    output_dim=2,
    mode='Classification',
    verbose=True,
    share_embedding_mlp=False,
    use_cls=False,
)

# Sequence in which hyperparameters are tuned (JiaoCheng specific):
# features first, then architecture, regularisation and finally the
# optimisation settings.
tuning_order = [
    'features',
    'num_transformer_layers', 'num_mlp_layers', 'nhead', 'hidden_dim',
    'dropout', 'batchnorm', 'grad_clip',
    'lr', 'epochs', 'batch_size',
]

# Starting value of every hyperparameter (JiaoCheng specific): these are passed
# to the tuner as the default values for the first iteration of tuning.
default_hyperparameter_values = dict(
    features=0,
    num_transformer_layers=1,
    num_mlp_layers=1,
    nhead=4,
    hidden_dim=16,
    dropout=0.1,
    batchnorm=False,
    grad_clip=False,
    lr=1e-2,
    epochs=10,
    batch_size=128,
)

---

Read in and Prepare Data

In [6]:
# Read every cross-validation fold from disk and collect, per fold, the
# feature frame and target series for both the training and validation split.

train_x_list, train_y_list = [], []
val_x_list, val_y_list = [], []

# columns that are never model inputs: the target plus the three *_id columns
non_feature_columns = ['mortality_status', 'subject_id', 'hadm_id', 'stay_id']

for i in range(FOLDS):
    train_data = pd.read_csv(
        f'{home_directory}data/curated/modelling/{PREPROCESSING}/train_{i}.csv')
    val_data = pd.read_csv(
        f'{home_directory}data/curated/modelling/{PREPROCESSING}/val_{i}.csv')

    # y is the mortality_status column; X is whatever remains after dropping
    # the non-feature columns
    train_y = train_data['mortality_status']
    val_y = val_data['mortality_status']
    train_x = train_data.drop(columns=non_feature_columns)
    val_x = val_data.drop(columns=non_feature_columns)

    train_x_list.append(train_x)
    train_y_list.append(train_y)
    val_x_list.append(val_x)
    val_y_list.append(val_y)

In [7]:
# Load the pickled feature importance ordering saved for this preprocessing
# variant (file name prefix: xgb_feature_importance_ordering_).

ordering_path = f'{home_directory}models/xgb_feature_importance_ordering_{PREPROCESSING}.pickle'
with open(ordering_path, 'rb') as ordering_file:
    feature_importance_ordering = pickle.load(ordering_file)

Set parameters

In [8]:
# Configure the tuner end to end: model, data, search space, fixed settings,
# feature ordering, tuning order, defaults, and where results are persisted.
# The '---' prints separate the status messages printed by each tuner call.

# one definition of the tuning-result csv path: it is both read from (to resume)
# and written to (to save), so the two uses must never drift apart
tuning_result_path = f'{home_directory}models/tuning/{TUNER}_{MODEL}_{PREPROCESSING}.csv'

# initialisation
tuner = Tuner()

print('---')

# define what model we are tuning
tuner.read_in_model(clf, TASK_TYPE, optimised_metric=OPTIMISED_METRIC, pytorch_model=True)

print('---')

# read in the data for training and validation
tuner.read_in_data(train_x_list, train_y_list, val_x_list, val_y_list)

print('---')

# set what hp values to tune
tuner.set_hyperparameters(parameter_choices)
# WARNING: this may take a while if no. tuneable hyperparameters are large

print('---')

# set up hp values that need to be changed from default but NOT to be tuned
tuner.set_non_tuneable_hyperparameters(non_tunable_hyperparameters_dict)

print('---')

# set up feature importance ordering

if TUNE_FEATURES_AS_HYPERPARAMETERS:
    tuner.set_features(feature_importance_ordering)
    # WARNING: this may take a while if no. tuneable hyperparameters are large

print('---')

# set up the order of hyperparameters when iteratively tuning using JiaoCheng
tuner.set_tuning_order(tuning_order)

print('---')

# set up the default hp values for first iteration of tuning JiaoCheng
tuner.set_hyperparameter_default_values(default_hyperparameter_values)

print('---')

try:  # try to read in previous results to continue tuning ...
    tuner.read_in_tuning_result_df(tuning_result_path)
except Exception as read_error:
    # Still best-effort (tuning starts from scratch), but show why the read
    # failed: the same csv is used as the save address below, so a silent
    # failure here could hide that earlier results are about to be replaced.
    print('No previous tuning data read in')
    print(f'  reason: {type(read_error).__name__}: {read_error}')

print('---')

# set up where to save the tuning result csv
tuner.set_tuning_result_saving_address(tuning_result_path)

print('---')

# set up where to save the current best model
tuner.set_best_model_saving_address(f'{home_directory}models/tmp_models/{TUNER}_{MODEL}_{PREPROCESSING}')

JiaoCheng Initialised
---
Successfully read in model <class 'PyTorch2Sklearn.Transformer.Transformer'>, which is a Classification model optimising for f1
---
Read in Train X data list
Read in Train y data list
Read in Val X data list
Read in Val y data list
---
Successfully recorded hyperparameter choices
---
Successfully recorded non_tuneable_hyperparameter choices
---
Successfully recorded tuneable feature combination choices and updated relevant internal structures
---
---
---
Successfully read in tuning result of 57 rows, for 57.0 combos
---
Successfully set tuning output address
---
Successfully set best model output address


In [9]:
# ignore warnings

import warnings
warnings.filterwarnings("ignore")

In [10]:
# begin tuning ... (or continue training)
# Combinations already present in the tuning results read in earlier are not
# retrained; the log reports them as "Already Trained and Tested combination".

tuner.tune()


Default combo: [0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 0] 


ROUND 1

Round 1 
Hyperparameter: features (index: 10) 

As new Best Combo (0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 0) was read in, best_clf is set to None
Already Trained and Tested combination (val score of 0.3071):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 0}
            Current best combo (with val score 0.3071):
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 0} 
        Has trained 57 of 6635520 combinations so far
Already Trained and Tested combination (val score of 0.0675):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10

100%|██████████| 10/10 [00:18<00:00,  1.82s/it]
100%|██████████| 10/10 [00:17<00:00,  1.76s/it]
100%|██████████| 10/10 [00:17<00:00,  1.76s/it]
100%|██████████| 10/10 [00:17<00:00,  1.76s/it]
100%|██████████| 10/10 [00:18<00:00,  1.81s/it]
100%|██████████| 10/10 [00:17<00:00,  1.78s/it]
100%|██████████| 10/10 [00:17<00:00,  1.76s/it]
100%|██████████| 10/10 [00:17<00:00,  1.79s/it]
100%|██████████| 10/10 [00:17<00:00,  1.77s/it]
100%|██████████| 10/10 [00:17<00:00,  1.79s/it]


Trained and Tested combination 58 of 6635520, taking 17.99 seconds to get val score of 0.2805: 
                {'hidden_dim': 256, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 

Best combo after this hyperparameter: (0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 33) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 1 
Hyperparameter: dropout (index: 3) 



100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
100%|██████████| 10/10 [00:09<00:00,  1.02it/s]
100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Trained and Tested combination 59 of 6635520, taking 10.16 seconds to get val score of 0.281: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 


100%|██████████| 10/10 [00:11<00:00,  1.10s/it]
100%|██████████| 10/10 [00:11<00:00,  1.12s/it]
100%|██████████| 10/10 [00:11<00:00,  1.16s/it]
100%|██████████| 10/10 [00:11<00:00,  1.17s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:11<00:00,  1.17s/it]
100%|██████████| 10/10 [00:11<00:00,  1.16s/it]
100%|██████████| 10/10 [00:11<00:00,  1.14s/it]


Trained and Tested combination 60 of 6635520, taking 11.44 seconds to get val score of 0.281: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.05, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 
Already Trained and Tested combination (val score of 0.3117):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}
            Current best combo (with val score 0.3117):
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 

100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
100%|██████████| 10/10 [00:11<00:00,  1.11s/it]
100%|██████████| 10/10 [00:11<00:00,  1.17s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:12<00:00,  1.20s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.20s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:11<00:00,  1.17s/it]


Trained and Tested combination 61 of 6635520, taking 11.75 seconds to get val score of 0.2493: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.2, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 

Best combo after this hyperparameter: (0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 33) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 1 
Hyperparameter: batchnorm (index: 8) 

Already Trained and Tested combination (val score of 0.3117):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': F

100%|██████████| 10/10 [00:16<00:00,  1.66s/it]
100%|██████████| 10/10 [00:16<00:00,  1.68s/it]
100%|██████████| 10/10 [00:17<00:00,  1.77s/it]
100%|██████████| 10/10 [00:16<00:00,  1.68s/it]
100%|██████████| 10/10 [00:16<00:00,  1.67s/it]
100%|██████████| 10/10 [00:17<00:00,  1.78s/it]
100%|██████████| 10/10 [00:16<00:00,  1.67s/it]
100%|██████████| 10/10 [00:16<00:00,  1.66s/it]
100%|██████████| 10/10 [00:17<00:00,  1.76s/it]
100%|██████████| 10/10 [00:16<00:00,  1.67s/it]


Trained and Tested combination 62 of 6635520, taking 16.79 seconds to get val score of 0.0: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': True, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 

Best combo after this hyperparameter: (0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 33) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 1 
Hyperparameter: grad_clip (index: 9) 

Already Trained and Tested combination (val score of 0.3117):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False

100%|██████████| 10/10 [00:12<00:00,  1.26s/it]
100%|██████████| 10/10 [00:12<00:00,  1.25s/it]
100%|██████████| 10/10 [00:12<00:00,  1.25s/it]
100%|██████████| 10/10 [00:12<00:00,  1.24s/it]
100%|██████████| 10/10 [00:12<00:00,  1.25s/it]
100%|██████████| 10/10 [00:12<00:00,  1.26s/it]
100%|██████████| 10/10 [00:12<00:00,  1.25s/it]
100%|██████████| 10/10 [00:12<00:00,  1.25s/it]
100%|██████████| 10/10 [00:12<00:00,  1.26s/it]
100%|██████████| 10/10 [00:12<00:00,  1.24s/it]


Trained and Tested combination 63 of 6635520, taking 12.5 seconds to get val score of 0.1556: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': True, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 

Best combo after this hyperparameter: (0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 33) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 1 
Hyperparameter: lr (index: 7) 



100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.13s/it]
100%|██████████| 10/10 [00:11<00:00,  1.11s/it]
100%|██████████| 10/10 [00:11<00:00,  1.16s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.17s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]


Trained and Tested combination 64 of 6635520, taking 11.95 seconds to get val score of 0.0: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 1e-05, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 


100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:11<00:00,  1.17s/it]
100%|██████████| 10/10 [00:11<00:00,  1.12s/it]
100%|██████████| 10/10 [00:11<00:00,  1.11s/it]
100%|██████████| 10/10 [00:11<00:00,  1.16s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]


Trained and Tested combination 65 of 6635520, taking 11.85 seconds to get val score of 0.0: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.0001, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 


100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:11<00:00,  1.20s/it]
100%|██████████| 10/10 [00:11<00:00,  1.14s/it]
100%|██████████| 10/10 [00:11<00:00,  1.11s/it]
100%|██████████| 10/10 [00:11<00:00,  1.14s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:11<00:00,  1.20s/it]


Trained and Tested combination 66 of 6635520, taking 12.02 seconds to get val score of 0.0: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.001, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 
Already Trained and Tested combination (val score of 0.3117):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}
            Current best combo (with val score 0.3117):
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1,

100%|██████████| 10/10 [00:11<00:00,  1.20s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]
100%|██████████| 10/10 [00:12<00:00,  1.20s/it]
100%|██████████| 10/10 [00:12<00:00,  1.21s/it]
100%|██████████| 10/10 [00:12<00:00,  1.21s/it]
100%|██████████| 10/10 [00:12<00:00,  1.20s/it]
100%|██████████| 10/10 [00:11<00:00,  1.17s/it]
100%|██████████| 10/10 [00:11<00:00,  1.15s/it]
100%|██████████| 10/10 [00:11<00:00,  1.15s/it]
100%|██████████| 10/10 [00:11<00:00,  1.19s/it]


Trained and Tested combination 67 of 6635520, taking 12.0 seconds to get val score of 0.0: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.1, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 

Best combo after this hyperparameter: (0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 33) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 1 
Hyperparameter: epochs (index: 6) 



100%|██████████| 5/5 [00:06<00:00,  1.33s/it]
100%|██████████| 5/5 [00:05<00:00,  1.10s/it]
100%|██████████| 5/5 [00:06<00:00,  1.32s/it]
100%|██████████| 5/5 [00:05<00:00,  1.09s/it]
100%|██████████| 5/5 [00:06<00:00,  1.32s/it]
100%|██████████| 5/5 [00:05<00:00,  1.08s/it]
100%|██████████| 5/5 [00:06<00:00,  1.28s/it]
100%|██████████| 5/5 [00:05<00:00,  1.07s/it]
100%|██████████| 5/5 [00:06<00:00,  1.30s/it]
100%|██████████| 5/5 [00:05<00:00,  1.05s/it]


Trained and Tested combination 68 of 6635520, taking 5.34 seconds to get val score of 0.2493: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 5, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 
Already Trained and Tested combination (val score of 0.3117):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}
            Current best combo (with val score 0.3117):
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1,

100%|██████████| 20/20 [00:23<00:00,  1.18s/it]
100%|██████████| 20/20 [00:23<00:00,  1.17s/it]
100%|██████████| 20/20 [00:23<00:00,  1.16s/it]
100%|██████████| 20/20 [00:23<00:00,  1.17s/it]
100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 20/20 [00:23<00:00,  1.19s/it]
100%|██████████| 20/20 [00:23<00:00,  1.19s/it]
100%|██████████| 20/20 [00:23<00:00,  1.16s/it]
100%|██████████| 20/20 [00:23<00:00,  1.16s/it]
100%|██████████| 20/20 [00:23<00:00,  1.20s/it]


Trained and Tested combination 69 of 6635520, taking 24.0 seconds to get val score of 0.2805: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 20, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 


100%|██████████| 30/30 [00:35<00:00,  1.19s/it]
100%|██████████| 30/30 [00:34<00:00,  1.15s/it]
100%|██████████| 30/30 [00:35<00:00,  1.18s/it]
100%|██████████| 30/30 [00:35<00:00,  1.19s/it]
100%|██████████| 30/30 [00:35<00:00,  1.17s/it]
100%|██████████| 30/30 [00:35<00:00,  1.19s/it]
100%|██████████| 30/30 [00:35<00:00,  1.17s/it]
100%|██████████| 30/30 [00:34<00:00,  1.13s/it]
100%|██████████| 30/30 [00:35<00:00,  1.18s/it]
100%|██████████| 30/30 [00:35<00:00,  1.17s/it]


Trained and Tested combination 70 of 6635520, taking 35.26 seconds to get val score of 0.2181: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 30, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 

Best combo after this hyperparameter: (0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 33) , NOT UPDATED SINCE LAST HYPERPARAMETER


Round 1 
Hyperparameter: batch_size (index: 4) 



100%|██████████| 10/10 [00:44<00:00,  4.41s/it]
100%|██████████| 10/10 [00:44<00:00,  4.44s/it]
100%|██████████| 10/10 [00:44<00:00,  4.49s/it]
100%|██████████| 10/10 [00:44<00:00,  4.46s/it]
100%|██████████| 10/10 [00:45<00:00,  4.51s/it]
100%|██████████| 10/10 [00:44<00:00,  4.40s/it]
100%|██████████| 10/10 [00:45<00:00,  4.54s/it]
100%|██████████| 10/10 [00:44<00:00,  4.46s/it]
100%|██████████| 10/10 [00:45<00:00,  4.57s/it]
100%|██████████| 10/10 [00:44<00:00,  4.43s/it]


Trained and Tested combination 71 of 6635520, taking 44.38 seconds to get val score of 0.0624: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 32, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 
Already Trained and Tested combination (val score of 0.3117):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}
            Current best combo (with val score 0.3117):
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1

100%|██████████| 10/10 [00:04<00:00,  2.42it/s]
100%|██████████| 10/10 [00:04<00:00,  2.31it/s]
100%|██████████| 10/10 [00:03<00:00,  2.58it/s]
100%|██████████| 10/10 [00:03<00:00,  2.53it/s]
100%|██████████| 10/10 [00:04<00:00,  2.30it/s]
100%|██████████| 10/10 [00:03<00:00,  2.59it/s]
100%|██████████| 10/10 [00:03<00:00,  2.57it/s]
100%|██████████| 10/10 [00:04<00:00,  2.26it/s]
100%|██████████| 10/10 [00:03<00:00,  2.61it/s]
100%|██████████| 10/10 [00:03<00:00,  2.59it/s]


Trained and Tested combination 72 of 6635520, taking 3.91 seconds to get val score of 0.2185: 
                {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 512, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33}, 
            Current best combo with val score 0.3117: 
                    {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_clip': False, 'features': 33} 

Best combo after this hyperparameter: (0, 0, 0, 2, 1, 1, 1, 3, 0, 0, 33) , NOT UPDATED SINCE LAST HYPERPARAMETER


ROUND 2

Round 2 
Hyperparameter: features (index: 10) 

Already Trained and Tested combination (val score of 0.3071):
            {'hidden_dim': 16, 'num_transformer_layers': 1, 'num_mlp_layers': 1, 'dropout': 0.1, 'batch_size': 128, 'nhead': 4, 'epochs': 10, 'lr': 0.01, 'batchnorm': False, 'grad_