<a href="https://colab.research.google.com/github/Mainakdeb/moa-classification/blob/master/hyperparameter_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
!cp /content/drive/"My Drive"/kaggle/lish-moa.zip /content/

In [None]:
!unzip lish-moa.zip

In [None]:
!pip install pip install iterative-stratification

In [None]:
!pip install optuna

In [6]:
from sklearn.preprocessing import MinMaxScaler
        
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler

import optuna
import numpy as np 
import pandas as pd 
from IPython.display import clear_output
import matplotlib.pyplot as plt
from scipy.ndimage.filters import gaussian_filter1d   ## smoother
from tqdm.notebook import tqdm, tnrange

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

plt.rcParams['figure.figsize'] = 15, 7

CGREEN  = '\33[32m'
CBLUE =  '\033[34m'
CRED = '\033[1;31m'
CEND  = '\33[0m'

def seed_everything(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=42)



In [7]:
if torch.cuda.is_available():
    device='cuda'
else:
    device='cpu'
    
device


'cuda'

In [8]:
train_features = pd.read_csv('train_features.csv')
train_targets = pd.read_csv('train_targets_scored.csv')
train_targets_s = train_targets
test_features = pd.read_csv('test_features.csv')

ss = pd.read_csv('sample_submission.csv')

In [9]:
class TorchStandardScaler:
  def fit(self, x):
    self.mean = x.mean(0, keepdim=True)
    self.std = x.std(0, unbiased=False, keepdim=True)
  def transform(self, x):
    x -= self.mean
    x /= (self.std + 1e-7)
    return x

In [10]:
def preprocess(df):
    df = df.copy()
    df.loc[:, 'cp_type'] = df.loc[:, 'cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})
    df.loc[:, 'cp_dose'] = df.loc[:, 'cp_dose'].map({'D1': 0, 'D2': 1})
    return df

train = preprocess(train_features)
test = preprocess(test_features)

del train_targets['sig_id']

target = train_targets.loc[train['cp_type']==0].reset_index(drop=True)
train = train.loc[train['cp_type']==0].reset_index(drop=True)

In [11]:
top_features = [  1,   2,   3,   4,   5,   6,   7,   9,  11,  14,  15,  16,  17,
        18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  29,  30,  31,
        32,  33,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  46,
        47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  58,  59,  60,
        61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,
        74,  75,  76,  78,  79,  80,  81,  82,  83,  84,  86,  87,  88,
        89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101,
       102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
       115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 127, 128,
       129, 130, 131, 132, 133, 136, 137, 138, 139, 140, 141, 142, 143,
       144, 145, 146, 147, 149, 150, 151, 152, 153, 154, 155, 156, 157,
       158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
       171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
       184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 197,
       198, 199, 200, 202, 203, 204, 205, 206, 208, 209, 210, 211, 212,
       213, 214, 215, 216, 217, 218, 219, 220, 221, 223, 224, 225, 226,
       227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
       240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
       254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266,
       267, 268, 269, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280,
       281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 294,
       295, 296, 298, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309,
       310, 311, 312, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323,
       324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
       337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349,
       350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362,
       363, 364, 365, 366, 367, 368, 369, 370, 371, 374, 375, 376, 377,
       378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 390, 391,
       392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
       405, 406, 407, 408, 409, 411, 412, 413, 414, 415, 416, 417, 418,
       419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431,
       432, 434, 435, 436, 437, 438, 439, 440, 442, 443, 444, 445, 446,
       447, 448, 449, 450, 453, 454, 456, 457, 458, 459, 460, 461, 462,
       463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475,
       476, 477, 478, 479, 481, 482, 483, 484, 485, 486, 487, 488, 489,
       490, 491, 492, 493, 494, 495, 496, 498, 500, 501, 502, 503, 505,
       506, 507, 509, 510, 511, 512, 513, 514, 515, 518, 519, 520, 521,
       522, 523, 524, 525, 526, 527, 528, 530, 531, 532, 534, 535, 536,
       538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 549, 550, 551,
       552, 554, 557, 559, 560, 561, 562, 565, 566, 567, 568, 569, 570,
       571, 572, 573, 574, 575, 577, 578, 580, 581, 582, 583, 584, 585,
       586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 599,
       600, 601, 602, 606, 607, 608, 609, 611, 612, 613, 615, 616, 617,
       618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630,
       631, 632, 633, 634, 635, 636, 637, 638, 639, 641, 642, 643, 644,
       645, 646, 647, 648, 649, 650, 651, 652, 654, 655, 656, 658, 659,
       660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672,
       673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685,
       686, 687, 688, 689, 691, 692, 693, 694, 695, 696, 697, 699, 700,
       701, 702, 704, 705, 707, 708, 709, 710, 711, 713, 714, 716, 717,
       718, 720, 721, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732,
       733, 734, 735, 737, 738, 739, 740, 742, 743, 744, 745, 746, 747,
       748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 759, 760, 761,
       762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774,
       775, 776, 777, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788,
       789, 790, 792, 793, 794, 795, 796, 797, 798, 800, 801, 802, 803,
       804, 805, 806, 808, 809, 811, 813, 814, 815, 816, 817, 818, 819,
       821, 822, 823, 825, 826, 827, 828, 829, 830, 831, 832, 834, 835,
       837, 838, 839, 840, 841, 842, 845, 846, 847, 848, 850, 851, 852,
       854, 855, 856, 858, 859, 860, 861, 862, 864, 866, 867, 868, 869,
       870, 871, 872, 873, 874]

all_columns = train.columns
train=train[all_columns[top_features]]
test = test[all_columns[top_features]]
train.shape, test.shape

((21948, 785), (3982, 785))

In [12]:
train = train.values
target = target.values
test = test.values

In [13]:
train.shape, target.shape

((21948, 785), (21948, 206))

In [14]:
class TrainDataset(Dataset):
    def __init__(self, train,targets, noise ):
        
        self.features  = train
        self.targets = targets
        self.noise = noise
        
    def sizes(self):
        print("features size = ", self.features.shape[1])
        print("targets size = ", self.targets.shape[1])
        
    def __len__(self):
        return self.features.shape[0]

    def __getitem__(self, idx):
        feature = torch.tensor(self.features[idx]).float()
        target = torch.tensor(self.targets[idx]).float()
        return feature, target


In [15]:
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']
        
def show_lr(learning_rates):
    plt.plot(learning_rates, label = "learning rate")
    plt.ylabel("Learning rate", fontsize = 15)
    plt.grid(True)
    plt.legend()
    plt.show()

def train_step(x, y, model, optimizer, criterion):
    optimizer.zero_grad()
    pred = model(x.to(device))
    y = y.float()
    loss = criterion(pred,y.to(device))
    loss.backward()
    optimizer.step()
    return loss.item()

In [16]:
class Model(nn.Module):
  def __init__(self, nfeatures, ntargets, nlayers, hidden_size, dropout):
    super().__init__()
    layers = []
    for _ in range(nlayers):
      if len(layers) == 0:
        layers.append(nn.Linear(nfeatures, hidden_size))
        layers.append(nn.BatchNorm1d(hidden_size))
        layers.append(nn.Dropout(dropout))
        layers.append(nn.LeakyReLU())
      else:
        layers.append(nn.Linear(hidden_size, hidden_size))
        layers.append(nn.BatchNorm1d(hidden_size))
        layers.append(nn.Dropout(dropout))
        layers.append(nn.LeakyReLU())

    layers.append(nn.Linear(hidden_size, ntargets))

    self.model = nn.Sequential(*layers)

  def forward(self, x):
    return self.model(x)

In [23]:
def run_training( params, save_model=False):
  NFOLDS = 5
  EPOCHS = 10 ## changes here 
  mskf = MultilabelStratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=0)

  fold_val_losses = list()

  for k , (train_idx,valid_idx) in enumerate(mskf.split(train,target)):

      x_train,x_valid,y_train,y_valid = train[train_idx,:],train[valid_idx,:],target[train_idx,:],target[valid_idx,:]
      
      train_dataset = TrainDataset(x_train, y_train, noise = False)
      valid_dataset = TrainDataset(x_valid, y_valid, noise = False)
      
      train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True, num_workers = 8)
      val_loader = DataLoader(dataset=valid_dataset, batch_size=256, shuffle = True, num_workers = 8)
      

      model = Model(nfeatures=x_train.shape[1], 
                    ntargets=y_train.shape[1],
                    nlayers=params["num_layers"], 
                    hidden_size=params["hidden_size"], 
                    dropout=params["dropout"])
      
      model = model.cuda()
      optimizer = optim.Adam(model.parameters(), lr = params["learning_rate"], weight_decay=1e-5)
      scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                      mode='min', 
                                                      factor=0.5, 
                                                      patience=3, 
                                                      eps=1e-4, 
                                                      verbose=True)
      criterion = nn.BCEWithLogitsLoss()

      eng = Engine(model, optimizer, device='cuda')
      best_loss = 10000

      print(CRED ,"fold ", str(k+1), CEND)

      for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(val_loader)
        print("train_loss:", train_loss, "val_loss:", valid_loss)
        if valid_loss<best_loss:
          best_loss = valid_loss
          if save_model:
            torch.save(model.state_dict(), "model_{fold}.pth")
      
  return(best_loss)

  print(CBLUE, "Training complete", CEND)

In [24]:
class Engine:
  def __init__(self, model, optimizer, device):
    self.model = model
    self.device = device
    self.optimizer = optimizer

  @staticmethod
  def loss_fn(targets, outputs):
    return nn.BCEWithLogitsLoss()(outputs, targets)

  def train(self, data_loader):
    self.model.train()
    final_loss=0
    for data in data_loader:
      self.optimizer.zero_grad()
      inputs, targets = data
      inputs, targets = inputs.to(device), targets.to(device)
      outputs = self.model(inputs)
      loss = self.loss_fn(targets, outputs)
      loss.backward()
      self.optimizer.step()
      final_loss += loss.item()
    return(final_loss / len(data_loader))


  def evaluate(self, data_loader):
    self.model.train()
    final_loss=0
    for data in data_loader:
      #self.optimizer.zero_grad()
      inputs, targets = data
      inputs, targets = inputs.to(device), targets.to(device)
      outputs = self.model(inputs)
      loss = self.loss_fn(targets, outputs)
      #loss.backward()
      #self.optimizer.step()
      final_loss += loss.item()
    return(final_loss / len(data_loader))

In [28]:
# params = {
#       "num_layers":trial.suggest_int("num_layer", 1, 8),
#       "hidden_size":trial.suggest_int("hidden_size", 16, 4096),
#       "dropout": trial.suggest_uniform("dropout", 0.1, 0.7),
#       "learning_rate": trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
#   }
# run_training(params, save_model=False)

In [26]:
def objective(trial):
  params = {
      "num_layers":trial.suggest_int("num_layer", 1, 8),
      "hidden_size":trial.suggest_int("hidden_size", 16, 4096),
      "dropout": trial.suggest_uniform("dropout", 0.1, 0.7),
      "learning_rate": trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
  }

  loss_ = run_training(params, save_model=False)
  return(loss_)

In [27]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

print("best_trial:")
trial_ = study.best_trial
print(trial_)

[I 2020-09-26 05:54:06,147] A new study created in memory with name: no-name-ee38c3fd-5b1d-436c-aaf4-9469cbda9cf5


 fold  1 
train_loss: 0.043743044785831284 val_loss: 0.021343196638756327
train_loss: 0.02116268497986206 val_loss: 0.020676683220598433
train_loss: 0.020207170693986656 val_loss: 0.019720713194045756
train_loss: 0.01940260318013421 val_loss: 0.018940692353579733
train_loss: 0.01882745702818468 val_loss: 0.018534616981115606
train_loss: 0.018510718789437542 val_loss: 0.01847604838096433
train_loss: 0.01823858735219076 val_loss: 0.01821031028197871
train_loss: 0.01812999578786717 val_loss: 0.01797429658472538
train_loss: 0.017969645674515894 val_loss: 0.01779964416184359
train_loss: 0.017864463236722826 val_loss: 0.01830590350760354
 fold  2 
train_loss: 0.04391568210785803 val_loss: 0.021582691412833
train_loss: 0.021215533716199192 val_loss: 0.020386476380129654
train_loss: 0.020119138035005417 val_loss: 0.019981973701053195
train_loss: 0.019343324747962364 val_loss: 0.01923203778763612
train_loss: 0.018833740344406037 val_loss: 0.01918898171020879
train_loss: 0.01846751036203426 val_

[I 2020-09-26 05:56:41,592] Trial 0 finished with value: 0.01809049728843901 and parameters: {'num_layer': 6, 'hidden_size': 1052, 'dropout': 0.5188347969850853, 'learning_rate': 0.0008831212239556775}. Best is trial 0 with value: 0.01809049728843901.


 fold  1 
train_loss: 0.03650541653505702 val_loss: 0.020441576424572203
train_loss: 0.019983187884740208 val_loss: 0.019850569466749828
train_loss: 0.019189611880405657 val_loss: 0.01866108277398679
train_loss: 0.01876079143308427 val_loss: 0.018565644199649494
train_loss: 0.018553689000723156 val_loss: 0.01841845146069924
train_loss: 0.018383346437274115 val_loss: 0.01832727508412467
train_loss: 0.01838922692273838 val_loss: 0.01821949312256442
train_loss: 0.018229413237692654 val_loss: 0.018506406289007928
train_loss: 0.018156288657337427 val_loss: 0.018654074002471235
train_loss: 0.0180750146670186 val_loss: 0.018389592775040202
 fold  2 
train_loss: 0.03636835280643857 val_loss: 0.0203969848031799
train_loss: 0.01993950075753357 val_loss: 0.01944553293287754
train_loss: 0.019059326202757118 val_loss: 0.01889761785666148
train_loss: 0.018667442406919123 val_loss: 0.018903950850168865
train_loss: 0.018492234526607004 val_loss: 0.018950306706958346
train_loss: 0.01843927412604292 val

[I 2020-09-26 05:59:25,976] Trial 1 finished with value: 0.018089856228066817 and parameters: {'num_layer': 7, 'hidden_size': 721, 'dropout': 0.3384970726057638, 'learning_rate': 0.0019213365993020527}. Best is trial 1 with value: 0.018089856228066817.


 fold  1 
train_loss: 0.6642833369365637 val_loss: 0.6139914790789286
train_loss: 0.5666958663774573 val_loss: 0.5239511430263519
train_loss: 0.48269844033579895 val_loss: 0.44589446153905654
train_loss: 0.41087092804735986 val_loss: 0.37997738023598987
train_loss: 0.35085854193438654 val_loss: 0.32510429951879716
train_loss: 0.3011719262686329 val_loss: 0.2799010607931349
train_loss: 0.2603053202231725 val_loss: 0.24229951037300956
train_loss: 0.22628068837566653 val_loss: 0.2113594181007809
train_loss: 0.19784164677063623 val_loss: 0.1854535150859091
train_loss: 0.1739025056578111 val_loss: 0.1632181571589576
 fold  2 
train_loss: 0.6498746742372927 val_loss: 0.5997591416041056
train_loss: 0.5535408841527026 val_loss: 0.5108786722024282
train_loss: 0.4706258564323619 val_loss: 0.4341221782896254
train_loss: 0.4002356421256411 val_loss: 0.3697380969921748
train_loss: 0.3416860757962517 val_loss: 0.31638158361117047
train_loss: 0.2932517884866051 val_loss: 0.2724996308485667
train_loss

[I 2020-09-26 06:02:41,000] Trial 2 finished with value: 0.16100065492921406 and parameters: {'num_layer': 6, 'hidden_size': 2787, 'dropout': 0.14503428979548028, 'learning_rate': 1.5534075287068437e-06}. Best is trial 1 with value: 0.018089856228066817.


 fold  1 
train_loss: 0.03088665543043095 val_loss: 0.020884927465683885
train_loss: 0.020129336530099743 val_loss: 0.019850535939137142
train_loss: 0.019367103086973446 val_loss: 0.01954799010935757
train_loss: 0.019105907097674797 val_loss: 0.01890353879166974
train_loss: 0.018860390409827232 val_loss: 0.019061053689155314
train_loss: 0.018886232174986948 val_loss: 0.018562389744652644
train_loss: 0.01898094492060119 val_loss: 0.019019623287022114
train_loss: 0.019085173987769995 val_loss: 0.018833105348878436
train_loss: 0.01906961255936303 val_loss: 0.01904902524418301
train_loss: 0.019086054944689724 val_loss: 0.018990434499250516
 fold  2 
train_loss: 0.030099278693829758 val_loss: 0.02118291664454672
train_loss: 0.020382466705758936 val_loss: 0.020171596358219784
train_loss: 0.019404628503041855 val_loss: 0.01938761832813422
train_loss: 0.019184751139170883 val_loss: 0.01944707954923312
train_loss: 0.01901932854367339 val_loss: 0.018900133876336947
train_loss: 0.0190840255185637

[I 2020-09-26 06:05:00,603] Trial 3 finished with value: 0.01890930864546034 and parameters: {'num_layer': 4, 'hidden_size': 1663, 'dropout': 0.16307639956155834, 'learning_rate': 0.007604576348209199}. Best is trial 1 with value: 0.018089856228066817.


 fold  1 
train_loss: 0.6968648679878401 val_loss: 0.6563395096196069
train_loss: 0.6181175890176193 val_loss: 0.583483037021425
train_loss: 0.5472093643485636 val_loss: 0.5165839095910391
train_loss: 0.4829794257015422 val_loss: 0.45554548170831466
train_loss: 0.425919807691505 val_loss: 0.4017651660574807
train_loss: 0.37590533883675287 val_loss: 0.3550354556904899
train_loss: 0.33281251593776373 val_loss: 0.3146530075205697
train_loss: 0.29551305157550867 val_loss: 0.28008224566777545
train_loss: 0.26335948662481445 val_loss: 0.25030087845193016
train_loss: 0.2360152890500815 val_loss: 0.22457850890027153
 fold  2 
train_loss: 0.6868013204007909 val_loss: 0.6471011406845517
train_loss: 0.6087735904299695 val_loss: 0.5735419922404819
train_loss: 0.5382542301347291 val_loss: 0.5064620325962702
train_loss: 0.4745602860398915 val_loss: 0.4467582073476579
train_loss: 0.4182714182829511 val_loss: 0.39376993974049884
train_loss: 0.3689023638549058 val_loss: 0.3479609870248371
train_loss: 0

[I 2020-09-26 06:08:35,614] Trial 4 finished with value: 0.2207100209262636 and parameters: {'num_layer': 6, 'hidden_size': 3184, 'dropout': 0.4377058242487215, 'learning_rate': 1.1138116364225768e-06}. Best is trial 1 with value: 0.018089856228066817.


 fold  1 
train_loss: 0.6151247950999633 val_loss: 0.49207105073663926
train_loss: 0.3952506860529167 val_loss: 0.31758784916665816
train_loss: 0.26019609104032104 val_loss: 0.2151799574494362
train_loss: 0.18158708966296652 val_loss: 0.15522060791651407
train_loss: 0.13417684491993725 val_loss: 0.1176775416566266
train_loss: 0.10432827029971109 val_loss: 0.093449669993586
train_loss: 0.08435163609143617 val_loss: 0.0770620529850324
train_loss: 0.07049525844986024 val_loss: 0.06545595534973675
train_loss: 0.060508422850482704 val_loss: 0.05669556351171599
train_loss: 0.053180794623019036 val_loss: 0.05030591723819574
 fold  2 
train_loss: 0.5969049589357515 val_loss: 0.4753780961036682
train_loss: 0.3815236162880193 val_loss: 0.3061240878370073
train_loss: 0.25115497647852136 val_loss: 0.20755533377329508
train_loss: 0.17547979864521304 val_loss: 0.1500218858321508
train_loss: 0.13010804101392842 val_loss: 0.11429813380042712
train_loss: 0.10128460160416106 val_loss: 0.0909534150527583

[I 2020-09-26 06:12:45,024] Trial 5 finished with value: 0.05015210476186541 and parameters: {'num_layer': 7, 'hidden_size': 3233, 'dropout': 0.6193981419816988, 'learning_rate': 4.165938075566149e-06}. Best is trial 1 with value: 0.018089856228066817.


 fold  1 
train_loss: 0.03293439647371786 val_loss: 0.020307278881470364
train_loss: 0.019030170015774776 val_loss: 0.018621237638096016
train_loss: 0.018468527926429026 val_loss: 0.018763719230062433
train_loss: 0.01851545374813503 val_loss: 0.018424669539348945
train_loss: 0.01860906349738007 val_loss: 0.018551115050084062
train_loss: 0.018648617840169565 val_loss: 0.018806213926937845
train_loss: 0.018694207537919283 val_loss: 0.018735127937462594
train_loss: 0.018703254479644955 val_loss: 0.018353188255180914
train_loss: 0.018662483235686155 val_loss: 0.019095926752520934
train_loss: 0.018653138446203178 val_loss: 0.018534400086436007
 fold  2 
train_loss: 0.03254761199966289 val_loss: 0.019909600002898112
train_loss: 0.019010838526098625 val_loss: 0.018664310582809977
train_loss: 0.018608638395865757 val_loss: 0.018532876649664506
train_loss: 0.01851814766616925 val_loss: 0.01889954424566693
train_loss: 0.018545819338465084 val_loss: 0.018678776402440336
train_loss: 0.018526222543

[I 2020-09-26 06:14:36,718] Trial 6 finished with value: 0.018470706107715767 and parameters: {'num_layer': 1, 'hidden_size': 1295, 'dropout': 0.28016787603649573, 'learning_rate': 0.009502422021513527}. Best is trial 1 with value: 0.018089856228066817.


 fold  1 
train_loss: 0.028835664651748062 val_loss: 0.01998532170222865
train_loss: 0.019417749015965324 val_loss: 0.018648916338053014
train_loss: 0.018754574692929567 val_loss: 0.0183451222255826
train_loss: 0.018346820087374552 val_loss: 0.018168181129213836
train_loss: 0.0182151655882489 val_loss: 0.018609686340722773
train_loss: 0.018120806050095438 val_loss: 0.018333837596906558
train_loss: 0.018094853916461918 val_loss: 0.018045142396456666
train_loss: 0.018096213505697855 val_loss: 0.018030543501178425
train_loss: 0.018072235708435375 val_loss: 0.01826270752482944
train_loss: 0.017967635306759155 val_loss: 0.017979278850058716
 fold  2 
train_loss: 0.029126453955752262 val_loss: 0.019887456877364054
train_loss: 0.0193469582422488 val_loss: 0.019333414629929595
train_loss: 0.018623027336392282 val_loss: 0.018979814007050462
train_loss: 0.018437012361929468 val_loss: 0.01845993779392706
train_loss: 0.018258746996845886 val_loss: 0.01818419403086106
train_loss: 0.0180525201696740

[I 2020-09-26 06:16:48,701] Trial 7 finished with value: 0.01777702807966206 and parameters: {'num_layer': 3, 'hidden_size': 2003, 'dropout': 0.3511757636099212, 'learning_rate': 0.0033357927638280713}. Best is trial 7 with value: 0.01777702807966206.


 fold  1 
train_loss: 0.03044311295978833 val_loss: 0.02055494725290272
train_loss: 0.0204283957761051 val_loss: 0.02100184549474054
train_loss: 0.01972603118991938 val_loss: 0.019154510874715116
train_loss: 0.01932605276343183 val_loss: 0.019656938811143238
train_loss: 0.019060101191364767 val_loss: 0.0194419389590621
train_loss: 0.018982133281457682 val_loss: 0.019226913547350302
train_loss: 0.01883515042077372 val_loss: 0.018798722471627925
train_loss: 0.018816492850959734 val_loss: 0.01864133237136735
train_loss: 0.01876377733424306 val_loss: 0.018741528762297496
train_loss: 0.01864190589285631 val_loss: 0.018264416087832715
 fold  2 
train_loss: 0.030130901873327683 val_loss: 0.020505967032578256
train_loss: 0.020248925920737827 val_loss: 0.019552553693453472
train_loss: 0.019394344589470522 val_loss: 0.019381248495644994
train_loss: 0.01886829013760755 val_loss: 0.01893586054858234
train_loss: 0.018746134487615116 val_loss: 0.01897217043572002
train_loss: 0.01870727130090413 val_

[I 2020-09-26 06:20:33,393] Trial 8 finished with value: 0.018144183688693576 and parameters: {'num_layer': 8, 'hidden_size': 2607, 'dropout': 0.4013797519939254, 'learning_rate': 0.0015363231998321502}. Best is trial 7 with value: 0.01777702807966206.


 fold  1 
train_loss: 0.03149076024799243 val_loss: 0.01868260672522916
train_loss: 0.01808860057802952 val_loss: 0.017919432889256213
train_loss: 0.017431349385583748 val_loss: 0.01771824186046918
train_loss: 0.01702320944868784 val_loss: 0.017361194361001253
train_loss: 0.016671937694638105 val_loss: 0.017070844769477844
train_loss: 0.01632803887722717 val_loss: 0.017190953250974417
train_loss: 0.016105791904788086 val_loss: 0.017002052161842585
train_loss: 0.015805258125444685 val_loss: 0.016818586219516065
train_loss: 0.015575575723272303 val_loss: 0.017133603752073314
train_loss: 0.01540291235120832 val_loss: 0.017300736935188372
 fold  2 
train_loss: 0.031086959943607235 val_loss: 0.01861319991035594
train_loss: 0.018058074145591345 val_loss: 0.017751732013291784
train_loss: 0.017429058777465336 val_loss: 0.017772296650542155
train_loss: 0.017135410103946924 val_loss: 0.017362661763197847
train_loss: 0.01676448112677621 val_loss: 0.017654996261828475
train_loss: 0.016507808496986

[I 2020-09-26 06:22:47,319] Trial 9 finished with value: 0.016764250118285418 and parameters: {'num_layer': 3, 'hidden_size': 1894, 'dropout': 0.15739205763019304, 'learning_rate': 0.0011353568729844193}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.06592363691416339 val_loss: 0.021961183701124456
train_loss: 0.020150980588210667 val_loss: 0.019206307259284787
train_loss: 0.018278266303241253 val_loss: 0.018639640882611275
train_loss: 0.01710982191497865 val_loss: 0.017854212472836178
train_loss: 0.01617044516150718 val_loss: 0.017641084372169442
train_loss: 0.01532377521979852 val_loss: 0.017487618347836867
train_loss: 0.014559761067663414 val_loss: 0.01714132136354844
train_loss: 0.013738122087079977 val_loss: 0.016895542593879834
train_loss: 0.012991225178205017 val_loss: 0.017184819457017712
train_loss: 0.012300251072029705 val_loss: 0.01685146428644657
 fold  2 
train_loss: 0.06753828288798315 val_loss: 0.022049437794420455
train_loss: 0.020123765560919823 val_loss: 0.019738847700258095
train_loss: 0.01822884467875828 val_loss: 0.01856619885398282
train_loss: 0.01705795700621346 val_loss: 0.018104274550245866
train_loss: 0.016179535160030144 val_loss: 0.017860683509045176
train_loss: 0.0153163741005287

[I 2020-09-26 06:24:41,016] Trial 10 finished with value: 0.017031505703926086 and parameters: {'num_layer': 1, 'hidden_size': 3911, 'dropout': 0.22594218092039553, 'learning_rate': 0.00012026704264571361}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.13096850054959455 val_loss: 0.033955153077840805
train_loss: 0.026932238020758698 val_loss: 0.023453434618810814
train_loss: 0.021504429182496624 val_loss: 0.020902887918055058
train_loss: 0.019553066771207512 val_loss: 0.019707091049187712
train_loss: 0.018411494467569435 val_loss: 0.019007548068960507
train_loss: 0.017678310628980398 val_loss: 0.01863131258222792
train_loss: 0.01697709109040274 val_loss: 0.018399859571622476
train_loss: 0.01637735674697636 val_loss: 0.018018311510483425
train_loss: 0.015879493692646855 val_loss: 0.017867113049659464
train_loss: 0.015377524592306303 val_loss: 0.017558164325439267
 fold  2 
train_loss: 0.1303119712603697 val_loss: 0.033516918826434344
train_loss: 0.026957316958493946 val_loss: 0.023429817106160853
train_loss: 0.021541569246978 val_loss: 0.020518533781998687
train_loss: 0.019543701202433178 val_loss: 0.019558530093895063
train_loss: 0.018428587179253067 val_loss: 0.019105889317062166
train_loss: 0.017709493421126

[I 2020-09-26 06:26:34,074] Trial 11 finished with value: 0.017635313069654837 and parameters: {'num_layer': 1, 'hidden_size': 2280, 'dropout': 0.10109736699512295, 'learning_rate': 8.364734744218163e-05}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.6279185232908829 val_loss: 0.5459213223722246
train_loss: 0.47390033736609033 val_loss: 0.40692011680867934
train_loss: 0.35069089631239575 val_loss: 0.2981966783603032
train_loss: 0.25643820164428244 val_loss: 0.2182487431499693
train_loss: 0.18860155991885974 val_loss: 0.16134761936134762
train_loss: 0.14090731340473978 val_loss: 0.12227734757794274
train_loss: 0.1090227106343145 val_loss: 0.09584006418784459
train_loss: 0.0864042957731779 val_loss: 0.07691787969734934
train_loss: 0.0712967588957669 val_loss: 0.06407796943353282
train_loss: 0.06012844585854074 val_loss: 0.05571607107089625
 fold  2 
train_loss: 0.6330210527648097 val_loss: 0.5491271283891466
train_loss: 0.474320978358172 val_loss: 0.4037303494082557
train_loss: 0.34488625630088476 val_loss: 0.29138988422022927
train_loss: 0.2467796720456386 val_loss: 0.20893151147498024
train_loss: 0.1799274592295937 val_loss: 0.15455272959338295
train_loss: 0.13542478823143503 val_loss: 0.11805953333775203
tr

[I 2020-09-26 06:28:37,055] Trial 12 finished with value: 0.055266672331425876 and parameters: {'num_layer': 2, 'hidden_size': 62, 'dropout': 0.23186919064045042, 'learning_rate': 0.00010489938265935068}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.045315457359496235 val_loss: 0.01894083846774366
train_loss: 0.018046756956618334 val_loss: 0.017831797815031476
train_loss: 0.01658264511818255 val_loss: 0.01703664743238025
train_loss: 0.015105906867192707 val_loss: 0.017388468660000298
train_loss: 0.013338054051163836 val_loss: 0.017163944223688707
train_loss: 0.01149927474477369 val_loss: 0.017660554808874924
train_loss: 0.009640844724397513 val_loss: 0.018001668258673616
train_loss: 0.007855806614447763 val_loss: 0.01808151633789142
train_loss: 0.006039677673707838 val_loss: 0.018684030510485172
train_loss: 0.004668105574176256 val_loss: 0.019147346934510603
 fold  2 
train_loss: 0.04668865747668821 val_loss: 0.019011763752334647
train_loss: 0.01798967663468658 val_loss: 0.01782354112300608
train_loss: 0.016562136571746374 val_loss: 0.017704059680302937
train_loss: 0.015091097792205603 val_loss: 0.01736797361324231
train_loss: 0.013366774190217257 val_loss: 0.01726991119277146
train_loss: 0.0114961562111325

[I 2020-09-26 06:31:10,826] Trial 13 finished with value: 0.01723725027922127 and parameters: {'num_layer': 3, 'hidden_size': 3548, 'dropout': 0.23406821664145047, 'learning_rate': 0.00019710567763290364}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.2392981930271439 val_loss: 0.06974356952640745
train_loss: 0.04670461240238038 val_loss: 0.03345166105363104
train_loss: 0.028836241491354893 val_loss: 0.025387560638288658
train_loss: 0.023551473879943722 val_loss: 0.022245855381091435
train_loss: 0.021173234813023304 val_loss: 0.020673017534944747
train_loss: 0.019780814202259415 val_loss: 0.019969121139082644
train_loss: 0.01881348934240531 val_loss: 0.01894149153182904
train_loss: 0.018123443724344605 val_loss: 0.018556712091796927
train_loss: 0.017510909452170566 val_loss: 0.018258322237266436
train_loss: 0.016963921602059534 val_loss: 0.017932950105104182
 fold  2 
train_loss: 0.2383767082963301 val_loss: 0.07052961240212123
train_loss: 0.04661883764724801 val_loss: 0.03379653187261687
train_loss: 0.02867980139411014 val_loss: 0.025696646215187177
train_loss: 0.023481524873362934 val_loss: 0.022561362530622218
train_loss: 0.02113289971822414 val_loss: 0.02095648729138904
train_loss: 0.019718730336298115 va

[I 2020-09-26 06:33:25,293] Trial 14 finished with value: 0.018142098871370155 and parameters: {'num_layer': 2, 'hidden_size': 4038, 'dropout': 0.10770604563853593, 'learning_rate': 1.993304030365116e-05}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.03363849580341923 val_loss: 0.01868396707706981
train_loss: 0.018061741086505892 val_loss: 0.017704335920926597
train_loss: 0.01729258114768975 val_loss: 0.017338577140536573
train_loss: 0.016537667441087357 val_loss: 0.016973094083368778
train_loss: 0.01591484758840955 val_loss: 0.016973312478512526
train_loss: 0.015233160212528015 val_loss: 0.017051603852046862
train_loss: 0.014755471211358688 val_loss: 0.017114581695447367
train_loss: 0.01413895965864261 val_loss: 0.017032019845727418
train_loss: 0.01363834096253782 val_loss: 0.017672182785140142
train_loss: 0.013123964519658382 val_loss: 0.017202204196817346
 fold  2 
train_loss: 0.034106590852573296 val_loss: 0.018979036973582372
train_loss: 0.018105636863712814 val_loss: 0.01771507727810078
train_loss: 0.017212521170090506 val_loss: 0.01809958792808983
train_loss: 0.01671092696757852 val_loss: 0.017814649165504508
train_loss: 0.016070322133600712 val_loss: 0.0174249450986584
train_loss: 0.01545095599620886

[I 2020-09-26 06:36:45,333] Trial 15 finished with value: 0.01704975486629539 and parameters: {'num_layer': 4, 'hidden_size': 3914, 'dropout': 0.22633017344380432, 'learning_rate': 0.0004001925820350279}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.5856528204420338 val_loss: 0.4657350894477632
train_loss: 0.37716285042140796 val_loss: 0.30139420098728603
train_loss: 0.24826741715272269 val_loss: 0.2019280335969395
train_loss: 0.16986286337824835 val_loss: 0.14157222625282076
train_loss: 0.1221928118687609 val_loss: 0.10468049595753352
train_loss: 0.09244336317414822 val_loss: 0.08096822392609385
train_loss: 0.07292198109022086 val_loss: 0.06525559764769343
train_loss: 0.06009867325749086 val_loss: 0.0542386161784331
train_loss: 0.051051011929909386 val_loss: 0.046749494348963104
train_loss: 0.04446731858711312 val_loss: 0.04131027455959055
 fold  2 
train_loss: 0.5942910477734994 val_loss: 0.4737155950731701
train_loss: 0.3834346401086752 val_loss: 0.30645838379859924
train_loss: 0.2512385795513789 val_loss: 0.20481947892242008
train_loss: 0.1709410108733868 val_loss: 0.14299633850653967
train_loss: 0.12243706711392471 val_loss: 0.1055089785820908
train_loss: 0.09228028707962105 val_loss: 0.081618120272954

[I 2020-09-26 06:38:48,571] Trial 16 finished with value: 0.041426479195555053 and parameters: {'num_layer': 2, 'hidden_size': 423, 'dropout': 0.18690111600519338, 'learning_rate': 2.678986599916121e-05}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.04333103114766055 val_loss: 0.01917903259810474
train_loss: 0.018054589750650135 val_loss: 0.018129721180432372
train_loss: 0.016449657228329907 val_loss: 0.01735695746416847
train_loss: 0.015207872739520626 val_loss: 0.017150089455147583
train_loss: 0.014099098276346922 val_loss: 0.017242641602125432
train_loss: 0.013208319129341322 val_loss: 0.017023402731865644
train_loss: 0.012389272113965042 val_loss: 0.017214753665030003
train_loss: 0.01163320648956342 val_loss: 0.01731832677291499
train_loss: 0.010947373283999985 val_loss: 0.01693940002264248
train_loss: 0.010595767380858677 val_loss: 0.016962301244752273
 fold  2 
train_loss: 0.04358168546974227 val_loss: 0.01950059396525224
train_loss: 0.018078128442816113 val_loss: 0.018264322852094967
train_loss: 0.016440208245446716 val_loss: 0.017857529533406098
train_loss: 0.015178256998837425 val_loss: 0.01752246507546968
train_loss: 0.014144606604848219 val_loss: 0.017267107342680294
train_loss: 0.013267787038416

[I 2020-09-26 06:40:42,284] Trial 17 finished with value: 0.01691754798715313 and parameters: {'num_layer': 1, 'hidden_size': 1990, 'dropout': 0.30236857647578136, 'learning_rate': 0.0004529221755793316}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.04480706496785084 val_loss: 0.01965240202844143
train_loss: 0.018951686267889498 val_loss: 0.018580824240214296
train_loss: 0.017851890361719372 val_loss: 0.01774555134276549
train_loss: 0.01723722595235576 val_loss: 0.017622331985168986
train_loss: 0.016857454155072355 val_loss: 0.017353243608441617
train_loss: 0.016451478348639997 val_loss: 0.017123380882872477
train_loss: 0.01602077712237403 val_loss: 0.017353584420763783
train_loss: 0.015656640908370417 val_loss: 0.01718384638014767
train_loss: 0.015278107790357393 val_loss: 0.017141662538051605
train_loss: 0.014787716075670029 val_loss: 0.017187018775277667
 fold  2 
train_loss: 0.04436535760760307 val_loss: 0.019786341529753473
train_loss: 0.01886998097398791 val_loss: 0.018504344547788303
train_loss: 0.01784546678021982 val_loss: 0.01807844576736291
train_loss: 0.017158294278804376 val_loss: 0.018108613478640716
train_loss: 0.016781424313945616 val_loss: 0.017725515593257215
train_loss: 0.0162953116485606

[I 2020-09-26 06:43:14,867] Trial 18 finished with value: 0.017234986130562093 and parameters: {'num_layer': 5, 'hidden_size': 1787, 'dropout': 0.316428024102369, 'learning_rate': 0.00043538971847463726}. Best is trial 9 with value: 0.016764250118285418.


 fold  1 
train_loss: 0.02978992919721033 val_loss: 0.02009709044877026
train_loss: 0.019737743887294462 val_loss: 0.019398805271420214
train_loss: 0.018983170687072518 val_loss: 0.01851717082576619
train_loss: 0.01863463698764858 val_loss: 0.01818245856298341
train_loss: 0.018496409706447437 val_loss: 0.018281709868460894
train_loss: 0.01837314075479905 val_loss: 0.018558475499351818
train_loss: 0.018412175365602194 val_loss: 0.017958281096071005
train_loss: 0.018441458941315828 val_loss: 0.018352523342602782
train_loss: 0.01846834774925441 val_loss: 0.01837344912605153
train_loss: 0.01839180171246762 val_loss: 0.018632179747025173
 fold  2 
train_loss: 0.029388575517720936 val_loss: 0.020360642733673256
train_loss: 0.01947236443073421 val_loss: 0.01900698451532258
train_loss: 0.018816998464635748 val_loss: 0.01906106931467851
train_loss: 0.018716178024592606 val_loss: 0.018713584169745445
train_loss: 0.018366377152826473 val_loss: 0.01864211613105403
train_loss: 0.01831304626809298 v

[I 2020-09-26 06:45:28,621] Trial 19 finished with value: 0.018270036826531093 and parameters: {'num_layer': 3, 'hidden_size': 1419, 'dropout': 0.4523398838430158, 'learning_rate': 0.00401867937543168}. Best is trial 9 with value: 0.016764250118285418.


best_trial:
FrozenTrial(number=9, value=0.016764250118285418, datetime_start=datetime.datetime(2020, 9, 26, 6, 20, 33, 402214), datetime_complete=datetime.datetime(2020, 9, 26, 6, 22, 47, 319401), params={'num_layer': 3, 'hidden_size': 1894, 'dropout': 0.15739205763019304, 'learning_rate': 0.0011353568729844193}, distributions={'num_layer': IntUniformDistribution(high=8, low=1, step=1), 'hidden_size': IntUniformDistribution(high=4096, low=16, step=1), 'dropout': UniformDistribution(high=0.7, low=0.1), 'learning_rate': LogUniformDistribution(high=0.01, low=1e-06)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=9, state=TrialState.COMPLETE)
