In [1]:
import pandas as pd 
import numpy as np 
from scipy.stats import entropy
from sklearn.model_selection import GroupKFold
import matplotlib.pyplot as plt

from engine_hms_trainer import *
from engine_hms_model import CustomModel, JobConfig, ModelConfig

import torch
from torch import nn
import torch.nn.functional as F

  _torch_pytree._register_pytree_node(


In [2]:
# Seed first, using the job-level seed, before any other object is built.
seed_everything(JobConfig.SEED)

# Settings for this run, written onto ModelConfig in the order listed.
for _name, _setting in {
    "EPOCHS": 6,
    "USE_EEG_SPECTROGRAMS": False,
    "MODEL_BACKBONE": 'tf_efficientnet_b0',
    "MODEL_NAME": "ENet_b0_two_stages",
}.items():
    setattr(ModelConfig, _name, _setting)

# The predictor is constructed only after the overrides above are in place.
hms_predictor = HMSPredictor(JobConfig, ModelConfig)

****************************************************************************************************
Script Start: Sat Mar  9 11:07:46 2024
Initializing HMS Predictor...
Model Name: ENet_b0_two_stages
Drop Rate: 0.15
Drop Path Rate: 0.25
Augment: False
Enropy Split: 5.5
Device: cuda
Output Dir: ./outputs/
****************************************************************************************************


In [3]:
# Unpack the four objects returned by the predictor's loader.
train_easy, train_hard, all_specs, all_eegs = hms_predictor.load_train_data()

# Shapes of the two training frames, one per line.
print(train_easy.shape, train_hard.shape, sep="\n")

# Total count of missing cells in each frame (0 means no NaN anywhere).
print(
    train_easy.isna().sum().sum(),
    train_hard.isna().sum().sum(),
    sep="\n",
)

# Preview the first rows of each frame, separated by a blank-looking line.
display(train_easy.head())
print(" ")
display(train_hard.head())

(11999, 14)
(5090, 14)
0
0


Unnamed: 0,eeg_id,spectrogram_id,min,max,patient_id,target,total_votes,entropy,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,642382,14960202,1008.0,1032.0,5955,Other,2,7.802343,0.0,0.0,0.0,0.0,0.0,1.0
1,751790,618728447,908.0,908.0,38549,GPD,1,7.802343,0.0,0.0,1.0,0.0,0.0,0.0
2,778705,52296320,0.0,0.0,40955,Other,2,7.68682,0.0,0.0,0.0,0.0,0.0,1.0
3,1629671,2036345030,0.0,160.0,37481,Seizure,51,7.619243,1.0,0.0,0.0,0.0,0.0,0.0
4,2061593,320962633,1450.0,1450.0,23828,Other,1,7.802343,0.0,0.0,0.0,0.0,0.0,1.0


 


Unnamed: 0,eeg_id,spectrogram_id,min,max,patient_id,target,total_votes,entropy,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,568657,789577333,0.0,16.0,20654,Other,48,3.341757,0.0,0.0,0.25,0.0,0.166667,0.583333
1,582999,1552638400,0.0,38.0,20230,LPD,154,3.550549,0.0,0.857143,0.0,0.071429,0.0,0.071429
2,1895581,128369999,1138.0,1138.0,47999,Other,13,3.565051,0.076923,0.0,0.0,0.0,0.076923,0.846154
3,2482631,978166025,1902.0,1944.0,20606,Other,105,1.431066,0.0,0.0,0.133333,0.066667,0.133333,0.666667
4,2521897,673742515,0.0,4.0,62117,Other,24,1.516203,0.0,0.0,0.083333,0.083333,0.333333,0.5


In [4]:
# Flip to True to train on only the first half of each frame for fast debugging.
USE_HALF_DATA_FOR_DEBUG = False

# The (possibly reduced) frames are bound to new names instead of overwriting
# train_easy / train_hard, so re-running this cell never halves the data twice
# and the frames loaded earlier stay intact.
if USE_HALF_DATA_FOR_DEBUG:
    fit_easy = train_easy[:len(train_easy) // 2]
    fit_hard = train_hard[:len(train_hard) // 2]
else:
    fit_easy, fit_hard = train_easy, train_hard

# Run training over all folds; progress and per-epoch losses are printed below.
hms_predictor.train_folds(fit_easy, fit_hard, all_specs, all_eegs)

Fold: 0 First Training


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [1][0/599]Elapsed 1.10s | Loss: 0.8628 Grad: 60302.8008 LR: 4.0000e-06
Epoch: [1][50/599]Elapsed 4.58s | Loss: 0.8277 Grad: 81543.1094 LR: 5.1479e-06
Epoch: [1][100/599]Elapsed 7.99s | Loss: 0.8201 Grad: 69052.3359 LR: 8.5368e-06
Epoch: [1][150/599]Elapsed 11.40s | Loss: 0.8151 Grad: 59384.4102 LR: 1.4005e-05
Epoch: [1][200/599]Elapsed 14.80s | Loss: 0.8104 Grad: 80533.5703 LR: 2.1290e-05
Epoch: [1][250/599]Elapsed 18.21s | Loss: 0.8008 Grad: 71337.6875 LR: 3.0044e-05
Epoch: [1][300/599]Elapsed 21.63s | Loss: 0.7874 Grad: 80855.9688 LR: 3.9848e-05
Epoch: [1][350/599]Elapsed 25.04s | Loss: 0.7734 Grad: 65898.6953 LR: 5.0233e-05
Epoch: [1][400/599]Elapsed 28.45s | Loss: 0.7565 Grad: 70670.7422 LR: 6.0703e-05
Epoch: [1][450/599]Elapsed 31.87s | Loss: 0.7395 Grad: 116367.9531 LR: 7.0757e-05
Epoch: [1][500/599]Elapsed 35.29s | Loss: 0.7241 Grad: 100187.2031 LR: 7.9913e-05
Epoch: [1][550/599]Elapsed 38.72s | Loss: 0.7072 Grad: 117778.1328 LR: 8.7735e-05
Epoch: [1][598/599]Elapsed 42.0

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.08s | Loss: 0.6171
Epoch: [1][50/150]Elapsed 2.57s | Loss: 0.5308
Epoch: [1][100/150]Elapsed 5.04s | Loss: 0.5385


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6922 | Average Valid Loss: 0.5349 | Time: 49.63s
Best model found in epoch 1 | valid loss: 0.5349


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [2][0/599]Elapsed 0.06s | Loss: 0.4631 Grad: 148207.6875 LR: 9.3639e-05
Epoch: [2][50/599]Elapsed 3.50s | Loss: 0.4639 Grad: 143588.1250 LR: 9.7834e-05
Epoch: [2][100/599]Elapsed 6.88s | Loss: 0.4493 Grad: 136521.0312 LR: 9.9837e-05
Epoch: [2][150/599]Elapsed 10.34s | Loss: 0.4547 Grad: 118721.1172 LR: 9.9994e-05
Epoch: [2][200/599]Elapsed 13.71s | Loss: 0.4486 Grad: 72160.8672 LR: 9.9961e-05
Epoch: [2][250/599]Elapsed 17.06s | Loss: 0.4409 Grad: 127047.6328 LR: 9.9899e-05
Epoch: [2][300/599]Elapsed 20.50s | Loss: 0.4375 Grad: 73681.8438 LR: 9.9807e-05
Epoch: [2][350/599]Elapsed 23.94s | Loss: 0.4342 Grad: 62093.4258 LR: 9.9685e-05
Epoch: [2][400/599]Elapsed 27.38s | Loss: 0.4278 Grad: 47319.7148 LR: 9.9535e-05
Epoch: [2][450/599]Elapsed 30.80s | Loss: 0.4231 Grad: 89525.0156 LR: 9.9355e-05
Epoch: [2][500/599]Elapsed 34.16s | Loss: 0.4200 Grad: 94009.1953 LR: 9.9146e-05
Epoch: [2][550/599]Elapsed 37.60s | Loss: 0.4159 Grad: 76588.6797 LR: 9.8908e-05
Epoch: [2][598/599]Elapsed 40

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.06s | Loss: 0.5607
Epoch: [2][50/150]Elapsed 2.56s | Loss: 0.4951
Epoch: [2][100/150]Elapsed 5.04s | Loss: 0.5025


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4131 | Average Valid Loss: 0.5034 | Time: 48.44s
Best model found in epoch 2 | valid loss: 0.5034


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [3][0/599]Elapsed 0.07s | Loss: 0.3111 Grad: 116638.8828 LR: 9.8653e-05
Epoch: [3][50/599]Elapsed 3.50s | Loss: 0.3270 Grad: 74738.3438 LR: 9.8359e-05
Epoch: [3][100/599]Elapsed 6.94s | Loss: 0.3228 Grad: 67815.6250 LR: 9.8036e-05
Epoch: [3][150/599]Elapsed 10.39s | Loss: 0.3366 Grad: 81282.9766 LR: 9.7685e-05
Epoch: [3][200/599]Elapsed 13.83s | Loss: 0.3335 Grad: 53059.0273 LR: 9.7306e-05
Epoch: [3][250/599]Elapsed 17.27s | Loss: 0.3307 Grad: 73283.8828 LR: 9.6899e-05
Epoch: [3][300/599]Elapsed 20.71s | Loss: 0.3338 Grad: 62155.9180 LR: 9.6464e-05
Epoch: [3][350/599]Elapsed 24.15s | Loss: 0.3324 Grad: 85081.2031 LR: 9.6002e-05
Epoch: [3][400/599]Elapsed 27.59s | Loss: 0.3290 Grad: 89898.9766 LR: 9.5513e-05
Epoch: [3][450/599]Elapsed 31.04s | Loss: 0.3252 Grad: 102364.0625 LR: 9.4997e-05
Epoch: [3][500/599]Elapsed 34.48s | Loss: 0.3246 Grad: 95788.0469 LR: 9.4455e-05
Epoch: [3][550/599]Elapsed 37.92s | Loss: 0.3218 Grad: 73146.0703 LR: 9.3886e-05
Epoch: [3][598/599]Elapsed 41.23

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.05s | Loss: 0.5566
Epoch: [3][50/150]Elapsed 2.53s | Loss: 0.4930
Epoch: [3][100/150]Elapsed 5.01s | Loss: 0.4982


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3199 | Average Valid Loss: 0.5024 | Time: 48.81s
Best model found in epoch 3 | valid loss: 0.5024


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [4][0/599]Elapsed 0.06s | Loss: 0.2834 Grad: 137497.0312 LR: 9.3316e-05
Epoch: [4][50/599]Elapsed 3.44s | Loss: 0.2683 Grad: 72664.6172 LR: 9.2697e-05
Epoch: [4][100/599]Elapsed 6.81s | Loss: 0.2623 Grad: 54847.7148 LR: 9.2053e-05
Epoch: [4][150/599]Elapsed 10.19s | Loss: 0.2727 Grad: 66864.1719 LR: 9.1384e-05
Epoch: [4][200/599]Elapsed 13.64s | Loss: 0.2682 Grad: 50364.9453 LR: 9.0691e-05
Epoch: [4][250/599]Elapsed 17.10s | Loss: 0.2669 Grad: 52476.3164 LR: 8.9973e-05
Epoch: [4][300/599]Elapsed 20.47s | Loss: 0.2697 Grad: 71041.9219 LR: 8.9233e-05
Epoch: [4][350/599]Elapsed 23.84s | Loss: 0.2677 Grad: 77401.2500 LR: 8.8469e-05
Epoch: [4][400/599]Elapsed 27.21s | Loss: 0.2660 Grad: 48209.2305 LR: 8.7682e-05
Epoch: [4][450/599]Elapsed 30.56s | Loss: 0.2635 Grad: 104782.0703 LR: 8.6873e-05
Epoch: [4][500/599]Elapsed 34.00s | Loss: 0.2628 Grad: 90160.7266 LR: 8.6043e-05
Epoch: [4][550/599]Elapsed 37.46s | Loss: 0.2609 Grad: 84262.8750 LR: 8.5191e-05
Epoch: [4][598/599]Elapsed 40.70

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.06s | Loss: 0.5594
Epoch: [4][50/150]Elapsed 2.59s | Loss: 0.5056
Epoch: [4][100/150]Elapsed 5.10s | Loss: 0.5038


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2601 | Average Valid Loss: 0.5096 | Time: 48.39s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [5][0/599]Elapsed 0.06s | Loss: 0.2323 Grad: 155720.5000 LR: 8.4354e-05
Epoch: [5][50/599]Elapsed 3.54s | Loss: 0.2219 Grad: 94450.0781 LR: 8.3462e-05
Epoch: [5][100/599]Elapsed 7.00s | Loss: 0.2124 Grad: 72929.2109 LR: 8.2550e-05
Epoch: [5][150/599]Elapsed 10.46s | Loss: 0.2188 Grad: 77399.7031 LR: 8.1619e-05
Epoch: [5][200/599]Elapsed 13.84s | Loss: 0.2166 Grad: 69826.6172 LR: 8.0670e-05
Epoch: [5][250/599]Elapsed 17.30s | Loss: 0.2146 Grad: 81023.2734 LR: 7.9702e-05
Epoch: [5][300/599]Elapsed 20.76s | Loss: 0.2176 Grad: 54411.5586 LR: 7.8717e-05
Epoch: [5][350/599]Elapsed 24.22s | Loss: 0.2175 Grad: 69284.2734 LR: 7.7715e-05
Epoch: [5][400/599]Elapsed 27.69s | Loss: 0.2161 Grad: 40715.9648 LR: 7.6697e-05
Epoch: [5][450/599]Elapsed 31.15s | Loss: 0.2132 Grad: 68231.2188 LR: 7.5663e-05
Epoch: [5][500/599]Elapsed 34.62s | Loss: 0.2131 Grad: 52436.6758 LR: 7.4614e-05
Epoch: [5][550/599]Elapsed 38.02s | Loss: 0.2123 Grad: 45532.7891 LR: 7.3550e-05
Epoch: [5][598/599]Elapsed 41.37s

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.06s | Loss: 0.6966
Epoch: [5][50/150]Elapsed 2.58s | Loss: 0.5430
Epoch: [5][100/150]Elapsed 5.06s | Loss: 0.5427


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2117 | Average Valid Loss: 0.5472 | Time: 49.01s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [6][0/599]Elapsed 0.06s | Loss: 0.1748 Grad: 128618.4531 LR: 7.2516e-05
Epoch: [6][50/599]Elapsed 3.44s | Loss: 0.1799 Grad: 82027.0625 LR: 7.1426e-05
Epoch: [6][100/599]Elapsed 6.81s | Loss: 0.1749 Grad: 74936.6875 LR: 7.0323e-05
Epoch: [6][150/599]Elapsed 10.19s | Loss: 0.1817 Grad: 34901.1055 LR: 6.9208e-05
Epoch: [6][200/599]Elapsed 13.62s | Loss: 0.1790 Grad: 29790.0840 LR: 6.8082e-05
Epoch: [6][250/599]Elapsed 17.10s | Loss: 0.1775 Grad: 41810.1875 LR: 6.6945e-05
Epoch: [6][300/599]Elapsed 20.53s | Loss: 0.1806 Grad: 46831.0117 LR: 6.5799e-05
Epoch: [6][350/599]Elapsed 23.91s | Loss: 0.1811 Grad: 26301.0000 LR: 6.4642e-05
Epoch: [6][400/599]Elapsed 27.36s | Loss: 0.1792 Grad: 50107.2148 LR: 6.3478e-05
Epoch: [6][450/599]Elapsed 30.84s | Loss: 0.1781 Grad: 45988.8164 LR: 6.2305e-05
Epoch: [6][500/599]Elapsed 34.30s | Loss: 0.1783 Grad: 38112.3438 LR: 6.1125e-05
Epoch: [6][550/599]Elapsed 37.69s | Loss: 0.1771 Grad: 51881.6133 LR: 5.9939e-05
Epoch: [6][598/599]Elapsed 41.06s

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.06s | Loss: 0.7317
Epoch: [6][50/150]Elapsed 2.56s | Loss: 0.5476
Epoch: [6][100/150]Elapsed 5.06s | Loss: 0.5499


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1767 | Average Valid Loss: 0.5549 | Time: 48.71s
Fold: 1 First Training


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [1][0/599]Elapsed 0.06s | Loss: 0.8231 Grad: 71729.5781 LR: 4.0000e-06
Epoch: [1][50/599]Elapsed 3.45s | Loss: 0.8244 Grad: 65756.4766 LR: 5.1479e-06
Epoch: [1][100/599]Elapsed 6.91s | Loss: 0.8135 Grad: 71433.5078 LR: 8.5368e-06
Epoch: [1][150/599]Elapsed 10.36s | Loss: 0.8095 Grad: 61726.9062 LR: 1.4005e-05
Epoch: [1][200/599]Elapsed 13.83s | Loss: 0.8046 Grad: 61011.7070 LR: 2.1290e-05
Epoch: [1][250/599]Elapsed 17.31s | Loss: 0.7964 Grad: 89332.9141 LR: 3.0044e-05
Epoch: [1][300/599]Elapsed 20.78s | Loss: 0.7825 Grad: 77029.7500 LR: 3.9848e-05
Epoch: [1][350/599]Elapsed 24.20s | Loss: 0.7697 Grad: 100012.2500 LR: 5.0233e-05
Epoch: [1][400/599]Elapsed 27.67s | Loss: 0.7518 Grad: 67541.1094 LR: 6.0703e-05
Epoch: [1][450/599]Elapsed 31.10s | Loss: 0.7332 Grad: 113584.6953 LR: 7.0757e-05
Epoch: [1][500/599]Elapsed 34.55s | Loss: 0.7171 Grad: 84446.5234 LR: 7.9913e-05
Epoch: [1][550/599]Elapsed 37.93s | Loss: 0.7007 Grad: 101492.2266 LR: 8.7735e-05
Epoch: [1][598/599]Elapsed 41.1

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.05s | Loss: 0.4355
Epoch: [1][50/150]Elapsed 2.54s | Loss: 0.5368
Epoch: [1][100/150]Elapsed 5.02s | Loss: 0.5427


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6852 | Average Valid Loss: 0.5505 | Time: 48.76s
Best model found in epoch 1 | valid loss: 0.5505


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [2][0/599]Elapsed 0.06s | Loss: 0.5847 Grad: 162017.4062 LR: 9.3639e-05
Epoch: [2][50/599]Elapsed 3.44s | Loss: 0.4710 Grad: 106606.1562 LR: 9.7834e-05
Epoch: [2][100/599]Elapsed 6.89s | Loss: 0.4545 Grad: 118239.7656 LR: 9.9837e-05
Epoch: [2][150/599]Elapsed 10.35s | Loss: 0.4537 Grad: 61907.9883 LR: 9.9994e-05
Epoch: [2][200/599]Elapsed 13.80s | Loss: 0.4482 Grad: 53194.7266 LR: 9.9961e-05
Epoch: [2][250/599]Elapsed 17.27s | Loss: 0.4396 Grad: 27687.4004 LR: 9.9899e-05
Epoch: [2][300/599]Elapsed 20.73s | Loss: 0.4347 Grad: 48821.3555 LR: 9.9807e-05
Epoch: [2][350/599]Elapsed 24.19s | Loss: 0.4326 Grad: 30896.7520 LR: 9.9685e-05
Epoch: [2][400/599]Elapsed 27.59s | Loss: 0.4249 Grad: 24306.6914 LR: 9.9535e-05
Epoch: [2][450/599]Elapsed 30.98s | Loss: 0.4187 Grad: 36817.2188 LR: 9.9355e-05
Epoch: [2][500/599]Elapsed 34.39s | Loss: 0.4145 Grad: 31095.1445 LR: 9.9146e-05
Epoch: [2][550/599]Elapsed 37.83s | Loss: 0.4105 Grad: 34140.0391 LR: 9.8908e-05
Epoch: [2][598/599]Elapsed 41.1

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.06s | Loss: 0.4418
Epoch: [2][50/150]Elapsed 2.55s | Loss: 0.4550
Epoch: [2][100/150]Elapsed 5.03s | Loss: 0.4619


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4076 | Average Valid Loss: 0.4711 | Time: 48.75s
Best model found in epoch 2 | valid loss: 0.4711


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [3][0/599]Elapsed 0.06s | Loss: 0.3876 Grad: 104232.2422 LR: 9.8653e-05
Epoch: [3][50/599]Elapsed 3.50s | Loss: 0.3268 Grad: 61136.9844 LR: 9.8359e-05
Epoch: [3][100/599]Elapsed 6.95s | Loss: 0.3268 Grad: 59031.0820 LR: 9.8036e-05
Epoch: [3][150/599]Elapsed 10.41s | Loss: 0.3330 Grad: 64196.6250 LR: 9.7685e-05
Epoch: [3][200/599]Elapsed 13.88s | Loss: 0.3302 Grad: 71451.6328 LR: 9.7306e-05
Epoch: [3][250/599]Elapsed 17.27s | Loss: 0.3291 Grad: 62776.3281 LR: 9.6899e-05
Epoch: [3][300/599]Elapsed 20.67s | Loss: 0.3281 Grad: 65250.1992 LR: 9.6464e-05
Epoch: [3][350/599]Elapsed 24.14s | Loss: 0.3263 Grad: 106637.9688 LR: 9.6002e-05
Epoch: [3][400/599]Elapsed 27.60s | Loss: 0.3208 Grad: 32409.3164 LR: 9.5513e-05
Epoch: [3][450/599]Elapsed 31.06s | Loss: 0.3174 Grad: 113350.1562 LR: 9.4997e-05
Epoch: [3][500/599]Elapsed 34.60s | Loss: 0.3158 Grad: 59303.1836 LR: 9.4455e-05
Epoch: [3][550/599]Elapsed 38.01s | Loss: 0.3137 Grad: 55921.3320 LR: 9.3886e-05
Epoch: [3][598/599]Elapsed 41.3

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.06s | Loss: 0.4443
Epoch: [3][50/150]Elapsed 2.55s | Loss: 0.4656
Epoch: [3][100/150]Elapsed 5.04s | Loss: 0.4730


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3136 | Average Valid Loss: 0.4819 | Time: 48.96s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [4][0/599]Elapsed 0.06s | Loss: 0.3310 Grad: 161223.6875 LR: 9.3316e-05
Epoch: [4][50/599]Elapsed 3.50s | Loss: 0.2668 Grad: 63833.6289 LR: 9.2697e-05
Epoch: [4][100/599]Elapsed 6.95s | Loss: 0.2697 Grad: 70727.1875 LR: 9.2053e-05
Epoch: [4][150/599]Elapsed 10.41s | Loss: 0.2749 Grad: 76609.3594 LR: 9.1384e-05
Epoch: [4][200/599]Elapsed 13.89s | Loss: 0.2713 Grad: 61431.0586 LR: 9.0691e-05
Epoch: [4][250/599]Elapsed 17.36s | Loss: 0.2704 Grad: 65080.1367 LR: 8.9973e-05
Epoch: [4][300/599]Elapsed 20.79s | Loss: 0.2713 Grad: 91866.4375 LR: 8.9233e-05
Epoch: [4][350/599]Elapsed 24.26s | Loss: 0.2682 Grad: 109556.1328 LR: 8.8469e-05
Epoch: [4][400/599]Elapsed 27.69s | Loss: 0.2636 Grad: 47894.2930 LR: 8.7682e-05
Epoch: [4][450/599]Elapsed 31.07s | Loss: 0.2607 Grad: 63557.8555 LR: 8.6873e-05
Epoch: [4][500/599]Elapsed 34.51s | Loss: 0.2598 Grad: 73016.4375 LR: 8.6043e-05
Epoch: [4][550/599]Elapsed 37.95s | Loss: 0.2581 Grad: 70956.6484 LR: 8.5191e-05
Epoch: [4][598/599]Elapsed 41.18

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.05s | Loss: 0.4672
Epoch: [4][50/150]Elapsed 2.54s | Loss: 0.4767
Epoch: [4][100/150]Elapsed 5.03s | Loss: 0.4819


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2579 | Average Valid Loss: 0.4943 | Time: 48.80s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [5][0/599]Elapsed 0.06s | Loss: 0.3122 Grad: 165440.2812 LR: 8.4354e-05
Epoch: [5][50/599]Elapsed 3.50s | Loss: 0.2183 Grad: 86809.3281 LR: 8.3462e-05
Epoch: [5][100/599]Elapsed 6.88s | Loss: 0.2192 Grad: 75993.0703 LR: 8.2550e-05
Epoch: [5][150/599]Elapsed 10.27s | Loss: 0.2256 Grad: 99115.7578 LR: 8.1619e-05
Epoch: [5][200/599]Elapsed 13.74s | Loss: 0.2237 Grad: 75436.1641 LR: 8.0670e-05
Epoch: [5][250/599]Elapsed 17.17s | Loss: 0.2224 Grad: 70326.8594 LR: 7.9702e-05
Epoch: [5][300/599]Elapsed 20.58s | Loss: 0.2228 Grad: 88847.4844 LR: 7.8717e-05
Epoch: [5][350/599]Elapsed 24.05s | Loss: 0.2201 Grad: 71143.0469 LR: 7.7715e-05
Epoch: [5][400/599]Elapsed 27.49s | Loss: 0.2158 Grad: 69995.5078 LR: 7.6697e-05
Epoch: [5][450/599]Elapsed 30.96s | Loss: 0.2127 Grad: 77181.7578 LR: 7.5663e-05
Epoch: [5][500/599]Elapsed 34.38s | Loss: 0.2118 Grad: 52999.3828 LR: 7.4614e-05
Epoch: [5][550/599]Elapsed 37.85s | Loss: 0.2106 Grad: 74316.6641 LR: 7.3550e-05
Epoch: [5][598/599]Elapsed 41.12s

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.06s | Loss: 0.4485
Epoch: [5][50/150]Elapsed 2.55s | Loss: 0.5090
Epoch: [5][100/150]Elapsed 5.04s | Loss: 0.5136


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2101 | Average Valid Loss: 0.5282 | Time: 48.75s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [6][0/599]Elapsed 0.08s | Loss: 0.2886 Grad: 201498.6562 LR: 7.2516e-05
Epoch: [6][50/599]Elapsed 3.49s | Loss: 0.1770 Grad: 63330.0273 LR: 7.1426e-05
Epoch: [6][100/599]Elapsed 6.85s | Loss: 0.1769 Grad: 59994.6289 LR: 7.0323e-05
Epoch: [6][150/599]Elapsed 10.31s | Loss: 0.1788 Grad: 99308.5469 LR: 6.9208e-05
Epoch: [6][200/599]Elapsed 13.74s | Loss: 0.1787 Grad: 78098.6641 LR: 6.8082e-05
Epoch: [6][250/599]Elapsed 17.22s | Loss: 0.1795 Grad: 53675.1211 LR: 6.6945e-05
Epoch: [6][300/599]Elapsed 20.66s | Loss: 0.1809 Grad: 114952.3750 LR: 6.5799e-05
Epoch: [6][350/599]Elapsed 24.13s | Loss: 0.1787 Grad: 53669.7969 LR: 6.4642e-05
Epoch: [6][400/599]Elapsed 27.55s | Loss: 0.1760 Grad: 85777.3047 LR: 6.3478e-05
Epoch: [6][450/599]Elapsed 31.01s | Loss: 0.1737 Grad: 81064.0781 LR: 6.2305e-05
Epoch: [6][500/599]Elapsed 34.42s | Loss: 0.1731 Grad: 90414.8594 LR: 6.1125e-05
Epoch: [6][550/599]Elapsed 37.81s | Loss: 0.1734 Grad: 50849.0508 LR: 5.9939e-05
Epoch: [6][598/599]Elapsed 41.07

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.06s | Loss: 0.4523
Epoch: [6][50/150]Elapsed 2.56s | Loss: 0.5057
Epoch: [6][100/150]Elapsed 5.05s | Loss: 0.5141


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1736 | Average Valid Loss: 0.5291 | Time: 48.71s
Fold: 2 First Training


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [1][0/599]Elapsed 0.07s | Loss: 0.8055 Grad: 86716.1016 LR: 4.0000e-06
Epoch: [1][50/599]Elapsed 3.50s | Loss: 0.8231 Grad: 65793.7578 LR: 5.1479e-06
Epoch: [1][100/599]Elapsed 6.83s | Loss: 0.8175 Grad: 61464.6641 LR: 8.5368e-06
Epoch: [1][150/599]Elapsed 10.23s | Loss: 0.8123 Grad: 64466.4062 LR: 1.4005e-05
Epoch: [1][200/599]Elapsed 13.68s | Loss: 0.8073 Grad: 71612.8828 LR: 2.1290e-05
Epoch: [1][250/599]Elapsed 17.15s | Loss: 0.7979 Grad: 81671.4297 LR: 3.0044e-05
Epoch: [1][300/599]Elapsed 20.61s | Loss: 0.7845 Grad: 68699.7656 LR: 3.9848e-05
Epoch: [1][350/599]Elapsed 24.06s | Loss: 0.7707 Grad: 109776.6719 LR: 5.0233e-05
Epoch: [1][400/599]Elapsed 27.52s | Loss: 0.7550 Grad: 41561.9102 LR: 6.0703e-05
Epoch: [1][450/599]Elapsed 31.00s | Loss: 0.7362 Grad: 65276.9648 LR: 7.0757e-05
Epoch: [1][500/599]Elapsed 34.46s | Loss: 0.7200 Grad: 65139.7734 LR: 7.9913e-05
Epoch: [1][550/599]Elapsed 37.91s | Loss: 0.7032 Grad: 55938.5312 LR: 8.7735e-05
Epoch: [1][598/599]Elapsed 41.15s

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.05s | Loss: 0.3997
Epoch: [1][50/150]Elapsed 2.54s | Loss: 0.5327
Epoch: [1][100/150]Elapsed 5.02s | Loss: 0.5268


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6881 | Average Valid Loss: 0.5238 | Time: 48.74s
Best model found in epoch 1 | valid loss: 0.5238


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [2][0/599]Elapsed 0.06s | Loss: 0.4557 Grad: 111054.4375 LR: 9.3639e-05
Epoch: [2][50/599]Elapsed 3.51s | Loss: 0.4686 Grad: 123132.7969 LR: 9.7834e-05
Epoch: [2][100/599]Elapsed 6.96s | Loss: 0.4502 Grad: 107818.8594 LR: 9.9837e-05
Epoch: [2][150/599]Elapsed 10.41s | Loss: 0.4523 Grad: 116565.8828 LR: 9.9994e-05
Epoch: [2][200/599]Elapsed 13.86s | Loss: 0.4476 Grad: 114765.3906 LR: 9.9961e-05
Epoch: [2][250/599]Elapsed 17.33s | Loss: 0.4398 Grad: 55494.9219 LR: 9.9899e-05
Epoch: [2][300/599]Elapsed 20.80s | Loss: 0.4351 Grad: 61288.2852 LR: 9.9807e-05
Epoch: [2][350/599]Elapsed 24.28s | Loss: 0.4295 Grad: 43668.2969 LR: 9.9685e-05
Epoch: [2][400/599]Elapsed 27.75s | Loss: 0.4244 Grad: 59019.6484 LR: 9.9535e-05
Epoch: [2][450/599]Elapsed 31.22s | Loss: 0.4191 Grad: 67268.6094 LR: 9.9355e-05
Epoch: [2][500/599]Elapsed 34.69s | Loss: 0.4168 Grad: 68756.4297 LR: 9.9146e-05
Epoch: [2][550/599]Elapsed 38.16s | Loss: 0.4135 Grad: 77588.9609 LR: 9.8908e-05
Epoch: [2][598/599]Elapsed 41

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.05s | Loss: 0.2506
Epoch: [2][50/150]Elapsed 2.54s | Loss: 0.4615
Epoch: [2][100/150]Elapsed 5.02s | Loss: 0.4576


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4104 | Average Valid Loss: 0.4548 | Time: 49.07s
Best model found in epoch 2 | valid loss: 0.4548


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [3][0/599]Elapsed 0.06s | Loss: 0.2758 Grad: 112054.5312 LR: 9.8653e-05
Epoch: [3][50/599]Elapsed 3.51s | Loss: 0.3305 Grad: 133637.2031 LR: 9.8359e-05
Epoch: [3][100/599]Elapsed 6.94s | Loss: 0.3246 Grad: 40816.5195 LR: 9.8036e-05
Epoch: [3][150/599]Elapsed 10.39s | Loss: 0.3322 Grad: 63861.1523 LR: 9.7685e-05
Epoch: [3][200/599]Elapsed 13.83s | Loss: 0.3317 Grad: 67193.9688 LR: 9.7306e-05
Epoch: [3][250/599]Elapsed 17.28s | Loss: 0.3248 Grad: 57472.2188 LR: 9.6899e-05
Epoch: [3][300/599]Elapsed 20.74s | Loss: 0.3241 Grad: 68177.0469 LR: 9.6464e-05
Epoch: [3][350/599]Elapsed 24.20s | Loss: 0.3236 Grad: 48435.2969 LR: 9.6002e-05
Epoch: [3][400/599]Elapsed 27.67s | Loss: 0.3216 Grad: 71791.2656 LR: 9.5513e-05
Epoch: [3][450/599]Elapsed 31.13s | Loss: 0.3179 Grad: 104306.8438 LR: 9.4997e-05
Epoch: [3][500/599]Elapsed 34.58s | Loss: 0.3183 Grad: 76967.9453 LR: 9.4455e-05
Epoch: [3][550/599]Elapsed 38.04s | Loss: 0.3176 Grad: 79349.7109 LR: 9.3886e-05
Epoch: [3][598/599]Elapsed 41.3

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.05s | Loss: 0.2499
Epoch: [3][50/150]Elapsed 2.54s | Loss: 0.4755
Epoch: [3][100/150]Elapsed 5.04s | Loss: 0.4691


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3162 | Average Valid Loss: 0.4666 | Time: 48.97s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [4][0/599]Elapsed 0.06s | Loss: 0.2658 Grad: 129012.1875 LR: 9.3316e-05
Epoch: [4][50/599]Elapsed 3.51s | Loss: 0.2749 Grad: 62137.3438 LR: 9.2697e-05
Epoch: [4][100/599]Elapsed 6.96s | Loss: 0.2682 Grad: 44319.1172 LR: 9.2053e-05
Epoch: [4][150/599]Elapsed 10.34s | Loss: 0.2767 Grad: 60545.6367 LR: 9.1384e-05
Epoch: [4][200/599]Elapsed 13.73s | Loss: 0.2743 Grad: 96600.5312 LR: 9.0691e-05
Epoch: [4][250/599]Elapsed 17.16s | Loss: 0.2693 Grad: 64188.1094 LR: 8.9973e-05
Epoch: [4][300/599]Elapsed 20.63s | Loss: 0.2691 Grad: 87928.8516 LR: 8.9233e-05
Epoch: [4][350/599]Elapsed 24.06s | Loss: 0.2661 Grad: 42501.2812 LR: 8.8469e-05
Epoch: [4][400/599]Elapsed 27.53s | Loss: 0.2629 Grad: 75488.5312 LR: 8.7682e-05
Epoch: [4][450/599]Elapsed 30.97s | Loss: 0.2602 Grad: 78783.1172 LR: 8.6873e-05
Epoch: [4][500/599]Elapsed 34.43s | Loss: 0.2600 Grad: 54669.2578 LR: 8.6043e-05
Epoch: [4][550/599]Elapsed 37.93s | Loss: 0.2593 Grad: 78903.0000 LR: 8.5191e-05
Epoch: [4][598/599]Elapsed 41.27s

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.06s | Loss: 0.2572
Epoch: [4][50/150]Elapsed 2.58s | Loss: 0.4606
Epoch: [4][100/150]Elapsed 5.10s | Loss: 0.4611


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2586 | Average Valid Loss: 0.4595 | Time: 48.97s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [5][0/599]Elapsed 0.06s | Loss: 0.2245 Grad: 179550.3438 LR: 8.4354e-05
Epoch: [5][50/599]Elapsed 3.50s | Loss: 0.2306 Grad: 98690.7031 LR: 8.3462e-05
Epoch: [5][100/599]Elapsed 6.97s | Loss: 0.2226 Grad: 51038.5273 LR: 8.2550e-05
Epoch: [5][150/599]Elapsed 10.45s | Loss: 0.2276 Grad: 84670.3438 LR: 8.1619e-05
Epoch: [5][200/599]Elapsed 13.94s | Loss: 0.2255 Grad: 84955.9766 LR: 8.0670e-05
Epoch: [5][250/599]Elapsed 17.42s | Loss: 0.2204 Grad: 59205.4219 LR: 7.9702e-05
Epoch: [5][300/599]Elapsed 20.91s | Loss: 0.2203 Grad: 114424.0469 LR: 7.8717e-05
Epoch: [5][350/599]Elapsed 24.37s | Loss: 0.2182 Grad: 49738.5312 LR: 7.7715e-05
Epoch: [5][400/599]Elapsed 27.77s | Loss: 0.2150 Grad: 59609.2422 LR: 7.6697e-05
Epoch: [5][450/599]Elapsed 31.16s | Loss: 0.2127 Grad: 73940.5234 LR: 7.5663e-05
Epoch: [5][500/599]Elapsed 34.63s | Loss: 0.2125 Grad: 59024.0625 LR: 7.4614e-05
Epoch: [5][550/599]Elapsed 38.08s | Loss: 0.2118 Grad: 101753.6797 LR: 7.3550e-05
Epoch: [5][598/599]Elapsed 41.4

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.05s | Loss: 0.2815
Epoch: [5][50/150]Elapsed 2.54s | Loss: 0.4804
Epoch: [5][100/150]Elapsed 5.01s | Loss: 0.4792


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2114 | Average Valid Loss: 0.4783 | Time: 48.99s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [6][0/599]Elapsed 0.06s | Loss: 0.1360 Grad: 153872.5156 LR: 7.2516e-05
Epoch: [6][50/599]Elapsed 3.53s | Loss: 0.1903 Grad: 59533.5586 LR: 7.1426e-05
Epoch: [6][100/599]Elapsed 6.97s | Loss: 0.1804 Grad: 35689.2461 LR: 7.0323e-05
Epoch: [6][150/599]Elapsed 10.42s | Loss: 0.1846 Grad: 85895.6953 LR: 6.9208e-05
Epoch: [6][200/599]Elapsed 13.87s | Loss: 0.1830 Grad: 71629.3750 LR: 6.8082e-05
Epoch: [6][250/599]Elapsed 17.32s | Loss: 0.1801 Grad: 73921.4531 LR: 6.6945e-05
Epoch: [6][300/599]Elapsed 20.79s | Loss: 0.1805 Grad: 74843.6016 LR: 6.5799e-05
Epoch: [6][350/599]Elapsed 24.21s | Loss: 0.1802 Grad: 47959.8711 LR: 6.4642e-05
Epoch: [6][400/599]Elapsed 27.61s | Loss: 0.1771 Grad: 80449.6797 LR: 6.3478e-05
Epoch: [6][450/599]Elapsed 30.98s | Loss: 0.1750 Grad: 188752.7188 LR: 6.2305e-05
Epoch: [6][500/599]Elapsed 34.43s | Loss: 0.1748 Grad: 61497.1875 LR: 6.1125e-05
Epoch: [6][550/599]Elapsed 37.88s | Loss: 0.1745 Grad: 101418.8359 LR: 5.9939e-05
Epoch: [6][598/599]Elapsed 41.2

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.05s | Loss: 0.2244
Epoch: [6][50/150]Elapsed 2.53s | Loss: 0.5102
Epoch: [6][100/150]Elapsed 5.01s | Loss: 0.5068


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1741 | Average Valid Loss: 0.5046 | Time: 48.77s
Fold: 3 First Training


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [1][0/599]Elapsed 0.06s | Loss: 0.8271 Grad: 73150.2188 LR: 4.0000e-06
Epoch: [1][50/599]Elapsed 3.50s | Loss: 0.8282 Grad: 75182.1328 LR: 5.1479e-06
Epoch: [1][100/599]Elapsed 6.93s | Loss: 0.8240 Grad: 66904.7500 LR: 8.5368e-06
Epoch: [1][150/599]Elapsed 10.37s | Loss: 0.8195 Grad: 66749.3203 LR: 1.4005e-05
Epoch: [1][200/599]Elapsed 13.81s | Loss: 0.8132 Grad: 69089.9531 LR: 2.1290e-05
Epoch: [1][250/599]Elapsed 17.26s | Loss: 0.8046 Grad: 111079.8516 LR: 3.0044e-05
Epoch: [1][300/599]Elapsed 20.73s | Loss: 0.7918 Grad: 83852.1484 LR: 3.9848e-05
Epoch: [1][350/599]Elapsed 24.19s | Loss: 0.7778 Grad: 92853.5391 LR: 5.0233e-05
Epoch: [1][400/599]Elapsed 27.64s | Loss: 0.7616 Grad: 88480.1250 LR: 6.0703e-05
Epoch: [1][450/599]Elapsed 31.03s | Loss: 0.7434 Grad: 126165.2891 LR: 7.0757e-05
Epoch: [1][500/599]Elapsed 34.45s | Loss: 0.7277 Grad: 153298.0625 LR: 7.9913e-05
Epoch: [1][550/599]Elapsed 37.90s | Loss: 0.7106 Grad: 112588.4219 LR: 8.7735e-05
Epoch: [1][598/599]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.05s | Loss: 0.4231
Epoch: [1][50/150]Elapsed 2.53s | Loss: 0.4940
Epoch: [1][100/150]Elapsed 5.00s | Loss: 0.4951


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6962 | Average Valid Loss: 0.4952 | Time: 48.78s
Best model found in epoch 1 | valid loss: 0.4952


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [2][0/599]Elapsed 0.06s | Loss: 0.5529 Grad: 153390.3594 LR: 9.3639e-05
Epoch: [2][50/599]Elapsed 3.52s | Loss: 0.4849 Grad: 148355.6562 LR: 9.7834e-05
Epoch: [2][100/599]Elapsed 6.94s | Loss: 0.4755 Grad: 119541.0078 LR: 9.9837e-05
Epoch: [2][150/599]Elapsed 10.38s | Loss: 0.4783 Grad: 232835.0469 LR: 9.9994e-05
Epoch: [2][200/599]Elapsed 13.85s | Loss: 0.4709 Grad: 71464.9688 LR: 9.9961e-05
Epoch: [2][250/599]Elapsed 17.25s | Loss: 0.4634 Grad: 74044.3438 LR: 9.9899e-05
Epoch: [2][300/599]Elapsed 20.70s | Loss: 0.4576 Grad: 72093.5000 LR: 9.9807e-05
Epoch: [2][350/599]Elapsed 24.14s | Loss: 0.4527 Grad: 57449.5312 LR: 9.9685e-05
Epoch: [2][400/599]Elapsed 27.55s | Loss: 0.4446 Grad: 52475.4844 LR: 9.9535e-05
Epoch: [2][450/599]Elapsed 31.01s | Loss: 0.4380 Grad: 65502.8320 LR: 9.9355e-05
Epoch: [2][500/599]Elapsed 34.49s | Loss: 0.4342 Grad: 94868.7656 LR: 9.9146e-05
Epoch: [2][550/599]Elapsed 37.96s | Loss: 0.4293 Grad: 52978.2539 LR: 9.8908e-05
Epoch: [2][598/599]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.05s | Loss: 0.2868
Epoch: [2][50/150]Elapsed 2.53s | Loss: 0.4083
Epoch: [2][100/150]Elapsed 5.01s | Loss: 0.4117


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4259 | Average Valid Loss: 0.4157 | Time: 48.85s
Best model found in epoch 2 | valid loss: 0.4157


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [3][0/599]Elapsed 0.06s | Loss: 0.3650 Grad: 136304.6250 LR: 9.8653e-05
Epoch: [3][50/599]Elapsed 3.41s | Loss: 0.3489 Grad: 81012.3359 LR: 9.8359e-05
Epoch: [3][100/599]Elapsed 6.74s | Loss: 0.3431 Grad: 64947.9688 LR: 9.8036e-05
Epoch: [3][150/599]Elapsed 10.08s | Loss: 0.3536 Grad: 75946.2656 LR: 9.7685e-05
Epoch: [3][200/599]Elapsed 13.46s | Loss: 0.3513 Grad: 75735.4922 LR: 9.7306e-05
Epoch: [3][250/599]Elapsed 16.85s | Loss: 0.3456 Grad: 68862.5391 LR: 9.6899e-05
Epoch: [3][300/599]Elapsed 20.24s | Loss: 0.3438 Grad: 71854.1641 LR: 9.6464e-05
Epoch: [3][350/599]Elapsed 23.62s | Loss: 0.3411 Grad: 60954.2422 LR: 9.6002e-05
Epoch: [3][400/599]Elapsed 27.00s | Loss: 0.3362 Grad: 76673.0547 LR: 9.5513e-05
Epoch: [3][450/599]Elapsed 30.39s | Loss: 0.3330 Grad: 89048.4844 LR: 9.4997e-05
Epoch: [3][500/599]Elapsed 33.76s | Loss: 0.3317 Grad: 103758.7031 LR: 9.4455e-05
Epoch: [3][550/599]Elapsed 37.13s | Loss: 0.3298 Grad: 70727.3359 LR: 9.3886e-05
Epoch: [3][598/599]Elapsed 40.46

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.05s | Loss: 0.2428
Epoch: [3][50/150]Elapsed 2.53s | Loss: 0.3907
Epoch: [3][100/150]Elapsed 5.01s | Loss: 0.3946


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3286 | Average Valid Loss: 0.4008 | Time: 48.04s
Best model found in epoch 3 | valid loss: 0.4008


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [4][0/599]Elapsed 0.06s | Loss: 0.2559 Grad: 154094.7188 LR: 9.3316e-05
Epoch: [4][50/599]Elapsed 3.51s | Loss: 0.2910 Grad: 101874.1875 LR: 9.2697e-05
Epoch: [4][100/599]Elapsed 6.98s | Loss: 0.2850 Grad: 64227.0352 LR: 9.2053e-05
Epoch: [4][150/599]Elapsed 10.44s | Loss: 0.2960 Grad: 83048.5859 LR: 9.1384e-05
Epoch: [4][200/599]Elapsed 13.80s | Loss: 0.2951 Grad: 86518.4141 LR: 9.0691e-05
Epoch: [4][250/599]Elapsed 17.18s | Loss: 0.2889 Grad: 65500.6406 LR: 8.9973e-05
Epoch: [4][300/599]Elapsed 20.57s | Loss: 0.2875 Grad: 83085.8359 LR: 8.9233e-05
Epoch: [4][350/599]Elapsed 24.02s | Loss: 0.2840 Grad: 63419.2070 LR: 8.8469e-05
Epoch: [4][400/599]Elapsed 27.50s | Loss: 0.2781 Grad: 76497.4219 LR: 8.7682e-05
Epoch: [4][450/599]Elapsed 30.91s | Loss: 0.2739 Grad: 118229.1016 LR: 8.6873e-05
Epoch: [4][500/599]Elapsed 34.36s | Loss: 0.2728 Grad: 111213.6797 LR: 8.6043e-05
Epoch: [4][550/599]Elapsed 37.82s | Loss: 0.2711 Grad: 64045.8438 LR: 8.5191e-05
Epoch: [4][598/599]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.05s | Loss: 0.2335
Epoch: [4][50/150]Elapsed 2.53s | Loss: 0.3959
Epoch: [4][100/150]Elapsed 5.01s | Loss: 0.4039


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2698 | Average Valid Loss: 0.4106 | Time: 48.73s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [5][0/599]Elapsed 0.06s | Loss: 0.1654 Grad: 158756.5781 LR: 8.4354e-05
Epoch: [5][50/599]Elapsed 3.49s | Loss: 0.2313 Grad: 101457.1172 LR: 8.3462e-05
Epoch: [5][100/599]Elapsed 6.91s | Loss: 0.2312 Grad: 77091.4297 LR: 8.2550e-05
Epoch: [5][150/599]Elapsed 10.27s | Loss: 0.2400 Grad: 86728.6719 LR: 8.1619e-05
Epoch: [5][200/599]Elapsed 13.60s | Loss: 0.2406 Grad: 79686.6719 LR: 8.0670e-05
Epoch: [5][250/599]Elapsed 16.96s | Loss: 0.2354 Grad: 84472.7031 LR: 7.9702e-05
Epoch: [5][300/599]Elapsed 20.34s | Loss: 0.2338 Grad: 67101.9062 LR: 7.8717e-05
Epoch: [5][350/599]Elapsed 23.71s | Loss: 0.2332 Grad: 45131.5234 LR: 7.7715e-05
Epoch: [5][400/599]Elapsed 27.08s | Loss: 0.2291 Grad: 77721.5547 LR: 7.6697e-05
Epoch: [5][450/599]Elapsed 30.52s | Loss: 0.2259 Grad: 75193.9766 LR: 7.5663e-05
Epoch: [5][500/599]Elapsed 33.97s | Loss: 0.2250 Grad: 109453.1016 LR: 7.4614e-05
Epoch: [5][550/599]Elapsed 37.42s | Loss: 0.2240 Grad: 83442.8750 LR: 7.3550e-05
Epoch: [5][598/599]Elapsed 40.6

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.05s | Loss: 0.2242
Epoch: [5][50/150]Elapsed 2.53s | Loss: 0.4081
Epoch: [5][100/150]Elapsed 5.01s | Loss: 0.4136


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2226 | Average Valid Loss: 0.4212 | Time: 48.28s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [6][0/599]Elapsed 0.07s | Loss: 0.1262 Grad: 195827.1406 LR: 7.2516e-05
Epoch: [6][50/599]Elapsed 3.46s | Loss: 0.1957 Grad: 97791.5469 LR: 7.1426e-05
Epoch: [6][100/599]Elapsed 6.86s | Loss: 0.1855 Grad: 80384.5625 LR: 7.0323e-05
Epoch: [6][150/599]Elapsed 10.28s | Loss: 0.1971 Grad: 116602.7266 LR: 6.9208e-05
Epoch: [6][200/599]Elapsed 13.67s | Loss: 0.1962 Grad: 90452.0078 LR: 6.8082e-05
Epoch: [6][250/599]Elapsed 17.08s | Loss: 0.1911 Grad: 107975.9062 LR: 6.6945e-05
Epoch: [6][300/599]Elapsed 20.56s | Loss: 0.1906 Grad: 61394.0898 LR: 6.5799e-05
Epoch: [6][350/599]Elapsed 24.03s | Loss: 0.1885 Grad: 52935.5078 LR: 6.4642e-05
Epoch: [6][400/599]Elapsed 27.46s | Loss: 0.1848 Grad: 62266.8516 LR: 6.3478e-05
Epoch: [6][450/599]Elapsed 30.92s | Loss: 0.1838 Grad: 90115.9062 LR: 6.2305e-05
Epoch: [6][500/599]Elapsed 34.39s | Loss: 0.1823 Grad: 76084.7578 LR: 6.1125e-05
Epoch: [6][550/599]Elapsed 37.88s | Loss: 0.1810 Grad: 67428.2109 LR: 5.9939e-05
Epoch: [6][598/599]Elapsed 41.1

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.06s | Loss: 0.2278
Epoch: [6][50/150]Elapsed 2.54s | Loss: 0.4277
Epoch: [6][100/150]Elapsed 5.03s | Loss: 0.4361


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1806 | Average Valid Loss: 0.4449 | Time: 48.75s
Fold: 4 First Training


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [1][0/600]Elapsed 0.06s | Loss: 0.8106 Grad: 72527.2891 LR: 4.0000e-06
Epoch: [1][50/600]Elapsed 3.46s | Loss: 0.8121 Grad: 70309.7734 LR: 5.1441e-06
Epoch: [1][100/600]Elapsed 6.88s | Loss: 0.8072 Grad: 76239.2500 LR: 8.5219e-06
Epoch: [1][150/600]Elapsed 10.37s | Loss: 0.8015 Grad: 58559.8320 LR: 1.3972e-05
Epoch: [1][200/600]Elapsed 13.75s | Loss: 0.7965 Grad: 64884.7109 LR: 2.1236e-05
Epoch: [1][250/600]Elapsed 17.16s | Loss: 0.7876 Grad: 73454.1484 LR: 2.9966e-05
Epoch: [1][300/600]Elapsed 20.53s | Loss: 0.7759 Grad: 84563.8906 LR: 3.9746e-05
Epoch: [1][350/600]Elapsed 23.88s | Loss: 0.7620 Grad: 132643.2031 LR: 5.0110e-05
Epoch: [1][400/600]Elapsed 27.25s | Loss: 0.7460 Grad: 121330.9609 LR: 6.0565e-05
Epoch: [1][450/600]Elapsed 30.70s | Loss: 0.7277 Grad: 107031.1641 LR: 7.0611e-05
Epoch: [1][500/600]Elapsed 34.16s | Loss: 0.7145 Grad: 132470.4219 LR: 7.9770e-05
Epoch: [1][550/600]Elapsed 37.60s | Loss: 0.6981 Grad: 82687.5547 LR: 8.7605e-05
Epoch: [1][599/600]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.06s | Loss: 0.5025
Epoch: [1][50/150]Elapsed 2.56s | Loss: 0.4824
Epoch: [1][100/150]Elapsed 5.05s | Loss: 0.4972


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6841 | Average Valid Loss: 0.5019 | Time: 48.67s
Best model found in epoch 1 | valid loss: 0.5019


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [2][0/600]Elapsed 0.06s | Loss: 0.5030 Grad: 102587.4062 LR: 9.3743e-05
Epoch: [2][50/600]Elapsed 3.55s | Loss: 0.4845 Grad: 98887.5469 LR: 9.7891e-05
Epoch: [2][100/600]Elapsed 7.01s | Loss: 0.4745 Grad: 106822.7500 LR: 9.9851e-05
Epoch: [2][150/600]Elapsed 10.49s | Loss: 0.4774 Grad: 188618.7969 LR: 9.9994e-05
Epoch: [2][200/600]Elapsed 13.98s | Loss: 0.4712 Grad: 67284.0156 LR: 9.9961e-05
Epoch: [2][250/600]Elapsed 17.38s | Loss: 0.4619 Grad: 49747.7734 LR: 9.9898e-05
Epoch: [2][300/600]Elapsed 20.81s | Loss: 0.4562 Grad: 81661.5469 LR: 9.9806e-05
Epoch: [2][350/600]Elapsed 24.24s | Loss: 0.4519 Grad: 97439.6250 LR: 9.9684e-05
Epoch: [2][400/600]Elapsed 27.68s | Loss: 0.4442 Grad: 70048.1406 LR: 9.9534e-05
Epoch: [2][450/600]Elapsed 31.15s | Loss: 0.4360 Grad: 135055.3906 LR: 9.9354e-05
Epoch: [2][500/600]Elapsed 34.64s | Loss: 0.4320 Grad: 74260.7344 LR: 9.9145e-05
Epoch: [2][550/600]Elapsed 38.04s | Loss: 0.4261 Grad: 50331.1562 LR: 9.8908e-05
Epoch: [2][599/600]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.06s | Loss: 0.3513
Epoch: [2][50/150]Elapsed 2.56s | Loss: 0.3692
Epoch: [2][100/150]Elapsed 5.07s | Loss: 0.3869


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4225 | Average Valid Loss: 0.3945 | Time: 49.09s
Best model found in epoch 2 | valid loss: 0.3945


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [3][0/600]Elapsed 0.06s | Loss: 0.3927 Grad: 117686.9062 LR: 9.8642e-05
Epoch: [3][50/600]Elapsed 3.56s | Loss: 0.3458 Grad: 103830.0859 LR: 9.8347e-05
Epoch: [3][100/600]Elapsed 7.09s | Loss: 0.3455 Grad: 122273.9531 LR: 9.8024e-05
Epoch: [3][150/600]Elapsed 10.57s | Loss: 0.3500 Grad: 60162.7461 LR: 9.7672e-05
Epoch: [3][200/600]Elapsed 13.99s | Loss: 0.3480 Grad: 105345.5469 LR: 9.7293e-05
Epoch: [3][250/600]Elapsed 17.49s | Loss: 0.3441 Grad: 56619.5312 LR: 9.6886e-05
Epoch: [3][300/600]Elapsed 20.96s | Loss: 0.3422 Grad: 54429.4023 LR: 9.6451e-05
Epoch: [3][350/600]Elapsed 24.39s | Loss: 0.3410 Grad: 92264.0625 LR: 9.5989e-05
Epoch: [3][400/600]Elapsed 27.80s | Loss: 0.3374 Grad: 54930.2422 LR: 9.5500e-05
Epoch: [3][450/600]Elapsed 31.27s | Loss: 0.3322 Grad: 82542.1406 LR: 9.4984e-05
Epoch: [3][500/600]Elapsed 34.70s | Loss: 0.3317 Grad: 72138.2109 LR: 9.4442e-05
Epoch: [3][550/600]Elapsed 38.16s | Loss: 0.3288 Grad: 47383.2344 LR: 9.3874e-05
Epoch: [3][599/600]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.05s | Loss: 0.3706
Epoch: [3][50/150]Elapsed 2.56s | Loss: 0.3648
Epoch: [3][100/150]Elapsed 5.05s | Loss: 0.3811


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3269 | Average Valid Loss: 0.3865 | Time: 49.19s
Best model found in epoch 3 | valid loss: 0.3865


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [4][0/600]Elapsed 0.08s | Loss: 0.3551 Grad: 114665.2109 LR: 9.3280e-05
Epoch: [4][50/600]Elapsed 3.47s | Loss: 0.2818 Grad: 117887.8047 LR: 9.2660e-05
Epoch: [4][100/600]Elapsed 6.89s | Loss: 0.2881 Grad: 88394.4609 LR: 9.2016e-05
Epoch: [4][150/600]Elapsed 10.39s | Loss: 0.2944 Grad: 76411.5391 LR: 9.1347e-05
Epoch: [4][200/600]Elapsed 13.88s | Loss: 0.2936 Grad: 95598.7422 LR: 9.0653e-05
Epoch: [4][250/600]Elapsed 17.36s | Loss: 0.2892 Grad: 52543.9609 LR: 8.9936e-05
Epoch: [4][300/600]Elapsed 20.84s | Loss: 0.2870 Grad: 72424.5156 LR: 8.9195e-05
Epoch: [4][350/600]Elapsed 24.32s | Loss: 0.2845 Grad: 73128.8281 LR: 8.8431e-05
Epoch: [4][400/600]Elapsed 27.87s | Loss: 0.2800 Grad: 74204.3672 LR: 8.7645e-05
Epoch: [4][450/600]Elapsed 31.40s | Loss: 0.2756 Grad: 128197.0000 LR: 8.6836e-05
Epoch: [4][500/600]Elapsed 34.88s | Loss: 0.2749 Grad: 33122.9414 LR: 8.6006e-05
Epoch: [4][550/600]Elapsed 38.35s | Loss: 0.2711 Grad: 34600.3320 LR: 8.5155e-05
Epoch: [4][599/600]Elapsed 41.7

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.05s | Loss: 0.3720
Epoch: [4][50/150]Elapsed 2.54s | Loss: 0.3742
Epoch: [4][100/150]Elapsed 5.02s | Loss: 0.3905


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2707 | Average Valid Loss: 0.3947 | Time: 49.34s


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [5][0/600]Elapsed 0.08s | Loss: 0.3098 Grad: 138784.5312 LR: 8.4283e-05
Epoch: [5][50/600]Elapsed 3.46s | Loss: 0.2352 Grad: 71723.0000 LR: 8.3391e-05
Epoch: [5][100/600]Elapsed 6.91s | Loss: 0.2386 Grad: 77463.2656 LR: 8.2479e-05
Epoch: [5][150/600]Elapsed 10.37s | Loss: 0.2429 Grad: 101408.0859 LR: 8.1549e-05
Epoch: [5][200/600]Elapsed 13.73s | Loss: 0.2385 Grad: 137473.5781 LR: 8.0599e-05
Epoch: [5][250/600]Elapsed 17.10s | Loss: 0.2367 Grad: 44225.3516 LR: 7.9632e-05
Epoch: [5][300/600]Elapsed 20.55s | Loss: 0.2353 Grad: 75808.6562 LR: 7.8648e-05
Epoch: [5][350/600]Elapsed 24.01s | Loss: 0.2342 Grad: 68770.3438 LR: 7.7646e-05
Epoch: [5][400/600]Elapsed 27.47s | Loss: 0.2305 Grad: 89368.0078 LR: 7.6629e-05
Epoch: [5][450/600]Elapsed 30.93s | Loss: 0.2261 Grad: 66862.2812 LR: 7.5595e-05
Epoch: [5][500/600]Elapsed 34.39s | Loss: 0.2258 Grad: 66946.5781 LR: 7.4547e-05
Epoch: [5][550/600]Elapsed 37.84s | Loss: 0.2236 Grad: 100575.7578 LR: 7.3484e-05
Epoch: [5][599/600]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.05s | Loss: 0.2940
Epoch: [5][50/150]Elapsed 2.55s | Loss: 0.3901
Epoch: [5][100/150]Elapsed 5.03s | Loss: 0.4031


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2225 | Average Valid Loss: 0.4100 | Time: 48.85s


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [6][0/600]Elapsed 0.07s | Loss: 0.3177 Grad: nan LR: 7.2408e-05
Epoch: [6][50/600]Elapsed 3.53s | Loss: 0.2070 Grad: 66750.0391 LR: 7.1318e-05
Epoch: [6][100/600]Elapsed 6.98s | Loss: 0.1982 Grad: 71688.0391 LR: 7.0216e-05
Epoch: [6][150/600]Elapsed 10.43s | Loss: 0.1982 Grad: 104121.0625 LR: 6.9102e-05
Epoch: [6][200/600]Elapsed 13.86s | Loss: 0.1987 Grad: 105754.1406 LR: 6.7976e-05
Epoch: [6][250/600]Elapsed 17.26s | Loss: 0.1979 Grad: 65525.5508 LR: 6.6841e-05
Epoch: [6][300/600]Elapsed 20.68s | Loss: 0.1968 Grad: 86692.9219 LR: 6.5695e-05
Epoch: [6][350/600]Elapsed 24.08s | Loss: 0.1959 Grad: 80240.1797 LR: 6.4540e-05
Epoch: [6][400/600]Elapsed 27.52s | Loss: 0.1926 Grad: 87865.1328 LR: 6.3377e-05
Epoch: [6][450/600]Elapsed 31.00s | Loss: 0.1906 Grad: 112931.4219 LR: 6.2205e-05
Epoch: [6][500/600]Elapsed 34.45s | Loss: 0.1906 Grad: 60645.8672 LR: 6.1027e-05
Epoch: [6][550/600]Elapsed 37.92s | Loss: 0.1884 Grad: 80524.6562 LR: 5.9842e-05
Epoch: [6][599/600]Elapsed 41.32s | Lo

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.05s | Loss: 0.2793
Epoch: [6][50/150]Elapsed 2.54s | Loss: 0.4026
Epoch: [6][100/150]Elapsed 5.02s | Loss: 0.4192


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1880 | Average Valid Loss: 0.4304 | Time: 48.91s
CV Result (Stage=1): 0.9855982522563917 (torch) | 0.985598251871659 (kaggle)
Elapse: 24.46 min 
Fold: 0 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.07s | Loss: 0.4320 Grad: 239570.6406 LR: 4.0000e-06
Epoch: [1][50/254]Elapsed 3.48s | Loss: 0.4649 Grad: 150610.1094 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 6.89s | Loss: 0.4435 Grad: 102181.4297 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.27s | Loss: 0.4245 Grad: 122371.3984 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 13.64s | Loss: 0.3951 Grad: 65829.1094 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.07s | Loss: 0.3707 Grad: 55697.4883 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.30s | Loss: 0.3699 Grad: 102518.8672 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.05s | Loss: 0.2703
Epoch: [1][50/64]Elapsed 2.54s | Loss: 0.2686


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3699 | Average Valid Loss: 0.2637 | Time: 20.64s
Best model found in epoch 1 | valid loss: 0.2637


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.07s | Loss: 0.2153 Grad: 105025.1094 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.55s | Loss: 0.2403 Grad: 104387.4453 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.02s | Loss: 0.2362 Grad: 95527.2109 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.48s | Loss: 0.2335 Grad: 87240.5391 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 13.97s | Loss: 0.2279 Grad: 74612.3516 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.44s | Loss: 0.2235 Grad: 75101.0859 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.67s | Loss: 0.2233 Grad: 133654.7031 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.05s | Loss: 0.2398
Epoch: [2][50/64]Elapsed 2.54s | Loss: 0.2291


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2233 | Average Valid Loss: 0.2273 | Time: 20.98s
Best model found in epoch 2 | valid loss: 0.2273


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.06s | Loss: 0.1963 Grad: 83009.8516 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.48s | Loss: 0.2026 Grad: 85819.7969 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 6.88s | Loss: 0.1961 Grad: 69478.0078 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.33s | Loss: 0.1978 Grad: 75730.3516 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 13.80s | Loss: 0.1939 Grad: 88001.7891 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.23s | Loss: 0.1918 Grad: 74174.1641 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.46s | Loss: 0.1919 Grad: 112918.7891 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.05s | Loss: 0.2225
Epoch: [3][50/64]Elapsed 2.54s | Loss: 0.2243


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1919 | Average Valid Loss: 0.2224 | Time: 20.77s
Best model found in epoch 3 | valid loss: 0.2224


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.06s | Loss: 0.1661 Grad: 70357.6016 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.50s | Loss: 0.1791 Grad: 70986.4922 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 6.97s | Loss: 0.1744 Grad: 65400.7266 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.44s | Loss: 0.1780 Grad: 72729.7734 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 13.87s | Loss: 0.1756 Grad: 86468.6875 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.36s | Loss: 0.1741 Grad: 84352.7656 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.60s | Loss: 0.1742 Grad: 113097.9844 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.05s | Loss: 0.2262
Epoch: [4][50/64]Elapsed 2.55s | Loss: 0.2239


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1742 | Average Valid Loss: 0.2217 | Time: 20.92s
Best model found in epoch 4 | valid loss: 0.2217


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.06s | Loss: 0.1255 Grad: 58785.8516 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.47s | Loss: 0.1636 Grad: 84227.6094 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 6.90s | Loss: 0.1575 Grad: 69629.9375 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.34s | Loss: 0.1616 Grad: 74575.2500 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 13.78s | Loss: 0.1595 Grad: 75634.9453 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.27s | Loss: 0.1582 Grad: 67535.1719 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.51s | Loss: 0.1584 Grad: 119831.2188 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.06s | Loss: 0.2214
Epoch: [5][50/64]Elapsed 2.56s | Loss: 0.2259


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1584 | Average Valid Loss: 0.2232 | Time: 20.84s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.06s | Loss: 0.1383 Grad: 73915.1172 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.49s | Loss: 0.1507 Grad: 65940.6406 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 6.88s | Loss: 0.1454 Grad: 63314.0898 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.33s | Loss: 0.1489 Grad: 79391.5312 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 13.80s | Loss: 0.1469 Grad: 105117.9922 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.27s | Loss: 0.1457 Grad: 79209.1719 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.51s | Loss: 0.1458 Grad: 118156.5000 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.05s | Loss: 0.2228
Epoch: [6][50/64]Elapsed 2.54s | Loss: 0.2260


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1458 | Average Valid Loss: 0.2236 | Time: 20.81s
Fold: 1 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.06s | Loss: 0.4531 Grad: 248045.1562 LR: 4.0000e-06




Epoch: [1][50/254]Elapsed 3.51s | Loss: 0.4728 Grad: 131984.2500 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 6.97s | Loss: 0.4430 Grad: 109597.7656 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.42s | Loss: 0.4210 Grad: 81105.9219 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 13.82s | Loss: 0.4005 Grad: 119608.9531 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.21s | Loss: 0.3746 Grad: 70348.4922 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.44s | Loss: 0.3732 Grad: 93861.1562 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.05s | Loss: 0.3088
Epoch: [1][50/64]Elapsed 2.54s | Loss: 0.2691


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3732 | Average Valid Loss: 0.2674 | Time: 20.75s
Best model found in epoch 1 | valid loss: 0.2674


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.07s | Loss: 0.2295 Grad: 144662.4062 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.64s | Loss: 0.2452 Grad: 75385.3438 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.11s | Loss: 0.2372 Grad: 59144.1875 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.57s | Loss: 0.2307 Grad: 44911.7734 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 14.04s | Loss: 0.2258 Grad: 51468.6914 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.51s | Loss: 0.2205 Grad: 37472.3555 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.74s | Loss: 0.2201 Grad: 91401.7812 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.06s | Loss: 0.2514
Epoch: [2][50/64]Elapsed 2.56s | Loss: 0.2274


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2201 | Average Valid Loss: 0.2258 | Time: 21.08s
Best model found in epoch 2 | valid loss: 0.2258


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.07s | Loss: 0.2149 Grad: 161970.6875 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.50s | Loss: 0.1970 Grad: 101183.9844 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 6.92s | Loss: 0.1951 Grad: 53982.4414 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.36s | Loss: 0.1913 Grad: 40598.1367 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 13.75s | Loss: 0.1890 Grad: 44281.9805 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.17s | Loss: 0.1874 Grad: 32696.3398 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.40s | Loss: 0.1872 Grad: 71949.5078 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.05s | Loss: 0.2494
Epoch: [3][50/64]Elapsed 2.55s | Loss: 0.2227


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1872 | Average Valid Loss: 0.2220 | Time: 20.72s
Best model found in epoch 3 | valid loss: 0.2220


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.08s | Loss: 0.1672 Grad: 101955.5234 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.60s | Loss: 0.1770 Grad: 153015.3438 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 7.01s | Loss: 0.1748 Grad: 108041.9531 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.41s | Loss: 0.1735 Grad: 75238.8359 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 13.93s | Loss: 0.1720 Grad: 96828.2656 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.43s | Loss: 0.1696 Grad: 76262.9375 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.66s | Loss: 0.1695 Grad: 128445.3984 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.06s | Loss: 0.2500
Epoch: [4][50/64]Elapsed 2.55s | Loss: 0.2246


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1695 | Average Valid Loss: 0.2236 | Time: 20.99s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.08s | Loss: 0.1561 Grad: 98184.3125 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.52s | Loss: 0.1591 Grad: 105567.5078 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 6.86s | Loss: 0.1573 Grad: 104082.7812 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.24s | Loss: 0.1558 Grad: 117269.9844 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 13.62s | Loss: 0.1550 Grad: 112419.7031 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.06s | Loss: 0.1532 Grad: 58187.5156 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.29s | Loss: 0.1531 Grad: 106282.0781 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.05s | Loss: 0.2500
Epoch: [5][50/64]Elapsed 2.54s | Loss: 0.2261


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1531 | Average Valid Loss: 0.2270 | Time: 20.60s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.06s | Loss: 0.1270 Grad: 91206.5703 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.48s | Loss: 0.1473 Grad: 124102.3828 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 6.84s | Loss: 0.1450 Grad: 94885.7656 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.26s | Loss: 0.1437 Grad: 64555.1016 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 13.72s | Loss: 0.1432 Grad: 82692.7422 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.19s | Loss: 0.1422 Grad: 69349.7891 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.43s | Loss: 0.1422 Grad: 111221.2109 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.05s | Loss: 0.2574
Epoch: [6][50/64]Elapsed 2.54s | Loss: 0.2236


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1422 | Average Valid Loss: 0.2249 | Time: 20.74s
Fold: 2 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.06s | Loss: 0.5714 Grad: 264439.3438 LR: 4.0000e-06
Epoch: [1][50/254]Elapsed 3.45s | Loss: 0.4645 Grad: 139453.5938 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 6.89s | Loss: 0.4393 Grad: 57718.1406 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.37s | Loss: 0.4202 Grad: 42862.3320 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 13.85s | Loss: 0.3938 Grad: 40566.6680 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.33s | Loss: 0.3704 Grad: 32381.2695 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.56s | Loss: 0.3689 Grad: 43699.5508 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.05s | Loss: 0.3022
Epoch: [1][50/64]Elapsed 2.55s | Loss: 0.2783


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3689 | Average Valid Loss: 0.2699 | Time: 20.88s
Best model found in epoch 1 | valid loss: 0.2699


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.06s | Loss: 0.3290 Grad: 180054.5469 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.54s | Loss: 0.2422 Grad: 133155.2500 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.00s | Loss: 0.2295 Grad: 122587.8750 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.46s | Loss: 0.2272 Grad: 94410.3672 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 13.93s | Loss: 0.2232 Grad: 119755.6641 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.39s | Loss: 0.2193 Grad: 79861.3906 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.62s | Loss: 0.2190 Grad: 133940.4062 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.05s | Loss: 0.2180
Epoch: [2][50/64]Elapsed 2.55s | Loss: 0.2358


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2190 | Average Valid Loss: 0.2295 | Time: 20.95s
Best model found in epoch 2 | valid loss: 0.2295


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.06s | Loss: 0.2281 Grad: 96458.8125 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.51s | Loss: 0.2003 Grad: 78984.8984 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 6.94s | Loss: 0.1923 Grad: 108648.3984 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.33s | Loss: 0.1928 Grad: 87815.0469 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 13.75s | Loss: 0.1921 Grad: 88756.4609 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.24s | Loss: 0.1907 Grad: 69307.1484 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.47s | Loss: 0.1905 Grad: 105696.2578 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.05s | Loss: 0.1888
Epoch: [3][50/64]Elapsed 2.55s | Loss: 0.2277


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1905 | Average Valid Loss: 0.2222 | Time: 20.80s
Best model found in epoch 3 | valid loss: 0.2222


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.06s | Loss: 0.1833 Grad: 95302.0000 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.53s | Loss: 0.1760 Grad: 84530.0156 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 7.00s | Loss: 0.1683 Grad: 81608.6797 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.51s | Loss: 0.1693 Grad: 79513.1875 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 13.92s | Loss: 0.1702 Grad: 86409.6641 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.38s | Loss: 0.1688 Grad: 68504.6172 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.61s | Loss: 0.1688 Grad: 98646.5625 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.06s | Loss: 0.1966
Epoch: [4][50/64]Elapsed 2.59s | Loss: 0.2286


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1688 | Average Valid Loss: 0.2225 | Time: 20.98s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.08s | Loss: 0.1709 Grad: 100179.3281 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.52s | Loss: 0.1621 Grad: 85282.1172 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 6.97s | Loss: 0.1574 Grad: 101899.0781 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.58s | Loss: 0.1570 Grad: 67581.6562 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 14.12s | Loss: 0.1575 Grad: 69337.9844 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.61s | Loss: 0.1564 Grad: 69393.6016 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.84s | Loss: 0.1562 Grad: 104552.4688 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.05s | Loss: 0.1905
Epoch: [5][50/64]Elapsed 2.62s | Loss: 0.2301


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1562 | Average Valid Loss: 0.2241 | Time: 21.27s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.06s | Loss: 0.1771 Grad: 101256.0312 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.46s | Loss: 0.1506 Grad: 74801.3203 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 6.92s | Loss: 0.1435 Grad: 94179.9688 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.36s | Loss: 0.1438 Grad: 67197.7734 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 13.77s | Loss: 0.1440 Grad: 74371.7734 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.15s | Loss: 0.1431 Grad: 92009.8594 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.38s | Loss: 0.1432 Grad: 118386.0625 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.05s | Loss: 0.2001
Epoch: [6][50/64]Elapsed 2.54s | Loss: 0.2325


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1432 | Average Valid Loss: 0.2264 | Time: 20.69s
Fold: 3 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.06s | Loss: 0.5603 Grad: nan LR: 4.0000e-06




Epoch: [1][50/254]Elapsed 3.53s | Loss: 0.4845 Grad: 152488.4375 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 6.91s | Loss: 0.4569 Grad: 113469.5703 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.33s | Loss: 0.4400 Grad: 119099.0312 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 13.79s | Loss: 0.4110 Grad: 111728.8750 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.26s | Loss: 0.3815 Grad: 58026.6875 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.50s | Loss: 0.3798 Grad: 84420.7344 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.05s | Loss: 0.3052
Epoch: [1][50/64]Elapsed 2.54s | Loss: 0.2493


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3798 | Average Valid Loss: 0.2543 | Time: 20.82s
Best model found in epoch 1 | valid loss: 0.2543


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.06s | Loss: 0.3058 Grad: 152288.4219 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.57s | Loss: 0.2487 Grad: 149153.7812 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.04s | Loss: 0.2403 Grad: 105489.0156 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.53s | Loss: 0.2392 Grad: 117486.0078 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 13.97s | Loss: 0.2326 Grad: 116708.5312 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.35s | Loss: 0.2266 Grad: 64124.5781 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.58s | Loss: 0.2261 Grad: 186849.0156 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.06s | Loss: 0.2441
Epoch: [2][50/64]Elapsed 2.60s | Loss: 0.2183


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2261 | Average Valid Loss: 0.2228 | Time: 20.96s
Best model found in epoch 2 | valid loss: 0.2228


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.06s | Loss: 0.2158 Grad: 103087.3594 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.46s | Loss: 0.2076 Grad: 87855.3281 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 6.83s | Loss: 0.2000 Grad: 99872.4531 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.25s | Loss: 0.2010 Grad: 81679.0547 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 13.77s | Loss: 0.1981 Grad: 91254.1406 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.26s | Loss: 0.1944 Grad: 57435.1758 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.49s | Loss: 0.1941 Grad: 151204.1406 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.06s | Loss: 0.2467
Epoch: [3][50/64]Elapsed 2.58s | Loss: 0.2153


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1941 | Average Valid Loss: 0.2194 | Time: 20.85s
Best model found in epoch 3 | valid loss: 0.2194


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.08s | Loss: 0.1987 Grad: 88818.6406 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.52s | Loss: 0.1847 Grad: 83496.5078 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 6.98s | Loss: 0.1802 Grad: 107739.0312 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.47s | Loss: 0.1811 Grad: 81127.3438 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 13.86s | Loss: 0.1788 Grad: 85240.0859 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.26s | Loss: 0.1745 Grad: 69666.0781 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.49s | Loss: 0.1742 Grad: 144311.0938 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.05s | Loss: 0.2490
Epoch: [4][50/64]Elapsed 2.55s | Loss: 0.2179


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1742 | Average Valid Loss: 0.2219 | Time: 20.82s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.06s | Loss: 0.1906 Grad: 82977.5391 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.60s | Loss: 0.1668 Grad: 77782.0938 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 7.07s | Loss: 0.1607 Grad: 108057.1172 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.56s | Loss: 0.1611 Grad: 73206.2656 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 14.03s | Loss: 0.1603 Grad: 88184.8516 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.51s | Loss: 0.1575 Grad: 54020.2930 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.75s | Loss: 0.1573 Grad: 147243.6875 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.06s | Loss: 0.2589
Epoch: [5][50/64]Elapsed 2.57s | Loss: 0.2222


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1573 | Average Valid Loss: 0.2265 | Time: 21.09s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.08s | Loss: 0.1633 Grad: 86311.4453 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.57s | Loss: 0.1515 Grad: 77258.2266 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 7.05s | Loss: 0.1477 Grad: 85174.1328 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.43s | Loss: 0.1490 Grad: 83347.5469 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 13.88s | Loss: 0.1474 Grad: 65400.1406 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.29s | Loss: 0.1447 Grad: 62440.7422 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.52s | Loss: 0.1445 Grad: 126965.3828 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.06s | Loss: 0.2541
Epoch: [6][50/64]Elapsed 2.55s | Loss: 0.2267


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1445 | Average Valid Loss: 0.2309 | Time: 20.84s
Fold: 4 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.06s | Loss: 0.5792 Grad: 219915.9219 LR: 4.0000e-06




Epoch: [1][50/254]Elapsed 3.52s | Loss: 0.4893 Grad: 84745.9453 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 6.97s | Loss: 0.4595 Grad: 49988.1172 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.43s | Loss: 0.4328 Grad: 42992.1953 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 13.89s | Loss: 0.4040 Grad: 36164.2383 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.35s | Loss: 0.3752 Grad: 28878.4062 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.59s | Loss: 0.3737 Grad: 44991.6758 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.05s | Loss: 0.3792
Epoch: [1][50/64]Elapsed 2.55s | Loss: 0.2813


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3737 | Average Valid Loss: 0.2820 | Time: 20.92s
Best model found in epoch 1 | valid loss: 0.2820


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.06s | Loss: 0.3204 Grad: 151825.0312 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.52s | Loss: 0.2390 Grad: 104404.7422 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.00s | Loss: 0.2357 Grad: 105228.6328 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.46s | Loss: 0.2299 Grad: 94001.0312 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 13.93s | Loss: 0.2243 Grad: 77119.2891 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.39s | Loss: 0.2187 Grad: 85521.8438 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.63s | Loss: 0.2185 Grad: 145509.1875 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.06s | Loss: 0.3172
Epoch: [2][50/64]Elapsed 2.55s | Loss: 0.2457


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2185 | Average Valid Loss: 0.2464 | Time: 20.95s
Best model found in epoch 2 | valid loss: 0.2464


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.06s | Loss: 0.2860 Grad: 114013.4531 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.48s | Loss: 0.1967 Grad: 79503.9062 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 6.98s | Loss: 0.1943 Grad: 80553.4141 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.44s | Loss: 0.1945 Grad: 78792.8594 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 13.90s | Loss: 0.1921 Grad: 67619.0781 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.38s | Loss: 0.1892 Grad: 82045.0312 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.61s | Loss: 0.1889 Grad: 150340.2344 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.05s | Loss: 0.3104
Epoch: [3][50/64]Elapsed 2.54s | Loss: 0.2411


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1889 | Average Valid Loss: 0.2412 | Time: 20.93s
Best model found in epoch 3 | valid loss: 0.2412


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.06s | Loss: 0.2473 Grad: 92279.7734 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.53s | Loss: 0.1774 Grad: 71364.8203 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 6.99s | Loss: 0.1768 Grad: 70907.4609 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.51s | Loss: 0.1766 Grad: 96141.6641 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 14.06s | Loss: 0.1740 Grad: 52853.8281 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.55s | Loss: 0.1709 Grad: 72424.9531 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.78s | Loss: 0.1707 Grad: 139214.0781 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.06s | Loss: 0.3103
Epoch: [4][50/64]Elapsed 2.55s | Loss: 0.2430


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1707 | Average Valid Loss: 0.2424 | Time: 21.11s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.08s | Loss: 0.2214 Grad: 117490.8984 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.57s | Loss: 0.1599 Grad: 73340.3125 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 7.01s | Loss: 0.1597 Grad: 101037.4062 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.49s | Loss: 0.1591 Grad: 52073.1836 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 13.96s | Loss: 0.1579 Grad: 65826.1094 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.51s | Loss: 0.1554 Grad: 71239.8047 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.74s | Loss: 0.1553 Grad: 126597.9766 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.05s | Loss: 0.3253
Epoch: [5][50/64]Elapsed 2.55s | Loss: 0.2465


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1553 | Average Valid Loss: 0.2459 | Time: 21.06s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.06s | Loss: 0.2577 Grad: 118668.1875 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.50s | Loss: 0.1486 Grad: 84628.1484 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 6.88s | Loss: 0.1455 Grad: 93573.3594 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.27s | Loss: 0.1465 Grad: 58606.5195 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 13.68s | Loss: 0.1457 Grad: 67193.9688 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.17s | Loss: 0.1435 Grad: 99349.8906 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.40s | Loss: 0.1435 Grad: 121446.5312 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.05s | Loss: 0.3413
Epoch: [6][50/64]Elapsed 2.57s | Loss: 0.2483


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1435 | Average Valid Loss: 0.2473 | Time: 20.75s
CV Result (Stage=2): 0.4612615043492557 (torch) | 0.4612615035278433 (kaggle)
Elapse: 34.95 min 


In [None]:
# Shape smoke test: build a dataset over train_easy in 'test' mode, take its
# first item, run it through a newly constructed CustomModel and print shapes.
dataset = CustomDataset(train_easy, TARGETS, ModelConfig, all_specs, all_eegs, mode='test')

# Index 0 yields a pair; both elements expose .shape.
X, y = dataset[0]
print(X.shape, y.shape)

# NOTE(review): pretrained=True presumably fetches/loads backbone weights — confirm.
# NOTE(review): no model.eval() / torch.no_grad() here; acceptable for a shape
# check, but do not reuse y_pred as a real prediction without confirming.
model = CustomModel(ModelConfig, num_classes=6, pretrained=True)
# unsqueeze(0) adds a leading axis of size 1 so the single sample is passed as a batch.
y_pred = model(X.unsqueeze(0))

print(y_pred.shape)

In [44]:
from kl_divergence import score as kl_score


def calc_kl_div(p, q, criterion):
    """Return the scalar loss of `criterion` for one prediction/target pair.

    Args:
        p: 1-D numpy array of raw prediction scores; log-softmax is applied here.
        q: 1-D numpy array holding the target values, passed to `criterion` as-is.
        criterion: callable taking (log-probabilities, target) tensors and
            returning a tensor that supports `.item()`.

    Returns:
        The criterion value as a Python number.
    """

    def _as_batch(values):
        # Cast to float32 and prepend an axis so the row becomes a batch of one.
        return torch.tensor(values.astype(np.float32)).unsqueeze(0)

    log_probs = F.log_softmax(_as_batch(p), dim=1)
    target = _as_batch(q)
    return criterion(log_probs, target).item()

def calc_kaggle_score(solution, submission):
    """Score a single row with the Kaggle KL-divergence metric (`kl_score`).

    Args:
        solution: pandas Series with 'eeg_id' followed by the TARGETS entries.
        submission: pandas Series with 'eeg_id' followed by the predicted
            entries; its labels are renamed to ['eeg_id'] + TARGETS.

    Returns:
        Whatever `kl_score` returns for the two one-row frames.
    """
    id_and_targets = ['eeg_id'] + TARGETS

    # Series -> one-row DataFrame, then force the vote columns to float32.
    truth_frame = solution.to_frame().T
    truth_frame[TARGETS] = truth_frame[TARGETS].astype(np.float32)

    # Same for the prediction row, after aligning its column names with the truth.
    pred_frame = submission.to_frame().T
    pred_frame.columns = id_and_targets
    pred_frame[TARGETS] = pred_frame[TARGETS].astype(np.float32)

    return kl_score(truth_frame, pred_frame, 'eeg_id')

In [51]:
def evaluate_oof(oof_csv_path):
    """Score an out-of-fold prediction CSV with torch KLDivLoss and the Kaggle metric.

    The CSV is expected to contain 'eeg_id', the TARGETS columns and the
    TARGETS_PRED columns. TARGETS_PRED is treated as raw scores: log-softmax
    is applied for the torch loss and softmax for the Kaggle metric.
    NOTE(review): if the CSV already stores probabilities, both steps would
    normalise twice — confirm against the code that writes the OOF file.

    Args:
        oof_csv_path: path of the OOF CSV to read.

    Returns:
        Tuple (oof_df, kl_loss_all, kaggle_score_all) where
        - oof_df is the loaded frame with added 'kl_loss' and 'kaggle_score'
          columns and with TARGETS_PRED overwritten by softmax probabilities,
        - kl_loss_all is the value returned by the criterion over all rows
          (a torch tensor; it is not converted with .item()),
        - kaggle_score_all is the value returned by `kl_score` over all rows.

    Side effects:
        Prints the overall and per-row-mean values of both metrics.
    """
    oof_df = pd.read_csv(oof_csv_path)
    softmax = nn.Softmax(dim=1)
    criterion = nn.KLDivLoss(reduction="batchmean")

    # Per-row torch KL loss (each row is scored as a batch of one).
    oof_df["kl_loss"] = oof_df.apply(
        lambda row: calc_kl_div(row[TARGETS_PRED].values, row[TARGETS].values, criterion),
        axis=1,
    )

    # Same loss over the whole frame in a single call.
    kl_loss_all = criterion(
        F.log_softmax(torch.tensor(oof_df[TARGETS_PRED].values.astype(np.float32)), dim=1),
        torch.tensor(oof_df[TARGETS].values.astype(np.float32)),
    )

    print(f"KL Loss All: {kl_loss_all}")
    print(f"KL Loss Mean: {oof_df['kl_loss'].mean()}")

    # Turn the predicted scores into probabilities for the Kaggle metric.
    # The prediction columns must be read here: feeding TARGETS instead would
    # overwrite the predictions with a softmax of the labels and score the
    # labels against themselves.
    pred_scores = oof_df[TARGETS_PRED].values.astype(np.float32)
    oof_df[TARGETS_PRED] = softmax(torch.tensor(pred_scores)).numpy()

    solution = oof_df[['eeg_id'] + TARGETS].copy()
    submission = oof_df[['eeg_id'] + TARGETS_PRED].copy()
    # The metric matches columns by name, so predictions take the target names.
    submission.columns = ['eeg_id'] + TARGETS

    kaggle_score_all = kl_score(solution, submission, 'eeg_id')

    # Per-row Kaggle score, computed from one-row frames.
    oof_df['kaggle_score'] = oof_df.apply(
        lambda row: calc_kaggle_score(
            row[['eeg_id'] + TARGETS], row[['eeg_id'] + TARGETS_PRED]
        ),
        axis=1,
    )

    print(f"Kaggle Score All: {kaggle_score_all}")
    print(f"Kaggle Score Mean: {oof_df['kaggle_score'].mean()}")

    return oof_df, kl_loss_all, kaggle_score_all


In [52]:
# Evaluate the two OOF prediction files written under JobConfig.OUTPUT_DIR
# (presumably one per training stage — confirm against the trainer).
# Note: kl_loss_all and kaggle_score_all are rebound by the second call, so
# after this cell they hold the oof_2 values only; the frames are kept separately.
oof_1, kl_loss_all, kaggle_score_all = evaluate_oof(f"{JobConfig.OUTPUT_DIR}/oof_1.csv")
oof_2, kl_loss_all, kaggle_score_all = evaluate_oof(f"{JobConfig.OUTPUT_DIR}/oof_2.csv")

KL Loss All: 0.9855982661247253
KL Loss Mean: 0.9855982593370717


In [None]:
# 4x4 grid of randomly sampled oof_1 rows: each panel overlays the TARGETS
# values ('True') and the TARGETS_PRED values ('Pred') for one row.
fig, axes = plt.subplots(4, 4, figsize=(10, 10), sharex=True, sharey=True)
panels = axes.ravel()

# One random row per panel (a fixed tail slice would be the deterministic alternative).
rows = oof_1.sample(len(panels))

for ax, (idx, row) in zip(panels, rows.iterrows()):
    ax.plot(row[TARGETS].values, label='True')
    ax.plot(row[TARGETS_PRED].values, label='Pred')
    # Title: row index, row['target'] and the per-row KL loss.
    ax.set_title(f"{idx} | {row['target']} | KL: {row['kl_loss']:.4f}")
    ax.set_xticks(range(6))
    ax.set_xticklabels(BRAIN_ACTIVITY)
    ax.grid(True)
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# 4x4 grid over a fixed window of oof_2 (positions 100 onward, one row per
# panel): each panel overlays the TARGETS values ('True') and the
# TARGETS_PRED values ('Pred') for one row.
fig, axes = plt.subplots(4, 4, figsize=(10, 10), sharex=True, sharey=True)
panels = axes.ravel()

# Deterministic slice (random sampling would be the alternative).
rows = oof_2.iloc[100:100+len(panels), :]

for ax, (idx, row) in zip(panels, rows.iterrows()):
    ax.plot(row[TARGETS].values, label='True')
    ax.plot(row[TARGETS_PRED].values, label='Pred')
    # Title: row index, row['target'] and the per-row KL loss.
    ax.set_title(f"{idx} | {row['target']} | KL: {row['kl_loss']:.4f}")
    ax.set_xticks(range(6))
    ax.set_xticklabels(BRAIN_ACTIVITY)
    ax.grid(True)
    ax.legend()

fig.tight_layout()
plt.show()