In [1]:
import pandas as pd 
import numpy as np 
from scipy.stats import entropy
from sklearn.model_selection import GroupKFold
import matplotlib.pyplot as plt

from engine_hms_trainer import *
from engine_hms_model import CustomModel, JobConfig, ModelConfig

import torch
from torch import nn
import torch.nn.functional as F

  _torch_pytree._register_pytree_node(


In [2]:
# Seed the run from the job configuration before anything else is built.
seed_everything(JobConfig.SEED)

# Experiment-specific settings. They are written onto the ModelConfig class
# itself (not a copy), so they stay in effect for the rest of the kernel session.
# NOTE(review): in the recorded run the logged LR is still ~6e-05 at the end of
# epoch 6 and the best validation loss appears at epochs 2-4 in folds 0-2 --
# confirm that the scheduler horizon and EPOCHS are intended to differ.
model_overrides = {
    "EPOCHS": 6,
    "USE_EEG_SPECTROGRAMS": False,
    "MODEL_BACKBONE": 'tf_efficientnet_b0',
    "MODEL_NAME": "ENet_b0_two_stages_xymask",
    "AUGMENT": True,
    "AUGMENTATIONS": ['xy_masking'],
}
for _option, _setting in model_overrides.items():
    setattr(ModelConfig, _option, _setting)

# The predictor is constructed only after every override has been applied.
hms_predictor = HMSPredictor(JobConfig, ModelConfig)

****************************************************************************************************
Script Start: Sat Mar  9 14:35:30 2024
Initializing HMS Predictor...
Model Name: ENet_b0_two_stages_xymask
Drop Rate: 0.15
Drop Path Rate: 0.25
Augment: True
Augmentations: ['xy_masking']
Enropy Split: 5.5
Device: cuda
Output Dir: ./outputs/
****************************************************************************************************


In [3]:
# Load the two training splits plus the spectrogram / EEG collections.
train_easy, train_hard, all_specs, all_eegs = hms_predictor.load_train_data()

train_splits = (train_easy, train_hard)

# Row/column counts of each split (easy first, then hard).
for split_frame in train_splits:
    print(split_frame.shape)

# Total number of missing values per split; both are expected to be 0.
for split_frame in train_splits:
    print(split_frame.isnull().sum().sum())

# Preview the first rows of each split, separated by a blank-looking line.
display(train_easy.head())
print(" ")
display(train_hard.head())

(11999, 14)
(5090, 14)
0
0


Unnamed: 0,eeg_id,spectrogram_id,min,max,patient_id,target,total_votes,entropy,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,642382,14960202,1008.0,1032.0,5955,Other,2,7.802343,0.0,0.0,0.0,0.0,0.0,1.0
1,751790,618728447,908.0,908.0,38549,GPD,1,7.802343,0.0,0.0,1.0,0.0,0.0,0.0
2,778705,52296320,0.0,0.0,40955,Other,2,7.68682,0.0,0.0,0.0,0.0,0.0,1.0
3,1629671,2036345030,0.0,160.0,37481,Seizure,51,7.619243,1.0,0.0,0.0,0.0,0.0,0.0
4,2061593,320962633,1450.0,1450.0,23828,Other,1,7.802343,0.0,0.0,0.0,0.0,0.0,1.0


 


Unnamed: 0,eeg_id,spectrogram_id,min,max,patient_id,target,total_votes,entropy,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,568657,789577333,0.0,16.0,20654,Other,48,3.341757,0.0,0.0,0.25,0.0,0.166667,0.583333
1,582999,1552638400,0.0,38.0,20230,LPD,154,3.550549,0.0,0.857143,0.0,0.071429,0.0,0.071429
2,1895581,128369999,1138.0,1138.0,47999,Other,13,3.565051,0.076923,0.0,0.0,0.0,0.076923,0.846154
3,2482631,978166025,1902.0,1944.0,20606,Other,105,1.431066,0.0,0.0,0.133333,0.066667,0.133333,0.666667
4,2521897,673742515,0.0,4.0,62117,Other,24,1.516203,0.0,0.0,0.083333,0.083333,0.333333,0.5


In [4]:
# Flip to True to train on only the first half of each split for fast debugging.
DEBUG_HALF_DATA = False

# The subset is bound to new names instead of overwriting train_easy/train_hard,
# so re-running this cell never halves the data a second time and the full
# frames remain available to later cells.
if DEBUG_HALF_DATA:
    fit_easy = train_easy[:len(train_easy) // 2]
    fit_hard = train_hard[:len(train_hard) // 2]
else:
    fit_easy, fit_hard = train_easy, train_hard

# Run the per-fold training; progress and per-epoch train/valid losses are
# printed by the predictor as it goes.
hms_predictor.train_folds(fit_easy, fit_hard, all_specs, all_eegs)

Fold: 0 First Training


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [1][0/599]Elapsed 1.02s | Loss: 0.8618 Grad: 68081.0781 LR: 4.0000e-06
Epoch: [1][50/599]Elapsed 4.47s | Loss: 0.8274 Grad: 83081.7344 LR: 5.1479e-06
Epoch: [1][100/599]Elapsed 7.88s | Loss: 0.8203 Grad: 68477.1484 LR: 8.5368e-06
Epoch: [1][150/599]Elapsed 11.29s | Loss: 0.8157 Grad: 56965.2031 LR: 1.4005e-05
Epoch: [1][200/599]Elapsed 14.70s | Loss: 0.8114 Grad: 75494.9922 LR: 2.1290e-05
Epoch: [1][250/599]Elapsed 18.11s | Loss: 0.8021 Grad: 75641.4844 LR: 3.0044e-05
Epoch: [1][300/599]Elapsed 21.53s | Loss: 0.7888 Grad: 79959.8438 LR: 3.9848e-05
Epoch: [1][350/599]Elapsed 24.95s | Loss: 0.7756 Grad: 67523.0312 LR: 5.0233e-05
Epoch: [1][400/599]Elapsed 28.37s | Loss: 0.7593 Grad: 73504.3984 LR: 6.0703e-05
Epoch: [1][450/599]Elapsed 31.80s | Loss: 0.7431 Grad: 112829.3672 LR: 7.0757e-05
Epoch: [1][500/599]Elapsed 35.23s | Loss: 0.7288 Grad: 98570.4531 LR: 7.9913e-05
Epoch: [1][550/599]Elapsed 38.65s | Loss: 0.7125 Grad: 114198.7969 LR: 8.7735e-05
Epoch: [1][598/599]Elapsed 41.83

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.08s | Loss: 0.6532
Epoch: [1][50/150]Elapsed 2.54s | Loss: 0.5358
Epoch: [1][100/150]Elapsed 4.99s | Loss: 0.5427


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6982 | Average Valid Loss: 0.5392 | Time: 49.38s
Best model found in epoch 1 | valid loss: 0.5392


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [2][0/599]Elapsed 0.07s | Loss: 0.5243 Grad: 146826.5469 LR: 9.3639e-05
Epoch: [2][50/599]Elapsed 3.49s | Loss: 0.4806 Grad: 138272.5625 LR: 9.7834e-05
Epoch: [2][100/599]Elapsed 6.98s | Loss: 0.4634 Grad: 126305.8594 LR: 9.9837e-05
Epoch: [2][150/599]Elapsed 10.44s | Loss: 0.4673 Grad: 185150.3281 LR: 9.9994e-05
Epoch: [2][200/599]Elapsed 13.88s | Loss: 0.4628 Grad: 56984.5352 LR: 9.9961e-05
Epoch: [2][250/599]Elapsed 17.26s | Loss: 0.4543 Grad: 53325.8906 LR: 9.9899e-05
Epoch: [2][300/599]Elapsed 20.65s | Loss: 0.4503 Grad: 68864.8438 LR: 9.9807e-05
Epoch: [2][350/599]Elapsed 24.15s | Loss: 0.4471 Grad: 62995.4961 LR: 9.9685e-05
Epoch: [2][400/599]Elapsed 27.59s | Loss: 0.4410 Grad: 47953.1484 LR: 9.9535e-05
Epoch: [2][450/599]Elapsed 30.97s | Loss: 0.4368 Grad: 79791.7578 LR: 9.9355e-05
Epoch: [2][500/599]Elapsed 34.33s | Loss: 0.4336 Grad: 81917.0781 LR: 9.9146e-05
Epoch: [2][550/599]Elapsed 37.68s | Loss: 0.4297 Grad: 85611.5234 LR: 9.8908e-05
Epoch: [2][598/599]Elapsed 40.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.05s | Loss: 0.6106
Epoch: [2][50/150]Elapsed 2.52s | Loss: 0.4916
Epoch: [2][100/150]Elapsed 5.00s | Loss: 0.4988


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4265 | Average Valid Loss: 0.4991 | Time: 48.48s
Best model found in epoch 2 | valid loss: 0.4991


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [3][0/599]Elapsed 0.07s | Loss: 0.3234 Grad: 121026.1719 LR: 9.8653e-05
Epoch: [3][50/599]Elapsed 3.57s | Loss: 0.3398 Grad: 138360.7188 LR: 9.8359e-05
Epoch: [3][100/599]Elapsed 7.01s | Loss: 0.3335 Grad: 100027.5625 LR: 9.8036e-05
Epoch: [3][150/599]Elapsed 10.45s | Loss: 0.3473 Grad: 67275.5156 LR: 9.7685e-05
Epoch: [3][200/599]Elapsed 13.82s | Loss: 0.3465 Grad: 58645.1953 LR: 9.7306e-05
Epoch: [3][250/599]Elapsed 17.28s | Loss: 0.3430 Grad: 65104.1406 LR: 9.6899e-05
Epoch: [3][300/599]Elapsed 20.75s | Loss: 0.3460 Grad: 49312.0312 LR: 9.6464e-05
Epoch: [3][350/599]Elapsed 24.19s | Loss: 0.3465 Grad: 77126.7578 LR: 9.6002e-05
Epoch: [3][400/599]Elapsed 27.68s | Loss: 0.3437 Grad: 89757.5156 LR: 9.5513e-05
Epoch: [3][450/599]Elapsed 31.07s | Loss: 0.3403 Grad: 76999.4375 LR: 9.4997e-05
Epoch: [3][500/599]Elapsed 34.46s | Loss: 0.3401 Grad: 92279.7500 LR: 9.4455e-05
Epoch: [3][550/599]Elapsed 37.91s | Loss: 0.3375 Grad: 71634.4062 LR: 9.3886e-05
Epoch: [3][598/599]Elapsed 41.2

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.05s | Loss: 0.6137
Epoch: [3][50/150]Elapsed 2.53s | Loss: 0.4912
Epoch: [3][100/150]Elapsed 5.01s | Loss: 0.4977


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3359 | Average Valid Loss: 0.5008 | Time: 48.82s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [4][0/599]Elapsed 0.06s | Loss: 0.2675 Grad: 130877.3438 LR: 9.3316e-05
Epoch: [4][50/599]Elapsed 3.53s | Loss: 0.2866 Grad: 82683.9531 LR: 9.2697e-05
Epoch: [4][100/599]Elapsed 7.01s | Loss: 0.2799 Grad: 54261.9688 LR: 9.2053e-05
Epoch: [4][150/599]Elapsed 10.49s | Loss: 0.2907 Grad: 45255.4375 LR: 9.1384e-05
Epoch: [4][200/599]Elapsed 13.97s | Loss: 0.2870 Grad: 52986.6875 LR: 9.0691e-05
Epoch: [4][250/599]Elapsed 17.45s | Loss: 0.2865 Grad: 73751.8516 LR: 8.9973e-05
Epoch: [4][300/599]Elapsed 20.96s | Loss: 0.2902 Grad: 60935.7109 LR: 8.9233e-05
Epoch: [4][350/599]Elapsed 24.45s | Loss: 0.2890 Grad: 69817.7969 LR: 8.8469e-05
Epoch: [4][400/599]Elapsed 28.02s | Loss: 0.2871 Grad: 62477.0391 LR: 8.7682e-05
Epoch: [4][450/599]Elapsed 31.46s | Loss: 0.2855 Grad: 102706.7344 LR: 8.6873e-05
Epoch: [4][500/599]Elapsed 34.99s | Loss: 0.2862 Grad: 75845.0703 LR: 8.6043e-05
Epoch: [4][550/599]Elapsed 38.43s | Loss: 0.2849 Grad: 69711.2109 LR: 8.5191e-05
Epoch: [4][598/599]Elapsed 41.76

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.06s | Loss: 0.5910
Epoch: [4][50/150]Elapsed 2.56s | Loss: 0.4945
Epoch: [4][100/150]Elapsed 5.04s | Loss: 0.4949


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2843 | Average Valid Loss: 0.5016 | Time: 49.37s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [5][0/599]Elapsed 0.08s | Loss: 0.2353 Grad: 141100.5000 LR: 8.4354e-05
Epoch: [5][50/599]Elapsed 3.52s | Loss: 0.2393 Grad: 159640.2344 LR: 8.3462e-05
Epoch: [5][100/599]Elapsed 6.98s | Loss: 0.2323 Grad: 57370.4297 LR: 8.2550e-05
Epoch: [5][150/599]Elapsed 10.48s | Loss: 0.2423 Grad: 111005.8125 LR: 8.1619e-05
Epoch: [5][200/599]Elapsed 14.02s | Loss: 0.2431 Grad: 94556.4531 LR: 8.0670e-05
Epoch: [5][250/599]Elapsed 17.49s | Loss: 0.2426 Grad: 54562.3359 LR: 7.9702e-05
Epoch: [5][300/599]Elapsed 21.03s | Loss: 0.2464 Grad: 61739.9023 LR: 7.8717e-05
Epoch: [5][350/599]Elapsed 24.52s | Loss: 0.2465 Grad: 92217.5547 LR: 7.7715e-05
Epoch: [5][400/599]Elapsed 28.08s | Loss: 0.2452 Grad: 29766.9102 LR: 7.6697e-05
Epoch: [5][450/599]Elapsed 31.58s | Loss: 0.2436 Grad: 99118.2188 LR: 7.5663e-05
Epoch: [5][500/599]Elapsed 35.07s | Loss: 0.2443 Grad: 106608.8359 LR: 7.4614e-05
Epoch: [5][550/599]Elapsed 38.57s | Loss: 0.2433 Grad: 80476.6641 LR: 7.3550e-05
Epoch: [5][598/599]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.05s | Loss: 0.6607
Epoch: [5][50/150]Elapsed 2.54s | Loss: 0.5248
Epoch: [5][100/150]Elapsed 5.02s | Loss: 0.5255


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2426 | Average Valid Loss: 0.5290 | Time: 49.51s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [6][0/599]Elapsed 0.07s | Loss: 0.2042 Grad: 159138.3125 LR: 7.2516e-05
Epoch: [6][50/599]Elapsed 3.54s | Loss: 0.2069 Grad: 86808.1406 LR: 7.1426e-05
Epoch: [6][100/599]Elapsed 6.91s | Loss: 0.2034 Grad: 67464.9219 LR: 7.0323e-05
Epoch: [6][150/599]Elapsed 10.38s | Loss: 0.2103 Grad: 60794.1602 LR: 6.9208e-05
Epoch: [6][200/599]Elapsed 13.85s | Loss: 0.2092 Grad: 83211.8828 LR: 6.8082e-05
Epoch: [6][250/599]Elapsed 17.38s | Loss: 0.2097 Grad: 30880.3730 LR: 6.6945e-05
Epoch: [6][300/599]Elapsed 20.89s | Loss: 0.2128 Grad: 35804.3750 LR: 6.5799e-05
Epoch: [6][350/599]Elapsed 24.35s | Loss: 0.2141 Grad: 27790.9082 LR: 6.4642e-05
Epoch: [6][400/599]Elapsed 27.88s | Loss: 0.2121 Grad: 37047.2734 LR: 6.3478e-05
Epoch: [6][450/599]Elapsed 31.39s | Loss: 0.2114 Grad: 41524.5859 LR: 6.2305e-05
Epoch: [6][500/599]Elapsed 34.90s | Loss: 0.2123 Grad: 56474.6250 LR: 6.1125e-05
Epoch: [6][550/599]Elapsed 38.45s | Loss: 0.2116 Grad: 53606.8672 LR: 5.9939e-05
Epoch: [6][598/599]Elapsed 41.79s

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.06s | Loss: 0.6423
Epoch: [6][50/150]Elapsed 2.55s | Loss: 0.5419
Epoch: [6][100/150]Elapsed 5.03s | Loss: 0.5458


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2117 | Average Valid Loss: 0.5491 | Time: 49.39s
Fold: 1 First Training


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [1][0/599]Elapsed 0.06s | Loss: 0.8076 Grad: 70652.9688 LR: 4.0000e-06
Epoch: [1][50/599]Elapsed 3.53s | Loss: 0.8248 Grad: 75112.7500 LR: 5.1479e-06
Epoch: [1][100/599]Elapsed 7.00s | Loss: 0.8139 Grad: 72554.6953 LR: 8.5368e-06
Epoch: [1][150/599]Elapsed 10.48s | Loss: 0.8099 Grad: 63287.0273 LR: 1.4005e-05
Epoch: [1][200/599]Elapsed 13.94s | Loss: 0.8056 Grad: 73193.9766 LR: 2.1290e-05
Epoch: [1][250/599]Elapsed 17.44s | Loss: 0.7975 Grad: 73414.6094 LR: 3.0044e-05
Epoch: [1][300/599]Elapsed 20.93s | Loss: 0.7839 Grad: 92076.7656 LR: 3.9848e-05
Epoch: [1][350/599]Elapsed 24.43s | Loss: 0.7726 Grad: 134991.5781 LR: 5.0233e-05
Epoch: [1][400/599]Elapsed 27.91s | Loss: 0.7557 Grad: 67293.8203 LR: 6.0703e-05
Epoch: [1][450/599]Elapsed 31.39s | Loss: 0.7379 Grad: 103851.4375 LR: 7.0757e-05
Epoch: [1][500/599]Elapsed 34.89s | Loss: 0.7238 Grad: 85717.9062 LR: 7.9913e-05
Epoch: [1][550/599]Elapsed 38.44s | Loss: 0.7085 Grad: 133785.1719 LR: 8.7735e-05
Epoch: [1][598/599]Elapsed 41.8

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.05s | Loss: 0.4299
Epoch: [1][50/150]Elapsed 2.54s | Loss: 0.5516
Epoch: [1][100/150]Elapsed 5.02s | Loss: 0.5642


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6954 | Average Valid Loss: 0.5698 | Time: 49.39s
Best model found in epoch 1 | valid loss: 0.5698


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [2][0/599]Elapsed 0.08s | Loss: 0.5868 Grad: 160990.1406 LR: 9.3639e-05
Epoch: [2][50/599]Elapsed 3.56s | Loss: 0.5023 Grad: 126407.7031 LR: 9.7834e-05
Epoch: [2][100/599]Elapsed 7.02s | Loss: 0.4808 Grad: 93542.2422 LR: 9.9837e-05
Epoch: [2][150/599]Elapsed 10.53s | Loss: 0.4821 Grad: 152556.3750 LR: 9.9994e-05
Epoch: [2][200/599]Elapsed 13.97s | Loss: 0.4754 Grad: 85359.3516 LR: 9.9961e-05
Epoch: [2][250/599]Elapsed 17.41s | Loss: 0.4668 Grad: 44447.3672 LR: 9.9899e-05
Epoch: [2][300/599]Elapsed 20.82s | Loss: 0.4614 Grad: 60227.3594 LR: 9.9807e-05
Epoch: [2][350/599]Elapsed 24.26s | Loss: 0.4593 Grad: 81388.5078 LR: 9.9685e-05
Epoch: [2][400/599]Elapsed 27.75s | Loss: 0.4509 Grad: 69126.3438 LR: 9.9535e-05
Epoch: [2][450/599]Elapsed 31.24s | Loss: 0.4457 Grad: 90198.4375 LR: 9.9355e-05
Epoch: [2][500/599]Elapsed 34.69s | Loss: 0.4413 Grad: 64338.8438 LR: 9.9146e-05
Epoch: [2][550/599]Elapsed 38.22s | Loss: 0.4365 Grad: 63968.4844 LR: 9.8908e-05
Epoch: [2][598/599]Elapsed 41.6

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.06s | Loss: 0.4403
Epoch: [2][50/150]Elapsed 2.56s | Loss: 0.4454
Epoch: [2][100/150]Elapsed 5.06s | Loss: 0.4512


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4337 | Average Valid Loss: 0.4577 | Time: 49.26s
Best model found in epoch 2 | valid loss: 0.4577


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [3][0/599]Elapsed 0.06s | Loss: 0.4376 Grad: 127078.7500 LR: 9.8653e-05
Epoch: [3][50/599]Elapsed 3.53s | Loss: 0.3519 Grad: 134958.3438 LR: 9.8359e-05
Epoch: [3][100/599]Elapsed 6.97s | Loss: 0.3503 Grad: 32518.5996 LR: 9.8036e-05
Epoch: [3][150/599]Elapsed 10.45s | Loss: 0.3572 Grad: 47053.9844 LR: 9.7685e-05
Epoch: [3][200/599]Elapsed 13.93s | Loss: 0.3530 Grad: 35638.6562 LR: 9.7306e-05
Epoch: [3][250/599]Elapsed 17.43s | Loss: 0.3523 Grad: 28605.8223 LR: 9.6899e-05
Epoch: [3][300/599]Elapsed 20.93s | Loss: 0.3503 Grad: 36526.7617 LR: 9.6464e-05
Epoch: [3][350/599]Elapsed 24.42s | Loss: 0.3494 Grad: 35101.5547 LR: 9.6002e-05
Epoch: [3][400/599]Elapsed 27.93s | Loss: 0.3440 Grad: 54850.8906 LR: 9.5513e-05
Epoch: [3][450/599]Elapsed 31.42s | Loss: 0.3411 Grad: 42199.1562 LR: 9.4997e-05
Epoch: [3][500/599]Elapsed 34.92s | Loss: 0.3395 Grad: 29201.4492 LR: 9.4455e-05
Epoch: [3][550/599]Elapsed 38.42s | Loss: 0.3381 Grad: 34244.0977 LR: 9.3886e-05
Epoch: [3][598/599]Elapsed 41.72

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.06s | Loss: 0.4284
Epoch: [3][50/150]Elapsed 2.57s | Loss: 0.4424
Epoch: [3][100/150]Elapsed 5.05s | Loss: 0.4457


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3387 | Average Valid Loss: 0.4489 | Time: 49.35s
Best model found in epoch 3 | valid loss: 0.4489


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [4][0/599]Elapsed 0.06s | Loss: 0.4397 Grad: 156814.9375 LR: 9.3316e-05
Epoch: [4][50/599]Elapsed 3.46s | Loss: 0.2918 Grad: 68905.4453 LR: 9.2697e-05
Epoch: [4][100/599]Elapsed 6.86s | Loss: 0.2937 Grad: 77523.4062 LR: 9.2053e-05
Epoch: [4][150/599]Elapsed 10.40s | Loss: 0.3038 Grad: 83695.4141 LR: 9.1384e-05
Epoch: [4][200/599]Elapsed 13.84s | Loss: 0.3009 Grad: 55061.6914 LR: 9.0691e-05
Epoch: [4][250/599]Elapsed 17.30s | Loss: 0.2987 Grad: 71236.2891 LR: 8.9973e-05
Epoch: [4][300/599]Elapsed 20.76s | Loss: 0.2999 Grad: 68911.8984 LR: 8.9233e-05
Epoch: [4][350/599]Elapsed 24.19s | Loss: 0.2973 Grad: 90787.8672 LR: 8.8469e-05
Epoch: [4][400/599]Elapsed 27.61s | Loss: 0.2934 Grad: 87149.0078 LR: 8.7682e-05
Epoch: [4][450/599]Elapsed 31.07s | Loss: 0.2915 Grad: 86085.3906 LR: 8.6873e-05
Epoch: [4][500/599]Elapsed 34.57s | Loss: 0.2904 Grad: 71843.8750 LR: 8.6043e-05
Epoch: [4][550/599]Elapsed 38.09s | Loss: 0.2893 Grad: 90338.6562 LR: 8.5191e-05
Epoch: [4][598/599]Elapsed 41.45s

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.05s | Loss: 0.4282
Epoch: [4][50/150]Elapsed 2.54s | Loss: 0.4379
Epoch: [4][100/150]Elapsed 5.02s | Loss: 0.4428


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2898 | Average Valid Loss: 0.4493 | Time: 49.05s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [5][0/599]Elapsed 0.07s | Loss: 0.3555 Grad: 227940.0156 LR: 8.4354e-05
Epoch: [5][50/599]Elapsed 3.59s | Loss: 0.2435 Grad: 48777.3711 LR: 8.3462e-05
Epoch: [5][100/599]Elapsed 7.10s | Loss: 0.2422 Grad: 80779.6797 LR: 8.2550e-05
Epoch: [5][150/599]Elapsed 10.61s | Loss: 0.2496 Grad: 112875.6406 LR: 8.1619e-05
Epoch: [5][200/599]Elapsed 14.10s | Loss: 0.2509 Grad: 83060.9062 LR: 8.0670e-05
Epoch: [5][250/599]Elapsed 17.61s | Loss: 0.2504 Grad: 55814.5820 LR: 7.9702e-05
Epoch: [5][300/599]Elapsed 21.05s | Loss: 0.2512 Grad: 119741.3516 LR: 7.8717e-05
Epoch: [5][350/599]Elapsed 24.49s | Loss: 0.2492 Grad: 98852.1797 LR: 7.7715e-05
Epoch: [5][400/599]Elapsed 27.91s | Loss: 0.2455 Grad: 63338.0977 LR: 7.6697e-05
Epoch: [5][450/599]Elapsed 31.42s | Loss: 0.2435 Grad: 118086.8672 LR: 7.5663e-05
Epoch: [5][500/599]Elapsed 34.86s | Loss: 0.2432 Grad: 76097.7578 LR: 7.4614e-05
Epoch: [5][550/599]Elapsed 38.27s | Loss: 0.2433 Grad: 100825.1250 LR: 7.3550e-05
Epoch: [5][598/599]Elapsed 41

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.06s | Loss: 0.3947
Epoch: [5][50/150]Elapsed 2.55s | Loss: 0.4413
Epoch: [5][100/150]Elapsed 5.03s | Loss: 0.4509


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2436 | Average Valid Loss: 0.4618 | Time: 49.25s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [6][0/599]Elapsed 0.07s | Loss: 0.3139 Grad: 250170.6250 LR: 7.2516e-05
Epoch: [6][50/599]Elapsed 3.54s | Loss: 0.2159 Grad: 64113.0938 LR: 7.1426e-05
Epoch: [6][100/599]Elapsed 7.08s | Loss: 0.2091 Grad: 59819.3711 LR: 7.0323e-05
Epoch: [6][150/599]Elapsed 10.61s | Loss: 0.2149 Grad: 145976.6250 LR: 6.9208e-05
Epoch: [6][200/599]Elapsed 14.03s | Loss: 0.2143 Grad: 86048.2578 LR: 6.8082e-05
Epoch: [6][250/599]Elapsed 17.49s | Loss: 0.2141 Grad: 63691.8086 LR: 6.6945e-05
Epoch: [6][300/599]Elapsed 21.00s | Loss: 0.2151 Grad: 95061.7656 LR: 6.5799e-05
Epoch: [6][350/599]Elapsed 24.51s | Loss: 0.2132 Grad: 79890.3125 LR: 6.4642e-05
Epoch: [6][400/599]Elapsed 28.01s | Loss: 0.2097 Grad: 108455.6641 LR: 6.3478e-05
Epoch: [6][450/599]Elapsed 31.48s | Loss: 0.2077 Grad: 110815.8125 LR: 6.2305e-05
Epoch: [6][500/599]Elapsed 34.90s | Loss: 0.2071 Grad: 120763.8516 LR: 6.1125e-05
Epoch: [6][550/599]Elapsed 38.36s | Loss: 0.2078 Grad: 85485.8750 LR: 5.9939e-05
Epoch: [6][598/599]Elapsed 41

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.06s | Loss: 0.3611
Epoch: [6][50/150]Elapsed 2.55s | Loss: 0.4511
Epoch: [6][100/150]Elapsed 5.03s | Loss: 0.4640


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2086 | Average Valid Loss: 0.4759 | Time: 49.28s
Fold: 2 First Training


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [1][0/599]Elapsed 0.07s | Loss: 0.8106 Grad: 60681.3125 LR: 4.0000e-06
Epoch: [1][50/599]Elapsed 3.48s | Loss: 0.8284 Grad: 68660.5859 LR: 5.1479e-06
Epoch: [1][100/599]Elapsed 6.92s | Loss: 0.8246 Grad: 64340.4219 LR: 8.5368e-06
Epoch: [1][150/599]Elapsed 10.36s | Loss: 0.8207 Grad: 64689.8516 LR: 1.4005e-05
Epoch: [1][200/599]Elapsed 13.77s | Loss: 0.8167 Grad: 77816.9375 LR: 2.1290e-05
Epoch: [1][250/599]Elapsed 17.24s | Loss: 0.8083 Grad: 75379.7969 LR: 3.0044e-05
Epoch: [1][300/599]Elapsed 20.74s | Loss: 0.7955 Grad: 76514.1328 LR: 3.9848e-05
Epoch: [1][350/599]Elapsed 24.24s | Loss: 0.7813 Grad: 116215.4219 LR: 5.0233e-05
Epoch: [1][400/599]Elapsed 27.80s | Loss: 0.7660 Grad: 77683.7188 LR: 6.0703e-05
Epoch: [1][450/599]Elapsed 31.31s | Loss: 0.7477 Grad: 131603.3906 LR: 7.0757e-05
Epoch: [1][500/599]Elapsed 34.83s | Loss: 0.7316 Grad: 130988.3438 LR: 7.9913e-05
Epoch: [1][550/599]Elapsed 38.38s | Loss: 0.7155 Grad: 103806.3516 LR: 8.7735e-05
Epoch: [1][598/599]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.05s | Loss: 0.3788
Epoch: [1][50/150]Elapsed 2.55s | Loss: 0.5391
Epoch: [1][100/150]Elapsed 5.04s | Loss: 0.5309


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.7004 | Average Valid Loss: 0.5305 | Time: 49.28s
Best model found in epoch 1 | valid loss: 0.5305


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [2][0/599]Elapsed 0.06s | Loss: 0.4442 Grad: 163582.1875 LR: 9.3639e-05
Epoch: [2][50/599]Elapsed 3.56s | Loss: 0.4810 Grad: 152650.4844 LR: 9.7834e-05
Epoch: [2][100/599]Elapsed 7.06s | Loss: 0.4575 Grad: 111470.3438 LR: 9.9837e-05
Epoch: [2][150/599]Elapsed 10.56s | Loss: 0.4610 Grad: 182751.6406 LR: 9.9994e-05
Epoch: [2][200/599]Elapsed 14.05s | Loss: 0.4573 Grad: 110787.5469 LR: 9.9961e-05
Epoch: [2][250/599]Elapsed 17.55s | Loss: 0.4495 Grad: 132482.5781 LR: 9.9899e-05
Epoch: [2][300/599]Elapsed 21.04s | Loss: 0.4441 Grad: 63029.2227 LR: 9.9807e-05
Epoch: [2][350/599]Elapsed 24.48s | Loss: 0.4381 Grad: 55457.8789 LR: 9.9685e-05
Epoch: [2][400/599]Elapsed 27.91s | Loss: 0.4319 Grad: 50512.4258 LR: 9.9535e-05
Epoch: [2][450/599]Elapsed 31.34s | Loss: 0.4275 Grad: 57195.8711 LR: 9.9355e-05
Epoch: [2][500/599]Elapsed 34.77s | Loss: 0.4237 Grad: 61807.7344 LR: 9.9146e-05
Epoch: [2][550/599]Elapsed 38.19s | Loss: 0.4208 Grad: 86279.6953 LR: 9.8908e-05
Epoch: [2][598/599]Elapsed 4

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.05s | Loss: 0.2279
Epoch: [2][50/150]Elapsed 2.54s | Loss: 0.4605
Epoch: [2][100/150]Elapsed 5.03s | Loss: 0.4512


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4182 | Average Valid Loss: 0.4495 | Time: 49.20s
Best model found in epoch 2 | valid loss: 0.4495


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [3][0/599]Elapsed 0.06s | Loss: 0.2688 Grad: 121645.2266 LR: 9.8653e-05
Epoch: [3][50/599]Elapsed 3.58s | Loss: 0.3483 Grad: 167528.1719 LR: 9.8359e-05
Epoch: [3][100/599]Elapsed 6.96s | Loss: 0.3360 Grad: 45110.6641 LR: 9.8036e-05
Epoch: [3][150/599]Elapsed 10.41s | Loss: 0.3435 Grad: 86489.2344 LR: 9.7685e-05
Epoch: [3][200/599]Elapsed 13.81s | Loss: 0.3441 Grad: 70575.2812 LR: 9.7306e-05
Epoch: [3][250/599]Elapsed 17.30s | Loss: 0.3372 Grad: 72044.1406 LR: 9.6899e-05
Epoch: [3][300/599]Elapsed 20.72s | Loss: 0.3380 Grad: 68730.0469 LR: 9.6464e-05
Epoch: [3][350/599]Elapsed 24.20s | Loss: 0.3365 Grad: 59787.3477 LR: 9.6002e-05
Epoch: [3][400/599]Elapsed 27.65s | Loss: 0.3333 Grad: 65854.5469 LR: 9.5513e-05
Epoch: [3][450/599]Elapsed 31.04s | Loss: 0.3308 Grad: 81815.0234 LR: 9.4997e-05
Epoch: [3][500/599]Elapsed 34.45s | Loss: 0.3307 Grad: 73043.0156 LR: 9.4455e-05
Epoch: [3][550/599]Elapsed 37.83s | Loss: 0.3298 Grad: 87225.2734 LR: 9.3886e-05
Epoch: [3][598/599]Elapsed 41.10

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.05s | Loss: 0.2450
Epoch: [3][50/150]Elapsed 2.55s | Loss: 0.4569
Epoch: [3][100/150]Elapsed 5.04s | Loss: 0.4519


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3294 | Average Valid Loss: 0.4488 | Time: 48.72s
Best model found in epoch 3 | valid loss: 0.4488


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [4][0/599]Elapsed 0.06s | Loss: 0.2164 Grad: 119300.3594 LR: 9.3316e-05
Epoch: [4][50/599]Elapsed 3.46s | Loss: 0.2866 Grad: 66102.4453 LR: 9.2697e-05
Epoch: [4][100/599]Elapsed 6.84s | Loss: 0.2788 Grad: 36750.4961 LR: 9.2053e-05
Epoch: [4][150/599]Elapsed 10.21s | Loss: 0.2890 Grad: 66751.3047 LR: 9.1384e-05
Epoch: [4][200/599]Elapsed 13.59s | Loss: 0.2889 Grad: 74222.4688 LR: 9.0691e-05
Epoch: [4][250/599]Elapsed 17.05s | Loss: 0.2833 Grad: 65985.4922 LR: 8.9973e-05
Epoch: [4][300/599]Elapsed 20.53s | Loss: 0.2839 Grad: 100056.0312 LR: 8.9233e-05
Epoch: [4][350/599]Elapsed 23.97s | Loss: 0.2815 Grad: 90487.9062 LR: 8.8469e-05
Epoch: [4][400/599]Elapsed 27.39s | Loss: 0.2784 Grad: 82210.8281 LR: 8.7682e-05
Epoch: [4][450/599]Elapsed 30.80s | Loss: 0.2767 Grad: 80104.6719 LR: 8.6873e-05
Epoch: [4][500/599]Elapsed 34.28s | Loss: 0.2772 Grad: 50413.8438 LR: 8.6043e-05
Epoch: [4][550/599]Elapsed 37.75s | Loss: 0.2776 Grad: 96120.5938 LR: 8.5191e-05
Epoch: [4][598/599]Elapsed 41.03

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.05s | Loss: 0.2769
Epoch: [4][50/150]Elapsed 2.55s | Loss: 0.4481
Epoch: [4][100/150]Elapsed 5.04s | Loss: 0.4453


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2774 | Average Valid Loss: 0.4433 | Time: 48.65s
Best model found in epoch 4 | valid loss: 0.4433


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [5][0/599]Elapsed 0.06s | Loss: 0.2308 Grad: 159205.4375 LR: 8.4354e-05
Epoch: [5][50/599]Elapsed 3.48s | Loss: 0.2567 Grad: 106852.8984 LR: 8.3462e-05
Epoch: [5][100/599]Elapsed 6.92s | Loss: 0.2437 Grad: 50855.5234 LR: 8.2550e-05
Epoch: [5][150/599]Elapsed 10.42s | Loss: 0.2498 Grad: 58615.1836 LR: 8.1619e-05
Epoch: [5][200/599]Elapsed 13.91s | Loss: 0.2489 Grad: 108771.4609 LR: 8.0670e-05
Epoch: [5][250/599]Elapsed 17.42s | Loss: 0.2435 Grad: 85446.2500 LR: 7.9702e-05
Epoch: [5][300/599]Elapsed 20.93s | Loss: 0.2438 Grad: 99253.1094 LR: 7.8717e-05
Epoch: [5][350/599]Elapsed 24.44s | Loss: 0.2424 Grad: 50132.5781 LR: 7.7715e-05
Epoch: [5][400/599]Elapsed 27.94s | Loss: 0.2383 Grad: 70708.8594 LR: 7.6697e-05
Epoch: [5][450/599]Elapsed 31.36s | Loss: 0.2370 Grad: 81946.4141 LR: 7.5663e-05
Epoch: [5][500/599]Elapsed 34.83s | Loss: 0.2373 Grad: 84613.0312 LR: 7.4614e-05
Epoch: [5][550/599]Elapsed 38.24s | Loss: 0.2369 Grad: 100215.7812 LR: 7.3550e-05
Epoch: [5][598/599]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.05s | Loss: 0.2540
Epoch: [5][50/150]Elapsed 2.55s | Loss: 0.4517
Epoch: [5][100/150]Elapsed 5.05s | Loss: 0.4477


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2373 | Average Valid Loss: 0.4509 | Time: 49.15s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [6][0/599]Elapsed 0.06s | Loss: 0.2257 Grad: 181567.3281 LR: 7.2516e-05
Epoch: [6][50/599]Elapsed 3.52s | Loss: 0.2195 Grad: 95615.0547 LR: 7.1426e-05
Epoch: [6][100/599]Elapsed 7.01s | Loss: 0.2054 Grad: 38626.2773 LR: 7.0323e-05
Epoch: [6][150/599]Elapsed 10.43s | Loss: 0.2131 Grad: 76080.6641 LR: 6.9208e-05
Epoch: [6][200/599]Elapsed 13.84s | Loss: 0.2144 Grad: 63116.6289 LR: 6.8082e-05
Epoch: [6][250/599]Elapsed 17.22s | Loss: 0.2098 Grad: 82685.6641 LR: 6.6945e-05
Epoch: [6][300/599]Elapsed 20.65s | Loss: 0.2109 Grad: 81228.8125 LR: 6.5799e-05
Epoch: [6][350/599]Elapsed 24.13s | Loss: 0.2106 Grad: 56149.9766 LR: 6.4642e-05
Epoch: [6][400/599]Elapsed 27.62s | Loss: 0.2077 Grad: 70607.7422 LR: 6.3478e-05
Epoch: [6][450/599]Elapsed 31.11s | Loss: 0.2067 Grad: 106423.0703 LR: 6.2305e-05
Epoch: [6][500/599]Elapsed 34.59s | Loss: 0.2070 Grad: 64966.0430 LR: 6.1125e-05
Epoch: [6][550/599]Elapsed 38.08s | Loss: 0.2064 Grad: 92144.9844 LR: 5.9939e-05
Epoch: [6][598/599]Elapsed 41.43

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.05s | Loss: 0.2612
Epoch: [6][50/150]Elapsed 2.54s | Loss: 0.4652
Epoch: [6][100/150]Elapsed 5.03s | Loss: 0.4659


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2060 | Average Valid Loss: 0.4665 | Time: 49.04s
Fold: 3 First Training


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [1][0/599]Elapsed 0.06s | Loss: 0.8263 Grad: 66292.6562 LR: 4.0000e-06
Epoch: [1][50/599]Elapsed 3.55s | Loss: 0.8301 Grad: 78074.8438 LR: 5.1479e-06
Epoch: [1][100/599]Elapsed 7.03s | Loss: 0.8264 Grad: 69345.1797 LR: 8.5368e-06
Epoch: [1][150/599]Elapsed 10.53s | Loss: 0.8214 Grad: 72371.3906 LR: 1.4005e-05
Epoch: [1][200/599]Elapsed 14.03s | Loss: 0.8159 Grad: 85184.0391 LR: 2.1290e-05
Epoch: [1][250/599]Elapsed 17.48s | Loss: 0.8077 Grad: 80243.4531 LR: 3.0044e-05
Epoch: [1][300/599]Elapsed 20.91s | Loss: 0.7949 Grad: 75729.7031 LR: 3.9848e-05
Epoch: [1][350/599]Elapsed 24.35s | Loss: 0.7821 Grad: 87129.3125 LR: 5.0233e-05
Epoch: [1][400/599]Elapsed 27.79s | Loss: 0.7668 Grad: 77247.2734 LR: 6.0703e-05
Epoch: [1][450/599]Elapsed 31.22s | Loss: 0.7501 Grad: 161374.8438 LR: 7.0757e-05
Epoch: [1][500/599]Elapsed 34.62s | Loss: 0.7365 Grad: 119276.9375 LR: 7.9913e-05
Epoch: [1][550/599]Elapsed 38.02s | Loss: 0.7200 Grad: 126719.9922 LR: 8.7735e-05
Epoch: [1][598/599]Elapsed 41.3

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.05s | Loss: 0.4435
Epoch: [1][50/150]Elapsed 2.54s | Loss: 0.4986
Epoch: [1][100/150]Elapsed 5.04s | Loss: 0.4996


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.7059 | Average Valid Loss: 0.4967 | Time: 48.93s
Best model found in epoch 1 | valid loss: 0.4967


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [2][0/599]Elapsed 0.07s | Loss: 0.5397 Grad: 155350.0156 LR: 9.3639e-05
Epoch: [2][50/599]Elapsed 3.55s | Loss: 0.4972 Grad: 156088.1250 LR: 9.7834e-05
Epoch: [2][100/599]Elapsed 7.02s | Loss: 0.4819 Grad: 108863.1641 LR: 9.9837e-05
Epoch: [2][150/599]Elapsed 10.51s | Loss: 0.4827 Grad: 133896.4531 LR: 9.9994e-05
Epoch: [2][200/599]Elapsed 13.93s | Loss: 0.4759 Grad: 118201.1562 LR: 9.9961e-05
Epoch: [2][250/599]Elapsed 17.31s | Loss: 0.4673 Grad: 144261.0156 LR: 9.9899e-05
Epoch: [2][300/599]Elapsed 20.73s | Loss: 0.4622 Grad: 161011.7500 LR: 9.9807e-05
Epoch: [2][350/599]Elapsed 24.15s | Loss: 0.4568 Grad: 147317.9062 LR: 9.9685e-05
Epoch: [2][400/599]Elapsed 27.57s | Loss: 0.4503 Grad: 105023.6016 LR: 9.9535e-05
Epoch: [2][450/599]Elapsed 31.03s | Loss: 0.4436 Grad: 61014.9961 LR: 9.9355e-05
Epoch: [2][500/599]Elapsed 34.53s | Loss: 0.4411 Grad: 80546.2031 LR: 9.9146e-05
Epoch: [2][550/599]Elapsed 38.03s | Loss: 0.4362 Grad: 48713.0977 LR: 9.8908e-05
Epoch: [2][598/599]Elapse

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.05s | Loss: 0.2866
Epoch: [2][50/150]Elapsed 2.54s | Loss: 0.4057
Epoch: [2][100/150]Elapsed 5.04s | Loss: 0.4076


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4326 | Average Valid Loss: 0.4076 | Time: 48.95s
Best model found in epoch 2 | valid loss: 0.4076


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [3][0/599]Elapsed 0.06s | Loss: 0.3827 Grad: 167417.6094 LR: 9.8653e-05
Epoch: [3][50/599]Elapsed 3.50s | Loss: 0.3618 Grad: 75503.3125 LR: 9.8359e-05
Epoch: [3][100/599]Elapsed 6.96s | Loss: 0.3511 Grad: 61321.1680 LR: 9.8036e-05
Epoch: [3][150/599]Elapsed 10.42s | Loss: 0.3615 Grad: 93382.2031 LR: 9.7685e-05
Epoch: [3][200/599]Elapsed 13.87s | Loss: 0.3596 Grad: 61527.7695 LR: 9.7306e-05
Epoch: [3][250/599]Elapsed 17.34s | Loss: 0.3515 Grad: 55234.2812 LR: 9.6899e-05
Epoch: [3][300/599]Elapsed 20.83s | Loss: 0.3520 Grad: 54705.6641 LR: 9.6464e-05
Epoch: [3][350/599]Elapsed 24.32s | Loss: 0.3491 Grad: 58600.8203 LR: 9.6002e-05
Epoch: [3][400/599]Elapsed 27.75s | Loss: 0.3447 Grad: 71742.0000 LR: 9.5513e-05
Epoch: [3][450/599]Elapsed 31.17s | Loss: 0.3430 Grad: 75998.1094 LR: 9.4997e-05
Epoch: [3][500/599]Elapsed 34.59s | Loss: 0.3437 Grad: 115637.4609 LR: 9.4455e-05
Epoch: [3][550/599]Elapsed 38.00s | Loss: 0.3415 Grad: 51960.8359 LR: 9.3886e-05
Epoch: [3][598/599]Elapsed 41.31

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.05s | Loss: 0.2862
Epoch: [3][50/150]Elapsed 2.53s | Loss: 0.3944
Epoch: [3][100/150]Elapsed 5.02s | Loss: 0.3953


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3402 | Average Valid Loss: 0.3965 | Time: 48.91s
Best model found in epoch 3 | valid loss: 0.3965


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [4][0/599]Elapsed 0.06s | Loss: 0.3100 Grad: 166808.4844 LR: 9.3316e-05
Epoch: [4][50/599]Elapsed 3.54s | Loss: 0.3044 Grad: 91465.9609 LR: 9.2697e-05
Epoch: [4][100/599]Elapsed 7.03s | Loss: 0.2993 Grad: 80728.5156 LR: 9.2053e-05
Epoch: [4][150/599]Elapsed 10.47s | Loss: 0.3095 Grad: 116447.3203 LR: 9.1384e-05
Epoch: [4][200/599]Elapsed 13.96s | Loss: 0.3078 Grad: 62806.9258 LR: 9.0691e-05
Epoch: [4][250/599]Elapsed 17.45s | Loss: 0.3007 Grad: 61115.0352 LR: 8.9973e-05
Epoch: [4][300/599]Elapsed 20.93s | Loss: 0.3016 Grad: 78199.1641 LR: 8.9233e-05
Epoch: [4][350/599]Elapsed 24.42s | Loss: 0.2986 Grad: 68446.2578 LR: 8.8469e-05
Epoch: [4][400/599]Elapsed 27.91s | Loss: 0.2935 Grad: 58786.9648 LR: 8.7682e-05
Epoch: [4][450/599]Elapsed 31.40s | Loss: 0.2908 Grad: 139844.0781 LR: 8.6873e-05
Epoch: [4][500/599]Elapsed 34.88s | Loss: 0.2911 Grad: 104197.0078 LR: 8.6043e-05
Epoch: [4][550/599]Elapsed 38.37s | Loss: 0.2898 Grad: 49400.4570 LR: 8.5191e-05
Epoch: [4][598/599]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.06s | Loss: 0.2825
Epoch: [4][50/150]Elapsed 2.55s | Loss: 0.3921
Epoch: [4][100/150]Elapsed 5.03s | Loss: 0.3976


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2890 | Average Valid Loss: 0.3997 | Time: 49.33s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [5][0/599]Elapsed 0.06s | Loss: 0.2100 Grad: 145404.9375 LR: 8.4354e-05
Epoch: [5][50/599]Elapsed 3.53s | Loss: 0.2490 Grad: 188354.9688 LR: 8.3462e-05
Epoch: [5][100/599]Elapsed 7.02s | Loss: 0.2469 Grad: 66064.1406 LR: 8.2550e-05
Epoch: [5][150/599]Elapsed 10.51s | Loss: 0.2595 Grad: 90349.5078 LR: 8.1619e-05
Epoch: [5][200/599]Elapsed 13.97s | Loss: 0.2591 Grad: 103012.7266 LR: 8.0670e-05
Epoch: [5][250/599]Elapsed 17.41s | Loss: 0.2544 Grad: 87515.1172 LR: 7.9702e-05
Epoch: [5][300/599]Elapsed 20.90s | Loss: 0.2564 Grad: 66430.2656 LR: 7.8717e-05
Epoch: [5][350/599]Elapsed 24.43s | Loss: 0.2552 Grad: 65460.6016 LR: 7.7715e-05
Epoch: [5][400/599]Elapsed 27.91s | Loss: 0.2509 Grad: 49852.5938 LR: 7.6697e-05
Epoch: [5][450/599]Elapsed 31.36s | Loss: 0.2489 Grad: 100691.7500 LR: 7.5663e-05
Epoch: [5][500/599]Elapsed 34.86s | Loss: 0.2499 Grad: 104848.1172 LR: 7.4614e-05
Epoch: [5][550/599]Elapsed 38.39s | Loss: 0.2486 Grad: 32520.7188 LR: 7.3550e-05
Epoch: [5][598/599]Elapsed 41

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.06s | Loss: 0.2659
Epoch: [5][50/150]Elapsed 2.53s | Loss: 0.4028
Epoch: [5][100/150]Elapsed 5.01s | Loss: 0.4060


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2477 | Average Valid Loss: 0.4079 | Time: 49.36s


Train:   0%|          | 0/599 [00:00<?, ?batch/s]

Epoch: [6][0/599]Elapsed 0.06s | Loss: 0.1287 Grad: 197655.0156 LR: 7.2516e-05
Epoch: [6][50/599]Elapsed 3.53s | Loss: 0.2257 Grad: 222054.4844 LR: 7.1426e-05
Epoch: [6][100/599]Elapsed 7.01s | Loss: 0.2190 Grad: 127620.8281 LR: 7.0323e-05
Epoch: [6][150/599]Elapsed 10.50s | Loss: 0.2314 Grad: 97770.2812 LR: 6.9208e-05
Epoch: [6][200/599]Elapsed 14.00s | Loss: 0.2266 Grad: 66730.8438 LR: 6.8082e-05
Epoch: [6][250/599]Elapsed 17.51s | Loss: 0.2192 Grad: 81750.8672 LR: 6.6945e-05
Epoch: [6][300/599]Elapsed 21.04s | Loss: 0.2200 Grad: 66116.5078 LR: 6.5799e-05
Epoch: [6][350/599]Elapsed 24.53s | Loss: 0.2170 Grad: 53800.5312 LR: 6.4642e-05
Epoch: [6][400/599]Elapsed 28.04s | Loss: 0.2136 Grad: 66470.5625 LR: 6.3478e-05
Epoch: [6][450/599]Elapsed 31.52s | Loss: 0.2125 Grad: 84726.6797 LR: 6.2305e-05
Epoch: [6][500/599]Elapsed 34.95s | Loss: 0.2131 Grad: 97710.5703 LR: 6.1125e-05
Epoch: [6][550/599]Elapsed 38.37s | Loss: 0.2121 Grad: 58339.1680 LR: 5.9939e-05
Epoch: [6][598/599]Elapsed 41.7

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.05s | Loss: 0.2668
Epoch: [6][50/150]Elapsed 2.54s | Loss: 0.4041
Epoch: [6][100/150]Elapsed 5.03s | Loss: 0.4106


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2102 | Average Valid Loss: 0.4127 | Time: 49.33s
Fold: 4 First Training


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [1][0/600]Elapsed 0.07s | Loss: 0.8121 Grad: 76840.1562 LR: 4.0000e-06
Epoch: [1][50/600]Elapsed 3.62s | Loss: 0.8180 Grad: 67944.8906 LR: 5.1441e-06
Epoch: [1][100/600]Elapsed 7.16s | Loss: 0.8134 Grad: 70874.6875 LR: 8.5219e-06
Epoch: [1][150/600]Elapsed 10.69s | Loss: 0.8082 Grad: 67467.3125 LR: 1.3972e-05
Epoch: [1][200/600]Elapsed 14.22s | Loss: 0.8033 Grad: 76475.2422 LR: 2.1236e-05
Epoch: [1][250/600]Elapsed 17.75s | Loss: 0.7944 Grad: 80010.2812 LR: 2.9966e-05
Epoch: [1][300/600]Elapsed 21.23s | Loss: 0.7828 Grad: 105986.1094 LR: 3.9746e-05
Epoch: [1][350/600]Elapsed 24.67s | Loss: 0.7695 Grad: 102772.5391 LR: 5.0110e-05
Epoch: [1][400/600]Elapsed 28.09s | Loss: 0.7546 Grad: 106261.5703 LR: 6.0565e-05
Epoch: [1][450/600]Elapsed 31.56s | Loss: 0.7365 Grad: 137843.1562 LR: 7.0611e-05
Epoch: [1][500/600]Elapsed 34.97s | Loss: 0.7233 Grad: 127845.0469 LR: 7.9770e-05
Epoch: [1][550/600]Elapsed 38.39s | Loss: 0.7068 Grad: 75683.8828 LR: 8.7605e-05
Epoch: [1][599/600]Elapsed 41

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [1][0/150]Elapsed 0.05s | Loss: 0.5084
Epoch: [1][50/150]Elapsed 2.54s | Loss: 0.4823
Epoch: [1][100/150]Elapsed 5.03s | Loss: 0.4947


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6919 | Average Valid Loss: 0.4984 | Time: 49.43s
Best model found in epoch 1 | valid loss: 0.4984


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [2][0/600]Elapsed 0.07s | Loss: 0.5212 Grad: 90270.5312 LR: 9.3743e-05
Epoch: [2][50/600]Elapsed 3.46s | Loss: 0.4896 Grad: 76492.4609 LR: 9.7891e-05
Epoch: [2][100/600]Elapsed 6.91s | Loss: 0.4775 Grad: 115666.0078 LR: 9.9851e-05
Epoch: [2][150/600]Elapsed 10.39s | Loss: 0.4783 Grad: 148585.4531 LR: 9.9994e-05
Epoch: [2][200/600]Elapsed 13.88s | Loss: 0.4752 Grad: 69747.9062 LR: 9.9961e-05
Epoch: [2][250/600]Elapsed 17.34s | Loss: 0.4666 Grad: 68742.0625 LR: 9.9898e-05
Epoch: [2][300/600]Elapsed 20.79s | Loss: 0.4596 Grad: 57098.4648 LR: 9.9806e-05
Epoch: [2][350/600]Elapsed 24.31s | Loss: 0.4554 Grad: 98235.8203 LR: 9.9684e-05
Epoch: [2][400/600]Elapsed 27.81s | Loss: 0.4489 Grad: 61206.2812 LR: 9.9534e-05
Epoch: [2][450/600]Elapsed 31.33s | Loss: 0.4415 Grad: 77380.2578 LR: 9.9354e-05
Epoch: [2][500/600]Elapsed 34.85s | Loss: 0.4379 Grad: 72071.1406 LR: 9.9145e-05
Epoch: [2][550/600]Elapsed 38.37s | Loss: 0.4339 Grad: 51256.4023 LR: 9.8908e-05
Epoch: [2][599/600]Elapsed 41.82

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [2][0/150]Elapsed 0.05s | Loss: 0.3639
Epoch: [2][50/150]Elapsed 2.54s | Loss: 0.3666
Epoch: [2][100/150]Elapsed 5.03s | Loss: 0.3792


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4302 | Average Valid Loss: 0.3863 | Time: 49.43s
Best model found in epoch 2 | valid loss: 0.3863


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [3][0/600]Elapsed 0.06s | Loss: 0.4192 Grad: 117673.2188 LR: 9.8642e-05
Epoch: [3][50/600]Elapsed 3.53s | Loss: 0.3610 Grad: 96486.0938 LR: 9.8347e-05
Epoch: [3][100/600]Elapsed 7.00s | Loss: 0.3566 Grad: 110302.7266 LR: 9.8024e-05
Epoch: [3][150/600]Elapsed 10.42s | Loss: 0.3606 Grad: 137024.2188 LR: 9.7672e-05
Epoch: [3][200/600]Elapsed 13.92s | Loss: 0.3605 Grad: 88129.9297 LR: 9.7293e-05
Epoch: [3][250/600]Elapsed 17.37s | Loss: 0.3562 Grad: 67358.4688 LR: 9.6886e-05
Epoch: [3][300/600]Elapsed 20.81s | Loss: 0.3537 Grad: 58624.5195 LR: 9.6451e-05
Epoch: [3][350/600]Elapsed 24.25s | Loss: 0.3525 Grad: 100250.5000 LR: 9.5989e-05
Epoch: [3][400/600]Elapsed 27.69s | Loss: 0.3487 Grad: 67715.4453 LR: 9.5500e-05
Epoch: [3][450/600]Elapsed 31.18s | Loss: 0.3444 Grad: 93299.8203 LR: 9.4984e-05
Epoch: [3][500/600]Elapsed 34.67s | Loss: 0.3435 Grad: 74751.6406 LR: 9.4442e-05
Epoch: [3][550/600]Elapsed 38.16s | Loss: 0.3413 Grad: 59283.6289 LR: 9.3874e-05
Epoch: [3][599/600]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [3][0/150]Elapsed 0.05s | Loss: 0.3864
Epoch: [3][50/150]Elapsed 2.54s | Loss: 0.3664
Epoch: [3][100/150]Elapsed 5.03s | Loss: 0.3766


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3404 | Average Valid Loss: 0.3842 | Time: 49.17s
Best model found in epoch 3 | valid loss: 0.3842


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [4][0/600]Elapsed 0.06s | Loss: 0.3559 Grad: 138487.4531 LR: 9.3280e-05
Epoch: [4][50/600]Elapsed 3.45s | Loss: 0.2971 Grad: 99315.3516 LR: 9.2660e-05
Epoch: [4][100/600]Elapsed 6.87s | Loss: 0.3061 Grad: 123217.4453 LR: 9.2016e-05
Epoch: [4][150/600]Elapsed 10.37s | Loss: 0.3086 Grad: 134134.3594 LR: 9.1347e-05
Epoch: [4][200/600]Elapsed 13.86s | Loss: 0.3070 Grad: 92682.7109 LR: 9.0653e-05
Epoch: [4][250/600]Elapsed 17.26s | Loss: 0.3047 Grad: 70472.2031 LR: 8.9936e-05
Epoch: [4][300/600]Elapsed 20.72s | Loss: 0.3024 Grad: 69025.9297 LR: 8.9195e-05
Epoch: [4][350/600]Elapsed 24.22s | Loss: 0.3000 Grad: 130366.5078 LR: 8.8431e-05
Epoch: [4][400/600]Elapsed 27.74s | Loss: 0.2962 Grad: 98738.5312 LR: 8.7645e-05
Epoch: [4][450/600]Elapsed 31.23s | Loss: 0.2929 Grad: 96190.5234 LR: 8.6836e-05
Epoch: [4][500/600]Elapsed 34.68s | Loss: 0.2933 Grad: 68916.1094 LR: 8.6006e-05
Epoch: [4][550/600]Elapsed 38.15s | Loss: 0.2907 Grad: 59371.0234 LR: 8.5155e-05
Epoch: [4][599/600]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [4][0/150]Elapsed 0.05s | Loss: 0.4068
Epoch: [4][50/150]Elapsed 2.54s | Loss: 0.3774
Epoch: [4][100/150]Elapsed 5.04s | Loss: 0.3862


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2905 | Average Valid Loss: 0.3917 | Time: 49.17s


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [5][0/600]Elapsed 0.06s | Loss: 0.3279 Grad: 136104.3594 LR: 8.4283e-05
Epoch: [5][50/600]Elapsed 3.47s | Loss: 0.2600 Grad: 115267.7031 LR: 8.3391e-05
Epoch: [5][100/600]Elapsed 6.87s | Loss: 0.2565 Grad: 71099.7422 LR: 8.2479e-05
Epoch: [5][150/600]Elapsed 10.33s | Loss: 0.2629 Grad: 116346.5703 LR: 8.1549e-05
Epoch: [5][200/600]Elapsed 13.82s | Loss: 0.2602 Grad: 99342.7344 LR: 8.0599e-05
Epoch: [5][250/600]Elapsed 17.31s | Loss: 0.2608 Grad: 72624.2656 LR: 7.9632e-05
Epoch: [5][300/600]Elapsed 20.81s | Loss: 0.2607 Grad: 65400.6055 LR: 7.8648e-05
Epoch: [5][350/600]Elapsed 24.24s | Loss: 0.2594 Grad: 107388.6016 LR: 7.7646e-05
Epoch: [5][400/600]Elapsed 27.66s | Loss: 0.2558 Grad: 82005.1953 LR: 7.6629e-05
Epoch: [5][450/600]Elapsed 31.12s | Loss: 0.2507 Grad: 53453.7461 LR: 7.5595e-05
Epoch: [5][500/600]Elapsed 34.54s | Loss: 0.2507 Grad: 57025.0781 LR: 7.4547e-05
Epoch: [5][550/600]Elapsed 37.96s | Loss: 0.2492 Grad: 60398.5195 LR: 7.3484e-05
Epoch: [5][599/600]Elapsed 41.

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [5][0/150]Elapsed 0.05s | Loss: 0.3293
Epoch: [5][50/150]Elapsed 2.55s | Loss: 0.3626
Epoch: [5][100/150]Elapsed 5.03s | Loss: 0.3708


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2489 | Average Valid Loss: 0.3800 | Time: 48.92s
Best model found in epoch 5 | valid loss: 0.3800


Train:   0%|          | 0/600 [00:00<?, ?batch/s]

Epoch: [6][0/600]Elapsed 0.06s | Loss: 0.3538 Grad: 189309.0000 LR: 7.2408e-05
Epoch: [6][50/600]Elapsed 3.60s | Loss: 0.2225 Grad: 65321.4961 LR: 7.1318e-05
Epoch: [6][100/600]Elapsed 7.08s | Loss: 0.2207 Grad: 71258.9844 LR: 7.0216e-05
Epoch: [6][150/600]Elapsed 10.54s | Loss: 0.2255 Grad: 66719.6016 LR: 6.9102e-05
Epoch: [6][200/600]Elapsed 13.94s | Loss: 0.2269 Grad: 145121.7812 LR: 6.7976e-05
Epoch: [6][250/600]Elapsed 17.45s | Loss: 0.2258 Grad: 56446.4453 LR: 6.6841e-05
Epoch: [6][300/600]Elapsed 20.96s | Loss: 0.2247 Grad: 97016.5078 LR: 6.5695e-05
Epoch: [6][350/600]Elapsed 24.47s | Loss: 0.2220 Grad: 119819.5156 LR: 6.4540e-05
Epoch: [6][400/600]Elapsed 27.93s | Loss: 0.2200 Grad: 116030.3203 LR: 6.3377e-05
Epoch: [6][450/600]Elapsed 31.34s | Loss: 0.2180 Grad: 109662.4375 LR: 6.2205e-05
Epoch: [6][500/600]Elapsed 34.74s | Loss: 0.2175 Grad: 47371.6523 LR: 6.1027e-05
Epoch: [6][550/600]Elapsed 38.13s | Loss: 0.2166 Grad: 41076.7891 LR: 5.9842e-05
Epoch: [6][599/600]Elapsed 41

Valid:   0%|          | 0/150 [00:00<?, ?batch/s]

Epoch: [6][0/150]Elapsed 0.05s | Loss: 0.3583
Epoch: [6][50/150]Elapsed 2.55s | Loss: 0.3695
Epoch: [6][100/150]Elapsed 5.05s | Loss: 0.3793


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2161 | Average Valid Loss: 0.3915 | Time: 49.09s
CV Result (Stage=1): 0.9182993253742385 (torch) | 0.9182993255320206 (kaggle)
Elapse: 24.63 min 
Fold: 0 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.07s | Loss: 0.4125 Grad: 187878.8281 LR: 4.0000e-06
Epoch: [1][50/254]Elapsed 3.62s | Loss: 0.4349 Grad: 137255.8750 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 7.13s | Loss: 0.4153 Grad: 93811.8828 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.67s | Loss: 0.3933 Grad: 76873.0000 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 14.14s | Loss: 0.3667 Grad: 50817.1328 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.60s | Loss: 0.3466 Grad: 47387.6797 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.84s | Loss: 0.3457 Grad: 85293.1797 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.06s | Loss: 0.2502
Epoch: [1][50/64]Elapsed 2.57s | Loss: 0.2612


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3457 | Average Valid Loss: 0.2559 | Time: 21.22s
Best model found in epoch 1 | valid loss: 0.2559


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.07s | Loss: 0.2388 Grad: 91275.3906 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.59s | Loss: 0.2325 Grad: 83043.2656 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.13s | Loss: 0.2333 Grad: 86987.0078 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.63s | Loss: 0.2322 Grad: 81805.2969 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 14.11s | Loss: 0.2275 Grad: 56796.7070 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.64s | Loss: 0.2235 Grad: 63340.8047 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.87s | Loss: 0.2234 Grad: 117359.3750 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.06s | Loss: 0.2303
Epoch: [2][50/64]Elapsed 2.56s | Loss: 0.2291


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2234 | Average Valid Loss: 0.2266 | Time: 21.22s
Best model found in epoch 2 | valid loss: 0.2266


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.07s | Loss: 0.1930 Grad: 71390.8906 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.58s | Loss: 0.2055 Grad: 68779.7578 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 7.10s | Loss: 0.2005 Grad: 67228.8750 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.59s | Loss: 0.2017 Grad: 72190.9453 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 14.03s | Loss: 0.1988 Grad: 72571.5547 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.56s | Loss: 0.1965 Grad: 64643.5703 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.80s | Loss: 0.1965 Grad: 97214.7109 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.05s | Loss: 0.2138
Epoch: [3][50/64]Elapsed 2.56s | Loss: 0.2230


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1965 | Average Valid Loss: 0.2208 | Time: 21.14s
Best model found in epoch 3 | valid loss: 0.2208


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.06s | Loss: 0.1618 Grad: 61156.9727 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.57s | Loss: 0.1860 Grad: 77712.7031 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 7.10s | Loss: 0.1817 Grad: 65373.9883 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.62s | Loss: 0.1852 Grad: 78474.1562 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 14.16s | Loss: 0.1833 Grad: 71138.1719 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.63s | Loss: 0.1819 Grad: 75946.9062 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.86s | Loss: 0.1819 Grad: 112702.4609 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.05s | Loss: 0.1987
Epoch: [4][50/64]Elapsed 2.55s | Loss: 0.2204


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1819 | Average Valid Loss: 0.2182 | Time: 21.19s
Best model found in epoch 4 | valid loss: 0.2182


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.09s | Loss: 0.1445 Grad: 58185.8945 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.48s | Loss: 0.1710 Grad: 77802.0547 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 6.85s | Loss: 0.1652 Grad: 59102.7773 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.32s | Loss: 0.1679 Grad: 60205.2070 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 13.78s | Loss: 0.1679 Grad: 62376.0000 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.26s | Loss: 0.1663 Grad: 68946.1250 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.49s | Loss: 0.1664 Grad: 94462.6484 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.05s | Loss: 0.1982
Epoch: [5][50/64]Elapsed 2.56s | Loss: 0.2213


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1664 | Average Valid Loss: 0.2192 | Time: 20.82s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.06s | Loss: 0.1388 Grad: 57236.6562 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.54s | Loss: 0.1621 Grad: 55268.4648 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 6.97s | Loss: 0.1572 Grad: 68734.4297 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.44s | Loss: 0.1591 Grad: 75523.9453 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 13.95s | Loss: 0.1576 Grad: 80075.7500 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.44s | Loss: 0.1555 Grad: 69286.2109 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.67s | Loss: 0.1556 Grad: 102668.0156 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.05s | Loss: 0.1949
Epoch: [6][50/64]Elapsed 2.56s | Loss: 0.2206


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1556 | Average Valid Loss: 0.2184 | Time: 21.00s
Fold: 1 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.06s | Loss: 0.4755 Grad: 228704.0781 LR: 4.0000e-06
Epoch: [1][50/254]Elapsed 3.46s | Loss: 0.4483 Grad: 117208.3281 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 6.97s | Loss: 0.4230 Grad: 102364.7812 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.54s | Loss: 0.4020 Grad: 87198.1328 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 14.03s | Loss: 0.3781 Grad: 95982.1406 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.49s | Loss: 0.3576 Grad: 72741.8516 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.72s | Loss: 0.3564 Grad: 80328.8672 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.05s | Loss: 0.2539
Epoch: [1][50/64]Elapsed 2.55s | Loss: 0.2540


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3564 | Average Valid Loss: 0.2533 | Time: 21.04s
Best model found in epoch 1 | valid loss: 0.2533


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.06s | Loss: 0.2512 Grad: 120373.0312 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.57s | Loss: 0.2449 Grad: 108423.4609 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.03s | Loss: 0.2368 Grad: 104595.2734 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.49s | Loss: 0.2316 Grad: 119025.2344 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 13.94s | Loss: 0.2274 Grad: 123996.2031 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.39s | Loss: 0.2228 Grad: 89150.3828 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.62s | Loss: 0.2225 Grad: 122746.8281 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.05s | Loss: 0.2317
Epoch: [2][50/64]Elapsed 2.55s | Loss: 0.2194


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2225 | Average Valid Loss: 0.2190 | Time: 20.94s
Best model found in epoch 2 | valid loss: 0.2190


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.06s | Loss: 0.2017 Grad: 96730.0312 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.50s | Loss: 0.2035 Grad: 102228.7109 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 7.00s | Loss: 0.2024 Grad: 113150.0000 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.50s | Loss: 0.1986 Grad: 71505.0156 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 13.98s | Loss: 0.1972 Grad: 91801.4844 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.42s | Loss: 0.1950 Grad: 72382.7969 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.65s | Loss: 0.1950 Grad: 117705.6172 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.05s | Loss: 0.2262
Epoch: [3][50/64]Elapsed 2.54s | Loss: 0.2136


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1950 | Average Valid Loss: 0.2134 | Time: 20.97s
Best model found in epoch 3 | valid loss: 0.2134


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.06s | Loss: 0.1673 Grad: 88886.5156 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.58s | Loss: 0.1831 Grad: 86996.6562 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 7.11s | Loss: 0.1821 Grad: 99282.7031 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.59s | Loss: 0.1803 Grad: 85388.4141 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 14.13s | Loss: 0.1785 Grad: 110139.5469 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.62s | Loss: 0.1766 Grad: 66318.7578 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.86s | Loss: 0.1765 Grad: 114073.0234 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.05s | Loss: 0.2419
Epoch: [4][50/64]Elapsed 2.56s | Loss: 0.2155


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1765 | Average Valid Loss: 0.2154 | Time: 21.19s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.06s | Loss: 0.1615 Grad: 80043.0391 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.52s | Loss: 0.1714 Grad: 89899.2969 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 7.03s | Loss: 0.1702 Grad: 85649.1797 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.56s | Loss: 0.1683 Grad: 79145.1250 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 14.09s | Loss: 0.1670 Grad: 129376.5625 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.55s | Loss: 0.1663 Grad: 57462.4180 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.78s | Loss: 0.1660 Grad: 94581.7031 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.05s | Loss: 0.2452
Epoch: [5][50/64]Elapsed 2.55s | Loss: 0.2178


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1660 | Average Valid Loss: 0.2188 | Time: 21.10s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.06s | Loss: 0.1498 Grad: 87336.5391 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.54s | Loss: 0.1591 Grad: 75689.3594 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 6.97s | Loss: 0.1568 Grad: 66068.8516 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.43s | Loss: 0.1551 Grad: 83587.8438 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 13.92s | Loss: 0.1548 Grad: 112885.3984 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.37s | Loss: 0.1538 Grad: 61249.0312 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.60s | Loss: 0.1537 Grad: 138972.2344 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.05s | Loss: 0.2435
Epoch: [6][50/64]Elapsed 2.55s | Loss: 0.2172


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1537 | Average Valid Loss: 0.2188 | Time: 20.93s
Fold: 2 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.06s | Loss: 0.5509 Grad: 239064.0312 LR: 4.0000e-06
Epoch: [1][50/254]Elapsed 3.45s | Loss: 0.4367 Grad: 100269.7891 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 6.84s | Loss: 0.4053 Grad: 78855.6719 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.25s | Loss: 0.3895 Grad: 78549.2578 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 13.65s | Loss: 0.3682 Grad: 39347.4297 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.12s | Loss: 0.3485 Grad: 27903.2715 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.35s | Loss: 0.3468 Grad: 34455.3047 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.05s | Loss: 0.2371
Epoch: [1][50/64]Elapsed 2.56s | Loss: 0.2465


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3468 | Average Valid Loss: 0.2439 | Time: 20.68s
Best model found in epoch 1 | valid loss: 0.2439


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.06s | Loss: 0.3162 Grad: 169466.7500 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.52s | Loss: 0.2336 Grad: 106675.5469 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.03s | Loss: 0.2241 Grad: 100746.7500 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.54s | Loss: 0.2225 Grad: 85058.3906 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 14.06s | Loss: 0.2201 Grad: 85882.9922 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.60s | Loss: 0.2174 Grad: 68011.0469 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.84s | Loss: 0.2169 Grad: 121372.2734 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.06s | Loss: 0.1796
Epoch: [2][50/64]Elapsed 2.59s | Loss: 0.2180


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2169 | Average Valid Loss: 0.2159 | Time: 21.21s
Best model found in epoch 2 | valid loss: 0.2159


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.06s | Loss: 0.2298 Grad: 99934.7578 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.54s | Loss: 0.2014 Grad: 76411.3125 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 7.00s | Loss: 0.1948 Grad: 89015.2500 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.46s | Loss: 0.1955 Grad: 88526.6250 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 13.98s | Loss: 0.1948 Grad: 77147.2656 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.41s | Loss: 0.1932 Grad: 67271.5781 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.64s | Loss: 0.1930 Grad: 115520.3203 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.05s | Loss: 0.1757
Epoch: [3][50/64]Elapsed 2.55s | Loss: 0.2142


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1930 | Average Valid Loss: 0.2131 | Time: 20.96s
Best model found in epoch 3 | valid loss: 0.2131


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.06s | Loss: 0.2079 Grad: 103280.8750 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.47s | Loss: 0.1855 Grad: 73037.9453 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 6.87s | Loss: 0.1769 Grad: 92075.0391 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.27s | Loss: 0.1776 Grad: 74007.3047 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 13.75s | Loss: 0.1785 Grad: 85509.7188 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.27s | Loss: 0.1777 Grad: 58744.3672 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.50s | Loss: 0.1776 Grad: 98912.9531 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.05s | Loss: 0.1878
Epoch: [4][50/64]Elapsed 2.55s | Loss: 0.2140


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1776 | Average Valid Loss: 0.2118 | Time: 20.83s
Best model found in epoch 4 | valid loss: 0.2118


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.06s | Loss: 0.1942 Grad: 113977.6641 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.55s | Loss: 0.1690 Grad: 87186.0859 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 6.98s | Loss: 0.1660 Grad: 77873.7969 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.39s | Loss: 0.1669 Grad: 76010.1016 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 13.81s | Loss: 0.1671 Grad: 70211.6328 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.24s | Loss: 0.1662 Grad: 68105.6797 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.47s | Loss: 0.1660 Grad: 102939.6953 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.05s | Loss: 0.1811
Epoch: [5][50/64]Elapsed 2.55s | Loss: 0.2153


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1660 | Average Valid Loss: 0.2133 | Time: 20.79s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.06s | Loss: 0.1974 Grad: 110132.9531 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.47s | Loss: 0.1631 Grad: 73758.8750 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 6.96s | Loss: 0.1560 Grad: 76927.5703 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.40s | Loss: 0.1554 Grad: 70440.6328 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 13.93s | Loss: 0.1554 Grad: 84905.4141 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.42s | Loss: 0.1539 Grad: 63763.6016 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.65s | Loss: 0.1539 Grad: 118239.9844 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.05s | Loss: 0.1882
Epoch: [6][50/64]Elapsed 2.57s | Loss: 0.2142


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1539 | Average Valid Loss: 0.2116 | Time: 20.99s
Best model found in epoch 6 | valid loss: 0.2116
Fold: 3 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.07s | Loss: 0.4871 Grad: 255273.1250 LR: 4.0000e-06
Epoch: [1][50/254]Elapsed 3.57s | Loss: 0.4647 Grad: 85845.8594 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 7.02s | Loss: 0.4281 Grad: 90062.7656 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.50s | Loss: 0.4125 Grad: 102075.9219 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 14.01s | Loss: 0.3888 Grad: 132100.0312 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.46s | Loss: 0.3615 Grad: 66986.9766 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.70s | Loss: 0.3602 Grad: 100346.9766 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.06s | Loss: 0.2599
Epoch: [1][50/64]Elapsed 2.59s | Loss: 0.2478


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3602 | Average Valid Loss: 0.2499 | Time: 21.06s
Best model found in epoch 1 | valid loss: 0.2499


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.08s | Loss: 0.2920 Grad: 140746.4688 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.61s | Loss: 0.2538 Grad: 110985.1562 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.05s | Loss: 0.2396 Grad: 76152.2734 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.48s | Loss: 0.2370 Grad: 94690.6406 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 13.92s | Loss: 0.2309 Grad: 120677.3281 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.37s | Loss: 0.2250 Grad: 71111.9141 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.60s | Loss: 0.2250 Grad: 150240.7344 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.05s | Loss: 0.2152
Epoch: [2][50/64]Elapsed 2.55s | Loss: 0.2141


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2250 | Average Valid Loss: 0.2148 | Time: 20.93s
Best model found in epoch 2 | valid loss: 0.2148


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.06s | Loss: 0.2289 Grad: 104851.9062 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.46s | Loss: 0.2135 Grad: 66296.2344 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 6.95s | Loss: 0.2031 Grad: 68415.7344 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.41s | Loss: 0.2034 Grad: 61739.6992 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 13.87s | Loss: 0.2014 Grad: 95143.1172 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.46s | Loss: 0.1971 Grad: 76845.8594 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.68s | Loss: 0.1969 Grad: 106872.4219 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.05s | Loss: 0.2188
Epoch: [3][50/64]Elapsed 2.55s | Loss: 0.2084


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1969 | Average Valid Loss: 0.2095 | Time: 21.01s
Best model found in epoch 3 | valid loss: 0.2095


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.06s | Loss: 0.1846 Grad: 85762.1797 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.53s | Loss: 0.1946 Grad: 72326.6172 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 7.01s | Loss: 0.1858 Grad: 77762.8672 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.52s | Loss: 0.1867 Grad: 82540.2891 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 13.97s | Loss: 0.1849 Grad: 107621.9453 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.40s | Loss: 0.1807 Grad: 81970.4062 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.64s | Loss: 0.1804 Grad: 113259.2109 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.05s | Loss: 0.2245
Epoch: [4][50/64]Elapsed 2.55s | Loss: 0.2119


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1804 | Average Valid Loss: 0.2126 | Time: 20.96s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.06s | Loss: 0.2288 Grad: 96571.3750 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.57s | Loss: 0.1818 Grad: 67270.6875 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 7.02s | Loss: 0.1712 Grad: 68608.7109 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.47s | Loss: 0.1707 Grad: 72610.6094 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 13.92s | Loss: 0.1685 Grad: 85258.5703 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.35s | Loss: 0.1653 Grad: 51875.8945 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.59s | Loss: 0.1653 Grad: 148628.0781 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.05s | Loss: 0.2294
Epoch: [5][50/64]Elapsed 2.55s | Loss: 0.2145


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1653 | Average Valid Loss: 0.2156 | Time: 20.91s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.06s | Loss: 0.1587 Grad: 73132.4844 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.49s | Loss: 0.1686 Grad: 59882.1719 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 6.91s | Loss: 0.1616 Grad: 76447.6562 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.43s | Loss: 0.1619 Grad: 67082.3281 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 13.93s | Loss: 0.1606 Grad: 88392.3906 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.35s | Loss: 0.1575 Grad: 54894.8867 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.58s | Loss: 0.1574 Grad: 123474.6328 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.05s | Loss: 0.2189
Epoch: [6][50/64]Elapsed 2.55s | Loss: 0.2154


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1574 | Average Valid Loss: 0.2159 | Time: 20.91s
Fold: 4 Second training


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [1][0/254]Elapsed 0.06s | Loss: 0.6422 Grad: 235099.8125 LR: 4.0000e-06
Epoch: [1][50/254]Elapsed 3.48s | Loss: 0.4555 Grad: 81606.5625 LR: 1.0315e-05
Epoch: [1][100/254]Elapsed 6.88s | Loss: 0.4304 Grad: 95448.4062 LR: 2.7599e-05
Epoch: [1][150/254]Elapsed 10.29s | Loss: 0.4050 Grad: 83683.2109 LR: 5.1303e-05
Epoch: [1][200/254]Elapsed 13.81s | Loss: 0.3807 Grad: 62286.2617 LR: 7.5190e-05
Epoch: [1][250/254]Elapsed 17.33s | Loss: 0.3559 Grad: 36916.1641 LR: 9.2976e-05
Epoch: [1][253/254]Elapsed 17.56s | Loss: 0.3547 Grad: 91303.4609 LR: 9.3978e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [1][0/64]Elapsed 0.06s | Loss: 0.3805
Epoch: [1][50/64]Elapsed 2.57s | Loss: 0.2796


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3547 | Average Valid Loss: 0.2815 | Time: 20.91s
Best model found in epoch 1 | valid loss: 0.2815


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [2][0/254]Elapsed 0.07s | Loss: 0.3375 Grad: 114605.0625 LR: 9.3978e-05
Epoch: [2][50/254]Elapsed 3.55s | Loss: 0.2361 Grad: 103848.2812 LR: 1.0000e-04
Epoch: [2][100/254]Elapsed 7.05s | Loss: 0.2336 Grad: 90935.6484 LR: 9.9914e-05
Epoch: [2][150/254]Elapsed 10.52s | Loss: 0.2286 Grad: 80629.2031 LR: 9.9665e-05
Epoch: [2][200/254]Elapsed 14.00s | Loss: 0.2238 Grad: 63933.9531 LR: 9.9253e-05
Epoch: [2][250/254]Elapsed 17.50s | Loss: 0.2186 Grad: 68664.7188 LR: 9.8679e-05
Epoch: [2][253/254]Elapsed 17.74s | Loss: 0.2183 Grad: 119007.9531 LR: 9.8626e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [2][0/64]Elapsed 0.05s | Loss: 0.3234
Epoch: [2][50/64]Elapsed 2.58s | Loss: 0.2461


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2183 | Average Valid Loss: 0.2467 | Time: 21.10s
Best model found in epoch 2 | valid loss: 0.2467


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [3][0/254]Elapsed 0.07s | Loss: 0.2926 Grad: 95589.4453 LR: 9.8626e-05
Epoch: [3][50/254]Elapsed 3.50s | Loss: 0.1994 Grad: 68291.1797 LR: 9.7881e-05
Epoch: [3][100/254]Elapsed 6.94s | Loss: 0.1976 Grad: 67808.4922 LR: 9.6978e-05
Epoch: [3][150/254]Elapsed 10.44s | Loss: 0.1982 Grad: 63470.7266 LR: 9.5922e-05
Epoch: [3][200/254]Elapsed 13.90s | Loss: 0.1960 Grad: 62164.4805 LR: 9.4715e-05
Epoch: [3][250/254]Elapsed 17.41s | Loss: 0.1933 Grad: 68452.9375 LR: 9.3361e-05
Epoch: [3][253/254]Elapsed 17.65s | Loss: 0.1930 Grad: 97073.8516 LR: 9.3247e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [3][0/64]Elapsed 0.05s | Loss: 0.3009
Epoch: [3][50/64]Elapsed 2.55s | Loss: 0.2421


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1930 | Average Valid Loss: 0.2415 | Time: 20.98s
Best model found in epoch 3 | valid loss: 0.2415


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [4][0/254]Elapsed 0.06s | Loss: 0.2780 Grad: 96160.8281 LR: 9.3247e-05
Epoch: [4][50/254]Elapsed 3.55s | Loss: 0.1849 Grad: 76477.9922 LR: 9.1740e-05
Epoch: [4][100/254]Elapsed 7.04s | Loss: 0.1843 Grad: 84132.8281 LR: 9.0096e-05
Epoch: [4][150/254]Elapsed 10.63s | Loss: 0.1824 Grad: 80764.3281 LR: 8.8322e-05
Epoch: [4][200/254]Elapsed 14.16s | Loss: 0.1808 Grad: 53158.0938 LR: 8.6421e-05
Epoch: [4][250/254]Elapsed 17.67s | Loss: 0.1778 Grad: 64915.7461 LR: 8.4402e-05
Epoch: [4][253/254]Elapsed 17.90s | Loss: 0.1776 Grad: 112200.3359 LR: 8.4235e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [4][0/64]Elapsed 0.05s | Loss: 0.3049
Epoch: [4][50/64]Elapsed 2.54s | Loss: 0.2429


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1776 | Average Valid Loss: 0.2416 | Time: 21.21s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [5][0/254]Elapsed 0.06s | Loss: 0.2362 Grad: 81596.3828 LR: 8.4235e-05
Epoch: [5][50/254]Elapsed 3.55s | Loss: 0.1673 Grad: 66058.8750 LR: 8.2094e-05
Epoch: [5][100/254]Elapsed 7.03s | Loss: 0.1689 Grad: 83903.8828 LR: 7.9848e-05
Epoch: [5][150/254]Elapsed 10.50s | Loss: 0.1682 Grad: 72293.0312 LR: 7.7504e-05
Epoch: [5][200/254]Elapsed 13.93s | Loss: 0.1658 Grad: 60010.1680 LR: 7.5069e-05
Epoch: [5][250/254]Elapsed 17.37s | Loss: 0.1636 Grad: 75627.6797 LR: 7.2553e-05
Epoch: [5][253/254]Elapsed 17.60s | Loss: 0.1636 Grad: 109473.3828 LR: 7.2349e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [5][0/64]Elapsed 0.05s | Loss: 0.3049
Epoch: [5][50/64]Elapsed 2.54s | Loss: 0.2443


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1636 | Average Valid Loss: 0.2426 | Time: 20.92s


Train:   0%|          | 0/254 [00:00<?, ?batch/s]

Epoch: [6][0/254]Elapsed 0.06s | Loss: 0.2531 Grad: 98204.1875 LR: 7.2349e-05
Epoch: [6][50/254]Elapsed 3.62s | Loss: 0.1588 Grad: 67030.6016 LR: 6.9753e-05
Epoch: [6][100/254]Elapsed 7.09s | Loss: 0.1550 Grad: 64899.3008 LR: 6.7093e-05
Epoch: [6][150/254]Elapsed 10.54s | Loss: 0.1560 Grad: 57258.4336 LR: 6.4376e-05
Epoch: [6][200/254]Elapsed 14.05s | Loss: 0.1553 Grad: 55085.1172 LR: 6.1613e-05
Epoch: [6][250/254]Elapsed 17.54s | Loss: 0.1529 Grad: 82547.2422 LR: 5.8812e-05
Epoch: [6][253/254]Elapsed 17.77s | Loss: 0.1528 Grad: 122302.8281 LR: 5.8586e-05


Valid:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch: [6][0/64]Elapsed 0.05s | Loss: 0.3147
Epoch: [6][50/64]Elapsed 2.55s | Loss: 0.2442


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1528 | Average Valid Loss: 0.2414 | Time: 21.10s
Best model found in epoch 6 | valid loss: 0.2414
CV Result (Stage=2): 0.4424395827373606 (torch) | 0.4424395818829547 (kaggle)
Elapse: 35.17 min 


In [None]:
# Smoke test: pull one sample out of the dataset and push it through a freshly
# constructed model to confirm the input/output tensor shapes.
dataset = CustomDataset(train_easy, TARGETS, ModelConfig, all_specs, all_eegs, mode='test')

X, y = dataset[0]
print(X.shape, y.shape)

model = CustomModel(ModelConfig, num_classes=6, pretrained=True)

# This is a throwaway inference pass: switch to eval mode so stochastic layers
# behave deterministically, and skip autograd bookkeeping.
# NOTE(review): assumes CustomModel is a torch.nn.Module (it is called like one).
model.eval()
with torch.no_grad():
    # unsqueeze(0) adds the leading batch axis the model call expects.
    y_pred = model(X.unsqueeze(0))

print(y_pred.shape)

In [None]:
from kl_divergence import score as kl_score


def calc_kl_div(p, q, criterion):
    """Evaluate `criterion` on a single prediction/target pair.

    Args:
        p: NumPy array of prediction scores. It is cast to float32, given a
            leading batch axis, and passed through log-softmax over dim 1.
        q: NumPy array of target values. It is cast to float32 and given a
            leading batch axis; no other transformation is applied.
        criterion: Callable invoked as ``criterion(log_probs, target)`` whose
            result supports ``.item()``.

    Returns:
        The criterion value as a Python scalar.
    """
    pred_batch = torch.tensor(p.astype(np.float32))[None]
    target_batch = torch.tensor(q.astype(np.float32))[None]

    log_probs = F.log_softmax(pred_batch, dim=1)
    loss = criterion(log_probs, target_batch)
    return loss.item()

def calc_kaggle_score(solution, submission):
    """Compute the Kaggle KL score for a single row.

    Args:
        solution: pandas Series holding 'eeg_id' and the TARGETS entries.
        submission: pandas Series with one id entry followed by one entry per
            target; its labels are replaced by ``['eeg_id'] + TARGETS``.

    Returns:
        Whatever ``kl_score`` returns for the two one-row frames.
    """
    id_col = 'eeg_id'

    def _one_row_frame(series, columns=None):
        # Turn the Series into a 1 x n frame, optionally relabel the columns,
        # then cast the target columns to float32.
        frame = series.to_frame().T
        if columns is not None:
            frame.columns = columns
        frame[TARGETS] = frame[TARGETS].astype(np.float32)
        return frame

    solution_df = _one_row_frame(solution)
    submission_df = _one_row_frame(submission, columns=[id_col] + TARGETS)

    return kl_score(solution_df, submission_df, id_col)

In [None]:
def evaluate_oof(oof_csv_path):
    """Score one out-of-fold prediction CSV with the torch KL loss and the Kaggle metric.

    The TARGETS_PRED columns read from the CSV are treated as unnormalized
    scores: they go through log-softmax for the torch loss and through softmax
    before being handed to the Kaggle scorer.

    Args:
        oof_csv_path: Path of a CSV containing 'eeg_id', the TARGETS columns
            and the TARGETS_PRED columns.

    Returns:
        Tuple ``(oof_df, kl_loss_all, kaggle_score_all)`` where
        ``oof_df`` is the loaded frame with added per-row 'kl_loss' and
        'kaggle_score' columns and with TARGETS_PRED replaced by the softmaxed
        predictions, ``kl_loss_all`` is the KLDivLoss (batchmean) tensor over
        all rows, and ``kaggle_score_all`` is the ``kl_score`` result over all
        rows.
    """
    oof_df = pd.read_csv(oof_csv_path)
    criterion = nn.KLDivLoss(reduction="batchmean")

    # Snapshot predictions and targets as tensors before TARGETS_PRED is
    # overwritten with probabilities further down.
    pred_scores = torch.tensor(oof_df[TARGETS_PRED].values.astype(np.float32))
    true_probs = torch.tensor(oof_df[TARGETS].values.astype(np.float32))

    # Per-row loss; must run while TARGETS_PRED still holds the raw scores,
    # because calc_kl_div applies log-softmax itself.
    oof_df["kl_loss"] = oof_df.apply(lambda row: 
        calc_kl_div(row[TARGETS_PRED].values, row[TARGETS].values, criterion), axis=1
        )

    kl_loss_all = criterion(F.log_softmax(pred_scores, dim=1), true_probs)

    print(f"KL Loss All: {kl_loss_all}")
    print(f"KL Loss Mean: {oof_df['kl_loss'].mean()}")

    # Bug fix: the probabilities must come from the predictions. The previous
    # code softmaxed the TARGETS columns, so the Kaggle score compared the
    # labels against a softmax of themselves.
    oof_df[TARGETS_PRED] = F.softmax(pred_scores, dim=1).numpy()

    solution = oof_df[['eeg_id'] + TARGETS].copy()
    submission = oof_df[['eeg_id'] + TARGETS_PRED].copy()
    # The scorer is given matching column names in both frames.
    submission.columns = ['eeg_id'] + TARGETS

    kaggle_score_all = kl_score(solution, submission, 'eeg_id')
    
    oof_df['kaggle_score'] = oof_df.apply(lambda row:
        calc_kaggle_score(row[['eeg_id'] + TARGETS], row[['eeg_id'] + TARGETS_PRED]), axis=1
        )

    print(f"Kaggle Score All: {kaggle_score_all}")
    print(f"Kaggle Score Mean: {oof_df['kaggle_score'].mean()}")

    return oof_df, kl_loss_all, kaggle_score_all


In [None]:
# Evaluate the out-of-fold predictions of each training stage. Every stage gets
# its own metric names so the stage-1 numbers are not overwritten by stage 2.
oof_1, kl_loss_1, kaggle_score_1 = evaluate_oof(f"{JobConfig.OUTPUT_DIR}/oof_1.csv")
oof_2, kl_loss_2, kaggle_score_2 = evaluate_oof(f"{JobConfig.OUTPUT_DIR}/oof_2.csv")

# Keep the original names bound to the stage-2 results, as before.
kl_loss_all, kaggle_score_all = kl_loss_2, kaggle_score_2

In [None]:
# Plot true vs. predicted target values for a random sample of stage-1 OOF rows,
# one row per panel.
fig, axes = plt.subplots(4, 4, figsize=(10, 10), sharex=True, sharey=True)
flat_axes = axes.ravel()

# Fixed random_state so re-running this cell shows the same rows; otherwise
# row indices picked from the figure cannot be found again.
rows = oof_1.sample(len(flat_axes), random_state=JobConfig.SEED)

for i, (idx, row) in enumerate(rows.iterrows()):

    ax = flat_axes[i]
    ax.plot(row[TARGETS].values, label='True')
    ax.plot(row[TARGETS_PRED].values, label='Pred')
    ax.set_title(f"{idx} | {row['target']} | KL: {row['kl_loss']:.4f}")
    ax.set_xticks(range(6))
    ax.set_xticklabels(BRAIN_ACTIVITY)
    ax.grid(True)
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Inspect a single stage-1 OOF row (index label 2619): record the extremes of
# its predictions and print its min-max scaled targets.
row = oof_1.loc[2619]

row_pred = row[TARGETS_PRED]
min_pred, max_pred = row_pred.min(), row_pred.max()

row_true = row[TARGETS]
true_lo, true_hi = row_true.min(), row_true.max()
targets_norm = (row_true - true_lo) / (true_hi - true_lo)

print(targets_norm)

In [None]:
# Plot true, predicted and min-max scaled true target values for a fixed slice
# of stage-2 OOF rows, one row per panel.
fig, axes = plt.subplots(4, 4, figsize=(10, 10), sharex=True, sharey=True)
flat_axes = axes.ravel()

rows = oof_2.iloc[5:5+len(flat_axes), :]

for i, (idx, row) in enumerate(rows.iterrows()):

    ax = flat_axes[i]
    # Rows from iterrows() are object-dtype because the frame mixes column
    # types; cast to float so the arithmetic below is ordinary NumPy math.
    y_true = row[TARGETS].values.astype(float)
    y_pred = row[TARGETS_PRED].values.astype(float)

    # Min-max scale the targets; a flat row has zero range, so draw zeros
    # instead of dividing by zero.
    span = y_true.max() - y_true.min()
    if span > 0:
        y_norm = (y_true - y_true.min()) / span
    else:
        y_norm = np.zeros_like(y_true)

    ax.plot(y_true, label='True')
    ax.plot(y_pred, label='Pred')
    ax.plot(y_norm, "b:", label='True Norm')

    ax.set_title(f"{idx} | {row['target']} | KL: {row['kl_loss']:.4f}")
    ax.set_xticks(range(6))
    ax.set_xticklabels(BRAIN_ACTIVITY)
    ax.grid(True)
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Inspect a single stage-2 OOF row (index label 6): print the prediction
# extremes and values, then the targets min-max scaled and renormalized so
# they sum to one.
row = oof_2.loc[6]

row_pred = row[TARGETS_PRED]
min_pred, max_pred = row_pred.min(), row_pred.max()
print(min_pred, max_pred)

print(row_pred)

row_true = row[TARGETS]
true_lo, true_hi = row_true.min(), row_true.max()
targets_scaled = (row_true - true_lo) / (true_hi - true_lo)

targets_norm = targets_scaled / targets_scaled.sum()

print(targets_norm)