In [1]:
import pandas as pd 
import numpy as np 
from scipy.stats import entropy
import matplotlib.pyplot as plt

from engine_hms_trainer import *
from engine_hms_model import CustomModel, JobConfig, ModelConfig

import torch
from torch import nn
import torch.nn.functional as F

  _torch_pytree._register_pytree_node(


In [2]:
# Fix all RNG seeds up front so the run is repeatable.
seed_everything(JobConfig.SEED)

# Overrides for this experiment, applied to the ModelConfig class attributes
# in the order listed. Anything not named here keeps its ModelConfig default.
MODEL_OVERRIDES = {
    "EPOCHS": 6,
    "MODEL_BACKBONE": 'tf_efficientnet_b2',
    "MODEL_NAME": "ENet_b2_xymasking_regularized",
    "AUGMENT": True,
    "USE_KAGGLE_SPECTROGRAMS": True,
    "USE_EEG_SPECTROGRAMS": True,
    "REGULARIZATION": 0.05,
    "AUGMENTATIONS": ['xy_masking'],
}
for _option, _setting in MODEL_OVERRIDES.items():
    setattr(ModelConfig, _option, _setting)

# Build the predictor only after the overrides are in place; it receives the
# (already mutated) config classes themselves.
hms_predictor = HMSPredictor(JobConfig, ModelConfig)

****************************************************************************************************
Script Start: Sun Mar 10 21:04:11 2024
Initializing HMS Predictor...
Model Name: ENet_b2_xymasking_regularized
Drop Rate: 0.15
Drop Path Rate: 0.25
Augment: True
Augmentations: ['xy_masking']
Regularization: 0.05
Enropy Split: 5.5
Device: cuda
Output Dir: ./outputs/
****************************************************************************************************


In [3]:
# load_train_data() hands back four objects; the first two are the DataFrames
# inspected below (presumably split by the predictor's entropy threshold --
# TODO confirm in engine_hms_trainer).
train_easy, train_hard, all_specs, all_eegs = hms_predictor.load_train_data()

# (rows, columns) of each frame, one per line.
print(train_easy.shape, train_hard.shape, sep="\n")

# Total count of missing cells in each frame; 0 means the frame has no NaNs.
print(train_easy.isna().sum().sum(), train_hard.isna().sum().sum(), sep="\n")

# Preview the first rows of both frames, with a single-space line in between.
display(train_easy.head())
print(" ")
display(train_hard.head())

(12440, 14)
(5536, 14)
0
0


Unnamed: 0,eeg_id,spectrogram_id,min,max,patient_id,target,total_votes,entropy,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,642382,14960202,1008.0,1032.0,5955,Other,2,7.802343,0.0,0.0,0.0,0.0,0.0,1.0
1,751790,618728447,908.0,908.0,38549,GPD,1,7.802343,0.0,0.0,1.0,0.0,0.0,0.0
2,778705,52296320,0.0,0.0,40955,Other,2,7.68682,0.0,0.0,0.0,0.0,0.0,1.0
3,1629671,2036345030,0.0,160.0,37481,Seizure,51,7.619243,1.0,0.0,0.0,0.0,0.0,0.0
4,2061593,320962633,1450.0,1450.0,23828,Other,1,7.802343,0.0,0.0,0.0,0.0,0.0,1.0


 


Unnamed: 0,eeg_id,spectrogram_id,min,max,patient_id,target,total_votes,entropy,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,568657,789577333,0.0,16.0,20654,Other,48,3.341757,0.0,0.0,0.25,0.0,0.166667,0.583333
1,582999,1552638400,0.0,38.0,20230,LPD,154,3.550549,0.0,0.857143,0.0,0.071429,0.0,0.071429
2,1895581,128369999,1138.0,1138.0,47999,Other,13,3.565051,0.076923,0.0,0.0,0.0,0.076923,0.846154
3,2482631,978166025,1902.0,1944.0,20606,Other,105,1.431066,0.0,0.0,0.133333,0.066667,0.133333,0.666667
4,2521897,673742515,0.0,4.0,62117,Other,24,1.516203,0.0,0.0,0.083333,0.083333,0.333333,0.5


In [4]:
# Launch cross-validated training on the frames and lookups loaded above.
# Per the captured log, every fold runs a "First Stage" followed by a
# "Second Stage", and the second stage starts from that fold's
# "<MODEL_NAME>_fold_<k>_stage_1.pth" checkpoint.
# NOTE(review): the batch counts in the log (622 vs 276 train batches) look
# consistent with stage 1 training on train_easy and stage 2 on train_hard --
# confirm against engine_hms_trainer.
# NOTE(review): some first batches log "Grad: inf" / "Grad: nan"; presumably a
# mixed-precision loss-scale overflow whose step is skipped -- verify.
hms_predictor.train_folds(train_easy, train_hard, all_specs, all_eegs)

Fold: 0 || Valid size 3596 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 1.08s | Loss: 0.8221 Grad: 67759.5938 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 8.19s | Loss: 0.8347 Grad: 85776.9141 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 15.28s | Loss: 0.8282 Grad: 70398.4844 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.38s | Loss: 0.8234 Grad: 75172.6094 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 29.50s | Loss: 0.8154 Grad: 72303.0234 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 36.63s | Loss: 0.8048 Grad: 70600.3047 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 43.78s | Loss: 0.7915 Grad: 77726.5234 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 50.93s | Loss: 0.7748 Grad: 114699.8047 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 58.08s | Loss: 0.7554 Grad: 57983.6250 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 65.26s | Loss: 0.7347 Grad: 74722.4062 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 72.46s | Loss: 0.7165 Grad: 70013.5781 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 79.70s | Loss: 0.7009 Grad: 70525.8594 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 86.92

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.3753
Epoch: [1][50/225]Elapsed 4.91s | Loss: 0.5451
Epoch: [1][100/225]Elapsed 9.74s | Loss: 0.5455
Epoch: [1][150/225]Elapsed 14.56s | Loss: 0.5408
Epoch: [1][200/225]Elapsed 19.40s | Loss: 0.4920


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6764 | Average Valid Loss: 0.4747 | Time: 111.93s
Best model found in epoch 1 | valid loss: 0.4747


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.12s | Loss: 0.3876 Grad: 134640.5469 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.41s | Loss: 0.4541 Grad: 57929.2383 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 14.70s | Loss: 0.4441 Grad: 86795.0234 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.03s | Loss: 0.4428 Grad: 93855.8281 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 29.35s | Loss: 0.4411 Grad: 74381.1875 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 36.65s | Loss: 0.4344 Grad: 64519.0156 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 43.98s | Loss: 0.4297 Grad: 116668.5547 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 51.31s | Loss: 0.4269 Grad: 89636.9922 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 58.64s | Loss: 0.4205 Grad: 45553.0898 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 65.95s | Loss: 0.4141 Grad: 40900.9805 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 73.27s | Loss: 0.4110 Grad: 26506.7910 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 80.61s | Loss: 0.4103 Grad: 47036.3477 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 87.9

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.11s | Loss: 0.3628
Epoch: [2][50/225]Elapsed 4.94s | Loss: 0.4572
Epoch: [2][100/225]Elapsed 9.77s | Loss: 0.4590
Epoch: [2][150/225]Elapsed 14.59s | Loss: 0.4600
Epoch: [2][200/225]Elapsed 19.42s | Loss: 0.4258


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4053 | Average Valid Loss: 0.4160 | Time: 113.03s
Best model found in epoch 2 | valid loss: 0.4160


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.10s | Loss: 0.3368 Grad: 122108.3594 LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.45s | Loss: 0.3356 Grad: 46974.6406 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 14.88s | Loss: 0.3327 Grad: 110191.8047 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.32s | Loss: 0.3392 Grad: 41000.6602 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 29.73s | Loss: 0.3411 Grad: 30088.9941 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.14s | Loss: 0.3402 Grad: 25114.0234 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 44.53s | Loss: 0.3378 Grad: 27425.8555 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 51.91s | Loss: 0.3406 Grad: 44168.6875 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.27s | Loss: 0.3352 Grad: 21572.1660 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 66.64s | Loss: 0.3317 Grad: 29025.2539 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 73.99s | Loss: 0.3301 Grad: 28332.9512 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 81.36s | Loss: 0.3310 Grad: 35894.1875 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 88.7

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3446
Epoch: [3][50/225]Elapsed 4.94s | Loss: 0.4414
Epoch: [3][100/225]Elapsed 9.76s | Loss: 0.4439
Epoch: [3][150/225]Elapsed 14.57s | Loss: 0.4451
Epoch: [3][200/225]Elapsed 19.40s | Loss: 0.4289


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3292 | Average Valid Loss: 0.4234 | Time: 113.78s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.10s | Loss: 0.2395 Grad: 127062.9531 LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.42s | Loss: 0.2924 Grad: 54743.0156 LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 14.81s | Loss: 0.2875 Grad: 82322.2344 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.21s | Loss: 0.2924 Grad: 107097.0859 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 29.62s | Loss: 0.2942 Grad: 65296.2422 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.03s | Loss: 0.2927 Grad: 38420.3477 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 44.42s | Loss: 0.2926 Grad: 56653.7969 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 51.79s | Loss: 0.2939 Grad: 89634.9141 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 59.15s | Loss: 0.2891 Grad: 36383.5938 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 66.50s | Loss: 0.2849 Grad: 43564.6875 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 73.85s | Loss: 0.2839 Grad: 35692.6367 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 81.20s | Loss: 0.2857 Grad: 33745.6836 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 88.5

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.3483
Epoch: [4][50/225]Elapsed 5.06s | Loss: 0.4455
Epoch: [4][100/225]Elapsed 9.88s | Loss: 0.4492
Epoch: [4][150/225]Elapsed 14.70s | Loss: 0.4487
Epoch: [4][200/225]Elapsed 19.51s | Loss: 0.4356


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2834 | Average Valid Loss: 0.4300 | Time: 113.68s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.12s | Loss: 0.2845 Grad: 157769.3438 LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.42s | Loss: 0.2562 Grad: 26149.1016 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 14.81s | Loss: 0.2504 Grad: 41737.1328 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.22s | Loss: 0.2587 Grad: 37339.7031 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 29.60s | Loss: 0.2616 Grad: 23621.0723 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.00s | Loss: 0.2597 Grad: 24032.1152 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.39s | Loss: 0.2578 Grad: 26139.4629 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 51.78s | Loss: 0.2581 Grad: 34564.6289 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.18s | Loss: 0.2548 Grad: 49651.1758 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 66.57s | Loss: 0.2531 Grad: 44647.3125 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 73.97s | Loss: 0.2530 Grad: 33632.9883 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 81.36s | Loss: 0.2539 Grad: 42921.1367 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 88.74

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.3211
Epoch: [5][50/225]Elapsed 4.92s | Loss: 0.4469
Epoch: [5][100/225]Elapsed 9.74s | Loss: 0.4492
Epoch: [5][150/225]Elapsed 14.54s | Loss: 0.4502
Epoch: [5][200/225]Elapsed 19.37s | Loss: 0.4392


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2519 | Average Valid Loss: 0.4339 | Time: 113.78s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.10s | Loss: 0.2589 Grad: 207724.1719 LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.46s | Loss: 0.2311 Grad: 55201.3047 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 14.93s | Loss: 0.2254 Grad: 71398.8203 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.38s | Loss: 0.2302 Grad: 35944.9766 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 29.81s | Loss: 0.2326 Grad: 45300.2383 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.24s | Loss: 0.2321 Grad: 31103.1289 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 44.65s | Loss: 0.2302 Grad: 37393.8633 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.06s | Loss: 0.2294 Grad: 54497.0430 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.47s | Loss: 0.2252 Grad: 29435.9180 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 66.92s | Loss: 0.2234 Grad: 28272.6660 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.31s | Loss: 0.2224 Grad: 17294.7363 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 81.72s | Loss: 0.2224 Grad: 34787.5039 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.11

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.11s | Loss: 0.3583
Epoch: [6][50/225]Elapsed 4.94s | Loss: 0.4570
Epoch: [6][100/225]Elapsed 9.76s | Loss: 0.4636
Epoch: [6][150/225]Elapsed 14.58s | Loss: 0.4613
Epoch: [6][200/225]Elapsed 19.43s | Loss: 0.4561


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2213 | Average Valid Loss: 0.4523 | Time: 114.20s
Fold 0 Valid Loss: (Easy) 0.8624 | (Hard) 0.5922
Elapse: 11.36 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymasking_regularized_fold_0_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.10s | Loss: 0.5161 Grad: inf LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.46s | Loss: 0.4027 Grad: 82342.7500 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 14.95s | Loss: 0.3818 Grad: 61007.3711 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.41s | Loss: 0.3596 Grad: 36772.1484 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 29.85s | Loss: 0.3371 Grad: 28614.8027 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.31s | Loss: 0.3191 Grad: 23968.0957 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 41.09s | Loss: 0.3123 Grad: 34309.7969 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.11s | Loss: 0.4352
Epoch: [1][50/225]Elapsed 4.94s | Loss: 0.4681
Epoch: [1][100/225]Elapsed 9.76s | Loss: 0.4729
Epoch: [1][150/225]Elapsed 14.58s | Loss: 0.4675
Epoch: [1][200/225]Elapsed 19.41s | Loss: 0.4176


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3123 | Average Valid Loss: 0.3989 | Time: 62.99s
Best model found in epoch 1 | valid loss: 0.3989


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.11s | Loss: 0.3027 Grad: 112014.0547 LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.53s | Loss: 0.2416 Grad: 78912.8359 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 15.00s | Loss: 0.2316 Grad: 85281.5469 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.47s | Loss: 0.2299 Grad: 56059.5859 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 29.93s | Loss: 0.2277 Grad: 54994.7656 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.39s | Loss: 0.2244 Grad: 35820.3281 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 41.19s | Loss: 0.2231 Grad: 65394.6094 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.3722
Epoch: [2][50/225]Elapsed 4.93s | Loss: 0.4593
Epoch: [2][100/225]Elapsed 9.75s | Loss: 0.4602
Epoch: [2][150/225]Elapsed 14.57s | Loss: 0.4564
Epoch: [2][200/225]Elapsed 19.41s | Loss: 0.4044


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2231 | Average Valid Loss: 0.3851 | Time: 63.09s
Best model found in epoch 2 | valid loss: 0.3851


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.10s | Loss: 0.2881 Grad: 137321.2031 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.52s | Loss: 0.2088 Grad: 79847.6562 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 14.98s | Loss: 0.2032 Grad: 111834.8516 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.46s | Loss: 0.2031 Grad: 47467.9180 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 29.89s | Loss: 0.2017 Grad: 28996.3086 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.35s | Loss: 0.1993 Grad: 33562.1016 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.12s | Loss: 0.1983 Grad: 71319.6484 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3681
Epoch: [3][50/225]Elapsed 4.94s | Loss: 0.4474
Epoch: [3][100/225]Elapsed 9.77s | Loss: 0.4479
Epoch: [3][150/225]Elapsed 14.59s | Loss: 0.4442
Epoch: [3][200/225]Elapsed 19.42s | Loss: 0.3939


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1983 | Average Valid Loss: 0.3749 | Time: 63.02s
Best model found in epoch 3 | valid loss: 0.3749


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.10s | Loss: 0.1959 Grad: 104084.6797 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.49s | Loss: 0.1870 Grad: 111419.5078 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 14.95s | Loss: 0.1841 Grad: 110376.7578 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.42s | Loss: 0.1833 Grad: 98583.4453 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 29.85s | Loss: 0.1831 Grad: 48400.2656 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.28s | Loss: 0.1813 Grad: 65226.2812 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 41.06s | Loss: 0.1808 Grad: 108772.2812 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.3602
Epoch: [4][50/225]Elapsed 4.92s | Loss: 0.4433
Epoch: [4][100/225]Elapsed 9.75s | Loss: 0.4431
Epoch: [4][150/225]Elapsed 14.57s | Loss: 0.4395
Epoch: [4][200/225]Elapsed 19.42s | Loss: 0.3896


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1808 | Average Valid Loss: 0.3707 | Time: 62.99s
Best model found in epoch 4 | valid loss: 0.3707


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.10s | Loss: 0.1671 Grad: 73255.0938 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.47s | Loss: 0.1734 Grad: 91843.0469 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 14.95s | Loss: 0.1689 Grad: 83368.4219 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.40s | Loss: 0.1696 Grad: 68683.5938 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 29.86s | Loss: 0.1695 Grad: 55079.6836 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.29s | Loss: 0.1683 Grad: 64409.8398 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.07s | Loss: 0.1678 Grad: 110484.5078 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.11s | Loss: 0.3750
Epoch: [5][50/225]Elapsed 4.93s | Loss: 0.4535
Epoch: [5][100/225]Elapsed 9.75s | Loss: 0.4511
Epoch: [5][150/225]Elapsed 14.56s | Loss: 0.4467
Epoch: [5][200/225]Elapsed 19.39s | Loss: 0.3964


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1678 | Average Valid Loss: 0.3772 | Time: 62.95s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.10s | Loss: 0.1575 Grad: 69541.6641 LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.52s | Loss: 0.1597 Grad: 91453.9609 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 14.99s | Loss: 0.1590 Grad: 41748.3516 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.49s | Loss: 0.1592 Grad: 44972.4219 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 29.95s | Loss: 0.1591 Grad: 31805.9805 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.41s | Loss: 0.1569 Grad: 44812.0273 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 41.20s | Loss: 0.1565 Grad: 53095.1875 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3457
Epoch: [6][50/225]Elapsed 4.94s | Loss: 0.4483
Epoch: [6][100/225]Elapsed 9.78s | Loss: 0.4460
Epoch: [6][150/225]Elapsed 14.61s | Loss: 0.4413
Epoch: [6][200/225]Elapsed 19.45s | Loss: 0.3915


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1565 | Average Valid Loss: 0.3728 | Time: 63.15s
Fold 0 Valid Loss: (Easy) 0.8315 | (Hard) 0.3958
Elapse: 6.31 min 
Fold: 1 || Valid size 3595 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 0.11s | Loss: 0.8383 Grad: 79158.1094 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 7.45s | Loss: 0.8259 Grad: 52531.4102 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 14.90s | Loss: 0.8205 Grad: 75132.8984 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.34s | Loss: 0.8141 Grad: 91490.2188 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 29.80s | Loss: 0.8067 Grad: 83789.4688 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 37.25s | Loss: 0.7940 Grad: 38833.7344 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 44.69s | Loss: 0.7817 Grad: 42801.6445 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 52.13s | Loss: 0.7642 Grad: 53166.4062 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 59.57s | Loss: 0.7456 Grad: 90187.9297 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 66.97s | Loss: 0.7261 Grad: 50625.6367 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 74.36s | Loss: 0.7091 Grad: 44992.9023 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 81.78s | Loss: 0.6914 Grad: 79865.7969 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 89.19s

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.4972
Epoch: [1][50/225]Elapsed 4.93s | Loss: 0.5204
Epoch: [1][100/225]Elapsed 9.74s | Loss: 0.5437
Epoch: [1][150/225]Elapsed 14.56s | Loss: 0.5485
Epoch: [1][200/225]Elapsed 19.38s | Loss: 0.5030


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6686 | Average Valid Loss: 0.4872 | Time: 114.23s
Best model found in epoch 1 | valid loss: 0.4872


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.11s | Loss: 0.4686 Grad: 202639.8125 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.47s | Loss: 0.4499 Grad: 79498.2188 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 14.94s | Loss: 0.4353 Grad: 55546.8164 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.40s | Loss: 0.4355 Grad: 56933.4414 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 29.81s | Loss: 0.4309 Grad: 74504.2891 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 37.19s | Loss: 0.4251 Grad: 74611.7656 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 44.58s | Loss: 0.4247 Grad: 100883.1484 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 51.98s | Loss: 0.4208 Grad: 60962.7188 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 59.37s | Loss: 0.4172 Grad: 72757.6562 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 66.76s | Loss: 0.4128 Grad: 52201.3477 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 74.13s | Loss: 0.4087 Grad: 113845.2422 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 81.50s | Loss: 0.4059 Grad: 33698.1133 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 88.

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.5055
Epoch: [2][50/225]Elapsed 4.94s | Loss: 0.4528
Epoch: [2][100/225]Elapsed 9.79s | Loss: 0.4676
Epoch: [2][150/225]Elapsed 14.60s | Loss: 0.4720
Epoch: [2][200/225]Elapsed 19.42s | Loss: 0.4459


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4022 | Average Valid Loss: 0.4369 | Time: 113.94s
Best model found in epoch 2 | valid loss: 0.4369


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.10s | Loss: 0.4023 Grad: 112001.5469 LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.44s | Loss: 0.3363 Grad: 59236.0352 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 14.88s | Loss: 0.3296 Grad: 39492.2305 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.32s | Loss: 0.3371 Grad: 83009.4766 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 29.76s | Loss: 0.3366 Grad: 104183.4922 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.22s | Loss: 0.3364 Grad: 93108.0391 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 44.64s | Loss: 0.3378 Grad: 43324.7266 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 52.07s | Loss: 0.3377 Grad: 17566.2656 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.47s | Loss: 0.3348 Grad: 56850.0547 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 66.86s | Loss: 0.3318 Grad: 46391.8984 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 74.24s | Loss: 0.3298 Grad: 66163.0859 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 81.66s | Loss: 0.3294 Grad: 21924.1250 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 89.0

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.11s | Loss: 0.4903
Epoch: [3][50/225]Elapsed 4.93s | Loss: 0.4474
Epoch: [3][100/225]Elapsed 9.75s | Loss: 0.4559
Epoch: [3][150/225]Elapsed 14.58s | Loss: 0.4572
Epoch: [3][200/225]Elapsed 19.40s | Loss: 0.4375


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3278 | Average Valid Loss: 0.4310 | Time: 114.08s
Best model found in epoch 3 | valid loss: 0.4310


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.10s | Loss: 0.3508 Grad: inf LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.46s | Loss: 0.2873 Grad: 83328.3047 LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 14.91s | Loss: 0.2890 Grad: 43193.2109 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.35s | Loss: 0.2969 Grad: 74651.0938 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 29.78s | Loss: 0.2953 Grad: 96063.3359 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.20s | Loss: 0.2935 Grad: 39696.0234 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 44.64s | Loss: 0.2946 Grad: 55424.0312 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 52.06s | Loss: 0.2939 Grad: 23965.0332 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 59.46s | Loss: 0.2914 Grad: 64090.7227 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 66.86s | Loss: 0.2894 Grad: 40600.9492 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 74.26s | Loss: 0.2883 Grad: 59428.5469 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 81.65s | Loss: 0.2872 Grad: 17141.1797 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 89.02s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.5709
Epoch: [4][50/225]Elapsed 4.95s | Loss: 0.4575
Epoch: [4][100/225]Elapsed 9.77s | Loss: 0.4648
Epoch: [4][150/225]Elapsed 14.59s | Loss: 0.4651
Epoch: [4][200/225]Elapsed 19.41s | Loss: 0.4407


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2849 | Average Valid Loss: 0.4349 | Time: 114.11s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.10s | Loss: 0.4192 Grad: nan LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.47s | Loss: 0.2582 Grad: 65800.6797 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 14.93s | Loss: 0.2594 Grad: 14772.6123 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.37s | Loss: 0.2628 Grad: 43556.4648 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 29.82s | Loss: 0.2616 Grad: 62910.5547 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.25s | Loss: 0.2595 Grad: 29560.4121 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.69s | Loss: 0.2594 Grad: 54934.1875 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 52.12s | Loss: 0.2602 Grad: 18609.8359 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.56s | Loss: 0.2587 Grad: 32712.0645 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 67.00s | Loss: 0.2574 Grad: 39968.8477 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 74.43s | Loss: 0.2559 Grad: 90518.7734 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 81.85s | Loss: 0.2555 Grad: 33688.4727 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 89.25s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.5340
Epoch: [5][50/225]Elapsed 4.93s | Loss: 0.4493
Epoch: [5][100/225]Elapsed 9.76s | Loss: 0.4525
Epoch: [5][150/225]Elapsed 14.58s | Loss: 0.4525
Epoch: [5][200/225]Elapsed 19.42s | Loss: 0.4315


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2532 | Average Valid Loss: 0.4262 | Time: 114.32s
Best model found in epoch 5 | valid loss: 0.4262


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.10s | Loss: 0.3622 Grad: inf LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.49s | Loss: 0.2340 Grad: 48858.1523 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 14.95s | Loss: 0.2282 Grad: 16625.6523 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.42s | Loss: 0.2315 Grad: 35127.7773 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 29.87s | Loss: 0.2307 Grad: 44569.7500 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.30s | Loss: 0.2294 Grad: 59300.6758 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 44.71s | Loss: 0.2284 Grad: 64844.1484 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.11s | Loss: 0.2290 Grad: 35126.9102 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.51s | Loss: 0.2279 Grad: 58181.5078 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 66.93s | Loss: 0.2273 Grad: 34408.4219 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.34s | Loss: 0.2263 Grad: 61449.5352 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 81.74s | Loss: 0.2258 Grad: 38461.6953 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.12s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.5375
Epoch: [6][50/225]Elapsed 4.92s | Loss: 0.4631
Epoch: [6][100/225]Elapsed 9.74s | Loss: 0.4626
Epoch: [6][150/225]Elapsed 14.57s | Loss: 0.4639
Epoch: [6][200/225]Elapsed 19.41s | Loss: 0.4446


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2248 | Average Valid Loss: 0.4417 | Time: 114.18s
Fold 1 Valid Loss: (Easy) 0.8300 | (Hard) 0.6859
Elapse: 11.42 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymasking_regularized_fold_1_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.11s | Loss: 0.4016 Grad: nan LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.44s | Loss: 0.4054 Grad: 75051.7969 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 14.91s | Loss: 0.3800 Grad: 63275.6602 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.41s | Loss: 0.3506 Grad: 36675.5898 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 29.89s | Loss: 0.3305 Grad: 44761.0703 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.34s | Loss: 0.3132 Grad: 26917.4688 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 41.14s | Loss: 0.3058 Grad: 27793.4375 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.6141
Epoch: [1][50/225]Elapsed 4.95s | Loss: 0.4883
Epoch: [1][100/225]Elapsed 9.78s | Loss: 0.4932
Epoch: [1][150/225]Elapsed 14.62s | Loss: 0.4931
Epoch: [1][200/225]Elapsed 19.46s | Loss: 0.4327


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3058 | Average Valid Loss: 0.4117 | Time: 63.09s
Best model found in epoch 1 | valid loss: 0.4117


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.11s | Loss: 0.2557 Grad: 119306.7266 LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.53s | Loss: 0.2340 Grad: 102418.6172 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 15.04s | Loss: 0.2253 Grad: 50715.6680 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.52s | Loss: 0.2220 Grad: 51788.3984 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 29.99s | Loss: 0.2210 Grad: 57535.4531 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.42s | Loss: 0.2175 Grad: 51505.0977 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 41.20s | Loss: 0.2155 Grad: 63634.1016 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.6251
Epoch: [2][50/225]Elapsed 4.96s | Loss: 0.4676
Epoch: [2][100/225]Elapsed 9.82s | Loss: 0.4776
Epoch: [2][150/225]Elapsed 14.67s | Loss: 0.4802
Epoch: [2][200/225]Elapsed 19.51s | Loss: 0.4200


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2155 | Average Valid Loss: 0.3983 | Time: 63.21s
Best model found in epoch 2 | valid loss: 0.3983


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.10s | Loss: 0.2195 Grad: 91370.0078 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.53s | Loss: 0.2017 Grad: 89750.0781 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 15.00s | Loss: 0.1980 Grad: 73570.4062 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.46s | Loss: 0.1968 Grad: 71318.2266 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 29.93s | Loss: 0.1975 Grad: 84870.2344 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.39s | Loss: 0.1967 Grad: 101720.3828 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.18s | Loss: 0.1951 Grad: 130811.1875 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.5689
Epoch: [3][50/225]Elapsed 4.95s | Loss: 0.4566
Epoch: [3][100/225]Elapsed 9.80s | Loss: 0.4710
Epoch: [3][150/225]Elapsed 14.66s | Loss: 0.4734
Epoch: [3][200/225]Elapsed 19.58s | Loss: 0.4142


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1951 | Average Valid Loss: 0.3930 | Time: 63.26s
Best model found in epoch 3 | valid loss: 0.3930


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.12s | Loss: 0.2336 Grad: 111647.8438 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.55s | Loss: 0.1892 Grad: 63541.4062 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 15.06s | Loss: 0.1851 Grad: 85824.4141 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.56s | Loss: 0.1827 Grad: 74715.2344 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 30.05s | Loss: 0.1821 Grad: 107296.3359 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.53s | Loss: 0.1811 Grad: 54290.9219 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 41.35s | Loss: 0.1799 Grad: 97321.1562 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.6014
Epoch: [4][50/225]Elapsed 4.97s | Loss: 0.4622
Epoch: [4][100/225]Elapsed 9.82s | Loss: 0.4778
Epoch: [4][150/225]Elapsed 14.65s | Loss: 0.4813
Epoch: [4][200/225]Elapsed 19.51s | Loss: 0.4204


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1799 | Average Valid Loss: 0.3986 | Time: 63.35s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.10s | Loss: 0.1812 Grad: 107305.5703 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.53s | Loss: 0.1776 Grad: 68681.7266 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 15.00s | Loss: 0.1716 Grad: 79622.2812 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.48s | Loss: 0.1688 Grad: 95421.9609 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 30.06s | Loss: 0.1692 Grad: 84722.3047 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.64s | Loss: 0.1680 Grad: 73421.9844 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.44s | Loss: 0.1665 Grad: 89013.8594 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.11s | Loss: 0.5792
Epoch: [5][50/225]Elapsed 5.40s | Loss: 0.4537
Epoch: [5][100/225]Elapsed 10.65s | Loss: 0.4714
Epoch: [5][150/225]Elapsed 15.45s | Loss: 0.4764
Epoch: [5][200/225]Elapsed 20.26s | Loss: 0.4166


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1665 | Average Valid Loss: 0.3955 | Time: 64.17s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.10s | Loss: 0.1850 Grad: 98850.4062 LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.53s | Loss: 0.1598 Grad: 82490.7188 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 14.98s | Loss: 0.1552 Grad: 43016.8555 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.42s | Loss: 0.1541 Grad: 34888.7578 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 29.87s | Loss: 0.1544 Grad: 46137.1328 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.31s | Loss: 0.1543 Grad: 31722.1035 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 41.09s | Loss: 0.1537 Grad: 41027.7930 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.11s | Loss: 0.5685
Epoch: [6][50/225]Elapsed 4.96s | Loss: 0.4480
Epoch: [6][100/225]Elapsed 9.80s | Loss: 0.4655
Epoch: [6][150/225]Elapsed 14.65s | Loss: 0.4697
Epoch: [6][200/225]Elapsed 19.51s | Loss: 0.4116


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1537 | Average Valid Loss: 0.3911 | Time: 63.10s
Best model found in epoch 6 | valid loss: 0.3911
Fold 1 Valid Loss: (Easy) 0.8823 | (Hard) 0.3985
Elapse: 6.34 min 
Fold: 2 || Valid size 3595 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 0.11s | Loss: 0.8128 Grad: 66046.0391 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 7.69s | Loss: 0.8317 Grad: 68226.0156 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 15.28s | Loss: 0.8266 Grad: 72304.0938 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.88s | Loss: 0.8195 Grad: 74520.5859 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 30.51s | Loss: 0.8125 Grad: 97709.8906 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 38.14s | Loss: 0.8016 Grad: 69091.5078 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 45.61s | Loss: 0.7896 Grad: 92098.1719 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 52.99s | Loss: 0.7742 Grad: 51662.9688 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 60.38s | Loss: 0.7577 Grad: 27606.9473 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 67.76s | Loss: 0.7411 Grad: 32942.7109 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 75.14s | Loss: 0.7249 Grad: 29058.8027 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 82.53s | Loss: 0.7092 Grad: 39712.4414 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 89.89s

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.11s | Loss: 0.5695
Epoch: [1][50/225]Elapsed 5.19s | Loss: 0.4911
Epoch: [1][100/225]Elapsed 10.03s | Loss: 0.4845
Epoch: [1][150/225]Elapsed 14.86s | Loss: 0.4911
Epoch: [1][200/225]Elapsed 19.70s | Loss: 0.4515


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6860 | Average Valid Loss: 0.4358 | Time: 115.26s
Best model found in epoch 1 | valid loss: 0.4358


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.10s | Loss: 0.4188 Grad: 73430.1016 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.42s | Loss: 0.4564 Grad: 46988.2891 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 14.83s | Loss: 0.4464 Grad: 43813.9453 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.25s | Loss: 0.4491 Grad: 35164.9180 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 29.68s | Loss: 0.4436 Grad: 31796.0840 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 37.10s | Loss: 0.4408 Grad: 63739.9844 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 44.53s | Loss: 0.4379 Grad: 39971.8477 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 51.99s | Loss: 0.4373 Grad: 24935.0293 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 59.40s | Loss: 0.4331 Grad: 59055.4062 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 66.83s | Loss: 0.4277 Grad: 33894.8555 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 74.26s | Loss: 0.4227 Grad: 34445.3711 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 81.69s | Loss: 0.4200 Grad: 40410.0469 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 89.09s

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.4303
Epoch: [2][50/225]Elapsed 4.94s | Loss: 0.4119
Epoch: [2][100/225]Elapsed 9.76s | Loss: 0.4095
Epoch: [2][150/225]Elapsed 14.59s | Loss: 0.4142
Epoch: [2][200/225]Elapsed 19.43s | Loss: 0.3951


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4154 | Average Valid Loss: 0.3859 | Time: 114.19s
Best model found in epoch 2 | valid loss: 0.3859


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.11s | Loss: 0.2881 Grad: 89959.3828 LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.52s | Loss: 0.3453 Grad: 70237.3125 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 15.02s | Loss: 0.3419 Grad: 78598.0469 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.46s | Loss: 0.3437 Grad: 105334.4609 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 29.92s | Loss: 0.3420 Grad: 68843.6562 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.38s | Loss: 0.3407 Grad: 47342.5703 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 44.91s | Loss: 0.3415 Grad: 80684.5781 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 52.46s | Loss: 0.3432 Grad: 65496.7266 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.90s | Loss: 0.3419 Grad: 107429.2500 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 67.32s | Loss: 0.3387 Grad: 54632.8867 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 74.76s | Loss: 0.3372 Grad: 79255.6719 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 82.20s | Loss: 0.3376 Grad: 84995.2969 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 89.6

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3924
Epoch: [3][50/225]Elapsed 4.95s | Loss: 0.4120
Epoch: [3][100/225]Elapsed 9.79s | Loss: 0.4111
Epoch: [3][150/225]Elapsed 14.63s | Loss: 0.4131
Epoch: [3][200/225]Elapsed 19.48s | Loss: 0.3997


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3361 | Average Valid Loss: 0.3932 | Time: 114.81s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.10s | Loss: 0.2561 Grad: 99968.0703 LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.54s | Loss: 0.2948 Grad: 74926.5391 LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 15.07s | Loss: 0.2923 Grad: 109156.3672 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.68s | Loss: 0.2967 Grad: 92943.8750 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 30.19s | Loss: 0.2926 Grad: 62764.7578 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.71s | Loss: 0.2901 Grad: 70058.4062 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 45.15s | Loss: 0.2886 Grad: 54229.7305 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 52.58s | Loss: 0.2907 Grad: 56368.3672 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 60.02s | Loss: 0.2894 Grad: 107289.3672 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 67.43s | Loss: 0.2862 Grad: 83178.8750 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 74.87s | Loss: 0.2864 Grad: 53299.7305 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 82.29s | Loss: 0.2859 Grad: 36402.0430 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 89.7

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.11s | Loss: 0.3456
Epoch: [4][50/225]Elapsed 4.93s | Loss: 0.3929
Epoch: [4][100/225]Elapsed 9.74s | Loss: 0.3963
Epoch: [4][150/225]Elapsed 14.54s | Loss: 0.4005
Epoch: [4][200/225]Elapsed 19.37s | Loss: 0.3944


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2853 | Average Valid Loss: 0.3906 | Time: 114.73s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.10s | Loss: 0.2441 Grad: 137094.2031 LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.49s | Loss: 0.2628 Grad: 56588.2617 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 14.97s | Loss: 0.2562 Grad: 88446.6406 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.45s | Loss: 0.2627 Grad: 80484.4297 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 29.93s | Loss: 0.2580 Grad: 72883.6953 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.40s | Loss: 0.2571 Grad: 87915.1328 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.84s | Loss: 0.2568 Grad: 35048.8945 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 52.27s | Loss: 0.2574 Grad: 31129.7930 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.71s | Loss: 0.2550 Grad: 42705.0352 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 67.16s | Loss: 0.2531 Grad: 30426.4824 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 74.58s | Loss: 0.2539 Grad: 34987.6836 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 82.00s | Loss: 0.2538 Grad: 27747.1406 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 89.42

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.3287
Epoch: [5][50/225]Elapsed 4.90s | Loss: 0.4112
Epoch: [5][100/225]Elapsed 9.70s | Loss: 0.4112
Epoch: [5][150/225]Elapsed 14.51s | Loss: 0.4141
Epoch: [5][200/225]Elapsed 19.32s | Loss: 0.4113


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2526 | Average Valid Loss: 0.4077 | Time: 114.40s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.10s | Loss: 0.2220 Grad: inf LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.47s | Loss: 0.2278 Grad: 63433.8828 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 14.93s | Loss: 0.2250 Grad: 114993.3750 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.43s | Loss: 0.2273 Grad: 71642.6406 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 30.02s | Loss: 0.2279 Grad: 69376.9922 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.49s | Loss: 0.2271 Grad: 62211.8750 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 44.95s | Loss: 0.2251 Grad: 50644.0195 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.40s | Loss: 0.2264 Grad: 29989.2480 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.90s | Loss: 0.2240 Grad: 70087.8203 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 67.33s | Loss: 0.2225 Grad: 29710.6445 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.75s | Loss: 0.2233 Grad: 40070.1836 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 82.16s | Loss: 0.2238 Grad: 42524.0547 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.56s | Los

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.11s | Loss: 0.3399
Epoch: [6][50/225]Elapsed 4.92s | Loss: 0.4031
Epoch: [6][100/225]Elapsed 9.73s | Loss: 0.4057
Epoch: [6][150/225]Elapsed 14.53s | Loss: 0.4107
Epoch: [6][200/225]Elapsed 19.35s | Loss: 0.4112


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2233 | Average Valid Loss: 0.4092 | Time: 114.56s
Fold 2 Valid Loss: (Easy) 0.7717 | (Hard) 0.5921
Elapse: 11.47 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymasking_regularized_fold_2_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.10s | Loss: 0.4126 Grad: nan LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.47s | Loss: 0.4005 Grad: 60329.0078 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 14.93s | Loss: 0.3758 Grad: 42237.3789 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.39s | Loss: 0.3494 Grad: 37713.8438 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 29.84s | Loss: 0.3286 Grad: 34128.4844 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.29s | Loss: 0.3109 Grad: 19578.7500 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 41.06s | Loss: 0.3032 Grad: 42286.1875 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.3885
Epoch: [1][50/225]Elapsed 4.92s | Loss: 0.4313
Epoch: [1][100/225]Elapsed 9.73s | Loss: 0.4304
Epoch: [1][150/225]Elapsed 14.54s | Loss: 0.4371
Epoch: [1][200/225]Elapsed 19.36s | Loss: 0.3964


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3032 | Average Valid Loss: 0.3799 | Time: 62.89s
Best model found in epoch 1 | valid loss: 0.3799


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.10s | Loss: 0.2166 Grad: 103307.0625 LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.49s | Loss: 0.2273 Grad: 104654.4609 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 14.95s | Loss: 0.2237 Grad: 111968.2266 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.40s | Loss: 0.2243 Grad: 127680.3438 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 29.84s | Loss: 0.2215 Grad: 89686.5781 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.28s | Loss: 0.2181 Grad: 79052.5312 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 41.06s | Loss: 0.2161 Grad: 96524.5391 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.4193
Epoch: [2][50/225]Elapsed 4.91s | Loss: 0.4239
Epoch: [2][100/225]Elapsed 9.72s | Loss: 0.4232
Epoch: [2][150/225]Elapsed 14.54s | Loss: 0.4281
Epoch: [2][200/225]Elapsed 19.36s | Loss: 0.3855


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2161 | Average Valid Loss: 0.3680 | Time: 62.90s
Best model found in epoch 2 | valid loss: 0.3680


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.10s | Loss: 0.1959 Grad: 92328.6797 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.49s | Loss: 0.2019 Grad: 87683.0703 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 14.95s | Loss: 0.1976 Grad: 92484.4375 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.39s | Loss: 0.1979 Grad: 77758.9141 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 29.82s | Loss: 0.1973 Grad: 49590.7148 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.23s | Loss: 0.1951 Grad: 31714.2871 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.00s | Loss: 0.1939 Grad: 62319.5977 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.4221
Epoch: [3][50/225]Elapsed 4.91s | Loss: 0.4269
Epoch: [3][100/225]Elapsed 9.72s | Loss: 0.4263
Epoch: [3][150/225]Elapsed 14.53s | Loss: 0.4319
Epoch: [3][200/225]Elapsed 19.35s | Loss: 0.3893


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1939 | Average Valid Loss: 0.3714 | Time: 62.83s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.10s | Loss: 0.2098 Grad: 84611.8516 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.48s | Loss: 0.1855 Grad: 76040.2500 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 14.91s | Loss: 0.1828 Grad: 70294.7344 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.35s | Loss: 0.1827 Grad: 92194.9141 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 29.78s | Loss: 0.1819 Grad: 68562.7500 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.19s | Loss: 0.1792 Grad: 63279.1641 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 40.96s | Loss: 0.1779 Grad: 148717.0156 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.4271
Epoch: [4][50/225]Elapsed 4.91s | Loss: 0.4231
Epoch: [4][100/225]Elapsed 9.72s | Loss: 0.4250
Epoch: [4][150/225]Elapsed 14.53s | Loss: 0.4295
Epoch: [4][200/225]Elapsed 19.37s | Loss: 0.3887


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1779 | Average Valid Loss: 0.3713 | Time: 62.80s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.10s | Loss: 0.1546 Grad: 102110.0234 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.47s | Loss: 0.1687 Grad: 100745.1719 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 14.92s | Loss: 0.1676 Grad: 89602.1797 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.37s | Loss: 0.1685 Grad: 65928.7812 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 29.80s | Loss: 0.1677 Grad: 114546.6953 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.23s | Loss: 0.1656 Grad: 67155.1953 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.02s | Loss: 0.1647 Grad: 114515.9531 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.4144
Epoch: [5][50/225]Elapsed 4.91s | Loss: 0.4120
Epoch: [5][100/225]Elapsed 9.72s | Loss: 0.4143
Epoch: [5][150/225]Elapsed 14.53s | Loss: 0.4194
Epoch: [5][200/225]Elapsed 19.35s | Loss: 0.3790


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1647 | Average Valid Loss: 0.3621 | Time: 62.84s
Best model found in epoch 5 | valid loss: 0.3621


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.10s | Loss: 0.1653 Grad: 84360.8672 LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.55s | Loss: 0.1584 Grad: 84387.0000 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 15.05s | Loss: 0.1566 Grad: 90069.8203 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.54s | Loss: 0.1563 Grad: 57023.3867 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 30.03s | Loss: 0.1553 Grad: 92074.7344 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.51s | Loss: 0.1538 Grad: 74824.9609 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 41.30s | Loss: 0.1530 Grad: 109172.1094 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.4185
Epoch: [6][50/225]Elapsed 4.92s | Loss: 0.4109
Epoch: [6][100/225]Elapsed 9.73s | Loss: 0.4115
Epoch: [6][150/225]Elapsed 14.55s | Loss: 0.4173
Epoch: [6][200/225]Elapsed 19.39s | Loss: 0.3788


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1530 | Average Valid Loss: 0.3620 | Time: 63.17s
Best model found in epoch 6 | valid loss: 0.3620
Fold 2 Valid Loss: (Easy) 0.7918 | (Hard) 0.4190
Elapse: 6.30 min 
Fold: 3 || Valid size 3595 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 0.11s | Loss: 0.8071 Grad: 56681.9375 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 7.47s | Loss: 0.8235 Grad: 75427.5547 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 15.00s | Loss: 0.8151 Grad: 75355.6016 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.53s | Loss: 0.8092 Grad: 68175.9766 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 30.04s | Loss: 0.8019 Grad: 84742.8828 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 37.52s | Loss: 0.7911 Grad: 91319.9766 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 44.99s | Loss: 0.7780 Grad: 86660.9141 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 52.45s | Loss: 0.7626 Grad: 85013.7266 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 59.90s | Loss: 0.7463 Grad: 112828.6406 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 67.34s | Loss: 0.7284 Grad: 161642.7031 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 74.77s | Loss: 0.7124 Grad: 129863.7812 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 82.21s | Loss: 0.6967 Grad: 62008.5352 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 89.

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.3210
Epoch: [1][50/225]Elapsed 4.90s | Loss: 0.4854
Epoch: [1][100/225]Elapsed 9.70s | Loss: 0.4903
Epoch: [1][150/225]Elapsed 14.50s | Loss: 0.4915
Epoch: [1][200/225]Elapsed 19.29s | Loss: 0.4550


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6743 | Average Valid Loss: 0.4412 | Time: 114.59s
Best model found in epoch 1 | valid loss: 0.4412


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.10s | Loss: 0.4925 Grad: 142003.3281 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.53s | Loss: 0.4414 Grad: 108199.6016 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 15.03s | Loss: 0.4378 Grad: 75036.5547 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.52s | Loss: 0.4425 Grad: 85748.9375 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 30.00s | Loss: 0.4397 Grad: 117653.7031 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 37.48s | Loss: 0.4302 Grad: 96392.5469 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 44.95s | Loss: 0.4277 Grad: 87809.4844 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 52.40s | Loss: 0.4245 Grad: 80133.6641 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 59.85s | Loss: 0.4195 Grad: 94259.6953 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 67.31s | Loss: 0.4137 Grad: 100417.7812 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 74.75s | Loss: 0.4128 Grad: 43856.7578 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 82.17s | Loss: 0.4105 Grad: 33022.3555 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 89

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.11s | Loss: 0.2977
Epoch: [2][50/225]Elapsed 4.90s | Loss: 0.4042
Epoch: [2][100/225]Elapsed 9.70s | Loss: 0.4095
Epoch: [2][150/225]Elapsed 14.50s | Loss: 0.4100
Epoch: [2][200/225]Elapsed 19.30s | Loss: 0.3839


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4070 | Average Valid Loss: 0.3741 | Time: 114.62s
Best model found in epoch 2 | valid loss: 0.3741


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.11s | Loss: 0.4241 Grad: inf LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.54s | Loss: 0.3365 Grad: 87448.8672 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 15.04s | Loss: 0.3349 Grad: 59634.7422 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.56s | Loss: 0.3441 Grad: 96537.9297 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 30.02s | Loss: 0.3409 Grad: 116119.0469 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.48s | Loss: 0.3378 Grad: 94611.9141 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 44.94s | Loss: 0.3374 Grad: 32601.0527 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 52.38s | Loss: 0.3354 Grad: 37633.7188 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.80s | Loss: 0.3333 Grad: 37113.2031 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 67.23s | Loss: 0.3297 Grad: 45700.6289 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 74.65s | Loss: 0.3299 Grad: 49161.8516 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 82.07s | Loss: 0.3304 Grad: 32452.8320 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 89.49s | Los

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.2881
Epoch: [3][50/225]Elapsed 4.90s | Loss: 0.3992
Epoch: [3][100/225]Elapsed 9.69s | Loss: 0.3977
Epoch: [3][150/225]Elapsed 14.50s | Loss: 0.3999
Epoch: [3][200/225]Elapsed 19.31s | Loss: 0.3793


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3292 | Average Valid Loss: 0.3722 | Time: 114.45s
Best model found in epoch 3 | valid loss: 0.3722


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.10s | Loss: 0.3340 Grad: 144102.5156 LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.51s | Loss: 0.2840 Grad: 92824.4922 LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 14.99s | Loss: 0.2840 Grad: 94238.6406 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.47s | Loss: 0.2937 Grad: 99510.3750 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 29.94s | Loss: 0.2918 Grad: 98769.4219 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.40s | Loss: 0.2891 Grad: 36087.4648 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 44.88s | Loss: 0.2908 Grad: 55202.4961 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 52.32s | Loss: 0.2891 Grad: 20939.6992 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 59.76s | Loss: 0.2870 Grad: 40792.0625 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 67.17s | Loss: 0.2832 Grad: 51698.8203 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 74.59s | Loss: 0.2836 Grad: 39727.1719 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 82.03s | Loss: 0.2848 Grad: 34145.7344 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 89.44

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.11s | Loss: 0.3168
Epoch: [4][50/225]Elapsed 4.91s | Loss: 0.3872
Epoch: [4][100/225]Elapsed 9.71s | Loss: 0.3871
Epoch: [4][150/225]Elapsed 14.51s | Loss: 0.3891
Epoch: [4][200/225]Elapsed 19.32s | Loss: 0.3756


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2845 | Average Valid Loss: 0.3711 | Time: 114.43s
Best model found in epoch 4 | valid loss: 0.3711


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.10s | Loss: 0.3361 Grad: 170578.5469 LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.55s | Loss: 0.2494 Grad: 92229.4844 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 15.03s | Loss: 0.2550 Grad: 39197.8164 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.63s | Loss: 0.2636 Grad: 59729.1719 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 30.09s | Loss: 0.2621 Grad: 98212.9297 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.54s | Loss: 0.2598 Grad: 51410.2773 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.99s | Loss: 0.2601 Grad: 46727.7695 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 52.44s | Loss: 0.2581 Grad: 39333.8555 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.88s | Loss: 0.2554 Grad: 58909.5586 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 67.29s | Loss: 0.2518 Grad: 40708.9336 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 74.70s | Loss: 0.2515 Grad: 47923.7891 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 82.12s | Loss: 0.2510 Grad: 32817.4453 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 89.53

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.3108
Epoch: [5][50/225]Elapsed 4.88s | Loss: 0.3949
Epoch: [5][100/225]Elapsed 9.65s | Loss: 0.3942
Epoch: [5][150/225]Elapsed 14.43s | Loss: 0.3979
Epoch: [5][200/225]Elapsed 19.21s | Loss: 0.3873


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2515 | Average Valid Loss: 0.3836 | Time: 114.39s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.10s | Loss: 0.3335 Grad: nan LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.51s | Loss: 0.2295 Grad: 65706.7891 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 15.01s | Loss: 0.2261 Grad: 39899.6328 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.48s | Loss: 0.2332 Grad: 89253.2500 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 29.96s | Loss: 0.2304 Grad: 57811.9492 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.42s | Loss: 0.2283 Grad: 37294.6016 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 44.87s | Loss: 0.2283 Grad: 31144.8770 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.32s | Loss: 0.2264 Grad: 22796.5508 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.77s | Loss: 0.2248 Grad: 25940.8809 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 67.20s | Loss: 0.2225 Grad: 27759.6777 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.63s | Loss: 0.2229 Grad: 29006.5645 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 82.04s | Loss: 0.2222 Grad: 22640.0645 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.44s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3335
Epoch: [6][50/225]Elapsed 4.88s | Loss: 0.4057
Epoch: [6][100/225]Elapsed 9.66s | Loss: 0.4057
Epoch: [6][150/225]Elapsed 14.44s | Loss: 0.4074
Epoch: [6][200/225]Elapsed 19.22s | Loss: 0.3996


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2224 | Average Valid Loss: 0.3971 | Time: 114.31s
Fold 3 Valid Loss: (Easy) 0.7104 | (Hard) 0.6014
Elapse: 11.46 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymasking_regularized_fold_3_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.10s | Loss: 0.4914 Grad: nan LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.46s | Loss: 0.4076 Grad: 52081.0625 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 14.95s | Loss: 0.3746 Grad: 59001.4258 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.43s | Loss: 0.3505 Grad: 33746.2891 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 29.89s | Loss: 0.3300 Grad: 27624.1074 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.34s | Loss: 0.3137 Grad: 23597.0312 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 41.13s | Loss: 0.3057 Grad: 34472.6133 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.11s | Loss: 0.3532
Epoch: [1][50/225]Elapsed 4.89s | Loss: 0.4348
Epoch: [1][100/225]Elapsed 9.68s | Loss: 0.4407
Epoch: [1][150/225]Elapsed 14.46s | Loss: 0.4423
Epoch: [1][200/225]Elapsed 19.25s | Loss: 0.3955


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3057 | Average Valid Loss: 0.3773 | Time: 62.84s
Best model found in epoch 1 | valid loss: 0.3773


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.10s | Loss: 0.2783 Grad: inf LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.55s | Loss: 0.2316 Grad: 54934.4297 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 15.07s | Loss: 0.2233 Grad: 59994.3047 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.57s | Loss: 0.2213 Grad: 47690.7852 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 30.04s | Loss: 0.2205 Grad: 41387.6797 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.50s | Loss: 0.2180 Grad: 55251.9141 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 41.29s | Loss: 0.2158 Grad: 66260.8281 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.3515
Epoch: [2][50/225]Elapsed 4.89s | Loss: 0.4257
Epoch: [2][100/225]Elapsed 9.67s | Loss: 0.4311
Epoch: [2][150/225]Elapsed 14.45s | Loss: 0.4333
Epoch: [2][200/225]Elapsed 19.23s | Loss: 0.3862


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2158 | Average Valid Loss: 0.3677 | Time: 62.99s
Best model found in epoch 2 | valid loss: 0.3677


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.10s | Loss: 0.2499 Grad: 132886.4375 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.55s | Loss: 0.2067 Grad: 51808.4844 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 15.07s | Loss: 0.1994 Grad: 51422.5977 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.59s | Loss: 0.1962 Grad: 58891.8672 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 30.08s | Loss: 0.1962 Grad: 40505.0117 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.55s | Loss: 0.1953 Grad: 41967.5000 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.34s | Loss: 0.1937 Grad: 57312.7266 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3594
Epoch: [3][50/225]Elapsed 4.90s | Loss: 0.4110
Epoch: [3][100/225]Elapsed 9.68s | Loss: 0.4163
Epoch: [3][150/225]Elapsed 14.45s | Loss: 0.4176
Epoch: [3][200/225]Elapsed 19.24s | Loss: 0.3729


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1937 | Average Valid Loss: 0.3553 | Time: 63.05s
Best model found in epoch 3 | valid loss: 0.3553


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.10s | Loss: 0.2216 Grad: 106404.8672 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.53s | Loss: 0.1865 Grad: 54526.3789 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 15.03s | Loss: 0.1812 Grad: 48116.0742 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.55s | Loss: 0.1797 Grad: 51689.0938 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 30.06s | Loss: 0.1798 Grad: 53267.5508 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.54s | Loss: 0.1793 Grad: 55354.0547 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 41.35s | Loss: 0.1781 Grad: 49070.4727 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.11s | Loss: 0.3523
Epoch: [4][50/225]Elapsed 4.89s | Loss: 0.4043
Epoch: [4][100/225]Elapsed 9.67s | Loss: 0.4084
Epoch: [4][150/225]Elapsed 14.46s | Loss: 0.4109
Epoch: [4][200/225]Elapsed 19.26s | Loss: 0.3678


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1781 | Average Valid Loss: 0.3509 | Time: 63.09s
Best model found in epoch 4 | valid loss: 0.3509


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.10s | Loss: 0.2086 Grad: 109730.6797 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.56s | Loss: 0.1772 Grad: 45801.2461 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 15.09s | Loss: 0.1714 Grad: 47489.1523 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.60s | Loss: 0.1684 Grad: 47065.4688 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 30.11s | Loss: 0.1683 Grad: 48507.8203 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.60s | Loss: 0.1676 Grad: 44410.2617 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.40s | Loss: 0.1667 Grad: 51129.4297 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.11s | Loss: 0.3604
Epoch: [5][50/225]Elapsed 4.89s | Loss: 0.3988
Epoch: [5][100/225]Elapsed 9.67s | Loss: 0.4026
Epoch: [5][150/225]Elapsed 14.45s | Loss: 0.4051
Epoch: [5][200/225]Elapsed 19.24s | Loss: 0.3640


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1667 | Average Valid Loss: 0.3476 | Time: 63.10s
Best model found in epoch 5 | valid loss: 0.3476


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.12s | Loss: 0.2185 Grad: inf LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.57s | Loss: 0.1635 Grad: 42386.9375 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 15.07s | Loss: 0.1574 Grad: 57240.2266 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.58s | Loss: 0.1563 Grad: 37466.2891 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 30.06s | Loss: 0.1563 Grad: 43469.8164 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.56s | Loss: 0.1563 Grad: 54923.1406 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 41.36s | Loss: 0.1555 Grad: 39737.1992 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3589
Epoch: [6][50/225]Elapsed 4.88s | Loss: 0.4011
Epoch: [6][100/225]Elapsed 9.66s | Loss: 0.4051
Epoch: [6][150/225]Elapsed 14.45s | Loss: 0.4082
Epoch: [6][200/225]Elapsed 19.23s | Loss: 0.3670


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1555 | Average Valid Loss: 0.3507 | Time: 63.05s
Fold 3 Valid Loss: (Easy) 0.7635 | (Hard) 0.3915
Elapse: 6.31 min 
Fold: 4 || Valid size 3595 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 0.11s | Loss: 0.8064 Grad: 64061.0078 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 7.50s | Loss: 0.8167 Grad: 64793.3594 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 15.00s | Loss: 0.8110 Grad: 68399.2422 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.51s | Loss: 0.8051 Grad: 68088.0859 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 29.99s | Loss: 0.7993 Grad: 83308.2188 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 37.45s | Loss: 0.7879 Grad: 127577.2500 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 44.91s | Loss: 0.7774 Grad: 60354.3398 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 52.34s | Loss: 0.7636 Grad: 52143.8320 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 59.78s | Loss: 0.7477 Grad: 68086.3750 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 67.23s | Loss: 0.7309 Grad: 58977.1641 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 74.66s | Loss: 0.7159 Grad: 71876.6562 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 82.11s | Loss: 0.7022 Grad: 50506.0781 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 89.55

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.11s | Loss: 0.3707
Epoch: [1][50/225]Elapsed 4.92s | Loss: 0.4399
Epoch: [1][100/225]Elapsed 9.73s | Loss: 0.4524
Epoch: [1][150/225]Elapsed 14.54s | Loss: 0.4551
Epoch: [1][200/225]Elapsed 19.36s | Loss: 0.4258


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6822 | Average Valid Loss: 0.4177 | Time: 114.59s
Best model found in epoch 1 | valid loss: 0.4177


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.11s | Loss: 0.5644 Grad: 126046.0938 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.54s | Loss: 0.4729 Grad: 25402.2656 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 15.04s | Loss: 0.4670 Grad: 41572.3711 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.51s | Loss: 0.4720 Grad: 57585.6211 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 30.00s | Loss: 0.4610 Grad: 55915.7422 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 37.45s | Loss: 0.4547 Grad: 32823.9258 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 44.90s | Loss: 0.4509 Grad: 37385.5078 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 52.34s | Loss: 0.4454 Grad: 38437.6133 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 59.77s | Loss: 0.4389 Grad: 66606.9531 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 67.19s | Loss: 0.4333 Grad: 26378.1367 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 74.62s | Loss: 0.4289 Grad: 46952.6680 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 82.04s | Loss: 0.4264 Grad: 26605.4102 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 89.48

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.11s | Loss: 0.3615
Epoch: [2][50/225]Elapsed 4.92s | Loss: 0.3967
Epoch: [2][100/225]Elapsed 9.73s | Loss: 0.4063
Epoch: [2][150/225]Elapsed 14.53s | Loss: 0.4084
Epoch: [2][200/225]Elapsed 19.35s | Loss: 0.3906


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.4215 | Average Valid Loss: 0.3893 | Time: 114.49s
Best model found in epoch 2 | valid loss: 0.3893


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.10s | Loss: 0.4914 Grad: 216018.9062 LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.53s | Loss: 0.3514 Grad: 92759.4375 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 15.03s | Loss: 0.3505 Grad: 77074.5781 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.51s | Loss: 0.3612 Grad: 70239.0078 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 29.97s | Loss: 0.3559 Grad: 32633.0410 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.43s | Loss: 0.3519 Grad: 30817.7168 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 44.88s | Loss: 0.3511 Grad: 38716.0117 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 52.34s | Loss: 0.3508 Grad: 32475.0938 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.80s | Loss: 0.3456 Grad: 40082.5117 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 67.24s | Loss: 0.3428 Grad: 47680.5625 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 74.68s | Loss: 0.3412 Grad: 48697.9336 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 82.11s | Loss: 0.3408 Grad: 30796.3965 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 89.54

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3865
Epoch: [3][50/225]Elapsed 4.91s | Loss: 0.3726
Epoch: [3][100/225]Elapsed 9.71s | Loss: 0.3811
Epoch: [3][150/225]Elapsed 14.51s | Loss: 0.3816
Epoch: [3][200/225]Elapsed 19.33s | Loss: 0.3709


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3391 | Average Valid Loss: 0.3713 | Time: 114.54s
Best model found in epoch 3 | valid loss: 0.3713


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.10s | Loss: 0.3933 Grad: inf LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.54s | Loss: 0.2988 Grad: 78865.7266 LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 15.03s | Loss: 0.2971 Grad: 38417.6875 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.52s | Loss: 0.3112 Grad: 69769.7031 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 30.00s | Loss: 0.3074 Grad: 43266.7891 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.47s | Loss: 0.3051 Grad: 30268.7402 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 44.93s | Loss: 0.3035 Grad: 31173.2520 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 52.37s | Loss: 0.3028 Grad: 40134.3594 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 59.82s | Loss: 0.2994 Grad: 42471.0859 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 67.29s | Loss: 0.2965 Grad: 38743.5781 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 74.73s | Loss: 0.2949 Grad: 44534.4453 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 82.17s | Loss: 0.2947 Grad: 37375.7891 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 89.59s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.3761
Epoch: [4][50/225]Elapsed 4.92s | Loss: 0.3772
Epoch: [4][100/225]Elapsed 9.73s | Loss: 0.3843
Epoch: [4][150/225]Elapsed 14.53s | Loss: 0.3863
Epoch: [4][200/225]Elapsed 19.35s | Loss: 0.3813


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2928 | Average Valid Loss: 0.3845 | Time: 114.62s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.11s | Loss: 0.4358 Grad: nan LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.54s | Loss: 0.2699 Grad: 27891.8535 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 15.04s | Loss: 0.2664 Grad: 36372.7109 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.54s | Loss: 0.2738 Grad: 44308.2305 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 30.03s | Loss: 0.2711 Grad: 34494.0664 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.49s | Loss: 0.2683 Grad: 31820.2012 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.95s | Loss: 0.2657 Grad: 29989.5527 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 52.46s | Loss: 0.2643 Grad: 31635.4199 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.93s | Loss: 0.2606 Grad: 37056.6250 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 67.35s | Loss: 0.2583 Grad: 35950.8125 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 74.77s | Loss: 0.2571 Grad: 41355.3945 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 82.20s | Loss: 0.2575 Grad: 50405.0977 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 89.63s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.11s | Loss: 0.3592
Epoch: [5][50/225]Elapsed 4.94s | Loss: 0.3537
Epoch: [5][100/225]Elapsed 9.75s | Loss: 0.3622
Epoch: [5][150/225]Elapsed 14.59s | Loss: 0.3658
Epoch: [5][200/225]Elapsed 19.48s | Loss: 0.3638


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2569 | Average Valid Loss: 0.3682 | Time: 114.81s
Best model found in epoch 5 | valid loss: 0.3682


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.10s | Loss: 0.5477 Grad: nan LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.54s | Loss: 0.2488 Grad: 70518.7969 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 15.03s | Loss: 0.2432 Grad: 63162.6719 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.50s | Loss: 0.2468 Grad: 49200.3281 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 29.95s | Loss: 0.2422 Grad: 32411.5703 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.39s | Loss: 0.2409 Grad: 33370.3203 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 44.83s | Loss: 0.2405 Grad: 32139.3984 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.26s | Loss: 0.2384 Grad: 24262.8008 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.69s | Loss: 0.2355 Grad: 63933.6250 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 67.11s | Loss: 0.2330 Grad: 50096.5234 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.54s | Loss: 0.2319 Grad: 45208.4766 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 81.97s | Loss: 0.2319 Grad: 27614.0137 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.39s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3604
Epoch: [6][50/225]Elapsed 4.92s | Loss: 0.3625
Epoch: [6][100/225]Elapsed 9.73s | Loss: 0.3747
Epoch: [6][150/225]Elapsed 14.55s | Loss: 0.3784
Epoch: [6][200/225]Elapsed 19.38s | Loss: 0.3842


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.2306 | Average Valid Loss: 0.3912 | Time: 114.45s
Fold 4 Valid Loss: (Easy) 0.6664 | (Hard) 0.6624
Elapse: 11.47 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymasking_regularized_fold_4_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.11s | Loss: 0.5297 Grad: nan LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.49s | Loss: 0.4098 Grad: 59904.8633 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 15.00s | Loss: 0.3807 Grad: 41834.5195 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.51s | Loss: 0.3575 Grad: 35087.8359 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 30.01s | Loss: 0.3362 Grad: 18159.8203 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.49s | Loss: 0.3191 Grad: 23251.2285 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 41.30s | Loss: 0.3114 Grad: 35215.0078 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.3577
Epoch: [1][50/225]Elapsed 4.93s | Loss: 0.4154
Epoch: [1][100/225]Elapsed 9.75s | Loss: 0.4148
Epoch: [1][150/225]Elapsed 14.57s | Loss: 0.4202
Epoch: [1][200/225]Elapsed 19.39s | Loss: 0.3776


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3114 | Average Valid Loss: 0.3638 | Time: 63.19s
Best model found in epoch 1 | valid loss: 0.3638


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.10s | Loss: 0.2482 Grad: 98170.6875 LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.57s | Loss: 0.2314 Grad: 141291.9844 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 15.08s | Loss: 0.2285 Grad: 40251.2070 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.60s | Loss: 0.2265 Grad: 34137.3359 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 30.08s | Loss: 0.2240 Grad: 27190.9941 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.56s | Loss: 0.2204 Grad: 36539.2344 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 41.38s | Loss: 0.2192 Grad: 58187.1367 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.11s | Loss: 0.3483
Epoch: [2][50/225]Elapsed 4.93s | Loss: 0.4068
Epoch: [2][100/225]Elapsed 9.78s | Loss: 0.4064
Epoch: [2][150/225]Elapsed 14.58s | Loss: 0.4112
Epoch: [2][200/225]Elapsed 19.40s | Loss: 0.3684


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2192 | Average Valid Loss: 0.3538 | Time: 63.28s
Best model found in epoch 2 | valid loss: 0.3538


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.10s | Loss: 0.2235 Grad: 87243.9062 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.55s | Loss: 0.2045 Grad: 103060.2812 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 15.08s | Loss: 0.2007 Grad: 81523.5703 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.60s | Loss: 0.1999 Grad: 86186.5547 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 30.07s | Loss: 0.1984 Grad: 64042.3828 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.57s | Loss: 0.1974 Grad: 74504.4219 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.38s | Loss: 0.1960 Grad: 92086.8047 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.11s | Loss: 0.3256
Epoch: [3][50/225]Elapsed 4.94s | Loss: 0.3903
Epoch: [3][100/225]Elapsed 9.77s | Loss: 0.3917
Epoch: [3][150/225]Elapsed 14.59s | Loss: 0.3962
Epoch: [3][200/225]Elapsed 19.43s | Loss: 0.3563


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1960 | Average Valid Loss: 0.3427 | Time: 63.30s
Best model found in epoch 3 | valid loss: 0.3427


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.10s | Loss: 0.1861 Grad: 77468.9375 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.58s | Loss: 0.1913 Grad: 105753.5469 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 15.09s | Loss: 0.1878 Grad: 59374.4727 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.62s | Loss: 0.1855 Grad: 66068.0078 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 30.11s | Loss: 0.1841 Grad: 28814.9258 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.61s | Loss: 0.1816 Grad: 27604.5840 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 41.42s | Loss: 0.1809 Grad: 47479.3594 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.3530
Epoch: [4][50/225]Elapsed 4.92s | Loss: 0.3960
Epoch: [4][100/225]Elapsed 9.74s | Loss: 0.3943
Epoch: [4][150/225]Elapsed 14.56s | Loss: 0.3996
Epoch: [4][200/225]Elapsed 19.39s | Loss: 0.3595


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1809 | Average Valid Loss: 0.3457 | Time: 63.29s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.11s | Loss: 0.2130 Grad: 96472.9297 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.59s | Loss: 0.1773 Grad: 114481.2109 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 15.11s | Loss: 0.1735 Grad: 86380.8594 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.64s | Loss: 0.1719 Grad: 87120.5625 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 30.15s | Loss: 0.1703 Grad: 61511.5977 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.65s | Loss: 0.1690 Grad: 88478.9922 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.45s | Loss: 0.1678 Grad: 142737.7656 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.11s | Loss: 0.3527
Epoch: [5][50/225]Elapsed 4.91s | Loss: 0.4014
Epoch: [5][100/225]Elapsed 9.72s | Loss: 0.3993
Epoch: [5][150/225]Elapsed 14.52s | Loss: 0.4052
Epoch: [5][200/225]Elapsed 19.33s | Loss: 0.3638


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1678 | Average Valid Loss: 0.3492 | Time: 63.26s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.10s | Loss: 0.1456 Grad: 84348.5547 LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.58s | Loss: 0.1621 Grad: 71455.9062 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 15.13s | Loss: 0.1588 Grad: 59666.8164 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.68s | Loss: 0.1596 Grad: 96081.8984 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 30.18s | Loss: 0.1588 Grad: 52496.0664 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.68s | Loss: 0.1571 Grad: 81130.6094 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 41.49s | Loss: 0.1565 Grad: 164283.5781 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3514
Epoch: [6][50/225]Elapsed 4.92s | Loss: 0.3884
Epoch: [6][100/225]Elapsed 9.74s | Loss: 0.3872
Epoch: [6][150/225]Elapsed 14.56s | Loss: 0.3921
Epoch: [6][200/225]Elapsed 19.38s | Loss: 0.3541


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1565 | Average Valid Loss: 0.3406 | Time: 63.36s
Best model found in epoch 6 | valid loss: 0.3406
Fold 4 Valid Loss: (Easy) 0.7402 | (Hard) 0.4020
Elapse: 6.33 min 
Training Complete!
CV Result (Stage=1): 0.7246476499242523
CV Result (Stage=2): 0.678525046121371
Elapse: 88.80 min 


In [None]:
# Smoke test: build a dataset over the "easy" split in 'test' mode, pull one
# sample, and push it through a newly constructed model to inspect shapes.
dataset = CustomDataset(train_easy, TARGETS, ModelConfig, all_specs, all_eegs, mode='test')

# Indexing the dataset yields a pair, unpacked here as (X, y).
X, y = dataset[0]
print(X.shape, y.shape)

# unsqueeze(0) prepends a length-1 axis (presumably the batch axis) before the
# forward pass.
# NOTE(review): this cell calls neither model.eval() nor torch.no_grad(), and
# does not move the model or input to a device — confirm that is acceptable
# for a shape check.
model = CustomModel(ModelConfig, num_classes=6, pretrained=True)
y_pred = model(X.unsqueeze(0))

print(y_pred.shape)

In [None]:
# Load the oof_1.csv / oof_2.csv files of the "ENet_b2_xymasking_cutmix" run.
# NOTE(review): this directory is hard-coded and differs from the MODEL_NAME set
# at the top of the notebook ("ENet_b2_xymasking_regularized") — confirm the
# intended run.
oof_df1 = pd.read_csv("./outputs/ENet_b2_xymasking_cutmix/oof_1.csv")
oof_df2 = pd.read_csv("./outputs/ENet_b2_xymasking_cutmix/oof_2.csv")

# NOTE(review): evaluate_oof is called with a DataFrame here, while the
# evaluate_oof defined further down in this notebook expects a CSV path and
# returns a 3-tuple. Presumably this cell relies on a different evaluate_oof
# (e.g. one provided by the star import) — verify before running.
cv_1 = evaluate_oof(oof_df1)
cv_2 = evaluate_oof(oof_df2)

print(cv_1)
print(cv_2)

In [None]:
from kl_divergence import score as kl_score


def calc_kl_div(p, q, criterion):
    """Evaluate `criterion` on a single (p, q) pair and return a Python float.

    Both arrays are cast to float32 and given a leading axis of length 1.
    `p` is treated as unnormalised scores: it goes through log-softmax along
    dim 1 and is passed as the criterion's input. `q` is passed unchanged as
    the criterion's target.
    """
    scores, target = (
        torch.tensor(values.astype(np.float32))[None] for values in (p, q)
    )
    log_probs = F.log_softmax(scores, dim=1)
    loss = criterion(log_probs, target)
    return loss.item()

def calc_kaggle_score(solution, submission):
    """Score one row with `kl_score` (the `score` function of `kl_divergence`).

    Both arguments are pandas Series holding an id entry followed by the class
    values. Each is turned into a one-row DataFrame whose TARGETS columns are
    cast to float32. The submission's labels are overwritten with
    ['eeg_id'] + TARGETS so both frames share column names.

    Returns whatever `kl_score(solution_frame, submission_frame, 'eeg_id')`
    returns.
    """
    def _one_row_frame(series, relabel):
        # Series -> single-row frame, optionally relabelled, targets as float32.
        frame = series.to_frame().T
        if relabel:
            frame.columns = ['eeg_id'] + TARGETS
        frame[TARGETS] = frame[TARGETS].astype(np.float32)
        return frame

    truth_frame = _one_row_frame(solution, relabel=False)
    pred_frame = _one_row_frame(submission, relabel=True)

    return kl_score(truth_frame, pred_frame, 'eeg_id')

In [None]:
def evaluate_oof(oof_csv_path):
    """Score an out-of-fold prediction CSV with the torch KL loss and `kl_score`.

    Parameters
    ----------
    oof_csv_path : str or path-like
        CSV readable by `pd.read_csv`. It must contain an 'eeg_id' column, the
        TARGETS columns (used as the target distribution) and the TARGETS_PRED
        columns. TARGETS_PRED is treated as raw logits: it is log-softmaxed for
        the KL loss and softmaxed for the Kaggle-style score.

    Returns
    -------
    tuple
        `(oof_df, kl_loss_all, kaggle_score_all)` where
        * `oof_df` is the loaded frame with TARGETS_PRED replaced by softmax
          probabilities plus per-row 'kl_loss' and 'kaggle_score' columns,
        * `kl_loss_all` is the batch-mean `nn.KLDivLoss` over all rows (a tensor),
        * `kaggle_score_all` is the value returned by `kl_score` on all rows.
    """
    oof_df = pd.read_csv(oof_csv_path)
    criterion = nn.KLDivLoss(reduction="batchmean")

    # Per-row KL loss. This must run while TARGETS_PRED still holds logits,
    # because calc_kl_div applies log-softmax itself.
    oof_df["kl_loss"] = oof_df.apply(
        lambda row: calc_kl_div(row[TARGETS_PRED].values, row[TARGETS].values, criterion),
        axis=1,
    )

    logits = torch.tensor(oof_df[TARGETS_PRED].values.astype(np.float32))
    labels = torch.tensor(oof_df[TARGETS].values.astype(np.float32))
    kl_loss_all = criterion(F.log_softmax(logits, dim=1), labels)

    print(f"KL Loss All: {kl_loss_all}")
    print(f"KL Loss Mean: {oof_df['kl_loss'].mean()}")

    # Convert the predicted logits to probabilities for the Kaggle-style metric.
    # The previous code took the softmax of the TARGETS columns here, which
    # replaced the predictions with a transform of the ground truth.
    oof_df[TARGETS_PRED] = F.softmax(logits, dim=1).numpy()

    solution = oof_df[['eeg_id'] + TARGETS].copy()
    submission = oof_df[['eeg_id'] + TARGETS_PRED].copy()
    # The metric is called with frames that share column names.
    submission.columns = ['eeg_id'] + TARGETS

    kaggle_score_all = kl_score(solution, submission, 'eeg_id')

    oof_df['kaggle_score'] = oof_df.apply(
        lambda row: calc_kaggle_score(row[['eeg_id'] + TARGETS], row[['eeg_id'] + TARGETS_PRED]),
        axis=1,
    )

    print(f"Kaggle Score All: {kaggle_score_all}")
    print(f"Kaggle Score Mean: {oof_df['kaggle_score'].mean()}")

    return oof_df, kl_loss_all, kaggle_score_all


In [None]:
# Evaluate the oof_1.csv and oof_2.csv files found under JobConfig.OUTPUT_DIR.
# NOTE: kl_loss_all and kaggle_score_all are rebound by the second call, so
# after this cell they hold the oof_2 values only; oof_1 / oof_2 keep both frames.
oof_1, kl_loss_all, kaggle_score_all = evaluate_oof(f"{JobConfig.OUTPUT_DIR}/oof_1.csv")
oof_2, kl_loss_all, kaggle_score_all = evaluate_oof(f"{JobConfig.OUTPUT_DIR}/oof_2.csv")

In [None]:
fig, axes = plt.subplots(4, 4, figsize=(10, 10), sharex=True, sharey=True)

# Draw one random row of oof_1 for each panel of the 4x4 grid.
rows = oof_1.sample(axes.size)

for i, (idx, row) in enumerate(rows.iterrows()):
    ax = axes.flat[i]

    # True vs. predicted values for this row, one point per class.
    ax.plot(row[TARGETS].values, label='True')
    ax.plot(row[TARGETS_PRED].values, label='Pred')

    # Panel title: row index, target label and the per-row KL loss.
    ax.set_title(f"{idx} | {row['target']} | KL: {row['kl_loss']:.4f}")
    ax.set_xticks(range(6))
    ax.set_xticklabels(BRAIN_ACTIVITY)
    ax.grid(True)
    ax.legend()

fig.tight_layout()
fig.savefig(f"{JobConfig.OUTPUT_DIR}/oof_examples_1.png")
plt.show()

In [None]:
fig, axes = plt.subplots(4, 4, figsize=(10, 10), sharex=True, sharey=True)

# Draw one random row of oof_2 for each panel of the 4x4 grid.
rows = oof_2.sample(len(axes.ravel()))

for i, (idx, row) in enumerate(rows.iterrows()):

    ax = axes.ravel()[i]
    # Cast to float so the arithmetic below runs on a numeric array rather
    # than the object-dtype values of a mixed-type row.
    y_true = row[TARGETS].values.astype(float)
    y_pred = row[TARGETS_PRED].values.astype(float)

    # Min-max rescale of the true vector. A flat vector has zero range, which
    # previously meant dividing by zero; it is now drawn as all zeros.
    y_range = y_true.max() - y_true.min()
    if y_range > 0:
        y_norm = (y_true - y_true.min()) / y_range
    else:
        y_norm = np.zeros_like(y_true)

    ax.plot(y_true, label='True')
    ax.plot(y_pred, label='Pred')
    ax.plot(y_norm, "b:", label='True Norm')

    # Panel title: row index, target label and the per-row KL loss.
    ax.set_title(f"{idx} | {row['target']} | KL: {row['kl_loss']:.4f}")
    ax.set_xticks(range(6))
    ax.set_xticklabels(BRAIN_ACTIVITY)
    ax.grid(True)
    ax.legend()

fig.tight_layout()
fig.savefig(f"{JobConfig.OUTPUT_DIR}/oof_examples_2.png")
plt.show()

In [None]:
# Inspect the oof_2 row labelled 6.
row = oof_2.loc[6]

# Range of the predicted values, followed by the values themselves.
pred_values = row[TARGETS_PRED]
min_pred, max_pred = pred_values.min(), pred_values.max()
print(min_pred, max_pred)
print(pred_values)

# Min-max rescale the true values, then renormalise so they sum to 1.
true_values = row[TARGETS]
lowest, highest = true_values.min(), true_values.max()
targets_norm = (true_values - lowest) / (highest - lowest)
targets_norm = targets_norm / targets_norm.sum()

print(targets_norm)