In [1]:
import pandas as pd 
import numpy as np 
from scipy.stats import entropy
import matplotlib.pyplot as plt

from engine_hms_trainer import *
from engine_hms_model import CustomModel, JobConfig, ModelConfig

import torch
from torch import nn
import torch.nn.functional as F

  _torch_pytree._register_pytree_node(


In [2]:
# Seed before anything else is constructed.
# NOTE(review): presumably seeds python/numpy/torch RNGs - confirm in engine_hms_trainer.
seed_everything(JobConfig.SEED)

# --- Experiment overrides on the shared ModelConfig class attributes ---
ModelConfig.EPOCHS = 6
ModelConfig.MODEL_BACKBONE = 'tf_efficientnet_b2'
ModelConfig.MODEL_NAME = "ENet_b2_xymask"  # appears in the checkpoint file names logged during training
ModelConfig.AUGMENT = True

# Input sources: each flag is set exactly once so the effective value is unambiguous.
# Both spectrogram sources are enabled for this run.
ModelConfig.USE_KAGGLE_SPECTROGRAMS = True
ModelConfig.USE_EEG_SPECTROGRAMS = True

# Augmentations applied when AUGMENT is on (names are interpreted by the trainer module).
ModelConfig.AUGMENTATIONS = ['xy_masking']

# Build the predictor from the job-level and model-level configuration classes.
hms_predictor = HMSPredictor(JobConfig, ModelConfig)

****************************************************************************************************
Script Start: Sun Mar 10 00:46:23 2024
Initializing HMS Predictor...
Model Name: ENet_b2_xymask
Drop Rate: 0.15
Drop Path Rate: 0.25
Augment: True
Augmentations: ['xy_masking']
Enropy Split: 5.5
Device: cuda
Output Dir: ./outputs/
****************************************************************************************************


In [3]:
# Load both training splits plus the spectrogram / EEG collections from the predictor.
train_easy, train_hard, all_specs, all_eegs = hms_predictor.load_train_data()

# Shapes of the two splits, one per line.
print(train_easy.shape, train_hard.shape, sep="\n")

# Total count of missing values in each split (0 means no NaN anywhere).
print(
    train_easy.isna().sum().sum(),
    train_hard.isna().sum().sum(),
    sep="\n",
)

# Preview the first rows of each split, separated by a blank-ish line.
display(train_easy.head())
print(" ")
display(train_hard.head())

(12440, 14)
(5536, 14)
0
0


Unnamed: 0,eeg_id,spectrogram_id,min,max,patient_id,target,total_votes,entropy,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,642382,14960202,1008.0,1032.0,5955,Other,2,7.802343,0.0,0.0,0.0,0.0,0.0,1.0
1,751790,618728447,908.0,908.0,38549,GPD,1,7.802343,0.0,0.0,1.0,0.0,0.0,0.0
2,778705,52296320,0.0,0.0,40955,Other,2,7.68682,0.0,0.0,0.0,0.0,0.0,1.0
3,1629671,2036345030,0.0,160.0,37481,Seizure,51,7.619243,1.0,0.0,0.0,0.0,0.0,0.0
4,2061593,320962633,1450.0,1450.0,23828,Other,1,7.802343,0.0,0.0,0.0,0.0,0.0,1.0


 


Unnamed: 0,eeg_id,spectrogram_id,min,max,patient_id,target,total_votes,entropy,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,568657,789577333,0.0,16.0,20654,Other,48,3.341757,0.0,0.0,0.25,0.0,0.166667,0.583333
1,582999,1552638400,0.0,38.0,20230,LPD,154,3.550549,0.0,0.857143,0.0,0.071429,0.0,0.071429
2,1895581,128369999,1138.0,1138.0,47999,Other,13,3.565051,0.076923,0.0,0.0,0.0,0.076923,0.846154
3,2482631,978166025,1902.0,1944.0,20606,Other,105,1.431066,0.0,0.0,0.133333,0.066667,0.133333,0.666667
4,2521897,673742515,0.0,4.0,62117,Other,24,1.516203,0.0,0.0,0.083333,0.083333,0.333333,0.5


In [4]:
# Run training over all folds using the two splits and the spectrogram / EEG
# collections returned by load_train_data(). The captured log below reports a
# "First Stage" and a "Second Stage" per fold, with the second stage starting
# from the stage-1 checkpoint.
# NOTE(review): which split feeds which stage is decided inside train_folds - confirm there.
hms_predictor.train_folds(train_easy, train_hard, all_specs, all_eegs)

Fold: 0 || Valid size 3596 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 1.08s | Loss: 0.8220 Grad: 67731.3828 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 8.17s | Loss: 0.8346 Grad: 85699.5469 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 15.29s | Loss: 0.8281 Grad: 70401.9922 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.41s | Loss: 0.8233 Grad: 75559.6250 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 29.53s | Loss: 0.8153 Grad: 72208.4922 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 36.67s | Loss: 0.8045 Grad: 70698.4688 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 43.82s | Loss: 0.7909 Grad: 78792.6719 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 51.00s | Loss: 0.7736 Grad: 118346.3359 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 58.20s | Loss: 0.7534 Grad: 59210.1992 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 65.43s | Loss: 0.7316 Grad: 76243.6875 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 72.67s | Loss: 0.7124 Grad: 71753.1094 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 79.88s | Loss: 0.6960 Grad: 73817.9453 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 87.10

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.3521
Epoch: [1][50/225]Elapsed 4.94s | Loss: 0.5367
Epoch: [1][100/225]Elapsed 9.77s | Loss: 0.5370
Epoch: [1][150/225]Elapsed 14.60s | Loss: 0.5320
Epoch: [1][200/225]Elapsed 19.46s | Loss: 0.4840


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6699 | Average Valid Loss: 0.4670 | Time: 112.17s
Best model found in epoch 1 | valid loss: 0.4670


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.10s | Loss: 0.3647 Grad: 149451.5312 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.37s | Loss: 0.4326 Grad: 62924.1523 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 14.72s | Loss: 0.4220 Grad: 90355.3750 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.11s | Loss: 0.4204 Grad: 49365.2891 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 29.49s | Loss: 0.4184 Grad: 37034.8984 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 36.86s | Loss: 0.4110 Grad: 31872.4395 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 44.22s | Loss: 0.4057 Grad: 60893.3438 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 51.61s | Loss: 0.4026 Grad: 48400.3359 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 58.95s | Loss: 0.3955 Grad: 23087.1836 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 66.30s | Loss: 0.3881 Grad: 44029.6172 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 73.65s | Loss: 0.3848 Grad: 27803.1016 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 81.00s | Loss: 0.3841 Grad: 50971.1719 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 88.35

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.3426
Epoch: [2][50/225]Elapsed 4.94s | Loss: 0.4441
Epoch: [2][100/225]Elapsed 9.77s | Loss: 0.4462
Epoch: [2][150/225]Elapsed 14.59s | Loss: 0.4484
Epoch: [2][200/225]Elapsed 19.44s | Loss: 0.4165


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.3786 | Average Valid Loss: 0.4081 | Time: 113.43s
Best model found in epoch 2 | valid loss: 0.4081


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.10s | Loss: 0.2989 Grad: 131582.7969 LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.46s | Loss: 0.3011 Grad: 49325.7539 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 14.90s | Loss: 0.2977 Grad: 122732.9766 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.33s | Loss: 0.3050 Grad: 43385.8242 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 29.76s | Loss: 0.3069 Grad: 29812.1562 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.22s | Loss: 0.3058 Grad: 25272.7148 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 44.64s | Loss: 0.3033 Grad: 29568.8418 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 52.06s | Loss: 0.3062 Grad: 48146.9766 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.48s | Loss: 0.3002 Grad: 22185.4883 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 66.91s | Loss: 0.2960 Grad: 30812.9062 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 74.34s | Loss: 0.2942 Grad: 29210.2734 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 81.75s | Loss: 0.2951 Grad: 42554.5703 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 89.1

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3156
Epoch: [3][50/225]Elapsed 4.93s | Loss: 0.4314
Epoch: [3][100/225]Elapsed 9.75s | Loss: 0.4339
Epoch: [3][150/225]Elapsed 14.53s | Loss: 0.4356
Epoch: [3][200/225]Elapsed 19.32s | Loss: 0.4217


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.2932 | Average Valid Loss: 0.4177 | Time: 114.13s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.10s | Loss: 0.1849 Grad: 130271.0078 LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.51s | Loss: 0.2529 Grad: 26867.1777 LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 14.97s | Loss: 0.2470 Grad: 44417.0742 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.42s | Loss: 0.2523 Grad: 58387.7734 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 29.87s | Loss: 0.2543 Grad: 34293.4531 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.30s | Loss: 0.2529 Grad: 18504.4902 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 44.72s | Loss: 0.2527 Grad: 30702.2910 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 52.14s | Loss: 0.2541 Grad: 48462.7578 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 59.55s | Loss: 0.2486 Grad: 24215.0137 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 66.98s | Loss: 0.2434 Grad: 47173.6484 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 74.42s | Loss: 0.2423 Grad: 41595.6172 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 81.85s | Loss: 0.2441 Grad: 39099.7305 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 89.25

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.3225
Epoch: [4][50/225]Elapsed 4.88s | Loss: 0.4401
Epoch: [4][100/225]Elapsed 9.69s | Loss: 0.4433
Epoch: [4][150/225]Elapsed 14.50s | Loss: 0.4436
Epoch: [4][200/225]Elapsed 19.32s | Loss: 0.4325


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2416 | Average Valid Loss: 0.4282 | Time: 114.22s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.10s | Loss: 0.2198 Grad: 162993.0312 LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.54s | Loss: 0.2117 Grad: 29758.4648 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 15.01s | Loss: 0.2048 Grad: 38943.4805 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.52s | Loss: 0.2131 Grad: 39602.3359 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 29.98s | Loss: 0.2162 Grad: 28816.3398 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.42s | Loss: 0.2138 Grad: 25072.4902 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.85s | Loss: 0.2114 Grad: 27906.9082 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 52.29s | Loss: 0.2114 Grad: 36871.9375 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.70s | Loss: 0.2077 Grad: 57041.8750 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 67.12s | Loss: 0.2052 Grad: 57072.8672 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 74.51s | Loss: 0.2049 Grad: 25914.4160 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 81.91s | Loss: 0.2063 Grad: 51203.0312 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 89.35

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.11s | Loss: 0.2984
Epoch: [5][50/225]Elapsed 4.93s | Loss: 0.4490
Epoch: [5][100/225]Elapsed 9.74s | Loss: 0.4495
Epoch: [5][150/225]Elapsed 14.54s | Loss: 0.4525
Epoch: [5][200/225]Elapsed 19.36s | Loss: 0.4454


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2042 | Average Valid Loss: 0.4425 | Time: 114.40s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.11s | Loss: 0.1971 Grad: 229036.6875 LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.48s | Loss: 0.1824 Grad: 27124.2637 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 14.95s | Loss: 0.1768 Grad: 35226.4492 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.39s | Loss: 0.1812 Grad: 41538.5352 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 29.83s | Loss: 0.1840 Grad: 54086.1914 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.27s | Loss: 0.1838 Grad: 35163.1992 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 44.70s | Loss: 0.1813 Grad: 40802.1211 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.13s | Loss: 0.1803 Grad: 55207.8281 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.57s | Loss: 0.1754 Grad: 28926.9434 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 67.01s | Loss: 0.1727 Grad: 31264.4414 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.44s | Loss: 0.1712 Grad: 17770.4707 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 81.87s | Loss: 0.1712 Grad: 43745.3125 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.28

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3582
Epoch: [6][50/225]Elapsed 4.90s | Loss: 0.4707
Epoch: [6][100/225]Elapsed 9.69s | Loss: 0.4787
Epoch: [6][150/225]Elapsed 14.48s | Loss: 0.4782
Epoch: [6][200/225]Elapsed 19.29s | Loss: 0.4763


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1697 | Average Valid Loss: 0.4742 | Time: 114.23s
Fold 0 Valid Loss: 
Easy: 0.8951 | Hard: 0.6388
Elapse: 11.40 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymask_fold_0_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.10s | Loss: 0.6521 Grad: nan LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.44s | Loss: 0.4548 Grad: 94100.8594 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 14.91s | Loss: 0.4342 Grad: 79477.4922 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.38s | Loss: 0.4065 Grad: 46323.3906 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 29.85s | Loss: 0.3745 Grad: 29432.5996 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.30s | Loss: 0.3482 Grad: 25979.0859 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 41.11s | Loss: 0.3374 Grad: 36008.8477 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.11s | Loss: 0.4283
Epoch: [1][50/225]Elapsed 4.92s | Loss: 0.4450
Epoch: [1][100/225]Elapsed 9.72s | Loss: 0.4483
Epoch: [1][150/225]Elapsed 14.54s | Loss: 0.4443
Epoch: [1][200/225]Elapsed 19.35s | Loss: 0.3965


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3374 | Average Valid Loss: 0.3787 | Time: 62.94s
Best model found in epoch 1 | valid loss: 0.3787


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.10s | Loss: 0.3010 Grad: nan LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.49s | Loss: 0.2283 Grad: 73792.9531 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 14.97s | Loss: 0.2174 Grad: 110963.1953 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.42s | Loss: 0.2149 Grad: 54119.9414 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 29.88s | Loss: 0.2120 Grad: 64724.0703 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.34s | Loss: 0.2079 Grad: 41688.7227 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 41.12s | Loss: 0.2067 Grad: 74235.5156 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.11s | Loss: 0.3390
Epoch: [2][50/225]Elapsed 4.93s | Loss: 0.4379
Epoch: [2][100/225]Elapsed 9.74s | Loss: 0.4382
Epoch: [2][150/225]Elapsed 14.55s | Loss: 0.4350
Epoch: [2][200/225]Elapsed 19.38s | Loss: 0.3841


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2067 | Average Valid Loss: 0.3651 | Time: 62.98s
Best model found in epoch 2 | valid loss: 0.3651


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.10s | Loss: 0.2917 Grad: 189928.5938 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.51s | Loss: 0.1907 Grad: 45940.3477 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 15.00s | Loss: 0.1847 Grad: 69405.4219 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.44s | Loss: 0.1843 Grad: 48978.7695 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 29.87s | Loss: 0.1826 Grad: 32440.0586 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.29s | Loss: 0.1800 Grad: 35709.1992 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.06s | Loss: 0.1791 Grad: 77738.2109 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3279
Epoch: [3][50/225]Elapsed 4.91s | Loss: 0.4229
Epoch: [3][100/225]Elapsed 9.72s | Loss: 0.4236
Epoch: [3][150/225]Elapsed 14.53s | Loss: 0.4205
Epoch: [3][200/225]Elapsed 19.36s | Loss: 0.3712


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1791 | Average Valid Loss: 0.3526 | Time: 62.90s
Best model found in epoch 3 | valid loss: 0.3526


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.11s | Loss: 0.1955 Grad: 124167.5781 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.48s | Loss: 0.1677 Grad: 130851.2500 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 14.92s | Loss: 0.1654 Grad: 141889.1250 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.38s | Loss: 0.1646 Grad: 102266.6250 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 29.82s | Loss: 0.1639 Grad: 57032.6875 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.25s | Loss: 0.1617 Grad: 77901.7109 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 41.06s | Loss: 0.1612 Grad: 119717.8359 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.3233
Epoch: [4][50/225]Elapsed 4.92s | Loss: 0.4179
Epoch: [4][100/225]Elapsed 9.75s | Loss: 0.4181
Epoch: [4][150/225]Elapsed 14.58s | Loss: 0.4152
Epoch: [4][200/225]Elapsed 19.41s | Loss: 0.3667


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1612 | Average Valid Loss: 0.3483 | Time: 62.96s
Best model found in epoch 4 | valid loss: 0.3483


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.10s | Loss: 0.1618 Grad: 83491.6250 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.49s | Loss: 0.1542 Grad: 91924.5938 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 14.95s | Loss: 0.1488 Grad: 103052.4688 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.44s | Loss: 0.1489 Grad: 74675.7109 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 29.85s | Loss: 0.1483 Grad: 52619.8203 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.27s | Loss: 0.1469 Grad: 66618.9922 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.04s | Loss: 0.1464 Grad: 117523.7812 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.11s | Loss: 0.3316
Epoch: [5][50/225]Elapsed 4.94s | Loss: 0.4296
Epoch: [5][100/225]Elapsed 9.75s | Loss: 0.4273
Epoch: [5][150/225]Elapsed 14.56s | Loss: 0.4233
Epoch: [5][200/225]Elapsed 19.38s | Loss: 0.3738


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1464 | Average Valid Loss: 0.3549 | Time: 62.91s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.10s | Loss: 0.1440 Grad: 76021.1562 LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.49s | Loss: 0.1365 Grad: 77988.0078 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 14.97s | Loss: 0.1362 Grad: 129757.1172 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.44s | Loss: 0.1367 Grad: 106577.1016 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 29.89s | Loss: 0.1365 Grad: 72352.9766 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.34s | Loss: 0.1341 Grad: 110907.5156 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 41.12s | Loss: 0.1339 Grad: 55976.5547 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.2966
Epoch: [6][50/225]Elapsed 4.91s | Loss: 0.4231
Epoch: [6][100/225]Elapsed 9.72s | Loss: 0.4216
Epoch: [6][150/225]Elapsed 14.54s | Loss: 0.4169
Epoch: [6][200/225]Elapsed 19.37s | Loss: 0.3681


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1339 | Average Valid Loss: 0.3495 | Time: 62.98s
Fold 0 Valid Loss: 
Easy: 0.8287 | Hard: 0.3999
Elapse: 17.70 min 
Fold: 1 || Valid size 3595 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 0.10s | Loss: 0.8383 Grad: 79178.3672 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 7.43s | Loss: 0.8258 Grad: 52341.9453 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 14.86s | Loss: 0.8205 Grad: 75250.2812 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.31s | Loss: 0.8140 Grad: 91786.8750 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 29.75s | Loss: 0.8066 Grad: 84235.8203 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 37.17s | Loss: 0.7937 Grad: 39432.7109 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 44.58s | Loss: 0.7810 Grad: 44135.5938 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 51.97s | Loss: 0.7629 Grad: 54122.6562 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 59.38s | Loss: 0.7434 Grad: 95066.4609 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 66.77s | Loss: 0.7228 Grad: 52241.8828 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 74.16s | Loss: 0.7049 Grad: 46159.8555 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 81.56s | Loss: 0.6864 Grad: 81157.5078 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 88.96s

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.4669
Epoch: [1][50/225]Elapsed 4.89s | Loss: 0.5039
Epoch: [1][100/225]Elapsed 9.68s | Loss: 0.5290
Epoch: [1][150/225]Elapsed 14.47s | Loss: 0.5342
Epoch: [1][200/225]Elapsed 19.29s | Loss: 0.4900


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6622 | Average Valid Loss: 0.4750 | Time: 113.90s
Best model found in epoch 1 | valid loss: 0.4750


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.10s | Loss: 0.4490 Grad: 187277.1562 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.54s | Loss: 0.4302 Grad: 85278.2031 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 15.01s | Loss: 0.4135 Grad: 61995.9531 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.50s | Loss: 0.4132 Grad: 62129.7383 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 29.96s | Loss: 0.4079 Grad: 82348.5078 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 37.39s | Loss: 0.4015 Grad: 81442.2031 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 44.83s | Loss: 0.4008 Grad: 111012.2812 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 52.27s | Loss: 0.3963 Grad: 70084.2188 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 59.68s | Loss: 0.3922 Grad: 91746.7344 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 67.09s | Loss: 0.3873 Grad: 61504.5586 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 74.49s | Loss: 0.3827 Grad: 117298.9531 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 81.89s | Loss: 0.3796 Grad: 35407.4570 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 89.

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.11s | Loss: 0.4865
Epoch: [2][50/225]Elapsed 4.91s | Loss: 0.4300
Epoch: [2][100/225]Elapsed 9.71s | Loss: 0.4448
Epoch: [2][150/225]Elapsed 14.51s | Loss: 0.4502
Epoch: [2][200/225]Elapsed 19.32s | Loss: 0.4295


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.3754 | Average Valid Loss: 0.4224 | Time: 114.27s
Best model found in epoch 2 | valid loss: 0.4224


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.10s | Loss: 0.3772 Grad: inf LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.45s | Loss: 0.3044 Grad: 33182.0586 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 14.88s | Loss: 0.2966 Grad: 21168.6035 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.33s | Loss: 0.3041 Grad: 45705.2656 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 29.78s | Loss: 0.3028 Grad: 54556.9180 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.22s | Loss: 0.3027 Grad: 49356.9922 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 44.66s | Loss: 0.3043 Grad: 51116.7031 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 52.07s | Loss: 0.3043 Grad: 20133.2734 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.50s | Loss: 0.3010 Grad: 58608.6016 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 66.89s | Loss: 0.2974 Grad: 51655.5508 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 74.29s | Loss: 0.2951 Grad: 71698.7656 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 81.69s | Loss: 0.2944 Grad: 22604.4746 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 89.07s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.4712
Epoch: [3][50/225]Elapsed 4.90s | Loss: 0.4197
Epoch: [3][100/225]Elapsed 9.70s | Loss: 0.4292
Epoch: [3][150/225]Elapsed 14.50s | Loss: 0.4313
Epoch: [3][200/225]Elapsed 19.31s | Loss: 0.4171


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.2926 | Average Valid Loss: 0.4127 | Time: 114.02s
Best model found in epoch 3 | valid loss: 0.4127


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.10s | Loss: 0.3196 Grad: nan LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.46s | Loss: 0.2490 Grad: 101750.0859 LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 14.91s | Loss: 0.2493 Grad: 23026.6270 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.37s | Loss: 0.2584 Grad: 41077.5625 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 29.81s | Loss: 0.2563 Grad: 100050.3359 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.25s | Loss: 0.2545 Grad: 44140.0977 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 44.69s | Loss: 0.2555 Grad: 63526.9570 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 52.11s | Loss: 0.2544 Grad: 27388.1836 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 59.53s | Loss: 0.2514 Grad: 66402.0781 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 66.96s | Loss: 0.2490 Grad: 46465.7539 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 74.40s | Loss: 0.2475 Grad: 64375.0156 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 81.84s | Loss: 0.2462 Grad: 20649.5156 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 89.27s | Lo

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.11s | Loss: 0.5776
Epoch: [4][50/225]Elapsed 4.92s | Loss: 0.4338
Epoch: [4][100/225]Elapsed 9.73s | Loss: 0.4424
Epoch: [4][150/225]Elapsed 14.54s | Loss: 0.4448
Epoch: [4][200/225]Elapsed 19.36s | Loss: 0.4263


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2437 | Average Valid Loss: 0.4230 | Time: 114.30s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.10s | Loss: 0.3860 Grad: nan LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.48s | Loss: 0.2166 Grad: 78436.1641 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 14.93s | Loss: 0.2151 Grad: 15394.2139 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.37s | Loss: 0.2187 Grad: 46252.4727 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 29.81s | Loss: 0.2167 Grad: 70281.3125 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.22s | Loss: 0.2146 Grad: 32124.1230 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.63s | Loss: 0.2143 Grad: 67650.8984 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 52.01s | Loss: 0.2147 Grad: 18733.2305 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.40s | Loss: 0.2129 Grad: 36449.6328 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 66.80s | Loss: 0.2113 Grad: 50128.6406 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 74.19s | Loss: 0.2097 Grad: 70481.4375 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 81.57s | Loss: 0.2092 Grad: 38415.9297 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 88.94s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.5431
Epoch: [5][50/225]Elapsed 4.91s | Loss: 0.4396
Epoch: [5][100/225]Elapsed 9.72s | Loss: 0.4463
Epoch: [5][150/225]Elapsed 14.52s | Loss: 0.4474
Epoch: [5][200/225]Elapsed 19.33s | Loss: 0.4313


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2065 | Average Valid Loss: 0.4284 | Time: 113.90s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.10s | Loss: 0.3112 Grad: nan LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.46s | Loss: 0.1840 Grad: 55764.0703 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 14.94s | Loss: 0.1784 Grad: 19264.1113 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.40s | Loss: 0.1816 Grad: 39981.3672 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 29.85s | Loss: 0.1801 Grad: 54573.6250 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.31s | Loss: 0.1792 Grad: 63990.4805 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 44.73s | Loss: 0.1776 Grad: 69162.8281 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.14s | Loss: 0.1782 Grad: 35841.6953 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.56s | Loss: 0.1771 Grad: 65838.2656 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 66.97s | Loss: 0.1762 Grad: 40035.4141 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.37s | Loss: 0.1749 Grad: 84028.4297 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 81.80s | Loss: 0.1744 Grad: 47432.7422 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.17s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.5345
Epoch: [6][50/225]Elapsed 4.91s | Loss: 0.4558
Epoch: [6][100/225]Elapsed 9.72s | Loss: 0.4592
Epoch: [6][150/225]Elapsed 14.52s | Loss: 0.4609
Epoch: [6][200/225]Elapsed 19.34s | Loss: 0.4518


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1733 | Average Valid Loss: 0.4539 | Time: 114.13s
Fold 1 Valid Loss: 
Easy: 0.8578 | Hard: 0.7527
Elapse: 29.12 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymask_fold_1_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.10s | Loss: 0.4653 Grad: nan LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.39s | Loss: 0.4687 Grad: 103882.9062 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 14.81s | Loss: 0.4338 Grad: 84685.9141 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.20s | Loss: 0.3955 Grad: 41306.9180 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 29.62s | Loss: 0.3672 Grad: 54102.8750 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.02s | Loss: 0.3413 Grad: 35566.1250 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 40.78s | Loss: 0.3304 Grad: 33490.4336 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.6134
Epoch: [1][50/225]Elapsed 4.90s | Loss: 0.4732
Epoch: [1][100/225]Elapsed 9.71s | Loss: 0.4831
Epoch: [1][150/225]Elapsed 14.51s | Loss: 0.4827
Epoch: [1][200/225]Elapsed 19.33s | Loss: 0.4219


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3304 | Average Valid Loss: 0.4009 | Time: 62.59s
Best model found in epoch 1 | valid loss: 0.4009


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.10s | Loss: 0.2503 Grad: 131172.5781 LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.46s | Loss: 0.2217 Grad: 61921.0859 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 14.91s | Loss: 0.2103 Grad: 52253.5586 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.34s | Loss: 0.2060 Grad: 54038.9492 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 29.78s | Loss: 0.2047 Grad: 70464.2109 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.20s | Loss: 0.2003 Grad: 57072.2188 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 40.97s | Loss: 0.1982 Grad: 70537.1719 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.6013
Epoch: [2][50/225]Elapsed 4.91s | Loss: 0.4488
Epoch: [2][100/225]Elapsed 9.72s | Loss: 0.4622
Epoch: [2][150/225]Elapsed 14.52s | Loss: 0.4648
Epoch: [2][200/225]Elapsed 19.35s | Loss: 0.4041


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.1982 | Average Valid Loss: 0.3826 | Time: 62.79s
Best model found in epoch 2 | valid loss: 0.3826


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.11s | Loss: 0.2055 Grad: 106657.0781 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.49s | Loss: 0.1825 Grad: 141512.7344 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 14.93s | Loss: 0.1778 Grad: 92359.7500 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.37s | Loss: 0.1766 Grad: 86413.5234 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 29.83s | Loss: 0.1777 Grad: 85443.1094 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.24s | Loss: 0.1766 Grad: 97945.0078 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.01s | Loss: 0.1751 Grad: 66420.7266 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.5344
Epoch: [3][50/225]Elapsed 4.91s | Loss: 0.4332
Epoch: [3][100/225]Elapsed 9.71s | Loss: 0.4514
Epoch: [3][150/225]Elapsed 14.52s | Loss: 0.4537
Epoch: [3][200/225]Elapsed 19.33s | Loss: 0.3948


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1751 | Average Valid Loss: 0.3740 | Time: 62.82s
Best model found in epoch 3 | valid loss: 0.3740


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.10s | Loss: 0.2259 Grad: 117783.5234 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.47s | Loss: 0.1680 Grad: 83320.7500 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 14.92s | Loss: 0.1632 Grad: 117717.6172 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.37s | Loss: 0.1614 Grad: 79086.1484 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 29.78s | Loss: 0.1613 Grad: 119830.4609 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.21s | Loss: 0.1603 Grad: 64011.4766 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 40.98s | Loss: 0.1592 Grad: 92795.0469 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.5568
Epoch: [4][50/225]Elapsed 4.91s | Loss: 0.4394
Epoch: [4][100/225]Elapsed 9.72s | Loss: 0.4580
Epoch: [4][150/225]Elapsed 14.55s | Loss: 0.4621
Epoch: [4][200/225]Elapsed 19.38s | Loss: 0.4010


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1592 | Average Valid Loss: 0.3796 | Time: 62.83s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.11s | Loss: 0.1747 Grad: 116743.9844 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.52s | Loss: 0.1582 Grad: 87591.1953 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 14.97s | Loss: 0.1512 Grad: 84993.4062 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.41s | Loss: 0.1477 Grad: 83281.9141 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 29.84s | Loss: 0.1483 Grad: 81496.0625 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.27s | Loss: 0.1469 Grad: 88470.6875 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.04s | Loss: 0.1454 Grad: 109368.1641 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.5513
Epoch: [5][50/225]Elapsed 4.91s | Loss: 0.4351
Epoch: [5][100/225]Elapsed 9.73s | Loss: 0.4555
Epoch: [5][150/225]Elapsed 14.54s | Loss: 0.4615
Epoch: [5][200/225]Elapsed 19.35s | Loss: 0.4010


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1454 | Average Valid Loss: 0.3797 | Time: 62.86s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.10s | Loss: 0.1687 Grad: 112033.3281 LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.47s | Loss: 0.1388 Grad: 100386.8828 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 14.90s | Loss: 0.1326 Grad: 52016.5234 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.35s | Loss: 0.1313 Grad: 35140.4023 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 29.80s | Loss: 0.1320 Grad: 43564.1523 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.22s | Loss: 0.1318 Grad: 33908.1055 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 40.99s | Loss: 0.1313 Grad: 50782.8203 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.11s | Loss: 0.5456
Epoch: [6][50/225]Elapsed 4.93s | Loss: 0.4307
Epoch: [6][100/225]Elapsed 9.74s | Loss: 0.4506
Epoch: [6][150/225]Elapsed 14.54s | Loss: 0.4568
Epoch: [6][200/225]Elapsed 19.35s | Loss: 0.3973


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1313 | Average Valid Loss: 0.3766 | Time: 62.81s
Fold 1 Valid Loss: 
Easy: 0.9005 | Hard: 0.4055
Elapse: 35.41 min 
Fold: 2 || Valid size 3595 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 0.11s | Loss: 0.8127 Grad: 66017.9922 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 7.49s | Loss: 0.8317 Grad: 68348.2734 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 14.95s | Loss: 0.8265 Grad: 72459.4219 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.45s | Loss: 0.8194 Grad: 74472.0234 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 29.89s | Loss: 0.8124 Grad: 98583.0859 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 37.34s | Loss: 0.8013 Grad: 69353.2500 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 44.76s | Loss: 0.7890 Grad: 92803.4844 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 52.19s | Loss: 0.7730 Grad: 53064.4766 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 59.62s | Loss: 0.7557 Grad: 28716.3223 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 67.07s | Loss: 0.7382 Grad: 34427.3320 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 74.52s | Loss: 0.7213 Grad: 31233.0430 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 81.96s | Loss: 0.7048 Grad: 41050.9336 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 89.38s

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.11s | Loss: 0.5596
Epoch: [1][50/225]Elapsed 4.96s | Loss: 0.4736
Epoch: [1][100/225]Elapsed 9.80s | Loss: 0.4664
Epoch: [1][150/225]Elapsed 14.66s | Loss: 0.4737
Epoch: [1][200/225]Elapsed 19.53s | Loss: 0.4363


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6802 | Average Valid Loss: 0.4211 | Time: 114.59s
Best model found in epoch 1 | valid loss: 0.4211


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.10s | Loss: 0.3970 Grad: 75662.5000 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.49s | Loss: 0.4350 Grad: 51029.7773 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 14.98s | Loss: 0.4240 Grad: 58605.9023 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.46s | Loss: 0.4268 Grad: 37219.4062 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 29.93s | Loss: 0.4204 Grad: 33071.9180 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 37.39s | Loss: 0.4173 Grad: 72890.7109 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 44.85s | Loss: 0.4141 Grad: 42119.5859 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 52.28s | Loss: 0.4135 Grad: 23933.9609 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 59.72s | Loss: 0.4088 Grad: 62980.0781 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 67.19s | Loss: 0.4030 Grad: 36049.7461 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 74.62s | Loss: 0.3974 Grad: 37652.3594 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 82.02s | Loss: 0.3943 Grad: 45082.7656 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 89.42s

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.4158
Epoch: [2][50/225]Elapsed 4.94s | Loss: 0.3883
Epoch: [2][100/225]Elapsed 9.77s | Loss: 0.3852
Epoch: [2][150/225]Elapsed 14.61s | Loss: 0.3915
Epoch: [2][200/225]Elapsed 19.46s | Loss: 0.3767


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.3894 | Average Valid Loss: 0.3687 | Time: 114.53s
Best model found in epoch 2 | valid loss: 0.3687


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.12s | Loss: 0.2546 Grad: 105360.5469 LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.52s | Loss: 0.3112 Grad: 76941.4531 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 14.99s | Loss: 0.3073 Grad: 76430.6484 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.45s | Loss: 0.3102 Grad: 119234.8516 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 29.91s | Loss: 0.3079 Grad: 68940.2656 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.34s | Loss: 0.3064 Grad: 52626.4844 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 44.78s | Loss: 0.3068 Grad: 45359.5078 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 52.21s | Loss: 0.3089 Grad: 32666.1816 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.64s | Loss: 0.3074 Grad: 57764.5469 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 67.08s | Loss: 0.3039 Grad: 28773.7305 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 74.52s | Loss: 0.3022 Grad: 41403.0781 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 81.96s | Loss: 0.3024 Grad: 41821.1914 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 89.3

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3538
Epoch: [3][50/225]Elapsed 4.94s | Loss: 0.3821
Epoch: [3][100/225]Elapsed 9.77s | Loss: 0.3813
Epoch: [3][150/225]Elapsed 14.63s | Loss: 0.3853
Epoch: [3][200/225]Elapsed 19.48s | Loss: 0.3789


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3006 | Average Valid Loss: 0.3738 | Time: 114.56s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.10s | Loss: 0.2009 Grad: 90538.6484 LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.55s | Loss: 0.2534 Grad: 44837.6680 LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 15.07s | Loss: 0.2509 Grad: 54825.6875 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.59s | Loss: 0.2565 Grad: 43202.9922 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 30.09s | Loss: 0.2518 Grad: 33261.8477 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.58s | Loss: 0.2488 Grad: 38315.3164 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 45.05s | Loss: 0.2471 Grad: 31465.1562 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 52.49s | Loss: 0.2495 Grad: 33799.1328 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 59.93s | Loss: 0.2482 Grad: 60333.3711 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 67.38s | Loss: 0.2445 Grad: 40508.5352 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 74.83s | Loss: 0.2445 Grad: 53494.6484 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 82.25s | Loss: 0.2439 Grad: 40743.1211 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 89.67s

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.3199
Epoch: [4][50/225]Elapsed 4.94s | Loss: 0.3609
Epoch: [4][100/225]Elapsed 9.78s | Loss: 0.3659
Epoch: [4][150/225]Elapsed 14.62s | Loss: 0.3724
Epoch: [4][200/225]Elapsed 19.48s | Loss: 0.3759


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2431 | Average Valid Loss: 0.3754 | Time: 114.82s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.10s | Loss: 0.1981 Grad: 198554.0938 LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.50s | Loss: 0.2207 Grad: 37496.2070 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 14.97s | Loss: 0.2112 Grad: 54275.2812 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.45s | Loss: 0.2183 Grad: 52562.2812 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 29.90s | Loss: 0.2127 Grad: 38994.7266 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.37s | Loss: 0.2120 Grad: 42478.5234 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.85s | Loss: 0.2115 Grad: 20147.0664 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 52.31s | Loss: 0.2122 Grad: 16879.9023 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.77s | Loss: 0.2093 Grad: 24413.4160 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 67.23s | Loss: 0.2071 Grad: 13162.5928 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 74.66s | Loss: 0.2076 Grad: 23222.6367 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 82.11s | Loss: 0.2072 Grad: 16897.6035 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 89.55

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.3384
Epoch: [5][50/225]Elapsed 4.96s | Loss: 0.3898
Epoch: [5][100/225]Elapsed 9.81s | Loss: 0.3903
Epoch: [5][150/225]Elapsed 14.68s | Loss: 0.3953
Epoch: [5][200/225]Elapsed 19.55s | Loss: 0.4040


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2061 | Average Valid Loss: 0.4036 | Time: 114.80s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.10s | Loss: 0.1894 Grad: nan LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.54s | Loss: 0.1861 Grad: 39798.6562 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 15.04s | Loss: 0.1779 Grad: 72093.7031 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.54s | Loss: 0.1794 Grad: 37994.3633 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 30.03s | Loss: 0.1795 Grad: 35655.0820 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.52s | Loss: 0.1774 Grad: 34273.1016 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 45.00s | Loss: 0.1749 Grad: 58288.3047 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.46s | Loss: 0.1760 Grad: 32811.5039 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.90s | Loss: 0.1735 Grad: 84955.0234 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 67.35s | Loss: 0.1716 Grad: 35493.5117 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.79s | Loss: 0.1722 Grad: 45364.0508 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 82.20s | Loss: 0.1725 Grad: 53184.8477 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.61s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3307
Epoch: [6][50/225]Elapsed 4.94s | Loss: 0.3883
Epoch: [6][100/225]Elapsed 9.77s | Loss: 0.3911
Epoch: [6][150/225]Elapsed 14.60s | Loss: 0.3999
Epoch: [6][200/225]Elapsed 19.47s | Loss: 0.4120


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1721 | Average Valid Loss: 0.4133 | Time: 114.76s
Fold 2 Valid Loss: 
Easy: 0.7837 | Hard: 0.6334
Elapse: 46.88 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymask_fold_2_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.11s | Loss: 0.4824 Grad: nan LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.52s | Loss: 0.4612 Grad: 84924.9922 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 15.02s | Loss: 0.4328 Grad: 55997.1836 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.53s | Loss: 0.3984 Grad: 48680.0781 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 30.01s | Loss: 0.3689 Grad: 47521.5234 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.52s | Loss: 0.3425 Grad: 21533.0117 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 41.32s | Loss: 0.3312 Grad: 44582.5742 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.3548
Epoch: [1][50/225]Elapsed 4.95s | Loss: 0.4029
Epoch: [1][100/225]Elapsed 9.79s | Loss: 0.4024
Epoch: [1][150/225]Elapsed 14.65s | Loss: 0.4088
Epoch: [1][200/225]Elapsed 19.53s | Loss: 0.3728


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3312 | Average Valid Loss: 0.3572 | Time: 63.35s
Best model found in epoch 1 | valid loss: 0.3572


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.12s | Loss: 0.1911 Grad: 109278.9766 LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.54s | Loss: 0.2133 Grad: 104015.5078 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 15.04s | Loss: 0.2082 Grad: 75965.9375 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.56s | Loss: 0.2083 Grad: 63193.3867 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 30.07s | Loss: 0.2054 Grad: 62500.8516 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.55s | Loss: 0.2015 Grad: 42973.2930 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 41.34s | Loss: 0.1993 Grad: 54689.9492 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.3839
Epoch: [2][50/225]Elapsed 4.95s | Loss: 0.3966
Epoch: [2][100/225]Elapsed 9.79s | Loss: 0.3963
Epoch: [2][150/225]Elapsed 14.62s | Loss: 0.4008
Epoch: [2][200/225]Elapsed 19.47s | Loss: 0.3606


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.1993 | Average Valid Loss: 0.3433 | Time: 63.30s
Best model found in epoch 2 | valid loss: 0.3433


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.10s | Loss: 0.1709 Grad: 103908.8750 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.59s | Loss: 0.1841 Grad: 109054.4141 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 15.11s | Loss: 0.1789 Grad: 111116.6875 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.60s | Loss: 0.1794 Grad: 78453.7109 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 30.13s | Loss: 0.1786 Grad: 120105.6250 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.60s | Loss: 0.1764 Grad: 36199.4023 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.40s | Loss: 0.1752 Grad: 69640.3438 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3822
Epoch: [3][50/225]Elapsed 4.97s | Loss: 0.4006
Epoch: [3][100/225]Elapsed 9.82s | Loss: 0.4007
Epoch: [3][150/225]Elapsed 14.68s | Loss: 0.4062
Epoch: [3][200/225]Elapsed 19.55s | Loss: 0.3653


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1752 | Average Valid Loss: 0.3474 | Time: 63.45s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.10s | Loss: 0.1922 Grad: 108421.4219 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.53s | Loss: 0.1674 Grad: 93813.2344 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 15.02s | Loss: 0.1628 Grad: 73969.1484 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.51s | Loss: 0.1628 Grad: 43275.7305 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 29.97s | Loss: 0.1619 Grad: 41092.0312 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.44s | Loss: 0.1594 Grad: 34679.8789 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 41.23s | Loss: 0.1581 Grad: 72619.4688 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.3907
Epoch: [4][50/225]Elapsed 4.95s | Loss: 0.3921
Epoch: [4][100/225]Elapsed 9.79s | Loss: 0.3951
Epoch: [4][150/225]Elapsed 14.63s | Loss: 0.3997
Epoch: [4][200/225]Elapsed 19.50s | Loss: 0.3615


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1581 | Average Valid Loss: 0.3444 | Time: 63.22s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.10s | Loss: 0.1354 Grad: 82398.8516 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.56s | Loss: 0.1491 Grad: 128636.2891 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 15.07s | Loss: 0.1475 Grad: 102229.3281 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.55s | Loss: 0.1483 Grad: 125854.2422 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 30.03s | Loss: 0.1473 Grad: 133146.2344 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.51s | Loss: 0.1449 Grad: 64801.0352 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.30s | Loss: 0.1440 Grad: 126286.7578 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.3830
Epoch: [5][50/225]Elapsed 4.95s | Loss: 0.3893
Epoch: [5][100/225]Elapsed 9.81s | Loss: 0.3914
Epoch: [5][150/225]Elapsed 14.67s | Loss: 0.3963
Epoch: [5][200/225]Elapsed 19.54s | Loss: 0.3572


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1440 | Average Valid Loss: 0.3401 | Time: 63.34s
Best model found in epoch 5 | valid loss: 0.3401


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.10s | Loss: 0.1434 Grad: 97462.6797 LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.58s | Loss: 0.1366 Grad: 45513.9766 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 15.12s | Loss: 0.1343 Grad: 46694.1328 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.63s | Loss: 0.1345 Grad: 32006.2383 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 30.10s | Loss: 0.1335 Grad: 61134.9844 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.59s | Loss: 0.1320 Grad: 32261.8496 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 41.41s | Loss: 0.1314 Grad: 59639.2930 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.11s | Loss: 0.3931
Epoch: [6][50/225]Elapsed 4.97s | Loss: 0.3910
Epoch: [6][100/225]Elapsed 9.82s | Loss: 0.3919
Epoch: [6][150/225]Elapsed 14.66s | Loss: 0.3971
Epoch: [6][200/225]Elapsed 19.52s | Loss: 0.3591


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1314 | Average Valid Loss: 0.3420 | Time: 63.42s
Fold 2 Valid Loss: 
Easy: 0.7975 | Hard: 0.4165
Elapse: 53.23 min 
Fold: 3 || Valid size 3595 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 0.11s | Loss: 0.8071 Grad: 56704.1289 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 7.50s | Loss: 0.8234 Grad: 75440.8750 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 14.99s | Loss: 0.8150 Grad: 75474.0234 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.48s | Loss: 0.8091 Grad: 68174.7812 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 29.95s | Loss: 0.8017 Grad: 84182.1875 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 37.39s | Loss: 0.7907 Grad: 92148.8281 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 44.81s | Loss: 0.7772 Grad: 87734.5312 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 52.22s | Loss: 0.7612 Grad: 87133.2500 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 59.65s | Loss: 0.7440 Grad: 119156.2266 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 67.07s | Loss: 0.7253 Grad: 166985.7031 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 74.48s | Loss: 0.7087 Grad: 136173.6719 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 81.88s | Loss: 0.6922 Grad: 65353.2070 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 89.

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.2988
Epoch: [1][50/225]Elapsed 4.90s | Loss: 0.4660
Epoch: [1][100/225]Elapsed 9.70s | Loss: 0.4710
Epoch: [1][150/225]Elapsed 14.51s | Loss: 0.4720
Epoch: [1][200/225]Elapsed 19.32s | Loss: 0.4376


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6685 | Average Valid Loss: 0.4246 | Time: 114.25s
Best model found in epoch 1 | valid loss: 0.4246


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.10s | Loss: 0.4770 Grad: 145457.9062 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.47s | Loss: 0.4209 Grad: 117690.2266 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 14.93s | Loss: 0.4157 Grad: 77470.2422 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.38s | Loss: 0.4207 Grad: 98309.1797 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 29.83s | Loss: 0.4175 Grad: 128676.2188 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 37.27s | Loss: 0.4072 Grad: 52364.8672 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 44.72s | Loss: 0.4042 Grad: 46445.9648 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 52.17s | Loss: 0.4005 Grad: 40351.7148 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 59.61s | Loss: 0.3949 Grad: 47483.0625 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 67.05s | Loss: 0.3884 Grad: 55190.8242 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 74.49s | Loss: 0.3871 Grad: 44608.3398 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 81.94s | Loss: 0.3848 Grad: 32893.2266 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 89.

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.11s | Loss: 0.2530
Epoch: [2][50/225]Elapsed 4.97s | Loss: 0.3735
Epoch: [2][100/225]Elapsed 9.81s | Loss: 0.3785
Epoch: [2][150/225]Elapsed 14.65s | Loss: 0.3803
Epoch: [2][200/225]Elapsed 19.50s | Loss: 0.3588


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.3808 | Average Valid Loss: 0.3506 | Time: 114.63s
Best model found in epoch 2 | valid loss: 0.3506


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.10s | Loss: 0.3984 Grad: inf LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.56s | Loss: 0.3023 Grad: 93618.9609 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 15.08s | Loss: 0.3007 Grad: 65372.0703 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.58s | Loss: 0.3106 Grad: 112553.7031 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 30.06s | Loss: 0.3068 Grad: 132056.0938 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.53s | Loss: 0.3038 Grad: 109304.5859 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 45.01s | Loss: 0.3035 Grad: 70801.0625 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 52.47s | Loss: 0.3010 Grad: 67069.2266 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.94s | Loss: 0.2987 Grad: 72745.7500 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 67.39s | Loss: 0.2946 Grad: 94386.5469 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 74.85s | Loss: 0.2944 Grad: 119895.8125 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 82.30s | Loss: 0.2948 Grad: 35075.1602 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 89.75s | 

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.2456
Epoch: [3][50/225]Elapsed 4.94s | Loss: 0.3657
Epoch: [3][100/225]Elapsed 9.77s | Loss: 0.3629
Epoch: [3][150/225]Elapsed 14.66s | Loss: 0.3661
Epoch: [3][200/225]Elapsed 19.52s | Loss: 0.3530


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.2935 | Average Valid Loss: 0.3487 | Time: 114.97s
Best model found in epoch 3 | valid loss: 0.3487


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.11s | Loss: 0.2957 Grad: 154701.5000 LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.55s | Loss: 0.2424 Grad: inf LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 15.06s | Loss: 0.2429 Grad: 58698.2461 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.56s | Loss: 0.2534 Grad: 59409.7227 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 30.02s | Loss: 0.2509 Grad: 53636.1758 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.49s | Loss: 0.2482 Grad: 41638.5156 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 44.94s | Loss: 0.2497 Grad: 66006.9766 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 52.39s | Loss: 0.2478 Grad: 20101.9414 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 59.84s | Loss: 0.2452 Grad: 40179.1016 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 67.28s | Loss: 0.2408 Grad: 53568.3828 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 74.73s | Loss: 0.2410 Grad: 46518.4336 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 82.16s | Loss: 0.2423 Grad: 34593.3047 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 89.61s | Los

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.2725
Epoch: [4][50/225]Elapsed 4.94s | Loss: 0.3526
Epoch: [4][100/225]Elapsed 9.78s | Loss: 0.3527
Epoch: [4][150/225]Elapsed 14.63s | Loss: 0.3558
Epoch: [4][200/225]Elapsed 19.48s | Loss: 0.3500


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2424 | Average Valid Loss: 0.3484 | Time: 114.77s
Best model found in epoch 4 | valid loss: 0.3484


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.10s | Loss: 0.2856 Grad: 171627.5156 LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.54s | Loss: 0.2040 Grad: 55848.5000 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 15.04s | Loss: 0.2084 Grad: 38147.0859 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.54s | Loss: 0.2186 Grad: 72684.2188 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 30.03s | Loss: 0.2167 Grad: 102063.4531 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.50s | Loss: 0.2137 Grad: 58165.6797 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.99s | Loss: 0.2140 Grad: 54450.4883 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 52.48s | Loss: 0.2119 Grad: 48258.2656 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.94s | Loss: 0.2087 Grad: 69021.2734 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 67.44s | Loss: 0.2044 Grad: 37591.9492 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 74.92s | Loss: 0.2036 Grad: 53541.8164 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 82.36s | Loss: 0.2033 Grad: 42071.5039 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 89.8

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.2570
Epoch: [5][50/225]Elapsed 4.95s | Loss: 0.3709
Epoch: [5][100/225]Elapsed 9.80s | Loss: 0.3679
Epoch: [5][150/225]Elapsed 14.66s | Loss: 0.3737
Epoch: [5][200/225]Elapsed 19.51s | Loss: 0.3749


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2038 | Average Valid Loss: 0.3760 | Time: 115.05s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.10s | Loss: 0.2827 Grad: nan LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.51s | Loss: 0.1802 Grad: 97031.4062 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 14.97s | Loss: 0.1752 Grad: 42230.3750 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.43s | Loss: 0.1841 Grad: 96973.6641 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 29.88s | Loss: 0.1813 Grad: 64364.3438 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.31s | Loss: 0.1783 Grad: 42403.8594 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 44.77s | Loss: 0.1784 Grad: 38584.8867 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.22s | Loss: 0.1761 Grad: 22076.9805 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.66s | Loss: 0.1741 Grad: 25429.3086 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 67.10s | Loss: 0.1715 Grad: 29830.8711 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.53s | Loss: 0.1714 Grad: 32679.3965 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 81.98s | Loss: 0.1708 Grad: 26406.1992 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.44s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.11s | Loss: 0.2943
Epoch: [6][50/225]Elapsed 4.96s | Loss: 0.3814
Epoch: [6][100/225]Elapsed 9.81s | Loss: 0.3790
Epoch: [6][150/225]Elapsed 14.66s | Loss: 0.3816
Epoch: [6][200/225]Elapsed 19.50s | Loss: 0.3871


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1710 | Average Valid Loss: 0.3900 | Time: 114.64s
Fold 3 Valid Loss: 
Easy: 0.7112 | Hard: 0.6645
Elapse: 64.71 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymask_fold_3_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.11s | Loss: 0.5797 Grad: nan LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.49s | Loss: 0.4571 Grad: 70549.6641 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 15.01s | Loss: 0.4154 Grad: 84844.7422 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.52s | Loss: 0.3857 Grad: 56504.3047 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 30.00s | Loss: 0.3585 Grad: 28563.4355 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.45s | Loss: 0.3359 Grad: 29991.5254 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 41.24s | Loss: 0.3246 Grad: 41186.2891 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.3141
Epoch: [1][50/225]Elapsed 4.93s | Loss: 0.4091
Epoch: [1][100/225]Elapsed 9.78s | Loss: 0.4115
Epoch: [1][150/225]Elapsed 14.61s | Loss: 0.4130
Epoch: [1][200/225]Elapsed 19.45s | Loss: 0.3694


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3246 | Average Valid Loss: 0.3522 | Time: 63.19s
Best model found in epoch 1 | valid loss: 0.3522


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.11s | Loss: 0.2711 Grad: nan LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.55s | Loss: 0.2161 Grad: 77163.2891 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 15.06s | Loss: 0.2059 Grad: 64753.1133 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.56s | Loss: 0.2035 Grad: 54975.5977 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 30.03s | Loss: 0.2028 Grad: 46310.9219 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.50s | Loss: 0.2002 Grad: 65514.8906 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 41.30s | Loss: 0.1980 Grad: 64819.8828 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.11s | Loss: 0.3168
Epoch: [2][50/225]Elapsed 4.96s | Loss: 0.4021
Epoch: [2][100/225]Elapsed 9.80s | Loss: 0.4042
Epoch: [2][150/225]Elapsed 14.65s | Loss: 0.4062
Epoch: [2][200/225]Elapsed 19.51s | Loss: 0.3609


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.1980 | Average Valid Loss: 0.3428 | Time: 63.32s
Best model found in epoch 2 | valid loss: 0.3428


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.11s | Loss: 0.2317 Grad: 140241.0938 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.56s | Loss: 0.1877 Grad: 50625.4688 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 15.07s | Loss: 0.1797 Grad: 57760.6875 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.60s | Loss: 0.1766 Grad: 65319.6055 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 30.09s | Loss: 0.1766 Grad: 54260.6055 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.57s | Loss: 0.1757 Grad: 54563.5430 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.38s | Loss: 0.1742 Grad: 60723.7070 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3253
Epoch: [3][50/225]Elapsed 4.93s | Loss: 0.3896
Epoch: [3][100/225]Elapsed 9.83s | Loss: 0.3911
Epoch: [3][150/225]Elapsed 14.67s | Loss: 0.3922
Epoch: [3][200/225]Elapsed 19.56s | Loss: 0.3489


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1742 | Average Valid Loss: 0.3315 | Time: 63.44s
Best model found in epoch 3 | valid loss: 0.3315


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.10s | Loss: 0.1979 Grad: 95085.6719 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.56s | Loss: 0.1650 Grad: 60790.2500 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 15.10s | Loss: 0.1601 Grad: 48807.8203 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.63s | Loss: 0.1584 Grad: 59000.0742 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 30.14s | Loss: 0.1586 Grad: 62465.8711 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.63s | Loss: 0.1585 Grad: 57982.9570 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 41.41s | Loss: 0.1573 Grad: 48590.3477 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.11s | Loss: 0.3168
Epoch: [4][50/225]Elapsed 4.92s | Loss: 0.3827
Epoch: [4][100/225]Elapsed 9.75s | Loss: 0.3825
Epoch: [4][150/225]Elapsed 14.56s | Loss: 0.3849
Epoch: [4][200/225]Elapsed 19.39s | Loss: 0.3430


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1573 | Average Valid Loss: 0.3263 | Time: 63.28s
Best model found in epoch 4 | valid loss: 0.3263


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.11s | Loss: 0.1883 Grad: 97296.0312 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.57s | Loss: 0.1549 Grad: 43064.0430 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 15.09s | Loss: 0.1499 Grad: 55266.4453 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.59s | Loss: 0.1472 Grad: 61549.3398 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 30.05s | Loss: 0.1471 Grad: 42641.9102 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.52s | Loss: 0.1467 Grad: 46273.5820 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.31s | Loss: 0.1457 Grad: 55570.3203 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.11s | Loss: 0.3283
Epoch: [5][50/225]Elapsed 4.93s | Loss: 0.3791
Epoch: [5][100/225]Elapsed 9.74s | Loss: 0.3784
Epoch: [5][150/225]Elapsed 14.55s | Loss: 0.3809
Epoch: [5][200/225]Elapsed 19.40s | Loss: 0.3407


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1457 | Average Valid Loss: 0.3246 | Time: 63.18s
Best model found in epoch 5 | valid loss: 0.3246


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.10s | Loss: 0.1826 Grad: inf LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.54s | Loss: 0.1408 Grad: 51472.8164 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 15.04s | Loss: 0.1347 Grad: 72920.0938 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.52s | Loss: 0.1340 Grad: 39224.4062 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 29.98s | Loss: 0.1338 Grad: 44018.0234 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.43s | Loss: 0.1343 Grad: 77152.1328 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 41.22s | Loss: 0.1336 Grad: 45919.6172 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3300
Epoch: [6][50/225]Elapsed 4.92s | Loss: 0.3788
Epoch: [6][100/225]Elapsed 9.76s | Loss: 0.3787
Epoch: [6][150/225]Elapsed 14.58s | Loss: 0.3819
Epoch: [6][200/225]Elapsed 19.42s | Loss: 0.3420


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1336 | Average Valid Loss: 0.3261 | Time: 63.14s
Fold 3 Valid Loss: 
Easy: 0.7617 | Hard: 0.3960
Elapse: 71.04 min 
Fold: 4 || Valid size 3595 
- First Stage 


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [1][0/622]Elapsed 0.10s | Loss: 0.8062 Grad: 64099.4883 LR: 4.0000e-06
Epoch: [1][50/622]Elapsed 7.46s | Loss: 0.8166 Grad: 64757.6211 LR: 5.0647e-06
Epoch: [1][100/622]Elapsed 14.95s | Loss: 0.8108 Grad: 68396.2109 LR: 8.2116e-06
Epoch: [1][150/622]Elapsed 22.46s | Loss: 0.8050 Grad: 67837.3672 LR: 1.3301e-05
Epoch: [1][200/622]Elapsed 29.94s | Loss: 0.7990 Grad: 83437.7266 LR: 2.0107e-05
Epoch: [1][250/622]Elapsed 37.42s | Loss: 0.7874 Grad: 130117.7578 LR: 2.8328e-05
Epoch: [1][300/622]Elapsed 44.90s | Loss: 0.7765 Grad: 62074.4766 LR: 3.7599e-05
Epoch: [1][350/622]Elapsed 52.36s | Loss: 0.7621 Grad: 54183.6914 LR: 4.7509e-05
Epoch: [1][400/622]Elapsed 59.82s | Loss: 0.7455 Grad: 70138.4531 LR: 5.7619e-05
Epoch: [1][450/622]Elapsed 67.36s | Loss: 0.7277 Grad: 60115.5977 LR: 6.7479e-05
Epoch: [1][500/622]Elapsed 74.80s | Loss: 0.7119 Grad: 74453.3828 LR: 7.6652e-05
Epoch: [1][550/622]Elapsed 82.24s | Loss: 0.6976 Grad: 51040.0156 LR: 8.4732e-05
Epoch: [1][600/622]Elapsed 89.67

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.3510
Epoch: [1][50/225]Elapsed 4.94s | Loss: 0.4206
Epoch: [1][100/225]Elapsed 9.77s | Loss: 0.4339
Epoch: [1][150/225]Elapsed 14.60s | Loss: 0.4369
Epoch: [1][200/225]Elapsed 19.43s | Loss: 0.4102


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.6764 | Average Valid Loss: 0.4033 | Time: 114.79s
Best model found in epoch 1 | valid loss: 0.4033


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [2][0/622]Elapsed 0.10s | Loss: 0.5581 Grad: 130524.3203 LR: 9.3737e-05
Epoch: [2][50/622]Elapsed 7.55s | Loss: 0.4550 Grad: 26739.1973 LR: 9.7777e-05
Epoch: [2][100/622]Elapsed 15.06s | Loss: 0.4478 Grad: 41193.5508 LR: 9.9786e-05
Epoch: [2][150/622]Elapsed 22.58s | Loss: 0.4527 Grad: 58263.7109 LR: 9.9996e-05
Epoch: [2][200/622]Elapsed 30.07s | Loss: 0.4406 Grad: 60475.2500 LR: 9.9967e-05
Epoch: [2][250/622]Elapsed 37.54s | Loss: 0.4336 Grad: 36338.0977 LR: 9.9911e-05
Epoch: [2][300/622]Elapsed 45.01s | Loss: 0.4290 Grad: 41001.0625 LR: 9.9828e-05
Epoch: [2][350/622]Elapsed 52.47s | Loss: 0.4229 Grad: 40961.1641 LR: 9.9717e-05
Epoch: [2][400/622]Elapsed 59.92s | Loss: 0.4157 Grad: 73939.8125 LR: 9.9579e-05
Epoch: [2][450/622]Elapsed 67.37s | Loss: 0.4094 Grad: 25970.1621 LR: 9.9415e-05
Epoch: [2][500/622]Elapsed 74.80s | Loss: 0.4045 Grad: 51672.7227 LR: 9.9223e-05
Epoch: [2][550/622]Elapsed 82.22s | Loss: 0.4018 Grad: 28320.7012 LR: 9.9004e-05
Epoch: [2][600/622]Elapsed 89.67

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.10s | Loss: 0.3242
Epoch: [2][50/225]Elapsed 4.94s | Loss: 0.3646
Epoch: [2][100/225]Elapsed 9.76s | Loss: 0.3756
Epoch: [2][150/225]Elapsed 14.60s | Loss: 0.3777
Epoch: [2][200/225]Elapsed 19.45s | Loss: 0.3650


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.3963 | Average Valid Loss: 0.3666 | Time: 114.82s
Best model found in epoch 2 | valid loss: 0.3666


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [3][0/622]Elapsed 0.10s | Loss: 0.4644 Grad: 241798.8438 LR: 9.8642e-05
Epoch: [3][50/622]Elapsed 7.54s | Loss: 0.3166 Grad: 49238.7109 LR: 9.8358e-05
Epoch: [3][100/622]Elapsed 15.02s | Loss: 0.3159 Grad: 41999.6797 LR: 9.8048e-05
Epoch: [3][150/622]Elapsed 22.48s | Loss: 0.3286 Grad: 83208.7969 LR: 9.7711e-05
Epoch: [3][200/622]Elapsed 29.95s | Loss: 0.3230 Grad: 39225.8320 LR: 9.7349e-05
Epoch: [3][250/622]Elapsed 37.41s | Loss: 0.3187 Grad: 31834.4570 LR: 9.6960e-05
Epoch: [3][300/622]Elapsed 44.88s | Loss: 0.3176 Grad: 43387.1562 LR: 9.6546e-05
Epoch: [3][350/622]Elapsed 52.33s | Loss: 0.3173 Grad: 34426.2969 LR: 9.6106e-05
Epoch: [3][400/622]Elapsed 59.79s | Loss: 0.3116 Grad: 47810.3945 LR: 9.5642e-05
Epoch: [3][450/622]Elapsed 67.25s | Loss: 0.3086 Grad: 48099.4805 LR: 9.5152e-05
Epoch: [3][500/622]Elapsed 74.68s | Loss: 0.3069 Grad: 56328.7188 LR: 9.4638e-05
Epoch: [3][550/622]Elapsed 82.10s | Loss: 0.3065 Grad: 34446.9453 LR: 9.4099e-05
Epoch: [3][600/622]Elapsed 89.53

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3509
Epoch: [3][50/225]Elapsed 4.96s | Loss: 0.3453
Epoch: [3][100/225]Elapsed 9.79s | Loss: 0.3538
Epoch: [3][150/225]Elapsed 14.64s | Loss: 0.3539
Epoch: [3][200/225]Elapsed 19.51s | Loss: 0.3513


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.3046 | Average Valid Loss: 0.3552 | Time: 114.73s
Best model found in epoch 3 | valid loss: 0.3552


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [4][0/622]Elapsed 0.11s | Loss: 0.3716 Grad: inf LR: 9.3281e-05
Epoch: [4][50/622]Elapsed 7.55s | Loss: 0.2592 Grad: 39982.3281 LR: 9.2683e-05
Epoch: [4][100/622]Elapsed 15.02s | Loss: 0.2568 Grad: 43399.5469 LR: 9.2063e-05
Epoch: [4][150/622]Elapsed 22.49s | Loss: 0.2720 Grad: 72681.0000 LR: 9.1420e-05
Epoch: [4][200/622]Elapsed 29.95s | Loss: 0.2677 Grad: 55051.9336 LR: 9.0754e-05
Epoch: [4][250/622]Elapsed 37.39s | Loss: 0.2655 Grad: 30065.7852 LR: 9.0065e-05
Epoch: [4][300/622]Elapsed 44.84s | Loss: 0.2635 Grad: 28986.1855 LR: 8.9355e-05
Epoch: [4][350/622]Elapsed 52.28s | Loss: 0.2626 Grad: 38871.4297 LR: 8.8624e-05
Epoch: [4][400/622]Elapsed 59.68s | Loss: 0.2585 Grad: 51782.9766 LR: 8.7871e-05
Epoch: [4][450/622]Elapsed 67.12s | Loss: 0.2553 Grad: 39287.0977 LR: 8.7097e-05
Epoch: [4][500/622]Elapsed 74.56s | Loss: 0.2537 Grad: 68903.0859 LR: 8.6303e-05
Epoch: [4][550/622]Elapsed 82.01s | Loss: 0.2535 Grad: 43316.2656 LR: 8.5490e-05
Epoch: [4][600/622]Elapsed 89.44s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.10s | Loss: 0.3536
Epoch: [4][50/225]Elapsed 4.93s | Loss: 0.3441
Epoch: [4][100/225]Elapsed 9.76s | Loss: 0.3529
Epoch: [4][150/225]Elapsed 14.59s | Loss: 0.3543
Epoch: [4][200/225]Elapsed 19.43s | Loss: 0.3607


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.2515 | Average Valid Loss: 0.3685 | Time: 114.57s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [5][0/622]Elapsed 0.10s | Loss: 0.4106 Grad: nan LR: 8.4284e-05
Epoch: [5][50/622]Elapsed 7.55s | Loss: 0.2286 Grad: 31240.6348 LR: 8.3424e-05
Epoch: [5][100/622]Elapsed 15.07s | Loss: 0.2214 Grad: 50218.8359 LR: 8.2546e-05
Epoch: [5][150/622]Elapsed 22.57s | Loss: 0.2280 Grad: 48293.3633 LR: 8.1650e-05
Epoch: [5][200/622]Elapsed 30.05s | Loss: 0.2246 Grad: 46123.1484 LR: 8.0736e-05
Epoch: [5][250/622]Elapsed 37.51s | Loss: 0.2222 Grad: 37696.8281 LR: 7.9806e-05
Epoch: [5][300/622]Elapsed 44.96s | Loss: 0.2197 Grad: 37286.2188 LR: 7.8859e-05
Epoch: [5][350/622]Elapsed 52.41s | Loss: 0.2181 Grad: 35065.7109 LR: 7.7897e-05
Epoch: [5][400/622]Elapsed 59.85s | Loss: 0.2138 Grad: 51238.9414 LR: 7.6920e-05
Epoch: [5][450/622]Elapsed 67.27s | Loss: 0.2110 Grad: 34550.2500 LR: 7.5927e-05
Epoch: [5][500/622]Elapsed 74.69s | Loss: 0.2098 Grad: 44126.2031 LR: 7.4921e-05
Epoch: [5][550/622]Elapsed 82.09s | Loss: 0.2105 Grad: 47607.3906 LR: 7.3901e-05
Epoch: [5][600/622]Elapsed 89.50s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.3456
Epoch: [5][50/225]Elapsed 4.94s | Loss: 0.3315
Epoch: [5][100/225]Elapsed 9.78s | Loss: 0.3382
Epoch: [5][150/225]Elapsed 14.62s | Loss: 0.3407
Epoch: [5][200/225]Elapsed 19.46s | Loss: 0.3518


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.2098 | Average Valid Loss: 0.3619 | Time: 114.63s


Train:   0%|          | 0/622 [00:00<?, ?batch/s]

Epoch: [6][0/622]Elapsed 0.10s | Loss: 0.5590 Grad: nan LR: 7.2409e-05
Epoch: [6][50/622]Elapsed 7.50s | Loss: 0.2009 Grad: 86461.4688 LR: 7.1358e-05
Epoch: [6][100/622]Elapsed 15.00s | Loss: 0.1926 Grad: 69057.2734 LR: 7.0296e-05
Epoch: [6][150/622]Elapsed 22.46s | Loss: 0.1959 Grad: 58490.1211 LR: 6.9222e-05
Epoch: [6][200/622]Elapsed 29.94s | Loss: 0.1914 Grad: 41005.5078 LR: 6.8138e-05
Epoch: [6][250/622]Elapsed 37.38s | Loss: 0.1906 Grad: 36226.3125 LR: 6.7044e-05
Epoch: [6][300/622]Elapsed 44.81s | Loss: 0.1900 Grad: 31871.2930 LR: 6.5940e-05
Epoch: [6][350/622]Elapsed 52.22s | Loss: 0.1881 Grad: 27824.7773 LR: 6.4828e-05
Epoch: [6][400/622]Elapsed 59.64s | Loss: 0.1849 Grad: 57660.2070 LR: 6.3708e-05
Epoch: [6][450/622]Elapsed 67.07s | Loss: 0.1820 Grad: 60556.5977 LR: 6.2581e-05
Epoch: [6][500/622]Elapsed 74.49s | Loss: 0.1808 Grad: 42580.4648 LR: 6.1446e-05
Epoch: [6][550/622]Elapsed 81.91s | Loss: 0.1807 Grad: 34584.0781 LR: 6.0305e-05
Epoch: [6][600/622]Elapsed 89.34s | Loss

Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3349
Epoch: [6][50/225]Elapsed 4.94s | Loss: 0.3431
Epoch: [6][100/225]Elapsed 9.78s | Loss: 0.3539
Epoch: [6][150/225]Elapsed 14.62s | Loss: 0.3566
Epoch: [6][200/225]Elapsed 19.46s | Loss: 0.3792


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1793 | Average Valid Loss: 0.3928 | Time: 114.47s
Fold 4 Valid Loss: 
Easy: 0.7140 | Hard: 0.7024
Elapse: 82.52 min 
- Second Stage 
Use Checkpoint: ENet_b2_xymask_fold_4_stage_1.pth


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [1][0/276]Elapsed 0.10s | Loss: 0.6478 Grad: nan LR: 4.0000e-06




Epoch: [1][50/276]Elapsed 7.47s | Loss: 0.4755 Grad: 73264.0547 LR: 9.3614e-06
Epoch: [1][100/276]Elapsed 14.95s | Loss: 0.4353 Grad: 60728.8359 LR: 2.4248e-05
Epoch: [1][150/276]Elapsed 22.41s | Loss: 0.4069 Grad: 42559.7852 LR: 4.5334e-05
Epoch: [1][200/276]Elapsed 29.87s | Loss: 0.3775 Grad: 22504.7949 LR: 6.7909e-05
Epoch: [1][250/276]Elapsed 37.31s | Loss: 0.3518 Grad: 29975.4609 LR: 8.6930e-05
Epoch: [1][275/276]Elapsed 41.09s | Loss: 0.3400 Grad: 34761.2227 LR: 9.3946e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [1][0/225]Elapsed 0.10s | Loss: 0.3445
Epoch: [1][50/225]Elapsed 4.95s | Loss: 0.3856
Epoch: [1][100/225]Elapsed 9.80s | Loss: 0.3824
Epoch: [1][150/225]Elapsed 14.66s | Loss: 0.3882
Epoch: [1][200/225]Elapsed 19.55s | Loss: 0.3500


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Train Loss: 0.3400 | Average Valid Loss: 0.3379 | Time: 63.14s
Best model found in epoch 1 | valid loss: 0.3379


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [2][0/276]Elapsed 0.11s | Loss: 0.2406 Grad: 115981.2266 LR: 9.3946e-05
Epoch: [2][50/276]Elapsed 7.54s | Loss: 0.2181 Grad: 77954.6094 LR: 9.9978e-05
Epoch: [2][100/276]Elapsed 15.01s | Loss: 0.2122 Grad: 44986.3750 LR: 9.9939e-05
Epoch: [2][150/276]Elapsed 22.48s | Loss: 0.2100 Grad: 35845.9648 LR: 9.9740e-05
Epoch: [2][200/276]Elapsed 29.96s | Loss: 0.2069 Grad: 27692.3320 LR: 9.9403e-05
Epoch: [2][250/276]Elapsed 37.43s | Loss: 0.2032 Grad: 40004.0781 LR: 9.8929e-05
Epoch: [2][275/276]Elapsed 41.23s | Loss: 0.2018 Grad: 70791.1797 LR: 9.8628e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [2][0/225]Elapsed 0.11s | Loss: 0.3264
Epoch: [2][50/225]Elapsed 4.96s | Loss: 0.3772
Epoch: [2][100/225]Elapsed 9.80s | Loss: 0.3752
Epoch: [2][150/225]Elapsed 14.65s | Loss: 0.3801
Epoch: [2][200/225]Elapsed 19.50s | Loss: 0.3406


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Train Loss: 0.2018 | Average Valid Loss: 0.3272 | Time: 63.24s
Best model found in epoch 2 | valid loss: 0.3272


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [3][0/276]Elapsed 0.12s | Loss: 0.2082 Grad: 100684.0938 LR: 9.8628e-05
Epoch: [3][50/276]Elapsed 7.57s | Loss: 0.1883 Grad: 100948.7969 LR: 9.7948e-05
Epoch: [3][100/276]Elapsed 15.10s | Loss: 0.1818 Grad: 43117.1562 LR: 9.7135e-05
Epoch: [3][150/276]Elapsed 22.58s | Loss: 0.1807 Grad: 49223.4688 LR: 9.6191e-05
Epoch: [3][200/276]Elapsed 30.05s | Loss: 0.1795 Grad: 34724.5000 LR: 9.5119e-05
Epoch: [3][250/276]Elapsed 37.52s | Loss: 0.1788 Grad: 40839.2070 LR: 9.3922e-05
Epoch: [3][275/276]Elapsed 41.32s | Loss: 0.1774 Grad: 52842.1133 LR: 9.3251e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [3][0/225]Elapsed 0.10s | Loss: 0.3086
Epoch: [3][50/225]Elapsed 4.96s | Loss: 0.3617
Epoch: [3][100/225]Elapsed 9.82s | Loss: 0.3623
Epoch: [3][150/225]Elapsed 14.68s | Loss: 0.3671
Epoch: [3][200/225]Elapsed 19.56s | Loss: 0.3301


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Train Loss: 0.1774 | Average Valid Loss: 0.3175 | Time: 63.39s
Best model found in epoch 3 | valid loss: 0.3175


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [4][0/276]Elapsed 0.12s | Loss: 0.1751 Grad: 88973.1016 LR: 9.3251e-05
Epoch: [4][50/276]Elapsed 7.56s | Loss: 0.1718 Grad: 104837.1797 LR: 9.1870e-05
Epoch: [4][100/276]Elapsed 15.05s | Loss: 0.1679 Grad: 63472.2344 LR: 9.0373e-05
Epoch: [4][150/276]Elapsed 22.53s | Loss: 0.1658 Grad: 37884.9727 LR: 8.8763e-05
Epoch: [4][200/276]Elapsed 30.00s | Loss: 0.1646 Grad: 29160.5352 LR: 8.7047e-05
Epoch: [4][250/276]Elapsed 37.44s | Loss: 0.1621 Grad: 29004.7734 LR: 8.5227e-05
Epoch: [4][275/276]Elapsed 41.25s | Loss: 0.1613 Grad: 49365.8945 LR: 8.4242e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [4][0/225]Elapsed 0.11s | Loss: 0.3341
Epoch: [4][50/225]Elapsed 4.97s | Loss: 0.3680
Epoch: [4][100/225]Elapsed 9.82s | Loss: 0.3655
Epoch: [4][150/225]Elapsed 14.66s | Loss: 0.3706
Epoch: [4][200/225]Elapsed 19.51s | Loss: 0.3331


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Train Loss: 0.1613 | Average Valid Loss: 0.3202 | Time: 63.26s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [5][0/276]Elapsed 0.10s | Loss: 0.2052 Grad: 114942.8828 LR: 8.4242e-05
Epoch: [5][50/276]Elapsed 7.55s | Loss: 0.1584 Grad: 159977.8750 LR: 8.2275e-05
Epoch: [5][100/276]Elapsed 15.06s | Loss: 0.1531 Grad: 92996.0312 LR: 8.0220e-05
Epoch: [5][150/276]Elapsed 22.58s | Loss: 0.1517 Grad: 87903.6641 LR: 7.8080e-05
Epoch: [5][200/276]Elapsed 30.06s | Loss: 0.1497 Grad: 76490.6094 LR: 7.5863e-05
Epoch: [5][250/276]Elapsed 37.52s | Loss: 0.1482 Grad: 94272.1094 LR: 7.3573e-05
Epoch: [5][275/276]Elapsed 41.31s | Loss: 0.1470 Grad: 130012.3203 LR: 7.2357e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [5][0/225]Elapsed 0.10s | Loss: 0.3320
Epoch: [5][50/225]Elapsed 4.94s | Loss: 0.3766
Epoch: [5][100/225]Elapsed 9.83s | Loss: 0.3732
Epoch: [5][150/225]Elapsed 14.71s | Loss: 0.3801
Epoch: [5][200/225]Elapsed 19.57s | Loss: 0.3408


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Train Loss: 0.1470 | Average Valid Loss: 0.3270 | Time: 63.37s


Train:   0%|          | 0/276 [00:00<?, ?batch/s]

Epoch: [6][0/276]Elapsed 0.10s | Loss: 0.1284 Grad: 106348.1250 LR: 7.2357e-05
Epoch: [6][50/276]Elapsed 7.53s | Loss: 0.1407 Grad: 93076.1172 LR: 6.9971e-05
Epoch: [6][100/276]Elapsed 15.01s | Loss: 0.1366 Grad: 55417.1328 LR: 6.7529e-05
Epoch: [6][150/276]Elapsed 22.49s | Loss: 0.1379 Grad: 100059.5703 LR: 6.5039e-05
Epoch: [6][200/276]Elapsed 29.95s | Loss: 0.1370 Grad: 56523.1875 LR: 6.2507e-05
Epoch: [6][250/276]Elapsed 37.41s | Loss: 0.1353 Grad: 77437.1172 LR: 5.9941e-05
Epoch: [6][275/276]Elapsed 41.20s | Loss: 0.1346 Grad: 159898.2812 LR: 5.8595e-05


Valid:   0%|          | 0/225 [00:00<?, ?batch/s]

Epoch: [6][0/225]Elapsed 0.10s | Loss: 0.3241
Epoch: [6][50/225]Elapsed 4.94s | Loss: 0.3623
Epoch: [6][100/225]Elapsed 9.78s | Loss: 0.3599
Epoch: [6][150/225]Elapsed 14.62s | Loss: 0.3651
Epoch: [6][200/225]Elapsed 19.46s | Loss: 0.3297


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Train Loss: 0.1346 | Average Valid Loss: 0.3172 | Time: 63.15s
Best model found in epoch 6 | valid loss: 0.3172
Fold 4 Valid Loss: 
Easy: 0.7334 | Hard: 0.4122
Elapse: 88.86 min 


IndexError: invalid index of a 0-dim tensor. Use `tensor.item()` in Python or `tensor.item<T>()` in C++ to convert a 0-dim tensor to a number

In [None]:
# Smoke test: pull one sample from the dataset and push it through a freshly
# built model to confirm the input/output tensor shapes line up.
dataset = CustomDataset(train_easy, TARGETS, ModelConfig, all_specs, all_eegs, mode='test')

X, y = dataset[0]
print(X.shape, y.shape)

model = CustomModel(ModelConfig, num_classes=6, pretrained=True)

# This is only a shape check, so run it as inference: eval() switches off
# dropout / drop-path and stops BatchNorm statistics from being updated by a
# single-sample batch, and no_grad() avoids building an autograd graph.
model.eval()
with torch.no_grad():
    # unsqueeze(0) adds the leading batch dimension the model is called with.
    y_pred = model(X.unsqueeze(0))

print(y_pred.shape)

In [None]:
from kl_divergence import score as kl_score


def calc_kl_div(p, q, criterion):
    """Return the KL-divergence loss for a single (prediction, target) pair.

    Args:
        p: 1-D numpy array of raw prediction scores; log-softmax is applied
            to it here before it is handed to ``criterion``.
        q: 1-D numpy array holding the target distribution, passed to
            ``criterion`` as-is.
        criterion: callable taking ``(log_probabilities, target)`` tensors and
            returning a scalar tensor (the notebook passes ``nn.KLDivLoss``).

    Returns:
        The loss as a plain Python float.
    """

    def _as_batch(arr):
        # float32 tensor with a leading batch dimension of size 1
        return torch.tensor(arr.astype(np.float32)).unsqueeze(0)

    log_probs = F.log_softmax(_as_batch(p), dim=1)
    loss = criterion(log_probs, _as_batch(q))
    return loss.item()

def calc_kaggle_score(solution, submission):
    """Score a single row with ``kl_score``.

    Args:
        solution: pandas Series holding ``eeg_id`` followed by the ``TARGETS``
            columns for one sample.
        submission: pandas Series holding ``eeg_id`` followed by the predicted
            values, in the same order as ``TARGETS``; its labels are renamed
            to ``['eeg_id'] + TARGETS`` before scoring.

    Returns:
        Whatever ``kl_score`` returns for the two one-row frames, using
        ``'eeg_id'`` as the row-id column name.
    """
    id_and_targets = ['eeg_id'] + TARGETS

    # Series -> one-row DataFrame; this creates new frames, so the caller's
    # row objects are left untouched by the casts below.
    truth_df = solution.to_frame().T
    truth_df[TARGETS] = truth_df[TARGETS].astype(np.float32)

    pred_df = submission.to_frame().T
    pred_df.columns = id_and_targets
    pred_df[TARGETS] = pred_df[TARGETS].astype(np.float32)

    return kl_score(truth_df, pred_df, 'eeg_id')

In [None]:
def evaluate_oof(oof_csv_path):
    """Evaluate an out-of-fold prediction CSV with KL loss and ``kl_score``.

    The CSV is expected to contain ``eeg_id``, the ``TARGETS`` columns (target
    distribution) and the ``TARGETS_PRED`` columns. ``TARGETS_PRED`` is treated
    as raw scores: log-softmax is applied for the KL loss and softmax for the
    ``kl_score`` submission.

    Side effects: prints the four aggregate metrics.

    Args:
        oof_csv_path: path of the CSV file to read.

    Returns:
        Tuple ``(oof_df, kl_loss_all, kaggle_score_all)`` where ``oof_df`` is
        the loaded frame with added per-row ``kl_loss`` and ``kaggle_score``
        columns and with ``TARGETS_PRED`` replaced by softmax probabilities,
        ``kl_loss_all`` is the scalar tensor returned by ``nn.KLDivLoss`` over
        all rows, and ``kaggle_score_all`` is the value returned by
        ``kl_score`` over all rows.
    """
    oof_df = pd.read_csv(oof_csv_path)
    softmax = nn.Softmax(dim=1)
    criterion = nn.KLDivLoss(reduction="batchmean")

    # Per-row KL loss (each row is scored as a batch of one).
    oof_df["kl_loss"] = oof_df.apply(
        lambda row: calc_kl_div(row[TARGETS_PRED].values, row[TARGETS].values, criterion),
        axis=1,
    )

    # KL loss over the whole frame in a single batch.
    kl_loss_all = criterion(
        F.log_softmax(torch.tensor(oof_df[TARGETS_PRED].values.astype(np.float32)), dim=1),
        torch.tensor(oof_df[TARGETS].values.astype(np.float32)),
    )

    print(f"KL Loss All: {kl_loss_all}")
    print(f"KL Loss Mean: {oof_df['kl_loss'].mean()}")

    # Convert the predicted scores to probabilities for kl_score.
    # FIX: this previously read oof_df[TARGETS] (the labels), which overwrote
    # the predictions with a softmax of the ground truth and made every
    # subsequent score/plot compare the labels against themselves.
    pred_scores = oof_df[TARGETS_PRED].values.astype(np.float32)
    oof_df[TARGETS_PRED] = softmax(torch.tensor(pred_scores)).numpy()

    solution = oof_df[['eeg_id'] + TARGETS].copy()
    submission = oof_df[['eeg_id'] + TARGETS_PRED].copy()
    # kl_score is given both frames with identical column names.
    submission.columns = ['eeg_id'] + TARGETS

    kaggle_score_all = kl_score(solution, submission, 'eeg_id')

    # Per-row score, computed on the softmaxed predictions written above.
    oof_df['kaggle_score'] = oof_df.apply(
        lambda row: calc_kaggle_score(row[['eeg_id'] + TARGETS], row[['eeg_id'] + TARGETS_PRED]),
        axis=1,
    )

    print(f"Kaggle Score All: {kaggle_score_all}")
    print(f"Kaggle Score Mean: {oof_df['kaggle_score'].mean()}")

    return oof_df, kl_loss_all, kaggle_score_all


In [None]:
# Evaluate the two out-of-fold prediction files found in JobConfig.OUTPUT_DIR.
# Each call prints its aggregate metrics and returns the scored DataFrame.
# NOTE(review): oof_1 / oof_2 presumably correspond to the first and second
# training stage -- confirm against the trainer.
# NOTE: kl_loss_all and kaggle_score_all are rebound by the second call, so
# after this cell they hold the oof_2.csv values only.
oof_1, kl_loss_all, kaggle_score_all = evaluate_oof(f"{JobConfig.OUTPUT_DIR}/oof_1.csv")
oof_2, kl_loss_all, kaggle_score_all = evaluate_oof(f"{JobConfig.OUTPUT_DIR}/oof_2.csv")

In [None]:
# 4x4 grid comparing the target and predicted values for a random sample of
# rows from oof_1; the figure is also written to OUTPUT_DIR.
fig, axes = plt.subplots(4, 4, figsize=(10, 10), sharex=True, sharey=True)
panels = axes.ravel()

# One sampled row per panel.
# (Alternative kept from the original: take the trailing rows instead, i.e.
#  oof_df.iloc[-len(axes.ravel()):, :].)
rows = oof_1.sample(len(panels))

for i, (ax, (idx, row)) in enumerate(zip(panels, rows.iterrows())):
    true_curve = row[TARGETS].values
    pred_curve = row[TARGETS_PRED].values

    ax.plot(true_curve, label='True')
    ax.plot(pred_curve, label='Pred')

    # One tick per class, labelled with the entries of BRAIN_ACTIVITY.
    ax.set_xticks(range(6))
    ax.set_xticklabels(BRAIN_ACTIVITY)
    ax.set_title(f"{idx} | {row['target']} | KL: {row['kl_loss']:.4f}")
    ax.grid(True)
    ax.legend()

fig.tight_layout()
fig.savefig(f"{JobConfig.OUTPUT_DIR}/oof_examples_1.png")
plt.show()

In [None]:
# 4x4 grid comparing the target values, the predicted values and a min-max
# normalised copy of the targets for a random sample of rows from oof_2; the
# figure is also written to OUTPUT_DIR.
fig, axes = plt.subplots(4, 4, figsize=(10, 10), sharex=True, sharey=True)

# rows = oof_2.iloc[5:5+len(axes.ravel()), :]
rows = oof_2.sample(len(axes.ravel()))

for i, (idx, row) in enumerate(rows.iterrows()):

    ax = axes.ravel()[i]

    # Rows of a mixed-dtype frame come back as object arrays; cast to float so
    # the arithmetic below is plain numeric numpy.
    y_true = row[TARGETS].values.astype(float)
    y_pred = row[TARGETS_PRED].values.astype(float)

    # Min-max normalise the targets. When every target value in the row is
    # identical the range is zero; plot a flat zero line instead of dividing
    # by zero.
    span = y_true.max() - y_true.min()
    if span > 0:
        y_norm = (y_true - y_true.min()) / span
    else:
        y_norm = np.zeros_like(y_true)

    ax.plot(y_true, label='True')
    ax.plot(y_pred, label='Pred')
    ax.plot(y_norm, "b:", label='True Norm')

    ax.set_title(f"{idx} | {row['target']} | KL: {row['kl_loss']:.4f}")
    # One tick per class, labelled with the entries of BRAIN_ACTIVITY.
    ax.set_xticks(range(6))
    ax.set_xticklabels(BRAIN_ACTIVITY)
    ax.grid(True)
    ax.legend()

fig.tight_layout()
fig.savefig(f"{JobConfig.OUTPUT_DIR}/oof_examples_2.png")
plt.show()

In [None]:
# Inspect the oof_2 row with index label 6: the range of its predicted values
# and a re-normalised version of its targets.
row = oof_2.loc[6]

pred_values = row[TARGETS_PRED]
min_pred, max_pred = pred_values.min(), pred_values.max()
print(min_pred, max_pred)

print(pred_values)

# Min-max scale the targets, then rescale so they sum to 1 again.
true_values = row[TARGETS]
lowest = true_values.min()
targets_norm = (true_values - lowest) / (true_values.max() - lowest)
targets_norm = targets_norm / targets_norm.sum()

print(targets_norm)