In [1]:
import torch
import torch.nn.functional as F
import pandas
import torchaudio
import torchaudio.transforms as T
import os
from ..src.utils.datasets import CommonVoiceDataset

from torch.utils.data import Dataset, DataLoader
#from sklearn.model_selection import train_test_split
torchaudio.set_audio_backend("soundfile")

In [11]:
from torchmetrics import Metric
from torch import Tensor
class ExpectedCost(Metric):
    """
    Wrapper on the expected cost as defined in https://github.com/luferrer/expected_cost
    """
    def __init__(self,task = "binary", costs=None, priors=None, sample_weight=None, adjusted=False,num_classes = 2):
        super().__init__()
        self.conf = ConfusionMatrix(task=task,num_classes=num_classes)
        if costs is None:
            
            costs = 1-torch.eye(num_classes)
            
            self.costs = costs
        else:
            self.costs = costs
        self.priors = priors
        self.sample_weight = sample_weight
        self.adjusted=adjusted
        self.num_classes = num_classes
        
            
        self.add_state("bincounts", default=torch.zeros(num_classes), dist_reduce_fx="sum")
        self.add_state("matrix", default=torch.zeros((num_classes,num_classes)), dist_reduce_fx="sum")
        
    def _input_format(self,preds,target):
        return preds,target
    def update(self, preds: Tensor, target: Tensor):
        preds, target = self._input_format(preds, target)
        
        assert preds.shape == target.shape
        self.matrix = self.conf(preds,target)
        
        self.bincounts = torch.bincount(target,minlength=self.num_classes)
        
    def compute(self):
        self.matrix = self.matrix/self.matrix.sum(axis=1, keepdims=True)
        priors = (self.bincounts/self.bincounts.sum()).unsqueeze(0).T
        if self.adjusted:
            # When adjusted is true, normalize the average cost
            # with the cost of a naive system that always makes
            # the min cost decision.
            norm_value = torch.min(torch.dot(priors.T, self.costs))
        else:
            norm_value = 1.0
                
        ave_cost = torch.sum(priors * self.costs * self.matrix)

        return ave_cost / norm_value

### Common Voice (gender)

In [5]:
dataset_train = CommonVoiceDataset(mode="train")


Cleaning.......OK


In [6]:
dataset_valid = CommonVoiceDataset(mode="validation")

Cleaning.......OK


In [3]:
dataset_test = CommonVoiceDataset(mode="test")

Cleaning.......OK


### Dataset test

In [8]:
import IPython.display as ipd


print(f"Length of train set: {len(dataset_train)}")

print(f"Length of valid set: {len(dataset_valid)}")

Length of train set: 364831
Length of valid set: 2566


In [6]:
audio,label = dataset_train.__getitem__(1034)
print(label)
ipd.display(ipd.Audio(audio, rate=16000))

tensor(1)


## Load NMF-based multilabel segmentation model

In [1]:
!python -m pip install numpy==1.23.5

Collecting numpy==1.23.5
  Using cached numpy-1.23.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.2
    Uninstalling numpy-1.26.2:
      Successfully uninstalled numpy-1.26.2
[31mERROR: Could not install packages due to an OSError: [Errno 16] Périphérique ou ressource occupé: '.nfs000000000a1f059900000002'
[0m[31m
[0m

In [9]:
import yaml
from pyannote.audio import Model

w_nmf = torch.load("/lium/raid01_b/tmario/phD/JSALT2023/src/1.explainability/nmf/src/nmf/w_nmf_256_1024_nn_augment.pt")
with open("/lium/raid01_b/tmario/phD/JSALT2023/src/1.explainability/nmf/config_wavlm.yaml","rb") as fh:
    cfg = yaml.safe_load(fh)
model = Model.from_pretrained("/lium/raid01_b/tmario/phD/JSALT2023/src/1.explainability/nmf/3MASNMF/augment/X.Segmentation.Full_WP2-augment-10.0-1.0-0.1-augTrue/models/23_11_14.ckpt",
                              w_nmf=w_nmf,
                              wavlm_cfg=cfg["model"]["wavlm"],
                              spec_kw={"win_length":1024, "hop_length":320,"n_fft":1024})
model.freeze()

Type of NMF matrix: <class 'torch.Tensor'> torch.Size([513, 256])


Using cache found in /lium/home/mlebour/.cache/torch/hub/s3prl_s3prl_main


In [4]:
import yaml
from pyannote.audio import Model

w_nmf = torch.load("/lium/raid01_b/tmario/phD/JSALT2023/src/1.explainability/nmf/src/nmf/w_nmf_256_1024_nn_augment.pt")
with open("/lium/raid01_b/tmario/phD/JSALT2023/src/1.explainability/nmf/config_wavlm.yaml","rb") as fh:
    cfg = yaml.safe_load(fh)
model = Model.from_pretrained("/lium/raid01_b/tmario/phD/JSALT2023/src/1.explainability/nmf/3MASNMF/make_it_sparser_256/X.Segmentation.Full_WP2-make_it_sparser_256-10.0-5.0-0.1-augTrue/models/23_12_12.ckpt",
                              w_nmf=w_nmf,
                              wavlm_cfg=cfg["model"]["wavlm"],
                              spec_kw={"win_length":1024, "hop_length":320,"n_fft":1024})
model.freeze()

Type of NMF matrix: <class 'torch.Tensor'> torch.Size([513, 256])


Using cache found in /lium/home/mlebour/.cache/torch/hub/s3prl_s3prl_main


### Classifier instance

In [93]:
#nmf_order=256
#n_classes = len(dataset_train.get_classes())
#classif_probe=torch.nn.Linear(nmf_order,n_classes,bias=True)

In [8]:
class Probe(torch.nn.Module):
    
    def __init__(self,input_size=256,hidden_size=128,output_size=2):
        super().__init__()
        self.probe = torch.nn.Sequential(torch.nn.Linear(2*input_size,hidden_size),
                            torch.nn.ReLU(),
                            torch.nn.Linear(hidden_size,output_size))
        
    def forward(self,x):
        x_mean = x.mean(dim=1)
        x_std = x.std(dim=1)
        
        y = self.probe(torch.cat([x_mean,x_std],dim=-1))
        
        return y
        
        

In [6]:
class LinProbe(torch.nn.Module):
    
    def __init__(self,input_size=256,output_size=10):
        super().__init__()
        self.probe = torch.nn.Sequential(torch.nn.Linear(2*input_size,output_size))
    
    def forward(self,x):
        x_mean = x.mean(dim=1)
        x_std = x.std(dim=1)
        
        y = self.probe(torch.cat([x_mean,x_std],dim=-1))
        
        return y

In [5]:
class LinProbe(torch.nn.Module):
    
    def __init__(self,input_size=256,output_size=10,agreg = "mean_std"):
        super().__init__()
        self.agreg = agreg
        weight = 1 if agreg in ["mean","std"] else 2
        
        self.probe = torch.nn.Sequential(torch.nn.Linear(weight*input_size,output_size))
    
    def forward(self,x):
        x_mean = None
        x_std = None
        if "mean" in self.agreg:
            x_mean = x.mean(dim=1)
        if "std" in self.agreg:
            x_std = x.std(dim=1)
        if x_mean is not None and x_std is not None:
            input_x = torch.cat([x_mean,x_std],dim=-1)
        elif x_mean is not None:
            input_x = x_mean
        elif x_std is not None:
            input_x = x_std
        
        y = self.probe(input_x)
        
        return y

In [7]:
from tqdm import tqdm

def train_epoch(seg_model,classif_probe,dataloader,loss_fun,optimizer,device="cpu"):
    avg_loss=0
    last_avg_loss=0
    for batch_idx, (data,label) in enumerate(tqdm(dataloader)):    
        optimizer.zero_grad()
        
        with torch.no_grad():
            feat = seg_model.wavlm(data.to(device))
            emb = seg_model.emb_transform(feat)
        
        logits = classif_probe(emb.permute(0,2,1))
        
        loss = loss_fun(logits,label.to(device))
        loss.backward()
        optimizer.step()
        
        avg_loss += loss
        
        if batch_idx%10 == 0:
            avg_loss/=10
            last_avg_loss=avg_loss
            print(f"Train loss: {avg_loss}")
            avg_loss=0
        
    return classif_probe, last_avg_loss

def valid_epoch(seg_model,classif_probe,dataloader,loss_fun,device="cpu"):
    valid_loss=0.0
    for batch_idx, (data, label) in enumerate(tqdm(dataloader,desc="Validation")):        
        
        with torch.no_grad():
            feat = seg_model.wavlm(data.to(device))
            emb = seg_model.emb_transform(feat)
        
        logits = classif_probe(emb.permute(0,2,1))   
        
        valid_loss += loss_fun(logits,label.to(device))
        
    return valid_loss/(batch_idx+1)

In [None]:
n_epoch = 50

classif_probe = LinProbe(input_size=256,output_size=2)

train_loader= DataLoader(dataset_train, batch_size=64, shuffle=True,num_workers=10)
valid_loader = DataLoader(dataset_valid, batch_size=64, shuffle=False,num_workers=10)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classif_probe.parameters(),lr=0.01)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)
classif_probe.to(device)

best_valid = 1e8

train_loss = []
valid_loss = []
for epoch in range(1,n_epoch+1):
    print(f"Epoch {epoch}")
    classif_probe, train_loss_t = train_epoch(seg_model=model,classif_probe=classif_probe,dataloader=train_loader,loss_fun=loss_fn,optimizer=optimizer,device=device)
    valid_loss_t = valid_epoch(seg_model=model,classif_probe=classif_probe,dataloader=valid_loader,loss_fun=loss_fn,device=device)
    print(f"Validation loss = {valid_loss}")
    
    train_loss.append(train_loss_t)
    valid_loss.append(valid_loss_t)
    
    if valid_loss_t < best_valid:
        torch.save(classif_probe.state_dict(), "gender_probe.pt")
        best_valid = valid_loss_t
    

Epoch 1


  0%|          | 1/5701 [00:12<19:51:49, 12.55s/it]

Train loss: 0.06980812549591064


  0%|          | 11/5701 [00:23<3:39:22,  2.31s/it]

Train loss: 0.4540952146053314


  0%|          | 21/5701 [00:35<3:57:29,  2.51s/it]

Train loss: 0.41364535689353943


  1%|          | 31/5701 [00:46<3:48:36,  2.42s/it]

Train loss: 0.44970670342445374


  1%|          | 41/5701 [00:57<3:13:29,  2.05s/it]

Train loss: 0.3920177221298218


  1%|          | 51/5701 [01:09<4:00:52,  2.56s/it]

Train loss: 0.40775853395462036


  1%|          | 61/5701 [01:20<3:52:42,  2.48s/it]

Train loss: 0.4037185311317444


  1%|          | 71/5701 [01:32<3:41:56,  2.37s/it]

Train loss: 0.4072030484676361


  1%|▏         | 81/5701 [01:43<3:32:37,  2.27s/it]

Train loss: 0.4361262321472168


  2%|▏         | 91/5701 [01:54<3:23:00,  2.17s/it]

Train loss: 0.3995996117591858


  2%|▏         | 101/5701 [02:05<3:26:03,  2.21s/it]

Train loss: 0.3957160413265228


  2%|▏         | 111/5701 [02:16<3:41:25,  2.38s/it]

Train loss: 0.3607637882232666


  2%|▏         | 121/5701 [02:28<3:43:16,  2.40s/it]

Train loss: 0.3884222209453583


  2%|▏         | 131/5701 [02:39<3:22:44,  2.18s/it]

Train loss: 0.3858530819416046


  2%|▏         | 141/5701 [02:50<3:36:22,  2.33s/it]

Train loss: 0.38827836513519287


  3%|▎         | 151/5701 [03:02<3:45:52,  2.44s/it]

Train loss: 0.36284565925598145


  3%|▎         | 161/5701 [03:13<3:33:54,  2.32s/it]

Train loss: 0.3624430000782013


  3%|▎         | 171/5701 [03:24<3:21:28,  2.19s/it]

Train loss: 0.4107716977596283


  3%|▎         | 181/5701 [03:35<3:19:54,  2.17s/it]

Train loss: 0.36683961749076843


  3%|▎         | 191/5701 [03:46<3:32:43,  2.32s/it]

Train loss: 0.37030601501464844


  4%|▎         | 201/5701 [03:57<3:24:06,  2.23s/it]

Train loss: 0.3852930963039398


  4%|▎         | 211/5701 [04:07<3:09:57,  2.08s/it]

Train loss: 0.35552236437797546


  4%|▍         | 221/5701 [04:18<3:17:22,  2.16s/it]

Train loss: 0.3207571506500244


  4%|▍         | 231/5701 [04:29<3:00:37,  1.98s/it]

Train loss: 0.3269231617450714


  4%|▍         | 241/5701 [04:40<2:42:59,  1.79s/it]

Train loss: 0.3533475995063782


  4%|▍         | 251/5701 [04:50<2:14:56,  1.49s/it]

Train loss: 0.3704613149166107


  5%|▍         | 261/5701 [05:02<2:45:09,  1.82s/it]

Train loss: 0.3397768437862396


  5%|▍         | 271/5701 [05:14<3:02:53,  2.02s/it]

Train loss: 0.37295326590538025


  5%|▍         | 281/5701 [05:25<2:25:14,  1.61s/it]

Train loss: 0.3505987823009491


  5%|▌         | 291/5701 [05:36<2:14:05,  1.49s/it]

Train loss: 0.34021979570388794


  5%|▌         | 301/5701 [05:47<2:22:12,  1.58s/it]

Train loss: 0.37510165572166443


  5%|▌         | 311/5701 [05:59<2:49:10,  1.88s/it]

Train loss: 0.3651340901851654


  6%|▌         | 321/5701 [06:10<2:24:18,  1.61s/it]

Train loss: 0.3094501197338104


  6%|▌         | 331/5701 [06:21<2:16:08,  1.52s/it]

Train loss: 0.36700671911239624


  6%|▌         | 341/5701 [06:32<2:10:08,  1.46s/it]

Train loss: 0.3817783296108246


  6%|▌         | 351/5701 [06:43<2:10:57,  1.47s/it]

Train loss: 0.3961728513240814


  6%|▋         | 361/5701 [06:54<2:18:17,  1.55s/it]

Train loss: 0.3790450394153595


  7%|▋         | 371/5701 [07:05<2:24:30,  1.63s/it]

Train loss: 0.3479795455932617


  7%|▋         | 381/5701 [07:17<2:41:02,  1.82s/it]

Train loss: 0.3175511658191681


  7%|▋         | 391/5701 [07:27<2:32:45,  1.73s/it]

Train loss: 0.3684385418891907


  7%|▋         | 401/5701 [07:39<2:33:34,  1.74s/it]

Train loss: 0.34536635875701904


  7%|▋         | 411/5701 [07:50<2:32:15,  1.73s/it]

Train loss: 0.3812594413757324


  7%|▋         | 421/5701 [08:01<2:30:35,  1.71s/it]

Train loss: 0.34896978735923767


  8%|▊         | 431/5701 [08:13<2:36:51,  1.79s/it]

Train loss: 0.3665807247161865


  8%|▊         | 441/5701 [08:24<2:44:56,  1.88s/it]

Train loss: 0.31894350051879883


  8%|▊         | 451/5701 [08:36<3:02:12,  2.08s/it]

Train loss: 0.3695865571498871


  8%|▊         | 461/5701 [08:48<3:09:36,  2.17s/it]

Train loss: 0.3343559205532074


  8%|▊         | 471/5701 [09:00<3:07:23,  2.15s/it]

Train loss: 0.36165136098861694


  8%|▊         | 481/5701 [09:11<2:59:29,  2.06s/it]

Train loss: 0.3150061070919037


  9%|▊         | 491/5701 [09:22<2:34:57,  1.78s/it]

Train loss: 0.3435063660144806


  9%|▉         | 501/5701 [09:33<2:42:06,  1.87s/it]

Train loss: 0.31072762608528137


  9%|▉         | 511/5701 [09:44<2:41:49,  1.87s/it]

Train loss: 0.35350748896598816


  9%|▉         | 521/5701 [09:56<2:37:18,  1.82s/it]

Train loss: 0.35058656334877014


  9%|▉         | 531/5701 [10:07<2:33:19,  1.78s/it]

Train loss: 0.3814127445220947


  9%|▉         | 541/5701 [10:19<2:38:14,  1.84s/it]

Train loss: 0.3709779679775238


 10%|▉         | 551/5701 [10:30<2:36:33,  1.82s/it]

Train loss: 0.3211245536804199


 10%|▉         | 561/5701 [10:42<2:35:09,  1.81s/it]

Train loss: 0.3440621793270111


 10%|█         | 571/5701 [10:54<2:39:49,  1.87s/it]

Train loss: 0.3271035850048065


 10%|█         | 581/5701 [11:05<2:32:38,  1.79s/it]

Train loss: 0.3487691879272461


 10%|█         | 591/5701 [11:17<2:38:07,  1.86s/it]

Train loss: 0.33357998728752136


 11%|█         | 601/5701 [11:28<2:29:14,  1.76s/it]

Train loss: 0.3648241460323334


 11%|█         | 611/5701 [11:39<2:28:07,  1.75s/it]

Train loss: 0.3394957482814789


 11%|█         | 621/5701 [11:51<2:31:51,  1.79s/it]

Train loss: 0.2942076623439789


 11%|█         | 631/5701 [12:02<2:30:31,  1.78s/it]

Train loss: 0.34498199820518494


 11%|█         | 641/5701 [12:13<2:10:58,  1.55s/it]

Train loss: 0.32312920689582825


 11%|█▏        | 651/5701 [12:25<2:16:34,  1.62s/it]

Train loss: 0.29064568877220154


 12%|█▏        | 661/5701 [12:36<2:21:38,  1.69s/it]

Train loss: 0.27858877182006836


 12%|█▏        | 671/5701 [12:46<1:35:57,  1.14s/it]

Train loss: 0.30902335047721863


 12%|█▏        | 681/5701 [12:57<1:41:39,  1.22s/it]

Train loss: 0.33398789167404175


 12%|█▏        | 691/5701 [13:09<1:48:01,  1.29s/it]

Train loss: 0.30316397547721863


 12%|█▏        | 701/5701 [13:21<1:58:28,  1.42s/it]

Train loss: 0.29431581497192383


 12%|█▏        | 711/5701 [13:32<1:55:20,  1.39s/it]

Train loss: 0.31607162952423096


 13%|█▎        | 721/5701 [13:44<2:13:23,  1.61s/it]

Train loss: 0.2911529541015625


 13%|█▎        | 731/5701 [13:56<2:18:54,  1.68s/it]

Train loss: 0.32180213928222656


 13%|█▎        | 741/5701 [14:08<2:24:25,  1.75s/it]

Train loss: 0.31429827213287354


 13%|█▎        | 751/5701 [14:19<2:08:45,  1.56s/it]

Train loss: 0.29994210600852966


 13%|█▎        | 761/5701 [14:30<2:02:42,  1.49s/it]

Train loss: 0.3179164528846741


 14%|█▎        | 771/5701 [14:41<1:59:12,  1.45s/it]

Train loss: 0.3119381070137024


 14%|█▎        | 781/5701 [14:52<1:52:45,  1.38s/it]

Train loss: 0.2998480498790741


 14%|█▍        | 791/5701 [15:04<1:40:50,  1.23s/it]

Train loss: 0.3069384694099426


 14%|█▍        | 801/5701 [15:15<1:49:41,  1.34s/it]

Train loss: 0.30965280532836914


 14%|█▍        | 811/5701 [15:26<1:45:22,  1.29s/it]

Train loss: 0.30348727107048035


 14%|█▍        | 821/5701 [15:38<1:43:56,  1.28s/it]

Train loss: 0.3045113980770111


 15%|█▍        | 831/5701 [15:49<1:39:11,  1.22s/it]

Train loss: 0.2800511121749878


 15%|█▍        | 841/5701 [16:01<1:29:51,  1.11s/it]

Train loss: 0.2849617302417755


 15%|█▍        | 851/5701 [16:12<1:14:27,  1.09it/s]

Train loss: 0.2972641587257385


 15%|█▌        | 861/5701 [16:23<1:15:10,  1.07it/s]

Train loss: 0.3151080310344696


 15%|█▌        | 871/5701 [16:35<1:13:04,  1.10it/s]

Train loss: 0.29781341552734375


 15%|█▌        | 881/5701 [16:47<1:08:23,  1.17it/s]

Train loss: 0.29239940643310547


 16%|█▌        | 891/5701 [16:58<1:08:06,  1.18it/s]

Train loss: 0.3054793179035187


 16%|█▌        | 901/5701 [17:09<1:09:18,  1.15it/s]

Train loss: 0.3427875339984894


 16%|█▌        | 911/5701 [17:20<1:07:05,  1.19it/s]

Train loss: 0.3208281695842743


 16%|█▌        | 921/5701 [17:33<1:12:35,  1.10it/s]

Train loss: 0.2910716235637665


 16%|█▋        | 931/5701 [17:43<1:03:44,  1.25it/s]

Train loss: 0.278619647026062


 17%|█▋        | 941/5701 [17:55<1:10:16,  1.13it/s]

Train loss: 0.2989034354686737


 17%|█▋        | 951/5701 [18:06<1:08:56,  1.15it/s]

Train loss: 0.31787192821502686


 17%|█▋        | 961/5701 [18:18<1:09:30,  1.14it/s]

Train loss: 0.33418557047843933


 17%|█▋        | 971/5701 [18:29<1:09:21,  1.14it/s]

Train loss: 0.28415045142173767


 17%|█▋        | 981/5701 [18:41<1:09:10,  1.14it/s]

Train loss: 0.29297301173210144


 17%|█▋        | 991/5701 [18:53<1:07:48,  1.16it/s]

Train loss: 0.2985546588897705


 18%|█▊        | 1001/5701 [19:03<1:03:33,  1.23it/s]

Train loss: 0.31285202503204346


 18%|█▊        | 1011/5701 [19:14<1:07:12,  1.16it/s]

Train loss: 0.30664822459220886


 18%|█▊        | 1021/5701 [19:26<1:09:09,  1.13it/s]

Train loss: 0.32020965218544006


 18%|█▊        | 1031/5701 [19:38<1:10:27,  1.10it/s]

Train loss: 0.2789575755596161


 18%|█▊        | 1041/5701 [19:49<1:05:34,  1.18it/s]

Train loss: 0.2776087522506714


 18%|█▊        | 1051/5701 [20:01<1:05:48,  1.18it/s]

Train loss: 0.26770174503326416


 19%|█▊        | 1061/5701 [20:12<1:07:45,  1.14it/s]

Train loss: 0.31740447878837585


 19%|█▉        | 1071/5701 [20:24<1:07:23,  1.15it/s]

Train loss: 0.26014429330825806


 19%|█▉        | 1081/5701 [20:34<1:03:43,  1.21it/s]

Train loss: 0.29727914929389954


 19%|█▉        | 1091/5701 [20:46<1:08:52,  1.12it/s]

Train loss: 0.2888288199901581


 19%|█▉        | 1101/5701 [20:58<1:05:49,  1.16it/s]

Train loss: 0.28153032064437866


 19%|█▉        | 1111/5701 [21:09<1:07:56,  1.13it/s]

Train loss: 0.3101196885108948


 20%|█▉        | 1121/5701 [21:21<1:05:25,  1.17it/s]

Train loss: 0.3212093412876129


 20%|█▉        | 1131/5701 [21:31<1:03:18,  1.20it/s]

Train loss: 0.2772084176540375


 20%|██        | 1141/5701 [21:44<1:08:27,  1.11it/s]

Train loss: 0.29941660165786743


 20%|██        | 1151/5701 [21:54<1:02:58,  1.20it/s]

Train loss: 0.3066835403442383


 20%|██        | 1161/5701 [22:06<1:07:03,  1.13it/s]

Train loss: 0.26429855823516846


 21%|██        | 1171/5701 [22:18<1:06:15,  1.14it/s]

Train loss: 0.28948354721069336


 21%|██        | 1181/5701 [22:30<1:07:57,  1.11it/s]

Train loss: 0.3156440556049347


 21%|██        | 1191/5701 [22:40<1:00:25,  1.24it/s]

Train loss: 0.2799277901649475


 21%|██        | 1201/5701 [22:52<1:04:19,  1.17it/s]

Train loss: 0.30007562041282654


 21%|██        | 1211/5701 [23:03<1:06:29,  1.13it/s]

Train loss: 0.272222101688385


 21%|██▏       | 1221/5701 [23:15<1:05:53,  1.13it/s]

Train loss: 0.2541232109069824


 22%|██▏       | 1231/5701 [23:26<1:03:53,  1.17it/s]

Train loss: 0.2996438145637512


 22%|██▏       | 1241/5701 [23:38<1:05:10,  1.14it/s]

Train loss: 0.2850145399570465


 22%|██▏       | 1251/5701 [23:49<1:03:25,  1.17it/s]

Train loss: 0.26609793305397034


 22%|██▏       | 1261/5701 [24:01<1:04:31,  1.15it/s]

Train loss: 0.2816592752933502


 22%|██▏       | 1271/5701 [24:12<1:04:30,  1.14it/s]

Train loss: 0.26329123973846436


 22%|██▏       | 1281/5701 [24:24<1:02:40,  1.18it/s]

Train loss: 0.2794969975948334


 23%|██▎       | 1291/5701 [24:35<1:04:48,  1.13it/s]

Train loss: 0.2701645791530609


 23%|██▎       | 1301/5701 [24:47<1:02:49,  1.17it/s]

Train loss: 0.25843262672424316


 23%|██▎       | 1311/5701 [24:58<1:02:48,  1.16it/s]

Train loss: 0.3011404573917389


 23%|██▎       | 1321/5701 [25:09<1:03:50,  1.14it/s]

Train loss: 0.27474117279052734


 23%|██▎       | 1331/5701 [25:20<1:01:39,  1.18it/s]

Train loss: 0.27297940850257874


 24%|██▎       | 1341/5701 [25:32<1:04:42,  1.12it/s]

Train loss: 0.25206470489501953


 24%|██▎       | 1351/5701 [25:43<58:26,  1.24it/s]  

Train loss: 0.28299403190612793


 24%|██▍       | 1361/5701 [25:55<1:04:22,  1.12it/s]

Train loss: 0.2583896219730377


 24%|██▍       | 1371/5701 [26:05<59:53,  1.20it/s]  

Train loss: 0.2795620262622833


 24%|██▍       | 1381/5701 [26:17<1:02:31,  1.15it/s]

Train loss: 0.3006371259689331


 24%|██▍       | 1391/5701 [26:28<1:01:24,  1.17it/s]

Train loss: 0.2671321630477905


 25%|██▍       | 1401/5701 [26:39<1:00:57,  1.18it/s]

Train loss: 0.2782440483570099


 25%|██▍       | 1411/5701 [26:50<1:00:09,  1.19it/s]

Train loss: 0.27314624190330505


 25%|██▍       | 1421/5701 [27:01<1:00:31,  1.18it/s]

Train loss: 0.2589154541492462


 25%|██▌       | 1431/5701 [27:12<59:16,  1.20it/s]  

Train loss: 0.27707499265670776


 25%|██▌       | 1441/5701 [27:24<1:04:03,  1.11it/s]

Train loss: 0.25996777415275574


 25%|██▌       | 1451/5701 [27:36<1:01:14,  1.16it/s]

Train loss: 0.2678473889827728


 26%|██▌       | 1461/5701 [27:47<1:02:00,  1.14it/s]

Train loss: 0.26975002884864807


 26%|██▌       | 1471/5701 [27:59<1:00:20,  1.17it/s]

Train loss: 0.28004369139671326


 26%|██▌       | 1481/5701 [28:09<57:21,  1.23it/s]  

Train loss: 0.2717975378036499


 26%|██▌       | 1491/5701 [28:21<1:01:48,  1.14it/s]

Train loss: 0.2816929519176483


 26%|██▋       | 1501/5701 [28:33<1:02:55,  1.11it/s]

Train loss: 0.26927638053894043


 27%|██▋       | 1511/5701 [28:44<1:00:10,  1.16it/s]

Train loss: 0.25232619047164917


 27%|██▋       | 1521/5701 [28:56<1:00:06,  1.16it/s]

Train loss: 0.2422923594713211


 27%|██▋       | 1531/5701 [29:07<1:00:10,  1.16it/s]

Train loss: 0.24636335670948029


 27%|██▋       | 1541/5701 [29:19<1:00:36,  1.14it/s]

Train loss: 0.2557365894317627


 27%|██▋       | 1551/5701 [29:30<57:47,  1.20it/s]  

Train loss: 0.26438891887664795


 27%|██▋       | 1561/5701 [29:42<1:02:13,  1.11it/s]

Train loss: 0.2633924186229706


 28%|██▊       | 1571/5701 [29:52<54:40,  1.26it/s]  

Train loss: 0.28315016627311707


 28%|██▊       | 1581/5701 [30:03<59:52,  1.15it/s]  

Train loss: 0.2715393006801605


 28%|██▊       | 1591/5701 [30:15<59:35,  1.15it/s]  

Train loss: 0.2330886423587799


 28%|██▊       | 1601/5701 [30:27<1:00:10,  1.14it/s]

Train loss: 0.27220645546913147


 28%|██▊       | 1611/5701 [30:38<58:02,  1.17it/s]  

Train loss: 0.26035481691360474


 28%|██▊       | 1621/5701 [30:49<57:13,  1.19it/s]  

Train loss: 0.23399530351161957


 29%|██▊       | 1631/5701 [31:00<58:01,  1.17it/s]  

Train loss: 0.262124627828598


 29%|██▉       | 1641/5701 [31:11<57:49,  1.17it/s]  

Train loss: 0.26534998416900635


 29%|██▉       | 1651/5701 [31:23<58:53,  1.15it/s]  

Train loss: 0.268932968378067


 29%|██▉       | 1661/5701 [31:35<59:10,  1.14it/s]  

Train loss: 0.23837529122829437


 29%|██▉       | 1671/5701 [31:46<56:42,  1.18it/s]  

Train loss: 0.25085461139678955


 29%|██▉       | 1681/5701 [31:56<55:29,  1.21it/s]  

Train loss: 0.2610049843788147


 30%|██▉       | 1691/5701 [32:08<58:21,  1.15it/s]  

Train loss: 0.25081178545951843


 30%|██▉       | 1701/5701 [32:19<58:15,  1.14it/s]  

Train loss: 0.23774485290050507


 30%|███       | 1711/5701 [32:31<57:41,  1.15it/s]  

Train loss: 0.26091915369033813


 30%|███       | 1721/5701 [32:42<57:26,  1.15it/s]  

Train loss: 0.2539750039577484


 30%|███       | 1731/5701 [32:54<56:36,  1.17it/s]  

Train loss: 0.24967288970947266


 31%|███       | 1741/5701 [33:05<57:56,  1.14it/s]  

Train loss: 0.23365484178066254


 31%|███       | 1751/5701 [33:17<58:10,  1.13it/s]  

Train loss: 0.2803145945072174


 31%|███       | 1761/5701 [33:29<57:20,  1.15it/s]  

Train loss: 0.22896452248096466


 31%|███       | 1771/5701 [33:40<54:31,  1.20it/s]  

Train loss: 0.23887358605861664


 31%|███       | 1781/5701 [33:52<58:15,  1.12it/s]  

Train loss: 0.26878097653388977


 31%|███▏      | 1791/5701 [34:03<55:27,  1.18it/s]  

Train loss: 0.25354447960853577


 32%|███▏      | 1801/5701 [34:14<56:41,  1.15it/s]  

Train loss: 0.22592265903949738


 32%|███▏      | 1811/5701 [34:25<53:56,  1.20it/s]  

Train loss: 0.25101879239082336


 32%|███▏      | 1821/5701 [34:37<56:33,  1.14it/s]  

Train loss: 0.2158663272857666


 32%|███▏      | 1831/5701 [34:48<55:48,  1.16it/s]  

Train loss: 0.2531242072582245


 32%|███▏      | 1841/5701 [35:00<56:09,  1.15it/s]  

Train loss: 0.24115709960460663


 32%|███▏      | 1851/5701 [35:11<55:21,  1.16it/s]  

Train loss: 0.24086995422840118


 33%|███▎      | 1861/5701 [35:22<54:47,  1.17it/s]  

Train loss: 0.24853114783763885


 33%|███▎      | 1871/5701 [35:34<56:28,  1.13it/s]  

Train loss: 0.2458360493183136


 33%|███▎      | 1881/5701 [35:45<53:33,  1.19it/s]  

Train loss: 0.2674117982387543


 33%|███▎      | 1891/5701 [35:57<56:43,  1.12it/s]  

Train loss: 0.27543875575065613


 33%|███▎      | 1901/5701 [36:08<54:13,  1.17it/s]  

Train loss: 0.25890350341796875


 34%|███▎      | 1911/5701 [36:20<55:33,  1.14it/s]  

Train loss: 0.23969843983650208


 34%|███▎      | 1921/5701 [36:31<52:54,  1.19it/s]  

Train loss: 0.22895057499408722


 34%|███▍      | 1931/5701 [36:43<55:02,  1.14it/s]  

Train loss: 0.24327662587165833


 34%|███▍      | 1941/5701 [36:53<52:36,  1.19it/s]  

Train loss: 0.2847212851047516


 34%|███▍      | 1951/5701 [37:05<54:56,  1.14it/s]  

Train loss: 0.22332382202148438


 34%|███▍      | 1961/5701 [37:16<52:19,  1.19it/s]  

Train loss: 0.2418799251317978


 35%|███▍      | 1971/5701 [37:29<57:12,  1.09it/s]  

Train loss: 0.2519298493862152


 35%|███▍      | 1981/5701 [37:39<50:29,  1.23it/s]  

Train loss: 0.2518930435180664


 35%|███▍      | 1991/5701 [37:50<51:44,  1.19it/s]  

Train loss: 0.2644837200641632


 35%|███▌      | 2001/5701 [38:01<53:30,  1.15it/s]  

Train loss: 0.2654689848423004


 35%|███▌      | 2011/5701 [38:13<53:05,  1.16it/s]  

Train loss: 0.23297572135925293


 35%|███▌      | 2021/5701 [38:24<52:28,  1.17it/s]  

Train loss: 0.22900119423866272


 36%|███▌      | 2031/5701 [38:35<53:35,  1.14it/s]  

Train loss: 0.24795746803283691


 36%|███▌      | 2041/5701 [38:47<51:43,  1.18it/s]  

Train loss: 0.24548926949501038


 36%|███▌      | 2051/5701 [38:58<52:54,  1.15it/s]  

Train loss: 0.23387502133846283


 36%|███▌      | 2061/5701 [39:09<52:08,  1.16it/s]  

Train loss: 0.22903171181678772


 36%|███▋      | 2071/5701 [39:21<53:05,  1.14it/s]  

Train loss: 0.23511970043182373


 37%|███▋      | 2081/5701 [39:33<54:35,  1.11it/s]  

Train loss: 0.2044333964586258


 37%|███▋      | 2091/5701 [39:45<51:38,  1.17it/s]  

Train loss: 0.23998241126537323


 37%|███▋      | 2101/5701 [39:56<52:50,  1.14it/s]  

Train loss: 0.24612848460674286


 37%|███▋      | 2111/5701 [40:07<50:50,  1.18it/s]  

Train loss: 0.2212039977312088


 37%|███▋      | 2121/5701 [40:19<51:04,  1.17it/s]  

Train loss: 0.23519018292427063


 37%|███▋      | 2131/5701 [40:29<49:02,  1.21it/s]  

Train loss: 0.2600659132003784


 38%|███▊      | 2141/5701 [40:41<51:16,  1.16it/s]  

Train loss: 0.23853054642677307


 38%|███▊      | 2151/5701 [40:53<52:52,  1.12it/s]  

Train loss: 0.27084115147590637


 38%|███▊      | 2161/5701 [41:04<49:48,  1.18it/s]  

Train loss: 0.23925307393074036


 38%|███▊      | 2171/5701 [41:15<51:44,  1.14it/s]  

Train loss: 0.24238288402557373


 38%|███▊      | 2181/5701 [41:27<49:59,  1.17it/s]  

Train loss: 0.24703626334667206


 38%|███▊      | 2191/5701 [41:38<50:26,  1.16it/s]  

Train loss: 0.24214282631874084


 39%|███▊      | 2201/5701 [41:50<51:38,  1.13it/s]  

Train loss: 0.23498240113258362


 39%|███▉      | 2211/5701 [42:01<48:57,  1.19it/s]  

Train loss: 0.23417377471923828


 39%|███▉      | 2221/5701 [42:12<49:19,  1.18it/s]  

Train loss: 0.25283318758010864


 39%|███▉      | 2231/5701 [42:23<49:12,  1.18it/s]  

Train loss: 0.23991088569164276


 39%|███▉      | 2241/5701 [42:34<49:50,  1.16it/s]  

Train loss: 0.244100883603096


 39%|███▉      | 2251/5701 [42:46<51:12,  1.12it/s]  

Train loss: 0.27251866459846497


 40%|███▉      | 2261/5701 [42:57<47:55,  1.20it/s]  

Train loss: 0.2466287463903427


 40%|███▉      | 2271/5701 [43:09<50:17,  1.14it/s]  

Train loss: 0.21838168799877167


 40%|████      | 2281/5701 [43:21<50:10,  1.14it/s]  

Train loss: 0.21853695809841156


 40%|████      | 2291/5701 [43:32<48:03,  1.18it/s]  

Train loss: 0.2672923505306244


 40%|████      | 2301/5701 [43:43<49:20,  1.15it/s]  

Train loss: 0.21800830960273743


 41%|████      | 2311/5701 [43:55<48:16,  1.17it/s]  

Train loss: 0.24605083465576172


 41%|████      | 2321/5701 [44:06<49:15,  1.14it/s]  

Train loss: 0.243862584233284


 41%|████      | 2331/5701 [44:17<46:23,  1.21it/s]  

Train loss: 0.2336364984512329


 41%|████      | 2341/5701 [44:28<47:28,  1.18it/s]  

Train loss: 0.2618381083011627


 41%|████      | 2351/5701 [44:39<47:19,  1.18it/s]  

Train loss: 0.23275350034236908


 41%|████▏     | 2361/5701 [44:50<47:50,  1.16it/s]  

Train loss: 0.21579661965370178


 42%|████▏     | 2371/5701 [45:02<48:48,  1.14it/s]  

Train loss: 0.23282134532928467


 42%|████▏     | 2381/5701 [45:13<46:21,  1.19it/s]  

Train loss: 0.22884981334209442


 42%|████▏     | 2391/5701 [45:24<46:36,  1.18it/s]  

Train loss: 0.21756120026111603


 42%|████▏     | 2401/5701 [45:36<48:26,  1.14it/s]  

Train loss: 0.18695196509361267


 42%|████▏     | 2411/5701 [45:47<46:16,  1.18it/s]  

Train loss: 0.2233635038137436


 42%|████▏     | 2421/5701 [45:59<47:51,  1.14it/s]  

Train loss: 0.24490320682525635


 43%|████▎     | 2431/5701 [46:10<48:22,  1.13it/s]  

Train loss: 0.19635526835918427


 43%|████▎     | 2441/5701 [46:21<45:48,  1.19it/s]  

Train loss: 0.2421242743730545


 43%|████▎     | 2451/5701 [46:33<46:57,  1.15it/s]  

Train loss: 0.21650440990924835


 43%|████▎     | 2461/5701 [46:44<46:01,  1.17it/s]  

Train loss: 0.19909439980983734


 43%|████▎     | 2471/5701 [46:55<44:42,  1.20it/s]  

Train loss: 0.2256513386964798


 44%|████▎     | 2481/5701 [47:06<46:18,  1.16it/s]  

Train loss: 0.2392430603504181


 44%|████▎     | 2491/5701 [47:17<44:19,  1.21it/s]  

Train loss: 0.21862037479877472


 44%|████▍     | 2501/5701 [47:28<46:22,  1.15it/s]  

Train loss: 0.22456414997577667


 44%|████▍     | 2511/5701 [47:40<45:27,  1.17it/s]  

Train loss: 0.24423407018184662


 44%|████▍     | 2521/5701 [47:51<44:41,  1.19it/s]  

Train loss: 0.2037927210330963


 44%|████▍     | 2531/5701 [48:02<45:34,  1.16it/s]  

Train loss: 0.23679713904857635


 45%|████▍     | 2541/5701 [48:14<46:51,  1.12it/s]  

Train loss: 0.23927128314971924


 45%|████▍     | 2551/5701 [48:24<42:33,  1.23it/s]  

Train loss: 0.22887979447841644


 45%|████▍     | 2561/5701 [48:36<45:08,  1.16it/s]  

Train loss: 0.2208057940006256


 45%|████▌     | 2571/5701 [48:48<46:22,  1.13it/s]  

Train loss: 0.22700738906860352


 45%|████▌     | 2581/5701 [48:59<43:37,  1.19it/s]  

Train loss: 0.21660800278186798


 45%|████▌     | 2591/5701 [49:11<46:44,  1.11it/s]  

Train loss: 0.20827102661132812


 46%|████▌     | 2601/5701 [49:22<43:16,  1.19it/s]  

Train loss: 0.1947132796049118


 46%|████▌     | 2611/5701 [49:34<45:41,  1.13it/s]  

Train loss: 0.23885126411914825


 46%|████▌     | 2621/5701 [49:45<43:52,  1.17it/s]  

Train loss: 0.23926399648189545


 46%|████▌     | 2631/5701 [49:57<46:00,  1.11it/s]  

Train loss: 0.22547321021556854


 46%|████▋     | 2641/5701 [50:08<42:13,  1.21it/s]  

Train loss: 0.21329529583454132


 47%|████▋     | 2651/5701 [50:19<44:26,  1.14it/s]  

Train loss: 0.22044768929481506


 47%|████▋     | 2661/5701 [50:31<45:37,  1.11it/s]  

Train loss: 0.22187577188014984


 47%|████▋     | 2671/5701 [50:42<42:44,  1.18it/s]  

Train loss: 0.21021996438503265


 47%|████▋     | 2681/5701 [50:54<43:41,  1.15it/s]  

Train loss: 0.19244378805160522


 47%|████▋     | 2691/5701 [51:06<44:15,  1.13it/s]  

Train loss: 0.21703162789344788


 47%|████▋     | 2701/5701 [51:16<40:58,  1.22it/s]  

Train loss: 0.2152196168899536


 48%|████▊     | 2711/5701 [51:28<45:02,  1.11it/s]  

Train loss: 0.20058925449848175


 48%|████▊     | 2721/5701 [51:40<43:31,  1.14it/s]  

Train loss: 0.22947750985622406


 48%|████▊     | 2731/5701 [51:51<43:07,  1.15it/s]  

Train loss: 0.2237357348203659


 48%|████▊     | 2741/5701 [52:03<43:41,  1.13it/s]  

Train loss: 0.24991288781166077


 48%|████▊     | 2751/5701 [52:14<39:45,  1.24it/s]  

Train loss: 0.20114688575267792


 48%|████▊     | 2761/5701 [52:25<43:02,  1.14it/s]  

Train loss: 0.2156580686569214


 49%|████▊     | 2771/5701 [52:37<43:30,  1.12it/s]  

Train loss: 0.22252269089221954


 49%|████▉     | 2781/5701 [52:49<42:28,  1.15it/s]  

Train loss: 0.22153842449188232


 49%|████▉     | 2791/5701 [52:59<40:21,  1.20it/s]  

Train loss: 0.22100535035133362


 49%|████▉     | 2801/5701 [53:11<41:37,  1.16it/s]  

Train loss: 0.20661425590515137


 49%|████▉     | 2811/5701 [53:22<41:39,  1.16it/s]  

Train loss: 0.1913110464811325


 49%|████▉     | 2821/5701 [53:34<42:27,  1.13it/s]  

Train loss: 0.2324366569519043


 50%|████▉     | 2831/5701 [53:46<42:05,  1.14it/s]  

Train loss: 0.20599830150604248


 50%|████▉     | 2841/5701 [53:57<39:59,  1.19it/s]  

Train loss: 0.22495566308498383


 50%|█████     | 2851/5701 [54:08<41:26,  1.15it/s]  

Train loss: 0.23348884284496307


 50%|█████     | 2861/5701 [54:19<40:33,  1.17it/s]  

Train loss: 0.23021462559700012


 50%|█████     | 2871/5701 [54:31<40:33,  1.16it/s]  

Train loss: 0.20108187198638916


 51%|█████     | 2881/5701 [54:42<41:09,  1.14it/s]  

Train loss: 0.22302091121673584


 51%|█████     | 2891/5701 [54:53<39:14,  1.19it/s]  

Train loss: 0.24174602329730988


 51%|█████     | 2901/5701 [55:05<41:45,  1.12it/s]  

Train loss: 0.1925923079252243


 51%|█████     | 2911/5701 [55:17<40:01,  1.16it/s]  

Train loss: 0.23393188416957855


 51%|█████     | 2921/5701 [55:28<39:17,  1.18it/s]  

Train loss: 0.22467902302742004


 51%|█████▏    | 2931/5701 [55:39<38:40,  1.19it/s]  

Train loss: 0.18101635575294495


 52%|█████▏    | 2941/5701 [55:51<41:15,  1.11it/s]  

Train loss: 0.21579909324645996


 52%|█████▏    | 2951/5701 [56:02<40:31,  1.13it/s]  

Train loss: 0.1963050663471222


 52%|█████▏    | 2961/5701 [56:13<37:27,  1.22it/s]  

Train loss: 0.17805153131484985


 52%|█████▏    | 2971/5701 [56:24<38:37,  1.18it/s]  

Train loss: 0.2242312878370285


 52%|█████▏    | 2981/5701 [56:36<40:40,  1.11it/s]  

Train loss: 0.19531166553497314


 52%|█████▏    | 2991/5701 [56:47<38:47,  1.16it/s]  

Train loss: 0.20768921077251434


 53%|█████▎    | 3001/5701 [56:58<37:32,  1.20it/s]  

Train loss: 0.23422324657440186


 53%|█████▎    | 3011/5701 [57:10<39:54,  1.12it/s]  

Train loss: 0.20981669425964355


 53%|█████▎    | 3021/5701 [57:21<38:16,  1.17it/s]  

Train loss: 0.19775818288326263


 53%|█████▎    | 3031/5701 [57:34<41:15,  1.08it/s]  

Train loss: 0.23920397460460663


 53%|█████▎    | 3041/5701 [57:44<34:41,  1.28it/s]  

Train loss: 0.20936301350593567


 54%|█████▎    | 3051/5701 [57:55<38:47,  1.14it/s]  

Train loss: 0.20587562024593353


 54%|█████▎    | 3061/5701 [58:07<37:58,  1.16it/s]  

Train loss: 0.21901166439056396


 54%|█████▍    | 3071/5701 [58:19<39:25,  1.11it/s]  

Train loss: 0.2380216121673584


 54%|█████▍    | 3081/5701 [58:30<36:10,  1.21it/s]  

Train loss: 0.2069278210401535


 54%|█████▍    | 3091/5701 [58:42<39:03,  1.11it/s]  

Train loss: 0.1889580935239792


 54%|█████▍    | 3101/5701 [58:53<37:59,  1.14it/s]  

Train loss: 0.18724198639392853


 55%|█████▍    | 3111/5701 [59:04<36:01,  1.20it/s]  

Train loss: 0.18900305032730103


 55%|█████▍    | 3121/5701 [59:16<37:19,  1.15it/s]  

Train loss: 0.2028842270374298


 55%|█████▍    | 3131/5701 [59:27<37:42,  1.14it/s]  

Train loss: 0.2169332057237625


 55%|█████▌    | 3141/5701 [59:39<37:59,  1.12it/s]  

Train loss: 0.21402494609355927


 55%|█████▌    | 3151/5701 [59:50<35:13,  1.21it/s]  

Train loss: 0.21802513301372528


 55%|█████▌    | 3161/5701 [1:00:02<37:26,  1.13it/s]  

Train loss: 0.2187548130750656


 56%|█████▌    | 3171/5701 [1:00:13<36:00,  1.17it/s]  

Train loss: 0.20959973335266113


 56%|█████▌    | 3181/5701 [1:00:25<37:58,  1.11it/s]  

Train loss: 0.20187948644161224


 56%|█████▌    | 3191/5701 [1:00:36<35:04,  1.19it/s]  

Train loss: 0.22803528606891632


 56%|█████▌    | 3201/5701 [1:00:47<35:23,  1.18it/s]  

Train loss: 0.1875370740890503


 56%|█████▋    | 3211/5701 [1:00:59<35:57,  1.15it/s]  

Train loss: 0.21628563106060028


 56%|█████▋    | 3221/5701 [1:01:10<35:09,  1.18it/s]  

Train loss: 0.21468806266784668


 57%|█████▋    | 3231/5701 [1:01:21<35:06,  1.17it/s]  

Train loss: 0.22401228547096252


 57%|█████▋    | 3241/5701 [1:01:32<35:48,  1.15it/s]  

Train loss: 0.19713667035102844


 57%|█████▋    | 3251/5701 [1:01:44<35:04,  1.16it/s]  

Train loss: 0.22678795456886292


 57%|█████▋    | 3261/5701 [1:01:55<33:51,  1.20it/s]  

Train loss: 0.19118772447109222


 57%|█████▋    | 3271/5701 [1:02:07<37:18,  1.09it/s]  

Train loss: 0.1991020292043686


 58%|█████▊    | 3281/5701 [1:02:18<33:22,  1.21it/s]  

Train loss: 0.19643619656562805


 58%|█████▊    | 3291/5701 [1:02:29<34:04,  1.18it/s]  

Train loss: 0.2053651064634323


 58%|█████▊    | 3301/5701 [1:02:40<33:36,  1.19it/s]  

Train loss: 0.20918165147304535


 58%|█████▊    | 3311/5701 [1:02:51<34:12,  1.16it/s]  

Train loss: 0.21182870864868164


 58%|█████▊    | 3321/5701 [1:03:02<34:03,  1.16it/s]  

Train loss: 0.21297959983348846


 58%|█████▊    | 3331/5701 [1:03:14<34:08,  1.16it/s]  

Train loss: 0.21487008035182953


 59%|█████▊    | 3341/5701 [1:03:25<34:38,  1.14it/s]  

Train loss: 0.20231695473194122


 59%|█████▉    | 3351/5701 [1:03:36<31:41,  1.24it/s]  

Train loss: 0.19717516005039215


 59%|█████▉    | 3361/5701 [1:03:48<34:45,  1.12it/s]  

Train loss: 0.2063264101743698


 59%|█████▉    | 3371/5701 [1:04:00<34:42,  1.12it/s]  

Train loss: 0.21697233617305756


 59%|█████▉    | 3381/5701 [1:04:11<33:11,  1.16it/s]  

Train loss: 0.2083558887243271


 59%|█████▉    | 3391/5701 [1:04:22<31:43,  1.21it/s]  

Train loss: 0.20448051393032074


 60%|█████▉    | 3401/5701 [1:04:34<34:42,  1.10it/s]  

Train loss: 0.2186535894870758


 60%|█████▉    | 3411/5701 [1:04:45<31:41,  1.20it/s]  

Train loss: 0.2127024233341217


 60%|██████    | 3421/5701 [1:04:56<32:25,  1.17it/s]  

Train loss: 0.20166872441768646


 60%|██████    | 3431/5701 [1:05:07<32:39,  1.16it/s]  

Train loss: 0.18817590177059174


 60%|██████    | 3441/5701 [1:05:19<33:47,  1.11it/s]  

Train loss: 0.2049475759267807


 61%|██████    | 3451/5701 [1:05:29<30:07,  1.24it/s]  

Train loss: 0.20861442387104034


 61%|██████    | 3461/5701 [1:05:41<33:10,  1.13it/s]  

Train loss: 0.20202241837978363


 61%|██████    | 3471/5701 [1:05:53<32:22,  1.15it/s]  

Train loss: 0.21549104154109955


 61%|██████    | 3481/5701 [1:06:05<32:38,  1.13it/s]  

Train loss: 0.199442058801651


 61%|██████    | 3491/5701 [1:06:15<30:20,  1.21it/s]  

Train loss: 0.2238324135541916


 61%|██████▏   | 3501/5701 [1:06:27<32:55,  1.11it/s]  

Train loss: 0.1789696216583252


 62%|██████▏   | 3511/5701 [1:06:39<31:46,  1.15it/s]  

Train loss: 0.18013636767864227


 62%|██████▏   | 3521/5701 [1:06:49<30:03,  1.21it/s]  

Train loss: 0.20169325172901154


 62%|██████▏   | 3531/5701 [1:07:01<32:20,  1.12it/s]  

Train loss: 0.1757827252149582


 62%|██████▏   | 3541/5701 [1:07:12<30:21,  1.19it/s]  

Train loss: 0.19636355340480804


 62%|██████▏   | 3551/5701 [1:07:24<30:53,  1.16it/s]  

Train loss: 0.19959934055805206


 62%|██████▏   | 3561/5701 [1:07:35<31:21,  1.14it/s]  

Train loss: 0.1942148059606552


 63%|██████▎   | 3571/5701 [1:07:46<29:19,  1.21it/s]  

Train loss: 0.17782892286777496


 63%|██████▎   | 3581/5701 [1:07:58<31:09,  1.13it/s]  

Train loss: 0.19047309458255768


 63%|██████▎   | 3591/5701 [1:08:10<31:31,  1.12it/s]  

Train loss: 0.18918143212795258


 63%|██████▎   | 3601/5701 [1:08:21<30:01,  1.17it/s]  

Train loss: 0.20094473659992218


 63%|██████▎   | 3611/5701 [1:08:32<29:30,  1.18it/s]  

Train loss: 0.1790677160024643


 64%|██████▎   | 3621/5701 [1:08:43<28:59,  1.20it/s]  

Train loss: 0.20471607148647308


 64%|██████▎   | 3631/5701 [1:08:55<30:11,  1.14it/s]  

Train loss: 0.19297216832637787


 64%|██████▍   | 3641/5701 [1:09:06<29:26,  1.17it/s]  

Train loss: 0.21763975918293


 64%|██████▍   | 3651/5701 [1:09:18<30:12,  1.13it/s]  

Train loss: 0.18976202607154846


 64%|██████▍   | 3661/5701 [1:09:28<26:57,  1.26it/s]  

Train loss: 0.17353931069374084


 64%|██████▍   | 3671/5701 [1:09:39<29:35,  1.14it/s]  

Train loss: 0.1964455544948578


 65%|██████▍   | 3681/5701 [1:09:51<29:25,  1.14it/s]  

Train loss: 0.20297732949256897


 65%|██████▍   | 3691/5701 [1:10:03<29:13,  1.15it/s]  

Train loss: 0.17897692322731018


 65%|██████▍   | 3701/5701 [1:10:14<28:22,  1.17it/s]  

Train loss: 0.20364069938659668


 65%|██████▌   | 3711/5701 [1:10:25<27:42,  1.20it/s]  

Train loss: 0.19751380383968353


 65%|██████▌   | 3721/5701 [1:10:35<27:35,  1.20it/s]  

Train loss: 0.1860901564359665


 65%|██████▌   | 3731/5701 [1:10:48<29:30,  1.11it/s]  

Train loss: 0.19307267665863037


 66%|██████▌   | 3741/5701 [1:10:59<28:53,  1.13it/s]  

Train loss: 0.2154441922903061


 66%|██████▌   | 3751/5701 [1:11:11<27:40,  1.17it/s]  

Train loss: 0.1824803501367569


 66%|██████▌   | 3761/5701 [1:11:21<27:05,  1.19it/s]  

Train loss: 0.18621592223644257


 66%|██████▌   | 3771/5701 [1:11:34<29:13,  1.10it/s]  

Train loss: 0.21673977375030518


 66%|██████▋   | 3781/5701 [1:11:45<26:42,  1.20it/s]  

Train loss: 0.23717227578163147


 66%|██████▋   | 3791/5701 [1:11:56<27:01,  1.18it/s]  

Train loss: 0.21562233567237854


 67%|██████▋   | 3801/5701 [1:12:07<26:37,  1.19it/s]  

Train loss: 0.20711159706115723


 67%|██████▋   | 3811/5701 [1:12:18<26:44,  1.18it/s]  

Train loss: 0.20791395008563995


 67%|██████▋   | 3821/5701 [1:12:29<26:22,  1.19it/s]  

Train loss: 0.21799245476722717


 67%|██████▋   | 3831/5701 [1:12:40<26:59,  1.15it/s]  

Train loss: 0.2242768257856369


 67%|██████▋   | 3841/5701 [1:12:51<26:31,  1.17it/s]  

Train loss: 0.19144922494888306


 68%|██████▊   | 3851/5701 [1:13:02<25:20,  1.22it/s]  

Train loss: 0.2083808183670044


 68%|██████▊   | 3861/5701 [1:13:14<27:18,  1.12it/s]  

Train loss: 0.2194489985704422


 68%|██████▊   | 3871/5701 [1:13:26<26:55,  1.13it/s]  

Train loss: 0.21396084129810333


 68%|██████▊   | 3881/5701 [1:13:37<25:19,  1.20it/s]  

Train loss: 0.21309120953083038


 68%|██████▊   | 3891/5701 [1:13:48<26:15,  1.15it/s]  

Train loss: 0.2196609079837799


 68%|██████▊   | 3901/5701 [1:14:00<26:00,  1.15it/s]  

Train loss: 0.19279852509498596


 69%|██████▊   | 3911/5701 [1:14:10<24:44,  1.21it/s]  

Train loss: 0.19675572216510773


 69%|██████▉   | 3921/5701 [1:14:22<25:49,  1.15it/s]  

Train loss: 0.21820466220378876


 69%|██████▉   | 3931/5701 [1:14:33<25:38,  1.15it/s]  

Train loss: 0.19756554067134857


 69%|██████▉   | 3941/5701 [1:14:44<24:04,  1.22it/s]  

Train loss: 0.20386409759521484


 69%|██████▉   | 3951/5701 [1:14:56<25:54,  1.13it/s]  

Train loss: 0.19501231610774994


 69%|██████▉   | 3961/5701 [1:15:07<24:52,  1.17it/s]  

Train loss: 0.1959778517484665


 70%|██████▉   | 3971/5701 [1:15:18<24:35,  1.17it/s]  

Train loss: 0.1833096593618393


 70%|██████▉   | 3981/5701 [1:15:29<24:26,  1.17it/s]  

Train loss: 0.1766122430562973


 70%|███████   | 3991/5701 [1:15:41<25:25,  1.12it/s]  

Train loss: 0.21836209297180176


 70%|███████   | 4001/5701 [1:15:53<24:37,  1.15it/s]  

Train loss: 0.18186317384243011


 70%|███████   | 4011/5701 [1:16:03<22:53,  1.23it/s]

Train loss: 0.17948873341083527


 71%|███████   | 4021/5701 [1:16:15<24:35,  1.14it/s]  

Train loss: 0.16706083714962006


 71%|███████   | 4031/5701 [1:16:26<24:22,  1.14it/s]  

Train loss: 0.19422179460525513


 71%|███████   | 4041/5701 [1:16:38<24:00,  1.15it/s]  

Train loss: 0.1672690212726593


 71%|███████   | 4051/5701 [1:16:48<22:18,  1.23it/s]

Train loss: 0.18128351867198944


 71%|███████   | 4061/5701 [1:17:00<23:58,  1.14it/s]  

Train loss: 0.17498290538787842


 71%|███████▏  | 4071/5701 [1:17:11<23:19,  1.17it/s]  

Train loss: 0.1726142019033432


 72%|███████▏  | 4081/5701 [1:17:22<22:22,  1.21it/s]

Train loss: 0.1814410239458084


 72%|███████▏  | 4091/5701 [1:17:33<22:56,  1.17it/s]  

Train loss: 0.196272611618042


 72%|███████▏  | 4101/5701 [1:17:45<23:08,  1.15it/s]  

Train loss: 0.15999072790145874


 72%|███████▏  | 4111/5701 [1:17:55<22:02,  1.20it/s]

Train loss: 0.20646019279956818


 72%|███████▏  | 4121/5701 [1:18:07<23:22,  1.13it/s]  

Train loss: 0.20061607658863068


 72%|███████▏  | 4131/5701 [1:18:18<21:34,  1.21it/s]

Train loss: 0.19929012656211853


 73%|███████▎  | 4141/5701 [1:18:29<22:21,  1.16it/s]  

Train loss: 0.19442373514175415


 73%|███████▎  | 4151/5701 [1:18:41<22:22,  1.15it/s]  

Train loss: 0.1743047535419464


 73%|███████▎  | 4161/5701 [1:18:51<21:27,  1.20it/s]

Train loss: 0.18690459430217743


 73%|███████▎  | 4171/5701 [1:19:03<21:45,  1.17it/s]

Train loss: 0.2073747217655182


 73%|███████▎  | 4181/5701 [1:19:14<21:30,  1.18it/s]

Train loss: 0.15337754786014557


 74%|███████▎  | 4191/5701 [1:19:25<21:41,  1.16it/s]

Train loss: 0.20052115619182587


 74%|███████▎  | 4201/5701 [1:19:37<22:27,  1.11it/s]  

Train loss: 0.20307175815105438


 74%|███████▍  | 4211/5701 [1:19:48<20:23,  1.22it/s]

Train loss: 0.1626075953245163


 74%|███████▍  | 4221/5701 [1:20:00<22:12,  1.11it/s]  

Train loss: 0.19689247012138367


 74%|███████▍  | 4231/5701 [1:20:11<21:32,  1.14it/s]

Train loss: 0.1754232794046402


 74%|███████▍  | 4241/5701 [1:20:22<19:28,  1.25it/s]

Train loss: 0.2137693464756012


 75%|███████▍  | 4251/5701 [1:20:34<21:34,  1.12it/s]

Train loss: 0.19735276699066162


 75%|███████▍  | 4261/5701 [1:20:45<20:28,  1.17it/s]

Train loss: 0.17733658850193024


 75%|███████▍  | 4271/5701 [1:20:56<21:02,  1.13it/s]

Train loss: 0.18456046283245087


 75%|███████▌  | 4281/5701 [1:21:08<20:10,  1.17it/s]

Train loss: 0.16608268022537231


 75%|███████▌  | 4291/5701 [1:21:18<19:36,  1.20it/s]

Train loss: 0.17346113920211792


 75%|███████▌  | 4301/5701 [1:21:30<19:51,  1.17it/s]

Train loss: 0.21717718243598938


 76%|███████▌  | 4311/5701 [1:21:41<20:30,  1.13it/s]

Train loss: 0.18846341967582703


 76%|███████▌  | 4321/5701 [1:21:53<20:00,  1.15it/s]

Train loss: 0.19910739362239838


 76%|███████▌  | 4331/5701 [1:22:04<19:37,  1.16it/s]

Train loss: 0.19997696578502655


 76%|███████▌  | 4341/5701 [1:22:15<19:09,  1.18it/s]

Train loss: 0.18738213181495667


 76%|███████▋  | 4351/5701 [1:22:27<19:43,  1.14it/s]

Train loss: 0.14513278007507324


 76%|███████▋  | 4361/5701 [1:22:38<19:29,  1.15it/s]

Train loss: 0.18439532816410065


 77%|███████▋  | 4371/5701 [1:22:49<18:16,  1.21it/s]

Train loss: 0.19448654353618622


 77%|███████▋  | 4381/5701 [1:23:01<19:40,  1.12it/s]

Train loss: 0.17745773494243622


 77%|███████▋  | 4391/5701 [1:23:13<19:36,  1.11it/s]

Train loss: 0.1852814108133316


 77%|███████▋  | 4401/5701 [1:23:24<18:01,  1.20it/s]

Train loss: 0.16732420027256012


 77%|███████▋  | 4411/5701 [1:23:35<18:41,  1.15it/s]

Train loss: 0.21458211541175842


 78%|███████▊  | 4421/5701 [1:23:47<18:07,  1.18it/s]

Train loss: 0.18979361653327942


 78%|███████▊  | 4431/5701 [1:23:58<18:49,  1.12it/s]

Train loss: 0.16982589662075043


 78%|███████▊  | 4441/5701 [1:24:10<17:58,  1.17it/s]

Train loss: 0.17995275557041168


 78%|███████▊  | 4451/5701 [1:24:21<17:41,  1.18it/s]

Train loss: 0.18941985070705414


 78%|███████▊  | 4461/5701 [1:24:32<17:21,  1.19it/s]

Train loss: 0.19764964282512665


 78%|███████▊  | 4471/5701 [1:24:43<17:44,  1.15it/s]

Train loss: 0.1999375820159912


 79%|███████▊  | 4481/5701 [1:24:54<16:57,  1.20it/s]

Train loss: 0.17587310075759888


 79%|███████▉  | 4491/5701 [1:25:07<18:28,  1.09it/s]

Train loss: 0.1654646247625351


 79%|███████▉  | 4501/5701 [1:25:17<15:55,  1.26it/s]

Train loss: 0.1633647382259369


 79%|███████▉  | 4511/5701 [1:25:28<17:32,  1.13it/s]

Train loss: 0.2149924784898758


 79%|███████▉  | 4521/5701 [1:25:40<17:10,  1.15it/s]

Train loss: 0.18529917299747467


 79%|███████▉  | 4531/5701 [1:25:52<17:11,  1.13it/s]

Train loss: 0.1955205649137497


 80%|███████▉  | 4541/5701 [1:26:04<17:19,  1.12it/s]

Train loss: 0.1748487949371338


 80%|███████▉  | 4551/5701 [1:26:15<16:39,  1.15it/s]

Train loss: 0.1917216032743454


 80%|████████  | 4561/5701 [1:26:26<16:12,  1.17it/s]

Train loss: 0.1789829283952713


 80%|████████  | 4571/5701 [1:26:38<15:55,  1.18it/s]

Train loss: 0.17749840021133423


 80%|████████  | 4581/5701 [1:26:49<16:24,  1.14it/s]

Train loss: 0.17431655526161194


 81%|████████  | 4591/5701 [1:27:01<16:12,  1.14it/s]

Train loss: 0.20165185630321503


 81%|████████  | 4601/5701 [1:27:12<15:33,  1.18it/s]

Train loss: 0.18714331090450287


 81%|████████  | 4611/5701 [1:27:23<15:45,  1.15it/s]

Train loss: 0.18698669970035553


 81%|████████  | 4621/5701 [1:27:35<16:00,  1.12it/s]

Train loss: 0.13834843039512634


 81%|████████  | 4631/5701 [1:27:46<14:34,  1.22it/s]

Train loss: 0.17496632039546967


 81%|████████▏ | 4641/5701 [1:27:57<14:55,  1.18it/s]

Train loss: 0.1756833791732788


 82%|████████▏ | 4651/5701 [1:28:09<15:18,  1.14it/s]

Train loss: 0.17560645937919617


 82%|████████▏ | 4661/5701 [1:28:21<15:35,  1.11it/s]

Train loss: 0.17916792631149292


 82%|████████▏ | 4671/5701 [1:28:32<14:48,  1.16it/s]

Train loss: 0.17167989909648895


 82%|████████▏ | 4681/5701 [1:28:43<14:19,  1.19it/s]

Train loss: 0.18637412786483765


 82%|████████▏ | 4691/5701 [1:28:54<14:18,  1.18it/s]

Train loss: 0.214527890086174


 82%|████████▏ | 4701/5701 [1:29:06<14:55,  1.12it/s]

Train loss: 0.19532708823680878


 83%|████████▎ | 4711/5701 [1:29:18<14:26,  1.14it/s]

Train loss: 0.20714154839515686


 83%|████████▎ | 4721/5701 [1:29:28<12:45,  1.28it/s]

Train loss: 0.19757471978664398


 83%|████████▎ | 4731/5701 [1:29:39<14:20,  1.13it/s]

Train loss: 0.19540131092071533


 83%|████████▎ | 4741/5701 [1:29:51<14:18,  1.12it/s]

Train loss: 0.17790161073207855


 83%|████████▎ | 4751/5701 [1:30:03<13:54,  1.14it/s]

Train loss: 0.17929351329803467


 84%|████████▎ | 4761/5701 [1:30:15<14:02,  1.12it/s]

Train loss: 0.16077706217765808


 84%|████████▎ | 4771/5701 [1:30:25<12:08,  1.28it/s]

Train loss: 0.1782841831445694


 84%|████████▍ | 4781/5701 [1:30:36<13:10,  1.16it/s]

Train loss: 0.18650977313518524


 84%|████████▍ | 4791/5701 [1:30:48<13:20,  1.14it/s]

Train loss: 0.18557731807231903


 84%|████████▍ | 4801/5701 [1:31:00<13:04,  1.15it/s]

Train loss: 0.180887371301651


 84%|████████▍ | 4811/5701 [1:31:11<12:41,  1.17it/s]

Train loss: 0.18030986189842224


 85%|████████▍ | 4821/5701 [1:31:22<12:09,  1.21it/s]

Train loss: 0.18139247596263885


 85%|████████▍ | 4831/5701 [1:31:33<12:19,  1.18it/s]

Train loss: 0.1889897882938385


 85%|████████▍ | 4841/5701 [1:31:44<12:26,  1.15it/s]

Train loss: 0.19239374995231628


 85%|████████▌ | 4851/5701 [1:31:56<12:20,  1.15it/s]

Train loss: 0.17252236604690552


 85%|████████▌ | 4861/5701 [1:32:07<12:01,  1.16it/s]

Train loss: 0.18960794806480408


 85%|████████▌ | 4871/5701 [1:32:18<11:50,  1.17it/s]

Train loss: 0.17969657480716705


 86%|████████▌ | 4881/5701 [1:32:29<11:39,  1.17it/s]

Train loss: 0.1755005568265915


 86%|████████▌ | 4891/5701 [1:32:40<11:19,  1.19it/s]

Train loss: 0.16946038603782654


 86%|████████▌ | 4901/5701 [1:32:52<11:25,  1.17it/s]

Train loss: 0.18622325360774994


 86%|████████▌ | 4911/5701 [1:33:03<11:27,  1.15it/s]

Train loss: 0.18148748576641083


 86%|████████▋ | 4921/5701 [1:33:14<11:13,  1.16it/s]

Train loss: 0.1463737040758133


 86%|████████▋ | 4931/5701 [1:33:26<10:54,  1.18it/s]

Train loss: 0.16683335602283478


 87%|████████▋ | 4941/5701 [1:33:37<11:06,  1.14it/s]

Train loss: 0.17466525733470917


 87%|████████▋ | 4951/5701 [1:33:48<10:22,  1.20it/s]

Train loss: 0.17436730861663818


 87%|████████▋ | 4961/5701 [1:33:59<10:40,  1.16it/s]

Train loss: 0.1469908207654953


 87%|████████▋ | 4971/5701 [1:34:11<10:23,  1.17it/s]

Train loss: 0.18832170963287354


 87%|████████▋ | 4981/5701 [1:34:21<10:01,  1.20it/s]

Train loss: 0.18201394379138947


 88%|████████▊ | 4991/5701 [1:34:33<10:19,  1.15it/s]

Train loss: 0.14014539122581482


 88%|████████▊ | 5001/5701 [1:34:45<10:13,  1.14it/s]

Train loss: 0.1416696012020111


 88%|████████▊ | 5011/5701 [1:34:55<09:29,  1.21it/s]

Train loss: 0.16408050060272217


 88%|████████▊ | 5021/5701 [1:35:06<09:37,  1.18it/s]

Train loss: 0.1890505999326706


 88%|████████▊ | 5031/5701 [1:35:18<09:39,  1.16it/s]

Train loss: 0.17156396806240082


 88%|████████▊ | 5041/5701 [1:35:29<09:13,  1.19it/s]

Train loss: 0.18537211418151855


 89%|████████▊ | 5051/5701 [1:35:40<09:21,  1.16it/s]

Train loss: 0.17541413009166718


 89%|████████▉ | 5061/5701 [1:35:51<09:10,  1.16it/s]

Train loss: 0.16078463196754456


 89%|████████▉ | 5071/5701 [1:36:02<08:44,  1.20it/s]

Train loss: 0.19358505308628082


 89%|████████▉ | 5081/5701 [1:36:14<08:51,  1.17it/s]

Train loss: 0.1911824345588684


 89%|████████▉ | 5091/5701 [1:36:25<08:47,  1.16it/s]

Train loss: 0.17671361565589905


 89%|████████▉ | 5101/5701 [1:36:36<08:16,  1.21it/s]

Train loss: 0.1705787032842636


 90%|████████▉ | 5111/5701 [1:36:47<08:38,  1.14it/s]

Train loss: 0.16409797966480255


 90%|████████▉ | 5121/5701 [1:36:59<08:20,  1.16it/s]

Train loss: 0.1820683628320694


 90%|█████████ | 5131/5701 [1:37:10<08:05,  1.18it/s]

Train loss: 0.16188214719295502


 90%|█████████ | 5141/5701 [1:37:21<08:06,  1.15it/s]

Train loss: 0.14617006480693817


 90%|█████████ | 5151/5701 [1:37:32<07:43,  1.19it/s]

Train loss: 0.12900856137275696


 91%|█████████ | 5161/5701 [1:37:44<07:45,  1.16it/s]

Train loss: 0.19299088418483734


 91%|█████████ | 5171/5701 [1:37:55<07:40,  1.15it/s]

Train loss: 0.1678672879934311


 91%|█████████ | 5181/5701 [1:38:07<07:33,  1.15it/s]

Train loss: 0.1633472591638565


 91%|█████████ | 5191/5701 [1:38:16<06:35,  1.29it/s]

Train loss: 0.157288059592247


 91%|█████████ | 5201/5701 [1:38:28<07:27,  1.12it/s]

Train loss: 0.20150232315063477


 91%|█████████▏| 5211/5701 [1:38:40<07:03,  1.16it/s]

Train loss: 0.17557786405086517


 92%|█████████▏| 5221/5701 [1:38:51<06:55,  1.16it/s]

Train loss: 0.17017586529254913


 92%|█████████▏| 5231/5701 [1:39:03<06:54,  1.13it/s]

Train loss: 0.1702246218919754


 92%|█████████▏| 5241/5701 [1:39:13<06:16,  1.22it/s]

Train loss: 0.17968080937862396


 92%|█████████▏| 5251/5701 [1:39:25<06:26,  1.16it/s]

Train loss: 0.150469109416008


 92%|█████████▏| 5261/5701 [1:39:35<05:55,  1.24it/s]

Train loss: 0.1400614082813263


 92%|█████████▏| 5271/5701 [1:39:46<06:09,  1.16it/s]

Train loss: 0.1983417123556137


 93%|█████████▎| 5281/5701 [1:39:56<05:35,  1.25it/s]

Train loss: 0.13798625767230988


 93%|█████████▎| 5291/5701 [1:40:08<05:51,  1.17it/s]

Train loss: 0.16856394708156586


 93%|█████████▎| 5301/5701 [1:40:19<05:46,  1.15it/s]

Train loss: 0.1490020602941513


 93%|█████████▎| 5311/5701 [1:40:30<05:21,  1.21it/s]

Train loss: 0.1874084770679474


 93%|█████████▎| 5321/5701 [1:40:41<05:25,  1.17it/s]

Train loss: 0.1588328331708908


 94%|█████████▎| 5331/5701 [1:40:51<04:55,  1.25it/s]

Train loss: 0.15871669352054596


 94%|█████████▎| 5341/5701 [1:41:02<05:09,  1.16it/s]

Train loss: 0.17881059646606445


 94%|█████████▍| 5351/5701 [1:41:13<04:54,  1.19it/s]

Train loss: 0.17239917814731598


 94%|█████████▍| 5361/5701 [1:41:24<04:37,  1.22it/s]

Train loss: 0.16995127499103546


 94%|█████████▍| 5371/5701 [1:41:35<04:38,  1.18it/s]

Train loss: 0.15269865095615387


 94%|█████████▍| 5381/5701 [1:41:45<04:22,  1.22it/s]

Train loss: 0.15682439506053925


 95%|█████████▍| 5391/5701 [1:41:57<04:25,  1.17it/s]

Train loss: 0.17154864966869354


 95%|█████████▍| 5401/5701 [1:42:07<04:07,  1.21it/s]

Train loss: 0.1813150942325592


 95%|█████████▍| 5411/5701 [1:42:19<04:09,  1.16it/s]

Train loss: 0.1813696324825287


 95%|█████████▌| 5421/5701 [1:42:29<03:43,  1.25it/s]

Train loss: 0.15704002976417542


 95%|█████████▌| 5431/5701 [1:42:40<03:48,  1.18it/s]

Train loss: 0.18803249299526215


 95%|█████████▌| 5441/5701 [1:42:51<03:40,  1.18it/s]

Train loss: 0.185161292552948


 96%|█████████▌| 5451/5701 [1:43:02<03:38,  1.15it/s]

Train loss: 0.16473828256130219


 96%|█████████▌| 5461/5701 [1:43:13<03:17,  1.22it/s]

Train loss: 0.15388327836990356


 96%|█████████▌| 5471/5701 [1:43:24<03:13,  1.19it/s]

Train loss: 0.17027807235717773


 96%|█████████▌| 5481/5701 [1:43:35<03:01,  1.22it/s]

Train loss: 0.15683302283287048


 96%|█████████▋| 5491/5701 [1:43:46<03:00,  1.17it/s]

Train loss: 0.18473516404628754


 96%|█████████▋| 5501/5701 [1:43:57<02:48,  1.19it/s]

Train loss: 0.1843516081571579


 97%|█████████▋| 5511/5701 [1:44:08<02:44,  1.16it/s]

Train loss: 0.17727379500865936


 97%|█████████▋| 5521/5701 [1:44:19<02:28,  1.21it/s]

Train loss: 0.1702301949262619


 97%|█████████▋| 5531/5701 [1:44:30<02:24,  1.17it/s]

Train loss: 0.14084218442440033


 97%|█████████▋| 5541/5701 [1:44:40<02:07,  1.25it/s]

Train loss: 0.1749516874551773


 97%|█████████▋| 5551/5701 [1:44:51<02:03,  1.22it/s]

Train loss: 0.1785101741552353


 98%|█████████▊| 5561/5701 [1:45:01<01:54,  1.22it/s]

Train loss: 0.14084146916866302


 98%|█████████▊| 5571/5701 [1:45:12<01:50,  1.18it/s]

Train loss: 0.16559399664402008


 98%|█████████▊| 5581/5701 [1:45:24<01:42,  1.17it/s]

Train loss: 0.18650732934474945


 98%|█████████▊| 5591/5701 [1:45:35<01:32,  1.19it/s]

Train loss: 0.17536503076553345


 98%|█████████▊| 5601/5701 [1:45:45<01:23,  1.20it/s]

Train loss: 0.14303824305534363


 98%|█████████▊| 5611/5701 [1:45:57<01:18,  1.14it/s]

Train loss: 0.1745070368051529


 99%|█████████▊| 5621/5701 [1:46:08<01:06,  1.20it/s]

Train loss: 0.16933965682983398


 99%|█████████▉| 5631/5701 [1:46:19<00:59,  1.19it/s]

Train loss: 0.17860804498195648


 99%|█████████▉| 5641/5701 [1:46:30<00:51,  1.17it/s]

Train loss: 0.14980293810367584


 99%|█████████▉| 5651/5701 [1:46:41<00:40,  1.22it/s]

Train loss: 0.16464625298976898


 99%|█████████▉| 5661/5701 [1:46:51<00:32,  1.22it/s]

Train loss: 0.17193996906280518


 99%|█████████▉| 5671/5701 [1:47:02<00:25,  1.18it/s]

Train loss: 0.16236720979213715


100%|█████████▉| 5681/5701 [1:47:13<00:16,  1.19it/s]

Train loss: 0.17896150052547455


100%|█████████▉| 5691/5701 [1:47:23<00:07,  1.31it/s]

Train loss: 0.1421319991350174


100%|██████████| 5701/5701 [1:47:28<00:00,  1.13s/it]


Train loss: 0.18350958824157715


Validation: 100%|██████████| 41/41 [00:53<00:00,  1.31s/it]


Validation loss = []
Epoch 2


  0%|          | 1/5701 [00:12<19:30:55, 12.33s/it]

Train loss: 0.016910763457417488


  0%|          | 11/5701 [00:23<3:42:36,  2.35s/it]

Train loss: 0.1872243732213974


  0%|          | 21/5701 [00:34<3:21:17,  2.13s/it]

Train loss: 0.18841393291950226


  1%|          | 31/5701 [00:44<3:08:33,  2.00s/it]

Train loss: 0.15799255669116974


  1%|          | 41/5701 [00:55<3:07:09,  1.98s/it]

Train loss: 0.17792588472366333


  1%|          | 51/5701 [01:05<2:43:54,  1.74s/it]

Train loss: 0.1666337251663208


  1%|          | 61/5701 [01:16<2:30:14,  1.60s/it]

Train loss: 0.16103947162628174


  1%|          | 71/5701 [01:28<2:32:55,  1.63s/it]

Train loss: 0.18272645771503448


  1%|▏         | 81/5701 [01:39<2:36:52,  1.67s/it]

Train loss: 0.18714988231658936


  2%|▏         | 91/5701 [01:49<2:31:23,  1.62s/it]

Train loss: 0.1948036402463913


  2%|▏         | 101/5701 [02:00<2:27:44,  1.58s/it]

Train loss: 0.14811979234218597


  2%|▏         | 111/5701 [02:12<2:46:49,  1.79s/it]

Train loss: 0.15899720788002014


  2%|▏         | 121/5701 [02:22<2:13:34,  1.44s/it]

Train loss: 0.15677689015865326


  2%|▏         | 131/5701 [02:33<2:20:06,  1.51s/it]

Train loss: 0.16317398846149445


  2%|▏         | 141/5701 [02:43<2:05:47,  1.36s/it]

Train loss: 0.18091082572937012


  3%|▎         | 151/5701 [02:54<2:26:22,  1.58s/it]

Train loss: 0.1776576042175293


  3%|▎         | 161/5701 [03:05<2:27:13,  1.59s/it]

Train loss: 0.15804891288280487


  3%|▎         | 171/5701 [03:16<2:20:39,  1.53s/it]

Train loss: 0.13537859916687012


  3%|▎         | 181/5701 [03:26<1:51:56,  1.22s/it]

Train loss: 0.14800666272640228


  3%|▎         | 191/5701 [03:37<1:43:44,  1.13s/it]

Train loss: 0.16681957244873047


  4%|▎         | 201/5701 [03:48<1:17:56,  1.18it/s]

Train loss: 0.16863064467906952


  4%|▎         | 211/5701 [03:59<59:44,  1.53it/s]  

Train loss: 0.14331486821174622


  4%|▍         | 221/5701 [04:10<1:01:03,  1.50it/s]

Train loss: 0.15091274678707123


  4%|▍         | 231/5701 [04:20<1:03:03,  1.45it/s]

Train loss: 0.15011529624462128


  4%|▍         | 241/5701 [04:31<59:15,  1.54it/s]  

Train loss: 0.20489411056041718


  4%|▍         | 251/5701 [04:43<59:47,  1.52it/s]  

Train loss: 0.17755703628063202


  5%|▍         | 261/5701 [04:54<59:09,  1.53it/s]  

Train loss: 0.16865430772304535


  5%|▍         | 271/5701 [05:05<58:39,  1.54it/s]  

Train loss: 0.16895756125450134


  5%|▍         | 281/5701 [05:16<1:06:35,  1.36it/s]

Train loss: 0.17010235786437988


  5%|▌         | 291/5701 [05:27<1:17:00,  1.17it/s]

Train loss: 0.16385380923748016


  5%|▌         | 301/5701 [05:38<1:06:22,  1.36it/s]

Train loss: 0.1721438765525818


  5%|▌         | 311/5701 [05:49<1:04:16,  1.40it/s]

Train loss: 0.18194089829921722


  6%|▌         | 321/5701 [06:00<58:01,  1.55it/s]  

Train loss: 0.16956090927124023


  6%|▌         | 331/5701 [06:11<56:44,  1.58it/s]  

Train loss: 0.1548166424036026


  6%|▌         | 341/5701 [06:23<57:23,  1.56it/s]  

Train loss: 0.1580268293619156


  6%|▌         | 351/5701 [06:33<56:25,  1.58it/s]  

Train loss: 0.19445686042308807


  6%|▋         | 361/5701 [06:44<56:16,  1.58it/s]  

Train loss: 0.1493176966905594


  7%|▋         | 371/5701 [06:56<56:55,  1.56it/s]  

Train loss: 0.14348535239696503


  7%|▋         | 381/5701 [07:07<56:29,  1.57it/s]  

Train loss: 0.15389399230480194


  7%|▋         | 391/5701 [07:19<1:20:36,  1.10it/s]

Train loss: 0.18705664575099945


  7%|▋         | 401/5701 [07:30<1:38:58,  1.12s/it]

Train loss: 0.17108093202114105


  7%|▋         | 411/5701 [07:41<1:34:56,  1.08s/it]

Train loss: 0.1507546752691269


  7%|▋         | 421/5701 [07:53<1:40:44,  1.14s/it]

Train loss: 0.1599762886762619


  8%|▊         | 431/5701 [08:03<1:24:17,  1.04it/s]

Train loss: 0.1498173326253891


  8%|▊         | 441/5701 [08:14<1:05:04,  1.35it/s]

Train loss: 0.15976642072200775


  8%|▊         | 451/5701 [08:25<1:08:05,  1.29it/s]

Train loss: 0.1343177855014801


  8%|▊         | 461/5701 [08:37<55:54,  1.56it/s]  

Train loss: 0.17310737073421478


  8%|▊         | 471/5701 [08:47<1:00:55,  1.43it/s]

Train loss: 0.15619632601737976


  8%|▊         | 481/5701 [08:59<56:21,  1.54it/s]  

Train loss: 0.16397826373577118


  9%|▊         | 491/5701 [09:10<55:33,  1.56it/s]  

Train loss: 0.18547312915325165


  9%|▉         | 501/5701 [09:21<55:17,  1.57it/s]  

Train loss: 0.1444460153579712


  9%|▉         | 511/5701 [09:32<54:54,  1.58it/s]  

Train loss: 0.17901550233364105


  9%|▉         | 521/5701 [09:44<55:28,  1.56it/s]  

Train loss: 0.17425787448883057


  9%|▉         | 531/5701 [09:54<53:24,  1.61it/s]  

Train loss: 0.13866521418094635


  9%|▉         | 541/5701 [10:05<54:52,  1.57it/s]  

Train loss: 0.1619335412979126


 10%|▉         | 551/5701 [10:17<55:07,  1.56it/s]  

Train loss: 0.15181522071361542


 10%|▉         | 561/5701 [10:29<55:24,  1.55it/s]  

Train loss: 0.16298191249370575


 10%|█         | 571/5701 [10:39<54:11,  1.58it/s]  

Train loss: 0.14683596789836884


 10%|█         | 581/5701 [10:50<54:05,  1.58it/s]  

Train loss: 0.16511045396327972


 10%|█         | 591/5701 [11:02<54:48,  1.55it/s]  

Train loss: 0.20519815385341644


 11%|█         | 601/5701 [11:13<53:39,  1.58it/s]  

Train loss: 0.17409701645374298


 11%|█         | 611/5701 [11:24<53:53,  1.57it/s]  

Train loss: 0.16474802792072296


 11%|█         | 621/5701 [11:35<54:30,  1.55it/s]  

Train loss: 0.1527831107378006


 11%|█         | 631/5701 [11:46<53:36,  1.58it/s]  

Train loss: 0.15977749228477478


 11%|█         | 641/5701 [11:58<53:37,  1.57it/s]  

Train loss: 0.16023828089237213


 11%|█▏        | 651/5701 [12:09<53:31,  1.57it/s]  

Train loss: 0.1571686863899231


 12%|█▏        | 661/5701 [12:20<53:12,  1.58it/s]  

Train loss: 0.14991703629493713


 12%|█▏        | 671/5701 [12:31<53:05,  1.58it/s]  

Train loss: 0.1719224900007248


 12%|█▏        | 681/5701 [12:42<53:34,  1.56it/s]  

Train loss: 0.17958587408065796


 12%|█▏        | 691/5701 [12:53<52:53,  1.58it/s]  

Train loss: 0.18337814509868622


 12%|█▏        | 701/5701 [13:04<52:27,  1.59it/s]  

Train loss: 0.14845238626003265


 12%|█▏        | 711/5701 [13:15<53:03,  1.57it/s]  

Train loss: 0.15680579841136932


 13%|█▎        | 721/5701 [13:27<53:08,  1.56it/s]  

Train loss: 0.13808564841747284


 13%|█▎        | 731/5701 [13:38<52:42,  1.57it/s]  

Train loss: 0.13942696154117584


 13%|█▎        | 741/5701 [13:49<52:15,  1.58it/s]  

Train loss: 0.14408040046691895


 13%|█▎        | 751/5701 [14:00<52:27,  1.57it/s]  

Train loss: 0.19418805837631226


 13%|█▎        | 761/5701 [14:10<51:06,  1.61it/s]  

Train loss: 0.17642800509929657


 14%|█▎        | 771/5701 [14:21<52:41,  1.56it/s]  

Train loss: 0.1329791396856308


 14%|█▎        | 781/5701 [14:33<52:05,  1.57it/s]  

Train loss: 0.14931361377239227


 14%|█▍        | 791/5701 [14:44<52:31,  1.56it/s]  

Train loss: 0.16297589242458344


 14%|█▍        | 801/5701 [14:54<51:06,  1.60it/s]  

Train loss: 0.17501503229141235


 14%|█▍        | 811/5701 [15:06<51:47,  1.57it/s]  

Train loss: 0.13657395541667938


 14%|█▍        | 821/5701 [15:16<51:52,  1.57it/s]  

Train loss: 0.15157125890254974


 15%|█▍        | 831/5701 [15:28<52:05,  1.56it/s]  

Train loss: 0.1357986032962799


 15%|█▍        | 841/5701 [15:39<50:59,  1.59it/s]  

Train loss: 0.1587330549955368


 15%|█▍        | 851/5701 [15:50<51:55,  1.56it/s]  

Train loss: 0.1406341940164566


 15%|█▌        | 861/5701 [16:00<50:15,  1.61it/s]  

Train loss: 0.14119045436382294


 15%|█▌        | 871/5701 [16:12<51:35,  1.56it/s]  

Train loss: 0.1343073695898056


 15%|█▌        | 881/5701 [16:23<51:50,  1.55it/s]  

Train loss: 0.13911676406860352


 16%|█▌        | 891/5701 [16:34<50:47,  1.58it/s]  

Train loss: 0.1910322606563568


 16%|█▌        | 901/5701 [16:45<50:48,  1.57it/s]  

Train loss: 0.17359255254268646


 16%|█▌        | 911/5701 [16:56<50:45,  1.57it/s]  

Train loss: 0.15355664491653442


 16%|█▌        | 921/5701 [17:07<50:40,  1.57it/s]  

Train loss: 0.15902188420295715


 16%|█▋        | 931/5701 [17:19<50:50,  1.56it/s]  

Train loss: 0.14567045867443085


 17%|█▋        | 941/5701 [17:30<50:27,  1.57it/s]  

Train loss: 0.18381479382514954


 17%|█▋        | 951/5701 [17:41<50:01,  1.58it/s]  

Train loss: 0.18035349249839783


 17%|█▋        | 961/5701 [17:52<50:12,  1.57it/s]  

Train loss: 0.168137326836586


 17%|█▋        | 971/5701 [18:03<50:34,  1.56it/s]  

Train loss: 0.13825273513793945


 17%|█▋        | 981/5701 [18:14<49:36,  1.59it/s]  

Train loss: 0.1649281233549118


 17%|█▋        | 991/5701 [18:25<50:00,  1.57it/s]  

Train loss: 0.15581654012203217


 18%|█▊        | 1001/5701 [18:36<49:38,  1.58it/s] 

Train loss: 0.1670982986688614


 18%|█▊        | 1011/5701 [18:47<49:36,  1.58it/s]  

Train loss: 0.15703745186328888


 18%|█▊        | 1021/5701 [18:58<49:30,  1.58it/s]  

Train loss: 0.1736871749162674


 18%|█▊        | 1031/5701 [19:09<49:09,  1.58it/s]  

Train loss: 0.16528932750225067


 18%|█▊        | 1041/5701 [19:21<49:48,  1.56it/s]  

Train loss: 0.1519862413406372


 18%|█▊        | 1051/5701 [19:31<48:58,  1.58it/s]  

Train loss: 0.16217942535877228


 19%|█▊        | 1061/5701 [19:42<48:42,  1.59it/s]  

Train loss: 0.1651039719581604


 19%|█▉        | 1071/5701 [19:53<49:24,  1.56it/s]  

Train loss: 0.1484488844871521


 19%|█▉        | 1081/5701 [20:05<50:36,  1.52it/s]  

Train loss: 0.14289766550064087


 19%|█▉        | 1091/5701 [20:16<51:31,  1.49it/s]  

Train loss: 0.1507682055234909


 19%|█▉        | 1101/5701 [20:28<51:35,  1.49it/s]  

Train loss: 0.16882644593715668


 19%|█▉        | 1111/5701 [20:39<52:41,  1.45it/s]  

Train loss: 0.15569010376930237


 20%|█▉        | 1121/5701 [20:50<53:16,  1.43it/s]  

Train loss: 0.16470685601234436


 20%|█▉        | 1131/5701 [21:02<53:55,  1.41it/s]  

Train loss: 0.17294906079769135


 20%|██        | 1141/5701 [21:13<53:04,  1.43it/s]  

Train loss: 0.17624390125274658


 20%|██        | 1151/5701 [21:24<53:46,  1.41it/s]  

Train loss: 0.16791556775569916


 20%|██        | 1161/5701 [21:36<54:33,  1.39it/s]  

Train loss: 0.1501806229352951


 21%|██        | 1171/5701 [21:46<52:57,  1.43it/s]  

Train loss: 0.1277657002210617


 21%|██        | 1181/5701 [21:58<53:34,  1.41it/s]  

Train loss: 0.18488596379756927


 21%|██        | 1191/5701 [22:09<52:27,  1.43it/s]  

Train loss: 0.16487406194210052


 21%|██        | 1201/5701 [22:19<52:16,  1.43it/s]  

Train loss: 0.14392760396003723


 21%|██        | 1211/5701 [22:31<53:44,  1.39it/s]  

Train loss: 0.1662694662809372


 21%|██▏       | 1221/5701 [22:43<53:35,  1.39it/s]  

Train loss: 0.1524633914232254


 22%|██▏       | 1231/5701 [22:52<50:24,  1.48it/s]  

Train loss: 0.16881005465984344


 22%|██▏       | 1241/5701 [23:04<52:54,  1.40it/s]  

Train loss: 0.15313869714736938


 22%|██▏       | 1251/5701 [23:15<52:09,  1.42it/s]  

Train loss: 0.15371903777122498


 22%|██▏       | 1261/5701 [23:27<52:57,  1.40it/s]  

Train loss: 0.16219310462474823


 22%|██▏       | 1271/5701 [23:38<53:11,  1.39it/s]  

Train loss: 0.15406814217567444


 22%|██▏       | 1281/5701 [23:50<52:43,  1.40it/s]  

Train loss: 0.14759725332260132


 23%|██▎       | 1291/5701 [24:00<50:31,  1.45it/s]  

Train loss: 0.1373053938150406


 23%|██▎       | 1301/5701 [24:12<52:55,  1.39it/s]  

Train loss: 0.17070749402046204


 23%|██▎       | 1311/5701 [24:23<50:49,  1.44it/s]  

Train loss: 0.1599702090024948


 23%|██▎       | 1321/5701 [24:34<51:19,  1.42it/s]  

Train loss: 0.17664992809295654


 23%|██▎       | 1331/5701 [24:45<51:06,  1.43it/s]  

Train loss: 0.1486492156982422


 24%|██▎       | 1341/5701 [24:56<51:19,  1.42it/s]  

Train loss: 0.15104661881923676


 24%|██▎       | 1351/5701 [25:07<51:06,  1.42it/s]  

Train loss: 0.1753426194190979


 24%|██▍       | 1361/5701 [25:18<51:17,  1.41it/s]  

Train loss: 0.14424674212932587


 24%|██▍       | 1371/5701 [25:29<50:15,  1.44it/s]  

Train loss: 0.16503861546516418


 24%|██▍       | 1381/5701 [25:40<50:10,  1.43it/s]  

Train loss: 0.15068243443965912


 24%|██▍       | 1391/5701 [25:50<49:54,  1.44it/s]  

Train loss: 0.17230284214019775


 25%|██▍       | 1401/5701 [26:02<51:26,  1.39it/s]  

Train loss: 0.1413438767194748


 25%|██▍       | 1411/5701 [26:13<50:19,  1.42it/s]  

Train loss: 0.1496800035238266


 25%|██▍       | 1421/5701 [26:24<49:15,  1.45it/s]  

Train loss: 0.13054506480693817


 25%|██▌       | 1431/5701 [26:34<49:54,  1.43it/s]  

Train loss: 0.14846336841583252


 25%|██▌       | 1441/5701 [26:46<51:38,  1.38it/s]  

Train loss: 0.1542769968509674


 25%|██▌       | 1451/5701 [26:57<50:05,  1.41it/s]  

Train loss: 0.158472940325737


 26%|██▌       | 1461/5701 [27:08<49:33,  1.43it/s]  

Train loss: 0.14999406039714813


 26%|██▌       | 1471/5701 [27:19<49:48,  1.42it/s]  

Train loss: 0.13711412250995636


 26%|██▌       | 1481/5701 [27:30<48:59,  1.44it/s]  

Train loss: 0.14679460227489471


 26%|██▌       | 1491/5701 [27:41<49:28,  1.42it/s]  

Train loss: 0.14019834995269775


 26%|██▋       | 1501/5701 [27:52<49:48,  1.41it/s]  

Train loss: 0.15264691412448883


 27%|██▋       | 1511/5701 [28:05<51:04,  1.37it/s]  

Train loss: 0.1764814704656601


 27%|██▋       | 1521/5701 [28:16<49:46,  1.40it/s]  

Train loss: 0.14768977463245392


 27%|██▋       | 1531/5701 [28:26<46:15,  1.50it/s]  

Train loss: 0.13555192947387695


 27%|██▋       | 1541/5701 [28:38<50:18,  1.38it/s]  

Train loss: 0.1441345363855362


 27%|██▋       | 1551/5701 [28:49<48:38,  1.42it/s]  

Train loss: 0.16475553810596466


 27%|██▋       | 1561/5701 [29:00<49:34,  1.39it/s]  

Train loss: 0.14159062504768372


 28%|██▊       | 1571/5701 [29:11<47:33,  1.45it/s]  

Train loss: 0.1659919172525406


 28%|██▊       | 1581/5701 [29:22<48:31,  1.42it/s]  

Train loss: 0.15320968627929688


 28%|██▊       | 1591/5701 [29:32<47:35,  1.44it/s]  

Train loss: 0.18072430789470673


 28%|██▊       | 1601/5701 [29:44<48:33,  1.41it/s]  

Train loss: 0.15652355551719666


 28%|██▊       | 1611/5701 [29:55<48:25,  1.41it/s]  

Train loss: 0.14335261285305023


 28%|██▊       | 1621/5701 [30:06<47:31,  1.43it/s]  

Train loss: 0.14101503789424896


 29%|██▊       | 1631/5701 [30:18<49:23,  1.37it/s]  

Train loss: 0.1521506905555725


 29%|██▉       | 1641/5701 [30:28<46:18,  1.46it/s]  

Train loss: 0.16576580703258514


 29%|██▉       | 1651/5701 [30:39<48:04,  1.40it/s]  

Train loss: 0.15232329070568085


 29%|██▉       | 1661/5701 [30:51<48:46,  1.38it/s]  

Train loss: 0.15173621475696564


 29%|██▉       | 1671/5701 [31:02<47:46,  1.41it/s]  

Train loss: 0.14829805493354797


 29%|██▉       | 1681/5701 [31:13<47:21,  1.41it/s]  

Train loss: 0.13245443999767303


 30%|██▉       | 1691/5701 [31:25<47:11,  1.42it/s]  

Train loss: 0.14593544602394104


 30%|██▉       | 1701/5701 [31:36<47:46,  1.40it/s]  

Train loss: 0.13868360221385956


 30%|███       | 1711/5701 [31:45<43:43,  1.52it/s]  

Train loss: 0.1618683636188507


 30%|███       | 1721/5701 [31:57<47:56,  1.38it/s]  

Train loss: 0.15496449172496796


 30%|███       | 1731/5701 [32:09<47:29,  1.39it/s]  

Train loss: 0.14730560779571533


 31%|███       | 1741/5701 [32:20<47:04,  1.40it/s]  

Train loss: 0.14845596253871918


 31%|███       | 1751/5701 [32:31<45:50,  1.44it/s]  

Train loss: 0.13756541907787323


 31%|███       | 1761/5701 [32:42<46:25,  1.41it/s]  

Train loss: 0.15798379480838776


 31%|███       | 1771/5701 [32:53<45:45,  1.43it/s]  

Train loss: 0.1491490751504898


 31%|███       | 1781/5701 [33:04<46:26,  1.41it/s]  

Train loss: 0.15465371310710907


 31%|███▏      | 1791/5701 [33:15<45:42,  1.43it/s]  

Train loss: 0.1558992713689804


 32%|███▏      | 1801/5701 [33:26<45:47,  1.42it/s]  

Train loss: 0.13246609270572662


 32%|███▏      | 1811/5701 [33:37<44:56,  1.44it/s]  

Train loss: 0.15372759103775024


 32%|███▏      | 1821/5701 [33:48<45:16,  1.43it/s]  

Train loss: 0.14148865640163422


 32%|███▏      | 1831/5701 [33:59<45:16,  1.42it/s]  

Train loss: 0.12855909764766693


 32%|███▏      | 1841/5701 [34:10<46:16,  1.39it/s]  

Train loss: 0.13180193305015564


 32%|███▏      | 1851/5701 [34:21<44:13,  1.45it/s]  

Train loss: 0.14524748921394348


 33%|███▎      | 1861/5701 [34:31<44:16,  1.45it/s]  

Train loss: 0.1282617151737213


 33%|███▎      | 1871/5701 [34:43<45:58,  1.39it/s]  

Train loss: 0.17497378587722778


 33%|███▎      | 1881/5701 [34:55<45:51,  1.39it/s]  

Train loss: 0.14939244091510773


 33%|███▎      | 1891/5701 [35:06<44:47,  1.42it/s]  

Train loss: 0.13928578794002533


 33%|███▎      | 1901/5701 [35:16<43:11,  1.47it/s]  

Train loss: 0.13920818269252777


 34%|███▎      | 1911/5701 [35:27<45:39,  1.38it/s]  

Train loss: 0.15678241848945618


 34%|███▎      | 1921/5701 [35:39<44:22,  1.42it/s]  

Train loss: 0.1623648852109909


 34%|███▍      | 1931/5701 [35:49<43:55,  1.43it/s]  

Train loss: 0.15390340983867645


 34%|███▍      | 1941/5701 [36:01<44:50,  1.40it/s]  

Train loss: 0.15014532208442688


 34%|███▍      | 1951/5701 [36:12<44:11,  1.41it/s]  

Train loss: 0.16803425550460815


 34%|███▍      | 1961/5701 [36:23<44:33,  1.40it/s]  

Train loss: 0.15534041821956635


 35%|███▍      | 1971/5701 [36:33<41:51,  1.49it/s]  

Train loss: 0.15031519532203674


 35%|███▍      | 1981/5701 [36:45<45:00,  1.38it/s]  

Train loss: 0.15066790580749512


 35%|███▍      | 1991/5701 [36:57<44:27,  1.39it/s]  

Train loss: 0.16713523864746094


 35%|███▌      | 2001/5701 [37:07<42:24,  1.45it/s]  

Train loss: 0.15959268808364868


 35%|███▌      | 2011/5701 [37:18<43:25,  1.42it/s]  

Train loss: 0.1264268159866333


 35%|███▌      | 2021/5701 [37:30<43:44,  1.40it/s]  

Train loss: 0.17218099534511566


 36%|███▌      | 2031/5701 [37:41<44:02,  1.39it/s]  

Train loss: 0.1668521761894226


 36%|███▌      | 2041/5701 [37:52<42:26,  1.44it/s]  

Train loss: 0.14942945539951324


 36%|███▌      | 2051/5701 [38:02<41:49,  1.45it/s]  

Train loss: 0.1602707952260971


 36%|███▌      | 2061/5701 [38:13<42:18,  1.43it/s]  

Train loss: 0.15777307748794556


 36%|███▋      | 2071/5701 [38:24<42:19,  1.43it/s]  

Train loss: 0.16426068544387817


 37%|███▋      | 2081/5701 [38:36<43:43,  1.38it/s]  

Train loss: 0.17264719307422638


 37%|███▋      | 2091/5701 [38:46<40:20,  1.49it/s]  

Train loss: 0.15755902230739594


 37%|███▋      | 2101/5701 [38:57<42:30,  1.41it/s]  

Train loss: 0.15189075469970703


 37%|███▋      | 2111/5701 [39:08<42:41,  1.40it/s]  

Train loss: 0.1305733025074005


 37%|███▋      | 2121/5701 [39:20<42:55,  1.39it/s]  

Train loss: 0.14992915093898773


 37%|███▋      | 2131/5701 [39:31<41:28,  1.43it/s]  

Train loss: 0.1611790508031845


 38%|███▊      | 2141/5701 [39:42<41:40,  1.42it/s]  

Train loss: 0.13521280884742737


 38%|███▊      | 2151/5701 [39:52<40:48,  1.45it/s]  

Train loss: 0.14403723180294037


 38%|███▊      | 2161/5701 [40:04<41:58,  1.41it/s]  

Train loss: 0.14536228775978088


 38%|███▊      | 2171/5701 [40:16<42:42,  1.38it/s]  

Train loss: 0.14715801179409027


 38%|███▊      | 2181/5701 [40:27<41:24,  1.42it/s]  

Train loss: 0.1455351859331131


 38%|███▊      | 2191/5701 [40:37<40:19,  1.45it/s]  

Train loss: 0.1460273563861847


 39%|███▊      | 2201/5701 [40:49<41:42,  1.40it/s]  

Train loss: 0.15610089898109436


 39%|███▉      | 2211/5701 [41:00<41:21,  1.41it/s]  

Train loss: 0.14926446974277496


 39%|███▉      | 2221/5701 [41:11<40:44,  1.42it/s]  

Train loss: 0.1563594788312912


 39%|███▉      | 2231/5701 [41:22<41:16,  1.40it/s]  

Train loss: 0.15454404056072235


 39%|███▉      | 2241/5701 [41:32<39:20,  1.47it/s]  

Train loss: 0.1764141023159027


 39%|███▉      | 2251/5701 [41:43<40:24,  1.42it/s]  

Train loss: 0.15996964275836945


 40%|███▉      | 2261/5701 [41:54<39:35,  1.45it/s]  

Train loss: 0.17156052589416504


 40%|███▉      | 2271/5701 [42:05<41:19,  1.38it/s]  

Train loss: 0.15222732722759247


 40%|████      | 2281/5701 [42:16<40:18,  1.41it/s]  

Train loss: 0.16642503440380096


 40%|████      | 2291/5701 [42:27<40:06,  1.42it/s]  

Train loss: 0.14494846761226654


 40%|████      | 2301/5701 [42:39<41:02,  1.38it/s]  

Train loss: 0.14145678281784058


 41%|████      | 2311/5701 [42:50<39:12,  1.44it/s]  

Train loss: 0.13714900612831116


 41%|████      | 2321/5701 [43:01<39:33,  1.42it/s]  

Train loss: 0.1343899816274643


 41%|████      | 2331/5701 [43:12<40:18,  1.39it/s]  

Train loss: 0.15264640748500824


 41%|████      | 2341/5701 [43:23<39:32,  1.42it/s]  

Train loss: 0.16169524192810059


 41%|████      | 2351/5701 [43:35<40:21,  1.38it/s]  

Train loss: 0.1657719910144806


 41%|████▏     | 2361/5701 [43:46<38:59,  1.43it/s]  

Train loss: 0.13962416350841522


 42%|████▏     | 2371/5701 [43:57<39:33,  1.40it/s]  

Train loss: 0.15743674337863922


 42%|████▏     | 2381/5701 [44:08<38:14,  1.45it/s]  

Train loss: 0.13476316630840302


 42%|████▏     | 2391/5701 [44:19<39:46,  1.39it/s]  

Train loss: 0.1486871987581253


 42%|████▏     | 2401/5701 [44:30<38:16,  1.44it/s]  

Train loss: 0.15822866559028625


 42%|████▏     | 2411/5701 [44:40<37:36,  1.46it/s]  

Train loss: 0.14847201108932495


 42%|████▏     | 2421/5701 [44:51<38:26,  1.42it/s]  

Train loss: 0.13967566192150116


 43%|████▎     | 2431/5701 [45:03<39:19,  1.39it/s]  

Train loss: 0.13976208865642548


 43%|████▎     | 2441/5701 [45:14<38:31,  1.41it/s]  

Train loss: 0.15302257239818573


 43%|████▎     | 2451/5701 [45:25<37:58,  1.43it/s]  

Train loss: 0.13228964805603027


 43%|████▎     | 2461/5701 [45:37<38:28,  1.40it/s]  

Train loss: 0.1416240781545639


 43%|████▎     | 2471/5701 [45:47<37:43,  1.43it/s]  

Train loss: 0.14123223721981049


 44%|████▎     | 2481/5701 [45:58<37:18,  1.44it/s]  

Train loss: 0.14334170520305634


 44%|████▎     | 2491/5701 [46:09<37:35,  1.42it/s]  

Train loss: 0.16214263439178467


 44%|████▍     | 2501/5701 [46:21<39:09,  1.36it/s]  

Train loss: 0.1585519015789032


 44%|████▍     | 2511/5701 [46:32<37:10,  1.43it/s]  

Train loss: 0.1445060521364212


 44%|████▍     | 2521/5701 [46:43<37:19,  1.42it/s]  

Train loss: 0.1499386876821518


 44%|████▍     | 2531/5701 [46:54<36:37,  1.44it/s]  

Train loss: 0.15971408784389496


 45%|████▍     | 2541/5701 [47:05<37:31,  1.40it/s]  

Train loss: 0.13590563833713531


 45%|████▍     | 2551/5701 [47:17<37:31,  1.40it/s]  

Train loss: 0.16876067221164703


 45%|████▍     | 2561/5701 [47:28<36:49,  1.42it/s]  

Train loss: 0.15805013477802277


 45%|████▌     | 2571/5701 [47:38<35:27,  1.47it/s]  

Train loss: 0.13732969760894775


 45%|████▌     | 2581/5701 [47:50<37:22,  1.39it/s]  

Train loss: 0.16184701025485992


 45%|████▌     | 2591/5701 [48:00<36:21,  1.43it/s]  

Train loss: 0.13085876405239105


 46%|████▌     | 2601/5701 [48:12<36:52,  1.40it/s]  

Train loss: 0.164137601852417


 46%|████▌     | 2611/5701 [48:23<36:03,  1.43it/s]  

Train loss: 0.1598483920097351


 46%|████▌     | 2621/5701 [48:34<36:38,  1.40it/s]  

Train loss: 0.14776961505413055


 46%|████▌     | 2631/5701 [48:45<36:09,  1.41it/s]  

Train loss: 0.13707254827022552


 46%|████▋     | 2641/5701 [48:55<34:20,  1.48it/s]  

Train loss: 0.14572109282016754


 47%|████▋     | 2651/5701 [49:07<36:14,  1.40it/s]  

Train loss: 0.14457030594348907


 47%|████▋     | 2661/5701 [49:19<37:18,  1.36it/s]  

Train loss: 0.1505354344844818


 47%|████▋     | 2671/5701 [49:29<34:50,  1.45it/s]  

Train loss: 0.14541871845722198


 47%|████▋     | 2681/5701 [49:41<36:19,  1.39it/s]  

Train loss: 0.16147172451019287


 47%|████▋     | 2691/5701 [49:51<34:15,  1.46it/s]  

Train loss: 0.13441763818264008


 47%|████▋     | 2701/5701 [50:03<36:08,  1.38it/s]  

Train loss: 0.13847211003303528


 48%|████▊     | 2711/5701 [50:14<34:51,  1.43it/s]  

Train loss: 0.14772146940231323


 48%|████▊     | 2721/5701 [50:26<35:47,  1.39it/s]  

Train loss: 0.13975222408771515


 48%|████▊     | 2731/5701 [50:37<34:46,  1.42it/s]  

Train loss: 0.1726793497800827


 48%|████▊     | 2741/5701 [50:48<34:39,  1.42it/s]  

Train loss: 0.14277999103069305


 48%|████▊     | 2751/5701 [50:59<34:35,  1.42it/s]  

Train loss: 0.13961480557918549


 48%|████▊     | 2761/5701 [51:10<34:17,  1.43it/s]  

Train loss: 0.1726699322462082


 49%|████▊     | 2771/5701 [51:21<34:43,  1.41it/s]  

Train loss: 0.15222789347171783


 49%|████▉     | 2781/5701 [51:32<33:53,  1.44it/s]  

Train loss: 0.14890778064727783


 49%|████▉     | 2791/5701 [51:43<34:38,  1.40it/s]  

Train loss: 0.13795192539691925


 49%|████▉     | 2801/5701 [51:54<34:17,  1.41it/s]  

Train loss: 0.14428485929965973


 49%|████▉     | 2811/5701 [52:05<32:58,  1.46it/s]  

Train loss: 0.1687413454055786


 49%|████▉     | 2821/5701 [52:16<33:48,  1.42it/s]  

Train loss: 0.13183192908763885


 50%|████▉     | 2831/5701 [52:27<34:30,  1.39it/s]  

Train loss: 0.1357547640800476


 50%|████▉     | 2841/5701 [52:38<33:13,  1.43it/s]  

Train loss: 0.13872520625591278


 50%|█████     | 2851/5701 [52:50<34:04,  1.39it/s]  

Train loss: 0.16132520139217377


 50%|█████     | 2861/5701 [53:00<32:46,  1.44it/s]  

Train loss: 0.1277078539133072


 50%|█████     | 2871/5701 [53:11<32:57,  1.43it/s]  

Train loss: 0.1652001142501831


 51%|█████     | 2881/5701 [53:23<33:49,  1.39it/s]  

Train loss: 0.15131664276123047


 51%|█████     | 2891/5701 [53:34<32:44,  1.43it/s]  

Train loss: 0.19222168624401093


 51%|█████     | 2901/5701 [53:45<33:33,  1.39it/s]  

Train loss: 0.1571289598941803


 51%|█████     | 2911/5701 [53:55<31:48,  1.46it/s]  

Train loss: 0.16927018761634827


 51%|█████     | 2921/5701 [54:07<33:16,  1.39it/s]  

Train loss: 0.1315656453371048


 51%|█████▏    | 2931/5701 [54:17<31:38,  1.46it/s]  

Train loss: 0.16916897892951965


 52%|█████▏    | 2941/5701 [54:29<32:31,  1.41it/s]  

Train loss: 0.13734187185764313


 52%|█████▏    | 2951/5701 [54:40<32:34,  1.41it/s]  

Train loss: 0.13620233535766602


 52%|█████▏    | 2961/5701 [54:51<31:55,  1.43it/s]  

Train loss: 0.16916845738887787


 52%|█████▏    | 2971/5701 [55:02<32:29,  1.40it/s]  

Train loss: 0.13987885415554047


 52%|█████▏    | 2981/5701 [55:13<31:40,  1.43it/s]  

Train loss: 0.1529027670621872


 52%|█████▏    | 2991/5701 [55:25<32:57,  1.37it/s]  

Train loss: 0.13138119876384735


 53%|█████▎    | 3001/5701 [55:36<31:30,  1.43it/s]  

Train loss: 0.13648252189159393


 53%|█████▎    | 3011/5701 [55:47<31:11,  1.44it/s]  

Train loss: 0.16062267124652863


 53%|█████▎    | 3021/5701 [55:58<31:38,  1.41it/s]  

Train loss: 0.1333216279745102


 53%|█████▎    | 3031/5701 [56:08<30:39,  1.45it/s]  

Train loss: 0.13950364291667938


 53%|█████▎    | 3041/5701 [56:20<31:54,  1.39it/s]  

Train loss: 0.12800948321819305


 54%|█████▎    | 3051/5701 [56:30<30:48,  1.43it/s]  

Train loss: 0.13639451563358307


 54%|█████▎    | 3061/5701 [56:41<30:50,  1.43it/s]  

Train loss: 0.15906968712806702


 54%|█████▍    | 3071/5701 [56:52<30:57,  1.42it/s]  

Train loss: 0.14183983206748962


 54%|█████▍    | 3081/5701 [57:04<31:17,  1.40it/s]  

Train loss: 0.1307276487350464


 54%|█████▍    | 3091/5701 [57:14<29:21,  1.48it/s]  

Train loss: 0.1576886922121048


 54%|█████▍    | 3101/5701 [57:26<31:35,  1.37it/s]  

Train loss: 0.1659865379333496


 55%|█████▍    | 3111/5701 [57:36<29:53,  1.44it/s]  

Train loss: 0.15377631783485413


 55%|█████▍    | 3121/5701 [57:48<30:41,  1.40it/s]  

Train loss: 0.16277635097503662


 55%|█████▍    | 3131/5701 [57:59<30:29,  1.40it/s]  

Train loss: 0.1520160287618637


 55%|█████▌    | 3141/5701 [58:11<30:47,  1.39it/s]  

Train loss: 0.13454699516296387


 55%|█████▌    | 3151/5701 [58:22<29:54,  1.42it/s]  

Train loss: 0.1678268164396286


 55%|█████▌    | 3161/5701 [58:33<30:19,  1.40it/s]  

Train loss: 0.16213363409042358


 56%|█████▌    | 3171/5701 [58:43<28:05,  1.50it/s]  

Train loss: 0.13831666111946106


 56%|█████▌    | 3181/5701 [58:54<29:55,  1.40it/s]  

Train loss: 0.13776464760303497


 56%|█████▌    | 3191/5701 [59:06<29:36,  1.41it/s]  

Train loss: 0.14554761350154877


 56%|█████▌    | 3201/5701 [59:16<28:59,  1.44it/s]  

Train loss: 0.16640017926692963


 56%|█████▋    | 3211/5701 [59:26<28:05,  1.48it/s]  

Train loss: 0.1544910967350006


 56%|█████▋    | 3221/5701 [59:38<29:20,  1.41it/s]  

Train loss: 0.12397892773151398


 57%|█████▋    | 3231/5701 [59:49<29:40,  1.39it/s]  

Train loss: 0.12658053636550903


 57%|█████▋    | 3241/5701 [1:00:00<28:42,  1.43it/s]

Train loss: 0.16831889748573303


 57%|█████▋    | 3251/5701 [1:00:11<28:57,  1.41it/s]  

Train loss: 0.14585323631763458


 57%|█████▋    | 3261/5701 [1:00:23<29:42,  1.37it/s]  

Train loss: 0.15001845359802246


 57%|█████▋    | 3271/5701 [1:00:34<28:15,  1.43it/s]  

Train loss: 0.14990729093551636


 58%|█████▊    | 3281/5701 [1:00:45<28:44,  1.40it/s]  

Train loss: 0.15316717326641083


 58%|█████▊    | 3291/5701 [1:00:57<28:36,  1.40it/s]  

Train loss: 0.16074122488498688


 58%|█████▊    | 3301/5701 [1:01:08<28:04,  1.43it/s]  

Train loss: 0.15990136563777924


 58%|█████▊    | 3311/5701 [1:01:17<26:38,  1.50it/s]  

Train loss: 0.1478946954011917


 58%|█████▊    | 3321/5701 [1:01:29<28:14,  1.40it/s]  

Train loss: 0.15328127145767212


 58%|█████▊    | 3331/5701 [1:01:41<28:59,  1.36it/s]  

Train loss: 0.1095389872789383


 59%|█████▊    | 3341/5701 [1:01:50<26:00,  1.51it/s]  

Train loss: 0.16196826100349426


 59%|█████▉    | 3351/5701 [1:02:02<27:52,  1.40it/s]  

Train loss: 0.15424898266792297


 59%|█████▉    | 3361/5701 [1:02:13<27:29,  1.42it/s]  

Train loss: 0.13603492081165314


 59%|█████▉    | 3371/5701 [1:02:24<27:19,  1.42it/s]  

Train loss: 0.15441690385341644


 59%|█████▉    | 3381/5701 [1:02:35<28:02,  1.38it/s]  

Train loss: 0.14489398896694183


 59%|█████▉    | 3391/5701 [1:02:47<27:15,  1.41it/s]  

Train loss: 0.11325480043888092


 60%|█████▉    | 3401/5701 [1:02:57<26:40,  1.44it/s]  

Train loss: 0.17526832222938538


 60%|█████▉    | 3411/5701 [1:03:09<27:22,  1.39it/s]  

Train loss: 0.1373162418603897


 60%|██████    | 3421/5701 [1:03:19<25:56,  1.46it/s]  

Train loss: 0.1385909616947174


 60%|██████    | 3431/5701 [1:03:31<27:04,  1.40it/s]  

Train loss: 0.12492605298757553


 60%|██████    | 3441/5701 [1:03:41<26:11,  1.44it/s]  

Train loss: 0.14718063175678253


 61%|██████    | 3451/5701 [1:03:52<26:32,  1.41it/s]  

Train loss: 0.13225989043712616


 61%|██████    | 3461/5701 [1:04:04<26:15,  1.42it/s]  

Train loss: 0.13536036014556885


 61%|██████    | 3471/5701 [1:04:15<26:23,  1.41it/s]  

Train loss: 0.1585049331188202


 61%|██████    | 3481/5701 [1:04:26<25:49,  1.43it/s]  

Train loss: 0.1446845382452011


 61%|██████    | 3491/5701 [1:04:38<26:53,  1.37it/s]  

Train loss: 0.13638795912265778


 61%|██████▏   | 3501/5701 [1:04:48<25:24,  1.44it/s]  

Train loss: 0.14242635667324066


 62%|██████▏   | 3511/5701 [1:04:59<25:26,  1.43it/s]  

Train loss: 0.13111630082130432


 62%|██████▏   | 3521/5701 [1:05:10<25:52,  1.40it/s]  

Train loss: 0.15925416350364685


 62%|██████▏   | 3531/5701 [1:05:22<25:28,  1.42it/s]  

Train loss: 0.11634600162506104


 62%|██████▏   | 3541/5701 [1:05:33<25:17,  1.42it/s]  

Train loss: 0.13780269026756287


 62%|██████▏   | 3551/5701 [1:05:44<25:30,  1.40it/s]  

Train loss: 0.13793008029460907


 62%|██████▏   | 3561/5701 [1:05:55<24:40,  1.45it/s]  

Train loss: 0.1656203418970108


 63%|██████▎   | 3571/5701 [1:06:05<24:27,  1.45it/s]  

Train loss: 0.15200532972812653


 63%|██████▎   | 3581/5701 [1:06:16<24:58,  1.41it/s]  

Train loss: 0.1556561291217804


 63%|██████▎   | 3591/5701 [1:06:27<24:22,  1.44it/s]  

Train loss: 0.1527428776025772


 63%|██████▎   | 3601/5701 [1:06:38<25:01,  1.40it/s]  

Train loss: 0.14423726499080658


 63%|██████▎   | 3611/5701 [1:06:49<24:25,  1.43it/s]  

Train loss: 0.13669677078723907


 64%|██████▎   | 3621/5701 [1:07:00<24:12,  1.43it/s]  

Train loss: 0.18776392936706543


 64%|██████▎   | 3631/5701 [1:07:11<24:04,  1.43it/s]  

Train loss: 0.1385166049003601


 64%|██████▍   | 3641/5701 [1:07:23<24:59,  1.37it/s]  

Train loss: 0.1242586150765419


 64%|██████▍   | 3651/5701 [1:07:32<23:02,  1.48it/s]  

Train loss: 0.1454993635416031


 64%|██████▍   | 3661/5701 [1:07:44<24:12,  1.40it/s]  

Train loss: 0.1290062665939331


 64%|██████▍   | 3671/5701 [1:07:55<23:57,  1.41it/s]  

Train loss: 0.13437651097774506


 65%|██████▍   | 3681/5701 [1:08:06<23:45,  1.42it/s]  

Train loss: 0.14918065071105957


 65%|██████▍   | 3691/5701 [1:08:18<24:26,  1.37it/s]  

Train loss: 0.1393420696258545


 65%|██████▍   | 3701/5701 [1:08:29<23:31,  1.42it/s]  

Train loss: 0.14253421127796173


 65%|██████▌   | 3711/5701 [1:08:40<23:15,  1.43it/s]  

Train loss: 0.14093837141990662


 65%|██████▌   | 3721/5701 [1:08:51<23:06,  1.43it/s]  

Train loss: 0.13756214082241058


 65%|██████▌   | 3731/5701 [1:09:02<23:20,  1.41it/s]  

Train loss: 0.14122122526168823


 66%|██████▌   | 3741/5701 [1:09:13<22:50,  1.43it/s]  

Train loss: 0.12617410719394684


 66%|██████▌   | 3751/5701 [1:09:24<22:36,  1.44it/s]  

Train loss: 0.1270476132631302


 66%|██████▌   | 3761/5701 [1:09:35<22:40,  1.43it/s]  

Train loss: 0.13867172598838806


 66%|██████▌   | 3771/5701 [1:09:45<22:30,  1.43it/s]  

Train loss: 0.13960975408554077


 66%|██████▋   | 3781/5701 [1:09:57<22:44,  1.41it/s]  

Train loss: 0.14923863112926483


 66%|██████▋   | 3791/5701 [1:10:08<22:32,  1.41it/s]  

Train loss: 0.14907650649547577


 67%|██████▋   | 3801/5701 [1:10:19<24:04,  1.31it/s]  

Train loss: 0.1357761025428772


 67%|██████▋   | 3811/5701 [1:10:30<22:18,  1.41it/s]  

Train loss: 0.15213970839977264


 67%|██████▋   | 3821/5701 [1:10:42<22:30,  1.39it/s]  

Train loss: 0.16232971847057343


 67%|██████▋   | 3831/5701 [1:10:53<22:04,  1.41it/s]  

Train loss: 0.15012696385383606


 67%|██████▋   | 3841/5701 [1:11:04<21:41,  1.43it/s]  

Train loss: 0.1562824696302414


 68%|██████▊   | 3851/5701 [1:11:15<21:45,  1.42it/s]  

Train loss: 0.12890295684337616


 68%|██████▊   | 3861/5701 [1:11:26<22:15,  1.38it/s]  

Train loss: 0.16022495925426483


 68%|██████▊   | 3871/5701 [1:11:37<23:23,  1.30it/s]  

Train loss: 0.11974123865365982


 68%|██████▊   | 3881/5701 [1:11:48<23:10,  1.31it/s]  

Train loss: 0.16755320131778717


 68%|██████▊   | 3891/5701 [1:11:59<23:08,  1.30it/s]

Train loss: 0.12453088909387589


 68%|██████▊   | 3901/5701 [1:12:10<23:59,  1.25it/s]  

Train loss: 0.14378376305103302


 69%|██████▊   | 3911/5701 [1:12:21<22:49,  1.31it/s]  

Train loss: 0.15805254876613617


 69%|██████▉   | 3921/5701 [1:12:32<25:51,  1.15it/s]

Train loss: 0.15963158011436462


 69%|██████▉   | 3931/5701 [1:12:43<23:03,  1.28it/s]

Train loss: 0.1272038072347641


 69%|██████▉   | 3941/5701 [1:12:54<23:23,  1.25it/s]  

Train loss: 0.1440843790769577


 69%|██████▉   | 3951/5701 [1:13:05<20:51,  1.40it/s]  

Train loss: 0.135725736618042


 69%|██████▉   | 3961/5701 [1:13:15<20:15,  1.43it/s]  

Train loss: 0.11880870163440704


 70%|██████▉   | 3971/5701 [1:13:27<21:02,  1.37it/s]  

Train loss: 0.1414722502231598


 70%|██████▉   | 3981/5701 [1:13:38<19:55,  1.44it/s]  

Train loss: 0.13641414046287537


 70%|███████   | 3991/5701 [1:13:50<20:34,  1.38it/s]  

Train loss: 0.12839852273464203


 70%|███████   | 4001/5701 [1:14:00<19:42,  1.44it/s]

Train loss: 0.1284683346748352


 70%|███████   | 4011/5701 [1:14:12<19:58,  1.41it/s]  

Train loss: 0.16369962692260742


 71%|███████   | 4021/5701 [1:14:22<19:14,  1.45it/s]

Train loss: 0.14567740261554718


 71%|███████   | 4031/5701 [1:14:33<19:51,  1.40it/s]  

Train loss: 0.1458817422389984


 71%|███████   | 4041/5701 [1:14:44<19:23,  1.43it/s]  

Train loss: 0.15658541023731232


 71%|███████   | 4051/5701 [1:14:55<19:10,  1.43it/s]

Train loss: 0.18394260108470917


 71%|███████   | 4061/5701 [1:15:06<19:30,  1.40it/s]  

Train loss: 0.14793916046619415


 71%|███████▏  | 4071/5701 [1:15:17<18:59,  1.43it/s]

Train loss: 0.11957956850528717


 72%|███████▏  | 4081/5701 [1:15:28<18:41,  1.44it/s]

Train loss: 0.13887716829776764


 72%|███████▏  | 4091/5701 [1:15:39<19:21,  1.39it/s]  

Train loss: 0.1551138013601303


 72%|███████▏  | 4101/5701 [1:15:50<18:44,  1.42it/s]

Train loss: 0.13081970810890198


 72%|███████▏  | 4111/5701 [1:16:01<18:23,  1.44it/s]

Train loss: 0.15091602504253387


 72%|███████▏  | 4121/5701 [1:16:13<18:59,  1.39it/s]  

Train loss: 0.11929550021886826


 72%|███████▏  | 4131/5701 [1:16:24<18:34,  1.41it/s]  

Train loss: 0.14437487721443176


 73%|███████▎  | 4141/5701 [1:16:35<18:06,  1.44it/s]

Train loss: 0.13321129977703094


 73%|███████▎  | 4151/5701 [1:16:45<18:00,  1.43it/s]

Train loss: 0.14265790581703186


 73%|███████▎  | 4161/5701 [1:16:57<18:16,  1.40it/s]

Train loss: 0.1382429152727127


 73%|███████▎  | 4171/5701 [1:17:07<17:51,  1.43it/s]

Train loss: 0.125362366437912


 73%|███████▎  | 4181/5701 [1:17:19<18:09,  1.40it/s]  

Train loss: 0.16971825063228607


 74%|███████▎  | 4191/5701 [1:17:29<16:49,  1.50it/s]

Train loss: 0.14000199735164642


 74%|███████▎  | 4201/5701 [1:17:40<17:50,  1.40it/s]

Train loss: 0.12227784842252731


 74%|███████▍  | 4211/5701 [1:17:51<17:29,  1.42it/s]

Train loss: 0.14366048574447632


 74%|███████▍  | 4221/5701 [1:18:03<17:55,  1.38it/s]  

Train loss: 0.12586894631385803


 74%|███████▍  | 4231/5701 [1:18:13<16:45,  1.46it/s]

Train loss: 0.1528482437133789


 74%|███████▍  | 4241/5701 [1:18:25<17:25,  1.40it/s]

Train loss: 0.1329294741153717


 75%|███████▍  | 4251/5701 [1:18:36<17:07,  1.41it/s]

Train loss: 0.16401606798171997


 75%|███████▍  | 4261/5701 [1:18:46<16:34,  1.45it/s]

Train loss: 0.15697114169597626


 75%|███████▍  | 4271/5701 [1:18:58<17:14,  1.38it/s]

Train loss: 0.14086559414863586


 75%|███████▌  | 4281/5701 [1:19:08<16:06,  1.47it/s]

Train loss: 0.12225341796875


 75%|███████▌  | 4291/5701 [1:19:20<17:04,  1.38it/s]

Train loss: 0.1653900444507599


 75%|███████▌  | 4301/5701 [1:19:31<16:36,  1.40it/s]

Train loss: 0.12791842222213745


 76%|███████▌  | 4311/5701 [1:19:43<16:27,  1.41it/s]

Train loss: 0.1262175291776657


 76%|███████▌  | 4321/5701 [1:19:52<15:30,  1.48it/s]

Train loss: 0.09746335446834564


 76%|███████▌  | 4331/5701 [1:20:04<16:11,  1.41it/s]

Train loss: 0.1404440701007843


 76%|███████▌  | 4341/5701 [1:20:15<16:21,  1.39it/s]

Train loss: 0.13072924315929413


 76%|███████▋  | 4351/5701 [1:20:26<15:48,  1.42it/s]

Train loss: 0.1501063108444214


 76%|███████▋  | 4361/5701 [1:20:37<15:40,  1.43it/s]

Train loss: 0.16716665029525757


 77%|███████▋  | 4371/5701 [1:20:48<15:35,  1.42it/s]

Train loss: 0.15510182082653046


 77%|███████▋  | 4381/5701 [1:21:00<15:53,  1.38it/s]

Train loss: 0.13455085456371307


 77%|███████▋  | 4391/5701 [1:21:11<15:26,  1.41it/s]

Train loss: 0.14813946187496185


 77%|███████▋  | 4401/5701 [1:21:21<14:44,  1.47it/s]

Train loss: 0.14344249665737152


 77%|███████▋  | 4411/5701 [1:21:33<15:28,  1.39it/s]

Train loss: 0.1492028683423996


 78%|███████▊  | 4421/5701 [1:21:44<15:02,  1.42it/s]

Train loss: 0.15416677296161652


 78%|███████▊  | 4431/5701 [1:21:55<14:52,  1.42it/s]

Train loss: 0.13918985426425934


 78%|███████▊  | 4441/5701 [1:22:07<15:09,  1.38it/s]

Train loss: 0.13242658972740173


 78%|███████▊  | 4451/5701 [1:22:17<13:59,  1.49it/s]

Train loss: 0.1555555909872055


 78%|███████▊  | 4461/5701 [1:22:29<14:59,  1.38it/s]

Train loss: 0.1478862166404724


 78%|███████▊  | 4471/5701 [1:22:39<14:09,  1.45it/s]

Train loss: 0.11583874374628067


 79%|███████▊  | 4481/5701 [1:22:50<14:29,  1.40it/s]

Train loss: 0.14523397386074066


 79%|███████▉  | 4491/5701 [1:23:01<13:54,  1.45it/s]

Train loss: 0.13775759935379028


 79%|███████▉  | 4501/5701 [1:23:13<14:27,  1.38it/s]

Train loss: 0.1441507339477539


 79%|███████▉  | 4511/5701 [1:23:24<14:12,  1.40it/s]

Train loss: 0.12709741294384003


 79%|███████▉  | 4521/5701 [1:23:35<13:38,  1.44it/s]

Train loss: 0.15700854361057281


 79%|███████▉  | 4531/5701 [1:23:47<14:06,  1.38it/s]

Train loss: 0.14566738903522491


 80%|███████▉  | 4541/5701 [1:23:56<13:05,  1.48it/s]

Train loss: 0.15796580910682678


 80%|███████▉  | 4551/5701 [1:24:07<13:22,  1.43it/s]

Train loss: 0.14244449138641357


 80%|████████  | 4561/5701 [1:24:18<13:32,  1.40it/s]

Train loss: 0.13521917164325714


 80%|████████  | 4571/5701 [1:24:29<13:13,  1.42it/s]

Train loss: 0.1623523086309433


 80%|████████  | 4581/5701 [1:24:40<13:08,  1.42it/s]

Train loss: 0.14940284192562103


 81%|████████  | 4591/5701 [1:24:51<13:02,  1.42it/s]

Train loss: 0.14094236493110657


 81%|████████  | 4601/5701 [1:25:03<13:03,  1.40it/s]

Train loss: 0.13651393353939056


 81%|████████  | 4611/5701 [1:25:14<12:36,  1.44it/s]

Train loss: 0.14172515273094177


 81%|████████  | 4621/5701 [1:25:25<12:46,  1.41it/s]

Train loss: 0.11931611597537994


 81%|████████  | 4631/5701 [1:25:35<12:14,  1.46it/s]

Train loss: 0.13542310893535614


 81%|████████▏ | 4641/5701 [1:25:46<12:34,  1.40it/s]

Train loss: 0.1369689255952835


 82%|████████▏ | 4651/5701 [1:25:57<12:21,  1.42it/s]

Train loss: 0.11092599481344223


 82%|████████▏ | 4661/5701 [1:26:09<12:21,  1.40it/s]

Train loss: 0.16424323618412018


 82%|████████▏ | 4671/5701 [1:26:19<11:46,  1.46it/s]

Train loss: 0.1490364372730255


 82%|████████▏ | 4681/5701 [1:26:31<12:06,  1.40it/s]

Train loss: 0.12883508205413818


 82%|████████▏ | 4691/5701 [1:26:42<12:03,  1.40it/s]

Train loss: 0.10898482799530029


 82%|████████▏ | 4701/5701 [1:26:52<11:26,  1.46it/s]

Train loss: 0.12993848323822021


 83%|████████▎ | 4711/5701 [1:27:03<11:34,  1.43it/s]

Train loss: 0.1562330573797226


 83%|████████▎ | 4721/5701 [1:27:15<11:54,  1.37it/s]

Train loss: 0.16335292160511017


 83%|████████▎ | 4731/5701 [1:27:26<11:16,  1.43it/s]

Train loss: 0.14779765903949738


 83%|████████▎ | 4741/5701 [1:27:37<11:19,  1.41it/s]

Train loss: 0.14912736415863037


 83%|████████▎ | 4751/5701 [1:27:48<11:02,  1.43it/s]

Train loss: 0.12274717539548874


 84%|████████▎ | 4761/5701 [1:28:00<11:16,  1.39it/s]

Train loss: 0.1178632602095604


 84%|████████▎ | 4771/5701 [1:28:11<11:10,  1.39it/s]

Train loss: 0.17336978018283844


 84%|████████▍ | 4781/5701 [1:28:22<10:36,  1.45it/s]

Train loss: 0.12814436852931976


 84%|████████▍ | 4791/5701 [1:28:33<10:43,  1.41it/s]

Train loss: 0.12714552879333496


 84%|████████▍ | 4801/5701 [1:28:43<10:23,  1.44it/s]

Train loss: 0.14124515652656555


 84%|████████▍ | 4811/5701 [1:28:54<10:32,  1.41it/s]

Train loss: 0.1425933688879013


 85%|████████▍ | 4821/5701 [1:29:05<10:21,  1.42it/s]

Train loss: 0.14223052561283112


 85%|████████▍ | 4831/5701 [1:29:17<10:34,  1.37it/s]

Train loss: 0.14351169764995575


 85%|████████▍ | 4841/5701 [1:29:27<09:34,  1.50it/s]

Train loss: 0.13390497863292694


 85%|████████▌ | 4851/5701 [1:29:38<10:17,  1.38it/s]

Train loss: 0.13443689048290253


 85%|████████▌ | 4861/5701 [1:29:50<10:10,  1.38it/s]

Train loss: 0.12866251170635223


 85%|████████▌ | 4871/5701 [1:30:01<09:38,  1.43it/s]

Train loss: 0.11964119970798492


 86%|████████▌ | 4881/5701 [1:30:12<09:44,  1.40it/s]

Train loss: 0.11319147795438766


 86%|████████▌ | 4891/5701 [1:30:24<09:33,  1.41it/s]

Train loss: 0.14220228791236877


 86%|████████▌ | 4901/5701 [1:30:35<09:23,  1.42it/s]

Train loss: 0.1340782791376114


 86%|████████▌ | 4911/5701 [1:30:45<09:05,  1.45it/s]

Train loss: 0.1193385124206543


 86%|████████▋ | 4921/5701 [1:30:56<09:14,  1.41it/s]

Train loss: 0.16159699857234955


 86%|████████▋ | 4931/5701 [1:31:07<09:03,  1.42it/s]

Train loss: 0.12899379432201385


 87%|████████▋ | 4941/5701 [1:31:18<08:41,  1.46it/s]

Train loss: 0.135294571518898


 87%|████████▋ | 4951/5701 [1:31:29<08:50,  1.41it/s]

Train loss: 0.15720835328102112


 87%|████████▋ | 4961/5701 [1:31:41<09:03,  1.36it/s]

Train loss: 0.1649629920721054


 87%|████████▋ | 4971/5701 [1:31:51<08:22,  1.45it/s]

Train loss: 0.14712603390216827


 87%|████████▋ | 4981/5701 [1:32:02<08:27,  1.42it/s]

Train loss: 0.11908116191625595


 88%|████████▊ | 4991/5701 [1:32:13<08:18,  1.43it/s]

Train loss: 0.14624778926372528


 88%|████████▊ | 5001/5701 [1:32:25<08:18,  1.40it/s]

Train loss: 0.13553009927272797


 88%|████████▊ | 5011/5701 [1:32:36<08:13,  1.40it/s]

Train loss: 0.13189615309238434


 88%|████████▊ | 5021/5701 [1:32:47<07:49,  1.45it/s]

Train loss: 0.12612783908843994


 88%|████████▊ | 5031/5701 [1:32:58<07:55,  1.41it/s]

Train loss: 0.11313197761774063


 88%|████████▊ | 5041/5701 [1:33:09<07:42,  1.43it/s]

Train loss: 0.15308795869350433


 89%|████████▊ | 5051/5701 [1:33:20<07:39,  1.42it/s]

Train loss: 0.15663814544677734


 89%|████████▉ | 5061/5701 [1:33:31<07:31,  1.42it/s]

Train loss: 0.15124113857746124


 89%|████████▉ | 5071/5701 [1:33:42<07:19,  1.43it/s]

Train loss: 0.11965830624103546


 89%|████████▉ | 5081/5701 [1:33:53<07:11,  1.44it/s]

Train loss: 0.13109630346298218


 89%|████████▉ | 5091/5701 [1:34:04<07:09,  1.42it/s]

Train loss: 0.1395929902791977


 89%|████████▉ | 5101/5701 [1:34:14<06:58,  1.43it/s]

Train loss: 0.13847030699253082


 90%|████████▉ | 5111/5701 [1:34:25<06:54,  1.42it/s]

Train loss: 0.13343529403209686


 90%|████████▉ | 5121/5701 [1:34:37<06:52,  1.40it/s]

Train loss: 0.127621129155159


 90%|█████████ | 5131/5701 [1:34:48<06:43,  1.41it/s]

Train loss: 0.14266899228096008


 90%|█████████ | 5141/5701 [1:34:59<06:37,  1.41it/s]

Train loss: 0.14722910523414612


 90%|█████████ | 5151/5701 [1:35:10<06:26,  1.42it/s]

Train loss: 0.13882416486740112


 91%|█████████ | 5161/5701 [1:35:21<06:18,  1.43it/s]

Train loss: 0.13108594715595245


 91%|█████████ | 5171/5701 [1:35:31<05:57,  1.48it/s]

Train loss: 0.14428016543388367


 91%|█████████ | 5181/5701 [1:35:43<06:13,  1.39it/s]

Train loss: 0.1309618204832077


 91%|█████████ | 5191/5701 [1:35:54<05:58,  1.42it/s]

Train loss: 0.13242009282112122


 91%|█████████ | 5201/5701 [1:36:05<05:56,  1.40it/s]

Train loss: 0.12515465915203094


 91%|█████████▏| 5211/5701 [1:36:16<05:41,  1.43it/s]

Train loss: 0.13782863318920135


 92%|█████████▏| 5221/5701 [1:36:27<05:34,  1.43it/s]

Train loss: 0.1510661244392395


 92%|█████████▏| 5231/5701 [1:36:38<05:29,  1.43it/s]

Train loss: 0.14752869307994843


 92%|█████████▏| 5241/5701 [1:36:48<05:15,  1.46it/s]

Train loss: 0.0919191762804985


 92%|█████████▏| 5251/5701 [1:36:59<05:19,  1.41it/s]

Train loss: 0.12008955329656601


 92%|█████████▏| 5261/5701 [1:37:10<05:05,  1.44it/s]

Train loss: 0.1411305069923401


 92%|█████████▏| 5271/5701 [1:37:20<04:53,  1.46it/s]

Train loss: 0.12576107680797577


 93%|█████████▎| 5281/5701 [1:37:32<05:00,  1.40it/s]

Train loss: 0.12492825835943222


 93%|█████████▎| 5291/5701 [1:37:42<04:36,  1.48it/s]

Train loss: 0.15213017165660858


 93%|█████████▎| 5301/5701 [1:37:52<04:33,  1.46it/s]

Train loss: 0.10583970695734024


 93%|█████████▎| 5311/5701 [1:38:02<04:28,  1.45it/s]

Train loss: 0.12729571759700775


 93%|█████████▎| 5321/5701 [1:38:13<04:25,  1.43it/s]

Train loss: 0.13594220578670502


 94%|█████████▎| 5331/5701 [1:38:24<04:23,  1.41it/s]

Train loss: 0.1473149061203003


 94%|█████████▎| 5341/5701 [1:38:35<04:07,  1.45it/s]

Train loss: 0.13801956176757812


 94%|█████████▍| 5351/5701 [1:38:45<04:01,  1.45it/s]

Train loss: 0.1514981985092163


 94%|█████████▍| 5361/5701 [1:38:56<04:00,  1.42it/s]

Train loss: 0.1344767063856125


 94%|█████████▍| 5371/5701 [1:39:07<03:54,  1.41it/s]

Train loss: 0.11812650412321091


 94%|█████████▍| 5381/5701 [1:39:19<03:53,  1.37it/s]

Train loss: 0.13381898403167725


 95%|█████████▍| 5391/5701 [1:39:30<03:35,  1.44it/s]

Train loss: 0.143058642745018


 95%|█████████▍| 5401/5701 [1:39:41<03:31,  1.42it/s]

Train loss: 0.12284474819898605


 95%|█████████▍| 5411/5701 [1:39:52<03:23,  1.43it/s]

Train loss: 0.1377868801355362


 95%|█████████▌| 5421/5701 [1:40:02<03:14,  1.44it/s]

Train loss: 0.15104256570339203


 95%|█████████▌| 5431/5701 [1:40:13<03:06,  1.45it/s]

Train loss: 0.13246853649616241


 95%|█████████▌| 5441/5701 [1:40:23<02:58,  1.46it/s]

Train loss: 0.1486053615808487


 96%|█████████▌| 5451/5701 [1:40:34<02:54,  1.43it/s]

Train loss: 0.11251217126846313


 96%|█████████▌| 5461/5701 [1:40:45<02:49,  1.42it/s]

Train loss: 0.1365036815404892


 96%|█████████▌| 5471/5701 [1:40:55<02:39,  1.44it/s]

Train loss: 0.13980291783809662


 96%|█████████▌| 5481/5701 [1:41:06<02:31,  1.45it/s]

Train loss: 0.1661597490310669


 96%|█████████▋| 5491/5701 [1:41:17<02:27,  1.43it/s]

Train loss: 0.12145332247018814


 96%|█████████▋| 5501/5701 [1:41:27<02:20,  1.42it/s]

Train loss: 0.14755427837371826


 97%|█████████▋| 5511/5701 [1:41:38<02:15,  1.40it/s]

Train loss: 0.12075700610876083


 97%|█████████▋| 5521/5701 [1:41:49<02:06,  1.42it/s]

Train loss: 0.17484518885612488


 97%|█████████▋| 5531/5701 [1:42:00<02:01,  1.40it/s]

Train loss: 0.14372067153453827


 97%|█████████▋| 5541/5701 [1:42:11<01:57,  1.36it/s]

Train loss: 0.13947106897830963


 97%|█████████▋| 5551/5701 [1:42:22<01:52,  1.34it/s]

Train loss: 0.13153426349163055


 98%|█████████▊| 5561/5701 [1:42:33<01:49,  1.28it/s]

Train loss: 0.1189374104142189


 98%|█████████▊| 5571/5701 [1:42:44<01:38,  1.33it/s]

Train loss: 0.124912790954113


 98%|█████████▊| 5581/5701 [1:42:55<01:34,  1.27it/s]

Train loss: 0.13236041367053986


 98%|█████████▊| 5591/5701 [1:43:05<01:22,  1.33it/s]

Train loss: 0.12603192031383514


 98%|█████████▊| 5601/5701 [1:43:17<01:21,  1.22it/s]

Train loss: 0.14154429733753204


 98%|█████████▊| 5611/5701 [1:43:28<01:12,  1.24it/s]

Train loss: 0.1324857920408249


 99%|█████████▊| 5621/5701 [1:43:38<01:04,  1.25it/s]

Train loss: 0.10799647867679596


 99%|█████████▉| 5631/5701 [1:43:49<00:56,  1.24it/s]

Train loss: 0.15238474309444427


 99%|█████████▉| 5641/5701 [1:44:00<00:50,  1.19it/s]

Train loss: 0.12482905387878418


 99%|█████████▉| 5651/5701 [1:44:11<00:41,  1.20it/s]

Train loss: 0.12446656078100204


 99%|█████████▉| 5661/5701 [1:44:22<00:34,  1.16it/s]

Train loss: 0.12981466948986053


 99%|█████████▉| 5671/5701 [1:44:33<00:25,  1.18it/s]

Train loss: 0.13110677897930145


100%|█████████▉| 5681/5701 [1:44:44<00:16,  1.19it/s]

Train loss: 0.152993842959404


100%|█████████▉| 5691/5701 [1:44:54<00:07,  1.25it/s]

Train loss: 0.15800325572490692


100%|██████████| 5701/5701 [1:45:00<00:00,  1.11s/it]


Train loss: 0.13672563433647156


Validation: 100%|██████████| 41/41 [00:52<00:00,  1.29s/it]


Validation loss = [tensor(0.2578, device='cuda:0', grad_fn=<DivBackward0>)]
Epoch 3


  0%|          | 1/5701 [00:12<19:52:33, 12.55s/it]

Train loss: 0.018901431933045387


  0%|          | 11/5701 [00:23<3:41:28,  2.34s/it]

Train loss: 0.15171539783477783


  0%|          | 21/5701 [00:35<3:45:05,  2.38s/it]

Train loss: 0.13846318423748016


  1%|          | 31/5701 [00:46<3:29:08,  2.21s/it]

Train loss: 0.14027968049049377


  1%|          | 41/5701 [00:57<3:29:14,  2.22s/it]

Train loss: 0.12913966178894043


  1%|          | 51/5701 [01:07<3:19:27,  2.12s/it]

Train loss: 0.14114925265312195


  1%|          | 61/5701 [01:18<3:23:10,  2.16s/it]

Train loss: 0.13233862817287445


  1%|          | 71/5701 [01:29<3:13:11,  2.06s/it]

Train loss: 0.12934985756874084


  1%|▏         | 81/5701 [01:40<3:11:42,  2.05s/it]

Train loss: 0.12185650318861008


  2%|▏         | 91/5701 [01:50<2:55:35,  1.88s/it]

Train loss: 0.15528427064418793


  2%|▏         | 101/5701 [02:01<2:53:59,  1.86s/it]

Train loss: 0.12107044458389282


  2%|▏         | 111/5701 [02:12<3:06:25,  2.00s/it]

Train loss: 0.1276191920042038


  2%|▏         | 121/5701 [02:24<3:19:03,  2.14s/it]

Train loss: 0.14126180112361908


  2%|▏         | 131/5701 [02:34<2:53:43,  1.87s/it]

Train loss: 0.14313681423664093


  2%|▏         | 141/5701 [02:44<2:18:42,  1.50s/it]

Train loss: 0.16105790436267853


  3%|▎         | 151/5701 [02:55<2:11:49,  1.43s/it]

Train loss: 0.12891940772533417


  3%|▎         | 161/5701 [03:06<2:01:44,  1.32s/it]

Train loss: 0.1334274858236313


  3%|▎         | 171/5701 [03:17<2:18:30,  1.50s/it]

Train loss: 0.1342102438211441


  3%|▎         | 181/5701 [03:28<2:14:20,  1.46s/it]

Train loss: 0.13210682570934296


  3%|▎         | 191/5701 [03:39<1:58:07,  1.29s/it]

Train loss: 0.13952518999576569


  4%|▎         | 201/5701 [03:50<2:05:55,  1.37s/it]

Train loss: 0.13459840416908264


  4%|▎         | 211/5701 [04:00<1:44:13,  1.14s/it]

Train loss: 0.14193585515022278


  4%|▍         | 221/5701 [04:11<1:27:04,  1.05it/s]

Train loss: 0.16785813868045807


  4%|▍         | 231/5701 [04:21<59:43,  1.53it/s]  

Train loss: 0.13213372230529785


  4%|▍         | 241/5701 [04:32<1:00:54,  1.49it/s]

Train loss: 0.14059582352638245


  4%|▍         | 251/5701 [04:43<1:00:49,  1.49it/s]

Train loss: 0.10717198997735977


  5%|▍         | 261/5701 [04:54<59:40,  1.52it/s]  

Train loss: 0.12291427701711655


  5%|▍         | 271/5701 [05:04<59:17,  1.53it/s]  

Train loss: 0.13469263911247253


  5%|▍         | 281/5701 [05:16<59:54,  1.51it/s]  

Train loss: 0.11822179704904556


  5%|▌         | 291/5701 [05:25<58:47,  1.53it/s]  

Train loss: 0.12482811510562897


  5%|▌         | 301/5701 [05:36<59:15,  1.52it/s]  

Train loss: 0.14860522747039795


  5%|▌         | 311/5701 [05:47<59:37,  1.51it/s]  

Train loss: 0.13769394159317017


  6%|▌         | 321/5701 [05:58<1:00:26,  1.48it/s]

Train loss: 0.1374615877866745


  6%|▌         | 331/5701 [06:09<59:26,  1.51it/s]  

Train loss: 0.13334278762340546


  6%|▌         | 341/5701 [06:20<58:33,  1.53it/s]  

Train loss: 0.1186242625117302


  6%|▌         | 351/5701 [06:31<59:13,  1.51it/s]  

Train loss: 0.13787627220153809


  6%|▋         | 361/5701 [06:42<59:13,  1.50it/s]  

Train loss: 0.11809351295232773


  7%|▋         | 371/5701 [06:52<58:32,  1.52it/s]  

Train loss: 0.10844387114048004


  7%|▋         | 381/5701 [07:03<59:31,  1.49it/s]  

Train loss: 0.15558183193206787


  7%|▋         | 391/5701 [07:14<58:41,  1.51it/s]  

Train loss: 0.1526707410812378


  7%|▋         | 401/5701 [07:25<59:07,  1.49it/s]  

Train loss: 0.15068228542804718


  7%|▋         | 411/5701 [07:37<1:00:08,  1.47it/s]

Train loss: 0.15206919610500336


  7%|▋         | 421/5701 [07:48<58:04,  1.52it/s]  

Train loss: 0.13241206109523773


  8%|▊         | 431/5701 [07:59<58:29,  1.50it/s]  

Train loss: 0.14330172538757324


  8%|▊         | 441/5701 [08:10<1:03:24,  1.38it/s]

Train loss: 0.13468535244464874


  8%|▊         | 451/5701 [08:21<1:00:12,  1.45it/s]

Train loss: 0.12447606772184372


  8%|▊         | 461/5701 [08:32<1:01:34,  1.42it/s]

Train loss: 0.13439549505710602


  8%|▊         | 471/5701 [08:42<56:43,  1.54it/s]  

Train loss: 0.14465133845806122


  8%|▊         | 481/5701 [08:53<57:59,  1.50it/s]  

Train loss: 0.12329983711242676


  9%|▊         | 491/5701 [09:05<58:57,  1.47it/s]  

Train loss: 0.12663257122039795


  9%|▉         | 501/5701 [09:16<59:27,  1.46it/s]  

Train loss: 0.12028529495000839


  9%|▉         | 511/5701 [09:28<1:03:25,  1.36it/s]

Train loss: 0.17376767098903656


  9%|▉         | 521/5701 [09:39<1:05:11,  1.32it/s]

Train loss: 0.12130894511938095


  9%|▉         | 531/5701 [09:51<1:05:01,  1.33it/s]

Train loss: 0.14234328269958496


  9%|▉         | 541/5701 [10:01<1:04:39,  1.33it/s]

Train loss: 0.13919851183891296


 10%|▉         | 551/5701 [10:12<1:03:02,  1.36it/s]

Train loss: 0.13113166391849518


 10%|▉         | 561/5701 [10:23<1:01:50,  1.39it/s]

Train loss: 0.1607418805360794


 10%|█         | 571/5701 [10:34<1:03:26,  1.35it/s]

Train loss: 0.13888095319271088


 10%|█         | 581/5701 [10:45<1:05:51,  1.30it/s]

Train loss: 0.1400863081216812


 10%|█         | 591/5701 [10:56<1:03:32,  1.34it/s]

Train loss: 0.13306869566440582


 11%|█         | 601/5701 [11:07<1:03:22,  1.34it/s]

Train loss: 0.12905506789684296


 11%|█         | 611/5701 [11:18<1:07:30,  1.26it/s]

Train loss: 0.14627382159233093


 11%|█         | 621/5701 [11:29<1:09:27,  1.22it/s]

Train loss: 0.16203807294368744


 11%|█         | 631/5701 [11:41<1:09:35,  1.21it/s]

Train loss: 0.13276295363903046


 11%|█         | 641/5701 [11:52<1:11:31,  1.18it/s]

Train loss: 0.10211371630430222


 11%|█▏        | 651/5701 [12:03<1:08:47,  1.22it/s]

Train loss: 0.1304113268852234


 12%|█▏        | 661/5701 [12:15<1:12:25,  1.16it/s]

Train loss: 0.15233831107616425


 12%|█▏        | 671/5701 [12:25<1:09:56,  1.20it/s]

Train loss: 0.1228807345032692


 12%|█▏        | 681/5701 [12:36<1:08:29,  1.22it/s]

Train loss: 0.13883757591247559


 12%|█▏        | 691/5701 [12:48<1:12:05,  1.16it/s]

Train loss: 0.14139917492866516


 12%|█▏        | 701/5701 [12:59<1:09:47,  1.19it/s]

Train loss: 0.14225566387176514


 12%|█▏        | 711/5701 [13:10<1:08:33,  1.21it/s]

Train loss: 0.13969330489635468


 13%|█▎        | 721/5701 [13:21<1:11:08,  1.17it/s]

Train loss: 0.1426880806684494


 13%|█▎        | 731/5701 [13:32<1:08:14,  1.21it/s]

Train loss: 0.1350250095129013


 13%|█▎        | 741/5701 [13:43<1:10:07,  1.18it/s]

Train loss: 0.14537550508975983


 13%|█▎        | 751/5701 [13:54<1:09:56,  1.18it/s]

Train loss: 0.14926661550998688


 13%|█▎        | 761/5701 [14:05<1:09:10,  1.19it/s]

Train loss: 0.1431325227022171


 14%|█▎        | 771/5701 [14:16<1:10:18,  1.17it/s]

Train loss: 0.14376594126224518


 14%|█▎        | 781/5701 [14:28<1:11:53,  1.14it/s]

Train loss: 0.14892037212848663


 14%|█▍        | 791/5701 [14:38<1:11:14,  1.15it/s]

Train loss: 0.12841220200061798


 14%|█▍        | 801/5701 [14:51<1:16:49,  1.06it/s]

Train loss: 0.14681558310985565


 14%|█▍        | 811/5701 [15:02<1:16:49,  1.06it/s]

Train loss: 0.16640041768550873


 14%|█▍        | 821/5701 [15:14<1:20:43,  1.01it/s]

Train loss: 0.1274501234292984


 15%|█▍        | 831/5701 [15:25<1:17:20,  1.05it/s]

Train loss: 0.13355039060115814


 15%|█▍        | 841/5701 [15:36<1:20:58,  1.00it/s]

Train loss: 0.14824175834655762


 15%|█▍        | 851/5701 [15:48<1:19:05,  1.02it/s]

Train loss: 0.13348113000392914


 15%|█▌        | 861/5701 [16:01<1:33:47,  1.16s/it]

Train loss: 0.13949428498744965


 15%|█▌        | 871/5701 [16:13<1:21:39,  1.01s/it]

Train loss: 0.16569924354553223


 15%|█▌        | 881/5701 [16:28<1:40:06,  1.25s/it]

Train loss: 0.1284940540790558


 16%|█▌        | 891/5701 [16:43<1:43:13,  1.29s/it]

Train loss: 0.13047145307064056


 16%|█▌        | 901/5701 [16:59<1:43:00,  1.29s/it]

Train loss: 0.16345030069351196


 16%|█▌        | 911/5701 [17:13<1:37:14,  1.22s/it]

Train loss: 0.10396873950958252


 16%|█▌        | 921/5701 [17:27<1:35:50,  1.20s/it]

Train loss: 0.14989806711673737


 16%|█▋        | 931/5701 [17:42<1:35:37,  1.20s/it]

Train loss: 0.11795302480459213


 17%|█▋        | 941/5701 [17:57<1:43:08,  1.30s/it]

Train loss: 0.13326320052146912


 17%|█▋        | 951/5701 [18:11<1:31:41,  1.16s/it]

Train loss: 0.13198693096637726


 17%|█▋        | 961/5701 [18:22<1:17:29,  1.02it/s]

Train loss: 0.10955236107110977


 17%|█▋        | 971/5701 [18:33<1:14:03,  1.06it/s]

Train loss: 0.12040793150663376


 17%|█▋        | 981/5701 [18:45<1:20:59,  1.03s/it]

Train loss: 0.12979364395141602


 17%|█▋        | 991/5701 [18:56<1:14:19,  1.06it/s]

Train loss: 0.1416931003332138


 18%|█▊        | 1001/5701 [19:07<1:17:16,  1.01it/s]

Train loss: 0.1275925487279892


 18%|█▊        | 1011/5701 [19:17<1:08:21,  1.14it/s]

Train loss: 0.1412493735551834


 18%|█▊        | 1021/5701 [19:28<1:13:53,  1.06it/s]

Train loss: 0.12821078300476074


 18%|█▊        | 1031/5701 [19:39<1:16:41,  1.01it/s]

Train loss: 0.1199636235833168


 18%|█▊        | 1041/5701 [19:50<1:14:32,  1.04it/s]

Train loss: 0.14373070001602173


 18%|█▊        | 1051/5701 [20:01<1:14:23,  1.04it/s]

Train loss: 0.10572537034749985


 19%|█▊        | 1061/5701 [20:12<1:13:00,  1.06it/s]

Train loss: 0.12042202800512314


 19%|█▉        | 1071/5701 [20:23<1:14:32,  1.04it/s]

Train loss: 0.14759254455566406


 19%|█▉        | 1081/5701 [20:34<1:14:15,  1.04it/s]

Train loss: 0.11888140439987183


 19%|█▉        | 1091/5701 [20:50<1:36:10,  1.25s/it]

Train loss: 0.17734844982624054


 19%|█▉        | 1101/5701 [21:05<1:34:30,  1.23s/it]

Train loss: 0.11024665087461472


 19%|█▉        | 1111/5701 [21:19<1:30:21,  1.18s/it]

Train loss: 0.1436109095811844


 20%|█▉        | 1121/5701 [21:30<1:12:18,  1.06it/s]

Train loss: 0.12420612573623657


 20%|█▉        | 1131/5701 [21:41<1:10:38,  1.08it/s]

Train loss: 0.10759704560041428


 20%|██        | 1141/5701 [21:52<1:16:17,  1.00s/it]

Train loss: 0.14228931069374084


 20%|██        | 1151/5701 [22:04<1:18:36,  1.04s/it]

Train loss: 0.15069235861301422


 20%|██        | 1161/5701 [22:16<1:15:48,  1.00s/it]

Train loss: 0.1322207897901535


 21%|██        | 1171/5701 [22:27<1:12:43,  1.04it/s]

Train loss: 0.13229656219482422


 21%|██        | 1181/5701 [22:38<1:14:10,  1.02it/s]

Train loss: 0.12365539371967316


 21%|██        | 1191/5701 [22:49<1:10:13,  1.07it/s]

Train loss: 0.15831978619098663


 21%|██        | 1201/5701 [23:00<1:12:05,  1.04it/s]

Train loss: 0.12606045603752136


 21%|██        | 1211/5701 [23:12<1:15:51,  1.01s/it]

Train loss: 0.12440162897109985


 21%|██▏       | 1221/5701 [23:23<1:11:01,  1.05it/s]

Train loss: 0.12308197468519211


 22%|██▏       | 1231/5701 [23:34<1:12:12,  1.03it/s]

Train loss: 0.11273103207349777


 22%|██▏       | 1241/5701 [23:45<1:15:47,  1.02s/it]

Train loss: 0.13243061304092407


 22%|██▏       | 1251/5701 [23:56<1:09:59,  1.06it/s]

Train loss: 0.13364091515541077


 22%|██▏       | 1261/5701 [24:07<1:09:36,  1.06it/s]

Train loss: 0.11754874140024185


 22%|██▏       | 1271/5701 [24:18<1:12:46,  1.01it/s]

Train loss: 0.13442383706569672


 22%|██▏       | 1281/5701 [24:30<1:13:05,  1.01it/s]

Train loss: 0.1233355775475502


 23%|██▎       | 1291/5701 [24:41<1:13:25,  1.00it/s]

Train loss: 0.14734943211078644


 23%|██▎       | 1301/5701 [24:52<1:07:46,  1.08it/s]

Train loss: 0.1380012482404709


 23%|██▎       | 1311/5701 [25:02<1:06:34,  1.10it/s]

Train loss: 0.10151275247335434


 23%|██▎       | 1321/5701 [25:14<1:15:29,  1.03s/it]

Train loss: 0.1168341413140297


 23%|██▎       | 1331/5701 [25:25<1:10:10,  1.04it/s]

Train loss: 0.11944641917943954


 24%|██▎       | 1341/5701 [25:37<1:12:58,  1.00s/it]

Train loss: 0.128589928150177


 24%|██▎       | 1351/5701 [25:48<1:11:42,  1.01it/s]

Train loss: 0.12258674204349518


 24%|██▍       | 1361/5701 [25:59<1:11:33,  1.01it/s]

Train loss: 0.14030896127223969


 24%|██▍       | 1371/5701 [26:09<1:04:30,  1.12it/s]

Train loss: 0.1145838052034378


 24%|██▍       | 1381/5701 [26:20<1:09:21,  1.04it/s]

Train loss: 0.11565295606851578


 24%|██▍       | 1391/5701 [26:32<1:09:38,  1.03it/s]

Train loss: 0.14099101722240448


 25%|██▍       | 1401/5701 [26:43<1:10:28,  1.02it/s]

Train loss: 0.1124161034822464


 25%|██▍       | 1411/5701 [26:54<1:08:05,  1.05it/s]

Train loss: 0.13975059986114502


 25%|██▍       | 1421/5701 [27:05<1:11:05,  1.00it/s]

Train loss: 0.1379629373550415


 25%|██▌       | 1431/5701 [27:16<1:08:19,  1.04it/s]

Train loss: 0.15415558218955994


 25%|██▌       | 1441/5701 [27:27<1:08:14,  1.04it/s]

Train loss: 0.11973942816257477


 25%|██▌       | 1451/5701 [27:39<1:12:12,  1.02s/it]

Train loss: 0.12789641320705414


 26%|██▌       | 1461/5701 [27:49<1:05:23,  1.08it/s]

Train loss: 0.12329945713281631


 26%|██▌       | 1471/5701 [28:00<1:07:42,  1.04it/s]

Train loss: 0.14687608182430267


 26%|██▌       | 1481/5701 [28:11<1:05:56,  1.07it/s]

Train loss: 0.12538710236549377


 26%|██▌       | 1491/5701 [28:24<1:14:50,  1.07s/it]

Train loss: 0.1397252380847931


 26%|██▋       | 1501/5701 [28:34<1:06:42,  1.05it/s]

Train loss: 0.10599620640277863


 27%|██▋       | 1511/5701 [28:45<1:05:47,  1.06it/s]

Train loss: 0.11887993663549423


 27%|██▋       | 1521/5701 [28:56<1:07:39,  1.03it/s]

Train loss: 0.10909616947174072


 27%|██▋       | 1531/5701 [29:07<1:05:33,  1.06it/s]

Train loss: 0.10809560120105743


 27%|██▋       | 1541/5701 [29:19<1:09:20,  1.00s/it]

Train loss: 0.14183400571346283


 27%|██▋       | 1551/5701 [29:29<1:04:04,  1.08it/s]

Train loss: 0.14971236884593964


 27%|██▋       | 1561/5701 [29:40<1:06:37,  1.04it/s]

Train loss: 0.11154311895370483


 28%|██▊       | 1571/5701 [29:52<1:08:55,  1.00s/it]

Train loss: 0.13500533998012543


 28%|██▊       | 1581/5701 [30:03<1:07:02,  1.02it/s]

Train loss: 0.1270327866077423


 28%|██▊       | 1591/5701 [30:14<1:05:10,  1.05it/s]

Train loss: 0.12016545981168747


 28%|██▊       | 1601/5701 [30:24<1:04:41,  1.06it/s]

Train loss: 0.1241186186671257


 28%|██▊       | 1611/5701 [30:35<1:03:31,  1.07it/s]

Train loss: 0.14024734497070312


 28%|██▊       | 1621/5701 [30:46<1:07:25,  1.01it/s]

Train loss: 0.1262217015028


 29%|██▊       | 1631/5701 [30:58<1:06:31,  1.02it/s]

Train loss: 0.14948759973049164


 29%|██▉       | 1641/5701 [31:09<1:04:28,  1.05it/s]

Train loss: 0.12725494801998138


 29%|██▉       | 1651/5701 [31:20<1:04:56,  1.04it/s]

Train loss: 0.105007603764534


 29%|██▉       | 1661/5701 [31:30<1:02:51,  1.07it/s]

Train loss: 0.156651571393013


 29%|██▉       | 1671/5701 [31:41<1:04:06,  1.05it/s]

Train loss: 0.11494211107492447


 29%|██▉       | 1681/5701 [31:52<1:02:23,  1.07it/s]

Train loss: 0.11679009348154068


 30%|██▉       | 1691/5701 [32:03<1:03:38,  1.05it/s]

Train loss: 0.14115311205387115


 30%|██▉       | 1701/5701 [32:14<1:07:07,  1.01s/it]

Train loss: 0.15439634025096893


 30%|███       | 1711/5701 [32:25<1:04:46,  1.03it/s]

Train loss: 0.15077926218509674


 30%|███       | 1721/5701 [32:37<1:07:23,  1.02s/it]

Train loss: 0.150678813457489


 30%|███       | 1731/5701 [32:48<1:04:10,  1.03it/s]

Train loss: 0.09691432118415833


 31%|███       | 1741/5701 [32:59<59:55,  1.10it/s]  

Train loss: 0.13214077055454254


 31%|███       | 1751/5701 [33:09<1:02:18,  1.06it/s]

Train loss: 0.13026736676692963


 31%|███       | 1761/5701 [33:21<1:03:37,  1.03it/s]

Train loss: 0.1304931491613388


 31%|███       | 1771/5701 [33:32<1:05:01,  1.01it/s]

Train loss: 0.12374210357666016


 31%|███       | 1781/5701 [33:43<1:01:23,  1.06it/s]

Train loss: 0.16163752973079681


 31%|███▏      | 1791/5701 [33:53<1:00:18,  1.08it/s]

Train loss: 0.15426695346832275


 32%|███▏      | 1801/5701 [34:04<1:02:15,  1.04it/s]

Train loss: 0.16162653267383575


 32%|███▏      | 1811/5701 [34:15<1:03:18,  1.02it/s]

Train loss: 0.12263542413711548


 32%|███▏      | 1821/5701 [34:26<1:01:39,  1.05it/s]

Train loss: 0.12123548984527588


 32%|███▏      | 1831/5701 [34:37<1:01:21,  1.05it/s]

Train loss: 0.1394733488559723


 32%|███▏      | 1841/5701 [34:49<1:04:49,  1.01s/it]

Train loss: 0.15385082364082336


 32%|███▏      | 1851/5701 [35:00<59:55,  1.07it/s]  

Train loss: 0.10395591706037521


 33%|███▎      | 1861/5701 [35:10<59:26,  1.08it/s]  

Train loss: 0.12923592329025269


 33%|███▎      | 1871/5701 [35:22<1:04:34,  1.01s/it]

Train loss: 0.14278553426265717


 33%|███▎      | 1881/5701 [35:33<1:00:00,  1.06it/s]

Train loss: 0.13645091652870178


 33%|███▎      | 1891/5701 [35:44<1:00:53,  1.04it/s]

Train loss: 0.1248321458697319


 33%|███▎      | 1901/5701 [35:55<1:01:23,  1.03it/s]

Train loss: 0.1541178673505783


 34%|███▎      | 1911/5701 [36:06<1:01:00,  1.04it/s]

Train loss: 0.1377122849225998


 34%|███▎      | 1921/5701 [36:18<1:03:22,  1.01s/it]

Train loss: 0.13888321816921234


 34%|███▍      | 1931/5701 [36:28<56:10,  1.12it/s]  

Train loss: 0.13522978127002716


 34%|███▍      | 1941/5701 [36:39<1:03:16,  1.01s/it]

Train loss: 0.11263442039489746


 34%|███▍      | 1951/5701 [36:51<1:02:34,  1.00s/it]

Train loss: 0.11117706447839737


 34%|███▍      | 1961/5701 [37:02<1:01:01,  1.02it/s]

Train loss: 0.12146792560815811


 35%|███▍      | 1971/5701 [37:13<58:46,  1.06it/s]  

Train loss: 0.13817283511161804


 35%|███▍      | 1981/5701 [37:24<1:00:42,  1.02it/s]

Train loss: 0.1535763293504715


 35%|███▍      | 1991/5701 [37:35<59:38,  1.04it/s]  

Train loss: 0.13093926012516022


 35%|███▌      | 2001/5701 [37:47<1:00:30,  1.02it/s]

Train loss: 0.12594859302043915


 35%|███▌      | 2011/5701 [37:58<59:07,  1.04it/s]  

Train loss: 0.12343692034482956


 35%|███▌      | 2021/5701 [38:09<1:01:11,  1.00it/s]

Train loss: 0.14798708260059357


 36%|███▌      | 2031/5701 [38:19<54:32,  1.12it/s]  

Train loss: 0.14757131040096283


 36%|███▌      | 2041/5701 [38:30<57:48,  1.06it/s]  

Train loss: 0.12924990057945251


 36%|███▌      | 2051/5701 [38:41<59:11,  1.03it/s]  

Train loss: 0.13832472264766693


 36%|███▌      | 2061/5701 [38:53<1:00:52,  1.00s/it]

Train loss: 0.14012068510055542


 36%|███▋      | 2071/5701 [39:04<58:16,  1.04it/s]  

Train loss: 0.13273130357265472


 37%|███▋      | 2081/5701 [39:16<1:00:35,  1.00s/it]

Train loss: 0.11468999832868576


 37%|███▋      | 2091/5701 [39:25<52:52,  1.14it/s]  

Train loss: 0.12641797959804535


 37%|███▋      | 2101/5701 [39:38<1:02:11,  1.04s/it]

Train loss: 0.13361090421676636


 37%|███▋      | 2111/5701 [39:48<53:23,  1.12it/s]  

Train loss: 0.11721505969762802


 37%|███▋      | 2121/5701 [40:00<1:01:04,  1.02s/it]

Train loss: 0.1612715721130371


 37%|███▋      | 2131/5701 [40:10<55:41,  1.07it/s]  

Train loss: 0.12610210478305817


 38%|███▊      | 2141/5701 [40:21<57:06,  1.04it/s]  

Train loss: 0.13536247611045837


 38%|███▊      | 2151/5701 [40:32<55:07,  1.07it/s]  

Train loss: 0.1386515200138092


 38%|███▊      | 2161/5701 [40:43<56:11,  1.05it/s]  

Train loss: 0.11888016760349274


 38%|███▊      | 2171/5701 [40:53<53:39,  1.10it/s]  

Train loss: 0.14042788743972778


 38%|███▊      | 2181/5701 [41:05<58:28,  1.00it/s]  

Train loss: 0.1493770033121109


 38%|███▊      | 2191/5701 [41:15<55:43,  1.05it/s]  

Train loss: 0.13871121406555176


 39%|███▊      | 2201/5701 [41:27<58:25,  1.00s/it]  

Train loss: 0.10199911892414093


 39%|███▉      | 2211/5701 [41:38<57:13,  1.02it/s]  

Train loss: 0.13315610587596893


 39%|███▉      | 2221/5701 [41:49<55:13,  1.05it/s]  

Train loss: 0.13724783062934875


 39%|███▉      | 2231/5701 [42:00<54:31,  1.06it/s]  

Train loss: 0.14652852714061737


 39%|███▉      | 2241/5701 [42:11<53:36,  1.08it/s]  

Train loss: 0.11334746330976486


 39%|███▉      | 2251/5701 [42:22<56:01,  1.03it/s]  

Train loss: 0.1387968361377716


 40%|███▉      | 2261/5701 [42:33<54:20,  1.05it/s]  

Train loss: 0.13551615178585052


 40%|███▉      | 2271/5701 [42:44<56:20,  1.01it/s]  

Train loss: 0.12109088152647018


 40%|████      | 2281/5701 [42:55<54:02,  1.05it/s]  

Train loss: 0.09933582693338394


 40%|████      | 2291/5701 [43:06<54:56,  1.03it/s]  

Train loss: 0.1216387078166008


 40%|████      | 2301/5701 [43:17<52:04,  1.09it/s]  

Train loss: 0.1392745077610016


 41%|████      | 2311/5701 [43:28<54:30,  1.04it/s]  

Train loss: 0.1559157818555832


 41%|████      | 2321/5701 [43:39<56:36,  1.00s/it]  

Train loss: 0.10816668719053268


 41%|████      | 2331/5701 [43:50<52:30,  1.07it/s]  

Train loss: 0.14269159734249115


 41%|████      | 2341/5701 [44:01<54:40,  1.02it/s]  

Train loss: 0.1416022628545761


 41%|████      | 2351/5701 [44:12<51:55,  1.08it/s]  

Train loss: 0.14758504927158356


 41%|████▏     | 2361/5701 [44:23<55:38,  1.00it/s]  

Train loss: 0.12193930149078369


 42%|████▏     | 2371/5701 [44:33<49:30,  1.12it/s]  

Train loss: 0.15042927861213684


 42%|████▏     | 2381/5701 [44:45<54:45,  1.01it/s]  

Train loss: 0.14237962663173676


 42%|████▏     | 2391/5701 [44:56<54:34,  1.01it/s]  

Train loss: 0.13106822967529297


 42%|████▏     | 2401/5701 [45:07<52:45,  1.04it/s]  

Train loss: 0.15165627002716064


 42%|████▏     | 2411/5701 [45:18<52:16,  1.05it/s]  

Train loss: 0.1257832795381546


 42%|████▏     | 2421/5701 [45:29<53:34,  1.02it/s]  

Train loss: 0.11230891942977905


 43%|████▎     | 2431/5701 [45:40<52:04,  1.05it/s]  

Train loss: 0.11074798554182053


 43%|████▎     | 2441/5701 [45:52<52:47,  1.03it/s]  

Train loss: 0.12667423486709595


 43%|████▎     | 2451/5701 [46:02<50:14,  1.08it/s]  

Train loss: 0.14691098034381866


 43%|████▎     | 2461/5701 [46:13<51:41,  1.04it/s]  

Train loss: 0.10570396482944489


 43%|████▎     | 2471/5701 [46:24<50:40,  1.06it/s]  

Train loss: 0.1294669359922409


 44%|████▎     | 2481/5701 [46:35<52:07,  1.03it/s]  

Train loss: 0.12028119713068008


 44%|████▎     | 2491/5701 [46:46<50:58,  1.05it/s]  

Train loss: 0.12665465474128723


 44%|████▍     | 2501/5701 [46:56<49:17,  1.08it/s]  

Train loss: 0.13738927245140076


 44%|████▍     | 2511/5701 [47:08<51:44,  1.03it/s]  

Train loss: 0.13810111582279205


 44%|████▍     | 2521/5701 [47:19<51:11,  1.04it/s]  

Train loss: 0.15839341282844543


 44%|████▍     | 2531/5701 [47:30<53:11,  1.01s/it]  

Train loss: 0.12781791388988495


 45%|████▍     | 2541/5701 [47:40<46:22,  1.14it/s]  

Train loss: 0.12784987688064575


 45%|████▍     | 2551/5701 [47:52<52:18,  1.00it/s]  

Train loss: 0.12901948392391205


 45%|████▍     | 2561/5701 [48:04<55:02,  1.05s/it]  

Train loss: 0.15148355066776276


 45%|████▌     | 2571/5701 [48:14<47:19,  1.10it/s]  

Train loss: 0.13308990001678467


 45%|████▌     | 2581/5701 [48:26<51:12,  1.02it/s]  

Train loss: 0.14260868728160858


 45%|████▌     | 2591/5701 [48:37<49:43,  1.04it/s]  

Train loss: 0.11352747678756714


 46%|████▌     | 2601/5701 [48:48<48:59,  1.05it/s]  

Train loss: 0.11258529871702194


 46%|████▌     | 2611/5701 [48:59<49:24,  1.04it/s]  

Train loss: 0.1504916548728943


 46%|████▌     | 2621/5701 [49:10<49:54,  1.03it/s]  

Train loss: 0.11842294037342072


 46%|████▌     | 2631/5701 [49:21<50:52,  1.01it/s]  

Train loss: 0.11637888103723526


 46%|████▋     | 2641/5701 [49:32<48:15,  1.06it/s]  

Train loss: 0.13877753913402557


 47%|████▋     | 2651/5701 [49:43<49:25,  1.03it/s]  

Train loss: 0.13778139650821686


 47%|████▋     | 2661/5701 [49:54<46:50,  1.08it/s]  

Train loss: 0.11381397396326065


 47%|████▋     | 2671/5701 [50:05<47:26,  1.06it/s]  

Train loss: 0.16072523593902588


 47%|████▋     | 2681/5701 [50:15<46:59,  1.07it/s]  

Train loss: 0.1556868851184845


 47%|████▋     | 2691/5701 [50:27<49:25,  1.02it/s]  

Train loss: 0.12167470902204514


 47%|████▋     | 2701/5701 [50:38<50:21,  1.01s/it]  

Train loss: 0.1451077163219452


 48%|████▊     | 2711/5701 [50:49<46:18,  1.08it/s]  

Train loss: 0.10839467495679855


 48%|████▊     | 2721/5701 [51:00<48:00,  1.03it/s]  

Train loss: 0.13213060796260834


 48%|████▊     | 2731/5701 [51:11<48:47,  1.01it/s]  

Train loss: 0.15475602447986603


 48%|████▊     | 2741/5701 [51:22<47:45,  1.03it/s]  

Train loss: 0.13592566549777985


 48%|████▊     | 2751/5701 [51:34<48:05,  1.02it/s]  

Train loss: 0.13740232586860657


 48%|████▊     | 2761/5701 [51:45<49:17,  1.01s/it]  

Train loss: 0.12273502349853516


 49%|████▊     | 2771/5701 [51:57<50:30,  1.03s/it]  

Train loss: 0.11867332458496094


 49%|████▉     | 2781/5701 [52:07<41:54,  1.16it/s]  

Train loss: 0.1271791011095047


 49%|████▉     | 2791/5701 [52:19<49:17,  1.02s/it]  

Train loss: 0.12978065013885498


 49%|████▉     | 2801/5701 [52:29<45:13,  1.07it/s]  

Train loss: 0.11518188565969467


 49%|████▉     | 2811/5701 [52:40<46:08,  1.04it/s]  

Train loss: 0.12191837280988693


 49%|████▉     | 2821/5701 [52:52<46:46,  1.03it/s]  

Train loss: 0.1648402363061905


 50%|████▉     | 2831/5701 [53:02<44:59,  1.06it/s]  

Train loss: 0.12127570062875748


 50%|████▉     | 2841/5701 [53:13<44:09,  1.08it/s]  

Train loss: 0.17910946905612946


 50%|█████     | 2851/5701 [53:25<47:35,  1.00s/it]  

Train loss: 0.1562688797712326


 50%|█████     | 2861/5701 [53:35<44:50,  1.06it/s]  

Train loss: 0.10967164486646652


 50%|█████     | 2871/5701 [53:46<44:53,  1.05it/s]  

Train loss: 0.12064924091100693


 51%|█████     | 2881/5701 [53:57<45:28,  1.03it/s]  

Train loss: 0.13988900184631348


 51%|█████     | 2891/5701 [54:08<43:01,  1.09it/s]  

Train loss: 0.13634832203388214


 51%|█████     | 2901/5701 [54:19<46:31,  1.00it/s]  

Train loss: 0.1455034464597702


 51%|█████     | 2911/5701 [54:30<43:11,  1.08it/s]  

Train loss: 0.11447101831436157


 51%|█████     | 2921/5701 [54:41<45:13,  1.02it/s]  

Train loss: 0.11444021761417389


 51%|█████▏    | 2931/5701 [54:52<42:53,  1.08it/s]  

Train loss: 0.13721057772636414


 52%|█████▏    | 2941/5701 [55:03<44:44,  1.03it/s]  

Train loss: 0.1454882174730301


 52%|█████▏    | 2951/5701 [55:14<42:36,  1.08it/s]  

Train loss: 0.16513724625110626


 52%|█████▏    | 2961/5701 [55:25<46:05,  1.01s/it]  

Train loss: 0.1180666908621788


 52%|█████▏    | 2971/5701 [55:36<43:55,  1.04it/s]  

Train loss: 0.12251927703619003


 52%|█████▏    | 2981/5701 [55:47<43:32,  1.04it/s]  

Train loss: 0.14324133098125458


 52%|█████▏    | 2991/5701 [55:58<41:16,  1.09it/s]  

Train loss: 0.12862509489059448


 53%|█████▎    | 3001/5701 [56:09<44:30,  1.01it/s]  

Train loss: 0.12274359911680222


 53%|█████▎    | 3011/5701 [56:20<42:01,  1.07it/s]  

Train loss: 0.12377816438674927


 53%|█████▎    | 3021/5701 [56:31<41:37,  1.07it/s]  

Train loss: 0.11959719657897949


 53%|█████▎    | 3031/5701 [56:42<43:58,  1.01it/s]  

Train loss: 0.1384647935628891


 53%|█████▎    | 3041/5701 [56:53<43:11,  1.03it/s]  

Train loss: 0.14627717435359955


 54%|█████▎    | 3051/5701 [57:03<39:42,  1.11it/s]  

Train loss: 0.12800033390522003


 54%|█████▎    | 3061/5701 [57:15<43:51,  1.00it/s]  

Train loss: 0.12640392780303955


 54%|█████▍    | 3071/5701 [57:26<41:49,  1.05it/s]  

Train loss: 0.11860322952270508


 54%|█████▍    | 3081/5701 [57:37<43:08,  1.01it/s]  

Train loss: 0.12480755150318146


 54%|█████▍    | 3091/5701 [57:49<43:26,  1.00it/s]  

Train loss: 0.09903830289840698


 54%|█████▍    | 3101/5701 [57:58<35:21,  1.23it/s]  

Train loss: 0.1267416924238205


 55%|█████▍    | 3111/5701 [58:10<44:12,  1.02s/it]  

Train loss: 0.13485321402549744


 55%|█████▍    | 3121/5701 [58:21<41:59,  1.02it/s]  

Train loss: 0.16551953554153442


 55%|█████▍    | 3131/5701 [58:32<40:17,  1.06it/s]  

Train loss: 0.1261560022830963


 55%|█████▌    | 3141/5701 [58:43<41:47,  1.02it/s]  

Train loss: 0.1378321647644043


 55%|█████▌    | 3151/5701 [58:53<37:54,  1.12it/s]  

Train loss: 0.11961424350738525


 55%|█████▌    | 3161/5701 [59:05<42:33,  1.01s/it]  

Train loss: 0.11397595703601837


 56%|█████▌    | 3171/5701 [59:16<40:31,  1.04it/s]  

Train loss: 0.1323445737361908


 56%|█████▌    | 3181/5701 [59:26<39:36,  1.06it/s]  

Train loss: 0.12215681374073029


 56%|█████▌    | 3191/5701 [59:38<41:18,  1.01it/s]  

Train loss: 0.11236841976642609


 56%|█████▌    | 3201/5701 [59:49<39:57,  1.04it/s]  

Train loss: 0.11466548591852188


 56%|█████▋    | 3211/5701 [1:00:00<39:52,  1.04it/s]

Train loss: 0.12255670875310898


 56%|█████▋    | 3221/5701 [1:00:11<39:19,  1.05it/s]  

Train loss: 0.14374135434627533


 57%|█████▋    | 3231/5701 [1:00:23<43:02,  1.05s/it]  

Train loss: 0.1446322798728943


 57%|█████▋    | 3241/5701 [1:00:34<38:25,  1.07it/s]  

Train loss: 0.08183517307043076


 57%|█████▋    | 3251/5701 [1:00:45<39:52,  1.02it/s]  

Train loss: 0.11270930618047714


 57%|█████▋    | 3261/5701 [1:00:56<38:26,  1.06it/s]  

Train loss: 0.14844249188899994


 57%|█████▋    | 3271/5701 [1:01:07<38:12,  1.06it/s]  

Train loss: 0.13455572724342346


 58%|█████▊    | 3281/5701 [1:01:18<40:15,  1.00it/s]  

Train loss: 0.11593040078878403


 58%|█████▊    | 3291/5701 [1:01:29<38:08,  1.05it/s]  

Train loss: 0.10434343665838242


 58%|█████▊    | 3301/5701 [1:01:40<37:29,  1.07it/s]  

Train loss: 0.13225480914115906


 58%|█████▊    | 3311/5701 [1:01:51<39:16,  1.01it/s]  

Train loss: 0.11863762140274048


 58%|█████▊    | 3321/5701 [1:02:02<38:49,  1.02it/s]  

Train loss: 0.12458926439285278


 58%|█████▊    | 3331/5701 [1:02:13<36:51,  1.07it/s]  

Train loss: 0.12807869911193848


 59%|█████▊    | 3341/5701 [1:02:24<38:54,  1.01it/s]  

Train loss: 0.14631398022174835


 59%|█████▉    | 3351/5701 [1:02:36<38:51,  1.01it/s]  

Train loss: 0.11353155225515366


 59%|█████▉    | 3361/5701 [1:02:47<37:57,  1.03it/s]  

Train loss: 0.14172033965587616


 59%|█████▉    | 3371/5701 [1:02:58<38:10,  1.02it/s]  

Train loss: 0.13437174260616302


 59%|█████▉    | 3381/5701 [1:03:09<35:12,  1.10it/s]  

Train loss: 0.14619137346744537


 59%|█████▉    | 3391/5701 [1:03:20<37:11,  1.04it/s]  

Train loss: 0.1175648495554924


 60%|█████▉    | 3401/5701 [1:03:31<35:51,  1.07it/s]  

Train loss: 0.10575107485055923


 60%|█████▉    | 3411/5701 [1:03:42<38:15,  1.00s/it]  

Train loss: 0.1424144208431244


 60%|██████    | 3421/5701 [1:03:53<36:02,  1.05it/s]  

Train loss: 0.09664858877658844


 60%|██████    | 3431/5701 [1:04:04<35:58,  1.05it/s]  

Train loss: 0.13111381232738495


 60%|██████    | 3441/5701 [1:04:15<36:40,  1.03it/s]  

Train loss: 0.09749748557806015


 61%|██████    | 3451/5701 [1:04:26<35:10,  1.07it/s]  

Train loss: 0.10947411507368088


 61%|██████    | 3461/5701 [1:04:37<36:45,  1.02it/s]  

Train loss: 0.1037357822060585


 61%|██████    | 3471/5701 [1:04:48<36:20,  1.02it/s]  

Train loss: 0.12924091517925262


 61%|██████    | 3481/5701 [1:04:59<35:21,  1.05it/s]  

Train loss: 0.13929729163646698


 61%|██████    | 3491/5701 [1:05:10<34:56,  1.05it/s]  

Train loss: 0.120096854865551


 61%|██████▏   | 3501/5701 [1:05:22<36:22,  1.01it/s]  

Train loss: 0.11777794361114502


 62%|██████▏   | 3511/5701 [1:05:32<33:47,  1.08it/s]  

Train loss: 0.1233249232172966


 62%|██████▏   | 3521/5701 [1:05:43<32:59,  1.10it/s]  

Train loss: 0.13919663429260254


 62%|██████▏   | 3531/5701 [1:05:55<37:07,  1.03s/it]  

Train loss: 0.10734542459249496


 62%|██████▏   | 3541/5701 [1:06:04<31:29,  1.14it/s]  

Train loss: 0.13062773644924164


 62%|██████▏   | 3551/5701 [1:06:16<34:55,  1.03it/s]  

Train loss: 0.12667876482009888


 62%|██████▏   | 3561/5701 [1:06:28<37:10,  1.04s/it]  

Train loss: 0.12656420469284058


 63%|██████▎   | 3571/5701 [1:06:39<33:26,  1.06it/s]  

Train loss: 0.13023172318935394


 63%|██████▎   | 3581/5701 [1:06:50<35:14,  1.00it/s]  

Train loss: 0.12495315074920654


 63%|██████▎   | 3591/5701 [1:07:00<31:21,  1.12it/s]  

Train loss: 0.16437703371047974


 63%|██████▎   | 3601/5701 [1:07:11<34:09,  1.02it/s]  

Train loss: 0.14395634829998016


 63%|██████▎   | 3611/5701 [1:07:23<34:34,  1.01it/s]  

Train loss: 0.11451368778944016


 64%|██████▎   | 3621/5701 [1:07:34<34:28,  1.01it/s]  

Train loss: 0.1130930557847023


 64%|██████▎   | 3631/5701 [1:07:45<33:15,  1.04it/s]  

Train loss: 0.11492065340280533


 64%|██████▍   | 3641/5701 [1:07:56<32:00,  1.07it/s]  

Train loss: 0.15589870512485504


 64%|██████▍   | 3651/5701 [1:08:07<33:24,  1.02it/s]  

Train loss: 0.12586970627307892


 64%|██████▍   | 3661/5701 [1:08:19<34:01,  1.00s/it]  

Train loss: 0.13570153713226318


 64%|██████▍   | 3671/5701 [1:08:30<32:27,  1.04it/s]  

Train loss: 0.13093605637550354


 65%|██████▍   | 3681/5701 [1:08:40<31:10,  1.08it/s]  

Train loss: 0.1447734832763672


 65%|██████▍   | 3691/5701 [1:08:52<33:21,  1.00it/s]  

Train loss: 0.10403936356306076


 65%|██████▍   | 3701/5701 [1:09:03<32:05,  1.04it/s]  

Train loss: 0.14742490649223328


 65%|██████▌   | 3711/5701 [1:09:14<32:25,  1.02it/s]  

Train loss: 0.13492034375667572


 65%|██████▌   | 3721/5701 [1:09:26<32:59,  1.00it/s]  

Train loss: 0.13871683180332184


 65%|██████▌   | 3731/5701 [1:09:36<29:56,  1.10it/s]  

Train loss: 0.12392201274633408


 66%|██████▌   | 3741/5701 [1:09:48<32:20,  1.01it/s]  

Train loss: 0.12858641147613525


 66%|██████▌   | 3751/5701 [1:09:59<31:58,  1.02it/s]  

Train loss: 0.12069430202245712


 66%|██████▌   | 3761/5701 [1:10:08<27:09,  1.19it/s]

Train loss: 0.11079876869916916


 66%|██████▌   | 3771/5701 [1:10:20<32:54,  1.02s/it]  

Train loss: 0.1308148205280304


 66%|██████▋   | 3781/5701 [1:10:30<27:52,  1.15it/s]

Train loss: 0.10992264002561569


 66%|██████▋   | 3791/5701 [1:10:41<30:11,  1.05it/s]  

Train loss: 0.1282108724117279


 67%|██████▋   | 3801/5701 [1:10:52<31:20,  1.01it/s]  

Train loss: 0.14368250966072083


 67%|██████▋   | 3811/5701 [1:11:04<32:09,  1.02s/it]  

Train loss: 0.11731921881437302


 67%|██████▋   | 3821/5701 [1:11:16<32:27,  1.04s/it]  

Train loss: 0.1418427675962448


 67%|██████▋   | 3831/5701 [1:11:26<28:22,  1.10it/s]  

Train loss: 0.12323331087827682


 67%|██████▋   | 3841/5701 [1:11:38<30:43,  1.01it/s]  

Train loss: 0.0940876379609108


 68%|██████▊   | 3851/5701 [1:11:49<29:10,  1.06it/s]  

Train loss: 0.11986678093671799


 68%|██████▊   | 3861/5701 [1:11:59<28:19,  1.08it/s]  

Train loss: 0.18200673162937164


 68%|██████▊   | 3871/5701 [1:12:10<29:31,  1.03it/s]  

Train loss: 0.12974940240383148


 68%|██████▊   | 3881/5701 [1:12:22<30:51,  1.02s/it]  

Train loss: 0.1184532567858696


 68%|██████▊   | 3891/5701 [1:12:33<29:23,  1.03it/s]  

Train loss: 0.10599309206008911


 68%|██████▊   | 3901/5701 [1:12:44<27:59,  1.07it/s]  

Train loss: 0.11927562206983566


 69%|██████▊   | 3911/5701 [1:12:56<29:44,  1.00it/s]  

Train loss: 0.13646428287029266


 69%|██████▉   | 3921/5701 [1:13:05<26:02,  1.14it/s]

Train loss: 0.1324782520532608


 69%|██████▉   | 3931/5701 [1:13:17<29:00,  1.02it/s]  

Train loss: 0.15634432435035706


 69%|██████▉   | 3941/5701 [1:13:27<27:11,  1.08it/s]  

Train loss: 0.1433328241109848


 69%|██████▉   | 3951/5701 [1:13:39<28:44,  1.01it/s]  

Train loss: 0.11405707895755768


 69%|██████▉   | 3961/5701 [1:13:50<27:57,  1.04it/s]  

Train loss: 0.09232793003320694


 70%|██████▉   | 3971/5701 [1:14:01<28:27,  1.01it/s]  

Train loss: 0.12607493996620178


 70%|██████▉   | 3981/5701 [1:14:13<28:52,  1.01s/it]  

Train loss: 0.119672991335392


 70%|███████   | 3991/5701 [1:14:24<28:04,  1.02it/s]  

Train loss: 0.10112426429986954


 70%|███████   | 4001/5701 [1:14:34<25:44,  1.10it/s]

Train loss: 0.12846998870372772


 70%|███████   | 4011/5701 [1:14:46<28:21,  1.01s/it]  

Train loss: 0.11247140169143677


 71%|███████   | 4021/5701 [1:14:57<26:22,  1.06it/s]  

Train loss: 0.10597367584705353


 71%|███████   | 4031/5701 [1:15:08<27:20,  1.02it/s]  

Train loss: 0.12782812118530273


 71%|███████   | 4041/5701 [1:15:19<26:35,  1.04it/s]  

Train loss: 0.12137214094400406


 71%|███████   | 4051/5701 [1:15:30<26:25,  1.04it/s]  

Train loss: 0.12530317902565002


 71%|███████   | 4061/5701 [1:15:42<26:49,  1.02it/s]  

Train loss: 0.11237629503011703


 71%|███████▏  | 4071/5701 [1:15:53<25:53,  1.05it/s]  

Train loss: 0.11959777027368546


 72%|███████▏  | 4081/5701 [1:16:03<24:49,  1.09it/s]

Train loss: 0.12324164062738419


 72%|███████▏  | 4091/5701 [1:16:14<26:21,  1.02it/s]  

Train loss: 0.1386546939611435


 72%|███████▏  | 4101/5701 [1:16:25<24:57,  1.07it/s]

Train loss: 0.15297655761241913


 72%|███████▏  | 4111/5701 [1:16:37<27:11,  1.03s/it]  

Train loss: 0.11427479982376099


 72%|███████▏  | 4121/5701 [1:16:47<23:16,  1.13it/s]

Train loss: 0.11614757031202316


 72%|███████▏  | 4131/5701 [1:16:59<26:10,  1.00s/it]  

Train loss: 0.10435602813959122


 73%|███████▎  | 4141/5701 [1:17:09<24:00,  1.08it/s]

Train loss: 0.11114975064992905


 73%|███████▎  | 4151/5701 [1:17:20<24:18,  1.06it/s]

Train loss: 0.13468022644519806


 73%|███████▎  | 4161/5701 [1:17:32<26:21,  1.03s/it]  

Train loss: 0.13943473994731903


 73%|███████▎  | 4171/5701 [1:17:43<25:05,  1.02it/s]

Train loss: 0.14569373428821564


 73%|███████▎  | 4181/5701 [1:17:54<24:12,  1.05it/s]

Train loss: 0.11653973162174225


 74%|███████▎  | 4191/5701 [1:18:05<24:14,  1.04it/s]

Train loss: 0.10531222820281982


 74%|███████▎  | 4201/5701 [1:18:16<23:44,  1.05it/s]

Train loss: 0.1223716139793396


 74%|███████▍  | 4211/5701 [1:18:27<23:48,  1.04it/s]

Train loss: 0.12205920368432999


 74%|███████▍  | 4221/5701 [1:18:38<23:41,  1.04it/s]

Train loss: 0.11741886287927628


 74%|███████▍  | 4231/5701 [1:18:49<23:22,  1.05it/s]

Train loss: 0.12372275441884995


 74%|███████▍  | 4241/5701 [1:19:00<23:10,  1.05it/s]

Train loss: 0.11792504042387009


 75%|███████▍  | 4251/5701 [1:19:11<23:31,  1.03it/s]

Train loss: 0.13252507150173187


 75%|███████▍  | 4261/5701 [1:19:23<23:56,  1.00it/s]

Train loss: 0.11247111856937408


 75%|███████▍  | 4271/5701 [1:19:32<20:35,  1.16it/s]

Train loss: 0.1360062211751938


 75%|███████▌  | 4281/5701 [1:19:43<22:40,  1.04it/s]

Train loss: 0.13227413594722748


 75%|███████▌  | 4291/5701 [1:19:54<21:59,  1.07it/s]

Train loss: 0.12691406905651093


 75%|███████▌  | 4301/5701 [1:20:05<22:09,  1.05it/s]

Train loss: 0.13676831126213074


 76%|███████▌  | 4311/5701 [1:20:16<22:51,  1.01it/s]

Train loss: 0.13042721152305603


 76%|███████▌  | 4321/5701 [1:20:27<22:25,  1.03it/s]

Train loss: 0.11903645843267441


 76%|███████▌  | 4331/5701 [1:20:39<22:17,  1.02it/s]

Train loss: 0.13770842552185059


 76%|███████▌  | 4341/5701 [1:20:50<22:28,  1.01it/s]

Train loss: 0.11206045001745224


 76%|███████▋  | 4351/5701 [1:21:01<21:26,  1.05it/s]

Train loss: 0.12558461725711823


 76%|███████▋  | 4361/5701 [1:21:13<22:52,  1.02s/it]

Train loss: 0.14390569925308228


 77%|███████▋  | 4371/5701 [1:21:24<21:34,  1.03it/s]

Train loss: 0.13445349037647247


 77%|███████▋  | 4381/5701 [1:21:34<19:57,  1.10it/s]

Train loss: 0.12134444713592529


 77%|███████▋  | 4391/5701 [1:21:46<21:42,  1.01it/s]

Train loss: 0.11409010738134384


 77%|███████▋  | 4401/5701 [1:21:56<19:58,  1.09it/s]

Train loss: 0.12099186331033707


 77%|███████▋  | 4411/5701 [1:22:07<20:02,  1.07it/s]

Train loss: 0.11434855312108994


 78%|███████▊  | 4421/5701 [1:22:19<22:07,  1.04s/it]

Train loss: 0.12133464962244034


 78%|███████▊  | 4431/5701 [1:22:30<19:42,  1.07it/s]

Train loss: 0.10159560292959213


 78%|███████▊  | 4441/5701 [1:22:41<21:13,  1.01s/it]

Train loss: 0.16888056695461273


 78%|███████▊  | 4451/5701 [1:22:51<18:29,  1.13it/s]

Train loss: 0.14090411365032196


 78%|███████▊  | 4461/5701 [1:23:03<20:05,  1.03it/s]

Train loss: 0.1317163109779358


 78%|███████▊  | 4471/5701 [1:23:14<20:26,  1.00it/s]

Train loss: 0.09867644309997559


 79%|███████▊  | 4481/5701 [1:23:25<19:35,  1.04it/s]

Train loss: 0.13492177426815033


 79%|███████▉  | 4491/5701 [1:23:35<18:00,  1.12it/s]

Train loss: 0.1318797916173935


 79%|███████▉  | 4501/5701 [1:23:45<17:47,  1.12it/s]

Train loss: 0.125595822930336


 79%|███████▉  | 4511/5701 [1:23:56<19:14,  1.03it/s]

Train loss: 0.10708978027105331


 79%|███████▉  | 4521/5701 [1:24:08<19:26,  1.01it/s]

Train loss: 0.1367504596710205


 79%|███████▉  | 4531/5701 [1:24:19<19:11,  1.02it/s]

Train loss: 0.1222444549202919


 80%|███████▉  | 4541/5701 [1:24:30<18:41,  1.03it/s]

Train loss: 0.11758216470479965


 80%|███████▉  | 4551/5701 [1:24:41<18:15,  1.05it/s]

Train loss: 0.14280904829502106


 80%|████████  | 4561/5701 [1:24:53<18:56,  1.00it/s]

Train loss: 0.1370367854833603


 80%|████████  | 4571/5701 [1:25:04<18:07,  1.04it/s]

Train loss: 0.11426140367984772


 80%|████████  | 4581/5701 [1:25:16<19:28,  1.04s/it]

Train loss: 0.10064850002527237


 81%|████████  | 4591/5701 [1:25:26<16:27,  1.12it/s]

Train loss: 0.12253991514444351


 81%|████████  | 4601/5701 [1:25:37<18:27,  1.01s/it]

Train loss: 0.1386771947145462


 81%|████████  | 4611/5701 [1:25:48<16:54,  1.07it/s]

Train loss: 0.1576816290616989


 81%|████████  | 4621/5701 [1:26:00<18:25,  1.02s/it]

Train loss: 0.10140018910169601


 81%|████████  | 4631/5701 [1:26:11<17:27,  1.02it/s]

Train loss: 0.15993033349514008


 81%|████████▏ | 4641/5701 [1:26:21<15:02,  1.17it/s]

Train loss: 0.11017628014087677


 82%|████████▏ | 4651/5701 [1:26:32<17:00,  1.03it/s]

Train loss: 0.15101608633995056


 82%|████████▏ | 4661/5701 [1:26:44<17:22,  1.00s/it]

Train loss: 0.11959481239318848


 82%|████████▏ | 4671/5701 [1:26:55<16:38,  1.03it/s]

Train loss: 0.13621960580348969


 82%|████████▏ | 4681/5701 [1:27:06<16:56,  1.00it/s]

Train loss: 0.11919254064559937


 82%|████████▏ | 4691/5701 [1:27:17<15:55,  1.06it/s]

Train loss: 0.10617361217737198


 82%|████████▏ | 4701/5701 [1:27:28<16:20,  1.02it/s]

Train loss: 0.1117679625749588


 83%|████████▎ | 4711/5701 [1:27:40<16:20,  1.01it/s]

Train loss: 0.1176694855093956


 83%|████████▎ | 4721/5701 [1:27:50<14:34,  1.12it/s]

Train loss: 0.10052478313446045


 83%|████████▎ | 4731/5701 [1:28:01<15:59,  1.01it/s]

Train loss: 0.12040962278842926


 83%|████████▎ | 4741/5701 [1:28:13<15:56,  1.00it/s]

Train loss: 0.11763258278369904


 83%|████████▎ | 4751/5701 [1:28:24<15:37,  1.01it/s]

Train loss: 0.15592515468597412


 84%|████████▎ | 4761/5701 [1:28:35<14:44,  1.06it/s]

Train loss: 0.11784398555755615


 84%|████████▎ | 4771/5701 [1:28:46<14:55,  1.04it/s]

Train loss: 0.1118859052658081


 84%|████████▍ | 4781/5701 [1:28:57<14:29,  1.06it/s]

Train loss: 0.1094161793589592


 84%|████████▍ | 4791/5701 [1:29:09<15:20,  1.01s/it]

Train loss: 0.1435534507036209


 84%|████████▍ | 4801/5701 [1:29:20<14:52,  1.01it/s]

Train loss: 0.10560667514801025


 84%|████████▍ | 4811/5701 [1:29:30<12:49,  1.16it/s]

Train loss: 0.11738172918558121


 85%|████████▍ | 4821/5701 [1:29:41<13:55,  1.05it/s]

Train loss: 0.12153314799070358


 85%|████████▍ | 4831/5701 [1:29:53<15:17,  1.05s/it]

Train loss: 0.10686353594064713


 85%|████████▍ | 4841/5701 [1:30:03<13:13,  1.08it/s]

Train loss: 0.10803834348917007


 85%|████████▌ | 4851/5701 [1:30:15<14:15,  1.01s/it]

Train loss: 0.13735073804855347


 85%|████████▌ | 4861/5701 [1:30:27<13:56,  1.00it/s]

Train loss: 0.12917524576187134


 85%|████████▌ | 4871/5701 [1:30:37<12:45,  1.08it/s]

Train loss: 0.1379852145910263


 86%|████████▌ | 4881/5701 [1:30:49<13:43,  1.00s/it]

Train loss: 0.12937383353710175


 86%|████████▌ | 4891/5701 [1:31:00<13:09,  1.03it/s]

Train loss: 0.15352317690849304


 86%|████████▌ | 4901/5701 [1:31:10<12:27,  1.07it/s]

Train loss: 0.149790957570076


 86%|████████▌ | 4911/5701 [1:31:22<12:53,  1.02it/s]

Train loss: 0.1466086059808731


 86%|████████▋ | 4921/5701 [1:31:33<12:44,  1.02it/s]

Train loss: 0.11383696645498276


 86%|████████▋ | 4931/5701 [1:31:43<11:12,  1.14it/s]

Train loss: 0.13715365529060364


 87%|████████▋ | 4941/5701 [1:31:54<12:05,  1.05it/s]

Train loss: 0.11369539797306061


 87%|████████▋ | 4951/5701 [1:32:05<12:02,  1.04it/s]

Train loss: 0.1300196796655655


 87%|████████▋ | 4961/5701 [1:32:17<12:42,  1.03s/it]

Train loss: 0.13830210268497467


 87%|████████▋ | 4971/5701 [1:32:28<11:33,  1.05it/s]

Train loss: 0.09754276275634766


 87%|████████▋ | 4981/5701 [1:32:39<11:57,  1.00it/s]

Train loss: 0.14555945992469788


 88%|████████▊ | 4991/5701 [1:32:51<11:58,  1.01s/it]

Train loss: 0.11409280449151993


 88%|████████▊ | 5001/5701 [1:33:02<11:01,  1.06it/s]

Train loss: 0.0966077595949173


 88%|████████▊ | 5011/5701 [1:33:13<11:10,  1.03it/s]

Train loss: 0.10155651718378067


 88%|████████▊ | 5021/5701 [1:33:23<10:13,  1.11it/s]

Train loss: 0.09710578620433807


 88%|████████▊ | 5031/5701 [1:33:35<11:19,  1.01s/it]

Train loss: 0.11215726286172867


 88%|████████▊ | 5041/5701 [1:33:47<11:03,  1.01s/it]

Train loss: 0.12443528324365616


 89%|████████▊ | 5051/5701 [1:33:58<10:25,  1.04it/s]

Train loss: 0.12574462592601776


 89%|████████▉ | 5061/5701 [1:34:08<09:59,  1.07it/s]

Train loss: 0.1338665932416916


 89%|████████▉ | 5071/5701 [1:34:19<10:02,  1.04it/s]

Train loss: 0.13850407302379608


 89%|████████▉ | 5081/5701 [1:34:30<10:05,  1.02it/s]

Train loss: 0.11429914087057114


 89%|████████▉ | 5091/5701 [1:34:40<08:59,  1.13it/s]

Train loss: 0.11986712366342545


 89%|████████▉ | 5101/5701 [1:34:52<09:49,  1.02it/s]

Train loss: 0.11766400188207626


 90%|████████▉ | 5111/5701 [1:35:03<09:20,  1.05it/s]

Train loss: 0.11339562386274338


 90%|████████▉ | 5121/5701 [1:35:14<09:15,  1.04it/s]

Train loss: 0.11121769994497299


 90%|█████████ | 5131/5701 [1:35:25<09:41,  1.02s/it]

Train loss: 0.10952526330947876


 90%|█████████ | 5141/5701 [1:35:37<09:12,  1.01it/s]

Train loss: 0.11680176109075546


 90%|█████████ | 5151/5701 [1:35:47<08:26,  1.09it/s]

Train loss: 0.12193375825881958


 91%|█████████ | 5161/5701 [1:35:58<08:27,  1.06it/s]

Train loss: 0.10389373451471329


 91%|█████████ | 5171/5701 [1:36:09<08:44,  1.01it/s]

Train loss: 0.09664007276296616


 91%|█████████ | 5181/5701 [1:36:21<08:44,  1.01s/it]

Train loss: 0.1310318261384964


 91%|█████████ | 5191/5701 [1:36:32<08:15,  1.03it/s]

Train loss: 0.13542315363883972


 91%|█████████ | 5201/5701 [1:36:43<07:59,  1.04it/s]

Train loss: 0.13153545558452606


 91%|█████████▏| 5211/5701 [1:36:54<07:50,  1.04it/s]

Train loss: 0.10533910989761353


 92%|█████████▏| 5221/5701 [1:37:06<07:52,  1.02it/s]

Train loss: 0.12706172466278076


 92%|█████████▏| 5231/5701 [1:37:16<07:22,  1.06it/s]

Train loss: 0.11820628494024277


 92%|█████████▏| 5241/5701 [1:37:27<06:56,  1.10it/s]

Train loss: 0.16574831306934357


 92%|█████████▏| 5251/5701 [1:37:37<06:44,  1.11it/s]

Train loss: 0.10878828912973404


 92%|█████████▏| 5261/5701 [1:37:48<07:00,  1.05it/s]

Train loss: 0.1419324427843094


 92%|█████████▏| 5271/5701 [1:37:58<06:36,  1.08it/s]

Train loss: 0.11077084392309189


 93%|█████████▎| 5281/5701 [1:38:09<06:22,  1.10it/s]

Train loss: 0.1600368469953537


 93%|█████████▎| 5291/5701 [1:38:20<06:30,  1.05it/s]

Train loss: 0.12411113828420639


 93%|█████████▎| 5301/5701 [1:38:30<06:09,  1.08it/s]

Train loss: 0.12360002845525742


 93%|█████████▎| 5311/5701 [1:38:41<06:09,  1.05it/s]

Train loss: 0.13731156289577484


 93%|█████████▎| 5321/5701 [1:38:52<06:02,  1.05it/s]

Train loss: 0.12028681486845016


 94%|█████████▎| 5331/5701 [1:39:02<05:37,  1.09it/s]

Train loss: 0.16138456761837006


 94%|█████████▎| 5341/5701 [1:39:13<05:40,  1.06it/s]

Train loss: 0.09634502232074738


 94%|█████████▍| 5351/5701 [1:39:23<05:06,  1.14it/s]

Train loss: 0.15054330229759216


 94%|█████████▍| 5361/5701 [1:39:33<05:10,  1.10it/s]

Train loss: 0.17063561081886292


 94%|█████████▍| 5371/5701 [1:39:44<05:12,  1.06it/s]

Train loss: 0.11358771473169327


 94%|█████████▍| 5381/5701 [1:39:55<04:58,  1.07it/s]

Train loss: 0.13793297111988068


 95%|█████████▍| 5391/5701 [1:40:05<04:41,  1.10it/s]

Train loss: 0.11802990734577179


 95%|█████████▍| 5401/5701 [1:40:16<04:40,  1.07it/s]

Train loss: 0.11624493449926376


 95%|█████████▍| 5411/5701 [1:40:27<04:36,  1.05it/s]

Train loss: 0.11626594513654709


 95%|█████████▌| 5421/5701 [1:40:37<04:39,  1.00it/s]

Train loss: 0.11482474952936172


 95%|█████████▌| 5431/5701 [1:40:48<05:00,  1.11s/it]

Train loss: 0.12746340036392212


 95%|█████████▌| 5441/5701 [1:40:59<05:23,  1.24s/it]

Train loss: 0.13655096292495728


 96%|█████████▌| 5451/5701 [1:41:11<05:29,  1.32s/it]

Train loss: 0.13618773221969604


 96%|█████████▌| 5461/5701 [1:41:21<05:17,  1.32s/it]

Train loss: 0.1411970853805542


 96%|█████████▌| 5471/5701 [1:41:32<05:19,  1.39s/it]

Train loss: 0.1338997185230255


 96%|█████████▌| 5481/5701 [1:41:43<05:31,  1.51s/it]

Train loss: 0.12333126366138458


 96%|█████████▋| 5491/5701 [1:41:54<05:13,  1.49s/it]

Train loss: 0.15856800973415375


 96%|█████████▋| 5501/5701 [1:42:04<04:45,  1.43s/it]

Train loss: 0.1263372302055359


 97%|█████████▋| 5511/5701 [1:42:15<04:24,  1.39s/it]

Train loss: 0.10579361766576767


 97%|█████████▋| 5521/5701 [1:42:26<04:01,  1.34s/it]

Train loss: 0.16492204368114471


 97%|█████████▋| 5531/5701 [1:42:37<03:41,  1.30s/it]

Train loss: 0.11577055603265762


 97%|█████████▋| 5541/5701 [1:42:47<03:26,  1.29s/it]

Train loss: 0.11083421856164932


 97%|█████████▋| 5551/5701 [1:42:59<03:22,  1.35s/it]

Train loss: 0.09315500408411026


 98%|█████████▊| 5561/5701 [1:43:09<03:07,  1.34s/it]

Train loss: 0.15009601414203644


 98%|█████████▊| 5571/5701 [1:43:20<02:51,  1.32s/it]

Train loss: 0.13622558116912842


 98%|█████████▊| 5581/5701 [1:43:32<02:54,  1.46s/it]

Train loss: 0.1397683322429657


 98%|█████████▊| 5591/5701 [1:43:42<02:17,  1.25s/it]

Train loss: 0.12444664537906647


 98%|█████████▊| 5601/5701 [1:43:52<01:53,  1.13s/it]

Train loss: 0.1466331034898758


 98%|█████████▊| 5611/5701 [1:44:04<01:56,  1.29s/it]

Train loss: 0.11327653378248215


 99%|█████████▊| 5621/5701 [1:44:14<01:37,  1.21s/it]

Train loss: 0.1364467591047287


 99%|█████████▉| 5631/5701 [1:44:26<01:41,  1.45s/it]

Train loss: 0.13444189727306366


 99%|█████████▉| 5641/5701 [1:44:37<01:29,  1.49s/it]

Train loss: 0.12839441001415253


 99%|█████████▉| 5651/5701 [1:44:48<01:10,  1.41s/it]

Train loss: 0.13367849588394165


 99%|█████████▉| 5661/5701 [1:44:59<01:00,  1.50s/it]

Train loss: 0.10898315906524658


 99%|█████████▉| 5671/5701 [1:45:10<00:48,  1.63s/it]

Train loss: 0.14052556455135345


100%|█████████▉| 5681/5701 [1:45:21<00:32,  1.60s/it]

Train loss: 0.13033659756183624


100%|█████████▉| 5691/5701 [1:45:28<00:07,  1.39it/s]

Train loss: 0.09789690375328064


100%|██████████| 5701/5701 [1:45:34<00:00,  1.11s/it]


Train loss: 0.11179689317941666


Validation: 100%|██████████| 41/41 [00:53<00:00,  1.29s/it]


Validation loss = [tensor(0.2578, device='cuda:0', grad_fn=<DivBackward0>), tensor(0.2334, device='cuda:0', grad_fn=<DivBackward0>)]
Epoch 4


  0%|          | 1/5701 [00:12<19:38:28, 12.40s/it]

Train loss: 0.008394621312618256


  0%|          | 11/5701 [00:22<3:22:07,  2.13s/it]

Train loss: 0.13275060057640076


  0%|          | 21/5701 [00:33<2:58:27,  1.89s/it]

Train loss: 0.14409314095973969


  1%|          | 31/5701 [00:45<3:13:58,  2.05s/it]

Train loss: 0.11273559182882309


  1%|          | 41/5701 [00:55<2:57:40,  1.88s/it]

Train loss: 0.1407780796289444


  1%|          | 51/5701 [01:06<2:48:40,  1.79s/it]

Train loss: 0.13412201404571533


  1%|          | 61/5701 [01:17<2:44:32,  1.75s/it]

Train loss: 0.12458920478820801


  1%|          | 71/5701 [01:28<2:42:59,  1.74s/it]

Train loss: 0.10294608026742935


  1%|▏         | 81/5701 [01:38<2:16:15,  1.45s/it]

Train loss: 0.12922947108745575


  2%|▏         | 91/5701 [01:48<2:15:10,  1.45s/it]

Train loss: 0.11130813509225845


  2%|▏         | 101/5701 [02:00<2:09:43,  1.39s/it]

Train loss: 0.12743639945983887


  2%|▏         | 111/5701 [02:10<1:54:38,  1.23s/it]

Train loss: 0.10662876814603806


  2%|▏         | 121/5701 [02:21<1:42:35,  1.10s/it]

Train loss: 0.12489380687475204


  2%|▏         | 131/5701 [02:32<1:24:02,  1.10it/s]

Train loss: 0.12298973649740219


  2%|▏         | 141/5701 [02:42<1:00:37,  1.53it/s]

Train loss: 0.12537702918052673


  3%|▎         | 151/5701 [02:53<1:02:14,  1.49it/s]

Train loss: 0.11526329815387726


  3%|▎         | 161/5701 [03:04<1:01:46,  1.49it/s]

Train loss: 0.11165564507246017


  3%|▎         | 171/5701 [03:16<1:03:21,  1.45it/s]

Train loss: 0.13110965490341187


  3%|▎         | 181/5701 [03:27<1:03:59,  1.44it/s]

Train loss: 0.1248735785484314


  3%|▎         | 191/5701 [03:38<1:10:30,  1.30it/s]

Train loss: 0.11368068307638168


  4%|▎         | 201/5701 [03:49<1:30:14,  1.02it/s]

Train loss: 0.13488970696926117


  4%|▎         | 211/5701 [03:59<1:11:47,  1.27it/s]

Train loss: 0.1368699073791504


  4%|▍         | 221/5701 [04:11<1:04:44,  1.41it/s]

Train loss: 0.1306184083223343


  4%|▍         | 231/5701 [04:21<1:04:43,  1.41it/s]

Train loss: 0.1134641095995903


  4%|▍         | 241/5701 [04:32<1:02:14,  1.46it/s]

Train loss: 0.11958346515893936


  4%|▍         | 251/5701 [04:43<1:04:08,  1.42it/s]

Train loss: 0.1454734206199646


  5%|▍         | 261/5701 [04:54<1:04:55,  1.40it/s]

Train loss: 0.13039563596248627


  5%|▍         | 271/5701 [05:05<1:14:28,  1.22it/s]

Train loss: 0.11191929876804352


  5%|▍         | 281/5701 [05:16<1:18:10,  1.16it/s]

Train loss: 0.13721832633018494


  5%|▌         | 291/5701 [05:27<1:28:39,  1.02it/s]

Train loss: 0.1227993369102478


  5%|▌         | 301/5701 [05:38<1:37:52,  1.09s/it]

Train loss: 0.11602994054555893


  5%|▌         | 311/5701 [05:50<2:12:13,  1.47s/it]

Train loss: 0.1081480011343956


  6%|▌         | 321/5701 [06:00<1:14:54,  1.20it/s]

Train loss: 0.12394329160451889


  6%|▌         | 331/5701 [06:10<55:47,  1.60it/s]  

Train loss: 0.10412926971912384


  6%|▌         | 341/5701 [06:22<56:37,  1.58it/s]  

Train loss: 0.11660595238208771


  6%|▌         | 351/5701 [06:33<56:03,  1.59it/s]  

Train loss: 0.1455349177122116


  6%|▋         | 361/5701 [06:44<56:06,  1.59it/s]  

Train loss: 0.13122235238552094


  7%|▋         | 371/5701 [06:56<56:11,  1.58it/s]  

Train loss: 0.11620968580245972


  7%|▋         | 381/5701 [07:07<55:09,  1.61it/s]  

Train loss: 0.12388070672750473


  7%|▋         | 391/5701 [07:18<55:42,  1.59it/s]  

Train loss: 0.0979161262512207


  7%|▋         | 401/5701 [07:29<55:09,  1.60it/s]  

Train loss: 0.1028609424829483


  7%|▋         | 411/5701 [07:40<55:17,  1.59it/s]  

Train loss: 0.1128728911280632


  7%|▋         | 421/5701 [07:52<55:16,  1.59it/s]  

Train loss: 0.12631382048130035


  8%|▊         | 431/5701 [08:03<54:48,  1.60it/s]  

Train loss: 0.1385277658700943


  8%|▊         | 441/5701 [08:13<54:33,  1.61it/s]  

Train loss: 0.15459807217121124


  8%|▊         | 451/5701 [08:25<55:28,  1.58it/s]  

Train loss: 0.0877070501446724


  8%|▊         | 461/5701 [08:36<54:13,  1.61it/s]  

Train loss: 0.11026179045438766


  8%|▊         | 471/5701 [08:47<54:53,  1.59it/s]  

Train loss: 0.15263564884662628


  8%|▊         | 481/5701 [08:58<54:12,  1.60it/s]  

Train loss: 0.11942508071660995


  9%|▊         | 491/5701 [09:09<53:52,  1.61it/s]  

Train loss: 0.12472911179065704


  9%|▉         | 501/5701 [09:19<53:55,  1.61it/s]  

Train loss: 0.1250455528497696


  9%|▉         | 511/5701 [09:31<54:15,  1.59it/s]  

Train loss: 0.1257239431142807


  9%|▉         | 521/5701 [09:41<53:27,  1.62it/s]  

Train loss: 0.1349073201417923


  9%|▉         | 531/5701 [09:53<54:34,  1.58it/s]  

Train loss: 0.14433349668979645


  9%|▉         | 541/5701 [10:03<52:52,  1.63it/s]  

Train loss: 0.11816094070672989


 10%|▉         | 551/5701 [10:14<53:59,  1.59it/s]  

Train loss: 0.15478911995887756


 10%|▉         | 561/5701 [10:25<53:37,  1.60it/s]  

Train loss: 0.13034825026988983


 10%|█         | 571/5701 [10:36<53:30,  1.60it/s]  

Train loss: 0.11276107281446457


 10%|█         | 581/5701 [10:48<54:05,  1.58it/s]  

Train loss: 0.12554973363876343


 10%|█         | 591/5701 [10:58<51:58,  1.64it/s]  

Train loss: 0.10123894363641739


 11%|█         | 601/5701 [11:09<53:27,  1.59it/s]  

Train loss: 0.13320611417293549


 11%|█         | 611/5701 [11:21<54:14,  1.56it/s]  

Train loss: 0.11707606166601181


 11%|█         | 621/5701 [11:31<52:22,  1.62it/s]  

Train loss: 0.11806467920541763


 11%|█         | 631/5701 [11:43<53:38,  1.58it/s]  

Train loss: 0.12622784078121185


 11%|█         | 641/5701 [11:53<51:42,  1.63it/s]  

Train loss: 0.10528018325567245


 11%|█▏        | 651/5701 [12:05<52:39,  1.60it/s]  

Train loss: 0.14446963369846344


 12%|█▏        | 661/5701 [12:16<53:01,  1.58it/s]  

Train loss: 0.11459290236234665


 12%|█▏        | 671/5701 [12:27<52:01,  1.61it/s]  

Train loss: 0.10728088766336441


 12%|█▏        | 681/5701 [12:37<52:00,  1.61it/s]  

Train loss: 0.1544433832168579


 12%|█▏        | 691/5701 [12:49<52:42,  1.58it/s]  

Train loss: 0.11136467754840851


 12%|█▏        | 701/5701 [13:00<51:54,  1.61it/s]  

Train loss: 0.10473278909921646


 12%|█▏        | 711/5701 [13:11<51:58,  1.60it/s]  

Train loss: 0.11140074580907822


 13%|█▎        | 721/5701 [13:22<51:35,  1.61it/s]  

Train loss: 0.1458900421857834


 13%|█▎        | 731/5701 [13:33<52:10,  1.59it/s]  

Train loss: 0.12770330905914307


 13%|█▎        | 741/5701 [13:44<51:45,  1.60it/s]  

Train loss: 0.14588412642478943


 13%|█▎        | 751/5701 [13:56<52:12,  1.58it/s]  

Train loss: 0.1374439001083374


 13%|█▎        | 761/5701 [14:07<50:52,  1.62it/s]  

Train loss: 0.10105271637439728


 14%|█▎        | 771/5701 [14:18<50:58,  1.61it/s]  

Train loss: 0.12015976011753082


 14%|█▎        | 781/5701 [14:30<52:13,  1.57it/s]  

Train loss: 0.10907286405563354


 14%|█▍        | 791/5701 [14:39<49:31,  1.65it/s]  

Train loss: 0.11895880848169327


 14%|█▍        | 801/5701 [14:51<51:57,  1.57it/s]  

Train loss: 0.12457305192947388


 14%|█▍        | 811/5701 [15:02<51:39,  1.58it/s]  

Train loss: 0.1153697520494461


 14%|█▍        | 821/5701 [15:12<50:16,  1.62it/s]  

Train loss: 0.12160559743642807


 15%|█▍        | 831/5701 [15:25<51:47,  1.57it/s]  

Train loss: 0.12844066321849823


 15%|█▍        | 841/5701 [15:35<50:06,  1.62it/s]  

Train loss: 0.12415208667516708


 15%|█▍        | 851/5701 [15:45<52:10,  1.55it/s]  

Train loss: 0.13947762548923492


 15%|█▌        | 861/5701 [15:57<1:06:01,  1.22it/s]

Train loss: 0.1459176242351532


 15%|█▌        | 871/5701 [16:08<1:04:36,  1.25it/s]

Train loss: 0.11846832185983658


 15%|█▌        | 881/5701 [16:20<1:08:13,  1.18it/s]

Train loss: 0.09389537572860718


 16%|█▌        | 891/5701 [16:32<1:19:08,  1.01it/s]

Train loss: 0.09076123684644699


 16%|█▌        | 901/5701 [16:43<1:21:54,  1.02s/it]

Train loss: 0.11838608235120773


 16%|█▌        | 911/5701 [16:54<1:29:13,  1.12s/it]

Train loss: 0.11140068620443344


 16%|█▌        | 921/5701 [17:04<1:08:11,  1.17it/s]

Train loss: 0.12754347920417786


 16%|█▋        | 931/5701 [17:16<1:17:12,  1.03it/s]

Train loss: 0.12007099390029907


 17%|█▋        | 941/5701 [17:25<49:29,  1.60it/s]  

Train loss: 0.1367707997560501


 17%|█▋        | 951/5701 [17:37<1:11:42,  1.10it/s]

Train loss: 0.10894577950239182


 17%|█▋        | 961/5701 [17:48<1:09:15,  1.14it/s]

Train loss: 0.1428331583738327


 17%|█▋        | 971/5701 [18:00<1:14:17,  1.06it/s]

Train loss: 0.14332406222820282


 17%|█▋        | 981/5701 [18:11<1:16:00,  1.03it/s]

Train loss: 0.13248009979724884


 17%|█▋        | 991/5701 [18:22<1:15:14,  1.04it/s]

Train loss: 0.10980997234582901


 18%|█▊        | 1001/5701 [18:33<1:20:16,  1.02s/it]

Train loss: 0.12971056997776031


 18%|█▊        | 1011/5701 [18:44<1:24:26,  1.08s/it]

Train loss: 0.11038476228713989


 18%|█▊        | 1021/5701 [18:54<1:17:38,  1.00it/s]

Train loss: 0.13543762266635895


 18%|█▊        | 1031/5701 [19:05<1:14:59,  1.04it/s]

Train loss: 0.14161285758018494


 18%|█▊        | 1041/5701 [19:16<1:08:44,  1.13it/s]

Train loss: 0.08381398767232895


 18%|█▊        | 1051/5701 [19:27<1:02:09,  1.25it/s]

Train loss: 0.13264918327331543


 19%|█▊        | 1061/5701 [19:38<1:00:39,  1.27it/s]

Train loss: 0.1253618597984314


 19%|█▉        | 1071/5701 [19:49<55:00,  1.40it/s]  

Train loss: 0.137682244181633


 19%|█▉        | 1081/5701 [19:59<48:15,  1.60it/s]  

Train loss: 0.12769368290901184


 19%|█▉        | 1091/5701 [20:11<48:11,  1.59it/s]  

Train loss: 0.1240530014038086


 19%|█▉        | 1101/5701 [20:22<47:55,  1.60it/s]  

Train loss: 0.14382223784923553


 19%|█▉        | 1111/5701 [20:33<48:23,  1.58it/s]  

Train loss: 0.10435593128204346


 20%|█▉        | 1121/5701 [20:43<46:39,  1.64it/s]  

Train loss: 0.13544602692127228


 20%|█▉        | 1131/5701 [20:55<48:00,  1.59it/s]  

Train loss: 0.1412491798400879


 20%|██        | 1141/5701 [21:06<47:28,  1.60it/s]  

Train loss: 0.1295529007911682


 20%|██        | 1151/5701 [21:16<46:44,  1.62it/s]  

Train loss: 0.09699614346027374


 20%|██        | 1161/5701 [21:28<48:43,  1.55it/s]  

Train loss: 0.12171737104654312


 21%|██        | 1171/5701 [21:39<47:00,  1.61it/s]  

Train loss: 0.12858544290065765


 21%|██        | 1181/5701 [21:50<47:20,  1.59it/s]  

Train loss: 0.10709909349679947


 21%|██        | 1191/5701 [22:01<47:35,  1.58it/s]  

Train loss: 0.10880129784345627


 21%|██        | 1201/5701 [22:12<46:50,  1.60it/s]  

Train loss: 0.129048153758049


 21%|██        | 1211/5701 [22:24<47:32,  1.57it/s]  

Train loss: 0.16245023906230927


 21%|██▏       | 1221/5701 [22:34<46:14,  1.61it/s]  

Train loss: 0.11115814745426178


 22%|██▏       | 1231/5701 [22:45<46:20,  1.61it/s]  

Train loss: 0.14588718116283417


 22%|██▏       | 1241/5701 [22:56<46:33,  1.60it/s]  

Train loss: 0.11564315855503082


 22%|██▏       | 1251/5701 [23:08<46:52,  1.58it/s]  

Train loss: 0.13292260468006134


 22%|██▏       | 1261/5701 [23:19<46:04,  1.61it/s]  

Train loss: 0.11849895864725113


 22%|██▏       | 1271/5701 [23:30<46:35,  1.58it/s]  

Train loss: 0.08846592158079147


 22%|██▏       | 1281/5701 [23:40<45:38,  1.61it/s]  

Train loss: 0.1348157674074173


 23%|██▎       | 1291/5701 [23:51<45:35,  1.61it/s]  

Train loss: 0.1413162499666214


 23%|██▎       | 1301/5701 [24:03<46:25,  1.58it/s]  

Train loss: 0.13870815932750702


 23%|██▎       | 1311/5701 [24:14<45:51,  1.60it/s]  

Train loss: 0.10906477272510529


 23%|██▎       | 1321/5701 [24:25<45:06,  1.62it/s]  

Train loss: 0.14529842138290405


 23%|██▎       | 1331/5701 [24:37<46:12,  1.58it/s]  

Train loss: 0.13008932769298553


 24%|██▎       | 1341/5701 [24:47<44:58,  1.62it/s]  

Train loss: 0.14301948249340057


 24%|██▎       | 1351/5701 [24:58<45:54,  1.58it/s]  

Train loss: 0.0961218997836113


 24%|██▍       | 1361/5701 [25:10<45:27,  1.59it/s]  

Train loss: 0.11658845096826553


 24%|██▍       | 1371/5701 [25:20<44:36,  1.62it/s]  

Train loss: 0.10266444832086563


 24%|██▍       | 1381/5701 [25:32<45:59,  1.57it/s]  

Train loss: 0.1285688430070877


 24%|██▍       | 1391/5701 [25:42<43:38,  1.65it/s]  

Train loss: 0.13183057308197021


 25%|██▍       | 1401/5701 [25:54<45:44,  1.57it/s]  

Train loss: 0.13122016191482544


 25%|██▍       | 1411/5701 [26:05<45:45,  1.56it/s]  

Train loss: 0.16112034022808075


 25%|██▍       | 1421/5701 [26:16<44:18,  1.61it/s]  

Train loss: 0.13235899806022644


 25%|██▌       | 1431/5701 [26:28<44:47,  1.59it/s]  

Train loss: 0.10799584537744522


 25%|██▌       | 1441/5701 [26:38<44:24,  1.60it/s]  

Train loss: 0.106702521443367


 25%|██▌       | 1451/5701 [26:50<45:23,  1.56it/s]  

Train loss: 0.10802289098501205


 26%|██▌       | 1461/5701 [27:00<43:22,  1.63it/s]  

Train loss: 0.1278366893529892


 26%|██▌       | 1471/5701 [27:12<44:15,  1.59it/s]  

Train loss: 0.12246034294366837


 26%|██▌       | 1481/5701 [27:23<44:21,  1.59it/s]  

Train loss: 0.13500352203845978


 26%|██▌       | 1491/5701 [27:35<44:10,  1.59it/s]  

Train loss: 0.11851119250059128


 26%|██▋       | 1501/5701 [27:45<43:16,  1.62it/s]  

Train loss: 0.10778673738241196


 27%|██▋       | 1511/5701 [27:56<43:31,  1.60it/s]  

Train loss: 0.14413979649543762


 27%|██▋       | 1521/5701 [28:07<43:37,  1.60it/s]  

Train loss: 0.13403765857219696


 27%|██▋       | 1531/5701 [28:19<43:51,  1.58it/s]  

Train loss: 0.07782415300607681


 27%|██▋       | 1541/5701 [28:29<42:33,  1.63it/s]  

Train loss: 0.14353038370609283


 27%|██▋       | 1551/5701 [28:41<43:39,  1.58it/s]  

Train loss: 0.11486741155385971


 27%|██▋       | 1561/5701 [28:53<43:48,  1.57it/s]  

Train loss: 0.11169026046991348


 28%|██▊       | 1571/5701 [29:03<42:28,  1.62it/s]  

Train loss: 0.14044710993766785


 28%|██▊       | 1581/5701 [29:14<42:29,  1.62it/s]  

Train loss: 0.1303868442773819


 28%|██▊       | 1591/5701 [29:25<42:53,  1.60it/s]  

Train loss: 0.14053747057914734


 28%|██▊       | 1601/5701 [29:36<43:10,  1.58it/s]  

Train loss: 0.12646359205245972


 28%|██▊       | 1611/5701 [29:47<41:50,  1.63it/s]  

Train loss: 0.1366734355688095


 28%|██▊       | 1621/5701 [29:58<42:48,  1.59it/s]  

Train loss: 0.12779144942760468


 29%|██▊       | 1631/5701 [30:09<42:17,  1.60it/s]  

Train loss: 0.11266543716192245


 29%|██▉       | 1641/5701 [30:20<46:10,  1.47it/s]  

Train loss: 0.11692805588245392


 29%|██▉       | 1651/5701 [30:32<42:26,  1.59it/s]  

Train loss: 0.13261662423610687


 29%|██▉       | 1661/5701 [30:43<42:16,  1.59it/s]  

Train loss: 0.1122921034693718


 29%|██▉       | 1671/5701 [30:53<41:11,  1.63it/s]  

Train loss: 0.12049444764852524


 29%|██▉       | 1681/5701 [31:04<41:45,  1.60it/s]  

Train loss: 0.15107876062393188


 30%|██▉       | 1691/5701 [31:15<42:11,  1.58it/s]  

Train loss: 0.11879129707813263


 30%|██▉       | 1701/5701 [31:27<42:25,  1.57it/s]  

Train loss: 0.1096704974770546


 30%|███       | 1711/5701 [31:38<41:41,  1.59it/s]  

Train loss: 0.11814475059509277


 30%|███       | 1721/5701 [31:50<41:49,  1.59it/s]  

Train loss: 0.12515889108181


 30%|███       | 1731/5701 [32:00<41:00,  1.61it/s]  

Train loss: 0.11573021858930588


 31%|███       | 1741/5701 [32:11<41:12,  1.60it/s]  

Train loss: 0.14306877553462982


 31%|███       | 1751/5701 [32:22<41:28,  1.59it/s]  

Train loss: 0.13282214105129242


 31%|███       | 1761/5701 [32:34<41:40,  1.58it/s]  

Train loss: 0.10392507165670395


 31%|███       | 1771/5701 [32:45<40:46,  1.61it/s]  

Train loss: 0.11881434172391891


 31%|███       | 1781/5701 [32:56<40:54,  1.60it/s]  

Train loss: 0.1259087175130844


 31%|███▏      | 1791/5701 [33:07<40:06,  1.62it/s]  

Train loss: 0.11820148676633835


 32%|███▏      | 1801/5701 [33:18<40:43,  1.60it/s]  

Train loss: 0.12650631368160248


 32%|███▏      | 1811/5701 [33:28<39:57,  1.62it/s]  

Train loss: 0.12785351276397705


 32%|███▏      | 1821/5701 [33:39<40:47,  1.59it/s]  

Train loss: 0.13546141982078552


 32%|███▏      | 1831/5701 [33:51<40:22,  1.60it/s]  

Train loss: 0.15582239627838135


 32%|███▏      | 1841/5701 [34:01<40:09,  1.60it/s]  

Train loss: 0.15896284580230713


 32%|███▏      | 1851/5701 [34:13<40:31,  1.58it/s]  

Train loss: 0.09778868407011032


 33%|███▎      | 1861/5701 [34:23<39:30,  1.62it/s]  

Train loss: 0.08694805204868317


 33%|███▎      | 1871/5701 [34:35<40:18,  1.58it/s]  

Train loss: 0.11121954768896103


 33%|███▎      | 1881/5701 [34:46<39:40,  1.60it/s]  

Train loss: 0.13359324634075165


 33%|███▎      | 1891/5701 [34:57<39:59,  1.59it/s]  

Train loss: 0.1372007429599762


 33%|███▎      | 1901/5701 [35:08<39:37,  1.60it/s]  

Train loss: 0.1434510201215744


 34%|███▎      | 1911/5701 [35:18<38:36,  1.64it/s]  

Train loss: 0.11729313433170319


 34%|███▎      | 1921/5701 [35:29<39:08,  1.61it/s]  

Train loss: 0.10868816822767258


 34%|███▍      | 1931/5701 [35:40<39:29,  1.59it/s]  

Train loss: 0.12271877378225327


 34%|███▍      | 1941/5701 [35:51<39:15,  1.60it/s]  

Train loss: 0.14403381943702698


 34%|███▍      | 1951/5701 [36:02<38:38,  1.62it/s]  

Train loss: 0.12274674326181412


 34%|███▍      | 1961/5701 [36:14<39:24,  1.58it/s]  

Train loss: 0.13237105309963226


 35%|███▍      | 1971/5701 [36:25<39:10,  1.59it/s]  

Train loss: 0.10535676777362823


 35%|███▍      | 1981/5701 [36:36<38:48,  1.60it/s]  

Train loss: 0.138047456741333


 35%|███▍      | 1991/5701 [36:47<38:45,  1.60it/s]  

Train loss: 0.11082763969898224


 35%|███▌      | 2001/5701 [36:59<38:41,  1.59it/s]  

Train loss: 0.1226431280374527


 35%|███▌      | 2011/5701 [37:10<38:52,  1.58it/s]  

Train loss: 0.12365100532770157


 35%|███▌      | 2021/5701 [37:20<38:02,  1.61it/s]  

Train loss: 0.11483361572027206


 36%|███▌      | 2031/5701 [37:32<38:52,  1.57it/s]  

Train loss: 0.12145723402500153


 36%|███▌      | 2041/5701 [37:44<38:22,  1.59it/s]  

Train loss: 0.13485337793827057


 36%|███▌      | 2051/5701 [37:55<38:14,  1.59it/s]  

Train loss: 0.11552728712558746


 36%|███▌      | 2061/5701 [38:05<37:19,  1.63it/s]  

Train loss: 0.1456991732120514


 36%|███▋      | 2071/5701 [38:16<37:29,  1.61it/s]  

Train loss: 0.10601291805505753


 37%|███▋      | 2081/5701 [38:27<38:14,  1.58it/s]  

Train loss: 0.09608713537454605


 37%|███▋      | 2091/5701 [38:38<37:29,  1.61it/s]  

Train loss: 0.1174103394150734


 37%|███▋      | 2101/5701 [38:49<37:13,  1.61it/s]  

Train loss: 0.12772300839424133


 37%|███▋      | 2111/5701 [39:01<37:45,  1.58it/s]  

Train loss: 0.1170281171798706


 37%|███▋      | 2121/5701 [39:10<35:55,  1.66it/s]  

Train loss: 0.1099616214632988


 37%|███▋      | 2131/5701 [39:22<37:40,  1.58it/s]  

Train loss: 0.13435962796211243


 38%|███▊      | 2141/5701 [39:33<37:29,  1.58it/s]  

Train loss: 0.1039053201675415


 38%|███▊      | 2151/5701 [39:45<37:45,  1.57it/s]  

Train loss: 0.1208488866686821


 38%|███▊      | 2161/5701 [39:56<36:46,  1.60it/s]  

Train loss: 0.10181563347578049


 38%|███▊      | 2171/5701 [40:07<36:35,  1.61it/s]  

Train loss: 0.11799557507038116


 38%|███▊      | 2181/5701 [40:18<37:12,  1.58it/s]  

Train loss: 0.1215267926454544


 38%|███▊      | 2191/5701 [40:30<36:44,  1.59it/s]  

Train loss: 0.1064927950501442


 39%|███▊      | 2201/5701 [40:40<35:48,  1.63it/s]  

Train loss: 0.12196780741214752


 39%|███▉      | 2211/5701 [40:51<36:26,  1.60it/s]  

Train loss: 0.12875598669052124


 39%|███▉      | 2221/5701 [41:02<36:19,  1.60it/s]  

Train loss: 0.12120957672595978


 39%|███▉      | 2231/5701 [41:13<36:00,  1.61it/s]  

Train loss: 0.1231854185461998


 39%|███▉      | 2241/5701 [41:24<36:04,  1.60it/s]  

Train loss: 0.09622746706008911


 39%|███▉      | 2251/5701 [41:36<36:05,  1.59it/s]  

Train loss: 0.13175968825817108


 40%|███▉      | 2261/5701 [41:46<35:05,  1.63it/s]  

Train loss: 0.1570938676595688


 40%|███▉      | 2271/5701 [41:57<35:43,  1.60it/s]  

Train loss: 0.11254262924194336


 40%|████      | 2281/5701 [42:09<35:53,  1.59it/s]  

Train loss: 0.09985478222370148


 40%|████      | 2291/5701 [42:20<35:42,  1.59it/s]  

Train loss: 0.11636823415756226


 40%|████      | 2301/5701 [42:31<35:11,  1.61it/s]  

Train loss: 0.1336953490972519


 41%|████      | 2311/5701 [42:41<35:00,  1.61it/s]  

Train loss: 0.11210417747497559


 41%|████      | 2321/5701 [42:52<35:20,  1.59it/s]  

Train loss: 0.1215331181883812


 41%|████      | 2331/5701 [43:03<35:15,  1.59it/s]  

Train loss: 0.11940466612577438


 41%|████      | 2341/5701 [43:14<35:03,  1.60it/s]  

Train loss: 0.12907974421977997


 41%|████      | 2351/5701 [43:26<35:02,  1.59it/s]  

Train loss: 0.16605249047279358


 41%|████▏     | 2361/5701 [43:37<34:56,  1.59it/s]  

Train loss: 0.12392275780439377


 42%|████▏     | 2371/5701 [43:48<34:39,  1.60it/s]  

Train loss: 0.10854444652795792


 42%|████▏     | 2381/5701 [43:59<34:51,  1.59it/s]  

Train loss: 0.1611499786376953


 42%|████▏     | 2391/5701 [44:11<34:59,  1.58it/s]  

Train loss: 0.1209070011973381


 42%|████▏     | 2401/5701 [44:22<34:05,  1.61it/s]  

Train loss: 0.0858599916100502


 42%|████▏     | 2411/5701 [44:33<34:14,  1.60it/s]  

Train loss: 0.13252636790275574


 42%|████▏     | 2421/5701 [44:44<34:20,  1.59it/s]  

Train loss: 0.12671802937984467


 43%|████▎     | 2431/5701 [44:55<34:33,  1.58it/s]  

Train loss: 0.10643073171377182


 43%|████▎     | 2441/5701 [45:06<33:48,  1.61it/s]  

Train loss: 0.10643016546964645


 43%|████▎     | 2451/5701 [45:16<32:57,  1.64it/s]  

Train loss: 0.09459997713565826


 43%|████▎     | 2461/5701 [45:27<33:48,  1.60it/s]  

Train loss: 0.1139996200799942


 43%|████▎     | 2471/5701 [45:39<34:00,  1.58it/s]  

Train loss: 0.13086238503456116


 44%|████▎     | 2481/5701 [45:50<33:43,  1.59it/s]  

Train loss: 0.13904276490211487


 44%|████▎     | 2491/5701 [46:02<34:01,  1.57it/s]  

Train loss: 0.15219905972480774


 44%|████▍     | 2501/5701 [46:12<33:07,  1.61it/s]  

Train loss: 0.1506623476743698


 44%|████▍     | 2511/5701 [46:23<32:54,  1.62it/s]  

Train loss: 0.12715290486812592


 44%|████▍     | 2521/5701 [46:35<33:31,  1.58it/s]  

Train loss: 0.12302255630493164


 44%|████▍     | 2531/5701 [46:46<32:39,  1.62it/s]  

Train loss: 0.12888161838054657


 45%|████▍     | 2541/5701 [46:57<32:59,  1.60it/s]  

Train loss: 0.13615694642066956


 45%|████▍     | 2551/5701 [47:08<32:58,  1.59it/s]  

Train loss: 0.0964633896946907


 45%|████▍     | 2561/5701 [47:17<31:45,  1.65it/s]  

Train loss: 0.11288405954837799


 45%|████▌     | 2571/5701 [47:29<32:52,  1.59it/s]  

Train loss: 0.12457124143838882


 45%|████▌     | 2581/5701 [47:40<32:58,  1.58it/s]  

Train loss: 0.12222003936767578


 45%|████▌     | 2591/5701 [47:51<32:04,  1.62it/s]  

Train loss: 0.1422237604856491


 46%|████▌     | 2601/5701 [48:02<32:08,  1.61it/s]  

Train loss: 0.10591024160385132


 46%|████▌     | 2611/5701 [48:13<32:25,  1.59it/s]  

Train loss: 0.1292242854833603


 46%|████▌     | 2621/5701 [48:25<32:27,  1.58it/s]  

Train loss: 0.10709591209888458


 46%|████▌     | 2631/5701 [48:36<32:15,  1.59it/s]  

Train loss: 0.11451653391122818


 46%|████▋     | 2641/5701 [48:47<31:43,  1.61it/s]  

Train loss: 0.10075324028730392


 47%|████▋     | 2651/5701 [48:59<31:52,  1.59it/s]  

Train loss: 0.10636498034000397


 47%|████▋     | 2661/5701 [49:09<31:18,  1.62it/s]  

Train loss: 0.13392619788646698


 47%|████▋     | 2671/5701 [49:20<31:41,  1.59it/s]  

Train loss: 0.1557123064994812


 47%|████▋     | 2681/5701 [49:31<31:49,  1.58it/s]  

Train loss: 0.11206071823835373


 47%|████▋     | 2691/5701 [49:42<31:19,  1.60it/s]  

Train loss: 0.12313751131296158


 47%|████▋     | 2701/5701 [49:54<31:40,  1.58it/s]  

Train loss: 0.12954024970531464


 48%|████▊     | 2711/5701 [50:05<31:14,  1.59it/s]  

Train loss: 0.13702940940856934


 48%|████▊     | 2721/5701 [50:16<31:01,  1.60it/s]  

Train loss: 0.13311104476451874


 48%|████▊     | 2731/5701 [50:27<31:08,  1.59it/s]  

Train loss: 0.12557341158390045


 48%|████▊     | 2741/5701 [50:38<30:39,  1.61it/s]  

Train loss: 0.13763004541397095


 48%|████▊     | 2751/5701 [50:50<31:17,  1.57it/s]  

Train loss: 0.13967959582805634


 48%|████▊     | 2761/5701 [51:01<30:34,  1.60it/s]  

Train loss: 0.1312887966632843


 49%|████▊     | 2771/5701 [51:11<29:48,  1.64it/s]  

Train loss: 0.12710486352443695


 49%|████▉     | 2781/5701 [51:22<30:11,  1.61it/s]  

Train loss: 0.09981381148099899


 49%|████▉     | 2791/5701 [51:33<30:20,  1.60it/s]  

Train loss: 0.10800271481275558


 49%|████▉     | 2801/5701 [51:44<30:19,  1.59it/s]  

Train loss: 0.12826094031333923


 49%|████▉     | 2811/5701 [51:55<29:57,  1.61it/s]  

Train loss: 0.11159958690404892


 49%|████▉     | 2821/5701 [52:07<31:03,  1.55it/s]  

Train loss: 0.1145833358168602


 50%|████▉     | 2831/5701 [52:18<29:58,  1.60it/s]  

Train loss: 0.14793160557746887


 50%|████▉     | 2841/5701 [52:29<30:01,  1.59it/s]  

Train loss: 0.1375160664319992


 50%|█████     | 2851/5701 [52:39<29:11,  1.63it/s]  

Train loss: 0.14060305058956146


 50%|█████     | 2861/5701 [52:51<29:41,  1.59it/s]  

Train loss: 0.13702958822250366


 50%|█████     | 2871/5701 [53:02<29:43,  1.59it/s]  

Train loss: 0.14443467557430267


 51%|█████     | 2881/5701 [53:13<29:28,  1.59it/s]  

Train loss: 0.10326077044010162


 51%|█████     | 2891/5701 [53:24<29:21,  1.60it/s]  

Train loss: 0.14726610481739044


 51%|█████     | 2901/5701 [53:34<28:33,  1.63it/s]  

Train loss: 0.11442901194095612


 51%|█████     | 2911/5701 [53:46<29:10,  1.59it/s]  

Train loss: 0.14350534975528717


 51%|█████     | 2921/5701 [53:58<29:33,  1.57it/s]  

Train loss: 0.1361415833234787


 51%|█████▏    | 2931/5701 [54:08<28:44,  1.61it/s]  

Train loss: 0.12749312818050385


 52%|█████▏    | 2941/5701 [54:20<28:55,  1.59it/s]  

Train loss: 0.11556746810674667


 52%|█████▏    | 2951/5701 [54:31<28:44,  1.59it/s]  

Train loss: 0.1265239715576172


 52%|█████▏    | 2961/5701 [54:42<28:30,  1.60it/s]  

Train loss: 0.12149323523044586


 52%|█████▏    | 2971/5701 [54:53<28:33,  1.59it/s]  

Train loss: 0.10162222385406494


 52%|█████▏    | 2981/5701 [55:04<28:21,  1.60it/s]  

Train loss: 0.09202965348958969


 52%|█████▏    | 2991/5701 [55:15<28:14,  1.60it/s]  

Train loss: 0.11342012882232666


 53%|█████▎    | 3001/5701 [55:25<27:42,  1.62it/s]  

Train loss: 0.1226683259010315


 53%|█████▎    | 3011/5701 [55:38<28:39,  1.56it/s]  

Train loss: 0.09893357008695602


 53%|█████▎    | 3021/5701 [55:48<27:37,  1.62it/s]  

Train loss: 0.15813057124614716


 53%|█████▎    | 3031/5701 [56:00<28:07,  1.58it/s]  

Train loss: 0.10414504259824753


 53%|█████▎    | 3041/5701 [56:11<28:07,  1.58it/s]  

Train loss: 0.1232319101691246


 54%|█████▎    | 3051/5701 [56:22<27:20,  1.62it/s]  

Train loss: 0.13049961626529694


 54%|█████▎    | 3061/5701 [56:33<27:54,  1.58it/s]  

Train loss: 0.1154974028468132


 54%|█████▍    | 3071/5701 [56:43<26:55,  1.63it/s]  

Train loss: 0.1253078430891037


 54%|█████▍    | 3081/5701 [56:54<27:16,  1.60it/s]  

Train loss: 0.1353089064359665


 54%|█████▍    | 3091/5701 [57:06<27:31,  1.58it/s]  

Train loss: 0.13228891789913177


 54%|█████▍    | 3101/5701 [57:16<26:29,  1.64it/s]  

Train loss: 0.11533080786466599


 55%|█████▍    | 3111/5701 [57:28<27:07,  1.59it/s]  

Train loss: 0.1082281619310379


 55%|█████▍    | 3121/5701 [57:39<26:52,  1.60it/s]  

Train loss: 0.14535807073116302


 55%|█████▍    | 3131/5701 [57:50<26:49,  1.60it/s]  

Train loss: 0.125868558883667


 55%|█████▌    | 3141/5701 [58:02<27:06,  1.57it/s]  

Train loss: 0.11607678234577179


 55%|█████▌    | 3151/5701 [58:13<26:41,  1.59it/s]  

Train loss: 0.12053962796926498


 55%|█████▌    | 3161/5701 [58:24<26:26,  1.60it/s]  

Train loss: 0.12209372967481613


 56%|█████▌    | 3171/5701 [58:34<25:57,  1.62it/s]  

Train loss: 0.13433733582496643


 56%|█████▌    | 3181/5701 [58:45<26:13,  1.60it/s]  

Train loss: 0.14367817342281342


 56%|█████▌    | 3191/5701 [58:56<26:21,  1.59it/s]  

Train loss: 0.09307380765676498


 56%|█████▌    | 3201/5701 [59:07<25:54,  1.61it/s]  

Train loss: 0.09261168539524078


 56%|█████▋    | 3211/5701 [59:18<25:44,  1.61it/s]  

Train loss: 0.11157002300024033


 56%|█████▋    | 3221/5701 [59:29<26:02,  1.59it/s]  

Train loss: 0.14093738794326782


 57%|█████▋    | 3231/5701 [59:40<25:39,  1.60it/s]  

Train loss: 0.12656845152378082


 57%|█████▋    | 3241/5701 [59:51<25:34,  1.60it/s]  

Train loss: 0.10872616618871689


 57%|█████▋    | 3251/5701 [1:00:02<25:23,  1.61it/s]

Train loss: 0.10980889946222305


 57%|█████▋    | 3261/5701 [1:00:13<25:31,  1.59it/s]  

Train loss: 0.11450377851724625


 57%|█████▋    | 3271/5701 [1:00:24<25:04,  1.62it/s]  

Train loss: 0.12438734620809555


 58%|█████▊    | 3281/5701 [1:00:35<25:18,  1.59it/s]  

Train loss: 0.11421666294336319


 58%|█████▊    | 3291/5701 [1:00:45<25:02,  1.60it/s]  

Train loss: 0.11692088097333908


 58%|█████▊    | 3301/5701 [1:00:56<24:55,  1.60it/s]  

Train loss: 0.12350153177976608


 58%|█████▊    | 3311/5701 [1:01:08<25:19,  1.57it/s]  

Train loss: 0.1603560894727707


 58%|█████▊    | 3321/5701 [1:01:18<24:22,  1.63it/s]  

Train loss: 0.13234172761440277


 58%|█████▊    | 3331/5701 [1:01:30<25:16,  1.56it/s]  

Train loss: 0.1175093874335289


 59%|█████▊    | 3341/5701 [1:01:41<24:39,  1.60it/s]  

Train loss: 0.12188167870044708


 59%|█████▉    | 3351/5701 [1:01:52<24:31,  1.60it/s]  

Train loss: 0.13300859928131104


 59%|█████▉    | 3361/5701 [1:02:02<24:21,  1.60it/s]  

Train loss: 0.107380211353302


 59%|█████▉    | 3371/5701 [1:02:13<24:12,  1.60it/s]  

Train loss: 0.14271382987499237


 59%|█████▉    | 3381/5701 [1:02:24<23:55,  1.62it/s]  

Train loss: 0.10152784734964371


 59%|█████▉    | 3391/5701 [1:02:35<24:13,  1.59it/s]  

Train loss: 0.11381158977746964


 60%|█████▉    | 3401/5701 [1:02:47<24:17,  1.58it/s]  

Train loss: 0.13311053812503815


 60%|█████▉    | 3411/5701 [1:02:57<23:18,  1.64it/s]  

Train loss: 0.1195904016494751


 60%|██████    | 3421/5701 [1:03:08<23:47,  1.60it/s]  

Train loss: 0.09908434003591537


 60%|██████    | 3431/5701 [1:03:19<23:34,  1.60it/s]  

Train loss: 0.14440785348415375


 60%|██████    | 3441/5701 [1:03:30<23:29,  1.60it/s]  

Train loss: 0.12015968561172485


 61%|██████    | 3451/5701 [1:03:41<23:36,  1.59it/s]  

Train loss: 0.12423312664031982


 61%|██████    | 3461/5701 [1:03:53<23:46,  1.57it/s]  

Train loss: 0.12023003399372101


 61%|██████    | 3471/5701 [1:04:03<22:48,  1.63it/s]  

Train loss: 0.11868009716272354


 61%|██████    | 3481/5701 [1:04:15<23:17,  1.59it/s]  

Train loss: 0.12381737679243088


 61%|██████    | 3491/5701 [1:04:26<23:07,  1.59it/s]  

Train loss: 0.09629460424184799


 61%|██████▏   | 3501/5701 [1:04:37<23:08,  1.58it/s]  

Train loss: 0.08723122626543045


 62%|██████▏   | 3511/5701 [1:04:48<22:41,  1.61it/s]  

Train loss: 0.1419813483953476


 62%|██████▏   | 3521/5701 [1:04:59<23:05,  1.57it/s]  

Train loss: 0.12049509584903717


 62%|██████▏   | 3531/5701 [1:05:10<22:29,  1.61it/s]  

Train loss: 0.12251728028059006


 62%|██████▏   | 3541/5701 [1:05:22<22:46,  1.58it/s]  

Train loss: 0.10437104851007462


 62%|██████▏   | 3551/5701 [1:05:32<22:25,  1.60it/s]  

Train loss: 0.11748039722442627


 62%|██████▏   | 3561/5701 [1:05:43<22:25,  1.59it/s]  

Train loss: 0.1563938707113266


 63%|██████▎   | 3571/5701 [1:05:54<22:08,  1.60it/s]  

Train loss: 0.12835575640201569


 63%|██████▎   | 3581/5701 [1:06:05<22:03,  1.60it/s]  

Train loss: 0.11961197108030319


 63%|██████▎   | 3591/5701 [1:06:16<22:01,  1.60it/s]  

Train loss: 0.1294526308774948


 63%|██████▎   | 3601/5701 [1:06:27<21:54,  1.60it/s]  

Train loss: 0.11374267190694809


 63%|██████▎   | 3611/5701 [1:06:38<21:42,  1.60it/s]  

Train loss: 0.10185133665800095


 64%|██████▎   | 3621/5701 [1:06:50<21:57,  1.58it/s]  

Train loss: 0.11188294738531113


 64%|██████▎   | 3631/5701 [1:07:00<21:21,  1.62it/s]  

Train loss: 0.09554465115070343


 64%|██████▍   | 3641/5701 [1:07:12<21:43,  1.58it/s]  

Train loss: 0.10791563242673874


 64%|██████▍   | 3651/5701 [1:07:22<21:01,  1.62it/s]  

Train loss: 0.10759831964969635


 64%|██████▍   | 3661/5701 [1:07:33<21:33,  1.58it/s]  

Train loss: 0.12294995784759521


 64%|██████▍   | 3671/5701 [1:07:45<22:48,  1.48it/s]  

Train loss: 0.11922714859247208


 65%|██████▍   | 3681/5701 [1:07:56<23:17,  1.45it/s]  

Train loss: 0.13362915813922882


 65%|██████▍   | 3691/5701 [1:08:07<21:08,  1.58it/s]  

Train loss: 0.12547218799591064


 65%|██████▍   | 3701/5701 [1:08:18<20:35,  1.62it/s]  

Train loss: 0.11962588131427765


 65%|██████▌   | 3711/5701 [1:08:29<20:41,  1.60it/s]  

Train loss: 0.10299360007047653


 65%|██████▌   | 3721/5701 [1:08:40<20:40,  1.60it/s]  

Train loss: 0.105657197535038


 65%|██████▌   | 3731/5701 [1:08:51<20:37,  1.59it/s]  

Train loss: 0.1455717831850052


 66%|██████▌   | 3741/5701 [1:09:02<20:17,  1.61it/s]  

Train loss: 0.14953140914440155


 66%|██████▌   | 3751/5701 [1:09:14<20:26,  1.59it/s]  

Train loss: 0.12874244153499603


 66%|██████▌   | 3761/5701 [1:09:25<20:09,  1.60it/s]  

Train loss: 0.14052248001098633


 66%|██████▌   | 3771/5701 [1:09:35<20:21,  1.58it/s]  

Train loss: 0.11190568655729294


 66%|██████▋   | 3781/5701 [1:09:47<20:35,  1.55it/s]  

Train loss: 0.1402691900730133


 66%|██████▋   | 3791/5701 [1:09:58<19:57,  1.59it/s]  

Train loss: 0.12068420648574829


 67%|██████▋   | 3801/5701 [1:10:08<19:16,  1.64it/s]

Train loss: 0.11492278426885605


 67%|██████▋   | 3811/5701 [1:10:20<19:50,  1.59it/s]  

Train loss: 0.11181820929050446


 67%|██████▋   | 3821/5701 [1:10:31<19:33,  1.60it/s]  

Train loss: 0.13271485269069672


 67%|██████▋   | 3831/5701 [1:10:42<19:38,  1.59it/s]  

Train loss: 0.10003580898046494


 67%|██████▋   | 3841/5701 [1:10:52<19:07,  1.62it/s]  

Train loss: 0.12537817656993866


 68%|██████▊   | 3851/5701 [1:11:04<19:31,  1.58it/s]  

Train loss: 0.10534681379795074


 68%|██████▊   | 3861/5701 [1:11:15<19:05,  1.61it/s]  

Train loss: 0.11576435714960098


 68%|██████▊   | 3871/5701 [1:11:27<19:21,  1.57it/s]  

Train loss: 0.1103852167725563


 68%|██████▊   | 3881/5701 [1:11:38<19:09,  1.58it/s]  

Train loss: 0.11925413459539413


 68%|██████▊   | 3891/5701 [1:11:49<18:45,  1.61it/s]  

Train loss: 0.12422201782464981


 68%|██████▊   | 3901/5701 [1:12:00<18:39,  1.61it/s]  

Train loss: 0.10447575896978378


 69%|██████▊   | 3911/5701 [1:12:12<18:38,  1.60it/s]  

Train loss: 0.11326588690280914


 69%|██████▉   | 3921/5701 [1:12:22<18:30,  1.60it/s]  

Train loss: 0.1265237182378769


 69%|██████▉   | 3931/5701 [1:12:33<18:11,  1.62it/s]  

Train loss: 0.13077281415462494


 69%|██████▉   | 3941/5701 [1:12:45<18:50,  1.56it/s]  

Train loss: 0.15866169333457947


 69%|██████▉   | 3951/5701 [1:12:56<18:21,  1.59it/s]  

Train loss: 0.11529866605997086


 69%|██████▉   | 3961/5701 [1:13:08<18:09,  1.60it/s]  

Train loss: 0.13094930350780487


 70%|██████▉   | 3971/5701 [1:13:19<18:16,  1.58it/s]  

Train loss: 0.11688985675573349


 70%|██████▉   | 3981/5701 [1:13:29<17:16,  1.66it/s]

Train loss: 0.1090448722243309


 70%|███████   | 3991/5701 [1:13:41<18:12,  1.56it/s]  

Train loss: 0.09851091355085373


 70%|███████   | 4001/5701 [1:13:51<17:34,  1.61it/s]

Train loss: 0.12274595350027084


 70%|███████   | 4011/5701 [1:14:03<17:44,  1.59it/s]  

Train loss: 0.1444525420665741


 71%|███████   | 4021/5701 [1:14:14<17:42,  1.58it/s]  

Train loss: 0.1296178251504898


 71%|███████   | 4031/5701 [1:14:25<17:23,  1.60it/s]  

Train loss: 0.11964293569326401


 71%|███████   | 4041/5701 [1:14:36<17:12,  1.61it/s]  

Train loss: 0.10569780319929123


 71%|███████   | 4051/5701 [1:14:48<17:30,  1.57it/s]  

Train loss: 0.14880099892616272


 71%|███████   | 4061/5701 [1:14:58<16:32,  1.65it/s]

Train loss: 0.12791724503040314


 71%|███████▏  | 4071/5701 [1:15:09<16:55,  1.61it/s]  

Train loss: 0.12258821725845337


 72%|███████▏  | 4081/5701 [1:15:21<17:05,  1.58it/s]  

Train loss: 0.11343415826559067


 72%|███████▏  | 4091/5701 [1:15:32<16:37,  1.61it/s]

Train loss: 0.104275643825531


 72%|███████▏  | 4101/5701 [1:15:42<16:38,  1.60it/s]

Train loss: 0.12657909095287323


 72%|███████▏  | 4111/5701 [1:15:54<16:52,  1.57it/s]  

Train loss: 0.12472782284021378


 72%|███████▏  | 4121/5701 [1:16:05<16:35,  1.59it/s]  

Train loss: 0.1299694925546646


 72%|███████▏  | 4131/5701 [1:16:16<16:09,  1.62it/s]

Train loss: 0.1176522970199585


 73%|███████▎  | 4141/5701 [1:16:27<16:12,  1.60it/s]

Train loss: 0.10329419374465942


 73%|███████▎  | 4151/5701 [1:16:38<16:10,  1.60it/s]

Train loss: 0.12960565090179443


 73%|███████▎  | 4161/5701 [1:16:49<15:52,  1.62it/s]

Train loss: 0.122364841401577


 73%|███████▎  | 4171/5701 [1:17:00<16:04,  1.59it/s]  

Train loss: 0.11302949488162994


 73%|███████▎  | 4181/5701 [1:17:11<15:48,  1.60it/s]

Train loss: 0.16050653159618378


 74%|███████▎  | 4191/5701 [1:17:23<15:49,  1.59it/s]

Train loss: 0.1296260505914688


 74%|███████▎  | 4201/5701 [1:17:33<15:34,  1.60it/s]

Train loss: 0.12236563116312027


 74%|███████▍  | 4211/5701 [1:17:44<15:27,  1.61it/s]

Train loss: 0.09641022235155106


 74%|███████▍  | 4221/5701 [1:17:56<15:30,  1.59it/s]

Train loss: 0.13476966321468353


 74%|███████▍  | 4231/5701 [1:18:07<15:27,  1.59it/s]

Train loss: 0.12028159201145172


 74%|███████▍  | 4241/5701 [1:18:19<15:17,  1.59it/s]

Train loss: 0.14300012588500977


 75%|███████▍  | 4251/5701 [1:18:30<15:14,  1.59it/s]

Train loss: 0.09773336350917816


 75%|███████▍  | 4261/5701 [1:18:41<14:55,  1.61it/s]

Train loss: 0.10742004215717316


 75%|███████▍  | 4271/5701 [1:18:51<14:48,  1.61it/s]

Train loss: 0.11640510708093643


 75%|███████▌  | 4281/5701 [1:19:03<14:52,  1.59it/s]

Train loss: 0.11744286864995956


 75%|███████▌  | 4291/5701 [1:19:14<14:56,  1.57it/s]

Train loss: 0.09128622710704803


 75%|███████▌  | 4301/5701 [1:19:26<14:36,  1.60it/s]

Train loss: 0.0961795374751091


 76%|███████▌  | 4311/5701 [1:19:36<14:17,  1.62it/s]

Train loss: 0.13167649507522583


 76%|███████▌  | 4321/5701 [1:19:48<14:44,  1.56it/s]  

Train loss: 0.1403130441904068


 76%|███████▌  | 4331/5701 [1:20:00<14:23,  1.59it/s]

Train loss: 0.1261042058467865


 76%|███████▌  | 4341/5701 [1:20:09<13:45,  1.65it/s]

Train loss: 0.09293170273303986


 76%|███████▋  | 4351/5701 [1:20:21<14:04,  1.60it/s]

Train loss: 0.10835148394107819


 76%|███████▋  | 4361/5701 [1:20:32<14:04,  1.59it/s]

Train loss: 0.13303348422050476


 77%|███████▋  | 4371/5701 [1:20:42<13:25,  1.65it/s]

Train loss: 0.12078096717596054


 77%|███████▋  | 4381/5701 [1:20:54<13:57,  1.58it/s]

Train loss: 0.11873561143875122


 77%|███████▋  | 4391/5701 [1:21:05<13:48,  1.58it/s]

Train loss: 0.10574986040592194


 77%|███████▋  | 4401/5701 [1:21:17<13:41,  1.58it/s]

Train loss: 0.10784317553043365


 77%|███████▋  | 4411/5701 [1:21:27<13:16,  1.62it/s]

Train loss: 0.12713919579982758


 78%|███████▊  | 4421/5701 [1:21:38<13:20,  1.60it/s]

Train loss: 0.11152571439743042


 78%|███████▊  | 4431/5701 [1:21:49<13:09,  1.61it/s]

Train loss: 0.13449351489543915


 78%|███████▊  | 4441/5701 [1:22:00<13:11,  1.59it/s]

Train loss: 0.12297158688306808


 78%|███████▊  | 4451/5701 [1:22:11<13:00,  1.60it/s]

Train loss: 0.11743441969156265


 78%|███████▊  | 4461/5701 [1:22:22<12:54,  1.60it/s]

Train loss: 0.12004496157169342


 78%|███████▊  | 4471/5701 [1:22:33<12:49,  1.60it/s]

Train loss: 0.12535910308361053


 79%|███████▊  | 4481/5701 [1:22:44<12:39,  1.61it/s]

Train loss: 0.10416126251220703


 79%|███████▉  | 4491/5701 [1:22:55<12:32,  1.61it/s]

Train loss: 0.11135804653167725


 79%|███████▉  | 4501/5701 [1:23:07<12:39,  1.58it/s]

Train loss: 0.0945214331150055


 79%|███████▉  | 4511/5701 [1:23:17<12:07,  1.64it/s]

Train loss: 0.11605584621429443


 79%|███████▉  | 4521/5701 [1:23:28<12:20,  1.59it/s]

Train loss: 0.13254599273204803


 79%|███████▉  | 4531/5701 [1:23:39<12:11,  1.60it/s]

Train loss: 0.1435009241104126


 80%|███████▉  | 4541/5701 [1:23:51<12:14,  1.58it/s]

Train loss: 0.08923441171646118


 80%|███████▉  | 4551/5701 [1:24:02<12:09,  1.58it/s]

Train loss: 0.11090898513793945


 80%|████████  | 4561/5701 [1:24:13<11:55,  1.59it/s]

Train loss: 0.11123865097761154


 80%|████████  | 4571/5701 [1:24:24<11:58,  1.57it/s]

Train loss: 0.08791390061378479


 80%|████████  | 4581/5701 [1:24:35<11:31,  1.62it/s]

Train loss: 0.13397358357906342


 81%|████████  | 4591/5701 [1:24:47<11:57,  1.55it/s]

Train loss: 0.1482042670249939


 81%|████████  | 4601/5701 [1:24:58<11:35,  1.58it/s]

Train loss: 0.11039359867572784


 81%|████████  | 4611/5701 [1:25:08<11:18,  1.61it/s]

Train loss: 0.11210747063159943


 81%|████████  | 4621/5701 [1:25:20<11:26,  1.57it/s]

Train loss: 0.11603214591741562


 81%|████████  | 4631/5701 [1:25:31<11:15,  1.58it/s]

Train loss: 0.08575709164142609


 81%|████████▏ | 4641/5701 [1:25:42<11:07,  1.59it/s]

Train loss: 0.11064131557941437


 82%|████████▏ | 4651/5701 [1:25:53<11:07,  1.57it/s]

Train loss: 0.10854637622833252


 82%|████████▏ | 4661/5701 [1:26:04<10:51,  1.60it/s]

Train loss: 0.11385392397642136


 82%|████████▏ | 4671/5701 [1:26:15<10:53,  1.58it/s]

Train loss: 0.09980769455432892


 82%|████████▏ | 4681/5701 [1:26:26<10:44,  1.58it/s]

Train loss: 0.09033475071191788


 82%|████████▏ | 4691/5701 [1:26:37<10:41,  1.58it/s]

Train loss: 0.131301611661911


 82%|████████▏ | 4701/5701 [1:26:48<10:27,  1.59it/s]

Train loss: 0.11912184953689575


 83%|████████▎ | 4711/5701 [1:27:00<10:39,  1.55it/s]

Train loss: 0.1267673224210739


 83%|████████▎ | 4721/5701 [1:27:10<10:02,  1.63it/s]

Train loss: 0.11451467126607895


 83%|████████▎ | 4731/5701 [1:27:20<10:11,  1.59it/s]

Train loss: 0.10610588639974594


 83%|████████▎ | 4741/5701 [1:27:31<10:05,  1.58it/s]

Train loss: 0.10465595871210098


 83%|████████▎ | 4751/5701 [1:27:43<10:12,  1.55it/s]

Train loss: 0.12197631597518921


 84%|████████▎ | 4761/5701 [1:27:54<09:51,  1.59it/s]

Train loss: 0.1148192510008812


 84%|████████▎ | 4771/5701 [1:28:05<09:49,  1.58it/s]

Train loss: 0.12440355867147446


 84%|████████▍ | 4781/5701 [1:28:15<09:35,  1.60it/s]

Train loss: 0.10111583769321442


 84%|████████▍ | 4791/5701 [1:28:28<09:49,  1.54it/s]

Train loss: 0.13196776807308197


 84%|████████▍ | 4801/5701 [1:28:39<09:35,  1.56it/s]

Train loss: 0.1236438900232315


 84%|████████▍ | 4811/5701 [1:28:50<09:31,  1.56it/s]

Train loss: 0.10630575567483902


 85%|████████▍ | 4821/5701 [1:29:01<09:12,  1.59it/s]

Train loss: 0.1039091944694519


 85%|████████▍ | 4831/5701 [1:29:12<09:09,  1.58it/s]

Train loss: 0.13254611194133759


 85%|████████▍ | 4841/5701 [1:29:22<08:59,  1.59it/s]

Train loss: 0.1106293797492981


 85%|████████▌ | 4851/5701 [1:29:34<08:59,  1.58it/s]

Train loss: 0.10231561958789825


 85%|████████▌ | 4861/5701 [1:29:45<08:48,  1.59it/s]

Train loss: 0.12409871071577072


 85%|████████▌ | 4871/5701 [1:29:57<08:49,  1.57it/s]

Train loss: 0.12184613198041916


 86%|████████▌ | 4881/5701 [1:30:07<08:29,  1.61it/s]

Train loss: 0.11593891680240631


 86%|████████▌ | 4891/5701 [1:30:18<08:30,  1.59it/s]

Train loss: 0.08988859504461288


 86%|████████▌ | 4901/5701 [1:30:29<08:19,  1.60it/s]

Train loss: 0.11903496831655502


 86%|████████▌ | 4911/5701 [1:30:41<08:29,  1.55it/s]

Train loss: 0.12192336469888687


 86%|████████▋ | 4921/5701 [1:30:52<08:07,  1.60it/s]

Train loss: 0.13330695033073425


 86%|████████▋ | 4931/5701 [1:31:03<08:05,  1.59it/s]

Train loss: 0.11381576210260391


 87%|████████▋ | 4941/5701 [1:31:14<08:03,  1.57it/s]

Train loss: 0.15778858959674835


 87%|████████▋ | 4951/5701 [1:31:24<07:48,  1.60it/s]

Train loss: 0.10134699195623398


 87%|████████▋ | 4961/5701 [1:31:36<07:44,  1.59it/s]

Train loss: 0.15901294350624084


 87%|████████▋ | 4971/5701 [1:31:47<07:44,  1.57it/s]

Train loss: 0.1029648557305336


 87%|████████▋ | 4981/5701 [1:31:58<07:36,  1.58it/s]

Train loss: 0.10476025193929672


 88%|████████▊ | 4991/5701 [1:32:10<07:47,  1.52it/s]

Train loss: 0.11493785679340363


 88%|████████▊ | 5001/5701 [1:32:21<09:42,  1.20it/s]

Train loss: 0.09986995160579681


 88%|████████▊ | 5011/5701 [1:32:32<09:21,  1.23it/s]

Train loss: 0.12638543546199799


 88%|████████▊ | 5021/5701 [1:32:43<07:22,  1.54it/s]

Train loss: 0.13937784731388092


 88%|████████▊ | 5031/5701 [1:32:54<07:57,  1.40it/s]

Train loss: 0.12026984989643097


 88%|████████▊ | 5041/5701 [1:33:05<08:06,  1.36it/s]

Train loss: 0.13250088691711426


 89%|████████▊ | 5051/5701 [1:33:16<07:53,  1.37it/s]

Train loss: 0.12516562640666962


 89%|████████▉ | 5061/5701 [1:33:28<09:12,  1.16it/s]

Train loss: 0.14266370236873627


 89%|████████▉ | 5071/5701 [1:33:39<08:42,  1.21it/s]

Train loss: 0.09676497429609299


 89%|████████▉ | 5081/5701 [1:33:50<09:24,  1.10it/s]

Train loss: 0.12096746265888214


 89%|████████▉ | 5091/5701 [1:34:01<09:36,  1.06it/s]

Train loss: 0.12612496316432953


 89%|████████▉ | 5101/5701 [1:34:12<08:32,  1.17it/s]

Train loss: 0.11527655273675919


 90%|████████▉ | 5111/5701 [1:34:22<06:05,  1.62it/s]

Train loss: 0.14086011052131653


 90%|████████▉ | 5121/5701 [1:34:34<06:05,  1.59it/s]

Train loss: 0.11964597553014755


 90%|█████████ | 5131/5701 [1:34:44<05:52,  1.62it/s]

Train loss: 0.10969986766576767


 90%|█████████ | 5141/5701 [1:34:56<05:52,  1.59it/s]

Train loss: 0.14432743191719055


 90%|█████████ | 5151/5701 [1:35:06<05:43,  1.60it/s]

Train loss: 0.12257887423038483


 91%|█████████ | 5161/5701 [1:35:17<05:33,  1.62it/s]

Train loss: 0.11787933111190796


 91%|█████████ | 5171/5701 [1:35:28<05:35,  1.58it/s]

Train loss: 0.09467093646526337


 91%|█████████ | 5181/5701 [1:35:39<05:22,  1.61it/s]

Train loss: 0.10542910546064377


 91%|█████████ | 5191/5701 [1:35:50<05:43,  1.49it/s]

Train loss: 0.10768280178308487


 91%|█████████ | 5201/5701 [1:36:02<05:18,  1.57it/s]

Train loss: 0.12027382105588913


 91%|█████████▏| 5211/5701 [1:36:12<05:03,  1.61it/s]

Train loss: 0.13281437754631042


 92%|█████████▏| 5221/5701 [1:36:24<05:04,  1.58it/s]

Train loss: 0.12074977159500122


 92%|█████████▏| 5231/5701 [1:36:35<04:55,  1.59it/s]

Train loss: 0.1193368062376976


 92%|█████████▏| 5241/5701 [1:36:45<04:40,  1.64it/s]

Train loss: 0.1185351237654686


 92%|█████████▏| 5251/5701 [1:36:56<04:39,  1.61it/s]

Train loss: 0.1250522881746292


 92%|█████████▏| 5261/5701 [1:37:06<04:31,  1.62it/s]

Train loss: 0.15229597687721252


 92%|█████████▏| 5271/5701 [1:37:17<04:25,  1.62it/s]

Train loss: 0.13297966122627258


 93%|█████████▎| 5281/5701 [1:37:28<04:20,  1.61it/s]

Train loss: 0.12597297132015228


 93%|█████████▎| 5291/5701 [1:37:38<04:14,  1.61it/s]

Train loss: 0.1150684580206871


 93%|█████████▎| 5301/5701 [1:37:49<04:11,  1.59it/s]

Train loss: 0.13289523124694824


 93%|█████████▎| 5311/5701 [1:38:00<04:40,  1.39it/s]

Train loss: 0.1094994768500328


 93%|█████████▎| 5321/5701 [1:38:11<04:47,  1.32it/s]

Train loss: 0.1398850828409195


 94%|█████████▎| 5331/5701 [1:38:22<04:55,  1.25it/s]

Train loss: 0.14260965585708618


 94%|█████████▎| 5341/5701 [1:38:33<04:34,  1.31it/s]

Train loss: 0.12089202553033829


 94%|█████████▍| 5351/5701 [1:38:44<04:44,  1.23it/s]

Train loss: 0.10154079645872116


 94%|█████████▍| 5361/5701 [1:38:54<03:57,  1.43it/s]

Train loss: 0.10863025486469269


 94%|█████████▍| 5371/5701 [1:39:04<03:40,  1.50it/s]

Train loss: 0.14921437203884125


 94%|█████████▍| 5381/5701 [1:39:15<04:25,  1.21it/s]

Train loss: 0.11157889664173126


 95%|█████████▍| 5391/5701 [1:39:26<03:39,  1.41it/s]

Train loss: 0.12141311168670654


 95%|█████████▍| 5401/5701 [1:39:37<04:29,  1.11it/s]

Train loss: 0.09845677763223648


 95%|█████████▍| 5411/5701 [1:39:48<04:20,  1.11it/s]

Train loss: 0.11333958059549332


 95%|█████████▌| 5421/5701 [1:39:59<04:44,  1.01s/it]

Train loss: 0.12869107723236084


 95%|█████████▌| 5431/5701 [1:40:10<04:01,  1.12it/s]

Train loss: 0.12519632279872894


 95%|█████████▌| 5441/5701 [1:40:20<03:02,  1.42it/s]

Train loss: 0.1076367050409317


 96%|█████████▌| 5451/5701 [1:40:31<03:18,  1.26it/s]

Train loss: 0.11423680931329727


 96%|█████████▌| 5461/5701 [1:40:42<03:27,  1.16it/s]

Train loss: 0.12733611464500427


 96%|█████████▌| 5471/5701 [1:40:52<02:43,  1.41it/s]

Train loss: 0.1288297176361084


 96%|█████████▌| 5481/5701 [1:41:03<02:40,  1.37it/s]

Train loss: 0.14926724135875702


 96%|█████████▋| 5491/5701 [1:41:14<02:12,  1.58it/s]

Train loss: 0.1208476573228836


 96%|█████████▋| 5501/5701 [1:41:24<02:19,  1.44it/s]

Train loss: 0.1326851099729538


 97%|█████████▋| 5511/5701 [1:41:35<02:03,  1.53it/s]

Train loss: 0.13069985806941986


 97%|█████████▋| 5521/5701 [1:41:47<02:16,  1.32it/s]

Train loss: 0.1261477917432785


 97%|█████████▋| 5531/5701 [1:41:57<02:01,  1.40it/s]

Train loss: 0.11628115177154541


 97%|█████████▋| 5541/5701 [1:42:08<02:13,  1.20it/s]

Train loss: 0.1257672756910324


 97%|█████████▋| 5551/5701 [1:42:19<01:51,  1.35it/s]

Train loss: 0.10914316028356552


 98%|█████████▊| 5561/5701 [1:42:30<01:28,  1.57it/s]

Train loss: 0.13450416922569275


 98%|█████████▊| 5571/5701 [1:42:42<02:31,  1.17s/it]

Train loss: 0.09596767276525497


 98%|█████████▊| 5581/5701 [1:42:53<02:40,  1.34s/it]

Train loss: 0.11106591671705246


 98%|█████████▊| 5591/5701 [1:43:04<02:18,  1.26s/it]

Train loss: 0.09833165258169174


 98%|█████████▊| 5601/5701 [1:43:15<02:03,  1.23s/it]

Train loss: 0.12566913664340973


 98%|█████████▊| 5611/5701 [1:43:27<02:04,  1.38s/it]

Train loss: 0.14598266780376434


 99%|█████████▊| 5621/5701 [1:43:39<02:28,  1.86s/it]

Train loss: 0.12997232377529144


 99%|█████████▉| 5631/5701 [1:43:50<02:12,  1.90s/it]

Train loss: 0.13332776725292206


 99%|█████████▉| 5641/5701 [1:44:01<02:06,  2.10s/it]

Train loss: 0.09228552132844925


 99%|█████████▉| 5651/5701 [1:44:13<02:07,  2.55s/it]

Train loss: 0.14629046618938446


 99%|█████████▉| 5661/5701 [1:44:23<01:19,  1.99s/it]

Train loss: 0.1085641011595726


 99%|█████████▉| 5671/5701 [1:44:34<01:05,  2.19s/it]

Train loss: 0.10938552767038345


100%|█████████▉| 5681/5701 [1:44:45<00:41,  2.09s/it]

Train loss: 0.09226936101913452


100%|█████████▉| 5691/5701 [1:44:52<00:11,  1.13s/it]

Train loss: 0.11949408054351807


100%|██████████| 5701/5701 [1:44:58<00:00,  1.10s/it]


Train loss: 0.11943018436431885


Validation: 100%|██████████| 41/41 [00:52<00:00,  1.28s/it]


Validation loss = [tensor(0.2578, device='cuda:0', grad_fn=<DivBackward0>), tensor(0.2334, device='cuda:0', grad_fn=<DivBackward0>), tensor(0.2331, device='cuda:0', grad_fn=<DivBackward0>)]
Epoch 5


  0%|          | 1/5701 [00:12<19:42:07, 12.44s/it]

Train loss: 0.008374089375138283


  0%|          | 11/5701 [00:22<3:21:05,  2.12s/it]

Train loss: 0.13022422790527344


  0%|          | 21/5701 [00:34<3:39:52,  2.32s/it]

Train loss: 0.10686840862035751


  1%|          | 31/5701 [00:45<3:31:46,  2.24s/it]

Train loss: 0.10368472337722778


  1%|          | 41/5701 [00:56<3:28:12,  2.21s/it]

Train loss: 0.12308857589960098


  1%|          | 51/5701 [01:06<3:18:41,  2.11s/it]

Train loss: 0.11962471157312393


  1%|          | 61/5701 [01:17<3:21:07,  2.14s/it]

Train loss: 0.10073204338550568


  1%|          | 71/5701 [01:27<3:18:27,  2.12s/it]

Train loss: 0.10746077448129654


  1%|▏         | 81/5701 [01:38<3:14:19,  2.07s/it]

Train loss: 0.10489214956760406


  2%|▏         | 91/5701 [01:49<3:30:32,  2.25s/it]

Train loss: 0.10547282546758652


  2%|▏         | 101/5701 [02:00<3:39:28,  2.35s/it]

Train loss: 0.13983406126499176


  2%|▏         | 111/5701 [02:11<3:19:57,  2.15s/it]

Train loss: 0.11995750665664673


  2%|▏         | 121/5701 [02:22<3:22:19,  2.18s/it]

Train loss: 0.10900502651929855


  2%|▏         | 131/5701 [02:33<3:28:10,  2.24s/it]

Train loss: 0.14702045917510986


  2%|▏         | 141/5701 [02:44<3:30:57,  2.28s/it]

Train loss: 0.10766543447971344


  3%|▎         | 151/5701 [02:54<2:58:16,  1.93s/it]

Train loss: 0.10662199556827545


  3%|▎         | 161/5701 [03:05<3:09:07,  2.05s/it]

Train loss: 0.1141788586974144


  3%|▎         | 171/5701 [03:16<3:06:08,  2.02s/it]

Train loss: 0.11408283561468124


  3%|▎         | 181/5701 [03:27<3:08:18,  2.05s/it]

Train loss: 0.10570992529392242


  3%|▎         | 191/5701 [03:39<3:17:53,  2.15s/it]

Train loss: 0.11357919126749039


  4%|▎         | 201/5701 [03:50<3:06:22,  2.03s/it]

Train loss: 0.12984657287597656


  4%|▎         | 211/5701 [04:01<3:29:14,  2.29s/it]

Train loss: 0.12579940259456635


  4%|▍         | 221/5701 [04:12<3:08:57,  2.07s/it]

Train loss: 0.12452816963195801


  4%|▍         | 231/5701 [04:22<3:01:58,  2.00s/it]

Train loss: 0.13348518311977386


  4%|▍         | 241/5701 [04:32<3:04:18,  2.03s/it]

Train loss: 0.14905618131160736


  4%|▍         | 251/5701 [04:43<2:37:11,  1.73s/it]

Train loss: 0.09780014306306839


  5%|▍         | 261/5701 [04:53<2:26:37,  1.62s/it]

Train loss: 0.08196859806776047


  5%|▍         | 271/5701 [05:03<1:54:40,  1.27s/it]

Train loss: 0.17875823378562927


  5%|▍         | 281/5701 [05:14<1:29:34,  1.01it/s]

Train loss: 0.10012626647949219


  5%|▌         | 291/5701 [05:23<1:02:52,  1.43it/s]

Train loss: 0.11483898013830185


  5%|▌         | 301/5701 [05:34<1:05:16,  1.38it/s]

Train loss: 0.13015130162239075


  5%|▌         | 311/5701 [05:44<1:01:01,  1.47it/s]

Train loss: 0.12005044519901276


  6%|▌         | 321/5701 [05:56<1:02:23,  1.44it/s]

Train loss: 0.13192163407802582


  6%|▌         | 331/5701 [06:07<56:26,  1.59it/s]  

Train loss: 0.10774184763431549


  6%|▌         | 341/5701 [06:18<57:30,  1.55it/s]  

Train loss: 0.12960918247699738


  6%|▌         | 351/5701 [06:28<55:38,  1.60it/s]  

Train loss: 0.11424702405929565


  6%|▋         | 361/5701 [06:40<56:21,  1.58it/s]  

Train loss: 0.1365259438753128


  7%|▋         | 371/5701 [06:50<55:33,  1.60it/s]  

Train loss: 0.1319098174571991


  7%|▋         | 381/5701 [07:01<55:50,  1.59it/s]  

Train loss: 0.11533761024475098


  7%|▋         | 391/5701 [07:13<56:18,  1.57it/s]  

Train loss: 0.11139613389968872


  7%|▋         | 401/5701 [07:23<54:51,  1.61it/s]  

Train loss: 0.10737619549036026


  7%|▋         | 411/5701 [07:35<56:33,  1.56it/s]  

Train loss: 0.11364060640335083


  7%|▋         | 421/5701 [07:46<56:02,  1.57it/s]  

Train loss: 0.11325597018003464


  8%|▊         | 431/5701 [07:56<54:30,  1.61it/s]  

Train loss: 0.1288301646709442


  8%|▊         | 441/5701 [08:07<54:38,  1.60it/s]  

Train loss: 0.08588802069425583


  8%|▊         | 451/5701 [08:18<55:10,  1.59it/s]  

Train loss: 0.09951180219650269


  8%|▊         | 461/5701 [08:29<55:20,  1.58it/s]  

Train loss: 0.14135898649692535


  8%|▊         | 471/5701 [08:40<54:58,  1.59it/s]  

Train loss: 0.13067768514156342


  8%|▊         | 481/5701 [08:51<54:57,  1.58it/s]  

Train loss: 0.10857423394918442


  9%|▊         | 491/5701 [09:01<54:27,  1.59it/s]  

Train loss: 0.12553681433200836


  9%|▉         | 501/5701 [09:11<54:13,  1.60it/s]  

Train loss: 0.11639951914548874


  9%|▉         | 511/5701 [09:22<54:35,  1.58it/s]  

Train loss: 0.10740634053945541


  9%|▉         | 521/5701 [09:34<55:21,  1.56it/s]  

Train loss: 0.10589021444320679


  9%|▉         | 531/5701 [09:44<54:15,  1.59it/s]  

Train loss: 0.11285751312971115


  9%|▉         | 541/5701 [09:55<55:38,  1.55it/s]  

Train loss: 0.11246363073587418


 10%|▉         | 551/5701 [10:06<54:31,  1.57it/s]  

Train loss: 0.10507398098707199


 10%|▉         | 561/5701 [10:18<56:46,  1.51it/s]  

Train loss: 0.12392496317625046


 10%|█         | 571/5701 [10:29<55:12,  1.55it/s]  

Train loss: 0.12823763489723206


 10%|█         | 581/5701 [10:40<55:36,  1.53it/s]  

Train loss: 0.10687471926212311


 10%|█         | 591/5701 [10:51<56:16,  1.51it/s]  

Train loss: 0.11332676559686661


 11%|█         | 601/5701 [11:02<55:22,  1.54it/s]  

Train loss: 0.1362328827381134


 11%|█         | 611/5701 [11:13<55:18,  1.53it/s]  

Train loss: 0.12010020017623901


 11%|█         | 621/5701 [11:25<57:06,  1.48it/s]  

Train loss: 0.14987781643867493


 11%|█         | 631/5701 [11:37<58:55,  1.43it/s]  

Train loss: 0.12213273346424103


 11%|█         | 641/5701 [11:48<59:25,  1.42it/s]  

Train loss: 0.1255946010351181


 11%|█▏        | 651/5701 [11:59<1:01:54,  1.36it/s]

Train loss: 0.10320200771093369


 12%|█▏        | 661/5701 [12:10<1:03:58,  1.31it/s]

Train loss: 0.13882054388523102


 12%|█▏        | 671/5701 [12:21<1:03:43,  1.32it/s]

Train loss: 0.13239358365535736


 12%|█▏        | 681/5701 [12:32<1:04:02,  1.31it/s]

Train loss: 0.10905315726995468


 12%|█▏        | 691/5701 [12:44<1:04:26,  1.30it/s]

Train loss: 0.10935584455728531


 12%|█▏        | 701/5701 [12:55<1:03:28,  1.31it/s]

Train loss: 0.12042087316513062


 12%|█▏        | 711/5701 [13:06<1:03:17,  1.31it/s]

Train loss: 0.12635384500026703


 13%|█▎        | 721/5701 [13:17<1:04:32,  1.29it/s]

Train loss: 0.09552580118179321


 13%|█▎        | 731/5701 [13:26<57:50,  1.43it/s]  

Train loss: 0.14320015907287598


 13%|█▎        | 741/5701 [13:38<1:03:20,  1.31it/s]

Train loss: 0.1564343273639679


 13%|█▎        | 751/5701 [13:49<1:03:01,  1.31it/s]

Train loss: 0.1131935864686966


 13%|█▎        | 761/5701 [14:00<1:04:16,  1.28it/s]

Train loss: 0.11193076521158218


 14%|█▎        | 771/5701 [14:11<1:03:07,  1.30it/s]

Train loss: 0.12563727796077728


 14%|█▎        | 781/5701 [14:22<1:02:13,  1.32it/s]

Train loss: 0.12559689581394196


 14%|█▍        | 791/5701 [14:33<1:02:48,  1.30it/s]

Train loss: 0.13204382359981537


 14%|█▍        | 801/5701 [14:45<1:03:17,  1.29it/s]

Train loss: 0.13295771181583405


 14%|█▍        | 811/5701 [14:56<1:02:31,  1.30it/s]

Train loss: 0.14245830476284027


 14%|█▍        | 821/5701 [15:06<59:02,  1.38it/s]  

Train loss: 0.11273622512817383


 15%|█▍        | 831/5701 [15:16<59:59,  1.35it/s]  

Train loss: 0.13259273767471313


 15%|█▍        | 841/5701 [15:28<1:03:10,  1.28it/s]

Train loss: 0.1020379438996315


 15%|█▍        | 851/5701 [15:39<1:01:55,  1.31it/s]

Train loss: 0.10618763417005539


 15%|█▌        | 861/5701 [15:50<1:01:09,  1.32it/s]

Train loss: 0.09665267914533615


 15%|█▌        | 871/5701 [16:01<1:00:36,  1.33it/s]

Train loss: 0.13012532889842987


 15%|█▌        | 881/5701 [16:12<1:02:40,  1.28it/s]

Train loss: 0.13178583979606628


 16%|█▌        | 891/5701 [16:22<58:20,  1.37it/s]  

Train loss: 0.10405969619750977


 16%|█▌        | 901/5701 [16:34<1:04:00,  1.25it/s]

Train loss: 0.09947331994771957


 16%|█▌        | 911/5701 [16:45<1:00:24,  1.32it/s]

Train loss: 0.10600413382053375


 16%|█▌        | 921/5701 [16:56<1:01:16,  1.30it/s]

Train loss: 0.10484812408685684


 16%|█▋        | 931/5701 [17:07<58:49,  1.35it/s]  

Train loss: 0.12566953897476196


 17%|█▋        | 941/5701 [17:17<58:56,  1.35it/s]  

Train loss: 0.10631269216537476


 17%|█▋        | 951/5701 [17:28<59:58,  1.32it/s]  

Train loss: 0.12227814644575119


 17%|█▋        | 961/5701 [17:39<1:01:13,  1.29it/s]

Train loss: 0.11524760723114014


 17%|█▋        | 971/5701 [17:50<58:57,  1.34it/s]  

Train loss: 0.08986504375934601


 17%|█▋        | 981/5701 [18:01<59:30,  1.32it/s]  

Train loss: 0.1263134777545929


 17%|█▋        | 991/5701 [18:12<59:58,  1.31it/s]  

Train loss: 0.12045402824878693


 18%|█▊        | 1001/5701 [18:24<1:01:15,  1.28it/s]

Train loss: 0.11636006087064743


 18%|█▊        | 1011/5701 [18:34<57:48,  1.35it/s]  

Train loss: 0.11907362937927246


 18%|█▊        | 1021/5701 [18:45<1:00:04,  1.30it/s]

Train loss: 0.13208822906017303


 18%|█▊        | 1031/5701 [18:56<59:23,  1.31it/s]  

Train loss: 0.12638644874095917


 18%|█▊        | 1041/5701 [19:07<59:08,  1.31it/s]  

Train loss: 0.1460869163274765


 18%|█▊        | 1051/5701 [19:18<59:06,  1.31it/s]  

Train loss: 0.11463314294815063


 19%|█▊        | 1061/5701 [19:28<56:29,  1.37it/s]  

Train loss: 0.09589388221502304


 19%|█▉        | 1071/5701 [19:40<1:00:27,  1.28it/s]

Train loss: 0.09643573313951492


 19%|█▉        | 1081/5701 [19:51<57:47,  1.33it/s]  

Train loss: 0.1283128410577774


 19%|█▉        | 1091/5701 [20:02<59:09,  1.30it/s]  

Train loss: 0.11428636312484741


 19%|█▉        | 1101/5701 [20:13<57:28,  1.33it/s]  

Train loss: 0.12582886219024658


 19%|█▉        | 1111/5701 [20:23<56:59,  1.34it/s]  

Train loss: 0.10869442671537399


 20%|█▉        | 1121/5701 [20:34<58:17,  1.31it/s]  

Train loss: 0.08595741540193558


 20%|█▉        | 1131/5701 [20:45<57:59,  1.31it/s]  

Train loss: 0.10906919091939926


 20%|██        | 1141/5701 [20:56<57:55,  1.31it/s]  

Train loss: 0.12935875356197357


 20%|██        | 1151/5701 [21:07<55:59,  1.35it/s]  

Train loss: 0.1389276534318924


 20%|██        | 1161/5701 [21:19<1:00:29,  1.25it/s]

Train loss: 0.09065865725278854


 21%|██        | 1171/5701 [21:29<54:44,  1.38it/s]  

Train loss: 0.13097159564495087


 21%|██        | 1181/5701 [21:39<56:45,  1.33it/s]  

Train loss: 0.14110969007015228


 21%|██        | 1191/5701 [21:50<57:08,  1.32it/s]  

Train loss: 0.12298037856817245


 21%|██        | 1201/5701 [22:02<57:49,  1.30it/s]  

Train loss: 0.08879844099283218


 21%|██        | 1211/5701 [22:13<57:38,  1.30it/s]  

Train loss: 0.1192879006266594


 21%|██▏       | 1221/5701 [22:23<53:57,  1.38it/s]  

Train loss: 0.11324780434370041


 22%|██▏       | 1231/5701 [22:35<58:53,  1.26it/s]  

Train loss: 0.11084841936826706


 22%|██▏       | 1241/5701 [22:46<57:36,  1.29it/s]  

Train loss: 0.17222081124782562


 22%|██▏       | 1251/5701 [22:57<55:58,  1.33it/s]  

Train loss: 0.14067672193050385


 22%|██▏       | 1261/5701 [23:07<54:20,  1.36it/s]  

Train loss: 0.12240065634250641


 22%|██▏       | 1271/5701 [23:18<55:17,  1.34it/s]  

Train loss: 0.14712093770503998


 22%|██▏       | 1281/5701 [23:29<57:00,  1.29it/s]  

Train loss: 0.0988515317440033


 23%|██▎       | 1291/5701 [23:40<56:40,  1.30it/s]  

Train loss: 0.10953648388385773


 23%|██▎       | 1301/5701 [23:51<55:00,  1.33it/s]  

Train loss: 0.10623618215322495


 23%|██▎       | 1311/5701 [24:02<55:50,  1.31it/s]  

Train loss: 0.12436695396900177


 23%|██▎       | 1321/5701 [24:13<54:13,  1.35it/s]  

Train loss: 0.11449003219604492


 23%|██▎       | 1331/5701 [24:24<56:37,  1.29it/s]  

Train loss: 0.1277163326740265


 24%|██▎       | 1341/5701 [24:35<55:37,  1.31it/s]  

Train loss: 0.10844234377145767


 24%|██▎       | 1351/5701 [24:46<55:08,  1.31it/s]  

Train loss: 0.11328873783349991


 24%|██▍       | 1361/5701 [24:57<53:34,  1.35it/s]  

Train loss: 0.12037412077188492


 24%|██▍       | 1371/5701 [25:09<57:17,  1.26it/s]  

Train loss: 0.09826827049255371


 24%|██▍       | 1381/5701 [25:18<52:12,  1.38it/s]  

Train loss: 0.11733432114124298


 24%|██▍       | 1391/5701 [25:30<54:40,  1.31it/s]  

Train loss: 0.10082723200321198


 25%|██▍       | 1401/5701 [25:40<54:15,  1.32it/s]  

Train loss: 0.10543914139270782


 25%|██▍       | 1411/5701 [25:52<54:34,  1.31it/s]  

Train loss: 0.13790486752986908


 25%|██▍       | 1421/5701 [26:02<52:41,  1.35it/s]  

Train loss: 0.15798072516918182


 25%|██▌       | 1431/5701 [26:13<54:29,  1.31it/s]  

Train loss: 0.11298437416553497


 25%|██▌       | 1441/5701 [26:23<52:22,  1.36it/s]  

Train loss: 0.14909277856349945


 25%|██▌       | 1451/5701 [26:34<53:19,  1.33it/s]  

Train loss: 0.1362295150756836


 26%|██▌       | 1461/5701 [26:46<54:57,  1.29it/s]  

Train loss: 0.11798371374607086


 26%|██▌       | 1471/5701 [26:57<53:36,  1.31it/s]  

Train loss: 0.09925471991300583


 26%|██▌       | 1481/5701 [27:08<53:14,  1.32it/s]  

Train loss: 0.1587788313627243


 26%|██▌       | 1491/5701 [27:19<53:46,  1.30it/s]  

Train loss: 0.14118316769599915


 26%|██▋       | 1501/5701 [27:29<52:13,  1.34it/s]  

Train loss: 0.1473805457353592


 27%|██▋       | 1511/5701 [27:41<53:26,  1.31it/s]  

Train loss: 0.11108729988336563


 27%|██▋       | 1521/5701 [27:51<51:07,  1.36it/s]  

Train loss: 0.10075072199106216


 27%|██▋       | 1531/5701 [28:02<54:22,  1.28it/s]  

Train loss: 0.11012446880340576


 27%|██▋       | 1541/5701 [28:14<52:55,  1.31it/s]  

Train loss: 0.11820385605096817


 27%|██▋       | 1551/5701 [28:24<50:26,  1.37it/s]  

Train loss: 0.11969783157110214


 27%|██▋       | 1561/5701 [28:35<54:09,  1.27it/s]  

Train loss: 0.12830254435539246


 28%|██▊       | 1571/5701 [28:45<50:07,  1.37it/s]  

Train loss: 0.11594674736261368


 28%|██▊       | 1581/5701 [28:57<52:54,  1.30it/s]  

Train loss: 0.09701692312955856


 28%|██▊       | 1591/5701 [29:08<52:17,  1.31it/s]  

Train loss: 0.13732950389385223


 28%|██▊       | 1601/5701 [29:19<52:22,  1.30it/s]  

Train loss: 0.09920697659254074


 28%|██▊       | 1611/5701 [29:29<49:50,  1.37it/s]  

Train loss: 0.15005527436733246


 28%|██▊       | 1621/5701 [29:40<51:59,  1.31it/s]  

Train loss: 0.14820677042007446


 29%|██▊       | 1631/5701 [29:51<52:56,  1.28it/s]  

Train loss: 0.08876865357160568


 29%|██▉       | 1641/5701 [30:02<50:08,  1.35it/s]  

Train loss: 0.11325474828481674


 29%|██▉       | 1651/5701 [30:13<51:45,  1.30it/s]  

Train loss: 0.08317355811595917


 29%|██▉       | 1661/5701 [30:25<52:33,  1.28it/s]  

Train loss: 0.11321469396352768


 29%|██▉       | 1671/5701 [30:36<50:42,  1.32it/s]  

Train loss: 0.13963879644870758


 29%|██▉       | 1681/5701 [30:46<50:05,  1.34it/s]  

Train loss: 0.11135252565145493


 30%|██▉       | 1691/5701 [30:57<51:10,  1.31it/s]  

Train loss: 0.12694577872753143


 30%|██▉       | 1701/5701 [31:09<51:44,  1.29it/s]  

Train loss: 0.08728053420782089


 30%|███       | 1711/5701 [31:19<49:18,  1.35it/s]  

Train loss: 0.12446705251932144


 30%|███       | 1721/5701 [31:29<48:23,  1.37it/s]  

Train loss: 0.09705368429422379


 30%|███       | 1731/5701 [31:40<51:07,  1.29it/s]  

Train loss: 0.11352985352277756


 31%|███       | 1741/5701 [31:51<50:10,  1.32it/s]  

Train loss: 0.1228092685341835


 31%|███       | 1751/5701 [32:02<48:54,  1.35it/s]  

Train loss: 0.11369321495294571


 31%|███       | 1761/5701 [32:13<49:24,  1.33it/s]  

Train loss: 0.09883569926023483


 31%|███       | 1771/5701 [32:24<51:08,  1.28it/s]  

Train loss: 0.12812185287475586


 31%|███       | 1781/5701 [32:35<49:06,  1.33it/s]  

Train loss: 0.11516089737415314


 31%|███▏      | 1791/5701 [32:45<48:31,  1.34it/s]  

Train loss: 0.09958471357822418


 32%|███▏      | 1801/5701 [32:56<49:35,  1.31it/s]  

Train loss: 0.10028566420078278


 32%|███▏      | 1811/5701 [33:07<49:16,  1.32it/s]  

Train loss: 0.100421242415905


 32%|███▏      | 1821/5701 [33:19<49:56,  1.29it/s]  

Train loss: 0.10500525683164597


 32%|███▏      | 1831/5701 [33:30<48:46,  1.32it/s]  

Train loss: 0.12198711931705475


 32%|███▏      | 1841/5701 [33:40<48:10,  1.34it/s]  

Train loss: 0.108228400349617


 32%|███▏      | 1851/5701 [33:52<49:56,  1.28it/s]  

Train loss: 0.0891636461019516


 33%|███▎      | 1861/5701 [34:02<47:08,  1.36it/s]  

Train loss: 0.12631598114967346


 33%|███▎      | 1871/5701 [34:13<47:55,  1.33it/s]  

Train loss: 0.12698739767074585


 33%|███▎      | 1881/5701 [34:23<47:46,  1.33it/s]  

Train loss: 0.10920687019824982


 33%|███▎      | 1891/5701 [34:34<48:25,  1.31it/s]  

Train loss: 0.12173555046319962


 33%|███▎      | 1901/5701 [34:46<48:47,  1.30it/s]  

Train loss: 0.09018459171056747


 34%|███▎      | 1911/5701 [34:57<48:38,  1.30it/s]  

Train loss: 0.12412263453006744


 34%|███▎      | 1921/5701 [35:08<47:12,  1.33it/s]  

Train loss: 0.13347777724266052


 34%|███▍      | 1931/5701 [35:19<48:26,  1.30it/s]  

Train loss: 0.11947337538003922


 34%|███▍      | 1941/5701 [35:30<46:58,  1.33it/s]  

Train loss: 0.10394106060266495


 34%|███▍      | 1951/5701 [35:42<49:53,  1.25it/s]  

Train loss: 0.12681131064891815


 34%|███▍      | 1961/5701 [35:53<47:43,  1.31it/s]  

Train loss: 0.14078138768672943


 35%|███▍      | 1971/5701 [36:04<46:41,  1.33it/s]  

Train loss: 0.1259133368730545


 35%|███▍      | 1981/5701 [36:14<45:25,  1.37it/s]  

Train loss: 0.132197305560112


 35%|███▍      | 1991/5701 [36:24<46:27,  1.33it/s]  

Train loss: 0.12565618753433228


 35%|███▌      | 2001/5701 [36:36<47:44,  1.29it/s]  

Train loss: 0.1330903321504593


 35%|███▌      | 2011/5701 [36:47<47:12,  1.30it/s]  

Train loss: 0.1287662535905838


 35%|███▌      | 2021/5701 [36:58<46:36,  1.32it/s]  

Train loss: 0.10924185812473297


 36%|███▌      | 2031/5701 [37:09<47:20,  1.29it/s]  

Train loss: 0.12212629616260529


 36%|███▌      | 2041/5701 [37:20<46:27,  1.31it/s]  

Train loss: 0.11890405416488647


 36%|███▌      | 2051/5701 [37:30<44:35,  1.36it/s]  

Train loss: 0.11407577991485596


 36%|███▌      | 2061/5701 [37:42<47:35,  1.27it/s]  

Train loss: 0.10066195577383041


 36%|███▋      | 2071/5701 [37:53<45:17,  1.34it/s]  

Train loss: 0.1287483423948288


 37%|███▋      | 2081/5701 [38:04<45:43,  1.32it/s]  

Train loss: 0.13148808479309082


 37%|███▋      | 2091/5701 [38:14<44:53,  1.34it/s]  

Train loss: 0.10611359030008316


 37%|███▋      | 2101/5701 [38:25<46:14,  1.30it/s]  

Train loss: 0.13329969346523285


 37%|███▋      | 2111/5701 [38:36<45:12,  1.32it/s]  

Train loss: 0.10540354251861572


 37%|███▋      | 2121/5701 [38:47<44:53,  1.33it/s]  

Train loss: 0.12147118896245956


 37%|███▋      | 2131/5701 [38:58<45:27,  1.31it/s]  

Train loss: 0.13757465779781342


 38%|███▊      | 2141/5701 [39:09<45:08,  1.31it/s]  

Train loss: 0.12092538923025131


 38%|███▊      | 2151/5701 [39:20<45:23,  1.30it/s]  

Train loss: 0.1228136196732521


 38%|███▊      | 2161/5701 [39:31<44:06,  1.34it/s]  

Train loss: 0.08786594122648239


 38%|███▊      | 2171/5701 [39:42<45:12,  1.30it/s]  

Train loss: 0.11890383064746857


 38%|███▊      | 2181/5701 [39:53<44:00,  1.33it/s]  

Train loss: 0.09419167786836624


 38%|███▊      | 2191/5701 [40:04<44:40,  1.31it/s]  

Train loss: 0.12105143070220947


 39%|███▊      | 2201/5701 [40:14<43:11,  1.35it/s]  

Train loss: 0.16846181452274323


 39%|███▉      | 2211/5701 [40:26<45:14,  1.29it/s]  

Train loss: 0.12091195583343506


 39%|███▉      | 2221/5701 [40:36<42:36,  1.36it/s]  

Train loss: 0.09674444049596786


 39%|███▉      | 2231/5701 [40:47<44:56,  1.29it/s]  

Train loss: 0.11394160985946655


 39%|███▉      | 2241/5701 [40:58<43:06,  1.34it/s]  

Train loss: 0.12337625026702881


 39%|███▉      | 2251/5701 [41:08<42:07,  1.36it/s]  

Train loss: 0.10492771863937378


 40%|███▉      | 2261/5701 [41:20<44:31,  1.29it/s]  

Train loss: 0.10610343515872955


 40%|███▉      | 2271/5701 [41:31<43:42,  1.31it/s]  

Train loss: 0.11304403841495514


 40%|████      | 2281/5701 [41:42<42:48,  1.33it/s]  

Train loss: 0.14027854800224304


 40%|████      | 2291/5701 [41:53<44:22,  1.28it/s]  

Train loss: 0.1092395931482315


 40%|████      | 2301/5701 [42:03<40:49,  1.39it/s]  

Train loss: 0.11464835703372955


 41%|████      | 2311/5701 [42:15<43:57,  1.29it/s]  

Train loss: 0.138874813914299


 41%|████      | 2321/5701 [42:25<42:23,  1.33it/s]  

Train loss: 0.128529891371727


 41%|████      | 2331/5701 [42:37<43:39,  1.29it/s]  

Train loss: 0.14251773059368134


 41%|████      | 2341/5701 [42:47<42:12,  1.33it/s]  

Train loss: 0.09069038927555084


 41%|████      | 2351/5701 [42:58<42:11,  1.32it/s]  

Train loss: 0.11961819231510162


 41%|████▏     | 2361/5701 [43:09<42:25,  1.31it/s]  

Train loss: 0.12612824141979218


 42%|████▏     | 2371/5701 [43:21<42:46,  1.30it/s]  

Train loss: 0.10902810096740723


 42%|████▏     | 2381/5701 [43:31<40:57,  1.35it/s]  

Train loss: 0.14947105944156647


 42%|████▏     | 2391/5701 [43:43<43:01,  1.28it/s]  

Train loss: 0.1390673965215683


 42%|████▏     | 2401/5701 [43:53<41:01,  1.34it/s]  

Train loss: 0.11805751174688339


 42%|████▏     | 2411/5701 [44:05<42:14,  1.30it/s]  

Train loss: 0.13932260870933533


 42%|████▏     | 2421/5701 [44:16<42:20,  1.29it/s]  

Train loss: 0.0993899405002594


 43%|████▎     | 2431/5701 [44:25<37:38,  1.45it/s]  

Train loss: 0.14148633182048798


 43%|████▎     | 2441/5701 [44:36<40:48,  1.33it/s]  

Train loss: 0.12637704610824585


 43%|████▎     | 2451/5701 [44:48<42:49,  1.26it/s]  

Train loss: 0.11233451217412949


 43%|████▎     | 2461/5701 [44:58<38:30,  1.40it/s]  

Train loss: 0.12831400334835052


 43%|████▎     | 2471/5701 [45:10<41:42,  1.29it/s]  

Train loss: 0.1354503631591797


 44%|████▎     | 2481/5701 [45:20<39:23,  1.36it/s]  

Train loss: 0.1344226747751236


 44%|████▎     | 2491/5701 [45:32<43:10,  1.24it/s]  

Train loss: 0.09560880810022354


 44%|████▍     | 2501/5701 [45:43<39:44,  1.34it/s]  

Train loss: 0.1115778237581253


 44%|████▍     | 2511/5701 [45:54<39:31,  1.34it/s]  

Train loss: 0.1323174089193344


 44%|████▍     | 2521/5701 [46:05<39:40,  1.34it/s]  

Train loss: 0.11210396140813828


 44%|████▍     | 2531/5701 [46:16<40:07,  1.32it/s]  

Train loss: 0.11540845781564713


 45%|████▍     | 2541/5701 [46:26<38:25,  1.37it/s]  

Train loss: 0.14068296551704407


 45%|████▍     | 2551/5701 [46:38<40:41,  1.29it/s]  

Train loss: 0.10363707691431046


 45%|████▍     | 2561/5701 [46:48<38:05,  1.37it/s]  

Train loss: 0.11153781414031982


 45%|████▌     | 2571/5701 [47:00<40:37,  1.28it/s]  

Train loss: 0.13667011260986328


 45%|████▌     | 2581/5701 [47:10<38:43,  1.34it/s]  

Train loss: 0.09499358385801315


 45%|████▌     | 2591/5701 [47:21<38:32,  1.34it/s]  

Train loss: 0.10910449177026749


 46%|████▌     | 2601/5701 [47:33<40:01,  1.29it/s]  

Train loss: 0.12547102570533752


 46%|████▌     | 2611/5701 [47:42<36:09,  1.42it/s]  

Train loss: 0.12170928716659546


 46%|████▌     | 2621/5701 [47:54<39:08,  1.31it/s]  

Train loss: 0.11802226305007935


 46%|████▌     | 2631/5701 [48:04<38:10,  1.34it/s]  

Train loss: 0.10367170721292496


 46%|████▋     | 2641/5701 [48:15<38:13,  1.33it/s]  

Train loss: 0.1175510436296463


 47%|████▋     | 2651/5701 [48:26<37:46,  1.35it/s]  

Train loss: 0.09163986891508102


 47%|████▋     | 2661/5701 [48:38<38:59,  1.30it/s]  

Train loss: 0.15458054840564728


 47%|████▋     | 2671/5701 [48:49<38:03,  1.33it/s]  

Train loss: 0.10467572510242462


 47%|████▋     | 2681/5701 [49:00<38:39,  1.30it/s]  

Train loss: 0.12285973876714706


 47%|████▋     | 2691/5701 [49:11<37:33,  1.34it/s]  

Train loss: 0.1260218471288681


 47%|████▋     | 2701/5701 [49:21<35:45,  1.40it/s]  

Train loss: 0.1276630461215973


 48%|████▊     | 2711/5701 [49:32<37:39,  1.32it/s]  

Train loss: 0.13867005705833435


 48%|████▊     | 2721/5701 [49:43<37:14,  1.33it/s]  

Train loss: 0.10854219645261765


 48%|████▊     | 2731/5701 [49:55<38:49,  1.27it/s]  

Train loss: 0.11862387508153915


 48%|████▊     | 2741/5701 [50:06<36:52,  1.34it/s]  

Train loss: 0.12016581743955612


 48%|████▊     | 2751/5701 [50:16<36:33,  1.34it/s]  

Train loss: 0.09879440814256668


 48%|████▊     | 2761/5701 [50:28<38:11,  1.28it/s]  

Train loss: 0.12747707962989807


 49%|████▊     | 2771/5701 [50:39<36:19,  1.34it/s]  

Train loss: 0.13207168877124786


 49%|████▉     | 2781/5701 [50:50<37:04,  1.31it/s]  

Train loss: 0.1155829057097435


 49%|████▉     | 2791/5701 [51:01<36:17,  1.34it/s]  

Train loss: 0.10774564743041992


 49%|████▉     | 2801/5701 [51:12<35:58,  1.34it/s]  

Train loss: 0.13878904283046722


 49%|████▉     | 2811/5701 [51:22<35:38,  1.35it/s]  

Train loss: 0.1068306565284729


 49%|████▉     | 2821/5701 [51:32<35:01,  1.37it/s]  

Train loss: 0.10694154351949692


 50%|████▉     | 2831/5701 [51:44<36:48,  1.30it/s]  

Train loss: 0.0973382517695427


 50%|████▉     | 2841/5701 [51:55<36:06,  1.32it/s]  

Train loss: 0.13034093379974365


 50%|█████     | 2851/5701 [52:07<37:26,  1.27it/s]  

Train loss: 0.11556600779294968


 50%|█████     | 2861/5701 [52:17<34:53,  1.36it/s]  

Train loss: 0.1024099737405777


 50%|█████     | 2871/5701 [52:29<35:53,  1.31it/s]  

Train loss: 0.08581700176000595


 51%|█████     | 2881/5701 [52:39<33:52,  1.39it/s]  

Train loss: 0.11906403303146362


 51%|█████     | 2891/5701 [52:50<35:06,  1.33it/s]  

Train loss: 0.12625963985919952


 51%|█████     | 2901/5701 [53:01<35:26,  1.32it/s]  

Train loss: 0.10991634428501129


 51%|█████     | 2911/5701 [53:12<35:54,  1.30it/s]  

Train loss: 0.12779267132282257


 51%|█████     | 2921/5701 [53:23<34:38,  1.34it/s]  

Train loss: 0.10274399816989899


 51%|█████▏    | 2931/5701 [53:34<33:43,  1.37it/s]  

Train loss: 0.15462110936641693


 52%|█████▏    | 2941/5701 [53:45<34:36,  1.33it/s]  

Train loss: 0.14238671958446503


 52%|█████▏    | 2951/5701 [53:56<34:53,  1.31it/s]  

Train loss: 0.1255333423614502


 52%|█████▏    | 2961/5701 [54:07<33:46,  1.35it/s]  

Train loss: 0.09095152467489243


 52%|█████▏    | 2971/5701 [54:17<34:00,  1.34it/s]  

Train loss: 0.10984613746404648


 52%|█████▏    | 2981/5701 [54:28<32:52,  1.38it/s]  

Train loss: 0.12952639162540436


 52%|█████▏    | 2991/5701 [54:39<34:14,  1.32it/s]  

Train loss: 0.09885655343532562


 53%|█████▎    | 3001/5701 [54:50<33:52,  1.33it/s]  

Train loss: 0.12039502710103989


 53%|█████▎    | 3011/5701 [55:01<33:30,  1.34it/s]  

Train loss: 0.12329580634832382


 53%|█████▎    | 3021/5701 [55:12<33:56,  1.32it/s]  

Train loss: 0.14373157918453217


 53%|█████▎    | 3031/5701 [55:22<33:14,  1.34it/s]  

Train loss: 0.10251015424728394


 53%|█████▎    | 3041/5701 [55:34<34:03,  1.30it/s]  

Train loss: 0.10659165680408478


 54%|█████▎    | 3051/5701 [55:45<32:38,  1.35it/s]  

Train loss: 0.13073894381523132


 54%|█████▎    | 3061/5701 [55:55<32:32,  1.35it/s]  

Train loss: 0.13118287920951843


 54%|█████▍    | 3071/5701 [56:06<33:05,  1.32it/s]  

Train loss: 0.11369458585977554


 54%|█████▍    | 3081/5701 [56:17<32:43,  1.33it/s]  

Train loss: 0.13222388923168182


 54%|█████▍    | 3091/5701 [56:28<33:04,  1.31it/s]  

Train loss: 0.11831682920455933


 54%|█████▍    | 3101/5701 [56:40<33:07,  1.31it/s]  

Train loss: 0.10680099576711655


 55%|█████▍    | 3111/5701 [56:50<31:25,  1.37it/s]  

Train loss: 0.10156162828207016


 55%|█████▍    | 3121/5701 [57:01<31:57,  1.35it/s]  

Train loss: 0.11500697582960129


 55%|█████▍    | 3131/5701 [57:13<33:30,  1.28it/s]  

Train loss: 0.1378905177116394


 55%|█████▌    | 3141/5701 [57:22<30:25,  1.40it/s]  

Train loss: 0.10660701245069504


 55%|█████▌    | 3151/5701 [57:33<31:35,  1.34it/s]  

Train loss: 0.1080719381570816


 55%|█████▌    | 3161/5701 [57:44<31:58,  1.32it/s]  

Train loss: 0.12288669496774673


 56%|█████▌    | 3171/5701 [57:55<31:26,  1.34it/s]  

Train loss: 0.09407784789800644


 56%|█████▌    | 3181/5701 [58:06<32:06,  1.31it/s]  

Train loss: 0.0963209792971611


 56%|█████▌    | 3191/5701 [58:17<31:11,  1.34it/s]  

Train loss: 0.11271446198225021


 56%|█████▌    | 3201/5701 [58:28<31:43,  1.31it/s]  

Train loss: 0.1161736473441124


 56%|█████▋    | 3211/5701 [58:40<31:19,  1.32it/s]  

Train loss: 0.09168948978185654


 56%|█████▋    | 3221/5701 [58:51<31:39,  1.31it/s]  

Train loss: 0.11464855819940567


 57%|█████▋    | 3231/5701 [59:01<29:14,  1.41it/s]  

Train loss: 0.12942104041576385


 57%|█████▋    | 3241/5701 [59:12<31:33,  1.30it/s]  

Train loss: 0.10177314281463623


 57%|█████▋    | 3251/5701 [59:23<30:48,  1.33it/s]  

Train loss: 0.13175013661384583


 57%|█████▋    | 3261/5701 [59:35<31:24,  1.29it/s]  

Train loss: 0.12570196390151978


 57%|█████▋    | 3271/5701 [59:44<28:09,  1.44it/s]  

Train loss: 0.10982684046030045


 58%|█████▊    | 3281/5701 [59:56<31:57,  1.26it/s]  

Train loss: 0.10956016927957535


 58%|█████▊    | 3291/5701 [1:00:07<30:17,  1.33it/s]  

Train loss: 0.11731059849262238


 58%|█████▊    | 3301/5701 [1:00:18<30:04,  1.33it/s]  

Train loss: 0.11709313839673996


 58%|█████▊    | 3311/5701 [1:00:29<29:14,  1.36it/s]  

Train loss: 0.11011061817407608


 58%|█████▊    | 3321/5701 [1:00:40<30:03,  1.32it/s]  

Train loss: 0.1021498367190361


 58%|█████▊    | 3331/5701 [1:00:51<29:20,  1.35it/s]  

Train loss: 0.10734863579273224


 59%|█████▊    | 3341/5701 [1:01:01<28:59,  1.36it/s]  

Train loss: 0.09074504673480988


 59%|█████▉    | 3351/5701 [1:01:12<29:28,  1.33it/s]  

Train loss: 0.09989739954471588


 59%|█████▉    | 3361/5701 [1:01:24<30:31,  1.28it/s]  

Train loss: 0.12521812319755554


 59%|█████▉    | 3371/5701 [1:01:35<29:20,  1.32it/s]  

Train loss: 0.08683385699987411


 59%|█████▉    | 3381/5701 [1:01:45<27:58,  1.38it/s]  

Train loss: 0.0969802662730217


 59%|█████▉    | 3391/5701 [1:01:57<29:58,  1.28it/s]  

Train loss: 0.11070837080478668


 60%|█████▉    | 3401/5701 [1:02:09<29:43,  1.29it/s]  

Train loss: 0.11762972921133041


 60%|█████▉    | 3411/5701 [1:02:19<28:19,  1.35it/s]  

Train loss: 0.07233010232448578


 60%|██████    | 3421/5701 [1:02:30<28:05,  1.35it/s]  

Train loss: 0.1102987751364708


 60%|██████    | 3431/5701 [1:02:42<29:38,  1.28it/s]  

Train loss: 0.10686314105987549


 60%|██████    | 3441/5701 [1:02:50<25:22,  1.48it/s]

Train loss: 0.10930047184228897


 61%|██████    | 3451/5701 [1:03:02<29:12,  1.28it/s]  

Train loss: 0.10747312754392624


 61%|██████    | 3461/5701 [1:03:14<29:04,  1.28it/s]  

Train loss: 0.08966375887393951


 61%|██████    | 3471/5701 [1:03:24<27:28,  1.35it/s]  

Train loss: 0.10056161135435104


 61%|██████    | 3481/5701 [1:03:36<28:21,  1.30it/s]  

Train loss: 0.11265275627374649


 61%|██████    | 3491/5701 [1:03:46<26:51,  1.37it/s]  

Train loss: 0.1308603733778


 61%|██████▏   | 3501/5701 [1:03:57<27:32,  1.33it/s]  

Train loss: 0.13556675612926483


 62%|██████▏   | 3511/5701 [1:04:09<28:18,  1.29it/s]  

Train loss: 0.1369560956954956


 62%|██████▏   | 3521/5701 [1:04:19<27:08,  1.34it/s]  

Train loss: 0.1366233378648758


 62%|██████▏   | 3531/5701 [1:04:30<26:24,  1.37it/s]  

Train loss: 0.10993432998657227


 62%|██████▏   | 3541/5701 [1:04:41<27:19,  1.32it/s]  

Train loss: 0.12922634184360504


 62%|██████▏   | 3551/5701 [1:04:52<27:15,  1.31it/s]  

Train loss: 0.08554457128047943


 62%|██████▏   | 3561/5701 [1:05:03<26:51,  1.33it/s]  

Train loss: 0.11493001133203506


 63%|██████▎   | 3571/5701 [1:05:15<27:42,  1.28it/s]  

Train loss: 0.10753946751356125


 63%|██████▎   | 3581/5701 [1:05:25<25:55,  1.36it/s]  

Train loss: 0.09795928746461868


 63%|██████▎   | 3591/5701 [1:05:37<26:49,  1.31it/s]  

Train loss: 0.1110227108001709


 63%|██████▎   | 3601/5701 [1:05:47<25:54,  1.35it/s]  

Train loss: 0.08236075937747955


 63%|██████▎   | 3611/5701 [1:05:58<25:58,  1.34it/s]  

Train loss: 0.10671508312225342


 64%|██████▎   | 3621/5701 [1:06:09<26:03,  1.33it/s]  

Train loss: 0.10794220119714737


 64%|██████▎   | 3631/5701 [1:06:20<25:35,  1.35it/s]  

Train loss: 0.11311953514814377


 64%|██████▍   | 3641/5701 [1:06:30<25:33,  1.34it/s]  

Train loss: 0.11045583337545395


 64%|██████▍   | 3651/5701 [1:06:41<25:46,  1.33it/s]  

Train loss: 0.11890055984258652


 64%|██████▍   | 3661/5701 [1:06:53<25:44,  1.32it/s]  

Train loss: 0.11142704635858536


 64%|██████▍   | 3671/5701 [1:07:03<25:26,  1.33it/s]  

Train loss: 0.09074313193559647


 65%|██████▍   | 3681/5701 [1:07:15<25:41,  1.31it/s]  

Train loss: 0.16170978546142578


 65%|██████▍   | 3691/5701 [1:07:24<23:17,  1.44it/s]

Train loss: 0.10531556606292725


 65%|██████▍   | 3701/5701 [1:07:36<25:56,  1.29it/s]  

Train loss: 0.14227665960788727


 65%|██████▌   | 3711/5701 [1:07:47<25:06,  1.32it/s]  

Train loss: 0.0947139635682106


 65%|██████▌   | 3721/5701 [1:07:58<25:19,  1.30it/s]  

Train loss: 0.12269603461027145


 65%|██████▌   | 3731/5701 [1:08:08<23:43,  1.38it/s]  

Train loss: 0.1325562298297882


 66%|██████▌   | 3741/5701 [1:08:20<25:37,  1.28it/s]  

Train loss: 0.09566942602396011


 66%|██████▌   | 3751/5701 [1:08:31<24:20,  1.34it/s]  

Train loss: 0.11233947426080704


 66%|██████▌   | 3761/5701 [1:08:43<25:07,  1.29it/s]  

Train loss: 0.11777997016906738


 66%|██████▌   | 3771/5701 [1:08:53<22:45,  1.41it/s]

Train loss: 0.1459113210439682


 66%|██████▋   | 3781/5701 [1:09:04<24:41,  1.30it/s]  

Train loss: 0.10632429271936417


 66%|██████▋   | 3791/5701 [1:09:16<24:34,  1.30it/s]  

Train loss: 0.10815570503473282


 67%|██████▋   | 3801/5701 [1:09:27<23:54,  1.32it/s]  

Train loss: 0.12032987177371979


 67%|██████▋   | 3811/5701 [1:09:38<23:51,  1.32it/s]  

Train loss: 0.13415949046611786


 67%|██████▋   | 3821/5701 [1:09:49<23:53,  1.31it/s]  

Train loss: 0.11487232893705368


 67%|██████▋   | 3831/5701 [1:10:00<23:15,  1.34it/s]  

Train loss: 0.09785432368516922


 67%|██████▋   | 3841/5701 [1:10:12<24:03,  1.29it/s]  

Train loss: 0.11554886400699615


 68%|██████▊   | 3851/5701 [1:10:22<22:11,  1.39it/s]  

Train loss: 0.149341881275177


 68%|██████▊   | 3861/5701 [1:10:33<23:44,  1.29it/s]  

Train loss: 0.12510041892528534


 68%|██████▊   | 3871/5701 [1:10:44<22:25,  1.36it/s]  

Train loss: 0.11580463498830795


 68%|██████▊   | 3881/5701 [1:10:56<24:22,  1.24it/s]  

Train loss: 0.1159914880990982


 68%|██████▊   | 3891/5701 [1:11:06<21:19,  1.41it/s]

Train loss: 0.1245000883936882


 68%|██████▊   | 3901/5701 [1:11:18<23:36,  1.27it/s]  

Train loss: 0.11089559644460678


 69%|██████▊   | 3911/5701 [1:11:28<21:27,  1.39it/s]

Train loss: 0.11347907781600952


 69%|██████▉   | 3921/5701 [1:11:39<22:02,  1.35it/s]  

Train loss: 0.1286706030368805


 69%|██████▉   | 3931/5701 [1:11:50<22:25,  1.32it/s]  

Train loss: 0.11935888975858688


 69%|██████▉   | 3941/5701 [1:12:01<22:28,  1.31it/s]  

Train loss: 0.13356158137321472


 69%|██████▉   | 3951/5701 [1:12:12<21:58,  1.33it/s]  

Train loss: 0.11462950706481934


 69%|██████▉   | 3961/5701 [1:12:23<21:35,  1.34it/s]  

Train loss: 0.10650088638067245


 70%|██████▉   | 3971/5701 [1:12:34<21:32,  1.34it/s]  

Train loss: 0.10060902684926987


 70%|██████▉   | 3981/5701 [1:12:46<22:27,  1.28it/s]  

Train loss: 0.11010154336690903


 70%|███████   | 3991/5701 [1:12:56<21:11,  1.35it/s]  

Train loss: 0.10481023788452148


 70%|███████   | 4001/5701 [1:13:08<21:26,  1.32it/s]  

Train loss: 0.11662056297063828


 70%|███████   | 4011/5701 [1:13:18<21:07,  1.33it/s]  

Train loss: 0.11429400742053986


 71%|███████   | 4021/5701 [1:13:30<21:51,  1.28it/s]  

Train loss: 0.11559009552001953


 71%|███████   | 4031/5701 [1:13:41<20:23,  1.36it/s]

Train loss: 0.10028579086065292


 71%|███████   | 4041/5701 [1:13:52<21:20,  1.30it/s]  

Train loss: 0.10981136560440063


 71%|███████   | 4051/5701 [1:14:03<20:20,  1.35it/s]

Train loss: 0.08297369629144669


 71%|███████   | 4061/5701 [1:14:14<20:25,  1.34it/s]

Train loss: 0.0942571610212326


 71%|███████▏  | 4071/5701 [1:14:25<20:56,  1.30it/s]  

Train loss: 0.1529303640127182


 72%|███████▏  | 4081/5701 [1:14:36<20:10,  1.34it/s]

Train loss: 0.09951335191726685


 72%|███████▏  | 4091/5701 [1:14:47<20:34,  1.30it/s]  

Train loss: 0.10300500690937042


 72%|███████▏  | 4101/5701 [1:14:58<19:58,  1.33it/s]

Train loss: 0.11913514137268066


 72%|███████▏  | 4111/5701 [1:15:10<20:21,  1.30it/s]  

Train loss: 0.12258874624967575


 72%|███████▏  | 4121/5701 [1:15:20<19:39,  1.34it/s]

Train loss: 0.1212247833609581


 72%|███████▏  | 4131/5701 [1:15:32<20:16,  1.29it/s]  

Train loss: 0.16663731634616852


 73%|███████▎  | 4141/5701 [1:15:42<18:38,  1.40it/s]

Train loss: 0.10831227153539658


 73%|███████▎  | 4151/5701 [1:15:54<19:51,  1.30it/s]  

Train loss: 0.09697198122739792


 73%|███████▎  | 4161/5701 [1:16:04<19:17,  1.33it/s]

Train loss: 0.10147061198949814


 73%|███████▎  | 4171/5701 [1:16:16<19:33,  1.30it/s]  

Train loss: 0.13272260129451752


 73%|███████▎  | 4181/5701 [1:16:27<19:02,  1.33it/s]

Train loss: 0.12471868842840195


 74%|███████▎  | 4191/5701 [1:16:38<18:49,  1.34it/s]

Train loss: 0.12736065685749054


 74%|███████▎  | 4201/5701 [1:16:49<19:02,  1.31it/s]

Train loss: 0.11173267662525177


 74%|███████▍  | 4211/5701 [1:17:00<18:58,  1.31it/s]

Train loss: 0.09304102510213852


 74%|███████▍  | 4221/5701 [1:17:10<17:36,  1.40it/s]

Train loss: 0.13832251727581024


 74%|███████▍  | 4231/5701 [1:17:21<18:32,  1.32it/s]

Train loss: 0.12292539328336716


 74%|███████▍  | 4241/5701 [1:17:33<18:54,  1.29it/s]

Train loss: 0.15393473207950592


 75%|███████▍  | 4251/5701 [1:17:44<18:40,  1.29it/s]

Train loss: 0.13676346838474274


 75%|███████▍  | 4261/5701 [1:17:56<18:26,  1.30it/s]

Train loss: 0.13192147016525269


 75%|███████▍  | 4271/5701 [1:18:07<18:02,  1.32it/s]

Train loss: 0.1168844923377037


 75%|███████▌  | 4281/5701 [1:18:16<16:12,  1.46it/s]

Train loss: 0.14342188835144043


 75%|███████▌  | 4291/5701 [1:18:27<18:08,  1.29it/s]

Train loss: 0.10696949064731598


 75%|███████▌  | 4301/5701 [1:18:39<18:06,  1.29it/s]

Train loss: 0.1085517406463623


 76%|███████▌  | 4311/5701 [1:18:50<17:18,  1.34it/s]

Train loss: 0.13109394907951355


 76%|███████▌  | 4321/5701 [1:19:00<17:10,  1.34it/s]

Train loss: 0.1304270178079605


 76%|███████▌  | 4331/5701 [1:19:11<17:12,  1.33it/s]

Train loss: 0.1115533635020256


 76%|███████▌  | 4341/5701 [1:19:22<16:54,  1.34it/s]

Train loss: 0.10163606703281403


 76%|███████▋  | 4351/5701 [1:19:33<16:58,  1.33it/s]

Train loss: 0.12237723916769028


 76%|███████▋  | 4361/5701 [1:19:44<16:44,  1.33it/s]

Train loss: 0.1160978451371193


 77%|███████▋  | 4371/5701 [1:19:55<16:52,  1.31it/s]

Train loss: 0.13237278163433075


 77%|███████▋  | 4381/5701 [1:20:06<16:38,  1.32it/s]

Train loss: 0.11848652362823486


 77%|███████▋  | 4391/5701 [1:20:17<16:38,  1.31it/s]

Train loss: 0.12252549082040787


 77%|███████▋  | 4401/5701 [1:20:28<16:16,  1.33it/s]

Train loss: 0.11929046362638474


 77%|███████▋  | 4411/5701 [1:20:39<15:57,  1.35it/s]

Train loss: 0.13357491791248322


 78%|███████▊  | 4421/5701 [1:20:51<16:42,  1.28it/s]

Train loss: 0.10917698591947556


 78%|███████▊  | 4431/5701 [1:21:01<15:37,  1.35it/s]

Train loss: 0.16009913384914398


 78%|███████▊  | 4441/5701 [1:21:13<16:03,  1.31it/s]

Train loss: 0.12101592123508453


 78%|███████▊  | 4451/5701 [1:21:23<15:24,  1.35it/s]

Train loss: 0.14110036194324493


 78%|███████▊  | 4461/5701 [1:21:35<15:52,  1.30it/s]

Train loss: 0.1339566707611084


 78%|███████▊  | 4471/5701 [1:21:46<15:42,  1.31it/s]

Train loss: 0.10378368943929672


 79%|███████▊  | 4481/5701 [1:21:57<15:10,  1.34it/s]

Train loss: 0.10126042366027832


 79%|███████▉  | 4491/5701 [1:22:09<15:58,  1.26it/s]

Train loss: 0.1525043100118637


 79%|███████▉  | 4501/5701 [1:22:20<14:59,  1.33it/s]

Train loss: 0.12127713114023209


 79%|███████▉  | 4511/5701 [1:22:31<14:52,  1.33it/s]

Train loss: 0.1419854462146759


 79%|███████▉  | 4521/5701 [1:22:42<14:48,  1.33it/s]

Train loss: 0.11923729628324509


 79%|███████▉  | 4531/5701 [1:22:54<15:30,  1.26it/s]

Train loss: 0.11484692245721817


 80%|███████▉  | 4541/5701 [1:23:05<14:33,  1.33it/s]

Train loss: 0.09944779425859451


 80%|███████▉  | 4551/5701 [1:23:16<14:15,  1.34it/s]

Train loss: 0.10920923203229904


 80%|████████  | 4561/5701 [1:23:26<14:05,  1.35it/s]

Train loss: 0.1195678636431694


 80%|████████  | 4571/5701 [1:23:37<14:03,  1.34it/s]

Train loss: 0.09672202169895172


 80%|████████  | 4581/5701 [1:23:48<13:52,  1.35it/s]

Train loss: 0.10699101537466049


 81%|████████  | 4591/5701 [1:23:58<13:35,  1.36it/s]

Train loss: 0.09036727994680405


 81%|████████  | 4601/5701 [1:24:09<13:53,  1.32it/s]

Train loss: 0.13264110684394836


 81%|████████  | 4611/5701 [1:24:21<13:55,  1.30it/s]

Train loss: 0.13287189602851868


 81%|████████  | 4621/5701 [1:24:32<13:52,  1.30it/s]

Train loss: 0.12835197150707245


 81%|████████  | 4631/5701 [1:24:44<13:37,  1.31it/s]

Train loss: 0.12443989515304565


 81%|████████▏ | 4641/5701 [1:24:55<13:18,  1.33it/s]

Train loss: 0.11214138567447662


 82%|████████▏ | 4651/5701 [1:25:06<13:33,  1.29it/s]

Train loss: 0.11233096569776535


 82%|████████▏ | 4661/5701 [1:25:16<12:26,  1.39it/s]

Train loss: 0.12849703431129456


 82%|████████▏ | 4671/5701 [1:25:28<13:07,  1.31it/s]

Train loss: 0.1394122689962387


 82%|████████▏ | 4681/5701 [1:25:39<13:02,  1.30it/s]

Train loss: 0.1561611294746399


 82%|████████▏ | 4691/5701 [1:25:49<11:59,  1.40it/s]

Train loss: 0.08024352788925171


 82%|████████▏ | 4701/5701 [1:26:00<12:47,  1.30it/s]

Train loss: 0.12059234827756882


 83%|████████▎ | 4711/5701 [1:26:11<12:30,  1.32it/s]

Train loss: 0.10061577707529068


 83%|████████▎ | 4721/5701 [1:26:22<12:07,  1.35it/s]

Train loss: 0.14557714760303497


 83%|████████▎ | 4731/5701 [1:26:33<12:00,  1.35it/s]

Train loss: 0.08127973973751068


 83%|████████▎ | 4741/5701 [1:26:45<12:34,  1.27it/s]

Train loss: 0.12921179831027985


 83%|████████▎ | 4751/5701 [1:26:56<12:17,  1.29it/s]

Train loss: 0.14270973205566406


 84%|████████▎ | 4761/5701 [1:27:05<10:47,  1.45it/s]

Train loss: 0.0836094543337822


 84%|████████▎ | 4771/5701 [1:27:17<11:45,  1.32it/s]

Train loss: 0.12346942722797394


 84%|████████▍ | 4781/5701 [1:27:28<11:34,  1.32it/s]

Train loss: 0.10277098417282104


 84%|████████▍ | 4791/5701 [1:27:40<11:58,  1.27it/s]

Train loss: 0.12783505022525787


 84%|████████▍ | 4801/5701 [1:27:51<11:15,  1.33it/s]

Train loss: 0.14509163796901703


 84%|████████▍ | 4811/5701 [1:28:03<11:36,  1.28it/s]

Train loss: 0.09490668028593063


 85%|████████▍ | 4821/5701 [1:28:13<10:56,  1.34it/s]

Train loss: 0.11521545797586441


 85%|████████▍ | 4831/5701 [1:28:24<10:52,  1.33it/s]

Train loss: 0.10463815182447433


 85%|████████▍ | 4841/5701 [1:28:35<10:41,  1.34it/s]

Train loss: 0.1359649896621704


 85%|████████▌ | 4851/5701 [1:28:47<11:01,  1.28it/s]

Train loss: 0.1360958367586136


 85%|████████▌ | 4861/5701 [1:28:57<10:09,  1.38it/s]

Train loss: 0.11327508836984634


 85%|████████▌ | 4871/5701 [1:29:09<10:45,  1.29it/s]

Train loss: 0.132012739777565


 86%|████████▌ | 4881/5701 [1:29:20<10:21,  1.32it/s]

Train loss: 0.11475545167922974


 86%|████████▌ | 4891/5701 [1:29:31<10:26,  1.29it/s]

Train loss: 0.12314202636480331


 86%|████████▌ | 4901/5701 [1:29:42<09:50,  1.36it/s]

Train loss: 0.11418717354536057


 86%|████████▌ | 4911/5701 [1:29:53<09:44,  1.35it/s]

Train loss: 0.10221803188323975


 86%|████████▋ | 4921/5701 [1:30:04<10:01,  1.30it/s]

Train loss: 0.12341586500406265


 86%|████████▋ | 4931/5701 [1:30:15<09:29,  1.35it/s]

Train loss: 0.11819373816251755


 87%|████████▋ | 4941/5701 [1:30:26<09:45,  1.30it/s]

Train loss: 0.10557427257299423


 87%|████████▋ | 4951/5701 [1:30:37<09:03,  1.38it/s]

Train loss: 0.11600241810083389


 87%|████████▋ | 4961/5701 [1:30:47<09:06,  1.35it/s]

Train loss: 0.09862598031759262


 87%|████████▋ | 4971/5701 [1:30:59<09:20,  1.30it/s]

Train loss: 0.09040713310241699


 87%|████████▋ | 4981/5701 [1:31:10<09:13,  1.30it/s]

Train loss: 0.13991501927375793


 88%|████████▊ | 4991/5701 [1:31:22<09:08,  1.30it/s]

Train loss: 0.12447475641965866


 88%|████████▊ | 5001/5701 [1:31:32<08:34,  1.36it/s]

Train loss: 0.1097886711359024


 88%|████████▊ | 5011/5701 [1:31:43<08:44,  1.32it/s]

Train loss: 0.12031906098127365


 88%|████████▊ | 5021/5701 [1:31:54<08:35,  1.32it/s]

Train loss: 0.11373620480298996


 88%|████████▊ | 5031/5701 [1:32:05<08:21,  1.34it/s]

Train loss: 0.13319745659828186


 88%|████████▊ | 5041/5701 [1:32:16<08:11,  1.34it/s]

Train loss: 0.10149969160556793


 89%|████████▊ | 5051/5701 [1:32:27<08:10,  1.33it/s]

Train loss: 0.09782399982213974


 89%|████████▉ | 5061/5701 [1:32:38<07:55,  1.35it/s]

Train loss: 0.11243361234664917


 89%|████████▉ | 5067/5701 [1:32:47<14:40,  1.39s/it]

In [1]:
epochs = range(1,n_epoch+1)
epochs = range(1,20)
train_loss = [i.detach().cpu() for i in train_loss]
valid_loss = [i.detach().cpu() for i in valid_loss]

fig, ax = plt.subplots(nrows=1,ncols=1,figsize=(7,5),layout="constrained")
ax.plot(epochs,train_loss,linewidth=2,color="b",label='Train')
ax.plot(epochs,valid_loss,linewidth=2,color="r",label='Valid')
ax.grid()

ax.legend()

ax.set_xlabel("Epoch index")
ax.set_ylabel("Cross Entropy")
ax.set_title("Training curves for gender")

plt.show()

fig.savefig("train_gender_detection.png",dpi=300)

NameError: name 'n_epoch' is not defined

In [6]:
import matplotlib.pyplot as plt

In [7]:
test_loader= DataLoader(dataset_test, batch_size=64, shuffle=False,num_workers=10)

In [19]:
next(iter(test_loader))

[tensor([[-1.2368e-14, -2.2741e-14, -7.3303e-15,  ...,  4.4143e-02,
           7.2552e-02, -6.1471e-03],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -4.5468e-03,
          -6.2040e-03, -1.0184e-02],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -4.1556e-02,
           3.0061e-02,  4.0455e-02],
         ...,
         [-1.1845e-12,  1.1349e-12, -1.3403e-12,  ...,  3.7740e-03,
           5.3200e-03,  5.6067e-03],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  1.5192e-02,
           1.6598e-02,  1.7288e-02],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -2.9863e-05,
          -2.8084e-05,  2.0304e-04]]),
 tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0])]

In [8]:
ckpt = torch.load("./probes/sparse_gender_probe_256_mean.pt")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
classif_probe = LinProbe(input_size=256,output_size=2,agreg="mean")
classif_probe.load_state_dict(ckpt)
classif_probe.to(device)
model.to(device)

cuda


MASNMF(
  (spec_feat): SpecFeat(
    (spec): Spectrogram()
  )
  (wavlm): WavLM_Feats(
    (feature_extract): UpstreamExpert(
      (model): WavLM(
        (feature_extractor): ConvFeatureExtractionModel(
          (conv_layers): ModuleList(
            (0): Sequential(
              (0): Conv1d(1, 512, kernel_size=(10,), stride=(5,), bias=False)
              (1): Dropout(p=0.0, inplace=False)
              (2): Sequential(
                (0): TransposeLast()
                (1): Fp32LayerNorm((512,), eps=1e-05, elementwise_affine=True)
                (2): TransposeLast()
              )
              (3): GELU()
            )
            (1): Sequential(
              (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), bias=False)
              (1): Dropout(p=0.0, inplace=False)
              (2): Sequential(
                (0): TransposeLast()
                (1): Fp32LayerNorm((512,), eps=1e-05, elementwise_affine=True)
                (2): TransposeLast()
              )
     

In [9]:
from tqdm import tqdm
pred = []
lab = []
for batch_idx, (data,label) in enumerate(tqdm(test_loader)):    

    with torch.no_grad():
        feat = model.wavlm(data.to(device))
        emb = model.emb_transform(feat)
        lab.extend(label.detach().cpu().numpy())
        
        logits = classif_probe(emb.permute(0,2,1))
        #print(logits.shape)
        pred.extend(logits.argmax(dim=-1).detach().cpu().numpy())

        

100%|██████████| 34/34 [00:45<00:00,  1.33s/it]


In [12]:
from torchmetrics import F1Score,Accuracy,ConfusionMatrix,Recall
f1 = F1Score(task="binary")
recall = Recall(task="multiclass",num_classes=2,average="macro")
acc = Accuracy(task='binary')
conf = ConfusionMatrix(task='binary')events
ecp = ExpectedCost(task="binary")

In [13]:
import numpy as np
tpred = torch.from_numpy(np.array(pred))
tlabel = torch.from_numpy(np.array(lab))

In [14]:

from expected_cost import ec
#print("0 Male, 1 Female")
print(f"N Female = {(tlabel==1).sum()} , N Male = {(tlabel==0).sum()}")
print("UAR =",recall(tpred,tlabel))
print("Acc =",acc(tpred,tlabel))
print("F1score Female =",f1(tpred,tlabel))
print("F1score Male =",f1((tpred*-1)+1,(tlabel*-1)+1))
print("Confusion = ",conf(tpred,tlabel))
print("\nCUSTOM EC")
print("Expected cost",ecp(tpred,tlabel))
print("Acc from EC",1-ecp(tpred,tlabel))


N Female = 457 , N Male = 1697
UAR = tensor(0.8960)
Acc = tensor(0.9243)
F1score Female = tensor(0.8260)
F1score Male = tensor(0.9516)
Confusion =  tensor([[1604,   93],
        [  70,  387]])

CUSTOM EC
Expected cost tensor(0.0757)
Acc from EC tensor(0.9243)
