In [1]:
import os
import logging
import random
import sys

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import random
from tqdm import tqdm

import torch
from torch.utils.data import (DataLoader ,RandomSampler, SequentialSampler, TensorDataset)
import torch.nn.functional as f
from torch.utils.data import TensorDataset
from torch.optim import Adam

import transformers
from transformers import RobertaForSequenceClassification, RobertaTokenizer
from transformers import AdamW, RobertaConfig
from sklearn.metrics import (accuracy_score, 
                             precision_recall_curve,
                             f1_score,
                             auc)
import torch.nn as nn
from transformers import (AutoConfig, 
                          AutoTokenizer, 
                          RobertaForSequenceClassification,
                          Trainer,
                          TrainingArguments,
                          DataCollatorWithPadding,
                          EarlyStoppingCallback)

from torch.nn import CrossEntropyLoss
from sklearn.metrics import confusion_matrix, classification_report, matthews_corrcoef, f1_score, recall_score, precision_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def seed_everything(seed: int = 42, contain_cuda: bool = False):
  os.environ['PYTHONHASHSEED'] = str(seed)
  random.seed(seed)
  np.random.seed(seed)

  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  print(f"Seed set as {seed}")

seed = 42
seed_everything(seed)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Seed set as 42


In [3]:
transformers.logging.set_verbosity_error() # 오류 메세지만 출력하는 것이다. 

In [4]:
''' Tokenizer '''
tokenizer = RobertaTokenizer.from_pretrained('microsoft/graphcodebert-base')
tokenizer.truncation_side = 'left' # 설정된 길이만큼 tokenize를 한 다음 초과되는 부분을 왼쪽에서 부터 자른다. 

In [5]:
''' Config '''
config = RobertaConfig.from_pretrained('microsoft/graphcodebert-base') 

''' Pretrained model'''
pretrained = RobertaForSequenceClassification.from_pretrained('microsoft/graphcodebert-base')

In [6]:
''' Load dataset'''
train_df = pd.read_csv("/home/workspace/DACON/CodeSim/Dataset/train_data_lv1.csv")
valid_df = pd.read_csv("/home/workspace/DACON/CodeSim/Dataset/valid_data_lv1.csv")

In [7]:
'''train valid dataset tokenize'''
from datasets import concatenate_datasets, load_dataset

train_dataset = load_dataset("csv", data_files='/home/workspace/DACON/CodeSim/Dataset/train_data_lv1.csv')

''' output '''
# DatasetDict({
#     train: Dataset({
#         features: ['code1', 'code2', 'similar'],
#         num_rows: 500000
#     })
# })

# train_dataset = load_dataset("csv", data_files='/home/workspace/DACON/CodeSim/Dataset/train_data_lv1.csv')['train'] v1 
train_dataset = load_dataset("csv", data_files='/home/workspace/DACON/CodeSim/Dataset/train_data_v2.csv')['train'] 
''' output '''
# DaDataset({
#     features: ['code1', 'code2', 'similar'],
#     num_rows: 500000
# })

# valid_dataset = load_dataset("csv", data_files='/home/workspace/DACON/CodeSim/Dataset/valid_data_lv1.csv')['train'] v 1
valid_dataset = load_dataset("csv", data_files='/home/workspace/DACON/CodeSim/Dataset/valid_data.csv')['train']
''' output '''
# DaDataset({
#     features: ['code1', 'code2', 'similar'],
#     num_rows: 500000
# })

rawdataset = concatenate_datasets([train_dataset, valid_dataset])

def example_fn(examples):
    outputs = tokenizer(examples['code1'], examples['code2'], padding='max_length', max_length=512, truncation=True)
    outputs['labels'] = examples['similar']
    return outputs

dataset = rawdataset.map(example_fn, remove_columns=['code1', 'code2', 'similar']) # remove_columns tokennum만 있으면 되니깐 나머지는 제거
''' train dataset.column_names '''
# ['input_ids', 'attention_mask', 'labels']


Map: 100%|██████████| 1100000/1100000 [41:34<00:00, 440.89 examples/s]


' train dataset.column_names '

In [8]:
import easydict
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'current device : {device}')

args = easydict.EasyDict({
    "seed":42,
    "optimizer":"AdamW",
    "scheduler":"linear",
    "warmup_steps":500,
    "cycle_mult":1.2,
    "batch_size": 16,
    "patience":5,
    "n_splits":6,
    "epochs":3,
    "lr": 2e-05,
    "criterion":'cross',
    "smoothing": 0.0,
    "model": "microsoft/graphcodebert-base",
    "logging_wrong_samples":True,
    })

project_name = "graphcodebert_Bs16_OptAdamW_ScduLinear_Sm0.0"
args.update(
            {
                "project_name":project_name,
                "model_name":project_name,
             }
            )

seed_everything(args.seed)

current device : cuda:0
Seed set as 42


In [9]:
import torch.nn.functional as F
class FocalLoss(nn.Module):
  def __init__(self, weight=None,
               gamma=2., reduction='mean'):
    nn.Module.__init__(self)
    self.weight = weight
    self.gamma = gamma
    self.reduction = reduction

  def forward(self, input_tensor, target_tensor):
    log_prob = F.log_softmax(input_tensor, dim=-1)
    prob = torch.exp(log_prob)
    return F.nll_loss(
        ((1 - prob) ** self.gamma) * log_prob,
        target_tensor,
        weight=self.weight,
        reduction=self.reduction
        )

class LabelSmoothingLoss(nn.Module):
  def __init__(self, classes=3, smoothing=0.0, dim=-1):
    super(LabelSmoothingLoss, self).__init__()
    self.confidence = 1.0 - smoothing
    self.smoothing = smoothing
    self.cls = classes
    self.dim = dim

  def forward(self, pred, target):
    pred = pred.log_softmax(dim=self.dim)
    with torch.no_grad():
      true_dist = torch.zeros_like(pred)
      true_dist.fill_(self.smoothing / (self.cls - 1))
      true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
    return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))


def get_criterion(args):
  if args.smoothing!=0 and args.criterion == 'smoothing':
    criterion = LabelSmoothingLoss(smoothing=args.smoothing)
  elif args.criterion == 'cross':
    criterion = nn.CrossEntropyLoss()
  elif args.criterion == 'focal':
    criterion = FocalLoss(gamma=2.0)
  else:
    raise NotImplementedError('Criterion not available')
  return criterion

In [10]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [11]:
def get_optimizer(model, args):
  if args.optimizer == "Adam":
    optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=0.01)
  elif args.optimizer == "AdamW":
    optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=0.01)
  else:
    raise NotImplementedError('Optimizer not available')
  
def get_criterion(args):
  if args.smoothing!=0 and args.criterion == 'smoothing':
    criterion = LabelSmoothingLoss(smoothing=args.smoothing)
  elif args.criterion == 'cross':
    criterion = nn.CrossEntropyLoss()
  elif args.criterion == 'focal':
    criterion = FocalLoss(gamma=2.0)
  else:
    raise NotImplementedError('Criterion not available')
  return criterion

In [12]:
import math
from torch.optim.lr_scheduler import ReduceLROnPlateau, _LRScheduler
from transformers import (get_scheduler, 
                          get_cosine_with_hard_restarts_schedule_with_warmup,
                          get_linear_schedule_with_warmup)


class CosineAnnealingWarmupRestarts(_LRScheduler):
  """
    optimizer (Optimizer): Wrapped optimizer.
    first_cycle_steps (int): First cycle step size.
    cycle_mult(float): Cycle steps magnification. Default: -1.
    max_lr(float): First cycle's max learning rate. Default: 0.1.
    min_lr(float): Min learning rate. Default: 0.001.
    warmup_steps(int): Linear warmup step size. Default: 0.
    gamma(float): Decrease rate of max learning rate by cycle. Default: 1.
    last_epoch (int): The index of last epoch. Default: -1.
  """
  def __init__(self,
               optimizer : torch.optim.Optimizer,
               first_cycle_steps : int,
               cycle_mult : float = 1.,
               max_lr : float = 0.1,
               min_lr : float = 0.001,
               warmup_steps : int = 0,
               gamma : float = 1.,
               last_epoch : int = -1
               ):
    assert warmup_steps < first_cycle_steps
        
    self.first_cycle_steps = first_cycle_steps # first cycle step size
    self.cycle_mult = cycle_mult # cycle steps magnification
    self.base_max_lr = max_lr # first max learning rate
    self.max_lr = max_lr # max learning rate in the current cycle
    self.min_lr = min_lr # min learning rate
    self.warmup_steps = warmup_steps # warmup step size
    self.gamma = gamma # decrease rate of max learning rate by cycle
    
    self.cur_cycle_steps = first_cycle_steps # first cycle step size
    self.cycle = 0 # cycle count
    self.step_in_cycle = last_epoch # step size of the current cycle
    
    super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)
        
    # set learning rate min_lr
    self.init_lr()
    
  def init_lr(self):
    self.base_lrs = []
    for param_group in self.optimizer.param_groups:
      param_group['lr'] = self.min_lr
      self.base_lrs.append(self.min_lr)
    
  def get_lr(self):
    if self.step_in_cycle == -1:
      return self.base_lrs
    elif self.step_in_cycle < self.warmup_steps:
      return [(self.max_lr - base_lr)*self.step_in_cycle / self.warmup_steps + base_lr for base_lr in self.base_lrs]
    else:
      return [base_lr + (self.max_lr - base_lr) \
              * (1 + math.cos(math.pi * (self.step_in_cycle-self.warmup_steps) \
                              / (self.cur_cycle_steps - self.warmup_steps))) / 2
              for base_lr in self.base_lrs]

  def step(self, epoch=None):
    if epoch is None:
      epoch = self.last_epoch + 1
      self.step_in_cycle = self.step_in_cycle + 1
      if self.step_in_cycle >= self.cur_cycle_steps:
        self.cycle += 1
        self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
        self.cur_cycle_steps = int((self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
      else:
        if epoch >= self.first_cycle_steps:
          if self.cycle_mult == 1.:
            self.step_in_cycle = epoch % self.first_cycle_steps
            self.cycle = epoch // self.first_cycle_steps
          else:
            n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
            self.cycle = n
            self.step_in_cycle = epoch - int(self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
            self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
        else:
          self.cur_cycle_steps = self.first_cycle_steps
          self.step_in_cycle = epoch
                
        self.max_lr = self.base_max_lr * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr


def get_scheduler(optimizer, args, total_batch_):
  if args.scheduler == "plateau":
      scheduler = ReduceLROnPlateau(
          optimizer, patience=2, factor=0.85, mode="max", verbose=True
      )
  elif args.scheduler == "linear":
      scheduler = get_linear_schedule_with_warmup(
          optimizer,
          # num_warmup_steps=int(total_batch_*args.epochs*0.1),
          num_warmup_steps=args.warmup_steps,
          num_training_steps=int(total_batch_*args.epochs),
      )
  elif args.scheduler == "cosine":
      scheduler = CosineAnnealingWarmupRestarts(  
          optimizer,
          first_cycle_steps=200,
          warmup_steps=args.warmup_steps,
          cycle_mult=args.cycle_mult,
          max_lr=args.lr,
          min_lr=args.lr * 0.01,
          gamma=0.9,
      )
  else:
    raise NotImplementedError('LR Scheduler not available')

  return scheduler

In [13]:
criterion = get_criterion(args)
config =  AutoConfig.from_pretrained("microsoft/graphcodebert-base")
config.num_labels = 2
model = RobertaForSequenceClassification.from_pretrained("microsoft/graphcodebert-base", config=config)
model.to(device)

best_val_acc_list = []
gap = int(len(dataset) / args.n_splits)

In [14]:
from torch.optim.lr_scheduler import StepLR
f = 1

print(f"---------------------------------- {f} fold----------------------------------")

os.makedirs(f'./models/{args.model_name}/{f}-fold', exist_ok=True)

total_size = len(dataset)
total_ids = list(range(total_size))
del_ids = list(range((f-1)*gap, f*gap))
training_ids = set(total_ids) - set(del_ids)

training_dset = dataset.select(list(training_ids))
eval_dset = dataset.select(del_ids)

collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainloader = DataLoader(training_dset,
                          batch_size=16,
                          shuffle=True,
                          collate_fn = collator
                          )

validloader = DataLoader(eval_dset,
                          batch_size=16,
                          shuffle=False,
                          collate_fn = collator
                          )

total_batch_ = len(trainloader)
valid_batch_ = len(validloader)

optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=0.01)
scheduler = StepLR(optimizer, step_size=200, gamma=0.5)

---------------------------------- 1 fold----------------------------------




In [15]:
torch.cuda.is_available()

True

In [16]:
f = 1
e = 1

print(f"------------------------------ {f} fold {e} epoch------------------------------")

model.train()
epoch_perform, batch_perform = np.zeros(2), np.zeros(2)
print()	
progress_bar = tqdm(enumerate(trainloader), total=len(trainloader), leave=True, position=0,)
for j, v in progress_bar:
  input_ids, attention_mask, labels = v['input_ids'].to(device), v['attention_mask'].to(device), v['labels'].to(device)
  
  optimizer.zero_grad()
  outputs = model(input_ids, attention_mask) ## label을 안 넣어서 logits값만 출력
  output = outputs.logits # The outputs object is a SequenceClassifierOutput
  loss = criterion(output, labels)
  loss.backward()
  optimizer.step()
  scheduler.step()
  predict = output.argmax(dim=-1)
  predict = predict.detach().cpu().numpy()
  labels = labels.detach().cpu().numpy()
  acc = accuracy_score(labels, predict)

  batch_perform += np.array([loss.item(), acc])
  epoch_perform += np.array([loss.item(), acc])

  if (j + 1) % 50 == 0:
    print(
        f"Epoch {e} #{j + 1} -- loss: {batch_perform[0] / 50}, acc: {batch_perform[1] / 50}"
    )
    batch_perform = np.zeros(2)
print()
print(
    f"Epoch {e} loss: {epoch_perform[0] / total_batch_}, acc: {epoch_perform[1] / total_batch_}"
    )
torch.save(model.state_dict(), f"./models/{args.model_name}/{f}-fold/train.pt")

------------------------------ 1 fold 1 epoch------------------------------



  0%|          | 50/57292 [00:18<5:40:16,  2.80it/s]

Epoch 1 #50 -- loss: 0.7003777742385864, acc: 0.4975


  0%|          | 100/57292 [00:36<5:39:50,  2.80it/s]

Epoch 1 #100 -- loss: 0.6924503004550934, acc: 0.53625


  0%|          | 150/57292 [00:53<5:42:26,  2.78it/s]

Epoch 1 #150 -- loss: 0.6269937944412232, acc: 0.64625


  0%|          | 200/57292 [01:11<5:42:04,  2.78it/s]

Epoch 1 #200 -- loss: 0.4195492911338806, acc: 0.82125


  0%|          | 250/57292 [01:30<5:42:58,  2.77it/s]

Epoch 1 #250 -- loss: 0.30800932094454764, acc: 0.87


  1%|          | 300/57292 [01:48<5:43:49,  2.76it/s]

Epoch 1 #300 -- loss: 0.3301308085024357, acc: 0.85875


  1%|          | 350/57292 [02:06<5:44:28,  2.76it/s]

Epoch 1 #350 -- loss: 0.30002165466547015, acc: 0.8825


  1%|          | 400/57292 [02:24<5:44:50,  2.75it/s]

Epoch 1 #400 -- loss: 0.22285037919878958, acc: 0.91375


  1%|          | 450/57292 [02:42<5:44:05,  2.75it/s]

Epoch 1 #450 -- loss: 0.20415976010262965, acc: 0.925


  1%|          | 500/57292 [03:00<5:43:40,  2.75it/s]

Epoch 1 #500 -- loss: 0.2145673344284296, acc: 0.9175


  1%|          | 550/57292 [03:18<5:43:40,  2.75it/s]

Epoch 1 #550 -- loss: 0.19419529173523187, acc: 0.93


  1%|          | 600/57292 [03:36<5:43:47,  2.75it/s]

Epoch 1 #600 -- loss: 0.24331883825361728, acc: 0.9


  1%|          | 650/57292 [03:55<5:42:54,  2.75it/s]

Epoch 1 #650 -- loss: 0.19848810844123363, acc: 0.92625


  1%|          | 700/57292 [04:13<5:43:21,  2.75it/s]

Epoch 1 #700 -- loss: 0.20181290321052076, acc: 0.92625


  1%|▏         | 750/57292 [04:31<5:42:59,  2.75it/s]

Epoch 1 #750 -- loss: 0.18528881933540106, acc: 0.93125


  1%|▏         | 800/57292 [04:49<5:43:10,  2.74it/s]

Epoch 1 #800 -- loss: 0.21384169951081275, acc: 0.91625


  1%|▏         | 850/57292 [05:08<5:42:51,  2.74it/s]

Epoch 1 #850 -- loss: 0.19298603869974612, acc: 0.92875


  2%|▏         | 900/57292 [05:26<5:43:08,  2.74it/s]

Epoch 1 #900 -- loss: 0.18009661998599769, acc: 0.925


  2%|▏         | 950/57292 [05:44<5:42:40,  2.74it/s]

Epoch 1 #950 -- loss: 0.1784050240367651, acc: 0.93375


  2%|▏         | 1000/57292 [06:02<5:42:18,  2.74it/s]

Epoch 1 #1000 -- loss: 0.22505451645702124, acc: 0.91375


  2%|▏         | 1050/57292 [06:21<5:42:24,  2.74it/s]

Epoch 1 #1050 -- loss: 0.20812785286456348, acc: 0.9125


  2%|▏         | 1100/57292 [06:39<5:41:42,  2.74it/s]

Epoch 1 #1100 -- loss: 0.17106347776949404, acc: 0.93375


  2%|▏         | 1150/57292 [06:57<5:41:29,  2.74it/s]

Epoch 1 #1150 -- loss: 0.1985294869542122, acc: 0.93


  2%|▏         | 1200/57292 [07:15<5:41:13,  2.74it/s]

Epoch 1 #1200 -- loss: 0.2021388988941908, acc: 0.91625


  2%|▏         | 1250/57292 [07:34<5:41:01,  2.74it/s]

Epoch 1 #1250 -- loss: 0.20961575526744128, acc: 0.91375


  2%|▏         | 1300/57292 [07:52<5:41:12,  2.73it/s]

Epoch 1 #1300 -- loss: 0.2250447853654623, acc: 0.9125


  2%|▏         | 1350/57292 [08:10<5:40:47,  2.74it/s]

Epoch 1 #1350 -- loss: 0.19014738082885743, acc: 0.93125


  2%|▏         | 1400/57292 [08:28<5:40:34,  2.74it/s]

Epoch 1 #1400 -- loss: 0.17330869041383268, acc: 0.92625


  3%|▎         | 1450/57292 [08:47<5:40:13,  2.74it/s]

Epoch 1 #1450 -- loss: 0.16669756889343262, acc: 0.93875


  3%|▎         | 1500/57292 [09:05<5:39:28,  2.74it/s]

Epoch 1 #1500 -- loss: 0.20928422689437867, acc: 0.9175


  3%|▎         | 1550/57292 [09:23<5:39:16,  2.74it/s]

Epoch 1 #1550 -- loss: 0.1926231925934553, acc: 0.92375


  3%|▎         | 1600/57292 [09:41<5:39:29,  2.73it/s]

Epoch 1 #1600 -- loss: 0.16973799124360084, acc: 0.93875


  3%|▎         | 1650/57292 [10:00<5:39:17,  2.73it/s]

Epoch 1 #1650 -- loss: 0.22218938559293747, acc: 0.9125


  3%|▎         | 1700/57292 [10:18<5:38:35,  2.74it/s]

Epoch 1 #1700 -- loss: 0.19753103733062743, acc: 0.92375


  3%|▎         | 1750/57292 [10:36<5:38:13,  2.74it/s]

Epoch 1 #1750 -- loss: 0.1543306576088071, acc: 0.94625


  3%|▎         | 1800/57292 [10:55<5:38:19,  2.73it/s]

Epoch 1 #1800 -- loss: 0.1764044639095664, acc: 0.925


  3%|▎         | 1850/57292 [11:13<5:37:44,  2.74it/s]

Epoch 1 #1850 -- loss: 0.18615485448390245, acc: 0.9375


  3%|▎         | 1900/57292 [11:31<5:37:44,  2.73it/s]

Epoch 1 #1900 -- loss: 0.216841545291245, acc: 0.91375


  3%|▎         | 1950/57292 [11:49<5:37:09,  2.74it/s]

Epoch 1 #1950 -- loss: 0.1823667885363102, acc: 0.92625


  3%|▎         | 2000/57292 [12:08<5:37:06,  2.73it/s]

Epoch 1 #2000 -- loss: 0.20210072793066503, acc: 0.92


  4%|▎         | 2050/57292 [12:26<5:36:16,  2.74it/s]

Epoch 1 #2050 -- loss: 0.18881204761564732, acc: 0.9275


  4%|▎         | 2100/57292 [12:44<5:36:14,  2.74it/s]

Epoch 1 #2100 -- loss: 0.17547712035477162, acc: 0.92375


  4%|▍         | 2150/57292 [13:03<5:36:48,  2.73it/s]

Epoch 1 #2150 -- loss: 0.17688158877193927, acc: 0.93375


  4%|▍         | 2200/57292 [13:21<5:36:24,  2.73it/s]

Epoch 1 #2200 -- loss: 0.19858124807476998, acc: 0.91375


  4%|▍         | 2250/57292 [13:39<5:35:48,  2.73it/s]

Epoch 1 #2250 -- loss: 0.15244340777397156, acc: 0.94125


  4%|▍         | 2300/57292 [13:57<5:35:40,  2.73it/s]

Epoch 1 #2300 -- loss: 0.19557176381349564, acc: 0.92


  4%|▍         | 2350/57292 [14:16<5:35:05,  2.73it/s]

Epoch 1 #2350 -- loss: 0.18894727841019632, acc: 0.92125


  4%|▍         | 2400/57292 [14:34<5:34:54,  2.73it/s]

Epoch 1 #2400 -- loss: 0.18313471272587775, acc: 0.93125


  4%|▍         | 2450/57292 [14:52<5:34:44,  2.73it/s]

Epoch 1 #2450 -- loss: 0.1799726890772581, acc: 0.9225


  4%|▍         | 2500/57292 [15:11<5:33:48,  2.74it/s]

Epoch 1 #2500 -- loss: 0.17385078359395265, acc: 0.9275


  4%|▍         | 2550/57292 [15:29<5:33:50,  2.73it/s]

Epoch 1 #2550 -- loss: 0.18956273652613162, acc: 0.9375


  5%|▍         | 2600/57292 [15:47<5:33:15,  2.74it/s]

Epoch 1 #2600 -- loss: 0.20163699842989444, acc: 0.92375


  5%|▍         | 2650/57292 [16:05<5:33:37,  2.73it/s]

Epoch 1 #2650 -- loss: 0.1668065505102277, acc: 0.94625


  5%|▍         | 2700/57292 [16:24<5:32:50,  2.73it/s]

Epoch 1 #2700 -- loss: 0.17124563824385405, acc: 0.93375


  5%|▍         | 2750/57292 [16:42<5:32:55,  2.73it/s]

Epoch 1 #2750 -- loss: 0.17339814383536578, acc: 0.94125


  5%|▍         | 2800/57292 [17:00<5:32:20,  2.73it/s]

Epoch 1 #2800 -- loss: 0.18009927719831467, acc: 0.94


  5%|▍         | 2850/57292 [17:19<5:32:23,  2.73it/s]

Epoch 1 #2850 -- loss: 0.15309026524424552, acc: 0.93375


  5%|▌         | 2900/57292 [17:37<5:31:37,  2.73it/s]

Epoch 1 #2900 -- loss: 0.1841368593275547, acc: 0.9325


  5%|▌         | 2950/57292 [17:55<5:31:10,  2.73it/s]

Epoch 1 #2950 -- loss: 0.17292645774781704, acc: 0.935


  5%|▌         | 3000/57292 [18:14<5:31:09,  2.73it/s]

Epoch 1 #3000 -- loss: 0.18543720103800296, acc: 0.93


  5%|▌         | 3050/57292 [18:32<5:30:56,  2.73it/s]

Epoch 1 #3050 -- loss: 0.18201141618192196, acc: 0.92


  5%|▌         | 3100/57292 [18:50<5:30:29,  2.73it/s]

Epoch 1 #3100 -- loss: 0.2043162804096937, acc: 0.92375


  5%|▌         | 3150/57292 [19:08<5:29:46,  2.74it/s]

Epoch 1 #3150 -- loss: 0.1861595130339265, acc: 0.9225


  6%|▌         | 3200/57292 [19:27<5:29:27,  2.74it/s]

Epoch 1 #3200 -- loss: 0.17934727478772403, acc: 0.93125


  6%|▌         | 3250/57292 [19:45<5:28:56,  2.74it/s]

Epoch 1 #3250 -- loss: 0.18560319617390633, acc: 0.92625


  6%|▌         | 3300/57292 [20:03<5:28:58,  2.74it/s]

Epoch 1 #3300 -- loss: 0.2067783395946026, acc: 0.925


  6%|▌         | 3350/57292 [20:22<5:28:41,  2.74it/s]

Epoch 1 #3350 -- loss: 0.18700018923729658, acc: 0.92375


  6%|▌         | 3400/57292 [20:40<5:29:12,  2.73it/s]

Epoch 1 #3400 -- loss: 0.20268157631158829, acc: 0.925


  6%|▌         | 3450/57292 [20:58<5:28:07,  2.73it/s]

Epoch 1 #3450 -- loss: 0.17926213420927525, acc: 0.93375


  6%|▌         | 3500/57292 [21:16<5:28:08,  2.73it/s]

Epoch 1 #3500 -- loss: 0.18134622659534216, acc: 0.935


  6%|▌         | 3550/57292 [21:35<5:27:27,  2.74it/s]

Epoch 1 #3550 -- loss: 0.1653490687534213, acc: 0.935


  6%|▋         | 3600/57292 [21:53<5:27:25,  2.73it/s]

Epoch 1 #3600 -- loss: 0.2245097167044878, acc: 0.9075


  6%|▋         | 3650/57292 [22:11<5:26:45,  2.74it/s]

Epoch 1 #3650 -- loss: 0.19273767575621606, acc: 0.92625


  6%|▋         | 3700/57292 [22:30<5:26:42,  2.73it/s]

Epoch 1 #3700 -- loss: 0.1737824547290802, acc: 0.93625


  7%|▋         | 3750/57292 [22:48<5:26:20,  2.73it/s]

Epoch 1 #3750 -- loss: 0.21624593377113344, acc: 0.9125


  7%|▋         | 3800/57292 [23:06<5:25:54,  2.74it/s]

Epoch 1 #3800 -- loss: 0.22453344643115997, acc: 0.9025


  7%|▋         | 3850/57292 [23:24<5:26:12,  2.73it/s]

Epoch 1 #3850 -- loss: 0.18562189511954785, acc: 0.93875


  7%|▋         | 3900/57292 [23:43<5:25:32,  2.73it/s]

Epoch 1 #3900 -- loss: 0.16054918989539146, acc: 0.94125


  7%|▋         | 3950/57292 [24:01<5:25:12,  2.73it/s]

Epoch 1 #3950 -- loss: 0.18765939734876155, acc: 0.92375


  7%|▋         | 4000/57292 [24:19<5:24:53,  2.73it/s]

Epoch 1 #4000 -- loss: 0.17774392001330852, acc: 0.94375


  7%|▋         | 4050/57292 [24:38<5:24:29,  2.73it/s]

Epoch 1 #4050 -- loss: 0.17107047408819198, acc: 0.9325


  7%|▋         | 4100/57292 [24:56<5:24:06,  2.74it/s]

Epoch 1 #4100 -- loss: 0.180301171541214, acc: 0.925


  7%|▋         | 4150/57292 [25:14<5:23:56,  2.73it/s]

Epoch 1 #4150 -- loss: 0.1542711950838566, acc: 0.94375


  7%|▋         | 4200/57292 [25:32<5:23:24,  2.74it/s]

Epoch 1 #4200 -- loss: 0.20103191513568164, acc: 0.92125


  7%|▋         | 4250/57292 [25:51<5:23:31,  2.73it/s]

Epoch 1 #4250 -- loss: 0.21923979088664056, acc: 0.92


  8%|▊         | 4300/57292 [26:09<5:23:09,  2.73it/s]

Epoch 1 #4300 -- loss: 0.18963747426867486, acc: 0.935


  8%|▊         | 4350/57292 [26:27<5:22:39,  2.73it/s]

Epoch 1 #4350 -- loss: 0.1808674143254757, acc: 0.92625


  8%|▊         | 4400/57292 [26:46<5:21:49,  2.74it/s]

Epoch 1 #4400 -- loss: 0.17505867633968591, acc: 0.92125


  8%|▊         | 4450/57292 [27:04<5:22:06,  2.73it/s]

Epoch 1 #4450 -- loss: 0.21236057639122008, acc: 0.9025


  8%|▊         | 4500/57292 [27:22<5:22:11,  2.73it/s]

Epoch 1 #4500 -- loss: 0.18565069392323494, acc: 0.935


  8%|▊         | 4550/57292 [27:40<5:21:11,  2.74it/s]

Epoch 1 #4550 -- loss: 0.1948115125671029, acc: 0.92


  8%|▊         | 4600/57292 [27:59<5:21:00,  2.74it/s]

Epoch 1 #4600 -- loss: 0.2168584678322077, acc: 0.9125


  8%|▊         | 4650/57292 [28:17<5:21:46,  2.73it/s]

Epoch 1 #4650 -- loss: 0.18336406722664833, acc: 0.9225


  8%|▊         | 4700/57292 [28:35<5:20:07,  2.74it/s]

Epoch 1 #4700 -- loss: 0.1877829037606716, acc: 0.9225


  8%|▊         | 4750/57292 [28:54<5:20:29,  2.73it/s]

Epoch 1 #4750 -- loss: 0.1626604068093002, acc: 0.93625


  8%|▊         | 4800/57292 [29:12<5:20:27,  2.73it/s]

Epoch 1 #4800 -- loss: 0.2511793278157711, acc: 0.8975


  8%|▊         | 4850/57292 [29:30<5:19:39,  2.73it/s]

Epoch 1 #4850 -- loss: 0.17302931115031242, acc: 0.93875


  9%|▊         | 4900/57292 [29:49<5:19:50,  2.73it/s]

Epoch 1 #4900 -- loss: 0.18827276822179556, acc: 0.9275


  9%|▊         | 4950/57292 [30:07<5:19:23,  2.73it/s]

Epoch 1 #4950 -- loss: 0.1802759151905775, acc: 0.935


  9%|▊         | 5000/57292 [30:25<5:18:36,  2.74it/s]

Epoch 1 #5000 -- loss: 0.21974024161696434, acc: 0.91125


  9%|▉         | 5050/57292 [30:43<5:18:27,  2.73it/s]

Epoch 1 #5050 -- loss: 0.1996795864403248, acc: 0.93375


  9%|▉         | 5100/57292 [31:02<5:18:26,  2.73it/s]

Epoch 1 #5100 -- loss: 0.1996856413781643, acc: 0.9275


  9%|▉         | 5150/57292 [31:20<5:18:00,  2.73it/s]

Epoch 1 #5150 -- loss: 0.18019231617450715, acc: 0.92625


  9%|▉         | 5200/57292 [31:38<5:18:01,  2.73it/s]

Epoch 1 #5200 -- loss: 0.1847270303592086, acc: 0.93125


  9%|▉         | 5250/57292 [31:57<5:17:51,  2.73it/s]

Epoch 1 #5250 -- loss: 0.19167393803596497, acc: 0.925


  9%|▉         | 5300/57292 [32:15<5:17:09,  2.73it/s]

Epoch 1 #5300 -- loss: 0.2080316276475787, acc: 0.9175


  9%|▉         | 5350/57292 [32:33<5:16:41,  2.73it/s]

Epoch 1 #5350 -- loss: 0.19145082626491786, acc: 0.92875


  9%|▉         | 5400/57292 [32:51<5:16:14,  2.73it/s]

Epoch 1 #5400 -- loss: 0.15443080727010966, acc: 0.94375


 10%|▉         | 5450/57292 [33:10<5:16:43,  2.73it/s]

Epoch 1 #5450 -- loss: 0.17213069714605808, acc: 0.94125


 10%|▉         | 5500/57292 [33:28<5:16:28,  2.73it/s]

Epoch 1 #5500 -- loss: 0.15662774696946144, acc: 0.935


 10%|▉         | 5550/57292 [33:46<5:15:24,  2.73it/s]

Epoch 1 #5550 -- loss: 0.1973504462093115, acc: 0.9225


 10%|▉         | 5600/57292 [34:05<5:15:08,  2.73it/s]

Epoch 1 #5600 -- loss: 0.15909694328904153, acc: 0.935


 10%|▉         | 5650/57292 [34:23<5:15:29,  2.73it/s]

Epoch 1 #5650 -- loss: 0.19618325877934695, acc: 0.9275


 10%|▉         | 5700/57292 [34:41<5:15:04,  2.73it/s]

Epoch 1 #5700 -- loss: 0.2058339486271143, acc: 0.92


 10%|█         | 5750/57292 [35:00<5:14:43,  2.73it/s]

Epoch 1 #5750 -- loss: 0.21308739122003317, acc: 0.91125


 10%|█         | 5800/57292 [35:18<5:13:50,  2.73it/s]

Epoch 1 #5800 -- loss: 0.2072967265546322, acc: 0.91875


 10%|█         | 5850/57292 [35:36<5:14:44,  2.72it/s]

Epoch 1 #5850 -- loss: 0.19975218303501607, acc: 0.9225


 10%|█         | 5900/57292 [35:54<5:13:47,  2.73it/s]

Epoch 1 #5900 -- loss: 0.17962337810546158, acc: 0.93375


 10%|█         | 5950/57292 [36:13<5:13:21,  2.73it/s]

Epoch 1 #5950 -- loss: 0.18307493016123771, acc: 0.93125


 10%|█         | 6000/57292 [36:31<5:13:41,  2.73it/s]

Epoch 1 #6000 -- loss: 0.18313279416412115, acc: 0.93625


 11%|█         | 6050/57292 [36:49<5:13:11,  2.73it/s]

Epoch 1 #6050 -- loss: 0.17870107177644967, acc: 0.93


 11%|█         | 6100/57292 [37:08<5:12:32,  2.73it/s]

Epoch 1 #6100 -- loss: 0.18953684452921152, acc: 0.92375


 11%|█         | 6150/57292 [37:26<5:12:19,  2.73it/s]

Epoch 1 #6150 -- loss: 0.19369390197098255, acc: 0.92375


 11%|█         | 6200/57292 [37:44<5:11:45,  2.73it/s]

Epoch 1 #6200 -- loss: 0.17635438296943903, acc: 0.92875


 11%|█         | 6250/57292 [38:03<5:10:58,  2.74it/s]

Epoch 1 #6250 -- loss: 0.20130745209753514, acc: 0.92


 11%|█         | 6300/57292 [38:21<5:11:07,  2.73it/s]

Epoch 1 #6300 -- loss: 0.18093709874898195, acc: 0.92375


 11%|█         | 6350/57292 [38:39<5:11:14,  2.73it/s]

Epoch 1 #6350 -- loss: 0.17656877797096968, acc: 0.935


 11%|█         | 6400/57292 [38:58<5:10:02,  2.74it/s]

Epoch 1 #6400 -- loss: 0.1618574323132634, acc: 0.93875


 11%|█▏        | 6450/57292 [39:16<5:10:01,  2.73it/s]

Epoch 1 #6450 -- loss: 0.1825945521146059, acc: 0.92875


 11%|█▏        | 6500/57292 [39:34<5:09:22,  2.74it/s]

Epoch 1 #6500 -- loss: 0.17287477649748326, acc: 0.9325


 11%|█▏        | 6550/57292 [39:53<5:09:42,  2.73it/s]

Epoch 1 #6550 -- loss: 0.20430655792355537, acc: 0.91125


 12%|█▏        | 6600/57292 [40:11<5:08:50,  2.74it/s]

Epoch 1 #6600 -- loss: 0.1885421358793974, acc: 0.9275


 12%|█▏        | 6650/57292 [40:29<5:08:53,  2.73it/s]

Epoch 1 #6650 -- loss: 0.16108036831021308, acc: 0.9475


 12%|█▏        | 6700/57292 [40:47<5:08:01,  2.74it/s]

Epoch 1 #6700 -- loss: 0.21026648312807084, acc: 0.92375


 12%|█▏        | 6750/57292 [41:06<5:08:49,  2.73it/s]

Epoch 1 #6750 -- loss: 0.14223691504448652, acc: 0.94875


 12%|█▏        | 6800/57292 [41:24<5:08:03,  2.73it/s]

Epoch 1 #6800 -- loss: 0.17851904965937138, acc: 0.93375


 12%|█▏        | 6850/57292 [41:42<5:07:42,  2.73it/s]

Epoch 1 #6850 -- loss: 0.17549221571534873, acc: 0.925


 12%|█▏        | 6900/57292 [42:01<5:07:06,  2.73it/s]

Epoch 1 #6900 -- loss: 0.16688159834593536, acc: 0.92625


 12%|█▏        | 6950/57292 [42:19<5:06:47,  2.73it/s]

Epoch 1 #6950 -- loss: 0.17687431287020444, acc: 0.9375


 12%|█▏        | 7000/57292 [42:37<5:06:15,  2.74it/s]

Epoch 1 #7000 -- loss: 0.1870381835848093, acc: 0.93


 12%|█▏        | 7050/57292 [42:55<5:06:26,  2.73it/s]

Epoch 1 #7050 -- loss: 0.19503101784735918, acc: 0.925


 12%|█▏        | 7100/57292 [43:14<5:05:34,  2.74it/s]

Epoch 1 #7100 -- loss: 0.1861802126467228, acc: 0.9325


 12%|█▏        | 7150/57292 [43:32<5:05:44,  2.73it/s]

Epoch 1 #7150 -- loss: 0.1963225954025984, acc: 0.92125


 13%|█▎        | 7200/57292 [43:50<5:05:34,  2.73it/s]

Epoch 1 #7200 -- loss: 0.18796640232205392, acc: 0.9275


 13%|█▎        | 7250/57292 [44:09<5:04:50,  2.74it/s]

Epoch 1 #7250 -- loss: 0.18673690762370826, acc: 0.92875


 13%|█▎        | 7300/57292 [44:27<5:04:43,  2.73it/s]

Epoch 1 #7300 -- loss: 0.20144677706062794, acc: 0.91875


 13%|█▎        | 7350/57292 [44:45<5:04:36,  2.73it/s]

Epoch 1 #7350 -- loss: 0.1537449738010764, acc: 0.94375


 13%|█▎        | 7400/57292 [45:03<5:04:10,  2.73it/s]

Epoch 1 #7400 -- loss: 0.2049794241040945, acc: 0.915


 13%|█▎        | 7450/57292 [45:22<5:03:51,  2.73it/s]

Epoch 1 #7450 -- loss: 0.1938347679749131, acc: 0.925


 13%|█▎        | 7500/57292 [45:40<5:03:23,  2.74it/s]

Epoch 1 #7500 -- loss: 0.18631877172738315, acc: 0.92875


 13%|█▎        | 7550/57292 [45:58<5:02:23,  2.74it/s]

Epoch 1 #7550 -- loss: 0.20456772185862065, acc: 0.915


 13%|█▎        | 7600/57292 [46:16<5:02:57,  2.73it/s]

Epoch 1 #7600 -- loss: 0.1823919116705656, acc: 0.935


 13%|█▎        | 7650/57292 [46:35<5:02:47,  2.73it/s]

Epoch 1 #7650 -- loss: 0.17319821190088988, acc: 0.93625


 13%|█▎        | 7700/57292 [46:53<5:02:08,  2.74it/s]

Epoch 1 #7700 -- loss: 0.23098346933722497, acc: 0.9175


 14%|█▎        | 7750/57292 [47:11<5:01:29,  2.74it/s]

Epoch 1 #7750 -- loss: 0.18602771930396556, acc: 0.9225


 14%|█▎        | 7800/57292 [47:30<5:01:09,  2.74it/s]

Epoch 1 #7800 -- loss: 0.18197546839714052, acc: 0.93


 14%|█▎        | 7850/57292 [47:48<5:01:20,  2.73it/s]

Epoch 1 #7850 -- loss: 0.21544381529092788, acc: 0.915


 14%|█▍        | 7900/57292 [48:06<5:01:10,  2.73it/s]

Epoch 1 #7900 -- loss: 0.16322142113000154, acc: 0.935


 14%|█▍        | 7950/57292 [48:24<5:00:34,  2.74it/s]

Epoch 1 #7950 -- loss: 0.16546178236603737, acc: 0.9375


 14%|█▍        | 8000/57292 [48:43<5:00:36,  2.73it/s]

Epoch 1 #8000 -- loss: 0.18241714216768742, acc: 0.92875


 14%|█▍        | 8050/57292 [49:01<5:00:26,  2.73it/s]

Epoch 1 #8050 -- loss: 0.17939143538475036, acc: 0.9275


 14%|█▍        | 8100/57292 [49:19<4:59:59,  2.73it/s]

Epoch 1 #8100 -- loss: 0.19877280458807944, acc: 0.92875


 14%|█▍        | 8150/57292 [49:38<4:59:48,  2.73it/s]

Epoch 1 #8150 -- loss: 0.19241019655019045, acc: 0.93375


 14%|█▍        | 8200/57292 [49:56<4:59:06,  2.74it/s]

Epoch 1 #8200 -- loss: 0.2132522138953209, acc: 0.9125


 14%|█▍        | 8250/57292 [50:14<4:58:52,  2.73it/s]

Epoch 1 #8250 -- loss: 0.23237568274140358, acc: 0.90625


 14%|█▍        | 8300/57292 [50:32<4:58:52,  2.73it/s]

Epoch 1 #8300 -- loss: 0.20187040504068135, acc: 0.92125


 15%|█▍        | 8350/57292 [50:51<4:58:56,  2.73it/s]

Epoch 1 #8350 -- loss: 0.18167885582894086, acc: 0.93125


 15%|█▍        | 8400/57292 [51:09<4:58:27,  2.73it/s]

Epoch 1 #8400 -- loss: 0.19283217508345843, acc: 0.91875


 15%|█▍        | 8450/57292 [51:27<4:58:23,  2.73it/s]

Epoch 1 #8450 -- loss: 0.18192119181156158, acc: 0.9275


 15%|█▍        | 8500/57292 [51:46<4:57:49,  2.73it/s]

Epoch 1 #8500 -- loss: 0.17691324956715107, acc: 0.93875


 15%|█▍        | 8550/57292 [52:04<4:57:39,  2.73it/s]

Epoch 1 #8550 -- loss: 0.1758886842802167, acc: 0.93


 15%|█▌        | 8600/57292 [52:22<4:56:48,  2.73it/s]

Epoch 1 #8600 -- loss: 0.18733459398150443, acc: 0.92


 15%|█▌        | 8650/57292 [52:41<4:57:28,  2.73it/s]

Epoch 1 #8650 -- loss: 0.18020940147340297, acc: 0.9325


 15%|█▌        | 8700/57292 [52:59<4:56:24,  2.73it/s]

Epoch 1 #8700 -- loss: 0.19168118160218, acc: 0.9275


 15%|█▌        | 8750/57292 [53:17<4:56:21,  2.73it/s]

Epoch 1 #8750 -- loss: 0.16431906297802926, acc: 0.94375


 15%|█▌        | 8800/57292 [53:35<4:55:40,  2.73it/s]

Epoch 1 #8800 -- loss: 0.1655087712407112, acc: 0.9325


 15%|█▌        | 8850/57292 [53:54<4:55:09,  2.74it/s]

Epoch 1 #8850 -- loss: 0.18893350906670092, acc: 0.92625


 16%|█▌        | 8900/57292 [54:12<4:55:22,  2.73it/s]

Epoch 1 #8900 -- loss: 0.16708932120352984, acc: 0.92625


 16%|█▌        | 8950/57292 [54:30<4:55:10,  2.73it/s]

Epoch 1 #8950 -- loss: 0.1792132543027401, acc: 0.92125


 16%|█▌        | 9000/57292 [54:49<4:54:21,  2.73it/s]

Epoch 1 #9000 -- loss: 0.20023041624575855, acc: 0.915


 16%|█▌        | 9050/57292 [55:07<4:54:17,  2.73it/s]

Epoch 1 #9050 -- loss: 0.1803526196628809, acc: 0.925


 16%|█▌        | 9100/57292 [55:25<4:53:40,  2.74it/s]

Epoch 1 #9100 -- loss: 0.20163463167846202, acc: 0.92625


 16%|█▌        | 9150/57292 [55:43<4:54:13,  2.73it/s]

Epoch 1 #9150 -- loss: 0.18510302618145943, acc: 0.93375


 16%|█▌        | 9200/57292 [56:02<4:53:19,  2.73it/s]

Epoch 1 #9200 -- loss: 0.16762526389211416, acc: 0.94375


 16%|█▌        | 9250/57292 [56:20<4:53:21,  2.73it/s]

Epoch 1 #9250 -- loss: 0.1815593622252345, acc: 0.92625


 16%|█▌        | 9300/57292 [56:38<4:52:10,  2.74it/s]

Epoch 1 #9300 -- loss: 0.19526593830436467, acc: 0.9225


 16%|█▋        | 9350/57292 [56:57<4:51:39,  2.74it/s]

Epoch 1 #9350 -- loss: 0.22493887990713118, acc: 0.91875


 16%|█▋        | 9400/57292 [57:15<4:51:39,  2.74it/s]

Epoch 1 #9400 -- loss: 0.19726622357964516, acc: 0.92625


 16%|█▋        | 9450/57292 [57:33<4:51:21,  2.74it/s]

Epoch 1 #9450 -- loss: 0.18501684125512838, acc: 0.92125


 17%|█▋        | 9500/57292 [57:51<4:51:09,  2.74it/s]

Epoch 1 #9500 -- loss: 0.18898753948509694, acc: 0.93625


 17%|█▋        | 9550/57292 [58:10<4:51:15,  2.73it/s]

Epoch 1 #9550 -- loss: 0.17776010006666185, acc: 0.93875


 17%|█▋        | 9600/57292 [58:28<4:50:27,  2.74it/s]

Epoch 1 #9600 -- loss: 0.20489519651979207, acc: 0.91


 17%|█▋        | 9650/57292 [58:46<4:50:09,  2.74it/s]

Epoch 1 #9650 -- loss: 0.1901144677400589, acc: 0.93


 17%|█▋        | 9700/57292 [59:05<4:49:48,  2.74it/s]

Epoch 1 #9700 -- loss: 0.17136526498943566, acc: 0.9375


 17%|█▋        | 9750/57292 [59:23<4:49:35,  2.74it/s]

Epoch 1 #9750 -- loss: 0.1732952093333006, acc: 0.93125


 17%|█▋        | 9800/57292 [59:41<4:48:58,  2.74it/s]

Epoch 1 #9800 -- loss: 0.16870561309158802, acc: 0.9325


 17%|█▋        | 9850/57292 [59:59<4:49:20,  2.73it/s]

Epoch 1 #9850 -- loss: 0.1743403860926628, acc: 0.93125


 17%|█▋        | 9900/57292 [1:00:18<4:48:15,  2.74it/s]

Epoch 1 #9900 -- loss: 0.19498536571860314, acc: 0.91625


 17%|█▋        | 9950/57292 [1:00:36<4:48:33,  2.73it/s]

Epoch 1 #9950 -- loss: 0.1679042860493064, acc: 0.93875


 17%|█▋        | 10000/57292 [1:00:54<4:47:47,  2.74it/s]

Epoch 1 #10000 -- loss: 0.1931044378131628, acc: 0.92875


 18%|█▊        | 10050/57292 [1:01:12<4:47:23,  2.74it/s]

Epoch 1 #10050 -- loss: 0.18931709855794907, acc: 0.91875


 18%|█▊        | 10100/57292 [1:01:31<4:47:25,  2.74it/s]

Epoch 1 #10100 -- loss: 0.18928086891770363, acc: 0.9175


 18%|█▊        | 10150/57292 [1:01:49<4:47:50,  2.73it/s]

Epoch 1 #10150 -- loss: 0.20640594601631165, acc: 0.92125


 18%|█▊        | 10200/57292 [1:02:07<4:46:54,  2.74it/s]

Epoch 1 #10200 -- loss: 0.1671110088378191, acc: 0.9375


 18%|█▊        | 10250/57292 [1:02:26<4:47:07,  2.73it/s]

Epoch 1 #10250 -- loss: 0.16005077531561254, acc: 0.93625


 18%|█▊        | 10300/57292 [1:02:44<4:46:58,  2.73it/s]

Epoch 1 #10300 -- loss: 0.18202390715479852, acc: 0.925


 18%|█▊        | 10350/57292 [1:03:02<4:45:46,  2.74it/s]

Epoch 1 #10350 -- loss: 0.20098179310560227, acc: 0.92625


 18%|█▊        | 10400/57292 [1:03:20<4:46:00,  2.73it/s]

Epoch 1 #10400 -- loss: 0.18424935169517995, acc: 0.93125


 18%|█▊        | 10450/57292 [1:03:39<4:45:48,  2.73it/s]

Epoch 1 #10450 -- loss: 0.1831528329476714, acc: 0.9275


 18%|█▊        | 10500/57292 [1:03:57<4:44:53,  2.74it/s]

Epoch 1 #10500 -- loss: 0.19348118860274555, acc: 0.92625


 18%|█▊        | 10550/57292 [1:04:15<4:44:30,  2.74it/s]

Epoch 1 #10550 -- loss: 0.19038808174431324, acc: 0.92625


 19%|█▊        | 10600/57292 [1:04:34<4:45:17,  2.73it/s]

Epoch 1 #10600 -- loss: 0.1521913878619671, acc: 0.94125


 19%|█▊        | 10650/57292 [1:04:52<4:44:53,  2.73it/s]

Epoch 1 #10650 -- loss: 0.20094013132154942, acc: 0.92


 19%|█▊        | 10700/57292 [1:05:10<4:44:50,  2.73it/s]

Epoch 1 #10700 -- loss: 0.22407738581299783, acc: 0.91


 19%|█▉        | 10750/57292 [1:05:29<4:43:41,  2.73it/s]

Epoch 1 #10750 -- loss: 0.1823707127571106, acc: 0.9225


 19%|█▉        | 10800/57292 [1:05:47<4:44:02,  2.73it/s]

Epoch 1 #10800 -- loss: 0.2216666976362467, acc: 0.9225


 19%|█▉        | 10850/57292 [1:06:05<4:44:06,  2.72it/s]

Epoch 1 #10850 -- loss: 0.18819150157272815, acc: 0.92375


 19%|█▉        | 10900/57292 [1:06:24<4:43:14,  2.73it/s]

Epoch 1 #10900 -- loss: 0.1837086056917906, acc: 0.93125


 19%|█▉        | 10950/57292 [1:06:42<4:42:40,  2.73it/s]

Epoch 1 #10950 -- loss: 0.16653096996247768, acc: 0.93625


 19%|█▉        | 11000/57292 [1:07:00<4:42:29,  2.73it/s]

Epoch 1 #11000 -- loss: 0.18345936495810747, acc: 0.92375


 19%|█▉        | 11050/57292 [1:07:18<4:42:17,  2.73it/s]

Epoch 1 #11050 -- loss: 0.18545860156416893, acc: 0.9275


 19%|█▉        | 11100/57292 [1:07:37<4:42:11,  2.73it/s]

Epoch 1 #11100 -- loss: 0.17719298649579288, acc: 0.92625


 19%|█▉        | 11150/57292 [1:07:55<4:40:50,  2.74it/s]

Epoch 1 #11150 -- loss: 0.18915630914270878, acc: 0.92875


 20%|█▉        | 11200/57292 [1:08:13<4:41:03,  2.73it/s]

Epoch 1 #11200 -- loss: 0.18862302727997304, acc: 0.93


 20%|█▉        | 11250/57292 [1:08:32<4:41:24,  2.73it/s]

Epoch 1 #11250 -- loss: 0.2049124875664711, acc: 0.9275


 20%|█▉        | 11300/57292 [1:08:50<4:40:19,  2.73it/s]

Epoch 1 #11300 -- loss: 0.21046467714011668, acc: 0.925


 20%|█▉        | 11350/57292 [1:09:08<4:40:08,  2.73it/s]

Epoch 1 #11350 -- loss: 0.19512553710490466, acc: 0.92125


 20%|█▉        | 11400/57292 [1:09:27<4:39:52,  2.73it/s]

Epoch 1 #11400 -- loss: 0.17506917983293532, acc: 0.9325


 20%|█▉        | 11450/57292 [1:09:45<4:38:58,  2.74it/s]

Epoch 1 #11450 -- loss: 0.20681206688284873, acc: 0.9225


 20%|██        | 11500/57292 [1:10:03<4:39:09,  2.73it/s]

Epoch 1 #11500 -- loss: 0.212859855145216, acc: 0.92375


 20%|██        | 11550/57292 [1:10:21<4:38:44,  2.74it/s]

Epoch 1 #11550 -- loss: 0.1785413508117199, acc: 0.92875


 20%|██        | 11600/57292 [1:10:40<4:38:08,  2.74it/s]

Epoch 1 #11600 -- loss: 0.18557209581136702, acc: 0.925


 20%|██        | 11650/57292 [1:10:58<4:37:26,  2.74it/s]

Epoch 1 #11650 -- loss: 0.18363958768546582, acc: 0.9175


 20%|██        | 11700/57292 [1:11:16<4:38:10,  2.73it/s]

Epoch 1 #11700 -- loss: 0.2030733772367239, acc: 0.91625


 21%|██        | 11750/57292 [1:11:34<4:37:09,  2.74it/s]

Epoch 1 #11750 -- loss: 0.21342952087521552, acc: 0.915


 21%|██        | 11800/57292 [1:11:53<4:36:49,  2.74it/s]

Epoch 1 #11800 -- loss: 0.16694813959300517, acc: 0.94125


 21%|██        | 11850/57292 [1:12:11<4:36:27,  2.74it/s]

Epoch 1 #11850 -- loss: 0.1835482122004032, acc: 0.93125


 21%|██        | 11900/57292 [1:12:29<4:36:18,  2.74it/s]

Epoch 1 #11900 -- loss: 0.19821986861526966, acc: 0.905


 21%|██        | 11950/57292 [1:12:47<4:36:11,  2.74it/s]

Epoch 1 #11950 -- loss: 0.2211827130615711, acc: 0.90875


 21%|██        | 12000/57292 [1:13:06<4:36:05,  2.73it/s]

Epoch 1 #12000 -- loss: 0.17744563333690166, acc: 0.935


 21%|██        | 12050/57292 [1:13:24<4:35:23,  2.74it/s]

Epoch 1 #12050 -- loss: 0.23280643220990896, acc: 0.905


 21%|██        | 12100/57292 [1:13:42<4:34:51,  2.74it/s]

Epoch 1 #12100 -- loss: 0.16588044956326484, acc: 0.93875


 21%|██        | 12150/57292 [1:14:01<4:34:49,  2.74it/s]

Epoch 1 #12150 -- loss: 0.1935301997512579, acc: 0.91875


 21%|██▏       | 12200/57292 [1:14:19<4:34:22,  2.74it/s]

Epoch 1 #12200 -- loss: 0.16212984800338745, acc: 0.93375


 21%|██▏       | 12250/57292 [1:14:37<4:34:14,  2.74it/s]

Epoch 1 #12250 -- loss: 0.2050205408781767, acc: 0.92


 21%|██▏       | 12300/57292 [1:14:55<4:33:50,  2.74it/s]

Epoch 1 #12300 -- loss: 0.16128157816827296, acc: 0.9325


 22%|██▏       | 12350/57292 [1:15:14<4:33:45,  2.74it/s]

Epoch 1 #12350 -- loss: 0.19662919707596302, acc: 0.92125


 22%|██▏       | 12400/57292 [1:15:32<4:33:28,  2.74it/s]

Epoch 1 #12400 -- loss: 0.1842714747786522, acc: 0.9175


 22%|██▏       | 12450/57292 [1:15:50<4:33:08,  2.74it/s]

Epoch 1 #12450 -- loss: 0.16155113466084003, acc: 0.9375


 22%|██▏       | 12500/57292 [1:16:08<4:32:49,  2.74it/s]

Epoch 1 #12500 -- loss: 0.18864455308765174, acc: 0.91875


 22%|██▏       | 12550/57292 [1:16:27<4:32:42,  2.73it/s]

Epoch 1 #12550 -- loss: 0.19241170931607485, acc: 0.925


 22%|██▏       | 12600/57292 [1:16:45<4:32:00,  2.74it/s]

Epoch 1 #12600 -- loss: 0.2022173384949565, acc: 0.91875


 22%|██▏       | 12650/57292 [1:17:03<4:31:55,  2.74it/s]

Epoch 1 #12650 -- loss: 0.16117893487215043, acc: 0.93875


 22%|██▏       | 12700/57292 [1:17:21<4:31:46,  2.73it/s]

Epoch 1 #12700 -- loss: 0.18974237833172083, acc: 0.92875


 22%|██▏       | 12750/57292 [1:17:40<4:31:33,  2.73it/s]

Epoch 1 #12750 -- loss: 0.17128879122436047, acc: 0.93


 22%|██▏       | 12800/57292 [1:17:58<4:30:49,  2.74it/s]

Epoch 1 #12800 -- loss: 0.16199217267334462, acc: 0.93625


 22%|██▏       | 12850/57292 [1:18:16<4:30:28,  2.74it/s]

Epoch 1 #12850 -- loss: 0.1525236852839589, acc: 0.9425


 23%|██▎       | 12900/57292 [1:18:35<4:30:23,  2.74it/s]

Epoch 1 #12900 -- loss: 0.16244941718876363, acc: 0.945


 23%|██▎       | 12950/57292 [1:18:53<4:30:22,  2.73it/s]

Epoch 1 #12950 -- loss: 0.15783160600811244, acc: 0.9475


 23%|██▎       | 13000/57292 [1:19:11<4:29:56,  2.73it/s]

Epoch 1 #13000 -- loss: 0.17247141063213348, acc: 0.935


 23%|██▎       | 13050/57292 [1:19:29<4:29:57,  2.73it/s]

Epoch 1 #13050 -- loss: 0.17659536592662334, acc: 0.92875


 23%|██▎       | 13100/57292 [1:19:48<4:30:21,  2.72it/s]

Epoch 1 #13100 -- loss: 0.15653008468449114, acc: 0.9375


 23%|██▎       | 13150/57292 [1:20:06<4:29:22,  2.73it/s]

Epoch 1 #13150 -- loss: 0.17900002431124448, acc: 0.93


 23%|██▎       | 13200/57292 [1:20:24<4:28:19,  2.74it/s]

Epoch 1 #13200 -- loss: 0.18387863036245108, acc: 0.92625


 23%|██▎       | 13250/57292 [1:20:43<4:28:17,  2.74it/s]

Epoch 1 #13250 -- loss: 0.20641015127301215, acc: 0.91875


 23%|██▎       | 13300/57292 [1:21:01<4:27:09,  2.74it/s]

Epoch 1 #13300 -- loss: 0.20057308994233608, acc: 0.91375


 23%|██▎       | 13350/57292 [1:21:19<4:27:51,  2.73it/s]

Epoch 1 #13350 -- loss: 0.19624780371785164, acc: 0.9175


 23%|██▎       | 13400/57292 [1:21:37<4:27:12,  2.74it/s]

Epoch 1 #13400 -- loss: 0.18861096244305373, acc: 0.93375


 23%|██▎       | 13450/57292 [1:21:56<4:27:06,  2.74it/s]

Epoch 1 #13450 -- loss: 0.20138284418731928, acc: 0.91625


 24%|██▎       | 13500/57292 [1:22:14<4:26:53,  2.73it/s]

Epoch 1 #13500 -- loss: 0.17644298635423183, acc: 0.9275


 24%|██▎       | 13550/57292 [1:22:32<4:26:14,  2.74it/s]

Epoch 1 #13550 -- loss: 0.1922954512387514, acc: 0.92375


 24%|██▎       | 13600/57292 [1:22:50<4:25:49,  2.74it/s]

Epoch 1 #13600 -- loss: 0.16687415458261967, acc: 0.9325


 24%|██▍       | 13650/57292 [1:23:09<4:25:42,  2.74it/s]

Epoch 1 #13650 -- loss: 0.2123319938033819, acc: 0.9175


 24%|██▍       | 13700/57292 [1:23:27<4:25:46,  2.73it/s]

Epoch 1 #13700 -- loss: 0.18066183011978865, acc: 0.92625


 24%|██▍       | 13750/57292 [1:23:45<4:25:23,  2.73it/s]

Epoch 1 #13750 -- loss: 0.2161950348317623, acc: 0.9225


 24%|██▍       | 13800/57292 [1:24:04<4:25:08,  2.73it/s]

Epoch 1 #13800 -- loss: 0.16758782643824816, acc: 0.93875


 24%|██▍       | 13850/57292 [1:24:22<4:24:56,  2.73it/s]

Epoch 1 #13850 -- loss: 0.19594436541199683, acc: 0.9225


 24%|██▍       | 13900/57292 [1:24:40<4:24:41,  2.73it/s]

Epoch 1 #13900 -- loss: 0.20811727963387966, acc: 0.91


 24%|██▍       | 13950/57292 [1:24:58<4:23:55,  2.74it/s]

Epoch 1 #13950 -- loss: 0.19236299861222506, acc: 0.92125


 24%|██▍       | 14000/57292 [1:25:17<4:23:23,  2.74it/s]

Epoch 1 #14000 -- loss: 0.20297350578010082, acc: 0.9175


 25%|██▍       | 14050/57292 [1:25:35<4:23:39,  2.73it/s]

Epoch 1 #14050 -- loss: 0.16857676923274995, acc: 0.9325


 25%|██▍       | 14100/57292 [1:25:53<4:22:48,  2.74it/s]

Epoch 1 #14100 -- loss: 0.1734204503148794, acc: 0.94125


 25%|██▍       | 14150/57292 [1:26:11<4:23:11,  2.73it/s]

Epoch 1 #14150 -- loss: 0.18672353997826577, acc: 0.92375


 25%|██▍       | 14200/57292 [1:26:30<4:22:37,  2.73it/s]

Epoch 1 #14200 -- loss: 0.17812741801142692, acc: 0.93375


 25%|██▍       | 14250/57292 [1:26:48<4:22:20,  2.73it/s]

Epoch 1 #14250 -- loss: 0.18035585954785346, acc: 0.93375


 25%|██▍       | 14300/57292 [1:27:06<4:21:47,  2.74it/s]

Epoch 1 #14300 -- loss: 0.1799249029904604, acc: 0.93


 25%|██▌       | 14350/57292 [1:27:25<4:21:43,  2.73it/s]

Epoch 1 #14350 -- loss: 0.18161262810230255, acc: 0.92625


 25%|██▌       | 14400/57292 [1:27:43<4:20:52,  2.74it/s]

Epoch 1 #14400 -- loss: 0.15915812619030476, acc: 0.93625


 25%|██▌       | 14450/57292 [1:28:01<4:21:02,  2.74it/s]

Epoch 1 #14450 -- loss: 0.16439684581011535, acc: 0.92875


 25%|██▌       | 14500/57292 [1:28:19<4:20:43,  2.74it/s]

Epoch 1 #14500 -- loss: 0.19628339316695928, acc: 0.925


 25%|██▌       | 14550/57292 [1:28:38<4:19:57,  2.74it/s]

Epoch 1 #14550 -- loss: 0.21397351033985615, acc: 0.9075


 25%|██▌       | 14600/57292 [1:28:56<4:20:18,  2.73it/s]

Epoch 1 #14600 -- loss: 0.20763504795730114, acc: 0.9275


 26%|██▌       | 14650/57292 [1:29:14<4:19:21,  2.74it/s]

Epoch 1 #14650 -- loss: 0.17442395247519016, acc: 0.9325


 26%|██▌       | 14700/57292 [1:29:32<4:19:44,  2.73it/s]

Epoch 1 #14700 -- loss: 0.20109226919710635, acc: 0.91375


 26%|██▌       | 14750/57292 [1:29:51<4:18:44,  2.74it/s]

Epoch 1 #14750 -- loss: 0.16912334449589253, acc: 0.935


 26%|██▌       | 14800/57292 [1:30:09<4:18:53,  2.74it/s]

Epoch 1 #14800 -- loss: 0.17903203584253788, acc: 0.93625


 26%|██▌       | 14850/57292 [1:30:27<4:18:45,  2.73it/s]

Epoch 1 #14850 -- loss: 0.18616938270628453, acc: 0.925


 26%|██▌       | 14900/57292 [1:30:46<4:18:29,  2.73it/s]

Epoch 1 #14900 -- loss: 0.18096837926656006, acc: 0.93125


 26%|██▌       | 14950/57292 [1:31:04<4:17:55,  2.74it/s]

Epoch 1 #14950 -- loss: 0.17055353194475173, acc: 0.93875


 26%|██▌       | 15000/57292 [1:31:22<4:17:42,  2.74it/s]

Epoch 1 #15000 -- loss: 0.18425693396478893, acc: 0.92375


 26%|██▋       | 15050/57292 [1:31:40<4:17:21,  2.74it/s]

Epoch 1 #15050 -- loss: 0.1917313304543495, acc: 0.91625


 26%|██▋       | 15100/57292 [1:31:59<4:17:06,  2.74it/s]

Epoch 1 #15100 -- loss: 0.17741550855338573, acc: 0.93125


 26%|██▋       | 15150/57292 [1:32:17<4:17:13,  2.73it/s]

Epoch 1 #15150 -- loss: 0.21377615727484225, acc: 0.9075


 27%|██▋       | 15200/57292 [1:32:35<4:16:47,  2.73it/s]

Epoch 1 #15200 -- loss: 0.17773650374263525, acc: 0.935


 27%|██▋       | 15250/57292 [1:32:54<4:16:24,  2.73it/s]

Epoch 1 #15250 -- loss: 0.1849061006680131, acc: 0.9275


 27%|██▋       | 15300/57292 [1:33:12<4:15:58,  2.73it/s]

Epoch 1 #15300 -- loss: 0.1920171445608139, acc: 0.92625


 27%|██▋       | 15350/57292 [1:33:30<4:16:00,  2.73it/s]

Epoch 1 #15350 -- loss: 0.18238079756498338, acc: 0.92375


 27%|██▋       | 15400/57292 [1:33:48<4:16:09,  2.73it/s]

Epoch 1 #15400 -- loss: 0.195090614259243, acc: 0.92125


 27%|██▋       | 15450/57292 [1:34:07<4:15:30,  2.73it/s]

Epoch 1 #15450 -- loss: 0.19687533840537072, acc: 0.91125


 27%|██▋       | 15500/57292 [1:34:25<4:15:08,  2.73it/s]

Epoch 1 #15500 -- loss: 0.1815586780011654, acc: 0.92875


 27%|██▋       | 15550/57292 [1:34:43<4:14:57,  2.73it/s]

Epoch 1 #15550 -- loss: 0.20539778254926205, acc: 0.92


 27%|██▋       | 15600/57292 [1:35:02<4:14:30,  2.73it/s]

Epoch 1 #15600 -- loss: 0.20041307620704174, acc: 0.915


 27%|██▋       | 15650/57292 [1:35:20<4:13:38,  2.74it/s]

Epoch 1 #15650 -- loss: 0.14750075478106736, acc: 0.94375


 27%|██▋       | 15700/57292 [1:35:38<4:13:09,  2.74it/s]

Epoch 1 #15700 -- loss: 0.1931135755777359, acc: 0.91875


 27%|██▋       | 15750/57292 [1:35:57<4:13:13,  2.73it/s]

Epoch 1 #15750 -- loss: 0.19096079133450986, acc: 0.92625


 28%|██▊       | 15800/57292 [1:36:15<4:12:30,  2.74it/s]

Epoch 1 #15800 -- loss: 0.1974847337603569, acc: 0.9225


 28%|██▊       | 15850/57292 [1:36:33<4:12:45,  2.73it/s]

Epoch 1 #15850 -- loss: 0.21810296826064587, acc: 0.90375


 28%|██▊       | 15900/57292 [1:36:51<4:12:09,  2.74it/s]

Epoch 1 #15900 -- loss: 0.17515963360667228, acc: 0.93375


 28%|██▊       | 15950/57292 [1:37:10<4:11:44,  2.74it/s]

Epoch 1 #15950 -- loss: 0.21146356485784054, acc: 0.9225


 28%|██▊       | 16000/57292 [1:37:28<4:11:24,  2.74it/s]

Epoch 1 #16000 -- loss: 0.19798109274357556, acc: 0.92375


 28%|██▊       | 16050/57292 [1:37:46<4:11:18,  2.74it/s]

Epoch 1 #16050 -- loss: 0.14917550697922707, acc: 0.94875


 28%|██▊       | 16100/57292 [1:38:05<4:10:31,  2.74it/s]

Epoch 1 #16100 -- loss: 0.17069510802626608, acc: 0.9275


 28%|██▊       | 16150/57292 [1:38:23<4:10:30,  2.74it/s]

Epoch 1 #16150 -- loss: 0.1834948732703924, acc: 0.92375


 28%|██▊       | 16200/57292 [1:38:41<4:10:36,  2.73it/s]

Epoch 1 #16200 -- loss: 0.18369216129183769, acc: 0.92625


 28%|██▊       | 16250/57292 [1:38:59<4:10:05,  2.74it/s]

Epoch 1 #16250 -- loss: 0.16571986816823484, acc: 0.94


 28%|██▊       | 16300/57292 [1:39:18<4:09:31,  2.74it/s]

Epoch 1 #16300 -- loss: 0.19569246478378774, acc: 0.9225


 29%|██▊       | 16350/57292 [1:39:36<4:09:34,  2.73it/s]

Epoch 1 #16350 -- loss: 0.2142572420835495, acc: 0.90875


 29%|██▊       | 16400/57292 [1:39:54<4:09:21,  2.73it/s]

Epoch 1 #16400 -- loss: 0.1856237268447876, acc: 0.925


 29%|██▊       | 16450/57292 [1:40:12<4:08:55,  2.73it/s]

Epoch 1 #16450 -- loss: 0.18937498189508914, acc: 0.925


 29%|██▉       | 16500/57292 [1:40:31<4:08:12,  2.74it/s]

Epoch 1 #16500 -- loss: 0.1892052637785673, acc: 0.93125


 29%|██▉       | 16550/57292 [1:40:49<4:08:12,  2.74it/s]

Epoch 1 #16550 -- loss: 0.17881806652992963, acc: 0.93


 29%|██▉       | 16600/57292 [1:41:07<4:08:05,  2.73it/s]

Epoch 1 #16600 -- loss: 0.23042339257895947, acc: 0.90375


 29%|██▉       | 16650/57292 [1:41:26<4:07:53,  2.73it/s]

Epoch 1 #16650 -- loss: 0.16968673285096883, acc: 0.9325


 29%|██▉       | 16700/57292 [1:41:44<4:07:23,  2.73it/s]

Epoch 1 #16700 -- loss: 0.21095137611031534, acc: 0.91125


 29%|██▉       | 16750/57292 [1:42:02<4:07:48,  2.73it/s]

Epoch 1 #16750 -- loss: 0.17980480402708054, acc: 0.9375


 29%|██▉       | 16800/57292 [1:42:20<4:07:07,  2.73it/s]

Epoch 1 #16800 -- loss: 0.21142792370170355, acc: 0.915


 29%|██▉       | 16850/57292 [1:42:39<4:06:12,  2.74it/s]

Epoch 1 #16850 -- loss: 0.18985228091478348, acc: 0.9175


 29%|██▉       | 16900/57292 [1:42:57<4:05:51,  2.74it/s]

Epoch 1 #16900 -- loss: 0.2164659920707345, acc: 0.92


 30%|██▉       | 16950/57292 [1:43:15<4:06:02,  2.73it/s]

Epoch 1 #16950 -- loss: 0.16586645133793354, acc: 0.94125


 30%|██▉       | 17000/57292 [1:43:34<4:05:27,  2.74it/s]

Epoch 1 #17000 -- loss: 0.1701043190062046, acc: 0.935


 30%|██▉       | 17050/57292 [1:43:52<4:05:07,  2.74it/s]

Epoch 1 #17050 -- loss: 0.17749300271272658, acc: 0.92875


 30%|██▉       | 17100/57292 [1:44:10<4:04:38,  2.74it/s]

Epoch 1 #17100 -- loss: 0.22363393872976303, acc: 0.91


 30%|██▉       | 17150/57292 [1:44:28<4:05:12,  2.73it/s]

Epoch 1 #17150 -- loss: 0.16385021086782217, acc: 0.94375


 30%|███       | 17200/57292 [1:44:47<4:04:47,  2.73it/s]

Epoch 1 #17200 -- loss: 0.21009319096803666, acc: 0.91375


 30%|███       | 17250/57292 [1:45:05<4:04:22,  2.73it/s]

Epoch 1 #17250 -- loss: 0.153529988899827, acc: 0.94125


 30%|███       | 17300/57292 [1:45:23<4:03:40,  2.74it/s]

Epoch 1 #17300 -- loss: 0.16608396884053944, acc: 0.93125


 30%|███       | 17350/57292 [1:45:42<4:03:11,  2.74it/s]

Epoch 1 #17350 -- loss: 0.1887134338915348, acc: 0.93


 30%|███       | 17400/57292 [1:46:00<4:03:36,  2.73it/s]

Epoch 1 #17400 -- loss: 0.18180978529155253, acc: 0.935


 30%|███       | 17450/57292 [1:46:18<4:02:58,  2.73it/s]

Epoch 1 #17450 -- loss: 0.2091617002338171, acc: 0.9225


 31%|███       | 17500/57292 [1:46:36<4:02:38,  2.73it/s]

Epoch 1 #17500 -- loss: 0.17830681685358285, acc: 0.92875


 31%|███       | 17550/57292 [1:46:55<4:02:18,  2.73it/s]

Epoch 1 #17550 -- loss: 0.18477230027318, acc: 0.93


 31%|███       | 17600/57292 [1:47:13<4:01:37,  2.74it/s]

Epoch 1 #17600 -- loss: 0.19094713777303696, acc: 0.91625


 31%|███       | 17650/57292 [1:47:31<4:01:43,  2.73it/s]

Epoch 1 #17650 -- loss: 0.1767811718210578, acc: 0.93125


 31%|███       | 17700/57292 [1:47:50<4:01:21,  2.73it/s]

Epoch 1 #17700 -- loss: 0.18798668272793292, acc: 0.92


 31%|███       | 17750/57292 [1:48:08<4:00:37,  2.74it/s]

Epoch 1 #17750 -- loss: 0.2018036478385329, acc: 0.9275


 31%|███       | 17800/57292 [1:48:26<4:00:42,  2.73it/s]

Epoch 1 #17800 -- loss: 0.16791569717228413, acc: 0.93375


 31%|███       | 17850/57292 [1:48:44<4:00:01,  2.74it/s]

Epoch 1 #17850 -- loss: 0.21518391098827125, acc: 0.9075


 31%|███       | 17900/57292 [1:49:03<3:59:41,  2.74it/s]

Epoch 1 #17900 -- loss: 0.19959613040089608, acc: 0.9275


 31%|███▏      | 17950/57292 [1:49:21<3:59:38,  2.74it/s]

Epoch 1 #17950 -- loss: 0.18621192894876004, acc: 0.91625


 31%|███▏      | 18000/57292 [1:49:39<3:59:35,  2.73it/s]

Epoch 1 #18000 -- loss: 0.17960905119776727, acc: 0.92125


 32%|███▏      | 18050/57292 [1:49:58<3:59:06,  2.74it/s]

Epoch 1 #18050 -- loss: 0.16376587830483913, acc: 0.9375


 32%|███▏      | 18100/57292 [1:50:16<3:58:48,  2.74it/s]

Epoch 1 #18100 -- loss: 0.2016765533387661, acc: 0.91375


 32%|███▏      | 18150/57292 [1:50:34<3:58:33,  2.73it/s]

Epoch 1 #18150 -- loss: 0.20523068085312843, acc: 0.9225


 32%|███▏      | 18200/57292 [1:50:52<3:57:58,  2.74it/s]

Epoch 1 #18200 -- loss: 0.173886946067214, acc: 0.935


 32%|███▏      | 18250/57292 [1:51:11<3:58:24,  2.73it/s]

Epoch 1 #18250 -- loss: 0.18139850113540887, acc: 0.93875


 32%|███▏      | 18300/57292 [1:51:29<3:57:27,  2.74it/s]

Epoch 1 #18300 -- loss: 0.20437518641352653, acc: 0.9225


 32%|███▏      | 18350/57292 [1:51:47<3:57:07,  2.74it/s]

Epoch 1 #18350 -- loss: 0.16228509284555911, acc: 0.93875


 32%|███▏      | 18400/57292 [1:52:05<3:56:50,  2.74it/s]

Epoch 1 #18400 -- loss: 0.1901218268275261, acc: 0.92375


 32%|███▏      | 18450/57292 [1:52:24<3:56:40,  2.74it/s]

Epoch 1 #18450 -- loss: 0.1758252640813589, acc: 0.93


 32%|███▏      | 18500/57292 [1:52:42<3:56:26,  2.73it/s]

Epoch 1 #18500 -- loss: 0.21062596447765827, acc: 0.92375


 32%|███▏      | 18550/57292 [1:53:00<3:55:51,  2.74it/s]

Epoch 1 #18550 -- loss: 0.16896159790456294, acc: 0.9375


 32%|███▏      | 18600/57292 [1:53:19<3:55:30,  2.74it/s]

Epoch 1 #18600 -- loss: 0.1892860071361065, acc: 0.92625


 33%|███▎      | 18650/57292 [1:53:37<3:55:48,  2.73it/s]

Epoch 1 #18650 -- loss: 0.18219366159290076, acc: 0.92375


 33%|███▎      | 18700/57292 [1:53:55<3:55:25,  2.73it/s]

Epoch 1 #18700 -- loss: 0.18158282313495874, acc: 0.92875


 33%|███▎      | 18750/57292 [1:54:13<3:55:33,  2.73it/s]

Epoch 1 #18750 -- loss: 0.14965746082365514, acc: 0.945


 33%|███▎      | 18800/57292 [1:54:32<3:54:53,  2.73it/s]

Epoch 1 #18800 -- loss: 0.2039525019004941, acc: 0.925


 33%|███▎      | 18850/57292 [1:54:50<3:54:36,  2.73it/s]

Epoch 1 #18850 -- loss: 0.17245406329631804, acc: 0.92625


 33%|███▎      | 18900/57292 [1:55:08<3:54:28,  2.73it/s]

Epoch 1 #18900 -- loss: 0.20544083628803492, acc: 0.92


 33%|███▎      | 18950/57292 [1:55:27<3:53:46,  2.73it/s]

Epoch 1 #18950 -- loss: 0.19073774132877588, acc: 0.92


 33%|███▎      | 19000/57292 [1:55:45<3:53:37,  2.73it/s]

Epoch 1 #19000 -- loss: 0.1771586564555764, acc: 0.9325


 33%|███▎      | 19050/57292 [1:56:03<3:53:31,  2.73it/s]

Epoch 1 #19050 -- loss: 0.18696999095380307, acc: 0.92625


 33%|███▎      | 19100/57292 [1:56:22<3:52:35,  2.74it/s]

Epoch 1 #19100 -- loss: 0.18310806054621934, acc: 0.9325


 33%|███▎      | 19150/57292 [1:56:40<3:52:50,  2.73it/s]

Epoch 1 #19150 -- loss: 0.2155585753545165, acc: 0.91125


 34%|███▎      | 19200/57292 [1:56:58<3:52:14,  2.73it/s]

Epoch 1 #19200 -- loss: 0.15353654097765684, acc: 0.94


 34%|███▎      | 19250/57292 [1:57:16<3:51:37,  2.74it/s]

Epoch 1 #19250 -- loss: 0.22086191661655902, acc: 0.91625


 34%|███▎      | 19300/57292 [1:57:35<3:51:23,  2.74it/s]

Epoch 1 #19300 -- loss: 0.1849597428739071, acc: 0.9275


 34%|███▍      | 19350/57292 [1:57:53<3:51:17,  2.73it/s]

Epoch 1 #19350 -- loss: 0.1763819045946002, acc: 0.9275


 34%|███▍      | 19400/57292 [1:58:11<3:50:47,  2.74it/s]

Epoch 1 #19400 -- loss: 0.190829305537045, acc: 0.9275


 34%|███▍      | 19450/57292 [1:58:30<3:50:35,  2.74it/s]

Epoch 1 #19450 -- loss: 0.1886374945193529, acc: 0.92875


 34%|███▍      | 19500/57292 [1:58:48<3:50:33,  2.73it/s]

Epoch 1 #19500 -- loss: 0.18920865289866926, acc: 0.93125


 34%|███▍      | 19550/57292 [1:59:06<3:50:04,  2.73it/s]

Epoch 1 #19550 -- loss: 0.1815987095050514, acc: 0.92875


 34%|███▍      | 19600/57292 [1:59:24<3:49:51,  2.73it/s]

Epoch 1 #19600 -- loss: 0.21162602875381709, acc: 0.91125


 34%|███▍      | 19650/57292 [1:59:43<3:49:16,  2.74it/s]

Epoch 1 #19650 -- loss: 0.211012897901237, acc: 0.9175


 34%|███▍      | 19700/57292 [2:00:01<3:48:51,  2.74it/s]

Epoch 1 #19700 -- loss: 0.2030224947631359, acc: 0.91375


 34%|███▍      | 19750/57292 [2:00:19<3:48:58,  2.73it/s]

Epoch 1 #19750 -- loss: 0.1828910641372204, acc: 0.92875


 35%|███▍      | 19800/57292 [2:00:38<3:48:13,  2.74it/s]

Epoch 1 #19800 -- loss: 0.21989698555320503, acc: 0.9175


 35%|███▍      | 19850/57292 [2:00:56<3:48:26,  2.73it/s]

Epoch 1 #19850 -- loss: 0.1811528319120407, acc: 0.9325


 35%|███▍      | 19900/57292 [2:01:14<3:47:50,  2.74it/s]

Epoch 1 #19900 -- loss: 0.20971330687403678, acc: 0.91875


 35%|███▍      | 19950/57292 [2:01:32<3:47:32,  2.74it/s]

Epoch 1 #19950 -- loss: 0.19465063992887735, acc: 0.9175


 35%|███▍      | 20000/57292 [2:01:51<3:46:55,  2.74it/s]

Epoch 1 #20000 -- loss: 0.19982721880078316, acc: 0.92375


 35%|███▍      | 20050/57292 [2:02:09<3:46:51,  2.74it/s]

Epoch 1 #20050 -- loss: 0.15820109125226736, acc: 0.9325


 35%|███▌      | 20100/57292 [2:02:27<3:46:45,  2.73it/s]

Epoch 1 #20100 -- loss: 0.19552943274378776, acc: 0.92625


 35%|███▌      | 20150/57292 [2:02:46<3:46:27,  2.73it/s]

Epoch 1 #20150 -- loss: 0.18062918417155743, acc: 0.93375


 35%|███▌      | 20200/57292 [2:03:04<3:45:54,  2.74it/s]

Epoch 1 #20200 -- loss: 0.15167241662740708, acc: 0.93375


 35%|███▌      | 20250/57292 [2:03:22<3:45:54,  2.73it/s]

Epoch 1 #20250 -- loss: 0.22056228876113892, acc: 0.91


 35%|███▌      | 20300/57292 [2:03:40<3:45:25,  2.74it/s]

Epoch 1 #20300 -- loss: 0.18719693645834923, acc: 0.9275


 36%|███▌      | 20350/57292 [2:03:59<3:45:14,  2.73it/s]

Epoch 1 #20350 -- loss: 0.21319314029067754, acc: 0.9175


 36%|███▌      | 20400/57292 [2:04:17<3:44:42,  2.74it/s]

Epoch 1 #20400 -- loss: 0.15894375197589397, acc: 0.9325


 36%|███▌      | 20450/57292 [2:04:35<3:44:38,  2.73it/s]

Epoch 1 #20450 -- loss: 0.19209760412573815, acc: 0.93


 36%|███▌      | 20500/57292 [2:04:54<3:43:56,  2.74it/s]

Epoch 1 #20500 -- loss: 0.16964883953332902, acc: 0.935


 36%|███▌      | 20550/57292 [2:05:12<3:44:22,  2.73it/s]

Epoch 1 #20550 -- loss: 0.20054090354591608, acc: 0.9225


 36%|███▌      | 20600/57292 [2:05:30<3:43:31,  2.74it/s]

Epoch 1 #20600 -- loss: 0.15931159883737564, acc: 0.94


 36%|███▌      | 20650/57292 [2:05:48<3:43:01,  2.74it/s]

Epoch 1 #20650 -- loss: 0.20302597485482693, acc: 0.91125


 36%|███▌      | 20700/57292 [2:06:07<3:42:36,  2.74it/s]

Epoch 1 #20700 -- loss: 0.20574098356068135, acc: 0.91125


 36%|███▌      | 20750/57292 [2:06:25<3:42:39,  2.74it/s]

Epoch 1 #20750 -- loss: 0.1960864358395338, acc: 0.9225


 36%|███▋      | 20800/57292 [2:06:43<3:42:22,  2.74it/s]

Epoch 1 #20800 -- loss: 0.17909799844026567, acc: 0.93625


 36%|███▋      | 20850/57292 [2:07:02<3:41:50,  2.74it/s]

Epoch 1 #20850 -- loss: 0.18719773165881634, acc: 0.935


 36%|███▋      | 20900/57292 [2:07:20<3:41:39,  2.74it/s]

Epoch 1 #20900 -- loss: 0.19329512394964696, acc: 0.91875


 37%|███▋      | 20950/57292 [2:07:38<3:41:08,  2.74it/s]

Epoch 1 #20950 -- loss: 0.16202765192836524, acc: 0.94375


 37%|███▋      | 21000/57292 [2:07:56<3:40:39,  2.74it/s]

Epoch 1 #21000 -- loss: 0.1650541314482689, acc: 0.93


 37%|███▋      | 21050/57292 [2:08:15<3:40:52,  2.73it/s]

Epoch 1 #21050 -- loss: 0.2024378038942814, acc: 0.92875


 37%|███▋      | 21100/57292 [2:08:33<3:40:36,  2.73it/s]

Epoch 1 #21100 -- loss: 0.17133657723665238, acc: 0.94125


 37%|███▋      | 21150/57292 [2:08:51<3:40:15,  2.73it/s]

Epoch 1 #21150 -- loss: 0.18192769601941108, acc: 0.91875


 37%|███▋      | 21200/57292 [2:09:09<3:40:02,  2.73it/s]

Epoch 1 #21200 -- loss: 0.21715888313949108, acc: 0.91125


 37%|███▋      | 21250/57292 [2:09:28<3:39:51,  2.73it/s]

Epoch 1 #21250 -- loss: 0.20374020352959632, acc: 0.91875


 37%|███▋      | 21300/57292 [2:09:46<3:39:32,  2.73it/s]

Epoch 1 #21300 -- loss: 0.1632453890517354, acc: 0.9375


 37%|███▋      | 21350/57292 [2:10:04<3:39:00,  2.74it/s]

Epoch 1 #21350 -- loss: 0.1698297068849206, acc: 0.9375


 37%|███▋      | 21400/57292 [2:10:23<3:38:40,  2.74it/s]

Epoch 1 #21400 -- loss: 0.1778084050863981, acc: 0.9375


 37%|███▋      | 21450/57292 [2:10:41<3:38:29,  2.73it/s]

Epoch 1 #21450 -- loss: 0.18716895598918198, acc: 0.935


 38%|███▊      | 21500/57292 [2:10:59<3:37:53,  2.74it/s]

Epoch 1 #21500 -- loss: 0.17504858653992414, acc: 0.9275


 38%|███▊      | 21550/57292 [2:11:17<3:37:57,  2.73it/s]

Epoch 1 #21550 -- loss: 0.19120354026556016, acc: 0.92375


 38%|███▊      | 21600/57292 [2:11:36<3:37:12,  2.74it/s]

Epoch 1 #21600 -- loss: 0.18498405635356904, acc: 0.92375


 38%|███▊      | 21650/57292 [2:11:54<3:37:16,  2.73it/s]

Epoch 1 #21650 -- loss: 0.1948458357155323, acc: 0.9325


 38%|███▊      | 21700/57292 [2:12:12<3:36:48,  2.74it/s]

Epoch 1 #21700 -- loss: 0.2053220435976982, acc: 0.9175


 38%|███▊      | 21750/57292 [2:12:31<3:36:47,  2.73it/s]

Epoch 1 #21750 -- loss: 0.19402855597436428, acc: 0.9225


 38%|███▊      | 21800/57292 [2:12:49<3:36:15,  2.74it/s]

Epoch 1 #21800 -- loss: 0.16293065153062344, acc: 0.935


 38%|███▊      | 21850/57292 [2:13:07<3:36:18,  2.73it/s]

Epoch 1 #21850 -- loss: 0.15695471614599227, acc: 0.9425


 38%|███▊      | 21900/57292 [2:13:25<3:35:07,  2.74it/s]

Epoch 1 #21900 -- loss: 0.19592953089624643, acc: 0.93


 38%|███▊      | 21950/57292 [2:13:44<3:35:13,  2.74it/s]

Epoch 1 #21950 -- loss: 0.17742813214659692, acc: 0.9275


 38%|███▊      | 22000/57292 [2:14:02<3:35:56,  2.72it/s]

Epoch 1 #22000 -- loss: 0.1740155641362071, acc: 0.92625


 38%|███▊      | 22050/57292 [2:14:20<3:35:07,  2.73it/s]

Epoch 1 #22050 -- loss: 0.1636739578843117, acc: 0.94


 39%|███▊      | 22100/57292 [2:14:39<3:34:53,  2.73it/s]

Epoch 1 #22100 -- loss: 0.16815183334052564, acc: 0.93875


 39%|███▊      | 22150/57292 [2:14:57<3:34:23,  2.73it/s]

Epoch 1 #22150 -- loss: 0.191772975474596, acc: 0.93


 39%|███▊      | 22200/57292 [2:15:15<3:34:10,  2.73it/s]

Epoch 1 #22200 -- loss: 0.18130421549081802, acc: 0.935


 39%|███▉      | 22250/57292 [2:15:34<3:33:59,  2.73it/s]

Epoch 1 #22250 -- loss: 0.19917643170803787, acc: 0.9225


 39%|███▉      | 22300/57292 [2:15:52<3:33:27,  2.73it/s]

Epoch 1 #22300 -- loss: 0.20692533757537604, acc: 0.91625


 39%|███▉      | 22350/57292 [2:16:10<3:33:11,  2.73it/s]

Epoch 1 #22350 -- loss: 0.21150301732122898, acc: 0.91375


 39%|███▉      | 22400/57292 [2:16:29<3:32:54,  2.73it/s]

Epoch 1 #22400 -- loss: 0.21717306427657604, acc: 0.90875


 39%|███▉      | 22450/57292 [2:16:47<3:32:52,  2.73it/s]

Epoch 1 #22450 -- loss: 0.17945005610585213, acc: 0.9175


 39%|███▉      | 22500/57292 [2:17:05<3:32:13,  2.73it/s]

Epoch 1 #22500 -- loss: 0.21152676418423652, acc: 0.91125


 39%|███▉      | 22550/57292 [2:17:23<3:32:06,  2.73it/s]

Epoch 1 #22550 -- loss: 0.19780754171311854, acc: 0.91375


 39%|███▉      | 22600/57292 [2:17:42<3:31:31,  2.73it/s]

Epoch 1 #22600 -- loss: 0.17474169567227363, acc: 0.93375


 40%|███▉      | 22650/57292 [2:18:00<3:31:29,  2.73it/s]

Epoch 1 #22650 -- loss: 0.18874829843640328, acc: 0.925


 40%|███▉      | 22700/57292 [2:18:18<3:31:01,  2.73it/s]

Epoch 1 #22700 -- loss: 0.1875541101768613, acc: 0.9325


 40%|███▉      | 22750/57292 [2:18:37<3:30:30,  2.73it/s]

Epoch 1 #22750 -- loss: 0.20455368626862763, acc: 0.92


 40%|███▉      | 22800/57292 [2:18:55<3:30:15,  2.73it/s]

Epoch 1 #22800 -- loss: 0.1649104367569089, acc: 0.92875


 40%|███▉      | 22850/57292 [2:19:13<3:30:10,  2.73it/s]

Epoch 1 #22850 -- loss: 0.20490612175315617, acc: 0.9175


 40%|███▉      | 22900/57292 [2:19:32<3:29:38,  2.73it/s]

Epoch 1 #22900 -- loss: 0.16848895024508237, acc: 0.93375


 40%|████      | 22950/57292 [2:19:50<3:29:19,  2.73it/s]

Epoch 1 #22950 -- loss: 0.20081597104668616, acc: 0.92625


 40%|████      | 23000/57292 [2:20:08<3:29:04,  2.73it/s]

Epoch 1 #23000 -- loss: 0.18990544702857734, acc: 0.92375


 40%|████      | 23050/57292 [2:20:26<3:28:36,  2.74it/s]

Epoch 1 #23050 -- loss: 0.19118935503065587, acc: 0.93625


 40%|████      | 23100/57292 [2:20:45<3:27:48,  2.74it/s]

Epoch 1 #23100 -- loss: 0.1979269400984049, acc: 0.9275


 40%|████      | 23150/57292 [2:21:03<3:27:41,  2.74it/s]

Epoch 1 #23150 -- loss: 0.18717897351831198, acc: 0.93625


 40%|████      | 23200/57292 [2:21:21<3:27:37,  2.74it/s]

Epoch 1 #23200 -- loss: 0.21609872803092003, acc: 0.91375


 41%|████      | 23250/57292 [2:21:39<3:27:42,  2.73it/s]

Epoch 1 #23250 -- loss: 0.16841463431715964, acc: 0.9325


 41%|████      | 23300/57292 [2:21:58<3:27:28,  2.73it/s]

Epoch 1 #23300 -- loss: 0.18619968328624964, acc: 0.92625


 41%|████      | 23350/57292 [2:22:16<3:26:44,  2.74it/s]

Epoch 1 #23350 -- loss: 0.18981411438435317, acc: 0.92125


 41%|████      | 23400/57292 [2:22:34<3:26:41,  2.73it/s]

Epoch 1 #23400 -- loss: 0.17915267400443555, acc: 0.93625


 41%|████      | 23450/57292 [2:22:53<3:26:29,  2.73it/s]

Epoch 1 #23450 -- loss: 0.18291760981082916, acc: 0.92375


 41%|████      | 23500/57292 [2:23:11<3:26:13,  2.73it/s]

Epoch 1 #23500 -- loss: 0.19069761771708726, acc: 0.91625


 41%|████      | 23550/57292 [2:23:29<3:25:48,  2.73it/s]

Epoch 1 #23550 -- loss: 0.20865960083901883, acc: 0.92


 41%|████      | 23600/57292 [2:23:48<3:25:31,  2.73it/s]

Epoch 1 #23600 -- loss: 0.16643453996628524, acc: 0.935


 41%|████▏     | 23650/57292 [2:24:06<3:25:00,  2.74it/s]

Epoch 1 #23650 -- loss: 0.18374342255294324, acc: 0.92875


 41%|████▏     | 23700/57292 [2:24:24<3:24:28,  2.74it/s]

Epoch 1 #23700 -- loss: 0.18623744018375873, acc: 0.9325


 41%|████▏     | 23750/57292 [2:24:42<3:24:07,  2.74it/s]

Epoch 1 #23750 -- loss: 0.23002976324409247, acc: 0.905


 42%|████▏     | 23800/57292 [2:25:01<3:23:48,  2.74it/s]

Epoch 1 #23800 -- loss: 0.17774979524314405, acc: 0.93


 42%|████▏     | 23850/57292 [2:25:19<3:23:36,  2.74it/s]

Epoch 1 #23850 -- loss: 0.19711482997983695, acc: 0.92875


 42%|████▏     | 23900/57292 [2:25:37<3:23:25,  2.74it/s]

Epoch 1 #23900 -- loss: 0.19417082823812962, acc: 0.92


 42%|████▏     | 23950/57292 [2:25:55<3:23:05,  2.74it/s]

Epoch 1 #23950 -- loss: 0.21114538557827472, acc: 0.92625


 42%|████▏     | 24000/57292 [2:26:14<3:22:58,  2.73it/s]

Epoch 1 #24000 -- loss: 0.19631837591528892, acc: 0.91625


 42%|████▏     | 24050/57292 [2:26:32<3:22:25,  2.74it/s]

Epoch 1 #24050 -- loss: 0.22590470228344203, acc: 0.91375


 42%|████▏     | 24100/57292 [2:26:50<3:21:47,  2.74it/s]

Epoch 1 #24100 -- loss: 0.13296154115349054, acc: 0.94375


 42%|████▏     | 24150/57292 [2:27:08<3:22:04,  2.73it/s]

Epoch 1 #24150 -- loss: 0.18459053836762906, acc: 0.92375


 42%|████▏     | 24200/57292 [2:27:27<3:21:26,  2.74it/s]

Epoch 1 #24200 -- loss: 0.2129516451433301, acc: 0.9125


 42%|████▏     | 24250/57292 [2:27:45<3:21:12,  2.74it/s]

Epoch 1 #24250 -- loss: 0.1828885678201914, acc: 0.91875


 42%|████▏     | 24300/57292 [2:28:03<3:21:09,  2.73it/s]

Epoch 1 #24300 -- loss: 0.198293639048934, acc: 0.92


 43%|████▎     | 24350/57292 [2:28:22<3:20:28,  2.74it/s]

Epoch 1 #24350 -- loss: 0.18737967543303966, acc: 0.93


 43%|████▎     | 24400/57292 [2:28:40<3:20:20,  2.74it/s]

Epoch 1 #24400 -- loss: 0.18663397446274757, acc: 0.92


 43%|████▎     | 24450/57292 [2:28:58<3:19:42,  2.74it/s]

Epoch 1 #24450 -- loss: 0.21813093658536672, acc: 0.9025


 43%|████▎     | 24500/57292 [2:29:16<3:19:25,  2.74it/s]

Epoch 1 #24500 -- loss: 0.20285741567611695, acc: 0.91875


 43%|████▎     | 24550/57292 [2:29:35<3:19:58,  2.73it/s]

Epoch 1 #24550 -- loss: 0.18863343864679336, acc: 0.93125


 43%|████▎     | 24600/57292 [2:29:53<3:19:49,  2.73it/s]

Epoch 1 #24600 -- loss: 0.18787123903632164, acc: 0.935


 43%|████▎     | 24650/57292 [2:30:11<3:19:09,  2.73it/s]

Epoch 1 #24650 -- loss: 0.20507463548332452, acc: 0.9075


 43%|████▎     | 24700/57292 [2:30:30<3:18:47,  2.73it/s]

Epoch 1 #24700 -- loss: 0.17728847198188305, acc: 0.9375


 43%|████▎     | 24750/57292 [2:30:48<3:18:23,  2.73it/s]

Epoch 1 #24750 -- loss: 0.16469547860324382, acc: 0.93375


 43%|████▎     | 24800/57292 [2:31:06<3:18:06,  2.73it/s]

Epoch 1 #24800 -- loss: 0.1915108959749341, acc: 0.925


 43%|████▎     | 24850/57292 [2:31:24<3:17:12,  2.74it/s]

Epoch 1 #24850 -- loss: 0.16540855988860131, acc: 0.94


 43%|████▎     | 24900/57292 [2:31:43<3:17:22,  2.74it/s]

Epoch 1 #24900 -- loss: 0.18140898145735262, acc: 0.93125


 44%|████▎     | 24950/57292 [2:32:01<3:16:51,  2.74it/s]

Epoch 1 #24950 -- loss: 0.21413910545408726, acc: 0.9125


 44%|████▎     | 25000/57292 [2:32:19<3:16:48,  2.73it/s]

Epoch 1 #25000 -- loss: 0.19578499682247638, acc: 0.915


 44%|████▎     | 25050/57292 [2:32:38<3:16:17,  2.74it/s]

Epoch 1 #25050 -- loss: 0.22922138892114163, acc: 0.92


 44%|████▍     | 25100/57292 [2:32:56<3:16:24,  2.73it/s]

Epoch 1 #25100 -- loss: 0.2453681343793869, acc: 0.895


 44%|████▍     | 25150/57292 [2:33:14<3:15:49,  2.74it/s]

Epoch 1 #25150 -- loss: 0.19345294788479805, acc: 0.92625


 44%|████▍     | 25200/57292 [2:33:32<3:15:32,  2.74it/s]

Epoch 1 #25200 -- loss: 0.21645639576017855, acc: 0.91375


 44%|████▍     | 25250/57292 [2:33:51<3:15:36,  2.73it/s]

Epoch 1 #25250 -- loss: 0.17684972256422044, acc: 0.93375


 44%|████▍     | 25300/57292 [2:34:09<3:15:52,  2.72it/s]

Epoch 1 #25300 -- loss: 0.15170675560832023, acc: 0.93625


 44%|████▍     | 25350/57292 [2:34:27<3:14:58,  2.73it/s]

Epoch 1 #25350 -- loss: 0.168333663828671, acc: 0.935


 44%|████▍     | 25400/57292 [2:34:46<3:14:30,  2.73it/s]

Epoch 1 #25400 -- loss: 0.1698142107576132, acc: 0.93


 44%|████▍     | 25450/57292 [2:35:04<3:14:12,  2.73it/s]

Epoch 1 #25450 -- loss: 0.16588588256388903, acc: 0.92625


 45%|████▍     | 25500/57292 [2:35:22<3:13:49,  2.73it/s]

Epoch 1 #25500 -- loss: 0.1903883497416973, acc: 0.93125


 45%|████▍     | 25550/57292 [2:35:41<3:13:28,  2.73it/s]

Epoch 1 #25550 -- loss: 0.19092700153589248, acc: 0.9275


 45%|████▍     | 25600/57292 [2:35:59<3:12:46,  2.74it/s]

Epoch 1 #25600 -- loss: 0.19816147953271865, acc: 0.91625


 45%|████▍     | 25650/57292 [2:36:17<3:12:55,  2.73it/s]

Epoch 1 #25650 -- loss: 0.1788436533883214, acc: 0.9275


 45%|████▍     | 25700/57292 [2:36:35<3:12:25,  2.74it/s]

Epoch 1 #25700 -- loss: 0.16241825271397828, acc: 0.94


 45%|████▍     | 25750/57292 [2:36:54<3:12:09,  2.74it/s]

Epoch 1 #25750 -- loss: 0.2159306765347719, acc: 0.91875


 45%|████▌     | 25800/57292 [2:37:12<3:12:03,  2.73it/s]

Epoch 1 #25800 -- loss: 0.17643802784383297, acc: 0.9275


 45%|████▌     | 25850/57292 [2:37:30<3:11:37,  2.73it/s]

Epoch 1 #25850 -- loss: 0.1811808431893587, acc: 0.93375


 45%|████▌     | 25900/57292 [2:37:49<3:11:15,  2.74it/s]

Epoch 1 #25900 -- loss: 0.18917776115238666, acc: 0.92125


 45%|████▌     | 25950/57292 [2:38:07<3:10:54,  2.74it/s]

Epoch 1 #25950 -- loss: 0.16521857358515263, acc: 0.93125


 45%|████▌     | 26000/57292 [2:38:25<3:11:01,  2.73it/s]

Epoch 1 #26000 -- loss: 0.2263074093312025, acc: 0.91625


 45%|████▌     | 26050/57292 [2:38:44<3:10:27,  2.73it/s]

Epoch 1 #26050 -- loss: 0.19788885165005923, acc: 0.92125


 46%|████▌     | 26100/57292 [2:39:02<3:10:09,  2.73it/s]

Epoch 1 #26100 -- loss: 0.1767611475661397, acc: 0.93


 46%|████▌     | 26150/57292 [2:39:20<3:09:55,  2.73it/s]

Epoch 1 #26150 -- loss: 0.19869180645793677, acc: 0.9175


 46%|████▌     | 26200/57292 [2:39:38<3:09:41,  2.73it/s]

Epoch 1 #26200 -- loss: 0.16612085454165937, acc: 0.945


 46%|████▌     | 26250/57292 [2:39:57<3:09:09,  2.74it/s]

Epoch 1 #26250 -- loss: 0.1472315363585949, acc: 0.94


 46%|████▌     | 26300/57292 [2:40:15<3:09:06,  2.73it/s]

Epoch 1 #26300 -- loss: 0.16813253797590733, acc: 0.93


 46%|████▌     | 26350/57292 [2:40:33<3:08:33,  2.73it/s]

Epoch 1 #26350 -- loss: 0.18242787461727858, acc: 0.925


 46%|████▌     | 26400/57292 [2:40:52<3:08:23,  2.73it/s]

Epoch 1 #26400 -- loss: 0.16184143498539924, acc: 0.93625


 46%|████▌     | 26450/57292 [2:41:10<3:07:52,  2.74it/s]

Epoch 1 #26450 -- loss: 0.20773750238120556, acc: 0.91625


 46%|████▋     | 26500/57292 [2:41:28<3:07:27,  2.74it/s]

Epoch 1 #26500 -- loss: 0.19312515541911124, acc: 0.92


 46%|████▋     | 26550/57292 [2:41:46<3:07:31,  2.73it/s]

Epoch 1 #26550 -- loss: 0.1942111773788929, acc: 0.92875


 46%|████▋     | 26600/57292 [2:42:05<3:07:09,  2.73it/s]

Epoch 1 #26600 -- loss: 0.21003270607441663, acc: 0.91375


 47%|████▋     | 26650/57292 [2:42:23<3:06:34,  2.74it/s]

Epoch 1 #26650 -- loss: 0.1990119583904743, acc: 0.91875


 47%|████▋     | 26700/57292 [2:42:41<3:06:45,  2.73it/s]

Epoch 1 #26700 -- loss: 0.16435538180172443, acc: 0.9325


 47%|████▋     | 26750/57292 [2:43:00<3:06:04,  2.74it/s]

Epoch 1 #26750 -- loss: 0.18485933609306812, acc: 0.92125


 47%|████▋     | 26800/57292 [2:43:18<3:05:52,  2.73it/s]

Epoch 1 #26800 -- loss: 0.17924898602068423, acc: 0.92875


 47%|████▋     | 26850/57292 [2:43:36<3:05:17,  2.74it/s]

Epoch 1 #26850 -- loss: 0.20435124062001706, acc: 0.92125


 47%|████▋     | 26900/57292 [2:43:54<3:05:14,  2.73it/s]

Epoch 1 #26900 -- loss: 0.17970201469957828, acc: 0.93


 47%|████▋     | 26950/57292 [2:44:13<3:04:30,  2.74it/s]

Epoch 1 #26950 -- loss: 0.19899677511304617, acc: 0.9225


 47%|████▋     | 27000/57292 [2:44:31<3:04:59,  2.73it/s]

Epoch 1 #27000 -- loss: 0.1953956239670515, acc: 0.91875


 47%|████▋     | 27050/57292 [2:44:49<3:04:08,  2.74it/s]

Epoch 1 #27050 -- loss: 0.15315276518464088, acc: 0.94375


 47%|████▋     | 27100/57292 [2:45:08<3:04:18,  2.73it/s]

Epoch 1 #27100 -- loss: 0.1863718844205141, acc: 0.9275


 47%|████▋     | 27150/57292 [2:45:26<3:03:40,  2.74it/s]

Epoch 1 #27150 -- loss: 0.2237624738365412, acc: 0.9175


 47%|████▋     | 27200/57292 [2:45:44<3:03:08,  2.74it/s]

Epoch 1 #27200 -- loss: 0.22685832381248475, acc: 0.90625


 48%|████▊     | 27250/57292 [2:46:02<3:02:57,  2.74it/s]

Epoch 1 #27250 -- loss: 0.17852582812309264, acc: 0.93


 48%|████▊     | 27300/57292 [2:46:21<3:02:55,  2.73it/s]

Epoch 1 #27300 -- loss: 0.17328773520886898, acc: 0.92375


 48%|████▊     | 27350/57292 [2:46:39<3:02:32,  2.73it/s]

Epoch 1 #27350 -- loss: 0.18433474514633416, acc: 0.92625


 48%|████▊     | 27400/57292 [2:46:57<3:02:03,  2.74it/s]

Epoch 1 #27400 -- loss: 0.19992057278752326, acc: 0.91875


 48%|████▊     | 27450/57292 [2:47:16<3:02:05,  2.73it/s]

Epoch 1 #27450 -- loss: 0.18444017112255096, acc: 0.9175


 48%|████▊     | 27500/57292 [2:47:34<3:01:58,  2.73it/s]

Epoch 1 #27500 -- loss: 0.17811657074838877, acc: 0.9375


 48%|████▊     | 27550/57292 [2:47:52<3:01:45,  2.73it/s]

Epoch 1 #27550 -- loss: 0.174996287971735, acc: 0.935


 48%|████▊     | 27600/57292 [2:48:10<3:01:22,  2.73it/s]

Epoch 1 #27600 -- loss: 0.20233234114944934, acc: 0.91375


 48%|████▊     | 27650/57292 [2:48:29<3:01:14,  2.73it/s]

Epoch 1 #27650 -- loss: 0.19179406635463236, acc: 0.9275


 48%|████▊     | 27700/57292 [2:48:47<3:00:47,  2.73it/s]

Epoch 1 #27700 -- loss: 0.19755143743008374, acc: 0.92875


 48%|████▊     | 27750/57292 [2:49:05<3:00:20,  2.73it/s]

Epoch 1 #27750 -- loss: 0.19790661070495844, acc: 0.92


 49%|████▊     | 27800/57292 [2:49:24<3:00:09,  2.73it/s]

Epoch 1 #27800 -- loss: 0.22546921882778406, acc: 0.90625


 49%|████▊     | 27850/57292 [2:49:42<2:59:34,  2.73it/s]

Epoch 1 #27850 -- loss: 0.17625910185277463, acc: 0.935


 49%|████▊     | 27900/57292 [2:50:00<2:59:35,  2.73it/s]

Epoch 1 #27900 -- loss: 0.1907177112996578, acc: 0.915


 49%|████▉     | 27950/57292 [2:50:19<2:59:11,  2.73it/s]

Epoch 1 #27950 -- loss: 0.17703528575599192, acc: 0.93


 49%|████▉     | 28000/57292 [2:50:37<2:58:53,  2.73it/s]

Epoch 1 #28000 -- loss: 0.2112265684455633, acc: 0.92125


 49%|████▉     | 28050/57292 [2:50:55<2:58:13,  2.73it/s]

Epoch 1 #28050 -- loss: 0.17986554272472857, acc: 0.9325


 49%|████▉     | 28100/57292 [2:51:14<2:57:59,  2.73it/s]

Epoch 1 #28100 -- loss: 0.1819264715909958, acc: 0.93375


 49%|████▉     | 28150/57292 [2:51:32<2:57:47,  2.73it/s]

Epoch 1 #28150 -- loss: 0.19753977566957473, acc: 0.9275


 49%|████▉     | 28200/57292 [2:51:50<2:57:31,  2.73it/s]

Epoch 1 #28200 -- loss: 0.20670926570892334, acc: 0.9175


 49%|████▉     | 28250/57292 [2:52:09<2:57:23,  2.73it/s]

Epoch 1 #28250 -- loss: 0.20017834294587375, acc: 0.92625


 49%|████▉     | 28300/57292 [2:52:27<2:57:04,  2.73it/s]

Epoch 1 #28300 -- loss: 0.19916707288473845, acc: 0.925


 49%|████▉     | 28350/57292 [2:52:45<2:56:53,  2.73it/s]

Epoch 1 #28350 -- loss: 0.19310160249471664, acc: 0.9275


 50%|████▉     | 28400/57292 [2:53:04<2:56:33,  2.73it/s]

Epoch 1 #28400 -- loss: 0.18119346804916858, acc: 0.9325


 50%|████▉     | 28450/57292 [2:53:22<2:55:57,  2.73it/s]

Epoch 1 #28450 -- loss: 0.17574116930365563, acc: 0.94


 50%|████▉     | 28500/57292 [2:53:40<2:55:52,  2.73it/s]

Epoch 1 #28500 -- loss: 0.17972614046186208, acc: 0.925


 50%|████▉     | 28550/57292 [2:53:59<2:55:22,  2.73it/s]

Epoch 1 #28550 -- loss: 0.18422275193035603, acc: 0.92375


 50%|████▉     | 28600/57292 [2:54:17<2:55:10,  2.73it/s]

Epoch 1 #28600 -- loss: 0.19637932371348144, acc: 0.92125


 50%|█████     | 28650/57292 [2:54:35<2:54:44,  2.73it/s]

Epoch 1 #28650 -- loss: 0.18357639521360397, acc: 0.92125


 50%|█████     | 28700/57292 [2:54:54<2:54:48,  2.73it/s]

Epoch 1 #28700 -- loss: 0.20380501963198186, acc: 0.91125


 50%|█████     | 28750/57292 [2:55:12<2:54:28,  2.73it/s]

Epoch 1 #28750 -- loss: 0.17425923682749273, acc: 0.9425


 50%|█████     | 28800/57292 [2:55:30<2:53:31,  2.74it/s]

Epoch 1 #28800 -- loss: 0.18601136796176435, acc: 0.93375


 50%|█████     | 28850/57292 [2:55:48<2:53:24,  2.73it/s]

Epoch 1 #28850 -- loss: 0.17833643816411496, acc: 0.93


 50%|█████     | 28900/57292 [2:56:07<2:53:18,  2.73it/s]

Epoch 1 #28900 -- loss: 0.19407139603048562, acc: 0.92625


 51%|█████     | 28950/57292 [2:56:25<2:52:49,  2.73it/s]

Epoch 1 #28950 -- loss: 0.18378071364015341, acc: 0.9225


 51%|█████     | 29000/57292 [2:56:43<2:52:26,  2.73it/s]

Epoch 1 #29000 -- loss: 0.20402124673128127, acc: 0.91125


 51%|█████     | 29050/57292 [2:57:02<2:52:04,  2.74it/s]

Epoch 1 #29050 -- loss: 0.2139652808010578, acc: 0.91375


 51%|█████     | 29100/57292 [2:57:20<2:52:07,  2.73it/s]

Epoch 1 #29100 -- loss: 0.17371580932289363, acc: 0.925


 51%|█████     | 29150/57292 [2:57:38<2:51:45,  2.73it/s]

Epoch 1 #29150 -- loss: 0.18921688035130502, acc: 0.9275


 51%|█████     | 29200/57292 [2:57:57<2:51:32,  2.73it/s]

Epoch 1 #29200 -- loss: 0.16759261656552554, acc: 0.93875


 51%|█████     | 29250/57292 [2:58:15<2:51:26,  2.73it/s]

Epoch 1 #29250 -- loss: 0.2096661315113306, acc: 0.92125


 51%|█████     | 29300/57292 [2:58:33<2:50:41,  2.73it/s]

Epoch 1 #29300 -- loss: 0.17542642869055272, acc: 0.9375


 51%|█████     | 29350/57292 [2:58:51<2:49:49,  2.74it/s]

Epoch 1 #29350 -- loss: 0.18385856732726097, acc: 0.9275


 51%|█████▏    | 29400/57292 [2:59:10<2:49:34,  2.74it/s]

Epoch 1 #29400 -- loss: 0.16949704714119435, acc: 0.93625


 51%|█████▏    | 29450/57292 [2:59:28<2:49:19,  2.74it/s]

Epoch 1 #29450 -- loss: 0.2099244936928153, acc: 0.92


 51%|█████▏    | 29500/57292 [2:59:46<2:49:35,  2.73it/s]

Epoch 1 #29500 -- loss: 0.2056674451008439, acc: 0.91875


 52%|█████▏    | 29550/57292 [3:00:05<2:49:06,  2.73it/s]

Epoch 1 #29550 -- loss: 0.19800602905452253, acc: 0.9225


 52%|█████▏    | 29600/57292 [3:00:23<2:48:32,  2.74it/s]

Epoch 1 #29600 -- loss: 0.18866006664931775, acc: 0.92625


 52%|█████▏    | 29650/57292 [3:00:41<2:48:24,  2.74it/s]

Epoch 1 #29650 -- loss: 0.14696397449821233, acc: 0.9425


 52%|█████▏    | 29700/57292 [3:00:59<2:47:59,  2.74it/s]

Epoch 1 #29700 -- loss: 0.14345696434378624, acc: 0.945


 52%|█████▏    | 29750/57292 [3:01:18<2:48:23,  2.73it/s]

Epoch 1 #29750 -- loss: 0.20544622018933295, acc: 0.92375


 52%|█████▏    | 29800/57292 [3:01:36<2:48:09,  2.72it/s]

Epoch 1 #29800 -- loss: 0.22920940175652504, acc: 0.90125


 52%|█████▏    | 29850/57292 [3:01:54<2:47:21,  2.73it/s]

Epoch 1 #29850 -- loss: 0.1801839678734541, acc: 0.9325


 52%|█████▏    | 29900/57292 [3:02:13<2:47:08,  2.73it/s]

Epoch 1 #29900 -- loss: 0.17316775254905223, acc: 0.93375


 52%|█████▏    | 29950/57292 [3:02:31<2:46:44,  2.73it/s]

Epoch 1 #29950 -- loss: 0.1997014081478119, acc: 0.925


 52%|█████▏    | 30000/57292 [3:02:49<2:46:41,  2.73it/s]

Epoch 1 #30000 -- loss: 0.19108471959829332, acc: 0.92125


 52%|█████▏    | 30050/57292 [3:03:08<2:46:14,  2.73it/s]

Epoch 1 #30050 -- loss: 0.1975140954554081, acc: 0.92125


 53%|█████▎    | 30100/57292 [3:03:26<2:45:38,  2.74it/s]

Epoch 1 #30100 -- loss: 0.18074567355215548, acc: 0.9275


 53%|█████▎    | 30150/57292 [3:03:44<2:45:15,  2.74it/s]

Epoch 1 #30150 -- loss: 0.18042218763381243, acc: 0.9325


 53%|█████▎    | 30200/57292 [3:04:02<2:45:14,  2.73it/s]

Epoch 1 #30200 -- loss: 0.1715795124694705, acc: 0.93625


 53%|█████▎    | 30250/57292 [3:04:21<2:44:58,  2.73it/s]

Epoch 1 #30250 -- loss: 0.16284089703112842, acc: 0.94125


 53%|█████▎    | 30300/57292 [3:04:39<2:44:45,  2.73it/s]

Epoch 1 #30300 -- loss: 0.19973493605852127, acc: 0.91625


 53%|█████▎    | 30350/57292 [3:04:57<2:44:21,  2.73it/s]

Epoch 1 #30350 -- loss: 0.1704942325875163, acc: 0.94125


 53%|█████▎    | 30400/57292 [3:05:16<2:44:13,  2.73it/s]

Epoch 1 #30400 -- loss: 0.17558981023728848, acc: 0.9275


 53%|█████▎    | 30450/57292 [3:05:34<2:43:41,  2.73it/s]

Epoch 1 #30450 -- loss: 0.21702650383114816, acc: 0.91125


 53%|█████▎    | 30500/57292 [3:05:52<2:43:13,  2.74it/s]

Epoch 1 #30500 -- loss: 0.164860395565629, acc: 0.93375


 53%|█████▎    | 30550/57292 [3:06:11<2:43:08,  2.73it/s]

Epoch 1 #30550 -- loss: 0.17982508428394794, acc: 0.9375


 53%|█████▎    | 30600/57292 [3:06:29<2:42:56,  2.73it/s]

Epoch 1 #30600 -- loss: 0.16367477118968965, acc: 0.92625


 53%|█████▎    | 30650/57292 [3:06:47<2:42:29,  2.73it/s]

Epoch 1 #30650 -- loss: 0.16335211873054503, acc: 0.9375


 54%|█████▎    | 30700/57292 [3:07:05<2:41:56,  2.74it/s]

Epoch 1 #30700 -- loss: 0.20896826095879079, acc: 0.91375


 54%|█████▎    | 30750/57292 [3:07:24<2:41:55,  2.73it/s]

Epoch 1 #30750 -- loss: 0.1913462208211422, acc: 0.93


 54%|█████▍    | 30800/57292 [3:07:42<2:41:27,  2.73it/s]

Epoch 1 #30800 -- loss: 0.1826399637386203, acc: 0.92


 54%|█████▍    | 30850/57292 [3:08:00<2:41:07,  2.74it/s]

Epoch 1 #30850 -- loss: 0.16609778087586163, acc: 0.93125


 54%|█████▍    | 30900/57292 [3:08:19<2:41:10,  2.73it/s]

Epoch 1 #30900 -- loss: 0.23920722357928753, acc: 0.9125


 54%|█████▍    | 30950/57292 [3:08:37<2:40:40,  2.73it/s]

Epoch 1 #30950 -- loss: 0.16109797529876232, acc: 0.9325


 54%|█████▍    | 31000/57292 [3:08:55<2:40:16,  2.73it/s]

Epoch 1 #31000 -- loss: 0.2114377274736762, acc: 0.9175


 54%|█████▍    | 31050/57292 [3:09:13<2:39:57,  2.73it/s]

Epoch 1 #31050 -- loss: 0.17878817919641732, acc: 0.935


 54%|█████▍    | 31100/57292 [3:09:32<2:39:59,  2.73it/s]

Epoch 1 #31100 -- loss: 0.18335187561810018, acc: 0.9325


 54%|█████▍    | 31150/57292 [3:09:50<2:39:23,  2.73it/s]

Epoch 1 #31150 -- loss: 0.1828352351114154, acc: 0.9325


 54%|█████▍    | 31200/57292 [3:10:08<2:39:03,  2.73it/s]

Epoch 1 #31200 -- loss: 0.1903565053641796, acc: 0.9325


 55%|█████▍    | 31250/57292 [3:10:27<2:38:40,  2.74it/s]

Epoch 1 #31250 -- loss: 0.17340950887650253, acc: 0.93375


 55%|█████▍    | 31300/57292 [3:10:45<2:38:25,  2.73it/s]

Epoch 1 #31300 -- loss: 0.15783707939088346, acc: 0.93


 55%|█████▍    | 31350/57292 [3:11:03<2:38:02,  2.74it/s]

Epoch 1 #31350 -- loss: 0.20302519008517264, acc: 0.9225


 55%|█████▍    | 31400/57292 [3:11:22<2:38:03,  2.73it/s]

Epoch 1 #31400 -- loss: 0.21672870747745038, acc: 0.9125


 55%|█████▍    | 31450/57292 [3:11:40<2:37:26,  2.74it/s]

Epoch 1 #31450 -- loss: 0.19187733739614488, acc: 0.925


 55%|█████▍    | 31500/57292 [3:11:58<2:37:07,  2.74it/s]

Epoch 1 #31500 -- loss: 0.20594681292772293, acc: 0.9175


 55%|█████▌    | 31550/57292 [3:12:16<2:36:44,  2.74it/s]

Epoch 1 #31550 -- loss: 0.18035394735634327, acc: 0.9275


 55%|█████▌    | 31600/57292 [3:12:35<2:36:52,  2.73it/s]

Epoch 1 #31600 -- loss: 0.1866762275993824, acc: 0.9275


 55%|█████▌    | 31650/57292 [3:12:53<2:36:26,  2.73it/s]

Epoch 1 #31650 -- loss: 0.1920825646072626, acc: 0.925


 55%|█████▌    | 31700/57292 [3:13:11<2:36:10,  2.73it/s]

Epoch 1 #31700 -- loss: 0.16685716930776834, acc: 0.93625


 55%|█████▌    | 31750/57292 [3:13:30<2:35:51,  2.73it/s]

Epoch 1 #31750 -- loss: 0.18819856889545916, acc: 0.9325


 56%|█████▌    | 31800/57292 [3:13:48<2:35:40,  2.73it/s]

Epoch 1 #31800 -- loss: 0.17420450504869223, acc: 0.93625


 56%|█████▌    | 31850/57292 [3:14:06<2:35:06,  2.73it/s]

Epoch 1 #31850 -- loss: 0.20901858884841204, acc: 0.91375


 56%|█████▌    | 31900/57292 [3:14:24<2:34:51,  2.73it/s]

Epoch 1 #31900 -- loss: 0.19129493102431297, acc: 0.92125


 56%|█████▌    | 31950/57292 [3:14:43<2:34:12,  2.74it/s]

Epoch 1 #31950 -- loss: 0.20140863746404647, acc: 0.9225


 56%|█████▌    | 32000/57292 [3:15:01<2:34:02,  2.74it/s]

Epoch 1 #32000 -- loss: 0.1761389534920454, acc: 0.92375


 56%|█████▌    | 32050/57292 [3:15:19<2:33:43,  2.74it/s]

Epoch 1 #32050 -- loss: 0.18163665384054184, acc: 0.92875


 56%|█████▌    | 32100/57292 [3:15:38<2:33:25,  2.74it/s]

Epoch 1 #32100 -- loss: 0.1810153592005372, acc: 0.93


 56%|█████▌    | 32150/57292 [3:15:56<2:33:01,  2.74it/s]

Epoch 1 #32150 -- loss: 0.25184407740831377, acc: 0.8925


 56%|█████▌    | 32200/57292 [3:16:14<2:33:19,  2.73it/s]

Epoch 1 #32200 -- loss: 0.19337188139557837, acc: 0.915


 56%|█████▋    | 32250/57292 [3:16:32<2:32:29,  2.74it/s]

Epoch 1 #32250 -- loss: 0.17113324631005525, acc: 0.9375


 56%|█████▋    | 32300/57292 [3:16:51<2:32:17,  2.73it/s]

Epoch 1 #32300 -- loss: 0.2003012267872691, acc: 0.9125


 56%|█████▋    | 32350/57292 [3:17:09<2:32:03,  2.73it/s]

Epoch 1 #32350 -- loss: 0.15035267695784568, acc: 0.93875


 57%|█████▋    | 32400/57292 [3:17:27<2:32:00,  2.73it/s]

Epoch 1 #32400 -- loss: 0.21057283967733384, acc: 0.9275


 57%|█████▋    | 32450/57292 [3:17:46<2:31:32,  2.73it/s]

Epoch 1 #32450 -- loss: 0.17031643591821194, acc: 0.93875


 57%|█████▋    | 32500/57292 [3:18:04<2:31:25,  2.73it/s]

Epoch 1 #32500 -- loss: 0.17307064220309257, acc: 0.93625


 57%|█████▋    | 32550/57292 [3:18:22<2:30:42,  2.74it/s]

Epoch 1 #32550 -- loss: 0.1813558131828904, acc: 0.92


 57%|█████▋    | 32600/57292 [3:18:40<2:30:25,  2.74it/s]

Epoch 1 #32600 -- loss: 0.1948533895984292, acc: 0.92125


 57%|█████▋    | 32650/57292 [3:18:59<2:30:08,  2.74it/s]

Epoch 1 #32650 -- loss: 0.18354540735483169, acc: 0.9275


 57%|█████▋    | 32700/57292 [3:19:17<2:29:44,  2.74it/s]

Epoch 1 #32700 -- loss: 0.16968600697815417, acc: 0.9425


 57%|█████▋    | 32750/57292 [3:19:35<2:29:54,  2.73it/s]

Epoch 1 #32750 -- loss: 0.15253887820988893, acc: 0.93375


 57%|█████▋    | 32800/57292 [3:19:54<2:29:30,  2.73it/s]

Epoch 1 #32800 -- loss: 0.1862166278064251, acc: 0.9275


 57%|█████▋    | 32850/57292 [3:20:12<2:28:45,  2.74it/s]

Epoch 1 #32850 -- loss: 0.17413612406700849, acc: 0.93125


 57%|█████▋    | 32900/57292 [3:20:30<2:28:59,  2.73it/s]

Epoch 1 #32900 -- loss: 0.18516152165830135, acc: 0.9225


 58%|█████▊    | 32950/57292 [3:20:49<2:28:17,  2.74it/s]

Epoch 1 #32950 -- loss: 0.19647206883877516, acc: 0.91875


 58%|█████▊    | 33000/57292 [3:21:07<2:28:12,  2.73it/s]

Epoch 1 #33000 -- loss: 0.17310428977012635, acc: 0.93125


 58%|█████▊    | 33050/57292 [3:21:25<2:27:24,  2.74it/s]

Epoch 1 #33050 -- loss: 0.20938104160130025, acc: 0.91375


 58%|█████▊    | 33100/57292 [3:21:43<2:27:23,  2.74it/s]

Epoch 1 #33100 -- loss: 0.1974620234966278, acc: 0.92375


 58%|█████▊    | 33150/57292 [3:22:02<2:27:04,  2.74it/s]

Epoch 1 #33150 -- loss: 0.17543806917965413, acc: 0.93375


 58%|█████▊    | 33200/57292 [3:22:20<2:27:08,  2.73it/s]

Epoch 1 #33200 -- loss: 0.20812082514166833, acc: 0.92375


 58%|█████▊    | 33250/57292 [3:22:38<2:26:39,  2.73it/s]

Epoch 1 #33250 -- loss: 0.18021483276039363, acc: 0.9375


 58%|█████▊    | 33300/57292 [3:22:57<2:26:20,  2.73it/s]

Epoch 1 #33300 -- loss: 0.14296036202460527, acc: 0.94375


 58%|█████▊    | 33350/57292 [3:23:15<2:25:55,  2.73it/s]

Epoch 1 #33350 -- loss: 0.18231400728225708, acc: 0.935


 58%|█████▊    | 33400/57292 [3:23:33<2:25:30,  2.74it/s]

Epoch 1 #33400 -- loss: 0.17838625825941562, acc: 0.92875


 58%|█████▊    | 33450/57292 [3:23:51<2:25:23,  2.73it/s]

Epoch 1 #33450 -- loss: 0.20155621580779554, acc: 0.91625


 58%|█████▊    | 33500/57292 [3:24:10<2:24:54,  2.74it/s]

Epoch 1 #33500 -- loss: 0.16912826135754586, acc: 0.9375


 59%|█████▊    | 33550/57292 [3:24:28<2:24:36,  2.74it/s]

Epoch 1 #33550 -- loss: 0.19158214308321475, acc: 0.92


 59%|█████▊    | 33600/57292 [3:24:46<2:24:38,  2.73it/s]

Epoch 1 #33600 -- loss: 0.1680457853525877, acc: 0.93625


 59%|█████▊    | 33650/57292 [3:25:05<2:24:05,  2.73it/s]

Epoch 1 #33650 -- loss: 0.18776575610041618, acc: 0.93625


 59%|█████▉    | 33700/57292 [3:25:23<2:23:48,  2.73it/s]

Epoch 1 #33700 -- loss: 0.18342205457389354, acc: 0.9325


 59%|█████▉    | 33750/57292 [3:25:41<2:23:38,  2.73it/s]

Epoch 1 #33750 -- loss: 0.20112072847783566, acc: 0.9175


 59%|█████▉    | 33800/57292 [3:25:59<2:23:16,  2.73it/s]

Epoch 1 #33800 -- loss: 0.17004373878240586, acc: 0.93125


 59%|█████▉    | 33850/57292 [3:26:18<2:23:09,  2.73it/s]

Epoch 1 #33850 -- loss: 0.1665505488216877, acc: 0.93875


 59%|█████▉    | 33900/57292 [3:26:36<2:22:17,  2.74it/s]

Epoch 1 #33900 -- loss: 0.19033150434494017, acc: 0.93125


 59%|█████▉    | 33950/57292 [3:26:54<2:22:18,  2.73it/s]

Epoch 1 #33950 -- loss: 0.16970996282994746, acc: 0.94125


 59%|█████▉    | 34000/57292 [3:27:13<2:21:47,  2.74it/s]

Epoch 1 #34000 -- loss: 0.20443472273647786, acc: 0.92375


 59%|█████▉    | 34050/57292 [3:27:31<2:21:40,  2.73it/s]

Epoch 1 #34050 -- loss: 0.214683997631073, acc: 0.91875


 60%|█████▉    | 34100/57292 [3:27:49<2:21:34,  2.73it/s]

Epoch 1 #34100 -- loss: 0.1733694525808096, acc: 0.9375


 60%|█████▉    | 34150/57292 [3:28:07<2:21:05,  2.73it/s]

Epoch 1 #34150 -- loss: 0.2152660507336259, acc: 0.915


 60%|█████▉    | 34200/57292 [3:28:26<2:20:47,  2.73it/s]

Epoch 1 #34200 -- loss: 0.17249961279332637, acc: 0.94125


 60%|█████▉    | 34250/57292 [3:28:44<2:20:33,  2.73it/s]

Epoch 1 #34250 -- loss: 0.17429660975933076, acc: 0.94125


 60%|█████▉    | 34300/57292 [3:29:02<2:20:03,  2.74it/s]

Epoch 1 #34300 -- loss: 0.14744910791516305, acc: 0.945


 60%|█████▉    | 34350/57292 [3:29:21<2:19:53,  2.73it/s]

Epoch 1 #34350 -- loss: 0.1710223940387368, acc: 0.92875


 60%|██████    | 34400/57292 [3:29:39<2:19:34,  2.73it/s]

Epoch 1 #34400 -- loss: 0.21092407569289207, acc: 0.91625


 60%|██████    | 34450/57292 [3:29:57<2:18:54,  2.74it/s]

Epoch 1 #34450 -- loss: 0.1735321345180273, acc: 0.935


 60%|██████    | 34500/57292 [3:30:15<2:18:53,  2.73it/s]

Epoch 1 #34500 -- loss: 0.170521256364882, acc: 0.935


 60%|██████    | 34550/57292 [3:30:34<2:18:50,  2.73it/s]

Epoch 1 #34550 -- loss: 0.19055171109735966, acc: 0.92


 60%|██████    | 34600/57292 [3:30:52<2:18:17,  2.73it/s]

Epoch 1 #34600 -- loss: 0.18944893676787614, acc: 0.93125


 60%|██████    | 34650/57292 [3:31:10<2:18:12,  2.73it/s]

Epoch 1 #34650 -- loss: 0.20551916401833295, acc: 0.9175


 61%|██████    | 34700/57292 [3:31:29<2:17:43,  2.73it/s]

Epoch 1 #34700 -- loss: 0.17756973303854465, acc: 0.93125


 61%|██████    | 34750/57292 [3:31:47<2:17:15,  2.74it/s]

Epoch 1 #34750 -- loss: 0.16672704715281725, acc: 0.93875


 61%|██████    | 34800/57292 [3:32:05<2:17:01,  2.74it/s]

Epoch 1 #34800 -- loss: 0.20619366519153118, acc: 0.9225


 61%|██████    | 34850/57292 [3:32:23<2:17:00,  2.73it/s]

Epoch 1 #34850 -- loss: 0.16632607135921718, acc: 0.93625


 61%|██████    | 34900/57292 [3:32:42<2:16:40,  2.73it/s]

Epoch 1 #34900 -- loss: 0.1758127611503005, acc: 0.93625


 61%|██████    | 34950/57292 [3:33:00<2:16:00,  2.74it/s]

Epoch 1 #34950 -- loss: 0.1555781425535679, acc: 0.945


 61%|██████    | 35000/57292 [3:33:18<2:15:56,  2.73it/s]

Epoch 1 #35000 -- loss: 0.17710350416600704, acc: 0.9275


 61%|██████    | 35050/57292 [3:33:37<2:15:40,  2.73it/s]

Epoch 1 #35050 -- loss: 0.16997820653021337, acc: 0.93375


 61%|██████▏   | 35100/57292 [3:33:55<2:15:18,  2.73it/s]

Epoch 1 #35100 -- loss: 0.18194025203585626, acc: 0.93


 61%|██████▏   | 35150/57292 [3:34:13<2:15:05,  2.73it/s]

Epoch 1 #35150 -- loss: 0.20596929408609868, acc: 0.9125


 61%|██████▏   | 35200/57292 [3:34:31<2:14:57,  2.73it/s]

Epoch 1 #35200 -- loss: 0.15578531078994273, acc: 0.94125


 62%|██████▏   | 35250/57292 [3:34:50<2:14:24,  2.73it/s]

Epoch 1 #35250 -- loss: 0.20545594781637191, acc: 0.9225


 62%|██████▏   | 35300/57292 [3:35:08<2:14:02,  2.73it/s]

Epoch 1 #35300 -- loss: 0.1888387330621481, acc: 0.925


 62%|██████▏   | 35350/57292 [3:35:26<2:13:41,  2.74it/s]

Epoch 1 #35350 -- loss: 0.19495186410844326, acc: 0.91875


 62%|██████▏   | 35400/57292 [3:35:45<2:13:35,  2.73it/s]

Epoch 1 #35400 -- loss: 0.17329496253281831, acc: 0.92625


 62%|██████▏   | 35450/57292 [3:36:03<2:13:14,  2.73it/s]

Epoch 1 #35450 -- loss: 0.16870064113289118, acc: 0.9375


 62%|██████▏   | 35500/57292 [3:36:21<2:12:51,  2.73it/s]

Epoch 1 #35500 -- loss: 0.19047013971954585, acc: 0.92875


 62%|██████▏   | 35550/57292 [3:36:40<2:12:36,  2.73it/s]

Epoch 1 #35550 -- loss: 0.1711027991771698, acc: 0.9425


 62%|██████▏   | 35600/57292 [3:36:58<2:12:21,  2.73it/s]

Epoch 1 #35600 -- loss: 0.18159855648875237, acc: 0.9325


 62%|██████▏   | 35650/57292 [3:37:16<2:11:44,  2.74it/s]

Epoch 1 #35650 -- loss: 0.20178398698568345, acc: 0.92


 62%|██████▏   | 35700/57292 [3:37:34<2:11:38,  2.73it/s]

Epoch 1 #35700 -- loss: 0.18824156239628792, acc: 0.925


 62%|██████▏   | 35750/57292 [3:37:53<2:11:37,  2.73it/s]

Epoch 1 #35750 -- loss: 0.17514702185988426, acc: 0.92875


 62%|██████▏   | 35800/57292 [3:38:11<2:11:19,  2.73it/s]

Epoch 1 #35800 -- loss: 0.1754222357273102, acc: 0.9375


 63%|██████▎   | 35850/57292 [3:38:29<2:10:41,  2.73it/s]

Epoch 1 #35850 -- loss: 0.18565844126045705, acc: 0.92125


 63%|██████▎   | 35900/57292 [3:38:48<2:10:25,  2.73it/s]

Epoch 1 #35900 -- loss: 0.16995019644498824, acc: 0.935


 63%|██████▎   | 35950/57292 [3:39:06<2:10:11,  2.73it/s]

Epoch 1 #35950 -- loss: 0.20061326272785662, acc: 0.92375


 63%|██████▎   | 36000/57292 [3:39:24<2:09:43,  2.74it/s]

Epoch 1 #36000 -- loss: 0.20387446880340576, acc: 0.9225


 63%|██████▎   | 36050/57292 [3:39:42<2:09:20,  2.74it/s]

Epoch 1 #36050 -- loss: 0.20526526626199484, acc: 0.9125


 63%|██████▎   | 36100/57292 [3:40:01<2:09:05,  2.74it/s]

Epoch 1 #36100 -- loss: 0.19848357450217008, acc: 0.9275


 63%|██████▎   | 36150/57292 [3:40:19<2:08:57,  2.73it/s]

Epoch 1 #36150 -- loss: 0.22380035039037466, acc: 0.9075


 63%|██████▎   | 36200/57292 [3:40:37<2:08:42,  2.73it/s]

Epoch 1 #36200 -- loss: 0.1912165505439043, acc: 0.93


 63%|██████▎   | 36250/57292 [3:40:56<2:08:23,  2.73it/s]

Epoch 1 #36250 -- loss: 0.2082636881247163, acc: 0.91375


 63%|██████▎   | 36300/57292 [3:41:14<2:07:56,  2.73it/s]

Epoch 1 #36300 -- loss: 0.1905165646225214, acc: 0.925


 63%|██████▎   | 36350/57292 [3:41:32<2:07:40,  2.73it/s]

Epoch 1 #36350 -- loss: 0.1658952483534813, acc: 0.93625


 64%|██████▎   | 36400/57292 [3:41:50<2:07:32,  2.73it/s]

Epoch 1 #36400 -- loss: 0.19193550273776055, acc: 0.93375


 64%|██████▎   | 36450/57292 [3:42:09<2:07:03,  2.73it/s]

Epoch 1 #36450 -- loss: 0.18738309983164073, acc: 0.92


 64%|██████▎   | 36500/57292 [3:42:27<2:06:44,  2.73it/s]

Epoch 1 #36500 -- loss: 0.18791755907237528, acc: 0.9275


 64%|██████▍   | 36550/57292 [3:42:45<2:06:29,  2.73it/s]

Epoch 1 #36550 -- loss: 0.17191557824611664, acc: 0.93125


 64%|██████▍   | 36600/57292 [3:43:04<2:06:13,  2.73it/s]

Epoch 1 #36600 -- loss: 0.23068172588944436, acc: 0.9175


 64%|██████▍   | 36650/57292 [3:43:22<2:05:46,  2.74it/s]

Epoch 1 #36650 -- loss: 0.19760040894150735, acc: 0.92125


 64%|██████▍   | 36700/57292 [3:43:40<2:05:27,  2.74it/s]

Epoch 1 #36700 -- loss: 0.15686057906597853, acc: 0.93875


 64%|██████▍   | 36750/57292 [3:43:58<2:05:11,  2.73it/s]

Epoch 1 #36750 -- loss: 0.1908442920446396, acc: 0.90875


 64%|██████▍   | 36800/57292 [3:44:17<2:05:04,  2.73it/s]

Epoch 1 #36800 -- loss: 0.19863741669803858, acc: 0.915


 64%|██████▍   | 36850/57292 [3:44:35<2:04:42,  2.73it/s]

Epoch 1 #36850 -- loss: 0.22543241776525974, acc: 0.91625


 64%|██████▍   | 36900/57292 [3:44:53<2:04:29,  2.73it/s]

Epoch 1 #36900 -- loss: 0.2034864952415228, acc: 0.91625


 64%|██████▍   | 36950/57292 [3:45:12<2:04:06,  2.73it/s]

Epoch 1 #36950 -- loss: 0.18049086820334195, acc: 0.935


 65%|██████▍   | 37000/57292 [3:45:30<2:03:39,  2.74it/s]

Epoch 1 #37000 -- loss: 0.19629830811172724, acc: 0.9225


 65%|██████▍   | 37050/57292 [3:45:48<2:03:43,  2.73it/s]

Epoch 1 #37050 -- loss: 0.18313190024346113, acc: 0.9325


 65%|██████▍   | 37100/57292 [3:46:07<2:03:10,  2.73it/s]

Epoch 1 #37100 -- loss: 0.19754809338599444, acc: 0.91625


 65%|██████▍   | 37150/57292 [3:46:25<2:02:46,  2.73it/s]

Epoch 1 #37150 -- loss: 0.1902750950306654, acc: 0.93


 65%|██████▍   | 37200/57292 [3:46:43<2:02:34,  2.73it/s]

Epoch 1 #37200 -- loss: 0.1890983662009239, acc: 0.92875


 65%|██████▌   | 37250/57292 [3:47:01<2:02:08,  2.73it/s]

Epoch 1 #37250 -- loss: 0.18096625190228224, acc: 0.93


 65%|██████▌   | 37300/57292 [3:47:20<2:01:46,  2.74it/s]

Epoch 1 #37300 -- loss: 0.2052516270428896, acc: 0.91125


 65%|██████▌   | 37350/57292 [3:47:38<2:01:50,  2.73it/s]

Epoch 1 #37350 -- loss: 0.16606017716228963, acc: 0.935


 65%|██████▌   | 37400/57292 [3:47:56<2:01:14,  2.73it/s]

Epoch 1 #37400 -- loss: 0.17499352779239416, acc: 0.9275


 65%|██████▌   | 37450/57292 [3:48:15<2:00:52,  2.74it/s]

Epoch 1 #37450 -- loss: 0.17223661497235299, acc: 0.93


 65%|██████▌   | 37500/57292 [3:48:33<2:00:43,  2.73it/s]

Epoch 1 #37500 -- loss: 0.19484199419617654, acc: 0.92125


 66%|██████▌   | 37550/57292 [3:48:51<2:00:44,  2.73it/s]

Epoch 1 #37550 -- loss: 0.1882308391481638, acc: 0.925


 66%|██████▌   | 37600/57292 [3:49:09<2:00:09,  2.73it/s]

Epoch 1 #37600 -- loss: 0.2108724534139037, acc: 0.90875


 66%|██████▌   | 37650/57292 [3:49:28<1:59:54,  2.73it/s]

Epoch 1 #37650 -- loss: 0.18989826306700708, acc: 0.93


 66%|██████▌   | 37700/57292 [3:49:46<1:59:35,  2.73it/s]

Epoch 1 #37700 -- loss: 0.16419020984321833, acc: 0.94375


 66%|██████▌   | 37750/57292 [3:50:04<1:59:19,  2.73it/s]

Epoch 1 #37750 -- loss: 0.2065175899863243, acc: 0.9225


 66%|██████▌   | 37800/57292 [3:50:23<1:58:52,  2.73it/s]

Epoch 1 #37800 -- loss: 0.180828370526433, acc: 0.92625


 66%|██████▌   | 37850/57292 [3:50:41<1:58:12,  2.74it/s]

Epoch 1 #37850 -- loss: 0.1753838451206684, acc: 0.93


 66%|██████▌   | 37900/57292 [3:50:59<1:58:01,  2.74it/s]

Epoch 1 #37900 -- loss: 0.218737107552588, acc: 0.91375


 66%|██████▌   | 37950/57292 [3:51:18<1:58:01,  2.73it/s]

Epoch 1 #37950 -- loss: 0.1898925532028079, acc: 0.9275


 66%|██████▋   | 38000/57292 [3:51:36<1:57:41,  2.73it/s]

Epoch 1 #38000 -- loss: 0.16409257996827364, acc: 0.9425


 66%|██████▋   | 38050/57292 [3:51:54<1:57:20,  2.73it/s]

Epoch 1 #38050 -- loss: 0.193166837207973, acc: 0.92875


 67%|██████▋   | 38100/57292 [3:52:12<1:56:55,  2.74it/s]

Epoch 1 #38100 -- loss: 0.179597567692399, acc: 0.92125


 67%|██████▋   | 38150/57292 [3:52:31<1:56:55,  2.73it/s]

Epoch 1 #38150 -- loss: 0.18850408129394056, acc: 0.9275


 67%|██████▋   | 38200/57292 [3:52:49<1:56:36,  2.73it/s]

Epoch 1 #38200 -- loss: 0.13803360596299172, acc: 0.9475


 67%|██████▋   | 38250/57292 [3:53:07<1:56:01,  2.74it/s]

Epoch 1 #38250 -- loss: 0.1968220068514347, acc: 0.9275


 67%|██████▋   | 38300/57292 [3:53:26<1:55:37,  2.74it/s]

Epoch 1 #38300 -- loss: 0.18109933137893677, acc: 0.93375


 67%|██████▋   | 38350/57292 [3:53:44<1:55:23,  2.74it/s]

Epoch 1 #38350 -- loss: 0.1770707792788744, acc: 0.93125


 67%|██████▋   | 38400/57292 [3:54:02<1:54:56,  2.74it/s]

Epoch 1 #38400 -- loss: 0.17989904433488846, acc: 0.93625


 67%|██████▋   | 38450/57292 [3:54:20<1:54:59,  2.73it/s]

Epoch 1 #38450 -- loss: 0.25054654277861116, acc: 0.9025


 67%|██████▋   | 38500/57292 [3:54:39<1:54:28,  2.74it/s]

Epoch 1 #38500 -- loss: 0.18994666926562787, acc: 0.92875


 67%|██████▋   | 38550/57292 [3:54:57<1:54:14,  2.73it/s]

Epoch 1 #38550 -- loss: 0.19527852579951285, acc: 0.91375


 67%|██████▋   | 38600/57292 [3:55:15<1:53:57,  2.73it/s]

Epoch 1 #38600 -- loss: 0.20872398287057878, acc: 0.91875


 67%|██████▋   | 38650/57292 [3:55:34<1:53:25,  2.74it/s]

Epoch 1 #38650 -- loss: 0.20155034974217415, acc: 0.92


 68%|██████▊   | 38700/57292 [3:55:52<1:53:20,  2.73it/s]

Epoch 1 #38700 -- loss: 0.1821232706680894, acc: 0.935


 68%|██████▊   | 38750/57292 [3:56:10<1:53:01,  2.73it/s]

Epoch 1 #38750 -- loss: 0.18829626135528088, acc: 0.91875


 68%|██████▊   | 38800/57292 [3:56:28<1:52:46,  2.73it/s]

Epoch 1 #38800 -- loss: 0.19543256767094136, acc: 0.9175


 68%|██████▊   | 38850/57292 [3:56:47<1:52:22,  2.73it/s]

Epoch 1 #38850 -- loss: 0.16419942576438187, acc: 0.93625


 68%|██████▊   | 38900/57292 [3:57:05<1:52:13,  2.73it/s]

Epoch 1 #38900 -- loss: 0.2101676494628191, acc: 0.9275


 68%|██████▊   | 38950/57292 [3:57:23<1:51:33,  2.74it/s]

Epoch 1 #38950 -- loss: 0.1632771125435829, acc: 0.93875


 68%|██████▊   | 39000/57292 [3:57:41<1:51:29,  2.73it/s]

Epoch 1 #39000 -- loss: 0.2224742928892374, acc: 0.91625


 68%|██████▊   | 39050/57292 [3:58:00<1:51:03,  2.74it/s]

Epoch 1 #39050 -- loss: 0.17175888873636722, acc: 0.93875


 68%|██████▊   | 39100/57292 [3:58:18<1:50:45,  2.74it/s]

Epoch 1 #39100 -- loss: 0.19925888650119306, acc: 0.92125


 68%|██████▊   | 39150/57292 [3:58:36<1:50:42,  2.73it/s]

Epoch 1 #39150 -- loss: 0.21837081365287303, acc: 0.92


 68%|██████▊   | 39200/57292 [3:58:55<1:50:27,  2.73it/s]

Epoch 1 #39200 -- loss: 0.183063937202096, acc: 0.92625


 69%|██████▊   | 39250/57292 [3:59:13<1:49:55,  2.74it/s]

Epoch 1 #39250 -- loss: 0.16761653382331132, acc: 0.94375


 69%|██████▊   | 39300/57292 [3:59:31<1:49:42,  2.73it/s]

Epoch 1 #39300 -- loss: 0.19767094939947127, acc: 0.92


 69%|██████▊   | 39350/57292 [3:59:49<1:49:15,  2.74it/s]

Epoch 1 #39350 -- loss: 0.17735285293310882, acc: 0.93


 69%|██████▉   | 39400/57292 [4:00:08<1:49:02,  2.73it/s]

Epoch 1 #39400 -- loss: 0.1660220906510949, acc: 0.93625


 69%|██████▉   | 39450/57292 [4:00:26<1:48:41,  2.74it/s]

Epoch 1 #39450 -- loss: 0.1686074861884117, acc: 0.93875


 69%|██████▉   | 39500/57292 [4:00:44<1:48:26,  2.73it/s]

Epoch 1 #39500 -- loss: 0.19221795007586479, acc: 0.9275


 69%|██████▉   | 39550/57292 [4:01:03<1:48:06,  2.74it/s]

Epoch 1 #39550 -- loss: 0.1730588658154011, acc: 0.93625


 69%|██████▉   | 39600/57292 [4:01:21<1:47:47,  2.74it/s]

Epoch 1 #39600 -- loss: 0.20005345080047846, acc: 0.92125


 69%|██████▉   | 39650/57292 [4:01:39<1:47:32,  2.73it/s]

Epoch 1 #39650 -- loss: 0.17552444383502006, acc: 0.9325


 69%|██████▉   | 39700/57292 [4:01:57<1:47:13,  2.73it/s]

Epoch 1 #39700 -- loss: 0.1885742290318012, acc: 0.92375


 69%|██████▉   | 39750/57292 [4:02:16<1:46:40,  2.74it/s]

Epoch 1 #39750 -- loss: 0.1921531406417489, acc: 0.92875


 69%|██████▉   | 39800/57292 [4:02:34<1:46:32,  2.74it/s]

Epoch 1 #39800 -- loss: 0.1608738860115409, acc: 0.94


 70%|██████▉   | 39850/57292 [4:02:52<1:46:15,  2.74it/s]

Epoch 1 #39850 -- loss: 0.1705746216326952, acc: 0.93875


 70%|██████▉   | 39900/57292 [4:03:11<1:46:06,  2.73it/s]

Epoch 1 #39900 -- loss: 0.16980925042182207, acc: 0.94125


 70%|██████▉   | 39950/57292 [4:03:29<1:45:33,  2.74it/s]

Epoch 1 #39950 -- loss: 0.17549528285861016, acc: 0.9325


 70%|██████▉   | 40000/57292 [4:03:47<1:45:17,  2.74it/s]

Epoch 1 #40000 -- loss: 0.20170695014297962, acc: 0.91875


 70%|██████▉   | 40050/57292 [4:04:05<1:44:57,  2.74it/s]

Epoch 1 #40050 -- loss: 0.1936147141084075, acc: 0.92375


 70%|██████▉   | 40100/57292 [4:04:24<1:44:39,  2.74it/s]

Epoch 1 #40100 -- loss: 0.18069417007267474, acc: 0.92375


 70%|███████   | 40150/57292 [4:04:42<1:44:35,  2.73it/s]

Epoch 1 #40150 -- loss: 0.1989974609017372, acc: 0.9125


 70%|███████   | 40200/57292 [4:05:00<1:44:06,  2.74it/s]

Epoch 1 #40200 -- loss: 0.19346800610423087, acc: 0.9275


 70%|███████   | 40250/57292 [4:05:18<1:43:53,  2.73it/s]

Epoch 1 #40250 -- loss: 0.18137574348598717, acc: 0.935


 70%|███████   | 40300/57292 [4:05:37<1:43:24,  2.74it/s]

Epoch 1 #40300 -- loss: 0.17712100353091956, acc: 0.93375


 70%|███████   | 40350/57292 [4:05:55<1:43:17,  2.73it/s]

Epoch 1 #40350 -- loss: 0.1967194301262498, acc: 0.93


 71%|███████   | 40400/57292 [4:06:13<1:43:00,  2.73it/s]

Epoch 1 #40400 -- loss: 0.19297126218676566, acc: 0.93


 71%|███████   | 40450/57292 [4:06:32<1:42:39,  2.73it/s]

Epoch 1 #40450 -- loss: 0.20249783962965012, acc: 0.92375


 71%|███████   | 40500/57292 [4:06:50<1:42:28,  2.73it/s]

Epoch 1 #40500 -- loss: 0.1624926983565092, acc: 0.9375


 71%|███████   | 40550/57292 [4:07:08<1:42:01,  2.73it/s]

Epoch 1 #40550 -- loss: 0.19395403992384672, acc: 0.9225


 71%|███████   | 40600/57292 [4:07:26<1:41:39,  2.74it/s]

Epoch 1 #40600 -- loss: 0.16329155676066875, acc: 0.9375


 71%|███████   | 40650/57292 [4:07:45<1:41:39,  2.73it/s]

Epoch 1 #40650 -- loss: 0.19743353370577096, acc: 0.92375


 71%|███████   | 40700/57292 [4:08:03<1:41:19,  2.73it/s]

Epoch 1 #40700 -- loss: 0.19036569260060787, acc: 0.925


 71%|███████   | 40750/57292 [4:08:21<1:40:40,  2.74it/s]

Epoch 1 #40750 -- loss: 0.16985324010252953, acc: 0.93625


 71%|███████   | 40800/57292 [4:08:40<1:40:27,  2.74it/s]

Epoch 1 #40800 -- loss: 0.16153852608054875, acc: 0.9375


 71%|███████▏  | 40850/57292 [4:08:58<1:40:12,  2.73it/s]

Epoch 1 #40850 -- loss: 0.16996366068720817, acc: 0.94125


 71%|███████▏  | 40900/57292 [4:09:16<1:39:46,  2.74it/s]

Epoch 1 #40900 -- loss: 0.18604810379445552, acc: 0.93


 71%|███████▏  | 40950/57292 [4:09:34<1:39:27,  2.74it/s]

Epoch 1 #40950 -- loss: 0.15849059082567693, acc: 0.93375


 72%|███████▏  | 41000/57292 [4:09:53<1:39:14,  2.74it/s]

Epoch 1 #41000 -- loss: 0.1840656090900302, acc: 0.92375


 72%|███████▏  | 41050/57292 [4:10:11<1:38:58,  2.73it/s]

Epoch 1 #41050 -- loss: 0.15547417908906935, acc: 0.93625


 72%|███████▏  | 41100/57292 [4:10:29<1:38:52,  2.73it/s]

Epoch 1 #41100 -- loss: 0.20508787237107753, acc: 0.9175


 72%|███████▏  | 41150/57292 [4:10:48<1:38:27,  2.73it/s]

Epoch 1 #41150 -- loss: 0.17519831359386445, acc: 0.93125


 72%|███████▏  | 41200/57292 [4:11:06<1:38:03,  2.74it/s]

Epoch 1 #41200 -- loss: 0.18484410785138608, acc: 0.93


 72%|███████▏  | 41250/57292 [4:11:24<1:37:44,  2.74it/s]

Epoch 1 #41250 -- loss: 0.16523029554635285, acc: 0.93625


 72%|███████▏  | 41300/57292 [4:11:42<1:37:28,  2.73it/s]

Epoch 1 #41300 -- loss: 0.15217108838260174, acc: 0.94375


 72%|███████▏  | 41350/57292 [4:12:01<1:37:13,  2.73it/s]

Epoch 1 #41350 -- loss: 0.21105834752321243, acc: 0.9175


 72%|███████▏  | 41400/57292 [4:12:19<1:36:41,  2.74it/s]

Epoch 1 #41400 -- loss: 0.14871194429695606, acc: 0.94


 72%|███████▏  | 41450/57292 [4:12:37<1:36:19,  2.74it/s]

Epoch 1 #41450 -- loss: 0.1952632585912943, acc: 0.92875


 72%|███████▏  | 41500/57292 [4:12:55<1:36:18,  2.73it/s]

Epoch 1 #41500 -- loss: 0.1865914599597454, acc: 0.92625


 73%|███████▎  | 41550/57292 [4:13:14<1:35:56,  2.73it/s]

Epoch 1 #41550 -- loss: 0.1942909589409828, acc: 0.91125


 73%|███████▎  | 41600/57292 [4:13:32<1:35:34,  2.74it/s]

Epoch 1 #41600 -- loss: 0.1768971808999777, acc: 0.9275


 73%|███████▎  | 41650/57292 [4:13:50<1:35:23,  2.73it/s]

Epoch 1 #41650 -- loss: 0.20474607296288014, acc: 0.91


 73%|███████▎  | 41700/57292 [4:14:09<1:34:59,  2.74it/s]

Epoch 1 #41700 -- loss: 0.18029728569090367, acc: 0.93375


 73%|███████▎  | 41750/57292 [4:14:27<1:34:50,  2.73it/s]

Epoch 1 #41750 -- loss: 0.18663549832999707, acc: 0.92875


 73%|███████▎  | 41800/57292 [4:14:45<1:34:21,  2.74it/s]

Epoch 1 #41800 -- loss: 0.20481916517019272, acc: 0.92


 73%|███████▎  | 41850/57292 [4:15:03<1:34:09,  2.73it/s]

Epoch 1 #41850 -- loss: 0.17956544801592828, acc: 0.94


 73%|███████▎  | 41900/57292 [4:15:22<1:34:00,  2.73it/s]

Epoch 1 #41900 -- loss: 0.1953187654912472, acc: 0.92


 73%|███████▎  | 41950/57292 [4:15:40<1:33:35,  2.73it/s]

Epoch 1 #41950 -- loss: 0.15331650204956532, acc: 0.93875


 73%|███████▎  | 42000/57292 [4:15:58<1:33:23,  2.73it/s]

Epoch 1 #42000 -- loss: 0.18232572834938765, acc: 0.93375


 73%|███████▎  | 42050/57292 [4:16:17<1:32:55,  2.73it/s]

Epoch 1 #42050 -- loss: 0.17201045244932175, acc: 0.93875


 73%|███████▎  | 42100/57292 [4:16:35<1:32:31,  2.74it/s]

Epoch 1 #42100 -- loss: 0.19441883742809296, acc: 0.92


 74%|███████▎  | 42150/57292 [4:16:53<1:32:08,  2.74it/s]

Epoch 1 #42150 -- loss: 0.18252425320446491, acc: 0.92625


 74%|███████▎  | 42200/57292 [4:17:12<1:31:51,  2.74it/s]

Epoch 1 #42200 -- loss: 0.1810651072859764, acc: 0.9275


 74%|███████▎  | 42250/57292 [4:17:30<1:31:42,  2.73it/s]

Epoch 1 #42250 -- loss: 0.1844690949842334, acc: 0.9275


 74%|███████▍  | 42300/57292 [4:17:48<1:31:24,  2.73it/s]

Epoch 1 #42300 -- loss: 0.19810468852519988, acc: 0.9125


 74%|███████▍  | 42350/57292 [4:18:06<1:31:10,  2.73it/s]

Epoch 1 #42350 -- loss: 0.2179160800203681, acc: 0.90375


 74%|███████▍  | 42400/57292 [4:18:25<1:30:45,  2.73it/s]

Epoch 1 #42400 -- loss: 0.14153799030929803, acc: 0.9475


 74%|███████▍  | 42450/57292 [4:18:43<1:30:26,  2.74it/s]

Epoch 1 #42450 -- loss: 0.19918622002005576, acc: 0.91625


 74%|███████▍  | 42500/57292 [4:19:01<1:30:14,  2.73it/s]

Epoch 1 #42500 -- loss: 0.21762516297399997, acc: 0.92


 74%|███████▍  | 42550/57292 [4:19:20<1:30:16,  2.72it/s]

Epoch 1 #42550 -- loss: 0.19218232788145542, acc: 0.91875


 74%|███████▍  | 42600/57292 [4:19:38<1:29:30,  2.74it/s]

Epoch 1 #42600 -- loss: 0.14663874100893737, acc: 0.93625


 74%|███████▍  | 42650/57292 [4:19:56<1:29:14,  2.73it/s]

Epoch 1 #42650 -- loss: 0.17041836008429528, acc: 0.92875


 75%|███████▍  | 42700/57292 [4:20:14<1:28:56,  2.73it/s]

Epoch 1 #42700 -- loss: 0.1597196003049612, acc: 0.945


 75%|███████▍  | 42750/57292 [4:20:33<1:28:46,  2.73it/s]

Epoch 1 #42750 -- loss: 0.16129947829991578, acc: 0.94375


 75%|███████▍  | 42800/57292 [4:20:51<1:28:12,  2.74it/s]

Epoch 1 #42800 -- loss: 0.20773918624967336, acc: 0.91875


 75%|███████▍  | 42850/57292 [4:21:09<1:28:07,  2.73it/s]

Epoch 1 #42850 -- loss: 0.17398524526506662, acc: 0.9425


 75%|███████▍  | 42900/57292 [4:21:28<1:27:51,  2.73it/s]

Epoch 1 #42900 -- loss: 0.21459005780518056, acc: 0.92375


 75%|███████▍  | 42950/57292 [4:21:46<1:27:37,  2.73it/s]

Epoch 1 #42950 -- loss: 0.20401532284915447, acc: 0.91375


 75%|███████▌  | 43000/57292 [4:22:04<1:27:16,  2.73it/s]

Epoch 1 #43000 -- loss: 0.17527046747505665, acc: 0.935


 75%|███████▌  | 43050/57292 [4:22:23<1:26:48,  2.73it/s]

Epoch 1 #43050 -- loss: 0.17996082305908204, acc: 0.93


 75%|███████▌  | 43100/57292 [4:22:41<1:26:27,  2.74it/s]

Epoch 1 #43100 -- loss: 0.19113244652748107, acc: 0.925


 75%|███████▌  | 43150/57292 [4:22:59<1:26:33,  2.72it/s]

Epoch 1 #43150 -- loss: 0.17209757156670094, acc: 0.92125


 75%|███████▌  | 43200/57292 [4:23:17<1:25:58,  2.73it/s]

Epoch 1 #43200 -- loss: 0.19208232410252093, acc: 0.925


 75%|███████▌  | 43250/57292 [4:23:36<1:25:49,  2.73it/s]

Epoch 1 #43250 -- loss: 0.2053499311208725, acc: 0.92125


 76%|███████▌  | 43300/57292 [4:23:54<1:25:24,  2.73it/s]

Epoch 1 #43300 -- loss: 0.19056995678693056, acc: 0.9225


 76%|███████▌  | 43350/57292 [4:24:12<1:25:06,  2.73it/s]

Epoch 1 #43350 -- loss: 0.21800120137631893, acc: 0.91875


 76%|███████▌  | 43400/57292 [4:24:31<1:24:39,  2.74it/s]

Epoch 1 #43400 -- loss: 0.19660349190235138, acc: 0.93375


 76%|███████▌  | 43450/57292 [4:24:49<1:24:24,  2.73it/s]

Epoch 1 #43450 -- loss: 0.16088941387832165, acc: 0.93875


 76%|███████▌  | 43500/57292 [4:25:07<1:24:02,  2.73it/s]

Epoch 1 #43500 -- loss: 0.1780518002063036, acc: 0.93


 76%|███████▌  | 43550/57292 [4:25:26<1:23:48,  2.73it/s]

Epoch 1 #43550 -- loss: 0.17570795446634294, acc: 0.93875


 76%|███████▌  | 43600/57292 [4:25:44<1:23:29,  2.73it/s]

Epoch 1 #43600 -- loss: 0.21414239168167115, acc: 0.91625


 76%|███████▌  | 43650/57292 [4:26:02<1:23:04,  2.74it/s]

Epoch 1 #43650 -- loss: 0.16430358231067657, acc: 0.93625


 76%|███████▋  | 43700/57292 [4:26:20<1:23:06,  2.73it/s]

Epoch 1 #43700 -- loss: 0.14848096262663601, acc: 0.94


 76%|███████▋  | 43750/57292 [4:26:39<1:22:34,  2.73it/s]

Epoch 1 #43750 -- loss: 0.17880448173731567, acc: 0.92875


 76%|███████▋  | 43800/57292 [4:26:57<1:22:12,  2.74it/s]

Epoch 1 #43800 -- loss: 0.16271240234375, acc: 0.935


 77%|███████▋  | 43850/57292 [4:27:15<1:21:52,  2.74it/s]

Epoch 1 #43850 -- loss: 0.2185332317650318, acc: 0.91


 77%|███████▋  | 43900/57292 [4:27:34<1:21:38,  2.73it/s]

Epoch 1 #43900 -- loss: 0.18054204571992158, acc: 0.93625


 77%|███████▋  | 43950/57292 [4:27:52<1:21:25,  2.73it/s]

Epoch 1 #43950 -- loss: 0.18258405227214097, acc: 0.94125


 77%|███████▋  | 44000/57292 [4:28:10<1:21:05,  2.73it/s]

Epoch 1 #44000 -- loss: 0.1587200090289116, acc: 0.93875


 77%|███████▋  | 44050/57292 [4:28:29<1:20:40,  2.74it/s]

Epoch 1 #44050 -- loss: 0.181429520919919, acc: 0.93


 77%|███████▋  | 44100/57292 [4:28:47<1:20:20,  2.74it/s]

Epoch 1 #44100 -- loss: 0.1775766720250249, acc: 0.93


 77%|███████▋  | 44150/57292 [4:29:05<1:20:05,  2.73it/s]

Epoch 1 #44150 -- loss: 0.20109374165534974, acc: 0.92875


 77%|███████▋  | 44200/57292 [4:29:23<1:19:45,  2.74it/s]

Epoch 1 #44200 -- loss: 0.19689192440360784, acc: 0.92125


 77%|███████▋  | 44250/57292 [4:29:42<1:19:19,  2.74it/s]

Epoch 1 #44250 -- loss: 0.18669462244957685, acc: 0.93


 77%|███████▋  | 44300/57292 [4:30:00<1:19:14,  2.73it/s]

Epoch 1 #44300 -- loss: 0.16227683387696742, acc: 0.9375


 77%|███████▋  | 44350/57292 [4:30:18<1:18:50,  2.74it/s]

Epoch 1 #44350 -- loss: 0.1882185149937868, acc: 0.91625


 77%|███████▋  | 44400/57292 [4:30:36<1:18:32,  2.74it/s]

Epoch 1 #44400 -- loss: 0.18768729280680418, acc: 0.92625


 78%|███████▊  | 44450/57292 [4:30:55<1:18:27,  2.73it/s]

Epoch 1 #44450 -- loss: 0.1951431592553854, acc: 0.925


 78%|███████▊  | 44500/57292 [4:31:13<1:17:55,  2.74it/s]

Epoch 1 #44500 -- loss: 0.1709484503790736, acc: 0.93875


 78%|███████▊  | 44550/57292 [4:31:31<1:17:36,  2.74it/s]

Epoch 1 #44550 -- loss: 0.15645585916936397, acc: 0.9425


 78%|███████▊  | 44600/57292 [4:31:50<1:17:17,  2.74it/s]

Epoch 1 #44600 -- loss: 0.20735773392021656, acc: 0.9075


 78%|███████▊  | 44650/57292 [4:32:08<1:16:58,  2.74it/s]

Epoch 1 #44650 -- loss: 0.17697828747332095, acc: 0.93125


 78%|███████▊  | 44700/57292 [4:32:26<1:16:36,  2.74it/s]

Epoch 1 #44700 -- loss: 0.17248864479362966, acc: 0.93625


 78%|███████▊  | 44750/57292 [4:32:44<1:16:28,  2.73it/s]

Epoch 1 #44750 -- loss: 0.18118727017194033, acc: 0.9275


 78%|███████▊  | 44800/57292 [4:33:03<1:16:18,  2.73it/s]

Epoch 1 #44800 -- loss: 0.17352611012756824, acc: 0.93375


 78%|███████▊  | 44850/57292 [4:33:21<1:15:57,  2.73it/s]

Epoch 1 #44850 -- loss: 0.18977711752057075, acc: 0.92875


 78%|███████▊  | 44900/57292 [4:33:39<1:15:34,  2.73it/s]

Epoch 1 #44900 -- loss: 0.19667495686560868, acc: 0.93125


 78%|███████▊  | 44950/57292 [4:33:58<1:15:13,  2.73it/s]

Epoch 1 #44950 -- loss: 0.16600599139928818, acc: 0.93125


 79%|███████▊  | 45000/57292 [4:34:16<1:14:56,  2.73it/s]

Epoch 1 #45000 -- loss: 0.1908674906939268, acc: 0.935


 79%|███████▊  | 45050/57292 [4:34:34<1:14:37,  2.73it/s]

Epoch 1 #45050 -- loss: 0.20650924198329448, acc: 0.9225


 79%|███████▊  | 45100/57292 [4:34:52<1:14:13,  2.74it/s]

Epoch 1 #45100 -- loss: 0.1861179667338729, acc: 0.92375


 79%|███████▉  | 45150/57292 [4:35:11<1:14:02,  2.73it/s]

Epoch 1 #45150 -- loss: 0.2039530200138688, acc: 0.92375


 79%|███████▉  | 45200/57292 [4:35:29<1:13:43,  2.73it/s]

Epoch 1 #45200 -- loss: 0.19247638404369355, acc: 0.92375


 79%|███████▉  | 45250/57292 [4:35:47<1:13:25,  2.73it/s]

Epoch 1 #45250 -- loss: 0.17500993341207505, acc: 0.9275


 79%|███████▉  | 45300/57292 [4:36:06<1:13:07,  2.73it/s]

Epoch 1 #45300 -- loss: 0.2059715824574232, acc: 0.92625


 79%|███████▉  | 45350/57292 [4:36:24<1:12:52,  2.73it/s]

Epoch 1 #45350 -- loss: 0.20315253268927336, acc: 0.9175


 79%|███████▉  | 45400/57292 [4:36:42<1:12:32,  2.73it/s]

Epoch 1 #45400 -- loss: 0.18982502982020377, acc: 0.92625


 79%|███████▉  | 45450/57292 [4:37:01<1:12:12,  2.73it/s]

Epoch 1 #45450 -- loss: 0.2136209996789694, acc: 0.92125


 79%|███████▉  | 45500/57292 [4:37:19<1:11:56,  2.73it/s]

Epoch 1 #45500 -- loss: 0.1741430503129959, acc: 0.9325


 80%|███████▉  | 45550/57292 [4:37:37<1:11:35,  2.73it/s]

Epoch 1 #45550 -- loss: 0.17879458103328943, acc: 0.93125


 80%|███████▉  | 45600/57292 [4:37:55<1:11:13,  2.74it/s]

Epoch 1 #45600 -- loss: 0.18930165529251097, acc: 0.92625


 80%|███████▉  | 45650/57292 [4:38:14<1:10:59,  2.73it/s]

Epoch 1 #45650 -- loss: 0.17353679075837136, acc: 0.93


 80%|███████▉  | 45700/57292 [4:38:32<1:10:46,  2.73it/s]

Epoch 1 #45700 -- loss: 0.17953016221523285, acc: 0.92875


 80%|███████▉  | 45750/57292 [4:38:50<1:10:22,  2.73it/s]

Epoch 1 #45750 -- loss: 0.1555239137634635, acc: 0.935


 80%|███████▉  | 45800/57292 [4:39:09<1:10:05,  2.73it/s]

Epoch 1 #45800 -- loss: 0.19866056367754936, acc: 0.9225


 80%|████████  | 45850/57292 [4:39:27<1:09:39,  2.74it/s]

Epoch 1 #45850 -- loss: 0.17459854256361723, acc: 0.93


 80%|████████  | 45900/57292 [4:39:45<1:09:19,  2.74it/s]

Epoch 1 #45900 -- loss: 0.195043600872159, acc: 0.925


 80%|████████  | 45950/57292 [4:40:03<1:09:05,  2.74it/s]

Epoch 1 #45950 -- loss: 0.19389766689389945, acc: 0.92125


 80%|████████  | 46000/57292 [4:40:22<1:08:56,  2.73it/s]

Epoch 1 #46000 -- loss: 0.1995466547086835, acc: 0.9225


 80%|████████  | 46050/57292 [4:40:40<1:08:27,  2.74it/s]

Epoch 1 #46050 -- loss: 0.1828963029012084, acc: 0.93375


 80%|████████  | 46100/57292 [4:40:58<1:08:17,  2.73it/s]

Epoch 1 #46100 -- loss: 0.19542089946568011, acc: 0.91625


 81%|████████  | 46150/57292 [4:41:17<1:07:57,  2.73it/s]

Epoch 1 #46150 -- loss: 0.17722647733986377, acc: 0.9375


 81%|████████  | 46200/57292 [4:41:35<1:07:35,  2.73it/s]

Epoch 1 #46200 -- loss: 0.16064983621239662, acc: 0.94


 81%|████████  | 46250/57292 [4:41:53<1:07:15,  2.74it/s]

Epoch 1 #46250 -- loss: 0.18047728803008795, acc: 0.92375


 81%|████████  | 46300/57292 [4:42:11<1:07:01,  2.73it/s]

Epoch 1 #46300 -- loss: 0.1924350880086422, acc: 0.9225


 81%|████████  | 46350/57292 [4:42:30<1:06:34,  2.74it/s]

Epoch 1 #46350 -- loss: 0.1892035648226738, acc: 0.925


 81%|████████  | 46400/57292 [4:42:48<1:06:30,  2.73it/s]

Epoch 1 #46400 -- loss: 0.21332126654684544, acc: 0.91


 81%|████████  | 46450/57292 [4:43:06<1:06:10,  2.73it/s]

Epoch 1 #46450 -- loss: 0.2143794748187065, acc: 0.92375


 81%|████████  | 46500/57292 [4:43:25<1:05:55,  2.73it/s]

Epoch 1 #46500 -- loss: 0.18817824110388756, acc: 0.93375


 81%|████████▏ | 46550/57292 [4:43:43<1:05:32,  2.73it/s]

Epoch 1 #46550 -- loss: 0.1848781606554985, acc: 0.935


 81%|████████▏ | 46600/57292 [4:44:01<1:05:20,  2.73it/s]

Epoch 1 #46600 -- loss: 0.1957397847622633, acc: 0.92


 81%|████████▏ | 46650/57292 [4:44:20<1:04:56,  2.73it/s]

Epoch 1 #46650 -- loss: 0.19312045603990555, acc: 0.92


 82%|████████▏ | 46700/57292 [4:44:38<1:04:28,  2.74it/s]

Epoch 1 #46700 -- loss: 0.2080063644051552, acc: 0.92


 82%|████████▏ | 46750/57292 [4:44:56<1:04:13,  2.74it/s]

Epoch 1 #46750 -- loss: 0.21911095824092627, acc: 0.91375


 82%|████████▏ | 46800/57292 [4:45:14<1:04:02,  2.73it/s]

Epoch 1 #46800 -- loss: 0.16228936582803727, acc: 0.93875


 82%|████████▏ | 46850/57292 [4:45:33<1:03:42,  2.73it/s]

Epoch 1 #46850 -- loss: 0.23949980486184358, acc: 0.90375


 82%|████████▏ | 46900/57292 [4:45:51<1:03:26,  2.73it/s]

Epoch 1 #46900 -- loss: 0.17824926801025867, acc: 0.93


 82%|████████▏ | 46950/57292 [4:46:09<1:03:02,  2.73it/s]

Epoch 1 #46950 -- loss: 0.2193104714155197, acc: 0.9125


 82%|████████▏ | 47000/57292 [4:46:28<1:02:41,  2.74it/s]

Epoch 1 #47000 -- loss: 0.19909528911113739, acc: 0.9225


 82%|████████▏ | 47050/57292 [4:46:46<1:02:18,  2.74it/s]

Epoch 1 #47050 -- loss: 0.2166711327433586, acc: 0.9125


 82%|████████▏ | 47100/57292 [4:47:04<1:02:01,  2.74it/s]

Epoch 1 #47100 -- loss: 0.22721138272434474, acc: 0.91125


 82%|████████▏ | 47150/57292 [4:47:22<1:01:51,  2.73it/s]

Epoch 1 #47150 -- loss: 0.1604573005437851, acc: 0.93


 82%|████████▏ | 47200/57292 [4:47:41<1:01:35,  2.73it/s]

Epoch 1 #47200 -- loss: 0.18508143581449985, acc: 0.92625


 82%|████████▏ | 47250/57292 [4:47:59<1:01:16,  2.73it/s]

Epoch 1 #47250 -- loss: 0.19934541165828704, acc: 0.92875


 83%|████████▎ | 47300/57292 [4:48:17<1:01:01,  2.73it/s]

Epoch 1 #47300 -- loss: 0.22289735034108163, acc: 0.9125


 83%|████████▎ | 47350/57292 [4:48:36<1:00:46,  2.73it/s]

Epoch 1 #47350 -- loss: 0.18430303536355497, acc: 0.92125


 83%|████████▎ | 47400/57292 [4:48:54<1:00:14,  2.74it/s]

Epoch 1 #47400 -- loss: 0.1958104371279478, acc: 0.93


 83%|████████▎ | 47450/57292 [4:49:12<59:57,  2.74it/s]  

Epoch 1 #47450 -- loss: 0.19255232103168965, acc: 0.935


 83%|████████▎ | 47500/57292 [4:49:30<59:39,  2.74it/s]  

Epoch 1 #47500 -- loss: 0.2134855503588915, acc: 0.91875


 83%|████████▎ | 47550/57292 [4:49:49<59:19,  2.74it/s]

Epoch 1 #47550 -- loss: 0.21393391281366347, acc: 0.91625


 83%|████████▎ | 47600/57292 [4:50:07<59:01,  2.74it/s]

Epoch 1 #47600 -- loss: 0.15996308248490096, acc: 0.94


 83%|████████▎ | 47650/57292 [4:50:25<58:40,  2.74it/s]

Epoch 1 #47650 -- loss: 0.18544455155730247, acc: 0.92875


 83%|████████▎ | 47700/57292 [4:50:44<58:22,  2.74it/s]

Epoch 1 #47700 -- loss: 0.19077561013400554, acc: 0.93


 83%|████████▎ | 47750/57292 [4:51:02<58:04,  2.74it/s]

Epoch 1 #47750 -- loss: 0.20844088695943355, acc: 0.91


 83%|████████▎ | 47800/57292 [4:51:20<57:52,  2.73it/s]

Epoch 1 #47800 -- loss: 0.16941700786352157, acc: 0.93125


 84%|████████▎ | 47850/57292 [4:51:38<57:33,  2.73it/s]

Epoch 1 #47850 -- loss: 0.22235677655786276, acc: 0.91375


 84%|████████▎ | 47900/57292 [4:51:57<57:12,  2.74it/s]

Epoch 1 #47900 -- loss: 0.1914040429890156, acc: 0.915


 84%|████████▎ | 47950/57292 [4:52:15<56:54,  2.74it/s]

Epoch 1 #47950 -- loss: 0.17058213748037815, acc: 0.93625


 84%|████████▍ | 48000/57292 [4:52:33<56:31,  2.74it/s]

Epoch 1 #48000 -- loss: 0.15877425603568554, acc: 0.94


 84%|████████▍ | 48050/57292 [4:52:52<56:20,  2.73it/s]

Epoch 1 #48050 -- loss: 0.18898879148066045, acc: 0.93625


 84%|████████▍ | 48100/57292 [4:53:10<56:06,  2.73it/s]

Epoch 1 #48100 -- loss: 0.18787753015756606, acc: 0.93


 84%|████████▍ | 48150/57292 [4:53:28<55:37,  2.74it/s]

Epoch 1 #48150 -- loss: 0.18802260655909778, acc: 0.9225


 84%|████████▍ | 48200/57292 [4:53:46<55:24,  2.74it/s]

Epoch 1 #48200 -- loss: 0.20297612905502319, acc: 0.91625


 84%|████████▍ | 48250/57292 [4:54:05<55:02,  2.74it/s]

Epoch 1 #48250 -- loss: 0.18926479369401933, acc: 0.91875


 84%|████████▍ | 48300/57292 [4:54:23<54:50,  2.73it/s]

Epoch 1 #48300 -- loss: 0.21654243234544993, acc: 0.9125


 84%|████████▍ | 48350/57292 [4:54:41<54:30,  2.73it/s]

Epoch 1 #48350 -- loss: 0.1614375611394644, acc: 0.93125


 84%|████████▍ | 48400/57292 [4:55:00<54:08,  2.74it/s]

Epoch 1 #48400 -- loss: 0.20653082840144635, acc: 0.92


 85%|████████▍ | 48450/57292 [4:55:18<53:48,  2.74it/s]

Epoch 1 #48450 -- loss: 0.2152370683848858, acc: 0.9175


 85%|████████▍ | 48500/57292 [4:55:36<53:33,  2.74it/s]

Epoch 1 #48500 -- loss: 0.18652027599513532, acc: 0.925


 85%|████████▍ | 48550/57292 [4:55:54<53:15,  2.74it/s]

Epoch 1 #48550 -- loss: 0.17719294399023056, acc: 0.93


 85%|████████▍ | 48600/57292 [4:56:13<52:59,  2.73it/s]

Epoch 1 #48600 -- loss: 0.1726754292473197, acc: 0.9325


 85%|████████▍ | 48650/57292 [4:56:31<52:42,  2.73it/s]

Epoch 1 #48650 -- loss: 0.17470418445765973, acc: 0.93


 85%|████████▌ | 48700/57292 [4:56:49<52:19,  2.74it/s]

Epoch 1 #48700 -- loss: 0.1432436789944768, acc: 0.95125


 85%|████████▌ | 48750/57292 [4:57:07<52:03,  2.73it/s]

Epoch 1 #48750 -- loss: 0.1973897183686495, acc: 0.91625


 85%|████████▌ | 48800/57292 [4:57:26<51:47,  2.73it/s]

Epoch 1 #48800 -- loss: 0.18387439753860235, acc: 0.92125


 85%|████████▌ | 48850/57292 [4:57:44<51:29,  2.73it/s]

Epoch 1 #48850 -- loss: 0.19175621069967747, acc: 0.92125


 85%|████████▌ | 48900/57292 [4:58:02<51:03,  2.74it/s]

Epoch 1 #48900 -- loss: 0.20503754153847695, acc: 0.92125


 85%|████████▌ | 48950/57292 [4:58:21<50:57,  2.73it/s]

Epoch 1 #48950 -- loss: 0.18366487473249435, acc: 0.9225


 86%|████████▌ | 49000/57292 [4:58:39<50:39,  2.73it/s]

Epoch 1 #49000 -- loss: 0.17682183101773263, acc: 0.93


 86%|████████▌ | 49050/57292 [4:58:57<50:13,  2.73it/s]

Epoch 1 #49050 -- loss: 0.18758882857859135, acc: 0.93125


 86%|████████▌ | 49100/57292 [4:59:15<49:58,  2.73it/s]

Epoch 1 #49100 -- loss: 0.2172421269863844, acc: 0.9125


 86%|████████▌ | 49150/57292 [4:59:34<49:32,  2.74it/s]

Epoch 1 #49150 -- loss: 0.18328676745295525, acc: 0.93375


 86%|████████▌ | 49200/57292 [4:59:52<49:20,  2.73it/s]

Epoch 1 #49200 -- loss: 0.1778907237201929, acc: 0.92


 86%|████████▌ | 49250/57292 [5:00:10<49:05,  2.73it/s]

Epoch 1 #49250 -- loss: 0.19412967927753924, acc: 0.9325


 86%|████████▌ | 49300/57292 [5:00:29<48:44,  2.73it/s]

Epoch 1 #49300 -- loss: 0.16157101698219775, acc: 0.9325


 86%|████████▌ | 49350/57292 [5:00:47<48:27,  2.73it/s]

Epoch 1 #49350 -- loss: 0.18095915205776691, acc: 0.92875


 86%|████████▌ | 49400/57292 [5:01:05<48:09,  2.73it/s]

Epoch 1 #49400 -- loss: 0.19905145779252054, acc: 0.91375


 86%|████████▋ | 49450/57292 [5:01:24<47:48,  2.73it/s]

Epoch 1 #49450 -- loss: 0.14282106228172778, acc: 0.9425


 86%|████████▋ | 49500/57292 [5:01:42<47:27,  2.74it/s]

Epoch 1 #49500 -- loss: 0.19991707764565944, acc: 0.9175


 86%|████████▋ | 49550/57292 [5:02:00<47:09,  2.74it/s]

Epoch 1 #49550 -- loss: 0.19386669747531415, acc: 0.9225


 87%|████████▋ | 49600/57292 [5:02:18<46:49,  2.74it/s]

Epoch 1 #49600 -- loss: 0.1860455248132348, acc: 0.93875


 87%|████████▋ | 49650/57292 [5:02:37<46:38,  2.73it/s]

Epoch 1 #49650 -- loss: 0.2093557742983103, acc: 0.91125


 87%|████████▋ | 49700/57292 [5:02:55<46:14,  2.74it/s]

Epoch 1 #49700 -- loss: 0.2015854698419571, acc: 0.9125


 87%|████████▋ | 49750/57292 [5:03:13<45:57,  2.74it/s]

Epoch 1 #49750 -- loss: 0.1612217354401946, acc: 0.93875


 87%|████████▋ | 49800/57292 [5:03:32<45:48,  2.73it/s]

Epoch 1 #49800 -- loss: 0.16807653717696666, acc: 0.935


 87%|████████▋ | 49850/57292 [5:03:50<45:25,  2.73it/s]

Epoch 1 #49850 -- loss: 0.18351899810135364, acc: 0.93125


 87%|████████▋ | 49900/57292 [5:04:08<45:04,  2.73it/s]

Epoch 1 #49900 -- loss: 0.1960654792189598, acc: 0.92375


 87%|████████▋ | 49950/57292 [5:04:26<44:44,  2.73it/s]

Epoch 1 #49950 -- loss: 0.19906028110533952, acc: 0.92625


 87%|████████▋ | 50000/57292 [5:04:45<44:26,  2.74it/s]

Epoch 1 #50000 -- loss: 0.17527770321816205, acc: 0.93125


 87%|████████▋ | 50050/57292 [5:05:03<44:08,  2.73it/s]

Epoch 1 #50050 -- loss: 0.17695550177246333, acc: 0.92375


 87%|████████▋ | 50100/57292 [5:05:21<43:54,  2.73it/s]

Epoch 1 #50100 -- loss: 0.1939211294054985, acc: 0.92375


 88%|████████▊ | 50150/57292 [5:05:40<43:29,  2.74it/s]

Epoch 1 #50150 -- loss: 0.20927415862679483, acc: 0.91625


 88%|████████▊ | 50200/57292 [5:05:58<43:12,  2.74it/s]

Epoch 1 #50200 -- loss: 0.17025665126740933, acc: 0.92375


 88%|████████▊ | 50250/57292 [5:06:16<42:53,  2.74it/s]

Epoch 1 #50250 -- loss: 0.20171105094254016, acc: 0.91625


 88%|████████▊ | 50300/57292 [5:06:34<42:40,  2.73it/s]

Epoch 1 #50300 -- loss: 0.18262504693120718, acc: 0.9275


 88%|████████▊ | 50350/57292 [5:06:53<42:12,  2.74it/s]

Epoch 1 #50350 -- loss: 0.18279401630163192, acc: 0.9325


 88%|████████▊ | 50400/57292 [5:07:11<41:57,  2.74it/s]

Epoch 1 #50400 -- loss: 0.19100800424814224, acc: 0.92125


 88%|████████▊ | 50450/57292 [5:07:29<41:45,  2.73it/s]

Epoch 1 #50450 -- loss: 0.16709176167845727, acc: 0.93875


 88%|████████▊ | 50500/57292 [5:07:48<41:25,  2.73it/s]

Epoch 1 #50500 -- loss: 0.18500598780810834, acc: 0.93625


 88%|████████▊ | 50550/57292 [5:08:06<41:09,  2.73it/s]

Epoch 1 #50550 -- loss: 0.21624928906559945, acc: 0.9125


 88%|████████▊ | 50600/57292 [5:08:24<40:45,  2.74it/s]

Epoch 1 #50600 -- loss: 0.18291431967169047, acc: 0.93375


 88%|████████▊ | 50650/57292 [5:08:43<40:31,  2.73it/s]

Epoch 1 #50650 -- loss: 0.20158892408013343, acc: 0.91375


 88%|████████▊ | 50700/57292 [5:09:01<40:08,  2.74it/s]

Epoch 1 #50700 -- loss: 0.14646110266447068, acc: 0.9425


 89%|████████▊ | 50750/57292 [5:09:19<39:49,  2.74it/s]

Epoch 1 #50750 -- loss: 0.1925018621236086, acc: 0.92125


 89%|████████▊ | 50800/57292 [5:09:37<39:34,  2.73it/s]

Epoch 1 #50800 -- loss: 0.1719327440485358, acc: 0.93625


 89%|████████▉ | 50850/57292 [5:09:56<39:15,  2.73it/s]

Epoch 1 #50850 -- loss: 0.1542229764536023, acc: 0.94875


 89%|████████▉ | 50900/57292 [5:10:14<39:10,  2.72it/s]

Epoch 1 #50900 -- loss: 0.21900555901229382, acc: 0.91125


 89%|████████▉ | 50950/57292 [5:10:32<38:41,  2.73it/s]

Epoch 1 #50950 -- loss: 0.17561246633529662, acc: 0.93


 89%|████████▉ | 51000/57292 [5:10:51<38:23,  2.73it/s]

Epoch 1 #51000 -- loss: 0.19098199293017387, acc: 0.9175


 89%|████████▉ | 51050/57292 [5:11:09<37:56,  2.74it/s]

Epoch 1 #51050 -- loss: 0.18887363895773887, acc: 0.93


 89%|████████▉ | 51100/57292 [5:11:27<37:42,  2.74it/s]

Epoch 1 #51100 -- loss: 0.19524701423943042, acc: 0.92125


 89%|████████▉ | 51150/57292 [5:11:45<37:21,  2.74it/s]

Epoch 1 #51150 -- loss: 0.16341525044292213, acc: 0.9475


 89%|████████▉ | 51200/57292 [5:12:04<37:07,  2.74it/s]

Epoch 1 #51200 -- loss: 0.18171628918498756, acc: 0.9275


 89%|████████▉ | 51250/57292 [5:12:22<36:43,  2.74it/s]

Epoch 1 #51250 -- loss: 0.20480958350002765, acc: 0.91125


 90%|████████▉ | 51300/57292 [5:12:40<36:33,  2.73it/s]

Epoch 1 #51300 -- loss: 0.16310198333114387, acc: 0.94125


 90%|████████▉ | 51350/57292 [5:12:58<36:10,  2.74it/s]

Epoch 1 #51350 -- loss: 0.1940007670223713, acc: 0.92


 90%|████████▉ | 51400/57292 [5:13:17<35:58,  2.73it/s]

Epoch 1 #51400 -- loss: 0.19531028985977172, acc: 0.92375


 90%|████████▉ | 51450/57292 [5:13:35<35:38,  2.73it/s]

Epoch 1 #51450 -- loss: 0.14769638899713755, acc: 0.95


 90%|████████▉ | 51500/57292 [5:13:53<35:23,  2.73it/s]

Epoch 1 #51500 -- loss: 0.1727568505331874, acc: 0.945


 90%|████████▉ | 51550/57292 [5:14:12<35:01,  2.73it/s]

Epoch 1 #51550 -- loss: 0.19939244501292705, acc: 0.92125


 90%|█████████ | 51600/57292 [5:14:30<34:38,  2.74it/s]

Epoch 1 #51600 -- loss: 0.18579039577394724, acc: 0.93


 90%|█████████ | 51650/57292 [5:14:48<34:24,  2.73it/s]

Epoch 1 #51650 -- loss: 0.19882470533251762, acc: 0.9175


 90%|█████████ | 51700/57292 [5:15:06<34:03,  2.74it/s]

Epoch 1 #51700 -- loss: 0.13922037571668625, acc: 0.95375


 90%|█████████ | 51750/57292 [5:15:25<33:50,  2.73it/s]

Epoch 1 #51750 -- loss: 0.19241606574505568, acc: 0.9325


 90%|█████████ | 51800/57292 [5:15:43<33:31,  2.73it/s]

Epoch 1 #51800 -- loss: 0.19522709622979165, acc: 0.9225


 91%|█████████ | 51850/57292 [5:16:01<33:07,  2.74it/s]

Epoch 1 #51850 -- loss: 0.18371578965336086, acc: 0.925


 91%|█████████ | 51900/57292 [5:16:20<32:49,  2.74it/s]

Epoch 1 #51900 -- loss: 0.18564101722091436, acc: 0.9325


 91%|█████████ | 51950/57292 [5:16:38<32:34,  2.73it/s]

Epoch 1 #51950 -- loss: 0.17327870145440102, acc: 0.9325


 91%|█████████ | 52000/57292 [5:16:56<32:15,  2.73it/s]

Epoch 1 #52000 -- loss: 0.18165668226778509, acc: 0.935


 91%|█████████ | 52050/57292 [5:17:14<31:55,  2.74it/s]

Epoch 1 #52050 -- loss: 0.18065183341503144, acc: 0.93125


 91%|█████████ | 52100/57292 [5:17:33<31:40,  2.73it/s]

Epoch 1 #52100 -- loss: 0.18906875722110272, acc: 0.9225


 91%|█████████ | 52150/57292 [5:17:51<31:22,  2.73it/s]

Epoch 1 #52150 -- loss: 0.2189701134711504, acc: 0.91


 91%|█████████ | 52200/57292 [5:18:09<31:02,  2.73it/s]

Epoch 1 #52200 -- loss: 0.15427958708256484, acc: 0.93875


 91%|█████████ | 52250/57292 [5:18:28<30:42,  2.74it/s]

Epoch 1 #52250 -- loss: 0.21404112316668034, acc: 0.90125


 91%|█████████▏| 52300/57292 [5:18:46<30:23,  2.74it/s]

Epoch 1 #52300 -- loss: 0.19435551185160876, acc: 0.91625


 91%|█████████▏| 52350/57292 [5:19:04<30:05,  2.74it/s]

Epoch 1 #52350 -- loss: 0.17202216193079947, acc: 0.935


 91%|█████████▏| 52400/57292 [5:19:22<29:50,  2.73it/s]

Epoch 1 #52400 -- loss: 0.16206918478012086, acc: 0.945


 92%|█████████▏| 52450/57292 [5:19:41<29:28,  2.74it/s]

Epoch 1 #52450 -- loss: 0.1546388166025281, acc: 0.945


 92%|█████████▏| 52500/57292 [5:19:59<29:10,  2.74it/s]

Epoch 1 #52500 -- loss: 0.2031403923034668, acc: 0.93


 92%|█████████▏| 52550/57292 [5:20:17<28:54,  2.73it/s]

Epoch 1 #52550 -- loss: 0.1956342189759016, acc: 0.915


 92%|█████████▏| 52600/57292 [5:20:36<28:35,  2.73it/s]

Epoch 1 #52600 -- loss: 0.17622105799615384, acc: 0.9275


 92%|█████████▏| 52650/57292 [5:20:54<28:18,  2.73it/s]

Epoch 1 #52650 -- loss: 0.1833796177059412, acc: 0.92875


 92%|█████████▏| 52700/57292 [5:21:12<27:59,  2.73it/s]

Epoch 1 #52700 -- loss: 0.17709537781774998, acc: 0.93


 92%|█████████▏| 52750/57292 [5:21:31<27:41,  2.73it/s]

Epoch 1 #52750 -- loss: 0.1875907366722822, acc: 0.9325


 92%|█████████▏| 52800/57292 [5:21:49<27:21,  2.74it/s]

Epoch 1 #52800 -- loss: 0.20121912628412247, acc: 0.92125


 92%|█████████▏| 52850/57292 [5:22:07<27:02,  2.74it/s]

Epoch 1 #52850 -- loss: 0.19075631275773047, acc: 0.925


 92%|█████████▏| 52900/57292 [5:22:25<26:50,  2.73it/s]

Epoch 1 #52900 -- loss: 0.20306929502636195, acc: 0.91875


 92%|█████████▏| 52950/57292 [5:22:44<26:26,  2.74it/s]

Epoch 1 #52950 -- loss: 0.1878956439346075, acc: 0.935


 93%|█████████▎| 53000/57292 [5:23:02<26:10,  2.73it/s]

Epoch 1 #53000 -- loss: 0.1877460854128003, acc: 0.9175


 93%|█████████▎| 53050/57292 [5:23:20<25:50,  2.74it/s]

Epoch 1 #53050 -- loss: 0.2056482593715191, acc: 0.90875


 93%|█████████▎| 53100/57292 [5:23:38<25:34,  2.73it/s]

Epoch 1 #53100 -- loss: 0.17677607245743274, acc: 0.935


 93%|█████████▎| 53150/57292 [5:23:57<25:14,  2.74it/s]

Epoch 1 #53150 -- loss: 0.17507975183427335, acc: 0.93125


 93%|█████████▎| 53200/57292 [5:24:15<24:58,  2.73it/s]

Epoch 1 #53200 -- loss: 0.21037532839924097, acc: 0.91125


 93%|█████████▎| 53250/57292 [5:24:33<24:39,  2.73it/s]

Epoch 1 #53250 -- loss: 0.20382663488388061, acc: 0.91875


 93%|█████████▎| 53300/57292 [5:24:52<24:20,  2.73it/s]

Epoch 1 #53300 -- loss: 0.16809966064989568, acc: 0.935


 93%|█████████▎| 53350/57292 [5:25:10<24:00,  2.74it/s]

Epoch 1 #53350 -- loss: 0.16712881613522768, acc: 0.93


 93%|█████████▎| 53400/57292 [5:25:28<23:43,  2.73it/s]

Epoch 1 #53400 -- loss: 0.16601832162588834, acc: 0.93125


 93%|█████████▎| 53450/57292 [5:25:47<23:24,  2.73it/s]

Epoch 1 #53450 -- loss: 0.19917913287878036, acc: 0.92625


 93%|█████████▎| 53500/57292 [5:26:05<23:05,  2.74it/s]

Epoch 1 #53500 -- loss: 0.19976968135684728, acc: 0.915


 93%|█████████▎| 53550/57292 [5:26:23<22:49,  2.73it/s]

Epoch 1 #53550 -- loss: 0.18367417242377995, acc: 0.935


 94%|█████████▎| 53600/57292 [5:26:41<22:31,  2.73it/s]

Epoch 1 #53600 -- loss: 0.1649343717843294, acc: 0.9425


 94%|█████████▎| 53650/57292 [5:27:00<22:10,  2.74it/s]

Epoch 1 #53650 -- loss: 0.2063593700528145, acc: 0.92375


 94%|█████████▎| 53700/57292 [5:27:18<21:56,  2.73it/s]

Epoch 1 #53700 -- loss: 0.22794632632285355, acc: 0.905


 94%|█████████▍| 53750/57292 [5:27:36<21:35,  2.73it/s]

Epoch 1 #53750 -- loss: 0.20275715976953507, acc: 0.92875


 94%|█████████▍| 53800/57292 [5:27:55<21:14,  2.74it/s]

Epoch 1 #53800 -- loss: 0.19951325513422488, acc: 0.92125


 94%|█████████▍| 53850/57292 [5:28:13<20:56,  2.74it/s]

Epoch 1 #53850 -- loss: 0.17233856976032258, acc: 0.94125


 94%|█████████▍| 53900/57292 [5:28:31<20:41,  2.73it/s]

Epoch 1 #53900 -- loss: 0.16419184096157552, acc: 0.93875


 94%|█████████▍| 53950/57292 [5:28:49<20:22,  2.73it/s]

Epoch 1 #53950 -- loss: 0.18736467979848384, acc: 0.9275


 94%|█████████▍| 54000/57292 [5:29:08<20:04,  2.73it/s]

Epoch 1 #54000 -- loss: 0.1914371033757925, acc: 0.9325


 94%|█████████▍| 54050/57292 [5:29:26<19:44,  2.74it/s]

Epoch 1 #54050 -- loss: 0.16059890039265157, acc: 0.9375


 94%|█████████▍| 54100/57292 [5:29:44<19:24,  2.74it/s]

Epoch 1 #54100 -- loss: 0.18001441303640603, acc: 0.93125


 95%|█████████▍| 54150/57292 [5:30:03<19:09,  2.73it/s]

Epoch 1 #54150 -- loss: 0.1779993385076523, acc: 0.9275


 95%|█████████▍| 54200/57292 [5:30:21<18:50,  2.73it/s]

Epoch 1 #54200 -- loss: 0.19038956869393586, acc: 0.92125


 95%|█████████▍| 54250/57292 [5:30:39<18:31,  2.74it/s]

Epoch 1 #54250 -- loss: 0.18955905254930258, acc: 0.9175


 95%|█████████▍| 54300/57292 [5:30:57<18:12,  2.74it/s]

Epoch 1 #54300 -- loss: 0.19763240452855826, acc: 0.92125


 95%|█████████▍| 54350/57292 [5:31:16<17:56,  2.73it/s]

Epoch 1 #54350 -- loss: 0.1853001379221678, acc: 0.925


 95%|█████████▍| 54400/57292 [5:31:34<17:37,  2.74it/s]

Epoch 1 #54400 -- loss: 0.19238710332661868, acc: 0.92125


 95%|█████████▌| 54450/57292 [5:31:52<17:19,  2.73it/s]

Epoch 1 #54450 -- loss: 0.17318099983036517, acc: 0.93


 95%|█████████▌| 54500/57292 [5:32:11<17:03,  2.73it/s]

Epoch 1 #54500 -- loss: 0.20134912692010404, acc: 0.92125


 95%|█████████▌| 54550/57292 [5:32:29<16:44,  2.73it/s]

Epoch 1 #54550 -- loss: 0.20138255067169666, acc: 0.92125


 95%|█████████▌| 54600/57292 [5:32:47<16:25,  2.73it/s]

Epoch 1 #54600 -- loss: 0.19123331259936094, acc: 0.92375


 95%|█████████▌| 54650/57292 [5:33:05<16:07,  2.73it/s]

Epoch 1 #54650 -- loss: 0.17205522887408733, acc: 0.93375


 95%|█████████▌| 54700/57292 [5:33:24<15:49,  2.73it/s]

Epoch 1 #54700 -- loss: 0.1701092142611742, acc: 0.93625


 96%|█████████▌| 54750/57292 [5:33:42<15:30,  2.73it/s]

Epoch 1 #54750 -- loss: 0.19015413522720337, acc: 0.925


 96%|█████████▌| 54800/57292 [5:34:00<15:11,  2.73it/s]

Epoch 1 #54800 -- loss: 0.168585807941854, acc: 0.92875


 96%|█████████▌| 54850/57292 [5:34:19<14:53,  2.73it/s]

Epoch 1 #54850 -- loss: 0.17314442932605745, acc: 0.9375


 96%|█████████▌| 54900/57292 [5:34:37<14:34,  2.74it/s]

Epoch 1 #54900 -- loss: 0.18370024617761374, acc: 0.93375


 96%|█████████▌| 54950/57292 [5:34:55<14:14,  2.74it/s]

Epoch 1 #54950 -- loss: 0.21385529801249503, acc: 0.91375


 96%|█████████▌| 55000/57292 [5:35:13<13:56,  2.74it/s]

Epoch 1 #55000 -- loss: 0.21004266161471605, acc: 0.91375


 96%|█████████▌| 55050/57292 [5:35:32<13:39,  2.74it/s]

Epoch 1 #55050 -- loss: 0.17504001043736936, acc: 0.93875


 96%|█████████▌| 55100/57292 [5:35:50<13:21,  2.74it/s]

Epoch 1 #55100 -- loss: 0.20059173297137023, acc: 0.93


 96%|█████████▋| 55150/57292 [5:36:08<13:03,  2.73it/s]

Epoch 1 #55150 -- loss: 0.19170892782509327, acc: 0.92125


 96%|█████████▋| 55200/57292 [5:36:27<12:43,  2.74it/s]

Epoch 1 #55200 -- loss: 0.172342917509377, acc: 0.93125


 96%|█████████▋| 55250/57292 [5:36:45<12:25,  2.74it/s]

Epoch 1 #55250 -- loss: 0.19396650679409505, acc: 0.92125


 97%|█████████▋| 55300/57292 [5:37:03<12:08,  2.73it/s]

Epoch 1 #55300 -- loss: 0.1825324784964323, acc: 0.92375


 97%|█████████▋| 55350/57292 [5:37:21<11:50,  2.74it/s]

Epoch 1 #55350 -- loss: 0.22837552830576896, acc: 0.91125


 97%|█████████▋| 55400/57292 [5:37:40<11:30,  2.74it/s]

Epoch 1 #55400 -- loss: 0.2085829069465399, acc: 0.92625


 97%|█████████▋| 55450/57292 [5:37:58<11:13,  2.74it/s]

Epoch 1 #55450 -- loss: 0.21810526866465807, acc: 0.93125


 97%|█████████▋| 55500/57292 [5:38:16<10:54,  2.74it/s]

Epoch 1 #55500 -- loss: 0.173740400262177, acc: 0.9225


 97%|█████████▋| 55550/57292 [5:38:35<10:37,  2.73it/s]

Epoch 1 #55550 -- loss: 0.16850074894726277, acc: 0.9425


 97%|█████████▋| 55600/57292 [5:38:53<10:18,  2.74it/s]

Epoch 1 #55600 -- loss: 0.20438879307359456, acc: 0.915


 97%|█████████▋| 55650/57292 [5:39:11<09:59,  2.74it/s]

Epoch 1 #55650 -- loss: 0.1527794472500682, acc: 0.94


 97%|█████████▋| 55700/57292 [5:39:29<09:41,  2.74it/s]

Epoch 1 #55700 -- loss: 0.19143277075141668, acc: 0.92


 97%|█████████▋| 55750/57292 [5:39:48<09:22,  2.74it/s]

Epoch 1 #55750 -- loss: 0.19763396359980107, acc: 0.92625


 97%|█████████▋| 55800/57292 [5:40:06<09:05,  2.73it/s]

Epoch 1 #55800 -- loss: 0.17509099643677473, acc: 0.935


 97%|█████████▋| 55850/57292 [5:40:24<08:47,  2.73it/s]

Epoch 1 #55850 -- loss: 0.18793164394795894, acc: 0.93125


 98%|█████████▊| 55900/57292 [5:40:42<08:30,  2.73it/s]

Epoch 1 #55900 -- loss: 0.16960286900401114, acc: 0.94125


 98%|█████████▊| 55950/57292 [5:41:01<08:10,  2.73it/s]

Epoch 1 #55950 -- loss: 0.19168318063020706, acc: 0.93


 98%|█████████▊| 56000/57292 [5:41:19<07:51,  2.74it/s]

Epoch 1 #56000 -- loss: 0.16256262604147195, acc: 0.92875


 98%|█████████▊| 56050/57292 [5:41:37<07:33,  2.74it/s]

Epoch 1 #56050 -- loss: 0.1680361257120967, acc: 0.93


 98%|█████████▊| 56100/57292 [5:41:56<07:15,  2.74it/s]

Epoch 1 #56100 -- loss: 0.19268042054027318, acc: 0.92875


 98%|█████████▊| 56150/57292 [5:42:14<06:57,  2.74it/s]

Epoch 1 #56150 -- loss: 0.1776293385028839, acc: 0.93375


 98%|█████████▊| 56200/57292 [5:42:32<06:39,  2.73it/s]

Epoch 1 #56200 -- loss: 0.206857006624341, acc: 0.91625


 98%|█████████▊| 56250/57292 [5:42:50<06:20,  2.74it/s]

Epoch 1 #56250 -- loss: 0.16397019658237696, acc: 0.93375


 98%|█████████▊| 56300/57292 [5:43:09<06:02,  2.73it/s]

Epoch 1 #56300 -- loss: 0.20881321787834167, acc: 0.915


 98%|█████████▊| 56350/57292 [5:43:27<05:45,  2.73it/s]

Epoch 1 #56350 -- loss: 0.1958602847531438, acc: 0.9175


 98%|█████████▊| 56400/57292 [5:43:45<05:26,  2.73it/s]

Epoch 1 #56400 -- loss: 0.25283928096294406, acc: 0.91


 99%|█████████▊| 56450/57292 [5:44:04<05:08,  2.73it/s]

Epoch 1 #56450 -- loss: 0.16224692918360234, acc: 0.9325


 99%|█████████▊| 56500/57292 [5:44:22<04:50,  2.73it/s]

Epoch 1 #56500 -- loss: 0.20231818594038486, acc: 0.925


 99%|█████████▊| 56550/57292 [5:44:40<04:31,  2.73it/s]

Epoch 1 #56550 -- loss: 0.16085880890488624, acc: 0.93


 99%|█████████▉| 56600/57292 [5:44:58<04:13,  2.73it/s]

Epoch 1 #56600 -- loss: 0.17500212036073207, acc: 0.93


 99%|█████████▉| 56650/57292 [5:45:17<03:55,  2.73it/s]

Epoch 1 #56650 -- loss: 0.16837091848254204, acc: 0.92125


 99%|█████████▉| 56700/57292 [5:45:35<03:36,  2.73it/s]

Epoch 1 #56700 -- loss: 0.19298644162714482, acc: 0.9175


 99%|█████████▉| 56750/57292 [5:45:53<03:18,  2.73it/s]

Epoch 1 #56750 -- loss: 0.19451874904334546, acc: 0.91375


 99%|█████████▉| 56800/57292 [5:46:12<03:00,  2.73it/s]

Epoch 1 #56800 -- loss: 0.17715422093868255, acc: 0.93125


 99%|█████████▉| 56850/57292 [5:46:30<02:41,  2.73it/s]

Epoch 1 #56850 -- loss: 0.18625618048012257, acc: 0.93625


 99%|█████████▉| 56900/57292 [5:46:48<02:23,  2.73it/s]

Epoch 1 #56900 -- loss: 0.191310241445899, acc: 0.92625


 99%|█████████▉| 56950/57292 [5:47:07<02:05,  2.74it/s]

Epoch 1 #56950 -- loss: 0.19259231530129908, acc: 0.92125


 99%|█████████▉| 57000/57292 [5:47:25<01:46,  2.73it/s]

Epoch 1 #57000 -- loss: 0.17150121808052063, acc: 0.93


100%|█████████▉| 57050/57292 [5:47:43<01:28,  2.73it/s]

Epoch 1 #57050 -- loss: 0.2086722433194518, acc: 0.92


100%|█████████▉| 57100/57292 [5:48:01<01:10,  2.73it/s]

Epoch 1 #57100 -- loss: 0.20429555222392082, acc: 0.91875


100%|█████████▉| 57150/57292 [5:48:20<00:51,  2.74it/s]

Epoch 1 #57150 -- loss: 0.20538081277161838, acc: 0.91125


100%|█████████▉| 57200/57292 [5:48:38<00:33,  2.73it/s]

Epoch 1 #57200 -- loss: 0.18198486134409905, acc: 0.93


100%|█████████▉| 57250/57292 [5:48:56<00:15,  2.73it/s]

Epoch 1 #57250 -- loss: 0.18691764999181032, acc: 0.92125


100%|██████████| 57292/57292 [5:49:12<00:00,  2.73it/s]



Epoch 1 loss: 0.1891539253410111, acc: 0.9255787139883087


In [None]:
f = 1
e = 1
best_val_loss, best_val_acc, = np.inf, 0

# Validation
load_path = f'./models/{args.model_name}/{f}-fold/train.pt'
model.load_state_dict(torch.load(load_path,map_location=device))
model.to(device)
model.eval()
valid_perform = np.zeros(2)

all_valid_predict_lst = []
all_valid_labels_lst = []


with torch.no_grad():
    for v in validloader:
      input_ids, attention_mask, valid_labels = v["input_ids"].to(device), v["attention_mask"].to(device), v["labels"].to(device)
      
      valid_outputs = model(input_ids, attention_mask)
      valid_output = valid_outputs.logits
      valid_loss = criterion(valid_output, valid_labels)
      
      valid_predict = valid_output.argmax(dim=-1)
      valid_predict = valid_predict.detach().cpu().numpy()
      valid_labels = valid_labels.detach().cpu().numpy()

      valid_acc = accuracy_score(valid_labels, valid_predict)
      valid_perform += np.array([valid_loss.item(), valid_acc])

      all_valid_predict_lst += list(valid_predict)
      all_valid_labels_lst += list(valid_labels)

# Model 저장

val_total_loss = valid_perform[0] / valid_batch_
val_total_acc = valid_perform[1] / valid_batch_
best_val_loss = min(best_val_loss, val_total_loss)

if val_total_acc > best_val_acc:
    print(f"New best model for val accuracy : {val_total_acc}! saving the best model..")
    torch.save(model.state_dict(), f"./models/{args.model_name}/{f}-fold/best.pt")

    best_val_acc = val_total_acc

print()
print(
    f">>>> Validation loss: {val_total_loss}, Acc: {val_total_acc}"
    )
print()

best_val_acc_list.append(best_val_acc)
print('='*50)
print(f"{f}fold best_val_acc_list : {best_val_acc_list}")
print('='*15, f'{f}fold Final Score(ACC) : {np.mean(best_val_acc_list)}', '='*15)

New best model for val accuracy : 0.93472! saving the best model..

>>>> Validation loss: 0.16979252351015806, Acc: 0.93472

1fold best_val_acc_list : [0.93472]
