In [8]:
"""
model trainer
"""
from torch.autograd import Variable
from data import SpeechDataset
from torch.utils.data import DataLoader
import torch
from time import time
from torch.nn import Softmax
import numpy as np
import pandas as pd
import os
from random import choice
from resnet import ResModel
from tqdm import tqdm


def create_directory(dir):
    if not os.path.exists(dir):
        os.makedirs(dir)

def get_time(now, start):
    time_in_min = int((now - start) / 60)
    return time_in_min

# 학습을 위한 기본 설정값을 지정한다
BATCH_SIZE = 32  # 데이터 묶음에 해당하는 batch_size는 GPU 메모리에 알맞게 지정한다
mGPU = True  # multi-GPU를 사용할 경우에는 True로 지정한다
epochs = 20  # 모델이 훈련 데이터를 학습하는 횟수를 지정한다
mode = 'cv' # 교차 검증 모드(cv) or 테스트 모드(test)
model_name = 'model/model_resnet_aug.pth'  # 모델 결과물을 저장할 때 모델 이름을 지정한다

# ResNet 모델을 활성화한다
loss_fn = torch.nn.CrossEntropyLoss()
model = ResModel
speechmodel = torch.nn.DataParallel(model()) if mGPU else model()
speechmodel = speechmodel.cuda()

# SpeechDataset을 활성화한다
labels = ['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go']
label_to_int = dict(zip(labels, range(len(labels))))
int_to_label = dict(zip(range(len(labels)), labels))
int_to_label.update({len(labels): 'unknown', len(labels) + 1: 'silence'})

# 모드에 따라 학습 및 검증에 사용할 파일을 선택한다
trn = 'input/trn.txt' if mode == 'cv' else 'input/trn_all.txt'
tst = 'input/val.txt' if mode == 'cv' else 'input/tst.txt'


trn = [line.strip() for line in open(trn, 'r').readlines()]
wav_list = [line.split(',')[-1] for line in trn]
label_list = [line.split(',')[0] for line in trn]

# 학습용 SpeechDataset을 불러온다
traindataset = SpeechDataset(mode='train', label_to_int=label_to_int, wav_list=wav_list, label_list=label_list)

start_time = time()
aug_train_acc_list=[]
aug_val_acc_list = []
for e in range(epochs):
    print("training epoch ", e)
    # learning_rate를 epoch마다 다르게 지정한다
    learning_rate = 0.01 if e < 10 else 0.001
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, speechmodel.parameters()), lr=learning_rate, momentum=0.9, weight_decay=0.00001)
    # 모델을 학습하기 위하여 .train() 함수를 실행한다
    speechmodel.train()
    train_loss=[]

    total_correct = 0
    num_labels = 0
    trainloader = DataLoader(traindataset, BATCH_SIZE, shuffle=True)
    
    # 학습을 수행한다
    for batch_idx, batch_data in enumerate(tqdm(trainloader)):
        # batch_size 만큼의 음성 데이터(spec)와 정답값(label)을 받아온다
        spec = batch_data['spec']
        label = batch_data['label']
        spec, label = Variable(spec.cuda()), Variable(label.cuda())
        # 현재 모델의 예측값(y_pred)을 계산한다
        y_pred = speechmodel(spec)
        _, pred_labels = torch.max(y_pred.data, 1)
        correct = (pred_labels == label.data).sum()
        # 정답과 예측값간의 차이(loss)를 계산한다 
        loss = loss_fn(y_pred, label)

        total_correct += correct
        num_labels += len(label)
    
        optimizer.zero_grad()
        # loss를 기반으로 back-propagation을 수행한다
        loss.backward()
        # 모델 파라미터를 업데이트한다. (실질적 학습)
        optimizer.step()
        
    train_acc=100. * total_correct / num_labels
        
    aug_train_acc_list.append(train_acc.item())
        
    # 훈련 데이터에서의 정확률을 기록한다
    print("training accuracy:", train_acc,"train_loss",loss, get_time(time(), start_time))

    # 교차 검증 모드의 경우, 검증 데이터에 대한 정확률을 기록한다
    if mode == 'cv':
        # 현재 학습 중인 모델을 임시로 저장한다
        torch.save(speechmodel.state_dict(), '{}_cv'.format(model_name))
        
        # 검증 데이터를 불러온다
        softmax = Softmax(dim=1)
        tst_list = [line.strip() for line in open(tst, 'r').readlines()]
        wav_list = [line.split(',')[-1] for line in tst_list]
        label_list = [line.split(',')[0] for line in tst_list]
        cvdataset = SpeechDataset(mode='test', label_to_int=label_to_int, wav_list=wav_list)
        cvloader = DataLoader(cvdataset, BATCH_SIZE, shuffle=False)

        # 모델을 불러와 .eval() 함수로 검증 준비를 한다
        speechmodel = torch.nn.DataParallel(model()) if mGPU else model()
        speechmodel.load_state_dict(torch.load('{}_cv'.format(model_name)))
        speechmodel = speechmodel.cuda()
        speechmodel.eval()

        # 검증 데이터를 batch_size만큼씩 받아오며 예측값을 저장한다
        fnames, preds = [], []
        for batch_idx, batch_data in enumerate(tqdm(cvloader)):
            spec = Variable(batch_data['spec'].cuda())
            fname = batch_data['id']
            y_pred = softmax(speechmodel(spec))
            preds.append(y_pred.data.cpu().numpy())
            fnames += fname

        preds = np.vstack(preds)
        preds = [int_to_label[x] for x in np.argmax(preds, 1)]
        fnames = [fname.split('/')[-2] for fname in fnames]
        num_correct = 0 
        
        for true, pred in zip(fnames, preds):
            if true == pred:
                num_correct += 1
        val_acc = 100. * num_correct / len(preds)
        aug_val_acc_list.append(val_acc)

        # 검증 데이터의 정확률을 기록한다
        print("cv accuracy:", val_acc, get_time(time(), start_time))

# 학습이 완료된 모델을 저장한다
create_directory("model")
torch.save(speechmodel.state_dict(), model_name)

create_directory("val")
pd.DataFrame({'fname': fnames, 'pred_label': preds}).to_csv("val/{}.csv".format(model_name.split('/')[-1]), index=False)


# 테스트 데이터에 대한 예측값을 파일에 저장한다
print("doing prediction...")
softmax = Softmax(dim=1)

# 테스트 데이터를 불러온다
tst = [line.strip() for line in open(tst, 'r').readlines()]
wav_list = [line.split(',')[-1] for line in tst]
testdataset = SpeechDataset(mode='test', label_to_int=label_to_int, wav_list=wav_list)
testloader = DataLoader(testdataset, BATCH_SIZE, shuffle=False)

# 모델을 불러온다
speechmodel = torch.nn.DataParallel(model()) if mGPU else model()
speechmodel.load_state_dict(torch.load(model_name))
speechmodel = speechmodel.cuda()
speechmodel.eval()
    
test_fnames, test_labels = [], []
pred_scores = []



# 테스트 데이터에 대한 예측값을 계산한다
for batch_idx, batch_data in enumerate(tqdm(testloader)):
    spec = Variable(batch_data['spec'].cuda())
    fname = batch_data['id']
    y_pred = softmax(speechmodel(spec))
    pred_scores.append(y_pred.data.cpu().numpy())
    test_fnames += fname

# 가장 높은 확률값을 가진 예측값을 label 형태로 저장한다
final_pred = np.vstack(pred_scores)
final_labels = [int_to_label[x] for x in np.argmax(final_pred, 1)]
test_fnames = [x.split("/")[-1] for x in test_fnames]

# 테스트 파일 명과 예측값을 sub 폴더 아래 저장한다. 캐글에 직접 업로드 할 수 있는 파일 포맷이다.
create_directory("sub")
pd.DataFrame({'fname': test_fnames, 'label': final_labels}).to_csv("sub/{}.csv".format(model_name.split('/')[-1]), index=False)


  0%|          | 0/985 [00:00<?, ?it/s][A
  0%|          | 1/985 [00:00<02:24,  6.79it/s][A

training epoch  0



  0%|          | 3/985 [00:00<01:39,  9.90it/s][A
  1%|          | 5/985 [00:00<01:25, 11.50it/s][A
  1%|          | 7/985 [00:00<01:19, 12.24it/s][A
  1%|          | 9/985 [00:00<01:16, 12.80it/s][A
  1%|          | 11/985 [00:00<01:13, 13.24it/s][A
  1%|▏         | 13/985 [00:00<01:11, 13.59it/s][A
  2%|▏         | 15/985 [00:01<01:10, 13.80it/s][A
  2%|▏         | 17/985 [00:01<01:10, 13.82it/s][A
  2%|▏         | 19/985 [00:01<01:09, 13.99it/s][A
  2%|▏         | 21/985 [00:01<01:07, 14.19it/s][A
  2%|▏         | 23/985 [00:01<01:07, 14.26it/s][A
  3%|▎         | 25/985 [00:01<01:06, 14.38it/s][A
  3%|▎         | 27/985 [00:01<01:06, 14.39it/s][A
  3%|▎         | 29/985 [00:02<01:06, 14.37it/s][A
  3%|▎         | 31/985 [00:02<01:06, 14.36it/s][A
  3%|▎         | 33/985 [00:02<01:06, 14.35it/s][A
  4%|▎         | 35/985 [00:02<01:06, 14.33it/s][A
  4%|▍         | 37/985 [00:02<01:05, 14.41it/s][A
  4%|▍         | 39/985 [00:02<01:05, 14.43it/s][A
  4%|▍         

 63%|██████▎   | 621/985 [00:42<00:24, 14.74it/s][A
 63%|██████▎   | 623/985 [00:42<00:24, 14.75it/s][A
 63%|██████▎   | 625/985 [00:42<00:24, 14.75it/s][A
 64%|██████▎   | 627/985 [00:42<00:24, 14.75it/s][A
 64%|██████▍   | 629/985 [00:42<00:24, 14.76it/s][A
 64%|██████▍   | 631/985 [00:42<00:23, 14.76it/s][A
 64%|██████▍   | 633/985 [00:42<00:23, 14.75it/s][A
 64%|██████▍   | 635/985 [00:43<00:23, 14.75it/s][A
 65%|██████▍   | 637/985 [00:43<00:23, 14.75it/s][A
 65%|██████▍   | 639/985 [00:43<00:23, 14.74it/s][A
 65%|██████▌   | 641/985 [00:43<00:23, 14.74it/s][A
 65%|██████▌   | 643/985 [00:43<00:23, 14.73it/s][A
 65%|██████▌   | 645/985 [00:43<00:23, 14.72it/s][A
 66%|██████▌   | 647/985 [00:43<00:22, 14.72it/s][A
 66%|██████▌   | 649/985 [00:44<00:22, 14.72it/s][A
 66%|██████▌   | 651/985 [00:44<00:22, 14.73it/s][A
 66%|██████▋   | 653/985 [00:44<00:22, 14.73it/s][A
 66%|██████▋   | 655/985 [00:44<00:22, 14.72it/s][A
 67%|██████▋   | 657/985 [00:44<00:22, 14.72it

training accuracy: tensor(35.4208, device='cuda:0') train_loss tensor(2.7169, device='cuda:0', grad_fn=<NllLossBackward>) 1



  2%|▏         | 2/104 [00:00<00:07, 13.77it/s][A
  4%|▍         | 4/104 [00:00<00:06, 15.80it/s][A
  6%|▌         | 6/104 [00:00<00:05, 16.63it/s][A
  8%|▊         | 8/104 [00:00<00:05, 17.16it/s][A
 10%|▉         | 10/104 [00:00<00:05, 17.39it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 17.29it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 17.50it/s][A
 15%|█▌        | 16/104 [00:00<00:05, 17.39it/s][A
 17%|█▋        | 18/104 [00:01<00:04, 17.53it/s][A
 19%|█▉        | 20/104 [00:01<00:04, 17.40it/s][A
 21%|██        | 22/104 [00:01<00:04, 17.52it/s][A
 23%|██▎       | 24/104 [00:01<00:04, 17.59it/s][A
 25%|██▌       | 26/104 [00:01<00:04, 17.67it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 17.53it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 17.21it/s][A
 31%|███       | 32/104 [00:01<00:04, 17.17it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 16.96it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 16.93it/s][A
 37%|███▋      | 38/104 [00:02<00:03, 17.00it/s][A
 38%|███▊      

cv accuracy: 13.41978287092883 1
training epoch  1



  0%|          | 4/985 [00:00<01:01, 16.00it/s][A
  1%|          | 6/985 [00:00<01:02, 15.79it/s][A
  1%|          | 8/985 [00:00<01:01, 15.99it/s][A
  1%|          | 10/985 [00:00<01:04, 15.16it/s][A
  1%|          | 12/985 [00:00<01:03, 15.35it/s][A
  1%|▏         | 14/985 [00:00<01:06, 14.57it/s][A
  2%|▏         | 16/985 [00:01<01:05, 14.80it/s][A
  2%|▏         | 18/985 [00:01<01:05, 14.75it/s][A
  2%|▏         | 20/985 [00:01<01:04, 14.94it/s][A
  2%|▏         | 22/985 [00:01<01:04, 14.82it/s][A
  2%|▏         | 24/985 [00:01<01:06, 14.53it/s][A
  3%|▎         | 26/985 [00:01<01:05, 14.64it/s][A
  3%|▎         | 28/985 [00:01<01:04, 14.79it/s][A
  3%|▎         | 30/985 [00:02<01:04, 14.70it/s][A
  3%|▎         | 32/985 [00:02<01:05, 14.54it/s][A
  3%|▎         | 34/985 [00:02<01:05, 14.44it/s][A
  4%|▎         | 36/985 [00:02<01:05, 14.55it/s][A
  4%|▍         | 38/985 [00:02<01:05, 14.49it/s][A
  4%|▍         | 40/985 [00:02<01:05, 14.41it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:41<00:24, 14.99it/s][A
 63%|██████▎   | 624/985 [00:41<00:24, 14.99it/s][A
 64%|██████▎   | 626/985 [00:41<00:23, 14.99it/s][A
 64%|██████▍   | 628/985 [00:41<00:23, 14.99it/s][A
 64%|██████▍   | 630/985 [00:42<00:23, 14.98it/s][A
 64%|██████▍   | 632/985 [00:42<00:23, 14.98it/s][A
 64%|██████▍   | 634/985 [00:42<00:23, 14.97it/s][A
 65%|██████▍   | 636/985 [00:42<00:23, 14.96it/s][A
 65%|██████▍   | 638/985 [00:42<00:23, 14.95it/s][A
 65%|██████▍   | 640/985 [00:42<00:23, 14.96it/s][A
 65%|██████▌   | 642/985 [00:42<00:22, 14.96it/s][A
 65%|██████▌   | 644/985 [00:43<00:22, 14.96it/s][A
 66%|██████▌   | 646/985 [00:43<00:22, 14.97it/s][A
 66%|██████▌   | 648/985 [00:43<00:22, 14.97it/s][A
 66%|██████▌   | 650/985 [00:43<00:22, 14.97it/s][A
 66%|██████▌   | 652/985 [00:43<00:22, 14.97it/s][A
 66%|██████▋   | 654/985 [00:43<00:22, 14.96it/s][A
 67%|██████▋   | 656/985 [00:43<00:21, 14.97it/s][A
 67%|██████▋   | 658/985 [00:43<00:21, 14.97it

training accuracy: tensor(59.9174, device='cuda:0') train_loss tensor(4.1231, device='cuda:0', grad_fn=<NllLossBackward>) 2



  4%|▍         | 4/104 [00:00<00:05, 17.15it/s][A
  6%|▌         | 6/104 [00:00<00:05, 17.00it/s][A
  8%|▊         | 8/104 [00:00<00:05, 16.69it/s][A
 10%|▉         | 10/104 [00:00<00:05, 16.13it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 16.24it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 15.97it/s][A
 15%|█▌        | 16/104 [00:01<00:05, 15.75it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 15.86it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 15.69it/s][A
 21%|██        | 22/104 [00:01<00:05, 15.74it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 15.58it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 15.48it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 15.56it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 15.59it/s][A
 31%|███       | 32/104 [00:02<00:04, 15.51it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 15.21it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 15.25it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 15.29it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 15.28it/s][A
 40%|████     

cv accuracy: 33.50422195416164 2
training epoch  2



  0%|          | 4/985 [00:00<01:01, 16.02it/s][A
  1%|          | 6/985 [00:00<01:02, 15.65it/s][A
  1%|          | 8/985 [00:00<01:02, 15.56it/s][A
  1%|          | 10/985 [00:00<01:03, 15.24it/s][A
  1%|          | 12/985 [00:00<01:03, 15.28it/s][A
  1%|▏         | 14/985 [00:00<01:04, 15.02it/s][A
  2%|▏         | 16/985 [00:01<01:05, 14.84it/s][A
  2%|▏         | 18/985 [00:01<01:06, 14.61it/s][A
  2%|▏         | 20/985 [00:01<01:05, 14.69it/s][A
  2%|▏         | 22/985 [00:01<01:05, 14.65it/s][A
  2%|▏         | 24/985 [00:01<01:05, 14.61it/s][A
  3%|▎         | 26/985 [00:01<01:05, 14.57it/s][A
  3%|▎         | 28/985 [00:01<01:05, 14.59it/s][A
  3%|▎         | 30/985 [00:02<01:05, 14.51it/s][A
  3%|▎         | 32/985 [00:02<01:05, 14.48it/s][A
  3%|▎         | 34/985 [00:02<01:06, 14.34it/s][A
  4%|▎         | 36/985 [00:02<01:06, 14.36it/s][A
  4%|▍         | 38/985 [00:02<01:07, 14.12it/s][A
  4%|▍         | 40/985 [00:02<01:06, 14.16it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:43<00:25, 14.17it/s][A
 63%|██████▎   | 624/985 [00:44<00:25, 14.18it/s][A
 64%|██████▎   | 626/985 [00:44<00:25, 14.18it/s][A
 64%|██████▍   | 628/985 [00:44<00:25, 14.18it/s][A
 64%|██████▍   | 630/985 [00:44<00:25, 14.19it/s][A
 64%|██████▍   | 632/985 [00:44<00:24, 14.19it/s][A
 64%|██████▍   | 634/985 [00:44<00:24, 14.20it/s][A
 65%|██████▍   | 636/985 [00:44<00:24, 14.20it/s][A
 65%|██████▍   | 638/985 [00:44<00:24, 14.20it/s][A
 65%|██████▍   | 640/985 [00:45<00:24, 14.19it/s][A
 65%|██████▌   | 642/985 [00:45<00:24, 14.18it/s][A
 65%|██████▌   | 644/985 [00:45<00:24, 14.18it/s][A
 66%|██████▌   | 646/985 [00:45<00:23, 14.18it/s][A
 66%|██████▌   | 648/985 [00:45<00:23, 14.18it/s][A
 66%|██████▌   | 650/985 [00:45<00:23, 14.18it/s][A
 66%|██████▌   | 652/985 [00:45<00:23, 14.18it/s][A
 66%|██████▋   | 654/985 [00:46<00:23, 14.18it/s][A
 67%|██████▋   | 656/985 [00:46<00:23, 14.18it/s][A
 67%|██████▋   | 658/985 [00:46<00:23, 14.18it

training accuracy: tensor(77.2912, device='cuda:0') train_loss tensor(3.7670, device='cuda:0', grad_fn=<NllLossBackward>) 3



  2%|▏         | 2/104 [00:00<00:06, 16.46it/s][A
  4%|▍         | 4/104 [00:00<00:06, 16.48it/s][A
  6%|▌         | 6/104 [00:00<00:05, 16.35it/s][A
  8%|▊         | 8/104 [00:00<00:05, 16.83it/s][A
 10%|▉         | 10/104 [00:00<00:05, 16.15it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 15.79it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 15.86it/s][A
 15%|█▌        | 16/104 [00:01<00:05, 15.54it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 15.32it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 15.39it/s][A
 21%|██        | 22/104 [00:01<00:05, 15.22it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 14.94it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 15.17it/s][A
 27%|██▋       | 28/104 [00:01<00:05, 15.09it/s][A
 29%|██▉       | 30/104 [00:02<00:04, 14.94it/s][A
 31%|███       | 32/104 [00:02<00:04, 14.94it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 15.03it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 14.95it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 14.97it/s][A
 38%|███▊      

cv accuracy: 56.905910735826296 3
training epoch  3



  0%|          | 4/985 [00:00<01:10, 13.96it/s][A
  1%|          | 6/985 [00:00<01:11, 13.73it/s][A
  1%|          | 8/985 [00:00<01:10, 13.87it/s][A
  1%|          | 10/985 [00:00<01:10, 13.84it/s][A
  1%|          | 12/985 [00:00<01:09, 14.07it/s][A
  1%|▏         | 14/985 [00:00<01:07, 14.29it/s][A
  2%|▏         | 16/985 [00:01<01:07, 14.36it/s][A
  2%|▏         | 18/985 [00:01<01:07, 14.33it/s][A
  2%|▏         | 20/985 [00:01<01:07, 14.39it/s][A
  2%|▏         | 22/985 [00:01<01:07, 14.35it/s][A
  2%|▏         | 24/985 [00:01<01:06, 14.38it/s][A
  3%|▎         | 26/985 [00:01<01:06, 14.36it/s][A
  3%|▎         | 28/985 [00:01<01:06, 14.43it/s][A
  3%|▎         | 30/985 [00:02<01:05, 14.48it/s][A
  3%|▎         | 32/985 [00:02<01:05, 14.61it/s][A
  3%|▎         | 34/985 [00:02<01:04, 14.69it/s][A
  4%|▎         | 36/985 [00:02<01:04, 14.77it/s][A
  4%|▍         | 38/985 [00:02<01:03, 14.81it/s][A
  4%|▍         | 40/985 [00:02<01:03, 14.81it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:42<00:24, 14.69it/s][A
 63%|██████▎   | 624/985 [00:42<00:24, 14.70it/s][A
 64%|██████▎   | 626/985 [00:42<00:24, 14.70it/s][A
 64%|██████▍   | 628/985 [00:42<00:24, 14.70it/s][A
 64%|██████▍   | 630/985 [00:42<00:24, 14.71it/s][A
 64%|██████▍   | 632/985 [00:42<00:23, 14.71it/s][A
 64%|██████▍   | 634/985 [00:43<00:23, 14.72it/s][A
 65%|██████▍   | 636/985 [00:43<00:23, 14.73it/s][A
 65%|██████▍   | 638/985 [00:43<00:23, 14.73it/s][A
 65%|██████▍   | 640/985 [00:43<00:23, 14.73it/s][A
 65%|██████▌   | 642/985 [00:43<00:23, 14.74it/s][A
 65%|██████▌   | 644/985 [00:43<00:23, 14.73it/s][A
 66%|██████▌   | 646/985 [00:43<00:23, 14.73it/s][A
 66%|██████▌   | 648/985 [00:43<00:22, 14.73it/s][A
 66%|██████▌   | 650/985 [00:44<00:22, 14.74it/s][A
 66%|██████▌   | 652/985 [00:44<00:22, 14.74it/s][A
 66%|██████▋   | 654/985 [00:44<00:22, 14.73it/s][A
 67%|██████▋   | 656/985 [00:44<00:22, 14.73it/s][A
 67%|██████▋   | 658/985 [00:44<00:22, 14.73it

training accuracy: tensor(84.1124, device='cuda:0') train_loss tensor(3.5697, device='cuda:0', grad_fn=<NllLossBackward>) 4



  4%|▍         | 4/104 [00:00<00:06, 15.74it/s][A
  6%|▌         | 6/104 [00:00<00:06, 15.48it/s][A
  8%|▊         | 8/104 [00:00<00:06, 15.67it/s][A
 10%|▉         | 10/104 [00:00<00:06, 15.45it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 15.70it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 15.75it/s][A
 15%|█▌        | 16/104 [00:01<00:05, 15.64it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 15.67it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 15.54it/s][A
 21%|██        | 22/104 [00:01<00:05, 15.42it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 15.26it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 15.33it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 15.25it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 15.19it/s][A
 31%|███       | 32/104 [00:02<00:04, 15.27it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 15.32it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 15.26it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 15.30it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 15.36it/s][A
 40%|████     

cv accuracy: 56.30277442702051 4
training epoch  4



  0%|          | 4/985 [00:00<01:04, 15.10it/s][A
  1%|          | 6/985 [00:00<01:03, 15.35it/s][A
  1%|          | 8/985 [00:00<01:04, 15.21it/s][A
  1%|          | 10/985 [00:00<01:05, 15.00it/s][A
  1%|          | 12/985 [00:00<01:04, 15.00it/s][A
  1%|▏         | 14/985 [00:00<01:05, 14.87it/s][A
  2%|▏         | 16/985 [00:01<01:05, 14.76it/s][A
  2%|▏         | 18/985 [00:01<01:05, 14.87it/s][A
  2%|▏         | 20/985 [00:01<01:05, 14.84it/s][A
  2%|▏         | 22/985 [00:01<01:04, 14.89it/s][A
  2%|▏         | 24/985 [00:01<01:05, 14.67it/s][A
  3%|▎         | 26/985 [00:01<01:05, 14.70it/s][A
  3%|▎         | 28/985 [00:01<01:05, 14.64it/s][A
  3%|▎         | 30/985 [00:02<01:05, 14.61it/s][A
  3%|▎         | 32/985 [00:02<01:05, 14.57it/s][A
  3%|▎         | 34/985 [00:02<01:05, 14.61it/s][A
  4%|▎         | 36/985 [00:02<01:05, 14.58it/s][A
  4%|▍         | 38/985 [00:02<01:04, 14.68it/s][A
  4%|▍         | 40/985 [00:02<01:04, 14.70it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:41<00:24, 14.88it/s][A
 63%|██████▎   | 624/985 [00:41<00:24, 14.88it/s][A
 64%|██████▎   | 626/985 [00:42<00:24, 14.88it/s][A
 64%|██████▍   | 628/985 [00:42<00:24, 14.87it/s][A
 64%|██████▍   | 630/985 [00:42<00:23, 14.88it/s][A
 64%|██████▍   | 632/985 [00:42<00:23, 14.88it/s][A
 64%|██████▍   | 634/985 [00:42<00:23, 14.89it/s][A
 65%|██████▍   | 636/985 [00:42<00:23, 14.88it/s][A
 65%|██████▍   | 638/985 [00:42<00:23, 14.87it/s][A
 65%|██████▍   | 640/985 [00:43<00:23, 14.88it/s][A
 65%|██████▌   | 642/985 [00:43<00:23, 14.88it/s][A
 65%|██████▌   | 644/985 [00:43<00:22, 14.88it/s][A
 66%|██████▌   | 646/985 [00:43<00:22, 14.89it/s][A
 66%|██████▌   | 648/985 [00:43<00:22, 14.89it/s][A
 66%|██████▌   | 650/985 [00:43<00:22, 14.89it/s][A
 66%|██████▌   | 652/985 [00:43<00:22, 14.90it/s][A
 66%|██████▋   | 654/985 [00:43<00:22, 14.90it/s][A
 67%|██████▋   | 656/985 [00:44<00:22, 14.90it/s][A
 67%|██████▋   | 658/985 [00:44<00:21, 14.91it

training accuracy: tensor(86.9673, device='cuda:0') train_loss tensor(4.5958, device='cuda:0', grad_fn=<NllLossBackward>) 6



  4%|▍         | 4/104 [00:00<00:05, 17.60it/s][A
  6%|▌         | 6/104 [00:00<00:05, 16.83it/s][A
  8%|▊         | 8/104 [00:00<00:05, 16.77it/s][A
 10%|▉         | 10/104 [00:00<00:05, 17.12it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 17.33it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 17.19it/s][A
 15%|█▌        | 16/104 [00:00<00:05, 17.35it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 17.17it/s][A
 19%|█▉        | 20/104 [00:01<00:04, 17.07it/s][A
 21%|██        | 22/104 [00:01<00:04, 17.19it/s][A
 23%|██▎       | 24/104 [00:01<00:04, 17.10it/s][A
 25%|██▌       | 26/104 [00:01<00:04, 17.01it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 17.13it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 17.10it/s][A
 31%|███       | 32/104 [00:01<00:04, 17.18it/s][A
 33%|███▎      | 34/104 [00:01<00:04, 17.12it/s][A
 35%|███▍      | 36/104 [00:02<00:03, 17.20it/s][A
 37%|███▋      | 38/104 [00:02<00:03, 17.13it/s][A
 38%|███▊      | 40/104 [00:02<00:03, 17.19it/s][A
 40%|████     

cv accuracy: 56.393244873341374 6
training epoch  5



  0%|          | 4/985 [00:00<01:05, 14.97it/s][A
  1%|          | 6/985 [00:00<01:05, 15.01it/s][A
  1%|          | 8/985 [00:00<01:04, 15.25it/s][A
  1%|          | 10/985 [00:00<01:04, 15.09it/s][A
  1%|          | 12/985 [00:00<01:04, 15.05it/s][A
  1%|▏         | 14/985 [00:00<01:04, 15.08it/s][A
  2%|▏         | 16/985 [00:01<01:05, 14.84it/s][A
  2%|▏         | 18/985 [00:01<01:05, 14.83it/s][A
  2%|▏         | 20/985 [00:01<01:04, 15.00it/s][A
  2%|▏         | 22/985 [00:01<01:04, 14.95it/s][A
  2%|▏         | 24/985 [00:01<01:04, 14.95it/s][A
  3%|▎         | 26/985 [00:01<01:04, 14.98it/s][A
  3%|▎         | 28/985 [00:01<01:03, 15.01it/s][A
  3%|▎         | 30/985 [00:01<01:03, 15.10it/s][A
  3%|▎         | 32/985 [00:02<01:03, 15.03it/s][A
  3%|▎         | 34/985 [00:02<01:02, 15.12it/s][A
  4%|▎         | 36/985 [00:02<01:03, 15.05it/s][A
  4%|▍         | 38/985 [00:02<01:02, 15.06it/s][A
  4%|▍         | 40/985 [00:02<01:02, 15.07it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:41<00:24, 14.89it/s][A
 63%|██████▎   | 624/985 [00:41<00:24, 14.89it/s][A
 64%|██████▎   | 626/985 [00:42<00:24, 14.89it/s][A
 64%|██████▍   | 628/985 [00:42<00:23, 14.88it/s][A
 64%|██████▍   | 630/985 [00:42<00:23, 14.88it/s][A
 64%|██████▍   | 632/985 [00:42<00:23, 14.88it/s][A
 64%|██████▍   | 634/985 [00:42<00:23, 14.87it/s][A
 65%|██████▍   | 636/985 [00:42<00:23, 14.87it/s][A
 65%|██████▍   | 638/985 [00:42<00:23, 14.88it/s][A
 65%|██████▍   | 640/985 [00:43<00:23, 14.88it/s][A
 65%|██████▌   | 642/985 [00:43<00:23, 14.88it/s][A
 65%|██████▌   | 644/985 [00:43<00:22, 14.87it/s][A
 66%|██████▌   | 646/985 [00:43<00:22, 14.88it/s][A
 66%|██████▌   | 648/985 [00:43<00:22, 14.88it/s][A
 66%|██████▌   | 650/985 [00:43<00:22, 14.89it/s][A
 66%|██████▌   | 652/985 [00:43<00:22, 14.89it/s][A
 66%|██████▋   | 654/985 [00:43<00:22, 14.89it/s][A
 67%|██████▋   | 656/985 [00:44<00:22, 14.89it/s][A
 67%|██████▋   | 658/985 [00:44<00:21, 14.89it

training accuracy: tensor(88.9298, device='cuda:0') train_loss tensor(1.9920, device='cuda:0', grad_fn=<NllLossBackward>) 7



  4%|▍         | 4/104 [00:00<00:06, 15.20it/s][A
  6%|▌         | 6/104 [00:00<00:07, 13.95it/s][A
  8%|▊         | 8/104 [00:00<00:06, 14.41it/s][A
 10%|▉         | 10/104 [00:00<00:06, 14.44it/s][A
 12%|█▏        | 12/104 [00:00<00:06, 15.01it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 15.17it/s][A
 15%|█▌        | 16/104 [00:01<00:05, 15.35it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 15.27it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 15.37it/s][A
 21%|██        | 22/104 [00:01<00:05, 15.27it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 15.38it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 15.58it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 15.51it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 15.33it/s][A
 31%|███       | 32/104 [00:02<00:04, 15.35it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 15.31it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 15.24it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 15.19it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 15.24it/s][A
 40%|████     

cv accuracy: 67.73220747889023 7
training epoch  6



  0%|          | 4/985 [00:00<01:09, 14.17it/s][A
  1%|          | 6/985 [00:00<01:06, 14.64it/s][A
  1%|          | 8/985 [00:00<01:04, 15.03it/s][A
  1%|          | 10/985 [00:00<01:07, 14.42it/s][A
  1%|          | 12/985 [00:00<01:07, 14.50it/s][A
  1%|▏         | 14/985 [00:00<01:07, 14.41it/s][A
  2%|▏         | 16/985 [00:01<01:07, 14.27it/s][A
  2%|▏         | 18/985 [00:01<01:07, 14.40it/s][A
  2%|▏         | 20/985 [00:01<01:07, 14.39it/s][A
  2%|▏         | 22/985 [00:01<01:07, 14.37it/s][A
  2%|▏         | 24/985 [00:01<01:07, 14.34it/s][A
  3%|▎         | 26/985 [00:01<01:06, 14.40it/s][A
  3%|▎         | 28/985 [00:01<01:06, 14.48it/s][A
  3%|▎         | 30/985 [00:02<01:05, 14.51it/s][A
  3%|▎         | 32/985 [00:02<01:05, 14.56it/s][A
  3%|▎         | 34/985 [00:02<01:05, 14.54it/s][A
  4%|▎         | 36/985 [00:02<01:05, 14.59it/s][A
  4%|▍         | 38/985 [00:02<01:04, 14.60it/s][A
  4%|▍         | 40/985 [00:02<01:04, 14.65it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:42<00:25, 14.50it/s][A
 63%|██████▎   | 624/985 [00:43<00:24, 14.49it/s][A
 64%|██████▎   | 626/985 [00:43<00:24, 14.50it/s][A
 64%|██████▍   | 628/985 [00:43<00:24, 14.50it/s][A
 64%|██████▍   | 630/985 [00:43<00:24, 14.50it/s][A
 64%|██████▍   | 632/985 [00:43<00:24, 14.49it/s][A
 64%|██████▍   | 634/985 [00:43<00:24, 14.49it/s][A
 65%|██████▍   | 636/985 [00:43<00:24, 14.49it/s][A
 65%|██████▍   | 638/985 [00:44<00:23, 14.50it/s][A
 65%|██████▍   | 640/985 [00:44<00:23, 14.49it/s][A
 65%|██████▌   | 642/985 [00:44<00:23, 14.49it/s][A
 65%|██████▌   | 644/985 [00:44<00:23, 14.49it/s][A
 66%|██████▌   | 646/985 [00:44<00:23, 14.49it/s][A
 66%|██████▌   | 648/985 [00:44<00:23, 14.49it/s][A
 66%|██████▌   | 650/985 [00:44<00:23, 14.49it/s][A
 66%|██████▌   | 652/985 [00:44<00:22, 14.49it/s][A
 66%|██████▋   | 654/985 [00:45<00:22, 14.49it/s][A
 67%|██████▋   | 656/985 [00:45<00:22, 14.48it/s][A
 67%|██████▋   | 658/985 [00:45<00:22, 14.46it

training accuracy: tensor(90.0953, device='cuda:0') train_loss tensor(4.6780, device='cuda:0', grad_fn=<NllLossBackward>) 8



  2%|▏         | 2/104 [00:00<00:07, 14.14it/s][A
  4%|▍         | 4/104 [00:00<00:06, 15.25it/s][A
  5%|▍         | 5/104 [00:00<00:07, 13.70it/s][A
  7%|▋         | 7/104 [00:00<00:06, 14.82it/s][A
  9%|▊         | 9/104 [00:00<00:06, 15.25it/s][A
 11%|█         | 11/104 [00:00<00:06, 15.47it/s][A
 12%|█▎        | 13/104 [00:00<00:05, 15.29it/s][A
 14%|█▍        | 15/104 [00:00<00:05, 15.41it/s][A
 16%|█▋        | 17/104 [00:01<00:05, 15.45it/s][A
 18%|█▊        | 19/104 [00:01<00:05, 15.40it/s][A
 20%|██        | 21/104 [00:01<00:05, 15.28it/s][A
 22%|██▏       | 23/104 [00:01<00:05, 15.14it/s][A
 24%|██▍       | 25/104 [00:01<00:05, 15.06it/s][A
 26%|██▌       | 27/104 [00:01<00:05, 15.16it/s][A
 28%|██▊       | 29/104 [00:01<00:04, 15.26it/s][A
 30%|██▉       | 31/104 [00:02<00:04, 15.24it/s][A
 32%|███▏      | 33/104 [00:02<00:04, 15.18it/s][A
 34%|███▎      | 35/104 [00:02<00:04, 15.24it/s][A
 36%|███▌      | 37/104 [00:02<00:04, 15.31it/s][A
 38%|███▊      |

cv accuracy: 67.37032569360676 8
training epoch  7



  0%|          | 4/985 [00:00<01:22, 11.94it/s][A
  1%|          | 6/985 [00:00<01:17, 12.65it/s][A
  1%|          | 7/985 [00:00<01:21, 12.05it/s][A
  1%|          | 9/985 [00:00<01:18, 12.47it/s][A
  1%|          | 11/985 [00:00<01:16, 12.69it/s][A
  1%|▏         | 13/985 [00:01<01:15, 12.88it/s][A
  2%|▏         | 15/985 [00:01<01:13, 13.28it/s][A
  2%|▏         | 17/985 [00:01<01:11, 13.60it/s][A
  2%|▏         | 19/985 [00:01<01:10, 13.76it/s][A
  2%|▏         | 21/985 [00:01<01:09, 13.83it/s][A
  2%|▏         | 23/985 [00:01<01:09, 13.92it/s][A
  3%|▎         | 25/985 [00:01<01:08, 14.05it/s][A
  3%|▎         | 27/985 [00:01<01:07, 14.10it/s][A
  3%|▎         | 29/985 [00:02<01:07, 14.17it/s][A
  3%|▎         | 31/985 [00:02<01:07, 14.21it/s][A
  3%|▎         | 33/985 [00:02<01:06, 14.29it/s][A
  4%|▎         | 35/985 [00:02<01:06, 14.27it/s][A
  4%|▍         | 37/985 [00:02<01:05, 14.39it/s][A
  4%|▍         | 39/985 [00:02<01:05, 14.41it/s][A
  4%|▍         

 63%|██████▎   | 621/985 [00:42<00:24, 14.76it/s][A
 63%|██████▎   | 623/985 [00:42<00:24, 14.77it/s][A
 63%|██████▎   | 625/985 [00:42<00:24, 14.77it/s][A
 64%|██████▎   | 627/985 [00:42<00:24, 14.77it/s][A
 64%|██████▍   | 629/985 [00:42<00:24, 14.77it/s][A
 64%|██████▍   | 631/985 [00:42<00:23, 14.77it/s][A
 64%|██████▍   | 633/985 [00:42<00:23, 14.77it/s][A
 64%|██████▍   | 635/985 [00:42<00:23, 14.78it/s][A
 65%|██████▍   | 637/985 [00:43<00:23, 14.78it/s][A
 65%|██████▍   | 639/985 [00:43<00:23, 14.78it/s][A
 65%|██████▌   | 641/985 [00:43<00:23, 14.79it/s][A
 65%|██████▌   | 643/985 [00:43<00:23, 14.79it/s][A
 65%|██████▌   | 645/985 [00:43<00:22, 14.79it/s][A
 66%|██████▌   | 647/985 [00:43<00:22, 14.79it/s][A
 66%|██████▌   | 649/985 [00:43<00:22, 14.80it/s][A
 66%|██████▌   | 651/985 [00:43<00:22, 14.80it/s][A
 66%|██████▋   | 653/985 [00:44<00:22, 14.80it/s][A
 66%|██████▋   | 655/985 [00:44<00:22, 14.80it/s][A
 67%|██████▋   | 657/985 [00:44<00:22, 14.80it

training accuracy: tensor(90.8479, device='cuda:0') train_loss tensor(2.3231, device='cuda:0', grad_fn=<NllLossBackward>) 9



  2%|▏         | 2/104 [00:00<00:07, 13.61it/s][A
  4%|▍         | 4/104 [00:00<00:06, 15.74it/s][A
  6%|▌         | 6/104 [00:00<00:06, 16.09it/s][A
  8%|▊         | 8/104 [00:00<00:06, 15.18it/s][A
 10%|▉         | 10/104 [00:00<00:06, 15.02it/s][A
 12%|█▏        | 12/104 [00:00<00:06, 14.96it/s][A
 13%|█▎        | 14/104 [00:00<00:06, 14.95it/s][A
 15%|█▌        | 16/104 [00:01<00:05, 14.74it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 14.51it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 14.40it/s][A
 21%|██        | 22/104 [00:01<00:05, 14.58it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 14.71it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 14.81it/s][A
 27%|██▋       | 28/104 [00:01<00:05, 14.53it/s][A
 29%|██▉       | 30/104 [00:02<00:05, 14.67it/s][A
 31%|███       | 32/104 [00:02<00:04, 14.69it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 14.60it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 14.70it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 14.65it/s][A
 38%|███▊      

cv accuracy: 67.58142340168878 9
training epoch  8



  0%|          | 3/985 [00:00<01:20, 12.26it/s][A
  1%|          | 5/985 [00:00<01:15, 13.00it/s][A
  1%|          | 7/985 [00:00<01:11, 13.58it/s][A
  1%|          | 9/985 [00:00<01:11, 13.67it/s][A
  1%|          | 11/985 [00:00<01:08, 14.12it/s][A
  1%|▏         | 13/985 [00:00<01:08, 14.16it/s][A
  2%|▏         | 15/985 [00:01<01:07, 14.42it/s][A
  2%|▏         | 17/985 [00:01<01:06, 14.53it/s][A
  2%|▏         | 19/985 [00:01<01:06, 14.46it/s][A
  2%|▏         | 21/985 [00:01<01:06, 14.56it/s][A
  2%|▏         | 23/985 [00:01<01:06, 14.48it/s][A
  3%|▎         | 25/985 [00:01<01:06, 14.48it/s][A
  3%|▎         | 27/985 [00:01<01:05, 14.55it/s][A
  3%|▎         | 29/985 [00:01<01:05, 14.54it/s][A
  3%|▎         | 31/985 [00:02<01:05, 14.57it/s][A
  3%|▎         | 33/985 [00:02<01:05, 14.52it/s][A
  4%|▎         | 35/985 [00:02<01:06, 14.39it/s][A
  4%|▍         | 37/985 [00:02<01:05, 14.40it/s][A
  4%|▍         | 39/985 [00:02<01:05, 14.49it/s][A
  4%|▍         

 63%|██████▎   | 621/985 [00:40<00:23, 15.42it/s][A
 63%|██████▎   | 623/985 [00:40<00:23, 15.43it/s][A
 63%|██████▎   | 625/985 [00:40<00:23, 15.43it/s][A
 64%|██████▎   | 627/985 [00:40<00:23, 15.43it/s][A
 64%|██████▍   | 629/985 [00:40<00:23, 15.43it/s][A
 64%|██████▍   | 631/985 [00:40<00:22, 15.43it/s][A
 64%|██████▍   | 633/985 [00:41<00:22, 15.43it/s][A
 64%|██████▍   | 635/985 [00:41<00:22, 15.43it/s][A
 65%|██████▍   | 637/985 [00:41<00:22, 15.44it/s][A
 65%|██████▍   | 639/985 [00:41<00:22, 15.43it/s][A
 65%|██████▌   | 641/985 [00:41<00:22, 15.42it/s][A
 65%|██████▌   | 643/985 [00:41<00:22, 15.42it/s][A
 65%|██████▌   | 645/985 [00:41<00:22, 15.42it/s][A
 66%|██████▌   | 647/985 [00:41<00:21, 15.42it/s][A
 66%|██████▌   | 649/985 [00:42<00:21, 15.42it/s][A
 66%|██████▌   | 651/985 [00:42<00:21, 15.43it/s][A
 66%|██████▋   | 653/985 [00:42<00:21, 15.42it/s][A
 66%|██████▋   | 655/985 [00:42<00:21, 15.43it/s][A
 67%|██████▋   | 657/985 [00:42<00:21, 15.42it

training accuracy: tensor(91.8514, device='cuda:0') train_loss tensor(4.6192, device='cuda:0', grad_fn=<NllLossBackward>) 10



  4%|▍         | 4/104 [00:00<00:05, 18.50it/s][A
  6%|▌         | 6/104 [00:00<00:05, 17.81it/s][A
  8%|▊         | 8/104 [00:00<00:05, 17.98it/s][A
 10%|▉         | 10/104 [00:00<00:05, 18.07it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 17.68it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 17.80it/s][A
 15%|█▌        | 16/104 [00:00<00:05, 17.59it/s][A
 17%|█▋        | 18/104 [00:01<00:04, 17.70it/s][A
 19%|█▉        | 20/104 [00:01<00:04, 17.79it/s][A
 21%|██        | 22/104 [00:01<00:04, 17.70it/s][A
 23%|██▎       | 24/104 [00:01<00:04, 17.78it/s][A
 25%|██▌       | 26/104 [00:01<00:04, 17.85it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 17.91it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 17.96it/s][A
 31%|███       | 32/104 [00:01<00:04, 17.87it/s][A
 33%|███▎      | 34/104 [00:01<00:03, 17.90it/s][A
 35%|███▍      | 36/104 [00:02<00:03, 17.81it/s][A
 37%|███▋      | 38/104 [00:02<00:03, 17.85it/s][A
 38%|███▊      | 40/104 [00:02<00:03, 17.87it/s][A
 40%|████     

cv accuracy: 66.04342581423401 10
training epoch  9



  0%|          | 4/985 [00:00<00:59, 16.45it/s][A
  1%|          | 6/985 [00:00<00:59, 16.37it/s][A
  1%|          | 8/985 [00:00<00:59, 16.33it/s][A
  1%|          | 10/985 [00:00<01:02, 15.60it/s][A
  1%|          | 12/985 [00:00<01:01, 15.70it/s][A
  1%|▏         | 14/985 [00:00<01:04, 15.16it/s][A
  2%|▏         | 16/985 [00:01<01:03, 15.27it/s][A
  2%|▏         | 18/985 [00:01<01:03, 15.27it/s][A
  2%|▏         | 20/985 [00:01<01:02, 15.39it/s][A
  2%|▏         | 22/985 [00:01<01:02, 15.48it/s][A
  2%|▏         | 24/985 [00:01<01:01, 15.55it/s][A
  3%|▎         | 26/985 [00:01<01:01, 15.64it/s][A
  3%|▎         | 28/985 [00:01<01:01, 15.67it/s][A
  3%|▎         | 30/985 [00:01<01:00, 15.70it/s][A
  3%|▎         | 32/985 [00:02<01:00, 15.75it/s][A
  3%|▎         | 34/985 [00:02<01:00, 15.81it/s][A
  4%|▎         | 36/985 [00:02<01:00, 15.75it/s][A
  4%|▍         | 38/985 [00:02<00:59, 15.79it/s][A
  4%|▍         | 40/985 [00:02<01:00, 15.70it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:41<00:23, 15.13it/s][A
 63%|██████▎   | 624/985 [00:41<00:23, 15.13it/s][A
 64%|██████▎   | 626/985 [00:41<00:23, 15.13it/s][A
 64%|██████▍   | 628/985 [00:41<00:23, 15.13it/s][A
 64%|██████▍   | 630/985 [00:41<00:23, 15.13it/s][A
 64%|██████▍   | 632/985 [00:41<00:23, 15.13it/s][A
 64%|██████▍   | 634/985 [00:41<00:23, 15.11it/s][A
 65%|██████▍   | 636/985 [00:42<00:23, 15.12it/s][A
 65%|██████▍   | 638/985 [00:42<00:22, 15.12it/s][A
 65%|██████▍   | 640/985 [00:42<00:22, 15.12it/s][A
 65%|██████▌   | 642/985 [00:42<00:22, 15.12it/s][A
 65%|██████▌   | 644/985 [00:42<00:22, 15.12it/s][A
 66%|██████▌   | 646/985 [00:42<00:22, 15.12it/s][A
 66%|██████▌   | 648/985 [00:42<00:22, 15.12it/s][A
 66%|██████▌   | 650/985 [00:43<00:22, 15.12it/s][A
 66%|██████▌   | 652/985 [00:43<00:22, 15.12it/s][A
 66%|██████▋   | 654/985 [00:43<00:21, 15.12it/s][A
 67%|██████▋   | 656/985 [00:43<00:21, 15.12it/s][A
 67%|██████▋   | 658/985 [00:43<00:21, 15.11it

training accuracy: tensor(92.2547, device='cuda:0') train_loss tensor(2.3065, device='cuda:0', grad_fn=<NllLossBackward>) 12



  4%|▍         | 4/104 [00:00<00:06, 15.01it/s][A
  6%|▌         | 6/104 [00:00<00:06, 14.77it/s][A
  8%|▊         | 8/104 [00:00<00:06, 13.81it/s][A
 10%|▉         | 10/104 [00:00<00:06, 14.22it/s][A
 12%|█▏        | 12/104 [00:00<00:06, 14.81it/s][A
 13%|█▎        | 14/104 [00:00<00:06, 14.81it/s][A
 15%|█▌        | 16/104 [00:01<00:05, 15.21it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 15.14it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 15.27it/s][A
 21%|██        | 22/104 [00:01<00:05, 15.16it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 15.08it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 14.92it/s][A
 27%|██▋       | 28/104 [00:01<00:05, 14.98it/s][A
 29%|██▉       | 30/104 [00:02<00:04, 14.93it/s][A
 31%|███       | 32/104 [00:02<00:04, 14.90it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 14.95it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 14.81it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 14.74it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 14.60it/s][A
 40%|████     

cv accuracy: 70.05428226779252 12
training epoch  10



  0%|          | 4/985 [00:00<01:13, 13.36it/s][A
  1%|          | 6/985 [00:00<01:11, 13.62it/s][A
  1%|          | 8/985 [00:00<01:09, 14.08it/s][A
  1%|          | 10/985 [00:00<01:09, 14.08it/s][A
  1%|          | 12/985 [00:00<01:08, 14.24it/s][A
  1%|▏         | 14/985 [00:00<01:06, 14.50it/s][A
  2%|▏         | 16/985 [00:01<01:06, 14.61it/s][A
  2%|▏         | 18/985 [00:01<01:06, 14.53it/s][A
  2%|▏         | 20/985 [00:01<01:06, 14.59it/s][A
  2%|▏         | 22/985 [00:01<01:06, 14.55it/s][A
  2%|▏         | 24/985 [00:01<01:05, 14.63it/s][A
  3%|▎         | 26/985 [00:01<01:05, 14.66it/s][A
  3%|▎         | 28/985 [00:01<01:05, 14.70it/s][A
  3%|▎         | 30/985 [00:02<01:04, 14.74it/s][A
  3%|▎         | 32/985 [00:02<01:04, 14.76it/s][A
  3%|▎         | 34/985 [00:02<01:05, 14.49it/s][A
  4%|▎         | 36/985 [00:02<01:05, 14.47it/s][A
  4%|▍         | 38/985 [00:02<01:05, 14.49it/s][A
  4%|▍         | 40/985 [00:02<01:04, 14.54it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:42<00:24, 14.77it/s][A
 63%|██████▎   | 624/985 [00:42<00:24, 14.77it/s][A
 64%|██████▎   | 626/985 [00:42<00:24, 14.77it/s][A
 64%|██████▍   | 628/985 [00:42<00:24, 14.77it/s][A
 64%|██████▍   | 630/985 [00:42<00:24, 14.78it/s][A
 64%|██████▍   | 632/985 [00:42<00:23, 14.78it/s][A
 64%|██████▍   | 634/985 [00:42<00:23, 14.78it/s][A
 65%|██████▍   | 636/985 [00:43<00:23, 14.77it/s][A
 65%|██████▍   | 638/985 [00:43<00:23, 14.77it/s][A
 65%|██████▍   | 640/985 [00:43<00:23, 14.76it/s][A
 65%|██████▌   | 642/985 [00:43<00:23, 14.75it/s][A
 65%|██████▌   | 644/985 [00:43<00:23, 14.75it/s][A
 66%|██████▌   | 646/985 [00:43<00:22, 14.75it/s][A
 66%|██████▌   | 648/985 [00:43<00:22, 14.74it/s][A
 66%|██████▌   | 650/985 [00:44<00:22, 14.75it/s][A
 66%|██████▌   | 652/985 [00:44<00:22, 14.74it/s][A
 66%|██████▋   | 654/985 [00:44<00:22, 14.74it/s][A
 67%|██████▋   | 656/985 [00:44<00:22, 14.73it/s][A
 67%|██████▋   | 658/985 [00:44<00:22, 14.74it

training accuracy: tensor(94.6015, device='cuda:0') train_loss tensor(4.9400, device='cuda:0', grad_fn=<NllLossBackward>) 13



  4%|▍         | 4/104 [00:00<00:05, 17.49it/s][A
  6%|▌         | 6/104 [00:00<00:05, 16.81it/s][A
  8%|▊         | 8/104 [00:00<00:05, 17.20it/s][A
 10%|▉         | 10/104 [00:00<00:05, 17.07it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 17.32it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 17.25it/s][A
 15%|█▌        | 16/104 [00:00<00:05, 17.39it/s][A
 17%|█▋        | 18/104 [00:01<00:04, 17.29it/s][A
 19%|█▉        | 20/104 [00:01<00:04, 17.41it/s][A
 21%|██        | 22/104 [00:01<00:04, 17.35it/s][A
 23%|██▎       | 24/104 [00:01<00:04, 17.18it/s][A
 25%|██▌       | 26/104 [00:01<00:04, 17.11it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 17.19it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 17.16it/s][A
 31%|███       | 32/104 [00:01<00:04, 17.06it/s][A
 33%|███▎      | 34/104 [00:01<00:04, 17.02it/s][A
 35%|███▍      | 36/104 [00:02<00:03, 17.10it/s][A
 37%|███▋      | 38/104 [00:02<00:03, 17.08it/s][A
 38%|███▊      | 40/104 [00:02<00:03, 16.99it/s][A
 40%|████     

cv accuracy: 70.71773220747889 13
training epoch  11



  0%|          | 3/985 [00:00<01:12, 13.61it/s][A
  1%|          | 5/985 [00:00<01:14, 13.13it/s][A
  1%|          | 7/985 [00:00<01:13, 13.22it/s][A
  1%|          | 9/985 [00:00<01:10, 13.83it/s][A
  1%|          | 11/985 [00:00<01:09, 13.99it/s][A
  1%|▏         | 13/985 [00:00<01:08, 14.21it/s][A
  2%|▏         | 15/985 [00:01<01:07, 14.45it/s][A
  2%|▏         | 17/985 [00:01<01:07, 14.42it/s][A
  2%|▏         | 19/985 [00:01<01:06, 14.61it/s][A
  2%|▏         | 21/985 [00:01<01:06, 14.59it/s][A
  2%|▏         | 23/985 [00:01<01:05, 14.62it/s][A
  3%|▎         | 25/985 [00:01<01:05, 14.65it/s][A
  3%|▎         | 27/985 [00:01<01:04, 14.78it/s][A
  3%|▎         | 29/985 [00:01<01:04, 14.88it/s][A
  3%|▎         | 31/985 [00:02<01:03, 14.99it/s][A
  3%|▎         | 33/985 [00:02<01:03, 15.08it/s][A
  4%|▎         | 35/985 [00:02<01:03, 15.03it/s][A
  4%|▍         | 37/985 [00:02<01:02, 15.11it/s][A
  4%|▍         | 39/985 [00:02<01:02, 15.15it/s][A
  4%|▍         

 63%|██████▎   | 621/985 [00:41<00:24, 14.94it/s][A
 63%|██████▎   | 623/985 [00:41<00:24, 14.93it/s][A
 63%|██████▎   | 625/985 [00:41<00:24, 14.93it/s][A
 64%|██████▎   | 627/985 [00:41<00:23, 14.93it/s][A
 64%|██████▍   | 629/985 [00:42<00:23, 14.92it/s][A
 64%|██████▍   | 631/985 [00:42<00:23, 14.92it/s][A
 64%|██████▍   | 633/985 [00:42<00:23, 14.92it/s][A
 64%|██████▍   | 635/985 [00:42<00:23, 14.92it/s][A
 65%|██████▍   | 637/985 [00:42<00:23, 14.93it/s][A
 65%|██████▍   | 639/985 [00:42<00:23, 14.92it/s][A
 65%|██████▌   | 641/985 [00:42<00:23, 14.92it/s][A
 65%|██████▌   | 643/985 [00:43<00:22, 14.92it/s][A
 65%|██████▌   | 645/985 [00:43<00:22, 14.93it/s][A
 66%|██████▌   | 647/985 [00:43<00:22, 14.93it/s][A
 66%|██████▌   | 649/985 [00:43<00:22, 14.92it/s][A
 66%|██████▌   | 651/985 [00:43<00:22, 14.93it/s][A
 66%|██████▋   | 653/985 [00:43<00:22, 14.93it/s][A
 66%|██████▋   | 655/985 [00:43<00:22, 14.93it/s][A
 67%|██████▋   | 657/985 [00:44<00:21, 14.93it

training accuracy: tensor(95.1762, device='cuda:0') train_loss tensor(4.7132, device='cuda:0', grad_fn=<NllLossBackward>) 14



  4%|▍         | 4/104 [00:00<00:05, 18.63it/s][A
  6%|▌         | 6/104 [00:00<00:05, 17.95it/s][A
  8%|▊         | 8/104 [00:00<00:05, 18.08it/s][A
 10%|▉         | 10/104 [00:00<00:05, 17.65it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 17.82it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 17.53it/s][A
 15%|█▌        | 16/104 [00:00<00:05, 17.21it/s][A
 17%|█▋        | 18/104 [00:01<00:04, 17.28it/s][A
 19%|█▉        | 20/104 [00:01<00:04, 17.09it/s][A
 21%|██        | 22/104 [00:01<00:04, 16.94it/s][A
 23%|██▎       | 24/104 [00:01<00:04, 16.53it/s][A
 25%|██▌       | 26/104 [00:01<00:04, 16.66it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 16.52it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 16.31it/s][A
 31%|███       | 32/104 [00:01<00:04, 16.08it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 16.15it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 15.89it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 15.78it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 15.81it/s][A
 40%|████     

cv accuracy: 71.10977080820265 14
training epoch  12



  0%|          | 3/985 [00:00<01:16, 12.84it/s][A
  1%|          | 5/985 [00:00<01:14, 13.22it/s][A
  1%|          | 7/985 [00:00<01:12, 13.44it/s][A
  1%|          | 9/985 [00:00<01:10, 13.79it/s][A
  1%|          | 11/985 [00:00<01:08, 14.23it/s][A
  1%|▏         | 13/985 [00:00<01:08, 14.22it/s][A
  2%|▏         | 15/985 [00:01<01:07, 14.36it/s][A
  2%|▏         | 17/985 [00:01<01:06, 14.56it/s][A
  2%|▏         | 19/985 [00:01<01:06, 14.53it/s][A
  2%|▏         | 21/985 [00:01<01:06, 14.60it/s][A
  2%|▏         | 23/985 [00:01<01:05, 14.64it/s][A
  3%|▎         | 25/985 [00:01<01:05, 14.69it/s][A
  3%|▎         | 27/985 [00:01<01:04, 14.82it/s][A
  3%|▎         | 29/985 [00:01<01:04, 14.78it/s][A
  3%|▎         | 31/985 [00:02<01:04, 14.79it/s][A
  3%|▎         | 33/985 [00:02<01:03, 14.89it/s][A
  4%|▎         | 35/985 [00:02<01:03, 14.85it/s][A
  4%|▍         | 37/985 [00:02<01:03, 14.87it/s][A
  4%|▍         | 39/985 [00:02<01:03, 14.89it/s][A
  4%|▍         

 63%|██████▎   | 621/985 [00:42<00:24, 14.74it/s][A
 63%|██████▎   | 623/985 [00:42<00:24, 14.74it/s][A
 63%|██████▎   | 625/985 [00:42<00:24, 14.74it/s][A
 64%|██████▎   | 627/985 [00:42<00:24, 14.74it/s][A
 64%|██████▍   | 629/985 [00:42<00:24, 14.74it/s][A
 64%|██████▍   | 631/985 [00:42<00:24, 14.74it/s][A
 64%|██████▍   | 633/985 [00:42<00:23, 14.75it/s][A
 64%|██████▍   | 635/985 [00:43<00:23, 14.75it/s][A
 65%|██████▍   | 637/985 [00:43<00:23, 14.75it/s][A
 65%|██████▍   | 639/985 [00:43<00:23, 14.75it/s][A
 65%|██████▌   | 641/985 [00:43<00:23, 14.75it/s][A
 65%|██████▌   | 643/985 [00:43<00:23, 14.75it/s][A
 65%|██████▌   | 645/985 [00:43<00:23, 14.75it/s][A
 66%|██████▌   | 647/985 [00:43<00:22, 14.75it/s][A
 66%|██████▌   | 649/985 [00:44<00:22, 14.75it/s][A
 66%|██████▌   | 651/985 [00:44<00:22, 14.75it/s][A
 66%|██████▋   | 653/985 [00:44<00:22, 14.75it/s][A
 66%|██████▋   | 655/985 [00:44<00:22, 14.76it/s][A
 67%|██████▋   | 657/985 [00:44<00:22, 14.76it

training accuracy: tensor(95.3350, device='cuda:0') train_loss tensor(6.0430, device='cuda:0', grad_fn=<NllLossBackward>) 15



  4%|▍         | 4/104 [00:00<00:05, 17.76it/s][A
  6%|▌         | 6/104 [00:00<00:05, 17.96it/s][A
  8%|▊         | 8/104 [00:00<00:05, 16.74it/s][A
 10%|▉         | 10/104 [00:00<00:05, 16.61it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 16.29it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 16.11it/s][A
 15%|█▌        | 16/104 [00:01<00:05, 15.71it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 15.24it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 15.22it/s][A
 21%|██        | 22/104 [00:01<00:05, 15.12it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 15.00it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 14.79it/s][A
 27%|██▋       | 28/104 [00:01<00:05, 14.78it/s][A
 29%|██▉       | 30/104 [00:02<00:05, 14.66it/s][A
 31%|███       | 32/104 [00:02<00:04, 14.86it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 14.92it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 15.01it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 15.09it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 15.14it/s][A
 40%|████     

cv accuracy: 70.50663449939687 15
training epoch  13



  0%|          | 4/985 [00:00<01:11, 13.67it/s][A
  1%|          | 6/985 [00:00<01:10, 13.91it/s][A
  1%|          | 8/985 [00:00<01:07, 14.52it/s][A
  1%|          | 10/985 [00:00<01:06, 14.64it/s][A
  1%|          | 12/985 [00:00<01:05, 14.77it/s][A
  1%|▏         | 14/985 [00:00<01:05, 14.82it/s][A
  2%|▏         | 16/985 [00:01<01:04, 15.01it/s][A
  2%|▏         | 18/985 [00:01<01:05, 14.88it/s][A
  2%|▏         | 20/985 [00:01<01:06, 14.46it/s][A
  2%|▏         | 22/985 [00:01<01:06, 14.46it/s][A
  2%|▏         | 24/985 [00:01<01:06, 14.52it/s][A
  3%|▎         | 26/985 [00:01<01:05, 14.57it/s][A
  3%|▎         | 28/985 [00:01<01:05, 14.61it/s][A
  3%|▎         | 30/985 [00:02<01:05, 14.58it/s][A
  3%|▎         | 32/985 [00:02<01:05, 14.64it/s][A
  3%|▎         | 34/985 [00:02<01:04, 14.73it/s][A
  4%|▎         | 36/985 [00:02<01:04, 14.81it/s][A
  4%|▍         | 38/985 [00:02<01:04, 14.76it/s][A
  4%|▍         | 40/985 [00:02<01:03, 14.80it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:42<00:24, 14.73it/s][A
 63%|██████▎   | 624/985 [00:42<00:24, 14.74it/s][A
 64%|██████▎   | 626/985 [00:42<00:24, 14.74it/s][A
 64%|██████▍   | 628/985 [00:42<00:24, 14.75it/s][A
 64%|██████▍   | 630/985 [00:42<00:24, 14.75it/s][A
 64%|██████▍   | 632/985 [00:42<00:23, 14.76it/s][A
 64%|██████▍   | 634/985 [00:42<00:23, 14.76it/s][A
 65%|██████▍   | 636/985 [00:43<00:23, 14.77it/s][A
 65%|██████▍   | 638/985 [00:43<00:23, 14.77it/s][A
 65%|██████▍   | 640/985 [00:43<00:23, 14.77it/s][A
 65%|██████▌   | 642/985 [00:43<00:23, 14.77it/s][A
 65%|██████▌   | 644/985 [00:43<00:23, 14.78it/s][A
 66%|██████▌   | 646/985 [00:43<00:22, 14.78it/s][A
 66%|██████▌   | 648/985 [00:43<00:22, 14.78it/s][A
 66%|██████▌   | 650/985 [00:43<00:22, 14.78it/s][A
 66%|██████▌   | 652/985 [00:44<00:22, 14.79it/s][A
 66%|██████▋   | 654/985 [00:44<00:22, 14.79it/s][A
 67%|██████▋   | 656/985 [00:44<00:22, 14.79it/s][A
 67%|██████▋   | 658/985 [00:44<00:22, 14.79it

training accuracy: tensor(95.3604, device='cuda:0') train_loss tensor(4.5057, device='cuda:0', grad_fn=<NllLossBackward>) 16



  4%|▍         | 4/104 [00:00<00:05, 18.57it/s][A
  6%|▌         | 6/104 [00:00<00:05, 18.49it/s][A
  8%|▊         | 8/104 [00:00<00:05, 18.46it/s][A
 10%|▉         | 10/104 [00:00<00:05, 18.50it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 18.17it/s][A
 13%|█▎        | 14/104 [00:00<00:04, 18.23it/s][A
 15%|█▌        | 16/104 [00:00<00:04, 18.29it/s][A
 17%|█▋        | 18/104 [00:00<00:04, 18.30it/s][A
 19%|█▉        | 20/104 [00:01<00:04, 18.32it/s][A
 21%|██        | 22/104 [00:01<00:04, 18.34it/s][A
 23%|██▎       | 24/104 [00:01<00:04, 18.36it/s][A
 25%|██▌       | 26/104 [00:01<00:04, 18.38it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 18.39it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 18.41it/s][A
 31%|███       | 32/104 [00:01<00:03, 18.26it/s][A
 33%|███▎      | 34/104 [00:01<00:03, 18.29it/s][A
 35%|███▍      | 36/104 [00:01<00:03, 18.30it/s][A
 37%|███▋      | 38/104 [00:02<00:03, 18.12it/s][A
 38%|███▊      | 40/104 [00:02<00:03, 17.85it/s][A
 40%|████     

cv accuracy: 70.65741857659832 17
training epoch  14



  0%|          | 4/985 [00:00<00:59, 16.46it/s][A
  1%|          | 6/985 [00:00<01:00, 16.06it/s][A
  1%|          | 8/985 [00:00<01:00, 16.11it/s][A
  1%|          | 10/985 [00:00<01:00, 16.22it/s][A
  1%|          | 12/985 [00:00<00:59, 16.27it/s][A
  1%|▏         | 14/985 [00:00<00:59, 16.27it/s][A
  2%|▏         | 16/985 [00:00<00:59, 16.32it/s][A
  2%|▏         | 18/985 [00:01<00:59, 16.34it/s][A
  2%|▏         | 20/985 [00:01<00:59, 16.33it/s][A
  2%|▏         | 22/985 [00:01<00:58, 16.37it/s][A
  2%|▏         | 24/985 [00:01<00:58, 16.39it/s][A
  3%|▎         | 26/985 [00:01<00:58, 16.37it/s][A
  3%|▎         | 28/985 [00:01<00:58, 16.41it/s][A
  3%|▎         | 30/985 [00:01<00:58, 16.40it/s][A
  3%|▎         | 32/985 [00:01<00:58, 16.26it/s][A
  3%|▎         | 34/985 [00:02<00:58, 16.26it/s][A
  4%|▎         | 36/985 [00:02<00:58, 16.27it/s][A
  4%|▍         | 38/985 [00:02<00:58, 16.29it/s][A
  4%|▍         | 40/985 [00:02<00:57, 16.30it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:41<00:23, 15.14it/s][A
 63%|██████▎   | 624/985 [00:41<00:23, 15.13it/s][A
 64%|██████▎   | 626/985 [00:41<00:23, 15.13it/s][A
 64%|██████▍   | 628/985 [00:41<00:23, 15.12it/s][A
 64%|██████▍   | 630/985 [00:41<00:23, 15.12it/s][A
 64%|██████▍   | 632/985 [00:41<00:23, 15.12it/s][A
 64%|██████▍   | 634/985 [00:41<00:23, 15.12it/s][A
 65%|██████▍   | 636/985 [00:42<00:23, 15.11it/s][A
 65%|██████▍   | 638/985 [00:42<00:22, 15.11it/s][A
 65%|██████▍   | 640/985 [00:42<00:22, 15.11it/s][A
 65%|██████▌   | 642/985 [00:42<00:22, 15.11it/s][A
 65%|██████▌   | 644/985 [00:42<00:22, 15.11it/s][A
 66%|██████▌   | 646/985 [00:42<00:22, 15.11it/s][A
 66%|██████▌   | 648/985 [00:42<00:22, 15.11it/s][A
 66%|██████▌   | 650/985 [00:43<00:22, 15.11it/s][A
 66%|██████▌   | 652/985 [00:43<00:22, 15.11it/s][A
 66%|██████▋   | 654/985 [00:43<00:21, 15.11it/s][A
 67%|██████▋   | 656/985 [00:43<00:21, 15.12it/s][A
 67%|██████▋   | 658/985 [00:43<00:21, 15.11it

training accuracy: tensor(95.6335, device='cuda:0') train_loss tensor(2.3879, device='cuda:0', grad_fn=<NllLossBackward>) 18



  4%|▍         | 4/104 [00:00<00:06, 15.82it/s][A
  6%|▌         | 6/104 [00:00<00:06, 16.11it/s][A
  8%|▊         | 8/104 [00:00<00:06, 15.77it/s][A
 10%|▉         | 10/104 [00:00<00:05, 15.81it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 15.82it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 15.92it/s][A
 15%|█▌        | 16/104 [00:01<00:05, 15.71it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 15.47it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 15.00it/s][A
 21%|██        | 22/104 [00:01<00:05, 15.11it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 15.35it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 15.31it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 15.35it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 15.37it/s][A
 31%|███       | 32/104 [00:02<00:04, 15.31it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 15.35it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 15.38it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 15.36it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 15.30it/s][A
 40%|████     

cv accuracy: 70.9589867310012 18
training epoch  15



  0%|          | 4/985 [00:00<01:06, 14.74it/s][A
  1%|          | 6/985 [00:00<01:05, 14.90it/s][A
  1%|          | 8/985 [00:00<01:05, 14.93it/s][A
  1%|          | 10/985 [00:00<01:04, 15.04it/s][A
  1%|          | 12/985 [00:00<01:07, 14.38it/s][A
  1%|▏         | 14/985 [00:00<01:07, 14.48it/s][A
  2%|▏         | 16/985 [00:01<01:06, 14.53it/s][A
  2%|▏         | 18/985 [00:01<01:06, 14.52it/s][A
  2%|▏         | 20/985 [00:01<01:06, 14.59it/s][A
  2%|▏         | 22/985 [00:01<01:05, 14.62it/s][A
  2%|▏         | 24/985 [00:01<01:05, 14.69it/s][A
  3%|▎         | 26/985 [00:01<01:05, 14.61it/s][A
  3%|▎         | 28/985 [00:01<01:04, 14.73it/s][A
  3%|▎         | 30/985 [00:02<01:04, 14.75it/s][A
  3%|▎         | 32/985 [00:02<01:04, 14.84it/s][A
  3%|▎         | 34/985 [00:02<01:03, 14.92it/s][A
  4%|▎         | 36/985 [00:02<01:04, 14.76it/s][A
  4%|▍         | 38/985 [00:02<01:03, 14.81it/s][A
  4%|▍         | 40/985 [00:02<01:03, 14.84it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:42<00:24, 14.55it/s][A
 63%|██████▎   | 624/985 [00:42<00:24, 14.54it/s][A
 64%|██████▎   | 626/985 [00:43<00:24, 14.55it/s][A
 64%|██████▍   | 628/985 [00:43<00:24, 14.54it/s][A
 64%|██████▍   | 630/985 [00:43<00:24, 14.54it/s][A
 64%|██████▍   | 632/985 [00:43<00:24, 14.54it/s][A
 64%|██████▍   | 634/985 [00:43<00:24, 14.54it/s][A
 65%|██████▍   | 636/985 [00:43<00:24, 14.54it/s][A
 65%|██████▍   | 638/985 [00:43<00:23, 14.54it/s][A
 65%|██████▍   | 640/985 [00:43<00:23, 14.55it/s][A
 65%|██████▌   | 642/985 [00:44<00:23, 14.55it/s][A
 65%|██████▌   | 644/985 [00:44<00:23, 14.54it/s][A
 66%|██████▌   | 646/985 [00:44<00:23, 14.53it/s][A
 66%|██████▌   | 648/985 [00:44<00:23, 14.54it/s][A
 66%|██████▌   | 650/985 [00:44<00:23, 14.54it/s][A
 66%|██████▌   | 652/985 [00:44<00:22, 14.54it/s][A
 66%|██████▋   | 654/985 [00:44<00:22, 14.54it/s][A
 67%|██████▋   | 656/985 [00:45<00:22, 14.53it/s][A
 67%|██████▋   | 658/985 [00:45<00:22, 14.53it

training accuracy: tensor(95.7955, device='cuda:0') train_loss tensor(5.0664, device='cuda:0', grad_fn=<NllLossBackward>) 19



  4%|▍         | 4/104 [00:00<00:05, 18.64it/s][A
  6%|▌         | 6/104 [00:00<00:05, 17.80it/s][A
  8%|▊         | 8/104 [00:00<00:05, 17.95it/s][A
 10%|▉         | 10/104 [00:00<00:05, 17.46it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 17.64it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 17.73it/s][A
 15%|█▌        | 16/104 [00:00<00:04, 17.62it/s][A
 17%|█▋        | 18/104 [00:01<00:04, 17.71it/s][A
 19%|█▉        | 20/104 [00:01<00:04, 17.58it/s][A
 21%|██        | 22/104 [00:01<00:04, 17.68it/s][A
 23%|██▎       | 24/104 [00:01<00:04, 17.74it/s][A
 25%|██▌       | 26/104 [00:01<00:04, 17.66it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 17.72it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 17.64it/s][A
 31%|███       | 32/104 [00:01<00:04, 17.52it/s][A
 33%|███▎      | 34/104 [00:01<00:03, 17.58it/s][A
 35%|███▍      | 36/104 [00:02<00:03, 17.47it/s][A
 37%|███▋      | 38/104 [00:02<00:03, 17.53it/s][A
 38%|███▊      | 40/104 [00:02<00:03, 17.59it/s][A
 40%|████     

cv accuracy: 70.35585042219542 19
training epoch  16



  0%|          | 3/985 [00:00<01:26, 11.38it/s][A
  1%|          | 5/985 [00:00<01:15, 13.03it/s][A
  1%|          | 7/985 [00:00<01:13, 13.27it/s][A
  1%|          | 9/985 [00:00<01:10, 13.85it/s][A
  1%|          | 11/985 [00:00<01:08, 14.21it/s][A
  1%|▏         | 13/985 [00:00<01:06, 14.53it/s][A
  2%|▏         | 15/985 [00:01<01:05, 14.75it/s][A
  2%|▏         | 17/985 [00:01<01:04, 14.96it/s][A
  2%|▏         | 19/985 [00:01<01:03, 15.10it/s][A
  2%|▏         | 21/985 [00:01<01:03, 15.11it/s][A
  2%|▏         | 23/985 [00:01<01:04, 15.02it/s][A
  3%|▎         | 25/985 [00:01<01:03, 15.14it/s][A
  3%|▎         | 27/985 [00:01<01:02, 15.23it/s][A
  3%|▎         | 29/985 [00:01<01:02, 15.30it/s][A
  3%|▎         | 31/985 [00:02<01:02, 15.36it/s][A
  3%|▎         | 33/985 [00:02<01:01, 15.44it/s][A
  4%|▎         | 35/985 [00:02<01:01, 15.50it/s][A
  4%|▍         | 37/985 [00:02<01:01, 15.54it/s][A
  4%|▍         | 39/985 [00:02<01:01, 15.44it/s][A
  4%|▍         

 63%|██████▎   | 621/985 [00:41<00:24, 15.07it/s][A
 63%|██████▎   | 623/985 [00:41<00:24, 15.06it/s][A
 63%|██████▎   | 625/985 [00:41<00:23, 15.05it/s][A
 64%|██████▎   | 627/985 [00:41<00:23, 15.05it/s][A
 64%|██████▍   | 629/985 [00:41<00:23, 15.04it/s][A
 64%|██████▍   | 631/985 [00:41<00:23, 15.04it/s][A
 64%|██████▍   | 633/985 [00:42<00:23, 15.05it/s][A
 64%|██████▍   | 635/985 [00:42<00:23, 15.05it/s][A
 65%|██████▍   | 637/985 [00:42<00:23, 15.05it/s][A
 65%|██████▍   | 639/985 [00:42<00:22, 15.05it/s][A
 65%|██████▌   | 641/985 [00:42<00:22, 15.05it/s][A
 65%|██████▌   | 643/985 [00:42<00:22, 15.04it/s][A
 65%|██████▌   | 645/985 [00:42<00:22, 15.04it/s][A
 66%|██████▌   | 647/985 [00:43<00:22, 15.04it/s][A
 66%|██████▌   | 649/985 [00:43<00:22, 15.05it/s][A
 66%|██████▌   | 651/985 [00:43<00:22, 15.05it/s][A
 66%|██████▋   | 653/985 [00:43<00:22, 15.04it/s][A
 66%|██████▋   | 655/985 [00:43<00:21, 15.04it/s][A
 67%|██████▋   | 657/985 [00:43<00:21, 15.05it

training accuracy: tensor(95.8114, device='cuda:0') train_loss tensor(4.5954, device='cuda:0', grad_fn=<NllLossBackward>) 20



  4%|▍         | 4/104 [00:00<00:05, 18.51it/s][A
  6%|▌         | 6/104 [00:00<00:05, 18.42it/s][A
  8%|▊         | 8/104 [00:00<00:05, 18.22it/s][A
 10%|▉         | 10/104 [00:00<00:05, 18.24it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 17.86it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 17.94it/s][A
 15%|█▌        | 16/104 [00:00<00:04, 17.97it/s][A
 17%|█▋        | 18/104 [00:01<00:04, 17.31it/s][A
 19%|█▉        | 20/104 [00:01<00:04, 17.44it/s][A
 21%|██        | 22/104 [00:01<00:04, 17.53it/s][A
 23%|██▎       | 24/104 [00:01<00:04, 17.59it/s][A
 25%|██▌       | 26/104 [00:01<00:04, 17.66it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 17.73it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 17.78it/s][A
 31%|███       | 32/104 [00:01<00:04, 17.83it/s][A
 33%|███▎      | 34/104 [00:01<00:03, 17.88it/s][A
 35%|███▍      | 36/104 [00:02<00:03, 17.73it/s][A
 37%|███▋      | 38/104 [00:02<00:03, 17.76it/s][A
 38%|███▊      | 40/104 [00:02<00:03, 17.81it/s][A
 40%|████     

cv accuracy: 70.9891435464415 20
training epoch  17



  0%|          | 4/985 [00:00<00:59, 16.62it/s][A
  1%|          | 6/985 [00:00<00:59, 16.46it/s][A
  1%|          | 8/985 [00:00<01:02, 15.65it/s][A
  1%|          | 10/985 [00:00<01:01, 15.76it/s][A
  1%|          | 12/985 [00:00<01:01, 15.86it/s][A
  1%|▏         | 14/985 [00:00<01:01, 15.92it/s][A
  2%|▏         | 16/985 [00:01<01:00, 15.98it/s][A
  2%|▏         | 18/985 [00:01<01:00, 16.04it/s][A
  2%|▏         | 20/985 [00:01<00:59, 16.09it/s][A
  2%|▏         | 22/985 [00:01<00:59, 16.10it/s][A
  2%|▏         | 24/985 [00:01<00:59, 16.14it/s][A
  3%|▎         | 26/985 [00:01<00:59, 16.10it/s][A
  3%|▎         | 28/985 [00:01<00:59, 16.11it/s][A
  3%|▎         | 30/985 [00:01<00:59, 16.02it/s][A
  3%|▎         | 32/985 [00:01<00:59, 16.05it/s][A
  3%|▎         | 34/985 [00:02<00:59, 15.94it/s][A
  4%|▎         | 36/985 [00:02<00:59, 15.95it/s][A
  4%|▍         | 38/985 [00:02<00:59, 15.94it/s][A
  4%|▍         | 40/985 [00:02<00:59, 15.96it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:40<00:23, 15.33it/s][A
 63%|██████▎   | 624/985 [00:40<00:23, 15.33it/s][A
 64%|██████▎   | 626/985 [00:40<00:23, 15.33it/s][A
 64%|██████▍   | 628/985 [00:40<00:23, 15.34it/s][A
 64%|██████▍   | 630/985 [00:41<00:23, 15.33it/s][A
 64%|██████▍   | 632/985 [00:41<00:23, 15.34it/s][A
 64%|██████▍   | 634/985 [00:41<00:22, 15.34it/s][A
 65%|██████▍   | 636/985 [00:41<00:22, 15.33it/s][A
 65%|██████▍   | 638/985 [00:41<00:22, 15.33it/s][A
 65%|██████▍   | 640/985 [00:41<00:22, 15.34it/s][A
 65%|██████▌   | 642/985 [00:41<00:22, 15.34it/s][A
 65%|██████▌   | 644/985 [00:42<00:22, 15.33it/s][A
 66%|██████▌   | 646/985 [00:42<00:22, 15.33it/s][A
 66%|██████▌   | 648/985 [00:42<00:21, 15.33it/s][A
 66%|██████▌   | 650/985 [00:42<00:21, 15.33it/s][A
 66%|██████▌   | 652/985 [00:42<00:21, 15.32it/s][A
 66%|██████▋   | 654/985 [00:42<00:21, 15.31it/s][A
 67%|██████▋   | 656/985 [00:42<00:21, 15.31it/s][A
 67%|██████▋   | 658/985 [00:42<00:21, 15.31it

training accuracy: tensor(95.8590, device='cuda:0') train_loss tensor(2.4996, device='cuda:0', grad_fn=<NllLossBackward>) 21



  4%|▍         | 4/104 [00:00<00:06, 15.71it/s][A
  6%|▌         | 6/104 [00:00<00:06, 15.26it/s][A
  8%|▊         | 8/104 [00:00<00:06, 14.61it/s][A
 10%|▉         | 10/104 [00:00<00:06, 14.45it/s][A
 12%|█▏        | 12/104 [00:00<00:06, 14.77it/s][A
 13%|█▎        | 14/104 [00:00<00:06, 14.94it/s][A
 15%|█▌        | 16/104 [00:01<00:06, 14.59it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 14.35it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 14.37it/s][A
 21%|██        | 22/104 [00:01<00:05, 14.35it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 14.33it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 14.33it/s][A
 27%|██▋       | 28/104 [00:01<00:05, 14.11it/s][A
 29%|██▉       | 30/104 [00:02<00:05, 14.25it/s][A
 31%|███       | 32/104 [00:02<00:05, 14.39it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 14.42it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 14.46it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 14.46it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 14.47it/s][A
 40%|████     

cv accuracy: 71.10977080820265 21
training epoch  18



  0%|          | 4/985 [00:00<01:05, 14.89it/s][A
  1%|          | 6/985 [00:00<01:07, 14.57it/s][A
  1%|          | 8/985 [00:00<01:06, 14.77it/s][A
  1%|          | 10/985 [00:00<01:06, 14.64it/s][A
  1%|          | 12/985 [00:00<01:06, 14.57it/s][A
  1%|▏         | 14/985 [00:00<01:07, 14.32it/s][A
  2%|▏         | 16/985 [00:01<01:07, 14.38it/s][A
  2%|▏         | 18/985 [00:01<01:06, 14.44it/s][A
  2%|▏         | 20/985 [00:01<01:06, 14.59it/s][A
  2%|▏         | 22/985 [00:01<01:07, 14.28it/s][A
  2%|▏         | 24/985 [00:01<01:07, 14.25it/s][A
  3%|▎         | 26/985 [00:01<01:06, 14.31it/s][A
  3%|▎         | 28/985 [00:01<01:06, 14.40it/s][A
  3%|▎         | 30/985 [00:02<01:06, 14.45it/s][A
  3%|▎         | 32/985 [00:02<01:05, 14.56it/s][A
  3%|▎         | 34/985 [00:02<01:05, 14.60it/s][A
  4%|▎         | 36/985 [00:02<01:05, 14.40it/s][A
  4%|▍         | 38/985 [00:02<01:05, 14.44it/s][A
  4%|▍         | 40/985 [00:02<01:05, 14.49it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:42<00:24, 14.63it/s][A
 63%|██████▎   | 624/985 [00:42<00:24, 14.63it/s][A
 64%|██████▎   | 626/985 [00:42<00:24, 14.62it/s][A
 64%|██████▍   | 628/985 [00:42<00:24, 14.63it/s][A
 64%|██████▍   | 630/985 [00:43<00:24, 14.63it/s][A
 64%|██████▍   | 632/985 [00:43<00:24, 14.63it/s][A
 64%|██████▍   | 634/985 [00:43<00:24, 14.62it/s][A
 65%|██████▍   | 636/985 [00:43<00:23, 14.62it/s][A
 65%|██████▍   | 638/985 [00:43<00:23, 14.62it/s][A
 65%|██████▍   | 640/985 [00:43<00:23, 14.62it/s][A
 65%|██████▌   | 642/985 [00:43<00:23, 14.62it/s][A
 65%|██████▌   | 644/985 [00:44<00:23, 14.62it/s][A
 66%|██████▌   | 646/985 [00:44<00:23, 14.62it/s][A
 66%|██████▌   | 648/985 [00:44<00:23, 14.62it/s][A
 66%|██████▌   | 650/985 [00:44<00:22, 14.62it/s][A
 66%|██████▌   | 652/985 [00:44<00:22, 14.62it/s][A
 66%|██████▋   | 654/985 [00:44<00:22, 14.62it/s][A
 67%|██████▋   | 656/985 [00:44<00:22, 14.62it/s][A
 67%|██████▋   | 658/985 [00:44<00:22, 14.63it

training accuracy: tensor(95.8336, device='cuda:0') train_loss tensor(6.1344, device='cuda:0', grad_fn=<NllLossBackward>) 23



  4%|▍         | 4/104 [00:00<00:06, 16.12it/s][A
  6%|▌         | 6/104 [00:00<00:05, 16.91it/s][A
  8%|▊         | 8/104 [00:00<00:05, 16.89it/s][A
 10%|▉         | 10/104 [00:00<00:05, 16.62it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 16.88it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 17.09it/s][A
 15%|█▌        | 16/104 [00:00<00:05, 16.93it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 16.43it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 16.17it/s][A
 21%|██        | 22/104 [00:01<00:05, 15.70it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 15.58it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 15.37it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 15.24it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 15.13it/s][A
 31%|███       | 32/104 [00:02<00:04, 14.94it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 14.94it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 14.82it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 14.81it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 14.90it/s][A
 40%|████     

cv accuracy: 71.01930036188179 23
training epoch  19



  0%|          | 4/985 [00:00<01:27, 11.18it/s][A
  1%|          | 6/985 [00:00<01:22, 11.89it/s][A
  1%|          | 8/985 [00:00<01:19, 12.34it/s][A
  1%|          | 10/985 [00:00<01:19, 12.26it/s][A
  1%|          | 12/985 [00:00<01:16, 12.65it/s][A
  1%|▏         | 14/985 [00:01<01:17, 12.57it/s][A
  2%|▏         | 16/985 [00:01<01:16, 12.74it/s][A
  2%|▏         | 18/985 [00:01<01:16, 12.67it/s][A
  2%|▏         | 20/985 [00:01<01:16, 12.64it/s][A
  2%|▏         | 22/985 [00:01<01:15, 12.78it/s][A
  2%|▏         | 24/985 [00:01<01:13, 13.02it/s][A
  3%|▎         | 26/985 [00:02<01:14, 12.89it/s][A
  3%|▎         | 28/985 [00:02<01:13, 12.98it/s][A
  3%|▎         | 30/985 [00:02<01:12, 13.09it/s][A
  3%|▎         | 32/985 [00:02<01:11, 13.24it/s][A
  3%|▎         | 34/985 [00:02<01:11, 13.39it/s][A
  4%|▎         | 36/985 [00:02<01:10, 13.42it/s][A
  4%|▍         | 38/985 [00:02<01:10, 13.51it/s][A
  4%|▍         | 40/985 [00:02<01:10, 13.49it/s][A
  4%|▍        

 63%|██████▎   | 622/985 [00:41<00:24, 14.93it/s][A
 63%|██████▎   | 624/985 [00:41<00:24, 14.93it/s][A
 64%|██████▎   | 626/985 [00:41<00:24, 14.93it/s][A
 64%|██████▍   | 628/985 [00:42<00:23, 14.93it/s][A
 64%|██████▍   | 630/985 [00:42<00:23, 14.93it/s][A
 64%|██████▍   | 632/985 [00:42<00:23, 14.94it/s][A
 64%|██████▍   | 634/985 [00:42<00:23, 14.94it/s][A
 65%|██████▍   | 636/985 [00:42<00:23, 14.94it/s][A
 65%|██████▍   | 638/985 [00:42<00:23, 14.95it/s][A
 65%|██████▍   | 640/985 [00:42<00:23, 14.95it/s][A
 65%|██████▌   | 642/985 [00:42<00:22, 14.95it/s][A
 65%|██████▌   | 644/985 [00:43<00:22, 14.95it/s][A
 66%|██████▌   | 646/985 [00:43<00:22, 14.95it/s][A
 66%|██████▌   | 648/985 [00:43<00:22, 14.95it/s][A
 66%|██████▌   | 650/985 [00:43<00:22, 14.95it/s][A
 66%|██████▌   | 652/985 [00:43<00:22, 14.95it/s][A
 66%|██████▋   | 654/985 [00:43<00:22, 14.94it/s][A
 67%|██████▋   | 656/985 [00:43<00:22, 14.94it/s][A
 67%|██████▋   | 658/985 [00:44<00:21, 14.95it

training accuracy: tensor(95.9892, device='cuda:0') train_loss tensor(4.6358, device='cuda:0', grad_fn=<NllLossBackward>) 24



  4%|▍         | 4/104 [00:00<00:05, 17.63it/s][A
  6%|▌         | 6/104 [00:00<00:05, 17.11it/s][A
  8%|▊         | 8/104 [00:00<00:05, 17.49it/s][A
 10%|▉         | 10/104 [00:00<00:05, 17.33it/s][A
 12%|█▏        | 12/104 [00:00<00:05, 17.52it/s][A
 13%|█▎        | 14/104 [00:00<00:05, 17.32it/s][A
 15%|█▌        | 16/104 [00:00<00:05, 17.47it/s][A
 17%|█▋        | 18/104 [00:01<00:04, 17.28it/s][A
 19%|█▉        | 20/104 [00:01<00:04, 17.08it/s][A
 21%|██        | 22/104 [00:01<00:04, 17.21it/s][A
 23%|██▎       | 24/104 [00:01<00:04, 17.31it/s][A
 25%|██▌       | 26/104 [00:01<00:04, 17.25it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 17.19it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 17.26it/s][A
 31%|███       | 32/104 [00:01<00:04, 17.23it/s][A
 33%|███▎      | 34/104 [00:01<00:04, 17.17it/s][A
 35%|███▍      | 36/104 [00:02<00:03, 17.23it/s][A
 37%|███▋      | 38/104 [00:02<00:03, 17.18it/s][A
 38%|███▊      | 40/104 [00:02<00:03, 17.03it/s][A
 40%|████     

cv accuracy: 70.47647768395657 24
doing prediction...



  4%|▍         | 4/104 [00:00<00:05, 17.02it/s][A
  6%|▌         | 6/104 [00:00<00:06, 16.08it/s][A
  8%|▊         | 8/104 [00:00<00:06, 15.77it/s][A
 10%|▉         | 10/104 [00:00<00:06, 14.49it/s][A
 12%|█▏        | 12/104 [00:00<00:06, 14.41it/s][A
 13%|█▎        | 14/104 [00:00<00:06, 14.89it/s][A
 15%|█▌        | 16/104 [00:01<00:05, 15.11it/s][A
 17%|█▋        | 18/104 [00:01<00:05, 15.43it/s][A
 19%|█▉        | 20/104 [00:01<00:05, 15.69it/s][A
 21%|██        | 22/104 [00:01<00:05, 15.55it/s][A
 23%|██▎       | 24/104 [00:01<00:05, 15.64it/s][A
 25%|██▌       | 26/104 [00:01<00:05, 15.36it/s][A
 27%|██▋       | 28/104 [00:01<00:04, 15.28it/s][A
 29%|██▉       | 30/104 [00:01<00:04, 15.36it/s][A
 31%|███       | 32/104 [00:02<00:04, 15.25it/s][A
 33%|███▎      | 34/104 [00:02<00:04, 15.20it/s][A
 35%|███▍      | 36/104 [00:02<00:04, 14.94it/s][A
 37%|███▋      | 38/104 [00:02<00:04, 14.90it/s][A
 38%|███▊      | 40/104 [00:02<00:04, 14.85it/s][A
 40%|████     

In [7]:

aug_val_acc_list

[16.797346200241254,
 34.89143546441496,
 46.17008443908323,
 41.767189384800965,
 47.61761158021713,
 61.82147165259349,
 68.75753920386008,
 60.765983112183356,
 68.99879372738239,
 69.45114595898673,
 71.10977080820265,
 70.71773220747889,
 70.59710494571773,
 70.32569360675512,
 70.9891435464415,
 71.04945717732207,
 70.50663449939687,
 70.9589867310012,
 71.41133896260556,
 71.47165259348613]