In [None]:
from __future__ import print_function
import os
import time
import logging
import argparse
import numpy as np
from visdom import Visdom
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from utils import *
from metric.loss import FitNet, AttentionTransfer, RKdAngle, RkdDistance

# Teacher models:
# VGG11/VGG13/VGG16/VGG19, GoogLeNet, AlxNet, ResNet18, ResNet34, 
# ResNet50, ResNet101, ResNet152, ResNeXt29_2x64d, ResNeXt29_4x64d, 
# ResNeXt29_8x64d, ResNeXt29_32x64d, PreActResNet18, PreActResNet34, 
# PreActResNet50, PreActResNet101, PreActResNet152, 
# DenseNet121, DenseNet161, DenseNet169, DenseNet201, 
import models

# Student models:
# myNet, LeNet, FitNet

start_time = time.time()
# os.makedirs('./checkpoint', exist_ok=True)

# Training settings
# Help texts that mention a default use argparse's %(default)s placeholder so the
# documented value can never drift away from the real one.
parser = argparse.ArgumentParser(description='LR_adaptive_learning')

parser.add_argument('--dataset',
                    choices=['CIFAR10',
                             'CIFAR100'
                            ],
                    default='CIFAR10')
parser.add_argument('--teachers',
                    choices=['ResNet32',
                             'ResNet50',
                             'ResNet56',
                             'ResNet110',
                             'DenseNet121'
                            ],
                    default=['ResNet32', 'ResNet56', 'ResNet110'],
                    nargs='+')
parser.add_argument('--student',
                    choices=['ResNet20',
                             'myNet'
                            ],
                    default='ResNet20')

parser.add_argument('--kd_ratio', default=0.7, type=float)
parser.add_argument('--n_class', type=int, default=10, metavar='N', help='num of classes')
parser.add_argument('--T', type=float, default=20.0, metavar='Temputure', help='Temputure for distillation')
parser.add_argument('--batch_size', type=int, default=128, metavar='N', help='input batch size for training')
parser.add_argument('--test_batch_size', type=int, default=128, metavar='N', help='input test batch size for training')
parser.add_argument('--epochs', type=int, default=20, metavar='N', help='number of epochs to train (default: %(default)s)')
parser.add_argument('--lr', type=float, default=0.1, metavar='LR', help='learning rate (default: %(default)s)')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum (default: %(default)s)')
parser.add_argument('--device', default='cuda:0', type=str, help='device: cuda or cpu')
parser.add_argument('--print_freq', type=int, default=10, metavar='N', help='how many batches to wait before logging training status')

# The argument list is given explicitly instead of being read from sys.argv.
config = ['--epochs', '200', '--T', '5.0', '--device', 'cuda:0']
args = parser.parse_args(config)

# Use the requested device only when CUDA is usable; otherwise run on the CPU.
device = args.device if torch.cuda.is_available() else 'cpu'
load_dir = './checkpoint/' + args.dataset + '/'

# teachers model
teacher_models = []
for te in args.teachers:
    # --n_class defaults to 10, the value that used to be hard-coded here.
    te_model = getattr(models, te)(num_classes=args.n_class)
#     print(te_model)
    # map_location remaps the stored tensors onto `device`, so a checkpoint saved
    # on a GPU still loads after the CPU fallback above.
    te_model.load_state_dict(
        torch.load(load_dir + te_model.model_name + '.pth', map_location=device))
    te_model.to(device)
    teacher_models.append(te_model)

# The student is built with its constructor defaults and starts untrained.
st_model = getattr(models, args.student)()  # args.student()
st_model.to(device)

# logging
# Every run starts from an empty log file stored in the checkpoint directory.
logfile = ''.join([load_dir, 'ada_learning_', st_model.model_name, '.log'])
if os.path.exists(logfile):
    os.remove(logfile)
def log_out(info):
    """Append ``info`` as one line to the module-level ``logfile`` and echo it to stdout.

    Args:
        info: text of the log entry; a newline is added after it in the file.
    """
    # The context manager closes the file even if the write raises.
    with open(logfile, mode='a') as f:
        f.write(info)
        f.write('\n')
    print(info)
    
# visualizer
vis = Visdom(env='distill')

# Loss window: created with a single point at the origin; the training loop
# appends one point per epoch to it.
loss_win = vis.line(
    X=np.array([0]),
    Y=np.array([0]),
    name="loss",
    opts={
        'title': 'ada. loss',
        'xtickmin': 0,
        'ytickmin': 0,
        'ytickstep': 0.5,
    },
)

# Accuracy window: two curves (train / test), each seeded with one point at the origin.
acc_win = vis.line(
    X=np.array([[0, 0]]),
    Y=np.array([[0, 0]]),
    name="acc",
    opts={
        'title': 'ada. ACC',
        'xtickmin': 0,
        'ytickmin': 0,
        'ytickmax': 100,
        'legend': ['train_acc', 'test_acc'],
    },
)


# adapter model
class Adapter():
    """Learns per-sample mixing weights over several teacher models.

    The weights are computed from the student's feature map through two
    trainable tensors: ``theta`` (one row per teacher) and ``W`` (one value per
    channel). ``loss`` combines distillation, cross-entropy and the two
    relational criteria defined at module level.
    """

    def __init__(self, in_models, pool_size):
        """Create the trainable tensors and the pooling layer.

        Args:
            in_models: sequence of teacher models; only its length is used.
            pool_size: size of the student's pooled feature map. Index 1 is
                read as the channel count and index 2 as the spatial width.
        """
        # representations of teachers
        pool_ch = pool_size[1]
        pool_w = pool_size[2]
        # Fixed seed so theta and W start from the same values on every run.
        # Note that this reseeds the global torch RNG.
        torch.manual_seed(1)
        self.theta = torch.randn(len(in_models), pool_ch).to(device)  # [n_teachers, channels]
        self.theta.requires_grad_(True)

        self.max_feat = nn.MaxPool2d(kernel_size=(pool_w, pool_w), stride=pool_w).to(device)
        self.W = torch.randn(pool_ch, 1).to(device)  # [channels, 1]
        self.W.requires_grad_(True)
        # True once eval() has been called: disables the regularisation term in loss().
        self.val = False

    def loss(self, y, labels, weighted_logits, T=10.0, alpha=0.7):
        """Return the adapter training loss.

        Args:
            y: raw student logits, one row per sample.
            labels: ground-truth class indices.
            weighted_logits: teacher probabilities already mixed with the
                adapter weights.
            T: distillation temperature.
            alpha: weight of the distillation term; ``1 - alpha`` weights the
                cross-entropy term.

        Returns:
            Scalar loss tensor.
        """
        log_p = F.log_softmax(y / T, dim=1)
        # NOTE(review): KLDivLoss' default 'mean' reduction averages over every
        # element rather than per sample ('batchmean'). It is kept unchanged so
        # the loss scale stays the same.
        soft_loss = nn.KLDivLoss()(log_p, weighted_logits)
        # The hard-label term uses the raw logits. It used to receive the
        # temperature-scaled log-probabilities because `y` had been overwritten.
        hard_loss = F.cross_entropy(y, labels)
        kd_loss = soft_loss * (T * T * 2.0 * alpha) + hard_loss * (1. - alpha)
        # The relational criteria keep receiving the tempered log-probabilities.
        angle_loss = angle_criterion(log_p, weighted_logits)
        dist_loss = dist_criterion(log_p, weighted_logits)
        ada_loss = kd_loss + angle_loss + dist_loss
        # Regularisation (squared L2 norm of W and theta), only while the adapter is trained.
        if not self.val:
            ada_loss = ada_loss + 0.1 * (torch.sum(self.W * self.W) + torch.sum(self.theta * self.theta))
        return ada_loss

    def gradient(self, lr=0.01):
        """Apply one plain gradient-descent step to ``W`` and clear its gradient.

        Does nothing when ``W`` has no gradient yet.
        """
        if self.W.grad is None:
            return
        with torch.no_grad():
            self.W -= lr * self.W.grad
            # Manually zero the gradients after updating weights
            self.W.grad.zero_()

    def train(self, mode=True):
        """Switch between training (``mode=True``) and frozen evaluation mode."""
        self.val = not mode
        self.theta.requires_grad_(mode)
        self.W.requires_grad_(mode)

    def eval(self):
        """Freeze the adapter: no regularisation and no gradients for theta / W.

        ``Tensor.detach()`` returns a new tensor and leaves the original
        untouched, so the flags are switched off in place instead.
        """
        self.val = True
        self.theta.requires_grad_(False)
        self.W.requires_grad_(False)

    def forward(self, conv_map, te_logits_list):
        """Compute the mixing weights of the teachers for every sample.

        Args:
            conv_map: student feature map, expected as [batch, channels, w, w]
                with the channel count and width given to ``__init__``.
            te_logits_list: unused; kept for interface compatibility.

        Returns:
            Tensor [batch, n_teachers] whose rows sum to one.
        """
        # Max-pool each channel to a single value, then drop the spatial dims.
        # flatten(1) keeps the batch dim even for a batch of one.
        beta = self.max_feat(conv_map).flatten(1)  # [batch, channels]
        # alpha[b, t] = sum_c beta[b, c] * theta[t, c] * W[c]
        alpha = beta.mm((self.theta * self.W.t()).t())  # [batch, n_teachers]
        miu = F.softmax(alpha, dim=1)
        return miu

# adapter instance
# Run the student once on a random input to read the size of its pooled feature map.
# NOTE(review): the probe is 128x128 while training uses 32x32 crops - confirm
# the pooled size is the same for both.
probe_input = torch.randn(1, 3, 128, 128).to(device)
_, _, _, pool_m, _ = st_model(probe_input)
# create adapter instance
adapter = Adapter(teacher_models, pool_m.size())


# data
# The dataset class is looked up by name in torchvision.datasets (see --dataset).
dataset_cls = getattr(datasets, args.dataset)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training images are randomly flipped and cropped (32 px, padding 4);
# test images are only converted to tensors and normalized.
train_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    normalize,
]
train_transform = transforms.Compose(train_steps)
test_transform = transforms.Compose([transforms.ToTensor(), normalize])

# Only the training split triggers a download.
train_set = dataset_cls(root='../data', train=True, download=True, transform=train_transform)
test_set = dataset_cls(root='../data', train=False, download=False, transform=test_transform)

train_loader = DataLoader(train_set, shuffle=True, batch_size=args.batch_size)
test_loader = DataLoader(test_set, shuffle=False, batch_size=args.test_batch_size)
# optim
# All optimizers honour --momentum; its default (0.9) is the value that used to
# be hard-coded here, so default runs are unchanged.
optimizer_W = optim.SGD([adapter.W], lr=args.lr, momentum=args.momentum)
optimizer_theta = optim.SGD([adapter.theta], lr=args.lr, momentum=args.momentum)
optimizer_sgd = optim.SGD(st_model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=5e-4)
# lr_scheduler drives the student optimizer; lr_scheduler2 / lr_scheduler3 drive
# the adapter optimizers. gamma=0.1 is MultiStepLR's default, spelled out here.
lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_sgd, gamma=0.1, milestones=[100, 150])
lr_scheduler2 = optim.lr_scheduler.MultiStepLR(optimizer_W, gamma=0.1, milestones=[40, 70])
lr_scheduler3 = optim.lr_scheduler.MultiStepLR(optimizer_theta, gamma=0.1, milestones=[40, 70])

# attention transfer loss
dist_criterion = RkdDistance().to(device)
angle_criterion = RKdAngle().to(device)
# One FitNet criterion per teacher (three teachers by default).
# NOTE(review): if FitNet holds learnable parameters, they are not registered
# with any optimizer here - confirm this is intended.
fitnet_criterion = [FitNet(32, 32), FitNet(32, 32), FitNet(32, 32)]
for fit_criterion in fitnet_criterion:
    fit_criterion.to(device)


def train_adapter(n_epochs=70, model=st_model):
    """Jointly train the adapter (theta, W) and the student for ``n_epochs``.

    Each batch is pushed through the student and every teacher. The adapter
    turns the student's pooled features into per-sample teacher weights, and
    the loss is the adapter loss plus one FitNet hint loss per teacher.

    Args:
        n_epochs: number of passes over ``train_loader``.
        model: student network; it must return
            ``(b1, b2, b3, pool, output)``.
    """
    print('Training adapter:')
    start_time = time.time()
    model.train()
    # Teachers only provide targets, so they stay in evaluation mode throughout.
    for te in teacher_models:
        te.eval()

    for epoch in range(n_epochs):
        for i, (images, target) in enumerate(train_loader):
            images, target = images.to(device), target.to(device)

            # student forward pass
            b1, b2, b3, pool, output = model(images)

            # teacher forward passes: no gradients are needed for the teachers
            te_scores_list = []
            hint_maps = []
            with torch.no_grad():
                for te in teacher_models:
                    t_b1, t_b2, t_b3, t_pool, t_output = te(images)
                    hint_maps.append(t_b2)
                    # temperature-softened class probabilities
                    te_scores_list.append(F.softmax(t_output / args.T, dim=1))
            te_scores_Tensor = torch.stack(te_scores_list, dim=1)  # [batch, n_teachers, n_class]

            optimizer_sgd.zero_grad()
            optimizer_W.zero_grad()
            optimizer_theta.zero_grad()

            # Mix the teacher probabilities with the per-sample adapter weights.
            weight = adapter.forward(pool, te_scores_Tensor)  # [batch, n_teachers]
            weighted_logits = torch.sum(weight.unsqueeze(2) * te_scores_Tensor, dim=1)
            ada_loss = adapter.loss(output, target, weighted_logits, T=args.T, alpha=args.kd_ratio)

            # Hint loss: the student's second-stage feature map against every teacher's.
            fit_loss = 0
            for j, hint in enumerate(hint_maps):
                fit_loss += fitnet_criterion[j](b2, hint)

            loss = ada_loss + fit_loss

            # The graph is rebuilt every iteration, so it does not need to be retained.
            loss.backward()
            optimizer_sgd.step()
            optimizer_W.step()
            optimizer_theta.step()

        # Schedulers are stepped after the epoch's optimizer updates, as PyTorch requires.
        lr_scheduler2.step()
        lr_scheduler3.step()
        # Logs the loss of the last batch of the epoch.
        log_out('epoch[{}/{}]adapter Loss: {:.4f}'.format(epoch, n_epochs, loss.item()))
    end_time = time.time()
    log_out("--- adapter training cost {:.3f} mins ---".format((end_time - start_time)/60))


# train with multi-teacher
def train(epoch, model):
    """Train the student for one epoch with the frozen adapter and the teachers.

    Args:
        epoch: index of the current epoch (not used inside the function).
        model: student network; it must return
            ``(b1, b2, b3, pool, output)``.

    Returns:
        tuple: ``(average loss, average top-1 accuracy)`` over the epoch, the
        accuracy converted with ``.cpu().numpy()``.
    """
    print('Training:')
    # switch to train mode
    model.train()
    adapter.eval()
    # Teachers only provide targets, so they stay in evaluation mode.
    for te in teacher_models:
        te.eval()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        images, target = images.to(device), target.to(device)

        # student forward pass; only the pooled features and the logits are used
        _, _, _, pool, output = model(images)

        # temperature-softened teacher probabilities, computed without gradients
        te_scores_list = []
        with torch.no_grad():
            for te in teacher_models:
                _, _, _, _, t_output = te(images)
                te_scores_list.append(F.softmax(t_output / args.T, dim=1))
        te_scores_Tensor = torch.stack(te_scores_list, dim=1)  # [batch, n_teachers, n_class]

        optimizer_sgd.zero_grad()

        # Mix the teacher probabilities with the per-sample adapter weights.
        weight = adapter.forward(pool, te_scores_Tensor)  # [batch, n_teachers]
        weighted_logits = torch.sum(weight.unsqueeze(2) * te_scores_Tensor, dim=1)
        loss = adapter.loss(output, target, weighted_logits, T=args.T, alpha=args.kd_ratio)

        # compute gradient and do SGD step; the graph is rebuilt every
        # iteration, so it does not need to be retained
        loss.backward()
        optimizer_sgd.step()

        output = output.float()
        loss = loss.float()
        # measure accuracy and record loss
        train_acc = accuracy(output.data, target.data)[0]
        losses.update(loss.item(), images.size(0))
        top1.update(train_acc, images.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            log_out('[{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1))
    # Report the epoch average rather than the accuracy of the last batch only.
    # The caller plots this value next to the averaged test accuracy.
    return losses.avg, top1.avg.cpu().numpy()


def test(model):
    """Evaluate ``model`` on ``test_loader`` and log the progress.

    Args:
        model: network returning five outputs, the last being the logits.

    Returns:
        tuple: ``(average cross-entropy loss, accuracy of the last batch,
        average top-1 accuracy)``; both accuracies are converted with
        ``.cpu().numpy()``.
    """
    print('Testing:')
    model.eval()  # evaluation mode for the whole pass

    timer = AverageMeter()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    tick = time.time()
    with torch.no_grad():
        for step, (images, labels) in enumerate(test_loader):
            images = images.to(device)
            labels = labels.to(device)

            # forward pass: only the logits are needed
            _, _, _, _, logits = model(images)
            batch_loss = F.cross_entropy(logits, labels).float()
            logits = logits.float()

            # accuracy and loss, weighted by the batch size
            n_samples = images.size(0)
            test_acc = accuracy(logits.data, labels.data)[0]
            loss_meter.update(batch_loss.item(), n_samples)
            acc_meter.update(test_acc, n_samples)

            # time spent on this batch
            timer.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq != 0:
                continue
            log_out('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      step, len(test_loader), batch_time=timer, loss=loss_meter,
                      top1=acc_meter))

    log_out(' * Prec@1 {top1.avg:.3f}'.format(top1=acc_meter))

    mean_acc = acc_meter.avg.cpu().numpy()
    return loss_meter.avg, test_acc.cpu().numpy(), mean_acc

# """
print('StudentNet:\n')
print(st_model)
# Initialise the student, then pre-train the adapter (together with the student).
st_model.apply(weights_init_normal)
train_adapter(n_epochs=80)
# st_model.apply(weights_init_normal)
best_acc = 0
for epoch in range(1, args.epochs + 1):
    log_out("\n===> epoch: {}/{}".format(epoch, args.epochs))
    log_out('current lr {:.5e}'.format(optimizer_sgd.param_groups[0]['lr']))
    train_loss, train_acc = train(epoch, st_model)
    # visualize loss
    vis.line(np.array([train_loss]), np.array([epoch]), loss_win, update="append")
    _, test_acc, top1 = test(st_model)
    # one point per curve: train accuracy and averaged test accuracy
    vis.line(np.column_stack((train_acc, top1)), np.column_stack((epoch, epoch)), acc_win, update="append")
    if top1 > best_acc:
        best_acc = top1
    # Step the scheduler after the epoch's optimizer updates and without the
    # deprecated epoch argument. The "current lr" line above then shows the
    # rate actually used during the epoch.
    lr_scheduler.step()

# release GPU memory
torch.cuda.empty_cache()
log_out("BEST ACC: {:.3f}".format(best_acc))
log_out("--- {:.3f} mins ---".format((time.time() - start_time)/60))
# """

  init.kaiming_normal(m.weight)


Files already downloaded and verified
StudentNet:

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2



epoch[0/80]adapter Loss: 1.8923
epoch[1/80]adapter Loss: 1.2946
epoch[2/80]adapter Loss: 1.2766
epoch[3/80]adapter Loss: 0.9707
epoch[4/80]adapter Loss: 0.8982
epoch[5/80]adapter Loss: 1.0691
epoch[6/80]adapter Loss: 0.9799
epoch[7/80]adapter Loss: 0.8803
epoch[8/80]adapter Loss: 0.8061
epoch[9/80]adapter Loss: 0.9717
epoch[10/80]adapter Loss: 0.7828
epoch[11/80]adapter Loss: 0.9247
epoch[12/80]adapter Loss: 0.8041
epoch[13/80]adapter Loss: 0.7219
epoch[14/80]adapter Loss: 0.8872
epoch[15/80]adapter Loss: 0.7206
epoch[16/80]adapter Loss: 0.7422
epoch[17/80]adapter Loss: 0.7651
epoch[18/80]adapter Loss: 0.6501
epoch[19/80]adapter Loss: 0.6966
epoch[20/80]adapter Loss: 0.7639
epoch[21/80]adapter Loss: 0.7309
epoch[22/80]adapter Loss: 0.6546
epoch[23/80]adapter Loss: 0.8422
epoch[24/80]adapter Loss: 0.6552
epoch[25/80]adapter Loss: 0.8946
epoch[26/80]adapter Loss: 0.7836
epoch[27/80]adapter Loss: 0.7034
epoch[28/80]adapter Loss: 0.6236
epoch[29/80]adapter Loss: 0.7160
epoch[30/80]adapter 



[10/391]	Time 0.184 (0.186)	Data 0.020 (0.021)	Loss 0.5601 (0.6160)	Prec@1 89.062 (87.500)
[20/391]	Time 0.179 (0.184)	Data 0.020 (0.020)	Loss 0.6172 (0.6336)	Prec@1 85.156 (86.570)
[30/391]	Time 0.181 (0.183)	Data 0.020 (0.020)	Loss 0.6505 (0.6236)	Prec@1 84.375 (86.492)
[40/391]	Time 0.185 (0.182)	Data 0.020 (0.020)	Loss 0.5673 (0.6204)	Prec@1 90.625 (86.662)
[50/391]	Time 0.184 (0.182)	Data 0.020 (0.020)	Loss 0.6040 (0.6169)	Prec@1 84.375 (86.765)
[60/391]	Time 0.145 (0.181)	Data 0.020 (0.020)	Loss 0.6210 (0.6187)	Prec@1 85.156 (86.680)
[70/391]	Time 0.143 (0.176)	Data 0.020 (0.020)	Loss 0.5430 (0.6168)	Prec@1 89.062 (86.818)
[80/391]	Time 0.178 (0.176)	Data 0.019 (0.020)	Loss 0.5997 (0.6120)	Prec@1 85.938 (86.979)
[90/391]	Time 0.186 (0.177)	Data 0.019 (0.020)	Loss 0.7550 (0.6183)	Prec@1 82.031 (86.779)
[100/391]	Time 0.183 (0.177)	Data 0.022 (0.020)	Loss 0.4827 (0.6167)	Prec@1 92.969 (86.819)
[110/391]	Time 0.178 (0.178)	Data 0.019 (0.020)	Loss 0.5332 (0.6182)	Prec@1 90.625 (86.83

Test: [40/79]	Time 0.021 (0.023)	Loss 1.7443 (1.3009)	Prec@1 74.219 (77.496)
Test: [50/79]	Time 0.026 (0.023)	Loss 1.6344 (1.3084)	Prec@1 72.656 (77.313)
Test: [60/79]	Time 0.025 (0.023)	Loss 1.0746 (1.2921)	Prec@1 82.031 (77.267)
Test: [70/79]	Time 0.019 (0.023)	Loss 1.1829 (1.2967)	Prec@1 79.688 (77.256)
 * Prec@1 77.470

===> epoch: 3/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.146 (0.146)	Data 0.021 (0.021)	Loss 0.6570 (0.6570)	Prec@1 84.375 (84.375)
[10/391]	Time 0.183 (0.178)	Data 0.019 (0.020)	Loss 0.5440 (0.6210)	Prec@1 89.844 (86.435)
[20/391]	Time 0.184 (0.180)	Data 0.020 (0.020)	Loss 0.5603 (0.5951)	Prec@1 93.750 (88.021)
[30/391]	Time 0.178 (0.180)	Data 0.019 (0.020)	Loss 0.6262 (0.6071)	Prec@1 87.500 (87.374)
[40/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.4888 (0.6083)	Prec@1 94.531 (87.424)
[50/391]	Time 0.146 (0.174)	Data 0.019 (0.020)	Loss 0.6689 (0.6110)	Prec@1 87.500 (87.255)
[60/391]	Time 0.180 (0.174)	Data 0.019 (0.020)	Loss 0.6715 (0.6089)	Prec@1 85.

[380/391]	Time 0.182 (0.180)	Data 0.020 (0.020)	Loss 0.7517 (0.6238)	Prec@1 81.250 (86.485)
[390/391]	Time 0.127 (0.180)	Data 0.012 (0.020)	Loss 0.6471 (0.6240)	Prec@1 83.750 (86.484)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.9725 (0.9725)	Prec@1 80.469 (80.469)
Test: [10/79]	Time 0.020 (0.024)	Loss 1.0587 (0.9829)	Prec@1 81.250 (82.812)
Test: [20/79]	Time 0.026 (0.023)	Loss 0.8265 (0.9849)	Prec@1 82.812 (82.143)
Test: [30/79]	Time 0.025 (0.023)	Loss 1.0979 (0.9873)	Prec@1 82.812 (82.082)
Test: [40/79]	Time 0.019 (0.023)	Loss 1.2189 (0.9783)	Prec@1 77.344 (82.127)
Test: [50/79]	Time 0.020 (0.023)	Loss 1.0179 (0.9564)	Prec@1 80.469 (82.307)
Test: [60/79]	Time 0.023 (0.023)	Loss 1.0990 (0.9440)	Prec@1 80.469 (82.364)
Test: [70/79]	Time 0.026 (0.023)	Loss 0.7253 (0.9504)	Prec@1 86.719 (82.284)
 * Prec@1 82.410

===> epoch: 5/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.182 (0.182)	Data 0.021 (0.021)	Loss 0.5692 (0.5692)	Prec@1 89.062 (89.062)
[10/391]	Time 0.180 (0.181)	Data 

[330/391]	Time 0.181 (0.180)	Data 0.020 (0.020)	Loss 0.6317 (0.6269)	Prec@1 86.719 (86.270)
[340/391]	Time 0.181 (0.180)	Data 0.021 (0.020)	Loss 0.4889 (0.6259)	Prec@1 92.188 (86.352)
[350/391]	Time 0.181 (0.180)	Data 0.021 (0.020)	Loss 0.6873 (0.6259)	Prec@1 83.594 (86.336)
[360/391]	Time 0.183 (0.180)	Data 0.020 (0.020)	Loss 0.7402 (0.6271)	Prec@1 80.469 (86.299)
[370/391]	Time 0.181 (0.180)	Data 0.020 (0.020)	Loss 0.5286 (0.6272)	Prec@1 89.062 (86.281)
[380/391]	Time 0.182 (0.180)	Data 0.020 (0.020)	Loss 0.6298 (0.6276)	Prec@1 85.938 (86.274)
[390/391]	Time 0.127 (0.180)	Data 0.012 (0.020)	Loss 0.6129 (0.6274)	Prec@1 86.250 (86.264)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.7162 (0.7162)	Prec@1 82.812 (82.812)
Test: [10/79]	Time 0.022 (0.023)	Loss 1.0453 (1.1123)	Prec@1 79.688 (78.977)
Test: [20/79]	Time 0.026 (0.023)	Loss 1.0251 (1.1824)	Prec@1 79.688 (78.274)
Test: [30/79]	Time 0.025 (0.023)	Loss 0.8229 (1.1969)	Prec@1 82.031 (78.150)
Test: [40/79]	Time 0.020 (0.023)	Loss 1.

[280/391]	Time 0.179 (0.180)	Data 0.019 (0.019)	Loss 0.6345 (0.6307)	Prec@1 82.031 (86.257)
[290/391]	Time 0.179 (0.180)	Data 0.019 (0.019)	Loss 0.5531 (0.6296)	Prec@1 90.625 (86.270)
[300/391]	Time 0.180 (0.180)	Data 0.019 (0.019)	Loss 0.7377 (0.6288)	Prec@1 80.469 (86.296)
[310/391]	Time 0.185 (0.180)	Data 0.020 (0.019)	Loss 0.5873 (0.6283)	Prec@1 88.281 (86.302)
[320/391]	Time 0.181 (0.180)	Data 0.019 (0.019)	Loss 0.6097 (0.6277)	Prec@1 88.281 (86.329)
[330/391]	Time 0.180 (0.180)	Data 0.019 (0.019)	Loss 0.6431 (0.6277)	Prec@1 84.375 (86.329)
[340/391]	Time 0.180 (0.180)	Data 0.019 (0.019)	Loss 0.6244 (0.6285)	Prec@1 86.719 (86.311)
[350/391]	Time 0.178 (0.180)	Data 0.019 (0.019)	Loss 0.6493 (0.6291)	Prec@1 86.719 (86.282)
[360/391]	Time 0.178 (0.180)	Data 0.019 (0.019)	Loss 0.7262 (0.6285)	Prec@1 81.250 (86.299)
[370/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.5619 (0.6274)	Prec@1 89.844 (86.342)
[380/391]	Time 0.183 (0.180)	Data 0.019 (0.020)	Loss 0.5833 (0.6267)	Prec@1 86.7

[230/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.6061 (0.6213)	Prec@1 89.062 (86.732)
[240/391]	Time 0.184 (0.180)	Data 0.019 (0.020)	Loss 0.5530 (0.6211)	Prec@1 89.062 (86.735)
[250/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.5702 (0.6199)	Prec@1 89.844 (86.784)
[260/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.5762 (0.6192)	Prec@1 89.062 (86.812)
[270/391]	Time 0.191 (0.181)	Data 0.029 (0.020)	Loss 0.6627 (0.6216)	Prec@1 85.938 (86.704)
[280/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.6307 (0.6219)	Prec@1 85.938 (86.691)
[290/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.5463 (0.6229)	Prec@1 88.281 (86.619)
[300/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.7429 (0.6240)	Prec@1 82.812 (86.579)
[310/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.5882 (0.6251)	Prec@1 89.844 (86.518)
[320/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.6315 (0.6259)	Prec@1 86.719 (86.470)
[330/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.7414 (0.6261)	Prec@1 82.0

[180/391]	Time 0.179 (0.181)	Data 0.019 (0.019)	Loss 0.5996 (0.6218)	Prec@1 86.719 (86.481)
[190/391]	Time 0.179 (0.181)	Data 0.019 (0.019)	Loss 0.6609 (0.6226)	Prec@1 82.031 (86.432)
[200/391]	Time 0.180 (0.181)	Data 0.020 (0.019)	Loss 0.5959 (0.6224)	Prec@1 88.281 (86.439)
[210/391]	Time 0.188 (0.182)	Data 0.020 (0.019)	Loss 0.5542 (0.6214)	Prec@1 89.844 (86.493)
[220/391]	Time 0.188 (0.182)	Data 0.019 (0.019)	Loss 0.6172 (0.6213)	Prec@1 87.500 (86.500)
[230/391]	Time 0.183 (0.182)	Data 0.019 (0.019)	Loss 0.7358 (0.6241)	Prec@1 85.156 (86.435)
[240/391]	Time 0.182 (0.182)	Data 0.019 (0.019)	Loss 0.6542 (0.6243)	Prec@1 78.906 (86.408)
[250/391]	Time 0.184 (0.182)	Data 0.019 (0.019)	Loss 0.6273 (0.6250)	Prec@1 88.281 (86.414)
[260/391]	Time 0.181 (0.182)	Data 0.020 (0.019)	Loss 0.6447 (0.6252)	Prec@1 86.719 (86.425)
[270/391]	Time 0.179 (0.182)	Data 0.019 (0.019)	Loss 0.6099 (0.6252)	Prec@1 87.500 (86.433)
[280/391]	Time 0.180 (0.182)	Data 0.019 (0.019)	Loss 0.6057 (0.6245)	Prec@1 88.2

[130/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.5922 (0.6228)	Prec@1 85.938 (86.415)
[140/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.6572 (0.6234)	Prec@1 86.719 (86.336)
[150/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6227 (0.6248)	Prec@1 85.938 (86.320)
[160/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.5329 (0.6243)	Prec@1 88.281 (86.301)
[170/391]	Time 0.178 (0.181)	Data 0.019 (0.020)	Loss 0.7152 (0.6222)	Prec@1 81.250 (86.353)
[180/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.6058 (0.6234)	Prec@1 85.156 (86.322)
[190/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.5290 (0.6241)	Prec@1 91.406 (86.326)
[200/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6208 (0.6227)	Prec@1 85.156 (86.365)
[210/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6405 (0.6234)	Prec@1 85.938 (86.308)
[220/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.7280 (0.6230)	Prec@1 82.031 (86.337)
[230/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.6463 (0.6227)	Prec@1 85.9

[80/391]	Time 0.183 (0.180)	Data 0.019 (0.020)	Loss 0.6019 (0.6122)	Prec@1 87.500 (87.056)
[90/391]	Time 0.182 (0.180)	Data 0.019 (0.020)	Loss 0.4809 (0.6159)	Prec@1 94.531 (86.873)
[100/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.6030 (0.6171)	Prec@1 89.062 (86.835)
[110/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.6644 (0.6210)	Prec@1 86.719 (86.620)
[120/391]	Time 0.178 (0.181)	Data 0.019 (0.020)	Loss 0.6280 (0.6224)	Prec@1 83.594 (86.519)
[130/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6273 (0.6249)	Prec@1 85.156 (86.385)
[140/391]	Time 0.187 (0.181)	Data 0.019 (0.020)	Loss 0.5676 (0.6228)	Prec@1 87.500 (86.425)
[150/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.8337 (0.6233)	Prec@1 76.562 (86.419)
[160/391]	Time 0.183 (0.181)	Data 0.020 (0.020)	Loss 0.6646 (0.6217)	Prec@1 88.281 (86.515)
[170/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.5754 (0.6228)	Prec@1 91.406 (86.458)
[180/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.5343 (0.6232)	Prec@1 92.188

[30/391]	Time 0.179 (0.182)	Data 0.020 (0.020)	Loss 0.4992 (0.5885)	Prec@1 91.406 (87.626)
[40/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.5826 (0.5886)	Prec@1 90.625 (87.862)
[50/391]	Time 0.178 (0.182)	Data 0.019 (0.020)	Loss 0.5333 (0.5856)	Prec@1 89.062 (87.760)
[60/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.6027 (0.5871)	Prec@1 85.156 (87.615)
[70/391]	Time 0.180 (0.184)	Data 0.019 (0.022)	Loss 0.5514 (0.5957)	Prec@1 89.062 (87.379)
[80/391]	Time 0.179 (0.183)	Data 0.019 (0.021)	Loss 0.7935 (0.6037)	Prec@1 80.469 (87.066)
[90/391]	Time 0.183 (0.183)	Data 0.020 (0.021)	Loss 0.6210 (0.6050)	Prec@1 86.719 (86.993)
[100/391]	Time 0.184 (0.183)	Data 0.020 (0.021)	Loss 0.6659 (0.6091)	Prec@1 85.156 (86.812)
[110/391]	Time 0.194 (0.183)	Data 0.023 (0.021)	Loss 0.6536 (0.6083)	Prec@1 85.938 (86.852)
[120/391]	Time 0.185 (0.183)	Data 0.020 (0.021)	Loss 0.7632 (0.6117)	Prec@1 84.375 (86.816)
[130/391]	Time 0.184 (0.183)	Data 0.019 (0.021)	Loss 0.5757 (0.6124)	Prec@1 86.719 (86.

Test: [60/79]	Time 0.021 (0.020)	Loss 1.3461 (1.2984)	Prec@1 78.125 (77.344)
Test: [70/79]	Time 0.022 (0.021)	Loss 1.2548 (1.3106)	Prec@1 78.125 (77.069)
 * Prec@1 77.220

===> epoch: 20/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.183 (0.183)	Data 0.020 (0.020)	Loss 0.5649 (0.5649)	Prec@1 91.406 (91.406)
[10/391]	Time 0.180 (0.183)	Data 0.019 (0.020)	Loss 0.7672 (0.6420)	Prec@1 81.250 (86.932)
[20/391]	Time 0.186 (0.182)	Data 0.022 (0.020)	Loss 0.6595 (0.6242)	Prec@1 85.156 (87.091)
[30/391]	Time 0.187 (0.182)	Data 0.019 (0.020)	Loss 0.6680 (0.6144)	Prec@1 85.938 (87.374)
[40/391]	Time 0.185 (0.182)	Data 0.019 (0.020)	Loss 0.6558 (0.6087)	Prec@1 85.938 (87.329)
[50/391]	Time 0.181 (0.182)	Data 0.025 (0.020)	Loss 0.6033 (0.6057)	Prec@1 89.062 (87.316)
[60/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6481 (0.6096)	Prec@1 84.375 (87.052)
[70/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.6190 (0.6113)	Prec@1 86.719 (86.961)
[80/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Lo

Test: [10/79]	Time 0.020 (0.020)	Loss 1.2231 (1.3793)	Prec@1 77.344 (75.781)
Test: [20/79]	Time 0.019 (0.020)	Loss 1.4507 (1.4289)	Prec@1 78.125 (75.409)
Test: [30/79]	Time 0.019 (0.020)	Loss 1.1658 (1.4256)	Prec@1 78.906 (75.252)
Test: [40/79]	Time 0.026 (0.020)	Loss 1.4978 (1.4085)	Prec@1 71.875 (75.457)
Test: [50/79]	Time 0.020 (0.021)	Loss 1.3958 (1.3985)	Prec@1 75.000 (75.322)
Test: [60/79]	Time 0.026 (0.021)	Loss 1.1696 (1.4148)	Prec@1 77.344 (75.282)
Test: [70/79]	Time 0.025 (0.022)	Loss 1.4539 (1.4033)	Prec@1 71.094 (75.473)
 * Prec@1 75.740

===> epoch: 22/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.179 (0.179)	Data 0.020 (0.020)	Loss 0.6997 (0.6997)	Prec@1 83.594 (83.594)
[10/391]	Time 0.186 (0.183)	Data 0.019 (0.021)	Loss 0.6011 (0.6374)	Prec@1 90.625 (86.009)
[20/391]	Time 0.184 (0.183)	Data 0.019 (0.020)	Loss 0.5670 (0.6316)	Prec@1 89.062 (85.975)
[30/391]	Time 0.180 (0.182)	Data 0.020 (0.020)	Loss 0.6074 (0.6315)	Prec@1 85.938 (85.685)
[40/391]	Time 0.182 (0.182)	

[360/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.5553 (0.6238)	Prec@1 89.844 (86.474)
[370/391]	Time 0.145 (0.181)	Data 0.019 (0.020)	Loss 0.5547 (0.6244)	Prec@1 89.062 (86.464)
[380/391]	Time 0.147 (0.180)	Data 0.020 (0.020)	Loss 0.6169 (0.6240)	Prec@1 86.719 (86.499)
[390/391]	Time 0.126 (0.180)	Data 0.012 (0.020)	Loss 0.5771 (0.6235)	Prec@1 90.000 (86.488)
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 1.1104 (1.1104)	Prec@1 78.125 (78.125)
Test: [10/79]	Time 0.020 (0.024)	Loss 0.8857 (1.0198)	Prec@1 83.594 (80.469)
Test: [20/79]	Time 0.026 (0.023)	Loss 0.6380 (1.0606)	Prec@1 84.375 (79.390)
Test: [30/79]	Time 0.025 (0.023)	Loss 0.7913 (1.1024)	Prec@1 82.031 (79.234)
Test: [40/79]	Time 0.020 (0.023)	Loss 1.0134 (1.0815)	Prec@1 71.875 (79.211)
Test: [50/79]	Time 0.022 (0.023)	Loss 0.9360 (1.0690)	Prec@1 82.812 (79.442)
Test: [60/79]	Time 0.026 (0.023)	Loss 0.7126 (1.0763)	Prec@1 82.812 (79.380)
Test: [70/79]	Time 0.025 (0.023)	Loss 0.7335 (1.0693)	Prec@1 83.594 (79.434)
 * Prec@

[310/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.6798 (0.6232)	Prec@1 81.250 (86.447)
[320/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6269 (0.6231)	Prec@1 85.156 (86.480)
[330/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.5934 (0.6248)	Prec@1 85.938 (86.402)
[340/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.7030 (0.6254)	Prec@1 81.250 (86.354)
[350/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.5717 (0.6248)	Prec@1 87.500 (86.378)
[360/391]	Time 0.146 (0.180)	Data 0.019 (0.020)	Loss 0.5831 (0.6247)	Prec@1 89.062 (86.377)
[370/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.6658 (0.6249)	Prec@1 85.156 (86.373)
[380/391]	Time 0.183 (0.180)	Data 0.020 (0.020)	Loss 0.5759 (0.6248)	Prec@1 87.500 (86.387)
[390/391]	Time 0.131 (0.180)	Data 0.012 (0.020)	Loss 0.6519 (0.6247)	Prec@1 86.250 (86.410)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.9651 (0.9651)	Prec@1 81.250 (81.250)
Test: [10/79]	Time 0.019 (0.023)	Loss 1.1574 (1.2243)	Prec@1 80.469 (79.048)
Test: [20/

[260/391]	Time 0.180 (0.182)	Data 0.020 (0.020)	Loss 0.7585 (0.6292)	Prec@1 82.812 (86.249)
[270/391]	Time 0.182 (0.182)	Data 0.021 (0.020)	Loss 0.7007 (0.6301)	Prec@1 81.250 (86.214)
[280/391]	Time 0.180 (0.182)	Data 0.020 (0.020)	Loss 0.6420 (0.6302)	Prec@1 86.719 (86.229)
[290/391]	Time 0.183 (0.182)	Data 0.020 (0.020)	Loss 0.6479 (0.6310)	Prec@1 85.938 (86.171)
[300/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6122 (0.6306)	Prec@1 90.625 (86.228)
[310/391]	Time 0.179 (0.182)	Data 0.020 (0.020)	Loss 0.6601 (0.6305)	Prec@1 85.156 (86.226)
[320/391]	Time 0.183 (0.182)	Data 0.020 (0.020)	Loss 0.5681 (0.6305)	Prec@1 90.625 (86.227)
[330/391]	Time 0.180 (0.182)	Data 0.020 (0.020)	Loss 0.7019 (0.6306)	Prec@1 81.250 (86.230)
[340/391]	Time 0.144 (0.181)	Data 0.019 (0.020)	Loss 0.6956 (0.6306)	Prec@1 83.594 (86.212)
[350/391]	Time 0.186 (0.181)	Data 0.020 (0.020)	Loss 0.5954 (0.6308)	Prec@1 88.281 (86.216)
[360/391]	Time 0.187 (0.181)	Data 0.019 (0.020)	Loss 0.6157 (0.6307)	Prec@1 87.5

[210/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.6895 (0.6322)	Prec@1 82.031 (86.500)
[220/391]	Time 0.184 (0.182)	Data 0.020 (0.020)	Loss 0.5599 (0.6303)	Prec@1 87.500 (86.528)
[230/391]	Time 0.184 (0.182)	Data 0.020 (0.020)	Loss 0.6511 (0.6287)	Prec@1 85.938 (86.540)
[240/391]	Time 0.184 (0.182)	Data 0.020 (0.020)	Loss 0.6148 (0.6276)	Prec@1 86.719 (86.550)
[250/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.6985 (0.6269)	Prec@1 84.375 (86.541)
[260/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.6170 (0.6267)	Prec@1 86.719 (86.530)
[270/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.7035 (0.6262)	Prec@1 82.812 (86.560)
[280/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.7170 (0.6286)	Prec@1 84.375 (86.494)
[290/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.6165 (0.6293)	Prec@1 85.938 (86.453)
[300/391]	Time 0.185 (0.182)	Data 0.020 (0.020)	Loss 0.5880 (0.6288)	Prec@1 90.625 (86.462)
[310/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.6577 (0.6293)	Prec@1 89.0

[160/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6010 (0.6299)	Prec@1 84.375 (86.432)
[170/391]	Time 0.178 (0.182)	Data 0.019 (0.020)	Loss 0.5387 (0.6308)	Prec@1 89.062 (86.349)
[180/391]	Time 0.181 (0.182)	Data 0.020 (0.020)	Loss 0.6512 (0.6307)	Prec@1 83.594 (86.365)
[190/391]	Time 0.182 (0.182)	Data 0.019 (0.020)	Loss 0.6939 (0.6307)	Prec@1 79.688 (86.359)
[200/391]	Time 0.183 (0.182)	Data 0.019 (0.020)	Loss 0.7223 (0.6322)	Prec@1 77.344 (86.276)
[210/391]	Time 0.188 (0.182)	Data 0.020 (0.020)	Loss 0.6338 (0.6315)	Prec@1 88.281 (86.323)
[220/391]	Time 0.186 (0.182)	Data 0.020 (0.020)	Loss 0.7001 (0.6304)	Prec@1 79.688 (86.312)
[230/391]	Time 0.182 (0.182)	Data 0.020 (0.020)	Loss 0.7238 (0.6315)	Prec@1 82.812 (86.266)
[240/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.5808 (0.6300)	Prec@1 89.062 (86.304)
[250/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.6056 (0.6296)	Prec@1 87.500 (86.286)
[260/391]	Time 0.179 (0.182)	Data 0.020 (0.020)	Loss 0.7000 (0.6301)	Prec@1 86.7

[110/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.6179 (0.6334)	Prec@1 89.062 (85.909)
[120/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.5747 (0.6341)	Prec@1 86.719 (85.776)
[130/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6904 (0.6351)	Prec@1 81.250 (85.782)
[140/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6283 (0.6334)	Prec@1 85.156 (85.871)
[150/391]	Time 0.180 (0.182)	Data 0.020 (0.020)	Loss 0.6314 (0.6305)	Prec@1 87.500 (85.953)
[160/391]	Time 0.182 (0.182)	Data 0.019 (0.020)	Loss 0.5259 (0.6275)	Prec@1 88.281 (86.049)
[170/391]	Time 0.187 (0.182)	Data 0.020 (0.020)	Loss 0.5177 (0.6255)	Prec@1 92.969 (86.143)
[180/391]	Time 0.182 (0.182)	Data 0.019 (0.020)	Loss 0.5790 (0.6235)	Prec@1 87.500 (86.235)
[190/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.5814 (0.6237)	Prec@1 89.844 (86.240)
[200/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.5444 (0.6208)	Prec@1 90.625 (86.361)
[210/391]	Time 0.184 (0.182)	Data 0.019 (0.020)	Loss 0.6293 (0.6204)	Prec@1 86.7

[60/391]	Time 0.183 (0.182)	Data 0.019 (0.019)	Loss 0.5593 (0.6116)	Prec@1 88.281 (86.872)
[70/391]	Time 0.188 (0.182)	Data 0.019 (0.019)	Loss 0.6444 (0.6124)	Prec@1 85.938 (86.763)
[80/391]	Time 0.191 (0.182)	Data 0.020 (0.019)	Loss 0.5456 (0.6131)	Prec@1 89.062 (86.786)
[90/391]	Time 0.183 (0.183)	Data 0.020 (0.019)	Loss 0.5787 (0.6103)	Prec@1 89.062 (86.993)
[100/391]	Time 0.183 (0.183)	Data 0.019 (0.019)	Loss 0.5968 (0.6114)	Prec@1 87.500 (86.866)
[110/391]	Time 0.183 (0.183)	Data 0.019 (0.019)	Loss 0.5701 (0.6114)	Prec@1 89.062 (86.909)
[120/391]	Time 0.183 (0.182)	Data 0.019 (0.019)	Loss 0.6918 (0.6145)	Prec@1 82.031 (86.790)
[130/391]	Time 0.180 (0.182)	Data 0.019 (0.019)	Loss 0.6064 (0.6170)	Prec@1 85.156 (86.677)
[140/391]	Time 0.182 (0.182)	Data 0.020 (0.019)	Loss 0.6009 (0.6165)	Prec@1 86.719 (86.791)
[150/391]	Time 0.182 (0.182)	Data 0.022 (0.019)	Loss 0.4965 (0.6166)	Prec@1 91.406 (86.791)
[160/391]	Time 0.180 (0.182)	Data 0.019 (0.019)	Loss 0.6417 (0.6171)	Prec@1 82.812 (

[10/391]	Time 0.182 (0.179)	Data 0.027 (0.020)	Loss 0.6467 (0.6316)	Prec@1 86.719 (85.298)
[20/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6551 (0.6273)	Prec@1 86.719 (86.124)
[30/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.5121 (0.6144)	Prec@1 90.625 (86.668)
[40/391]	Time 0.185 (0.181)	Data 0.020 (0.020)	Loss 0.6159 (0.6097)	Prec@1 87.500 (86.966)
[50/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.7556 (0.6134)	Prec@1 82.812 (86.887)
[60/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.6136 (0.6141)	Prec@1 85.156 (86.796)
[70/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.6153 (0.6140)	Prec@1 85.938 (86.884)
[80/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.6151 (0.6145)	Prec@1 85.156 (86.728)
[90/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.7604 (0.6200)	Prec@1 80.469 (86.487)
[100/391]	Time 0.182 (0.181)	Data 0.021 (0.020)	Loss 0.6869 (0.6205)	Prec@1 82.812 (86.409)
[110/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.6313 (0.6198)	Prec@1 85.938 (86.45

Test: [40/79]	Time 0.024 (0.023)	Loss 1.3067 (1.0956)	Prec@1 75.781 (80.716)
Test: [50/79]	Time 0.020 (0.023)	Loss 1.1852 (1.0818)	Prec@1 77.344 (80.484)
Test: [60/79]	Time 0.021 (0.023)	Loss 1.0155 (1.0690)	Prec@1 82.812 (80.558)
Test: [70/79]	Time 0.023 (0.023)	Loss 1.1891 (1.0661)	Prec@1 80.469 (80.546)
 * Prec@1 80.520

===> epoch: 39/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.6302 (0.6302)	Prec@1 87.500 (87.500)
[10/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.5862 (0.6005)	Prec@1 89.844 (87.855)
[20/391]	Time 0.180 (0.181)	Data 0.023 (0.020)	Loss 0.6615 (0.6105)	Prec@1 85.156 (87.760)
[30/391]	Time 0.181 (0.180)	Data 0.020 (0.020)	Loss 0.5876 (0.6157)	Prec@1 89.844 (87.273)
[40/391]	Time 0.178 (0.180)	Data 0.021 (0.020)	Loss 0.6677 (0.6114)	Prec@1 82.031 (86.947)
[50/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.5549 (0.6074)	Prec@1 90.625 (87.040)
[60/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.5130 (0.6070)	Prec@1 93

[380/391]	Time 0.181 (0.180)	Data 0.020 (0.020)	Loss 0.5376 (0.6201)	Prec@1 89.844 (86.536)
[390/391]	Time 0.128 (0.180)	Data 0.013 (0.020)	Loss 0.7316 (0.6195)	Prec@1 82.500 (86.540)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.8887 (0.8887)	Prec@1 82.031 (82.031)
Test: [10/79]	Time 0.022 (0.023)	Loss 0.6495 (0.9870)	Prec@1 83.594 (82.102)
Test: [20/79]	Time 0.026 (0.023)	Loss 1.0718 (1.0287)	Prec@1 79.688 (81.064)
Test: [30/79]	Time 0.025 (0.023)	Loss 1.2923 (1.0130)	Prec@1 78.906 (81.250)
Test: [40/79]	Time 0.020 (0.023)	Loss 1.1567 (1.0178)	Prec@1 77.344 (81.155)
Test: [50/79]	Time 0.020 (0.023)	Loss 1.4187 (1.0139)	Prec@1 75.781 (81.158)
Test: [60/79]	Time 0.023 (0.023)	Loss 0.8984 (1.0094)	Prec@1 78.906 (81.314)
Test: [70/79]	Time 0.026 (0.023)	Loss 0.8940 (1.0107)	Prec@1 82.812 (81.305)
 * Prec@1 81.470

===> epoch: 41/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.178 (0.178)	Data 0.021 (0.021)	Loss 0.6660 (0.6660)	Prec@1 85.938 (85.938)
[10/391]	Time 0.178 (0.181)	Data

[330/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.6817 (0.6199)	Prec@1 83.594 (86.705)
[340/391]	Time 0.183 (0.180)	Data 0.020 (0.020)	Loss 0.6237 (0.6196)	Prec@1 85.938 (86.716)
[350/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.6228 (0.6192)	Prec@1 84.375 (86.696)
[360/391]	Time 0.178 (0.180)	Data 0.019 (0.020)	Loss 0.7706 (0.6198)	Prec@1 82.031 (86.669)
[370/391]	Time 0.182 (0.180)	Data 0.021 (0.020)	Loss 0.6322 (0.6200)	Prec@1 89.062 (86.645)
[380/391]	Time 0.187 (0.180)	Data 0.019 (0.020)	Loss 0.5266 (0.6188)	Prec@1 91.406 (86.745)
[390/391]	Time 0.133 (0.180)	Data 0.012 (0.020)	Loss 0.7075 (0.6192)	Prec@1 83.750 (86.734)
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.9013 (0.9013)	Prec@1 79.688 (79.688)
Test: [10/79]	Time 0.024 (0.023)	Loss 1.1591 (1.3565)	Prec@1 78.906 (75.568)
Test: [20/79]	Time 0.026 (0.023)	Loss 1.5978 (1.4688)	Prec@1 72.656 (74.963)
Test: [30/79]	Time 0.033 (0.023)	Loss 1.5632 (1.5173)	Prec@1 67.969 (74.395)
Test: [40/79]	Time 0.019 (0.023)	Loss 1.

[280/391]	Time 0.181 (0.179)	Data 0.020 (0.020)	Loss 0.5293 (0.6232)	Prec@1 90.625 (86.407)
[290/391]	Time 0.187 (0.179)	Data 0.019 (0.020)	Loss 0.5383 (0.6230)	Prec@1 92.188 (86.445)
[300/391]	Time 0.184 (0.179)	Data 0.019 (0.020)	Loss 0.5490 (0.6209)	Prec@1 89.844 (86.521)
[310/391]	Time 0.184 (0.179)	Data 0.020 (0.020)	Loss 0.6400 (0.6212)	Prec@1 87.500 (86.525)
[320/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.5811 (0.6215)	Prec@1 87.500 (86.546)
[330/391]	Time 0.184 (0.180)	Data 0.020 (0.020)	Loss 0.7000 (0.6222)	Prec@1 83.594 (86.520)
[340/391]	Time 0.182 (0.180)	Data 0.019 (0.020)	Loss 0.8249 (0.6220)	Prec@1 77.344 (86.515)
[350/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.7337 (0.6214)	Prec@1 81.250 (86.534)
[360/391]	Time 0.179 (0.180)	Data 0.020 (0.020)	Loss 0.7522 (0.6229)	Prec@1 80.469 (86.502)
[370/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.6395 (0.6229)	Prec@1 85.938 (86.510)
[380/391]	Time 0.185 (0.180)	Data 0.021 (0.020)	Loss 0.6262 (0.6229)	Prec@1 86.7

[230/391]	Time 0.183 (0.180)	Data 0.020 (0.020)	Loss 0.6553 (0.6166)	Prec@1 85.156 (86.533)
[240/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.5476 (0.6157)	Prec@1 89.062 (86.544)
[250/391]	Time 0.178 (0.180)	Data 0.019 (0.020)	Loss 0.5791 (0.6149)	Prec@1 89.844 (86.585)
[260/391]	Time 0.178 (0.180)	Data 0.019 (0.020)	Loss 0.5767 (0.6154)	Prec@1 89.062 (86.590)
[270/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.6793 (0.6164)	Prec@1 85.156 (86.531)
[280/391]	Time 0.184 (0.180)	Data 0.020 (0.020)	Loss 0.6502 (0.6171)	Prec@1 85.938 (86.557)
[290/391]	Time 0.183 (0.180)	Data 0.019 (0.020)	Loss 0.6610 (0.6188)	Prec@1 85.938 (86.499)
[300/391]	Time 0.178 (0.180)	Data 0.019 (0.020)	Loss 0.7032 (0.6194)	Prec@1 82.031 (86.503)
[310/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.5993 (0.6200)	Prec@1 85.156 (86.470)
[320/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.7204 (0.6216)	Prec@1 82.031 (86.407)
[330/391]	Time 0.182 (0.180)	Data 0.020 (0.020)	Loss 0.6555 (0.6219)	Prec@1 83.5

[180/391]	Time 0.184 (0.178)	Data 0.021 (0.020)	Loss 0.6740 (0.6139)	Prec@1 85.938 (86.611)
[190/391]	Time 0.182 (0.178)	Data 0.020 (0.020)	Loss 0.5937 (0.6164)	Prec@1 87.500 (86.498)
[200/391]	Time 0.182 (0.179)	Data 0.021 (0.020)	Loss 0.6177 (0.6179)	Prec@1 90.625 (86.435)
[210/391]	Time 0.179 (0.179)	Data 0.020 (0.020)	Loss 0.6697 (0.6203)	Prec@1 84.375 (86.326)
[220/391]	Time 0.180 (0.179)	Data 0.019 (0.020)	Loss 0.6087 (0.6206)	Prec@1 88.281 (86.316)
[230/391]	Time 0.179 (0.179)	Data 0.020 (0.020)	Loss 0.7512 (0.6212)	Prec@1 81.250 (86.353)
[240/391]	Time 0.185 (0.179)	Data 0.022 (0.020)	Loss 0.6182 (0.6209)	Prec@1 85.938 (86.352)
[250/391]	Time 0.180 (0.179)	Data 0.020 (0.020)	Loss 0.4629 (0.6196)	Prec@1 90.625 (86.414)
[260/391]	Time 0.180 (0.179)	Data 0.019 (0.020)	Loss 0.5844 (0.6195)	Prec@1 88.281 (86.449)
[270/391]	Time 0.180 (0.179)	Data 0.019 (0.020)	Loss 0.5802 (0.6182)	Prec@1 85.938 (86.531)
[280/391]	Time 0.179 (0.179)	Data 0.020 (0.020)	Loss 0.6938 (0.6175)	Prec@1 82.0

[130/391]	Time 0.182 (0.180)	Data 0.019 (0.020)	Loss 0.7997 (0.5994)	Prec@1 81.250 (87.261)
[140/391]	Time 0.144 (0.179)	Data 0.021 (0.020)	Loss 0.6036 (0.6018)	Prec@1 86.719 (87.168)
[150/391]	Time 0.176 (0.177)	Data 0.020 (0.020)	Loss 0.6073 (0.6034)	Prec@1 86.719 (87.065)
[160/391]	Time 0.187 (0.177)	Data 0.020 (0.020)	Loss 0.7306 (0.6012)	Prec@1 83.594 (87.189)
[170/391]	Time 0.181 (0.177)	Data 0.020 (0.020)	Loss 0.5362 (0.6022)	Prec@1 91.406 (87.144)
[180/391]	Time 0.181 (0.178)	Data 0.019 (0.020)	Loss 0.7513 (0.6039)	Prec@1 83.594 (87.073)
[190/391]	Time 0.183 (0.178)	Data 0.019 (0.020)	Loss 0.6583 (0.6040)	Prec@1 89.844 (87.087)
[200/391]	Time 0.180 (0.178)	Data 0.020 (0.020)	Loss 0.6386 (0.6052)	Prec@1 83.594 (87.018)
[210/391]	Time 0.183 (0.178)	Data 0.019 (0.020)	Loss 0.6227 (0.6059)	Prec@1 88.281 (87.011)
[220/391]	Time 0.184 (0.178)	Data 0.020 (0.020)	Loss 0.5653 (0.6081)	Prec@1 89.062 (86.959)
[230/391]	Time 0.183 (0.178)	Data 0.020 (0.020)	Loss 0.6665 (0.6106)	Prec@1 82.0

[80/391]	Time 0.184 (0.180)	Data 0.021 (0.020)	Loss 0.6768 (0.6009)	Prec@1 83.594 (87.162)
[90/391]	Time 0.181 (0.180)	Data 0.020 (0.020)	Loss 0.6213 (0.6030)	Prec@1 86.719 (87.079)
[100/391]	Time 0.185 (0.180)	Data 0.021 (0.020)	Loss 0.6293 (0.6057)	Prec@1 85.156 (86.943)
[110/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.5269 (0.6076)	Prec@1 89.062 (86.860)
[120/391]	Time 0.145 (0.180)	Data 0.020 (0.020)	Loss 0.6829 (0.6076)	Prec@1 83.594 (86.822)
[130/391]	Time 0.146 (0.177)	Data 0.022 (0.020)	Loss 0.5357 (0.6053)	Prec@1 93.750 (86.945)
[140/391]	Time 0.181 (0.177)	Data 0.021 (0.020)	Loss 0.7281 (0.6094)	Prec@1 85.156 (86.841)
[150/391]	Time 0.184 (0.178)	Data 0.020 (0.020)	Loss 0.5895 (0.6107)	Prec@1 87.500 (86.812)
[160/391]	Time 0.179 (0.178)	Data 0.019 (0.020)	Loss 0.7145 (0.6138)	Prec@1 82.812 (86.690)
[170/391]	Time 0.178 (0.178)	Data 0.019 (0.020)	Loss 0.6254 (0.6154)	Prec@1 85.938 (86.641)
[180/391]	Time 0.180 (0.178)	Data 0.020 (0.020)	Loss 0.7107 (0.6141)	Prec@1 83.594

[30/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6018 (0.6503)	Prec@1 86.719 (85.282)
[40/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.5193 (0.6331)	Prec@1 87.500 (85.938)
[50/391]	Time 0.178 (0.181)	Data 0.020 (0.020)	Loss 0.5690 (0.6281)	Prec@1 85.156 (85.953)
[60/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.6313 (0.6247)	Prec@1 85.156 (85.989)
[70/391]	Time 0.178 (0.181)	Data 0.019 (0.020)	Loss 0.5645 (0.6255)	Prec@1 88.281 (86.147)
[80/391]	Time 0.178 (0.180)	Data 0.020 (0.020)	Loss 0.5346 (0.6232)	Prec@1 90.625 (86.265)
[90/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.5513 (0.6200)	Prec@1 92.188 (86.427)
[100/391]	Time 0.143 (0.180)	Data 0.019 (0.020)	Loss 0.7177 (0.6172)	Prec@1 80.469 (86.494)
[110/391]	Time 0.143 (0.176)	Data 0.020 (0.020)	Loss 0.6466 (0.6156)	Prec@1 85.156 (86.599)
[120/391]	Time 0.181 (0.177)	Data 0.019 (0.020)	Loss 0.6107 (0.6152)	Prec@1 85.938 (86.667)
[130/391]	Time 0.180 (0.177)	Data 0.019 (0.020)	Loss 0.6418 (0.6187)	Prec@1 85.938 (86.

Test: [60/79]	Time 0.019 (0.023)	Loss 1.2438 (1.1420)	Prec@1 81.250 (80.097)
Test: [70/79]	Time 0.021 (0.023)	Loss 0.8732 (1.1569)	Prec@1 82.031 (79.754)
 * Prec@1 79.700

===> epoch: 56/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.183 (0.183)	Data 0.020 (0.020)	Loss 0.6426 (0.6426)	Prec@1 84.375 (84.375)
[10/391]	Time 0.181 (0.183)	Data 0.020 (0.020)	Loss 0.5477 (0.6287)	Prec@1 90.625 (85.582)
[20/391]	Time 0.182 (0.182)	Data 0.019 (0.019)	Loss 0.6086 (0.6277)	Prec@1 88.281 (86.124)
[30/391]	Time 0.186 (0.182)	Data 0.019 (0.019)	Loss 0.7020 (0.6184)	Prec@1 82.031 (86.694)
[40/391]	Time 0.184 (0.182)	Data 0.019 (0.019)	Loss 0.5178 (0.6080)	Prec@1 89.062 (86.986)
[50/391]	Time 0.180 (0.182)	Data 0.019 (0.019)	Loss 0.6272 (0.6056)	Prec@1 82.812 (87.025)
[60/391]	Time 0.181 (0.182)	Data 0.019 (0.019)	Loss 0.5654 (0.6040)	Prec@1 86.719 (87.039)
[70/391]	Time 0.189 (0.183)	Data 0.020 (0.019)	Loss 0.6221 (0.6017)	Prec@1 85.156 (87.137)
[80/391]	Time 0.180 (0.182)	Data 0.019 (0.019)	Lo

Test: [10/79]	Time 0.026 (0.022)	Loss 1.2807 (0.9803)	Prec@1 81.250 (82.102)
Test: [20/79]	Time 0.026 (0.023)	Loss 0.7561 (1.0395)	Prec@1 84.375 (81.101)
Test: [30/79]	Time 0.020 (0.023)	Loss 0.8802 (1.0176)	Prec@1 81.250 (81.578)
Test: [40/79]	Time 0.021 (0.023)	Loss 1.0337 (1.0344)	Prec@1 77.344 (81.345)
Test: [50/79]	Time 0.026 (0.023)	Loss 1.1352 (1.0273)	Prec@1 82.031 (81.373)
Test: [60/79]	Time 0.025 (0.023)	Loss 1.1182 (1.0269)	Prec@1 80.469 (81.519)
Test: [70/79]	Time 0.019 (0.023)	Loss 0.7764 (1.0226)	Prec@1 83.594 (81.448)
 * Prec@1 81.550

===> epoch: 58/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.177 (0.177)	Data 0.020 (0.020)	Loss 0.6628 (0.6628)	Prec@1 85.156 (85.156)
[10/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.6026 (0.6569)	Prec@1 88.281 (86.435)
[20/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.6792 (0.6318)	Prec@1 84.375 (86.979)
[30/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.5794 (0.6166)	Prec@1 88.281 (87.172)
[40/391]	Time 0.181 (0.181)	

[360/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.5903 (0.6161)	Prec@1 90.625 (86.649)
[370/391]	Time 0.182 (0.180)	Data 0.020 (0.020)	Loss 0.6166 (0.6163)	Prec@1 88.281 (86.643)
[380/391]	Time 0.181 (0.180)	Data 0.019 (0.020)	Loss 0.5742 (0.6170)	Prec@1 86.719 (86.622)
[390/391]	Time 0.126 (0.180)	Data 0.012 (0.020)	Loss 0.6345 (0.6178)	Prec@1 85.000 (86.578)
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 1.2384 (1.2384)	Prec@1 78.125 (78.125)
Test: [10/79]	Time 0.022 (0.023)	Loss 1.1822 (1.3319)	Prec@1 75.781 (77.415)
Test: [20/79]	Time 0.026 (0.023)	Loss 1.0413 (1.3441)	Prec@1 83.594 (77.121)
Test: [30/79]	Time 0.027 (0.023)	Loss 0.9198 (1.3441)	Prec@1 78.125 (77.193)
Test: [40/79]	Time 0.019 (0.023)	Loss 1.6891 (1.3049)	Prec@1 75.000 (77.572)
Test: [50/79]	Time 0.023 (0.023)	Loss 1.7103 (1.3071)	Prec@1 72.656 (77.512)
Test: [60/79]	Time 0.026 (0.023)	Loss 1.3271 (1.2954)	Prec@1 84.375 (77.907)
Test: [70/79]	Time 0.026 (0.023)	Loss 1.2681 (1.2961)	Prec@1 80.469 (78.037)
 * Prec@

[310/391]	Time 0.184 (0.181)	Data 0.019 (0.020)	Loss 0.7080 (0.6204)	Prec@1 83.594 (86.653)
[320/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.5348 (0.6195)	Prec@1 91.406 (86.677)
[330/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6399 (0.6197)	Prec@1 85.156 (86.655)
[340/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.5487 (0.6205)	Prec@1 86.719 (86.620)
[350/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.5454 (0.6201)	Prec@1 87.500 (86.636)
[360/391]	Time 0.186 (0.181)	Data 0.022 (0.020)	Loss 0.6310 (0.6198)	Prec@1 84.375 (86.639)
[370/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.6363 (0.6208)	Prec@1 85.156 (86.592)
[380/391]	Time 0.185 (0.181)	Data 0.019 (0.020)	Loss 0.6364 (0.6206)	Prec@1 86.719 (86.606)
[390/391]	Time 0.131 (0.181)	Data 0.012 (0.020)	Loss 0.6942 (0.6195)	Prec@1 82.500 (86.656)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 1.3315 (1.3315)	Prec@1 76.562 (76.562)
Test: [10/79]	Time 0.020 (0.024)	Loss 0.9251 (1.1969)	Prec@1 84.375 (79.190)
Test: [20/

[260/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.7316 (0.6165)	Prec@1 80.469 (86.686)
[270/391]	Time 0.185 (0.180)	Data 0.019 (0.020)	Loss 0.5571 (0.6168)	Prec@1 89.062 (86.667)
[280/391]	Time 0.183 (0.180)	Data 0.019 (0.020)	Loss 0.5808 (0.6176)	Prec@1 87.500 (86.638)
[290/391]	Time 0.185 (0.180)	Data 0.023 (0.020)	Loss 0.6227 (0.6179)	Prec@1 86.719 (86.649)
[300/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.6873 (0.6182)	Prec@1 81.250 (86.643)
[310/391]	Time 0.179 (0.180)	Data 0.020 (0.020)	Loss 0.5718 (0.6166)	Prec@1 87.500 (86.689)
[320/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.6580 (0.6178)	Prec@1 85.938 (86.621)
[330/391]	Time 0.181 (0.180)	Data 0.019 (0.020)	Loss 0.6556 (0.6188)	Prec@1 86.719 (86.584)
[340/391]	Time 0.181 (0.180)	Data 0.020 (0.020)	Loss 0.5621 (0.6193)	Prec@1 89.062 (86.590)
[350/391]	Time 0.185 (0.180)	Data 0.019 (0.020)	Loss 0.6771 (0.6201)	Prec@1 83.594 (86.543)
[360/391]	Time 0.183 (0.180)	Data 0.020 (0.020)	Loss 0.6014 (0.6203)	Prec@1 89.0

[210/391]	Time 0.178 (0.179)	Data 0.019 (0.020)	Loss 0.5677 (0.6193)	Prec@1 91.406 (86.552)
[220/391]	Time 0.179 (0.179)	Data 0.019 (0.020)	Loss 0.7664 (0.6220)	Prec@1 79.688 (86.468)
[230/391]	Time 0.182 (0.179)	Data 0.019 (0.020)	Loss 0.5941 (0.6232)	Prec@1 86.719 (86.418)
[240/391]	Time 0.180 (0.179)	Data 0.020 (0.020)	Loss 0.5885 (0.6226)	Prec@1 89.062 (86.443)
[250/391]	Time 0.181 (0.179)	Data 0.022 (0.020)	Loss 0.5846 (0.6224)	Prec@1 88.281 (86.495)
[260/391]	Time 0.183 (0.179)	Data 0.020 (0.020)	Loss 0.5471 (0.6226)	Prec@1 89.844 (86.488)
[270/391]	Time 0.179 (0.179)	Data 0.020 (0.020)	Loss 0.6012 (0.6224)	Prec@1 88.281 (86.497)
[280/391]	Time 0.180 (0.179)	Data 0.020 (0.020)	Loss 0.7536 (0.6224)	Prec@1 83.594 (86.516)
[290/391]	Time 0.180 (0.179)	Data 0.021 (0.020)	Loss 0.7381 (0.6233)	Prec@1 80.469 (86.450)
[300/391]	Time 0.186 (0.180)	Data 0.019 (0.020)	Loss 0.6826 (0.6240)	Prec@1 82.812 (86.415)
[310/391]	Time 0.181 (0.180)	Data 0.020 (0.020)	Loss 0.6192 (0.6240)	Prec@1 86.7

[160/391]	Time 0.182 (0.180)	Data 0.019 (0.020)	Loss 0.6346 (0.6029)	Prec@1 85.156 (87.112)
[170/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.5673 (0.6036)	Prec@1 88.281 (87.107)
[180/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.5296 (0.6042)	Prec@1 89.062 (87.077)
[190/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.6139 (0.6045)	Prec@1 85.938 (87.046)
[200/391]	Time 0.182 (0.180)	Data 0.019 (0.020)	Loss 0.6005 (0.6050)	Prec@1 87.500 (87.053)
[210/391]	Time 0.181 (0.180)	Data 0.019 (0.020)	Loss 0.5090 (0.6033)	Prec@1 91.406 (87.115)
[220/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.5788 (0.6060)	Prec@1 91.406 (87.044)
[230/391]	Time 0.179 (0.180)	Data 0.020 (0.020)	Loss 0.5580 (0.6058)	Prec@1 89.062 (87.060)
[240/391]	Time 0.182 (0.180)	Data 0.020 (0.020)	Loss 0.6405 (0.6070)	Prec@1 86.719 (86.988)
[250/391]	Time 0.187 (0.180)	Data 0.020 (0.020)	Loss 0.6212 (0.6095)	Prec@1 89.844 (86.899)
[260/391]	Time 0.179 (0.180)	Data 0.020 (0.020)	Loss 0.5838 (0.6103)	Prec@1 87.5

[110/391]	Time 0.182 (0.180)	Data 0.020 (0.020)	Loss 0.5677 (0.6270)	Prec@1 89.844 (85.839)
[120/391]	Time 0.180 (0.180)	Data 0.019 (0.020)	Loss 0.6486 (0.6285)	Prec@1 88.281 (85.905)
[130/391]	Time 0.177 (0.180)	Data 0.019 (0.020)	Loss 0.6284 (0.6274)	Prec@1 85.156 (85.991)
[140/391]	Time 0.178 (0.180)	Data 0.019 (0.020)	Loss 0.5511 (0.6257)	Prec@1 89.844 (86.093)
[150/391]	Time 0.178 (0.180)	Data 0.019 (0.020)	Loss 0.5072 (0.6248)	Prec@1 92.188 (86.124)
[160/391]	Time 0.183 (0.180)	Data 0.020 (0.020)	Loss 0.5935 (0.6236)	Prec@1 89.062 (86.229)
[170/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.6164 (0.6238)	Prec@1 84.375 (86.257)
[180/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.6960 (0.6235)	Prec@1 81.250 (86.257)
[190/391]	Time 0.176 (0.180)	Data 0.019 (0.020)	Loss 0.5178 (0.6229)	Prec@1 92.188 (86.355)
[200/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.6167 (0.6215)	Prec@1 87.500 (86.423)
[210/391]	Time 0.184 (0.180)	Data 0.020 (0.020)	Loss 0.5856 (0.6195)	Prec@1 91.4

[60/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.5965 (0.6175)	Prec@1 85.156 (86.719)
[70/391]	Time 0.179 (0.180)	Data 0.020 (0.020)	Loss 0.5420 (0.6182)	Prec@1 89.844 (86.741)
[80/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.6265 (0.6156)	Prec@1 85.938 (86.786)
[90/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.5956 (0.6141)	Prec@1 89.844 (86.753)
[100/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.5959 (0.6154)	Prec@1 87.500 (86.680)
[110/391]	Time 0.184 (0.181)	Data 0.019 (0.020)	Loss 0.6128 (0.6134)	Prec@1 85.156 (86.796)
[120/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.6736 (0.6134)	Prec@1 84.375 (86.880)
[130/391]	Time 0.178 (0.181)	Data 0.020 (0.020)	Loss 0.5240 (0.6120)	Prec@1 93.750 (86.963)
[140/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6714 (0.6162)	Prec@1 87.500 (86.830)
[150/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.5709 (0.6164)	Prec@1 86.719 (86.786)
[160/391]	Time 0.185 (0.181)	Data 0.020 (0.020)	Loss 0.6235 (0.6182)	Prec@1 85.156 (

[10/391]	Time 0.181 (0.181)	Data 0.021 (0.020)	Loss 0.5827 (0.6139)	Prec@1 86.719 (87.287)
[20/391]	Time 0.178 (0.181)	Data 0.019 (0.020)	Loss 0.8289 (0.6323)	Prec@1 80.469 (86.235)
[30/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.5927 (0.6313)	Prec@1 88.281 (86.568)
[40/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.6396 (0.6265)	Prec@1 83.594 (86.585)
[50/391]	Time 0.190 (0.181)	Data 0.020 (0.020)	Loss 0.6423 (0.6303)	Prec@1 85.156 (86.137)
[60/391]	Time 0.184 (0.181)	Data 0.020 (0.020)	Loss 0.4975 (0.6212)	Prec@1 92.188 (86.565)
[70/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6000 (0.6211)	Prec@1 84.375 (86.499)
[80/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6318 (0.6216)	Prec@1 86.719 (86.400)
[90/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.6319 (0.6201)	Prec@1 85.156 (86.418)
[100/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6397 (0.6213)	Prec@1 86.719 (86.417)
[110/391]	Time 0.186 (0.181)	Data 0.020 (0.020)	Loss 0.6907 (0.6227)	Prec@1 85.156 (86.28

Test: [50/79]	Time 0.020 (0.020)	Loss 0.9401 (0.9823)	Prec@1 81.250 (81.158)
Test: [60/79]	Time 0.019 (0.020)	Loss 0.7329 (0.9715)	Prec@1 88.281 (81.378)
Test: [70/79]	Time 0.019 (0.020)	Loss 1.0819 (0.9695)	Prec@1 79.688 (81.459)
 * Prec@1 81.480

===> epoch: 75/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.185 (0.185)	Data 0.021 (0.021)	Loss 0.7351 (0.7351)	Prec@1 82.031 (82.031)
[10/391]	Time 0.179 (0.185)	Data 0.019 (0.020)	Loss 0.5691 (0.6370)	Prec@1 85.938 (85.085)
[20/391]	Time 0.181 (0.184)	Data 0.019 (0.020)	Loss 0.5773 (0.6153)	Prec@1 89.062 (86.272)
[30/391]	Time 0.189 (0.183)	Data 0.023 (0.020)	Loss 0.6670 (0.6066)	Prec@1 84.375 (86.769)
[40/391]	Time 0.183 (0.183)	Data 0.020 (0.020)	Loss 0.5773 (0.6020)	Prec@1 89.062 (86.966)
[50/391]	Time 0.183 (0.183)	Data 0.019 (0.020)	Loss 0.6388 (0.6018)	Prec@1 85.938 (87.117)
[60/391]	Time 0.182 (0.183)	Data 0.019 (0.020)	Loss 0.5475 (0.6005)	Prec@1 89.062 (87.244)
[70/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6298 (0.6

[390/391]	Time 0.120 (0.181)	Data 0.012 (0.020)	Loss 0.6988 (0.6175)	Prec@1 81.250 (86.634)
Testing:
Test: [0/79]	Time 0.022 (0.022)	Loss 1.4628 (1.4628)	Prec@1 73.438 (73.438)
Test: [10/79]	Time 0.020 (0.020)	Loss 1.4101 (1.5716)	Prec@1 75.000 (72.869)
Test: [20/79]	Time 0.019 (0.020)	Loss 1.4443 (1.6180)	Prec@1 77.344 (72.656)
Test: [30/79]	Time 0.019 (0.020)	Loss 1.4609 (1.6431)	Prec@1 75.000 (72.379)
Test: [40/79]	Time 0.020 (0.020)	Loss 1.6813 (1.6174)	Prec@1 70.312 (72.618)
Test: [50/79]	Time 0.020 (0.020)	Loss 1.4560 (1.6036)	Prec@1 73.438 (72.626)
Test: [60/79]	Time 0.019 (0.020)	Loss 1.8221 (1.6064)	Prec@1 70.312 (72.643)
Test: [70/79]	Time 0.019 (0.020)	Loss 1.6517 (1.5918)	Prec@1 73.438 (72.975)
 * Prec@1 73.170

===> epoch: 77/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.172 (0.172)	Data 0.020 (0.020)	Loss 0.6154 (0.6154)	Prec@1 85.938 (85.938)
[10/391]	Time 0.185 (0.181)	Data 0.019 (0.020)	Loss 0.7050 (0.6226)	Prec@1 81.250 (86.364)
[20/391]	Time 0.184 (0.182)	Data 

[340/391]	Time 0.178 (0.182)	Data 0.019 (0.020)	Loss 0.7320 (0.6155)	Prec@1 82.812 (86.671)
[350/391]	Time 0.182 (0.182)	Data 0.023 (0.020)	Loss 0.5657 (0.6158)	Prec@1 89.062 (86.674)
[360/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.5214 (0.6151)	Prec@1 90.625 (86.712)
[370/391]	Time 0.185 (0.182)	Data 0.020 (0.020)	Loss 0.4942 (0.6128)	Prec@1 91.406 (86.795)
[380/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.5563 (0.6134)	Prec@1 90.625 (86.766)
[390/391]	Time 0.112 (0.181)	Data 0.012 (0.020)	Loss 0.6098 (0.6135)	Prec@1 90.000 (86.766)
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.7148 (0.7148)	Prec@1 85.938 (85.938)
Test: [10/79]	Time 0.020 (0.019)	Loss 0.8802 (0.9288)	Prec@1 83.594 (82.457)
Test: [20/79]	Time 0.019 (0.019)	Loss 0.9524 (0.9691)	Prec@1 82.031 (81.994)
Test: [30/79]	Time 0.020 (0.019)	Loss 0.6247 (0.9521)	Prec@1 86.719 (81.804)
Test: [40/79]	Time 0.019 (0.019)	Loss 1.1247 (0.9483)	Prec@1 78.906 (81.688)
Test: [50/79]	Time 0.019 (0.020)	Loss 1.3586 (0.9503)	P

[290/391]	Time 0.179 (0.182)	Data 0.020 (0.020)	Loss 0.6491 (0.6088)	Prec@1 86.719 (86.899)
[300/391]	Time 0.182 (0.182)	Data 0.022 (0.020)	Loss 0.5482 (0.6096)	Prec@1 89.062 (86.838)
[310/391]	Time 0.181 (0.182)	Data 0.020 (0.020)	Loss 0.6509 (0.6102)	Prec@1 85.156 (86.787)
[320/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.5398 (0.6107)	Prec@1 89.844 (86.767)
[330/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6424 (0.6109)	Prec@1 83.594 (86.749)
[340/391]	Time 0.183 (0.182)	Data 0.024 (0.020)	Loss 0.5490 (0.6115)	Prec@1 92.188 (86.726)
[350/391]	Time 0.180 (0.182)	Data 0.021 (0.020)	Loss 0.6370 (0.6119)	Prec@1 85.156 (86.717)
[360/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.6071 (0.6107)	Prec@1 87.500 (86.777)
[370/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6404 (0.6107)	Prec@1 88.281 (86.805)
[380/391]	Time 0.183 (0.182)	Data 0.019 (0.020)	Loss 0.6424 (0.6113)	Prec@1 88.281 (86.780)
[390/391]	Time 0.111 (0.182)	Data 0.012 (0.020)	Loss 0.6272 (0.6115)	Prec@1 86.2

[240/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.6840 (0.6164)	Prec@1 81.250 (86.618)
[250/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.6930 (0.6182)	Prec@1 82.031 (86.529)
[260/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.6031 (0.6198)	Prec@1 87.500 (86.488)
[270/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.7718 (0.6205)	Prec@1 81.250 (86.451)
[280/391]	Time 0.181 (0.181)	Data 0.021 (0.020)	Loss 0.6130 (0.6207)	Prec@1 85.156 (86.446)
[290/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.6129 (0.6205)	Prec@1 89.062 (86.464)
[300/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.7221 (0.6204)	Prec@1 79.688 (86.472)
[310/391]	Time 0.184 (0.181)	Data 0.019 (0.020)	Loss 0.5300 (0.6212)	Prec@1 90.625 (86.455)
[320/391]	Time 0.185 (0.181)	Data 0.019 (0.020)	Loss 0.5449 (0.6204)	Prec@1 90.625 (86.497)
[330/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.6289 (0.6206)	Prec@1 88.281 (86.504)
[340/391]	Time 0.181 (0.181)	Data 0.021 (0.020)	Loss 0.6533 (0.6215)	Prec@1 82.8

[190/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.6248 (0.6126)	Prec@1 82.812 (86.686)
[200/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.5434 (0.6109)	Prec@1 89.062 (86.812)
[210/391]	Time 0.182 (0.181)	Data 0.025 (0.020)	Loss 0.7324 (0.6112)	Prec@1 82.031 (86.852)
[220/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.7452 (0.6112)	Prec@1 84.375 (86.864)
[230/391]	Time 0.185 (0.181)	Data 0.019 (0.020)	Loss 0.6188 (0.6129)	Prec@1 87.500 (86.807)
[240/391]	Time 0.185 (0.181)	Data 0.019 (0.020)	Loss 0.5783 (0.6139)	Prec@1 85.156 (86.764)
[250/391]	Time 0.183 (0.182)	Data 0.019 (0.020)	Loss 0.6377 (0.6149)	Prec@1 88.281 (86.753)
[260/391]	Time 0.186 (0.182)	Data 0.020 (0.020)	Loss 0.6243 (0.6164)	Prec@1 85.156 (86.698)
[270/391]	Time 0.188 (0.182)	Data 0.020 (0.020)	Loss 0.7300 (0.6195)	Prec@1 82.031 (86.549)
[280/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6828 (0.6202)	Prec@1 84.375 (86.569)
[290/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.6954 (0.6201)	Prec@1 83.5

[140/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.6304 (0.6066)	Prec@1 85.938 (86.913)
[150/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.5913 (0.6087)	Prec@1 89.844 (86.827)
[160/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.5538 (0.6098)	Prec@1 85.156 (86.738)
[170/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.5509 (0.6097)	Prec@1 90.625 (86.787)
[180/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.6638 (0.6117)	Prec@1 85.156 (86.762)
[190/391]	Time 0.186 (0.181)	Data 0.020 (0.020)	Loss 0.5457 (0.6123)	Prec@1 88.281 (86.727)
[200/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.5799 (0.6127)	Prec@1 85.938 (86.664)
[210/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.5784 (0.6115)	Prec@1 87.500 (86.745)
[220/391]	Time 0.184 (0.181)	Data 0.019 (0.020)	Loss 0.5590 (0.6121)	Prec@1 90.625 (86.729)
[230/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.6478 (0.6127)	Prec@1 85.938 (86.722)
[240/391]	Time 0.178 (0.181)	Data 0.019 (0.020)	Loss 0.6423 (0.6123)	Prec@1 85.1

[90/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.4224 (0.6008)	Prec@1 93.750 (87.096)
[100/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.5021 (0.6013)	Prec@1 91.406 (87.067)
[110/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.5537 (0.5977)	Prec@1 87.500 (87.204)
[120/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.5458 (0.5968)	Prec@1 89.844 (87.235)
[130/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6055 (0.5958)	Prec@1 86.719 (87.303)
[140/391]	Time 0.181 (0.182)	Data 0.020 (0.020)	Loss 0.5774 (0.5964)	Prec@1 87.500 (87.262)
[150/391]	Time 0.181 (0.182)	Data 0.020 (0.020)	Loss 0.6584 (0.5965)	Prec@1 83.594 (87.303)
[160/391]	Time 0.187 (0.182)	Data 0.019 (0.020)	Loss 0.6990 (0.5955)	Prec@1 82.031 (87.354)
[170/391]	Time 0.185 (0.182)	Data 0.019 (0.020)	Loss 0.6224 (0.5951)	Prec@1 89.844 (87.431)
[180/391]	Time 0.184 (0.182)	Data 0.019 (0.020)	Loss 0.5737 (0.5954)	Prec@1 90.625 (87.431)
[190/391]	Time 0.184 (0.182)	Data 0.020 (0.020)	Loss 0.5714 (0.5969)	Prec@1 88.28

[40/391]	Time 0.187 (0.181)	Data 0.023 (0.020)	Loss 0.6173 (0.6042)	Prec@1 85.938 (87.005)
[50/391]	Time 0.178 (0.181)	Data 0.019 (0.020)	Loss 0.5774 (0.5992)	Prec@1 86.719 (87.117)
[60/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.5873 (0.6006)	Prec@1 88.281 (87.103)
[70/391]	Time 0.183 (0.181)	Data 0.020 (0.020)	Loss 0.6868 (0.6000)	Prec@1 81.250 (87.181)
[80/391]	Time 0.183 (0.181)	Data 0.020 (0.020)	Loss 0.6723 (0.6021)	Prec@1 85.938 (87.172)
[90/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.6189 (0.6036)	Prec@1 85.938 (87.131)
[100/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.6461 (0.6077)	Prec@1 83.594 (86.989)
[110/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.5813 (0.6091)	Prec@1 87.500 (86.888)
[120/391]	Time 0.183 (0.182)	Data 0.019 (0.020)	Loss 0.5890 (0.6071)	Prec@1 89.844 (86.951)
[130/391]	Time 0.187 (0.182)	Data 0.019 (0.020)	Loss 0.5903 (0.6062)	Prec@1 88.281 (86.975)
[140/391]	Time 0.189 (0.182)	Data 0.019 (0.020)	Loss 0.5937 (0.6037)	Prec@1 87.500 (87

Test: [70/79]	Time 0.019 (0.019)	Loss 0.8844 (0.9287)	Prec@1 82.031 (82.339)
 * Prec@1 82.510

===> epoch: 92/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.165 (0.165)	Data 0.020 (0.020)	Loss 0.5324 (0.5324)	Prec@1 88.281 (88.281)
[10/391]	Time 0.181 (0.179)	Data 0.020 (0.020)	Loss 0.5246 (0.5992)	Prec@1 89.844 (87.784)
[20/391]	Time 0.181 (0.179)	Data 0.019 (0.020)	Loss 0.5596 (0.5833)	Prec@1 88.281 (87.649)
[30/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.5750 (0.5861)	Prec@1 90.625 (87.802)
[40/391]	Time 0.180 (0.180)	Data 0.019 (0.019)	Loss 0.5580 (0.5888)	Prec@1 88.281 (87.862)
[50/391]	Time 0.185 (0.180)	Data 0.019 (0.019)	Loss 0.5818 (0.5847)	Prec@1 86.719 (88.051)
[60/391]	Time 0.182 (0.180)	Data 0.019 (0.019)	Loss 0.5595 (0.5890)	Prec@1 91.406 (87.795)
[70/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.6751 (0.5985)	Prec@1 86.719 (87.467)
[80/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.5847 (0.5999)	Prec@1 89.062 (87.384)
[90/391]	Time 0.181 (0.181)	Data 0.

Test: [10/79]	Time 0.020 (0.019)	Loss 0.9194 (1.0172)	Prec@1 85.156 (81.960)
Test: [20/79]	Time 0.020 (0.020)	Loss 0.9034 (1.1116)	Prec@1 82.031 (80.841)
Test: [30/79]	Time 0.020 (0.020)	Loss 1.2650 (1.1317)	Prec@1 80.469 (80.292)
Test: [40/79]	Time 0.020 (0.020)	Loss 1.0887 (1.1317)	Prec@1 79.688 (80.221)
Test: [50/79]	Time 0.020 (0.020)	Loss 0.9324 (1.0927)	Prec@1 78.906 (80.469)
Test: [60/79]	Time 0.019 (0.020)	Loss 0.8731 (1.0927)	Prec@1 82.031 (80.546)
Test: [70/79]	Time 0.019 (0.020)	Loss 1.1115 (1.0944)	Prec@1 79.688 (80.447)
 * Prec@1 80.370

===> epoch: 94/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.166 (0.166)	Data 0.021 (0.021)	Loss 0.5929 (0.5929)	Prec@1 90.625 (90.625)
[10/391]	Time 0.182 (0.179)	Data 0.021 (0.020)	Loss 0.6630 (0.6008)	Prec@1 86.719 (87.500)
[20/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.6057 (0.5884)	Prec@1 88.281 (88.281)
[30/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.5655 (0.5863)	Prec@1 85.938 (88.004)
[40/391]	Time 0.180 (0.180)	

[360/391]	Time 0.183 (0.181)	Data 0.021 (0.020)	Loss 0.5601 (0.6104)	Prec@1 89.844 (86.931)
[370/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.5389 (0.6098)	Prec@1 89.062 (86.952)
[380/391]	Time 0.185 (0.181)	Data 0.020 (0.020)	Loss 0.8189 (0.6105)	Prec@1 72.656 (86.920)
[390/391]	Time 0.097 (0.181)	Data 0.013 (0.020)	Loss 0.5768 (0.6106)	Prec@1 90.000 (86.920)
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.6171 (0.6171)	Prec@1 88.281 (88.281)
Test: [10/79]	Time 0.020 (0.020)	Loss 0.6567 (0.8216)	Prec@1 86.719 (84.162)
Test: [20/79]	Time 0.020 (0.020)	Loss 0.9406 (0.8711)	Prec@1 82.031 (83.371)
Test: [30/79]	Time 0.020 (0.020)	Loss 0.5557 (0.8476)	Prec@1 85.938 (83.191)
Test: [40/79]	Time 0.019 (0.020)	Loss 1.0533 (0.8346)	Prec@1 77.344 (83.251)
Test: [50/79]	Time 0.019 (0.020)	Loss 0.8572 (0.8425)	Prec@1 82.031 (82.935)
Test: [60/79]	Time 0.020 (0.020)	Loss 0.9887 (0.8327)	Prec@1 81.250 (83.222)
Test: [70/79]	Time 0.020 (0.020)	Loss 0.8247 (0.8287)	Prec@1 84.375 (83.319)
 * Prec@

[310/391]	Time 0.187 (0.182)	Data 0.019 (0.020)	Loss 0.6841 (0.6163)	Prec@1 85.156 (86.704)
[320/391]	Time 0.182 (0.182)	Data 0.019 (0.020)	Loss 0.6588 (0.6166)	Prec@1 85.938 (86.692)
[330/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.5854 (0.6164)	Prec@1 89.062 (86.695)
[340/391]	Time 0.184 (0.182)	Data 0.020 (0.020)	Loss 0.6532 (0.6167)	Prec@1 82.031 (86.698)
[350/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.5686 (0.6169)	Prec@1 89.062 (86.681)
[360/391]	Time 0.184 (0.182)	Data 0.019 (0.020)	Loss 0.5523 (0.6159)	Prec@1 88.281 (86.721)
[370/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.6832 (0.6160)	Prec@1 82.812 (86.723)
[380/391]	Time 0.184 (0.182)	Data 0.019 (0.020)	Loss 0.6477 (0.6166)	Prec@1 82.812 (86.690)
[390/391]	Time 0.095 (0.181)	Data 0.013 (0.020)	Loss 0.5200 (0.6157)	Prec@1 91.250 (86.722)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.9891 (0.9891)	Prec@1 80.469 (80.469)
Test: [10/79]	Time 0.020 (0.019)	Loss 1.2114 (1.0537)	Prec@1 79.688 (80.611)
Test: [20/

[260/391]	Time 0.181 (0.182)	Data 0.020 (0.020)	Loss 0.6341 (0.6121)	Prec@1 86.719 (86.916)
[270/391]	Time 0.182 (0.182)	Data 0.020 (0.020)	Loss 0.7362 (0.6120)	Prec@1 81.250 (86.892)
[280/391]	Time 0.182 (0.182)	Data 0.020 (0.020)	Loss 0.5296 (0.6119)	Prec@1 89.062 (86.877)
[290/391]	Time 0.184 (0.182)	Data 0.020 (0.020)	Loss 0.5763 (0.6117)	Prec@1 84.375 (86.858)
[300/391]	Time 0.186 (0.182)	Data 0.020 (0.020)	Loss 0.6727 (0.6125)	Prec@1 85.156 (86.828)
[310/391]	Time 0.184 (0.182)	Data 0.019 (0.020)	Loss 0.7396 (0.6129)	Prec@1 79.688 (86.789)
[320/391]	Time 0.182 (0.182)	Data 0.020 (0.020)	Loss 0.4794 (0.6126)	Prec@1 92.188 (86.789)
[330/391]	Time 0.182 (0.182)	Data 0.021 (0.020)	Loss 0.6522 (0.6131)	Prec@1 89.844 (86.761)
[340/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.6483 (0.6129)	Prec@1 85.938 (86.774)
[350/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.6831 (0.6138)	Prec@1 83.594 (86.737)
[360/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.6073 (0.6149)	Prec@1 89.0

[210/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.3603 (0.4278)	Prec@1 94.531 (92.469)
[220/391]	Time 0.187 (0.181)	Data 0.020 (0.020)	Loss 0.4134 (0.4277)	Prec@1 92.969 (92.488)
[230/391]	Time 0.185 (0.181)	Data 0.019 (0.020)	Loss 0.4449 (0.4270)	Prec@1 92.188 (92.512)
[240/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.4543 (0.4267)	Prec@1 90.625 (92.518)
[250/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.4681 (0.4271)	Prec@1 89.844 (92.496)
[260/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.3814 (0.4272)	Prec@1 94.531 (92.499)
[270/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.4640 (0.4265)	Prec@1 89.062 (92.507)
[280/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.3531 (0.4262)	Prec@1 94.531 (92.516)
[290/391]	Time 0.185 (0.181)	Data 0.020 (0.020)	Loss 0.4813 (0.4257)	Prec@1 92.188 (92.550)
[300/391]	Time 0.188 (0.181)	Data 0.020 (0.020)	Loss 0.4570 (0.4262)	Prec@1 90.625 (92.515)
[310/391]	Time 0.184 (0.181)	Data 0.021 (0.020)	Loss 0.4556 (0.4258)	Prec@1 91.4

[160/391]	Time 0.183 (0.181)	Data 0.022 (0.020)	Loss 0.3576 (0.3921)	Prec@1 96.094 (93.852)
[170/391]	Time 0.182 (0.181)	Data 0.021 (0.020)	Loss 0.3947 (0.3929)	Prec@1 93.750 (93.777)
[180/391]	Time 0.181 (0.181)	Data 0.021 (0.020)	Loss 0.4647 (0.3936)	Prec@1 90.625 (93.789)
[190/391]	Time 0.179 (0.181)	Data 0.021 (0.020)	Loss 0.3629 (0.3935)	Prec@1 96.875 (93.791)
[200/391]	Time 0.184 (0.181)	Data 0.019 (0.020)	Loss 0.3833 (0.3935)	Prec@1 94.531 (93.742)
[210/391]	Time 0.185 (0.181)	Data 0.021 (0.020)	Loss 0.3503 (0.3927)	Prec@1 96.094 (93.765)
[220/391]	Time 0.180 (0.181)	Data 0.021 (0.020)	Loss 0.4255 (0.3932)	Prec@1 89.844 (93.732)
[230/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.3312 (0.3925)	Prec@1 96.094 (93.780)
[240/391]	Time 0.181 (0.181)	Data 0.021 (0.020)	Loss 0.3141 (0.3922)	Prec@1 98.438 (93.789)
[250/391]	Time 0.184 (0.181)	Data 0.020 (0.020)	Loss 0.4015 (0.3923)	Prec@1 92.969 (93.762)
[260/391]	Time 0.181 (0.181)	Data 0.022 (0.020)	Loss 0.4170 (0.3922)	Prec@1 92.9

[110/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.2976 (0.3818)	Prec@1 98.438 (94.376)
[120/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.3645 (0.3819)	Prec@1 93.750 (94.409)
[130/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.3750 (0.3834)	Prec@1 94.531 (94.281)
[140/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.3913 (0.3824)	Prec@1 94.531 (94.304)
[150/391]	Time 0.187 (0.181)	Data 0.021 (0.020)	Loss 0.4396 (0.3826)	Prec@1 91.406 (94.298)
[160/391]	Time 0.186 (0.181)	Data 0.021 (0.020)	Loss 0.4733 (0.3833)	Prec@1 91.406 (94.274)
[170/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.3925 (0.3841)	Prec@1 92.969 (94.243)
[180/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.4099 (0.3841)	Prec@1 92.969 (94.212)
[190/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.3414 (0.3831)	Prec@1 96.094 (94.253)
[200/391]	Time 0.183 (0.181)	Data 0.020 (0.020)	Loss 0.3791 (0.3832)	Prec@1 92.969 (94.213)
[210/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.4277 (0.3841)	Prec@1 91.4

[60/391]	Time 0.183 (0.182)	Data 0.019 (0.019)	Loss 0.3501 (0.3681)	Prec@1 96.094 (94.480)
[70/391]	Time 0.186 (0.182)	Data 0.019 (0.019)	Loss 0.3919 (0.3687)	Prec@1 94.531 (94.421)
[80/391]	Time 0.185 (0.182)	Data 0.020 (0.019)	Loss 0.3260 (0.3678)	Prec@1 96.094 (94.454)
[90/391]	Time 0.181 (0.182)	Data 0.019 (0.019)	Loss 0.3366 (0.3685)	Prec@1 96.875 (94.488)
[100/391]	Time 0.182 (0.182)	Data 0.019 (0.019)	Loss 0.3846 (0.3678)	Prec@1 94.531 (94.585)
[110/391]	Time 0.181 (0.182)	Data 0.020 (0.020)	Loss 0.4518 (0.3671)	Prec@1 89.844 (94.595)
[120/391]	Time 0.180 (0.182)	Data 0.019 (0.019)	Loss 0.3550 (0.3661)	Prec@1 94.531 (94.641)
[130/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.3848 (0.3665)	Prec@1 94.531 (94.621)
[140/391]	Time 0.182 (0.182)	Data 0.021 (0.020)	Loss 0.3223 (0.3676)	Prec@1 96.875 (94.559)
[150/391]	Time 0.184 (0.182)	Data 0.019 (0.020)	Loss 0.3756 (0.3683)	Prec@1 92.188 (94.469)
[160/391]	Time 0.182 (0.182)	Data 0.020 (0.020)	Loss 0.4006 (0.3678)	Prec@1 92.969 (

[10/391]	Time 0.178 (0.180)	Data 0.019 (0.020)	Loss 0.3469 (0.3569)	Prec@1 96.094 (95.455)
[20/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.2787 (0.3536)	Prec@1 99.219 (95.387)
[30/391]	Time 0.181 (0.182)	Data 0.019 (0.020)	Loss 0.3382 (0.3538)	Prec@1 95.312 (95.388)
[40/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.3388 (0.3586)	Prec@1 96.094 (95.065)
[50/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.3919 (0.3591)	Prec@1 95.312 (95.113)
[60/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.3462 (0.3578)	Prec@1 96.094 (95.274)
[70/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.4322 (0.3624)	Prec@1 91.406 (95.136)
[80/391]	Time 0.186 (0.182)	Data 0.019 (0.020)	Loss 0.4354 (0.3625)	Prec@1 92.188 (95.052)
[90/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.4279 (0.3643)	Prec@1 91.406 (95.012)
[100/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.4367 (0.3655)	Prec@1 92.188 (94.957)
[110/391]	Time 0.177 (0.181)	Data 0.019 (0.020)	Loss 0.3464 (0.3635)	Prec@1 95.312 (95.01

Test: [40/79]	Time 0.026 (0.023)	Loss 0.5706 (0.4572)	Prec@1 89.844 (91.387)
Test: [50/79]	Time 0.020 (0.023)	Loss 0.4567 (0.4422)	Prec@1 92.188 (91.498)
Test: [60/79]	Time 0.021 (0.023)	Loss 0.3079 (0.4343)	Prec@1 93.750 (91.637)
Test: [70/79]	Time 0.025 (0.023)	Loss 0.4991 (0.4292)	Prec@1 92.188 (91.659)
 * Prec@1 91.660

===> epoch: 111/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.182 (0.182)	Data 0.021 (0.021)	Loss 0.3600 (0.3600)	Prec@1 95.312 (95.312)
[10/391]	Time 0.187 (0.182)	Data 0.020 (0.020)	Loss 0.3359 (0.3484)	Prec@1 96.094 (96.023)
[20/391]	Time 0.183 (0.182)	Data 0.019 (0.020)	Loss 0.4139 (0.3560)	Prec@1 90.625 (95.499)
[30/391]	Time 0.178 (0.182)	Data 0.020 (0.020)	Loss 0.3624 (0.3590)	Prec@1 92.969 (95.338)
[40/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.3370 (0.3561)	Prec@1 95.312 (95.427)
[50/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.3874 (0.3570)	Prec@1 92.188 (95.205)
[60/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.3874 (0.3579)	Prec@1 9

[380/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.4353 (0.3607)	Prec@1 90.625 (94.937)
[390/391]	Time 0.129 (0.179)	Data 0.012 (0.020)	Loss 0.4098 (0.3606)	Prec@1 92.500 (94.934)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.4057 (0.4057)	Prec@1 91.406 (91.406)
Test: [10/79]	Time 0.026 (0.022)	Loss 0.3479 (0.4321)	Prec@1 92.969 (91.619)
Test: [20/79]	Time 0.026 (0.023)	Loss 0.3446 (0.4769)	Prec@1 93.750 (91.109)
Test: [30/79]	Time 0.019 (0.023)	Loss 0.4534 (0.4750)	Prec@1 91.406 (91.003)
Test: [40/79]	Time 0.024 (0.023)	Loss 0.6858 (0.4723)	Prec@1 85.938 (90.911)
Test: [50/79]	Time 0.025 (0.023)	Loss 0.4792 (0.4592)	Prec@1 89.062 (90.962)
Test: [60/79]	Time 0.020 (0.023)	Loss 0.3833 (0.4553)	Prec@1 92.969 (91.112)
Test: [70/79]	Time 0.023 (0.023)	Loss 0.4584 (0.4492)	Prec@1 92.188 (91.142)
 * Prec@1 91.240

===> epoch: 113/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.185 (0.185)	Data 0.021 (0.021)	Loss 0.3559 (0.3559)	Prec@1 95.312 (95.312)
[10/391]	Time 0.180 (0.184)	Dat

[330/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.3466 (0.3538)	Prec@1 95.312 (95.416)
[340/391]	Time 0.177 (0.181)	Data 0.019 (0.020)	Loss 0.3909 (0.3536)	Prec@1 92.969 (95.420)
[350/391]	Time 0.143 (0.180)	Data 0.019 (0.020)	Loss 0.3906 (0.3538)	Prec@1 93.750 (95.390)
[360/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.3785 (0.3541)	Prec@1 92.188 (95.364)
[370/391]	Time 0.181 (0.180)	Data 0.020 (0.020)	Loss 0.3530 (0.3545)	Prec@1 97.656 (95.344)
[380/391]	Time 0.183 (0.180)	Data 0.019 (0.020)	Loss 0.3192 (0.3547)	Prec@1 96.094 (95.317)
[390/391]	Time 0.129 (0.180)	Data 0.012 (0.020)	Loss 0.3276 (0.3549)	Prec@1 96.250 (95.300)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.2507 (0.2507)	Prec@1 94.531 (94.531)
Test: [10/79]	Time 0.021 (0.023)	Loss 0.4682 (0.4188)	Prec@1 90.625 (91.548)
Test: [20/79]	Time 0.025 (0.023)	Loss 0.4008 (0.4549)	Prec@1 92.188 (91.406)
Test: [30/79]	Time 0.025 (0.023)	Loss 0.3104 (0.4521)	Prec@1 93.750 (91.482)
Test: [40/79]	Time 0.021 (0.023)	Loss 0.

[280/391]	Time 0.178 (0.181)	Data 0.019 (0.020)	Loss 0.3202 (0.3524)	Prec@1 95.312 (95.287)
[290/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.3878 (0.3527)	Prec@1 93.750 (95.288)
[300/391]	Time 0.180 (0.181)	Data 0.020 (0.020)	Loss 0.3713 (0.3532)	Prec@1 96.094 (95.258)
[310/391]	Time 0.183 (0.181)	Data 0.020 (0.020)	Loss 0.3363 (0.3535)	Prec@1 95.312 (95.245)
[320/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.3224 (0.3544)	Prec@1 96.875 (95.218)
[330/391]	Time 0.143 (0.181)	Data 0.020 (0.020)	Loss 0.3416 (0.3545)	Prec@1 95.312 (95.223)
[340/391]	Time 0.181 (0.180)	Data 0.019 (0.020)	Loss 0.3987 (0.3543)	Prec@1 93.750 (95.241)
[350/391]	Time 0.182 (0.180)	Data 0.019 (0.020)	Loss 0.3594 (0.3546)	Prec@1 93.750 (95.217)
[360/391]	Time 0.183 (0.180)	Data 0.019 (0.020)	Loss 0.3290 (0.3543)	Prec@1 96.094 (95.226)
[370/391]	Time 0.179 (0.180)	Data 0.019 (0.020)	Loss 0.3447 (0.3539)	Prec@1 95.312 (95.247)
[380/391]	Time 0.182 (0.180)	Data 0.020 (0.020)	Loss 0.3993 (0.3539)	Prec@1 92.1

[230/391]	Time 0.184 (0.182)	Data 0.020 (0.020)	Loss 0.3596 (0.3462)	Prec@1 94.531 (95.789)
[240/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.3535 (0.3459)	Prec@1 96.094 (95.799)
[250/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.3769 (0.3456)	Prec@1 94.531 (95.811)
[260/391]	Time 0.178 (0.181)	Data 0.020 (0.020)	Loss 0.3158 (0.3453)	Prec@1 96.094 (95.800)
[270/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.3329 (0.3453)	Prec@1 96.094 (95.805)
[280/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.3371 (0.3451)	Prec@1 96.875 (95.796)
[290/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.3894 (0.3457)	Prec@1 93.750 (95.774)
[300/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.3482 (0.3462)	Prec@1 95.312 (95.767)
[310/391]	Time 0.148 (0.181)	Data 0.020 (0.020)	Loss 0.3500 (0.3462)	Prec@1 96.094 (95.772)
[320/391]	Time 0.154 (0.180)	Data 0.019 (0.020)	Loss 0.3454 (0.3468)	Prec@1 96.875 (95.741)
[330/391]	Time 0.180 (0.180)	Data 0.020 (0.020)	Loss 0.3990 (0.3475)	Prec@1 91.4

[180/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.3509 (0.3440)	Prec@1 94.531 (95.852)
[190/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.3024 (0.3435)	Prec@1 96.094 (95.869)
[200/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.2987 (0.3441)	Prec@1 98.438 (95.826)
[210/391]	Time 0.182 (0.181)	Data 0.021 (0.020)	Loss 0.3208 (0.3443)	Prec@1 96.875 (95.812)
[220/391]	Time 0.183 (0.181)	Data 0.020 (0.020)	Loss 0.3259 (0.3441)	Prec@1 96.094 (95.846)
[230/391]	Time 0.188 (0.181)	Data 0.019 (0.020)	Loss 0.3657 (0.3446)	Prec@1 94.531 (95.810)
[240/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.3371 (0.3453)	Prec@1 96.875 (95.766)
[250/391]	Time 0.181 (0.181)	Data 0.019 (0.020)	Loss 0.3282 (0.3454)	Prec@1 95.312 (95.742)
[260/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.3932 (0.3449)	Prec@1 92.188 (95.756)
[270/391]	Time 0.182 (0.181)	Data 0.020 (0.020)	Loss 0.2960 (0.3456)	Prec@1 97.656 (95.739)
[280/391]	Time 0.180 (0.181)	Data 0.019 (0.020)	Loss 0.3519 (0.3464)	Prec@1 94.5

[130/391]	Time 0.183 (0.182)	Data 0.019 (0.020)	Loss 0.3495 (0.3398)	Prec@1 92.969 (95.992)
[140/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.3733 (0.3416)	Prec@1 95.312 (95.933)
[150/391]	Time 0.181 (0.182)	Data 0.020 (0.020)	Loss 0.3246 (0.3425)	Prec@1 94.531 (95.897)
[160/391]	Time 0.184 (0.182)	Data 0.021 (0.020)	Loss 0.3474 (0.3429)	Prec@1 96.875 (95.856)
[170/391]	Time 0.178 (0.182)	Data 0.019 (0.020)	Loss 0.3533 (0.3427)	Prec@1 95.312 (95.879)
[180/391]	Time 0.180 (0.182)	Data 0.019 (0.020)	Loss 0.3561 (0.3430)	Prec@1 96.094 (95.900)
[190/391]	Time 0.183 (0.182)	Data 0.019 (0.020)	Loss 0.3386 (0.3427)	Prec@1 96.875 (95.897)
[200/391]	Time 0.182 (0.182)	Data 0.020 (0.020)	Loss 0.3666 (0.3432)	Prec@1 95.312 (95.888)
[210/391]	Time 0.182 (0.182)	Data 0.024 (0.020)	Loss 0.3747 (0.3439)	Prec@1 96.875 (95.857)
[220/391]	Time 0.178 (0.182)	Data 0.019 (0.020)	Loss 0.3207 (0.3443)	Prec@1 97.656 (95.825)
[230/391]	Time 0.178 (0.182)	Data 0.019 (0.020)	Loss 0.3699 (0.3439)	Prec@1 95.3

[80/391]	Time 0.184 (0.182)	Data 0.019 (0.020)	Loss 0.3269 (0.3422)	Prec@1 96.875 (95.901)
[90/391]	Time 0.185 (0.182)	Data 0.019 (0.020)	Loss 0.3541 (0.3420)	Prec@1 94.531 (95.862)
[100/391]	Time 0.186 (0.182)	Data 0.019 (0.020)	Loss 0.4237 (0.3444)	Prec@1 93.750 (95.738)
[110/391]	Time 0.180 (0.182)	Data 0.020 (0.020)	Loss 0.3547 (0.3458)	Prec@1 96.094 (95.693)
[120/391]	Time 0.179 (0.182)	Data 0.019 (0.020)	Loss 0.3553 (0.3454)	Prec@1 96.875 (95.713)
[130/391]	Time 0.179 (0.181)	Data 0.020 (0.020)	Loss 0.3522 (0.3464)	Prec@1 95.312 (95.694)
[140/391]	Time 0.178 (0.181)	Data 0.019 (0.020)	Loss 0.3404 (0.3465)	Prec@1 97.656 (95.673)
[150/391]	Time 0.182 (0.181)	Data 0.019 (0.020)	Loss 0.3492 (0.3467)	Prec@1 95.312 (95.654)
[160/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.3816 (0.3459)	Prec@1 95.312 (95.696)
[170/391]	Time 0.181 (0.181)	Data 0.020 (0.020)	Loss 0.3992 (0.3468)	Prec@1 95.312 (95.664)
[180/391]	Time 0.177 (0.181)	Data 0.020 (0.020)	Loss 0.3247 (0.3464)	Prec@1 97.656

[30/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.3543 (0.3521)	Prec@1 94.531 (95.464)
[40/391]	Time 0.177 (0.181)	Data 0.019 (0.020)	Loss 0.3079 (0.3522)	Prec@1 98.438 (95.598)
[50/391]	Time 0.184 (0.181)	Data 0.019 (0.020)	Loss 0.3207 (0.3504)	Prec@1 97.656 (95.695)
[60/391]	Time 0.186 (0.181)	Data 0.021 (0.020)	Loss 0.3302 (0.3489)	Prec@1 95.312 (95.748)
[70/391]	Time 0.185 (0.182)	Data 0.019 (0.020)	Loss 0.3535 (0.3490)	Prec@1 95.312 (95.676)
[80/391]	Time 0.178 (0.182)	Data 0.019 (0.020)	Loss 0.3661 (0.3492)	Prec@1 94.531 (95.611)
[90/391]	Time 0.179 (0.182)	Data 0.020 (0.020)	Loss 0.4112 (0.3479)	Prec@1 92.969 (95.639)
[100/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.2974 (0.3469)	Prec@1 96.875 (95.676)
[110/391]	Time 0.179 (0.181)	Data 0.019 (0.020)	Loss 0.3038 (0.3471)	Prec@1 97.656 (95.686)
[120/391]	Time 0.183 (0.181)	Data 0.019 (0.020)	Loss 0.3341 (0.3480)	Prec@1 95.312 (95.609)
[130/391]	Time 0.184 (0.181)	Data 0.019 (0.020)	Loss 0.3832 (0.3479)	Prec@1 94.531 (95.