In [None]:
from __future__ import print_function
import os
import time
import logging
import argparse
from visdom import Visdom
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from utils import *
import dataset

# Teacher models
import models

# Student models


# Wall-clock reference used to report the total run time at the end of the script.
start_time = time.time()
# os.makedirs('./checkpoint', exist_ok=True)

# Training settings
# Every help string interpolates the real default through argparse's
# ``%(default)s`` so the documentation cannot drift from the code again.
parser = argparse.ArgumentParser(description='PyTorch original KD')
parser.add_argument('--dataset',
                    choices=['CIFAR10',
                             'CIFAR100',
                             'tinyimagenet'
                            ],
                    default='CIFAR10')
parser.add_argument('--teacher',
                    choices=['ResNet32',
                             'ResNet50',
                             'ResNet56',
                             'ResNet110'
                            ],
                    default='ResNet110')
parser.add_argument('--student',
                    choices=[
                             'ResNet8',
                             'ResNet15',
                             'ResNet16',
                             'ResNet20',
                             'myNet'
                            ],
                    default='ResNet20')
parser.add_argument('--n_class', type=int, default=10, metavar='N',
                    help='number of classes (default: %(default)s)')
parser.add_argument('--T', type=float, default=20.0, metavar='T',
                    help='temperature for distillation (default: %(default)s)')
parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                    help='input batch size for training (default: %(default)s)')
parser.add_argument('--test_batch_size', type=int, default=128, metavar='N',
                    help='input batch size for testing (default: %(default)s)')
parser.add_argument('--epochs', type=int, default=20, metavar='N',
                    help='number of epochs to train (default: %(default)s)')
parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                    help='learning rate (default: %(default)s)')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                    help='SGD momentum (default: %(default)s)')
parser.add_argument('--device', default='cuda:1', type=str,
                    help='device: cuda or cpu (default: %(default)s)')
parser.add_argument('--print_freq', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status '
                         '(default: %(default)s)')

# The arguments are supplied as an explicit list instead of being read from
# sys.argv; edit this list to change the experiment.
config = ['--dataset', 'CIFAR100', '--epochs', '200', '--n_class', '100', '--teacher', 'ResNet110', '--student', 'ResNet8', '--T', '5.0', '--device', 'cuda:0']
args = parser.parse_args(config)

# Use the requested device only when CUDA is actually available; otherwise run on CPU.
device = args.device if torch.cuda.is_available() else 'cpu'
# Per-dataset directory holding the teacher checkpoint (and, later, the log file).
load_dir = './checkpoint/' + args.dataset + '/'

# Teacher: constructor looked up by name in ``models`` and restored from
# '<load_dir><teacher>.pth'.
teacher_model = getattr(models, args.teacher)(args.n_class)
# map_location remaps the stored tensors onto ``device`` so a checkpoint saved
# on a GPU can still be restored when the CPU fallback above is taken.
teacher_model.load_state_dict(
    torch.load(load_dir + args.teacher + '.pth', map_location=device))
teacher_model.to(device)
# The teacher is only used for inference: eval mode makes layers such as
# BatchNorm/Dropout behave deterministically and stops BatchNorm running
# statistics from being updated by the distillation forward passes.
teacher_model.eval()

# Student: same lookup, trained from its fresh initialisation.
st_model = getattr(models, args.student)(args.n_class)  # args.student()
st_model.to(device)

# logging
logfile = load_dir + 'KD_' + st_model.model_name + '.log'
# Start every run with an empty log: drop the file left by a previous run.
if os.path.exists(logfile):
    os.remove(logfile)


def log_out(info):
    """Append ``info`` plus a newline to ``logfile`` and echo it to stdout.

    Args:
        info: text of the log line (must be a ``str``; it is passed to
            ``file.write`` unchanged).
    """
    # The context manager closes the handle even if a write raises.
    with open(logfile, mode='a') as f:
        f.write(info)
        f.write('\n')
    print(info)
    
# visualizer
# Both plots live in the 'distill' visdom environment. Each window is created
# with a single seed point at the origin; the returned handles are kept so
# later calls can append to the same windows.
# Further axis/marker options (xtickmax, xtickstep, ytickstep, markers,
# markersymbol, markersize) are left at their visdom defaults.
vis = Visdom(env='distill')

# Training-loss curve (one trace).
loss_win = vis.line(
    X=np.array([0]),
    Y=np.array([0]),
    opts={
        'title': args.student + ' KD Loss',
        'xtickmin': 0,
        'ytickmin': 0,
    },
    name="loss",
)

# Accuracy curves: two traces (train / test) on a 0-100 axis.
acc_win = vis.line(
    X=np.column_stack((0, 0)),
    Y=np.column_stack((0, 0)),
    opts={
        'title': args.student + ' KD ACC',
        'xtickmin': 0,
        'ytickmin': 0,
        'ytickmax': 100,
        'legend': ['train_acc', 'test_acc'],
    },
    name="acc",
)


# data
# Per-channel standardisation shared by the train and test pipelines.
# NOTE(review): these look like the usual ImageNet statistics - confirm they
# are the intended values for the CIFAR datasets.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Evaluation: tensor conversion + normalisation only.
test_transform = transforms.Compose([transforms.ToTensor(), normalize])
# Training: random horizontal flip and a random 32-pixel crop (padding 4)
# before the same conversion + normalisation.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    normalize,
])

if args.dataset != 'tinyimagenet':
    # CIFAR10 / CIFAR100: the torchvision dataset class is looked up by name.
    # Only the training split triggers a download.
    dataset_cls = getattr(datasets, args.dataset)
    train_set = dataset_cls(root='../data', train=True, download=True, transform=train_transform)
    test_set = dataset_cls(root='../data', train=False, download=False, transform=test_transform)
else:
    # NOTE(review): both sets are built from the same root with no visible
    # split argument - verify how dataset.TinyImageNet separates train/test.
    train_set = dataset.TinyImageNet(root='../data/tiny-imagenet-200', transform=train_transform)
    test_set = dataset.TinyImageNet(root='../data/tiny-imagenet-200', transform=test_transform)

# Training batches are reshuffled every epoch; evaluation order is fixed.
train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=args.test_batch_size, shuffle=False)

# SGD over the student's parameters only (the teacher is never optimised).
# The momentum comes from --momentum (default 0.9) instead of a hard-coded
# constant, so the command-line option is no longer silently ignored.
optimizer_sgd = optim.SGD(st_model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=5e-4)
# Step decay of the learning rate at the 100 and 150 milestones (scheduler's default factor).
lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_sgd, milestones=[100, 150])

def distillation(y, labels, teacher_scores, T, alpha):
    """Knowledge-distillation loss: softened KL term plus hard-label cross-entropy.

    Computes
    ``KLDiv(log_softmax(y / T), softmax(teacher_scores / T)) * (T*T * 2.0 * alpha)
    + cross_entropy(y, labels) * (1 - alpha)``.

    Args:
        y: student logits with the class scores along dimension 1.
        labels: ground-truth class indices accepted by ``F.cross_entropy``.
        teacher_scores: teacher logits, same shape as ``y``.
        T: softmax temperature applied to both sets of logits.
        alpha: weight of the distillation term; ``1 - alpha`` weights the
            cross-entropy term.

    Returns:
        Scalar loss tensor.
    """
    # The class dimension is named explicitly: relying on the implicit softmax
    # dimension is deprecated and selects dim 0 for non-2D inputs.
    student_log_probs = F.log_softmax(y / T, dim=1)
    teacher_probs = F.softmax(teacher_scores / T, dim=1)
    # NOTE(review): the default KLDivLoss reduction averages over every element
    # (batch x classes), not per sample; kept as-is so the loss scale the
    # existing hyper-parameters were tuned for does not change.
    soft_loss = nn.KLDivLoss()(student_log_probs, teacher_probs)
    hard_loss = F.cross_entropy(y, labels)
    return soft_loss * (T * T * 2.0 * alpha) + hard_loss * (1. - alpha)


def train(epoch, model, loss_fn):
    """Train ``model`` for one pass over ``train_loader`` against the teacher.

    Args:
        epoch: current epoch number (not used inside the function; kept so
            existing callers keep working).
        model: student network; its forward pass returns a 5-tuple whose last
            element is the output that is scored.
        loss_fn: callable invoked as
            ``loss_fn(output, target, t_output, T=args.T, alpha=0.7)``.

    Returns:
        ``(losses.avg, top1.avg as a numpy value)`` - the running-average loss
        and top-1 accuracy of the epoch. The accuracy used to be the value of
        the last mini-batch only, which made the plotted curve noisy.
    """
    print('Training:')
    # switch to train mode
    model.train()
    # The teacher only supplies targets: keep it in eval mode so its layers
    # behave deterministically and it is not altered by these forward passes.
    teacher_model.eval()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        images, target = images.to(device), target.to(device)
        optimizer_sgd.zero_grad()
        # compute outputs (only the last element of each model's tuple is used)
        _, _, _, _, output = model(images)
        with torch.no_grad():
            # no gradients are needed for the teacher
            _, _, _, _, t_output = teacher_model(images)

        # compute gradient and do SGD step
        loss = loss_fn(output, target, t_output, T=args.T, alpha=0.7)

        loss.backward()
        optimizer_sgd.step()

        output = output.float()
        loss = loss.float()
        # measure accuracy and record loss
        train_acc = accuracy(output.detach(), target)[0]
        losses.update(loss.item(), images.size(0))
        top1.update(train_acc, images.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            log_out('[{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1))
    # Report the epoch-level average, mirroring what test() returns for top-1.
    return losses.avg, top1.avg.cpu().numpy()


def test(model):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Progress is logged every ``args.print_freq`` batches and a summary line
    with the average top-1 accuracy is logged at the end.

    Returns:
        ``(average loss, accuracy of the last batch, average top-1 accuracy)``,
        the two accuracies converted to numpy values.
    """
    print('Testing:')
    # evaluation mode for the whole pass
    model.eval()

    timer = AverageMeter()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    progress_fmt = ('Test: [{0}/{1}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})')

    tick = time.time()
    with torch.no_grad():
        for step, (images, labels) in enumerate(test_loader):
            images = images.to(device)
            labels = labels.to(device)

            # forward pass: only the last element of the model's tuple is scored
            _, _, _, _, scores = model(images)
            batch_loss = F.cross_entropy(scores, labels)

            scores = scores.float()
            batch_loss = batch_loss.float()

            # accuracy of this batch, then update the running statistics
            test_acc = accuracy(scores.data, labels.data)[0]
            n_samples = images.size(0)
            loss_meter.update(batch_loss.item(), n_samples)
            acc_meter.update(test_acc, n_samples)

            # time spent on this batch
            timer.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq != 0:
                continue
            log_out(progress_fmt.format(
                step, len(test_loader), batch_time=timer, loss=loss_meter,
                top1=acc_meter))

    log_out(' * Prec@1 {top1.avg:.3f}'.format(top1=acc_meter))

    return loss_meter.avg, test_acc.cpu().numpy(), acc_meter.avg.cpu().numpy()


print('StudentNet:\n')
print(st_model)
# Best average test top-1 accuracy seen so far.
best_acc = 0
for epoch in range(1, args.epochs + 1):
    log_out("\n===> epoch: {}/{}".format(epoch, args.epochs))
    # This is the learning rate the epoch below is actually trained with.
    log_out('current lr {:.5e}'.format(optimizer_sgd.param_groups[0]['lr']))
    train_loss, train_acc = train(epoch, st_model, loss_fn=distillation)
    # visualize loss
    vis.line(np.array([train_loss]), np.array([epoch]), loss_win, update="append")
    _, test_acc, top1 = test(st_model)
    best_acc = max(top1, best_acc)
    # one point per epoch on each accuracy trace (train, test)
    vis.line(np.column_stack((train_acc, top1)), np.column_stack((epoch, epoch)), acc_win, update="append")
    # Advance the schedule only after this epoch's optimizer steps. Stepping
    # first (as before) makes PyTorch >= 1.1 skip the first schedule value, so
    # every milestone was applied one epoch early.
    lr_scheduler.step()

# torch.save(st_model.state_dict(), load_dir + args.teacher + '_distill_' + args.student + '.pth')
# release GPU memory
torch.cuda.empty_cache()

log_out("@ BEST ACC = {:.4f}%".format(best_acc))
log_out("--- {:.3f} mins ---".format((time.time() - start_time)/60))


  init.kaiming_normal(m.weight)
  init.kaiming_normal(m.weight)


Files already downloaded and verified
StudentNet:

ResNet(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2



[0/391]	Time 1.085 (1.085)	Data 0.071 (0.071)	Loss 1.7962 (1.7962)	Prec@1 0.781 (0.781)
[10/391]	Time 0.086 (0.186)	Data 0.043 (0.047)	Loss 1.6153 (1.7022)	Prec@1 0.781 (0.781)
[20/391]	Time 0.066 (0.135)	Data 0.020 (0.040)	Loss 1.6127 (1.6640)	Prec@1 0.781 (1.042)
[30/391]	Time 0.076 (0.121)	Data 0.034 (0.039)	Loss 1.5903 (1.6418)	Prec@1 1.562 (1.134)
[40/391]	Time 0.079 (0.112)	Data 0.034 (0.038)	Loss 1.5770 (1.6285)	Prec@1 0.781 (1.086)
[50/391]	Time 0.101 (0.107)	Data 0.053 (0.039)	Loss 1.5876 (1.6184)	Prec@1 0.781 (1.195)
[60/391]	Time 0.073 (0.103)	Data 0.029 (0.038)	Loss 1.5613 (1.6117)	Prec@1 0.781 (1.242)
[70/391]	Time 0.070 (0.100)	Data 0.024 (0.037)	Loss 1.5909 (1.6068)	Prec@1 1.562 (1.309)
[80/391]	Time 0.094 (0.099)	Data 0.051 (0.038)	Loss 1.5648 (1.6026)	Prec@1 1.562 (1.360)
[90/391]	Time 0.101 (0.098)	Data 0.037 (0.038)	Loss 1.5920 (1.5998)	Prec@1 0.781 (1.339)
[100/391]	Time 0.082 (0.096)	Data 0.035 (0.038)	Loss 1.5798 (1.5967)	Prec@1 1.562 (1.377)
[110/391]	Time 0.069 

Test: [50/79]	Time 0.037 (0.036)	Loss 3.7433 (3.6788)	Prec@1 10.156 (11.535)
Test: [60/79]	Time 0.032 (0.036)	Loss 3.9528 (3.6883)	Prec@1 8.594 (11.399)
Test: [70/79]	Time 0.036 (0.036)	Loss 3.5814 (3.6869)	Prec@1 12.500 (11.202)
 * Prec@1 11.230

===> epoch: 3/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.092 (0.092)	Data 0.046 (0.046)	Loss 1.2542 (1.2542)	Prec@1 14.062 (14.062)
[10/391]	Time 0.084 (0.083)	Data 0.043 (0.037)	Loss 1.2873 (1.2636)	Prec@1 11.719 (12.642)
[20/391]	Time 0.075 (0.082)	Data 0.032 (0.035)	Loss 1.2615 (1.2599)	Prec@1 12.500 (12.760)
[30/391]	Time 0.086 (0.083)	Data 0.035 (0.035)	Loss 1.2347 (1.2566)	Prec@1 15.625 (12.853)
[40/391]	Time 0.119 (0.085)	Data 0.038 (0.036)	Loss 1.2820 (1.2545)	Prec@1 11.719 (12.919)
[50/391]	Time 0.073 (0.085)	Data 0.027 (0.036)	Loss 1.2528 (1.2542)	Prec@1 14.062 (12.791)
[60/391]	Time 0.080 (0.086)	Data 0.032 (0.036)	Loss 1.2418 (1.2517)	Prec@1 10.156 (12.666)
[70/391]	Time 0.102 (0.087)	Data 0.057 (0.037)	Loss 1.3338 (1.249

[390/391]	Time 0.063 (0.080)	Data 0.022 (0.035)	Loss 1.0946 (1.1350)	Prec@1 22.500 (18.230)
Testing:
Test: [0/79]	Time 0.029 (0.029)	Loss 3.5241 (3.5241)	Prec@1 17.969 (17.969)
Test: [10/79]	Time 0.036 (0.031)	Loss 3.2615 (3.4222)	Prec@1 19.531 (17.045)
Test: [20/79]	Time 0.029 (0.033)	Loss 3.5996 (3.4496)	Prec@1 18.750 (16.629)
Test: [30/79]	Time 0.027 (0.034)	Loss 3.3599 (3.4507)	Prec@1 17.188 (16.532)
Test: [40/79]	Time 0.032 (0.034)	Loss 3.0471 (3.4407)	Prec@1 21.094 (16.845)
Test: [50/79]	Time 0.036 (0.034)	Loss 3.5768 (3.4345)	Prec@1 15.625 (17.034)
Test: [60/79]	Time 0.030 (0.034)	Loss 3.7099 (3.4499)	Prec@1 11.719 (16.790)
Test: [70/79]	Time 0.031 (0.034)	Loss 3.5441 (3.4570)	Prec@1 20.312 (16.714)
 * Prec@1 16.720

===> epoch: 5/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.095 (0.095)	Data 0.041 (0.041)	Loss 1.1298 (1.1298)	Prec@1 20.312 (20.312)
[10/391]	Time 0.089 (0.090)	Data 0.038 (0.040)	Loss 1.0650 (1.1005)	Prec@1 23.438 (18.608)
[20/391]	Time 0.131 (0.086)	Data 0

[340/391]	Time 0.118 (0.086)	Data 0.076 (0.038)	Loss 0.9507 (1.0155)	Prec@1 25.000 (24.759)
[350/391]	Time 0.082 (0.086)	Data 0.034 (0.039)	Loss 0.9890 (1.0149)	Prec@1 28.125 (24.755)
[360/391]	Time 0.084 (0.087)	Data 0.038 (0.039)	Loss 0.9724 (1.0139)	Prec@1 27.344 (24.818)
[370/391]	Time 0.081 (0.086)	Data 0.032 (0.039)	Loss 0.9791 (1.0135)	Prec@1 26.562 (24.829)
[380/391]	Time 0.085 (0.086)	Data 0.046 (0.039)	Loss 0.9286 (1.0129)	Prec@1 30.469 (24.863)
[390/391]	Time 0.059 (0.086)	Data 0.021 (0.039)	Loss 0.9098 (1.0122)	Prec@1 38.750 (24.944)
Testing:
Test: [0/79]	Time 0.042 (0.042)	Loss 3.5247 (3.5247)	Prec@1 23.438 (23.438)
Test: [10/79]	Time 0.034 (0.030)	Loss 3.5012 (3.5426)	Prec@1 18.750 (19.531)
Test: [20/79]	Time 0.030 (0.031)	Loss 3.6688 (3.5339)	Prec@1 17.969 (19.568)
Test: [30/79]	Time 0.029 (0.032)	Loss 3.5882 (3.5195)	Prec@1 25.781 (19.808)
Test: [40/79]	Time 0.057 (0.034)	Loss 3.2428 (3.5056)	Prec@1 22.656 (20.008)
Test: [50/79]	Time 0.038 (0.035)	Loss 3.5431 (3.4930)	P

[290/391]	Time 0.079 (0.103)	Data 0.036 (0.035)	Loss 0.8938 (0.9311)	Prec@1 29.688 (29.755)
[300/391]	Time 0.136 (0.103)	Data 0.036 (0.035)	Loss 0.8769 (0.9309)	Prec@1 33.594 (29.654)
[310/391]	Time 0.126 (0.104)	Data 0.055 (0.036)	Loss 0.9421 (0.9309)	Prec@1 30.469 (29.640)
[320/391]	Time 0.124 (0.103)	Data 0.034 (0.036)	Loss 0.9099 (0.9307)	Prec@1 27.344 (29.602)
[330/391]	Time 0.129 (0.104)	Data 0.038 (0.036)	Loss 0.9164 (0.9303)	Prec@1 28.125 (29.638)
[340/391]	Time 0.129 (0.105)	Data 0.041 (0.036)	Loss 0.8814 (0.9302)	Prec@1 33.594 (29.658)
[350/391]	Time 0.112 (0.105)	Data 0.029 (0.036)	Loss 0.9798 (0.9298)	Prec@1 28.125 (29.618)
[360/391]	Time 0.122 (0.106)	Data 0.036 (0.036)	Loss 1.0051 (0.9307)	Prec@1 24.219 (29.540)
[370/391]	Time 0.161 (0.106)	Data 0.045 (0.036)	Loss 0.9011 (0.9305)	Prec@1 33.594 (29.567)
[380/391]	Time 0.121 (0.106)	Data 0.030 (0.036)	Loss 0.9606 (0.9304)	Prec@1 25.000 (29.591)
[390/391]	Time 0.093 (0.106)	Data 0.020 (0.036)	Loss 0.9449 (0.9294)	Prec@1 30.0

[240/391]	Time 0.093 (0.113)	Data 0.038 (0.038)	Loss 0.9055 (0.8810)	Prec@1 28.125 (32.624)
[250/391]	Time 0.080 (0.112)	Data 0.031 (0.037)	Loss 0.8779 (0.8809)	Prec@1 37.500 (32.651)
[260/391]	Time 0.095 (0.112)	Data 0.035 (0.037)	Loss 0.9193 (0.8822)	Prec@1 30.469 (32.558)
[270/391]	Time 0.101 (0.113)	Data 0.041 (0.038)	Loss 0.8683 (0.8812)	Prec@1 39.062 (32.648)
[280/391]	Time 0.086 (0.112)	Data 0.035 (0.038)	Loss 0.8931 (0.8812)	Prec@1 29.688 (32.598)
[290/391]	Time 0.122 (0.112)	Data 0.059 (0.038)	Loss 0.8377 (0.8806)	Prec@1 33.594 (32.614)
[300/391]	Time 0.120 (0.113)	Data 0.032 (0.038)	Loss 0.8848 (0.8805)	Prec@1 28.906 (32.633)
[310/391]	Time 0.141 (0.113)	Data 0.053 (0.038)	Loss 0.9207 (0.8803)	Prec@1 27.344 (32.634)
[320/391]	Time 0.089 (0.113)	Data 0.039 (0.038)	Loss 0.8720 (0.8803)	Prec@1 31.250 (32.645)
[330/391]	Time 0.157 (0.113)	Data 0.068 (0.038)	Loss 0.8294 (0.8804)	Prec@1 34.375 (32.640)
[340/391]	Time 0.140 (0.113)	Data 0.050 (0.038)	Loss 0.9285 (0.8801)	Prec@1 32.0

[190/391]	Time 0.126 (0.111)	Data 0.042 (0.036)	Loss 0.8257 (0.8566)	Prec@1 39.062 (34.146)
[200/391]	Time 0.088 (0.111)	Data 0.032 (0.036)	Loss 0.8358 (0.8560)	Prec@1 38.281 (34.173)
[210/391]	Time 0.123 (0.112)	Data 0.034 (0.036)	Loss 0.9243 (0.8550)	Prec@1 23.438 (34.205)
[220/391]	Time 0.097 (0.113)	Data 0.035 (0.036)	Loss 0.8038 (0.8538)	Prec@1 39.844 (34.375)
[230/391]	Time 0.099 (0.113)	Data 0.028 (0.036)	Loss 0.8705 (0.8535)	Prec@1 33.594 (34.480)
[240/391]	Time 0.132 (0.113)	Data 0.040 (0.036)	Loss 0.8552 (0.8534)	Prec@1 28.906 (34.524)
[250/391]	Time 0.128 (0.113)	Data 0.054 (0.036)	Loss 0.8365 (0.8526)	Prec@1 34.375 (34.556)
[260/391]	Time 0.114 (0.113)	Data 0.027 (0.036)	Loss 0.9131 (0.8529)	Prec@1 38.281 (34.561)
[270/391]	Time 0.088 (0.113)	Data 0.036 (0.036)	Loss 0.8908 (0.8538)	Prec@1 38.281 (34.513)
[280/391]	Time 0.141 (0.114)	Data 0.051 (0.036)	Loss 0.8475 (0.8536)	Prec@1 35.156 (34.514)
[290/391]	Time 0.131 (0.113)	Data 0.045 (0.036)	Loss 0.8266 (0.8531)	Prec@1 39.0

[140/391]	Time 0.120 (0.115)	Data 0.034 (0.038)	Loss 0.8700 (0.8287)	Prec@1 35.938 (35.688)
[150/391]	Time 0.127 (0.116)	Data 0.037 (0.038)	Loss 0.7947 (0.8287)	Prec@1 36.719 (35.710)
[160/391]	Time 0.117 (0.116)	Data 0.029 (0.038)	Loss 0.8560 (0.8278)	Prec@1 36.719 (35.923)
[170/391]	Time 0.119 (0.116)	Data 0.032 (0.038)	Loss 0.7832 (0.8278)	Prec@1 39.062 (35.956)
[180/391]	Time 0.125 (0.116)	Data 0.034 (0.038)	Loss 0.8442 (0.8283)	Prec@1 34.375 (35.942)
[190/391]	Time 0.088 (0.116)	Data 0.033 (0.038)	Loss 0.7851 (0.8281)	Prec@1 42.188 (35.974)
[200/391]	Time 0.122 (0.116)	Data 0.030 (0.038)	Loss 0.8321 (0.8286)	Prec@1 32.812 (35.941)
[210/391]	Time 0.086 (0.116)	Data 0.033 (0.037)	Loss 0.8537 (0.8285)	Prec@1 35.156 (35.993)
[220/391]	Time 0.121 (0.116)	Data 0.033 (0.037)	Loss 0.8566 (0.8290)	Prec@1 34.375 (35.980)
[230/391]	Time 0.115 (0.116)	Data 0.031 (0.037)	Loss 0.8065 (0.8282)	Prec@1 37.500 (36.025)
[240/391]	Time 0.080 (0.116)	Data 0.028 (0.037)	Loss 0.8618 (0.8284)	Prec@1 35.9

[90/391]	Time 0.118 (0.117)	Data 0.037 (0.037)	Loss 0.8108 (0.8115)	Prec@1 34.375 (37.105)
[100/391]	Time 0.089 (0.116)	Data 0.037 (0.037)	Loss 0.7901 (0.8122)	Prec@1 36.719 (37.059)
[110/391]	Time 0.128 (0.116)	Data 0.040 (0.037)	Loss 0.7852 (0.8131)	Prec@1 42.188 (37.155)
[120/391]	Time 0.128 (0.116)	Data 0.034 (0.037)	Loss 0.8110 (0.8141)	Prec@1 33.594 (37.132)
[130/391]	Time 0.125 (0.117)	Data 0.034 (0.037)	Loss 0.7800 (0.8140)	Prec@1 36.719 (37.100)
[140/391]	Time 0.124 (0.116)	Data 0.033 (0.037)	Loss 0.7612 (0.8135)	Prec@1 41.406 (37.079)
[150/391]	Time 0.118 (0.116)	Data 0.029 (0.037)	Loss 0.8103 (0.8138)	Prec@1 36.719 (36.977)
[160/391]	Time 0.109 (0.116)	Data 0.026 (0.037)	Loss 0.7635 (0.8147)	Prec@1 41.406 (36.893)
[170/391]	Time 0.125 (0.116)	Data 0.033 (0.037)	Loss 0.8352 (0.8162)	Prec@1 35.938 (36.769)
[180/391]	Time 0.079 (0.116)	Data 0.032 (0.037)	Loss 0.8038 (0.8158)	Prec@1 34.375 (36.745)
[190/391]	Time 0.096 (0.116)	Data 0.032 (0.037)	Loss 0.9065 (0.8172)	Prec@1 30.46

[40/391]	Time 0.088 (0.115)	Data 0.032 (0.037)	Loss 0.7830 (0.7948)	Prec@1 36.719 (37.995)
[50/391]	Time 0.080 (0.115)	Data 0.036 (0.038)	Loss 0.7744 (0.7945)	Prec@1 42.188 (37.914)
[60/391]	Time 0.103 (0.115)	Data 0.027 (0.038)	Loss 0.7318 (0.7943)	Prec@1 43.750 (37.987)
[70/391]	Time 0.096 (0.115)	Data 0.032 (0.038)	Loss 0.8059 (0.7997)	Prec@1 39.062 (37.720)
[80/391]	Time 0.114 (0.117)	Data 0.034 (0.038)	Loss 0.7436 (0.7988)	Prec@1 42.969 (37.760)
[90/391]	Time 0.095 (0.117)	Data 0.032 (0.037)	Loss 0.8775 (0.8025)	Prec@1 37.500 (37.680)
[100/391]	Time 0.117 (0.116)	Data 0.039 (0.037)	Loss 0.8520 (0.8036)	Prec@1 34.375 (37.655)
[110/391]	Time 0.123 (0.114)	Data 0.034 (0.037)	Loss 0.7549 (0.8029)	Prec@1 44.531 (37.690)
[120/391]	Time 0.084 (0.114)	Data 0.038 (0.037)	Loss 0.7380 (0.8024)	Prec@1 41.406 (37.816)
[130/391]	Time 0.128 (0.115)	Data 0.051 (0.038)	Loss 0.7723 (0.8031)	Prec@1 39.844 (37.691)
[140/391]	Time 0.107 (0.114)	Data 0.040 (0.037)	Loss 0.7600 (0.8024)	Prec@1 39.844 (37

Test: [70/79]	Time 0.031 (0.041)	Loss 2.8492 (2.8789)	Prec@1 34.375 (28.785)
 * Prec@1 28.790

===> epoch: 20/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.124 (0.124)	Data 0.035 (0.035)	Loss 0.7648 (0.7648)	Prec@1 39.062 (39.062)
[10/391]	Time 0.144 (0.126)	Data 0.061 (0.043)	Loss 0.7762 (0.7733)	Prec@1 37.500 (38.920)
[20/391]	Time 0.075 (0.117)	Data 0.020 (0.037)	Loss 0.7929 (0.7792)	Prec@1 39.062 (38.914)
[30/391]	Time 0.099 (0.121)	Data 0.025 (0.044)	Loss 0.8188 (0.7794)	Prec@1 30.469 (38.609)
[40/391]	Time 0.135 (0.118)	Data 0.060 (0.041)	Loss 0.8919 (0.7906)	Prec@1 32.812 (38.053)
[50/391]	Time 0.122 (0.116)	Data 0.032 (0.041)	Loss 0.8139 (0.7887)	Prec@1 37.500 (38.189)
[60/391]	Time 0.119 (0.116)	Data 0.034 (0.040)	Loss 0.8099 (0.7850)	Prec@1 35.156 (38.358)
[70/391]	Time 0.128 (0.117)	Data 0.027 (0.041)	Loss 0.7379 (0.7872)	Prec@1 43.750 (38.281)
[80/391]	Time 0.128 (0.117)	Data 0.036 (0.039)	Loss 0.8144 (0.7882)	Prec@1 38.281 (38.378)
[90/391]	Time 0.126 (0.116)	Data 0.

Test: [10/79]	Time 0.055 (0.037)	Loss 3.1252 (3.3084)	Prec@1 28.906 (26.562)
Test: [20/79]	Time 0.033 (0.034)	Loss 3.4209 (3.3161)	Prec@1 25.781 (26.935)
Test: [30/79]	Time 0.028 (0.032)	Loss 3.1601 (3.3352)	Prec@1 26.562 (26.436)
Test: [40/79]	Time 0.020 (0.032)	Loss 3.2212 (3.3528)	Prec@1 25.781 (25.915)
Test: [50/79]	Time 0.056 (0.033)	Loss 3.3252 (3.3544)	Prec@1 25.781 (25.996)
Test: [60/79]	Time 0.031 (0.034)	Loss 4.0685 (3.3707)	Prec@1 21.875 (26.050)
Test: [70/79]	Time 0.038 (0.034)	Loss 3.1241 (3.3687)	Prec@1 30.469 (25.990)
 * Prec@1 25.990

===> epoch: 22/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.100 (0.100)	Data 0.040 (0.040)	Loss 0.7917 (0.7917)	Prec@1 34.375 (34.375)
[10/391]	Time 0.131 (0.129)	Data 0.039 (0.043)	Loss 0.7901 (0.7755)	Prec@1 37.500 (39.915)
[20/391]	Time 0.087 (0.121)	Data 0.030 (0.040)	Loss 0.7308 (0.7758)	Prec@1 48.438 (39.993)
[30/391]	Time 0.125 (0.121)	Data 0.033 (0.039)	Loss 0.7589 (0.7685)	Prec@1 38.281 (40.247)
[40/391]	Time 0.096 (0.119)	

[360/391]	Time 0.132 (0.119)	Data 0.040 (0.039)	Loss 0.7796 (0.7733)	Prec@1 35.156 (39.723)
[370/391]	Time 0.136 (0.119)	Data 0.054 (0.039)	Loss 0.7705 (0.7735)	Prec@1 43.750 (39.701)
[380/391]	Time 0.162 (0.119)	Data 0.061 (0.039)	Loss 0.7667 (0.7736)	Prec@1 38.281 (39.647)
[390/391]	Time 0.122 (0.119)	Data 0.039 (0.039)	Loss 0.8739 (0.7741)	Prec@1 27.500 (39.580)
Testing:
Test: [0/79]	Time 0.033 (0.033)	Loss 2.9192 (2.9192)	Prec@1 31.250 (31.250)
Test: [10/79]	Time 0.035 (0.044)	Loss 2.9440 (2.9802)	Prec@1 25.000 (29.474)
Test: [20/79]	Time 0.035 (0.041)	Loss 3.1753 (2.9733)	Prec@1 21.875 (29.055)
Test: [30/79]	Time 0.049 (0.040)	Loss 3.1799 (2.9652)	Prec@1 23.438 (28.352)
Test: [40/79]	Time 0.042 (0.039)	Loss 2.7526 (2.9607)	Prec@1 29.688 (27.934)
Test: [50/79]	Time 0.033 (0.037)	Loss 3.0759 (2.9498)	Prec@1 30.469 (27.803)
Test: [60/79]	Time 0.027 (0.036)	Loss 2.9452 (2.9570)	Prec@1 31.250 (27.882)
Test: [70/79]	Time 0.025 (0.035)	Loss 2.7265 (2.9602)	Prec@1 34.375 (28.169)
 * Prec@

[310/391]	Time 0.101 (0.116)	Data 0.039 (0.038)	Loss 0.8033 (0.7656)	Prec@1 40.625 (40.042)
[320/391]	Time 0.121 (0.116)	Data 0.037 (0.039)	Loss 0.8176 (0.7650)	Prec@1 37.500 (40.080)
[330/391]	Time 0.068 (0.116)	Data 0.036 (0.039)	Loss 0.7513 (0.7646)	Prec@1 39.062 (40.075)
[340/391]	Time 0.085 (0.116)	Data 0.027 (0.039)	Loss 0.7744 (0.7648)	Prec@1 42.188 (40.013)
[350/391]	Time 0.089 (0.115)	Data 0.035 (0.039)	Loss 0.7976 (0.7653)	Prec@1 33.594 (39.942)
[360/391]	Time 0.126 (0.115)	Data 0.034 (0.039)	Loss 0.7868 (0.7658)	Prec@1 39.844 (39.919)
[370/391]	Time 0.114 (0.116)	Data 0.027 (0.039)	Loss 0.7928 (0.7663)	Prec@1 41.406 (39.913)
[380/391]	Time 0.076 (0.116)	Data 0.030 (0.039)	Loss 0.7757 (0.7662)	Prec@1 39.062 (39.905)
[390/391]	Time 0.124 (0.116)	Data 0.036 (0.039)	Loss 0.8476 (0.7662)	Prec@1 25.000 (39.936)
Testing:
Test: [0/79]	Time 0.034 (0.034)	Loss 2.9373 (2.9373)	Prec@1 37.500 (37.500)
Test: [10/79]	Time 0.040 (0.039)	Loss 2.7492 (2.8482)	Prec@1 35.938 (33.026)
Test: [20/

[260/391]	Time 0.114 (0.119)	Data 0.027 (0.039)	Loss 0.7985 (0.7599)	Prec@1 43.750 (40.547)
[270/391]	Time 0.147 (0.119)	Data 0.066 (0.039)	Loss 0.7282 (0.7594)	Prec@1 46.094 (40.579)
[280/391]	Time 0.124 (0.119)	Data 0.036 (0.039)	Loss 0.6800 (0.7593)	Prec@1 46.094 (40.631)
[290/391]	Time 0.093 (0.119)	Data 0.034 (0.039)	Loss 0.7673 (0.7586)	Prec@1 36.719 (40.700)
[300/391]	Time 0.124 (0.119)	Data 0.034 (0.039)	Loss 0.6962 (0.7582)	Prec@1 48.438 (40.778)
[310/391]	Time 0.127 (0.119)	Data 0.033 (0.039)	Loss 0.7331 (0.7577)	Prec@1 41.406 (40.781)
[320/391]	Time 0.091 (0.119)	Data 0.032 (0.039)	Loss 0.7059 (0.7578)	Prec@1 46.875 (40.764)
[330/391]	Time 0.085 (0.119)	Data 0.028 (0.038)	Loss 0.7515 (0.7575)	Prec@1 39.062 (40.757)
[340/391]	Time 0.130 (0.119)	Data 0.033 (0.038)	Loss 0.7876 (0.7575)	Prec@1 38.281 (40.790)
[350/391]	Time 0.122 (0.119)	Data 0.035 (0.038)	Loss 0.7698 (0.7574)	Prec@1 39.844 (40.814)
[360/391]	Time 0.070 (0.119)	Data 0.028 (0.038)	Loss 0.7660 (0.7575)	Prec@1 41.4

[210/391]	Time 0.122 (0.118)	Data 0.038 (0.040)	Loss 0.8135 (0.7532)	Prec@1 44.531 (41.099)
[220/391]	Time 0.108 (0.118)	Data 0.029 (0.040)	Loss 0.6678 (0.7523)	Prec@1 46.094 (41.148)
[230/391]	Time 0.129 (0.118)	Data 0.041 (0.040)	Loss 0.7245 (0.7526)	Prec@1 41.406 (41.153)
[240/391]	Time 0.126 (0.119)	Data 0.037 (0.040)	Loss 0.7968 (0.7530)	Prec@1 35.938 (41.127)
[250/391]	Time 0.117 (0.119)	Data 0.032 (0.040)	Loss 0.7683 (0.7540)	Prec@1 40.625 (41.055)
[260/391]	Time 0.111 (0.119)	Data 0.023 (0.040)	Loss 0.7097 (0.7539)	Prec@1 44.531 (41.041)
[270/391]	Time 0.125 (0.119)	Data 0.032 (0.039)	Loss 0.6630 (0.7536)	Prec@1 46.875 (41.031)
[280/391]	Time 0.126 (0.119)	Data 0.036 (0.039)	Loss 0.7658 (0.7538)	Prec@1 42.188 (41.028)
[290/391]	Time 0.142 (0.119)	Data 0.050 (0.040)	Loss 0.7370 (0.7533)	Prec@1 40.625 (41.041)
[300/391]	Time 0.155 (0.120)	Data 0.046 (0.040)	Loss 0.7284 (0.7528)	Prec@1 43.750 (41.027)
[310/391]	Time 0.077 (0.119)	Data 0.037 (0.039)	Loss 0.7417 (0.7531)	Prec@1 43.7

[160/391]	Time 0.130 (0.122)	Data 0.040 (0.040)	Loss 0.7069 (0.7472)	Prec@1 39.844 (41.367)
[170/391]	Time 0.122 (0.123)	Data 0.036 (0.040)	Loss 0.7324 (0.7478)	Prec@1 42.188 (41.379)
[180/391]	Time 0.107 (0.122)	Data 0.036 (0.040)	Loss 0.7388 (0.7479)	Prec@1 39.844 (41.311)
[190/391]	Time 0.092 (0.122)	Data 0.035 (0.040)	Loss 0.7558 (0.7481)	Prec@1 39.844 (41.300)
[200/391]	Time 0.124 (0.122)	Data 0.035 (0.040)	Loss 0.7603 (0.7482)	Prec@1 36.719 (41.290)
[210/391]	Time 0.074 (0.121)	Data 0.027 (0.040)	Loss 0.8428 (0.7488)	Prec@1 35.156 (41.266)
[220/391]	Time 0.088 (0.121)	Data 0.034 (0.040)	Loss 0.7842 (0.7493)	Prec@1 36.719 (41.258)
[230/391]	Time 0.129 (0.122)	Data 0.057 (0.040)	Loss 0.7037 (0.7485)	Prec@1 44.531 (41.301)
[240/391]	Time 0.146 (0.121)	Data 0.041 (0.040)	Loss 0.7247 (0.7486)	Prec@1 46.875 (41.225)
[250/391]	Time 0.113 (0.121)	Data 0.028 (0.040)	Loss 0.7538 (0.7480)	Prec@1 35.938 (41.232)
[260/391]	Time 0.107 (0.121)	Data 0.047 (0.040)	Loss 0.7629 (0.7482)	Prec@1 43.7

[110/391]	Time 0.137 (0.123)	Data 0.055 (0.040)	Loss 0.7937 (0.7491)	Prec@1 37.500 (41.075)
[120/391]	Time 0.078 (0.122)	Data 0.033 (0.040)	Loss 0.6965 (0.7485)	Prec@1 42.969 (41.096)
[130/391]	Time 0.121 (0.120)	Data 0.065 (0.040)	Loss 0.7551 (0.7467)	Prec@1 42.188 (41.174)
[140/391]	Time 0.166 (0.119)	Data 0.059 (0.039)	Loss 0.6964 (0.7461)	Prec@1 42.969 (41.334)
[150/391]	Time 0.121 (0.119)	Data 0.032 (0.039)	Loss 0.7302 (0.7465)	Prec@1 47.656 (41.406)
[160/391]	Time 0.131 (0.119)	Data 0.037 (0.039)	Loss 0.7406 (0.7474)	Prec@1 40.625 (41.280)
[170/391]	Time 0.116 (0.119)	Data 0.048 (0.040)	Loss 0.7529 (0.7470)	Prec@1 36.719 (41.224)
[180/391]	Time 0.142 (0.120)	Data 0.050 (0.040)	Loss 0.7102 (0.7464)	Prec@1 44.531 (41.320)
[190/391]	Time 0.126 (0.120)	Data 0.033 (0.039)	Loss 0.7822 (0.7463)	Prec@1 42.188 (41.349)
[200/391]	Time 0.142 (0.120)	Data 0.051 (0.040)	Loss 0.7815 (0.7462)	Prec@1 40.625 (41.395)
[210/391]	Time 0.075 (0.120)	Data 0.029 (0.039)	Loss 0.7299 (0.7464)	Prec@1 43.7

[60/391]	Time 0.124 (0.118)	Data 0.037 (0.038)	Loss 0.7839 (0.7353)	Prec@1 39.062 (42.905)
[70/391]	Time 0.101 (0.117)	Data 0.032 (0.037)	Loss 0.8126 (0.7363)	Prec@1 35.938 (42.815)
[80/391]	Time 0.126 (0.117)	Data 0.038 (0.037)	Loss 0.6426 (0.7386)	Prec@1 52.344 (42.631)
[90/391]	Time 0.158 (0.118)	Data 0.030 (0.037)	Loss 0.6938 (0.7391)	Prec@1 44.531 (42.497)
[100/391]	Time 0.117 (0.119)	Data 0.024 (0.038)	Loss 0.6953 (0.7371)	Prec@1 46.094 (42.582)
[110/391]	Time 0.125 (0.120)	Data 0.032 (0.038)	Loss 0.7268 (0.7376)	Prec@1 43.750 (42.610)
[120/391]	Time 0.117 (0.120)	Data 0.033 (0.038)	Loss 0.7328 (0.7379)	Prec@1 41.406 (42.426)
[130/391]	Time 0.112 (0.120)	Data 0.029 (0.038)	Loss 0.7862 (0.7401)	Prec@1 38.281 (42.241)
[140/391]	Time 0.145 (0.119)	Data 0.058 (0.038)	Loss 0.7684 (0.7392)	Prec@1 43.750 (42.276)
[150/391]	Time 0.119 (0.120)	Data 0.033 (0.038)	Loss 0.7454 (0.7403)	Prec@1 42.188 (42.089)
[160/391]	Time 0.145 (0.120)	Data 0.059 (0.038)	Loss 0.7192 (0.7396)	Prec@1 41.406 (

[10/391]	Time 0.106 (0.115)	Data 0.036 (0.040)	Loss 0.7866 (0.7199)	Prec@1 39.062 (41.974)
[20/391]	Time 0.134 (0.120)	Data 0.029 (0.040)	Loss 0.6881 (0.7258)	Prec@1 45.312 (42.597)
[30/391]	Time 0.128 (0.124)	Data 0.038 (0.039)	Loss 0.7900 (0.7356)	Prec@1 39.844 (42.414)
[40/391]	Time 0.104 (0.121)	Data 0.034 (0.038)	Loss 0.7980 (0.7329)	Prec@1 39.844 (42.340)
[50/391]	Time 0.128 (0.119)	Data 0.062 (0.038)	Loss 0.7408 (0.7324)	Prec@1 39.062 (42.601)
[60/391]	Time 0.116 (0.117)	Data 0.023 (0.038)	Loss 0.7958 (0.7367)	Prec@1 43.750 (42.200)
[70/391]	Time 0.127 (0.118)	Data 0.034 (0.039)	Loss 0.7839 (0.7366)	Prec@1 41.406 (42.232)
[80/391]	Time 0.124 (0.118)	Data 0.046 (0.038)	Loss 0.7082 (0.7354)	Prec@1 40.625 (42.274)
[90/391]	Time 0.118 (0.117)	Data 0.031 (0.039)	Loss 0.7910 (0.7356)	Prec@1 40.625 (42.368)
[100/391]	Time 0.118 (0.116)	Data 0.028 (0.038)	Loss 0.7826 (0.7357)	Prec@1 39.844 (42.265)
[110/391]	Time 0.101 (0.115)	Data 0.052 (0.038)	Loss 0.6957 (0.7363)	Prec@1 43.750 (42.16

Test: [40/79]	Time 0.033 (0.036)	Loss 2.8329 (2.6750)	Prec@1 26.562 (32.965)
Test: [50/79]	Time 0.032 (0.035)	Loss 2.7706 (2.6696)	Prec@1 28.906 (32.874)
Test: [60/79]	Time 0.027 (0.035)	Loss 2.8957 (2.6788)	Prec@1 29.688 (32.544)
Test: [70/79]	Time 0.051 (0.036)	Loss 2.5019 (2.6727)	Prec@1 34.375 (32.658)
 * Prec@1 32.680

===> epoch: 39/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.092 (0.092)	Data 0.042 (0.042)	Loss 0.7515 (0.7515)	Prec@1 39.844 (39.844)
[10/391]	Time 0.132 (0.113)	Data 0.030 (0.037)	Loss 0.7730 (0.7508)	Prec@1 38.281 (41.335)
[20/391]	Time 0.126 (0.113)	Data 0.034 (0.037)	Loss 0.7311 (0.7458)	Prec@1 41.406 (40.848)
[30/391]	Time 0.088 (0.111)	Data 0.033 (0.037)	Loss 0.6987 (0.7395)	Prec@1 45.312 (42.137)
[40/391]	Time 0.121 (0.115)	Data 0.034 (0.039)	Loss 0.7780 (0.7429)	Prec@1 40.625 (41.521)
[50/391]	Time 0.116 (0.115)	Data 0.027 (0.038)	Loss 0.7664 (0.7432)	Prec@1 40.625 (41.513)
[60/391]	Time 0.153 (0.118)	Data 0.070 (0.039)	Loss 0.7039 (0.7408)	Prec@1 47

[380/391]	Time 0.133 (0.118)	Data 0.041 (0.039)	Loss 0.6999 (0.7323)	Prec@1 40.625 (42.288)
[390/391]	Time 0.101 (0.117)	Data 0.025 (0.038)	Loss 0.8122 (0.7325)	Prec@1 38.750 (42.292)
Testing:
Test: [0/79]	Time 0.032 (0.032)	Loss 2.6830 (2.6830)	Prec@1 34.375 (34.375)
Test: [10/79]	Time 0.044 (0.031)	Loss 2.4848 (2.7206)	Prec@1 38.281 (32.884)
Test: [20/79]	Time 0.054 (0.035)	Loss 2.7362 (2.6882)	Prec@1 32.812 (33.854)
Test: [30/79]	Time 0.034 (0.038)	Loss 2.4941 (2.6692)	Prec@1 33.594 (33.367)
Test: [40/79]	Time 0.037 (0.038)	Loss 2.2924 (2.6701)	Prec@1 42.188 (33.689)
Test: [50/79]	Time 0.032 (0.038)	Loss 2.9388 (2.6693)	Prec@1 29.688 (33.931)
Test: [60/79]	Time 0.037 (0.038)	Loss 3.0637 (2.6871)	Prec@1 28.906 (33.645)
Test: [70/79]	Time 0.035 (0.038)	Loss 2.7282 (2.6825)	Prec@1 35.938 (33.858)
 * Prec@1 33.710

===> epoch: 41/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.129 (0.129)	Data 0.071 (0.071)	Loss 0.6238 (0.6238)	Prec@1 50.781 (50.781)
[10/391]	Time 0.121 (0.129)	Data

[330/391]	Time 0.131 (0.119)	Data 0.035 (0.039)	Loss 0.6765 (0.7296)	Prec@1 44.531 (42.336)
[340/391]	Time 0.107 (0.119)	Data 0.032 (0.039)	Loss 0.6832 (0.7295)	Prec@1 48.438 (42.375)
[350/391]	Time 0.109 (0.119)	Data 0.039 (0.039)	Loss 0.7180 (0.7303)	Prec@1 42.188 (42.314)
[360/391]	Time 0.138 (0.119)	Data 0.035 (0.039)	Loss 0.7396 (0.7305)	Prec@1 44.531 (42.309)
[370/391]	Time 0.128 (0.119)	Data 0.034 (0.039)	Loss 0.7172 (0.7301)	Prec@1 42.969 (42.308)
[380/391]	Time 0.132 (0.119)	Data 0.032 (0.039)	Loss 0.7036 (0.7309)	Prec@1 50.781 (42.265)
[390/391]	Time 0.100 (0.119)	Data 0.021 (0.039)	Loss 0.7766 (0.7313)	Prec@1 37.500 (42.216)
Testing:
Test: [0/79]	Time 0.033 (0.033)	Loss 2.6920 (2.6920)	Prec@1 35.938 (35.938)
Test: [10/79]	Time 0.029 (0.029)	Loss 2.7397 (2.8017)	Prec@1 32.812 (32.670)
Test: [20/79]	Time 0.033 (0.029)	Loss 3.1366 (2.7952)	Prec@1 26.562 (32.292)
Test: [30/79]	Time 0.040 (0.032)	Loss 2.7964 (2.7947)	Prec@1 30.469 (31.905)
Test: [40/79]	Time 0.038 (0.033)	Loss 2.

[280/391]	Time 0.108 (0.117)	Data 0.036 (0.039)	Loss 0.7488 (0.7253)	Prec@1 39.062 (43.005)
[290/391]	Time 0.181 (0.117)	Data 0.083 (0.039)	Loss 0.7679 (0.7257)	Prec@1 43.750 (42.996)
[300/391]	Time 0.157 (0.117)	Data 0.067 (0.039)	Loss 0.6817 (0.7250)	Prec@1 50.000 (43.036)
[310/391]	Time 0.130 (0.117)	Data 0.037 (0.039)	Loss 0.6832 (0.7248)	Prec@1 50.781 (43.062)
[320/391]	Time 0.098 (0.117)	Data 0.031 (0.039)	Loss 0.6465 (0.7251)	Prec@1 46.094 (43.030)
[330/391]	Time 0.134 (0.117)	Data 0.027 (0.039)	Loss 0.7727 (0.7253)	Prec@1 37.500 (43.009)
[340/391]	Time 0.079 (0.117)	Data 0.035 (0.039)	Loss 0.7133 (0.7254)	Prec@1 41.406 (42.925)
[350/391]	Time 0.090 (0.117)	Data 0.041 (0.039)	Loss 0.7632 (0.7251)	Prec@1 41.406 (42.940)
[360/391]	Time 0.116 (0.117)	Data 0.027 (0.039)	Loss 0.7942 (0.7254)	Prec@1 43.750 (42.904)
[370/391]	Time 0.105 (0.117)	Data 0.046 (0.039)	Loss 0.7429 (0.7252)	Prec@1 49.219 (42.969)
[380/391]	Time 0.127 (0.117)	Data 0.031 (0.039)	Loss 0.7434 (0.7253)	Prec@1 44.5

[230/391]	Time 0.123 (0.119)	Data 0.033 (0.039)	Loss 0.7706 (0.7168)	Prec@1 44.531 (43.649)
[240/391]	Time 0.117 (0.119)	Data 0.030 (0.039)	Loss 0.7105 (0.7173)	Prec@1 49.219 (43.672)
[250/391]	Time 0.088 (0.119)	Data 0.031 (0.039)	Loss 0.6606 (0.7178)	Prec@1 47.656 (43.607)
[260/391]	Time 0.101 (0.118)	Data 0.037 (0.039)	Loss 0.7869 (0.7180)	Prec@1 42.188 (43.669)
[270/391]	Time 0.096 (0.117)	Data 0.036 (0.039)	Loss 0.7326 (0.7174)	Prec@1 39.062 (43.666)
[280/391]	Time 0.115 (0.117)	Data 0.032 (0.039)	Loss 0.7567 (0.7192)	Prec@1 33.594 (43.453)
[290/391]	Time 0.120 (0.117)	Data 0.028 (0.039)	Loss 0.7518 (0.7196)	Prec@1 38.281 (43.455)
[300/391]	Time 0.139 (0.117)	Data 0.047 (0.039)	Loss 0.7152 (0.7203)	Prec@1 45.312 (43.379)
[310/391]	Time 0.125 (0.117)	Data 0.034 (0.039)	Loss 0.7680 (0.7216)	Prec@1 39.062 (43.258)
[320/391]	Time 0.125 (0.117)	Data 0.039 (0.039)	Loss 0.6800 (0.7216)	Prec@1 46.094 (43.200)
[330/391]	Time 0.149 (0.118)	Data 0.061 (0.039)	Loss 0.7057 (0.7220)	Prec@1 46.0

[180/391]	Time 0.118 (0.103)	Data 0.025 (0.021)	Loss 0.7217 (0.7184)	Prec@1 42.969 (43.137)
[190/391]	Time 0.092 (0.103)	Data 0.020 (0.021)	Loss 0.7265 (0.7184)	Prec@1 47.656 (43.198)
[200/391]	Time 0.120 (0.103)	Data 0.020 (0.021)	Loss 0.7376 (0.7188)	Prec@1 42.188 (43.163)
[210/391]	Time 0.130 (0.104)	Data 0.034 (0.021)	Loss 0.6137 (0.7179)	Prec@1 50.000 (43.187)
[220/391]	Time 0.076 (0.104)	Data 0.019 (0.021)	Loss 0.6834 (0.7176)	Prec@1 45.312 (43.312)
[230/391]	Time 0.125 (0.105)	Data 0.024 (0.021)	Loss 0.7623 (0.7189)	Prec@1 39.062 (43.189)
[240/391]	Time 0.099 (0.105)	Data 0.024 (0.021)	Loss 0.6967 (0.7191)	Prec@1 44.531 (43.166)
[250/391]	Time 0.121 (0.105)	Data 0.029 (0.022)	Loss 0.6907 (0.7187)	Prec@1 48.438 (43.190)
[260/391]	Time 0.116 (0.106)	Data 0.020 (0.022)	Loss 0.7099 (0.7185)	Prec@1 47.656 (43.238)
[270/391]	Time 0.117 (0.106)	Data 0.020 (0.022)	Loss 0.7149 (0.7180)	Prec@1 41.406 (43.260)
[280/391]	Time 0.071 (0.106)	Data 0.020 (0.022)	Loss 0.7087 (0.7191)	Prec@1 41.4

[130/391]	Time 0.088 (0.116)	Data 0.033 (0.035)	Loss 0.7307 (0.7161)	Prec@1 46.094 (43.434)
[140/391]	Time 0.115 (0.116)	Data 0.020 (0.034)	Loss 0.7191 (0.7152)	Prec@1 41.406 (43.490)
[150/391]	Time 0.084 (0.116)	Data 0.035 (0.034)	Loss 0.7380 (0.7170)	Prec@1 46.875 (43.383)
[160/391]	Time 0.123 (0.116)	Data 0.031 (0.034)	Loss 0.6815 (0.7167)	Prec@1 49.219 (43.376)
[170/391]	Time 0.121 (0.116)	Data 0.027 (0.034)	Loss 0.7355 (0.7168)	Prec@1 44.531 (43.366)
[180/391]	Time 0.090 (0.116)	Data 0.031 (0.034)	Loss 0.6952 (0.7165)	Prec@1 37.500 (43.387)
[190/391]	Time 0.100 (0.116)	Data 0.024 (0.034)	Loss 0.6933 (0.7166)	Prec@1 43.750 (43.394)
[200/391]	Time 0.069 (0.116)	Data 0.030 (0.034)	Loss 0.7593 (0.7166)	Prec@1 46.875 (43.455)
[210/391]	Time 0.113 (0.116)	Data 0.021 (0.034)	Loss 0.7143 (0.7167)	Prec@1 42.969 (43.424)
[220/391]	Time 0.096 (0.115)	Data 0.026 (0.033)	Loss 0.6208 (0.7171)	Prec@1 54.688 (43.400)
[230/391]	Time 0.130 (0.116)	Data 0.044 (0.034)	Loss 0.7165 (0.7174)	Prec@1 42.1

[80/391]	Time 0.122 (0.115)	Data 0.032 (0.037)	Loss 0.6894 (0.7098)	Prec@1 46.094 (43.962)
[90/391]	Time 0.136 (0.114)	Data 0.034 (0.037)	Loss 0.7499 (0.7129)	Prec@1 39.844 (43.819)
[100/391]	Time 0.128 (0.114)	Data 0.033 (0.037)	Loss 0.6713 (0.7129)	Prec@1 44.531 (43.773)
[110/391]	Time 0.121 (0.116)	Data 0.034 (0.038)	Loss 0.7650 (0.7141)	Prec@1 39.062 (43.490)
[120/391]	Time 0.112 (0.116)	Data 0.040 (0.038)	Loss 0.7596 (0.7157)	Prec@1 45.312 (43.395)
[130/391]	Time 0.107 (0.115)	Data 0.020 (0.037)	Loss 0.6671 (0.7149)	Prec@1 46.875 (43.500)
[140/391]	Time 0.135 (0.115)	Data 0.040 (0.038)	Loss 0.7341 (0.7153)	Prec@1 44.531 (43.512)
[150/391]	Time 0.113 (0.116)	Data 0.032 (0.038)	Loss 0.7125 (0.7146)	Prec@1 42.188 (43.579)
[160/391]	Time 0.116 (0.116)	Data 0.032 (0.038)	Loss 0.7415 (0.7158)	Prec@1 43.750 (43.619)
[170/391]	Time 0.080 (0.116)	Data 0.032 (0.038)	Loss 0.7113 (0.7157)	Prec@1 46.875 (43.704)
[180/391]	Time 0.127 (0.116)	Data 0.037 (0.038)	Loss 0.6997 (0.7147)	Prec@1 44.531

[30/391]	Time 0.101 (0.110)	Data 0.032 (0.035)	Loss 0.7755 (0.7008)	Prec@1 35.156 (44.506)
[40/391]	Time 0.115 (0.111)	Data 0.025 (0.035)	Loss 0.7365 (0.7065)	Prec@1 37.500 (44.150)
[50/391]	Time 0.077 (0.109)	Data 0.035 (0.034)	Loss 0.7370 (0.7072)	Prec@1 46.875 (44.194)
[60/391]	Time 0.109 (0.110)	Data 0.035 (0.035)	Loss 0.6674 (0.7058)	Prec@1 42.969 (44.211)
[70/391]	Time 0.098 (0.112)	Data 0.053 (0.037)	Loss 0.6735 (0.7059)	Prec@1 49.219 (44.300)
[80/391]	Time 0.119 (0.112)	Data 0.039 (0.038)	Loss 0.6643 (0.7074)	Prec@1 48.438 (44.367)
[90/391]	Time 0.093 (0.112)	Data 0.041 (0.038)	Loss 0.6268 (0.7051)	Prec@1 54.688 (44.411)
[100/391]	Time 0.156 (0.114)	Data 0.052 (0.038)	Loss 0.8031 (0.7064)	Prec@1 33.594 (44.261)
[110/391]	Time 0.104 (0.114)	Data 0.035 (0.038)	Loss 0.6847 (0.7060)	Prec@1 46.875 (44.130)
[120/391]	Time 0.148 (0.115)	Data 0.056 (0.038)	Loss 0.6823 (0.7045)	Prec@1 46.094 (44.208)
[130/391]	Time 0.078 (0.115)	Data 0.033 (0.038)	Loss 0.7177 (0.7035)	Prec@1 42.188 (44.

Test: [60/79]	Time 0.022 (0.022)	Loss 2.6443 (2.3506)	Prec@1 35.938 (38.153)
Test: [70/79]	Time 0.026 (0.022)	Loss 2.3767 (2.3451)	Prec@1 35.156 (38.083)
 * Prec@1 37.960

===> epoch: 56/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.085 (0.085)	Data 0.021 (0.021)	Loss 0.7624 (0.7624)	Prec@1 39.062 (39.062)
[10/391]	Time 0.118 (0.104)	Data 0.020 (0.020)	Loss 0.7177 (0.6925)	Prec@1 38.281 (45.526)
[20/391]	Time 0.085 (0.105)	Data 0.020 (0.020)	Loss 0.7177 (0.6947)	Prec@1 43.750 (44.829)
[30/391]	Time 0.117 (0.105)	Data 0.049 (0.023)	Loss 0.6929 (0.6938)	Prec@1 46.875 (45.338)
[40/391]	Time 0.127 (0.111)	Data 0.036 (0.027)	Loss 0.7337 (0.6956)	Prec@1 46.094 (45.084)
[50/391]	Time 0.140 (0.115)	Data 0.036 (0.029)	Loss 0.6771 (0.6966)	Prec@1 44.531 (44.914)
[60/391]	Time 0.111 (0.113)	Data 0.022 (0.030)	Loss 0.7481 (0.7043)	Prec@1 41.406 (44.237)
[70/391]	Time 0.111 (0.113)	Data 0.020 (0.028)	Loss 0.8363 (0.7047)	Prec@1 34.375 (44.080)
[80/391]	Time 0.115 (0.112)	Data 0.022 (0.027)	Lo

Test: [10/79]	Time 0.051 (0.034)	Loss 2.3153 (2.6227)	Prec@1 39.062 (34.233)
Test: [20/79]	Time 0.032 (0.036)	Loss 2.6858 (2.6087)	Prec@1 39.062 (35.119)
Test: [30/79]	Time 0.032 (0.037)	Loss 2.3753 (2.5878)	Prec@1 35.156 (35.912)
Test: [40/79]	Time 0.033 (0.037)	Loss 2.3703 (2.5821)	Prec@1 33.594 (35.899)
Test: [50/79]	Time 0.034 (0.036)	Loss 2.7778 (2.5862)	Prec@1 28.906 (35.646)
Test: [60/79]	Time 0.036 (0.036)	Loss 2.6834 (2.5932)	Prec@1 36.719 (35.476)
Test: [70/79]	Time 0.032 (0.036)	Loss 2.8138 (2.5904)	Prec@1 32.031 (35.321)
 * Prec@1 35.410

===> epoch: 58/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.123 (0.123)	Data 0.043 (0.043)	Loss 0.7071 (0.7071)	Prec@1 42.969 (42.969)
[10/391]	Time 0.083 (0.114)	Data 0.028 (0.042)	Loss 0.7519 (0.7106)	Prec@1 42.969 (42.330)
[20/391]	Time 0.111 (0.113)	Data 0.034 (0.037)	Loss 0.7489 (0.7035)	Prec@1 46.094 (43.601)
[30/391]	Time 0.132 (0.114)	Data 0.042 (0.040)	Loss 0.7458 (0.7002)	Prec@1 40.625 (44.330)
[40/391]	Time 0.122 (0.113)	

[360/391]	Time 0.122 (0.118)	Data 0.042 (0.039)	Loss 0.6939 (0.7129)	Prec@1 39.844 (43.720)
[370/391]	Time 0.140 (0.117)	Data 0.034 (0.039)	Loss 0.6017 (0.7125)	Prec@1 54.688 (43.752)
[380/391]	Time 0.127 (0.118)	Data 0.034 (0.039)	Loss 0.7246 (0.7124)	Prec@1 40.625 (43.703)
[390/391]	Time 0.079 (0.117)	Data 0.020 (0.039)	Loss 0.7753 (0.7127)	Prec@1 37.500 (43.662)
Testing:
Test: [0/79]	Time 0.050 (0.050)	Loss 3.5891 (3.5891)	Prec@1 24.219 (24.219)
Test: [10/79]	Time 0.053 (0.040)	Loss 3.5249 (3.6196)	Prec@1 24.219 (22.940)
Test: [20/79]	Time 0.031 (0.038)	Loss 3.7012 (3.6059)	Prec@1 21.875 (23.549)
Test: [30/79]	Time 0.032 (0.035)	Loss 3.7184 (3.6612)	Prec@1 21.875 (22.782)
Test: [40/79]	Time 0.028 (0.033)	Loss 3.4687 (3.6850)	Prec@1 26.562 (22.771)
Test: [50/79]	Time 0.036 (0.034)	Loss 3.9280 (3.7005)	Prec@1 23.438 (22.779)
Test: [60/79]	Time 0.041 (0.035)	Loss 4.4873 (3.7137)	Prec@1 18.750 (22.567)
Test: [70/79]	Time 0.036 (0.036)	Loss 3.8603 (3.7159)	Prec@1 21.875 (22.557)
 * Prec@

[310/391]	Time 0.152 (0.120)	Data 0.054 (0.038)	Loss 0.6964 (0.7087)	Prec@1 49.219 (43.823)
[320/391]	Time 0.128 (0.120)	Data 0.035 (0.038)	Loss 0.7518 (0.7087)	Prec@1 35.938 (43.828)
[330/391]	Time 0.137 (0.120)	Data 0.048 (0.038)	Loss 0.7570 (0.7092)	Prec@1 39.844 (43.830)
[340/391]	Time 0.117 (0.121)	Data 0.033 (0.038)	Loss 0.7479 (0.7100)	Prec@1 44.531 (43.777)
[350/391]	Time 0.110 (0.121)	Data 0.043 (0.038)	Loss 0.6948 (0.7100)	Prec@1 42.188 (43.786)
[360/391]	Time 0.155 (0.121)	Data 0.070 (0.038)	Loss 0.6974 (0.7098)	Prec@1 44.531 (43.772)
[370/391]	Time 0.127 (0.120)	Data 0.035 (0.038)	Loss 0.6820 (0.7098)	Prec@1 50.000 (43.809)
[380/391]	Time 0.127 (0.120)	Data 0.045 (0.038)	Loss 0.7103 (0.7095)	Prec@1 51.562 (43.859)
[390/391]	Time 0.062 (0.120)	Data 0.024 (0.038)	Loss 0.7817 (0.7099)	Prec@1 43.750 (43.824)
Testing:
Test: [0/79]	Time 0.030 (0.030)	Loss 2.3284 (2.3284)	Prec@1 39.844 (39.844)
Test: [10/79]	Time 0.033 (0.037)	Loss 2.1052 (2.2770)	Prec@1 44.531 (39.915)
Test: [20/

[260/391]	Time 0.097 (0.117)	Data 0.028 (0.037)	Loss 0.6685 (0.7135)	Prec@1 47.656 (43.777)
[270/391]	Time 0.123 (0.117)	Data 0.033 (0.037)	Loss 0.6812 (0.7130)	Prec@1 48.438 (43.813)
[280/391]	Time 0.141 (0.118)	Data 0.051 (0.037)	Loss 0.7299 (0.7133)	Prec@1 49.219 (43.825)
[290/391]	Time 0.105 (0.118)	Data 0.031 (0.037)	Loss 0.7795 (0.7133)	Prec@1 39.844 (43.822)
[300/391]	Time 0.120 (0.117)	Data 0.027 (0.037)	Loss 0.6870 (0.7127)	Prec@1 42.188 (43.843)
[310/391]	Time 0.248 (0.118)	Data 0.121 (0.038)	Loss 0.7125 (0.7120)	Prec@1 42.969 (43.903)
[320/391]	Time 0.172 (0.118)	Data 0.068 (0.038)	Loss 0.6816 (0.7115)	Prec@1 48.438 (43.913)
[330/391]	Time 0.116 (0.118)	Data 0.025 (0.038)	Loss 0.7163 (0.7115)	Prec@1 36.719 (43.910)
[340/391]	Time 0.072 (0.118)	Data 0.028 (0.038)	Loss 0.6214 (0.7111)	Prec@1 50.781 (43.956)
[350/391]	Time 0.128 (0.118)	Data 0.038 (0.038)	Loss 0.6706 (0.7111)	Prec@1 48.438 (43.986)
[360/391]	Time 0.105 (0.117)	Data 0.035 (0.038)	Loss 0.7050 (0.7113)	Prec@1 42.1

[210/391]	Time 0.124 (0.119)	Data 0.035 (0.041)	Loss 0.7837 (0.7037)	Prec@1 35.156 (44.531)
[220/391]	Time 0.135 (0.120)	Data 0.044 (0.041)	Loss 0.7603 (0.7044)	Prec@1 39.062 (44.450)
[230/391]	Time 0.097 (0.119)	Data 0.029 (0.040)	Loss 0.7540 (0.7037)	Prec@1 42.969 (44.497)
[240/391]	Time 0.113 (0.119)	Data 0.037 (0.040)	Loss 0.6117 (0.7033)	Prec@1 45.312 (44.473)
[250/391]	Time 0.145 (0.119)	Data 0.048 (0.040)	Loss 0.7189 (0.7046)	Prec@1 40.625 (44.369)
[260/391]	Time 0.145 (0.119)	Data 0.044 (0.040)	Loss 0.7980 (0.7057)	Prec@1 36.719 (44.331)
[270/391]	Time 0.126 (0.119)	Data 0.036 (0.040)	Loss 0.7042 (0.7061)	Prec@1 46.094 (44.332)
[280/391]	Time 0.135 (0.119)	Data 0.039 (0.040)	Loss 0.7883 (0.7073)	Prec@1 36.719 (44.248)
[290/391]	Time 0.135 (0.120)	Data 0.034 (0.041)	Loss 0.7894 (0.7073)	Prec@1 36.719 (44.252)
[300/391]	Time 0.125 (0.120)	Data 0.036 (0.040)	Loss 0.6742 (0.7071)	Prec@1 46.875 (44.290)
[310/391]	Time 0.139 (0.121)	Data 0.054 (0.041)	Loss 0.7323 (0.7072)	Prec@1 44.5

[160/391]	Time 0.112 (0.118)	Data 0.019 (0.037)	Loss 0.7708 (0.6990)	Prec@1 37.500 (45.046)
[170/391]	Time 0.120 (0.118)	Data 0.040 (0.037)	Loss 0.7038 (0.6993)	Prec@1 45.312 (45.066)
[180/391]	Time 0.117 (0.118)	Data 0.032 (0.037)	Loss 0.6922 (0.6995)	Prec@1 40.625 (45.006)
[190/391]	Time 0.128 (0.117)	Data 0.051 (0.037)	Loss 0.6629 (0.6995)	Prec@1 46.094 (44.953)
[200/391]	Time 0.120 (0.117)	Data 0.029 (0.037)	Loss 0.7502 (0.7007)	Prec@1 37.500 (44.784)
[210/391]	Time 0.143 (0.117)	Data 0.057 (0.038)	Loss 0.6720 (0.7009)	Prec@1 48.438 (44.683)
[220/391]	Time 0.120 (0.117)	Data 0.033 (0.037)	Loss 0.6745 (0.7006)	Prec@1 46.094 (44.719)
[230/391]	Time 0.126 (0.117)	Data 0.036 (0.037)	Loss 0.7145 (0.7009)	Prec@1 43.750 (44.683)
[240/391]	Time 0.135 (0.116)	Data 0.044 (0.037)	Loss 0.7759 (0.7016)	Prec@1 40.625 (44.573)
[250/391]	Time 0.119 (0.115)	Data 0.033 (0.037)	Loss 0.7434 (0.7024)	Prec@1 46.094 (44.534)
[260/391]	Time 0.129 (0.117)	Data 0.036 (0.038)	Loss 0.7357 (0.7029)	Prec@1 43.7

[110/391]	Time 0.076 (0.119)	Data 0.034 (0.039)	Loss 0.7347 (0.7059)	Prec@1 46.094 (43.877)
[120/391]	Time 0.130 (0.118)	Data 0.035 (0.039)	Loss 0.6574 (0.7044)	Prec@1 47.656 (43.950)
[130/391]	Time 0.142 (0.119)	Data 0.055 (0.039)	Loss 0.7001 (0.7048)	Prec@1 46.875 (44.012)
[140/391]	Time 0.131 (0.117)	Data 0.060 (0.039)	Loss 0.7709 (0.7053)	Prec@1 40.625 (44.044)
[150/391]	Time 0.117 (0.117)	Data 0.037 (0.039)	Loss 0.7030 (0.7066)	Prec@1 42.969 (43.885)
[160/391]	Time 0.128 (0.117)	Data 0.033 (0.039)	Loss 0.6107 (0.7053)	Prec@1 48.438 (43.993)
[170/391]	Time 0.101 (0.116)	Data 0.024 (0.038)	Loss 0.7442 (0.7061)	Prec@1 42.969 (43.978)
[180/391]	Time 0.092 (0.117)	Data 0.033 (0.038)	Loss 0.7181 (0.7047)	Prec@1 39.062 (44.126)
[190/391]	Time 0.105 (0.117)	Data 0.040 (0.039)	Loss 0.7533 (0.7040)	Prec@1 42.188 (44.179)
[200/391]	Time 0.098 (0.117)	Data 0.032 (0.039)	Loss 0.6664 (0.7050)	Prec@1 43.750 (44.174)
[210/391]	Time 0.126 (0.117)	Data 0.036 (0.039)	Loss 0.6508 (0.7050)	Prec@1 50.0

[60/391]	Time 0.094 (0.115)	Data 0.019 (0.037)	Loss 0.7416 (0.7073)	Prec@1 41.406 (43.763)
[70/391]	Time 0.114 (0.112)	Data 0.039 (0.037)	Loss 0.7488 (0.7096)	Prec@1 39.062 (43.299)
[80/391]	Time 0.114 (0.111)	Data 0.035 (0.037)	Loss 0.6629 (0.7085)	Prec@1 49.219 (43.644)
[90/391]	Time 0.122 (0.110)	Data 0.041 (0.037)	Loss 0.7346 (0.7074)	Prec@1 47.656 (43.947)
[100/391]	Time 0.092 (0.109)	Data 0.019 (0.036)	Loss 0.6366 (0.7050)	Prec@1 49.219 (43.974)
[110/391]	Time 0.112 (0.109)	Data 0.035 (0.036)	Loss 0.6835 (0.7036)	Prec@1 49.219 (44.186)
[120/391]	Time 0.098 (0.109)	Data 0.035 (0.036)	Loss 0.7300 (0.7030)	Prec@1 45.312 (44.325)
[130/391]	Time 0.098 (0.108)	Data 0.025 (0.036)	Loss 0.7060 (0.7034)	Prec@1 42.969 (44.305)
[140/391]	Time 0.071 (0.107)	Data 0.027 (0.035)	Loss 0.6842 (0.7035)	Prec@1 42.969 (44.182)
[150/391]	Time 0.114 (0.107)	Data 0.037 (0.036)	Loss 0.7231 (0.7034)	Prec@1 48.438 (44.205)
[160/391]	Time 0.109 (0.107)	Data 0.038 (0.036)	Loss 0.7338 (0.7046)	Prec@1 42.188 (

[10/391]	Time 0.127 (0.115)	Data 0.051 (0.043)	Loss 0.6629 (0.6957)	Prec@1 47.656 (46.520)
[20/391]	Time 0.066 (0.100)	Data 0.027 (0.036)	Loss 0.7324 (0.6895)	Prec@1 39.844 (45.610)
[30/391]	Time 0.110 (0.101)	Data 0.035 (0.036)	Loss 0.7032 (0.6941)	Prec@1 46.094 (45.086)
[40/391]	Time 0.120 (0.101)	Data 0.048 (0.037)	Loss 0.8007 (0.6995)	Prec@1 33.594 (44.474)
[50/391]	Time 0.064 (0.101)	Data 0.030 (0.036)	Loss 0.6728 (0.7014)	Prec@1 48.438 (44.301)
[60/391]	Time 0.103 (0.102)	Data 0.028 (0.036)	Loss 0.7122 (0.7042)	Prec@1 45.312 (44.275)
[70/391]	Time 0.092 (0.101)	Data 0.038 (0.036)	Loss 0.6730 (0.7039)	Prec@1 42.969 (44.366)
[80/391]	Time 0.109 (0.103)	Data 0.035 (0.037)	Loss 0.6821 (0.7046)	Prec@1 46.875 (44.444)
[90/391]	Time 0.103 (0.103)	Data 0.030 (0.036)	Loss 0.7050 (0.7061)	Prec@1 43.750 (44.497)
[100/391]	Time 0.084 (0.104)	Data 0.034 (0.037)	Loss 0.6846 (0.7039)	Prec@1 43.750 (44.678)
[110/391]	Time 0.122 (0.105)	Data 0.040 (0.037)	Loss 0.6798 (0.7026)	Prec@1 46.094 (44.72

Test: [40/79]	Time 0.034 (0.038)	Loss 2.1632 (2.4407)	Prec@1 41.406 (36.871)
Test: [50/79]	Time 0.036 (0.038)	Loss 2.3949 (2.4367)	Prec@1 41.406 (36.811)
Test: [60/79]	Time 0.033 (0.037)	Loss 2.8377 (2.4544)	Prec@1 30.469 (36.219)
Test: [70/79]	Time 0.026 (0.037)	Loss 2.4432 (2.4556)	Prec@1 36.719 (36.246)
 * Prec@1 36.240

===> epoch: 75/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.122 (0.122)	Data 0.046 (0.046)	Loss 0.7449 (0.7449)	Prec@1 43.750 (43.750)
[10/391]	Time 0.105 (0.102)	Data 0.026 (0.030)	Loss 0.7507 (0.6996)	Prec@1 43.750 (44.815)
[20/391]	Time 0.111 (0.102)	Data 0.035 (0.033)	Loss 0.7108 (0.7027)	Prec@1 46.875 (45.387)
[30/391]	Time 0.107 (0.102)	Data 0.030 (0.034)	Loss 0.7062 (0.7045)	Prec@1 48.438 (44.985)
[40/391]	Time 0.101 (0.102)	Data 0.029 (0.033)	Loss 0.6602 (0.7025)	Prec@1 48.438 (45.293)
[50/391]	Time 0.116 (0.100)	Data 0.037 (0.032)	Loss 0.6909 (0.7047)	Prec@1 42.969 (45.190)
[60/391]	Time 0.078 (0.101)	Data 0.034 (0.033)	Loss 0.6824 (0.7026)	Prec@1 46

[380/391]	Time 0.136 (0.121)	Data 0.036 (0.038)	Loss 0.7581 (0.7035)	Prec@1 46.875 (44.488)
[390/391]	Time 0.108 (0.120)	Data 0.024 (0.037)	Loss 0.6259 (0.7037)	Prec@1 47.500 (44.456)
Testing:
Test: [0/79]	Time 0.035 (0.035)	Loss 2.6341 (2.6341)	Prec@1 34.375 (34.375)
Test: [10/79]	Time 0.034 (0.036)	Loss 2.3548 (2.5601)	Prec@1 40.625 (36.648)
Test: [20/79]	Time 0.035 (0.037)	Loss 2.6945 (2.5919)	Prec@1 36.719 (35.454)
Test: [30/79]	Time 0.036 (0.036)	Loss 2.2960 (2.5959)	Prec@1 39.844 (35.484)
Test: [40/79]	Time 0.033 (0.038)	Loss 2.1511 (2.5928)	Prec@1 43.750 (35.290)
Test: [50/79]	Time 0.035 (0.038)	Loss 2.5350 (2.5858)	Prec@1 33.594 (35.172)
Test: [60/79]	Time 0.032 (0.039)	Loss 2.9050 (2.5956)	Prec@1 29.688 (34.939)
Test: [70/79]	Time 0.050 (0.038)	Loss 2.6132 (2.5965)	Prec@1 37.500 (34.881)
 * Prec@1 34.920

===> epoch: 77/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.108 (0.108)	Data 0.028 (0.028)	Loss 0.6692 (0.6692)	Prec@1 48.438 (48.438)
[10/391]	Time 0.127 (0.110)	Data

[330/391]	Time 0.141 (0.121)	Data 0.051 (0.039)	Loss 0.6617 (0.7036)	Prec@1 43.750 (44.614)
[340/391]	Time 0.125 (0.121)	Data 0.037 (0.039)	Loss 0.7580 (0.7031)	Prec@1 43.750 (44.655)
[350/391]	Time 0.124 (0.121)	Data 0.036 (0.039)	Loss 0.7542 (0.7036)	Prec@1 43.750 (44.567)
[360/391]	Time 0.119 (0.121)	Data 0.027 (0.039)	Loss 0.6700 (0.7040)	Prec@1 47.656 (44.492)
[370/391]	Time 0.126 (0.121)	Data 0.036 (0.039)	Loss 0.6773 (0.7042)	Prec@1 43.750 (44.439)
[380/391]	Time 0.128 (0.121)	Data 0.036 (0.039)	Loss 0.7564 (0.7042)	Prec@1 36.719 (44.453)
[390/391]	Time 0.093 (0.121)	Data 0.020 (0.039)	Loss 0.6885 (0.7041)	Prec@1 45.000 (44.468)
Testing:
Test: [0/79]	Time 0.026 (0.026)	Loss 3.7356 (3.7356)	Prec@1 22.656 (22.656)
Test: [10/79]	Time 0.040 (0.036)	Loss 3.5795 (3.4556)	Prec@1 25.000 (25.142)
Test: [20/79]	Time 0.027 (0.037)	Loss 3.5342 (3.5136)	Prec@1 26.562 (24.777)
Test: [30/79]	Time 0.031 (0.034)	Loss 3.4189 (3.5093)	Prec@1 19.531 (23.639)
Test: [40/79]	Time 0.037 (0.035)	Loss 3.

[280/391]	Time 0.122 (0.121)	Data 0.032 (0.038)	Loss 0.7045 (0.7034)	Prec@1 41.406 (44.309)
[290/391]	Time 0.121 (0.121)	Data 0.039 (0.038)	Loss 0.6893 (0.7023)	Prec@1 43.750 (44.416)
[300/391]	Time 0.144 (0.121)	Data 0.061 (0.038)	Loss 0.6942 (0.7021)	Prec@1 45.312 (44.453)
[310/391]	Time 0.123 (0.121)	Data 0.033 (0.038)	Loss 0.7158 (0.7027)	Prec@1 43.750 (44.416)
[320/391]	Time 0.124 (0.121)	Data 0.036 (0.038)	Loss 0.6741 (0.7024)	Prec@1 46.094 (44.485)
[330/391]	Time 0.117 (0.121)	Data 0.025 (0.038)	Loss 0.7786 (0.7029)	Prec@1 32.812 (44.359)
[340/391]	Time 0.074 (0.120)	Data 0.032 (0.038)	Loss 0.7094 (0.7034)	Prec@1 43.750 (44.325)
[350/391]	Time 0.088 (0.120)	Data 0.034 (0.038)	Loss 0.7045 (0.7036)	Prec@1 44.531 (44.333)
[360/391]	Time 0.137 (0.120)	Data 0.045 (0.038)	Loss 0.7134 (0.7038)	Prec@1 42.969 (44.323)
[370/391]	Time 0.057 (0.120)	Data 0.025 (0.038)	Loss 0.6839 (0.7030)	Prec@1 42.969 (44.396)
[380/391]	Time 0.114 (0.120)	Data 0.042 (0.038)	Loss 0.6580 (0.7025)	Prec@1 46.8

[230/391]	Time 0.098 (0.120)	Data 0.035 (0.038)	Loss 0.6794 (0.6989)	Prec@1 42.188 (44.754)
[240/391]	Time 0.142 (0.120)	Data 0.058 (0.038)	Loss 0.6582 (0.6991)	Prec@1 52.344 (44.742)
[250/391]	Time 0.132 (0.120)	Data 0.036 (0.038)	Loss 0.6677 (0.6996)	Prec@1 39.062 (44.631)
[260/391]	Time 0.140 (0.120)	Data 0.044 (0.038)	Loss 0.6913 (0.6999)	Prec@1 44.531 (44.621)
[270/391]	Time 0.118 (0.120)	Data 0.029 (0.038)	Loss 0.7684 (0.7011)	Prec@1 43.750 (44.551)
[280/391]	Time 0.141 (0.121)	Data 0.036 (0.038)	Loss 0.7577 (0.7017)	Prec@1 38.281 (44.537)
[290/391]	Time 0.123 (0.121)	Data 0.032 (0.038)	Loss 0.7150 (0.7019)	Prec@1 46.875 (44.563)
[300/391]	Time 0.145 (0.120)	Data 0.046 (0.038)	Loss 0.6922 (0.7018)	Prec@1 47.656 (44.607)
[310/391]	Time 0.081 (0.120)	Data 0.038 (0.038)	Loss 0.6728 (0.7026)	Prec@1 44.531 (44.519)
[320/391]	Time 0.147 (0.120)	Data 0.052 (0.038)	Loss 0.6875 (0.7026)	Prec@1 46.875 (44.502)
[330/391]	Time 0.126 (0.120)	Data 0.036 (0.038)	Loss 0.7643 (0.7027)	Prec@1 39.0

[180/391]	Time 0.152 (0.119)	Data 0.044 (0.037)	Loss 0.7440 (0.6997)	Prec@1 39.844 (44.674)
[190/391]	Time 0.086 (0.119)	Data 0.032 (0.037)	Loss 0.6660 (0.6991)	Prec@1 46.094 (44.732)
[200/391]	Time 0.096 (0.118)	Data 0.037 (0.037)	Loss 0.7000 (0.6996)	Prec@1 48.438 (44.745)
[210/391]	Time 0.147 (0.119)	Data 0.047 (0.037)	Loss 0.6619 (0.6992)	Prec@1 46.875 (44.705)
[220/391]	Time 0.108 (0.118)	Data 0.031 (0.037)	Loss 0.6512 (0.6989)	Prec@1 48.438 (44.666)
[230/391]	Time 0.096 (0.118)	Data 0.034 (0.037)	Loss 0.7663 (0.6998)	Prec@1 38.281 (44.528)
[240/391]	Time 0.092 (0.118)	Data 0.035 (0.037)	Loss 0.6861 (0.6992)	Prec@1 42.969 (44.580)
[250/391]	Time 0.090 (0.118)	Data 0.036 (0.037)	Loss 0.6663 (0.6994)	Prec@1 47.656 (44.544)
[260/391]	Time 0.098 (0.118)	Data 0.033 (0.037)	Loss 0.7404 (0.7004)	Prec@1 43.750 (44.537)
[270/391]	Time 0.128 (0.118)	Data 0.037 (0.036)	Loss 0.6275 (0.7005)	Prec@1 51.562 (44.543)
[280/391]	Time 0.104 (0.118)	Data 0.031 (0.036)	Loss 0.7247 (0.7006)	Prec@1 42.9

[130/391]	Time 0.119 (0.121)	Data 0.051 (0.036)	Loss 0.7055 (0.6938)	Prec@1 44.531 (45.116)
[140/391]	Time 0.127 (0.122)	Data 0.032 (0.037)	Loss 0.7402 (0.6937)	Prec@1 45.312 (45.113)
[150/391]	Time 0.090 (0.121)	Data 0.028 (0.037)	Loss 0.8368 (0.6953)	Prec@1 31.250 (44.971)
[160/391]	Time 0.132 (0.121)	Data 0.036 (0.037)	Loss 0.6644 (0.6957)	Prec@1 50.781 (45.021)
[170/391]	Time 0.139 (0.121)	Data 0.043 (0.037)	Loss 0.7538 (0.6958)	Prec@1 42.188 (45.006)
[180/391]	Time 0.143 (0.121)	Data 0.050 (0.037)	Loss 0.6927 (0.6956)	Prec@1 49.219 (45.114)
[190/391]	Time 0.119 (0.121)	Data 0.029 (0.037)	Loss 0.6517 (0.6946)	Prec@1 48.438 (45.239)
[200/391]	Time 0.137 (0.121)	Data 0.049 (0.037)	Loss 0.7493 (0.6953)	Prec@1 36.719 (45.176)
[210/391]	Time 0.138 (0.121)	Data 0.047 (0.037)	Loss 0.7247 (0.6958)	Prec@1 42.188 (45.131)
[220/391]	Time 0.087 (0.121)	Data 0.033 (0.037)	Loss 0.7315 (0.6961)	Prec@1 43.750 (45.210)
[230/391]	Time 0.130 (0.121)	Data 0.056 (0.037)	Loss 0.6680 (0.6966)	Prec@1 46.8

[80/391]	Time 0.125 (0.116)	Data 0.034 (0.034)	Loss 0.7302 (0.6930)	Prec@1 39.844 (44.917)
[90/391]	Time 0.132 (0.117)	Data 0.039 (0.035)	Loss 0.7138 (0.6915)	Prec@1 48.438 (45.244)
[100/391]	Time 0.138 (0.116)	Data 0.048 (0.034)	Loss 0.6818 (0.6919)	Prec@1 46.094 (45.274)
[110/391]	Time 0.115 (0.117)	Data 0.035 (0.035)	Loss 0.6370 (0.6936)	Prec@1 56.250 (45.228)
[120/391]	Time 0.166 (0.118)	Data 0.033 (0.035)	Loss 0.7034 (0.6927)	Prec@1 42.188 (45.216)
[130/391]	Time 0.096 (0.118)	Data 0.028 (0.035)	Loss 0.6544 (0.6915)	Prec@1 49.219 (45.318)
[140/391]	Time 0.127 (0.118)	Data 0.035 (0.036)	Loss 0.6174 (0.6933)	Prec@1 48.438 (45.113)
[150/391]	Time 0.087 (0.118)	Data 0.038 (0.036)	Loss 0.7355 (0.6942)	Prec@1 46.094 (45.043)
[160/391]	Time 0.140 (0.118)	Data 0.051 (0.036)	Loss 0.6991 (0.6941)	Prec@1 43.750 (45.152)
[170/391]	Time 0.096 (0.118)	Data 0.035 (0.036)	Loss 0.6799 (0.6959)	Prec@1 46.094 (45.107)
[180/391]	Time 0.124 (0.118)	Data 0.033 (0.036)	Loss 0.6625 (0.6964)	Prec@1 45.312

[30/391]	Time 0.144 (0.124)	Data 0.046 (0.037)	Loss 0.6964 (0.7100)	Prec@1 45.312 (43.800)
[40/391]	Time 0.148 (0.125)	Data 0.054 (0.038)	Loss 0.7446 (0.7136)	Prec@1 41.406 (43.731)
[50/391]	Time 0.089 (0.122)	Data 0.034 (0.038)	Loss 0.6902 (0.7106)	Prec@1 45.312 (43.658)
[60/391]	Time 0.129 (0.123)	Data 0.033 (0.038)	Loss 0.6263 (0.7076)	Prec@1 46.094 (44.096)
[70/391]	Time 0.119 (0.121)	Data 0.049 (0.037)	Loss 0.7003 (0.7034)	Prec@1 47.656 (44.443)
[80/391]	Time 0.127 (0.120)	Data 0.045 (0.037)	Loss 0.7383 (0.7029)	Prec@1 42.969 (44.454)
[90/391]	Time 0.124 (0.120)	Data 0.037 (0.037)	Loss 0.7061 (0.7026)	Prec@1 40.625 (44.505)
[100/391]	Time 0.128 (0.121)	Data 0.040 (0.038)	Loss 0.7358 (0.7017)	Prec@1 44.531 (44.694)
[110/391]	Time 0.105 (0.121)	Data 0.032 (0.038)	Loss 0.7091 (0.7026)	Prec@1 42.969 (44.651)
[120/391]	Time 0.141 (0.121)	Data 0.069 (0.038)	Loss 0.7515 (0.7029)	Prec@1 39.844 (44.564)
[130/391]	Time 0.119 (0.121)	Data 0.029 (0.038)	Loss 0.6230 (0.7032)	Prec@1 49.219 (44.

Test: [60/79]	Time 0.037 (0.034)	Loss 2.4915 (2.2978)	Prec@1 33.594 (39.549)
Test: [70/79]	Time 0.036 (0.034)	Loss 2.2631 (2.3031)	Prec@1 39.844 (39.261)
 * Prec@1 39.110

===> epoch: 92/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.121 (0.121)	Data 0.041 (0.041)	Loss 0.6710 (0.6710)	Prec@1 46.875 (46.875)
[10/391]	Time 0.123 (0.121)	Data 0.051 (0.038)	Loss 0.7468 (0.7104)	Prec@1 33.594 (43.182)
[20/391]	Time 0.115 (0.115)	Data 0.053 (0.037)	Loss 0.6588 (0.6860)	Prec@1 46.094 (45.610)
[30/391]	Time 0.149 (0.115)	Data 0.060 (0.036)	Loss 0.6485 (0.6827)	Prec@1 50.781 (46.295)
[40/391]	Time 0.127 (0.116)	Data 0.034 (0.037)	Loss 0.6918 (0.6877)	Prec@1 43.750 (46.284)
[50/391]	Time 0.140 (0.115)	Data 0.050 (0.037)	Loss 0.6518 (0.6878)	Prec@1 50.781 (46.339)
[60/391]	Time 0.144 (0.115)	Data 0.041 (0.036)	Loss 0.7045 (0.6918)	Prec@1 43.750 (45.825)
[70/391]	Time 0.133 (0.117)	Data 0.046 (0.038)	Loss 0.6503 (0.6917)	Prec@1 50.000 (45.676)
[80/391]	Time 0.105 (0.118)	Data 0.036 (0.038)	Lo

Test: [10/79]	Time 0.033 (0.033)	Loss 2.5482 (2.4141)	Prec@1 34.375 (37.784)
Test: [20/79]	Time 0.032 (0.034)	Loss 2.5197 (2.4112)	Prec@1 37.500 (37.240)
Test: [30/79]	Time 0.037 (0.035)	Loss 2.1341 (2.3791)	Prec@1 42.969 (38.256)
Test: [40/79]	Time 0.027 (0.033)	Loss 2.2464 (2.3607)	Prec@1 37.500 (38.681)
Test: [50/79]	Time 0.041 (0.033)	Loss 2.6234 (2.3603)	Prec@1 31.250 (38.373)
Test: [60/79]	Time 0.037 (0.034)	Loss 2.6739 (2.3662)	Prec@1 35.938 (38.435)
Test: [70/79]	Time 0.041 (0.034)	Loss 2.3808 (2.3583)	Prec@1 38.281 (38.490)
 * Prec@1 38.610

===> epoch: 94/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.121 (0.121)	Data 0.045 (0.045)	Loss 0.7059 (0.7059)	Prec@1 44.531 (44.531)
[10/391]	Time 0.113 (0.129)	Data 0.035 (0.045)	Loss 0.6501 (0.6892)	Prec@1 50.781 (45.312)
[20/391]	Time 0.105 (0.121)	Data 0.034 (0.042)	Loss 0.7160 (0.6964)	Prec@1 48.438 (44.978)
[30/391]	Time 0.141 (0.114)	Data 0.047 (0.039)	Loss 0.6885 (0.6934)	Prec@1 46.094 (45.665)
[40/391]	Time 0.123 (0.114)	

[360/391]	Time 0.147 (0.114)	Data 0.052 (0.038)	Loss 0.6873 (0.6985)	Prec@1 47.656 (44.774)
[370/391]	Time 0.145 (0.115)	Data 0.038 (0.038)	Loss 0.6978 (0.6979)	Prec@1 46.094 (44.841)
[380/391]	Time 0.101 (0.115)	Data 0.028 (0.038)	Loss 0.7078 (0.6978)	Prec@1 40.625 (44.822)
[390/391]	Time 0.098 (0.115)	Data 0.030 (0.038)	Loss 0.6834 (0.6988)	Prec@1 47.500 (44.760)
Testing:
Test: [0/79]	Time 0.055 (0.055)	Loss 3.1575 (3.1575)	Prec@1 29.688 (29.688)
Test: [10/79]	Time 0.031 (0.041)	Loss 2.7339 (2.9189)	Prec@1 29.688 (30.469)
Test: [20/79]	Time 0.034 (0.038)	Loss 2.8650 (2.9074)	Prec@1 35.938 (31.287)
Test: [30/79]	Time 0.032 (0.038)	Loss 2.6161 (2.9224)	Prec@1 35.156 (31.200)
Test: [40/79]	Time 0.044 (0.039)	Loss 2.6613 (2.9458)	Prec@1 31.250 (30.888)
Test: [50/79]	Time 0.039 (0.039)	Loss 2.9277 (2.9349)	Prec@1 31.250 (30.990)
Test: [60/79]	Time 0.054 (0.040)	Loss 3.1406 (2.9424)	Prec@1 25.000 (30.686)
Test: [70/79]	Time 0.048 (0.039)	Loss 2.8757 (2.9249)	Prec@1 35.156 (30.733)
 * Prec@

[310/391]	Time 0.099 (0.109)	Data 0.034 (0.038)	Loss 0.7590 (0.6960)	Prec@1 39.062 (44.878)
[320/391]	Time 0.111 (0.109)	Data 0.034 (0.038)	Loss 0.7193 (0.6966)	Prec@1 42.969 (44.816)
[330/391]	Time 0.128 (0.110)	Data 0.036 (0.038)	Loss 0.8070 (0.6975)	Prec@1 40.625 (44.774)
[340/391]	Time 0.125 (0.110)	Data 0.034 (0.038)	Loss 0.7478 (0.6976)	Prec@1 38.281 (44.763)
[350/391]	Time 0.110 (0.110)	Data 0.028 (0.038)	Loss 0.6774 (0.6975)	Prec@1 45.312 (44.774)
[360/391]	Time 0.110 (0.110)	Data 0.032 (0.038)	Loss 0.6970 (0.6975)	Prec@1 39.844 (44.713)
[370/391]	Time 0.132 (0.110)	Data 0.039 (0.038)	Loss 0.7205 (0.6977)	Prec@1 42.969 (44.695)
[380/391]	Time 0.149 (0.110)	Data 0.053 (0.038)	Loss 0.7575 (0.6981)	Prec@1 40.625 (44.665)
[390/391]	Time 0.103 (0.110)	Data 0.024 (0.038)	Loss 0.7325 (0.6984)	Prec@1 43.750 (44.638)
Testing:
Test: [0/79]	Time 0.050 (0.050)	Loss 2.7068 (2.7068)	Prec@1 31.250 (31.250)
Test: [10/79]	Time 0.036 (0.030)	Loss 2.7534 (2.8665)	Prec@1 29.688 (29.688)
Test: [20/

[260/391]	Time 0.177 (0.113)	Data 0.085 (0.039)	Loss 0.6221 (0.6960)	Prec@1 45.312 (45.112)
[270/391]	Time 0.111 (0.113)	Data 0.025 (0.038)	Loss 0.6809 (0.6965)	Prec@1 42.188 (45.050)
[280/391]	Time 0.110 (0.113)	Data 0.043 (0.038)	Loss 0.6189 (0.6958)	Prec@1 54.688 (45.129)
[290/391]	Time 0.123 (0.112)	Data 0.032 (0.038)	Loss 0.7338 (0.6966)	Prec@1 37.500 (45.060)
[300/391]	Time 0.114 (0.113)	Data 0.028 (0.039)	Loss 0.7230 (0.6969)	Prec@1 40.625 (45.009)
[310/391]	Time 0.096 (0.113)	Data 0.020 (0.038)	Loss 0.6981 (0.6968)	Prec@1 46.094 (45.016)
[320/391]	Time 0.138 (0.113)	Data 0.038 (0.038)	Loss 0.7717 (0.6973)	Prec@1 39.844 (44.982)
[330/391]	Time 0.091 (0.112)	Data 0.033 (0.038)	Loss 0.7240 (0.6977)	Prec@1 40.625 (44.911)
[340/391]	Time 0.130 (0.112)	Data 0.038 (0.038)	Loss 0.7187 (0.6977)	Prec@1 43.750 (44.973)
[350/391]	Time 0.086 (0.112)	Data 0.020 (0.038)	Loss 0.6121 (0.6974)	Prec@1 51.562 (44.988)
[360/391]	Time 0.102 (0.112)	Data 0.032 (0.037)	Loss 0.6811 (0.6971)	Prec@1 43.7

[210/391]	Time 0.146 (0.113)	Data 0.062 (0.039)	Loss 0.6157 (0.5756)	Prec@1 50.781 (54.210)
[220/391]	Time 0.123 (0.114)	Data 0.077 (0.040)	Loss 0.5724 (0.5762)	Prec@1 53.906 (54.101)
[230/391]	Time 0.079 (0.114)	Data 0.019 (0.040)	Loss 0.5168 (0.5758)	Prec@1 54.688 (54.136)
[240/391]	Time 0.123 (0.113)	Data 0.032 (0.040)	Loss 0.5800 (0.5753)	Prec@1 47.656 (54.140)
[250/391]	Time 0.131 (0.114)	Data 0.032 (0.040)	Loss 0.5932 (0.5756)	Prec@1 50.781 (54.056)
[260/391]	Time 0.114 (0.113)	Data 0.033 (0.040)	Loss 0.5950 (0.5758)	Prec@1 54.688 (54.041)
[270/391]	Time 0.080 (0.113)	Data 0.027 (0.040)	Loss 0.5506 (0.5757)	Prec@1 59.375 (54.004)
[280/391]	Time 0.125 (0.113)	Data 0.054 (0.040)	Loss 0.5166 (0.5753)	Prec@1 62.500 (54.104)
[290/391]	Time 0.081 (0.113)	Data 0.031 (0.039)	Loss 0.5306 (0.5756)	Prec@1 57.812 (54.129)
[300/391]	Time 0.132 (0.114)	Data 0.037 (0.040)	Loss 0.6067 (0.5762)	Prec@1 51.562 (54.018)
[310/391]	Time 0.119 (0.115)	Data 0.037 (0.040)	Loss 0.5449 (0.5757)	Prec@1 57.0

[160/391]	Time 0.147 (0.115)	Data 0.046 (0.038)	Loss 0.5160 (0.5577)	Prec@1 57.812 (55.425)
[170/391]	Time 0.111 (0.114)	Data 0.036 (0.037)	Loss 0.5553 (0.5574)	Prec@1 58.594 (55.546)
[180/391]	Time 0.082 (0.114)	Data 0.034 (0.038)	Loss 0.5224 (0.5572)	Prec@1 60.938 (55.572)
[190/391]	Time 0.092 (0.115)	Data 0.040 (0.039)	Loss 0.4777 (0.5563)	Prec@1 61.719 (55.681)
[200/391]	Time 0.121 (0.116)	Data 0.041 (0.039)	Loss 0.6126 (0.5573)	Prec@1 50.781 (55.570)
[210/391]	Time 0.080 (0.116)	Data 0.019 (0.039)	Loss 0.5396 (0.5573)	Prec@1 62.500 (55.543)
[220/391]	Time 0.089 (0.115)	Data 0.032 (0.039)	Loss 0.5151 (0.5572)	Prec@1 60.156 (55.607)
[230/391]	Time 0.132 (0.115)	Data 0.039 (0.038)	Loss 0.5306 (0.5578)	Prec@1 56.250 (55.597)
[240/391]	Time 0.099 (0.115)	Data 0.037 (0.038)	Loss 0.5807 (0.5571)	Prec@1 49.219 (55.666)
[250/391]	Time 0.082 (0.115)	Data 0.026 (0.038)	Loss 0.5860 (0.5572)	Prec@1 52.344 (55.624)
[260/391]	Time 0.108 (0.115)	Data 0.034 (0.038)	Loss 0.5485 (0.5579)	Prec@1 54.6

[110/391]	Time 0.109 (0.114)	Data 0.040 (0.038)	Loss 0.5265 (0.5488)	Prec@1 55.469 (55.990)
[120/391]	Time 0.087 (0.114)	Data 0.031 (0.038)	Loss 0.5329 (0.5484)	Prec@1 52.344 (55.882)
[130/391]	Time 0.086 (0.113)	Data 0.030 (0.038)	Loss 0.5235 (0.5482)	Prec@1 53.906 (55.809)
[140/391]	Time 0.089 (0.113)	Data 0.029 (0.038)	Loss 0.5966 (0.5496)	Prec@1 48.438 (55.696)
[150/391]	Time 0.093 (0.113)	Data 0.030 (0.038)	Loss 0.5902 (0.5497)	Prec@1 51.562 (55.645)
[160/391]	Time 0.086 (0.111)	Data 0.029 (0.038)	Loss 0.5690 (0.5494)	Prec@1 60.938 (55.668)
[170/391]	Time 0.076 (0.111)	Data 0.034 (0.038)	Loss 0.4920 (0.5496)	Prec@1 57.031 (55.620)
[180/391]	Time 0.175 (0.111)	Data 0.091 (0.039)	Loss 0.5547 (0.5496)	Prec@1 57.812 (55.577)
[190/391]	Time 0.135 (0.113)	Data 0.038 (0.040)	Loss 0.5336 (0.5499)	Prec@1 59.375 (55.542)
[200/391]	Time 0.101 (0.114)	Data 0.019 (0.040)	Loss 0.5444 (0.5502)	Prec@1 54.688 (55.508)
[210/391]	Time 0.146 (0.114)	Data 0.051 (0.040)	Loss 0.5715 (0.5511)	Prec@1 47.6

[60/391]	Time 0.119 (0.116)	Data 0.033 (0.038)	Loss 0.5371 (0.5444)	Prec@1 53.906 (55.418)
[70/391]	Time 0.088 (0.115)	Data 0.030 (0.038)	Loss 0.4514 (0.5425)	Prec@1 62.500 (55.656)
[80/391]	Time 0.086 (0.116)	Data 0.027 (0.038)	Loss 0.5663 (0.5428)	Prec@1 54.688 (55.633)
[90/391]	Time 0.091 (0.117)	Data 0.033 (0.039)	Loss 0.6123 (0.5429)	Prec@1 50.781 (55.632)
[100/391]	Time 0.127 (0.116)	Data 0.035 (0.039)	Loss 0.5809 (0.5437)	Prec@1 54.688 (55.724)
[110/391]	Time 0.096 (0.117)	Data 0.034 (0.039)	Loss 0.5585 (0.5438)	Prec@1 53.125 (55.624)
[120/391]	Time 0.124 (0.117)	Data 0.059 (0.040)	Loss 0.4839 (0.5458)	Prec@1 60.938 (55.533)
[130/391]	Time 0.129 (0.117)	Data 0.040 (0.039)	Loss 0.5524 (0.5461)	Prec@1 55.469 (55.487)
[140/391]	Time 0.100 (0.116)	Data 0.039 (0.039)	Loss 0.4826 (0.5460)	Prec@1 63.281 (55.502)
[150/391]	Time 0.104 (0.115)	Data 0.032 (0.039)	Loss 0.5446 (0.5466)	Prec@1 56.250 (55.552)
[160/391]	Time 0.106 (0.115)	Data 0.031 (0.039)	Loss 0.5418 (0.5464)	Prec@1 56.250 (

[10/391]	Time 0.149 (0.125)	Data 0.066 (0.044)	Loss 0.5374 (0.5381)	Prec@1 58.594 (57.528)
[20/391]	Time 0.111 (0.118)	Data 0.035 (0.038)	Loss 0.6531 (0.5478)	Prec@1 49.219 (56.696)
[30/391]	Time 0.111 (0.117)	Data 0.034 (0.039)	Loss 0.5590 (0.5481)	Prec@1 53.125 (55.948)
[40/391]	Time 0.107 (0.114)	Data 0.040 (0.038)	Loss 0.6332 (0.5500)	Prec@1 51.562 (55.926)
[50/391]	Time 0.151 (0.115)	Data 0.054 (0.039)	Loss 0.5088 (0.5474)	Prec@1 61.719 (56.005)
[60/391]	Time 0.109 (0.115)	Data 0.035 (0.039)	Loss 0.5046 (0.5439)	Prec@1 58.594 (56.429)
[70/391]	Time 0.108 (0.114)	Data 0.035 (0.039)	Loss 0.5054 (0.5402)	Prec@1 58.594 (56.591)
[80/391]	Time 0.123 (0.113)	Data 0.034 (0.038)	Loss 0.6014 (0.5418)	Prec@1 48.438 (56.472)
[90/391]	Time 0.120 (0.114)	Data 0.035 (0.038)	Loss 0.5811 (0.5427)	Prec@1 53.906 (56.439)
[100/391]	Time 0.083 (0.113)	Data 0.031 (0.037)	Loss 0.5317 (0.5420)	Prec@1 56.250 (56.490)
[110/391]	Time 0.127 (0.113)	Data 0.037 (0.038)	Loss 0.5579 (0.5429)	Prec@1 56.250 (56.37

Test: [40/79]	Time 0.030 (0.032)	Loss 1.4544 (1.6984)	Prec@1 59.375 (52.496)
Test: [50/79]	Time 0.032 (0.034)	Loss 1.7737 (1.6867)	Prec@1 50.781 (52.834)
Test: [60/79]	Time 0.034 (0.034)	Loss 2.0334 (1.6937)	Prec@1 50.781 (52.805)
Test: [70/79]	Time 0.035 (0.035)	Loss 1.7202 (1.6906)	Prec@1 57.031 (52.949)
 * Prec@1 53.000

===> epoch: 111/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.130 (0.130)	Data 0.036 (0.036)	Loss 0.5104 (0.5104)	Prec@1 63.281 (63.281)
[10/391]	Time 0.152 (0.112)	Data 0.042 (0.037)	Loss 0.6295 (0.5523)	Prec@1 53.125 (57.173)
[20/391]	Time 0.114 (0.117)	Data 0.028 (0.038)	Loss 0.5166 (0.5470)	Prec@1 55.469 (55.990)
[30/391]	Time 0.104 (0.116)	Data 0.032 (0.039)	Loss 0.6168 (0.5498)	Prec@1 47.656 (55.570)
[40/391]	Time 0.139 (0.116)	Data 0.046 (0.038)	Loss 0.5659 (0.5534)	Prec@1 50.781 (55.412)
[50/391]	Time 0.074 (0.116)	Data 0.034 (0.039)	Loss 0.5337 (0.5501)	Prec@1 54.688 (55.607)
[60/391]	Time 0.073 (0.114)	Data 0.028 (0.038)	Loss 0.5171 (0.5502)	Prec@1 5

[380/391]	Time 0.099 (0.113)	Data 0.034 (0.037)	Loss 0.5569 (0.5397)	Prec@1 54.688 (56.451)
[390/391]	Time 0.107 (0.112)	Data 0.020 (0.037)	Loss 0.5815 (0.5397)	Prec@1 55.000 (56.452)
Testing:
Test: [0/79]	Time 0.052 (0.052)	Loss 1.6417 (1.6417)	Prec@1 54.688 (54.688)
Test: [10/79]	Time 0.032 (0.038)	Loss 1.5742 (1.6755)	Prec@1 53.125 (53.977)
Test: [20/79]	Time 0.036 (0.036)	Loss 1.6686 (1.6595)	Prec@1 54.688 (53.981)
Test: [30/79]	Time 0.030 (0.037)	Loss 1.5475 (1.6719)	Prec@1 57.031 (53.377)
Test: [40/79]	Time 0.031 (0.038)	Loss 1.4791 (1.6827)	Prec@1 59.375 (52.954)
Test: [50/79]	Time 0.037 (0.037)	Loss 1.8387 (1.6769)	Prec@1 47.656 (52.819)
Test: [60/79]	Time 0.040 (0.038)	Loss 1.9419 (1.6830)	Prec@1 45.312 (52.741)
Test: [70/79]	Time 0.031 (0.037)	Loss 1.7050 (1.6805)	Prec@1 57.031 (52.828)
 * Prec@1 52.940

===> epoch: 113/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.112 (0.112)	Data 0.038 (0.038)	Loss 0.5024 (0.5024)	Prec@1 59.375 (59.375)
[10/391]	Time 0.151 (0.111)	Dat

[330/391]	Time 0.106 (0.112)	Data 0.036 (0.038)	Loss 0.5274 (0.5362)	Prec@1 60.156 (56.731)
[340/391]	Time 0.115 (0.112)	Data 0.034 (0.038)	Loss 0.4993 (0.5361)	Prec@1 62.500 (56.727)
[350/391]	Time 0.107 (0.112)	Data 0.039 (0.038)	Loss 0.5972 (0.5366)	Prec@1 55.469 (56.691)
[360/391]	Time 0.116 (0.112)	Data 0.036 (0.038)	Loss 0.5924 (0.5371)	Prec@1 53.125 (56.650)
[370/391]	Time 0.089 (0.112)	Data 0.029 (0.038)	Loss 0.5558 (0.5373)	Prec@1 58.594 (56.606)
[380/391]	Time 0.082 (0.112)	Data 0.038 (0.038)	Loss 0.5643 (0.5372)	Prec@1 50.781 (56.578)
[390/391]	Time 0.087 (0.112)	Data 0.035 (0.038)	Loss 0.5260 (0.5375)	Prec@1 55.000 (56.542)
Testing:
Test: [0/79]	Time 0.036 (0.036)	Loss 1.6232 (1.6232)	Prec@1 57.812 (57.812)
Test: [10/79]	Time 0.042 (0.041)	Loss 1.6662 (1.6623)	Prec@1 52.344 (54.403)
Test: [20/79]	Time 0.043 (0.041)	Loss 1.6082 (1.6333)	Prec@1 56.250 (54.204)
Test: [30/79]	Time 0.033 (0.041)	Loss 1.5432 (1.6424)	Prec@1 50.781 (53.679)
Test: [40/79]	Time 0.029 (0.039)	Loss 1.

[280/391]	Time 0.137 (0.113)	Data 0.046 (0.037)	Loss 0.5366 (0.5377)	Prec@1 53.906 (56.703)
[290/391]	Time 0.126 (0.113)	Data 0.037 (0.037)	Loss 0.5000 (0.5378)	Prec@1 57.812 (56.653)
[300/391]	Time 0.124 (0.113)	Data 0.040 (0.037)	Loss 0.5813 (0.5380)	Prec@1 53.906 (56.595)
[310/391]	Time 0.108 (0.113)	Data 0.038 (0.037)	Loss 0.5858 (0.5378)	Prec@1 49.219 (56.574)
[320/391]	Time 0.119 (0.113)	Data 0.028 (0.037)	Loss 0.5898 (0.5381)	Prec@1 50.000 (56.481)
[330/391]	Time 0.115 (0.113)	Data 0.034 (0.037)	Loss 0.5199 (0.5379)	Prec@1 61.719 (56.533)
[340/391]	Time 0.109 (0.113)	Data 0.038 (0.037)	Loss 0.5464 (0.5385)	Prec@1 58.594 (56.475)
[350/391]	Time 0.144 (0.113)	Data 0.050 (0.037)	Loss 0.5118 (0.5384)	Prec@1 59.375 (56.486)
[360/391]	Time 0.105 (0.112)	Data 0.035 (0.037)	Loss 0.4979 (0.5386)	Prec@1 57.031 (56.477)
[370/391]	Time 0.080 (0.112)	Data 0.033 (0.037)	Loss 0.5792 (0.5381)	Prec@1 52.344 (56.494)
[380/391]	Time 0.119 (0.112)	Data 0.042 (0.037)	Loss 0.5060 (0.5376)	Prec@1 55.4

[230/391]	Time 0.090 (0.111)	Data 0.036 (0.036)	Loss 0.4927 (0.5361)	Prec@1 57.812 (56.723)
[240/391]	Time 0.076 (0.110)	Data 0.027 (0.036)	Loss 0.5609 (0.5356)	Prec@1 59.375 (56.775)
[250/391]	Time 0.095 (0.109)	Data 0.028 (0.036)	Loss 0.4837 (0.5359)	Prec@1 57.031 (56.723)
[260/391]	Time 0.130 (0.109)	Data 0.037 (0.036)	Loss 0.5999 (0.5361)	Prec@1 45.312 (56.681)
[270/391]	Time 0.125 (0.109)	Data 0.041 (0.036)	Loss 0.5555 (0.5362)	Prec@1 53.906 (56.662)
[280/391]	Time 0.085 (0.109)	Data 0.029 (0.036)	Loss 0.4523 (0.5357)	Prec@1 67.188 (56.675)
[290/391]	Time 0.086 (0.110)	Data 0.035 (0.036)	Loss 0.4883 (0.5351)	Prec@1 57.812 (56.714)
[300/391]	Time 0.116 (0.110)	Data 0.034 (0.036)	Loss 0.5202 (0.5357)	Prec@1 60.156 (56.668)
[310/391]	Time 0.116 (0.110)	Data 0.034 (0.036)	Loss 0.4599 (0.5359)	Prec@1 65.625 (56.657)
[320/391]	Time 0.099 (0.110)	Data 0.049 (0.036)	Loss 0.5465 (0.5364)	Prec@1 51.562 (56.581)
[330/391]	Time 0.154 (0.110)	Data 0.066 (0.036)	Loss 0.4964 (0.5366)	Prec@1 60.1

[180/391]	Time 0.108 (0.118)	Data 0.033 (0.040)	Loss 0.5972 (0.5378)	Prec@1 53.906 (56.474)
[190/391]	Time 0.096 (0.118)	Data 0.029 (0.040)	Loss 0.5897 (0.5390)	Prec@1 49.219 (56.405)
[200/391]	Time 0.110 (0.117)	Data 0.034 (0.040)	Loss 0.5855 (0.5382)	Prec@1 50.000 (56.452)
[210/391]	Time 0.108 (0.117)	Data 0.044 (0.040)	Loss 0.5271 (0.5384)	Prec@1 59.375 (56.435)
[220/391]	Time 0.086 (0.117)	Data 0.037 (0.040)	Loss 0.5392 (0.5379)	Prec@1 52.344 (56.451)
[230/391]	Time 0.096 (0.117)	Data 0.026 (0.040)	Loss 0.5198 (0.5382)	Prec@1 52.344 (56.433)
[240/391]	Time 0.128 (0.117)	Data 0.035 (0.040)	Loss 0.4970 (0.5382)	Prec@1 64.062 (56.445)
[250/391]	Time 0.112 (0.117)	Data 0.035 (0.040)	Loss 0.5549 (0.5390)	Prec@1 56.250 (56.434)
[260/391]	Time 0.126 (0.117)	Data 0.034 (0.040)	Loss 0.4941 (0.5389)	Prec@1 60.938 (56.445)
[270/391]	Time 0.097 (0.116)	Data 0.031 (0.040)	Loss 0.5512 (0.5391)	Prec@1 55.469 (56.443)
[280/391]	Time 0.120 (0.116)	Data 0.039 (0.040)	Loss 0.4980 (0.5394)	Prec@1 61.7

[130/391]	Time 0.075 (0.113)	Data 0.026 (0.037)	Loss 0.5456 (0.5304)	Prec@1 56.250 (57.037)
[140/391]	Time 0.112 (0.112)	Data 0.036 (0.037)	Loss 0.5303 (0.5301)	Prec@1 57.031 (57.076)
[150/391]	Time 0.127 (0.112)	Data 0.055 (0.037)	Loss 0.4882 (0.5307)	Prec@1 60.156 (57.016)
[160/391]	Time 0.110 (0.113)	Data 0.032 (0.038)	Loss 0.5694 (0.5314)	Prec@1 53.125 (57.017)
[170/391]	Time 0.101 (0.113)	Data 0.036 (0.038)	Loss 0.5379 (0.5317)	Prec@1 53.906 (56.976)
[180/391]	Time 0.103 (0.113)	Data 0.034 (0.037)	Loss 0.5859 (0.5326)	Prec@1 52.344 (56.910)
[190/391]	Time 0.093 (0.112)	Data 0.034 (0.037)	Loss 0.5027 (0.5334)	Prec@1 57.031 (56.786)
[200/391]	Time 0.089 (0.111)	Data 0.037 (0.037)	Loss 0.4988 (0.5330)	Prec@1 57.031 (56.748)
[210/391]	Time 0.083 (0.110)	Data 0.036 (0.037)	Loss 0.4571 (0.5328)	Prec@1 61.719 (56.750)
[220/391]	Time 0.148 (0.110)	Data 0.057 (0.037)	Loss 0.5742 (0.5337)	Prec@1 53.125 (56.550)
[230/391]	Time 0.131 (0.110)	Data 0.033 (0.037)	Loss 0.5442 (0.5339)	Prec@1 62.5

[80/391]	Time 0.093 (0.115)	Data 0.032 (0.039)	Loss 0.5625 (0.5365)	Prec@1 54.688 (56.327)
[90/391]	Time 0.103 (0.114)	Data 0.031 (0.038)	Loss 0.6158 (0.5355)	Prec@1 50.781 (56.362)
[100/391]	Time 0.075 (0.113)	Data 0.033 (0.038)	Loss 0.5692 (0.5344)	Prec@1 57.812 (56.459)
[110/391]	Time 0.120 (0.114)	Data 0.032 (0.038)	Loss 0.5874 (0.5355)	Prec@1 50.000 (56.349)
[120/391]	Time 0.074 (0.114)	Data 0.029 (0.039)	Loss 0.5172 (0.5328)	Prec@1 57.812 (56.612)
[130/391]	Time 0.091 (0.113)	Data 0.029 (0.038)	Loss 0.5694 (0.5324)	Prec@1 52.344 (56.578)
[140/391]	Time 0.132 (0.113)	Data 0.045 (0.038)	Loss 0.4989 (0.5320)	Prec@1 60.938 (56.616)
[150/391]	Time 0.097 (0.113)	Data 0.032 (0.038)	Loss 0.5471 (0.5339)	Prec@1 52.344 (56.498)
[160/391]	Time 0.100 (0.113)	Data 0.034 (0.038)	Loss 0.5173 (0.5345)	Prec@1 60.156 (56.546)
[170/391]	Time 0.089 (0.113)	Data 0.028 (0.038)	Loss 0.5333 (0.5351)	Prec@1 54.688 (56.469)
[180/391]	Time 0.085 (0.112)	Data 0.035 (0.038)	Loss 0.4974 (0.5350)	Prec@1 57.031

[30/391]	Time 0.149 (0.111)	Data 0.053 (0.037)	Loss 0.5590 (0.5251)	Prec@1 50.000 (57.082)
[40/391]	Time 0.100 (0.113)	Data 0.036 (0.037)	Loss 0.5375 (0.5265)	Prec@1 58.594 (57.279)
[50/391]	Time 0.119 (0.113)	Data 0.034 (0.037)	Loss 0.5302 (0.5254)	Prec@1 54.688 (57.384)
[60/391]	Time 0.113 (0.112)	Data 0.030 (0.037)	Loss 0.5772 (0.5260)	Prec@1 53.906 (57.300)
[70/391]	Time 0.088 (0.112)	Data 0.031 (0.036)	Loss 0.6057 (0.5287)	Prec@1 51.562 (57.152)
[80/391]	Time 0.119 (0.112)	Data 0.036 (0.036)	Loss 0.5268 (0.5306)	Prec@1 56.250 (57.002)
[90/391]	Time 0.135 (0.111)	Data 0.041 (0.037)	Loss 0.5235 (0.5311)	Prec@1 60.938 (56.945)
[100/391]	Time 0.102 (0.111)	Data 0.027 (0.036)	Loss 0.5084 (0.5318)	Prec@1 57.031 (57.000)
[110/391]	Time 0.122 (0.112)	Data 0.033 (0.037)	Loss 0.5739 (0.5327)	Prec@1 56.250 (56.989)
[120/391]	Time 0.092 (0.112)	Data 0.037 (0.037)	Loss 0.5053 (0.5316)	Prec@1 56.250 (57.064)
[130/391]	Time 0.128 (0.113)	Data 0.036 (0.037)	Loss 0.5579 (0.5317)	Prec@1 56.250 (57.

Test: [60/79]	Time 0.033 (0.037)	Loss 1.9387 (1.7318)	Prec@1 52.344 (51.793)
Test: [70/79]	Time 0.028 (0.036)	Loss 1.7821 (1.7323)	Prec@1 53.125 (51.783)
 * Prec@1 51.910

===> epoch: 128/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.127 (0.127)	Data 0.033 (0.033)	Loss 0.5482 (0.5482)	Prec@1 54.688 (54.688)
[10/391]	Time 0.133 (0.111)	Data 0.037 (0.034)	Loss 0.5373 (0.5426)	Prec@1 53.906 (56.676)
[20/391]	Time 0.126 (0.107)	Data 0.032 (0.035)	Loss 0.5428 (0.5362)	Prec@1 57.031 (57.292)
[30/391]	Time 0.141 (0.113)	Data 0.048 (0.037)	Loss 0.4813 (0.5319)	Prec@1 63.281 (57.434)
[40/391]	Time 0.120 (0.114)	Data 0.029 (0.036)	Loss 0.4355 (0.5374)	Prec@1 69.531 (57.241)
[50/391]	Time 0.090 (0.114)	Data 0.033 (0.036)	Loss 0.5218 (0.5393)	Prec@1 51.562 (56.832)
[60/391]	Time 0.092 (0.113)	Data 0.033 (0.036)	Loss 0.5638 (0.5385)	Prec@1 48.438 (56.916)
[70/391]	Time 0.146 (0.113)	Data 0.048 (0.037)	Loss 0.5098 (0.5390)	Prec@1 58.594 (56.822)
[80/391]	Time 0.095 (0.113)	Data 0.036 (0.037)	L

Test: [10/79]	Time 0.032 (0.034)	Loss 1.7322 (1.6929)	Prec@1 48.438 (53.409)
Test: [20/79]	Time 0.029 (0.032)	Loss 1.7688 (1.6906)	Prec@1 48.438 (53.423)
Test: [30/79]	Time 0.022 (0.032)	Loss 1.5230 (1.6923)	Prec@1 53.125 (52.747)
Test: [40/79]	Time 0.036 (0.031)	Loss 1.4074 (1.7029)	Prec@1 56.250 (52.458)
Test: [50/79]	Time 0.030 (0.031)	Loss 1.7560 (1.6961)	Prec@1 53.125 (52.390)
Test: [60/79]	Time 0.039 (0.033)	Loss 1.9142 (1.7028)	Prec@1 52.344 (52.318)
Test: [70/79]	Time 0.039 (0.034)	Loss 1.7452 (1.7098)	Prec@1 61.719 (52.278)
 * Prec@1 52.330

===> epoch: 130/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.113 (0.113)	Data 0.035 (0.035)	Loss 0.5312 (0.5312)	Prec@1 53.906 (53.906)
[10/391]	Time 0.105 (0.107)	Data 0.035 (0.036)	Loss 0.5501 (0.5171)	Prec@1 52.344 (58.949)
[20/391]	Time 0.103 (0.111)	Data 0.030 (0.038)	Loss 0.5040 (0.5066)	Prec@1 60.156 (59.189)
[30/391]	Time 0.125 (0.109)	Data 0.034 (0.036)	Loss 0.5577 (0.5177)	Prec@1 58.594 (57.863)
[40/391]	Time 0.096 (0.114)

[360/391]	Time 0.100 (0.111)	Data 0.033 (0.038)	Loss 0.4714 (0.5394)	Prec@1 64.062 (56.460)
[370/391]	Time 0.150 (0.111)	Data 0.067 (0.038)	Loss 0.4956 (0.5394)	Prec@1 59.375 (56.486)
[380/391]	Time 0.108 (0.112)	Data 0.024 (0.038)	Loss 0.5470 (0.5385)	Prec@1 57.031 (56.564)
[390/391]	Time 0.067 (0.112)	Data 0.017 (0.038)	Loss 0.5577 (0.5383)	Prec@1 51.250 (56.578)
Testing:
Test: [0/79]	Time 0.027 (0.027)	Loss 1.7826 (1.7826)	Prec@1 51.562 (51.562)
Test: [10/79]	Time 0.037 (0.037)	Loss 1.7149 (1.8009)	Prec@1 54.688 (52.486)
Test: [20/79]	Time 0.033 (0.038)	Loss 2.0827 (1.8108)	Prec@1 50.000 (52.083)
Test: [30/79]	Time 0.045 (0.039)	Loss 1.7041 (1.8219)	Prec@1 50.781 (50.907)
Test: [40/79]	Time 0.048 (0.039)	Loss 1.6844 (1.8266)	Prec@1 53.906 (50.877)
Test: [50/79]	Time 0.036 (0.039)	Loss 1.9957 (1.8266)	Prec@1 48.438 (50.873)
Test: [60/79]	Time 0.037 (0.039)	Loss 2.0776 (1.8288)	Prec@1 49.219 (50.832)
Test: [70/79]	Time 0.034 (0.039)	Loss 1.7809 (1.8302)	Prec@1 53.125 (50.693)
 * Prec@

[310/391]	Time 0.080 (0.114)	Data 0.031 (0.038)	Loss 0.5561 (0.5350)	Prec@1 53.906 (56.750)
[320/391]	Time 0.103 (0.114)	Data 0.029 (0.038)	Loss 0.4917 (0.5349)	Prec@1 56.250 (56.729)
[330/391]	Time 0.083 (0.114)	Data 0.031 (0.038)	Loss 0.5681 (0.5354)	Prec@1 49.219 (56.680)
[340/391]	Time 0.125 (0.114)	Data 0.038 (0.038)	Loss 0.5739 (0.5353)	Prec@1 52.344 (56.674)
[350/391]	Time 0.075 (0.113)	Data 0.027 (0.038)	Loss 0.6412 (0.5358)	Prec@1 44.531 (56.615)
[360/391]	Time 0.081 (0.113)	Data 0.025 (0.038)	Loss 0.5411 (0.5358)	Prec@1 54.688 (56.633)
[370/391]	Time 0.131 (0.113)	Data 0.038 (0.038)	Loss 0.5937 (0.5367)	Prec@1 52.344 (56.597)
[380/391]	Time 0.108 (0.113)	Data 0.034 (0.038)	Loss 0.5144 (0.5365)	Prec@1 67.188 (56.648)
[390/391]	Time 0.064 (0.113)	Data 0.018 (0.038)	Loss 0.5534 (0.5368)	Prec@1 45.000 (56.654)
Testing:
Test: [0/79]	Time 0.027 (0.027)	Loss 1.6029 (1.6029)	Prec@1 58.594 (58.594)
Test: [10/79]	Time 0.031 (0.033)	Loss 1.7213 (1.6894)	Prec@1 52.344 (55.185)
Test: [20/

[260/391]	Time 0.140 (0.112)	Data 0.065 (0.039)	Loss 0.5391 (0.5410)	Prec@1 57.812 (56.271)
[270/391]	Time 0.097 (0.111)	Data 0.030 (0.039)	Loss 0.5174 (0.5405)	Prec@1 54.688 (56.328)
[280/391]	Time 0.119 (0.111)	Data 0.040 (0.039)	Loss 0.5338 (0.5404)	Prec@1 56.250 (56.336)
[290/391]	Time 0.124 (0.111)	Data 0.032 (0.039)	Loss 0.5457 (0.5406)	Prec@1 57.031 (56.280)
[300/391]	Time 0.099 (0.111)	Data 0.035 (0.039)	Loss 0.5625 (0.5408)	Prec@1 58.594 (56.289)
[310/391]	Time 0.122 (0.111)	Data 0.027 (0.039)	Loss 0.5581 (0.5409)	Prec@1 50.781 (56.295)
[320/391]	Time 0.103 (0.111)	Data 0.036 (0.039)	Loss 0.6089 (0.5406)	Prec@1 53.125 (56.306)
[330/391]	Time 0.116 (0.111)	Data 0.049 (0.039)	Loss 0.5291 (0.5398)	Prec@1 59.375 (56.415)
[340/391]	Time 0.128 (0.112)	Data 0.044 (0.039)	Loss 0.5275 (0.5400)	Prec@1 59.375 (56.394)
[350/391]	Time 0.141 (0.112)	Data 0.057 (0.039)	Loss 0.5999 (0.5396)	Prec@1 54.688 (56.461)
[360/391]	Time 0.138 (0.112)	Data 0.047 (0.039)	Loss 0.5218 (0.5401)	Prec@1 59.3

[210/391]	Time 0.094 (0.112)	Data 0.028 (0.037)	Loss 0.4662 (0.5378)	Prec@1 57.812 (56.520)
[220/391]	Time 0.088 (0.112)	Data 0.035 (0.037)	Loss 0.5464 (0.5375)	Prec@1 56.250 (56.575)
[230/391]	Time 0.118 (0.111)	Data 0.033 (0.037)	Loss 0.5183 (0.5371)	Prec@1 61.719 (56.625)
[240/391]	Time 0.146 (0.112)	Data 0.051 (0.038)	Loss 0.6244 (0.5374)	Prec@1 50.000 (56.623)
[250/391]	Time 0.124 (0.112)	Data 0.028 (0.038)	Loss 0.4963 (0.5377)	Prec@1 60.156 (56.586)
[260/391]	Time 0.162 (0.112)	Data 0.049 (0.038)	Loss 0.5151 (0.5373)	Prec@1 55.469 (56.567)
[270/391]	Time 0.085 (0.112)	Data 0.042 (0.038)	Loss 0.5618 (0.5372)	Prec@1 51.562 (56.515)
[280/391]	Time 0.096 (0.113)	Data 0.032 (0.038)	Loss 0.5436 (0.5376)	Prec@1 57.812 (56.564)
[290/391]	Time 0.080 (0.113)	Data 0.024 (0.038)	Loss 0.4709 (0.5371)	Prec@1 62.500 (56.629)
[300/391]	Time 0.170 (0.113)	Data 0.073 (0.038)	Loss 0.5845 (0.5373)	Prec@1 50.781 (56.561)
[310/391]	Time 0.091 (0.114)	Data 0.034 (0.038)	Loss 0.5415 (0.5373)	Prec@1 50.7

[160/391]	Time 0.101 (0.113)	Data 0.037 (0.037)	Loss 0.5836 (0.5337)	Prec@1 57.812 (57.356)
[170/391]	Time 0.123 (0.113)	Data 0.034 (0.037)	Loss 0.5507 (0.5339)	Prec@1 57.031 (57.401)
[180/391]	Time 0.130 (0.113)	Data 0.043 (0.037)	Loss 0.6125 (0.5342)	Prec@1 48.438 (57.247)
[190/391]	Time 0.089 (0.113)	Data 0.036 (0.037)	Loss 0.5835 (0.5358)	Prec@1 56.250 (57.113)
[200/391]	Time 0.082 (0.112)	Data 0.036 (0.037)	Loss 0.5458 (0.5360)	Prec@1 60.156 (57.121)
[210/391]	Time 0.105 (0.113)	Data 0.034 (0.037)	Loss 0.5293 (0.5359)	Prec@1 53.125 (57.105)
[220/391]	Time 0.121 (0.113)	Data 0.037 (0.037)	Loss 0.5528 (0.5358)	Prec@1 55.469 (57.134)
[230/391]	Time 0.112 (0.113)	Data 0.059 (0.038)	Loss 0.5232 (0.5351)	Prec@1 54.688 (57.204)
[240/391]	Time 0.127 (0.113)	Data 0.072 (0.038)	Loss 0.5383 (0.5353)	Prec@1 58.594 (57.200)
[250/391]	Time 0.099 (0.112)	Data 0.039 (0.038)	Loss 0.5317 (0.5359)	Prec@1 59.375 (57.115)
[260/391]	Time 0.110 (0.112)	Data 0.034 (0.038)	Loss 0.5382 (0.5359)	Prec@1 57.0

[110/391]	Time 0.078 (0.116)	Data 0.027 (0.039)	Loss 0.5492 (0.5340)	Prec@1 56.250 (57.278)
[120/391]	Time 0.107 (0.116)	Data 0.039 (0.038)	Loss 0.4858 (0.5337)	Prec@1 57.031 (57.309)
[130/391]	Time 0.134 (0.116)	Data 0.035 (0.039)	Loss 0.5510 (0.5352)	Prec@1 60.156 (57.240)
[140/391]	Time 0.152 (0.117)	Data 0.059 (0.039)	Loss 0.5347 (0.5340)	Prec@1 59.375 (57.247)
[150/391]	Time 0.101 (0.116)	Data 0.054 (0.039)	Loss 0.5264 (0.5336)	Prec@1 58.594 (57.212)
[160/391]	Time 0.109 (0.115)	Data 0.046 (0.039)	Loss 0.5748 (0.5339)	Prec@1 57.812 (57.167)
[170/391]	Time 0.113 (0.115)	Data 0.035 (0.039)	Loss 0.5627 (0.5351)	Prec@1 54.688 (57.050)
[180/391]	Time 0.076 (0.115)	Data 0.030 (0.039)	Loss 0.5126 (0.5352)	Prec@1 57.812 (56.979)
[190/391]	Time 0.128 (0.115)	Data 0.038 (0.039)	Loss 0.5604 (0.5359)	Prec@1 53.906 (56.876)
[200/391]	Time 0.107 (0.115)	Data 0.043 (0.040)	Loss 0.5002 (0.5361)	Prec@1 62.500 (56.880)
[210/391]	Time 0.133 (0.115)	Data 0.040 (0.040)	Loss 0.5667 (0.5363)	Prec@1 57.0

[60/391]	Time 0.126 (0.114)	Data 0.032 (0.037)	Loss 0.5760 (0.5446)	Prec@1 57.031 (56.032)
[70/391]	Time 0.171 (0.116)	Data 0.076 (0.038)	Loss 0.5362 (0.5416)	Prec@1 53.906 (56.349)
[80/391]	Time 0.137 (0.115)	Data 0.047 (0.038)	Loss 0.6060 (0.5441)	Prec@1 61.719 (56.404)
[90/391]	Time 0.084 (0.113)	Data 0.028 (0.038)	Loss 0.6529 (0.5458)	Prec@1 43.750 (56.198)
[100/391]	Time 0.079 (0.114)	Data 0.034 (0.038)	Loss 0.5340 (0.5428)	Prec@1 57.812 (56.559)
[110/391]	Time 0.101 (0.113)	Data 0.039 (0.038)	Loss 0.5637 (0.5420)	Prec@1 54.688 (56.560)
[120/391]	Time 0.115 (0.114)	Data 0.030 (0.038)	Loss 0.4857 (0.5408)	Prec@1 58.594 (56.657)
[130/391]	Time 0.094 (0.113)	Data 0.027 (0.038)	Loss 0.4792 (0.5386)	Prec@1 60.156 (56.787)
[140/391]	Time 0.112 (0.114)	Data 0.034 (0.039)	Loss 0.5525 (0.5394)	Prec@1 57.031 (56.671)
[150/391]	Time 0.135 (0.114)	Data 0.034 (0.039)	Loss 0.4894 (0.5391)	Prec@1 64.062 (56.716)
[160/391]	Time 0.095 (0.115)	Data 0.032 (0.039)	Loss 0.4670 (0.5396)	Prec@1 65.625 (

[10/391]	Time 0.105 (0.106)	Data 0.037 (0.035)	Loss 0.5054 (0.5383)	Prec@1 57.812 (56.818)
[20/391]	Time 0.129 (0.110)	Data 0.034 (0.035)	Loss 0.5125 (0.5384)	Prec@1 62.500 (57.292)
[30/391]	Time 0.118 (0.112)	Data 0.034 (0.035)	Loss 0.5162 (0.5274)	Prec@1 60.156 (58.695)
[40/391]	Time 0.096 (0.111)	Data 0.031 (0.035)	Loss 0.5695 (0.5342)	Prec@1 53.125 (57.851)
[50/391]	Time 0.105 (0.111)	Data 0.030 (0.034)	Loss 0.5337 (0.5332)	Prec@1 60.156 (58.134)
[60/391]	Time 0.115 (0.110)	Data 0.038 (0.034)	Loss 0.4523 (0.5356)	Prec@1 64.062 (57.812)
[70/391]	Time 0.134 (0.111)	Data 0.038 (0.036)	Loss 0.4826 (0.5353)	Prec@1 62.500 (57.570)
[80/391]	Time 0.107 (0.110)	Data 0.027 (0.035)	Loss 0.4807 (0.5349)	Prec@1 57.031 (57.436)
[90/391]	Time 0.128 (0.111)	Data 0.031 (0.035)	Loss 0.5247 (0.5362)	Prec@1 60.156 (57.315)
[100/391]	Time 0.093 (0.111)	Data 0.031 (0.035)	Loss 0.5008 (0.5346)	Prec@1 58.594 (57.433)
[110/391]	Time 0.104 (0.111)	Data 0.030 (0.035)	Loss 0.5178 (0.5350)	Prec@1 59.375 (57.34

Test: [40/79]	Time 0.028 (0.038)	Loss 1.4470 (1.6781)	Prec@1 61.719 (53.563)
Test: [50/79]	Time 0.035 (0.037)	Loss 1.8787 (1.6798)	Prec@1 49.219 (53.278)
Test: [60/79]	Time 0.034 (0.036)	Loss 2.1127 (1.6861)	Prec@1 46.875 (53.112)
Test: [70/79]	Time 0.035 (0.036)	Loss 1.7445 (1.6857)	Prec@1 53.906 (53.202)
 * Prec@1 53.390

===> epoch: 147/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.119 (0.119)	Data 0.056 (0.056)	Loss 0.5099 (0.5099)	Prec@1 56.250 (56.250)
[10/391]	Time 0.139 (0.129)	Data 0.077 (0.053)	Loss 0.4991 (0.5284)	Prec@1 61.719 (57.528)
[20/391]	Time 0.076 (0.125)	Data 0.026 (0.046)	Loss 0.5393 (0.5202)	Prec@1 50.781 (58.222)
[30/391]	Time 0.125 (0.118)	Data 0.032 (0.042)	Loss 0.5583 (0.5235)	Prec@1 54.688 (58.115)
[40/391]	Time 0.120 (0.117)	Data 0.033 (0.041)	Loss 0.5368 (0.5275)	Prec@1 53.906 (57.679)
[50/391]	Time 0.136 (0.117)	Data 0.049 (0.040)	Loss 0.5488 (0.5285)	Prec@1 54.688 (57.506)
[60/391]	Time 0.111 (0.115)	Data 0.026 (0.039)	Loss 0.5197 (0.5322)	Prec@1 5