In [None]:
import os
import time
import argparse
import numpy as np
from visdom import Visdom
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from utils import *
import dataset

# Teacher models:
# VGG11/VGG13/VGG16/VGG19, GoogLeNet, AlxNet, ResNet18, ResNet34, 
# ResNet50, ResNet101, ResNet152, ResNeXt29_2x64d, ResNeXt29_4x64d, 
# ResNeXt29_8x64d, ResNeXt29_32x64d, PreActResNet18, PreActResNet34, 
# PreActResNet50, PreActResNet101, PreActResNet152, 
# DenseNet121, DenseNet161, DenseNet169, DenseNet201, 
# resnet20:ResNet20, ResNet32, ResNet44, ResNet56, ResNet110, ResNet1202
import models

# Student models:
# myNet, LeNet, FitNet


# main
def main(argv=None):
    """Parse the training configuration, build a ``Solver`` and run it.

    Args:
        argv: Optional list of command-line style tokens to parse. When
            ``None`` (the default) the built-in CIFAR100/ResNet15
            configuration below is used, which is what the original
            zero-argument call did.

    Side effects:
        Creates ``<save_dir>/<dataset>/`` if it does not exist and prints the
        total wall-clock time once training finishes.
    """
    parser = argparse.ArgumentParser(description='cifar model with PyTorch')
    parser.add_argument('--dataset',
                        choices=['MNIST',
                                 'CIFAR10',
                                 'CIFAR100',
                                 'tinyimagenet'
                                 ],
                        default='CIFAR10')
    parser.add_argument('--model',
                        choices=['ResNet8',
                                 'ResNet15',
                                 'ResNet16',
                                 'ResNet20',
                                 'ResNet32',
                                 'ResNet44',
                                 'ResNet56',
                                 'ResNet110',
                                 'ResNet34',
                                 'ResNet50',
                                 'ResNet101',
                                 'DenseNet121',
                                 'VGG19'
                                 ],
                        default='ResNet110',
                        help='name of net to train')
    parser.add_argument('--n_class', default=10, type=int, help='num of classes')
    parser.add_argument('--lr', default=0.001, type=float, help='learning rate')
    parser.add_argument('--epoch', default=50, type=int, help='number of epoch tp train for')
    parser.add_argument('--trainBatchSize', default=128, type=int, help='training batch size')
    parser.add_argument('--testBatchSize', default=128, type=int, help='testing batch size')
    parser.add_argument('--device', default='cuda:0', type=str, help='device: cuda or cpu')
    parser.add_argument('--print_freq', '-p', default=20, type=int, metavar='N', help='print frequency (default: 20)')
    parser.add_argument('--save_every', dest='save_every', type=int, default=10, help='Saves checkpoints at every specified number of epochs')
    parser.add_argument('--save_dir', default='./checkpoint/', type=str, help='model and log saving dir')

    if argv is None:
        # Built-in configuration; an explicit list is parsed instead of
        # sys.argv so the function can be called without touching the
        # process command line.
        argv = ['--dataset', 'CIFAR100', '--model', 'ResNet15', '--n_class', '100', '--lr', '0.1', '--epoch', '200',
                '--trainBatchSize', '128', '--testBatchSize', '128', '--device', 'cuda:0']
    args = parser.parse_args(argv)

    # Join instead of concatenating so a --save_dir without a trailing
    # separator still yields "<save_dir>/<dataset>/". The empty last component
    # keeps the trailing separator that string-concatenating consumers rely on.
    args.save_dir = os.path.join(args.save_dir, args.dataset, '')
    os.makedirs(args.save_dir, exist_ok=True)

    solver = Solver(args)
    start_time = time.time()
    solver.run()
    elapsed_minutes = (time.time() - start_time) / 60
    print('time cost: {:.2f}min'.format(elapsed_minutes))


# Solver
class Solver(object):
    """Train and evaluate a classifier chosen by name from ``models``.

    The solver owns the data loaders, model, optimizer and LR scheduler, logs
    progress to both stdout and ``<save_dir>/<model name>.log``, and plots the
    per-epoch loss and accuracy to a Visdom server.
    """

    def __init__(self, config):
        """Copy hyper-parameters from ``config`` and reset the log file.

        Args:
            config: Object exposing the attributes ``model``, ``dataset``,
                ``n_class``, ``lr``, ``epoch``, ``trainBatchSize``,
                ``testBatchSize``, ``device``, ``print_freq``, ``save_every``
                and ``save_dir`` (e.g. an ``argparse.Namespace``).
        """
        self.model = None
        self.name = config.model
        self.dataset = config.dataset
        self.n_class = config.n_class
        self.lr = config.lr
        self.epochs = config.epoch
        self.train_batch_size = config.trainBatchSize
        self.test_batch_size = config.testBatchSize
        self.criterion = None
        # Populated by load_model(). optimizer_sgd / lr_scheduler are the pair
        # actually stepped during training.
        self.optimizer_sgd = None
        self.lr_scheduler = None
        self.optimizer = None
        self.scheduler = None
        # Fall back to CPU when CUDA is not available.
        self.device = config.device if torch.cuda.is_available() else 'cpu'
        self.train_loader = None
        self.test_loader = None
        self.print_freq = config.print_freq
        self.save_every = config.save_every
        self.save_dir = config.save_dir
        self.logfile = os.path.join(self.save_dir, self.name + '.log')
        # Each run starts with a fresh log because log_out() appends.
        if os.path.exists(self.logfile):
            os.remove(self.logfile)

    def print_model(self):
        """Print the model's module structure to stdout."""
        print(self.model)

    def log_out(self, info):
        """Append ``info`` as one line to the log file and echo it to stdout.

        Args:
            info: Any object; it is converted with ``str`` before writing.
        """
        # The context manager closes the file even if a write raises.
        with open(self.logfile, mode='a') as log_file:
            log_file.write(str(info))
            log_file.write('\n')
        print(info)

    def load_data(self):
        """Build the train/test ``DataLoader`` objects for ``self.dataset``.

        Training images get a random horizontal flip and a padded random
        32x32 crop; both splits are converted to tensors and normalized.
        """
        # NOTE(review): these look like the usual ImageNet channel statistics
        # and are applied to every dataset, including the single-channel MNIST
        # choice - confirm this is intended.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4),
            transforms.ToTensor(),
            normalize,
        ])
        test_transform = transforms.Compose([transforms.ToTensor(), normalize])
        if self.dataset == 'tinyimagenet':
            # NOTE(review): both splits are built with identical arguments
            # apart from the transform; verify dataset.TinyImageNet does not
            # return the same samples for train and test.
            train_set = dataset.TinyImageNet(root='../data/tiny-imagenet-200', transform=train_transform)
            test_set = dataset.TinyImageNet(root='../data/tiny-imagenet-200', transform=test_transform)
        else:
            # The torchvision dataset class is looked up by its name.
            dataset_cls = getattr(datasets, self.dataset)
            train_set = dataset_cls(root='../data', train=True, download=True, transform=train_transform)
            test_set = dataset_cls(root='../data', train=False, download=False, transform=test_transform)
        self.train_loader = DataLoader(train_set, batch_size=self.train_batch_size, shuffle=True, num_workers=4, pin_memory=True)
        self.test_loader = DataLoader(test_set, batch_size=self.test_batch_size, shuffle=False, num_workers=4, pin_memory=True)

    def load_model(self):
        """Instantiate the model, move it to the device and build optimizers."""
        self.model = getattr(models, self.name)(num_classes=self.n_class)
        self.print_model()
        self.model = self.model.to(self.device)

        # SGD + MultiStepLR is the pair used by train() and run().
        self.optimizer_sgd = optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.9, weight_decay=5e-4)
        self.lr_scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer_sgd, milestones=[100, 150])
        # NOTE(review): the Adam optimizer and its scheduler are constructed
        # but never stepped anywhere in this class; kept in case external code
        # reads these attributes.
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[75, 150], gamma=0.5)
        self.criterion = nn.CrossEntropyLoss().to(self.device)

    # train
    def train(self):
        """Run one training epoch over ``self.train_loader``.

        Returns:
            A tuple ``(average loss over the epoch, top-1 accuracy of the
            last batch as a NumPy value)``.
        """
        print('Training:')
        # switch to train mode
        self.model.train()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()

        end = time.time()
        for i, (inputs, target) in enumerate(self.train_loader):

            # measure data loading time
            data_time.update(time.time() - end)

            inputs, target = inputs.to(self.device), target.to(self.device)

            # compute output; the model returns a 5-tuple and only its last
            # element is fed to the loss.
            self.optimizer_sgd.zero_grad()
            _, _, _, _, output = self.model(inputs)
            loss = self.criterion(output, target)

            # compute gradient and do SGD step
            loss.backward()
            self.optimizer_sgd.step()

            output = output.float()
            loss = loss.float()
            # measure accuracy and record loss
            train_acc = self.accuracy(output, target)[0]
            losses.update(loss.item(), inputs.size(0))
            top1.update(train_acc, inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0:
                self.log_out('[{0}/{1}]\t'
                             'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                             'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                             'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                             'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                                 i, len(self.train_loader), batch_time=batch_time,
                                 data_time=data_time, loss=losses, top1=top1))
        # NOTE(review): the second value is the accuracy of the final batch
        # only, not the epoch average held in top1.avg.
        return losses.avg, train_acc.cpu().numpy()

    # test
    def test(self):
        """Evaluate the model on ``self.test_loader`` without gradients.

        Returns:
            A tuple ``(average loss, top-1 accuracy of the last batch,
            average top-1 accuracy)``; the two accuracies are NumPy values.
        """
        print('Testing:')
        # switch to evaluate mode
        self.model.eval()
        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()

        end = time.time()
        with torch.no_grad():
            for i, (inputs, target) in enumerate(self.test_loader):
                inputs, target = inputs.to(self.device), target.to(self.device)

                # compute output (last element of the model's 5-tuple)
                _, _, _, _, output = self.model(inputs)
                loss = self.criterion(output, target)

                output = output.float()
                loss = loss.float()

                # measure accuracy and record loss
                test_acc = self.accuracy(output, target)[0]
                losses.update(loss.item(), inputs.size(0))
                top1.update(test_acc, inputs.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                if i % self.print_freq == 0:
                    self.log_out('Test: [{0}/{1}]\t'
                                 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                                 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                                 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                                     i, len(self.test_loader), batch_time=batch_time, loss=losses,
                                     top1=top1))

        self.log_out(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))

        return losses.avg, test_acc.cpu().numpy(), top1.avg.cpu().numpy()

    def save_model(self, epoch=None):
        """Save the model's ``state_dict`` under ``self.save_dir``.

        Args:
            epoch: When given, the file is named ``<name>_<epoch>.pth``;
                otherwise ``<name>.pth``.
        """
        if epoch is not None:
            file_name = self.name + '_' + str(epoch) + '.pth'
        else:
            file_name = self.name + '.pth'
        model_out_path = os.path.join(self.save_dir, file_name)
        torch.save(self.model.state_dict(), model_out_path)
        self.log_out("* Checkpoint saved to {}".format(model_out_path))

    # compute accuracy
    def accuracy(self, output, target, topk=(1,)):
        """Computes the precision@k for the specified values of k.

        Args:
            output: 2-D tensor of per-class scores, one row per sample.
            target: 1-D tensor of ground-truth class indices.
            topk: Iterable of ``k`` values to evaluate.

        Returns:
            A list with one 0-dim tensor per ``k`` holding the percentage of
            samples whose target is among the ``k`` highest-scoring classes.
        """
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape (not view): the slice of the transposed tensor is not
            # contiguous for k > 1, and view() raises on it.
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

    # run
    def run(self):
        """Load data and model, then train and evaluate for all epochs.

        After every epoch the training loss and the train/test accuracies are
        appended to two Visdom line plots in the ``training`` environment.
        """
        start_time = time.time()
        self.log_out('*********{}********\n'.format(self.name))
        self.load_data()
        self.load_model()

        vis = Visdom(env='training')
        loss_win = vis.line(
            X=np.array([0]),
            Y=np.array([0]),
            opts=dict(
                title=self.name + ' Loss',
                xlabel='epoch',
                xtickmin=0,
                ylabel='loss',
                ytickmin=0,
            ),
            name="loss"
        )

        acc_win = vis.line(
            X=np.column_stack((0, 0)),
            Y=np.column_stack((0, 0)),
            opts=dict(
                title=self.name + ' Acc',
                xlabel='epoch',
                xtickmin=0,
                ylabel='accuracy',
                ytickmin=0,
                ytickmax=100.0,
                legend=['train_acc', 'test_acc']
            ),
            name="acc"
        )

        best_acc = 0
        for epoch in range(1, self.epochs + 1):
            self.log_out("\n===> epoch: {}/{}".format(epoch, self.epochs))
            self.log_out('current lr {:.5e}'.format(self.optimizer_sgd.param_groups[0]['lr']))
            train_loss, train_acc = self.train()
            self.lr_scheduler.step()
            # visualize loss
            vis.line(np.array([train_loss]), np.array([epoch]), loss_win, update="append")
            test_result = self.test()
            top1 = test_result[2]
            # Keep the best test accuracy seen so far.
            # NOTE(review): a checkpoint is written only when a new best is
            # reached after epoch 150, so runs of 150 epochs or fewer never
            # save a model - confirm this is intended.
            if top1 > best_acc:
                best_acc = top1
                if epoch > 150:
                    self.save_model()
            # visualize accuracy
            vis.line(np.column_stack((train_acc, top1)), np.column_stack((epoch, epoch)), acc_win, update="append")

        torch.cuda.empty_cache()
        self.log_out('\nBest ACC: {:.4f}%'.format(best_acc))
        self.log_out("--- {:.3f} mins ---".format((time.time() - start_time)/60))

# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


*********ResNet15********

Files already downloaded and verified


  init.kaiming_normal(m.weight)


ResNet(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_ru




===> epoch: 1/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.510 (0.510)	Data 0.387 (0.387)	Loss 5.9144 (5.9144)	Prec@1 0.000 (0.000)
[20/391]	Time 0.037 (0.081)	Data 0.000 (0.019)	Loss 4.5735 (4.8541)	Prec@1 3.125 (1.079)
[40/391]	Time 0.023 (0.068)	Data 0.000 (0.010)	Loss 4.5927 (4.7219)	Prec@1 1.562 (1.410)
[60/391]	Time 0.049 (0.062)	Data 0.000 (0.007)	Loss 4.5399 (4.6529)	Prec@1 2.344 (1.601)
[80/391]	Time 0.089 (0.062)	Data 0.000 (0.005)	Loss 4.4581 (4.6133)	Prec@1 1.562 (1.707)
[100/391]	Time 0.026 (0.060)	Data 0.001 (0.004)	Loss 4.4695 (4.5821)	Prec@1 2.344 (1.864)
[120/391]	Time 0.029 (0.060)	Data 0.000 (0.004)	Loss 4.4021 (4.5588)	Prec@1 3.125 (1.995)
[140/391]	Time 0.050 (0.060)	Data 0.000 (0.003)	Loss 4.4178 (4.5385)	Prec@1 3.125 (2.150)
[160/391]	Time 0.047 (0.060)	Data 0.001 (0.003)	Loss 4.4714 (4.5216)	Prec@1 3.125 (2.339)
[180/391]	Time 0.082 (0.059)	Data 0.000 (0.002)	Loss 4.3050 (4.5038)	Prec@1 4.688 (2.495)
[200/391]	Time 0.062 (0.059)	Data 0.000 (0.002)	Loss 4

[360/391]	Time 0.035 (0.059)	Data 0.000 (0.001)	Loss 3.1613 (3.1784)	Prec@1 21.094 (20.916)
[380/391]	Time 0.093 (0.059)	Data 0.000 (0.001)	Loss 3.0769 (3.1727)	Prec@1 19.531 (20.975)
Testing:
Test: [0/79]	Time 0.339 (0.339)	Loss 3.2194 (3.2194)	Prec@1 24.219 (24.219)
Test: [20/79]	Time 0.009 (0.034)	Loss 3.3545 (3.1930)	Prec@1 21.094 (21.540)
Test: [40/79]	Time 0.006 (0.024)	Loss 3.0847 (3.2131)	Prec@1 23.438 (20.884)
Test: [60/79]	Time 0.007 (0.019)	Loss 3.6187 (3.2256)	Prec@1 16.406 (20.838)
 * Prec@1 21.010

===> epoch: 5/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.229 (0.229)	Data 0.201 (0.201)	Loss 3.1229 (3.1229)	Prec@1 23.438 (23.438)
[20/391]	Time 0.087 (0.058)	Data 0.000 (0.010)	Loss 3.1710 (3.0341)	Prec@1 18.750 (24.628)
[40/391]	Time 0.029 (0.058)	Data 0.000 (0.006)	Loss 2.8183 (3.0624)	Prec@1 28.125 (23.266)
[60/391]	Time 0.028 (0.059)	Data 0.000 (0.004)	Loss 3.1067 (3.0508)	Prec@1 23.438 (23.335)
[80/391]	Time 0.086 (0.060)	Data 0.000 (0.003)	Loss 2.9925 (3.0593)	

[220/391]	Time 0.090 (0.059)	Data 0.000 (0.002)	Loss 2.5900 (2.6065)	Prec@1 25.781 (31.738)
[240/391]	Time 0.034 (0.058)	Data 0.000 (0.001)	Loss 2.5809 (2.6025)	Prec@1 32.031 (31.824)
[260/391]	Time 0.043 (0.059)	Data 0.000 (0.001)	Loss 2.5376 (2.6022)	Prec@1 35.156 (31.837)
[280/391]	Time 0.029 (0.059)	Data 0.000 (0.001)	Loss 2.4976 (2.6055)	Prec@1 36.719 (31.806)
[300/391]	Time 0.097 (0.059)	Data 0.000 (0.001)	Loss 2.5505 (2.6086)	Prec@1 35.938 (31.785)
[320/391]	Time 0.030 (0.059)	Data 0.000 (0.001)	Loss 2.4501 (2.6051)	Prec@1 36.719 (31.851)
[340/391]	Time 0.073 (0.059)	Data 0.000 (0.001)	Loss 2.3948 (2.6053)	Prec@1 37.500 (31.903)
[360/391]	Time 0.031 (0.058)	Data 0.000 (0.001)	Loss 2.6915 (2.6036)	Prec@1 27.344 (31.904)
[380/391]	Time 0.055 (0.058)	Data 0.000 (0.001)	Loss 2.4573 (2.6003)	Prec@1 36.719 (31.990)
Testing:
Test: [0/79]	Time 0.246 (0.246)	Loss 2.5863 (2.5863)	Prec@1 35.156 (35.156)
Test: [20/79]	Time 0.006 (0.025)	Loss 2.8938 (2.7093)	Prec@1 30.469 (30.841)
Test: [40/

[80/391]	Time 0.023 (0.059)	Data 0.000 (0.003)	Loss 2.4076 (2.3130)	Prec@1 31.250 (38.378)
[100/391]	Time 0.043 (0.059)	Data 0.000 (0.002)	Loss 2.3065 (2.3040)	Prec@1 35.156 (38.420)
[120/391]	Time 0.047 (0.058)	Data 0.000 (0.002)	Loss 2.4843 (2.3117)	Prec@1 31.250 (37.984)
[140/391]	Time 0.033 (0.057)	Data 0.000 (0.002)	Loss 2.4944 (2.3182)	Prec@1 30.469 (37.921)
[160/391]	Time 0.094 (0.058)	Data 0.000 (0.002)	Loss 2.3425 (2.3239)	Prec@1 36.719 (37.723)
[180/391]	Time 0.090 (0.058)	Data 0.000 (0.001)	Loss 2.4434 (2.3212)	Prec@1 31.250 (37.776)
[200/391]	Time 0.048 (0.058)	Data 0.000 (0.001)	Loss 2.4954 (2.3270)	Prec@1 37.500 (37.683)
[220/391]	Time 0.032 (0.058)	Data 0.000 (0.001)	Loss 2.4148 (2.3336)	Prec@1 32.812 (37.557)
[240/391]	Time 0.053 (0.058)	Data 0.000 (0.001)	Loss 2.3272 (2.3354)	Prec@1 39.062 (37.643)
[260/391]	Time 0.030 (0.058)	Data 0.000 (0.001)	Loss 2.3824 (2.3348)	Prec@1 37.500 (37.710)
[280/391]	Time 0.089 (0.058)	Data 0.000 (0.001)	Loss 2.3237 (2.3335)	Prec@1 41.40

Test: [20/79]	Time 0.013 (0.033)	Loss 2.6005 (2.4683)	Prec@1 36.719 (35.491)
Test: [40/79]	Time 0.013 (0.024)	Loss 2.1406 (2.4894)	Prec@1 46.875 (35.137)
Test: [60/79]	Time 0.017 (0.021)	Loss 2.9140 (2.4998)	Prec@1 34.375 (35.464)
 * Prec@1 35.510

===> epoch: 16/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.377 (0.377)	Data 0.327 (0.327)	Loss 2.1643 (2.1643)	Prec@1 38.281 (38.281)
[20/391]	Time 0.078 (0.071)	Data 0.000 (0.016)	Loss 1.9624 (2.1227)	Prec@1 47.656 (41.890)
[40/391]	Time 0.098 (0.067)	Data 0.000 (0.008)	Loss 2.1922 (2.1550)	Prec@1 40.625 (41.387)
[60/391]	Time 0.071 (0.065)	Data 0.006 (0.006)	Loss 2.3098 (2.1691)	Prec@1 36.719 (41.253)
[80/391]	Time 0.035 (0.064)	Data 0.000 (0.004)	Loss 2.2524 (2.1819)	Prec@1 40.625 (41.194)
[100/391]	Time 0.055 (0.063)	Data 0.000 (0.004)	Loss 2.1989 (2.1835)	Prec@1 40.625 (41.136)
[120/391]	Time 0.050 (0.062)	Data 0.000 (0.003)	Loss 2.1102 (2.1872)	Prec@1 42.969 (41.038)
[140/391]	Time 0.090 (0.061)	Data 0.000 (0.003)	Loss 2.0688 (

[280/391]	Time 0.053 (0.057)	Data 0.000 (0.002)	Loss 2.0605 (2.1314)	Prec@1 46.875 (42.168)
[300/391]	Time 0.029 (0.057)	Data 0.000 (0.002)	Loss 2.1969 (2.1320)	Prec@1 34.375 (42.151)
[320/391]	Time 0.089 (0.057)	Data 0.000 (0.002)	Loss 2.3874 (2.1312)	Prec@1 32.812 (42.153)
[340/391]	Time 0.038 (0.057)	Data 0.000 (0.002)	Loss 2.0972 (2.1324)	Prec@1 45.312 (42.144)
[360/391]	Time 0.094 (0.058)	Data 0.000 (0.002)	Loss 1.9238 (2.1326)	Prec@1 48.438 (42.136)
[380/391]	Time 0.094 (0.058)	Data 0.000 (0.002)	Loss 2.2247 (2.1322)	Prec@1 39.062 (42.138)
Testing:
Test: [0/79]	Time 0.248 (0.248)	Loss 2.6207 (2.6207)	Prec@1 37.500 (37.500)
Test: [20/79]	Time 0.008 (0.025)	Loss 2.6453 (2.5781)	Prec@1 35.938 (36.756)
Test: [40/79]	Time 0.023 (0.019)	Loss 2.5018 (2.6165)	Prec@1 40.625 (35.804)
Test: [60/79]	Time 0.024 (0.018)	Loss 3.0024 (2.6235)	Prec@1 28.125 (34.977)
 * Prec@1 35.090

===> epoch: 20/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.362 (0.362)	Data 0.286 (0.286)	Loss 2.1959 (2.1

[140/391]	Time 0.026 (0.055)	Data 0.000 (0.001)	Loss 2.2323 (2.0577)	Prec@1 37.500 (44.016)
[160/391]	Time 0.031 (0.054)	Data 0.000 (0.001)	Loss 2.0418 (2.0683)	Prec@1 42.188 (43.701)
[180/391]	Time 0.090 (0.054)	Data 0.000 (0.001)	Loss 1.8613 (2.0660)	Prec@1 53.125 (43.703)
[200/391]	Time 0.049 (0.054)	Data 0.000 (0.001)	Loss 1.9650 (2.0664)	Prec@1 44.531 (43.626)
[220/391]	Time 0.092 (0.054)	Data 0.001 (0.001)	Loss 2.2080 (2.0674)	Prec@1 44.531 (43.651)
[240/391]	Time 0.032 (0.054)	Data 0.000 (0.001)	Loss 2.2509 (2.0691)	Prec@1 34.375 (43.598)
[260/391]	Time 0.038 (0.055)	Data 0.000 (0.001)	Loss 2.2411 (2.0680)	Prec@1 39.062 (43.588)
[280/391]	Time 0.033 (0.055)	Data 0.001 (0.001)	Loss 1.8723 (2.0645)	Prec@1 46.094 (43.667)
[300/391]	Time 0.031 (0.056)	Data 0.000 (0.001)	Loss 1.9318 (2.0691)	Prec@1 50.000 (43.529)
[320/391]	Time 0.039 (0.056)	Data 0.000 (0.001)	Loss 2.0445 (2.0696)	Prec@1 50.000 (43.497)
[340/391]	Time 0.125 (0.056)	Data 0.000 (0.001)	Loss 2.0424 (2.0702)	Prec@1 42.1

[0/391]	Time 0.223 (0.223)	Data 0.173 (0.173)	Loss 2.0875 (2.0875)	Prec@1 45.312 (45.312)
[20/391]	Time 0.051 (0.065)	Data 0.000 (0.009)	Loss 1.7028 (2.0198)	Prec@1 51.562 (45.499)
[40/391]	Time 0.070 (0.060)	Data 0.000 (0.005)	Loss 2.2185 (2.0245)	Prec@1 42.969 (44.989)
[60/391]	Time 0.030 (0.060)	Data 0.000 (0.003)	Loss 1.9993 (2.0234)	Prec@1 42.188 (44.531)
[80/391]	Time 0.032 (0.059)	Data 0.000 (0.002)	Loss 2.0397 (2.0347)	Prec@1 36.719 (44.348)
[100/391]	Time 0.034 (0.059)	Data 0.000 (0.002)	Loss 2.0350 (2.0308)	Prec@1 42.188 (44.624)
[120/391]	Time 0.066 (0.059)	Data 0.000 (0.002)	Loss 1.9499 (2.0163)	Prec@1 44.531 (44.835)
[140/391]	Time 0.042 (0.059)	Data 0.000 (0.002)	Loss 2.2466 (2.0228)	Prec@1 37.500 (44.775)
[160/391]	Time 0.034 (0.058)	Data 0.000 (0.001)	Loss 2.1788 (2.0236)	Prec@1 44.531 (44.895)
[180/391]	Time 0.028 (0.057)	Data 0.000 (0.001)	Loss 1.8550 (2.0281)	Prec@1 47.656 (44.652)
[200/391]	Time 0.025 (0.056)	Data 0.000 (0.001)	Loss 1.9990 (2.0335)	Prec@1 50.781 (44

[340/391]	Time 0.048 (0.059)	Data 0.000 (0.002)	Loss 1.9010 (2.0226)	Prec@1 43.750 (44.822)
[360/391]	Time 0.034 (0.059)	Data 0.000 (0.002)	Loss 2.0266 (2.0223)	Prec@1 39.844 (44.800)
[380/391]	Time 0.078 (0.058)	Data 0.000 (0.002)	Loss 2.3218 (2.0230)	Prec@1 38.281 (44.788)
Testing:
Test: [0/79]	Time 0.264 (0.264)	Loss 2.1192 (2.1192)	Prec@1 43.750 (43.750)
Test: [20/79]	Time 0.006 (0.026)	Loss 2.3686 (2.2333)	Prec@1 39.062 (41.332)
Test: [40/79]	Time 0.012 (0.020)	Loss 2.0720 (2.2477)	Prec@1 48.438 (41.311)
Test: [60/79]	Time 0.013 (0.019)	Loss 2.7179 (2.2587)	Prec@1 36.719 (40.881)
 * Prec@1 40.770

===> epoch: 31/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.312 (0.312)	Data 0.230 (0.230)	Loss 2.1101 (2.1101)	Prec@1 42.969 (42.969)
[20/391]	Time 0.031 (0.067)	Data 0.000 (0.011)	Loss 2.0952 (2.0139)	Prec@1 42.188 (43.899)
[40/391]	Time 0.065 (0.064)	Data 0.000 (0.006)	Loss 2.1371 (2.0033)	Prec@1 45.312 (44.607)
[60/391]	Time 0.095 (0.062)	Data 0.000 (0.004)	Loss 2.0014 (2.0116

[200/391]	Time 0.051 (0.057)	Data 0.000 (0.002)	Loss 2.1278 (1.9958)	Prec@1 42.969 (45.231)
[220/391]	Time 0.025 (0.057)	Data 0.000 (0.002)	Loss 2.2059 (1.9958)	Prec@1 37.500 (45.228)
[240/391]	Time 0.074 (0.056)	Data 0.000 (0.002)	Loss 1.9822 (2.0004)	Prec@1 42.969 (45.154)
[260/391]	Time 0.065 (0.056)	Data 0.000 (0.002)	Loss 1.8174 (1.9999)	Prec@1 53.125 (45.178)
[280/391]	Time 0.035 (0.056)	Data 0.000 (0.002)	Loss 1.9907 (2.0015)	Prec@1 49.219 (45.146)
[300/391]	Time 0.038 (0.056)	Data 0.000 (0.002)	Loss 2.0100 (2.0047)	Prec@1 47.656 (45.066)
[320/391]	Time 0.060 (0.056)	Data 0.001 (0.002)	Loss 2.2558 (2.0063)	Prec@1 42.188 (45.016)
[340/391]	Time 0.052 (0.056)	Data 0.000 (0.002)	Loss 2.1186 (2.0048)	Prec@1 47.656 (45.072)
[360/391]	Time 0.060 (0.056)	Data 0.000 (0.001)	Loss 2.0151 (2.0038)	Prec@1 45.312 (45.118)
[380/391]	Time 0.035 (0.056)	Data 0.000 (0.001)	Loss 2.2114 (2.0036)	Prec@1 45.312 (45.200)
Testing:
Test: [0/79]	Time 0.319 (0.319)	Loss 2.3072 (2.3072)	Prec@1 39.844 (39.

[60/391]	Time 0.072 (0.062)	Data 0.001 (0.008)	Loss 2.0730 (1.9571)	Prec@1 39.844 (46.119)
[80/391]	Time 0.041 (0.059)	Data 0.000 (0.006)	Loss 1.8405 (1.9590)	Prec@1 46.875 (46.200)
[100/391]	Time 0.071 (0.058)	Data 0.000 (0.005)	Loss 1.9438 (1.9614)	Prec@1 44.531 (46.071)
[120/391]	Time 0.089 (0.058)	Data 0.000 (0.004)	Loss 2.1234 (1.9564)	Prec@1 46.875 (46.165)
[140/391]	Time 0.021 (0.057)	Data 0.000 (0.004)	Loss 2.1091 (1.9709)	Prec@1 42.969 (45.883)
[160/391]	Time 0.036 (0.057)	Data 0.000 (0.003)	Loss 1.9037 (1.9767)	Prec@1 42.969 (45.798)
[180/391]	Time 0.050 (0.057)	Data 0.000 (0.003)	Loss 1.9009 (1.9822)	Prec@1 50.000 (45.541)
[200/391]	Time 0.073 (0.056)	Data 0.000 (0.003)	Loss 2.2513 (1.9847)	Prec@1 38.281 (45.511)
[220/391]	Time 0.039 (0.056)	Data 0.000 (0.002)	Loss 1.9541 (1.9848)	Prec@1 50.000 (45.585)
[240/391]	Time 0.044 (0.056)	Data 0.000 (0.002)	Loss 1.7317 (1.9860)	Prec@1 55.469 (45.572)
[260/391]	Time 0.030 (0.056)	Data 0.000 (0.002)	Loss 1.8670 (1.9855)	Prec@1 49.219

Testing:
Test: [0/79]	Time 0.327 (0.327)	Loss 2.5386 (2.5386)	Prec@1 32.812 (32.812)
Test: [20/79]	Time 0.039 (0.032)	Loss 2.9842 (2.6136)	Prec@1 25.000 (33.668)
Test: [40/79]	Time 0.016 (0.024)	Loss 2.1876 (2.5994)	Prec@1 38.281 (34.127)
Test: [60/79]	Time 0.006 (0.020)	Loss 2.7602 (2.6233)	Prec@1 33.594 (34.324)
 * Prec@1 34.640

===> epoch: 42/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.347 (0.347)	Data 0.246 (0.246)	Loss 2.1182 (2.1182)	Prec@1 43.750 (43.750)
[20/391]	Time 0.037 (0.065)	Data 0.000 (0.012)	Loss 1.9205 (1.9830)	Prec@1 49.219 (44.680)
[40/391]	Time 0.048 (0.052)	Data 0.000 (0.006)	Loss 1.7994 (1.9576)	Prec@1 53.125 (45.998)
[60/391]	Time 0.025 (0.048)	Data 0.000 (0.004)	Loss 1.7895 (1.9556)	Prec@1 46.094 (46.311)
[80/391]	Time 0.039 (0.047)	Data 0.000 (0.003)	Loss 1.8640 (1.9372)	Prec@1 46.094 (46.701)
[100/391]	Time 0.114 (0.046)	Data 0.021 (0.003)	Loss 1.7211 (1.9401)	Prec@1 48.438 (46.481)
[120/391]	Time 0.033 (0.046)	Data 0.000 (0.003)	Loss 1.9685 (1.9375)

[260/391]	Time 0.026 (0.056)	Data 0.000 (0.002)	Loss 1.8372 (1.9570)	Prec@1 56.250 (46.300)
[280/391]	Time 0.076 (0.056)	Data 0.000 (0.002)	Loss 1.8879 (1.9532)	Prec@1 49.219 (46.425)
[300/391]	Time 0.052 (0.056)	Data 0.000 (0.002)	Loss 1.9267 (1.9519)	Prec@1 49.219 (46.475)
[320/391]	Time 0.078 (0.055)	Data 0.000 (0.001)	Loss 1.7260 (1.9570)	Prec@1 48.438 (46.369)
[340/391]	Time 0.071 (0.055)	Data 0.000 (0.001)	Loss 1.8647 (1.9567)	Prec@1 46.875 (46.481)
[360/391]	Time 0.045 (0.056)	Data 0.000 (0.001)	Loss 2.0922 (1.9592)	Prec@1 42.188 (46.444)
[380/391]	Time 0.048 (0.055)	Data 0.000 (0.001)	Loss 2.0148 (1.9592)	Prec@1 45.312 (46.440)
Testing:
Test: [0/79]	Time 0.354 (0.354)	Loss 2.7959 (2.7959)	Prec@1 32.031 (32.031)
Test: [20/79]	Time 0.030 (0.032)	Loss 3.0693 (2.8728)	Prec@1 32.031 (32.664)
Test: [40/79]	Time 0.025 (0.024)	Loss 2.5565 (2.8383)	Prec@1 39.844 (33.175)
Test: [60/79]	Time 0.010 (0.022)	Loss 2.8496 (2.8282)	Prec@1 35.938 (33.222)
 * Prec@1 33.190

===> epoch: 46/200
cur

[120/391]	Time 0.068 (0.056)	Data 0.000 (0.002)	Loss 1.7647 (1.9397)	Prec@1 50.781 (46.785)
[140/391]	Time 0.088 (0.056)	Data 0.000 (0.001)	Loss 1.9060 (1.9319)	Prec@1 46.875 (46.947)
[160/391]	Time 0.048 (0.055)	Data 0.000 (0.001)	Loss 2.0715 (1.9334)	Prec@1 46.094 (46.909)
[180/391]	Time 0.049 (0.055)	Data 0.001 (0.001)	Loss 2.2159 (1.9348)	Prec@1 46.094 (47.039)
[200/391]	Time 0.061 (0.055)	Data 0.000 (0.001)	Loss 1.9976 (1.9354)	Prec@1 44.531 (47.054)
[220/391]	Time 0.052 (0.055)	Data 0.000 (0.001)	Loss 2.0638 (1.9384)	Prec@1 41.406 (47.038)
[240/391]	Time 0.052 (0.055)	Data 0.000 (0.001)	Loss 1.9614 (1.9373)	Prec@1 42.969 (46.969)
[260/391]	Time 0.078 (0.055)	Data 0.000 (0.001)	Loss 2.2395 (1.9364)	Prec@1 42.188 (47.058)
[280/391]	Time 0.046 (0.055)	Data 0.000 (0.001)	Loss 1.8822 (1.9354)	Prec@1 42.969 (47.081)
[300/391]	Time 0.022 (0.055)	Data 0.000 (0.001)	Loss 2.0048 (1.9395)	Prec@1 43.750 (46.953)
[320/391]	Time 0.081 (0.055)	Data 0.000 (0.001)	Loss 1.8650 (1.9443)	Prec@1 43.7

 * Prec@1 33.550

===> epoch: 53/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.247 (0.247)	Data 0.205 (0.205)	Loss 1.6605 (1.6605)	Prec@1 51.562 (51.562)
[20/391]	Time 0.027 (0.048)	Data 0.000 (0.010)	Loss 2.2460 (1.8674)	Prec@1 34.375 (47.731)
[40/391]	Time 0.047 (0.048)	Data 0.000 (0.005)	Loss 1.9515 (1.9034)	Prec@1 47.656 (47.771)
[60/391]	Time 0.064 (0.051)	Data 0.000 (0.004)	Loss 1.8498 (1.8927)	Prec@1 46.875 (48.181)
[80/391]	Time 0.083 (0.051)	Data 0.000 (0.003)	Loss 2.0157 (1.9011)	Prec@1 42.969 (48.003)
[100/391]	Time 0.055 (0.052)	Data 0.000 (0.003)	Loss 1.6936 (1.8988)	Prec@1 55.469 (48.028)
[120/391]	Time 0.053 (0.053)	Data 0.000 (0.002)	Loss 1.8463 (1.9015)	Prec@1 46.875 (47.695)
[140/391]	Time 0.024 (0.054)	Data 0.000 (0.002)	Loss 1.9299 (1.9010)	Prec@1 45.312 (47.822)
[160/391]	Time 0.036 (0.054)	Data 0.000 (0.002)	Loss 2.2193 (1.9106)	Prec@1 40.625 (47.486)
[180/391]	Time 0.033 (0.054)	Data 0.000 (0.002)	Loss 2.0824 (1.9182)	Prec@1 40.625 (47.389)
[200/391]	Time 0

[320/391]	Time 0.066 (0.055)	Data 0.000 (0.002)	Loss 1.8204 (1.9334)	Prec@1 50.000 (46.834)
[340/391]	Time 0.044 (0.054)	Data 0.000 (0.002)	Loss 1.8576 (1.9320)	Prec@1 46.094 (46.829)
[360/391]	Time 0.044 (0.054)	Data 0.000 (0.002)	Loss 2.0222 (1.9348)	Prec@1 45.312 (46.767)
[380/391]	Time 0.032 (0.054)	Data 0.000 (0.001)	Loss 1.9116 (1.9345)	Prec@1 47.656 (46.744)
Testing:
Test: [0/79]	Time 0.290 (0.290)	Loss 2.2219 (2.2219)	Prec@1 45.312 (45.312)
Test: [20/79]	Time 0.016 (0.031)	Loss 2.5576 (2.3723)	Prec@1 33.594 (38.579)
Test: [40/79]	Time 0.006 (0.024)	Loss 2.2409 (2.3694)	Prec@1 42.188 (37.938)
Test: [60/79]	Time 0.008 (0.022)	Loss 2.7963 (2.3688)	Prec@1 31.250 (37.807)
 * Prec@1 37.570

===> epoch: 57/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.421 (0.421)	Data 0.388 (0.388)	Loss 2.1763 (2.1763)	Prec@1 41.406 (41.406)
[20/391]	Time 0.031 (0.073)	Data 0.000 (0.019)	Loss 1.8959 (1.9441)	Prec@1 55.469 (46.503)
[40/391]	Time 0.043 (0.066)	Data 0.000 (0.010)	Loss 1.9659 (1.908

[180/391]	Time 0.032 (0.054)	Data 0.000 (0.003)	Loss 1.9159 (1.9143)	Prec@1 44.531 (47.341)
[200/391]	Time 0.038 (0.054)	Data 0.000 (0.003)	Loss 1.9438 (1.9153)	Prec@1 43.750 (47.330)
[220/391]	Time 0.028 (0.054)	Data 0.000 (0.002)	Loss 1.9747 (1.9151)	Prec@1 45.312 (47.335)
[240/391]	Time 0.063 (0.054)	Data 0.000 (0.002)	Loss 1.9352 (1.9160)	Prec@1 50.781 (47.433)
[260/391]	Time 0.054 (0.054)	Data 0.000 (0.002)	Loss 2.1120 (1.9199)	Prec@1 40.625 (47.327)
[280/391]	Time 0.091 (0.054)	Data 0.000 (0.002)	Loss 1.6334 (1.9227)	Prec@1 55.469 (47.172)
[300/391]	Time 0.088 (0.054)	Data 0.000 (0.002)	Loss 2.1323 (1.9260)	Prec@1 40.625 (47.054)
[320/391]	Time 0.033 (0.054)	Data 0.000 (0.002)	Loss 1.9089 (1.9295)	Prec@1 49.219 (46.912)
[340/391]	Time 0.033 (0.054)	Data 0.000 (0.002)	Loss 1.9589 (1.9298)	Prec@1 45.312 (46.932)
[360/391]	Time 0.089 (0.054)	Data 0.000 (0.002)	Loss 2.1352 (1.9313)	Prec@1 50.000 (46.847)
[380/391]	Time 0.037 (0.053)	Data 0.000 (0.002)	Loss 1.8399 (1.9326)	Prec@1 50.0

[40/391]	Time 0.066 (0.059)	Data 0.000 (0.010)	Loss 2.0671 (1.9097)	Prec@1 45.312 (47.561)
[60/391]	Time 0.063 (0.053)	Data 0.000 (0.007)	Loss 1.8260 (1.9200)	Prec@1 45.312 (47.323)
[80/391]	Time 0.021 (0.049)	Data 0.000 (0.005)	Loss 1.7288 (1.8993)	Prec@1 53.125 (47.618)
[100/391]	Time 0.100 (0.050)	Data 0.000 (0.004)	Loss 1.8361 (1.8896)	Prec@1 50.000 (47.633)
[120/391]	Time 0.071 (0.052)	Data 0.000 (0.004)	Loss 2.0468 (1.9023)	Prec@1 46.875 (47.437)
[140/391]	Time 0.073 (0.053)	Data 0.000 (0.003)	Loss 1.6540 (1.8979)	Prec@1 55.469 (47.518)
[160/391]	Time 0.062 (0.053)	Data 0.000 (0.003)	Loss 2.1101 (1.9036)	Prec@1 45.312 (47.428)
[180/391]	Time 0.042 (0.053)	Data 0.000 (0.003)	Loss 1.8849 (1.9049)	Prec@1 48.438 (47.384)
[200/391]	Time 0.038 (0.054)	Data 0.000 (0.002)	Loss 2.1555 (1.9055)	Prec@1 42.188 (47.365)
[220/391]	Time 0.057 (0.054)	Data 0.000 (0.002)	Loss 1.6001 (1.9131)	Prec@1 55.469 (47.197)
[240/391]	Time 0.045 (0.055)	Data 0.000 (0.002)	Loss 1.8182 (1.9143)	Prec@1 48.438 

[380/391]	Time 0.090 (0.057)	Data 0.000 (0.001)	Loss 2.1804 (1.9252)	Prec@1 41.406 (46.707)
Testing:
Test: [0/79]	Time 0.253 (0.253)	Loss 2.5252 (2.5252)	Prec@1 42.969 (42.969)
Test: [20/79]	Time 0.007 (0.032)	Loss 2.6400 (2.5167)	Prec@1 34.375 (37.723)
Test: [40/79]	Time 0.006 (0.022)	Loss 2.3108 (2.5100)	Prec@1 38.281 (37.481)
Test: [60/79]	Time 0.010 (0.019)	Loss 3.0905 (2.5405)	Prec@1 32.812 (37.001)
 * Prec@1 37.220

===> epoch: 68/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.315 (0.315)	Data 0.261 (0.261)	Loss 1.8167 (1.8167)	Prec@1 57.812 (57.812)
[20/391]	Time 0.052 (0.082)	Data 0.000 (0.021)	Loss 1.8417 (1.9121)	Prec@1 47.656 (47.805)
[40/391]	Time 0.077 (0.070)	Data 0.000 (0.011)	Loss 1.9561 (1.9267)	Prec@1 50.781 (47.599)
[60/391]	Time 0.051 (0.063)	Data 0.000 (0.007)	Loss 1.7518 (1.9096)	Prec@1 46.875 (47.707)
[80/391]	Time 0.054 (0.058)	Data 0.000 (0.006)	Loss 2.2515 (1.9181)	Prec@1 39.062 (47.328)
[100/391]	Time 0.024 (0.055)	Data 0.000 (0.005)	Loss 1.9151 (1.9108)

[240/391]	Time 0.055 (0.052)	Data 0.000 (0.001)	Loss 1.9607 (1.9156)	Prec@1 46.094 (47.316)
[260/391]	Time 0.020 (0.051)	Data 0.000 (0.001)	Loss 2.0438 (1.9166)	Prec@1 45.312 (47.258)
[280/391]	Time 0.035 (0.051)	Data 0.000 (0.001)	Loss 2.0514 (1.9164)	Prec@1 40.625 (47.250)
[300/391]	Time 0.042 (0.051)	Data 0.003 (0.001)	Loss 2.0173 (1.9172)	Prec@1 46.094 (47.270)
[320/391]	Time 0.094 (0.052)	Data 0.000 (0.001)	Loss 1.9412 (1.9156)	Prec@1 48.438 (47.303)
[340/391]	Time 0.112 (0.052)	Data 0.015 (0.001)	Loss 1.9964 (1.9155)	Prec@1 40.625 (47.290)
[360/391]	Time 0.053 (0.052)	Data 0.000 (0.001)	Loss 1.8010 (1.9135)	Prec@1 50.000 (47.358)
[380/391]	Time 0.032 (0.053)	Data 0.000 (0.001)	Loss 1.8165 (1.9160)	Prec@1 49.219 (47.277)
Testing:
Test: [0/79]	Time 0.320 (0.320)	Loss 2.4000 (2.4000)	Prec@1 40.625 (40.625)
Test: [20/79]	Time 0.024 (0.036)	Loss 2.5803 (2.4902)	Prec@1 39.062 (38.244)
Test: [40/79]	Time 0.011 (0.024)	Loss 2.1872 (2.4554)	Prec@1 45.312 (38.777)
Test: [60/79]	Time 0.012 

[100/391]	Time 0.052 (0.060)	Data 0.000 (0.003)	Loss 1.7497 (1.8982)	Prec@1 52.344 (46.983)
[120/391]	Time 0.091 (0.059)	Data 0.000 (0.003)	Loss 2.0659 (1.8938)	Prec@1 42.188 (47.146)
[140/391]	Time 0.095 (0.060)	Data 0.000 (0.003)	Loss 2.0211 (1.8960)	Prec@1 44.531 (47.036)
[160/391]	Time 0.041 (0.059)	Data 0.000 (0.002)	Loss 1.6969 (1.8899)	Prec@1 52.344 (47.253)
[180/391]	Time 0.054 (0.059)	Data 0.000 (0.002)	Loss 1.9279 (1.8885)	Prec@1 48.438 (47.251)
[200/391]	Time 0.056 (0.058)	Data 0.000 (0.002)	Loss 1.8866 (1.8926)	Prec@1 47.656 (47.205)
[220/391]	Time 0.052 (0.058)	Data 0.000 (0.002)	Loss 1.8803 (1.8967)	Prec@1 49.219 (47.232)
[240/391]	Time 0.025 (0.058)	Data 0.000 (0.002)	Loss 1.8965 (1.8980)	Prec@1 46.875 (47.264)
[260/391]	Time 0.056 (0.057)	Data 0.000 (0.002)	Loss 1.9858 (1.9027)	Prec@1 42.969 (47.234)
[280/391]	Time 0.031 (0.057)	Data 0.000 (0.002)	Loss 2.1175 (1.9064)	Prec@1 40.625 (47.111)
[300/391]	Time 0.058 (0.057)	Data 0.000 (0.001)	Loss 1.7238 (1.9089)	Prec@1 47.6

Test: [40/79]	Time 0.019 (0.024)	Loss 2.2941 (2.4364)	Prec@1 39.844 (39.005)
Test: [60/79]	Time 0.022 (0.021)	Loss 2.7198 (2.4423)	Prec@1 36.719 (38.332)
 * Prec@1 38.400

===> epoch: 79/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.651 (0.651)	Data 0.568 (0.568)	Loss 1.8259 (1.8259)	Prec@1 47.656 (47.656)
[20/391]	Time 0.060 (0.081)	Data 0.000 (0.027)	Loss 2.0824 (1.8912)	Prec@1 44.531 (48.772)
[40/391]	Time 0.068 (0.069)	Data 0.000 (0.014)	Loss 1.6971 (1.9158)	Prec@1 49.219 (47.313)
[60/391]	Time 0.030 (0.063)	Data 0.000 (0.010)	Loss 2.0387 (1.9100)	Prec@1 39.062 (47.118)
[80/391]	Time 0.025 (0.061)	Data 0.000 (0.007)	Loss 2.0553 (1.9064)	Prec@1 46.094 (46.971)
[100/391]	Time 0.053 (0.060)	Data 0.000 (0.006)	Loss 1.7146 (1.8889)	Prec@1 53.125 (47.509)
[120/391]	Time 0.090 (0.059)	Data 0.005 (0.005)	Loss 2.0496 (1.8791)	Prec@1 40.625 (47.727)
[140/391]	Time 0.043 (0.058)	Data 0.000 (0.004)	Loss 1.7873 (1.8762)	Prec@1 45.312 (47.850)
[160/391]	Time 0.051 (0.058)	Data 0.000 (0.004

[300/391]	Time 0.048 (0.051)	Data 0.000 (0.002)	Loss 1.9757 (1.8927)	Prec@1 40.625 (47.944)
[320/391]	Time 0.054 (0.052)	Data 0.000 (0.002)	Loss 2.0671 (1.8918)	Prec@1 45.312 (47.958)
[340/391]	Time 0.044 (0.052)	Data 0.000 (0.002)	Loss 2.2005 (1.8942)	Prec@1 45.312 (47.936)
[360/391]	Time 0.082 (0.053)	Data 0.000 (0.002)	Loss 1.9370 (1.8973)	Prec@1 49.219 (47.894)
[380/391]	Time 0.038 (0.053)	Data 0.000 (0.002)	Loss 1.8268 (1.8995)	Prec@1 50.781 (47.851)
Testing:
Test: [0/79]	Time 0.681 (0.681)	Loss 2.0894 (2.0894)	Prec@1 49.219 (49.219)
Test: [20/79]	Time 0.012 (0.048)	Loss 2.2117 (2.2270)	Prec@1 42.188 (44.085)
Test: [40/79]	Time 0.013 (0.031)	Loss 1.6862 (2.2243)	Prec@1 53.906 (42.873)
Test: [60/79]	Time 0.008 (0.027)	Loss 2.7755 (2.2302)	Prec@1 36.719 (42.418)
 * Prec@1 42.410

===> epoch: 83/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.530 (0.530)	Data 0.479 (0.479)	Loss 1.8219 (1.8219)	Prec@1 49.219 (49.219)
[20/391]	Time 0.065 (0.082)	Data 0.000 (0.026)	Loss 1.8540 (1.87

[160/391]	Time 0.048 (0.053)	Data 0.001 (0.004)	Loss 2.0977 (1.9009)	Prec@1 42.188 (47.394)
[180/391]	Time 0.046 (0.054)	Data 0.000 (0.004)	Loss 1.7638 (1.9071)	Prec@1 50.781 (47.311)
[200/391]	Time 0.094 (0.054)	Data 0.000 (0.004)	Loss 1.6516 (1.9114)	Prec@1 57.031 (47.318)
[220/391]	Time 0.035 (0.054)	Data 0.000 (0.003)	Loss 1.8960 (1.9088)	Prec@1 43.750 (47.370)
[240/391]	Time 0.059 (0.054)	Data 0.000 (0.003)	Loss 1.7454 (1.9049)	Prec@1 56.250 (47.348)
[260/391]	Time 0.077 (0.055)	Data 0.000 (0.003)	Loss 1.8774 (1.8990)	Prec@1 46.875 (47.495)
[280/391]	Time 0.083 (0.054)	Data 0.000 (0.003)	Loss 2.0061 (1.9026)	Prec@1 43.750 (47.403)
[300/391]	Time 0.085 (0.054)	Data 0.001 (0.003)	Loss 2.1667 (1.9045)	Prec@1 37.500 (47.332)
[320/391]	Time 0.106 (0.055)	Data 0.000 (0.002)	Loss 1.9140 (1.9097)	Prec@1 53.906 (47.298)
[340/391]	Time 0.053 (0.055)	Data 0.000 (0.002)	Loss 1.8197 (1.9119)	Prec@1 55.469 (47.310)
[360/391]	Time 0.062 (0.055)	Data 0.000 (0.002)	Loss 1.7968 (1.9099)	Prec@1 45.3

[20/391]	Time 0.022 (0.065)	Data 0.001 (0.018)	Loss 1.9862 (1.8574)	Prec@1 40.625 (49.144)
[40/391]	Time 0.032 (0.054)	Data 0.000 (0.010)	Loss 1.8152 (1.8397)	Prec@1 50.000 (49.905)
[60/391]	Time 0.021 (0.049)	Data 0.000 (0.007)	Loss 1.8781 (1.8441)	Prec@1 51.562 (49.449)
[80/391]	Time 0.042 (0.047)	Data 0.008 (0.005)	Loss 1.9659 (1.8588)	Prec@1 47.656 (49.209)
[100/391]	Time 0.039 (0.047)	Data 0.000 (0.004)	Loss 2.2374 (1.8734)	Prec@1 35.156 (48.484)
[120/391]	Time 0.044 (0.048)	Data 0.000 (0.004)	Loss 1.8429 (1.8759)	Prec@1 44.531 (48.547)
[140/391]	Time 0.034 (0.048)	Data 0.000 (0.003)	Loss 2.0406 (1.8744)	Prec@1 46.875 (48.487)
[160/391]	Time 0.025 (0.049)	Data 0.000 (0.003)	Loss 1.7517 (1.8727)	Prec@1 49.219 (48.666)
[180/391]	Time 0.057 (0.050)	Data 0.000 (0.003)	Loss 1.7776 (1.8763)	Prec@1 46.094 (48.558)
[200/391]	Time 0.072 (0.050)	Data 0.000 (0.003)	Loss 2.1663 (1.8770)	Prec@1 42.969 (48.457)
[220/391]	Time 0.023 (0.050)	Data 0.000 (0.002)	Loss 1.9557 (1.8741)	Prec@1 42.969 (

[360/391]	Time 0.101 (0.056)	Data 0.000 (0.002)	Loss 1.9599 (1.8941)	Prec@1 47.656 (47.667)
[380/391]	Time 0.051 (0.056)	Data 0.000 (0.002)	Loss 1.6530 (1.8938)	Prec@1 53.125 (47.718)
Testing:
Test: [0/79]	Time 0.698 (0.698)	Loss 2.4703 (2.4703)	Prec@1 38.281 (38.281)
Test: [20/79]	Time 0.005 (0.048)	Loss 2.8190 (2.6570)	Prec@1 34.375 (34.375)
Test: [40/79]	Time 0.010 (0.030)	Loss 2.2681 (2.6709)	Prec@1 38.281 (33.708)
Test: [60/79]	Time 0.010 (0.026)	Loss 2.6708 (2.6712)	Prec@1 35.938 (33.530)
 * Prec@1 33.400

===> epoch: 94/200
current lr 1.00000e-01
Training:
[0/391]	Time 0.420 (0.420)	Data 0.373 (0.373)	Loss 1.7039 (1.7039)	Prec@1 53.125 (53.125)
[20/391]	Time 0.052 (0.073)	Data 0.001 (0.018)	Loss 1.8683 (1.8552)	Prec@1 46.875 (48.661)
[40/391]	Time 0.063 (0.066)	Data 0.000 (0.010)	Loss 1.9739 (1.8552)	Prec@1 41.406 (48.476)
[60/391]	Time 0.044 (0.062)	Data 0.000 (0.007)	Loss 1.9115 (1.8889)	Prec@1 46.875 (47.541)
[80/391]	Time 0.049 (0.056)	Data 0.000 (0.005)	Loss 2.0252 (1.8767)

[220/391]	Time 0.031 (0.056)	Data 0.000 (0.002)	Loss 1.9480 (1.8938)	Prec@1 45.312 (47.621)
[240/391]	Time 0.051 (0.056)	Data 0.000 (0.002)	Loss 1.7038 (1.8927)	Prec@1 53.125 (47.721)
[260/391]	Time 0.030 (0.055)	Data 0.000 (0.002)	Loss 1.8710 (1.8961)	Prec@1 45.312 (47.680)
[280/391]	Time 0.044 (0.056)	Data 0.000 (0.002)	Loss 1.9394 (1.8931)	Prec@1 50.000 (47.809)
[300/391]	Time 0.031 (0.055)	Data 0.000 (0.002)	Loss 1.8121 (1.8953)	Prec@1 51.562 (47.778)
[320/391]	Time 0.066 (0.055)	Data 0.000 (0.001)	Loss 2.0985 (1.8938)	Prec@1 45.312 (47.739)
[340/391]	Time 0.076 (0.055)	Data 0.001 (0.001)	Loss 1.9051 (1.8953)	Prec@1 44.531 (47.730)
[360/391]	Time 0.051 (0.055)	Data 0.000 (0.001)	Loss 2.1525 (1.8955)	Prec@1 42.969 (47.702)
[380/391]	Time 0.022 (0.055)	Data 0.000 (0.001)	Loss 1.8430 (1.8968)	Prec@1 49.219 (47.673)
Testing:
Test: [0/79]	Time 0.365 (0.365)	Loss 2.1845 (2.1845)	Prec@1 41.406 (41.406)
Test: [20/79]	Time 0.025 (0.030)	Loss 2.1954 (2.2617)	Prec@1 41.406 (40.439)
Test: [40/

[80/391]	Time 0.079 (0.061)	Data 0.000 (0.005)	Loss 1.3342 (1.6444)	Prec@1 61.719 (54.321)
[100/391]	Time 0.043 (0.060)	Data 0.000 (0.004)	Loss 1.5244 (1.6147)	Prec@1 55.469 (54.827)
[120/391]	Time 0.033 (0.060)	Data 0.000 (0.004)	Loss 1.8570 (1.6136)	Prec@1 50.781 (54.920)
[140/391]	Time 0.038 (0.059)	Data 0.005 (0.003)	Loss 1.5999 (1.5994)	Prec@1 56.250 (55.441)
[160/391]	Time 0.042 (0.058)	Data 0.000 (0.003)	Loss 1.4524 (1.5909)	Prec@1 57.031 (55.624)
[180/391]	Time 0.030 (0.058)	Data 0.000 (0.002)	Loss 1.1944 (1.5809)	Prec@1 68.750 (55.840)
[200/391]	Time 0.036 (0.058)	Data 0.000 (0.002)	Loss 1.7281 (1.5725)	Prec@1 56.250 (56.106)
[220/391]	Time 0.080 (0.058)	Data 0.000 (0.002)	Loss 1.6338 (1.5657)	Prec@1 53.906 (56.254)
[240/391]	Time 0.071 (0.058)	Data 0.000 (0.002)	Loss 1.6115 (1.5590)	Prec@1 53.906 (56.448)
[260/391]	Time 0.031 (0.057)	Data 0.000 (0.002)	Loss 1.4201 (1.5527)	Prec@1 57.031 (56.519)
[280/391]	Time 0.021 (0.057)	Data 0.000 (0.002)	Loss 1.7643 (1.5495)	Prec@1 51.56

Test: [20/79]	Time 0.018 (0.026)	Loss 1.4750 (1.5329)	Prec@1 58.594 (57.515)
Test: [40/79]	Time 0.008 (0.020)	Loss 1.1525 (1.5228)	Prec@1 64.844 (56.688)
Test: [60/79]	Time 0.007 (0.018)	Loss 2.0128 (1.5257)	Prec@1 50.781 (56.493)
 * Prec@1 56.720

===> epoch: 105/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.367 (0.367)	Data 0.298 (0.298)	Loss 1.3446 (1.3446)	Prec@1 59.375 (59.375)
[20/391]	Time 0.040 (0.069)	Data 0.001 (0.015)	Loss 1.4781 (1.3372)	Prec@1 62.500 (61.421)
[40/391]	Time 0.029 (0.062)	Data 0.000 (0.008)	Loss 1.1698 (1.3430)	Prec@1 66.406 (61.757)
[60/391]	Time 0.026 (0.059)	Data 0.000 (0.005)	Loss 1.4944 (1.3256)	Prec@1 64.062 (62.551)
[80/391]	Time 0.055 (0.058)	Data 0.000 (0.004)	Loss 1.2748 (1.3424)	Prec@1 64.844 (62.105)
[100/391]	Time 0.031 (0.057)	Data 0.000 (0.003)	Loss 1.2879 (1.3412)	Prec@1 63.281 (61.904)
[120/391]	Time 0.025 (0.057)	Data 0.000 (0.003)	Loss 1.3828 (1.3430)	Prec@1 57.031 (61.757)
[140/391]	Time 0.059 (0.057)	Data 0.000 (0.003)	Loss 1.3992 

[280/391]	Time 0.053 (0.050)	Data 0.000 (0.002)	Loss 1.4877 (1.3144)	Prec@1 53.906 (62.166)
[300/391]	Time 0.045 (0.050)	Data 0.000 (0.001)	Loss 1.1860 (1.3178)	Prec@1 66.406 (62.067)
[320/391]	Time 0.072 (0.050)	Data 0.000 (0.001)	Loss 1.2674 (1.3160)	Prec@1 67.188 (62.198)
[340/391]	Time 0.084 (0.051)	Data 0.000 (0.001)	Loss 1.1526 (1.3135)	Prec@1 69.531 (62.275)
[360/391]	Time 0.059 (0.051)	Data 0.000 (0.001)	Loss 1.2028 (1.3145)	Prec@1 61.719 (62.219)
[380/391]	Time 0.034 (0.051)	Data 0.000 (0.001)	Loss 1.1602 (1.3144)	Prec@1 66.406 (62.170)
Testing:
Test: [0/79]	Time 0.292 (0.292)	Loss 1.3796 (1.3796)	Prec@1 60.938 (60.938)
Test: [20/79]	Time 0.021 (0.030)	Loss 1.3832 (1.4894)	Prec@1 61.719 (58.966)
Test: [40/79]	Time 0.012 (0.024)	Loss 1.1462 (1.4845)	Prec@1 65.625 (57.984)
Test: [60/79]	Time 0.026 (0.022)	Loss 1.9825 (1.4890)	Prec@1 46.094 (57.454)
 * Prec@1 57.820

===> epoch: 109/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.317 (0.317)	Data 0.248 (0.248)	Loss 1.2597 (1.

[140/391]	Time 0.077 (0.057)	Data 0.000 (0.003)	Loss 1.3657 (1.2847)	Prec@1 60.938 (62.927)
[160/391]	Time 0.086 (0.057)	Data 0.000 (0.003)	Loss 1.3047 (1.2891)	Prec@1 64.062 (62.825)
[180/391]	Time 0.069 (0.057)	Data 0.000 (0.003)	Loss 1.3020 (1.2868)	Prec@1 63.281 (62.828)
[200/391]	Time 0.102 (0.057)	Data 0.000 (0.003)	Loss 1.3410 (1.2884)	Prec@1 64.062 (62.760)
[220/391]	Time 0.061 (0.056)	Data 0.000 (0.002)	Loss 1.2266 (1.2920)	Prec@1 64.844 (62.645)
[240/391]	Time 0.070 (0.055)	Data 0.000 (0.002)	Loss 1.0925 (1.2894)	Prec@1 66.406 (62.779)
[260/391]	Time 0.048 (0.054)	Data 0.000 (0.002)	Loss 1.2092 (1.2894)	Prec@1 62.500 (62.826)
[280/391]	Time 0.029 (0.054)	Data 0.000 (0.002)	Loss 1.4333 (1.2895)	Prec@1 55.469 (62.845)
[300/391]	Time 0.064 (0.054)	Data 0.000 (0.002)	Loss 1.3489 (1.2934)	Prec@1 61.719 (62.697)
[320/391]	Time 0.066 (0.053)	Data 0.000 (0.002)	Loss 1.3681 (1.2918)	Prec@1 60.938 (62.687)
[340/391]	Time 0.051 (0.053)	Data 0.000 (0.002)	Loss 1.4004 (1.2913)	Prec@1 60.9

[0/391]	Time 0.351 (0.351)	Data 0.320 (0.320)	Loss 1.5212 (1.5212)	Prec@1 57.031 (57.031)
[20/391]	Time 0.049 (0.069)	Data 0.000 (0.015)	Loss 1.3010 (1.2494)	Prec@1 59.375 (62.760)
[40/391]	Time 0.100 (0.065)	Data 0.000 (0.008)	Loss 1.3955 (1.2554)	Prec@1 54.688 (63.357)
[60/391]	Time 0.042 (0.062)	Data 0.000 (0.006)	Loss 1.2574 (1.2498)	Prec@1 62.500 (63.691)
[80/391]	Time 0.093 (0.060)	Data 0.000 (0.004)	Loss 1.2939 (1.2550)	Prec@1 64.844 (63.522)
[100/391]	Time 0.089 (0.059)	Data 0.000 (0.004)	Loss 1.3298 (1.2576)	Prec@1 61.719 (63.390)
[120/391]	Time 0.086 (0.058)	Data 0.000 (0.003)	Loss 1.3215 (1.2560)	Prec@1 63.281 (63.488)
[140/391]	Time 0.056 (0.057)	Data 0.001 (0.003)	Loss 1.2343 (1.2569)	Prec@1 62.500 (63.481)
[160/391]	Time 0.031 (0.058)	Data 0.000 (0.003)	Loss 1.3102 (1.2601)	Prec@1 60.938 (63.373)
[180/391]	Time 0.044 (0.057)	Data 0.000 (0.002)	Loss 1.6298 (1.2658)	Prec@1 57.031 (63.264)
[200/391]	Time 0.047 (0.057)	Data 0.000 (0.002)	Loss 1.4051 (1.2703)	Prec@1 59.375 (63

[340/391]	Time 0.071 (0.052)	Data 0.000 (0.001)	Loss 1.3966 (1.2761)	Prec@1 59.375 (62.942)
[360/391]	Time 0.027 (0.052)	Data 0.000 (0.001)	Loss 1.3636 (1.2764)	Prec@1 61.719 (62.959)
[380/391]	Time 0.083 (0.053)	Data 0.000 (0.001)	Loss 1.2480 (1.2770)	Prec@1 57.812 (62.990)
Testing:
Test: [0/79]	Time 0.192 (0.192)	Loss 1.4842 (1.4842)	Prec@1 60.156 (60.156)
Test: [20/79]	Time 0.012 (0.025)	Loss 1.5504 (1.5280)	Prec@1 57.812 (57.217)
Test: [40/79]	Time 0.012 (0.019)	Loss 1.1362 (1.5252)	Prec@1 63.281 (57.127)
Test: [60/79]	Time 0.009 (0.017)	Loss 1.8361 (1.5342)	Prec@1 53.125 (56.775)
 * Prec@1 57.100

===> epoch: 120/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.325 (0.325)	Data 0.287 (0.287)	Loss 1.2522 (1.2522)	Prec@1 60.938 (60.938)
[20/391]	Time 0.068 (0.069)	Data 0.000 (0.014)	Loss 1.0010 (1.2650)	Prec@1 67.969 (64.137)
[40/391]	Time 0.070 (0.063)	Data 0.000 (0.008)	Loss 1.1005 (1.2453)	Prec@1 66.406 (63.548)
[60/391]	Time 0.043 (0.059)	Data 0.000 (0.005)	Loss 1.2355 (1.243

[200/391]	Time 0.029 (0.055)	Data 0.000 (0.001)	Loss 1.1529 (1.2575)	Prec@1 68.750 (63.316)
[220/391]	Time 0.039 (0.056)	Data 0.000 (0.001)	Loss 1.2378 (1.2592)	Prec@1 61.719 (63.267)
[240/391]	Time 0.024 (0.056)	Data 0.000 (0.001)	Loss 1.3171 (1.2629)	Prec@1 64.844 (63.197)
[260/391]	Time 0.077 (0.056)	Data 0.000 (0.001)	Loss 1.1847 (1.2679)	Prec@1 70.312 (63.054)
[280/391]	Time 0.031 (0.055)	Data 0.000 (0.001)	Loss 1.3626 (1.2702)	Prec@1 60.938 (63.034)
[300/391]	Time 0.039 (0.056)	Data 0.000 (0.001)	Loss 1.2727 (1.2710)	Prec@1 60.156 (63.037)
[320/391]	Time 0.022 (0.056)	Data 0.000 (0.001)	Loss 1.2354 (1.2736)	Prec@1 64.062 (62.953)
[340/391]	Time 0.032 (0.056)	Data 0.000 (0.001)	Loss 1.3207 (1.2748)	Prec@1 64.062 (62.910)
[360/391]	Time 0.031 (0.056)	Data 0.000 (0.001)	Loss 1.4585 (1.2785)	Prec@1 55.469 (62.851)
[380/391]	Time 0.073 (0.056)	Data 0.000 (0.001)	Loss 1.1720 (1.2795)	Prec@1 64.062 (62.840)
Testing:
Test: [0/79]	Time 0.239 (0.239)	Loss 1.4328 (1.4328)	Prec@1 57.812 (57.

[60/391]	Time 0.067 (0.055)	Data 0.000 (0.004)	Loss 1.3255 (1.2511)	Prec@1 65.625 (63.409)
[80/391]	Time 0.066 (0.055)	Data 0.000 (0.003)	Loss 1.3407 (1.2669)	Prec@1 57.812 (62.838)
[100/391]	Time 0.023 (0.055)	Data 0.000 (0.003)	Loss 1.1337 (1.2626)	Prec@1 67.188 (62.871)
[120/391]	Time 0.021 (0.054)	Data 0.000 (0.002)	Loss 1.1976 (1.2663)	Prec@1 63.281 (62.952)
[140/391]	Time 0.056 (0.055)	Data 0.000 (0.002)	Loss 1.4265 (1.2645)	Prec@1 55.469 (62.960)
[160/391]	Time 0.085 (0.055)	Data 0.000 (0.002)	Loss 1.3438 (1.2574)	Prec@1 57.031 (63.179)
[180/391]	Time 0.054 (0.055)	Data 0.000 (0.002)	Loss 1.3473 (1.2540)	Prec@1 60.156 (63.286)
[200/391]	Time 0.081 (0.055)	Data 0.000 (0.002)	Loss 1.4345 (1.2645)	Prec@1 57.031 (63.067)
[220/391]	Time 0.051 (0.055)	Data 0.000 (0.001)	Loss 1.3113 (1.2680)	Prec@1 66.406 (63.104)
[240/391]	Time 0.055 (0.054)	Data 0.000 (0.001)	Loss 1.1651 (1.2725)	Prec@1 64.062 (62.960)
[260/391]	Time 0.063 (0.053)	Data 0.000 (0.001)	Loss 1.4745 (1.2760)	Prec@1 57.812

Testing:
Test: [0/79]	Time 0.343 (0.343)	Loss 1.5218 (1.5218)	Prec@1 58.594 (58.594)
Test: [20/79]	Time 0.030 (0.033)	Loss 1.6938 (1.6035)	Prec@1 58.594 (56.027)
Test: [40/79]	Time 0.037 (0.026)	Loss 1.2137 (1.6031)	Prec@1 63.281 (55.774)
Test: [60/79]	Time 0.008 (0.021)	Loss 2.0599 (1.6216)	Prec@1 44.531 (55.149)
 * Prec@1 55.460

===> epoch: 131/200
current lr 1.00000e-02
Training:
[0/391]	Time 0.271 (0.271)	Data 0.211 (0.211)	Loss 1.2516 (1.2516)	Prec@1 61.719 (61.719)
[20/391]	Time 0.047 (0.053)	Data 0.000 (0.010)	Loss 1.2003 (1.2223)	Prec@1 66.406 (65.179)
[40/391]	Time 0.054 (0.048)	Data 0.000 (0.005)	Loss 1.2960 (1.2419)	Prec@1 58.594 (64.425)
[60/391]	Time 0.029 (0.045)	Data 0.000 (0.004)	Loss 1.3879 (1.2527)	Prec@1 60.156 (63.870)
[80/391]	Time 0.045 (0.045)	Data 0.000 (0.003)	Loss 1.0469 (1.2681)	Prec@1 71.875 (63.532)
[100/391]	Time 0.047 (0.046)	Data 0.000 (0.003)	Loss 1.0726 (1.2713)	Prec@1 65.625 (63.196)
[120/391]	Time 0.072 (0.048)	Data 0.001 (0.002)	Loss 1.1946 (1.2680

[260/391]	Time 0.081 (0.057)	Data 0.000 (0.002)	Loss 1.3020 (1.2660)	Prec@1 57.031 (63.347)
[280/391]	Time 0.035 (0.056)	Data 0.000 (0.002)	Loss 1.2482 (1.2675)	Prec@1 62.500 (63.306)
[300/391]	Time 0.037 (0.056)	Data 0.000 (0.002)	Loss 1.4266 (1.2718)	Prec@1 60.938 (63.196)
[320/391]	Time 0.025 (0.056)	Data 0.000 (0.002)	Loss 1.4697 (1.2699)	Prec@1 57.812 (63.181)
[340/391]	Time 0.071 (0.056)	Data 0.000 (0.001)	Loss 1.1151 (1.2734)	Prec@1 70.312 (63.132)
[360/391]	Time 0.043 (0.056)	Data 0.000 (0.001)	Loss 1.4445 (1.2744)	Prec@1 61.719 (63.084)
[380/391]	Time 0.109 (0.056)	Data 0.013 (0.001)	Loss 1.3439 (1.2755)	Prec@1 54.688 (63.017)
Testing:
Test: [0/79]	Time 0.276 (0.276)	Loss 1.4885 (1.4885)	Prec@1 56.250 (56.250)
Test: [20/79]	Time 0.007 (0.029)	Loss 1.5138 (1.5962)	Prec@1 64.062 (55.580)
Test: [40/79]	Time 0.035 (0.025)	Loss 1.1964 (1.5874)	Prec@1 63.281 (55.488)
Test: [60/79]	Time 0.017 (0.022)	Loss 1.9281 (1.5850)	Prec@1 46.875 (55.661)
 * Prec@1 55.650

===> epoch: 135/200
cu

[120/391]	Time 0.024 (0.057)	Data 0.000 (0.003)	Loss 1.2270 (1.2539)	Prec@1 66.406 (63.520)
[140/391]	Time 0.056 (0.055)	Data 0.000 (0.002)	Loss 1.0847 (1.2605)	Prec@1 66.406 (63.514)
[160/391]	Time 0.032 (0.053)	Data 0.000 (0.002)	Loss 1.0541 (1.2621)	Prec@1 70.312 (63.543)
[180/391]	Time 0.024 (0.051)	Data 0.000 (0.002)	Loss 1.2907 (1.2692)	Prec@1 61.719 (63.368)
