In [4]:
from __future__ import print_function
import os
import time
import logging
import argparse
import numpy as np
from visdom import Visdom
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from utils import *

# Teacher models:
# VGG11/VGG13/VGG16/VGG19, GoogLeNet, AlxNet, ResNet18, ResNet34, 
# ResNet50, ResNet101, ResNet152, ResNeXt29_2x64d, ResNeXt29_4x64d, 
# ResNeXt29_8x64d, ResNeXt29_32x64d, PreActResNet18, PreActResNet34, 
# PreActResNet50, PreActResNet101, PreActResNet152, 
# DenseNet121, DenseNet161, DenseNet169, DenseNet201, 
import models

# Student models:
# myNet, LeNet, FitNet

start_time = time.time()
# os.makedirs('./checkpoint', exist_ok=True)

# Training settings
parser = argparse.ArgumentParser(description='PyTorch DML 2S')

parser.add_argument('--dataset',
                    choices=['CIFAR10',
                             'CIFAR100'
                            ],
                    default='CIFAR10')
parser.add_argument('--net1',
                    choices=['ResNet8',
                             'ResNet15',
                             'ResNet16',
                             'ResNet20',
                             'ResNet32',
                             'ResNet50',
                             'ResNet56',
                             'ResNet110'
                            ],
                    default='ResNet20')
parser.add_argument('--net2',
                    choices=['ResNet8',
                             'ResNet15',
                             'ResNet16',
                             'ResNet20',
                             'ResNet32',
                             'ResNet50',
                             'ResNet56',
                             'ResNet110'
                            ],
                    default='ResNet20')

parser.add_argument('--n_class', type=int, default=10, metavar='N', help='num of classes')
parser.add_argument('--batch_size', type=int, default=128, metavar='N', help='input batch size for training')
parser.add_argument('--test_batch_size', type=int, default=128, metavar='N', help='input test batch size for training')
parser.add_argument('--epochs', type=int, default=20, metavar='N', help='number of epochs to train (default: 20)')
parser.add_argument('--lr', type=float, default=0.1, metavar='LR', help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum (default: 0.5)')
parser.add_argument('--device', default='cuda:1', type=str, help='device: cuda or cpu')
parser.add_argument('--print_freq', type=int, default=40, metavar='N', help='how many batches to wait before logging training status')

config = ['--epochs', '200', '--net1', 'ResNet8', '--net2', 'ResNet8', '--device', 'cuda:1']
args = parser.parse_args(config)

device = args.device if torch.cuda.is_available() else 'cpu'
save_dir = './checkpoint/' + args.dataset + '/'

# models
net1 = getattr(models, args.net1)(num_classes=args.n_class)
net1.to(device)
net2 = getattr(models, args.net2)(num_classes=args.n_class)
net2.to(device)

# logging
logfile = save_dir + 'DML_2S_' + net1.model_name + '.log'
if os.path.exists(logfile):
    os.remove(logfile)
def log_out(info):
    f = open(logfile, mode='a')
    f.write(info)
    f.write('\n')
    f.close()
    print(info)
    
# visualizer
vis = Visdom(env='distill')
loss_win = vis.line(
    X=np.array([0]),
    Y=np.array([0]),
    opts=dict(
        title='DML Loss',
        xlabel='epoch',
        xtickmin=0,
        ylabel='loss',
        ytickmin=0,
        ytickstep=0.5,
        legend=['net1_loss', 'net2_loss']
    ),
    name="loss"
)

acc_win = vis.line(
    X=np.column_stack((0, 0)),
    Y=np.column_stack((0, 0)),
    opts=dict(
        title='DML Acc',
        xlabel='epoch',
        xtickmin=0,
#         xtickstep=5,
        ylabel='accuracy',
        ytickmin=0,
        ytickmax=100,
#         markers=True,
#         markersymbol='dot',
#         markersize=5,
        legend=['net1_acc', 'net2_acc']
    ),
    name="acc"
)


# data
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    normalize,
])
test_transform = transforms.Compose([transforms.ToTensor(), normalize])
train_set = datasets.CIFAR10(root='../data', train=True, download=True, transform=train_transform)
test_set = datasets.CIFAR10(root='../data', train=False, download=False, transform=test_transform)
train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=args.test_batch_size, shuffle=False)

# optimizer = optim.SGD(st_model.parameters(), lr=args.lr, momentum=args.momentum)
optimizer_1 = optim.SGD(net1.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
optimizer_2 = optim.SGD(net2.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
lr_scheduler_1 = optim.lr_scheduler.MultiStepLR(optimizer_1, milestones=[100, 150])
lr_scheduler_2 = optim.lr_scheduler.MultiStepLR(optimizer_2, milestones=[100, 150])

def DML_loss(y, labels, logits_1, logits_2):
    return nn.KLDivLoss()(logits_2, logits_1) + F.cross_entropy(y, labels)
    
# train with multi-teacher
def train(epoch, net1, net2):
    print('Training:')
    # switch to train mode
    net1.train()
    net2.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses_1 = AverageMeter()
    losses_2 = AverageMeter()
    top1_1 = AverageMeter()
    top1_2 = AverageMeter()
    
    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        input, target = input.to(device), target.to(device)
        
        # compute outputs
        _,_,_,_, output_1 = net1(input)
        _,_,_,_, output_2 = net2(input)
        logits_1 = F.softmax(output_1)
        logits_2 = F.softmax(output_2)
        
        optimizer_1.zero_grad()
        loss_1 = DML_loss(output_1, target, logits_1, logits_2)
        loss_1.backward(retain_graph=True)
        optimizer_1.step()
        
        optimizer_2.zero_grad()
        loss_2 = DML_loss(output_2, target, logits_2, logits_1)
        loss_2.backward(retain_graph=True)
        optimizer_2.step()

        output_1 = output_1.float()
        loss_1 = loss_1.float()
        # measure accuracy and record loss
        net1_acc = accuracy(output_1, target)[0]
        net2_acc = accuracy(output_2, target)[0]
        losses_1.update(loss_1.item(), input.size(0))
        top1_1.update(net1_acc, input.size(0))
        losses_2.update(loss_2.item(), input.size(0))
        top1_2.update(net2_acc, input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            log_out('[{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 ({top1_1.avg:.3f})'.format(
                      i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses_1, top1_1=top1_1))
    return losses_1.avg, losses_2.avg, net1_acc.cpu().numpy(), net2_acc.cpu().numpy()


def test(model):
    print('Testing:')
    # switch to evaluate mode
    model.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(test_loader):
            input, target = input.to(device), target.to(device)

            # compute output
            _,_,_,_,output = model(input)
            loss = F.cross_entropy(output, target)

            output = output.float()
            loss = loss.float()

            # measure accuracy and record loss
            test_acc = accuracy(output.data, target.data)[0]
            losses.update(loss.item(), input.size(0))
            top1.update(test_acc, input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                log_out('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          i, len(test_loader), batch_time=batch_time, loss=losses,
                          top1=top1))

    log_out(' * {0} Prec@1 {top1.avg:.3f}'.format(model.model_name, top1=top1))

    return losses.avg, test_acc.cpu().numpy(), top1.avg.cpu().numpy()


print('*-----------------DML----------------*')
best_acc = 0
for epoch in range(1, args.epochs + 1):
    log_out("\n===> epoch: {}/{}".format(epoch, args.epochs))
#     log_out('current lr {:.5e}'.format(optimizer_1.param_groups[0]['lr']))
    lr_scheduler_1.step()
    lr_scheduler_2.step()
    train_loss_1, train_loss_2, net1_acc, net2_acc = train(epoch, net1, net2)
    # visaulize loss
    vis.line(np.column_stack((train_loss_1, train_loss_2)), np.column_stack((epoch, epoch)), loss_win, update="append")
    _, test_acc_1, top1_1 = test(net1)
    _, test_acc_2, top1_2 = test(net2)
    vis.line(np.column_stack((top1_1, top1_2)), np.column_stack((epoch, epoch)), acc_win, update="append")
    best_acc = max(top1_1, top1_2, best_acc)

log_out("@ BEST Prec: {:.4f}".format(best_acc))
log_out("--- {:.3f} mins ---".format((time.time() - start_time)/60))


  init.kaiming_normal(m.weight)


Files already downloaded and verified
*-----------------DML----------------*

===> epoch: 1/200
Training:
[0/391]	Time 0.057 (0.057)	Data 0.019 (0.019)	Loss 2.2688 (2.2688)	Prec@1 (10.938)




[40/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 1.7698 (1.9370)	Prec@1 (18.674)
[80/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 1.5639 (1.8033)	Prec@1 (23.775)
[120/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 1.5513 (1.7226)	Prec@1 (27.014)
[160/391]	Time 0.052 (0.051)	Data 0.019 (0.018)	Loss 1.4284 (1.6573)	Prec@1 (29.746)
[200/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 1.3395 (1.6031)	Prec@1 (32.008)
[240/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 1.1944 (1.5540)	Prec@1 (34.112)
[280/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 1.3028 (1.5202)	Prec@1 (35.657)
[320/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 1.2896 (1.4846)	Prec@1 (37.191)
[360/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 1.2679 (1.4511)	Prec@1 (38.690)
Testing:
Test: [0/79]	Time 0.017 (0.017)	Loss 1.7067 (1.7067)	Prec@1 44.531 (44.531)
Test: [40/79]	Time 0.017 (0.017)	Loss 1.9192 (1.7234)	Prec@1 35.156 (43.083)
 * ResNet8 Prec@1 42.370
Testing:
Test: [0/79]	Time 0.017 (0.017)	Loss 1.99

Testing:
Test: [0/79]	Time 0.018 (0.018)	Loss 1.1559 (1.1559)	Prec@1 59.375 (59.375)
Test: [40/79]	Time 0.018 (0.017)	Loss 1.1381 (1.1310)	Prec@1 60.156 (61.643)
 * ResNet8 Prec@1 61.330
Testing:
Test: [0/79]	Time 0.017 (0.017)	Loss 1.0789 (1.0789)	Prec@1 64.062 (64.062)
Test: [40/79]	Time 0.016 (0.017)	Loss 1.1719 (1.0656)	Prec@1 58.594 (63.053)
 * ResNet8 Prec@1 63.010

===> epoch: 8/200
Training:
[0/391]	Time 0.052 (0.052)	Data 0.019 (0.019)	Loss 0.5770 (0.5770)	Prec@1 (76.562)
[40/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 0.4954 (0.6369)	Prec@1 (72.828)
[80/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 0.6666 (0.6495)	Prec@1 (72.637)
[120/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 0.6475 (0.6408)	Prec@1 (72.998)
[160/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 0.7608 (0.6391)	Prec@1 (73.137)
[200/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 0.6128 (0.6380)	Prec@1 (73.197)
[240/391]	Time 0.051 (0.051)	Data 0.018 (0.018)	Loss 0.6996 (0.6378)	Prec@1 (73.117)
[280/3

[160/391]	Time 0.052 (0.052)	Data 0.019 (0.018)	Loss 0.3517 (0.5405)	Prec@1 (76.854)
[200/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.6590 (0.5424)	Prec@1 (76.765)
[240/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.4526 (0.5472)	Prec@1 (76.559)
[280/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.4772 (0.5441)	Prec@1 (76.765)
[320/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.7697 (0.5443)	Prec@1 (76.762)
[360/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.5341 (0.5457)	Prec@1 (76.712)
Testing:
Test: [0/79]	Time 0.017 (0.017)	Loss 0.8511 (0.8511)	Prec@1 73.438 (73.438)
Test: [40/79]	Time 0.017 (0.017)	Loss 0.9709 (0.8568)	Prec@1 63.281 (71.284)
 * ResNet8 Prec@1 71.420
Testing:
Test: [0/79]	Time 0.018 (0.018)	Loss 0.6973 (0.6973)	Prec@1 77.344 (77.344)
Test: [40/79]	Time 0.027 (0.017)	Loss 0.8350 (0.7460)	Prec@1 69.531 (74.505)
 * ResNet8 Prec@1 74.670

===> epoch: 15/200
Training:
[0/391]	Time 0.054 (0.054)	Data 0.020 (0.020)	Loss 0.5101 (0.5101)	Prec@1 (79.688)
[40

Test: [40/79]	Time 0.016 (0.017)	Loss 0.9110 (0.8522)	Prec@1 75.000 (71.399)
 * ResNet8 Prec@1 71.570

===> epoch: 21/200
Training:
[0/391]	Time 0.053 (0.053)	Data 0.020 (0.020)	Loss 0.5344 (0.5344)	Prec@1 (76.562)
[40/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.4851 (0.5077)	Prec@1 (77.458)
[80/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.5313 (0.5204)	Prec@1 (77.074)
[120/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.6372 (0.5268)	Prec@1 (76.853)
[160/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.5161 (0.5191)	Prec@1 (77.247)
[200/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4597 (0.5167)	Prec@1 (77.309)
[240/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4811 (0.5174)	Prec@1 (77.396)
[280/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.5043 (0.5136)	Prec@1 (77.575)
[320/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4778 (0.5119)	Prec@1 (77.702)
[360/391]	Time 0.053 (0.052)	Data 0.019 (0.018)	Loss 0.6168 (0.5132)	Prec@1 (77.679)
Testing:
Test: [0/79]	

[280/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.6833 (0.4987)	Prec@1 (78.389)
[320/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4256 (0.5006)	Prec@1 (78.290)
[360/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.5800 (0.5021)	Prec@1 (78.233)
Testing:
Test: [0/79]	Time 0.017 (0.017)	Loss 1.3164 (1.3164)	Prec@1 60.938 (60.938)
Test: [40/79]	Time 0.017 (0.017)	Loss 1.2732 (1.1640)	Prec@1 60.938 (61.319)
 * ResNet8 Prec@1 61.780
Testing:
Test: [0/79]	Time 0.018 (0.018)	Loss 1.3713 (1.3713)	Prec@1 55.469 (55.469)
Test: [40/79]	Time 0.017 (0.017)	Loss 1.5144 (1.4199)	Prec@1 56.250 (56.441)
 * ResNet8 Prec@1 56.310

===> epoch: 28/200
Training:
[0/391]	Time 0.058 (0.058)	Data 0.019 (0.019)	Loss 0.4925 (0.4925)	Prec@1 (79.688)
[40/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.5105 (0.5064)	Prec@1 (78.392)
[80/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4939 (0.5036)	Prec@1 (78.443)
[120/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.4372 (0.4952)	Prec@1 (78.622)
[160/

[40/391]	Time 0.052 (0.053)	Data 0.018 (0.018)	Loss 0.4384 (0.4919)	Prec@1 (78.678)
[80/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4702 (0.4905)	Prec@1 (78.752)
[120/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4024 (0.4884)	Prec@1 (78.822)
[160/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.4875 (0.4849)	Prec@1 (78.994)
[200/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4470 (0.4892)	Prec@1 (78.813)
[240/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.5300 (0.4914)	Prec@1 (78.686)
[280/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.5513 (0.4932)	Prec@1 (78.581)
[320/391]	Time 0.054 (0.052)	Data 0.020 (0.018)	Loss 0.5799 (0.4951)	Prec@1 (78.548)
[360/391]	Time 0.054 (0.052)	Data 0.020 (0.018)	Loss 0.4121 (0.4921)	Prec@1 (78.653)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.6211 (0.6211)	Prec@1 77.344 (77.344)
Test: [40/79]	Time 0.018 (0.019)	Loss 0.7772 (0.6948)	Prec@1 71.875 (75.629)
 * ResNet8 Prec@1 75.500
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.95

Testing:
Test: [0/79]	Time 0.017 (0.017)	Loss 0.6177 (0.6177)	Prec@1 77.344 (77.344)
Test: [40/79]	Time 0.016 (0.017)	Loss 0.7569 (0.6570)	Prec@1 72.656 (77.839)
 * ResNet8 Prec@1 77.660
Testing:
Test: [0/79]	Time 0.017 (0.017)	Loss 0.6799 (0.6799)	Prec@1 75.000 (75.000)
Test: [40/79]	Time 0.016 (0.017)	Loss 0.8109 (0.7387)	Prec@1 68.750 (75.057)
 * ResNet8 Prec@1 74.560

===> epoch: 41/200
Training:
[0/391]	Time 0.053 (0.053)	Data 0.019 (0.019)	Loss 0.7008 (0.7008)	Prec@1 (75.000)
[40/391]	Time 0.052 (0.053)	Data 0.018 (0.018)	Loss 0.4845 (0.4846)	Prec@1 (79.135)
[80/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.4986 (0.4708)	Prec@1 (79.572)
[120/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4608 (0.4775)	Prec@1 (79.326)
[160/391]	Time 0.052 (0.052)	Data 0.018 (0.018)	Loss 0.4639 (0.4817)	Prec@1 (79.227)
[200/391]	Time 0.051 (0.052)	Data 0.018 (0.018)	Loss 0.5558 (0.4865)	Prec@1 (79.000)
[240/391]	Time 0.054 (0.052)	Data 0.018 (0.018)	Loss 0.4247 (0.4872)	Prec@1 (78.987)
[280/

[160/391]	Time 0.054 (0.053)	Data 0.018 (0.019)	Loss 0.5633 (0.4707)	Prec@1 (79.348)
[200/391]	Time 0.055 (0.054)	Data 0.020 (0.019)	Loss 0.5013 (0.4723)	Prec@1 (79.338)
[240/391]	Time 0.054 (0.054)	Data 0.019 (0.019)	Loss 0.5863 (0.4729)	Prec@1 (79.211)
[280/391]	Time 0.054 (0.054)	Data 0.019 (0.019)	Loss 0.4189 (0.4726)	Prec@1 (79.201)
[320/391]	Time 0.054 (0.054)	Data 0.020 (0.019)	Loss 0.5206 (0.4735)	Prec@1 (79.101)
[360/391]	Time 0.052 (0.054)	Data 0.018 (0.019)	Loss 0.4205 (0.4769)	Prec@1 (79.021)
Testing:
Test: [0/79]	Time 0.018 (0.018)	Loss 1.0556 (1.0556)	Prec@1 64.062 (64.062)
Test: [40/79]	Time 0.017 (0.017)	Loss 1.3620 (1.0740)	Prec@1 59.375 (67.283)
 * ResNet8 Prec@1 67.860
Testing:
Test: [0/79]	Time 0.017 (0.017)	Loss 0.8018 (0.8018)	Prec@1 70.312 (70.312)
Test: [40/79]	Time 0.017 (0.017)	Loss 0.8591 (0.7168)	Prec@1 73.438 (76.067)
 * ResNet8 Prec@1 75.950

===> epoch: 48/200
Training:
[0/391]	Time 0.058 (0.058)	Data 0.020 (0.020)	Loss 0.4701 (0.4701)	Prec@1 (78.906)
[40

Test: [40/79]	Time 0.021 (0.020)	Loss 0.9217 (0.7311)	Prec@1 67.188 (75.419)
 * ResNet8 Prec@1 75.370

===> epoch: 54/200
Training:
[0/391]	Time 0.078 (0.078)	Data 0.021 (0.021)	Loss 0.4063 (0.4063)	Prec@1 (82.031)
[40/391]	Time 0.099 (0.097)	Data 0.021 (0.020)	Loss 0.4645 (0.4512)	Prec@1 (79.954)
[80/391]	Time 0.093 (0.097)	Data 0.021 (0.020)	Loss 0.6479 (0.4630)	Prec@1 (79.581)
[120/391]	Time 0.105 (0.097)	Data 0.020 (0.020)	Loss 0.4622 (0.4670)	Prec@1 (79.571)
[160/391]	Time 0.102 (0.099)	Data 0.019 (0.020)	Loss 0.5196 (0.4643)	Prec@1 (79.615)
[200/391]	Time 0.103 (0.099)	Data 0.019 (0.020)	Loss 0.4456 (0.4672)	Prec@1 (79.458)
[240/391]	Time 0.103 (0.099)	Data 0.020 (0.020)	Loss 0.5352 (0.4704)	Prec@1 (79.360)
[280/391]	Time 0.098 (0.099)	Data 0.019 (0.020)	Loss 0.4596 (0.4730)	Prec@1 (79.268)
[320/391]	Time 0.106 (0.100)	Data 0.020 (0.020)	Loss 0.5447 (0.4745)	Prec@1 (79.274)
[360/391]	Time 0.105 (0.100)	Data 0.020 (0.020)	Loss 0.3760 (0.4725)	Prec@1 (79.298)
Testing:
Test: [0/79]	

[280/391]	Time 0.106 (0.099)	Data 0.020 (0.020)	Loss 0.5526 (0.4791)	Prec@1 (79.187)
[320/391]	Time 0.105 (0.099)	Data 0.020 (0.020)	Loss 0.6126 (0.4763)	Prec@1 (79.283)
[360/391]	Time 0.102 (0.099)	Data 0.020 (0.020)	Loss 0.5344 (0.4764)	Prec@1 (79.328)
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.6672 (0.6672)	Prec@1 75.781 (75.781)
Test: [40/79]	Time 0.019 (0.023)	Loss 0.7282 (0.7018)	Prec@1 75.781 (75.648)
 * ResNet8 Prec@1 75.310
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.6034 (0.6034)	Prec@1 78.125 (78.125)
Test: [40/79]	Time 0.018 (0.019)	Loss 0.8641 (0.7127)	Prec@1 71.094 (75.343)
 * ResNet8 Prec@1 75.010

===> epoch: 61/200
Training:
[0/391]	Time 0.095 (0.095)	Data 0.021 (0.021)	Loss 0.4832 (0.4832)	Prec@1 (81.250)
[40/391]	Time 0.105 (0.099)	Data 0.020 (0.020)	Loss 0.4784 (0.4402)	Prec@1 (80.507)
[80/391]	Time 0.104 (0.101)	Data 0.019 (0.020)	Loss 0.4139 (0.4541)	Prec@1 (79.832)
[120/391]	Time 0.106 (0.101)	Data 0.019 (0.020)	Loss 0.4311 (0.4571)	Prec@1 (79.752)
[160/

[40/391]	Time 0.095 (0.096)	Data 0.021 (0.021)	Loss 0.3759 (0.4806)	Prec@1 (78.944)
[80/391]	Time 0.098 (0.098)	Data 0.020 (0.021)	Loss 0.4288 (0.4758)	Prec@1 (79.340)
[120/391]	Time 0.096 (0.099)	Data 0.020 (0.021)	Loss 0.4495 (0.4793)	Prec@1 (79.171)
[160/391]	Time 0.094 (0.100)	Data 0.019 (0.021)	Loss 0.5079 (0.4795)	Prec@1 (79.139)
[200/391]	Time 0.095 (0.100)	Data 0.019 (0.020)	Loss 0.5335 (0.4708)	Prec@1 (79.454)
[240/391]	Time 0.092 (0.100)	Data 0.019 (0.020)	Loss 0.3867 (0.4666)	Prec@1 (79.606)
[280/391]	Time 0.095 (0.100)	Data 0.020 (0.020)	Loss 0.3930 (0.4670)	Prec@1 (79.523)
[320/391]	Time 0.073 (0.100)	Data 0.020 (0.020)	Loss 0.3514 (0.4653)	Prec@1 (79.634)
[360/391]	Time 0.105 (0.100)	Data 0.020 (0.020)	Loss 0.3731 (0.4607)	Prec@1 (79.770)
Testing:
Test: [0/79]	Time 0.021 (0.021)	Loss 0.7138 (0.7138)	Prec@1 75.781 (75.781)
Test: [40/79]	Time 0.019 (0.021)	Loss 0.8298 (0.7955)	Prec@1 71.875 (72.847)
 * ResNet8 Prec@1 72.830
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.81

Testing:
Test: [0/79]	Time 0.022 (0.022)	Loss 0.5487 (0.5487)	Prec@1 82.031 (82.031)
Test: [40/79]	Time 0.021 (0.022)	Loss 0.8099 (0.6770)	Prec@1 75.000 (76.677)
 * ResNet8 Prec@1 76.940
Testing:
Test: [0/79]	Time 0.023 (0.023)	Loss 0.8119 (0.8119)	Prec@1 73.438 (73.438)
Test: [40/79]	Time 0.020 (0.022)	Loss 1.1849 (0.9477)	Prec@1 67.188 (69.741)
 * ResNet8 Prec@1 69.560

===> epoch: 74/200
Training:
[0/391]	Time 0.096 (0.096)	Data 0.023 (0.023)	Loss 0.5006 (0.5006)	Prec@1 (83.594)
[40/391]	Time 0.061 (0.070)	Data 0.020 (0.021)	Loss 0.4286 (0.4597)	Prec@1 (79.478)
[80/391]	Time 0.105 (0.079)	Data 0.021 (0.021)	Loss 0.4123 (0.4606)	Prec@1 (79.745)
[120/391]	Time 0.106 (0.087)	Data 0.021 (0.021)	Loss 0.5412 (0.4565)	Prec@1 (79.739)
[160/391]	Time 0.105 (0.091)	Data 0.020 (0.020)	Loss 0.4737 (0.4569)	Prec@1 (79.828)
[200/391]	Time 0.106 (0.093)	Data 0.020 (0.020)	Loss 0.5568 (0.4596)	Prec@1 (79.730)
[240/391]	Time 0.105 (0.094)	Data 0.019 (0.020)	Loss 0.4157 (0.4653)	Prec@1 (79.499)
[280/

[160/391]	Time 0.106 (0.102)	Data 0.019 (0.020)	Loss 0.5877 (0.4586)	Prec@1 (80.061)
[200/391]	Time 0.104 (0.102)	Data 0.020 (0.020)	Loss 0.4901 (0.4606)	Prec@1 (79.983)
[240/391]	Time 0.102 (0.102)	Data 0.019 (0.020)	Loss 0.4154 (0.4634)	Prec@1 (79.830)
[280/391]	Time 0.101 (0.102)	Data 0.021 (0.020)	Loss 0.5544 (0.4638)	Prec@1 (79.807)
[320/391]	Time 0.107 (0.102)	Data 0.020 (0.020)	Loss 0.3770 (0.4679)	Prec@1 (79.622)
[360/391]	Time 0.106 (0.102)	Data 0.020 (0.020)	Loss 0.4092 (0.4684)	Prec@1 (79.599)
Testing:
Test: [0/79]	Time 0.024 (0.024)	Loss 0.7390 (0.7390)	Prec@1 71.875 (71.875)
Test: [40/79]	Time 0.019 (0.021)	Loss 0.7265 (0.8635)	Prec@1 74.219 (71.608)
 * ResNet8 Prec@1 71.250
Testing:
Test: [0/79]	Time 0.024 (0.024)	Loss 0.6503 (0.6503)	Prec@1 79.688 (79.688)
Test: [40/79]	Time 0.019 (0.021)	Loss 0.7237 (0.7050)	Prec@1 75.781 (76.620)
 * ResNet8 Prec@1 76.340

===> epoch: 81/200
Training:
[0/391]	Time 0.098 (0.098)	Data 0.022 (0.022)	Loss 0.5560 (0.5560)	Prec@1 (77.344)
[40

Test: [40/79]	Time 0.022 (0.021)	Loss 0.9635 (0.8738)	Prec@1 67.969 (70.217)
 * ResNet8 Prec@1 70.350

===> epoch: 87/200
Training:
[0/391]	Time 0.100 (0.100)	Data 0.021 (0.021)	Loss 0.4194 (0.4194)	Prec@1 (78.906)
[40/391]	Time 0.099 (0.101)	Data 0.020 (0.020)	Loss 0.4860 (0.4532)	Prec@1 (79.726)
[80/391]	Time 0.099 (0.102)	Data 0.019 (0.020)	Loss 0.4276 (0.4564)	Prec@1 (79.620)
[120/391]	Time 0.098 (0.102)	Data 0.020 (0.020)	Loss 0.5045 (0.4506)	Prec@1 (79.946)
[160/391]	Time 0.101 (0.102)	Data 0.019 (0.020)	Loss 0.4739 (0.4511)	Prec@1 (80.047)
[200/391]	Time 0.100 (0.102)	Data 0.020 (0.020)	Loss 0.5148 (0.4586)	Prec@1 (79.684)
[240/391]	Time 0.102 (0.102)	Data 0.019 (0.020)	Loss 0.4182 (0.4590)	Prec@1 (79.619)
[280/391]	Time 0.098 (0.102)	Data 0.020 (0.020)	Loss 0.5193 (0.4606)	Prec@1 (79.554)
[320/391]	Time 0.100 (0.102)	Data 0.020 (0.020)	Loss 0.2888 (0.4599)	Prec@1 (79.605)
[360/391]	Time 0.096 (0.102)	Data 0.019 (0.020)	Loss 0.4771 (0.4626)	Prec@1 (79.530)
Testing:
Test: [0/79]	

[280/391]	Time 0.108 (0.102)	Data 0.022 (0.020)	Loss 0.5782 (0.4604)	Prec@1 (80.024)
[320/391]	Time 0.105 (0.102)	Data 0.021 (0.021)	Loss 0.5457 (0.4615)	Prec@1 (79.997)
[360/391]	Time 0.110 (0.103)	Data 0.021 (0.021)	Loss 0.4482 (0.4642)	Prec@1 (79.861)
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 1.1355 (1.1355)	Prec@1 71.094 (71.094)
Test: [40/79]	Time 0.019 (0.022)	Loss 1.2049 (1.1710)	Prec@1 64.844 (64.882)
 * ResNet8 Prec@1 64.740
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.8997 (0.8997)	Prec@1 70.312 (70.312)
Test: [40/79]	Time 0.020 (0.021)	Loss 1.0503 (0.9114)	Prec@1 70.312 (69.703)
 * ResNet8 Prec@1 69.830

===> epoch: 94/200
Training:
[0/391]	Time 0.099 (0.099)	Data 0.023 (0.023)	Loss 0.4706 (0.4706)	Prec@1 (76.562)
[40/391]	Time 0.098 (0.103)	Data 0.023 (0.021)	Loss 0.5093 (0.4409)	Prec@1 (80.545)
[80/391]	Time 0.100 (0.103)	Data 0.021 (0.021)	Loss 0.3525 (0.4509)	Prec@1 (80.257)
[120/391]	Time 0.098 (0.103)	Data 0.020 (0.021)	Loss 0.4110 (0.4557)	Prec@1 (80.198)
[160/

[40/391]	Time 0.103 (0.103)	Data 0.020 (0.020)	Loss 0.4802 (0.4120)	Prec@1 (81.460)
[80/391]	Time 0.105 (0.102)	Data 0.019 (0.020)	Loss 0.3067 (0.3748)	Prec@1 (83.102)
[120/391]	Time 0.105 (0.102)	Data 0.019 (0.020)	Loss 0.3424 (0.3596)	Prec@1 (83.684)
[160/391]	Time 0.104 (0.102)	Data 0.019 (0.020)	Loss 0.3303 (0.3510)	Prec@1 (83.967)
[200/391]	Time 0.101 (0.102)	Data 0.019 (0.019)	Loss 0.2846 (0.3453)	Prec@1 (84.188)
[240/391]	Time 0.104 (0.102)	Data 0.019 (0.019)	Loss 0.2618 (0.3403)	Prec@1 (84.317)
[280/391]	Time 0.105 (0.102)	Data 0.019 (0.019)	Loss 0.3348 (0.3373)	Prec@1 (84.456)
[320/391]	Time 0.101 (0.102)	Data 0.019 (0.019)	Loss 0.4798 (0.3334)	Prec@1 (84.545)
[360/391]	Time 0.101 (0.102)	Data 0.019 (0.019)	Loss 0.2116 (0.3290)	Prec@1 (84.708)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.3797 (0.3797)	Prec@1 84.375 (84.375)
Test: [40/79]	Time 0.018 (0.020)	Loss 0.4868 (0.4443)	Prec@1 82.812 (84.737)
 * ResNet8 Prec@1 85.040
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.30

Testing:
Test: [0/79]	Time 0.018 (0.018)	Loss 0.3687 (0.3687)	Prec@1 88.281 (88.281)
Test: [40/79]	Time 0.018 (0.019)	Loss 0.5347 (0.4192)	Prec@1 83.594 (85.614)
 * ResNet8 Prec@1 85.920
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.2937 (0.2937)	Prec@1 85.938 (85.938)
Test: [40/79]	Time 0.019 (0.020)	Loss 0.5628 (0.4247)	Prec@1 82.812 (85.671)
 * ResNet8 Prec@1 85.720

===> epoch: 107/200
Training:
[0/391]	Time 0.100 (0.100)	Data 0.020 (0.020)	Loss 0.2379 (0.2379)	Prec@1 (89.062)
[40/391]	Time 0.096 (0.101)	Data 0.019 (0.019)	Loss 0.4116 (0.2489)	Prec@1 (87.500)
[80/391]	Time 0.096 (0.101)	Data 0.019 (0.019)	Loss 0.1425 (0.2400)	Prec@1 (87.577)
[120/391]	Time 0.098 (0.101)	Data 0.019 (0.019)	Loss 0.1434 (0.2405)	Prec@1 (87.726)
[160/391]	Time 0.098 (0.102)	Data 0.019 (0.019)	Loss 0.3005 (0.2399)	Prec@1 (87.777)
[200/391]	Time 0.099 (0.102)	Data 0.019 (0.019)	Loss 0.3160 (0.2405)	Prec@1 (87.893)
[240/391]	Time 0.095 (0.101)	Data 0.019 (0.019)	Loss 0.1315 (0.2429)	Prec@1 (87.808)
[280

[160/391]	Time 0.098 (0.102)	Data 0.020 (0.020)	Loss 0.3127 (0.2247)	Prec@1 (88.276)
[200/391]	Time 0.103 (0.102)	Data 0.021 (0.021)	Loss 0.1663 (0.2279)	Prec@1 (88.145)
[240/391]	Time 0.105 (0.102)	Data 0.020 (0.021)	Loss 0.1324 (0.2287)	Prec@1 (88.090)
[280/391]	Time 0.107 (0.102)	Data 0.020 (0.021)	Loss 0.2115 (0.2265)	Prec@1 (88.162)
[320/391]	Time 0.095 (0.102)	Data 0.020 (0.021)	Loss 0.1953 (0.2283)	Prec@1 (88.108)
[360/391]	Time 0.104 (0.102)	Data 0.020 (0.021)	Loss 0.2380 (0.2304)	Prec@1 (88.015)
Testing:
Test: [0/79]	Time 0.021 (0.021)	Loss 0.3467 (0.3467)	Prec@1 89.844 (89.844)
Test: [40/79]	Time 0.020 (0.021)	Loss 0.4652 (0.4310)	Prec@1 82.812 (85.766)
 * ResNet8 Prec@1 85.760
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.2608 (0.2608)	Prec@1 92.188 (92.188)
Test: [40/79]	Time 0.024 (0.021)	Loss 0.5703 (0.4158)	Prec@1 85.156 (86.338)
 * ResNet8 Prec@1 85.950

===> epoch: 114/200
Training:
[0/391]	Time 0.102 (0.102)	Data 0.022 (0.022)	Loss 0.1926 (0.1926)	Prec@1 (87.500)
[4

Test: [40/79]	Time 0.021 (0.021)	Loss 0.5698 (0.4211)	Prec@1 80.469 (86.204)
 * ResNet8 Prec@1 86.100

===> epoch: 120/200
Training:
[0/391]	Time 0.108 (0.108)	Data 0.022 (0.022)	Loss 0.2363 (0.2363)	Prec@1 (89.062)
[40/391]	Time 0.102 (0.101)	Data 0.020 (0.020)	Loss 0.1879 (0.2056)	Prec@1 (88.815)
[80/391]	Time 0.098 (0.102)	Data 0.020 (0.020)	Loss 0.3017 (0.2106)	Prec@1 (88.764)
[120/391]	Time 0.098 (0.102)	Data 0.020 (0.020)	Loss 0.1913 (0.2082)	Prec@1 (88.759)
[160/391]	Time 0.098 (0.102)	Data 0.019 (0.020)	Loss 0.2415 (0.2140)	Prec@1 (88.669)
[200/391]	Time 0.096 (0.102)	Data 0.019 (0.020)	Loss 0.1279 (0.2168)	Prec@1 (88.604)
[240/391]	Time 0.107 (0.102)	Data 0.021 (0.020)	Loss 0.2100 (0.2200)	Prec@1 (88.511)
[280/391]	Time 0.099 (0.102)	Data 0.020 (0.020)	Loss 0.2433 (0.2202)	Prec@1 (88.551)
[320/391]	Time 0.100 (0.102)	Data 0.020 (0.020)	Loss 0.3831 (0.2204)	Prec@1 (88.544)
[360/391]	Time 0.100 (0.102)	Data 0.020 (0.020)	Loss 0.2521 (0.2208)	Prec@1 (88.506)
Testing:
Test: [0/79]

[280/391]	Time 0.096 (0.103)	Data 0.021 (0.020)	Loss 0.1963 (0.2266)	Prec@1 (88.284)
[320/391]	Time 0.098 (0.103)	Data 0.021 (0.020)	Loss 0.1632 (0.2264)	Prec@1 (88.240)
[360/391]	Time 0.098 (0.103)	Data 0.020 (0.020)	Loss 0.1705 (0.2253)	Prec@1 (88.270)
Testing:
Test: [0/79]	Time 0.022 (0.022)	Loss 0.4076 (0.4076)	Prec@1 88.281 (88.281)
Test: [40/79]	Time 0.020 (0.021)	Loss 0.5610 (0.4835)	Prec@1 81.250 (84.032)
 * ResNet8 Prec@1 83.800
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.3037 (0.3037)	Prec@1 89.062 (89.062)
Test: [40/79]	Time 0.024 (0.021)	Loss 0.5774 (0.4266)	Prec@1 82.812 (85.271)
 * ResNet8 Prec@1 85.370

===> epoch: 127/200
Training:
[0/391]	Time 0.096 (0.096)	Data 0.021 (0.021)	Loss 0.1531 (0.1531)	Prec@1 (92.188)
[40/391]	Time 0.098 (0.101)	Data 0.019 (0.020)	Loss 0.1677 (0.2334)	Prec@1 (87.862)
[80/391]	Time 0.098 (0.102)	Data 0.019 (0.020)	Loss 0.2538 (0.2292)	Prec@1 (88.108)
[120/391]	Time 0.101 (0.102)	Data 0.019 (0.020)	Loss 0.1260 (0.2286)	Prec@1 (88.236)
[160

[40/391]	Time 0.092 (0.100)	Data 0.020 (0.020)	Loss 0.1868 (0.2097)	Prec@1 (88.796)
[80/391]	Time 0.096 (0.100)	Data 0.020 (0.020)	Loss 0.3039 (0.2150)	Prec@1 (88.715)
[120/391]	Time 0.095 (0.100)	Data 0.019 (0.020)	Loss 0.2005 (0.2183)	Prec@1 (88.714)
[160/391]	Time 0.098 (0.100)	Data 0.020 (0.020)	Loss 0.2197 (0.2160)	Prec@1 (88.815)
[200/391]	Time 0.098 (0.101)	Data 0.021 (0.020)	Loss 0.2265 (0.2179)	Prec@1 (88.670)
[240/391]	Time 0.098 (0.101)	Data 0.020 (0.020)	Loss 0.2405 (0.2195)	Prec@1 (88.586)
[280/391]	Time 0.096 (0.102)	Data 0.020 (0.020)	Loss 0.3081 (0.2193)	Prec@1 (88.607)
[320/391]	Time 0.097 (0.102)	Data 0.020 (0.020)	Loss 0.2414 (0.2209)	Prec@1 (88.517)
[360/391]	Time 0.095 (0.102)	Data 0.019 (0.020)	Loss 0.1977 (0.2248)	Prec@1 (88.374)
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.4737 (0.4737)	Prec@1 83.594 (83.594)
Test: [40/79]	Time 0.019 (0.021)	Loss 0.6716 (0.4875)	Prec@1 82.031 (84.070)
 * ResNet8 Prec@1 84.220
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.42

Testing:
Test: [0/79]	Time 0.024 (0.024)	Loss 0.4069 (0.4069)	Prec@1 85.938 (85.938)
Test: [40/79]	Time 0.030 (0.020)	Loss 0.5407 (0.4791)	Prec@1 82.031 (83.975)
 * ResNet8 Prec@1 83.980
Testing:
Test: [0/79]	Time 0.021 (0.021)	Loss 0.3531 (0.3531)	Prec@1 86.719 (86.719)
Test: [40/79]	Time 0.018 (0.020)	Loss 0.5311 (0.4469)	Prec@1 83.594 (84.985)
 * ResNet8 Prec@1 84.710

===> epoch: 140/200
Training:
[0/391]	Time 0.090 (0.090)	Data 0.021 (0.021)	Loss 0.2487 (0.2487)	Prec@1 (86.719)
[40/391]	Time 0.097 (0.102)	Data 0.020 (0.020)	Loss 0.1791 (0.1990)	Prec@1 (89.005)
[80/391]	Time 0.098 (0.102)	Data 0.020 (0.020)	Loss 0.1721 (0.2027)	Prec@1 (89.053)
[120/391]	Time 0.098 (0.103)	Data 0.025 (0.021)	Loss 0.2832 (0.2094)	Prec@1 (88.759)
[160/391]	Time 0.098 (0.103)	Data 0.020 (0.021)	Loss 0.2515 (0.2149)	Prec@1 (88.548)
[200/391]	Time 0.096 (0.102)	Data 0.020 (0.021)	Loss 0.3034 (0.2210)	Prec@1 (88.351)
[240/391]	Time 0.078 (0.102)	Data 0.019 (0.021)	Loss 0.2250 (0.2248)	Prec@1 (88.203)
[280

[160/391]	Time 0.092 (0.102)	Data 0.020 (0.021)	Loss 0.1323 (0.2162)	Prec@1 (88.762)
[200/391]	Time 0.106 (0.102)	Data 0.020 (0.020)	Loss 0.2000 (0.2194)	Prec@1 (88.639)
[240/391]	Time 0.106 (0.102)	Data 0.020 (0.020)	Loss 0.2912 (0.2194)	Prec@1 (88.618)
[280/391]	Time 0.106 (0.102)	Data 0.020 (0.020)	Loss 0.3999 (0.2245)	Prec@1 (88.367)
[320/391]	Time 0.106 (0.102)	Data 0.019 (0.020)	Loss 0.2965 (0.2270)	Prec@1 (88.267)
[360/391]	Time 0.106 (0.102)	Data 0.020 (0.020)	Loss 0.3172 (0.2270)	Prec@1 (88.240)
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.3772 (0.3772)	Prec@1 84.375 (84.375)
Test: [40/79]	Time 0.019 (0.020)	Loss 0.5143 (0.4439)	Prec@1 82.031 (85.213)
 * ResNet8 Prec@1 85.130
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.3340 (0.3340)	Prec@1 88.281 (88.281)
Test: [40/79]	Time 0.023 (0.020)	Loss 0.5031 (0.4603)	Prec@1 85.938 (84.184)
 * ResNet8 Prec@1 84.570

===> epoch: 147/200
Training:
[0/391]	Time 0.104 (0.104)	Data 0.021 (0.021)	Loss 0.1539 (0.1539)	Prec@1 (92.969)
[4

Test: [40/79]	Time 0.020 (0.021)	Loss 0.4769 (0.3679)	Prec@1 82.031 (87.595)
 * ResNet8 Prec@1 87.700

===> epoch: 153/200
Training:
[0/391]	Time 0.093 (0.093)	Data 0.024 (0.024)	Loss 0.1044 (0.1044)	Prec@1 (92.969)
[40/391]	Time 0.096 (0.100)	Data 0.020 (0.021)	Loss 0.0858 (0.1371)	Prec@1 (91.711)
[80/391]	Time 0.106 (0.101)	Data 0.020 (0.020)	Loss 0.1114 (0.1367)	Prec@1 (91.676)
[120/391]	Time 0.105 (0.102)	Data 0.020 (0.021)	Loss 0.1493 (0.1407)	Prec@1 (91.432)
[160/391]	Time 0.102 (0.102)	Data 0.020 (0.021)	Loss 0.0846 (0.1430)	Prec@1 (91.338)
[200/391]	Time 0.111 (0.102)	Data 0.034 (0.021)	Loss 0.1330 (0.1444)	Prec@1 (91.317)
[240/391]	Time 0.104 (0.102)	Data 0.023 (0.021)	Loss 0.0645 (0.1444)	Prec@1 (91.410)
[280/391]	Time 0.083 (0.102)	Data 0.020 (0.021)	Loss 0.1812 (0.1444)	Prec@1 (91.426)
[320/391]	Time 0.073 (0.100)	Data 0.020 (0.021)	Loss 0.1531 (0.1446)	Prec@1 (91.443)
[360/391]	Time 0.066 (0.097)	Data 0.021 (0.021)	Loss 0.1146 (0.1455)	Prec@1 (91.419)
Testing:
Test: [0/79]

[280/391]	Time 0.106 (0.102)	Data 0.022 (0.020)	Loss 0.1384 (0.1323)	Prec@1 (91.993)
[320/391]	Time 0.101 (0.102)	Data 0.020 (0.020)	Loss 0.0470 (0.1320)	Prec@1 (91.973)
[360/391]	Time 0.106 (0.103)	Data 0.020 (0.020)	Loss 0.1459 (0.1332)	Prec@1 (91.947)
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.3205 (0.3205)	Prec@1 89.844 (89.844)
Test: [40/79]	Time 0.024 (0.020)	Loss 0.4276 (0.3620)	Prec@1 87.500 (88.167)
 * ResNet8 Prec@1 88.130
Testing:
Test: [0/79]	Time 0.019 (0.019)	Loss 0.2434 (0.2434)	Prec@1 90.625 (90.625)
Test: [40/79]	Time 0.019 (0.019)	Loss 0.4884 (0.3712)	Prec@1 83.594 (87.633)
 * ResNet8 Prec@1 87.770

===> epoch: 160/200
Training:
[0/391]	Time 0.103 (0.103)	Data 0.021 (0.021)	Loss 0.2663 (0.2663)	Prec@1 (85.938)
[40/391]	Time 0.104 (0.103)	Data 0.020 (0.021)	Loss 0.1633 (0.1466)	Prec@1 (91.006)
[80/391]	Time 0.104 (0.103)	Data 0.020 (0.020)	Loss 0.1179 (0.1412)	Prec@1 (91.358)
[120/391]	Time 0.108 (0.103)	Data 0.023 (0.020)	Loss 0.2189 (0.1340)	Prec@1 (91.677)
[160

[40/391]	Time 0.106 (0.102)	Data 0.020 (0.020)	Loss 0.0771 (0.1225)	Prec@1 (92.340)
[80/391]	Time 0.103 (0.102)	Data 0.020 (0.020)	Loss 0.0659 (0.1329)	Prec@1 (91.763)
[120/391]	Time 0.106 (0.102)	Data 0.020 (0.020)	Loss 0.1935 (0.1301)	Prec@1 (91.910)
[160/391]	Time 0.105 (0.102)	Data 0.019 (0.020)	Loss 0.1245 (0.1292)	Prec@1 (91.867)
[200/391]	Time 0.105 (0.102)	Data 0.019 (0.020)	Loss 0.0133 (0.1284)	Prec@1 (91.908)
[240/391]	Time 0.096 (0.102)	Data 0.020 (0.020)	Loss 0.0587 (0.1286)	Prec@1 (91.918)
[280/391]	Time 0.103 (0.102)	Data 0.020 (0.020)	Loss 0.1186 (0.1290)	Prec@1 (91.932)
[320/391]	Time 0.107 (0.102)	Data 0.020 (0.020)	Loss 0.0842 (0.1282)	Prec@1 (92.012)
[360/391]	Time 0.101 (0.102)	Data 0.021 (0.020)	Loss 0.1046 (0.1272)	Prec@1 (92.036)
Testing:
Test: [0/79]	Time 0.021 (0.021)	Loss 0.2915 (0.2915)	Prec@1 91.406 (91.406)
Test: [40/79]	Time 0.020 (0.022)	Loss 0.4659 (0.3666)	Prec@1 87.500 (88.338)
 * ResNet8 Prec@1 88.280
Testing:
Test: [0/79]	Time 0.021 (0.021)	Loss 0.25

Testing:
Test: [0/79]	Time 0.021 (0.021)	Loss 0.3121 (0.3121)	Prec@1 90.625 (90.625)
Test: [40/79]	Time 0.021 (0.021)	Loss 0.4276 (0.3653)	Prec@1 87.500 (88.434)
 * ResNet8 Prec@1 88.140
Testing:
Test: [0/79]	Time 0.022 (0.022)	Loss 0.2364 (0.2364)	Prec@1 91.406 (91.406)
Test: [40/79]	Time 0.019 (0.020)	Loss 0.4932 (0.3691)	Prec@1 83.594 (87.843)
 * ResNet8 Prec@1 87.760

===> epoch: 173/200
Training:
[0/391]	Time 0.105 (0.105)	Data 0.022 (0.022)	Loss 0.1112 (0.1112)	Prec@1 (92.969)
[40/391]	Time 0.106 (0.103)	Data 0.020 (0.020)	Loss 0.0891 (0.1185)	Prec@1 (92.245)
[80/391]	Time 0.103 (0.102)	Data 0.020 (0.020)	Loss 0.0830 (0.1222)	Prec@1 (91.917)
[120/391]	Time 0.103 (0.102)	Data 0.020 (0.020)	Loss 0.1237 (0.1220)	Prec@1 (92.000)
[160/391]	Time 0.106 (0.102)	Data 0.020 (0.020)	Loss 0.1321 (0.1224)	Prec@1 (92.047)
[200/391]	Time 0.104 (0.103)	Data 0.020 (0.020)	Loss 0.0771 (0.1234)	Prec@1 (92.020)
[240/391]	Time 0.106 (0.103)	Data 0.019 (0.020)	Loss 0.0663 (0.1257)	Prec@1 (91.941)
[280

[160/391]	Time 0.097 (0.103)	Data 0.019 (0.020)	Loss 0.0273 (0.1216)	Prec@1 (91.979)
[200/391]	Time 0.110 (0.103)	Data 0.026 (0.020)	Loss 0.0643 (0.1181)	Prec@1 (92.125)
[240/391]	Time 0.099 (0.103)	Data 0.023 (0.020)	Loss 0.0684 (0.1193)	Prec@1 (92.132)
[280/391]	Time 0.099 (0.103)	Data 0.021 (0.020)	Loss 0.1663 (0.1190)	Prec@1 (92.157)
[320/391]	Time 0.098 (0.103)	Data 0.020 (0.020)	Loss 0.0865 (0.1189)	Prec@1 (92.146)
[360/391]	Time 0.097 (0.103)	Data 0.020 (0.020)	Loss 0.1215 (0.1194)	Prec@1 (92.133)
Testing:
Test: [0/79]	Time 0.025 (0.025)	Loss 0.3333 (0.3333)	Prec@1 89.062 (89.062)
Test: [40/79]	Time 0.021 (0.022)	Loss 0.4253 (0.3651)	Prec@1 85.938 (88.453)
 * ResNet8 Prec@1 88.470
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.2458 (0.2458)	Prec@1 89.844 (89.844)
Test: [40/79]	Time 0.020 (0.021)	Loss 0.4438 (0.3658)	Prec@1 85.938 (87.729)
 * ResNet8 Prec@1 87.750

===> epoch: 180/200
Training:
[0/391]	Time 0.106 (0.106)	Data 0.021 (0.021)	Loss 0.1574 (0.1574)	Prec@1 (91.406)
[4

Test: [40/79]	Time 0.020 (0.020)	Loss 0.4775 (0.3779)	Prec@1 84.375 (87.367)
 * ResNet8 Prec@1 87.610

===> epoch: 186/200
Training:
[0/391]	Time 0.104 (0.104)	Data 0.022 (0.022)	Loss 0.1847 (0.1847)	Prec@1 (90.625)
[40/391]	Time 0.104 (0.099)	Data 0.020 (0.020)	Loss 0.1176 (0.1155)	Prec@1 (92.454)
[80/391]	Time 0.103 (0.100)	Data 0.021 (0.020)	Loss 0.1361 (0.1187)	Prec@1 (92.139)
[120/391]	Time 0.121 (0.101)	Data 0.021 (0.021)	Loss 0.0959 (0.1154)	Prec@1 (92.284)
[160/391]	Time 0.106 (0.102)	Data 0.020 (0.020)	Loss 0.2145 (0.1143)	Prec@1 (92.357)
[200/391]	Time 0.084 (0.100)	Data 0.021 (0.020)	Loss 0.0375 (0.1147)	Prec@1 (92.339)
[240/391]	Time 0.073 (0.097)	Data 0.020 (0.020)	Loss 0.1116 (0.1156)	Prec@1 (92.317)
[280/391]	Time 0.098 (0.093)	Data 0.020 (0.020)	Loss 0.0384 (0.1161)	Prec@1 (92.249)
[320/391]	Time 0.098 (0.095)	Data 0.021 (0.020)	Loss 0.1712 (0.1159)	Prec@1 (92.229)
[360/391]	Time 0.097 (0.095)	Data 0.020 (0.020)	Loss 0.1369 (0.1165)	Prec@1 (92.205)
Testing:
Test: [0/79]

[280/391]	Time 0.105 (0.101)	Data 0.019 (0.019)	Loss 0.1036 (0.1117)	Prec@1 (92.499)
[320/391]	Time 0.106 (0.101)	Data 0.019 (0.019)	Loss 0.0722 (0.1132)	Prec@1 (92.475)
[360/391]	Time 0.104 (0.101)	Data 0.019 (0.019)	Loss 0.1047 (0.1120)	Prec@1 (92.532)
Testing:
Test: [0/79]	Time 0.025 (0.025)	Loss 0.3663 (0.3663)	Prec@1 85.938 (85.938)
Test: [40/79]	Time 0.019 (0.020)	Loss 0.4589 (0.3804)	Prec@1 86.719 (87.729)
 * ResNet8 Prec@1 87.670
Testing:
Test: [0/79]	Time 0.020 (0.020)	Loss 0.2798 (0.2798)	Prec@1 89.062 (89.062)
Test: [40/79]	Time 0.020 (0.020)	Loss 0.4406 (0.3775)	Prec@1 84.375 (87.195)
 * ResNet8 Prec@1 87.370

===> epoch: 193/200
Training:
[0/391]	Time 0.078 (0.078)	Data 0.020 (0.020)	Loss 0.1086 (0.1086)	Prec@1 (91.406)
[40/391]	Time 0.095 (0.102)	Data 0.019 (0.020)	Loss 0.1427 (0.1061)	Prec@1 (92.721)
[80/391]	Time 0.096 (0.102)	Data 0.019 (0.019)	Loss 0.0929 (0.1040)	Prec@1 (92.901)
[120/391]	Time 0.096 (0.102)	Data 0.022 (0.019)	Loss 0.1618 (0.1049)	Prec@1 (92.891)
[160

[40/391]	Time 0.103 (0.102)	Data 0.020 (0.021)	Loss 0.1630 (0.0995)	Prec@1 (93.083)
[80/391]	Time 0.106 (0.103)	Data 0.019 (0.021)	Loss 0.0759 (0.1056)	Prec@1 (92.747)
[120/391]	Time 0.104 (0.101)	Data 0.019 (0.021)	Loss 0.0842 (0.1098)	Prec@1 (92.549)
[160/391]	Time 0.098 (0.101)	Data 0.019 (0.020)	Loss 0.0700 (0.1095)	Prec@1 (92.615)
[200/391]	Time 0.110 (0.102)	Data 0.024 (0.020)	Loss 0.0946 (0.1099)	Prec@1 (92.642)
[240/391]	Time 0.104 (0.102)	Data 0.019 (0.021)	Loss 0.1288 (0.1088)	Prec@1 (92.706)
[280/391]	Time 0.106 (0.102)	Data 0.022 (0.021)	Loss 0.1814 (0.1096)	Prec@1 (92.657)
[320/391]	Time 0.105 (0.102)	Data 0.020 (0.020)	Loss 0.0850 (0.1088)	Prec@1 (92.669)
[360/391]	Time 0.095 (0.102)	Data 0.019 (0.021)	Loss 0.0896 (0.1095)	Prec@1 (92.661)
Testing:
Test: [0/79]	Time 0.139 (0.139)	Loss 0.3883 (0.3883)	Prec@1 86.719 (86.719)
Test: [40/79]	Time 0.020 (0.029)	Loss 0.4719 (0.3747)	Prec@1 87.500 (87.843)
 * ResNet8 Prec@1 87.940
Testing:
Test: [0/79]	Time 0.029 (0.029)	Loss 0.28