In [None]:
import os
import sys
import torch
import argparse
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import cv2
import numpy as np
import PIL
from PIL import Image
import time
import logging
import argparse
from network import ShuffleNetV1
from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters

class OpencvResize(object):
    """Resize a PIL image with OpenCV so that its shorter side equals ``size``.

    The aspect ratio is kept: the longer side is scaled by the same factor and
    rounded to the nearest integer. Interpolation is ``cv2.INTER_LINEAR``.
    """

    def __init__(self, size=256):
        # Target length, in pixels, of the shorter image side.
        self.size = size

    def __call__(self, img):
        """Return a resized ``PIL.Image`` built from the PIL image ``img``."""
        assert isinstance(img, PIL.Image.Image)
        # PIL gives (H, W, 3) RGB; reverse the channel axis to get BGR.
        bgr = np.ascontiguousarray(np.asarray(img)[:, :, ::-1])
        height, width, _ = bgr.shape
        edge = self.size
        # cv2.resize takes its destination size as (width, height).
        if height < width:
            dsize = (int(edge / height * width + 0.5), edge)
        else:
            dsize = (edge, int(edge / width * height + 0.5))
        resized = cv2.resize(bgr, dsize, interpolation=cv2.INTER_LINEAR)
        # Back to RGB before wrapping the array in a PIL image again.
        rgb = np.ascontiguousarray(resized[:, :, ::-1])
        return Image.fromarray(rgb)

class ToBGRTensor(object):
    """Turn an image into a channel-first float tensor with reversed channels.

    Accepts a PIL image or a NumPy array laid out as (H, W, C). Pixel values
    are cast to float but not rescaled.
    """

    def __call__(self, img):
        """Return ``img`` as a ``(C, H, W)`` float ``torch.Tensor``."""
        assert isinstance(img, (np.ndarray, PIL.Image.Image))
        pixels = np.asarray(img) if isinstance(img, PIL.Image.Image) else img
        # Reverse the channel axis (RGB -> BGR), then move it to the front.
        chw = np.transpose(pixels[:, :, ::-1], [2, 0, 1])
        # from_numpy cannot handle the negative strides left by the reversal,
        # so materialise a contiguous copy first.
        chw = np.ascontiguousarray(chw)
        return torch.from_numpy(chw).float()

class DataIterator(object):
    """Endless batch provider that restarts its dataloader once it is exhausted.

    ``dataloader`` must be re-iterable and yield indexable items whose first
    two elements are returned by :meth:`next` (here: data and target).
    """

    def __init__(self, dataloader):
        self.dataloader = dataloader
        self.iterator = enumerate(self.dataloader)

    def next(self):
        """Return ``(data[0], data[1])`` of the next batch, wrapping around.

        Only exhaustion (``StopIteration``) triggers a restart. Any other
        exception raised while loading a batch propagates to the caller
        instead of being hidden behind a silent restart of the loader.
        """
        try:
            _, data = next(self.iterator)
        except StopIteration:
            # End of one pass over the dataloader: begin a fresh pass.
            self.iterator = enumerate(self.dataloader)
            _, data = next(self.iterator)
        return data[0], data[1]

def _str2bool(value):
    """Parse a command-line boolean such as ``true``/``false``/``1``/``0``.

    ``type=bool`` is not usable with argparse because ``bool('False')`` is
    ``True``; this converter interprets the text instead.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays falsy, as it was under ``type=bool``.
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got {!r}'.format(value))


def get_args():
    """Build the ShuffleNetV1 argument parser and parse ``sys.argv``.

    Unknown arguments are ignored (``parse_known_args``), so the function can
    be called from environments that add their own command-line options.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser("ShuffleNetV1")
    parser.add_argument('--eval', default=False, action='store_true')
    parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model')
    parser.add_argument('--batch-size', type=int, default=256, help='batch size')
    parser.add_argument('--total-iters', type=int, default=300000, help='total iters')
    parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
    parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
    parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
    parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')

    # Was ``type=bool``, which turned every non-empty value (even "False")
    # into True.
    parser.add_argument('--auto-continue', type=_str2bool, default=True, help='auto continue')
    parser.add_argument('--display-interval', type=int, default=20, help='display interval')
    parser.add_argument('--val-interval', type=int, default=10000, help='val interval')
    parser.add_argument('--save-interval', type=int, default=10000, help='save interval')

    parser.add_argument('--group', type=int, default=3, help='group number')
    parser.add_argument('--model-size', type=str, default='1.0x', choices=['0.5x', '1.0x', '1.5x', '2.0x'], help='size of the model')

    parser.add_argument('--train-dir', type=str, default='/home/nscc-gz-01/djs_FBIwarning/ImageNet/raw-data/train', help='path to training dataset')
    parser.add_argument('--val-dir', type=str, default='/home/nscc-gz-01/djs_FBIwarning/ImageNet/raw-data/val', help='path to validation dataset')

    args, _unknown = parser.parse_known_args()
    return args

def main():
    """Build the data pipelines, model and optimiser, then train or evaluate.

    Steps, in order:
      1. Parse arguments and set up logging to stdout and ``log/train-*``.
      2. Build ImageFolder loaders for ``args.train_dir`` / ``args.val_dir``.
      3. Create ShuffleNetV1, an SGD optimiser, a label-smoothing loss and a
         linearly decaying ``LambdaLR`` schedule.
      4. If ``args.auto_continue`` is set, resume weights from the latest
         checkpoint found by ``get_lastest_model``.
      5. With ``--eval``: load ``args.eval_resume``, validate once and exit.
         Otherwise alternate train/validate until ``args.total_iters`` is
         reached, run one more pass with ``bn_process=True`` and save the
         weights.

    Raises:
        FileNotFoundError: if the training or validation directory is missing.
    """
    args = get_args()

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
        format=log_format, datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('./log', exist_ok=True)
    fh = logging.FileHandler('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True
        print('Use GPU')

    # Explicit exception instead of ``assert``: asserts vanish under ``-O``.
    if not os.path.exists(args.train_dir):
        raise FileNotFoundError('training directory not found: {}'.format(args.train_dir))
    train_dataset = datasets.ImageFolder(
        args.train_dir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
            transforms.RandomHorizontalFlip(0.5),
            ToBGRTensor(),
        ])
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=10, pin_memory=use_gpu)
    train_dataprovider = DataIterator(train_loader)

    if not os.path.exists(args.val_dir):
        raise FileNotFoundError('validation directory not found: {}'.format(args.val_dir))
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(args.val_dir, transforms.Compose([
            OpencvResize(256),
            transforms.CenterCrop(224),
            ToBGRTensor(),
        ])),
        batch_size=200, shuffle=False,
        num_workers=1, pin_memory=use_gpu
    )
    val_dataprovider = DataIterator(val_loader)
    print('load data successfully')

    model = ShuffleNetV1(group=args.group, model_size=args.model_size)

    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # Honour --label-smooth (default 0.1) instead of a hard-coded 0.1.
    criterion_smooth = CrossEntropyLabelSmooth(1000, args.label_smooth)

    if use_gpu:
        #model = nn.DataParallel(model)
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda:0")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    # Learning-rate factor decays linearly from 1 to 0 over total_iters steps
    # and stays at 0 afterwards.
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                    lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)

    model = model.to(device)

    all_iters = 0
    if args.auto_continue:
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            print('load from checkpoint')
            # Only the weights are stored in the checkpoint, so the schedule
            # is fast-forwarded by replaying the completed steps.
            for _ in range(iters):
                scheduler.step()

    # train() and validate() read their collaborators from ``args``.
    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider

    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
            load_checkpoint(model, checkpoint)
            validate(model, device, args, all_iters=all_iters)
        # sys.exit raises the same SystemExit as the ``exit`` helper without
        # depending on the ``site`` module having installed it.
        sys.exit(0)

    while all_iters < args.total_iters:
        all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters)
        validate(model, device, args, all_iters=all_iters)
    # Final pass with bn_process=True, which makes train() re-set the
    # BatchNorm momentum on every step.
    # NOTE(review): 1280000 presumably approximates the training-set size, so
    # this is about one epoch - confirm.
    all_iters = train(model, device, args, val_interval=int(1280000/args.batch_size), bn_process=True, all_iters=all_iters)
    validate(model, device, args, all_iters=all_iters)
    save_checkpoint({'state_dict': model.state_dict(),}, args.total_iters, tag='bnps-')
    torch.save(model.state_dict(), 'model.mdl')

def adjust_bn_momentum(model, iters):
    """Set ``momentum`` of every ``nn.BatchNorm2d`` in ``model`` to ``1 / iters``.

    Other module types, including other BatchNorm variants, are left alone.
    """
    bn_layers = (layer for layer in model.modules()
                 if isinstance(layer, nn.BatchNorm2d))
    for layer in bn_layers:
        layer.momentum = 1 / iters

def train(model, device, args, *, val_interval, bn_process=False, all_iters=None):
    """Run ``val_interval`` optimisation steps and return the new iteration count.

    Args:
        model: network to optimise; switched to training mode here.
        device: device the batches are moved to.
        args: namespace providing ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of steps to run in this call.
        bn_process: when true, BatchNorm momentum is set to ``1 / step`` on
            every step (``step`` counts from 1 within this call).
        all_iters: global iteration count so far; ``None`` is treated as 0.

    Returns:
        The global iteration count after this call.
    """

    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    if all_iters is None:
        all_iters = 0

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    # Steps accumulated since the last log line. Dividing by this instead of
    # display_interval keeps the averages right when all_iters does not start
    # on a multiple of display_interval.
    window_iters = 0
    model.train()
    for iters in range(1, val_interval + 1):
        # NOTE(review): the scheduler is stepped before the optimizer, which
        # newer PyTorch versions warn about. The order is kept because
        # swapping it would shift the learning-rate schedule by one step.
        scheduler.step()
        if bn_process:
            adjust_bn_momentum(model, iters)

        all_iters += 1
        window_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - d_st

        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        # accuracy() results are treated as percentages; turn into error rates.
        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100

        if all_iters % args.display_interval == 0:
            # Read the learning rate actually in effect from the optimizer;
            # scheduler.get_lr() is deprecated outside of step().
            current_lr = optimizer.param_groups[0]['lr']
            printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, current_lr, loss.item()) + \
                        'Top-1 err = {:.6f},\t'.format(Top1_err / window_iters) + \
                        'Top-5 err = {:.6f},\t'.format(Top5_err / window_iters) + \
                        'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / window_iters)
            logging.info(printInfo)
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0
            window_iters = 0

        if all_iters % args.save_interval == 0:
            save_checkpoint({
                'state_dict': model.state_dict(),
                }, all_iters)

    return all_iters

def validate(model, device, args, *, all_iters=None):
    """Evaluate ``model`` on 250 validation batches and log the averages.

    The model is put into eval mode and run without gradient tracking. Loss,
    top-1 and top-5 results are averaged weighted by batch size and written
    to the log as error rates, together with the elapsed time.

    Args:
        model: network to evaluate.
        device: device the batches are moved to.
        args: namespace providing ``loss_function`` and ``val_dataprovider``.
        all_iters: global iteration count; used only in the log line.
    """
    loss_meter = AvgrageMeter()
    top1_meter = AvgrageMeter()
    top5_meter = AvgrageMeter()

    criterion = args.loss_function
    provider = args.val_dataprovider

    model.eval()
    num_batches = 250
    started = time.time()
    with torch.no_grad():
        for _ in range(num_batches):
            data, target = provider.next()
            target = target.type(torch.LongTensor).to(device)
            data = data.to(device)

            output = model(data)
            loss = criterion(output, target)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))

            # Weight each batch by its size so a short batch counts for less.
            batch = data.size(0)
            for meter, value in ((loss_meter, loss), (top1_meter, prec1), (top5_meter, prec5)):
                meter.update(value.item(), batch)

    # accuracy() results are treated as percentages; report error rates.
    pieces = [
        'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, loss_meter.avg),
        'Top-1 err = {:.6f},\t'.format(1 - top1_meter.avg / 100),
        'Top-5 err = {:.6f},\t'.format(1 - top5_meter.avg / 100),
        'val_time = {:.6f}'.format(time.time() - started),
    ]
    logging.info(''.join(pieces))

def load_checkpoint(net, checkpoint):
    """Load ``checkpoint`` into ``net``, reconciling the ``module.`` key prefix.

    Args:
        net: module to load into. If all of its state-dict keys start with
            ``module.`` (as for a model wrapped in ``nn.DataParallel``), the
            prefix is added to checkpoint keys that lack it; otherwise the
            prefix is stripped from checkpoint keys that carry it.
        checkpoint: either a state dict, or a mapping holding one under the
            ``'state_dict'`` key.

    Raises:
        RuntimeError: from ``load_state_dict(strict=True)`` when the keys
            still do not match after the prefix has been reconciled.
    """
    from collections import OrderedDict

    prefix = 'module.'
    if 'state_dict' in checkpoint:
        checkpoint = dict(checkpoint['state_dict'])

    # Forcing the prefix unconditionally made strict loading fail for models
    # that are not wrapped, so follow the layout the target really has.
    target_keys = list(net.state_dict().keys())
    wants_prefix = bool(target_keys) and all(k.startswith(prefix) for k in target_keys)

    temp = OrderedDict()
    for k, v in checkpoint.items():
        has_prefix = k.startswith(prefix)
        if wants_prefix and not has_prefix:
            k = prefix + k
        elif has_prefix and not wants_prefix:
            k = k[len(prefix):]
        temp[k] = v

    net.load_state_dict(temp, strict=True)

# Run the training/evaluation pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()



Use GPU
load data successfully
model size is  1.0x




[10 10:13:16] TRAIN Iter 20: lr = 0.499967,	loss = 8.389388,	Top-1 err = 0.999023,	Top-5 err = 0.993945,	data_time = 0.013552,	train_time = 0.347356
[10 10:13:20] TRAIN Iter 40: lr = 0.499933,	loss = 6.979179,	Top-1 err = 0.998633,	Top-5 err = 0.995313,	data_time = 0.013407,	train_time = 0.182419
[10 10:13:25] TRAIN Iter 60: lr = 0.499900,	loss = 6.913871,	Top-1 err = 0.999609,	Top-5 err = 0.997266,	data_time = 0.013429,	train_time = 0.242323
[10 10:13:30] TRAIN Iter 80: lr = 0.499867,	loss = 6.908343,	Top-1 err = 0.999219,	Top-5 err = 0.996289,	data_time = 0.013489,	train_time = 0.253130
[10 10:13:35] TRAIN Iter 100: lr = 0.499833,	loss = 6.907571,	Top-1 err = 0.997852,	Top-5 err = 0.992578,	data_time = 0.013411,	train_time = 0.245951
[10 10:13:40] TRAIN Iter 120: lr = 0.499800,	loss = 6.906489,	Top-1 err = 0.997266,	Top-5 err = 0.992188,	data_time = 0.013494,	train_time = 0.246264
[10 10:13:45] TRAIN Iter 140: lr = 0.499767,	loss = 6.906039,	Top-1 err = 0.998633,	Top-5 err = 0.994336



[10 10:13:59] TRAIN Iter 200: lr = 0.499667,	loss = 6.886800,	Top-1 err = 0.999219,	Top-5 err = 0.991602,	data_time = 0.013413,	train_time = 0.246444
[10 10:14:05] TRAIN Iter 220: lr = 0.499633,	loss = 6.977248,	Top-1 err = 0.999219,	Top-5 err = 0.994141,	data_time = 0.013407,	train_time = 0.262337
[10 10:14:10] TRAIN Iter 240: lr = 0.499600,	loss = 6.879823,	Top-1 err = 0.999023,	Top-5 err = 0.993164,	data_time = 0.013402,	train_time = 0.266848
[10 10:14:15] TRAIN Iter 260: lr = 0.499567,	loss = 6.911314,	Top-1 err = 0.998047,	Top-5 err = 0.991602,	data_time = 0.013420,	train_time = 0.263815
[10 10:14:20] TRAIN Iter 280: lr = 0.499533,	loss = 6.895028,	Top-1 err = 0.998828,	Top-5 err = 0.994727,	data_time = 0.013391,	train_time = 0.242642
[10 10:14:25] TRAIN Iter 300: lr = 0.499500,	loss = 6.903356,	Top-1 err = 0.997852,	Top-5 err = 0.991992,	data_time = 0.013380,	train_time = 0.230207
[10 10:14:30] TRAIN Iter 320: lr = 0.499467,	loss = 6.893116,	Top-1 err = 0.998047,	Top-5 err = 0.99

[10 10:18:29] TRAIN Iter 1300: lr = 0.497833,	loss = 6.841410,	Top-1 err = 0.998047,	Top-5 err = 0.991016,	data_time = 0.013439,	train_time = 0.254627
[10 10:18:34] TRAIN Iter 1320: lr = 0.497800,	loss = 6.852023,	Top-1 err = 0.998242,	Top-5 err = 0.991602,	data_time = 0.013431,	train_time = 0.236542
[10 10:18:39] TRAIN Iter 1340: lr = 0.497767,	loss = 6.848925,	Top-1 err = 0.998047,	Top-5 err = 0.991016,	data_time = 0.013399,	train_time = 0.252087
[10 10:18:44] TRAIN Iter 1360: lr = 0.497733,	loss = 6.844750,	Top-1 err = 0.998828,	Top-5 err = 0.992578,	data_time = 0.013442,	train_time = 0.241077
[10 10:18:49] TRAIN Iter 1380: lr = 0.497700,	loss = 6.823412,	Top-1 err = 0.999023,	Top-5 err = 0.993164,	data_time = 0.013438,	train_time = 0.236885
[10 10:18:53] TRAIN Iter 1400: lr = 0.497667,	loss = 6.860370,	Top-1 err = 0.998047,	Top-5 err = 0.993359,	data_time = 0.013426,	train_time = 0.234845
[10 10:18:59] TRAIN Iter 1420: lr = 0.497633,	loss = 6.834309,	Top-1 err = 0.997461,	Top-5 err

[10 10:23:00] TRAIN Iter 2400: lr = 0.496000,	loss = 6.508101,	Top-1 err = 0.992383,	Top-5 err = 0.968945,	data_time = 0.013469,	train_time = 0.267707
[10 10:23:05] TRAIN Iter 2420: lr = 0.495967,	loss = 6.499168,	Top-1 err = 0.993359,	Top-5 err = 0.967578,	data_time = 0.013426,	train_time = 0.246189
[10 10:23:09] TRAIN Iter 2440: lr = 0.495933,	loss = 6.533131,	Top-1 err = 0.992578,	Top-5 err = 0.976758,	data_time = 0.013323,	train_time = 0.230553
[10 10:23:14] TRAIN Iter 2460: lr = 0.495900,	loss = 6.527143,	Top-1 err = 0.991211,	Top-5 err = 0.970117,	data_time = 0.013419,	train_time = 0.248527
[10 10:23:19] TRAIN Iter 2480: lr = 0.495867,	loss = 6.529927,	Top-1 err = 0.993555,	Top-5 err = 0.973242,	data_time = 0.021620,	train_time = 0.237815
[10 10:23:24] TRAIN Iter 2500: lr = 0.495833,	loss = 6.544895,	Top-1 err = 0.993555,	Top-5 err = 0.970703,	data_time = 0.013592,	train_time = 0.252203
[10 10:23:29] TRAIN Iter 2520: lr = 0.495800,	loss = 6.540330,	Top-1 err = 0.992969,	Top-5 err

[10 10:27:31] TRAIN Iter 3500: lr = 0.494167,	loss = 6.280019,	Top-1 err = 0.977734,	Top-5 err = 0.926758,	data_time = 0.013419,	train_time = 0.243552
[10 10:27:36] TRAIN Iter 3520: lr = 0.494133,	loss = 6.146085,	Top-1 err = 0.984961,	Top-5 err = 0.929102,	data_time = 0.013428,	train_time = 0.249108
[10 10:27:41] TRAIN Iter 3540: lr = 0.494100,	loss = 6.194787,	Top-1 err = 0.981445,	Top-5 err = 0.929883,	data_time = 0.013399,	train_time = 0.243656
[10 10:27:46] TRAIN Iter 3560: lr = 0.494067,	loss = 6.271719,	Top-1 err = 0.980078,	Top-5 err = 0.923828,	data_time = 0.013439,	train_time = 0.240938
[10 10:27:51] TRAIN Iter 3580: lr = 0.494033,	loss = 6.110240,	Top-1 err = 0.980664,	Top-5 err = 0.922070,	data_time = 0.013408,	train_time = 0.245639
[10 10:27:56] TRAIN Iter 3600: lr = 0.494000,	loss = 6.146304,	Top-1 err = 0.976953,	Top-5 err = 0.914648,	data_time = 0.013414,	train_time = 0.253052
[10 10:28:01] TRAIN Iter 3620: lr = 0.493967,	loss = 6.103838,	Top-1 err = 0.978516,	Top-5 err

[10 10:32:07] TRAIN Iter 4600: lr = 0.492333,	loss = 5.813489,	Top-1 err = 0.958398,	Top-5 err = 0.863672,	data_time = 0.020256,	train_time = 0.255149
[10 10:32:12] TRAIN Iter 4620: lr = 0.492300,	loss = 5.736100,	Top-1 err = 0.955859,	Top-5 err = 0.854297,	data_time = 0.019942,	train_time = 0.276299
[10 10:32:17] TRAIN Iter 4640: lr = 0.492267,	loss = 5.776900,	Top-1 err = 0.956055,	Top-5 err = 0.864648,	data_time = 0.020180,	train_time = 0.240962
[10 10:32:22] TRAIN Iter 4660: lr = 0.492233,	loss = 5.782756,	Top-1 err = 0.955078,	Top-5 err = 0.860156,	data_time = 0.020119,	train_time = 0.251131
[10 10:32:27] TRAIN Iter 4680: lr = 0.492200,	loss = 5.837934,	Top-1 err = 0.959961,	Top-5 err = 0.860742,	data_time = 0.020392,	train_time = 0.253750
[10 10:32:32] TRAIN Iter 4700: lr = 0.492167,	loss = 5.810463,	Top-1 err = 0.956641,	Top-5 err = 0.852930,	data_time = 0.020821,	train_time = 0.250084
[10 10:32:37] TRAIN Iter 4720: lr = 0.492133,	loss = 5.716032,	Top-1 err = 0.958203,	Top-5 err

[10 10:36:38] TRAIN Iter 5700: lr = 0.490500,	loss = 5.509480,	Top-1 err = 0.924805,	Top-5 err = 0.803125,	data_time = 0.013738,	train_time = 0.320014
[10 10:36:43] TRAIN Iter 5720: lr = 0.490467,	loss = 5.530572,	Top-1 err = 0.924609,	Top-5 err = 0.788477,	data_time = 0.013472,	train_time = 0.261667
[10 10:36:48] TRAIN Iter 5740: lr = 0.490433,	loss = 5.445359,	Top-1 err = 0.932031,	Top-5 err = 0.797656,	data_time = 0.019653,	train_time = 0.240515
[10 10:36:53] TRAIN Iter 5760: lr = 0.490400,	loss = 5.682154,	Top-1 err = 0.925977,	Top-5 err = 0.797461,	data_time = 0.019252,	train_time = 0.219418
[10 10:36:58] TRAIN Iter 5780: lr = 0.490367,	loss = 5.497582,	Top-1 err = 0.926172,	Top-5 err = 0.797656,	data_time = 0.020476,	train_time = 0.251473
[10 10:37:03] TRAIN Iter 5800: lr = 0.490333,	loss = 5.445938,	Top-1 err = 0.925391,	Top-5 err = 0.794141,	data_time = 0.020165,	train_time = 0.247932
[10 10:37:07] TRAIN Iter 5820: lr = 0.490300,	loss = 5.507077,	Top-1 err = 0.924219,	Top-5 err



[10 10:39:18] TRAIN Iter 6340: lr = 0.489433,	loss = 5.419553,	Top-1 err = 0.908008,	Top-5 err = 0.761914,	data_time = 0.624231,	train_time = 0.245738
[10 10:39:22] TRAIN Iter 6360: lr = 0.489400,	loss = 5.389239,	Top-1 err = 0.908984,	Top-5 err = 0.763477,	data_time = 0.571707,	train_time = 0.236484
[10 10:39:27] TRAIN Iter 6380: lr = 0.489367,	loss = 5.320107,	Top-1 err = 0.913477,	Top-5 err = 0.765820,	data_time = 0.534848,	train_time = 0.236322
[10 10:39:32] TRAIN Iter 6400: lr = 0.489333,	loss = 5.432893,	Top-1 err = 0.911133,	Top-5 err = 0.771289,	data_time = 0.013707,	train_time = 0.238219
[10 10:39:37] TRAIN Iter 6420: lr = 0.489300,	loss = 5.341533,	Top-1 err = 0.916797,	Top-5 err = 0.762500,	data_time = 0.013758,	train_time = 0.253561
[10 10:39:42] TRAIN Iter 6440: lr = 0.489267,	loss = 5.440566,	Top-1 err = 0.908008,	Top-5 err = 0.756836,	data_time = 0.013769,	train_time = 0.269207
[10 10:39:47] TRAIN Iter 6460: lr = 0.489233,	loss = 5.310203,	Top-1 err = 0.907617,	Top-5 err

[10 10:43:54] TRAIN Iter 7440: lr = 0.487600,	loss = 4.995624,	Top-1 err = 0.880078,	Top-5 err = 0.713086,	data_time = 0.952450,	train_time = 0.317245
[10 10:43:59] TRAIN Iter 7460: lr = 0.487567,	loss = 5.229794,	Top-1 err = 0.875391,	Top-5 err = 0.707812,	data_time = 0.979948,	train_time = 0.246016
[10 10:44:04] TRAIN Iter 7480: lr = 0.487533,	loss = 5.155582,	Top-1 err = 0.874609,	Top-5 err = 0.713672,	data_time = 0.662591,	train_time = 0.252833
[10 10:44:09] TRAIN Iter 7500: lr = 0.487500,	loss = 5.163158,	Top-1 err = 0.882031,	Top-5 err = 0.703516,	data_time = 0.522101,	train_time = 0.249415
[10 10:44:15] TRAIN Iter 7520: lr = 0.487467,	loss = 5.078206,	Top-1 err = 0.876953,	Top-5 err = 0.701367,	data_time = 0.854011,	train_time = 0.265748
[10 10:44:20] TRAIN Iter 7540: lr = 0.487433,	loss = 5.240768,	Top-1 err = 0.867773,	Top-5 err = 0.697070,	data_time = 0.652657,	train_time = 0.260383
[10 10:44:25] TRAIN Iter 7560: lr = 0.487400,	loss = 4.992420,	Top-1 err = 0.881055,	Top-5 err

[10 10:48:30] TRAIN Iter 8540: lr = 0.485767,	loss = 4.962435,	Top-1 err = 0.846680,	Top-5 err = 0.657227,	data_time = 0.013671,	train_time = 0.241132
[10 10:48:35] TRAIN Iter 8560: lr = 0.485733,	loss = 4.855653,	Top-1 err = 0.847461,	Top-5 err = 0.658008,	data_time = 0.013697,	train_time = 0.244639
[10 10:48:40] TRAIN Iter 8580: lr = 0.485700,	loss = 5.032201,	Top-1 err = 0.852930,	Top-5 err = 0.657031,	data_time = 0.013607,	train_time = 0.246281
[10 10:48:45] TRAIN Iter 8600: lr = 0.485667,	loss = 4.860071,	Top-1 err = 0.850586,	Top-5 err = 0.654297,	data_time = 0.020502,	train_time = 0.242546
[10 10:48:50] TRAIN Iter 8620: lr = 0.485633,	loss = 4.807752,	Top-1 err = 0.841211,	Top-5 err = 0.653516,	data_time = 0.020572,	train_time = 0.228937
[10 10:48:54] TRAIN Iter 8640: lr = 0.485600,	loss = 4.820318,	Top-1 err = 0.842578,	Top-5 err = 0.649609,	data_time = 0.019851,	train_time = 0.244510
[10 10:49:00] TRAIN Iter 8660: lr = 0.485567,	loss = 5.027102,	Top-1 err = 0.849805,	Top-5 err

[10 10:53:56] TRAIN Iter 9640: lr = 0.483933,	loss = 4.719443,	Top-1 err = 0.823438,	Top-5 err = 0.610547,	data_time = 0.021041,	train_time = 0.328104
[10 10:54:03] TRAIN Iter 9660: lr = 0.483900,	loss = 4.659409,	Top-1 err = 0.816797,	Top-5 err = 0.611719,	data_time = 0.021137,	train_time = 0.325885
[10 10:54:10] TRAIN Iter 9680: lr = 0.483867,	loss = 4.768734,	Top-1 err = 0.808789,	Top-5 err = 0.596289,	data_time = 0.021286,	train_time = 0.348348
[10 10:54:17] TRAIN Iter 9700: lr = 0.483833,	loss = 5.006783,	Top-1 err = 0.817773,	Top-5 err = 0.607422,	data_time = 0.070879,	train_time = 0.344731
[10 10:54:23] TRAIN Iter 9720: lr = 0.483800,	loss = 4.603710,	Top-1 err = 0.808789,	Top-5 err = 0.602148,	data_time = 0.021735,	train_time = 0.329361
[10 10:54:30] TRAIN Iter 9740: lr = 0.483767,	loss = 4.880537,	Top-1 err = 0.820117,	Top-5 err = 0.616211,	data_time = 0.021053,	train_time = 0.353595
[10 10:54:37] TRAIN Iter 9760: lr = 0.483733,	loss = 4.780190,	Top-1 err = 0.816602,	Top-5 err

[10 11:09:48] TRAIN Iter 10720: lr = 0.482133,	loss = 4.710303,	Top-1 err = 0.794141,	Top-5 err = 0.583789,	data_time = 0.020680,	train_time = 0.302707
[10 11:09:55] TRAIN Iter 10740: lr = 0.482100,	loss = 4.518855,	Top-1 err = 0.795703,	Top-5 err = 0.578320,	data_time = 0.020168,	train_time = 0.328845
[10 11:10:01] TRAIN Iter 10760: lr = 0.482067,	loss = 4.660834,	Top-1 err = 0.791797,	Top-5 err = 0.576172,	data_time = 0.013918,	train_time = 0.323375
[10 11:10:08] TRAIN Iter 10780: lr = 0.482033,	loss = 4.406877,	Top-1 err = 0.794922,	Top-5 err = 0.573242,	data_time = 0.014008,	train_time = 0.354900




[10 11:10:15] TRAIN Iter 10800: lr = 0.482000,	loss = 4.733978,	Top-1 err = 0.787891,	Top-5 err = 0.573828,	data_time = 0.013812,	train_time = 0.324828
[10 11:10:21] TRAIN Iter 10820: lr = 0.481967,	loss = 4.530263,	Top-1 err = 0.792578,	Top-5 err = 0.569531,	data_time = 0.013742,	train_time = 0.318973
[10 11:10:28] TRAIN Iter 10840: lr = 0.481933,	loss = 4.575728,	Top-1 err = 0.792969,	Top-5 err = 0.577734,	data_time = 0.020700,	train_time = 0.362526
[10 11:10:35] TRAIN Iter 10860: lr = 0.481900,	loss = 4.609301,	Top-1 err = 0.804297,	Top-5 err = 0.580664,	data_time = 0.013739,	train_time = 0.334173
[10 11:10:42] TRAIN Iter 10880: lr = 0.481867,	loss = 4.597936,	Top-1 err = 0.788672,	Top-5 err = 0.571289,	data_time = 0.013817,	train_time = 0.362110
[10 11:10:50] TRAIN Iter 10900: lr = 0.481833,	loss = 4.515524,	Top-1 err = 0.781250,	Top-5 err = 0.559961,	data_time = 0.023637,	train_time = 0.364618
[10 11:10:57] TRAIN Iter 10920: lr = 0.481800,	loss = 4.557311,	Top-1 err = 0.788281,	To

[10 11:16:21] TRAIN Iter 11880: lr = 0.480200,	loss = 4.612550,	Top-1 err = 0.779297,	Top-5 err = 0.557813,	data_time = 0.013759,	train_time = 0.353357
[10 11:16:27] TRAIN Iter 11900: lr = 0.480167,	loss = 4.297983,	Top-1 err = 0.770312,	Top-5 err = 0.543555,	data_time = 0.023743,	train_time = 0.308376
[10 11:16:34] TRAIN Iter 11920: lr = 0.480133,	loss = 4.482680,	Top-1 err = 0.780664,	Top-5 err = 0.552539,	data_time = 0.021013,	train_time = 0.355102
[10 11:16:41] TRAIN Iter 11940: lr = 0.480100,	loss = 4.683535,	Top-1 err = 0.774805,	Top-5 err = 0.554102,	data_time = 0.013784,	train_time = 0.343366
[10 11:16:48] TRAIN Iter 11960: lr = 0.480067,	loss = 4.474582,	Top-1 err = 0.767773,	Top-5 err = 0.547852,	data_time = 0.019317,	train_time = 0.323836
[10 11:16:54] TRAIN Iter 11980: lr = 0.480033,	loss = 4.422687,	Top-1 err = 0.771094,	Top-5 err = 0.539453,	data_time = 0.020677,	train_time = 0.303128
[10 11:17:00] TRAIN Iter 12000: lr = 0.480000,	loss = 4.479550,	Top-1 err = 0.768750,	To

[10 11:22:23] TRAIN Iter 12960: lr = 0.478400,	loss = 4.316003,	Top-1 err = 0.760742,	Top-5 err = 0.535352,	data_time = 0.020732,	train_time = 0.344952
[10 11:22:29] TRAIN Iter 12980: lr = 0.478367,	loss = 4.306535,	Top-1 err = 0.753320,	Top-5 err = 0.525195,	data_time = 0.021247,	train_time = 0.308231
[10 11:22:36] TRAIN Iter 13000: lr = 0.478333,	loss = 4.479472,	Top-1 err = 0.750781,	Top-5 err = 0.527148,	data_time = 0.014033,	train_time = 0.360022
[10 11:22:43] TRAIN Iter 13020: lr = 0.478300,	loss = 4.378498,	Top-1 err = 0.760742,	Top-5 err = 0.532031,	data_time = 0.027698,	train_time = 0.339672
[10 11:22:50] TRAIN Iter 13040: lr = 0.478267,	loss = 4.425576,	Top-1 err = 0.755273,	Top-5 err = 0.531445,	data_time = 0.020267,	train_time = 0.336436
[10 11:22:57] TRAIN Iter 13060: lr = 0.478233,	loss = 4.077955,	Top-1 err = 0.751563,	Top-5 err = 0.530469,	data_time = 0.013891,	train_time = 0.335583
[10 11:23:03] TRAIN Iter 13080: lr = 0.478200,	loss = 4.298357,	Top-1 err = 0.757031,	To

[10 11:28:27] TRAIN Iter 14040: lr = 0.476600,	loss = 4.460930,	Top-1 err = 0.743945,	Top-5 err = 0.508398,	data_time = 0.013726,	train_time = 0.343997
[10 11:28:35] TRAIN Iter 14060: lr = 0.476567,	loss = 4.578115,	Top-1 err = 0.740820,	Top-5 err = 0.516602,	data_time = 0.014434,	train_time = 0.363585
[10 11:28:41] TRAIN Iter 14080: lr = 0.476533,	loss = 4.338161,	Top-1 err = 0.736133,	Top-5 err = 0.501172,	data_time = 0.021897,	train_time = 0.304428
[10 11:28:48] TRAIN Iter 14100: lr = 0.476500,	loss = 4.189087,	Top-1 err = 0.741016,	Top-5 err = 0.496680,	data_time = 0.021383,	train_time = 0.342795
[10 11:28:54] TRAIN Iter 14120: lr = 0.476467,	loss = 4.349909,	Top-1 err = 0.746680,	Top-5 err = 0.507617,	data_time = 0.021341,	train_time = 0.336649
[10 11:29:02] TRAIN Iter 14140: lr = 0.476433,	loss = 4.071629,	Top-1 err = 0.735547,	Top-5 err = 0.498437,	data_time = 0.020858,	train_time = 0.363832
[10 11:29:09] TRAIN Iter 14160: lr = 0.476400,	loss = 4.401771,	Top-1 err = 0.730273,	To