In [1]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim
import torch.cuda.amp as amp

import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

#importing distributed and weights and biases
import torch.distributed as dist
import wandb

In [2]:
wandb.init(project='Homework9-take2', entity='malachyiii')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mmalachyiii[0m (use `wandb login --relogin` to force relogin)



CondaEnvException: Unable to determine environment

Please re-run this command with one of the following options:

* Provide an environment name via --name or -n
* Re-run this command inside an activated conda environment.



In [3]:
print(torch.cuda.is_available())
print(torch.cuda.current_device())
torch.cuda.device(0)
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
torch.cuda.empty_cache()

True
0
1
Tesla T4


In [4]:
SEED=1
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True

In [5]:
#Trying resnet34 because it is fast
ARCH = torchvision.models.resnet34(pretrained=False)
START_EPOCH = 0
EPOCHS = 3

#The following parameters based on the paper at https://arxiv.org/pdf/1512.03385.pdf
TRAIN_BATCH=256
VAL_BATCH=256
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
LR = 0.01
IMG_SIZE = 224

#Logging wandb config

wandb.config = {
  "learning_rate": LR,
  "epochs": EPOCHS,
  "batch_size": TRAIN_BATCH,
  "momentum": MOMENTUM,
  "weight_decay": WEIGHT_DECAY,
  "image_size": IMG_SIZE
}

PRINT_FREQ = 50
WORKERS=2

TRAINDIR="/data/train"
VALDIR="/data/val"

In [6]:
GPU = torch.cuda.current_device()
torch.cuda.device(GPU)
cudnn.benchmark = True

In [7]:
if not torch.cuda.is_available():
    print('GPU not detected.. did you pass through your GPU?')

In [8]:
#Setting up the cluster and the world
WORLD_SIZE = 2
BACKEND = 'nccl'
URL = 'tcp://35.163.183.149:5000'
RANK = 1

dist.init_process_group(backend = BACKEND, init_method= URL,
                                world_size= WORLD_SIZE, rank=RANK)

In [9]:
imagenet_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
imagenet_std_RGB = [0.229, 0.224, 0.225]

In [10]:
transform_train = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    # transformations based on https://arxiv.org/pdf/1512.03385.pdf
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])

transform_val = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])

train_dataset = torchvision.datasets.ImageFolder('/data/train', transform=transform_train)
val_dataset = torchvision.datasets.ImageFolder('/data/val', transform=transform_val)

In [11]:
#Setting up the train loader with a distributed Sampler
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=TRAIN_BATCH, 
                              shuffle=False, num_workers=WORKERS, pin_memory=True, 
                                           sampler=torch.utils.data.distributed.DistributedSampler(train_dataset))
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=VAL_BATCH, 
                              shuffle=False, num_workers=WORKERS, sampler=None)

In [12]:
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))
    
    ######################
    # switch model to train mode here
    model.train()
    ################

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        #####################
        # send the images to cuda device
        if GPU is not None:
            images, target = images.cuda(GPU, non_blocking = True), target.cuda(GPU, non_blocking = True)
        # send the target to cuda device

        
        ####Utilizing PyTorch native AMP####
        with amp.autocast():
            # compute output
            output = model(images)
            # compute loss 
            loss = criterion(output, target)
        
        
        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))
        
        #Logging the metrics
        wandb.log({"loss": loss,
                   "acc1": acc1,
                   "acc5": acc5})
        
        wandb.watch(model)

        # compute gradient and do SGD step
        
        #### zero out gradients in the optimier
        optimizer.zero_grad()
        
        ## backprop!
        scaler.scale(loss).backward()
        
        # update the weights!
        scaler.step(optimizer)
        scaler.update()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % PRINT_FREQ == 0:
            progress.display(i)

In [13]:
def validate(val_loader, model, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            
            
            ### send the images and target to cuda
            images, target = images.to(GPU), target.to(GPU)

            # compute output
            output = model(images)

            # compute loss
            loss = criterion(output, target)


            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg

In [14]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # save the model state!
    torch.save(state, filename) 
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

In [15]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

In [16]:
class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [17]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [18]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 1/3 of the epochs"""
    lr = LR * (0.1 ** (epoch // int(round(EPOCHS/3, 0))))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [19]:
normalize = transforms.Normalize(mean=imagenet_mean_RGB, std=imagenet_std_RGB)

In [20]:
model = ARCH
model.cuda(GPU)
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[GPU])

In [21]:
criterion = nn.CrossEntropyLoss().cuda(GPU)
optimizer = torch.optim.SGD(params = model.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
#Based on the same paper, https://arxiv.org/pdf/1512.03385.pdf
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2])
scaler = amp.GradScaler()

In [22]:
best_acc1 = 0

In [23]:
#Initiating weights and biases
wandb.watch(model)

for epoch in range(START_EPOCH, EPOCHS):
    #adjust_learning_rate(optimizer, epoch)
    
    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)


    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)
    
    scheduler.step(acc1)
    
    print('lr: ' + str(scheduler.get_last_lr()))

Epoch: [0][   0/2503]	Time  9.983 ( 9.983)	Data  2.257 ( 2.257)	Loss 7.0629e+00 (7.0629e+00)	Acc@1   0.00 (  0.00)	Acc@5   1.17 (  1.17)


Epoch: [0][  50/2503]	Time  1.506 ( 1.192)	Data  0.366 ( 0.423)	Loss 6.9042e+00 (6.9521e+00)	Acc@1   0.39 (  0.15)	Acc@5   1.17 (  0.68)


Epoch: [0][ 100/2503]	Time  1.179 ( 1.120)	Data  0.273 ( 0.436)	Loss 6.8957e+00 (6.9120e+00)	Acc@1   0.00 (  0.18)	Acc@5   0.00 (  0.82)


Epoch: [0][ 150/2503]	Time  1.474 ( 1.099)	Data  0.954 ( 0.444)	Loss 6.7762e+00 (6.8758e+00)	Acc@1   0.39 (  0.23)	Acc@5   2.34 (  1.05)


Epoch: [0][ 200/2503]	Time  1.615 ( 1.079)	Data  1.092 ( 0.453)	Loss 6.7043e+00 (6.8398e+00)	Acc@1   0.39 (  0.32)	Acc@5   1.56 (  1.35)


Epoch: [0][ 250/2503]	Time  1.477 ( 1.069)	Data  0.957 ( 0.457)	Loss 6.5989e+00 (6.7921e+00)	Acc@1   0.00 (  0.42)	Acc@5   1.56 (  1.67)


Epoch: [0][ 300/2503]	Time  1.028 ( 1.068)	Data  0.061 ( 0.452)	Loss 6.4721e+00 (6.7414e+00)	Acc@1   0.39 (  0.50)	Acc@5   3.52 (  2.00)


Epoch: [0][ 350/2503]	Time  1.365 ( 1.062)	Data  0.823 ( 0.449)	Loss 6.4464e+00 (6.6900e+00)	Acc@1   0.39 (  0.60)	Acc@5   2.73 (  2.34)


Epoch: [0][ 400/2503]	Time  1.539 ( 1.059)	Data  1.006 ( 0.450)	Loss 6.0390e+00 (6.6365e+00)	Acc@1   2.73 (  0.68)	Acc@5   8.98 (  2.66)


Epoch: [0][ 450/2503]	Time  1.470 ( 1.056)	Data  0.929 ( 0.452)	Loss 6.2054e+00 (6.5872e+00)	Acc@1   1.56 (  0.79)	Acc@5   4.69 (  3.01)


Epoch: [0][ 500/2503]	Time  1.353 ( 1.053)	Data  0.693 ( 0.450)	Loss 6.1336e+00 (6.5376e+00)	Acc@1   1.17 (  0.91)	Acc@5   5.47 (  3.42)


Epoch: [0][ 550/2503]	Time  1.393 ( 1.053)	Data  0.844 ( 0.449)	Loss 6.0744e+00 (6.4900e+00)	Acc@1   2.34 (  1.04)	Acc@5   8.20 (  3.86)


Epoch: [0][ 600/2503]	Time  1.628 ( 1.051)	Data  0.938 ( 0.447)	Loss 5.8102e+00 (6.4442e+00)	Acc@1   3.91 (  1.18)	Acc@5   9.77 (  4.28)


Epoch: [0][ 650/2503]	Time  1.648 ( 1.050)	Data  0.505 ( 0.445)	Loss 5.7644e+00 (6.3985e+00)	Acc@1   3.91 (  1.33)	Acc@5  12.11 (  4.75)


Epoch: [0][ 700/2503]	Time  1.424 ( 1.048)	Data  0.886 ( 0.443)	Loss 5.7927e+00 (6.3507e+00)	Acc@1   3.91 (  1.51)	Acc@5  12.89 (  5.23)


Epoch: [0][ 750/2503]	Time  1.580 ( 1.047)	Data  0.693 ( 0.440)	Loss 5.5420e+00 (6.3067e+00)	Acc@1   2.34 (  1.64)	Acc@5  14.06 (  5.66)


Epoch: [0][ 800/2503]	Time  1.442 ( 1.046)	Data  0.909 ( 0.441)	Loss 5.6841e+00 (6.2647e+00)	Acc@1   3.12 (  1.80)	Acc@5  10.55 (  6.09)


Epoch: [0][ 850/2503]	Time  1.496 ( 1.044)	Data  0.841 ( 0.442)	Loss 5.4517e+00 (6.2214e+00)	Acc@1   6.64 (  1.96)	Acc@5  18.36 (  6.56)


Epoch: [0][ 900/2503]	Time  0.907 ( 1.044)	Data  0.095 ( 0.439)	Loss 5.4248e+00 (6.1811e+00)	Acc@1   5.08 (  2.12)	Acc@5  15.23 (  7.02)


Epoch: [0][ 950/2503]	Time  1.162 ( 1.047)	Data  0.435 ( 0.430)	Loss 5.4214e+00 (6.1403e+00)	Acc@1   7.42 (  2.29)	Acc@5  14.45 (  7.50)


Epoch: [0][1000/2503]	Time  1.373 ( 1.046)	Data  0.580 ( 0.430)	Loss 5.3106e+00 (6.1012e+00)	Acc@1   5.08 (  2.46)	Acc@5  18.36 (  7.97)


Epoch: [0][1050/2503]	Time  1.741 ( 1.051)	Data  0.787 ( 0.425)	Loss 5.4156e+00 (6.0626e+00)	Acc@1   5.08 (  2.62)	Acc@5  16.41 (  8.42)


wandb: ERROR Summary data exceeds maximum size of 10.4MB. Dropping it.


Epoch: [0][1100/2503]	Time  1.610 ( 1.059)	Data  0.639 ( 0.421)	Loss 5.2899e+00 (6.0240e+00)	Acc@1   8.98 (  2.81)	Acc@5  20.70 (  8.91)


Epoch: [0][1150/2503]	Time  1.771 ( 1.067)	Data  0.738 ( 0.419)	Loss 5.2346e+00 (5.9876e+00)	Acc@1   6.64 (  2.99)	Acc@5  16.41 (  9.34)


Epoch: [0][1200/2503]	Time  1.596 ( 1.078)	Data  0.000 ( 0.412)	Loss 4.9963e+00 (5.9515e+00)	Acc@1   7.42 (  3.18)	Acc@5  22.66 (  9.81)


Epoch: [0][1250/2503]	Time  1.652 ( 1.089)	Data  0.000 ( 0.404)	Loss 5.1819e+00 (5.9162e+00)	Acc@1   5.47 (  3.36)	Acc@5  17.97 ( 10.28)


Epoch: [0][1300/2503]	Time  1.942 ( 1.100)	Data  0.000 ( 0.397)	Loss 4.8728e+00 (5.8811e+00)	Acc@1   8.59 (  3.55)	Acc@5  22.66 ( 10.74)


Epoch: [0][1350/2503]	Time  1.810 ( 1.109)	Data  0.000 ( 0.390)	Loss 5.0758e+00 (5.8483e+00)	Acc@1   5.86 (  3.73)	Acc@5  23.44 ( 11.18)


Epoch: [0][1400/2503]	Time  1.533 ( 1.117)	Data  0.000 ( 0.383)	Loss 5.0373e+00 (5.8155e+00)	Acc@1   7.81 (  3.91)	Acc@5  21.48 ( 11.63)


Epoch: [0][1450/2503]	Time  1.736 ( 1.126)	Data  0.000 ( 0.376)	Loss 4.7706e+00 (5.7835e+00)	Acc@1  10.94 (  4.09)	Acc@5  27.34 ( 12.06)


Epoch: [0][1500/2503]	Time  1.327 ( 1.133)	Data  0.000 ( 0.371)	Loss 4.6677e+00 (5.7522e+00)	Acc@1  12.89 (  4.28)	Acc@5  32.03 ( 12.51)


Epoch: [0][1550/2503]	Time  1.428 ( 1.141)	Data  0.000 ( 0.366)	Loss 4.7490e+00 (5.7211e+00)	Acc@1  10.55 (  4.46)	Acc@5  27.73 ( 12.96)


Epoch: [0][1600/2503]	Time  1.751 ( 1.148)	Data  0.000 ( 0.361)	Loss 4.7805e+00 (5.6909e+00)	Acc@1   8.98 (  4.66)	Acc@5  30.47 ( 13.41)


Epoch: [0][1650/2503]	Time  1.467 ( 1.154)	Data  0.000 ( 0.356)	Loss 4.6941e+00 (5.6626e+00)	Acc@1   8.98 (  4.85)	Acc@5  26.95 ( 13.83)


Epoch: [0][1700/2503]	Time  1.599 ( 1.160)	Data  0.000 ( 0.351)	Loss 4.4942e+00 (5.6339e+00)	Acc@1  13.67 (  5.04)	Acc@5  33.20 ( 14.27)


Epoch: [0][1750/2503]	Time  1.763 ( 1.165)	Data  0.000 ( 0.346)	Loss 4.8150e+00 (5.6062e+00)	Acc@1   9.38 (  5.22)	Acc@5  26.17 ( 14.66)


Epoch: [0][1800/2503]	Time  1.261 ( 1.170)	Data  0.000 ( 0.342)	Loss 4.4070e+00 (5.5788e+00)	Acc@1  17.19 (  5.41)	Acc@5  35.94 ( 15.08)


Epoch: [0][1850/2503]	Time  1.509 ( 1.175)	Data  0.000 ( 0.337)	Loss 4.5134e+00 (5.5522e+00)	Acc@1  11.33 (  5.59)	Acc@5  30.47 ( 15.49)


Epoch: [0][1900/2503]	Time  1.571 ( 1.180)	Data  0.000 ( 0.333)	Loss 4.4784e+00 (5.5261e+00)	Acc@1  13.67 (  5.76)	Acc@5  32.03 ( 15.90)


Epoch: [0][1950/2503]	Time  1.391 ( 1.184)	Data  0.000 ( 0.328)	Loss 4.5361e+00 (5.5003e+00)	Acc@1  13.67 (  5.95)	Acc@5  32.81 ( 16.30)


Epoch: [0][2000/2503]	Time  1.549 ( 1.188)	Data  0.000 ( 0.325)	Loss 4.4545e+00 (5.4757e+00)	Acc@1  13.67 (  6.12)	Acc@5  36.33 ( 16.68)


Epoch: [0][2050/2503]	Time  1.381 ( 1.191)	Data  0.000 ( 0.319)	Loss 4.2690e+00 (5.4503e+00)	Acc@1  17.58 (  6.31)	Acc@5  37.50 ( 17.09)


Epoch: [0][2100/2503]	Time  1.290 ( 1.194)	Data  0.000 ( 0.314)	Loss 4.4258e+00 (5.4263e+00)	Acc@1  11.72 (  6.49)	Acc@5  32.81 ( 17.46)


Epoch: [0][2150/2503]	Time  1.485 ( 1.197)	Data  0.000 ( 0.309)	Loss 4.6822e+00 (5.4021e+00)	Acc@1  10.94 (  6.67)	Acc@5  32.81 ( 17.85)


Epoch: [0][2200/2503]	Time  1.511 ( 1.200)	Data  0.000 ( 0.304)	Loss 4.5454e+00 (5.3792e+00)	Acc@1  14.84 (  6.86)	Acc@5  30.47 ( 18.23)


Epoch: [0][2250/2503]	Time  1.597 ( 1.202)	Data  0.000 ( 0.299)	Loss 4.4722e+00 (5.3562e+00)	Acc@1  12.11 (  7.04)	Acc@5  33.59 ( 18.61)


Epoch: [0][2300/2503]	Time  1.531 ( 1.205)	Data  0.000 ( 0.294)	Loss 4.2488e+00 (5.3336e+00)	Acc@1  14.45 (  7.22)	Acc@5  39.45 ( 18.97)


Epoch: [0][2350/2503]	Time  1.249 ( 1.207)	Data  0.000 ( 0.289)	Loss 4.1806e+00 (5.3108e+00)	Acc@1  19.53 (  7.42)	Acc@5  37.11 ( 19.35)


Epoch: [0][2400/2503]	Time  1.338 ( 1.209)	Data  0.000 ( 0.285)	Loss 4.3309e+00 (5.2886e+00)	Acc@1  16.02 (  7.59)	Acc@5  35.94 ( 19.72)


Epoch: [0][2450/2503]	Time  1.532 ( 1.211)	Data  0.000 ( 0.281)	Loss 4.1427e+00 (5.2669e+00)	Acc@1  16.41 (  7.77)	Acc@5  40.23 ( 20.08)


Epoch: [0][2500/2503]	Time  1.498 ( 1.213)	Data  0.000 ( 0.277)	Loss 4.1010e+00 (5.2454e+00)	Acc@1  20.31 (  7.96)	Acc@5  41.80 ( 20.43)


Test: [  0/196]	Time  4.026 ( 4.026)	Loss 3.3728e+00 (3.3728e+00)	Acc@1  25.00 ( 25.00)	Acc@5  62.11 ( 62.11)


Test: [ 50/196]	Time  1.892 ( 1.169)	Loss 4.3885e+00 (4.5006e+00)	Acc@1  11.33 ( 12.81)	Acc@5  36.33 ( 32.36)


Test: [100/196]	Time  1.420 ( 1.159)	Loss 5.7713e+00 (4.6114e+00)	Acc@1   2.73 ( 12.15)	Acc@5  10.94 ( 30.90)


Test: [150/196]	Time  1.811 ( 1.153)	Loss 4.9784e+00 (4.6923e+00)	Acc@1   7.42 ( 11.65)	Acc@5  19.14 ( 29.21)


 * Acc@1 12.156 Acc@5 29.892


lr: [0.001]




Epoch: [1][   0/2503]	Time  4.323 ( 4.323)	Data  2.999 ( 2.999)	Loss 4.3487e+00 (4.3487e+00)	Acc@1  17.97 ( 17.97)	Acc@5  36.33 ( 36.33)


Epoch: [1][  50/2503]	Time  1.340 ( 1.326)	Data  0.137 ( 0.236)	Loss 4.0618e+00 (4.0953e+00)	Acc@1  18.36 ( 18.08)	Acc@5  42.19 ( 39.85)


Epoch: [1][ 100/2503]	Time  1.348 ( 1.306)	Data  0.000 ( 0.194)	Loss 4.0156e+00 (4.0614e+00)	Acc@1  19.53 ( 18.87)	Acc@5  44.53 ( 40.63)


Epoch: [1][ 150/2503]	Time  1.396 ( 1.296)	Data  0.340 ( 0.191)	Loss 4.0053e+00 (4.0447e+00)	Acc@1  20.31 ( 19.20)	Acc@5  42.19 ( 41.06)


Epoch: [1][ 200/2503]	Time  1.456 ( 1.287)	Data  0.430 ( 0.186)	Loss 4.0780e+00 (4.0309e+00)	Acc@1  16.41 ( 19.43)	Acc@5  39.84 ( 41.34)


Epoch: [1][ 250/2503]	Time  1.448 ( 1.285)	Data  0.382 ( 0.184)	Loss 4.0261e+00 (4.0189e+00)	Acc@1  19.53 ( 19.60)	Acc@5  41.02 ( 41.52)


Epoch: [1][ 300/2503]	Time  1.325 ( 1.281)	Data  0.290 ( 0.179)	Loss 4.3324e+00 (4.0131e+00)	Acc@1  15.62 ( 19.66)	Acc@5  33.20 ( 41.65)


Epoch: [1][ 350/2503]	Time  1.200 ( 1.279)	Data  0.119 ( 0.177)	Loss 4.2682e+00 (4.0068e+00)	Acc@1  21.88 ( 19.82)	Acc@5  33.59 ( 41.83)


Epoch: [1][ 400/2503]	Time  1.583 ( 1.278)	Data  0.512 ( 0.174)	Loss 4.0110e+00 (4.0047e+00)	Acc@1  18.36 ( 19.83)	Acc@5  38.28 ( 41.91)


Epoch: [1][ 450/2503]	Time  1.408 ( 1.276)	Data  0.362 ( 0.167)	Loss 3.9181e+00 (4.0014e+00)	Acc@1  20.31 ( 19.89)	Acc@5  43.75 ( 42.03)


Epoch: [1][ 500/2503]	Time  1.297 ( 1.276)	Data  0.000 ( 0.163)	Loss 4.0286e+00 (3.9943e+00)	Acc@1  21.88 ( 19.94)	Acc@5  42.19 ( 42.16)


Epoch: [1][ 550/2503]	Time  1.218 ( 1.276)	Data  0.000 ( 0.148)	Loss 3.9143e+00 (3.9885e+00)	Acc@1  22.66 ( 19.99)	Acc@5  41.80 ( 42.25)


Epoch: [1][ 600/2503]	Time  1.322 ( 1.276)	Data  0.000 ( 0.136)	Loss 3.8561e+00 (3.9826e+00)	Acc@1  23.83 ( 20.08)	Acc@5  42.97 ( 42.42)


Epoch: [1][ 650/2503]	Time  1.378 ( 1.278)	Data  0.002 ( 0.125)	Loss 3.8073e+00 (3.9794e+00)	Acc@1  23.05 ( 20.10)	Acc@5  45.70 ( 42.49)


Epoch: [1][ 700/2503]	Time  1.295 ( 1.278)	Data  0.000 ( 0.117)	Loss 4.1563e+00 (3.9745e+00)	Acc@1  13.67 ( 20.16)	Acc@5  38.28 ( 42.57)


Epoch: [1][ 750/2503]	Time  1.258 ( 1.281)	Data  0.000 ( 0.109)	Loss 3.7962e+00 (3.9707e+00)	Acc@1  22.66 ( 20.19)	Acc@5  46.48 ( 42.63)


Epoch: [1][ 800/2503]	Time  1.277 ( 1.281)	Data  0.000 ( 0.102)	Loss 3.9561e+00 (3.9687e+00)	Acc@1  18.36 ( 20.24)	Acc@5  38.67 ( 42.66)


Epoch: [1][ 850/2503]	Time  1.260 ( 1.282)	Data  0.000 ( 0.096)	Loss 3.9031e+00 (3.9638e+00)	Acc@1  23.44 ( 20.31)	Acc@5  42.97 ( 42.75)


Epoch: [1][ 900/2503]	Time  1.221 ( 1.282)	Data  0.000 ( 0.091)	Loss 3.8347e+00 (3.9618e+00)	Acc@1  21.88 ( 20.36)	Acc@5  44.53 ( 42.80)


Epoch: [1][ 950/2503]	Time  1.277 ( 1.283)	Data  0.000 ( 0.086)	Loss 3.9941e+00 (3.9587e+00)	Acc@1  21.48 ( 20.39)	Acc@5  40.62 ( 42.84)


Epoch: [1][1000/2503]	Time  1.320 ( 1.284)	Data  0.000 ( 0.082)	Loss 3.8757e+00 (3.9567e+00)	Acc@1  20.31 ( 20.41)	Acc@5  43.75 ( 42.89)


Epoch: [1][1050/2503]	Time  1.396 ( 1.285)	Data  0.000 ( 0.078)	Loss 3.9941e+00 (3.9547e+00)	Acc@1  20.31 ( 20.41)	Acc@5  42.97 ( 42.90)


Epoch: [1][1100/2503]	Time  1.258 ( 1.285)	Data  0.000 ( 0.074)	Loss 4.0093e+00 (3.9510e+00)	Acc@1  21.88 ( 20.46)	Acc@5  41.02 ( 42.96)


Epoch: [1][1150/2503]	Time  1.333 ( 1.287)	Data  0.000 ( 0.071)	Loss 3.9138e+00 (3.9484e+00)	Acc@1  19.53 ( 20.51)	Acc@5  40.62 ( 42.99)


Epoch: [1][1200/2503]	Time  1.329 ( 1.288)	Data  0.001 ( 0.068)	Loss 3.8411e+00 (3.9464e+00)	Acc@1  21.09 ( 20.54)	Acc@5  42.19 ( 43.05)


Epoch: [1][1250/2503]	Time  1.345 ( 1.289)	Data  0.000 ( 0.065)	Loss 4.0132e+00 (3.9441e+00)	Acc@1  21.48 ( 20.60)	Acc@5  41.02 ( 43.10)


Epoch: [1][1300/2503]	Time  1.299 ( 1.290)	Data  0.000 ( 0.063)	Loss 3.7929e+00 (3.9418e+00)	Acc@1  22.66 ( 20.63)	Acc@5  50.39 ( 43.13)


Epoch: [1][1350/2503]	Time  1.289 ( 1.291)	Data  0.000 ( 0.061)	Loss 3.9478e+00 (3.9400e+00)	Acc@1  19.14 ( 20.66)	Acc@5  39.45 ( 43.19)


Epoch: [1][1400/2503]	Time  1.284 ( 1.292)	Data  0.001 ( 0.059)	Loss 3.9170e+00 (3.9373e+00)	Acc@1  22.27 ( 20.70)	Acc@5  41.02 ( 43.25)


Epoch: [1][1450/2503]	Time  1.313 ( 1.294)	Data  0.000 ( 0.057)	Loss 3.8217e+00 (3.9341e+00)	Acc@1  20.70 ( 20.73)	Acc@5  44.14 ( 43.31)


Epoch: [1][1500/2503]	Time  1.521 ( 1.296)	Data  0.000 ( 0.055)	Loss 3.7025e+00 (3.9309e+00)	Acc@1  26.17 ( 20.78)	Acc@5  51.95 ( 43.37)


Epoch: [1][1550/2503]	Time  1.457 ( 1.302)	Data  0.000 ( 0.053)	Loss 3.8287e+00 (3.9277e+00)	Acc@1  19.53 ( 20.83)	Acc@5  43.75 ( 43.44)


Epoch: [1][1600/2503]	Time  1.468 ( 1.308)	Data  0.000 ( 0.051)	Loss 3.8315e+00 (3.9244e+00)	Acc@1  19.53 ( 20.87)	Acc@5  44.53 ( 43.50)


Epoch: [1][1650/2503]	Time  1.390 ( 1.313)	Data  0.000 ( 0.050)	Loss 3.8347e+00 (3.9220e+00)	Acc@1  20.31 ( 20.91)	Acc@5  44.14 ( 43.55)


Epoch: [1][1700/2503]	Time  1.474 ( 1.317)	Data  0.000 ( 0.048)	Loss 3.7705e+00 (3.9189e+00)	Acc@1  21.88 ( 20.96)	Acc@5  45.70 ( 43.61)


Epoch: [1][1750/2503]	Time  1.445 ( 1.322)	Data  0.000 ( 0.047)	Loss 3.9497e+00 (3.9165e+00)	Acc@1  23.05 ( 20.98)	Acc@5  46.09 ( 43.64)


Epoch: [1][1800/2503]	Time  1.525 ( 1.326)	Data  0.000 ( 0.046)	Loss 3.5962e+00 (3.9138e+00)	Acc@1  24.61 ( 21.02)	Acc@5  48.44 ( 43.69)


Epoch: [1][1850/2503]	Time  1.466 ( 1.331)	Data  0.000 ( 0.044)	Loss 3.7173e+00 (3.9112e+00)	Acc@1  23.44 ( 21.05)	Acc@5  48.44 ( 43.76)


Epoch: [1][1900/2503]	Time  1.516 ( 1.335)	Data  0.000 ( 0.043)	Loss 3.7220e+00 (3.9081e+00)	Acc@1  23.83 ( 21.09)	Acc@5  48.83 ( 43.82)


Epoch: [1][1950/2503]	Time  1.466 ( 1.339)	Data  0.000 ( 0.042)	Loss 3.8068e+00 (3.9054e+00)	Acc@1  21.88 ( 21.12)	Acc@5  46.88 ( 43.87)


Epoch: [1][2000/2503]	Time  1.486 ( 1.343)	Data  0.000 ( 0.041)	Loss 3.7341e+00 (3.9033e+00)	Acc@1  25.39 ( 21.15)	Acc@5  46.48 ( 43.92)


Epoch: [1][2050/2503]	Time  1.474 ( 1.347)	Data  0.000 ( 0.040)	Loss 3.4919e+00 (3.8996e+00)	Acc@1  24.22 ( 21.21)	Acc@5  52.34 ( 44.00)


Epoch: [1][2100/2503]	Time  1.514 ( 1.350)	Data  0.000 ( 0.039)	Loss 3.7600e+00 (3.8968e+00)	Acc@1  25.00 ( 21.25)	Acc@5  48.83 ( 44.06)


Epoch: [1][2150/2503]	Time  1.486 ( 1.354)	Data  0.000 ( 0.038)	Loss 3.9433e+00 (3.8935e+00)	Acc@1  17.19 ( 21.29)	Acc@5  43.75 ( 44.13)


Epoch: [1][2200/2503]	Time  1.554 ( 1.357)	Data  0.000 ( 0.037)	Loss 3.9645e+00 (3.8912e+00)	Acc@1  23.05 ( 21.32)	Acc@5  41.41 ( 44.17)


Epoch: [1][2250/2503]	Time  1.549 ( 1.361)	Data  0.000 ( 0.037)	Loss 3.9591e+00 (3.8885e+00)	Acc@1  20.31 ( 21.37)	Acc@5  42.58 ( 44.22)


Epoch: [1][2300/2503]	Time  1.474 ( 1.364)	Data  0.000 ( 0.036)	Loss 3.7014e+00 (3.8856e+00)	Acc@1  20.31 ( 21.40)	Acc@5  46.09 ( 44.27)


Epoch: [1][2350/2503]	Time  1.533 ( 1.368)	Data  0.000 ( 0.035)	Loss 3.7996e+00 (3.8820e+00)	Acc@1  22.66 ( 21.46)	Acc@5  46.09 ( 44.33)


Epoch: [1][2400/2503]	Time  1.544 ( 1.371)	Data  0.000 ( 0.034)	Loss 3.7828e+00 (3.8787e+00)	Acc@1  21.48 ( 21.51)	Acc@5  43.75 ( 44.40)


Epoch: [1][2450/2503]	Time  1.492 ( 1.374)	Data  0.000 ( 0.034)	Loss 3.6556e+00 (3.8752e+00)	Acc@1  24.61 ( 21.56)	Acc@5  50.39 ( 44.47)


Epoch: [1][2500/2503]	Time  1.478 ( 1.378)	Data  0.000 ( 0.033)	Loss 3.6549e+00 (3.8716e+00)	Acc@1  23.44 ( 21.62)	Acc@5  50.00 ( 44.54)


Test: [  0/196]	Time  3.087 ( 3.087)	Loss 2.8693e+00 (2.8693e+00)	Acc@1  34.38 ( 34.38)	Acc@5  71.88 ( 71.88)


Test: [ 50/196]	Time  1.812 ( 1.189)	Loss 3.6294e+00 (3.5930e+00)	Acc@1  22.66 ( 23.31)	Acc@5  48.83 ( 48.78)


Test: [100/196]	Time  1.636 ( 1.176)	Loss 5.0297e+00 (3.7431e+00)	Acc@1   8.20 ( 22.25)	Acc@5  20.70 ( 46.24)


Test: [150/196]	Time  2.035 ( 1.170)	Loss 4.6127e+00 (3.8835e+00)	Acc@1  16.02 ( 21.14)	Acc@5  30.47 ( 43.68)


 * Acc@1 21.208 Acc@5 43.588


lr: [0.001]


Epoch: [2][   0/2503]	Time  5.302 ( 5.302)	Data  3.104 ( 3.104)	Loss 3.8702e+00 (3.8702e+00)	Acc@1  23.83 ( 23.83)	Acc@5  40.23 ( 40.23)


Epoch: [2][  50/2503]	Time  1.653 ( 1.729)	Data  0.000 ( 0.061)	Loss 3.8321e+00 (3.7844e+00)	Acc@1  23.83 ( 22.57)	Acc@5  44.92 ( 46.02)


Epoch: [2][ 100/2503]	Time  1.621 ( 1.700)	Data  0.000 ( 0.031)	Loss 3.7961e+00 (3.7846e+00)	Acc@1  23.05 ( 22.59)	Acc@5  46.88 ( 46.06)


Epoch: [2][ 150/2503]	Time  1.628 ( 1.686)	Data  0.000 ( 0.021)	Loss 3.8033e+00 (3.7890e+00)	Acc@1  21.48 ( 22.66)	Acc@5  45.31 ( 46.04)


Epoch: [2][ 200/2503]	Time  1.602 ( 1.680)	Data  0.000 ( 0.016)	Loss 3.8657e+00 (3.7850e+00)	Acc@1  17.19 ( 22.67)	Acc@5  41.41 ( 46.11)


Epoch: [2][ 250/2503]	Time  1.598 ( 1.677)	Data  0.000 ( 0.013)	Loss 3.8499e+00 (3.7804e+00)	Acc@1  22.66 ( 22.70)	Acc@5  42.19 ( 46.17)


Epoch: [2][ 300/2503]	Time  1.693 ( 1.676)	Data  0.000 ( 0.011)	Loss 4.0713e+00 (3.7787e+00)	Acc@1  21.09 ( 22.75)	Acc@5  39.84 ( 46.21)


Epoch: [2][ 350/2503]	Time  1.670 ( 1.677)	Data  0.000 ( 0.009)	Loss 4.0722e+00 (3.7777e+00)	Acc@1  22.27 ( 22.81)	Acc@5  37.89 ( 46.23)


Epoch: [2][ 400/2503]	Time  1.563 ( 1.677)	Data  0.000 ( 0.008)	Loss 3.7713e+00 (3.7791e+00)	Acc@1  23.05 ( 22.76)	Acc@5  43.36 ( 46.20)


Epoch: [2][ 450/2503]	Time  1.682 ( 1.677)	Data  0.000 ( 0.007)	Loss 3.7285e+00 (3.7802e+00)	Acc@1  25.39 ( 22.81)	Acc@5  46.09 ( 46.22)


Epoch: [2][ 500/2503]	Time  1.632 ( 1.679)	Data  0.000 ( 0.007)	Loss 3.8268e+00 (3.7763e+00)	Acc@1  24.22 ( 22.85)	Acc@5  47.66 ( 46.30)


Epoch: [2][ 550/2503]	Time  1.596 ( 1.679)	Data  0.000 ( 0.006)	Loss 3.6564e+00 (3.7732e+00)	Acc@1  25.39 ( 22.88)	Acc@5  46.09 ( 46.31)


Epoch: [2][ 600/2503]	Time  1.670 ( 1.682)	Data  0.000 ( 0.005)	Loss 3.6579e+00 (3.7697e+00)	Acc@1  27.34 ( 22.95)	Acc@5  46.09 ( 46.43)


Epoch: [2][ 650/2503]	Time  1.727 ( 1.683)	Data  0.000 ( 0.005)	Loss 3.6006e+00 (3.7684e+00)	Acc@1  25.00 ( 22.95)	Acc@5  52.34 ( 46.42)


Epoch: [2][ 700/2503]	Time  1.773 ( 1.687)	Data  0.000 ( 0.005)	Loss 4.0092e+00 (3.7657e+00)	Acc@1  16.41 ( 22.99)	Acc@5  41.41 ( 46.49)


Epoch: [2][ 750/2503]	Time  1.742 ( 1.688)	Data  0.000 ( 0.004)	Loss 3.6391e+00 (3.7637e+00)	Acc@1  25.39 ( 23.00)	Acc@5  48.05 ( 46.49)


Epoch: [2][ 800/2503]	Time  1.696 ( 1.689)	Data  0.000 ( 0.004)	Loss 3.7916e+00 (3.7633e+00)	Acc@1  19.14 ( 23.03)	Acc@5  42.97 ( 46.49)


Epoch: [2][ 850/2503]	Time  1.686 ( 1.693)	Data  0.000 ( 0.004)	Loss 3.6971e+00 (3.7596e+00)	Acc@1  27.73 ( 23.10)	Acc@5  49.61 ( 46.57)


Epoch: [2][ 900/2503]	Time  1.765 ( 1.696)	Data  0.000 ( 0.004)	Loss 3.6775e+00 (3.7588e+00)	Acc@1  20.70 ( 23.13)	Acc@5  44.92 ( 46.59)


Epoch: [2][ 950/2503]	Time  1.677 ( 1.698)	Data  0.000 ( 0.004)	Loss 3.8177e+00 (3.7570e+00)	Acc@1  23.83 ( 23.15)	Acc@5  42.97 ( 46.63)


Epoch: [2][1000/2503]	Time  1.743 ( 1.700)	Data  0.000 ( 0.003)	Loss 3.6523e+00 (3.7564e+00)	Acc@1  23.83 ( 23.15)	Acc@5  48.44 ( 46.66)


Epoch: [2][1050/2503]	Time  2.000 ( 1.708)	Data  0.000 ( 0.003)	Loss 3.7629e+00 (3.7553e+00)	Acc@1  23.44 ( 23.15)	Acc@5  47.27 ( 46.67)


Epoch: [2][1100/2503]	Time  1.950 ( 1.716)	Data  0.000 ( 0.003)	Loss 3.8300e+00 (3.7527e+00)	Acc@1  26.17 ( 23.18)	Acc@5  44.53 ( 46.71)


Epoch: [2][1150/2503]	Time  1.790 ( 1.722)	Data  0.000 ( 0.003)	Loss 3.7448e+00 (3.7512e+00)	Acc@1  21.09 ( 23.20)	Acc@5  45.70 ( 46.75)


Epoch: [2][1200/2503]	Time  1.805 ( 1.727)	Data  0.000 ( 0.003)	Loss 3.6716e+00 (3.7505e+00)	Acc@1  22.66 ( 23.21)	Acc@5  45.31 ( 46.78)


Epoch: [2][1250/2503]	Time  1.866 ( 1.733)	Data  0.000 ( 0.003)	Loss 3.8180e+00 (3.7492e+00)	Acc@1  23.05 ( 23.24)	Acc@5  44.53 ( 46.82)


Epoch: [2][1300/2503]	Time  1.878 ( 1.738)	Data  0.000 ( 0.003)	Loss 3.6091e+00 (3.7480e+00)	Acc@1  28.12 ( 23.25)	Acc@5  55.47 ( 46.84)


Epoch: [2][1350/2503]	Time  1.838 ( 1.743)	Data  0.000 ( 0.003)	Loss 3.7537e+00 (3.7468e+00)	Acc@1  22.27 ( 23.26)	Acc@5  45.70 ( 46.88)


Epoch: [2][1400/2503]	Time  1.824 ( 1.748)	Data  0.000 ( 0.003)	Loss 3.7385e+00 (3.7447e+00)	Acc@1  23.44 ( 23.30)	Acc@5  44.14 ( 46.93)


Epoch: [2][1450/2503]	Time  1.918 ( 1.753)	Data  0.000 ( 0.002)	Loss 3.6286e+00 (3.7423e+00)	Acc@1  23.05 ( 23.33)	Acc@5  48.05 ( 46.97)


Epoch: [2][1500/2503]	Time  1.937 ( 1.757)	Data  0.000 ( 0.002)	Loss 3.5364e+00 (3.7399e+00)	Acc@1  30.47 ( 23.37)	Acc@5  52.73 ( 47.01)


Epoch: [2][1550/2503]	Time  1.875 ( 1.761)	Data  0.000 ( 0.002)	Loss 3.6299e+00 (3.7375e+00)	Acc@1  22.66 ( 23.41)	Acc@5  49.61 ( 47.07)


Epoch: [2][1600/2503]	Time  1.878 ( 1.765)	Data  0.000 ( 0.002)	Loss 3.6601e+00 (3.7350e+00)	Acc@1  23.05 ( 23.44)	Acc@5  46.88 ( 47.10)


Epoch: [2][1650/2503]	Time  1.966 ( 1.769)	Data  0.000 ( 0.002)	Loss 3.6911e+00 (3.7333e+00)	Acc@1  23.44 ( 23.47)	Acc@5  48.05 ( 47.13)


Epoch: [2][1700/2503]	Time  1.811 ( 1.773)	Data  0.000 ( 0.002)	Loss 3.6071e+00 (3.7309e+00)	Acc@1  26.56 ( 23.50)	Acc@5  50.00 ( 47.17)


Epoch: [2][1750/2503]	Time  1.910 ( 1.777)	Data  0.000 ( 0.002)	Loss 3.8275e+00 (3.7292e+00)	Acc@1  23.44 ( 23.51)	Acc@5  48.05 ( 47.20)


Epoch: [2][1800/2503]	Time  1.905 ( 1.781)	Data  0.000 ( 0.002)	Loss 3.4494e+00 (3.7270e+00)	Acc@1  29.30 ( 23.55)	Acc@5  51.17 ( 47.24)


Epoch: [2][1850/2503]	Time  1.929 ( 1.785)	Data  0.000 ( 0.002)	Loss 3.5265e+00 (3.7252e+00)	Acc@1  26.95 ( 23.57)	Acc@5  49.22 ( 47.29)


Epoch: [2][1900/2503]	Time  1.894 ( 1.789)	Data  0.000 ( 0.002)	Loss 3.5778e+00 (3.7229e+00)	Acc@1  26.17 ( 23.61)	Acc@5  53.52 ( 47.35)


Epoch: [2][1950/2503]	Time  1.811 ( 1.792)	Data  0.000 ( 0.002)	Loss 3.6199e+00 (3.7209e+00)	Acc@1  24.22 ( 23.62)	Acc@5  50.39 ( 47.39)


Epoch: [2][2000/2503]	Time  1.987 ( 1.796)	Data  0.000 ( 0.002)	Loss 3.5870e+00 (3.7195e+00)	Acc@1  26.17 ( 23.64)	Acc@5  51.95 ( 47.41)


Epoch: [2][2050/2503]	Time  2.145 ( 1.802)	Data  0.000 ( 0.002)	Loss 3.3297e+00 (3.7166e+00)	Acc@1  26.56 ( 23.69)	Acc@5  53.12 ( 47.47)


Epoch: [2][2100/2503]	Time  1.990 ( 1.809)	Data  0.000 ( 0.002)	Loss 3.6014e+00 (3.7147e+00)	Acc@1  27.73 ( 23.72)	Acc@5  51.56 ( 47.50)


Epoch: [2][2150/2503]	Time  1.944 ( 1.814)	Data  0.000 ( 0.002)	Loss 3.8044e+00 (3.7120e+00)	Acc@1  18.36 ( 23.75)	Acc@5  46.09 ( 47.56)


Epoch: [2][2200/2503]	Time  2.106 ( 1.820)	Data  0.000 ( 0.002)	Loss 3.8589e+00 (3.7106e+00)	Acc@1  26.17 ( 23.78)	Acc@5  44.53 ( 47.59)


Epoch: [2][2250/2503]	Time  1.874 ( 1.825)	Data  0.000 ( 0.002)	Loss 3.8324e+00 (3.7085e+00)	Acc@1  23.83 ( 23.81)	Acc@5  44.53 ( 47.63)


Epoch: [2][2300/2503]	Time  2.144 ( 1.830)	Data  0.000 ( 0.002)	Loss 3.6027e+00 (3.7062e+00)	Acc@1  24.22 ( 23.85)	Acc@5  47.66 ( 47.67)


Epoch: [2][2350/2503]	Time  2.049 ( 1.836)	Data  0.000 ( 0.002)	Loss 3.6396e+00 (3.7035e+00)	Acc@1  22.66 ( 23.89)	Acc@5  48.05 ( 47.73)


Epoch: [2][2400/2503]	Time  2.002 ( 1.841)	Data  0.000 ( 0.002)	Loss 3.6613e+00 (3.7010e+00)	Acc@1  25.39 ( 23.93)	Acc@5  48.44 ( 47.78)


Epoch: [2][2450/2503]	Time  2.069 ( 1.846)	Data  0.000 ( 0.002)	Loss 3.5085e+00 (3.6982e+00)	Acc@1  25.39 ( 23.98)	Acc@5  49.61 ( 47.83)


Epoch: [2][2500/2503]	Time  1.755 ( 1.851)	Data  0.000 ( 0.002)	Loss 3.4825e+00 (3.6955e+00)	Acc@1  29.30 ( 24.03)	Acc@5  55.08 ( 47.89)


Test: [  0/196]	Time  3.240 ( 3.240)	Loss 2.7291e+00 (2.7291e+00)	Acc@1  38.67 ( 38.67)	Acc@5  73.44 ( 73.44)


Test: [ 50/196]	Time  1.656 ( 1.173)	Loss 3.4507e+00 (3.4355e+00)	Acc@1  23.05 ( 25.84)	Acc@5  48.83 ( 52.08)


Test: [100/196]	Time  2.064 ( 1.161)	Loss 4.9146e+00 (3.6011e+00)	Acc@1   9.77 ( 24.50)	Acc@5  21.48 ( 49.20)


Test: [150/196]	Time  1.899 ( 1.181)	Loss 4.5324e+00 (3.7452e+00)	Acc@1  16.41 ( 23.11)	Acc@5  31.64 ( 46.54)


 * Acc@1 23.008 Acc@5 46.276


lr: [0.001]
