In [1]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim
import torch.cuda.amp as amp

import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

#importing distributed and weights and biases
import torch.distributed as dist
import wandb

In [2]:
wandb.init(project='Homework9-take2', entity='malachyiii')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mmalachyiii[0m (use `wandb login --relogin` to force relogin)



CondaEnvException: Unable to determine environment

Please re-run this command with one of the following options:

* Provide an environment name via --name or -n
* Re-run this command inside an activated conda environment.



In [3]:
print(torch.cuda.is_available())
print(torch.cuda.current_device())
torch.cuda.device(0)
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
torch.cuda.empty_cache()

True
0
1
Tesla T4


In [4]:
SEED=1
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True

In [5]:
#Trying resnet34 because it is fast
ARCH = torchvision.models.resnet34(pretrained=False)
START_EPOCH = 0
EPOCHS = 3

#The following parameters based on the paper at https://arxiv.org/pdf/1512.03385.pdf
TRAIN_BATCH=256
VAL_BATCH=256
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
LR = 0.01
IMG_SIZE = 224

#Logging wandb config

wandb.config = {
  "learning_rate": LR,
  "epochs": EPOCHS,
  "batch_size": TRAIN_BATCH,
  "momentum": MOMENTUM,
  "weight_decay": WEIGHT_DECAY,
  "image_size": IMG_SIZE
}

PRINT_FREQ = 50
WORKERS=2

TRAINDIR="/data/train"
VALDIR="/data/val"

In [6]:
GPU = torch.cuda.current_device()
torch.cuda.device(GPU)
cudnn.benchmark = True

In [7]:
if not torch.cuda.is_available():
    print('GPU not detected.. did you pass through your GPU?')

In [8]:
#Setting up the cluster and the world
WORLD_SIZE = 2
BACKEND = 'nccl'
URL = 'tcp://35.163.183.149:5000'
RANK = 0

dist.init_process_group(backend = BACKEND, init_method= URL,
                                world_size= WORLD_SIZE, rank=RANK)

In [9]:
imagenet_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
imagenet_std_RGB = [0.229, 0.224, 0.225]

In [10]:
transform_train = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    # transformations based on https://arxiv.org/pdf/1512.03385.pdf
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])

transform_val = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])

train_dataset = torchvision.datasets.ImageFolder('/data/train', transform=transform_train)
val_dataset = torchvision.datasets.ImageFolder('/data/val', transform=transform_val)

In [11]:
#Setting up the train loader with a distributed Sampler
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=TRAIN_BATCH, 
                              shuffle=False, num_workers=WORKERS, pin_memory=True, 
                                           sampler=torch.utils.data.distributed.DistributedSampler(train_dataset))
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=VAL_BATCH, 
                              shuffle=False, num_workers=WORKERS, sampler=None)

In [12]:
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))
    
    ######################
    # switch model to train mode here
    model.train()
    ################

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        #####################
        # send the images to cuda device
        if GPU is not None:
            images, target = images.cuda(GPU, non_blocking = True), target.cuda(GPU, non_blocking = True)
        # send the target to cuda device

        
        ####Utilizing PyTorch native AMP####
        with amp.autocast():
            # compute output
            output = model(images)
            # compute loss 
            loss = criterion(output, target)
        
        
        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))
        
        #Logging the metrics
        wandb.log({"loss": loss,
                   "acc1": acc1,
                   "acc5": acc5})
        
        wandb.watch(model)

        # compute gradient and do SGD step
        
        #### zero out gradients in the optimier
        optimizer.zero_grad()
        
        ## backprop!
        scaler.scale(loss).backward()
        
        # update the weights!
        scaler.step(optimizer)
        scaler.update()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % PRINT_FREQ == 0:
            progress.display(i)

In [13]:
def validate(val_loader, model, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            
            
            ### send the images and target to cuda
            images, target = images.to(GPU), target.to(GPU)

            # compute output
            output = model(images)

            # compute loss
            loss = criterion(output, target)


            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg

In [14]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # save the model state!
    torch.save(state, filename) 
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

In [15]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

In [16]:
class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [17]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [18]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 1/3 of the epochs"""
    lr = LR * (0.1 ** (epoch // int(round(EPOCHS/3, 0))))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [19]:
normalize = transforms.Normalize(mean=imagenet_mean_RGB, std=imagenet_std_RGB)

In [20]:
model = ARCH
model.cuda(GPU)
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[GPU])

In [21]:
criterion = nn.CrossEntropyLoss().cuda(GPU)
optimizer = torch.optim.SGD(params = model.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
#Based on the same paper, https://arxiv.org/pdf/1512.03385.pdf
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2])
scaler = amp.GradScaler()

In [22]:
best_acc1 = 0

In [23]:
#Initiating weights and biases
wandb.watch(model)

for epoch in range(START_EPOCH, EPOCHS):
    #adjust_learning_rate(optimizer, epoch)
    
    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)


    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)
    
    scheduler.step(acc1)
    
    print('lr: ' + str(scheduler.get_last_lr()))

Epoch: [0][   0/2503]	Time  9.991 ( 9.991)	Data  2.178 ( 2.178)	Loss 7.0532e+00 (7.0532e+00)	Acc@1   0.00 (  0.00)	Acc@5   0.78 (  0.78)


Epoch: [0][  50/2503]	Time  1.503 ( 1.192)	Data  0.980 ( 0.391)	Loss 6.8969e+00 (6.9580e+00)	Acc@1   0.00 (  0.14)	Acc@5   0.78 (  0.66)


Epoch: [0][ 100/2503]	Time  1.179 ( 1.121)	Data  0.653 ( 0.407)	Loss 6.8353e+00 (6.9162e+00)	Acc@1   0.00 (  0.19)	Acc@5   0.78 (  0.84)


Epoch: [0][ 150/2503]	Time  1.471 ( 1.099)	Data  0.799 ( 0.400)	Loss 6.7175e+00 (6.8811e+00)	Acc@1   0.00 (  0.23)	Acc@5   1.95 (  1.06)


Epoch: [0][ 200/2503]	Time  1.611 ( 1.079)	Data  0.935 ( 0.413)	Loss 6.6621e+00 (6.8423e+00)	Acc@1   1.56 (  0.32)	Acc@5   2.34 (  1.35)


Epoch: [0][ 250/2503]	Time  1.476 ( 1.069)	Data  0.791 ( 0.416)	Loss 6.6102e+00 (6.7968e+00)	Acc@1   0.39 (  0.43)	Acc@5   1.95 (  1.67)


Epoch: [0][ 300/2503]	Time  1.025 ( 1.068)	Data  0.501 ( 0.404)	Loss 6.4411e+00 (6.7465e+00)	Acc@1   0.78 (  0.53)	Acc@5   3.91 (  2.01)


Epoch: [0][ 350/2503]	Time  1.365 ( 1.062)	Data  0.774 ( 0.405)	Loss 6.3355e+00 (6.6948e+00)	Acc@1   1.95 (  0.63)	Acc@5   7.42 (  2.39)


Epoch: [0][ 400/2503]	Time  1.539 ( 1.059)	Data  0.672 ( 0.410)	Loss 6.3350e+00 (6.6410e+00)	Acc@1   0.78 (  0.74)	Acc@5   3.52 (  2.81)


Epoch: [0][ 450/2503]	Time  1.468 ( 1.056)	Data  0.792 ( 0.413)	Loss 6.1409e+00 (6.5928e+00)	Acc@1   1.17 (  0.83)	Acc@5   6.64 (  3.14)


Epoch: [0][ 500/2503]	Time  1.353 ( 1.053)	Data  0.805 ( 0.415)	Loss 6.2126e+00 (6.5421e+00)	Acc@1   1.17 (  0.94)	Acc@5   5.47 (  3.52)


Epoch: [0][ 550/2503]	Time  1.392 ( 1.053)	Data  0.836 ( 0.415)	Loss 5.8279e+00 (6.4936e+00)	Acc@1   2.73 (  1.06)	Acc@5  10.94 (  3.93)


Epoch: [0][ 600/2503]	Time  1.629 ( 1.051)	Data  1.072 ( 0.417)	Loss 6.0452e+00 (6.4464e+00)	Acc@1   3.12 (  1.18)	Acc@5  11.33 (  4.35)


Epoch: [0][ 650/2503]	Time  1.648 ( 1.050)	Data  1.089 ( 0.418)	Loss 5.8316e+00 (6.3995e+00)	Acc@1   3.12 (  1.33)	Acc@5   8.59 (  4.80)


Epoch: [0][ 700/2503]	Time  1.420 ( 1.048)	Data  0.685 ( 0.419)	Loss 5.8583e+00 (6.3540e+00)	Acc@1   1.95 (  1.47)	Acc@5  10.55 (  5.25)


Epoch: [0][ 750/2503]	Time  1.583 ( 1.047)	Data  1.056 ( 0.422)	Loss 5.5863e+00 (6.3106e+00)	Acc@1   3.91 (  1.62)	Acc@5  10.16 (  5.68)


Epoch: [0][ 800/2503]	Time  1.444 ( 1.046)	Data  0.614 ( 0.422)	Loss 5.5722e+00 (6.2680e+00)	Acc@1   5.08 (  1.77)	Acc@5  12.89 (  6.13)


Epoch: [0][ 850/2503]	Time  1.495 ( 1.044)	Data  0.951 ( 0.418)	Loss 5.2527e+00 (6.2253e+00)	Acc@1   8.20 (  1.94)	Acc@5  23.83 (  6.59)


Epoch: [0][ 900/2503]	Time  0.907 ( 1.044)	Data  0.330 ( 0.412)	Loss 5.4087e+00 (6.1840e+00)	Acc@1   4.30 (  2.10)	Acc@5  17.58 (  7.04)


Epoch: [0][ 950/2503]	Time  1.162 ( 1.047)	Data  0.605 ( 0.405)	Loss 5.4563e+00 (6.1430e+00)	Acc@1   7.03 (  2.29)	Acc@5  19.53 (  7.52)


Epoch: [0][1000/2503]	Time  1.372 ( 1.046)	Data  0.434 ( 0.406)	Loss 5.3313e+00 (6.1029e+00)	Acc@1   7.81 (  2.46)	Acc@5  19.14 (  8.00)


Epoch: [0][1050/2503]	Time  1.750 ( 1.052)	Data  0.921 ( 0.402)	Loss 5.3074e+00 (6.0641e+00)	Acc@1   4.69 (  2.65)	Acc@5  16.02 (  8.48)


wandb: ERROR Summary data exceeds maximum size of 10.4MB. Dropping it.


Epoch: [0][1100/2503]	Time  1.621 ( 1.059)	Data  0.803 ( 0.402)	Loss 5.0849e+00 (6.0259e+00)	Acc@1   9.77 (  2.82)	Acc@5  19.53 (  8.95)


Epoch: [0][1150/2503]	Time  1.766 ( 1.067)	Data  0.926 ( 0.402)	Loss 5.0389e+00 (5.9883e+00)	Acc@1   5.86 (  3.00)	Acc@5  21.48 (  9.42)


Epoch: [0][1200/2503]	Time  1.609 ( 1.078)	Data  0.822 ( 0.400)	Loss 5.0473e+00 (5.9515e+00)	Acc@1   9.77 (  3.19)	Acc@5  24.61 (  9.91)


Epoch: [0][1250/2503]	Time  1.660 ( 1.089)	Data  0.863 ( 0.398)	Loss 5.1629e+00 (5.9165e+00)	Acc@1   5.08 (  3.37)	Acc@5  21.09 ( 10.37)


Epoch: [0][1300/2503]	Time  1.933 ( 1.100)	Data  1.117 ( 0.395)	Loss 4.9437e+00 (5.8809e+00)	Acc@1   6.64 (  3.55)	Acc@5  23.05 ( 10.84)


Epoch: [0][1350/2503]	Time  1.804 ( 1.109)	Data  1.016 ( 0.394)	Loss 4.9625e+00 (5.8479e+00)	Acc@1   5.86 (  3.72)	Acc@5  25.78 ( 11.29)


Epoch: [0][1400/2503]	Time  1.533 ( 1.117)	Data  0.675 ( 0.391)	Loss 4.9897e+00 (5.8145e+00)	Acc@1  10.16 (  3.91)	Acc@5  24.22 ( 11.74)


Epoch: [0][1450/2503]	Time  1.737 ( 1.126)	Data  0.891 ( 0.390)	Loss 4.8940e+00 (5.7839e+00)	Acc@1   8.20 (  4.10)	Acc@5  22.27 ( 12.17)


Epoch: [0][1500/2503]	Time  1.325 ( 1.133)	Data  0.496 ( 0.388)	Loss 4.7495e+00 (5.7533e+00)	Acc@1   8.98 (  4.28)	Acc@5  24.61 ( 12.59)


Epoch: [0][1550/2503]	Time  1.427 ( 1.141)	Data  0.602 ( 0.386)	Loss 4.8290e+00 (5.7228e+00)	Acc@1   8.20 (  4.47)	Acc@5  24.22 ( 13.02)


Epoch: [0][1600/2503]	Time  1.752 ( 1.148)	Data  0.889 ( 0.385)	Loss 4.8176e+00 (5.6927e+00)	Acc@1   8.98 (  4.67)	Acc@5  25.39 ( 13.47)


Epoch: [0][1650/2503]	Time  1.458 ( 1.154)	Data  0.626 ( 0.383)	Loss 4.6158e+00 (5.6638e+00)	Acc@1  12.89 (  4.85)	Acc@5  31.64 ( 13.88)


Epoch: [0][1700/2503]	Time  1.596 ( 1.160)	Data  0.669 ( 0.380)	Loss 4.6920e+00 (5.6344e+00)	Acc@1  11.33 (  5.04)	Acc@5  33.98 ( 14.31)


Epoch: [0][1750/2503]	Time  1.774 ( 1.165)	Data  0.939 ( 0.379)	Loss 4.9039e+00 (5.6070e+00)	Acc@1   8.98 (  5.23)	Acc@5  26.17 ( 14.73)


Epoch: [0][1800/2503]	Time  1.268 ( 1.170)	Data  0.438 ( 0.377)	Loss 4.5799e+00 (5.5793e+00)	Acc@1  15.62 (  5.41)	Acc@5  31.25 ( 15.14)


Epoch: [0][1850/2503]	Time  1.509 ( 1.175)	Data  0.596 ( 0.376)	Loss 4.6320e+00 (5.5530e+00)	Acc@1  12.50 (  5.59)	Acc@5  29.30 ( 15.53)


Epoch: [0][1900/2503]	Time  1.569 ( 1.180)	Data  0.663 ( 0.375)	Loss 4.8595e+00 (5.5270e+00)	Acc@1  10.55 (  5.77)	Acc@5  23.83 ( 15.92)


Epoch: [0][1950/2503]	Time  1.396 ( 1.184)	Data  0.486 ( 0.373)	Loss 4.5630e+00 (5.5013e+00)	Acc@1  12.89 (  5.95)	Acc@5  30.08 ( 16.33)


Epoch: [0][2000/2503]	Time  1.548 ( 1.188)	Data  0.455 ( 0.371)	Loss 4.4950e+00 (5.4761e+00)	Acc@1  14.06 (  6.14)	Acc@5  32.03 ( 16.72)


Epoch: [0][2050/2503]	Time  1.409 ( 1.191)	Data  0.378 ( 0.367)	Loss 4.3663e+00 (5.4518e+00)	Acc@1  12.89 (  6.30)	Acc@5  32.42 ( 17.11)


Epoch: [0][2100/2503]	Time  1.283 ( 1.194)	Data  0.192 ( 0.362)	Loss 4.4386e+00 (5.4281e+00)	Acc@1  12.11 (  6.48)	Acc@5  30.47 ( 17.48)


Epoch: [0][2150/2503]	Time  1.481 ( 1.197)	Data  0.327 ( 0.358)	Loss 4.4947e+00 (5.4044e+00)	Acc@1  14.06 (  6.65)	Acc@5  30.08 ( 17.86)


Epoch: [0][2200/2503]	Time  1.507 ( 1.200)	Data  0.410 ( 0.354)	Loss 4.5206e+00 (5.3809e+00)	Acc@1  13.28 (  6.83)	Acc@5  33.59 ( 18.23)


Epoch: [0][2250/2503]	Time  1.603 ( 1.202)	Data  0.504 ( 0.351)	Loss 4.3230e+00 (5.3578e+00)	Acc@1  16.41 (  7.01)	Acc@5  32.42 ( 18.60)


Epoch: [0][2300/2503]	Time  1.535 ( 1.205)	Data  0.489 ( 0.348)	Loss 4.5431e+00 (5.3344e+00)	Acc@1  12.50 (  7.19)	Acc@5  31.64 ( 18.97)


Epoch: [0][2350/2503]	Time  1.241 ( 1.207)	Data  0.192 ( 0.344)	Loss 4.5007e+00 (5.3121e+00)	Acc@1  12.11 (  7.37)	Acc@5  30.86 ( 19.33)


Epoch: [0][2400/2503]	Time  1.338 ( 1.209)	Data  0.293 ( 0.341)	Loss 4.1284e+00 (5.2901e+00)	Acc@1  13.67 (  7.54)	Acc@5  37.89 ( 19.69)


Epoch: [0][2450/2503]	Time  1.529 ( 1.211)	Data  0.427 ( 0.337)	Loss 4.1543e+00 (5.2682e+00)	Acc@1  16.02 (  7.72)	Acc@5  40.23 ( 20.05)


Epoch: [0][2500/2503]	Time  1.499 ( 1.213)	Data  0.497 ( 0.334)	Loss 4.2585e+00 (5.2466e+00)	Acc@1  16.41 (  7.90)	Acc@5  39.45 ( 20.41)


Test: [  0/196]	Time  3.946 ( 3.946)	Loss 3.3728e+00 (3.3728e+00)	Acc@1  25.00 ( 25.00)	Acc@5  62.11 ( 62.11)


Test: [ 50/196]	Time  1.672 ( 1.159)	Loss 4.3885e+00 (4.5006e+00)	Acc@1  11.33 ( 12.81)	Acc@5  36.33 ( 32.36)


Test: [100/196]	Time  1.625 ( 1.133)	Loss 5.7713e+00 (4.6114e+00)	Acc@1   2.73 ( 12.15)	Acc@5  10.94 ( 30.90)


Test: [150/196]	Time  1.764 ( 1.130)	Loss 4.9784e+00 (4.6923e+00)	Acc@1   7.42 ( 11.65)	Acc@5  19.14 ( 29.21)


 * Acc@1 12.156 Acc@5 29.892


lr: [0.001]




Epoch: [1][   0/2503]	Time  8.478 ( 8.478)	Data  2.592 ( 2.592)	Loss 4.0899e+00 (4.0899e+00)	Acc@1  17.19 ( 17.19)	Acc@5  41.02 ( 41.02)


Epoch: [1][  50/2503]	Time  1.336 ( 1.407)	Data  0.307 ( 0.202)	Loss 3.9302e+00 (4.1024e+00)	Acc@1  22.66 ( 18.05)	Acc@5  48.05 ( 39.51)


Epoch: [1][ 100/2503]	Time  1.348 ( 1.347)	Data  0.336 ( 0.197)	Loss 3.8952e+00 (4.0790e+00)	Acc@1  20.31 ( 18.39)	Acc@5  44.53 ( 40.13)


Epoch: [1][ 150/2503]	Time  1.390 ( 1.324)	Data  0.357 ( 0.195)	Loss 3.9279e+00 (4.0654e+00)	Acc@1  20.31 ( 18.65)	Acc@5  43.36 ( 40.51)


Epoch: [1][ 200/2503]	Time  1.457 ( 1.307)	Data  0.397 ( 0.193)	Loss 4.0172e+00 (4.0474e+00)	Acc@1  20.70 ( 19.07)	Acc@5  40.62 ( 40.95)


Epoch: [1][ 250/2503]	Time  1.451 ( 1.302)	Data  0.081 ( 0.190)	Loss 3.8399e+00 (4.0397e+00)	Acc@1  23.44 ( 19.28)	Acc@5  47.27 ( 41.18)


Epoch: [1][ 300/2503]	Time  1.324 ( 1.295)	Data  0.272 ( 0.186)	Loss 4.0481e+00 (4.0303e+00)	Acc@1  18.36 ( 19.37)	Acc@5  41.80 ( 41.33)


Epoch: [1][ 350/2503]	Time  1.199 ( 1.291)	Data  0.167 ( 0.182)	Loss 4.0625e+00 (4.0194e+00)	Acc@1  23.83 ( 19.48)	Acc@5  39.45 ( 41.51)


Epoch: [1][ 400/2503]	Time  1.583 ( 1.288)	Data  0.081 ( 0.177)	Loss 4.3022e+00 (4.0136e+00)	Acc@1  16.80 ( 19.57)	Acc@5  35.94 ( 41.63)


Epoch: [1][ 450/2503]	Time  1.402 ( 1.285)	Data  0.253 ( 0.175)	Loss 3.9098e+00 (4.0086e+00)	Acc@1  19.92 ( 19.63)	Acc@5  43.75 ( 41.73)


Epoch: [1][ 500/2503]	Time  1.296 ( 1.284)	Data  0.058 ( 0.173)	Loss 4.1054e+00 (4.0009e+00)	Acc@1  15.62 ( 19.75)	Acc@5  42.97 ( 41.86)


Epoch: [1][ 550/2503]	Time  1.217 ( 1.283)	Data  0.000 ( 0.158)	Loss 3.7920e+00 (3.9951e+00)	Acc@1  22.27 ( 19.88)	Acc@5  46.09 ( 41.99)


Epoch: [1][ 600/2503]	Time  1.349 ( 1.283)	Data  0.000 ( 0.144)	Loss 4.0644e+00 (3.9919e+00)	Acc@1  19.14 ( 19.93)	Acc@5  43.36 ( 42.04)


Epoch: [1][ 650/2503]	Time  1.378 ( 1.284)	Data  0.000 ( 0.133)	Loss 3.9660e+00 (3.9888e+00)	Acc@1  20.31 ( 19.99)	Acc@5  44.92 ( 42.16)


Epoch: [1][ 700/2503]	Time  1.303 ( 1.284)	Data  0.000 ( 0.124)	Loss 4.0706e+00 (3.9831e+00)	Acc@1  20.31 ( 20.05)	Acc@5  42.97 ( 42.28)


Epoch: [1][ 750/2503]	Time  1.258 ( 1.286)	Data  0.000 ( 0.116)	Loss 3.8296e+00 (3.9803e+00)	Acc@1  19.53 ( 20.09)	Acc@5  41.02 ( 42.32)


Epoch: [1][ 800/2503]	Time  1.275 ( 1.286)	Data  0.000 ( 0.109)	Loss 3.9790e+00 (3.9771e+00)	Acc@1  17.19 ( 20.09)	Acc@5  42.58 ( 42.40)


Epoch: [1][ 850/2503]	Time  1.270 ( 1.287)	Data  0.000 ( 0.102)	Loss 3.6785e+00 (3.9745e+00)	Acc@1  24.22 ( 20.14)	Acc@5  48.44 ( 42.45)


Epoch: [1][ 900/2503]	Time  1.216 ( 1.287)	Data  0.000 ( 0.097)	Loss 3.8471e+00 (3.9713e+00)	Acc@1  23.83 ( 20.20)	Acc@5  43.75 ( 42.52)


Epoch: [1][ 950/2503]	Time  1.275 ( 1.287)	Data  0.000 ( 0.091)	Loss 4.0509e+00 (3.9686e+00)	Acc@1  21.88 ( 20.23)	Acc@5  42.97 ( 42.57)


Epoch: [1][1000/2503]	Time  1.316 ( 1.288)	Data  0.000 ( 0.087)	Loss 3.9124e+00 (3.9656e+00)	Acc@1  21.48 ( 20.28)	Acc@5  44.14 ( 42.62)


Epoch: [1][1050/2503]	Time  1.392 ( 1.289)	Data  0.000 ( 0.083)	Loss 3.8581e+00 (3.9627e+00)	Acc@1  21.09 ( 20.31)	Acc@5  42.97 ( 42.68)


Epoch: [1][1100/2503]	Time  1.249 ( 1.289)	Data  0.000 ( 0.079)	Loss 3.8090e+00 (3.9605e+00)	Acc@1  22.66 ( 20.35)	Acc@5  44.53 ( 42.72)


Epoch: [1][1150/2503]	Time  1.332 ( 1.290)	Data  0.000 ( 0.076)	Loss 3.7375e+00 (3.9571e+00)	Acc@1  19.53 ( 20.39)	Acc@5  46.09 ( 42.81)


Epoch: [1][1200/2503]	Time  1.331 ( 1.291)	Data  0.000 ( 0.073)	Loss 3.9024e+00 (3.9539e+00)	Acc@1  22.66 ( 20.44)	Acc@5  46.48 ( 42.88)


Epoch: [1][1250/2503]	Time  1.345 ( 1.292)	Data  0.000 ( 0.070)	Loss 4.0071e+00 (3.9508e+00)	Acc@1  21.09 ( 20.49)	Acc@5  44.14 ( 42.95)


Epoch: [1][1300/2503]	Time  1.301 ( 1.293)	Data  0.000 ( 0.067)	Loss 3.7416e+00 (3.9464e+00)	Acc@1  21.88 ( 20.55)	Acc@5  45.31 ( 43.02)


Epoch: [1][1350/2503]	Time  1.320 ( 1.294)	Data  0.000 ( 0.064)	Loss 4.0025e+00 (3.9438e+00)	Acc@1  18.75 ( 20.57)	Acc@5  41.80 ( 43.04)


Epoch: [1][1400/2503]	Time  1.284 ( 1.295)	Data  0.000 ( 0.062)	Loss 4.0175e+00 (3.9407e+00)	Acc@1  19.53 ( 20.63)	Acc@5  39.06 ( 43.10)


Epoch: [1][1450/2503]	Time  1.323 ( 1.297)	Data  0.000 ( 0.060)	Loss 3.8723e+00 (3.9389e+00)	Acc@1  21.88 ( 20.64)	Acc@5  40.62 ( 43.12)


Epoch: [1][1500/2503]	Time  1.519 ( 1.299)	Data  0.000 ( 0.058)	Loss 3.7384e+00 (3.9373e+00)	Acc@1  22.66 ( 20.65)	Acc@5  46.48 ( 43.15)


Epoch: [1][1550/2503]	Time  1.455 ( 1.305)	Data  0.000 ( 0.056)	Loss 3.8590e+00 (3.9340e+00)	Acc@1  17.97 ( 20.69)	Acc@5  42.19 ( 43.21)


Epoch: [1][1600/2503]	Time  1.474 ( 1.310)	Data  0.000 ( 0.054)	Loss 3.8379e+00 (3.9305e+00)	Acc@1  18.36 ( 20.76)	Acc@5  41.02 ( 43.26)


Epoch: [1][1650/2503]	Time  1.407 ( 1.315)	Data  0.000 ( 0.053)	Loss 3.6912e+00 (3.9275e+00)	Acc@1  25.00 ( 20.78)	Acc@5  48.05 ( 43.33)


Epoch: [1][1700/2503]	Time  1.475 ( 1.320)	Data  0.000 ( 0.051)	Loss 3.9102e+00 (3.9238e+00)	Acc@1  21.88 ( 20.82)	Acc@5  46.09 ( 43.39)


Epoch: [1][1750/2503]	Time  1.448 ( 1.324)	Data  0.000 ( 0.050)	Loss 4.0230e+00 (3.9210e+00)	Acc@1  23.44 ( 20.87)	Acc@5  42.58 ( 43.44)


Epoch: [1][1800/2503]	Time  1.522 ( 1.329)	Data  0.000 ( 0.048)	Loss 3.8239e+00 (3.9175e+00)	Acc@1  26.17 ( 20.92)	Acc@5  47.66 ( 43.51)


Epoch: [1][1850/2503]	Time  1.466 ( 1.333)	Data  0.000 ( 0.047)	Loss 3.7665e+00 (3.9149e+00)	Acc@1  27.34 ( 20.96)	Acc@5  46.09 ( 43.56)


Epoch: [1][1900/2503]	Time  1.511 ( 1.337)	Data  0.000 ( 0.046)	Loss 4.1538e+00 (3.9121e+00)	Acc@1  17.97 ( 20.99)	Acc@5  36.33 ( 43.60)


Epoch: [1][1950/2503]	Time  1.462 ( 1.341)	Data  0.000 ( 0.045)	Loss 3.8627e+00 (3.9093e+00)	Acc@1  23.44 ( 21.04)	Acc@5  41.41 ( 43.66)


Epoch: [1][2000/2503]	Time  1.489 ( 1.345)	Data  0.000 ( 0.044)	Loss 3.7657e+00 (3.9066e+00)	Acc@1  23.05 ( 21.08)	Acc@5  46.88 ( 43.70)


Epoch: [1][2050/2503]	Time  1.475 ( 1.349)	Data  0.000 ( 0.043)	Loss 3.7979e+00 (3.9044e+00)	Acc@1  22.27 ( 21.11)	Acc@5  44.14 ( 43.74)


Epoch: [1][2100/2503]	Time  1.516 ( 1.352)	Data  0.000 ( 0.042)	Loss 3.8242e+00 (3.9019e+00)	Acc@1  22.66 ( 21.14)	Acc@5  46.88 ( 43.79)


Epoch: [1][2150/2503]	Time  1.487 ( 1.356)	Data  0.000 ( 0.041)	Loss 3.8149e+00 (3.8991e+00)	Acc@1  22.66 ( 21.19)	Acc@5  46.09 ( 43.85)


Epoch: [1][2200/2503]	Time  1.565 ( 1.359)	Data  0.000 ( 0.040)	Loss 3.8847e+00 (3.8958e+00)	Acc@1  23.44 ( 21.24)	Acc@5  43.75 ( 43.93)


Epoch: [1][2250/2503]	Time  1.546 ( 1.363)	Data  0.000 ( 0.039)	Loss 3.7281e+00 (3.8926e+00)	Acc@1  24.61 ( 21.28)	Acc@5  46.88 ( 44.00)


Epoch: [1][2300/2503]	Time  1.478 ( 1.366)	Data  0.000 ( 0.038)	Loss 3.9274e+00 (3.8888e+00)	Acc@1  19.92 ( 21.33)	Acc@5  44.53 ( 44.07)


Epoch: [1][2350/2503]	Time  1.532 ( 1.370)	Data  0.001 ( 0.037)	Loss 3.9880e+00 (3.8855e+00)	Acc@1  19.53 ( 21.39)	Acc@5  43.36 ( 44.13)


Epoch: [1][2400/2503]	Time  1.546 ( 1.373)	Data  0.000 ( 0.036)	Loss 3.6511e+00 (3.8822e+00)	Acc@1  19.14 ( 21.43)	Acc@5  52.73 ( 44.20)


Epoch: [1][2450/2503]	Time  1.490 ( 1.376)	Data  0.000 ( 0.036)	Loss 3.6187e+00 (3.8784e+00)	Acc@1  24.61 ( 21.48)	Acc@5  49.22 ( 44.28)


Epoch: [1][2500/2503]	Time  1.482 ( 1.379)	Data  0.000 ( 0.035)	Loss 3.7637e+00 (3.8749e+00)	Acc@1  22.27 ( 21.52)	Acc@5  45.31 ( 44.35)


Test: [  0/196]	Time  3.061 ( 3.061)	Loss 2.8693e+00 (2.8693e+00)	Acc@1  34.38 ( 34.38)	Acc@5  71.88 ( 71.88)


Test: [ 50/196]	Time  0.380 ( 1.160)	Loss 3.6294e+00 (3.5930e+00)	Acc@1  22.66 ( 23.31)	Acc@5  48.83 ( 48.78)


Test: [100/196]	Time  0.379 ( 1.148)	Loss 5.0297e+00 (3.7431e+00)	Acc@1   8.20 ( 22.25)	Acc@5  20.70 ( 46.24)


Test: [150/196]	Time  0.877 ( 1.134)	Loss 4.6127e+00 (3.8835e+00)	Acc@1  16.02 ( 21.14)	Acc@5  30.47 ( 43.68)


 * Acc@1 21.208 Acc@5 43.588


lr: [0.001]


Epoch: [2][   0/2503]	Time 11.240 (11.240)	Data  2.686 ( 2.686)	Loss 3.6586e+00 (3.6586e+00)	Acc@1  24.22 ( 24.22)	Acc@5  50.39 ( 50.39)


Epoch: [2][  50/2503]	Time  1.653 ( 1.846)	Data  0.000 ( 0.053)	Loss 3.7334e+00 (3.7921e+00)	Acc@1  29.30 ( 22.49)	Acc@5  48.83 ( 45.66)


Epoch: [2][ 100/2503]	Time  1.615 ( 1.759)	Data  0.000 ( 0.027)	Loss 3.6454e+00 (3.8016e+00)	Acc@1  23.05 ( 22.53)	Acc@5  47.66 ( 45.68)


Epoch: [2][ 150/2503]	Time  1.634 ( 1.725)	Data  0.000 ( 0.018)	Loss 3.6869e+00 (3.8077e+00)	Acc@1  23.05 ( 22.38)	Acc@5  47.66 ( 45.66)


Epoch: [2][ 200/2503]	Time  1.603 ( 1.710)	Data  0.000 ( 0.014)	Loss 3.6954e+00 (3.8023e+00)	Acc@1  23.44 ( 22.57)	Acc@5  46.48 ( 45.75)


Epoch: [2][ 250/2503]	Time  1.600 ( 1.701)	Data  0.000 ( 0.011)	Loss 3.6265e+00 (3.8031e+00)	Acc@1  26.17 ( 22.58)	Acc@5  51.17 ( 45.76)


Epoch: [2][ 300/2503]	Time  1.692 ( 1.696)	Data  0.000 ( 0.009)	Loss 3.8762e+00 (3.7983e+00)	Acc@1  23.05 ( 22.61)	Acc@5  44.14 ( 45.76)


Epoch: [2][ 350/2503]	Time  1.676 ( 1.694)	Data  0.000 ( 0.008)	Loss 3.8711e+00 (3.7918e+00)	Acc@1  24.22 ( 22.66)	Acc@5  41.80 ( 45.86)


Epoch: [2][ 400/2503]	Time  1.564 ( 1.692)	Data  0.000 ( 0.007)	Loss 4.1263e+00 (3.7898e+00)	Acc@1  17.19 ( 22.68)	Acc@5  39.06 ( 45.92)


Epoch: [2][ 450/2503]	Time  1.677 ( 1.690)	Data  0.000 ( 0.006)	Loss 3.7567e+00 (3.7891e+00)	Acc@1  21.09 ( 22.65)	Acc@5  47.66 ( 45.92)


Epoch: [2][ 500/2503]	Time  1.624 ( 1.690)	Data  0.000 ( 0.006)	Loss 3.8618e+00 (3.7844e+00)	Acc@1  23.44 ( 22.73)	Acc@5  46.88 ( 45.97)


Epoch: [2][ 550/2503]	Time  1.596 ( 1.690)	Data  0.000 ( 0.005)	Loss 3.5875e+00 (3.7813e+00)	Acc@1  22.66 ( 22.81)	Acc@5  49.61 ( 46.00)


Epoch: [2][ 600/2503]	Time  1.669 ( 1.691)	Data  0.000 ( 0.005)	Loss 3.8572e+00 (3.7804e+00)	Acc@1  21.88 ( 22.80)	Acc@5  48.83 ( 46.04)


Epoch: [2][ 650/2503]	Time  1.715 ( 1.692)	Data  0.000 ( 0.004)	Loss 3.8186e+00 (3.7794e+00)	Acc@1  23.44 ( 22.84)	Acc@5  48.83 ( 46.08)


Epoch: [2][ 700/2503]	Time  1.774 ( 1.695)	Data  0.000 ( 0.004)	Loss 3.8723e+00 (3.7755e+00)	Acc@1  21.48 ( 22.87)	Acc@5  48.05 ( 46.15)


Epoch: [2][ 750/2503]	Time  1.728 ( 1.696)	Data  0.000 ( 0.004)	Loss 3.6200e+00 (3.7739e+00)	Acc@1  21.09 ( 22.89)	Acc@5  50.00 ( 46.20)


Epoch: [2][ 800/2503]	Time  1.700 ( 1.697)	Data  0.000 ( 0.004)	Loss 3.8469e+00 (3.7718e+00)	Acc@1  18.36 ( 22.89)	Acc@5  42.19 ( 46.27)


Epoch: [2][ 850/2503]	Time  1.683 ( 1.700)	Data  0.000 ( 0.003)	Loss 3.4909e+00 (3.7705e+00)	Acc@1  25.78 ( 22.93)	Acc@5  54.30 ( 46.27)


Epoch: [2][ 900/2503]	Time  1.765 ( 1.702)	Data  0.000 ( 0.003)	Loss 3.6184e+00 (3.7687e+00)	Acc@1  27.73 ( 22.96)	Acc@5  48.83 ( 46.33)


Epoch: [2][ 950/2503]	Time  1.681 ( 1.704)	Data  0.000 ( 0.003)	Loss 3.8847e+00 (3.7672e+00)	Acc@1  23.44 ( 22.99)	Acc@5  47.66 ( 46.37)


Epoch: [2][1000/2503]	Time  1.740 ( 1.706)	Data  0.000 ( 0.003)	Loss 3.7279e+00 (3.7654e+00)	Acc@1  24.61 ( 23.01)	Acc@5  47.27 ( 46.40)


Epoch: [2][1050/2503]	Time  1.993 ( 1.714)	Data  0.000 ( 0.003)	Loss 3.6772e+00 (3.7639e+00)	Acc@1  22.27 ( 23.03)	Acc@5  46.09 ( 46.44)


Epoch: [2][1100/2503]	Time  1.951 ( 1.721)	Data  0.000 ( 0.003)	Loss 3.6613e+00 (3.7629e+00)	Acc@1  26.56 ( 23.06)	Acc@5  45.70 ( 46.46)


Epoch: [2][1150/2503]	Time  1.797 ( 1.727)	Data  0.000 ( 0.003)	Loss 3.5658e+00 (3.7605e+00)	Acc@1  23.83 ( 23.09)	Acc@5  51.17 ( 46.51)


Epoch: [2][1200/2503]	Time  1.793 ( 1.732)	Data  0.000 ( 0.003)	Loss 3.7338e+00 (3.7584e+00)	Acc@1  26.56 ( 23.13)	Acc@5  47.66 ( 46.57)


Epoch: [2][1250/2503]	Time  1.865 ( 1.738)	Data  0.000 ( 0.002)	Loss 3.8016e+00 (3.7562e+00)	Acc@1  24.61 ( 23.17)	Acc@5  48.44 ( 46.62)


Epoch: [2][1300/2503]	Time  1.881 ( 1.743)	Data  0.000 ( 0.002)	Loss 3.5408e+00 (3.7528e+00)	Acc@1  25.39 ( 23.21)	Acc@5  48.05 ( 46.69)


Epoch: [2][1350/2503]	Time  1.847 ( 1.748)	Data  0.000 ( 0.002)	Loss 3.8584e+00 (3.7510e+00)	Acc@1  20.70 ( 23.22)	Acc@5  43.75 ( 46.70)


Epoch: [2][1400/2503]	Time  1.825 ( 1.752)	Data  0.000 ( 0.002)	Loss 3.8482e+00 (3.7488e+00)	Acc@1  19.53 ( 23.26)	Acc@5  41.80 ( 46.73)


Epoch: [2][1450/2503]	Time  1.912 ( 1.757)	Data  0.000 ( 0.002)	Loss 3.6989e+00 (3.7480e+00)	Acc@1  25.39 ( 23.27)	Acc@5  47.27 ( 46.75)


Epoch: [2][1500/2503]	Time  1.920 ( 1.761)	Data  0.000 ( 0.002)	Loss 3.6085e+00 (3.7469e+00)	Acc@1  21.48 ( 23.29)	Acc@5  47.27 ( 46.77)


Epoch: [2][1550/2503]	Time  1.875 ( 1.765)	Data  0.000 ( 0.002)	Loss 3.7160e+00 (3.7445e+00)	Acc@1  20.70 ( 23.30)	Acc@5  44.92 ( 46.81)


Epoch: [2][1600/2503]	Time  1.881 ( 1.769)	Data  0.000 ( 0.002)	Loss 3.7550e+00 (3.7418e+00)	Acc@1  22.66 ( 23.35)	Acc@5  44.53 ( 46.86)


Epoch: [2][1650/2503]	Time  1.966 ( 1.773)	Data  0.000 ( 0.002)	Loss 3.5371e+00 (3.7395e+00)	Acc@1  28.52 ( 23.37)	Acc@5  50.00 ( 46.91)


Epoch: [2][1700/2503]	Time  1.808 ( 1.776)	Data  0.000 ( 0.002)	Loss 3.7604e+00 (3.7364e+00)	Acc@1  24.22 ( 23.40)	Acc@5  49.61 ( 46.96)


Epoch: [2][1750/2503]	Time  1.908 ( 1.781)	Data  0.000 ( 0.002)	Loss 3.8496e+00 (3.7344e+00)	Acc@1  25.39 ( 23.45)	Acc@5  45.70 ( 47.00)


Epoch: [2][1800/2503]	Time  1.905 ( 1.784)	Data  0.000 ( 0.002)	Loss 3.5993e+00 (3.7318e+00)	Acc@1  26.95 ( 23.47)	Acc@5  50.00 ( 47.04)


Epoch: [2][1850/2503]	Time  1.929 ( 1.788)	Data  0.001 ( 0.002)	Loss 3.5585e+00 (3.7299e+00)	Acc@1  28.91 ( 23.50)	Acc@5  47.66 ( 47.09)


Epoch: [2][1900/2503]	Time  1.899 ( 1.792)	Data  0.000 ( 0.002)	Loss 3.9968e+00 (3.7277e+00)	Acc@1  21.09 ( 23.54)	Acc@5  38.67 ( 47.11)


Epoch: [2][1950/2503]	Time  1.811 ( 1.795)	Data  0.000 ( 0.002)	Loss 3.6691e+00 (3.7256e+00)	Acc@1  23.83 ( 23.58)	Acc@5  44.92 ( 47.16)


Epoch: [2][2000/2503]	Time  1.983 ( 1.799)	Data  0.000 ( 0.002)	Loss 3.5858e+00 (3.7235e+00)	Acc@1  23.44 ( 23.62)	Acc@5  50.78 ( 47.19)


Epoch: [2][2050/2503]	Time  2.154 ( 1.805)	Data  0.000 ( 0.002)	Loss 3.6140e+00 (3.7220e+00)	Acc@1  24.61 ( 23.64)	Acc@5  47.66 ( 47.21)


Epoch: [2][2100/2503]	Time  1.988 ( 1.811)	Data  0.000 ( 0.002)	Loss 3.6347e+00 (3.7202e+00)	Acc@1  25.39 ( 23.66)	Acc@5  48.44 ( 47.26)


Epoch: [2][2150/2503]	Time  1.953 ( 1.817)	Data  0.000 ( 0.002)	Loss 3.6646e+00 (3.7180e+00)	Acc@1  25.00 ( 23.70)	Acc@5  50.78 ( 47.31)


Epoch: [2][2200/2503]	Time  2.107 ( 1.822)	Data  0.000 ( 0.002)	Loss 3.7802e+00 (3.7155e+00)	Acc@1  24.22 ( 23.73)	Acc@5  43.75 ( 47.36)


Epoch: [2][2250/2503]	Time  1.871 ( 1.828)	Data  0.000 ( 0.001)	Loss 3.5598e+00 (3.7129e+00)	Acc@1  26.95 ( 23.77)	Acc@5  50.00 ( 47.41)


Epoch: [2][2300/2503]	Time  2.142 ( 1.833)	Data  0.000 ( 0.001)	Loss 3.7419e+00 (3.7097e+00)	Acc@1  24.61 ( 23.81)	Acc@5  49.22 ( 47.47)


Epoch: [2][2350/2503]	Time  2.049 ( 1.838)	Data  0.000 ( 0.001)	Loss 3.7882e+00 (3.7072e+00)	Acc@1  23.44 ( 23.85)	Acc@5  46.48 ( 47.51)


Epoch: [2][2400/2503]	Time  2.026 ( 1.844)	Data  0.000 ( 0.001)	Loss 3.4983e+00 (3.7047e+00)	Acc@1  23.44 ( 23.88)	Acc@5  52.34 ( 47.57)


Epoch: [2][2450/2503]	Time  2.055 ( 1.848)	Data  0.000 ( 0.001)	Loss 3.5402e+00 (3.7018e+00)	Acc@1  25.39 ( 23.92)	Acc@5  50.39 ( 47.62)


Epoch: [2][2500/2503]	Time  1.756 ( 1.853)	Data  0.000 ( 0.001)	Loss 3.6609e+00 (3.6989e+00)	Acc@1  22.27 ( 23.96)	Acc@5  48.83 ( 47.68)


Test: [  0/196]	Time  3.307 ( 3.307)	Loss 2.7291e+00 (2.7291e+00)	Acc@1  38.67 ( 38.67)	Acc@5  73.44 ( 73.44)


Test: [ 50/196]	Time  2.268 ( 1.234)	Loss 3.4507e+00 (3.4355e+00)	Acc@1  23.05 ( 25.84)	Acc@5  48.83 ( 52.08)


Test: [100/196]	Time  1.575 ( 1.223)	Loss 4.9146e+00 (3.6011e+00)	Acc@1   9.77 ( 24.50)	Acc@5  21.48 ( 49.20)


Test: [150/196]	Time  1.393 ( 1.178)	Loss 4.5324e+00 (3.7452e+00)	Acc@1  16.41 ( 23.11)	Acc@5  31.64 ( 46.54)


 * Acc@1 23.008 Acc@5 46.276


lr: [0.001]
