In [1]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as opt

import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models


In [2]:
SEED=1

In [3]:
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True

In [4]:
START_EPOCH = 0

### Set the architecture to resnet 18 below

In [5]:
##########################
ARCH = models.resnet18() # set the architecture to RESNET 18
# please look up how to do that
########################
EPOCHS = 40
LR = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
PRINT_FREQ = 50
TRAIN_BATCH=256
VAL_BATCH=128
WORKERS=4
# TRAINDIR="/workspace/data/imagenet2012/train"
# VALDIR="/workspace/data/imagenet2012/val"

In [6]:
TRAINDIR="/home/user1/work/w251/v3/week05/hw/imageNet-ILSVRC2012/download_and_prepare_imagenet_dataset/train"
VALDIR="/home/user1/work/w251/v3/week05/hw/imageNet-ILSVRC2012/download_and_prepare_imagenet_dataset/val"
TRAINDIR="/home/user1/data/train"
VALDIR="/home/user1/data/val"

### Check if cuda is available here

In [7]:
# check if cuda is available in this cell
# if it is not available, you should not go forward!

torch.cuda.is_available()

True

### Assign your GPU below

In [8]:
# Assign your GPU in this cell
GPU = 0

In [9]:
# set your active device to your GPU in this cell
if torch.cuda.is_available():  
  dev =   "cuda:0"
else:  
  dev = "cpu"  

device = torch.device(dev)

In [10]:
# enable algorithm optimization
cudnn.benchmark = True

### Fill in the heart of the train section below

In [11]:
def train(train_loader, model, criterion, optimizer, epoch, device=torch.device('cpu')):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    ######################
    # switch model to train mode here
    model.train()
    ################

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        #####################
        # send the images to cuda device
        images = images.cuda(0, non_blocking=True)
        # send the target to cuda device
        target = target.cuda(0, non_blocking=True)
        
        #images = images.to(device)
        #target = target.to(device)
        
        # compute output
        output = model(images)

        # compute loss 
        loss = criterion(output, target)


        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        
        #### zero out gradients in the optimier
        ## optimizer ..??
        optimizer.zero_grad()
        
        ## backprop!
        ### loss... ???
        loss.backward()
        
        # update the weights!
        ## optimier .. ??
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % PRINT_FREQ == 0:
            progress.display(i)

#### Fill in the validate section below

In [12]:
def validate(val_loader, model, criterion, device):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    # model ???
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            
            
            ### send the images and target to cuda
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)

            # compute loss
            loss  = criterion(output,target)


            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg

### Save the checkpoint

In [13]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

In [14]:
class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [15]:
def save_checkpoint(state, is_best, filename='./checkpoint_192.pth.tar'):
    # save the model state!
    # state ??? 
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, './model_192_best.pth.tar')

In [16]:
# if we are adjusting the LR manually use this
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = LR * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [17]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [18]:
imagenet_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
imagenet_std_RGB = [0.229, 0.224, 0.225]
cinic_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
cinic_std_RGB = [0.24205776, 0.23828046, 0.25874835]
cifar_mean_RGB = [0.4914, 0.4822, 0.4465]
cifar_std_RGB = [0.2023, 0.1994, 0.2010]

In [19]:
normalize = transforms.Normalize(mean=imagenet_mean_RGB, std=imagenet_std_RGB)

In [20]:
IMG_SIZE = 32
IMG_SIZE = 224

### Initialize the model using the architecture you selected above

In [21]:
# select the model
model = ARCH

### Send the model to the cuda device

In [22]:
# send the model to the cuda device..
model = model.to(device)

### Instantiate the loss to cross entropy

In [23]:
# use the cross-entropy loss
criterion = nn.CrossEntropyLoss()# .cuda(GPU)

### Instantiate the optimizer to SGD

In [24]:
# use SGD .. use the momentum and weight decay vars
optimizer = opt.SGD(model.parameters(), lr=LR, momentum=0.9, weight_decay=WEIGHT_DECAY)

#### Create the learning rate scheduler

In [25]:
# use CosineAnnealingLR
scheduler =  torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

### Create the train dataset object

In [26]:
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE),# padding=4),
    #transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])


In [27]:
train_dataset = torchvision.datasets.ImageFolder(TRAINDIR, transform=transform_train)

### Create the val dataset object

In [28]:
transform_val = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])

In [29]:
val_dataset = torchvision.datasets.ImageFolder(VALDIR, transform=transform_val)

### Create the train dataloader

In [30]:
# fill this in
train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=256, num_workers=WORKERS, shuffle=True)
#train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=128, shuffle=True)

### Create the c

In [31]:
# fill this in..
val_loader = torch.utils.data.DataLoader(val_dataset,  batch_size=128, num_workers=WORKERS, shuffle=True) 
#val_loader = torch.utils.data.DataLoader(val_dataset,  batch_size=128, shuffle=True) 

In [32]:
best_acc1 = 0

In [None]:
for epoch in range(START_EPOCH, EPOCHS):
#    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch, device)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion, device)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)


    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)
    
    scheduler.step()
    print('lr: ' + str(scheduler.get_last_lr()))

Epoch: [0][   0/5005]	Time 21.142 (21.142)	Data  8.075 ( 8.075)	Loss 7.0529e+00 (7.0529e+00)	Acc@1   0.39 (  0.39)	Acc@5   0.39 (  0.39)
Epoch: [0][  50/5005]	Time  1.659 ( 2.245)	Data  0.498 ( 0.852)	Loss 6.8925e+00 (7.0208e+00)	Acc@1   0.39 (  0.19)	Acc@5   1.56 (  0.95)
Epoch: [0][ 100/5005]	Time  1.474 ( 2.134)	Data  0.225 ( 0.851)	Loss 6.7341e+00 (6.9293e+00)	Acc@1   0.00 (  0.27)	Acc@5   3.52 (  1.27)
Epoch: [0][ 150/5005]	Time  3.739 ( 2.136)	Data  2.475 ( 0.873)	Loss 6.7555e+00 (6.8640e+00)	Acc@1   0.39 (  0.37)	Acc@5   2.34 (  1.60)
Epoch: [0][ 200/5005]	Time  1.317 ( 2.096)	Data  0.201 ( 0.850)	Loss 6.5548e+00 (6.8062e+00)	Acc@1   1.17 (  0.48)	Acc@5   3.12 (  1.93)
Epoch: [0][ 250/5005]	Time  1.383 ( 2.085)	Data  0.204 ( 0.850)	Loss 6.6010e+00 (6.7546e+00)	Acc@1   0.39 (  0.55)	Acc@5   2.73 (  2.20)
Epoch: [0][ 300/5005]	Time  1.424 ( 2.074)	Data  0.225 ( 0.847)	Loss 6.5084e+00 (6.7036e+00)	Acc@1   0.39 (  0.64)	Acc@5   4.69 (  2.45)
Epoch: [0][ 350/5005]	Time  2.942 ( 2.074

Epoch: [0][3000/5005]	Time  1.935 ( 2.053)	Data  0.772 ( 0.866)	Loss 4.5836e+00 (5.3701e+00)	Acc@1  14.45 (  7.13)	Acc@5  33.59 ( 18.71)
Epoch: [0][3050/5005]	Time  1.289 ( 2.054)	Data  0.199 ( 0.866)	Loss 4.6082e+00 (5.3556e+00)	Acc@1  13.67 (  7.25)	Acc@5  30.86 ( 18.95)
Epoch: [0][3100/5005]	Time  1.429 ( 2.053)	Data  0.253 ( 0.867)	Loss 4.3122e+00 (5.3402e+00)	Acc@1  18.75 (  7.38)	Acc@5  36.72 ( 19.20)
Epoch: [0][3150/5005]	Time  1.394 ( 2.054)	Data  0.274 ( 0.867)	Loss 4.2300e+00 (5.3261e+00)	Acc@1  18.75 (  7.49)	Acc@5  37.50 ( 19.43)
Epoch: [0][3200/5005]	Time  1.335 ( 2.055)	Data  0.218 ( 0.868)	Loss 4.3442e+00 (5.3118e+00)	Acc@1  19.14 (  7.62)	Acc@5  36.33 ( 19.67)
Epoch: [0][3250/5005]	Time  1.341 ( 2.054)	Data  0.188 ( 0.867)	Loss 4.3503e+00 (5.2977e+00)	Acc@1  14.45 (  7.74)	Acc@5  33.98 ( 19.91)
Epoch: [0][3300/5005]	Time  1.345 ( 2.053)	Data  0.224 ( 0.867)	Loss 4.3842e+00 (5.2837e+00)	Acc@1  15.62 (  7.86)	Acc@5  37.50 ( 20.14)
Epoch: [0][3350/5005]	Time  3.369 ( 2.054

Epoch: [1][ 600/5005]	Time  3.548 ( 2.027)	Data  2.334 ( 0.858)	Loss 3.8250e+00 (3.7838e+00)	Acc@1  22.27 ( 23.72)	Acc@5  49.22 ( 46.74)
Epoch: [1][ 650/5005]	Time  1.387 ( 2.024)	Data  0.239 ( 0.854)	Loss 3.7063e+00 (3.7798e+00)	Acc@1  25.78 ( 23.79)	Acc@5  47.27 ( 46.85)
Epoch: [1][ 700/5005]	Time  2.904 ( 2.023)	Data  1.710 ( 0.852)	Loss 3.9441e+00 (3.7770e+00)	Acc@1  23.05 ( 23.83)	Acc@5  43.75 ( 46.93)
Epoch: [1][ 750/5005]	Time  1.466 ( 2.022)	Data  0.218 ( 0.851)	Loss 3.5698e+00 (3.7748e+00)	Acc@1  25.78 ( 23.87)	Acc@5  53.52 ( 46.95)
Epoch: [1][ 800/5005]	Time  3.727 ( 2.025)	Data  2.631 ( 0.853)	Loss 3.6033e+00 (3.7680e+00)	Acc@1  28.52 ( 23.98)	Acc@5  50.00 ( 47.06)
Epoch: [1][ 850/5005]	Time  1.362 ( 2.023)	Data  0.188 ( 0.851)	Loss 3.6995e+00 (3.7642e+00)	Acc@1  22.27 ( 24.03)	Acc@5  45.31 ( 47.14)
Epoch: [1][ 900/5005]	Time  3.914 ( 2.021)	Data  2.763 ( 0.851)	Loss 3.5826e+00 (3.7580e+00)	Acc@1  29.69 ( 24.15)	Acc@5  52.34 ( 47.25)
Epoch: [1][ 950/5005]	Time  1.804 ( 2.021

Epoch: [1][3600/5005]	Time  1.370 ( 2.041)	Data  0.180 ( 0.863)	Loss 3.2636e+00 (3.5651e+00)	Acc@1  37.11 ( 27.08)	Acc@5  57.42 ( 51.04)
Epoch: [1][3650/5005]	Time  1.371 ( 2.041)	Data  0.205 ( 0.863)	Loss 3.4453e+00 (3.5627e+00)	Acc@1  32.03 ( 27.12)	Acc@5  50.00 ( 51.09)
Epoch: [1][3700/5005]	Time  1.335 ( 2.041)	Data  0.188 ( 0.863)	Loss 3.4276e+00 (3.5596e+00)	Acc@1  30.47 ( 27.16)	Acc@5  53.91 ( 51.15)
Epoch: [1][3750/5005]	Time  1.346 ( 2.041)	Data  0.188 ( 0.864)	Loss 3.0957e+00 (3.5566e+00)	Acc@1  31.25 ( 27.22)	Acc@5  57.42 ( 51.21)
Epoch: [1][3800/5005]	Time  1.394 ( 2.041)	Data  0.180 ( 0.863)	Loss 3.4661e+00 (3.5542e+00)	Acc@1  27.73 ( 27.25)	Acc@5  54.30 ( 51.26)
Epoch: [1][3850/5005]	Time  1.425 ( 2.041)	Data  0.196 ( 0.863)	Loss 3.3027e+00 (3.5511e+00)	Acc@1  32.42 ( 27.30)	Acc@5  55.08 ( 51.31)
Epoch: [1][3900/5005]	Time  1.320 ( 2.040)	Data  0.219 ( 0.863)	Loss 3.2471e+00 (3.5483e+00)	Acc@1  30.47 ( 27.34)	Acc@5  55.08 ( 51.36)
Epoch: [1][3950/5005]	Time  1.359 ( 2.040

Epoch: [2][1200/5005]	Time  4.188 ( 2.025)	Data  3.003 ( 0.849)	Loss 3.2818e+00 (3.1558e+00)	Acc@1  26.17 ( 33.51)	Acc@5  56.64 ( 58.54)
Epoch: [2][1250/5005]	Time  1.331 ( 2.026)	Data  0.236 ( 0.849)	Loss 3.0684e+00 (3.1541e+00)	Acc@1  36.72 ( 33.55)	Acc@5  60.16 ( 58.59)
Epoch: [2][1300/5005]	Time  2.508 ( 2.025)	Data  1.254 ( 0.849)	Loss 2.9180e+00 (3.1546e+00)	Acc@1  34.38 ( 33.55)	Acc@5  63.28 ( 58.57)
Epoch: [2][1350/5005]	Time  1.316 ( 2.024)	Data  0.221 ( 0.848)	Loss 3.3068e+00 (3.1538e+00)	Acc@1  32.03 ( 33.57)	Acc@5  53.12 ( 58.58)
Epoch: [2][1400/5005]	Time  2.510 ( 2.026)	Data  1.367 ( 0.850)	Loss 2.8948e+00 (3.1531e+00)	Acc@1  37.89 ( 33.58)	Acc@5  63.67 ( 58.61)
Epoch: [2][1450/5005]	Time  1.336 ( 2.024)	Data  0.193 ( 0.848)	Loss 3.4094e+00 (3.1515e+00)	Acc@1  30.08 ( 33.61)	Acc@5  53.12 ( 58.65)
Epoch: [2][1500/5005]	Time  1.631 ( 2.024)	Data  0.461 ( 0.848)	Loss 3.3572e+00 (3.1502e+00)	Acc@1  27.34 ( 33.64)	Acc@5  60.16 ( 58.69)
Epoch: [2][1550/5005]	Time  1.377 ( 2.026

Epoch: [2][4200/5005]	Time  1.389 ( 2.020)	Data  0.221 ( 0.846)	Loss 3.1586e+00 (3.0854e+00)	Acc@1  35.16 ( 34.77)	Acc@5  59.77 ( 59.89)
Epoch: [2][4250/5005]	Time  2.397 ( 2.021)	Data  1.169 ( 0.846)	Loss 3.3288e+00 (3.0844e+00)	Acc@1  29.30 ( 34.78)	Acc@5  54.69 ( 59.91)
Epoch: [2][4300/5005]	Time  1.354 ( 2.021)	Data  0.242 ( 0.846)	Loss 3.2023e+00 (3.0832e+00)	Acc@1  30.86 ( 34.80)	Acc@5  55.86 ( 59.93)
Epoch: [2][4350/5005]	Time  1.374 ( 2.021)	Data  0.234 ( 0.846)	Loss 3.0980e+00 (3.0823e+00)	Acc@1  37.89 ( 34.81)	Acc@5  62.50 ( 59.95)
Epoch: [2][4400/5005]	Time  1.359 ( 2.020)	Data  0.197 ( 0.845)	Loss 2.9550e+00 (3.0809e+00)	Acc@1  39.06 ( 34.84)	Acc@5  64.06 ( 59.98)
Epoch: [2][4450/5005]	Time  1.441 ( 2.021)	Data  0.159 ( 0.845)	Loss 3.0366e+00 (3.0803e+00)	Acc@1  34.77 ( 34.85)	Acc@5  61.33 ( 59.99)
Epoch: [2][4500/5005]	Time  1.388 ( 2.020)	Data  0.154 ( 0.845)	Loss 3.1362e+00 (3.0791e+00)	Acc@1  36.33 ( 34.86)	Acc@5  56.25 ( 60.01)
Epoch: [2][4550/5005]	Time  1.387 ( 2.020

Epoch: [3][1800/5005]	Time  1.407 ( 2.021)	Data  0.202 ( 0.845)	Loss 2.8658e+00 (2.9039e+00)	Acc@1  38.28 ( 37.71)	Acc@5  60.55 ( 63.10)
Epoch: [3][1850/5005]	Time  2.438 ( 2.021)	Data  1.303 ( 0.845)	Loss 2.7796e+00 (2.9034e+00)	Acc@1  39.06 ( 37.72)	Acc@5  62.89 ( 63.09)
Epoch: [3][1900/5005]	Time  1.441 ( 2.021)	Data  0.222 ( 0.845)	Loss 2.9166e+00 (2.9033e+00)	Acc@1  33.98 ( 37.73)	Acc@5  64.06 ( 63.10)
Epoch: [3][1950/5005]	Time  1.400 ( 2.021)	Data  0.270 ( 0.845)	Loss 2.9116e+00 (2.9018e+00)	Acc@1  37.89 ( 37.75)	Acc@5  61.72 ( 63.12)
Epoch: [3][2000/5005]	Time  1.401 ( 2.019)	Data  0.210 ( 0.844)	Loss 2.9290e+00 (2.9013e+00)	Acc@1  39.06 ( 37.77)	Acc@5  61.33 ( 63.13)
Epoch: [3][2050/5005]	Time  1.394 ( 2.021)	Data  0.195 ( 0.844)	Loss 2.6996e+00 (2.9004e+00)	Acc@1  42.19 ( 37.79)	Acc@5  65.23 ( 63.15)
Epoch: [3][2100/5005]	Time  3.792 ( 2.020)	Data  2.663 ( 0.845)	Loss 3.0118e+00 (2.9001e+00)	Acc@1  35.55 ( 37.80)	Acc@5  62.50 ( 63.15)
Epoch: [3][2150/5005]	Time  1.279 ( 2.019

Epoch: [3][4800/5005]	Time  2.541 ( 2.053)	Data  1.406 ( 0.860)	Loss 2.5568e+00 (2.8778e+00)	Acc@1  44.53 ( 38.29)	Acc@5  69.53 ( 63.57)
Epoch: [3][4850/5005]	Time  1.301 ( 2.052)	Data  0.167 ( 0.860)	Loss 2.7069e+00 (2.8774e+00)	Acc@1  39.84 ( 38.29)	Acc@5  65.23 ( 63.57)
Epoch: [3][4900/5005]	Time  1.273 ( 2.052)	Data  0.118 ( 0.860)	Loss 3.0521e+00 (2.8768e+00)	Acc@1  37.11 ( 38.30)	Acc@5  60.16 ( 63.58)
Epoch: [3][4950/5005]	Time  1.426 ( 2.052)	Data  0.225 ( 0.860)	Loss 2.9189e+00 (2.8764e+00)	Acc@1  37.50 ( 38.32)	Acc@5  59.77 ( 63.59)
Epoch: [3][5000/5005]	Time  1.091 ( 2.051)	Data  0.174 ( 0.860)	Loss 2.6718e+00 (2.8765e+00)	Acc@1  42.97 ( 38.32)	Acc@5  68.36 ( 63.59)
Test: [  0/391]	Time  4.402 ( 4.402)	Loss 3.1077e+00 (3.1077e+00)	Acc@1  32.03 ( 32.03)	Acc@5  61.72 ( 61.72)
Test: [ 50/391]	Time  0.799 ( 1.084)	Loss 2.8209e+00 (2.5917e+00)	Acc@1  35.94 ( 41.54)	Acc@5  67.19 ( 68.66)
Test: [100/391]	Time  2.498 ( 1.094)	Loss 2.2722e+00 (2.6101e+00)	Acc@1  46.09 ( 41.49)	Acc@5  

Epoch: [4][2400/5005]	Time  1.706 ( 2.031)	Data  0.553 ( 0.857)	Loss 2.7177e+00 (2.7815e+00)	Acc@1  35.94 ( 40.01)	Acc@5  69.14 ( 65.20)
Epoch: [4][2450/5005]	Time  1.438 ( 2.032)	Data  0.186 ( 0.857)	Loss 2.6166e+00 (2.7814e+00)	Acc@1  44.14 ( 40.00)	Acc@5  67.58 ( 65.20)
Epoch: [4][2500/5005]	Time  2.690 ( 2.033)	Data  1.536 ( 0.857)	Loss 2.7272e+00 (2.7814e+00)	Acc@1  42.19 ( 40.01)	Acc@5  68.36 ( 65.20)
Epoch: [4][2550/5005]	Time  1.496 ( 2.033)	Data  0.271 ( 0.858)	Loss 2.8150e+00 (2.7818e+00)	Acc@1  39.45 ( 40.00)	Acc@5  62.89 ( 65.19)
Epoch: [4][2600/5005]	Time  2.866 ( 2.033)	Data  1.640 ( 0.858)	Loss 2.7792e+00 (2.7822e+00)	Acc@1  40.62 ( 39.99)	Acc@5  63.67 ( 65.18)
Epoch: [4][2650/5005]	Time  1.332 ( 2.032)	Data  0.210 ( 0.857)	Loss 2.7957e+00 (2.7824e+00)	Acc@1  38.67 ( 39.98)	Acc@5  65.62 ( 65.18)
Epoch: [4][2700/5005]	Time  1.346 ( 2.031)	Data  0.199 ( 0.856)	Loss 2.6784e+00 (2.7819e+00)	Acc@1  39.84 ( 39.99)	Acc@5  67.19 ( 65.19)
Epoch: [4][2750/5005]	Time  3.060 ( 2.031

Epoch: [5][   0/5005]	Time  8.014 ( 8.014)	Data  6.820 ( 6.820)	Loss 2.7034e+00 (2.7034e+00)	Acc@1  41.80 ( 41.80)	Acc@5  65.23 ( 65.23)
Epoch: [5][  50/5005]	Time  1.509 ( 2.136)	Data  0.197 ( 0.967)	Loss 2.6361e+00 (2.6722e+00)	Acc@1  44.14 ( 41.82)	Acc@5  67.97 ( 67.44)
Epoch: [5][ 100/5005]	Time  3.074 ( 2.104)	Data  1.888 ( 0.935)	Loss 2.5310e+00 (2.6806e+00)	Acc@1  41.80 ( 41.77)	Acc@5  70.31 ( 67.35)
Epoch: [5][ 150/5005]	Time  1.413 ( 2.064)	Data  0.170 ( 0.893)	Loss 2.3858e+00 (2.6907e+00)	Acc@1  48.83 ( 41.57)	Acc@5  70.31 ( 66.91)
Epoch: [5][ 200/5005]	Time  3.578 ( 2.069)	Data  2.458 ( 0.893)	Loss 2.5467e+00 (2.6954e+00)	Acc@1  44.92 ( 41.42)	Acc@5  69.92 ( 66.75)
Epoch: [5][ 250/5005]	Time  1.320 ( 2.054)	Data  0.169 ( 0.877)	Loss 2.6975e+00 (2.6988e+00)	Acc@1  42.19 ( 41.31)	Acc@5  67.58 ( 66.72)
Epoch: [5][ 300/5005]	Time  1.453 ( 2.046)	Data  0.229 ( 0.869)	Loss 2.3002e+00 (2.7005e+00)	Acc@1  46.48 ( 41.29)	Acc@5  72.66 ( 66.69)
Epoch: [5][ 350/5005]	Time  3.868 ( 2.047

Epoch: [5][3000/5005]	Time  1.547 ( 2.115)	Data  0.245 ( 0.891)	Loss 2.6920e+00 (2.7015e+00)	Acc@1  39.06 ( 41.37)	Acc@5  65.23 ( 66.55)
Epoch: [5][3050/5005]	Time  4.293 ( 2.118)	Data  3.050 ( 0.893)	Loss 2.6839e+00 (2.7010e+00)	Acc@1  47.27 ( 41.38)	Acc@5  68.75 ( 66.56)
Epoch: [5][3100/5005]	Time  1.381 ( 2.119)	Data  0.223 ( 0.893)	Loss 2.7882e+00 (2.7008e+00)	Acc@1  41.02 ( 41.38)	Acc@5  66.02 ( 66.56)
Epoch: [5][3150/5005]	Time  3.448 ( 2.120)	Data  2.242 ( 0.893)	Loss 2.7904e+00 (2.7001e+00)	Acc@1  38.67 ( 41.39)	Acc@5  64.06 ( 66.59)
Epoch: [5][3200/5005]	Time  1.393 ( 2.118)	Data  0.223 ( 0.892)	Loss 2.6783e+00 (2.7002e+00)	Acc@1  41.02 ( 41.39)	Acc@5  67.58 ( 66.58)
Epoch: [5][3250/5005]	Time  1.411 ( 2.117)	Data  0.207 ( 0.892)	Loss 2.6278e+00 (2.6997e+00)	Acc@1  43.75 ( 41.40)	Acc@5  67.19 ( 66.59)
Epoch: [5][3300/5005]	Time  1.304 ( 2.115)	Data  0.184 ( 0.891)	Loss 2.6953e+00 (2.6998e+00)	Acc@1  40.62 ( 41.40)	Acc@5  67.97 ( 66.59)
Epoch: [5][3350/5005]	Time  1.487 ( 2.115

Epoch: [6][ 600/5005]	Time  1.392 ( 2.019)	Data  0.268 ( 0.840)	Loss 2.6391e+00 (2.6311e+00)	Acc@1  39.45 ( 42.65)	Acc@5  67.58 ( 67.86)
Epoch: [6][ 650/5005]	Time  1.364 ( 2.019)	Data  0.165 ( 0.841)	Loss 2.6348e+00 (2.6354e+00)	Acc@1  41.02 ( 42.56)	Acc@5  68.75 ( 67.76)
Epoch: [6][ 700/5005]	Time  1.356 ( 2.018)	Data  0.198 ( 0.841)	Loss 2.4944e+00 (2.6367e+00)	Acc@1  44.92 ( 42.54)	Acc@5  70.31 ( 67.73)
Epoch: [6][ 750/5005]	Time  1.315 ( 2.022)	Data  0.177 ( 0.845)	Loss 2.7757e+00 (2.6380e+00)	Acc@1  39.84 ( 42.52)	Acc@5  64.84 ( 67.69)
Epoch: [6][ 800/5005]	Time  1.387 ( 2.018)	Data  0.212 ( 0.841)	Loss 2.5552e+00 (2.6394e+00)	Acc@1  43.75 ( 42.50)	Acc@5  68.75 ( 67.66)
Epoch: [6][ 850/5005]	Time  1.341 ( 2.017)	Data  0.220 ( 0.840)	Loss 2.7626e+00 (2.6406e+00)	Acc@1  36.33 ( 42.47)	Acc@5  66.41 ( 67.63)
Epoch: [6][ 900/5005]	Time  1.345 ( 2.015)	Data  0.200 ( 0.839)	Loss 2.5507e+00 (2.6420e+00)	Acc@1  47.27 ( 42.46)	Acc@5  68.36 ( 67.60)
Epoch: [6][ 950/5005]	Time  1.338 ( 2.012

Epoch: [6][3600/5005]	Time  1.400 ( 2.014)	Data  0.228 ( 0.839)	Loss 2.8307e+00 (2.6511e+00)	Acc@1  39.06 ( 42.34)	Acc@5  65.62 ( 67.42)
Epoch: [6][3650/5005]	Time  3.864 ( 2.014)	Data  2.755 ( 0.839)	Loss 2.7901e+00 (2.6507e+00)	Acc@1  41.41 ( 42.35)	Acc@5  64.45 ( 67.43)
Epoch: [6][3700/5005]	Time  1.388 ( 2.014)	Data  0.201 ( 0.839)	Loss 2.7740e+00 (2.6508e+00)	Acc@1  38.67 ( 42.34)	Acc@5  63.67 ( 67.42)
Epoch: [6][3750/5005]	Time  3.441 ( 2.014)	Data  2.286 ( 0.839)	Loss 2.4371e+00 (2.6510e+00)	Acc@1  44.92 ( 42.34)	Acc@5  69.53 ( 67.42)
Epoch: [6][3800/5005]	Time  1.378 ( 2.014)	Data  0.204 ( 0.839)	Loss 2.5502e+00 (2.6506e+00)	Acc@1  46.88 ( 42.34)	Acc@5  69.92 ( 67.43)
Epoch: [6][3850/5005]	Time  3.039 ( 2.014)	Data  1.944 ( 0.840)	Loss 2.5871e+00 (2.6501e+00)	Acc@1  40.62 ( 42.34)	Acc@5  67.58 ( 67.44)
Epoch: [6][3900/5005]	Time  2.562 ( 2.015)	Data  1.398 ( 0.840)	Loss 2.5876e+00 (2.6502e+00)	Acc@1  41.41 ( 42.33)	Acc@5  69.53 ( 67.44)
Epoch: [6][3950/5005]	Time  1.723 ( 2.014

Epoch: [7][1200/5005]	Time  2.199 ( 2.011)	Data  1.009 ( 0.841)	Loss 2.5239e+00 (2.5952e+00)	Acc@1  44.14 ( 43.19)	Acc@5  69.53 ( 68.47)
Epoch: [7][1250/5005]	Time  1.352 ( 2.012)	Data  0.182 ( 0.842)	Loss 2.4691e+00 (2.5953e+00)	Acc@1  45.31 ( 43.20)	Acc@5  72.27 ( 68.48)
Epoch: [7][1300/5005]	Time  1.526 ( 2.012)	Data  0.210 ( 0.842)	Loss 2.6913e+00 (2.5979e+00)	Acc@1  39.45 ( 43.14)	Acc@5  65.23 ( 68.41)
Epoch: [7][1350/5005]	Time  2.331 ( 2.011)	Data  1.182 ( 0.841)	Loss 2.6453e+00 (2.5982e+00)	Acc@1  42.97 ( 43.16)	Acc@5  69.14 ( 68.41)
Epoch: [7][1400/5005]	Time  1.428 ( 2.012)	Data  0.199 ( 0.843)	Loss 2.6989e+00 (2.5992e+00)	Acc@1  41.02 ( 43.15)	Acc@5  67.97 ( 68.39)
Epoch: [7][1450/5005]	Time  2.552 ( 2.012)	Data  1.375 ( 0.843)	Loss 2.5818e+00 (2.5997e+00)	Acc@1  41.41 ( 43.13)	Acc@5  69.53 ( 68.37)
Epoch: [7][1500/5005]	Time  1.315 ( 2.012)	Data  0.192 ( 0.842)	Loss 2.5613e+00 (2.6010e+00)	Acc@1  42.97 ( 43.09)	Acc@5  68.75 ( 68.36)
Epoch: [7][1550/5005]	Time  4.004 ( 2.013

Epoch: [7][4200/5005]	Time  1.429 ( 2.105)	Data  0.267 ( 0.898)	Loss 2.4092e+00 (2.6109e+00)	Acc@1  44.14 ( 42.96)	Acc@5  69.92 ( 68.15)
Epoch: [7][4250/5005]	Time  1.765 ( 2.105)	Data  0.211 ( 0.899)	Loss 2.4631e+00 (2.6112e+00)	Acc@1  46.48 ( 42.96)	Acc@5  70.31 ( 68.14)
Epoch: [7][4300/5005]	Time  2.084 ( 2.105)	Data  0.891 ( 0.899)	Loss 2.4503e+00 (2.6109e+00)	Acc@1  46.88 ( 42.97)	Acc@5  73.44 ( 68.14)
Epoch: [7][4350/5005]	Time  1.374 ( 2.104)	Data  0.204 ( 0.898)	Loss 2.6604e+00 (2.6110e+00)	Acc@1  44.14 ( 42.96)	Acc@5  68.75 ( 68.15)
Epoch: [7][4400/5005]	Time  1.748 ( 2.105)	Data  0.421 ( 0.898)	Loss 2.5844e+00 (2.6114e+00)	Acc@1  47.66 ( 42.96)	Acc@5  67.97 ( 68.14)
Epoch: [7][4450/5005]	Time  1.461 ( 2.104)	Data  0.251 ( 0.898)	Loss 2.7507e+00 (2.6118e+00)	Acc@1  41.80 ( 42.95)	Acc@5  64.84 ( 68.14)
Epoch: [7][4500/5005]	Time  1.357 ( 2.104)	Data  0.193 ( 0.898)	Loss 2.4245e+00 (2.6115e+00)	Acc@1  46.88 ( 42.96)	Acc@5  70.70 ( 68.15)
Epoch: [7][4550/5005]	Time  1.340 ( 2.103

Epoch: [8][1800/5005]	Time  3.541 ( 2.050)	Data  2.337 ( 0.861)	Loss 2.6039e+00 (2.5733e+00)	Acc@1  42.58 ( 43.79)	Acc@5  67.97 ( 68.82)
Epoch: [8][1850/5005]	Time  1.368 ( 2.049)	Data  0.213 ( 0.860)	Loss 2.5779e+00 (2.5736e+00)	Acc@1  40.62 ( 43.78)	Acc@5  70.31 ( 68.82)
Epoch: [8][1900/5005]	Time  1.458 ( 2.049)	Data  0.238 ( 0.860)	Loss 2.6925e+00 (2.5749e+00)	Acc@1  39.06 ( 43.74)	Acc@5  66.41 ( 68.81)
Epoch: [8][1950/5005]	Time  1.417 ( 2.049)	Data  0.249 ( 0.860)	Loss 2.7196e+00 (2.5755e+00)	Acc@1  38.28 ( 43.74)	Acc@5  67.97 ( 68.80)
Epoch: [8][2000/5005]	Time  1.570 ( 2.048)	Data  0.211 ( 0.860)	Loss 2.6368e+00 (2.5771e+00)	Acc@1  45.31 ( 43.71)	Acc@5  69.14 ( 68.78)
Epoch: [8][2050/5005]	Time  2.379 ( 2.049)	Data  1.239 ( 0.860)	Loss 2.3398e+00 (2.5763e+00)	Acc@1  49.61 ( 43.72)	Acc@5  73.05 ( 68.79)
Epoch: [8][2100/5005]	Time  1.348 ( 2.048)	Data  0.203 ( 0.860)	Loss 2.7001e+00 (2.5756e+00)	Acc@1  39.84 ( 43.73)	Acc@5  65.62 ( 68.80)
Epoch: [8][2150/5005]	Time  2.086 ( 2.048

Epoch: [8][4800/5005]	Time  2.942 ( 2.046)	Data  1.769 ( 0.862)	Loss 2.5666e+00 (2.5840e+00)	Acc@1  41.02 ( 43.59)	Acc@5  68.36 ( 68.60)
Epoch: [8][4850/5005]	Time  1.369 ( 2.046)	Data  0.198 ( 0.862)	Loss 2.4461e+00 (2.5840e+00)	Acc@1  49.61 ( 43.59)	Acc@5  72.27 ( 68.60)
Epoch: [8][4900/5005]	Time  1.421 ( 2.047)	Data  0.264 ( 0.863)	Loss 2.5313e+00 (2.5841e+00)	Acc@1  44.14 ( 43.59)	Acc@5  72.27 ( 68.60)
Epoch: [8][4950/5005]	Time  1.384 ( 2.047)	Data  0.203 ( 0.863)	Loss 2.5262e+00 (2.5841e+00)	Acc@1  44.53 ( 43.59)	Acc@5  69.14 ( 68.61)
Epoch: [8][5000/5005]	Time  2.140 ( 2.047)	Data  1.512 ( 0.863)	Loss 2.5576e+00 (2.5839e+00)	Acc@1  42.58 ( 43.59)	Acc@5  67.19 ( 68.61)
Test: [  0/391]	Time  5.033 ( 5.033)	Loss 2.6098e+00 (2.6098e+00)	Acc@1  40.62 ( 40.62)	Acc@5  65.62 ( 65.62)
Test: [ 50/391]	Time  0.797 ( 1.176)	Loss 2.2414e+00 (2.3756e+00)	Acc@1  49.22 ( 45.99)	Acc@5  80.47 ( 72.37)
Test: [100/391]	Time  0.636 ( 1.152)	Loss 2.6210e+00 (2.3987e+00)	Acc@1  39.84 ( 45.78)	Acc@5  

Epoch: [9][2400/5005]	Time  4.433 ( 2.046)	Data  2.936 ( 0.865)	Loss 2.6889e+00 (2.5542e+00)	Acc@1  40.62 ( 44.04)	Acc@5  67.19 ( 69.12)
Epoch: [9][2450/5005]	Time  1.390 ( 2.047)	Data  0.247 ( 0.866)	Loss 2.5738e+00 (2.5542e+00)	Acc@1  44.14 ( 44.04)	Acc@5  69.53 ( 69.12)
Epoch: [9][2500/5005]	Time  2.393 ( 2.048)	Data  1.179 ( 0.866)	Loss 2.5239e+00 (2.5537e+00)	Acc@1  46.48 ( 44.04)	Acc@5  69.14 ( 69.13)
Epoch: [9][2550/5005]	Time  1.333 ( 2.048)	Data  0.208 ( 0.866)	Loss 2.4515e+00 (2.5538e+00)	Acc@1  47.27 ( 44.04)	Acc@5  67.58 ( 69.13)
Epoch: [9][2600/5005]	Time  1.977 ( 2.048)	Data  0.798 ( 0.866)	Loss 2.5210e+00 (2.5542e+00)	Acc@1  46.88 ( 44.03)	Acc@5  68.75 ( 69.12)
Epoch: [9][2650/5005]	Time  1.537 ( 2.048)	Data  0.253 ( 0.866)	Loss 2.5766e+00 (2.5550e+00)	Acc@1  44.92 ( 44.02)	Acc@5  69.14 ( 69.10)
Epoch: [9][2700/5005]	Time  1.850 ( 2.048)	Data  0.636 ( 0.866)	Loss 2.6782e+00 (2.5546e+00)	Acc@1  40.62 ( 44.01)	Acc@5  66.80 ( 69.11)
Epoch: [9][2750/5005]	Time  1.392 ( 2.048

Epoch: [10][   0/5005]	Time  7.698 ( 7.698)	Data  6.575 ( 6.575)	Loss 2.2622e+00 (2.2622e+00)	Acc@1  51.95 ( 51.95)	Acc@5  74.22 ( 74.22)
Epoch: [10][  50/5005]	Time  1.403 ( 2.090)	Data  0.242 ( 0.928)	Loss 2.4434e+00 (2.4909e+00)	Acc@1  42.58 ( 45.52)	Acc@5  68.36 ( 70.03)
Epoch: [10][ 100/5005]	Time  3.818 ( 2.091)	Data  2.677 ( 0.919)	Loss 2.4750e+00 (2.4997e+00)	Acc@1  45.70 ( 45.03)	Acc@5  73.05 ( 69.72)
Epoch: [10][ 150/5005]	Time  1.413 ( 2.076)	Data  0.209 ( 0.894)	Loss 2.8359e+00 (2.5018e+00)	Acc@1  41.80 ( 45.15)	Acc@5  63.28 ( 69.77)
Epoch: [10][ 200/5005]	Time  3.825 ( 2.075)	Data  2.665 ( 0.889)	Loss 2.5741e+00 (2.5055e+00)	Acc@1  43.36 ( 45.03)	Acc@5  67.58 ( 69.76)
Epoch: [10][ 250/5005]	Time  1.311 ( 2.065)	Data  0.212 ( 0.880)	Loss 2.3540e+00 (2.5060e+00)	Acc@1  45.31 ( 45.00)	Acc@5  73.44 ( 69.77)
Epoch: [10][ 300/5005]	Time  1.924 ( 2.058)	Data  0.726 ( 0.876)	Loss 2.6336e+00 (2.5124e+00)	Acc@1  41.02 ( 44.86)	Acc@5  69.92 ( 69.69)
Epoch: [10][ 350/5005]	Time  1.487

Epoch: [10][3000/5005]	Time  2.719 ( 2.047)	Data  1.581 ( 0.859)	Loss 2.5949e+00 (2.5325e+00)	Acc@1  42.19 ( 44.41)	Acc@5  67.97 ( 69.43)
Epoch: [10][3050/5005]	Time  1.449 ( 2.048)	Data  0.248 ( 0.860)	Loss 2.6498e+00 (2.5325e+00)	Acc@1  44.53 ( 44.41)	Acc@5  71.88 ( 69.43)
Epoch: [10][3100/5005]	Time  2.497 ( 2.047)	Data  1.341 ( 0.859)	Loss 2.4055e+00 (2.5328e+00)	Acc@1  46.88 ( 44.40)	Acc@5  72.66 ( 69.43)
Epoch: [10][3150/5005]	Time  5.096 ( 2.048)	Data  3.868 ( 0.861)	Loss 2.3941e+00 (2.5325e+00)	Acc@1  45.31 ( 44.41)	Acc@5  72.27 ( 69.44)
Epoch: [10][3200/5005]	Time  1.344 ( 2.048)	Data  0.180 ( 0.860)	Loss 2.4372e+00 (2.5323e+00)	Acc@1  46.09 ( 44.41)	Acc@5  71.48 ( 69.44)
Epoch: [10][3250/5005]	Time  2.179 ( 2.048)	Data  1.033 ( 0.860)	Loss 2.8319e+00 (2.5326e+00)	Acc@1  43.36 ( 44.40)	Acc@5  67.58 ( 69.44)
Epoch: [10][3300/5005]	Time  1.416 ( 2.047)	Data  0.212 ( 0.859)	Loss 2.4477e+00 (2.5324e+00)	Acc@1  44.14 ( 44.40)	Acc@5  71.48 ( 69.45)
Epoch: [10][3350/5005]	Time  1.392

Epoch: [11][ 600/5005]	Time  1.418 ( 2.083)	Data  0.234 ( 0.881)	Loss 2.7161e+00 (2.4843e+00)	Acc@1  41.80 ( 45.30)	Acc@5  67.19 ( 70.25)
Epoch: [11][ 650/5005]	Time  1.388 ( 2.086)	Data  0.215 ( 0.882)	Loss 2.6651e+00 (2.4891e+00)	Acc@1  44.14 ( 45.24)	Acc@5  68.75 ( 70.18)
Epoch: [11][ 700/5005]	Time  3.538 ( 2.084)	Data  2.412 ( 0.881)	Loss 2.6412e+00 (2.4926e+00)	Acc@1  41.41 ( 45.18)	Acc@5  68.36 ( 70.10)
Epoch: [11][ 750/5005]	Time  1.394 ( 2.079)	Data  0.225 ( 0.876)	Loss 2.4587e+00 (2.4926e+00)	Acc@1  50.39 ( 45.20)	Acc@5  71.48 ( 70.10)
Epoch: [11][ 800/5005]	Time  3.135 ( 2.078)	Data  1.853 ( 0.875)	Loss 2.4208e+00 (2.4940e+00)	Acc@1  48.44 ( 45.19)	Acc@5  69.92 ( 70.08)
Epoch: [11][ 850/5005]	Time  2.248 ( 2.076)	Data  0.893 ( 0.874)	Loss 2.5296e+00 (2.4959e+00)	Acc@1  43.36 ( 45.17)	Acc@5  71.09 ( 70.06)
Epoch: [11][ 900/5005]	Time  1.496 ( 2.076)	Data  0.257 ( 0.875)	Loss 2.3843e+00 (2.4977e+00)	Acc@1  45.31 ( 45.16)	Acc@5  71.48 ( 70.03)
Epoch: [11][ 950/5005]	Time  1.409

Epoch: [11][3600/5005]	Time  1.492 ( 2.080)	Data  0.238 ( 0.881)	Loss 2.5467e+00 (2.5200e+00)	Acc@1  47.27 ( 44.64)	Acc@5  67.19 ( 69.63)
Epoch: [11][3650/5005]	Time  1.558 ( 2.083)	Data  0.240 ( 0.882)	Loss 2.2967e+00 (2.5197e+00)	Acc@1  47.66 ( 44.64)	Acc@5  71.09 ( 69.64)
Epoch: [11][3700/5005]	Time  2.209 ( 2.085)	Data  0.943 ( 0.883)	Loss 2.3804e+00 (2.5203e+00)	Acc@1  45.70 ( 44.64)	Acc@5  70.31 ( 69.63)
Epoch: [11][3750/5005]	Time  1.569 ( 2.087)	Data  0.209 ( 0.884)	Loss 2.5779e+00 (2.5203e+00)	Acc@1  43.75 ( 44.63)	Acc@5  68.36 ( 69.63)
Epoch: [11][3800/5005]	Time  3.901 ( 2.089)	Data  2.700 ( 0.885)	Loss 2.2518e+00 (2.5209e+00)	Acc@1  48.05 ( 44.62)	Acc@5  76.17 ( 69.62)
Epoch: [11][3850/5005]	Time  5.251 ( 2.091)	Data  3.894 ( 0.886)	Loss 2.5944e+00 (2.5211e+00)	Acc@1  44.92 ( 44.63)	Acc@5  66.41 ( 69.62)
Epoch: [11][3900/5005]	Time  3.116 ( 2.093)	Data  1.825 ( 0.886)	Loss 2.5394e+00 (2.5214e+00)	Acc@1  42.19 ( 44.63)	Acc@5  70.31 ( 69.62)
Epoch: [11][3950/5005]	Time  2.928

Epoch: [12][1200/5005]	Time  1.434 ( 2.042)	Data  0.200 ( 0.861)	Loss 2.4249e+00 (2.4919e+00)	Acc@1  44.14 ( 45.17)	Acc@5  71.48 ( 70.11)
Epoch: [12][1250/5005]	Time  1.642 ( 2.043)	Data  0.451 ( 0.861)	Loss 2.6378e+00 (2.4946e+00)	Acc@1  42.58 ( 45.12)	Acc@5  66.41 ( 70.06)
Epoch: [12][1300/5005]	Time  1.516 ( 2.043)	Data  0.205 ( 0.861)	Loss 2.5369e+00 (2.4952e+00)	Acc@1  44.53 ( 45.10)	Acc@5  67.97 ( 70.05)
Epoch: [12][1350/5005]	Time  2.685 ( 2.044)	Data  1.424 ( 0.863)	Loss 2.6646e+00 (2.4963e+00)	Acc@1  46.48 ( 45.10)	Acc@5  66.02 ( 70.03)
Epoch: [12][1400/5005]	Time  1.405 ( 2.044)	Data  0.210 ( 0.862)	Loss 2.3930e+00 (2.4963e+00)	Acc@1  44.53 ( 45.09)	Acc@5  70.70 ( 70.04)
Epoch: [12][1450/5005]	Time  3.455 ( 2.048)	Data  2.312 ( 0.865)	Loss 2.4955e+00 (2.4976e+00)	Acc@1  41.41 ( 45.08)	Acc@5  72.66 ( 70.03)
Epoch: [12][1500/5005]	Time  1.365 ( 2.046)	Data  0.205 ( 0.863)	Loss 2.2884e+00 (2.4957e+00)	Acc@1  45.70 ( 45.09)	Acc@5  73.05 ( 70.05)
Epoch: [12][1550/5005]	Time  3.317

Epoch: [12][4200/5005]	Time  4.778 ( 2.053)	Data  3.607 ( 0.868)	Loss 2.5904e+00 (2.5052e+00)	Acc@1  44.14 ( 44.91)	Acc@5  65.62 ( 69.93)
Epoch: [12][4250/5005]	Time  3.326 ( 2.053)	Data  2.154 ( 0.868)	Loss 2.5109e+00 (2.5053e+00)	Acc@1  47.27 ( 44.90)	Acc@5  71.09 ( 69.93)
Epoch: [12][4300/5005]	Time  3.557 ( 2.053)	Data  2.393 ( 0.868)	Loss 2.3851e+00 (2.5050e+00)	Acc@1  46.88 ( 44.90)	Acc@5  75.39 ( 69.93)
Epoch: [12][4350/5005]	Time  1.379 ( 2.052)	Data  0.220 ( 0.867)	Loss 2.4824e+00 (2.5055e+00)	Acc@1  47.66 ( 44.90)	Acc@5  69.53 ( 69.92)
Epoch: [12][4400/5005]	Time  1.476 ( 2.053)	Data  0.202 ( 0.868)	Loss 2.4723e+00 (2.5060e+00)	Acc@1  46.48 ( 44.89)	Acc@5  70.70 ( 69.91)
Epoch: [12][4450/5005]	Time  3.996 ( 2.053)	Data  2.770 ( 0.868)	Loss 2.6864e+00 (2.5061e+00)	Acc@1  43.36 ( 44.88)	Acc@5  66.41 ( 69.91)
Epoch: [12][4500/5005]	Time  1.436 ( 2.053)	Data  0.223 ( 0.868)	Loss 2.7105e+00 (2.5060e+00)	Acc@1  43.36 ( 44.88)	Acc@5  65.23 ( 69.91)
Epoch: [12][4550/5005]	Time  1.608

Epoch: [13][1800/5005]	Time  2.380 ( 2.047)	Data  1.201 ( 0.852)	Loss 2.5218e+00 (2.4837e+00)	Acc@1  42.58 ( 45.26)	Acc@5  69.14 ( 70.26)
Epoch: [13][1850/5005]	Time  2.796 ( 2.047)	Data  1.627 ( 0.852)	Loss 2.4942e+00 (2.4838e+00)	Acc@1  48.44 ( 45.27)	Acc@5  69.92 ( 70.24)
Epoch: [13][1900/5005]	Time  1.902 ( 2.047)	Data  0.708 ( 0.853)	Loss 2.5760e+00 (2.4840e+00)	Acc@1  45.31 ( 45.28)	Acc@5  69.14 ( 70.25)
Epoch: [13][1950/5005]	Time  1.393 ( 2.047)	Data  0.221 ( 0.853)	Loss 2.4710e+00 (2.4845e+00)	Acc@1  45.70 ( 45.28)	Acc@5  71.88 ( 70.24)
Epoch: [13][2000/5005]	Time  4.209 ( 2.048)	Data  2.986 ( 0.854)	Loss 2.3096e+00 (2.4842e+00)	Acc@1  46.88 ( 45.27)	Acc@5  76.17 ( 70.25)
Epoch: [13][2050/5005]	Time  1.428 ( 2.048)	Data  0.211 ( 0.854)	Loss 2.7869e+00 (2.4848e+00)	Acc@1  39.06 ( 45.27)	Acc@5  66.80 ( 70.24)
Epoch: [13][2100/5005]	Time  2.691 ( 2.047)	Data  1.458 ( 0.854)	Loss 2.6187e+00 (2.4856e+00)	Acc@1  42.19 ( 45.26)	Acc@5  69.14 ( 70.22)
Epoch: [13][2150/5005]	Time  1.426

Epoch: [13][4800/5005]	Time  1.397 ( 2.048)	Data  0.210 ( 0.858)	Loss 2.5819e+00 (2.4954e+00)	Acc@1  41.02 ( 45.12)	Acc@5  70.70 ( 70.08)
Epoch: [13][4850/5005]	Time  1.322 ( 2.049)	Data  0.196 ( 0.858)	Loss 2.5336e+00 (2.4957e+00)	Acc@1  43.75 ( 45.11)	Acc@5  72.66 ( 70.07)
Epoch: [13][4900/5005]	Time  1.395 ( 2.049)	Data  0.200 ( 0.858)	Loss 2.8827e+00 (2.4960e+00)	Acc@1  39.84 ( 45.11)	Acc@5  60.55 ( 70.07)
Epoch: [13][4950/5005]	Time  1.300 ( 2.049)	Data  0.191 ( 0.858)	Loss 2.3783e+00 (2.4961e+00)	Acc@1  48.05 ( 45.10)	Acc@5  71.09 ( 70.06)
Epoch: [13][5000/5005]	Time  1.500 ( 2.048)	Data  0.169 ( 0.858)	Loss 2.4492e+00 (2.4961e+00)	Acc@1  43.75 ( 45.11)	Acc@5  72.27 ( 70.06)
Test: [  0/391]	Time  4.408 ( 4.408)	Loss 1.8930e+00 (1.8930e+00)	Acc@1  53.91 ( 53.91)	Acc@5  81.25 ( 81.25)
Test: [ 50/391]	Time  0.649 ( 1.130)	Loss 1.5150e+00 (2.2863e+00)	Acc@1  60.94 ( 47.15)	Acc@5  88.28 ( 74.22)
Test: [100/391]	Time  2.324 ( 1.113)	Loss 2.1724e+00 (2.2513e+00)	Acc@1  46.09 ( 48.04)	Ac

Epoch: [14][2400/5005]	Time  1.436 ( 2.043)	Data  0.228 ( 0.857)	Loss 2.5655e+00 (2.4756e+00)	Acc@1  44.53 ( 45.55)	Acc@5  67.58 ( 70.38)
Epoch: [14][2450/5005]	Time  1.373 ( 2.044)	Data  0.219 ( 0.857)	Loss 2.3923e+00 (2.4759e+00)	Acc@1  46.48 ( 45.54)	Acc@5  70.70 ( 70.38)
Epoch: [14][2500/5005]	Time  1.462 ( 2.042)	Data  0.255 ( 0.856)	Loss 2.5428e+00 (2.4762e+00)	Acc@1  43.36 ( 45.52)	Acc@5  69.14 ( 70.37)
Epoch: [14][2550/5005]	Time  1.605 ( 2.042)	Data  0.309 ( 0.856)	Loss 2.3362e+00 (2.4764e+00)	Acc@1  51.95 ( 45.52)	Acc@5  73.44 ( 70.36)
Epoch: [14][2600/5005]	Time  2.781 ( 2.042)	Data  1.670 ( 0.856)	Loss 2.6051e+00 (2.4772e+00)	Acc@1  42.58 ( 45.50)	Acc@5  73.05 ( 70.35)
Epoch: [14][2650/5005]	Time  1.367 ( 2.042)	Data  0.211 ( 0.856)	Loss 2.5119e+00 (2.4782e+00)	Acc@1  41.80 ( 45.48)	Acc@5  69.53 ( 70.34)
Epoch: [14][2700/5005]	Time  3.266 ( 2.042)	Data  2.036 ( 0.857)	Loss 2.4105e+00 (2.4781e+00)	Acc@1  48.44 ( 45.49)	Acc@5  70.70 ( 70.34)
Epoch: [14][2750/5005]	Time  1.368

 * Acc@1 47.092 Acc@5 73.922
lr: [0.09861849601988384]
Epoch: [15][   0/5005]	Time  8.870 ( 8.870)	Data  7.679 ( 7.679)	Loss 2.3628e+00 (2.3628e+00)	Acc@1  47.27 ( 47.27)	Acc@5  69.53 ( 69.53)
Epoch: [15][  50/5005]	Time  1.524 ( 2.366)	Data  0.224 ( 1.097)	Loss 2.6126e+00 (2.4368e+00)	Acc@1  44.53 ( 46.11)	Acc@5  71.09 ( 71.31)
Epoch: [15][ 100/5005]	Time  3.792 ( 2.308)	Data  2.530 ( 1.028)	Loss 2.2180e+00 (2.4267e+00)	Acc@1  53.91 ( 46.36)	Acc@5  74.61 ( 71.23)
Epoch: [15][ 150/5005]	Time  1.496 ( 2.285)	Data  0.228 ( 1.000)	Loss 2.4764e+00 (2.4375e+00)	Acc@1  45.31 ( 46.15)	Acc@5  71.48 ( 71.00)
Epoch: [15][ 200/5005]	Time  4.597 ( 2.287)	Data  3.276 ( 1.001)	Loss 2.3189e+00 (2.4303e+00)	Acc@1  48.83 ( 46.29)	Acc@5  75.39 ( 71.02)
Epoch: [15][ 250/5005]	Time  1.469 ( 2.263)	Data  0.233 ( 0.977)	Loss 2.4385e+00 (2.4271e+00)	Acc@1  42.58 ( 46.16)	Acc@5  70.70 ( 71.07)
Epoch: [15][ 300/5005]	Time  4.318 ( 2.259)	Data  3.014 ( 0.970)	Loss 2.5017e+00 (2.4343e+00)	Acc@1  46.88 ( 46.12)	A

Epoch: [15][2950/5005]	Time  3.416 ( 2.094)	Data  2.327 ( 0.881)	Loss 2.4404e+00 (2.4670e+00)	Acc@1  49.22 ( 45.66)	Acc@5  70.31 ( 70.53)
Epoch: [15][3000/5005]	Time  1.232 ( 2.094)	Data  0.189 ( 0.882)	Loss 2.4482e+00 (2.4678e+00)	Acc@1  47.27 ( 45.65)	Acc@5  72.27 ( 70.52)
Epoch: [15][3050/5005]	Time  1.889 ( 2.092)	Data  0.702 ( 0.880)	Loss 2.0923e+00 (2.4676e+00)	Acc@1  49.61 ( 45.65)	Acc@5  76.95 ( 70.52)
Epoch: [15][3100/5005]	Time  1.470 ( 2.092)	Data  0.218 ( 0.881)	Loss 2.4404e+00 (2.4677e+00)	Acc@1  42.58 ( 45.64)	Acc@5  71.09 ( 70.52)
Epoch: [15][3150/5005]	Time  4.532 ( 2.092)	Data  3.223 ( 0.881)	Loss 2.3446e+00 (2.4676e+00)	Acc@1  49.61 ( 45.64)	Acc@5  72.27 ( 70.51)
Epoch: [15][3200/5005]	Time  1.458 ( 2.093)	Data  0.220 ( 0.881)	Loss 2.3899e+00 (2.4679e+00)	Acc@1  45.70 ( 45.64)	Acc@5  73.05 ( 70.51)
Epoch: [15][3250/5005]	Time  3.260 ( 2.092)	Data  2.114 ( 0.880)	Loss 2.5645e+00 (2.4677e+00)	Acc@1  47.66 ( 45.64)	Acc@5  71.48 ( 70.52)
Epoch: [15][3300/5005]	Time  1.411

Epoch: [16][ 550/5005]	Time  4.304 ( 2.047)	Data  3.072 ( 0.863)	Loss 2.5970e+00 (2.4356e+00)	Acc@1  40.23 ( 46.38)	Acc@5  65.23 ( 71.02)
Epoch: [16][ 600/5005]	Time  1.411 ( 2.043)	Data  0.119 ( 0.860)	Loss 2.3268e+00 (2.4384e+00)	Acc@1  47.66 ( 46.28)	Acc@5  75.00 ( 71.00)
Epoch: [16][ 650/5005]	Time  2.754 ( 2.045)	Data  1.606 ( 0.860)	Loss 2.4230e+00 (2.4364e+00)	Acc@1  46.48 ( 46.31)	Acc@5  71.88 ( 71.05)
Epoch: [16][ 700/5005]	Time  1.454 ( 2.053)	Data  0.201 ( 0.863)	Loss 2.5733e+00 (2.4410e+00)	Acc@1  46.88 ( 46.22)	Acc@5  67.97 ( 70.99)
Epoch: [16][ 750/5005]	Time  4.814 ( 2.063)	Data  3.647 ( 0.869)	Loss 2.4325e+00 (2.4436e+00)	Acc@1  44.92 ( 46.15)	Acc@5  72.66 ( 70.94)
Epoch: [16][ 800/5005]	Time  1.374 ( 2.063)	Data  0.200 ( 0.868)	Loss 2.4747e+00 (2.4447e+00)	Acc@1  48.44 ( 46.16)	Acc@5  69.14 ( 70.91)
Epoch: [16][ 850/5005]	Time  3.503 ( 2.069)	Data  2.276 ( 0.873)	Loss 2.3407e+00 (2.4425e+00)	Acc@1  47.27 ( 46.22)	Acc@5  71.88 ( 70.91)
Epoch: [16][ 900/5005]	Time  1.374

Epoch: [16][3550/5005]	Time  2.046 ( 2.066)	Data  0.842 ( 0.873)	Loss 2.4240e+00 (2.4625e+00)	Acc@1  46.09 ( 45.72)	Acc@5  69.53 ( 70.57)
Epoch: [16][3600/5005]	Time  1.354 ( 2.065)	Data  0.241 ( 0.873)	Loss 2.7854e+00 (2.4628e+00)	Acc@1  37.89 ( 45.71)	Acc@5  66.80 ( 70.57)
Epoch: [16][3650/5005]	Time  1.364 ( 2.065)	Data  0.181 ( 0.873)	Loss 2.5311e+00 (2.4630e+00)	Acc@1  46.88 ( 45.71)	Acc@5  70.31 ( 70.57)
Epoch: [16][3700/5005]	Time  1.415 ( 2.064)	Data  0.207 ( 0.872)	Loss 2.8312e+00 (2.4632e+00)	Acc@1  38.67 ( 45.70)	Acc@5  67.97 ( 70.56)
Epoch: [16][3750/5005]	Time  1.319 ( 2.065)	Data  0.200 ( 0.873)	Loss 2.5455e+00 (2.4632e+00)	Acc@1  45.70 ( 45.71)	Acc@5  68.75 ( 70.56)
Epoch: [16][3800/5005]	Time  1.294 ( 2.064)	Data  0.153 ( 0.873)	Loss 2.4659e+00 (2.4632e+00)	Acc@1  43.36 ( 45.71)	Acc@5  67.58 ( 70.56)
Epoch: [16][3850/5005]	Time  1.442 ( 2.064)	Data  0.186 ( 0.873)	Loss 2.6154e+00 (2.4630e+00)	Acc@1  41.41 ( 45.71)	Acc@5  68.36 ( 70.56)
Epoch: [16][3900/5005]	Time  1.376

Epoch: [17][1150/5005]	Time  1.910 ( 2.050)	Data  0.711 ( 0.871)	Loss 2.5394e+00 (2.4375e+00)	Acc@1  38.67 ( 46.08)	Acc@5  71.48 ( 71.06)
Epoch: [17][1200/5005]	Time  3.255 ( 2.049)	Data  2.149 ( 0.871)	Loss 2.4880e+00 (2.4388e+00)	Acc@1  41.02 ( 46.08)	Acc@5  70.31 ( 71.04)
Epoch: [17][1250/5005]	Time  1.361 ( 2.046)	Data  0.210 ( 0.868)	Loss 2.4115e+00 (2.4398e+00)	Acc@1  50.00 ( 46.07)	Acc@5  73.05 ( 71.02)
Epoch: [17][1300/5005]	Time  4.005 ( 2.048)	Data  2.849 ( 0.871)	Loss 2.6626e+00 (2.4408e+00)	Acc@1  43.75 ( 46.07)	Acc@5  66.80 ( 71.00)
Epoch: [17][1350/5005]	Time  1.483 ( 2.044)	Data  0.265 ( 0.868)	Loss 2.6364e+00 (2.4417e+00)	Acc@1  41.41 ( 46.06)	Acc@5  71.48 ( 70.99)
Epoch: [17][1400/5005]	Time  3.594 ( 2.044)	Data  2.467 ( 0.868)	Loss 2.3655e+00 (2.4425e+00)	Acc@1  47.66 ( 46.04)	Acc@5  72.27 ( 70.97)
Epoch: [17][1450/5005]	Time  1.366 ( 2.044)	Data  0.227 ( 0.868)	Loss 2.3001e+00 (2.4438e+00)	Acc@1  45.70 ( 46.00)	Acc@5  73.05 ( 70.95)
Epoch: [17][1500/5005]	Time  3.540

Epoch: [17][4150/5005]	Time  2.376 ( 2.041)	Data  1.182 ( 0.867)	Loss 2.4981e+00 (2.4573e+00)	Acc@1  43.75 ( 45.79)	Acc@5  66.02 ( 70.69)
Epoch: [17][4200/5005]	Time  1.455 ( 2.041)	Data  0.247 ( 0.867)	Loss 2.3422e+00 (2.4570e+00)	Acc@1  48.05 ( 45.80)	Acc@5  72.66 ( 70.69)
Epoch: [17][4250/5005]	Time  4.143 ( 2.041)	Data  3.021 ( 0.867)	Loss 2.5439e+00 (2.4573e+00)	Acc@1  45.31 ( 45.80)	Acc@5  70.70 ( 70.69)
Epoch: [17][4300/5005]	Time  1.445 ( 2.040)	Data  0.231 ( 0.867)	Loss 2.3670e+00 (2.4570e+00)	Acc@1  51.95 ( 45.80)	Acc@5  70.31 ( 70.69)
Epoch: [17][4350/5005]	Time  3.248 ( 2.040)	Data  2.056 ( 0.866)	Loss 2.1917e+00 (2.4568e+00)	Acc@1  49.61 ( 45.80)	Acc@5  72.27 ( 70.70)
Epoch: [17][4400/5005]	Time  1.703 ( 2.040)	Data  0.421 ( 0.866)	Loss 2.5553e+00 (2.4568e+00)	Acc@1  44.14 ( 45.80)	Acc@5  71.48 ( 70.69)
Epoch: [17][4450/5005]	Time  1.340 ( 2.040)	Data  0.213 ( 0.867)	Loss 2.6096e+00 (2.4567e+00)	Acc@1  41.80 ( 45.80)	Acc@5  68.36 ( 70.70)
Epoch: [17][4500/5005]	Time  1.463

Epoch: [18][1750/5005]	Time  2.252 ( 2.026)	Data  1.055 ( 0.856)	Loss 2.4454e+00 (2.4382e+00)	Acc@1  49.22 ( 46.19)	Acc@5  68.75 ( 71.03)
Epoch: [18][1800/5005]	Time  3.575 ( 2.025)	Data  2.467 ( 0.856)	Loss 2.1669e+00 (2.4384e+00)	Acc@1  48.44 ( 46.18)	Acc@5  74.61 ( 71.02)
Epoch: [18][1850/5005]	Time  1.413 ( 2.024)	Data  0.244 ( 0.855)	Loss 2.2342e+00 (2.4379e+00)	Acc@1  49.61 ( 46.20)	Acc@5  75.78 ( 71.03)
Epoch: [18][1900/5005]	Time  1.404 ( 2.024)	Data  0.204 ( 0.855)	Loss 2.4208e+00 (2.4379e+00)	Acc@1  48.83 ( 46.20)	Acc@5  69.92 ( 71.03)
Epoch: [18][1950/5005]	Time  2.171 ( 2.023)	Data  1.010 ( 0.855)	Loss 2.4448e+00 (2.4385e+00)	Acc@1  48.44 ( 46.20)	Acc@5  73.05 ( 71.01)
Epoch: [18][2000/5005]	Time  2.418 ( 2.023)	Data  1.270 ( 0.854)	Loss 2.8745e+00 (2.4394e+00)	Acc@1  40.23 ( 46.19)	Acc@5  64.84 ( 70.99)
Epoch: [18][2050/5005]	Time  1.363 ( 2.024)	Data  0.220 ( 0.855)	Loss 2.5656e+00 (2.4397e+00)	Acc@1  46.48 ( 46.18)	Acc@5  68.75 ( 70.99)
Epoch: [18][2100/5005]	Time  4.317

Epoch: [18][4750/5005]	Time  2.981 ( 2.022)	Data  1.779 ( 0.854)	Loss 2.2671e+00 (2.4500e+00)	Acc@1  49.22 ( 45.98)	Acc@5  73.05 ( 70.82)
Epoch: [18][4800/5005]	Time  2.282 ( 2.022)	Data  1.131 ( 0.854)	Loss 2.3226e+00 (2.4501e+00)	Acc@1  51.56 ( 45.98)	Acc@5  74.61 ( 70.82)
Epoch: [18][4850/5005]	Time  1.433 ( 2.022)	Data  0.219 ( 0.854)	Loss 2.6123e+00 (2.4503e+00)	Acc@1  43.75 ( 45.98)	Acc@5  68.75 ( 70.82)
Epoch: [18][4900/5005]	Time  2.597 ( 2.022)	Data  1.501 ( 0.854)	Loss 2.3920e+00 (2.4503e+00)	Acc@1  47.27 ( 45.98)	Acc@5  73.83 ( 70.82)
Epoch: [18][4950/5005]	Time  3.968 ( 2.022)	Data  2.876 ( 0.854)	Loss 2.3440e+00 (2.4502e+00)	Acc@1  45.31 ( 45.98)	Acc@5  73.44 ( 70.82)
Epoch: [18][5000/5005]	Time  0.562 ( 2.022)	Data  0.047 ( 0.855)	Loss 2.2848e+00 (2.4503e+00)	Acc@1  47.66 ( 45.97)	Acc@5  72.66 ( 70.82)
Test: [  0/391]	Time  4.982 ( 4.982)	Loss 2.1598e+00 (2.1598e+00)	Acc@1  46.09 ( 46.09)	Acc@5  75.00 ( 75.00)
Test: [ 50/391]	Time  0.636 ( 1.112)	Loss 2.4223e+00 (2.2012e+

Epoch: [19][2350/5005]	Time  1.374 ( 2.029)	Data  0.218 ( 0.851)	Loss 2.6635e+00 (2.4308e+00)	Acc@1  41.02 ( 46.26)	Acc@5  68.36 ( 71.17)
Epoch: [19][2400/5005]	Time  2.548 ( 2.031)	Data  1.311 ( 0.852)	Loss 2.5953e+00 (2.4308e+00)	Acc@1  42.97 ( 46.26)	Acc@5  66.41 ( 71.16)
Epoch: [19][2450/5005]	Time  1.437 ( 2.030)	Data  0.190 ( 0.852)	Loss 2.6515e+00 (2.4305e+00)	Acc@1  42.19 ( 46.25)	Acc@5  65.62 ( 71.16)
Epoch: [19][2500/5005]	Time  1.376 ( 2.030)	Data  0.238 ( 0.852)	Loss 2.4988e+00 (2.4300e+00)	Acc@1  49.22 ( 46.27)	Acc@5  67.19 ( 71.17)
Epoch: [19][2550/5005]	Time  1.386 ( 2.031)	Data  0.192 ( 0.853)	Loss 2.4721e+00 (2.4307e+00)	Acc@1  50.00 ( 46.25)	Acc@5  68.75 ( 71.16)
Epoch: [19][2600/5005]	Time  1.342 ( 2.031)	Data  0.217 ( 0.853)	Loss 2.7686e+00 (2.4314e+00)	Acc@1  39.06 ( 46.24)	Acc@5  65.62 ( 71.15)
Epoch: [19][2650/5005]	Time  1.338 ( 2.030)	Data  0.234 ( 0.853)	Loss 2.4789e+00 (2.4308e+00)	Acc@1  46.88 ( 46.25)	Acc@5  71.09 ( 71.16)
Epoch: [19][2700/5005]	Time  2.300

Test: [350/391]	Time  2.075 ( 1.203)	Loss 2.1800e+00 (2.2089e+00)	Acc@1  45.31 ( 48.87)	Acc@5  76.56 ( 75.07)
 * Acc@1 48.784 Acc@5 75.020
lr: [0.0975528258147577]
Epoch: [20][   0/5005]	Time 10.605 (10.605)	Data  9.298 ( 9.298)	Loss 2.1442e+00 (2.1442e+00)	Acc@1  54.69 ( 54.69)	Acc@5  77.34 ( 77.34)
Epoch: [20][  50/5005]	Time  1.536 ( 2.330)	Data  0.229 ( 1.053)	Loss 2.3287e+00 (2.4249e+00)	Acc@1  47.27 ( 46.46)	Acc@5  73.83 ( 71.16)
Epoch: [20][ 100/5005]	Time  4.148 ( 2.294)	Data  2.798 ( 1.018)	Loss 2.4559e+00 (2.4134e+00)	Acc@1  46.48 ( 46.56)	Acc@5  69.53 ( 71.47)
Epoch: [20][ 150/5005]	Time  1.525 ( 2.262)	Data  0.223 ( 0.991)	Loss 2.2364e+00 (2.4097e+00)	Acc@1  49.22 ( 46.71)	Acc@5  72.66 ( 71.46)
Epoch: [20][ 200/5005]	Time  3.300 ( 2.255)	Data  2.052 ( 0.984)	Loss 2.3494e+00 (2.4094e+00)	Acc@1  48.44 ( 46.73)	Acc@5  76.17 ( 71.43)
Epoch: [20][ 250/5005]	Time  1.467 ( 2.230)	Data  0.213 ( 0.958)	Loss 2.2879e+00 (2.4045e+00)	Acc@1  48.83 ( 46.87)	Acc@5  74.22 ( 71.50)
Epoch: [

Epoch: [20][2950/5005]	Time  1.329 ( 2.177)	Data  0.215 ( 0.935)	Loss 2.2297e+00 (2.4261e+00)	Acc@1  47.27 ( 46.36)	Acc@5  76.56 ( 71.23)
Epoch: [20][3000/5005]	Time  1.432 ( 2.174)	Data  0.220 ( 0.934)	Loss 2.3879e+00 (2.4262e+00)	Acc@1  44.53 ( 46.35)	Acc@5  74.22 ( 71.24)
Epoch: [20][3050/5005]	Time  1.420 ( 2.171)	Data  0.215 ( 0.932)	Loss 2.4922e+00 (2.4263e+00)	Acc@1  45.70 ( 46.35)	Acc@5  72.66 ( 71.24)
Epoch: [20][3100/5005]	Time  1.444 ( 2.170)	Data  0.287 ( 0.932)	Loss 2.5404e+00 (2.4275e+00)	Acc@1  41.02 ( 46.34)	Acc@5  67.97 ( 71.22)
Epoch: [20][3150/5005]	Time  1.593 ( 2.166)	Data  0.261 ( 0.930)	Loss 2.3045e+00 (2.4280e+00)	Acc@1  47.27 ( 46.33)	Acc@5  73.05 ( 71.20)
Epoch: [20][3200/5005]	Time  1.399 ( 2.163)	Data  0.201 ( 0.928)	Loss 2.6853e+00 (2.4286e+00)	Acc@1  41.41 ( 46.32)	Acc@5  69.14 ( 71.20)
Epoch: [20][3250/5005]	Time  5.067 ( 2.162)	Data  3.906 ( 0.928)	Loss 2.2044e+00 (2.4288e+00)	Acc@1  46.09 ( 46.32)	Acc@5  73.44 ( 71.20)
Epoch: [20][3300/5005]	Time  1.403

Epoch: [21][ 550/5005]	Time  3.666 ( 2.041)	Data  2.507 ( 0.852)	Loss 2.5158e+00 (2.4109e+00)	Acc@1  44.92 ( 46.75)	Acc@5  67.58 ( 71.36)
Epoch: [21][ 600/5005]	Time  1.393 ( 2.038)	Data  0.194 ( 0.850)	Loss 2.4784e+00 (2.4097e+00)	Acc@1  44.92 ( 46.71)	Acc@5  71.88 ( 71.37)
Epoch: [21][ 650/5005]	Time  1.332 ( 2.037)	Data  0.208 ( 0.849)	Loss 2.3769e+00 (2.4108e+00)	Acc@1  46.48 ( 46.69)	Acc@5  70.31 ( 71.36)
Epoch: [21][ 700/5005]	Time  1.648 ( 2.035)	Data  0.426 ( 0.847)	Loss 2.3215e+00 (2.4119e+00)	Acc@1  49.61 ( 46.67)	Acc@5  71.48 ( 71.33)
Epoch: [21][ 750/5005]	Time  1.355 ( 2.035)	Data  0.212 ( 0.847)	Loss 2.3090e+00 (2.4112e+00)	Acc@1  50.00 ( 46.65)	Acc@5  73.44 ( 71.34)
Epoch: [21][ 800/5005]	Time  1.368 ( 2.033)	Data  0.242 ( 0.845)	Loss 2.5522e+00 (2.4109e+00)	Acc@1  46.88 ( 46.63)	Acc@5  69.53 ( 71.34)
Epoch: [21][ 850/5005]	Time  1.371 ( 2.033)	Data  0.212 ( 0.845)	Loss 2.1548e+00 (2.4098e+00)	Acc@1  50.78 ( 46.67)	Acc@5  75.78 ( 71.38)
Epoch: [21][ 900/5005]	Time  1.339

Epoch: [21][3550/5005]	Time  1.358 ( 2.045)	Data  0.185 ( 0.859)	Loss 2.6918e+00 (2.4241e+00)	Acc@1  45.70 ( 46.45)	Acc@5  67.19 ( 71.25)
Epoch: [21][3600/5005]	Time  1.361 ( 2.045)	Data  0.201 ( 0.859)	Loss 2.4621e+00 (2.4247e+00)	Acc@1  44.53 ( 46.44)	Acc@5  69.92 ( 71.23)
Epoch: [21][3650/5005]	Time  1.494 ( 2.046)	Data  0.238 ( 0.859)	Loss 2.4332e+00 (2.4245e+00)	Acc@1  46.09 ( 46.44)	Acc@5  73.05 ( 71.24)
Epoch: [21][3700/5005]	Time  1.340 ( 2.046)	Data  0.187 ( 0.860)	Loss 2.4200e+00 (2.4251e+00)	Acc@1  44.53 ( 46.43)	Acc@5  73.05 ( 71.23)
Epoch: [21][3750/5005]	Time  1.476 ( 2.047)	Data  0.167 ( 0.860)	Loss 2.5567e+00 (2.4254e+00)	Acc@1  46.88 ( 46.43)	Acc@5  68.36 ( 71.22)
Epoch: [21][3800/5005]	Time  1.329 ( 2.046)	Data  0.169 ( 0.860)	Loss 2.4571e+00 (2.4257e+00)	Acc@1  43.36 ( 46.42)	Acc@5  71.48 ( 71.22)
Epoch: [21][3850/5005]	Time  1.370 ( 2.047)	Data  0.175 ( 0.861)	Loss 2.3836e+00 (2.4260e+00)	Acc@1  44.53 ( 46.41)	Acc@5  70.70 ( 71.22)
Epoch: [21][3900/5005]	Time  1.357

Epoch: [22][1150/5005]	Time  1.373 ( 2.052)	Data  0.176 ( 0.857)	Loss 2.6537e+00 (2.4029e+00)	Acc@1  44.14 ( 46.97)	Acc@5  66.41 ( 71.62)
Epoch: [22][1200/5005]	Time  4.012 ( 2.052)	Data  2.900 ( 0.858)	Loss 2.4227e+00 (2.4027e+00)	Acc@1  44.92 ( 46.94)	Acc@5  69.92 ( 71.62)
Epoch: [22][1250/5005]	Time  3.727 ( 2.052)	Data  2.596 ( 0.858)	Loss 2.3505e+00 (2.4045e+00)	Acc@1  48.44 ( 46.89)	Acc@5  73.44 ( 71.58)
Epoch: [22][1300/5005]	Time  1.321 ( 2.050)	Data  0.196 ( 0.856)	Loss 2.4748e+00 (2.4043e+00)	Acc@1  44.92 ( 46.90)	Acc@5  69.53 ( 71.60)
Epoch: [22][1350/5005]	Time  3.627 ( 2.051)	Data  2.374 ( 0.857)	Loss 2.4821e+00 (2.4059e+00)	Acc@1  50.78 ( 46.89)	Acc@5  71.48 ( 71.58)
Epoch: [22][1400/5005]	Time  1.624 ( 2.049)	Data  0.268 ( 0.856)	Loss 2.5142e+00 (2.4061e+00)	Acc@1  44.92 ( 46.88)	Acc@5  71.09 ( 71.57)
Epoch: [22][1450/5005]	Time  3.440 ( 2.048)	Data  2.274 ( 0.855)	Loss 2.5880e+00 (2.4058e+00)	Acc@1  40.23 ( 46.89)	Acc@5  71.48 ( 71.57)
Epoch: [22][1500/5005]	Time  3.440

Epoch: [22][4150/5005]	Time  1.395 ( 2.046)	Data  0.200 ( 0.860)	Loss 2.6161e+00 (2.4213e+00)	Acc@1  47.66 ( 46.56)	Acc@5  70.31 ( 71.29)
Epoch: [22][4200/5005]	Time  1.322 ( 2.046)	Data  0.204 ( 0.861)	Loss 2.6699e+00 (2.4219e+00)	Acc@1  46.09 ( 46.55)	Acc@5  67.19 ( 71.28)
Epoch: [22][4250/5005]	Time  2.272 ( 2.046)	Data  0.898 ( 0.860)	Loss 2.6133e+00 (2.4223e+00)	Acc@1  43.75 ( 46.54)	Acc@5  68.75 ( 71.27)
Epoch: [22][4300/5005]	Time  3.821 ( 2.047)	Data  2.570 ( 0.861)	Loss 2.4797e+00 (2.4229e+00)	Acc@1  48.83 ( 46.53)	Acc@5  72.66 ( 71.25)
Epoch: [22][4350/5005]	Time  1.873 ( 2.046)	Data  0.684 ( 0.860)	Loss 2.2553e+00 (2.4231e+00)	Acc@1  49.22 ( 46.52)	Acc@5  74.61 ( 71.25)
Epoch: [22][4400/5005]	Time  2.489 ( 2.046)	Data  1.355 ( 0.860)	Loss 2.4346e+00 (2.4234e+00)	Acc@1  44.53 ( 46.51)	Acc@5  69.92 ( 71.25)
Epoch: [22][4450/5005]	Time  1.406 ( 2.046)	Data  0.226 ( 0.860)	Loss 2.5653e+00 (2.4232e+00)	Acc@1  42.58 ( 46.52)	Acc@5  67.19 ( 71.25)
Epoch: [22][4500/5005]	Time  1.329

Epoch: [23][1750/5005]	Time  1.460 ( 2.048)	Data  0.204 ( 0.856)	Loss 2.3267e+00 (2.4031e+00)	Acc@1  49.22 ( 46.70)	Acc@5  75.00 ( 71.66)
Epoch: [23][1800/5005]	Time  1.741 ( 2.047)	Data  0.518 ( 0.855)	Loss 2.1713e+00 (2.4034e+00)	Acc@1  51.17 ( 46.70)	Acc@5  76.17 ( 71.65)
Epoch: [23][1850/5005]	Time  2.560 ( 2.047)	Data  1.375 ( 0.855)	Loss 2.4242e+00 (2.4043e+00)	Acc@1  47.27 ( 46.68)	Acc@5  71.09 ( 71.64)
Epoch: [23][1900/5005]	Time  1.418 ( 2.047)	Data  0.272 ( 0.855)	Loss 2.4702e+00 (2.4051e+00)	Acc@1  47.66 ( 46.68)	Acc@5  68.75 ( 71.62)
Epoch: [23][1950/5005]	Time  2.243 ( 2.046)	Data  0.987 ( 0.855)	Loss 2.3857e+00 (2.4057e+00)	Acc@1  46.88 ( 46.69)	Acc@5  70.70 ( 71.61)
Epoch: [23][2000/5005]	Time  1.450 ( 2.047)	Data  0.239 ( 0.856)	Loss 2.5078e+00 (2.4059e+00)	Acc@1  48.44 ( 46.68)	Acc@5  67.97 ( 71.60)
Epoch: [23][2050/5005]	Time  1.355 ( 2.049)	Data  0.198 ( 0.858)	Loss 2.3447e+00 (2.4066e+00)	Acc@1  50.78 ( 46.68)	Acc@5  71.48 ( 71.59)
Epoch: [23][2100/5005]	Time  1.371

Epoch: [23][4750/5005]	Time  1.433 ( 2.055)	Data  0.197 ( 0.863)	Loss 2.3476e+00 (2.4181e+00)	Acc@1  47.27 ( 46.54)	Acc@5  73.05 ( 71.34)
Epoch: [23][4800/5005]	Time  3.647 ( 2.055)	Data  2.520 ( 0.863)	Loss 2.2627e+00 (2.4180e+00)	Acc@1  47.27 ( 46.53)	Acc@5  73.44 ( 71.34)
Epoch: [23][4850/5005]	Time  1.527 ( 2.055)	Data  0.257 ( 0.863)	Loss 2.4937e+00 (2.4182e+00)	Acc@1  46.48 ( 46.53)	Acc@5  71.88 ( 71.34)
Epoch: [23][4900/5005]	Time  3.738 ( 2.055)	Data  2.590 ( 0.863)	Loss 2.4756e+00 (2.4185e+00)	Acc@1  41.41 ( 46.53)	Acc@5  72.27 ( 71.34)
Epoch: [23][4950/5005]	Time  1.411 ( 2.055)	Data  0.211 ( 0.863)	Loss 2.4076e+00 (2.4182e+00)	Acc@1  48.44 ( 46.54)	Acc@5  71.48 ( 71.34)
Epoch: [23][5000/5005]	Time  1.973 ( 2.055)	Data  1.247 ( 0.863)	Loss 2.3101e+00 (2.4183e+00)	Acc@1  47.66 ( 46.53)	Acc@5  73.83 ( 71.34)
Test: [  0/391]	Time  4.676 ( 4.676)	Loss 2.0208e+00 (2.0208e+00)	Acc@1  53.91 ( 53.91)	Acc@5  78.91 ( 78.91)
Test: [ 50/391]	Time  0.808 ( 1.156)	Loss 2.3179e+00 (2.1773e+

Epoch: [24][2350/5005]	Time  1.427 ( 2.054)	Data  0.219 ( 0.858)	Loss 2.4362e+00 (2.4024e+00)	Acc@1  44.53 ( 46.78)	Acc@5  69.53 ( 71.65)
Epoch: [24][2400/5005]	Time  1.390 ( 2.053)	Data  0.207 ( 0.858)	Loss 2.5114e+00 (2.4026e+00)	Acc@1  45.70 ( 46.77)	Acc@5  71.48 ( 71.65)
Epoch: [24][2450/5005]	Time  1.403 ( 2.052)	Data  0.204 ( 0.857)	Loss 2.0793e+00 (2.4030e+00)	Acc@1  53.52 ( 46.77)	Acc@5  78.12 ( 71.64)
Epoch: [24][2500/5005]	Time  1.400 ( 2.052)	Data  0.237 ( 0.857)	Loss 2.5420e+00 (2.4033e+00)	Acc@1  43.75 ( 46.77)	Acc@5  67.19 ( 71.64)
Epoch: [24][2550/5005]	Time  1.323 ( 2.052)	Data  0.175 ( 0.857)	Loss 2.4213e+00 (2.4035e+00)	Acc@1  46.48 ( 46.77)	Acc@5  70.70 ( 71.63)
Epoch: [24][2600/5005]	Time  2.734 ( 2.052)	Data  1.617 ( 0.856)	Loss 2.4557e+00 (2.4036e+00)	Acc@1  43.75 ( 46.76)	Acc@5  68.36 ( 71.63)
Epoch: [24][2650/5005]	Time  1.362 ( 2.051)	Data  0.238 ( 0.856)	Loss 2.3097e+00 (2.4037e+00)	Acc@1  43.75 ( 46.75)	Acc@5  75.39 ( 71.62)
Epoch: [24][2700/5005]	Time  1.740

Test: [350/391]	Time  0.658 ( 1.119)	Loss 1.9719e+00 (2.2572e+00)	Acc@1  53.91 ( 48.32)	Acc@5  81.25 ( 74.49)
 * Acc@1 48.304 Acc@5 74.522
lr: [0.09619397662556435]
Epoch: [25][   0/5005]	Time  9.433 ( 9.433)	Data  8.169 ( 8.169)	Loss 2.3334e+00 (2.3334e+00)	Acc@1  48.05 ( 48.05)	Acc@5  71.09 ( 71.09)
Epoch: [25][  50/5005]	Time  1.458 ( 2.206)	Data  0.222 ( 0.978)	Loss 2.5159e+00 (2.3989e+00)	Acc@1  46.09 ( 47.11)	Acc@5  67.97 ( 71.44)
Epoch: [25][ 100/5005]	Time  3.837 ( 2.123)	Data  2.653 ( 0.910)	Loss 2.4195e+00 (2.3913e+00)	Acc@1  45.70 ( 47.23)	Acc@5  71.09 ( 71.82)
Epoch: [25][ 150/5005]	Time  1.390 ( 2.105)	Data  0.203 ( 0.896)	Loss 2.3691e+00 (2.3811e+00)	Acc@1  48.44 ( 47.36)	Acc@5  71.09 ( 71.96)
Epoch: [25][ 200/5005]	Time  3.291 ( 2.093)	Data  2.074 ( 0.889)	Loss 2.4035e+00 (2.3748e+00)	Acc@1  48.44 ( 47.37)	Acc@5  71.88 ( 72.10)
Epoch: [25][ 250/5005]	Time  1.431 ( 2.072)	Data  0.184 ( 0.870)	Loss 2.2122e+00 (2.3687e+00)	Acc@1  55.08 ( 47.57)	Acc@5  73.83 ( 72.17)
Epoch: 

Epoch: [25][2950/5005]	Time  1.544 ( 2.060)	Data  0.232 ( 0.863)	Loss 2.4287e+00 (2.3986e+00)	Acc@1  44.92 ( 46.93)	Acc@5  72.66 ( 71.65)
Epoch: [25][3000/5005]	Time  2.591 ( 2.060)	Data  1.447 ( 0.863)	Loss 2.7222e+00 (2.3991e+00)	Acc@1  42.97 ( 46.92)	Acc@5  67.97 ( 71.64)
Epoch: [25][3050/5005]	Time  1.328 ( 2.060)	Data  0.205 ( 0.863)	Loss 2.4654e+00 (2.3998e+00)	Acc@1  44.14 ( 46.91)	Acc@5  68.75 ( 71.63)
Epoch: [25][3100/5005]	Time  2.802 ( 2.060)	Data  1.533 ( 0.863)	Loss 2.6945e+00 (2.3998e+00)	Acc@1  44.14 ( 46.92)	Acc@5  68.36 ( 71.63)
Epoch: [25][3150/5005]	Time  1.407 ( 2.063)	Data  0.212 ( 0.865)	Loss 2.1617e+00 (2.4004e+00)	Acc@1  48.44 ( 46.92)	Acc@5  77.73 ( 71.62)
Epoch: [25][3200/5005]	Time  1.929 ( 2.063)	Data  0.656 ( 0.865)	Loss 2.0928e+00 (2.4006e+00)	Acc@1  53.52 ( 46.92)	Acc@5  76.56 ( 71.61)
Epoch: [25][3250/5005]	Time  1.373 ( 2.063)	Data  0.193 ( 0.865)	Loss 2.1904e+00 (2.4009e+00)	Acc@1  46.48 ( 46.91)	Acc@5  74.61 ( 71.61)
Epoch: [25][3300/5005]	Time  2.540

Epoch: [26][ 550/5005]	Time  1.475 ( 2.065)	Data  0.196 ( 0.867)	Loss 2.3603e+00 (2.3652e+00)	Acc@1  50.00 ( 47.50)	Acc@5  71.48 ( 72.14)
Epoch: [26][ 600/5005]	Time  3.465 ( 2.063)	Data  2.348 ( 0.865)	Loss 2.3283e+00 (2.3682e+00)	Acc@1  46.88 ( 47.46)	Acc@5  72.66 ( 72.10)
Epoch: [26][ 650/5005]	Time  2.288 ( 2.060)	Data  0.766 ( 0.861)	Loss 2.4667e+00 (2.3695e+00)	Acc@1  45.31 ( 47.44)	Acc@5  71.09 ( 72.05)
Epoch: [26][ 700/5005]	Time  2.629 ( 2.059)	Data  1.349 ( 0.860)	Loss 2.5147e+00 (2.3696e+00)	Acc@1  46.09 ( 47.42)	Acc@5  71.48 ( 72.07)
Epoch: [26][ 750/5005]	Time  1.422 ( 2.060)	Data  0.279 ( 0.861)	Loss 2.4852e+00 (2.3682e+00)	Acc@1  46.88 ( 47.44)	Acc@5  72.66 ( 72.11)
Epoch: [26][ 800/5005]	Time  1.386 ( 2.060)	Data  0.213 ( 0.861)	Loss 2.4758e+00 (2.3705e+00)	Acc@1  44.14 ( 47.42)	Acc@5  69.14 ( 72.07)
Epoch: [26][ 850/5005]	Time  1.449 ( 2.057)	Data  0.250 ( 0.859)	Loss 2.5402e+00 (2.3738e+00)	Acc@1  46.09 ( 47.35)	Acc@5  67.97 ( 71.99)
Epoch: [26][ 900/5005]	Time  1.416

Epoch: [26][3550/5005]	Time  2.434 ( 2.057)	Data  1.226 ( 0.866)	Loss 2.3004e+00 (2.3981e+00)	Acc@1  50.78 ( 46.86)	Acc@5  72.66 ( 71.61)
Epoch: [26][3600/5005]	Time  1.313 ( 2.057)	Data  0.216 ( 0.866)	Loss 2.4339e+00 (2.3983e+00)	Acc@1  44.14 ( 46.87)	Acc@5  68.75 ( 71.61)
Epoch: [26][3650/5005]	Time  4.761 ( 2.058)	Data  3.524 ( 0.867)	Loss 2.4586e+00 (2.3986e+00)	Acc@1  43.36 ( 46.86)	Acc@5  73.05 ( 71.60)
Epoch: [26][3700/5005]	Time  1.320 ( 2.059)	Data  0.152 ( 0.868)	Loss 2.1658e+00 (2.3991e+00)	Acc@1  50.39 ( 46.85)	Acc@5  76.95 ( 71.60)
Epoch: [26][3750/5005]	Time  3.211 ( 2.059)	Data  2.043 ( 0.868)	Loss 2.5021e+00 (2.3996e+00)	Acc@1  43.75 ( 46.84)	Acc@5  67.97 ( 71.59)
Epoch: [26][3800/5005]	Time  1.524 ( 2.059)	Data  0.243 ( 0.868)	Loss 2.3179e+00 (2.3998e+00)	Acc@1  46.09 ( 46.83)	Acc@5  76.56 ( 71.59)
Epoch: [26][3850/5005]	Time  3.520 ( 2.059)	Data  2.331 ( 0.868)	Loss 2.4951e+00 (2.3999e+00)	Acc@1  42.58 ( 46.83)	Acc@5  72.66 ( 71.59)
Epoch: [26][3900/5005]	Time  1.480

Epoch: [27][1150/5005]	Time  3.673 ( 2.052)	Data  2.454 ( 0.853)	Loss 2.3289e+00 (2.3701e+00)	Acc@1  44.92 ( 47.40)	Acc@5  73.05 ( 72.21)
Epoch: [27][1200/5005]	Time  1.338 ( 2.051)	Data  0.191 ( 0.852)	Loss 2.4721e+00 (2.3708e+00)	Acc@1  47.27 ( 47.37)	Acc@5  69.53 ( 72.20)
Epoch: [27][1250/5005]	Time  1.530 ( 2.049)	Data  0.327 ( 0.850)	Loss 2.1735e+00 (2.3703e+00)	Acc@1  49.61 ( 47.36)	Acc@5  73.44 ( 72.20)
Epoch: [27][1300/5005]	Time  1.376 ( 2.052)	Data  0.200 ( 0.853)	Loss 2.4052e+00 (2.3708e+00)	Acc@1  47.66 ( 47.33)	Acc@5  70.31 ( 72.18)
Epoch: [27][1350/5005]	Time  2.303 ( 2.050)	Data  1.065 ( 0.852)	Loss 2.2715e+00 (2.3715e+00)	Acc@1  48.83 ( 47.32)	Acc@5  73.05 ( 72.16)
Epoch: [27][1400/5005]	Time  1.334 ( 2.052)	Data  0.202 ( 0.853)	Loss 2.3600e+00 (2.3720e+00)	Acc@1  49.61 ( 47.32)	Acc@5  69.92 ( 72.14)
Epoch: [27][1450/5005]	Time  1.320 ( 2.053)	Data  0.200 ( 0.855)	Loss 2.5023e+00 (2.3723e+00)	Acc@1  41.02 ( 47.29)	Acc@5  69.92 ( 72.14)
Epoch: [27][1500/5005]	Time  1.420

Epoch: [27][4150/5005]	Time  2.986 ( 2.073)	Data  1.877 ( 0.869)	Loss 2.4208e+00 (2.3953e+00)	Acc@1  50.39 ( 46.92)	Acc@5  70.31 ( 71.70)
Epoch: [27][4200/5005]	Time  1.650 ( 2.077)	Data  0.305 ( 0.870)	Loss 2.3800e+00 (2.3955e+00)	Acc@1  44.92 ( 46.92)	Acc@5  71.88 ( 71.70)
Epoch: [27][4250/5005]	Time  1.471 ( 2.080)	Data  0.228 ( 0.872)	Loss 2.1999e+00 (2.3960e+00)	Acc@1  48.83 ( 46.91)	Acc@5  75.39 ( 71.69)
Epoch: [27][4300/5005]	Time  1.515 ( 2.082)	Data  0.247 ( 0.873)	Loss 2.4573e+00 (2.3963e+00)	Acc@1  48.83 ( 46.91)	Acc@5  67.97 ( 71.69)
Epoch: [27][4350/5005]	Time  1.531 ( 2.084)	Data  0.227 ( 0.874)	Loss 2.2241e+00 (2.3964e+00)	Acc@1  51.95 ( 46.92)	Acc@5  74.61 ( 71.69)
Epoch: [27][4400/5005]	Time  1.476 ( 2.086)	Data  0.241 ( 0.875)	Loss 2.3406e+00 (2.3965e+00)	Acc@1  43.36 ( 46.92)	Acc@5  72.27 ( 71.68)
Epoch: [27][4450/5005]	Time  1.521 ( 2.088)	Data  0.211 ( 0.877)	Loss 2.2834e+00 (2.3963e+00)	Acc@1  46.48 ( 46.93)	Acc@5  75.39 ( 71.68)
Epoch: [27][4500/5005]	Time  1.609

Epoch: [28][1750/5005]	Time  1.386 ( 2.058)	Data  0.184 ( 0.861)	Loss 2.3325e+00 (2.3735e+00)	Acc@1  49.22 ( 47.35)	Acc@5  72.27 ( 72.05)
Epoch: [28][1800/5005]	Time  1.412 ( 2.059)	Data  0.204 ( 0.862)	Loss 2.5774e+00 (2.3743e+00)	Acc@1  42.19 ( 47.35)	Acc@5  70.31 ( 72.03)
Epoch: [28][1850/5005]	Time  1.451 ( 2.059)	Data  0.199 ( 0.863)	Loss 2.3704e+00 (2.3755e+00)	Acc@1  48.44 ( 47.34)	Acc@5  73.83 ( 72.02)
Epoch: [28][1900/5005]	Time  1.349 ( 2.057)	Data  0.200 ( 0.862)	Loss 2.4855e+00 (2.3770e+00)	Acc@1  46.09 ( 47.31)	Acc@5  69.14 ( 72.00)
Epoch: [28][1950/5005]	Time  1.182 ( 2.058)	Data  0.128 ( 0.863)	Loss 2.2171e+00 (2.3779e+00)	Acc@1  53.12 ( 47.29)	Acc@5  74.22 ( 71.99)
Epoch: [28][2000/5005]	Time  1.247 ( 2.057)	Data  0.180 ( 0.863)	Loss 2.4948e+00 (2.3781e+00)	Acc@1  46.48 ( 47.30)	Acc@5  71.09 ( 71.99)
Epoch: [28][2050/5005]	Time  1.328 ( 2.060)	Data  0.181 ( 0.865)	Loss 2.4573e+00 (2.3784e+00)	Acc@1  46.48 ( 47.29)	Acc@5  71.48 ( 71.98)
Epoch: [28][2100/5005]	Time  1.412

Epoch: [28][4750/5005]	Time  1.434 ( 2.050)	Data  0.197 ( 0.863)	Loss 2.8385e+00 (2.3936e+00)	Acc@1  38.28 ( 46.98)	Acc@5  65.62 ( 71.76)
Epoch: [28][4800/5005]	Time  1.412 ( 2.050)	Data  0.262 ( 0.863)	Loss 2.2793e+00 (2.3939e+00)	Acc@1  48.44 ( 46.97)	Acc@5  72.27 ( 71.76)
Epoch: [28][4850/5005]	Time  1.367 ( 2.050)	Data  0.209 ( 0.863)	Loss 2.3574e+00 (2.3943e+00)	Acc@1  45.31 ( 46.96)	Acc@5  69.92 ( 71.75)
Epoch: [28][4900/5005]	Time  1.599 ( 2.050)	Data  0.236 ( 0.863)	Loss 2.5611e+00 (2.3944e+00)	Acc@1  44.53 ( 46.96)	Acc@5  71.09 ( 71.74)
Epoch: [28][4950/5005]	Time  1.382 ( 2.049)	Data  0.218 ( 0.863)	Loss 2.6435e+00 (2.3943e+00)	Acc@1  42.58 ( 46.97)	Acc@5  69.14 ( 71.74)
Epoch: [28][5000/5005]	Time  0.807 ( 2.050)	Data  0.076 ( 0.863)	Loss 2.3904e+00 (2.3946e+00)	Acc@1  47.27 ( 46.97)	Acc@5  74.61 ( 71.74)
Test: [  0/391]	Time  4.734 ( 4.734)	Loss 2.2014e+00 (2.2014e+00)	Acc@1  49.22 ( 49.22)	Acc@5  73.44 ( 73.44)
Test: [ 50/391]	Time  0.651 ( 1.155)	Loss 2.1108e+00 (2.2083e+

Epoch: [29][2350/5005]	Time  1.385 ( 2.043)	Data  0.178 ( 0.854)	Loss 2.3363e+00 (2.3733e+00)	Acc@1  48.44 ( 47.28)	Acc@5  73.83 ( 72.08)
Epoch: [29][2400/5005]	Time  2.371 ( 2.042)	Data  1.074 ( 0.853)	Loss 2.2691e+00 (2.3736e+00)	Acc@1  48.83 ( 47.27)	Acc@5  73.83 ( 72.07)
Epoch: [29][2450/5005]	Time  1.436 ( 2.044)	Data  0.224 ( 0.855)	Loss 2.4099e+00 (2.3738e+00)	Acc@1  48.05 ( 47.28)	Acc@5  71.88 ( 72.07)
Epoch: [29][2500/5005]	Time  1.448 ( 2.044)	Data  0.216 ( 0.855)	Loss 2.5862e+00 (2.3745e+00)	Acc@1  41.41 ( 47.27)	Acc@5  69.53 ( 72.06)
Epoch: [29][2550/5005]	Time  1.276 ( 2.044)	Data  0.205 ( 0.855)	Loss 2.5069e+00 (2.3755e+00)	Acc@1  44.92 ( 47.25)	Acc@5  71.88 ( 72.04)
Epoch: [29][2600/5005]	Time  1.365 ( 2.044)	Data  0.241 ( 0.856)	Loss 2.4665e+00 (2.3756e+00)	Acc@1  41.80 ( 47.25)	Acc@5  70.70 ( 72.04)
Epoch: [29][2650/5005]	Time  1.446 ( 2.044)	Data  0.178 ( 0.856)	Loss 2.4216e+00 (2.3763e+00)	Acc@1  48.05 ( 47.23)	Acc@5  67.97 ( 72.03)
Epoch: [29][2700/5005]	Time  1.410

Test: [350/391]	Time  2.687 ( 1.121)	Loss 2.6345e+00 (2.1959e+00)	Acc@1  42.19 ( 49.39)	Acc@5  70.31 ( 75.38)
 * Acc@1 49.376 Acc@5 75.372
lr: [0.0945503262094184]
Epoch: [30][   0/5005]	Time  7.589 ( 7.589)	Data  6.497 ( 6.497)	Loss 2.2402e+00 (2.2402e+00)	Acc@1  52.73 ( 52.73)	Acc@5  77.34 ( 77.34)
Epoch: [30][  50/5005]	Time  1.365 ( 2.166)	Data  0.218 ( 0.972)	Loss 2.0670e+00 (2.3168e+00)	Acc@1  52.34 ( 48.39)	Acc@5  77.73 ( 73.04)
Epoch: [30][ 100/5005]	Time  1.561 ( 2.102)	Data  0.269 ( 0.903)	Loss 1.9667e+00 (2.3262e+00)	Acc@1  56.25 ( 48.23)	Acc@5  78.12 ( 73.08)
Epoch: [30][ 150/5005]	Time  1.462 ( 2.077)	Data  0.232 ( 0.880)	Loss 2.3870e+00 (2.3376e+00)	Acc@1  51.17 ( 48.08)	Acc@5  71.48 ( 72.85)
Epoch: [30][ 200/5005]	Time  1.420 ( 2.080)	Data  0.203 ( 0.879)	Loss 2.4471e+00 (2.3475e+00)	Acc@1  45.31 ( 47.91)	Acc@5  70.31 ( 72.64)
Epoch: [30][ 250/5005]	Time  1.390 ( 2.068)	Data  0.211 ( 0.866)	Loss 2.1543e+00 (2.3527e+00)	Acc@1  48.83 ( 47.75)	Acc@5  76.95 ( 72.49)
Epoch: [

Epoch: [30][2950/5005]	Time  1.590 ( 2.043)	Data  0.283 ( 0.853)	Loss 2.2104e+00 (2.3806e+00)	Acc@1  48.83 ( 47.26)	Acc@5  76.17 ( 71.95)
Epoch: [30][3000/5005]	Time  1.278 ( 2.043)	Data  0.196 ( 0.854)	Loss 2.4806e+00 (2.3806e+00)	Acc@1  41.80 ( 47.25)	Acc@5  71.09 ( 71.96)
Epoch: [30][3050/5005]	Time  1.362 ( 2.043)	Data  0.217 ( 0.853)	Loss 2.1586e+00 (2.3803e+00)	Acc@1  50.39 ( 47.25)	Acc@5  75.78 ( 71.96)
Epoch: [30][3100/5005]	Time  1.388 ( 2.042)	Data  0.198 ( 0.853)	Loss 2.6570e+00 (2.3807e+00)	Acc@1  41.41 ( 47.24)	Acc@5  67.19 ( 71.95)
Epoch: [30][3150/5005]	Time  1.482 ( 2.041)	Data  0.263 ( 0.853)	Loss 2.3863e+00 (2.3811e+00)	Acc@1  50.39 ( 47.24)	Acc@5  70.31 ( 71.95)
Epoch: [30][3200/5005]	Time  1.364 ( 2.042)	Data  0.214 ( 0.853)	Loss 2.3160e+00 (2.3810e+00)	Acc@1  46.09 ( 47.24)	Acc@5  73.05 ( 71.94)
Epoch: [30][3250/5005]	Time  1.384 ( 2.041)	Data  0.212 ( 0.853)	Loss 2.3832e+00 (2.3816e+00)	Acc@1  44.92 ( 47.24)	Acc@5  74.22 ( 71.93)
Epoch: [30][3300/5005]	Time  1.371

Epoch: [31][ 550/5005]	Time  3.884 ( 2.048)	Data  2.774 ( 0.859)	Loss 2.4442e+00 (2.3473e+00)	Acc@1  49.61 ( 47.77)	Acc@5  69.92 ( 72.54)
Epoch: [31][ 600/5005]	Time  1.374 ( 2.044)	Data  0.194 ( 0.856)	Loss 2.3653e+00 (2.3534e+00)	Acc@1  47.27 ( 47.70)	Acc@5  75.00 ( 72.44)
Epoch: [31][ 650/5005]	Time  3.360 ( 2.051)	Data  2.202 ( 0.861)	Loss 2.4132e+00 (2.3549e+00)	Acc@1  46.48 ( 47.69)	Acc@5  73.44 ( 72.42)
Epoch: [31][ 700/5005]	Time  1.577 ( 2.052)	Data  0.265 ( 0.860)	Loss 2.2585e+00 (2.3536e+00)	Acc@1  49.61 ( 47.69)	Acc@5  72.27 ( 72.45)
Epoch: [31][ 750/5005]	Time  1.373 ( 2.053)	Data  0.217 ( 0.861)	Loss 2.1856e+00 (2.3545e+00)	Acc@1  52.34 ( 47.68)	Acc@5  74.22 ( 72.42)
Epoch: [31][ 800/5005]	Time  1.347 ( 2.055)	Data  0.188 ( 0.863)	Loss 2.2646e+00 (2.3571e+00)	Acc@1  48.44 ( 47.64)	Acc@5  75.78 ( 72.37)
Epoch: [31][ 850/5005]	Time  3.502 ( 2.053)	Data  2.287 ( 0.861)	Loss 2.3914e+00 (2.3570e+00)	Acc@1  44.92 ( 47.65)	Acc@5  68.75 ( 72.36)
Epoch: [31][ 900/5005]	Time  1.368

Epoch: [31][3550/5005]	Time  4.041 ( 2.052)	Data  2.751 ( 0.864)	Loss 2.3540e+00 (2.3745e+00)	Acc@1  50.00 ( 47.32)	Acc@5  72.27 ( 72.05)
Epoch: [31][3600/5005]	Time  1.340 ( 2.052)	Data  0.195 ( 0.865)	Loss 2.4443e+00 (2.3747e+00)	Acc@1  42.97 ( 47.31)	Acc@5  71.48 ( 72.05)
Epoch: [31][3650/5005]	Time  1.581 ( 2.052)	Data  0.375 ( 0.864)	Loss 2.4218e+00 (2.3746e+00)	Acc@1  41.80 ( 47.31)	Acc@5  69.53 ( 72.05)
Epoch: [31][3700/5005]	Time  1.431 ( 2.052)	Data  0.218 ( 0.864)	Loss 2.5889e+00 (2.3748e+00)	Acc@1  44.14 ( 47.31)	Acc@5  66.80 ( 72.04)
Epoch: [31][3750/5005]	Time  3.661 ( 2.052)	Data  2.527 ( 0.865)	Loss 2.4056e+00 (2.3754e+00)	Acc@1  46.48 ( 47.30)	Acc@5  73.05 ( 72.03)
Epoch: [31][3800/5005]	Time  1.410 ( 2.052)	Data  0.184 ( 0.865)	Loss 2.3565e+00 (2.3755e+00)	Acc@1  48.83 ( 47.30)	Acc@5  72.66 ( 72.03)
Epoch: [31][3850/5005]	Time  1.339 ( 2.051)	Data  0.198 ( 0.864)	Loss 2.3119e+00 (2.3762e+00)	Acc@1  46.88 ( 47.29)	Acc@5  75.00 ( 72.01)
Epoch: [31][3900/5005]	Time  1.338

In [None]:
#from torchsummary import summary
print(model)

In [None]:
print(device)