In [1]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as opt

import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models


In [2]:
SEED=1

In [3]:
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True

In [4]:
START_EPOCH = 0

### Set the architecture to resnet 18 below

In [5]:
##########################
ARCH = models.resnet18() # set the architecture to RESNET 18
# please look up how to do that
########################
EPOCHS = 40
LR = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 3e-5
PRINT_FREQ = 50
TRAIN_BATCH=256
VAL_BATCH=128
WORKERS=4
# TRAINDIR="/workspace/data/imagenet2012/train"
# VALDIR="/workspace/data/imagenet2012/val"

In [6]:
TRAINDIR="/home/user1/work/w251/v3/week05/hw/imageNet-ILSVRC2012/download_and_prepare_imagenet_dataset/train"
VALDIR="/home/user1/work/w251/v3/week05/hw/imageNet-ILSVRC2012/download_and_prepare_imagenet_dataset/val"


### Check if cuda is available here

In [7]:
# check if cuda is available in this cell
# if it is not available, you should not go forward!

torch.cuda.is_available()

True

### Assign your GPU below

In [8]:
# Assign your GPU in this cell
GPU = 0

In [9]:
# set your active device to your GPU in this cell
if torch.cuda.is_available():  
  dev =   "cuda:0"
else:  
  dev = "cpu"  

device = torch.device(dev)

In [10]:
# enable algorithm optimization
cudnn.benchmark = True

### Fill in the heart of the train section below

In [11]:
def train(train_loader, model, criterion, optimizer, epoch, device=torch.device('cpu')):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    ######################
    # switch model to train mode here
    model.train()
    ################

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        #####################
        # send the images to cuda device
        images = images.cuda(0, non_blocking=True)
        # send the target to cuda device
        target = target.cuda(0, non_blocking=True)
        
        #images = images.to(device)
        #target = target.to(device)
        
        # compute output
        output = model(images)

        # compute loss 
        loss = criterion(output, target)


        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        
        #### zero out gradients in the optimier
        ## optimizer ..??
        optimizer.zero_grad()
        
        ## backprop!
        ### loss... ???
        loss.backward()
        
        # update the weights!
        ## optimier .. ??
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % PRINT_FREQ == 0:
            progress.display(i)

#### Fill in the validate section below

In [12]:
def validate(val_loader, model, criterion, device):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    # model ???
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            
            
            ### send the images and target to cuda
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)

            # compute loss
            loss  = criterion(output,target)


            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg

### Save the checkpoint

In [13]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

In [14]:
class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [15]:
def save_checkpoint(state, is_best, filename='./checkpoint_192.pth.tar'):
    # save the model state!
    # state ??? 
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, './model_192_best.pth.tar')

In [16]:
# if we are adjusting the LR manually use this
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = LR * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [17]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [18]:
imagenet_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
imagenet_std_RGB = [0.229, 0.224, 0.225]
cinic_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
cinic_std_RGB = [0.24205776, 0.23828046, 0.25874835]
cifar_mean_RGB = [0.4914, 0.4822, 0.4465]
cifar_std_RGB = [0.2023, 0.1994, 0.2010]

In [19]:
normalize = transforms.Normalize(mean=imagenet_mean_RGB, std=imagenet_std_RGB)

In [20]:
IMG_SIZE = 32
IMG_SIZE = 224

### Initialize the model using the architecture you selected above

In [21]:
# select the model
model = ARCH

### Send the model to the cuda device

In [22]:
# send the model to the cuda device..
model = model.to(device)

### Instantiate the loss to cross entropy

In [23]:
# use the cross-entropy loss
criterion = nn.CrossEntropyLoss()# .cuda(GPU)

### Instantiate the optimizer to SGD

In [24]:
# use SGD .. use the momentum and weight decay vars
optimizer = opt.SGD(model.parameters(), lr=LR, momentum=0.9, weight_decay=WEIGHT_DECAY)

#### Create the learning rate scheduler

In [25]:
# use CosineAnnealingLR
scheduler =  torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

### Create the train dataset object

In [26]:
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE),# padding=4),
    #transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])


In [27]:
train_dataset = torchvision.datasets.ImageFolder(TRAINDIR, transform=transform_train)

### Create the val dataset object

In [28]:
transform_val = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])

In [29]:
val_dataset = torchvision.datasets.ImageFolder(VALDIR, transform=transform_val)

### Create the train dataloader

In [30]:
# fill this in
train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=256, num_workers=WORKERS, shuffle=True)
#train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=128, shuffle=True)

### Create the c

In [31]:
# fill this in..
val_loader = torch.utils.data.DataLoader(val_dataset,  batch_size=128, num_workers=WORKERS, shuffle=True) 
#val_loader = torch.utils.data.DataLoader(val_dataset,  batch_size=128, shuffle=True) 

In [32]:
best_acc1 = 0

In [None]:
for epoch in range(START_EPOCH, EPOCHS):
#    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch, device)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion, device)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)


    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)
    
    scheduler.step()
    print('lr: ' + str(scheduler.get_last_lr()))

Epoch: [0][   0/5005]	Time 22.242 (22.242)	Data 13.711 (13.711)	Loss 7.0529e+00 (7.0529e+00)	Acc@1   0.39 (  0.39)	Acc@5   0.39 (  0.39)
Epoch: [0][  50/5005]	Time  0.707 ( 3.165)	Data  0.050 ( 2.322)	Loss 6.8932e+00 (7.0212e+00)	Acc@1   0.39 (  0.21)	Acc@5   1.17 (  0.91)
Epoch: [0][ 100/5005]	Time  5.155 ( 3.038)	Data  4.546 ( 2.294)	Loss 6.7537e+00 (6.9336e+00)	Acc@1   0.00 (  0.30)	Acc@5   2.34 (  1.25)
Epoch: [0][ 150/5005]	Time  2.179 ( 2.983)	Data  1.343 ( 2.262)	Loss 6.7715e+00 (6.8680e+00)	Acc@1   0.39 (  0.35)	Acc@5   1.95 (  1.54)
Epoch: [0][ 200/5005]	Time  4.048 ( 2.955)	Data  3.708 ( 2.258)	Loss 6.5739e+00 (6.8106e+00)	Acc@1   0.00 (  0.44)	Acc@5   4.69 (  1.88)
Epoch: [0][ 250/5005]	Time  3.326 ( 2.935)	Data  3.021 ( 2.259)	Loss 6.6304e+00 (6.7583e+00)	Acc@1   0.39 (  0.51)	Acc@5   2.34 (  2.12)
Epoch: [0][ 300/5005]	Time  4.908 ( 2.912)	Data  4.381 ( 2.255)	Loss 6.5394e+00 (6.7125e+00)	Acc@1   1.56 (  0.59)	Acc@5   2.34 (  2.36)
Epoch: [0][ 350/5005]	Time  6.037 ( 2.904

Epoch: [0][3000/5005]	Time  1.265 ( 2.842)	Data  0.705 ( 2.242)	Loss 4.6066e+00 (5.3812e+00)	Acc@1  12.89 (  7.12)	Acc@5  30.47 ( 18.58)
Epoch: [0][3050/5005]	Time 10.315 ( 2.846)	Data  9.537 ( 2.246)	Loss 4.6969e+00 (5.3664e+00)	Acc@1  13.28 (  7.25)	Acc@5  28.12 ( 18.83)
Epoch: [0][3100/5005]	Time  0.761 ( 2.847)	Data  0.061 ( 2.246)	Loss 4.2836e+00 (5.3510e+00)	Acc@1  20.31 (  7.38)	Acc@5  40.62 ( 19.08)
Epoch: [0][3150/5005]	Time  5.493 ( 2.849)	Data  4.786 ( 2.248)	Loss 4.2723e+00 (5.3369e+00)	Acc@1  15.23 (  7.49)	Acc@5  38.67 ( 19.31)
Epoch: [0][3200/5005]	Time  0.662 ( 2.850)	Data  0.054 ( 2.249)	Loss 4.3796e+00 (5.3226e+00)	Acc@1  13.28 (  7.61)	Acc@5  36.33 ( 19.55)
Epoch: [0][3250/5005]	Time  5.662 ( 2.852)	Data  4.952 ( 2.251)	Loss 4.2608e+00 (5.3085e+00)	Acc@1  15.62 (  7.74)	Acc@5  34.77 ( 19.79)
Epoch: [0][3300/5005]	Time  0.650 ( 2.853)	Data  0.047 ( 2.253)	Loss 4.4065e+00 (5.2944e+00)	Acc@1  13.67 (  7.86)	Acc@5  36.33 ( 20.03)
Epoch: [0][3350/5005]	Time  7.101 ( 2.856

Epoch: [1][ 600/5005]	Time  4.555 ( 2.166)	Data  3.915 ( 1.382)	Loss 3.8053e+00 (3.7637e+00)	Acc@1  26.95 ( 24.07)	Acc@5  48.83 ( 47.22)
Epoch: [1][ 650/5005]	Time  0.735 ( 2.166)	Data  0.064 ( 1.387)	Loss 3.6665e+00 (3.7587e+00)	Acc@1  25.78 ( 24.20)	Acc@5  48.83 ( 47.34)
Epoch: [1][ 700/5005]	Time  0.883 ( 2.167)	Data  0.312 ( 1.389)	Loss 3.8535e+00 (3.7549e+00)	Acc@1  19.14 ( 24.21)	Acc@5  45.70 ( 47.43)
Epoch: [1][ 750/5005]	Time  0.919 ( 2.172)	Data  0.124 ( 1.399)	Loss 3.5611e+00 (3.7523e+00)	Acc@1  26.56 ( 24.26)	Acc@5  49.22 ( 47.46)
Epoch: [1][ 800/5005]	Time  4.100 ( 2.177)	Data  3.378 ( 1.407)	Loss 3.5669e+00 (3.7458e+00)	Acc@1  28.12 ( 24.36)	Acc@5  53.52 ( 47.57)
Epoch: [1][ 850/5005]	Time  0.789 ( 2.179)	Data  0.105 ( 1.413)	Loss 3.6659e+00 (3.7418e+00)	Acc@1  26.17 ( 24.41)	Acc@5  47.27 ( 47.65)
Epoch: [1][ 900/5005]	Time  4.726 ( 2.184)	Data  4.085 ( 1.419)	Loss 3.5682e+00 (3.7353e+00)	Acc@1  29.69 ( 24.51)	Acc@5  51.95 ( 47.78)
Epoch: [1][ 950/5005]	Time  0.749 ( 2.189

Epoch: [1][3600/5005]	Time  4.056 ( 2.443)	Data  3.134 ( 1.764)	Loss 3.3171e+00 (3.5221e+00)	Acc@1  34.77 ( 27.90)	Acc@5  53.91 ( 51.81)
Epoch: [1][3650/5005]	Time  0.846 ( 2.444)	Data  0.060 ( 1.766)	Loss 3.3679e+00 (3.5196e+00)	Acc@1  33.20 ( 27.95)	Acc@5  56.64 ( 51.86)
Epoch: [1][3700/5005]	Time  7.206 ( 2.446)	Data  6.531 ( 1.768)	Loss 3.3351e+00 (3.5163e+00)	Acc@1  30.47 ( 27.99)	Acc@5  58.98 ( 51.93)
Epoch: [1][3750/5005]	Time  0.693 ( 2.447)	Data  0.052 ( 1.769)	Loss 2.9792e+00 (3.5130e+00)	Acc@1  33.59 ( 28.05)	Acc@5  58.20 ( 51.99)
Epoch: [1][3800/5005]	Time  2.664 ( 2.449)	Data  2.079 ( 1.772)	Loss 3.4142e+00 (3.5103e+00)	Acc@1  30.08 ( 28.09)	Acc@5  53.52 ( 52.03)
Epoch: [1][3850/5005]	Time  0.783 ( 2.450)	Data  0.070 ( 1.774)	Loss 3.2203e+00 (3.5071e+00)	Acc@1  33.20 ( 28.14)	Acc@5  55.47 ( 52.09)
Epoch: [1][3900/5005]	Time  2.154 ( 2.451)	Data  1.616 ( 1.776)	Loss 3.2250e+00 (3.5039e+00)	Acc@1  35.55 ( 28.20)	Acc@5  55.08 ( 52.15)
Epoch: [1][3950/5005]	Time  0.832 ( 2.452

Epoch: [2][1200/5005]	Time  4.466 ( 2.234)	Data  3.913 ( 1.486)	Loss 3.1188e+00 (3.0588e+00)	Acc@1  34.38 ( 35.25)	Acc@5  62.89 ( 60.21)
Epoch: [2][1250/5005]	Time  5.122 ( 2.241)	Data  4.519 ( 1.495)	Loss 2.9423e+00 (3.0562e+00)	Acc@1  36.72 ( 35.31)	Acc@5  61.33 ( 60.27)
Epoch: [2][1300/5005]	Time  2.356 ( 2.251)	Data  1.545 ( 1.505)	Loss 2.7257e+00 (3.0560e+00)	Acc@1  37.50 ( 35.33)	Acc@5  62.50 ( 60.28)
Epoch: [2][1350/5005]	Time  5.990 ( 2.259)	Data  5.458 ( 1.515)	Loss 3.1533e+00 (3.0547e+00)	Acc@1  32.42 ( 35.36)	Acc@5  56.25 ( 60.29)
Epoch: [2][1400/5005]	Time  4.587 ( 2.266)	Data  3.984 ( 1.523)	Loss 2.7450e+00 (3.0532e+00)	Acc@1  40.23 ( 35.38)	Acc@5  66.41 ( 60.31)
Epoch: [2][1450/5005]	Time  6.266 ( 2.272)	Data  5.457 ( 1.532)	Loss 3.1138e+00 (3.0510e+00)	Acc@1  31.25 ( 35.42)	Acc@5  59.77 ( 60.36)
Epoch: [2][1500/5005]	Time  2.204 ( 2.278)	Data  1.642 ( 1.539)	Loss 3.2234e+00 (3.0500e+00)	Acc@1  31.25 ( 35.44)	Acc@5  58.98 ( 60.40)
Epoch: [2][1550/5005]	Time  4.851 ( 2.288

Epoch: [2][4200/5005]	Time  0.876 ( 2.467)	Data  0.064 ( 1.778)	Loss 2.9816e+00 (2.9676e+00)	Acc@1  35.94 ( 36.89)	Acc@5  61.33 ( 61.86)
Epoch: [2][4250/5005]	Time  3.274 ( 2.469)	Data  2.675 ( 1.781)	Loss 3.0931e+00 (2.9664e+00)	Acc@1  31.64 ( 36.91)	Acc@5  57.81 ( 61.89)
Epoch: [2][4300/5005]	Time  0.734 ( 2.470)	Data  0.076 ( 1.781)	Loss 2.8952e+00 (2.9646e+00)	Acc@1  37.50 ( 36.94)	Acc@5  64.84 ( 61.92)
Epoch: [2][4350/5005]	Time  2.525 ( 2.471)	Data  1.873 ( 1.783)	Loss 2.8592e+00 (2.9634e+00)	Acc@1  41.80 ( 36.96)	Acc@5  64.06 ( 61.94)
Epoch: [2][4400/5005]	Time  0.763 ( 2.472)	Data  0.073 ( 1.784)	Loss 2.7938e+00 (2.9616e+00)	Acc@1  40.23 ( 36.99)	Acc@5  64.45 ( 61.98)
Epoch: [2][4450/5005]	Time  1.548 ( 2.473)	Data  1.025 ( 1.786)	Loss 2.8506e+00 (2.9608e+00)	Acc@1  40.62 ( 37.01)	Acc@5  63.67 ( 61.99)
Epoch: [2][4500/5005]	Time  1.566 ( 2.474)	Data  0.875 ( 1.787)	Loss 2.9940e+00 (2.9593e+00)	Acc@1  38.67 ( 37.04)	Acc@5  58.98 ( 62.02)
Epoch: [2][4550/5005]	Time  1.329 ( 2.475

Epoch: [3][1800/5005]	Time  0.787 ( 2.329)	Data  0.059 ( 1.587)	Loss 2.5606e+00 (2.7351e+00)	Acc@1  43.36 ( 40.85)	Acc@5  67.58 ( 65.82)
Epoch: [3][1850/5005]	Time  2.718 ( 2.335)	Data  2.104 ( 1.596)	Loss 2.5862e+00 (2.7343e+00)	Acc@1  44.92 ( 40.87)	Acc@5  67.58 ( 65.83)
Epoch: [3][1900/5005]	Time  0.683 ( 2.342)	Data  0.075 ( 1.605)	Loss 2.6171e+00 (2.7341e+00)	Acc@1  41.02 ( 40.89)	Acc@5  66.41 ( 65.84)
Epoch: [3][1950/5005]	Time  2.803 ( 2.346)	Data  2.245 ( 1.612)	Loss 2.7786e+00 (2.7324e+00)	Acc@1  40.23 ( 40.93)	Acc@5  62.11 ( 65.86)
Epoch: [3][2000/5005]	Time  2.540 ( 2.353)	Data  1.945 ( 1.621)	Loss 2.7881e+00 (2.7319e+00)	Acc@1  42.97 ( 40.94)	Acc@5  62.89 ( 65.88)
Epoch: [3][2050/5005]	Time  0.688 ( 2.358)	Data  0.055 ( 1.627)	Loss 2.4679e+00 (2.7309e+00)	Acc@1  46.88 ( 40.95)	Acc@5  66.80 ( 65.90)
Epoch: [3][2100/5005]	Time  3.223 ( 2.364)	Data  2.393 ( 1.635)	Loss 2.7864e+00 (2.7305e+00)	Acc@1  37.11 ( 40.95)	Acc@5  66.02 ( 65.91)
Epoch: [3][2150/5005]	Time  0.707 ( 2.367

Epoch: [3][4800/5005]	Time  6.512 ( 2.484)	Data  5.960 ( 1.797)	Loss 2.3553e+00 (2.6972e+00)	Acc@1  46.88 ( 41.59)	Acc@5  74.61 ( 66.50)
Epoch: [3][4850/5005]	Time  0.728 ( 2.484)	Data  0.052 ( 1.797)	Loss 2.4828e+00 (2.6966e+00)	Acc@1  41.80 ( 41.60)	Acc@5  68.36 ( 66.50)
Epoch: [3][4900/5005]	Time  3.405 ( 2.485)	Data  2.770 ( 1.799)	Loss 2.7399e+00 (2.6959e+00)	Acc@1  41.80 ( 41.61)	Acc@5  66.80 ( 66.52)
Epoch: [3][4950/5005]	Time  2.101 ( 2.485)	Data  1.384 ( 1.799)	Loss 2.6451e+00 (2.6955e+00)	Acc@1  41.41 ( 41.62)	Acc@5  66.41 ( 66.52)
Epoch: [3][5000/5005]	Time  1.024 ( 2.487)	Data  0.081 ( 1.801)	Loss 2.4337e+00 (2.6954e+00)	Acc@1  47.66 ( 41.62)	Acc@5  68.75 ( 66.52)
Test: [  0/391]	Time  6.376 ( 6.376)	Loss 2.7721e+00 (2.7721e+00)	Acc@1  33.59 ( 33.59)	Acc@5  64.84 ( 64.84)
Test: [ 50/391]	Time  0.520 ( 1.268)	Loss 2.6253e+00 (2.3224e+00)	Acc@1  44.53 ( 46.94)	Acc@5  70.31 ( 72.84)
Test: [100/391]	Time  2.216 ( 1.222)	Loss 2.0360e+00 (2.3514e+00)	Acc@1  48.44 ( 46.55)	Acc@5  

Epoch: [4][2400/5005]	Time  3.481 ( 2.378)	Data  2.963 ( 1.675)	Loss 2.4115e+00 (2.5616e+00)	Acc@1  40.62 ( 44.00)	Acc@5  73.44 ( 68.75)
Epoch: [4][2450/5005]	Time  0.681 ( 2.382)	Data  0.071 ( 1.679)	Loss 2.3695e+00 (2.5617e+00)	Acc@1  49.22 ( 44.00)	Acc@5  71.09 ( 68.76)
Epoch: [4][2500/5005]	Time  2.402 ( 2.386)	Data  1.843 ( 1.685)	Loss 2.4402e+00 (2.5615e+00)	Acc@1  46.48 ( 44.01)	Acc@5  71.48 ( 68.76)
Epoch: [4][2550/5005]	Time  0.753 ( 2.389)	Data  0.056 ( 1.690)	Loss 2.7044e+00 (2.5617e+00)	Acc@1  41.80 ( 44.01)	Acc@5  63.28 ( 68.77)
Epoch: [4][2600/5005]	Time  1.996 ( 2.392)	Data  1.416 ( 1.694)	Loss 2.6187e+00 (2.5620e+00)	Acc@1  40.62 ( 44.00)	Acc@5  66.80 ( 68.76)
Epoch: [4][2650/5005]	Time  0.746 ( 2.396)	Data  0.099 ( 1.698)	Loss 2.6953e+00 (2.5620e+00)	Acc@1  39.84 ( 44.00)	Acc@5  66.02 ( 68.75)
Epoch: [4][2700/5005]	Time  0.946 ( 2.399)	Data  0.065 ( 1.702)	Loss 2.3949e+00 (2.5613e+00)	Acc@1  45.70 ( 44.01)	Acc@5  70.70 ( 68.76)
Epoch: [4][2750/5005]	Time  0.730 ( 2.400

Epoch: [5][   0/5005]	Time  9.281 ( 9.281)	Data  8.309 ( 8.309)	Loss 2.4392e+00 (2.4392e+00)	Acc@1  49.22 ( 49.22)	Acc@5  68.36 ( 68.36)
Epoch: [5][  50/5005]	Time  1.107 ( 2.224)	Data  0.158 ( 1.406)	Loss 2.4006e+00 (2.4550e+00)	Acc@1  44.92 ( 46.13)	Acc@5  71.09 ( 70.68)
Epoch: [5][ 100/5005]	Time  4.989 ( 2.223)	Data  4.157 ( 1.406)	Loss 2.3558e+00 (2.4547e+00)	Acc@1  43.75 ( 46.13)	Acc@5  73.83 ( 70.87)
Epoch: [5][ 150/5005]	Time  0.944 ( 2.182)	Data  0.110 ( 1.369)	Loss 2.1387e+00 (2.4592e+00)	Acc@1  52.34 ( 46.04)	Acc@5  76.56 ( 70.60)
Epoch: [5][ 200/5005]	Time  4.264 ( 2.181)	Data  3.496 ( 1.379)	Loss 2.5388e+00 (2.4617e+00)	Acc@1  44.92 ( 45.79)	Acc@5  69.14 ( 70.67)
Epoch: [5][ 250/5005]	Time  0.817 ( 2.175)	Data  0.056 ( 1.376)	Loss 2.3952e+00 (2.4589e+00)	Acc@1  47.66 ( 45.83)	Acc@5  72.66 ( 70.64)
Epoch: [5][ 300/5005]	Time  0.926 ( 2.172)	Data  0.077 ( 1.379)	Loss 2.1280e+00 (2.4603e+00)	Acc@1  51.17 ( 45.87)	Acc@5  75.39 ( 70.57)
Epoch: [5][ 350/5005]	Time  5.542 ( 2.182

Epoch: [5][3000/5005]	Time  0.789 ( 2.442)	Data  0.053 ( 1.735)	Loss 2.3394e+00 (2.4465e+00)	Acc@1  44.14 ( 46.12)	Acc@5  70.70 ( 70.69)
Epoch: [5][3050/5005]	Time  8.420 ( 2.446)	Data  7.690 ( 1.739)	Loss 2.3891e+00 (2.4461e+00)	Acc@1  51.95 ( 46.13)	Acc@5  73.05 ( 70.70)
Epoch: [5][3100/5005]	Time  0.840 ( 2.447)	Data  0.062 ( 1.740)	Loss 2.5006e+00 (2.4456e+00)	Acc@1  44.92 ( 46.14)	Acc@5  67.58 ( 70.71)
Epoch: [5][3150/5005]	Time  6.822 ( 2.451)	Data  6.242 ( 1.744)	Loss 2.5316e+00 (2.4450e+00)	Acc@1  45.31 ( 46.16)	Acc@5  68.36 ( 70.72)
Epoch: [5][3200/5005]	Time  0.782 ( 2.452)	Data  0.058 ( 1.746)	Loss 2.4769e+00 (2.4448e+00)	Acc@1  46.09 ( 46.17)	Acc@5  70.70 ( 70.73)
Epoch: [5][3250/5005]	Time  0.809 ( 2.455)	Data  0.297 ( 1.749)	Loss 2.3243e+00 (2.4443e+00)	Acc@1  46.09 ( 46.17)	Acc@5  73.05 ( 70.73)
Epoch: [5][3300/5005]	Time  0.817 ( 2.456)	Data  0.097 ( 1.750)	Loss 2.4825e+00 (2.4442e+00)	Acc@1  42.58 ( 46.18)	Acc@5  71.09 ( 70.73)
Epoch: [5][3350/5005]	Time  7.171 ( 2.461

Epoch: [6][ 600/5005]	Time  0.858 ( 2.207)	Data  0.070 ( 1.404)	Loss 2.4628e+00 (2.3508e+00)	Acc@1  41.41 ( 47.83)	Acc@5  69.53 ( 72.17)
Epoch: [6][ 650/5005]	Time  6.801 ( 2.215)	Data  5.945 ( 1.415)	Loss 2.3403e+00 (2.3545e+00)	Acc@1  47.27 ( 47.77)	Acc@5  71.48 ( 72.09)
Epoch: [6][ 700/5005]	Time  0.831 ( 2.221)	Data  0.072 ( 1.419)	Loss 2.2688e+00 (2.3561e+00)	Acc@1  48.44 ( 47.78)	Acc@5  75.00 ( 72.07)
Epoch: [6][ 750/5005]	Time  3.688 ( 2.228)	Data  2.968 ( 1.428)	Loss 2.4339e+00 (2.3569e+00)	Acc@1  46.48 ( 47.76)	Acc@5  72.66 ( 72.07)
Epoch: [6][ 800/5005]	Time  1.016 ( 2.228)	Data  0.168 ( 1.433)	Loss 2.0985e+00 (2.3568e+00)	Acc@1  54.69 ( 47.76)	Acc@5  78.12 ( 72.08)
Epoch: [6][ 850/5005]	Time  5.320 ( 2.235)	Data  4.353 ( 1.443)	Loss 2.4793e+00 (2.3573e+00)	Acc@1  49.22 ( 47.74)	Acc@5  70.31 ( 72.08)
Epoch: [6][ 900/5005]	Time  0.788 ( 2.235)	Data  0.062 ( 1.448)	Loss 2.4368e+00 (2.3587e+00)	Acc@1  48.44 ( 47.73)	Acc@5  72.66 ( 72.07)
Epoch: [6][ 950/5005]	Time  4.520 ( 2.242

Epoch: [6][3600/5005]	Time  0.708 ( 2.493)	Data  0.066 ( 1.783)	Loss 2.5164e+00 (2.3675e+00)	Acc@1  46.88 ( 47.61)	Acc@5  67.97 ( 71.88)
Epoch: [6][3650/5005]	Time  8.369 ( 2.498)	Data  7.687 ( 1.788)	Loss 2.5876e+00 (2.3673e+00)	Acc@1  46.48 ( 47.61)	Acc@5  67.58 ( 71.88)
Epoch: [6][3700/5005]	Time  0.835 ( 2.500)	Data  0.054 ( 1.790)	Loss 2.5616e+00 (2.3673e+00)	Acc@1  43.36 ( 47.61)	Acc@5  66.02 ( 71.89)
Epoch: [6][3750/5005]	Time  7.117 ( 2.504)	Data  6.595 ( 1.794)	Loss 2.1406e+00 (2.3671e+00)	Acc@1  44.53 ( 47.61)	Acc@5  72.27 ( 71.89)
Epoch: [6][3800/5005]	Time  0.946 ( 2.507)	Data  0.058 ( 1.797)	Loss 2.3112e+00 (2.3668e+00)	Acc@1  49.22 ( 47.61)	Acc@5  73.83 ( 71.89)
Epoch: [6][3850/5005]	Time  7.763 ( 2.510)	Data  7.154 ( 1.801)	Loss 2.3276e+00 (2.3664e+00)	Acc@1  47.66 ( 47.62)	Acc@5  70.31 ( 71.90)
Epoch: [6][3900/5005]	Time  0.762 ( 2.511)	Data  0.091 ( 1.803)	Loss 2.2824e+00 (2.3665e+00)	Acc@1  51.17 ( 47.62)	Acc@5  75.00 ( 71.90)
Epoch: [6][3950/5005]	Time  7.823 ( 2.515

Epoch: [7][1200/5005]	Time  1.102 ( 2.438)	Data  0.230 ( 1.703)	Loss 2.3279e+00 (2.2930e+00)	Acc@1  49.61 ( 49.00)	Acc@5  73.83 ( 73.11)
Epoch: [7][1250/5005]	Time  0.927 ( 2.448)	Data  0.053 ( 1.716)	Loss 2.1985e+00 (2.2931e+00)	Acc@1  53.12 ( 49.00)	Acc@5  75.78 ( 73.10)
Epoch: [7][1300/5005]	Time  0.973 ( 2.454)	Data  0.109 ( 1.722)	Loss 2.3353e+00 (2.2956e+00)	Acc@1  47.66 ( 48.96)	Acc@5  73.83 ( 73.07)
Epoch: [7][1350/5005]	Time  1.004 ( 2.463)	Data  0.079 ( 1.733)	Loss 2.2373e+00 (2.2963e+00)	Acc@1  51.56 ( 48.95)	Acc@5  73.83 ( 73.04)
Epoch: [7][1400/5005]	Time  0.746 ( 2.471)	Data  0.085 ( 1.743)	Loss 2.4393e+00 (2.2967e+00)	Acc@1  46.88 ( 48.96)	Acc@5  72.66 ( 73.03)
Epoch: [7][1450/5005]	Time  0.873 ( 2.478)	Data  0.056 ( 1.753)	Loss 2.2680e+00 (2.2965e+00)	Acc@1  47.27 ( 48.94)	Acc@5  73.83 ( 73.03)
Epoch: [7][1500/5005]	Time  1.781 ( 2.486)	Data  1.050 ( 1.764)	Loss 2.2743e+00 (2.2971e+00)	Acc@1  50.78 ( 48.93)	Acc@5  73.83 ( 73.02)
Epoch: [7][1550/5005]	Time  0.733 ( 2.496

Epoch: [7][4200/5005]	Time  7.007 ( 2.729)	Data  6.429 ( 2.049)	Loss 2.1566e+00 (2.3049e+00)	Acc@1  51.17 ( 48.75)	Acc@5  78.12 ( 72.94)
Epoch: [7][4250/5005]	Time  0.740 ( 2.729)	Data  0.190 ( 2.050)	Loss 2.2931e+00 (2.3052e+00)	Acc@1  47.27 ( 48.74)	Acc@5  72.66 ( 72.93)
Epoch: [7][4300/5005]	Time  2.224 ( 2.731)	Data  1.597 ( 2.052)	Loss 2.1699e+00 (2.3050e+00)	Acc@1  49.61 ( 48.75)	Acc@5  74.22 ( 72.94)
Epoch: [7][4350/5005]	Time  3.175 ( 2.732)	Data  2.467 ( 2.054)	Loss 2.3523e+00 (2.3051e+00)	Acc@1  48.05 ( 48.74)	Acc@5  74.61 ( 72.94)
Epoch: [7][4400/5005]	Time  0.784 ( 2.734)	Data  0.096 ( 2.056)	Loss 2.2892e+00 (2.3053e+00)	Acc@1  50.39 ( 48.74)	Acc@5  73.44 ( 72.94)
Epoch: [7][4450/5005]	Time  0.491 ( 2.735)	Data  0.040 ( 2.057)	Loss 2.5566e+00 (2.3056e+00)	Acc@1  42.97 ( 48.74)	Acc@5  68.75 ( 72.93)
Epoch: [7][4500/5005]	Time  1.084 ( 2.738)	Data  0.125 ( 2.060)	Loss 2.2462e+00 (2.3053e+00)	Acc@1  51.17 ( 48.74)	Acc@5  75.78 ( 72.94)
Epoch: [7][4550/5005]	Time  2.710 ( 2.738

Epoch: [8][1800/5005]	Time  4.284 ( 2.466)	Data  3.534 ( 1.752)	Loss 2.2926e+00 (2.2521e+00)	Acc@1  48.44 ( 49.78)	Acc@5  73.05 ( 73.74)
Epoch: [8][1850/5005]	Time  5.336 ( 2.473)	Data  4.789 ( 1.761)	Loss 2.2019e+00 (2.2522e+00)	Acc@1  48.83 ( 49.77)	Acc@5  75.78 ( 73.74)
Epoch: [8][1900/5005]	Time  0.681 ( 2.476)	Data  0.070 ( 1.766)	Loss 2.4419e+00 (2.2529e+00)	Acc@1  43.75 ( 49.74)	Acc@5  69.92 ( 73.73)
Epoch: [8][1950/5005]	Time  7.939 ( 2.485)	Data  7.405 ( 1.775)	Loss 2.3245e+00 (2.2535e+00)	Acc@1  48.44 ( 49.73)	Acc@5  73.44 ( 73.72)
Epoch: [8][2000/5005]	Time  3.717 ( 2.488)	Data  3.070 ( 1.781)	Loss 2.2748e+00 (2.2546e+00)	Acc@1  52.73 ( 49.73)	Acc@5  72.66 ( 73.71)
Epoch: [8][2050/5005]	Time  4.995 ( 2.494)	Data  4.456 ( 1.787)	Loss 2.0125e+00 (2.2541e+00)	Acc@1  56.25 ( 49.73)	Acc@5  77.34 ( 73.73)
Epoch: [8][2100/5005]	Time  3.564 ( 2.498)	Data  3.008 ( 1.793)	Loss 2.4157e+00 (2.2534e+00)	Acc@1  46.88 ( 49.75)	Acc@5  70.31 ( 73.73)
Epoch: [8][2150/5005]	Time  0.814 ( 2.503

Epoch: [8][4800/5005]	Time  3.248 ( 2.618)	Data  2.621 ( 1.937)	Loss 2.2827e+00 (2.2601e+00)	Acc@1  50.00 ( 49.62)	Acc@5  76.56 ( 73.64)
Epoch: [8][4850/5005]	Time  0.738 ( 2.619)	Data  0.063 ( 1.938)	Loss 2.2579e+00 (2.2601e+00)	Acc@1  50.39 ( 49.62)	Acc@5  71.88 ( 73.64)
Epoch: [8][4900/5005]	Time  4.308 ( 2.620)	Data  3.640 ( 1.940)	Loss 2.1428e+00 (2.2602e+00)	Acc@1  53.52 ( 49.62)	Acc@5  77.34 ( 73.64)
Epoch: [8][4950/5005]	Time  0.795 ( 2.620)	Data  0.169 ( 1.940)	Loss 2.1846e+00 (2.2601e+00)	Acc@1  49.22 ( 49.63)	Acc@5  75.39 ( 73.64)
Epoch: [8][5000/5005]	Time  1.696 ( 2.621)	Data  1.226 ( 1.942)	Loss 2.1618e+00 (2.2599e+00)	Acc@1  50.39 ( 49.63)	Acc@5  72.27 ( 73.64)
Test: [  0/391]	Time  6.980 ( 6.980)	Loss 2.3854e+00 (2.3854e+00)	Acc@1  47.66 ( 47.66)	Acc@5  71.88 ( 71.88)
Test: [ 50/391]	Time  0.789 ( 1.326)	Loss 2.0662e+00 (2.0664e+00)	Acc@1  54.69 ( 52.68)	Acc@5  76.56 ( 77.19)
Test: [100/391]	Time  1.480 ( 1.266)	Loss 2.2900e+00 (2.0994e+00)	Acc@1  52.34 ( 51.83)	Acc@5  

Epoch: [9][2400/5005]	Time  6.687 ( 2.422)	Data  6.066 ( 1.688)	Loss 2.2177e+00 (2.2166e+00)	Acc@1  50.78 ( 50.44)	Acc@5  73.05 ( 74.27)
Epoch: [9][2450/5005]	Time  0.937 ( 2.424)	Data  0.070 ( 1.691)	Loss 2.1605e+00 (2.2167e+00)	Acc@1  48.44 ( 50.43)	Acc@5  74.22 ( 74.27)
Epoch: [9][2500/5005]	Time  6.766 ( 2.429)	Data  6.213 ( 1.697)	Loss 2.1803e+00 (2.2160e+00)	Acc@1  51.56 ( 50.44)	Acc@5  76.17 ( 74.29)
Epoch: [9][2550/5005]	Time  0.626 ( 2.431)	Data  0.082 ( 1.699)	Loss 2.1994e+00 (2.2161e+00)	Acc@1  51.56 ( 50.44)	Acc@5  75.39 ( 74.29)
Epoch: [9][2600/5005]	Time  3.931 ( 2.435)	Data  3.367 ( 1.704)	Loss 2.2677e+00 (2.2163e+00)	Acc@1  52.73 ( 50.44)	Acc@5  71.88 ( 74.28)
Epoch: [9][2650/5005]	Time  0.810 ( 2.437)	Data  0.187 ( 1.707)	Loss 2.2671e+00 (2.2169e+00)	Acc@1  50.78 ( 50.43)	Acc@5  75.00 ( 74.27)
Epoch: [9][2700/5005]	Time  3.129 ( 2.440)	Data  2.559 ( 1.711)	Loss 2.3571e+00 (2.2164e+00)	Acc@1  47.66 ( 50.43)	Acc@5  72.27 ( 74.27)
Epoch: [9][2750/5005]	Time  2.004 ( 2.443

Epoch: [10][   0/5005]	Time  9.168 ( 9.168)	Data  8.180 ( 8.180)	Loss 1.8972e+00 (1.8972e+00)	Acc@1  59.77 ( 59.77)	Acc@5  78.91 ( 78.91)
Epoch: [10][  50/5005]	Time  2.242 ( 2.250)	Data  1.603 ( 1.449)	Loss 2.1133e+00 (2.1383e+00)	Acc@1  50.39 ( 51.61)	Acc@5  74.61 ( 75.35)
Epoch: [10][ 100/5005]	Time  3.741 ( 2.221)	Data  3.011 ( 1.407)	Loss 2.1477e+00 (2.1495e+00)	Acc@1  50.78 ( 51.62)	Acc@5  77.73 ( 75.26)
Epoch: [10][ 150/5005]	Time  2.278 ( 2.208)	Data  1.666 ( 1.396)	Loss 2.3287e+00 (2.1525e+00)	Acc@1  44.53 ( 51.58)	Acc@5  70.31 ( 75.27)
Epoch: [10][ 200/5005]	Time  2.344 ( 2.196)	Data  1.543 ( 1.387)	Loss 2.1608e+00 (2.1532e+00)	Acc@1  53.52 ( 51.69)	Acc@5  74.22 ( 75.27)
Epoch: [10][ 250/5005]	Time  0.957 ( 2.198)	Data  0.107 ( 1.393)	Loss 2.0128e+00 (2.1526e+00)	Acc@1  53.12 ( 51.67)	Acc@5  78.12 ( 75.31)
Epoch: [10][ 300/5005]	Time  1.559 ( 2.198)	Data  0.654 ( 1.388)	Loss 2.1034e+00 (2.1583e+00)	Acc@1  48.83 ( 51.54)	Acc@5  76.56 ( 75.23)
Epoch: [10][ 350/5005]	Time  0.997

Epoch: [10][3000/5005]	Time  1.547 ( 2.470)	Data  0.930 ( 1.756)	Loss 2.2554e+00 (2.1818e+00)	Acc@1  50.39 ( 51.04)	Acc@5  73.44 ( 74.83)
Epoch: [10][3050/5005]	Time  0.710 ( 2.472)	Data  0.083 ( 1.759)	Loss 2.2246e+00 (2.1818e+00)	Acc@1  53.52 ( 51.04)	Acc@5  72.66 ( 74.83)
Epoch: [10][3100/5005]	Time  7.685 ( 2.475)	Data  7.100 ( 1.763)	Loss 2.2737e+00 (2.1820e+00)	Acc@1  51.17 ( 51.03)	Acc@5  72.66 ( 74.83)
Epoch: [10][3150/5005]	Time  6.396 ( 2.478)	Data  5.840 ( 1.765)	Loss 2.0423e+00 (2.1818e+00)	Acc@1  52.73 ( 51.04)	Acc@5  79.30 ( 74.84)
Epoch: [10][3200/5005]	Time  4.347 ( 2.479)	Data  3.599 ( 1.767)	Loss 2.0459e+00 (2.1816e+00)	Acc@1  51.17 ( 51.04)	Acc@5  78.91 ( 74.84)
Epoch: [10][3250/5005]	Time  0.764 ( 2.480)	Data  0.074 ( 1.769)	Loss 2.4216e+00 (2.1817e+00)	Acc@1  48.05 ( 51.04)	Acc@5  73.05 ( 74.85)
Epoch: [10][3300/5005]	Time  3.663 ( 2.482)	Data  2.949 ( 1.772)	Loss 2.0306e+00 (2.1818e+00)	Acc@1  53.91 ( 51.03)	Acc@5  75.39 ( 74.85)
Epoch: [10][3350/5005]	Time  2.226

Epoch: [11][ 600/5005]	Time  1.289 ( 2.199)	Data  0.535 ( 1.414)	Loss 2.2956e+00 (2.1254e+00)	Acc@1  48.44 ( 52.19)	Acc@5  74.61 ( 75.82)
Epoch: [11][ 650/5005]	Time  2.599 ( 2.204)	Data  1.882 ( 1.419)	Loss 2.2747e+00 (2.1284e+00)	Acc@1  48.83 ( 52.12)	Acc@5  73.83 ( 75.75)
Epoch: [11][ 700/5005]	Time  2.774 ( 2.208)	Data  2.105 ( 1.424)	Loss 2.3551e+00 (2.1318e+00)	Acc@1  48.05 ( 52.03)	Acc@5  71.88 ( 75.68)
Epoch: [11][ 750/5005]	Time  3.251 ( 2.208)	Data  2.639 ( 1.427)	Loss 2.0873e+00 (2.1330e+00)	Acc@1  54.69 ( 52.01)	Acc@5  75.00 ( 75.64)
Epoch: [11][ 800/5005]	Time  2.188 ( 2.214)	Data  1.502 ( 1.436)	Loss 2.1851e+00 (2.1340e+00)	Acc@1  51.56 ( 52.00)	Acc@5  73.44 ( 75.63)
Epoch: [11][ 850/5005]	Time  0.936 ( 2.216)	Data  0.159 ( 1.440)	Loss 2.1434e+00 (2.1361e+00)	Acc@1  50.00 ( 51.98)	Acc@5  75.00 ( 75.59)
Epoch: [11][ 900/5005]	Time  2.678 ( 2.222)	Data  1.997 ( 1.448)	Loss 2.0382e+00 (2.1366e+00)	Acc@1  53.12 ( 51.96)	Acc@5  75.78 ( 75.58)
Epoch: [11][ 950/5005]	Time  0.950

Epoch: [11][3600/5005]	Time  1.114 ( 2.467)	Data  0.066 ( 1.766)	Loss 2.0357e+00 (2.1603e+00)	Acc@1  52.34 ( 51.43)	Acc@5  79.30 ( 75.18)
Epoch: [11][3650/5005]	Time  7.685 ( 2.471)	Data  7.087 ( 1.769)	Loss 1.9162e+00 (2.1602e+00)	Acc@1  51.56 ( 51.43)	Acc@5  78.52 ( 75.19)
Epoch: [11][3700/5005]	Time  2.779 ( 2.471)	Data  2.180 ( 1.770)	Loss 1.9676e+00 (2.1606e+00)	Acc@1  51.95 ( 51.42)	Acc@5  79.69 ( 75.19)
Epoch: [11][3750/5005]	Time  5.017 ( 2.474)	Data  4.471 ( 1.773)	Loss 2.2330e+00 (2.1610e+00)	Acc@1  50.39 ( 51.41)	Acc@5  73.44 ( 75.19)
Epoch: [11][3800/5005]	Time  3.984 ( 2.475)	Data  3.275 ( 1.774)	Loss 1.9982e+00 (2.1614e+00)	Acc@1  51.56 ( 51.40)	Acc@5  77.34 ( 75.18)
Epoch: [11][3850/5005]	Time  8.903 ( 2.477)	Data  8.254 ( 1.777)	Loss 2.1812e+00 (2.1617e+00)	Acc@1  51.95 ( 51.40)	Acc@5  72.27 ( 75.17)
Epoch: [11][3900/5005]	Time  3.056 ( 2.478)	Data  2.264 ( 1.779)	Loss 2.2234e+00 (2.1621e+00)	Acc@1  49.22 ( 51.39)	Acc@5  72.27 ( 75.17)
Epoch: [11][3950/5005]	Time  4.722

Epoch: [12][1200/5005]	Time  0.801 ( 2.276)	Data  0.084 ( 1.498)	Loss 2.0624e+00 (2.1209e+00)	Acc@1  53.12 ( 52.28)	Acc@5  75.78 ( 75.81)
Epoch: [12][1250/5005]	Time  3.062 ( 2.283)	Data  2.509 ( 1.507)	Loss 2.2022e+00 (2.1233e+00)	Acc@1  50.78 ( 52.23)	Acc@5  74.61 ( 75.79)
Epoch: [12][1300/5005]	Time  0.851 ( 2.290)	Data  0.143 ( 1.516)	Loss 2.1926e+00 (2.1237e+00)	Acc@1  52.73 ( 52.23)	Acc@5  73.83 ( 75.78)
Epoch: [12][1350/5005]	Time  6.711 ( 2.299)	Data  6.161 ( 1.526)	Loss 2.3190e+00 (2.1254e+00)	Acc@1  45.31 ( 52.20)	Acc@5  71.88 ( 75.75)
Epoch: [12][1400/5005]	Time  0.662 ( 2.303)	Data  0.070 ( 1.533)	Loss 2.0069e+00 (2.1255e+00)	Acc@1  55.08 ( 52.19)	Acc@5  75.78 ( 75.75)
Epoch: [12][1450/5005]	Time  6.145 ( 2.313)	Data  5.542 ( 1.545)	Loss 2.0941e+00 (2.1274e+00)	Acc@1  55.08 ( 52.16)	Acc@5  76.95 ( 75.72)
Epoch: [12][1500/5005]	Time  0.746 ( 2.318)	Data  0.075 ( 1.552)	Loss 1.9116e+00 (2.1256e+00)	Acc@1  53.91 ( 52.18)	Acc@5  77.73 ( 75.75)
Epoch: [12][1550/5005]	Time  2.847

In [None]:
#from torchsummary import summary
print(model)

In [None]:
print(device)