In [1]:
import math
import os
import time

import neptune.new as neptune
import torch
import torch.nn as nn
import torch.optim
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from torch.backends import cudnn
from torchvision.models import alexnet, vgg13, resnet34, squeezenet1_1, mobilenet_v2, mobilenet_v3_small

from simsiam.model_factory import SimSiam

Hiperparámetros que debe definir el usuario

In [3]:
arch = 'mobilenet_v2' # Options: resnet34, mobilenet_v2, alexnet, vgg13 and squeezenet1_1
train_path = 'ImageWoof/train' # Path of the dataset folder holding the training images
val_path = 'ImageWoof/val'  # Path of the dataset folder holding the validation images
model_path = 'Classifier_Checkpoints/ImageWoof/Linear_Classifier/Mobilenet/100epochs/' # Directory where the trained model will be saved
# Path of the checkpoint produced by the SimSiam self-supervised pre-training
pretrained_path = 'simsiam/Unsupervised_Training_Checkpoints/ImageWoof/Mobilenet/checkpoint_100.pth'
batch_size = 256
epochs = 90 # number of training epochs
# Base learning rate; it is multiplied by batch_size / 256 before being given to the optimizer.
# NOTE(review): in the logged run the training loss climbs from ~2.3 to >15 during
# the first epoch, which may indicate this value is too high for plain SGD — confirm.
lr = 0.4 # base_lr
weight_decay = 0.0
momentum = 0.9
gpu = 0 # index of the GPU to run on
nombre = 'MobilenetV2ImageWoofSimSiamLinear100epochsPre'# experiment name

Código

In [2]:
# Report which device type PyTorch can use on this machine.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [4]:
class ProgressMeter(object):
    """Prints one tab-separated progress line made of a batch counter and a set of meters.

    Args:
        num_batches: total number of batches; used to size and render the
            ``[current/total]`` counter.
        meters: objects whose ``str()`` is appended to each progress line.
        prefix: text placed in front of the batch counter.
    """

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the progress line for batch index ``batch``."""
        columns = [self.prefix + self.batch_fmtstr.format(batch)]
        columns.extend(str(meter) for meter in self.meters)
        print('\t'.join(columns))

    def _get_batch_fmtstr(self, num_batches):
        """Build the ``[{:Nd}/total]`` template, N being the digit count of the total."""
        width = len(str(num_batches // 1))
        slot = '{:%dd}' % width
        return '[%s/%s]' % (slot, slot.format(num_batches))

In [5]:
class AverageMeter(object):
    """Tracks the latest value of a metric together with its weighted running mean.

    Attributes written by the meter:
        val: most recent value passed to ``update``.
        sum: accumulated ``val * n`` over all updates.
        count: accumulated ``n`` over all updates.
        avg: ``sum / count`` after the latest update.
    """

    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Clear every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the current value, weighted by ``n`` samples."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        # Splice the per-meter format spec into the template, then fill it from the
        # instance attributes (name, val, avg).
        template = '{{name}} {{val{spec}}} ({{avg{spec}}})'.format(spec=self.fmt)
        return template.format(**vars(self))

In [6]:
def accuracy(output, target, topk=(1,)):
    """Return the top-k accuracy (in percent) of ``output`` for each k in ``topk``.

    Args:
        output: 2-D score tensor; the k best classes are taken along dimension 1.
        target: 1-D tensor of class indices, one per row of ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``.
    """
    with torch.no_grad():
        largest_k = max(topk)
        n_samples = target.size(0)

        # Indices of the largest_k best-scoring classes for every sample.
        top_classes = output.topk(largest_k, 1, True, True)[1]
        # Transposed so that row r holds the rank-r guess of every sample.
        ranked = top_classes.t()
        hits = ranked.eq(target.view(1, -1).expand_as(ranked))

        # A sample counts for k if any of its first k guesses equals the target.
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(100.0 / n_samples)
            for k in topk
        ]

In [7]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Train the classifier for one epoch and return the epoch-averaged metrics.

    The model is deliberately kept in eval mode: under the linear-classification
    protocol on frozen features no part of the pre-trained network may change, and
    BatchNorm layers in train mode would revise their running mean/std even though
    they receive no gradient.

    Batches are moved to the device the model's parameters live on, so the function
    follows wherever the model was placed instead of assuming GPU 0.

    Args:
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network to optimise; must own at least one parameter.
        criterion: loss callable, applied as ``criterion(output, target)``.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the update.
        epoch: epoch index, used only in the progress-line prefix.

    Returns:
        ``(loss, top1, top5)``: the sample-weighted averages over the epoch. The
        accuracies are accumulated from the tensors returned by ``accuracy``.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # Eval mode on purpose (see docstring): the frozen backbone must stay untouched.
    model.eval()

    # Follow the model's placement rather than hard-coding a GPU index.
    device = next(model.parameters()).device

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % 10 == 0:
            progress.display(i)

    return losses.avg, top1.avg, top5.avg

In [8]:
def validate(val_loader, model, criterion):
    """Evaluate the model on a validation loader and return the averaged metrics.

    Runs in eval mode with gradients disabled. Batches are moved to the device the
    model's parameters live on, so the function follows wherever the model was
    placed instead of assuming GPU 0.

    Args:
        val_loader: iterable yielding ``(images, target)`` batches.
        model: network to evaluate; must own at least one parameter.
        criterion: loss callable, applied as ``criterion(output, target)``.

    Returns:
        ``(loss, top1, top5)``: the sample-weighted averages over the loader. The
        accuracies are accumulated from the tensors returned by ``accuracy``.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    # Built for parity with train(); it is never displayed here (see TODO below).
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    # Follow the model's placement rather than hard-coding a GPU index.
    device = next(model.parameters()).device

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg

In [9]:
def adjust_learning_rate(optimizer, init_lr, epoch, n_epochs):
    """Set the learning rate of every parameter group using a half-cosine decay.

    The rate equals ``init_lr`` at ``epoch == 0`` and reaches zero at
    ``epoch == n_epochs``.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts with an ``'lr'`` key.
        init_lr: learning rate at the start of the schedule.
        epoch: current epoch index.
        n_epochs: total number of epochs of the schedule.
    """
    cosine_term = math.cos(math.pi * epoch / n_epochs)
    cur_lr = init_lr * 0.5 * (1. + cosine_term)
    for group in optimizer.param_groups:
        group['lr'] = cur_lr

In [10]:
def get_backbone(backbone_name, num_classes=10):
    """Instantiate the requested torchvision architecture.

    Only the selected network is constructed; the constructors are looked up by
    name and called once, instead of building every supported model up front.

    Args:
        backbone_name: one of ``'alexnet'``, ``'mobilenet_v2'``,
            ``'squeezenet1_1'``, ``'resnet34'`` or ``'vgg13'``.
        num_classes: size of the classification output (defaults to 10).

    Returns:
        The freshly constructed model.

    Raises:
        KeyError: if ``backbone_name`` is not one of the supported names.
    """
    constructors = {
        'alexnet': alexnet,
        'mobilenet_v2': mobilenet_v2,
        'squeezenet1_1': squeezenet1_1,
        'resnet34': resnet34,
        'vgg13': vgg13,
    }
    return constructors[backbone_name](num_classes=num_classes)

In [11]:
# Per-channel normalisation applied identically to both splits.
imagenet_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: random resized crop and horizontal flip as augmentation.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    imagenet_normalize,
])

# Validation pipeline: fixed 256x256 resize followed by a central 224 crop.
transform_val = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    imagenet_normalize,
])

# Options common to both loaders; only shuffling differs between the splits.
loader_kwargs = dict(batch_size=batch_size, num_workers=4, pin_memory=True)

train_set = datasets.ImageFolder(train_path, transform=transform_train)
val_set = datasets.ImageFolder(val_path, transform=transform_val)
train_loader = torch.utils.data.DataLoader(train_set, shuffle=True, **loader_kwargs)
val_loader = torch.utils.data.DataLoader(val_set, shuffle=False, **loader_kwargs)

In [12]:
model = get_backbone(arch)
# Freeze every parameter except those of the classification head, which is the
# only part that will be trained. The head is called 'fc' on ResNets and
# 'classifier' on the other architectures.
classifier = 'fc' if 'resnet' in arch else 'classifier'
for name, param in model.named_parameters():
    if classifier in name:
        continue
    param.requires_grad = False
print(model)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [13]:
# Initialise the trainable classification head: weights ~ N(0, 0.01), biases = 0.
if arch == 'resnet34':
    head_layers = [model.fc]
elif arch == 'alexnet':
    head_layers = [model.classifier[i] for i in (1, 4, 6)]
elif arch == 'vgg13':
    head_layers = [model.classifier[i] for i in (0, 3, 6)]
else:
    head_layers = [model.classifier[1]]
for layer in head_layers:
    layer.weight.data.normal_(mean=0.0, std=0.01)
    layer.bias.data.zero_()

# Load the self-supervised pre-trained checkpoint.
# NOTE(review): the file is used as a whole pickled model (``.state_dict()`` is
# called on the loaded object), so ``torch.load`` unpickles arbitrary objects here.
# Only load checkpoints from a trusted source.
checkpoint = torch.load(pretrained_path, map_location="cpu")
# Dictionary with the pre-trained parameters.
state_dict = checkpoint.state_dict()

encoder_prefix = 'encoder.'
for k in list(state_dict.keys()):
    # Keep only backbone parameters: keys under the encoder that do not belong to
    # the classification head. `classifier` is the head name chosen when the
    # model was frozen ('fc' or 'classifier').
    if k.startswith(encoder_prefix) and classifier not in k:
        # Re-register the tensor under its name without the encoder prefix.
        state_dict[k[len(encoder_prefix):]] = state_dict[k]
    # Drop the original entry: it is either not part of the backbone or was just renamed.
    del state_dict[k]
# Copy the pre-trained backbone parameters into the classifier model.
msg = model.load_state_dict(state_dict, strict=False)
print(msg)
# Make sure everything was loaded except the parameters of the classification head.
if arch == 'resnet34':
    assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}
elif arch == 'alexnet':
    assert set(msg.missing_keys) == {"classifier.1.weight","classifier.4.weight", "classifier.6.weight", "classifier.1.bias", 'classifier.4.bias', 'classifier.6.bias'}
elif 'vgg' in arch:
    assert set(msg.missing_keys) == {"classifier.0.weight","classifier.3.weight", "classifier.6.weight", "classifier.0.bias", 'classifier.3.bias', 'classifier.6.bias'}
else:
    assert set(msg.missing_keys) == {"classifier.1.weight", "classifier.1.bias"}

print(model)
# Move the model to the selected GPU.
if gpu is not None:
    torch.cuda.set_device(gpu)
    model = model.cuda(gpu)
    cudnn.benchmark = True

_IncompatibleKeys(missing_keys=['classifier.1.weight', 'classifier.1.bias'], unexpected_keys=[])
MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): 

In [14]:
# Scale the base learning rate linearly with the batch size (reference batch: 256).
init_lr = lr * batch_size / 256
criterion = nn.CrossEntropyLoss().cuda(0)
# Only the parameters left trainable are handed to the optimizer.
parameters = [p for p in model.parameters() if p.requires_grad]

# Sanity check on the number of trainable tensors: 6 for AlexNet and the VGG
# family, 2 for every other architecture.
expected_trainable = 6 if (arch == 'alexnet' or 'vgg' in arch) else 2
assert len(parameters) == expected_trainable

optimizer = torch.optim.SGD(parameters, init_lr, momentum=momentum, weight_decay=weight_decay)

In [15]:
best_val_top1 = 0.
best_val_top5 = 0.
# Create the checkpoint directory up front so that torch.save cannot fail on a
# missing folder after a whole epoch has already been trained.
if model_path:
    os.makedirs(model_path, exist_ok=True)
# Start training and validation.
torch.cuda.empty_cache()
for epoch in range(0, epochs):
    # Cosine schedule: the learning rate is recomputed at the start of every epoch.
    adjust_learning_rate(optimizer, init_lr, epoch, epochs)
    # Train for one epoch.
    train_loss, train_top1, train_top5 = train(train_loader, model, criterion, optimizer, epoch)
    # Evaluate on the validation set.
    val_loss, val_top1, val_top5 = validate(val_loader, model, criterion)
    # Keep the model with the best validation top-1 accuracy seen so far.
    if val_top1 > best_val_top1:
        best_val_top1 = val_top1
        name = 'best.pth'
        # The whole model object is saved (not just its state_dict).
        torch.save(model, os.path.join(model_path, name))
    if val_top5 > best_val_top5:
        best_val_top5 = val_top5
    print(f'EPOCH {epoch+1} ENDED: LOSS train {train_loss} LOSS val {val_loss} Train Acc {train_top1} Val Acc {val_top1}')

https://app.neptune.ai/franciscolm6/TFGInformatica/e/TFGIN-192




Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Epoch: [0][ 0/36]	Time 11.322 (11.322)	Data  7.383 ( 7.383)	Loss 2.3193e+00 (2.3193e+00)	Acc@1   8.59 (  8.59)	Acc@5  51.56 ( 51.56)
Epoch: [0][10/36]	Time  0.150 ( 1.192)	Data  0.000 ( 0.694)	Loss 1.5225e+01 (1.1510e+01)	Acc@1   8.98 ( 14.81)	Acc@5  56.25 ( 55.97)
Epoch: [0][20/36]	Time  0.154 ( 0.817)	Data  0.000 ( 0.479)	Loss 2.7640e+01 (1.6055e+01)	Acc@1  14.84 ( 14.92)	Acc@5  58.59 ( 57.74)
Epoch: [0][30/36]	Time  0.155 ( 0.658)	Data  0.000 ( 0.379)	Loss 1.6924e+01 (1.7126e+01)	Acc@1  20.70 ( 15.85)	Acc@5  64.06 ( 58.87)
 * Acc@1 16.722 Acc@5 65.411
EPOCH 1 ENDED: LOSS train 16.77627211679052 LOSS val 12.76944652905929 Train Acc 15.822714805603027 Val Acc 16.721811294555664
Epoch: [1][ 0/36]	Time  7.641 ( 7.641)	Data  7.152 ( 7.152)	Loss 1.2224e+01 (1.2224e+01)	Acc@1  20.31 ( 2

Epoch: [11][30/36]	Time  0.151 ( 0.590)	Data  0.000 ( 0.430)	Loss 1.5709e+01 (1.2042e+01)	Acc@1  18.75 ( 21.64)	Acc@5  56.25 ( 66.65)
 * Acc@1 14.482 Acc@5 59.939
EPOCH 12 ENDED: LOSS train 12.683971067941089 LOSS val 28.340907461195993 Train Acc 21.329639434814453 Val Acc 14.482056617736816
Epoch: [12][ 0/36]	Time  7.657 ( 7.657)	Data  7.243 ( 7.243)	Loss 2.9789e+01 (2.9789e+01)	Acc@1  13.28 ( 13.28)	Acc@5  58.59 ( 58.59)
Epoch: [12][10/36]	Time  0.151 ( 0.944)	Data  0.000 ( 0.768)	Loss 1.0441e+01 (1.9005e+01)	Acc@1  28.52 ( 20.42)	Acc@5  71.09 ( 64.20)
Epoch: [12][20/36]	Time  0.620 ( 0.670)	Data  0.470 ( 0.506)	Loss 9.6752e+00 (1.5266e+01)	Acc@1  21.88 ( 22.04)	Acc@5  68.36 ( 67.08)
Epoch: [12][30/36]	Time  0.151 ( 0.560)	Data  0.000 ( 0.399)	Loss 7.7842e+00 (1.2886e+01)	Acc@1  23.05 ( 22.66)	Acc@5  75.39 ( 68.85)
 * Acc@1 22.194 Acc@5 72.461
EPOCH 13 ENDED: LOSS train 12.149347737911995 LOSS val 8.236341570011682 Train Acc 22.8919677734375 Val Acc 22.193941116333008
Epoch: [13][ 0/

Epoch: [23][30/36]	Time  0.151 ( 0.572)	Data  0.000 ( 0.410)	Loss 1.0128e+01 (1.0312e+01)	Acc@1  20.70 ( 22.14)	Acc@5  66.80 ( 67.25)
 * Acc@1 18.503 Acc@5 68.058
EPOCH 24 ENDED: LOSS train 10.288529249809455 LOSS val 8.55571746692672 Train Acc 22.08310317993164 Val Acc 18.503435134887695
Epoch: [24][ 0/36]	Time  7.433 ( 7.433)	Data  7.095 ( 7.095)	Loss 7.9473e+00 (7.9473e+00)	Acc@1  19.14 ( 19.14)	Acc@5  66.02 ( 66.02)
Epoch: [24][10/36]	Time  0.184 ( 0.983)	Data  0.000 ( 0.806)	Loss 9.0503e+00 (8.7796e+00)	Acc@1  26.95 ( 23.72)	Acc@5  69.14 ( 66.26)
Epoch: [24][20/36]	Time  0.953 ( 0.724)	Data  0.803 ( 0.556)	Loss 1.1836e+01 (1.0824e+01)	Acc@1  20.70 ( 22.04)	Acc@5  64.06 ( 66.13)
Epoch: [24][30/36]	Time  0.151 ( 0.594)	Data  0.000 ( 0.431)	Loss 1.1128e+01 (1.0960e+01)	Acc@1  21.88 ( 22.08)	Acc@5  70.70 ( 66.60)
 * Acc@1 23.772 Acc@5 69.636
EPOCH 25 ENDED: LOSS train 10.715079062612434 LOSS val 9.84212564687809 Train Acc 22.09418296813965 Val Acc 23.77195167541504
Epoch: [25][ 0/36]	

Epoch: [35][30/36]	Time  0.153 ( 0.595)	Data  0.000 ( 0.416)	Loss 6.9892e+00 (1.0846e+01)	Acc@1  23.44 ( 21.69)	Acc@5  67.97 ( 67.59)
 * Acc@1 23.772 Acc@5 68.083
EPOCH 36 ENDED: LOSS train 10.3244233393471 LOSS val 8.678256783421089 Train Acc 22.00554084777832 Val Acc 23.77195167541504
Epoch: [36][ 0/36]	Time  8.364 ( 8.364)	Data  8.016 ( 8.016)	Loss 8.3860e+00 (8.3860e+00)	Acc@1  24.61 ( 24.61)	Acc@5  66.80 ( 66.80)
Epoch: [36][10/36]	Time  0.151 ( 1.034)	Data  0.000 ( 0.863)	Loss 5.1821e+00 (6.4603e+00)	Acc@1  25.00 ( 24.82)	Acc@5  72.66 ( 71.70)
Epoch: [36][20/36]	Time  0.893 ( 0.745)	Data  0.743 ( 0.583)	Loss 4.9793e+00 (5.6977e+00)	Acc@1  27.73 ( 26.02)	Acc@5  73.83 ( 73.42)
Epoch: [36][30/36]	Time  0.153 ( 0.609)	Data  0.000 ( 0.449)	Loss 4.9799e+00 (5.4106e+00)	Acc@1  28.12 ( 26.81)	Acc@5  73.44 ( 73.65)
 * Acc@1 26.877 Acc@5 77.501
EPOCH 37 ENDED: LOSS train 5.392438888972486 LOSS val 4.708356725923825 Train Acc 27.19113540649414 Val Acc 26.87706756591797
Epoch: [37][ 0/36]	Ti

Epoch: [47][30/36]	Time  0.151 ( 0.575)	Data  0.000 ( 0.416)	Loss 3.3969e+00 (3.9051e+00)	Acc@1  27.73 ( 28.05)	Acc@5  82.03 ( 76.45)
 * Acc@1 28.022 Acc@5 73.963
EPOCH 48 ENDED: LOSS train 3.8475734048933203 LOSS val 4.189121214889576 Train Acc 28.365652084350586 Val Acc 28.022396087646484
Epoch: [48][ 0/36]	Time  7.496 ( 7.496)	Data  7.086 ( 7.086)	Loss 4.2904e+00 (4.2904e+00)	Acc@1  25.00 ( 25.00)	Acc@5  73.05 ( 73.05)
Epoch: [48][10/36]	Time  0.496 ( 0.982)	Data  0.344 ( 0.807)	Loss 3.5219e+00 (3.6583e+00)	Acc@1  22.66 ( 27.34)	Acc@5  75.78 ( 76.53)
Epoch: [48][20/36]	Time  0.714 ( 0.698)	Data  0.555 ( 0.533)	Loss 3.0449e+00 (3.6040e+00)	Acc@1  35.94 ( 27.86)	Acc@5  84.77 ( 77.12)
Epoch: [48][30/36]	Time  0.441 ( 0.583)	Data  0.290 ( 0.423)	Loss 3.4562e+00 (3.6040e+00)	Acc@1  30.08 ( 27.95)	Acc@5  72.66 ( 76.83)
 * Acc@1 27.081 Acc@5 73.607
EPOCH 49 ENDED: LOSS train 3.549212274075875 LOSS val 3.9051431263333574 Train Acc 28.02216148376465 Val Acc 27.08068084716797
Epoch: [49][ 0/3

Epoch: [59][30/36]	Time  0.152 ( 0.582)	Data  0.001 ( 0.422)	Loss 2.7936e+00 (2.5499e+00)	Acc@1  28.12 ( 31.51)	Acc@5  79.69 ( 79.51)
 * Acc@1 30.797 Acc@5 79.893
EPOCH 60 ENDED: LOSS train 2.5695287156303173 LOSS val 2.543824288583524 Train Acc 31.335180282592773 Val Acc 30.796640396118164
Epoch: [60][ 0/36]	Time  7.693 ( 7.693)	Data  7.371 ( 7.371)	Loss 2.3448e+00 (2.3448e+00)	Acc@1  37.11 ( 37.11)	Acc@5  79.30 ( 79.30)
Epoch: [60][10/36]	Time  0.155 ( 1.010)	Data  0.000 ( 0.837)	Loss 2.5415e+00 (2.3698e+00)	Acc@1  24.61 ( 32.07)	Acc@5  78.91 ( 79.76)
Epoch: [60][20/36]	Time  0.886 ( 0.721)	Data  0.733 ( 0.558)	Loss 2.4575e+00 (2.3077e+00)	Acc@1  26.17 ( 33.07)	Acc@5  77.73 ( 80.99)
Epoch: [60][30/36]	Time  0.151 ( 0.604)	Data  0.000 ( 0.445)	Loss 2.2634e+00 (2.2907e+00)	Acc@1  33.20 ( 32.72)	Acc@5  83.59 ( 80.91)
 * Acc@1 28.786 Acc@5 78.850
EPOCH 61 ENDED: LOSS train 2.287807025473534 LOSS val 2.6924618650374748 Train Acc 32.86426544189453 Val Acc 28.78594970703125
Epoch: [61][ 0/3

Epoch: [71][30/36]	Time  0.151 ( 0.574)	Data  0.000 ( 0.410)	Loss 1.8531e+00 (1.9449e+00)	Acc@1  39.06 ( 36.77)	Acc@5  84.38 ( 83.67)
 * Acc@1 34.004 Acc@5 81.598
EPOCH 72 ENDED: LOSS train 1.9460276215888788 LOSS val 2.0486279622869477 Train Acc 36.66482162475586 Val Acc 34.003562927246094
Epoch: [72][ 0/36]	Time  7.465 ( 7.465)	Data  7.012 ( 7.012)	Loss 1.9183e+00 (1.9183e+00)	Acc@1  32.42 ( 32.42)	Acc@5  83.98 ( 83.98)
Epoch: [72][10/36]	Time  0.152 ( 0.973)	Data  0.001 ( 0.795)	Loss 1.9272e+00 (1.9837e+00)	Acc@1  38.28 ( 35.30)	Acc@5  83.20 ( 82.32)
Epoch: [72][20/36]	Time  1.159 ( 0.717)	Data  1.005 ( 0.545)	Loss 1.8610e+00 (1.9677e+00)	Acc@1  37.50 ( 35.64)	Acc@5  86.72 ( 83.05)
Epoch: [72][30/36]	Time  0.151 ( 0.599)	Data  0.000 ( 0.432)	Loss 2.0266e+00 (1.9590e+00)	Acc@1  29.69 ( 35.64)	Acc@5  83.20 ( 83.33)
 * Acc@1 33.953 Acc@5 81.166
EPOCH 73 ENDED: LOSS train 1.953398927413856 LOSS val 2.03623569491074 Train Acc 36.07756423950195 Val Acc 33.952659606933594
Epoch: [73][ 0/36

Epoch: [83][30/36]	Time  0.151 ( 0.585)	Data  0.000 ( 0.423)	Loss 1.9335e+00 (1.8511e+00)	Acc@1  36.33 ( 37.95)	Acc@5  81.64 ( 83.97)
 * Acc@1 35.760 Acc@5 83.075
EPOCH 84 ENDED: LOSS train 1.8568755827319918 LOSS val 1.9464833615359596 Train Acc 37.783935546875 Val Acc 35.759735107421875
Epoch: [84][ 0/36]	Time  7.622 ( 7.622)	Data  7.155 ( 7.155)	Loss 1.7339e+00 (1.7339e+00)	Acc@1  37.11 ( 37.11)	Acc@5  86.72 ( 86.72)
Epoch: [84][10/36]	Time  0.151 ( 0.951)	Data  0.000 ( 0.771)	Loss 1.8063e+00 (1.8280e+00)	Acc@1  37.89 ( 38.64)	Acc@5  83.20 ( 84.55)
Epoch: [84][20/36]	Time  0.737 ( 0.675)	Data  0.586 ( 0.508)	Loss 1.7621e+00 (1.8382e+00)	Acc@1  40.23 ( 38.39)	Acc@5  83.20 ( 83.98)
Epoch: [84][30/36]	Time  0.151 ( 0.577)	Data  0.000 ( 0.416)	Loss 1.9149e+00 (1.8326e+00)	Acc@1  38.28 ( 38.61)	Acc@5  83.98 ( 84.34)
 * Acc@1 35.607 Acc@5 83.075
EPOCH 85 ENDED: LOSS train 1.8336381215468007 LOSS val 1.9366622191405107 Train Acc 38.614959716796875 Val Acc 35.607025146484375
Epoch: [85][ 0/