In [1]:
import timm
from torchsummary import summary
import torch.nn as nn

In [2]:
# Build the 'tf_efficientnetv2_m' backbone through timm without loading
# pretrained weights, then move it to the GPU.
# NOTE(review): num_classes=0 is passed straight to timm; presumably it asks
# for a model with no classification head -- confirm against the timm docs.
encoder = timm.create_model(
    'tf_efficientnetv2_m',
    pretrained=False,
    num_classes=0,
)
encoder = encoder.cuda()

In [3]:
# Print torchsummary's per-layer table (output shape and parameter count)
# for an input of 3 x 112 x 122.
# NOTE(review): the training transforms further down resize/crop to 112, so
# the 122 here may be a typo for 112 -- confirm.
summary(encoder,(3,112,122))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
        Conv2dSame-1           [-1, 24, 56, 61]             648
       BatchNorm2d-2           [-1, 24, 56, 61]              48
              SiLU-3           [-1, 24, 56, 61]               0
            Conv2d-4           [-1, 24, 56, 61]           5,184
       BatchNorm2d-5           [-1, 24, 56, 61]              48
              SiLU-6           [-1, 24, 56, 61]               0
         ConvBnAct-7           [-1, 24, 56, 61]               0
            Conv2d-8           [-1, 24, 56, 61]           5,184
       BatchNorm2d-9           [-1, 24, 56, 61]              48
             SiLU-10           [-1, 24, 56, 61]               0
        ConvBnAct-11           [-1, 24, 56, 61]               0
           Conv2d-12           [-1, 24, 56, 61]           5,184
      BatchNorm2d-13           [-1, 24, 56, 61]              48
             SiLU-14           [-1, 24,

In [4]:
# Bare expression: the notebook displays the module's repr (its layer tree).
encoder

EfficientNet(
  (conv_stem): Conv2dSame(3, 24, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): ConvBnAct(
        (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
      )
      (1): ConvBnAct(
        (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
      )
      (2): ConvBnAct(
        (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inp

In [5]:
class Identity(nn.Module):
    """Pass-through module: ``forward`` hands back its input unchanged."""

    def __init__(self):
        # Nothing to register; only the base nn.Module initialisation runs.
        super().__init__()

    def forward(self, x):
        """Return ``x`` itself, without copying or modifying it."""
        return x

In [6]:
# Swap the encoder's `classifier` attribute for the pass-through module,
# presumably so the network outputs features instead of class scores.
# NOTE(review): the encoder was created with num_classes=0, which may already
# leave it without a classification head -- confirm this override is needed.
encoder.classifier = Identity()

In [7]:
# Print the per-layer summary again now that `classifier` has been replaced,
# using the same 3 x 112 x 122 input size as the earlier summary call.
summary(encoder,(3,112,122))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
        Conv2dSame-1           [-1, 24, 56, 61]             648
       BatchNorm2d-2           [-1, 24, 56, 61]              48
              SiLU-3           [-1, 24, 56, 61]               0
            Conv2d-4           [-1, 24, 56, 61]           5,184
       BatchNorm2d-5           [-1, 24, 56, 61]              48
              SiLU-6           [-1, 24, 56, 61]               0
         ConvBnAct-7           [-1, 24, 56, 61]               0
            Conv2d-8           [-1, 24, 56, 61]           5,184
       BatchNorm2d-9           [-1, 24, 56, 61]              48
             SiLU-10           [-1, 24, 56, 61]               0
        ConvBnAct-11           [-1, 24, 56, 61]               0
           Conv2d-12           [-1, 24, 56, 61]           5,184
      BatchNorm2d-13           [-1, 24, 56, 61]              48
             SiLU-14           [-1, 24,

          Conv2d-471           [-1, 1824, 1, 1]         140,448
   SqueezeExcite-472           [-1, 1824, 4, 4]               0
          Conv2d-473            [-1, 304, 4, 4]         554,496
     BatchNorm2d-474            [-1, 304, 4, 4]             608
InvertedResidual-475            [-1, 304, 4, 4]               0
          Conv2d-476           [-1, 1824, 4, 4]         554,496
     BatchNorm2d-477           [-1, 1824, 4, 4]           3,648
            SiLU-478           [-1, 1824, 4, 4]               0
          Conv2d-479           [-1, 1824, 4, 4]          16,416
     BatchNorm2d-480           [-1, 1824, 4, 4]           3,648
            SiLU-481           [-1, 1824, 4, 4]               0
          Conv2d-482             [-1, 76, 1, 1]         138,700
            SiLU-483             [-1, 76, 1, 1]               0
          Conv2d-484           [-1, 1824, 1, 1]         140,448
   SqueezeExcite-485           [-1, 1824, 4, 4]               0
          Conv2d-486            [-1, 304

In [8]:
import Model as md
import data as dta
import torch
import torch.optim as optim
from pytorch_metric_learning import losses
from torchvision import transforms as T
from torch.utils.data import DataLoader
from absl import app
from absl import flags
from absl import logging
import os
from tqdm import tqdm
from torchlars import LARS
from torch.cuda import amp
import pandas as pd
import math

In [9]:
# Instantiate the projection head from the local `Model` module and move it
# to the GPU.
# NOTE(review): 1280 is assumed to be the encoder's output feature width
# that the head consumes -- confirm against Model.Projection_Head.
proj_head = md.Projection_Head(1280).cuda()

In [10]:
def save_model(encoder, projection_head, epoch_number, optimizer, scheduler):
    """Write a training checkpoint to ``model/model.pt``.

    The checkpoint is a dict with the keys ``'encoder'``,
    ``'projection_head'``, ``'optimizer'`` and ``'scheduler'`` (each the
    corresponding object's ``state_dict()``) plus ``'epoch'``.
    The same path is used on every call, so an existing checkpoint is
    overwritten.

    Args:
        encoder: object exposing ``state_dict()`` (the backbone network).
        projection_head: object exposing ``state_dict()``.
        epoch_number: value stored under the ``'epoch'`` key.
        optimizer: object exposing ``state_dict()``.
        scheduler: object exposing ``state_dict()``.
    """
    # torch.save does not create missing parent directories.
    os.makedirs('model', exist_ok=True)
    torch.save({
        'encoder': encoder.state_dict(),
        'projection_head': projection_head.state_dict(),
        # Use the argument, not whatever global `epoch` happens to exist.
        'epoch': epoch_number,
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict()
    }, 'model/model.pt')

In [11]:
# One optimizer covers both networks: SGD over the encoder and projection-head
# parameters, wrapped by torchlars' LARS.
base_optimizer = optim.SGD(
    [*encoder.parameters(), *proj_head.parameters()],
    lr=0.35,
    momentum=0.9,
    weight_decay=1e-4,
)
optimizer = LARS(base_optimizer, trust_coef=0.001)

# NT-Xent contrastive loss from pytorch_metric_learning.
ntxent_loss = losses.NTXentLoss(temperature=0.15)

In [12]:
# Augmentation pipeline; the transforms run in the order listed.
transf = T.Compose([
    # Deterministic geometry: central 520-pixel crop, then resize to 112.
    T.CenterCrop(520),
    T.Resize(112),
    # Random flips (torchvision's default probability).
    T.RandomVerticalFlip(),
    T.RandomHorizontalFlip(),
    # With probability 0.8: colour jitter followed by a random resized crop.
    T.RandomApply(
        [
            T.ColorJitter(
                brightness=(0.65, 1.5),
                contrast=(0.65, 1.3),
                saturation=(0.60, 1.3),
                hue=0.2,
            ),
            T.RandomResizedCrop(112, scale=(0.2, 1.0)),
        ],
        p=0.8,
    ),
    T.RandomGrayscale(p=0.2),
    # NOTE(review): this is the second horizontal flip in the pipeline;
    # confirm it is intentional.
    T.RandomHorizontalFlip(p=0.5),
    # With probability 0.5: Gaussian blur with a 3x3 kernel.
    T.RandomApply(
        [T.GaussianBlur(kernel_size=3, sigma=(0.1, 5.1))],
        p=0.5,
    ),
    T.ToTensor(),
])

In [13]:
# Dataset from the local `data` module: a CSV index plus an image folder,
# with the augmentation pipeline attached as its transform.
astro_ds = dta.AstroDataset(
    'nair_unbalanced_train.csv',
    'imagenes_clasificadas_nair/',
    transform=transf,
)

# Despite its name this is the DataLoader: shuffled batches of 64 samples,
# loaded by 6 worker processes.
dataset_astro = DataLoader(
    astro_ds,
    batch_size=64,
    num_workers=6,
    shuffle=True,
)

In [14]:
# Cosine-anneal the learning rate across the whole run. The training loop
# calls scheduler.step() once per batch, so the horizon has to equal the total
# number of optimizer steps: 5 epochs x len(dataset_astro) batches per epoch.
# The earlier estimate, len(astro_ds) * 5 // 64 + 1, hard-coded the batch size
# a second time and could come out up to 4 steps short (or 1 step long),
# letting the schedule run past its end during the final batches.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5 * len(dataset_astro))

# Gradient scaler for mixed-precision training.
scaler = amp.GradScaler()

# One row per epoch is appended to this frame by the training loop.
df = pd.DataFrame(columns=['Epoch','ContrastiveLoss','ContrastiveAccuracy'])

In [15]:
# Contrastive (NT-Xent) pre-training loop with mixed precision.
# Per batch: split the loaded tensor into two views, embed both with the
# encoder + projection head, compute the NT-Xent loss with one pseudo-label
# per pair, and take an optimizer + scheduler step. Per epoch: overwrite the
# checkpoint and append mean loss / contrastive accuracy to `df`.
num_epochs = 5
num_batches = len(dataset_astro)
for epoch in range(0, num_epochs):
    acc_epoc = 0
    epoch_loss = 0
    tqdm_loop = tqdm(enumerate(dataset_astro), total=num_batches, leave=True)
    encoder.train()
    proj_head.train()
    for batch_idx, data in tqdm_loop:
        data = data.cuda()
        # Split dim 1 into 3-channel chunks; exactly two chunks are expected.
        # NOTE(review): assumes `data` is (batch, 6, H, W) with the two
        # augmented views stacked on the channel axis -- confirm in AstroDataset.
        # The chunks are views of `data`, so they are already on the GPU.
        transformed_img1, transformed_img2 = torch.split(data, 3, dim=1)
        inputs = torch.cat((transformed_img1, transformed_img2), 0)
        optimizer.zero_grad()
        with amp.autocast():
            projection = proj_head(encoder(inputs))
            # Row i of each view gets label i, so the two views of one image
            # form a positive pair.
            pseudolabels = torch.arange(transformed_img1.size(0)).cuda()
            pseudolabels = torch.cat([pseudolabels, pseudolabels], dim=0)
            loss = ntxent_loss(projection, pseudolabels)
            # Monitoring only: no autograd graph is needed for the accuracy.
            with torch.no_grad():
                half = projection.size(0)//2
                hiddens = torch.split(projection, [half, half], dim=0)
                # Similarity of every first-view row to every second-view row.
                # The 1/0.1 scale does not change the argmax.
                logits = torch.matmul(hiddens[0], torch.transpose(hiddens[1], 0, 1))/0.1
                contrastive_acc = torch.argmax(logits, dim=1)
                # Fraction of first-view rows whose best match is their own pair.
                contrastive_acc = torch.mean(torch.eq(pseudolabels[:half], contrastive_acc).float())
        # Read the scalar loss once; it is reused for the progress bar.
        batch_loss = loss.item()
        acc_epoc += contrastive_acc.item()
        epoch_loss += batch_loss
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # The LR schedule advances once per batch, not once per epoch.
        scheduler.step()
        # Update the progress bar.
        tqdm_loop.set_description(f'Epoch [{epoch}/{num_epochs}]')
        tqdm_loop.set_postfix(loss = batch_loss)
    save_model(encoder, proj_head, epoch, optimizer, scheduler)
    acc_epoc = acc_epoc/num_batches
    epoch_loss /= num_batches
    d_list = [epoch, epoch_loss, acc_epoc]
    df.loc[len(df), :] = d_list
    print('Epoch: {}, Loss: {}, Contrastive Accuracy: {}'.format(epoch, epoch_loss, acc_epoc*100))

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1005.)
  p.grad.add_(weight_decay, p.data)
Epoch [0/5]: 100%|██████████| 147/147 [00:31<00:00,  4.63it/s, loss=3.2] 


Epoch: 0, Loss: 4.950950916121606, Contrastive Accuracy: 2.1583633459344203


Epoch [1/5]: 100%|██████████| 147/147 [00:30<00:00,  4.75it/s, loss=2.95]


Epoch: 1, Loss: 4.3200612343898435, Contrastive Accuracy: 2.333433373647482


Epoch [2/5]: 100%|██████████| 147/147 [00:30<00:00,  4.77it/s, loss=2.9] 


Epoch: 2, Loss: 4.111279161608949, Contrastive Accuracy: 3.053721491576863


Epoch [3/5]: 100%|██████████| 147/147 [00:30<00:00,  4.78it/s, loss=2.61]


Epoch: 3, Loss: 3.910673258255939, Contrastive Accuracy: 3.739620856687325


Epoch [4/5]: 100%|██████████| 147/147 [00:31<00:00,  4.68it/s, loss=2.1] 


Epoch: 4, Loss: 3.567338619102426, Contrastive Accuracy: 6.388805522805169
