[Source](https://github.com/giakoumoglou/classification/blob/main/notebooks/main_simclr.ipynb)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torchvision import datasets, transforms

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from src_supcon import TwoCropTransform, SupConLoss, supcon_train, supcon_train_step

In [2]:

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
if device=='cuda':
    print(torch.cuda.get_device_properties(0).name)


LOAD_MODELS = True

cpu


In [3]:
batch_size = 64

dataset = datasets.MNIST('./data', download=True, train=True, transform=TwoCropTransform())
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [4]:

batch, label = next(iter(dataloader))

fig, axs = plt.subplots(8, 3, figsize=(3, 8))

for i in range(8):
    orig = batch[2][i][0]
    view_1 = batch[0][i][0]
    view_2 = batch[1][i][0]

    axs[i, 0].imshow(orig.numpy(), cmap='gray')
    if i==0:
        axs[i, 0].set_title('Original')
    axs[i, 0].axis('off')

    for j, view in enumerate([view_1, view_2], 1):
        
        axs[i, j].imshow(view.numpy(), cmap='gray')
        axs[i, j].axis('off')
        if i==0:
            axs[i, j].set_title('View {}'.format(j))

#remove space between subplots
plt.subplots_adjust(wspace=0, hspace=0)
plt.show()





In [5]:
class Encoder(nn.Module):

    "Encoder network"
    def __init__(self):
        super(Encoder, self).__init__()
    
        image_channels = 1
        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 16, kernel_size=2, stride=2),
            nn.ReLU(),

            nn.Conv2d(16, 32, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Flatten()
        )

    def forward(self, x):
        return self.encoder(x)
    
class ProjectionHead(nn.Module):
    "Projection head"
    def __init__(self, input_dim=288, hidden_dim=128, output_dim=64):
        super(ProjectionHead, self).__init__()
        self.projection_head = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        return self.projection_head(x)
    

class SupCon(nn.Module):
    """encoder + projection head"""
    def __init__(self, encoder, head):
        super(SupCon, self).__init__()
        self.encoder = encoder
        self.head = head

    def forward(self, x):
        x = self.encoder(x)
        x = F.normalize(self.head(x), dim=1)
        return x


In [6]:

lr = 1e-3
temperature = 0.07


encoder = Encoder().to(device)
head = ProjectionHead().to(device)
model = SupCon(encoder=encoder, head = head).to(device)

if LOAD_MODELS:
    try:
        #vae.load_state_dict(torch.load('./models/vae'))
        if device == 'cuda':
            model.load_state_dict(torch.load('models/supcon'))
        if device == 'cpu':
            model.load_state_dict(torch.load('models/supcon', map_location=torch.device('cpu')))
        history = pd.read_csv('models/supcon_history.csv')
        #history to dict
        history = history.to_dict(orient='list')
        print('Model loaded')

    except:
        print('Could not load model')
        history = None
        pass



criterion = SupConLoss(temperature=temperature, device=device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)



Could not load model


In [7]:
n_epochs = 5

history = supcon_train(n_epochs, 
                       model, criterion, optimizer, scheduler=scheduler,
                           dataloader_train=dataloader,
                           history=history,
                           device=device)


Epochs:  60%|██████    | 3/5 [02:50<01:53, 56.67s/it, loss=3.66]


Interrupted


ValueError: All arrays must be of the same length

In [8]:
plt.figure(figsize=(10, 5))
plt.plot(history['train_loss'], label='train loss')
plt.legend()

TypeError: 'NoneType' object is not subscriptable

-----

In [None]:
def pretraining(epoch, model, contrastive_loader, optimizer, criterion):
    "Contrastive pre-training over an epoch"

    model.train()
    for batch_idx, (data,labels) in enumerate(contrastive_loader):
        data = torch.cat([data[0], data[1]], dim=0)
        if torch.cuda.is_available():
            data,labels = data.cuda(), labels.cuda()
        data, labels = torch.autograd.Variable(data,False), torch.autograd.Variable(labels)
        bsz = labels.shape[0]
        features = model(data)
        f1, f2 = torch.split(features, [bsz, bsz], dim=0)
        features = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1)
        
        loss = criterion(features)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return loss.item()

# 4. Main

In [None]:
import os
def main():

    num_epochs = 5
    use_scheduler = True
    save_file = os.path.join('./models/', 'simclr.pth')
    if not os.path.isdir('./results/'):
         os.makedirs('./results/')

    contrastive_transform = transforms.Compose([
                                    transforms.RandomResizedCrop(size=(28, 28), scale = (0.5, 0.95)),
                                    transforms.RandomRotation(degrees=45),
                                    transforms.ToTensor(),
                                       ])
    

    train_transform = transforms.Compose([
                                       transforms.ToTensor(),
                                       ])
    valid_transform = transforms.Compose([
                                       transforms.ToTensor(),
                                       ])
    

    contrastive_set = datasets.MNIST('./data', download=True, train=True, transform=TwoCropTransform(contrastive_transform))

    contrastive_loader = torch.utils.data.DataLoader(contrastive_set, batch_size=64, shuffle=True)

    # Part 1
    encoder = Encoder()
    head = ProjectionHead()
    model = SupCon(encoder, head)

    criterion = SupConLoss(temperature=0.07)
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.9)

    contrastive_loss, contrastive_lr = [], []

    for epoch in range(1, num_epochs+1):
        print('Epoch:', epoch, '/', num_epochs)
        loss  = pretraining(epoch, model, contrastive_loader, optimizer, criterion)
        if use_scheduler:
            scheduler.step()
        contrastive_loss.append(loss)
        print('-->Loss', loss)


    plt.plot(range(1,len(contrastive_loss)+1),contrastive_loss, color='b', label = 'loss')
    plt.legend(), plt.ylabel('loss'), plt.xlabel('epochs'), plt.title('Loss'), plt.show()
    plt.xlabel('epochs'), plt.title('Accuracy'), plt.show()

In [None]:
%matplotlib inline
main()