In [None]:
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, random_split
from torchvision import datasets, transforms

In [None]:
from model import CNN
from losses import CombinedLoss

In [None]:
def get_device():
    """Select the compute device to run on and announce the choice on stdout.

    Preference order: CUDA, then MPS (only when it is both available and
    built into this torch install), then CPU.

    Returns:
        torch.device: the selected device.
    """
    # First choice: NVIDIA GPU.
    if torch.cuda.is_available():
        print(f"Using CUDA: {torch.cuda.get_device_name(0)}")
        return torch.device("cuda")

    # Second choice: Apple Silicon GPU through the MPS backend.
    mps_backend = torch.backends.mps
    if mps_backend.is_available() and mps_backend.is_built():
        print("Using MPS (Apple Silicon GPU)")
        return torch.device("mps")

    # Nothing accelerated is usable: fall back to the CPU.
    print("Using CPU")
    return torch.device("cpu")

In [None]:
# Run-wide settings shared by every cell below.
BATCH_SIZE, EPOCHS = 128, 10  # mini-batch size for all loaders / passes over the training split
DEVICE = get_device()         # device that models and batches are moved to

In [None]:
# Per-channel mean / std handed to Normalize in both pipelines, so that
# training and evaluation images are standardised identically.
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2470, 0.2435, 0.2616)

# Training pipeline: random augmentations followed by tensor conversion and normalisation.
train_steps = [
    # Spatial augmentations: random crop resized to 32x32, then random horizontal flip.
    transforms.RandomResizedCrop(size=32, scale=(0.2, 1.0), ratio=(0.75, 1.33)),
    transforms.RandomHorizontalFlip(),
    # Colour augmentations: the jitter is wrapped in RandomApply, so it is applied
    # only with RandomApply's default probability; grayscale is random as well.
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)]
    ),
    transforms.RandomGrayscale(),
    # Blur with a 3x3 kernel.
    transforms.GaussianBlur(kernel_size=3),
    # Convert to a tensor and normalise.
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
]
train_transform = transforms.Compose(train_steps)

# Evaluation pipeline: no augmentation, only tensor conversion and the same normalisation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

In [None]:
# Two views of the same CIFAR-10 training images: one with the augmenting
# transform (for training) and one with the plain transform (for validation).
train_dataset = datasets.CIFAR10(root="data", train=True, download=True, transform=train_transform)
validation_dataset = datasets.CIFAR10(root="data", train=True, download=True, transform=test_transform)

dataset_size = len(train_dataset)
train_ratio, validation_ratio = 0.8, 0.2
train_size = int(train_ratio * dataset_size)
validation_size = dataset_size - train_size

# Draw ONE shuffled permutation of the indices and slice it into two disjoint
# parts. Calling random_split independently on each view (as was done before)
# draws two unrelated permutations, so most validation images were also
# training images. A dedicated, seeded generator keeps the split identical
# across re-runs without touching the global RNG state.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(dataset_size, generator=split_generator).tolist()
train_indices = shuffled_indices[:train_size]
validation_indices = shuffled_indices[train_size:]
assert not set(train_indices) & set(validation_indices), "train/validation splits overlap"

# Both names remain Subset objects, so `.dataset` still exposes the wrapped CIFAR10.
train_dataset = Subset(train_dataset, train_indices)
validation_dataset = Subset(validation_dataset, validation_indices)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

# Held-out test set; evaluation does not depend on sample order, so no shuffling.
test_data = datasets.CIFAR10(root="data", train=False, download=True, transform=test_transform)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

# Human-readable class names, indexed by the integer label.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
# `train_dataset` is a Subset; `.dataset` is the full dataset it draws its samples from.
train_dataset.dataset

In [None]:
# it = iter(train_loader) # create an iterator over the DataLoader
# first = next(it)
# print(first[1]) # in position [1], we get the vector of the labels of the batch
# second = next(it)
# print(second[0].shape)

In [None]:
# labels_map = {0: "Airplane", 1: "Automobile", 2: "Bird", 3: "Cat", 4: "Deer",
#     5: "Dog", 6: "Frog", 7: "Horse", 8: "Ship", 9: "Truck",}
# figure = plt.figure(figsize=(8, 8))
# cols, rows = 3, 3
# for i in range(1, cols * rows + 1):
#     sample_idx = torch.randint(len(dataset), size=(1,)).item()
#     img, label = dataset[sample_idx]
#     figure.add_subplot(rows, cols, i)
#     plt.title(labels_map[label])
#     plt.axis("off")
#     plt.imshow(img.permute(1, 2, 0))
# plt.show()

In [None]:
# The Dataset object the loader iterates over (the `train_dataset` passed to DataLoader).
train_loader.dataset

`train_dataset` is the Dataset, whereas `train_loader` is a wrapper around the Dataset that controls how the data is delivered: it creates mini-batches and handles shuffling, worker processes, iteration, ... $\rightarrow$ `train_loader.dataset` is the Dataset.  
The dataset is composed of (image, class) pairs, where the image is a matrix of pixels and the class $\in \{0, \dots, 9\}$.

In [None]:
# Each sample is an (image, class) pair; element [1] of sample 15 is its class label.
train_loader.dataset[15][1] # class

In [None]:
# Two separate instances of the same architecture, both moved to DEVICE:
# one is trained with cross-entropy only, the other with the combined loss.
classic_model, combined_model = (CNN().to(DEVICE) for _ in range(2))

In [None]:
# Both models are optimised with identical SGD settings so that only the loss differs.
# Alternative kept for reference: optim.Adam(model.parameters(), lr=0.001)
sgd_kwargs = {"lr": 0.01, "momentum": 0.9}
optimizer1 = optim.SGD(classic_model.parameters(), **sgd_kwargs)
optimizer2 = optim.SGD(combined_model.parameters(), **sgd_kwargs)

# Loss for the baseline model (logits vs. labels).
criterion1 = nn.CrossEntropyLoss()
# Loss for the second model; it additionally receives the embeddings.
criterion2 = CombinedLoss(alpha=0.2)  # TODO: optimise alpha

# Number of mini-batches per epoch, used in the training progress message.
total_step = len(train_loader)

Allora, l'idea è quella di implementare la contrastive loss. Ciò significa ridurre la distanza tra vettori simili e aumentare la distanza tra vettori diversi $\rightarrow$ loss in funzione della distanza tra i due vettori: se appartenenti alla stessa classe e bassa distanza, loss bassa, e viceversa.
Contrastive Loss: $$ L_{i, j} = -\log \frac{\exp(sim(z_i, z_j) / \tau)}{\sum_{k\neq i}\exp(sim(z_i, z_k) / \tau)}, \quad\quad\quad \text{where } sim(z_i, z_j) = \frac{z_i\cdot z_j}{||z_i|| \ ||z_j||} $$

SCHERZONE, QUESTA È la NT-Xent, nooooooo  
Noi vogliamo la SupCon: $$ \mathcal L_i = {-1\over |P(i)|}\sum_{p\in P(i)}\log \frac{\exp(sim(z_i, z_p) / \tau)}{\sum_{a\neq i}\exp(sim(z_i, z_a) / \tau)}, $$ where $P(i)$ is the set of positives for anchor $i$.
$\rightarrow$ final loss: $$ L = {1\over B}\sum_i L_i $$

In [None]:
# Train the baseline model with cross-entropy on the classifier outputs only.
progress_template = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'

classic_model.train()
for epoch_number in range(1, EPOCHS + 1):
    # One pass over every mini-batch of the training split.
    for step_number, batch in enumerate(train_loader, start=1):
        # Move the images and their labels to the configured device.
        inputs, targets = (tensor.to(DEVICE) for tensor in batch)

        optimizer1.zero_grad()

        # Forward pass: the model returns a pair; only its second element is used here.
        _, logits = classic_model(inputs)
        batch_loss = criterion1(logits, targets)

        # Backward pass and parameter update.
        batch_loss.backward()
        optimizer1.step()

    # Logged once per epoch; the value shown is the loss of the final mini-batch only.
    print(progress_template.format(epoch_number, EPOCHS, step_number, total_step, batch_loss.item()))

In [None]:
# Train the second model with the combined loss, which receives both the
# embeddings and the classifier outputs.
progress_template = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'

combined_model.train()
for epoch_number in range(1, EPOCHS + 1):
    # One pass over every mini-batch of the training split.
    for step_number, batch in enumerate(train_loader, start=1):
        # Move the images and their labels to the configured device.
        inputs, targets = (tensor.to(DEVICE) for tensor in batch)

        optimizer2.zero_grad()

        # Forward pass: both elements of the model's output feed the loss.
        features, logits = combined_model(inputs)
        batch_loss = criterion2(features, logits, targets)

        # Backward pass and parameter update.
        batch_loss.backward()
        optimizer2.step()

    # Logged once per epoch; the value shown is the loss of the final mini-batch only.
    print(progress_template.format(epoch_number, EPOCHS, step_number, total_step, batch_loss.item()))

In [None]:
# Measure top-1 accuracy of the baseline model on the test loader.
classic_model.eval()  # put the module in evaluation mode

correct = 0  # number of samples whose predicted class equals the label
total = 0    # number of samples evaluated
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        # The model returns a pair; the second element holds the per-class scores.
        _, outputs = classic_model(images)
        # Predicted class = index of the highest score along the class dimension.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated rather than a hard-coded 10000,
# so the message stays correct if the test set or loader changes.
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

In [None]:
# Measure top-1 accuracy of the combined-loss model on the test loader.
combined_model.eval()  # put the module in evaluation mode

correct = 0  # number of samples whose predicted class equals the label
total = 0    # number of samples evaluated
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        # The embeddings (first element) are not needed for accuracy; only the
        # per-class scores (second element) are used.
        _, outputs = combined_model(images)
        # Predicted class = index of the highest score along the class dimension.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated rather than a hard-coded 10000,
# so the message stays correct if the test set or loader changes.
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))