In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import CIFAR100
import numpy as np
from resnet18 import ResNet18_NoFC, ProjectionHead, BasicBlock, count_parameters

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


cuda


In [38]:
class SimCLRTransform:
    def __init__(self, size):
        self.transform = transforms.Compose([
            transforms.RandomResizedCrop(size=size),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply([
                transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)
            ], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        ])

    def __call__(self, x):
        return self.transform(x), self.transform(x)


class CIFAR100SimCLR(Dataset):
    def __init__(self, root='./data', train=True, transform=None):
        self.dataset = CIFAR100(root=root, train=train, download=True, transform=transform)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        img, _ = self.dataset[idx]
        return img

simclr_transform = SimCLRTransform(5) # cifar 
train_dataset = CIFAR100SimCLR(train=True, transform=simclr_transform)
train_loader = DataLoader(train_dataset, batch_size=4000, shuffle=True, num_workers=0)


    
def nt_xent_loss(z_i, z_j, temperature):
    """
    Calculates the NT-Xent loss.
    z_i, z_j are the representations of two augmentations of the same image, 
    and should be normalized.
    """
    batch_size = z_i.size(0)

    z = torch.cat((z_i, z_j), dim=0)
    sim_matrix = torch.exp(torch.mm(z, z.T) / temperature)

    mask = torch.eye(batch_size, dtype=torch.bool).to(z.device)
    mask = mask.repeat(2, 2)
    sim_matrix = sim_matrix.masked_select(~mask).view(2 * batch_size, -1)

    positives = torch.exp(torch.sum(z_i * z_j, dim=-1) / temperature).repeat(2)
    negatives = sim_matrix.sum(dim=-1)

    loss = -torch.log(positives / negatives).mean()
    return loss

num_filters = [32, 32, 64, 128, 256]  # Example filter numbers for each layer
model = ResNet18_NoFC(BasicBlock, [2, 2, 2, 2], num_filters).to(device)
projection_head = ProjectionHead(input_dim=num_filters[-1], hidden_dim=512, output_dim=128).to(device)

print(count_parameters(model))

def train(train_loader, model, projection_head, optimizer, temperature=0.15, epochs=100):
    for epoch in range(epochs):
        for (images1, images2) in train_loader:
            # Concatenate the images from the two augmentations
            images = torch.cat([images1, images2], dim=0)
            images = images.to(device)

            optimizer.zero_grad()

            features = model(images)
            projections = projection_head(features)
            projections = F.normalize(projections, dim=1)

            loss = nt_xent_loss(projections[:len(images)//2], projections[len(images)//2:], temperature)
            
            loss.backward()
            optimizer.step()

            # Print loss (or log it)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

optimizer = torch.optim.AdamW(list(model.parameters()) + list(projection_head.parameters()), lr=1e-2, weight_decay=1e-4)
train(train_loader, model, projection_head, optimizer)


Files already downloaded and verified
2798880
Epoch [1/100], Loss: 8.2444
Epoch [2/100], Loss: 8.2046
Epoch [3/100], Loss: 8.1249
Epoch [4/100], Loss: 8.0682
Epoch [5/100], Loss: 8.0829
Epoch [6/100], Loss: 8.0600
Epoch [7/100], Loss: 7.9920
Epoch [8/100], Loss: 7.9826
Epoch [9/100], Loss: 7.9687
Epoch [10/100], Loss: 7.8877
Epoch [11/100], Loss: 7.8565
Epoch [12/100], Loss: 7.8409
Epoch [13/100], Loss: 7.8273
Epoch [14/100], Loss: 7.8308
Epoch [15/100], Loss: 7.7679
Epoch [16/100], Loss: 7.7329
Epoch [17/100], Loss: 7.7735
Epoch [18/100], Loss: 7.7261
Epoch [19/100], Loss: 7.7096
Epoch [20/100], Loss: 7.7342
Epoch [21/100], Loss: 7.6978
Epoch [22/100], Loss: 7.6557
Epoch [23/100], Loss: 7.6638
Epoch [24/100], Loss: 7.6612
Epoch [25/100], Loss: 7.5904
Epoch [26/100], Loss: 7.6269
Epoch [27/100], Loss: 7.5931
Epoch [28/100], Loss: 7.5260
Epoch [29/100], Loss: 7.5452
Epoch [30/100], Loss: 7.5299
Epoch [31/100], Loss: 7.5005
Epoch [32/100], Loss: 7.4932
Epoch [33/100], Loss: 7.4039
Epoch 

In [39]:
torch.save(model.cpu(),"resnet18_simclr_cifar100_bs4000.pt")

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import CIFAR100
import numpy as np
from resnet18 import ResNet18_NoFC, ProjectionHead, BasicBlock, count_parameters
import os, random, itertools


def set_seed(seed: int = 42) -> None:
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")


class SimCLRTransform:
    def __init__(self, size):
        self.transform = transforms.Compose([
            transforms.RandomResizedCrop(size=size),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply([
                transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)
            ], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        ])

    def __call__(self, x):
        return self.transform(x), self.transform(x)


class CIFAR100SimCLR(Dataset):
    def __init__(self, root='./data', train=True, transform=None):
        self.dataset = CIFAR100(root=root, train=train, download=True, transform=transform)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        img, _ = self.dataset[idx]
        return img
    
def nt_xent_loss(z_i, z_j, temperature):
    """
    Calculates the NT-Xent loss.
    z_i, z_j are the representations of two augmentations of the same image, 
    and should be normalized.
    """
    batch_size = z_i.size(0)

    z = torch.cat((z_i, z_j), dim=0)
    sim_matrix = torch.exp(torch.mm(z, z.T) / temperature)

    mask = torch.eye(batch_size, dtype=torch.bool).to(z.device)
    mask = mask.repeat(2, 2)
    sim_matrix = sim_matrix.masked_select(~mask).view(2 * batch_size, -1)

    positives = torch.exp(torch.sum(z_i * z_j, dim=-1) / temperature).repeat(2)
    negatives = sim_matrix.sum(dim=-1)

    loss = -torch.log(positives / negatives).mean()
    return loss

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

filter_list = np.array([64,64,128,256,512])

# divisor = list(np.geomspace(1, 32, 40))

# seed_list = [0,1,2]

# para_comb = list(itertools.product(seed_list, divisor))

(seed, d) = (0, 1)

num_filters = (filter_list / d).astype(int)  # [32, 32, 64, 128, 256]  filter numbers for each layer
set_seed(seed)

simclr_transform = SimCLRTransform(5) # cifar 
train_dataset = CIFAR100SimCLR(train=True, transform=simclr_transform)
val_dataset =  CIFAR100SimCLR(train=False, transform=simclr_transform)
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True, num_workers=0)
val_loader =  DataLoader(val_dataset, batch_size=5000, shuffle=False, num_workers=0)

num_epochs = 100
model = ResNet18_NoFC(BasicBlock, [2, 2, 2, 2], num_filters).to(device)
projection_head = ProjectionHead(input_dim=num_filters[-1], hidden_dim=512, output_dim=128).to(device)

print("num_parameters:",count_parameters(model))

def train(train_loader, model, projection_head, optimizer, scheduler, temperature=0.15, epochs=num_epochs):
    for epoch in range(epochs):
        for (images1, images2) in train_loader:
            model.train()
            # Concatenate the images from the two augmentations
            images = torch.cat([images1, images2], dim=0)
            images = images.to(device)

            optimizer.zero_grad()

            features = model(images)
            projections = projection_head(features)
            projections = F.normalize(projections, dim=1)

            loss = nt_xent_loss(projections[:len(images)//2], projections[len(images)//2:], temperature)
            
            loss.backward()
            optimizer.step()
            
        # Validation
        val_loss_list = []
        for (images1, images2) in val_loader:
            model.eval()
            # Concatenate the images from the two augmentations
            images = torch.cat([images1, images2], dim=0)
            images = images.to(device)

            features = model(images)
            projections = projection_head(features)
            projections = F.normalize(projections, dim=1)

            val_loss = nt_xent_loss(projections[:len(images)//2], projections[len(images)//2:], temperature)
            val_loss_list.append(val_loss.item())
        scheduler.step(np.mean(val_loss_list))
            
            # Print loss (or log it)
        
        print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}')
        
    return model

optimizer = torch.optim.AdamW(list(model.parameters()) + list(projection_head.parameters()), lr=1e-2, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
model = train(train_loader, model, projection_head, optimizer, scheduler)

torch.save(model,f"resnet18_simclr_cifar100_parameters{count_parameters(model)}_seed{seed}_bs512.pt")

# torch.save(model,f"~/scratch/models/resnet18_simclr_cifar100_parameters{count_parameters(model)}_seed{seed}.pt")

cuda
Random seed set as 0
Files already downloaded and verified
Files already downloaded and verified
num_parameters: 11176512
Epoch [1/100], Train Loss: 6.2897, Val Loss: 8.9863
Epoch [2/100], Train Loss: 6.0888, Val Loss: 8.8555
Epoch [3/100], Train Loss: 6.0500, Val Loss: 8.8296
Epoch [4/100], Train Loss: 6.0948, Val Loss: 8.8159
Epoch [5/100], Train Loss: 5.8345, Val Loss: 8.6807
Epoch [6/100], Train Loss: 6.0102, Val Loss: 8.6974
Epoch [7/100], Train Loss: 5.7252, Val Loss: 8.5794
Epoch [8/100], Train Loss: 5.6922, Val Loss: 8.5957
Epoch [9/100], Train Loss: 5.7884, Val Loss: 8.5025
Epoch [10/100], Train Loss: 5.6733, Val Loss: 8.4335
Epoch [11/100], Train Loss: 5.5639, Val Loss: 8.4713
Epoch [12/100], Train Loss: 5.6091, Val Loss: 8.5019
Epoch [13/100], Train Loss: 5.6258, Val Loss: 8.5079
Epoch [14/100], Train Loss: 5.5456, Val Loss: 8.3049
Epoch [15/100], Train Loss: 5.5884, Val Loss: 8.3944
Epoch [16/100], Train Loss: 5.4775, Val Loss: 8.3046
Epoch [17/100], Train Loss: 5.5451

In [3]:
val_loss


NameError: name 'val_loss' is not defined