# SimCLR in CIFAR10 with Resnet18 ,EPOCHS=10

## Imports, basic utils, augmentations and Contrastive loss

In [51]:

import torch
import torchvision.models as models
import numpy as np
import os
import torch
import torchvision.transforms as T
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.multiprocessing import cpu_count
import torchvision.transforms as T

def default(val, def_val):
    return def_val if val is None else val

def reproducibility(config):
    SEED = int(config.seed)
    torch.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(SEED)
    if (config.cuda):
        torch.cuda.manual_seed(SEED)


def device_as(t1, t2):
    
    return t1.to(t2.device)


def weights_update(model, checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    model_dict = model.state_dict()
    pretrained_dict = {k: v for k, v in checkpoint['state_dict'].items() if k in model_dict}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print(f'Checkpoint {checkpoint_path} was loaded')
    return model


class Augment:
    def __init__(self, img_size, s=1):
        color_jitter = T.ColorJitter(
            0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s
        )
        
        blur = T.GaussianBlur((3, 3), (0.1, 2.0))

        self.train_transform = T.Compose(
            [
            T.RandomResizedCrop(size=img_size),
            T.RandomHorizontalFlip(p=0.5),  
            T.RandomApply([color_jitter], p=0.8),
            T.RandomApply([blur], p=0.5),
            T.RandomGrayscale(p=0.2),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ]
        )

        self.test_transform = T.Compose(
            [
                T.ToTensor(),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        )

    def __call__(self, x):
        return self.train_transform(x), self.train_transform(x)



def get_cifar10_dataloader(batch_size,transform=None,split="unlabeled", train=True):
    cifar10_dataset = CIFAR10(root="./data", train=train, transform=transform, download=True)
    return DataLoader(dataset=cifar10_dataset, batch_size=batch_size, shuffle=True, num_workers=cpu_count())

import matplotlib.pyplot as plt

def imshow(img):
    mean = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32)
    std = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32)
    unnormalize = T.Normalize((-mean / std).tolist(), (1.0 / std).tolist())
    npimg = unnormalize(img).numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()



class ContrastiveLoss(nn.Module):
    """
    Vanilla Contrastive loss
    """
    def __init__(self, batch_size, temperature=0.5):
        super().__init__()
        self.batch_size = batch_size
        self.temperature = temperature
        self.mask = (~torch.eye(batch_size * 2, batch_size * 2, dtype=bool)).float()

    def calc_similarity_batch(self, a, b):
        representations = torch.cat([a, b], dim=0)
        return F.cosine_similarity(representations.unsqueeze(1), representations.unsqueeze(0), dim=2)

    def forward(self, proj_1, proj_2):
        """
        """
        batch_size = proj_1.shape[0]
        z_i = F.normalize(proj_1, p=2, dim=1)
        z_j = F.normalize(proj_2, p=2, dim=1)

        similarity_matrix = self.calc_similarity_batch(z_i, z_j)

        sim_ij = torch.diag(similarity_matrix, batch_size)
        sim_ji = torch.diag(similarity_matrix, -batch_size)

        positives = torch.cat([sim_ij, sim_ji], dim=0)

        nominator = torch.exp(positives / self.temperature)

        denominator = device_as(self.mask, similarity_matrix) * torch.exp(similarity_matrix / self.temperature)

        all_losses = -torch.log(nominator / torch.sum(denominator, dim=1))
        loss = torch.sum(all_losses) / (2 * self.batch_size)
        return loss



## Add projection Head for embedding and training logic with pytorch lightning model

In [52]:
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR
from torch.optim import SGD, Adam


class AddProjection(nn.Module):
    def __init__(self, config, model=None, mlp_dim=512):
        super(AddProjection, self).__init__()
        embedding_size = config.embedding_size
        self.backbone = default(model, models.resnet18(pretrained=False, num_classes=config.embedding_size))
        mlp_dim = default(mlp_dim, self.backbone.fc.in_features)
        print('Dim MLP input:',mlp_dim)
        self.backbone.fc = nn.Identity()

        # add mlp projection head
        self.projection = nn.Sequential(
            nn.Linear(in_features=mlp_dim, out_features=mlp_dim),
            nn.BatchNorm1d(mlp_dim),
            nn.ReLU(),
            nn.Linear(in_features=mlp_dim, out_features=embedding_size),
            nn.BatchNorm1d(embedding_size),
        )

    def forward(self, x, return_embedding=False):
        embedding = self.backbone(x)
        if return_embedding:
            return embedding
        return self.projection(embedding)



def define_param_groups(model, weight_decay, optimizer_name):
    def exclude_from_wd_and_adaptation(name):
        if 'bn' in name:
            return True
        if optimizer_name == 'lars' and 'bias' in name:
            return True

    param_groups = [
        {
            'params': [p for name, p in model.named_parameters() if not exclude_from_wd_and_adaptation(name)],
            'weight_decay': weight_decay,
            'layer_adaptation': True,
        },
        {
            'params': [p for name, p in model.named_parameters() if exclude_from_wd_and_adaptation(name)],
            'weight_decay': 0.,
            'layer_adaptation': False,
        },
    ]
    return param_groups


class SimCLR_pl(pl.LightningModule):
    def __init__(self, config, model=None, feat_dim=512):
        super().__init__()
        self.config = config
        
        self.model = AddProjection(config, model=model, mlp_dim=feat_dim)

        self.loss = ContrastiveLoss(config.batch_size, temperature=self.config.temperature)

    def forward(self, X):
        return self.model(X)

    def training_step(self, batch, batch_idx):
        (x1, x2), labels = batch
        z1 = self.model(x1)
        z2 = self.model(x2)
        loss = self.loss(z1, z2)
        self.log('Contrastive loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        max_epochs = int(self.config.epochs)
        param_groups = define_param_groups(self.model, self.config.weight_decay, 'adam')
        lr = self.config.lr
        optimizer = Adam(param_groups, lr=lr, weight_decay=self.config.weight_decay)

        print(f'Optimizer Adam, '
              f'Learning Rate {lr}, '
              f'Effective batch size {self.config.batch_size * self.config.gradient_accumulation_steps}')

        scheduler_warmup = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=10, max_epochs=max_epochs,
                                                         warmup_start_lr=0.0)

        return [optimizer], [scheduler_warmup]

## Hyperparameters, and configuration stuff

In [53]:

class Hparams:
    def __init__(self):
        self.epochs = 15 
        self.seed = 77777 
        self.cuda = True 
        self.img_size = 32
        self.save = "./saved_models/" 
        self.load = False 
        self.gradient_accumulation_steps = 5 
        self.batch_size = 200
        self.lr = 3e-4 
        self.weight_decay = 1e-6
        self.embedding_size= 128 
        self.temperature = 0.5 
        self.checkpoint_path = './SimCLR_ResNet18.ckpt' 

## Pretraining main logic

In [54]:
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import GradientAccumulationScheduler, ModelCheckpoint
from torchvision.models import resnet18
import os



available_gpus = torch.cuda.device_count()
save_model_path = os.path.join(os.getcwd(), "saved_models/")
print('available_gpus:', available_gpus)
filename = 'SimCLR_ResNet18_adam_'
resume_from_checkpoint = False
train_config = Hparams()


save_name = filename + str(train_config.epochs) + '.ckpt'

model = SimCLR_pl(train_config, model=resnet18(pretrained=False), feat_dim=512)

transform = Augment(train_config.img_size)
data_loader = get_cifar10_dataloader(train_config.batch_size, transform)

accumulator = GradientAccumulationScheduler(scheduling={0: train_config.gradient_accumulation_steps})
checkpoint_callback = ModelCheckpoint(
    filename=filename,
    dirpath=save_model_path,
    save_last=True,
    save_top_k=2,
    monitor='Contrastive loss_epoch',
    mode='min'
)

if resume_from_checkpoint:
    trainer = Trainer(
        callbacks=[accumulator, checkpoint_callback],
        gpus=available_gpus,
        max_epochs=train_config.epochs,
        resume_from_checkpoint=train_config.checkpoint_path
    )
else:
    trainer = Trainer(
        callbacks=[accumulator, checkpoint_callback],
        gpus=available_gpus,
        max_epochs=train_config.epochs
    )

trainer.fit(model, data_loader)
trainer.save_checkpoint(save_name)

import shutil




available_gpus: 1
Dim MLP input: 512
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  scheduler_warmup = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=10, max_epochs=max_epochs,

  | Name  | Type            | Params
------------------------------------------
0 | model | AddProjection   | 11.5 M
1 | loss  | ContrastiveLoss | 0     
------------------------------------------
11.5 M    Trainable params
0         Non-trainable params
11.5 M    Total params
46.024    Total estimated model params size (MB)


Optimizer Adam, Learning Rate 0.0003, Effective batch size 1000
Epoch 0:   0%|          | 0/250 [00:00<?, ?it/s] 

Epoch 14: 100%|██████████| 250/250 [00:06<00:00, 41.55it/s, loss=5.13, v_num=212, Contrastive loss_step=5.070, Contrastive loss_epoch=5.140]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|██████████| 250/250 [00:06<00:00, 39.01it/s, loss=5.13, v_num=212, Contrastive loss_step=5.070, Contrastive loss_epoch=5.140]


## Save only backbone weights from Resnet18 that are only necessary for fine tuning

In [55]:
model_pl = SimCLR_pl(train_config, model=resnet18(pretrained=False))
model_pl = weights_update(model_pl, "SimCLR_ResNet18_adam_.ckpt")

resnet18_backbone_weights = model_pl.model.backbone
print(resnet18_backbone_weights)
torch.save({
            'model_state_dict': resnet18_backbone_weights.state_dict(),
            }, 'resnet18_backbone_weights.ckpt')

Dim MLP input: 512
Checkpoint SimCLR_ResNet18_adam_.ckpt was loaded
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine

# Fine-tuning from SSL simclr checkpoint

In [56]:
import pytorch_lightning as pl
import torch
from torch.optim import SGD


class SimCLR_eval(pl.LightningModule):
    def __init__(self, lr = None, model=None, linear_eval=False):
        super().__init__()
        self.lr = lr
        self.linear_eval = linear_eval
        if self.linear_eval:
          model.eval()
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(512,10),
        )

        self.model = torch.nn.Sequential(
            model, self.mlp
        )
        self.loss = torch.nn.CrossEntropyLoss()

    def forward(self, X):
        return self.model(X)

    def training_step(self, batch, batch_idx):
        x, y = batch
        z = self.forward(x)
        loss = self.loss(z, y)
        self.log('Cross Entropy loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        predicted = z.argmax(1)
        acc = (predicted == y).sum().item() / y.size(0)
        self.log('Train Acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        z = self.forward(x)
        loss = self.loss(z, y)
        self.log('Val CE loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)

        predicted = z.argmax(1)
        acc = (predicted == y).sum().item() / y.size(0)
        self.log('Val Accuracy', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def configure_optimizers(self):
        if self.linear_eval:
          print(f"\n\n Attention! Linear evaluation \n")
          optimizer = SGD(self.mlp.parameters(), lr=self.lr, momentum=0.9)
        else:
          optimizer = SGD(self.model.parameters(), lr=self.lr, momentum=0.9)
        return [optimizer]


class Hparams:
    def __init__(self):
        self.epochs = 5 
        self.seed = 77777  
        self.cuda = True  
        self.img_size = 32  
        self.save = "./saved_models/"  
        self.gradient_accumulation_steps = 1 
        self.batch_size = 128
        self.lr = 1e-3
        self.embedding_size = 128  
        self.temperature = 0.5  

import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import GradientAccumulationScheduler
import os
from pytorch_lightning.callbacks import ModelCheckpoint
from torchvision.models import resnet18


# general stuff
available_gpus = len([torch.cuda.device(i) for i in range(torch.cuda.device_count())])
train_config = Hparams()
save_model_path = os.path.join(os.getcwd(), "saved_models/")
print('available_gpus:', available_gpus)
filename = 'SimCLR_ResNet18_finetune_'
reproducibility(train_config)
save_name = filename + '_Final.ckpt'

# load resnet backbone
backbone = models.resnet18(pretrained=False)
backbone.fc = nn.Identity()
checkpoint = torch.load('resnet18_backbone_weights.ckpt')
backbone.load_state_dict(checkpoint['model_state_dict'])
model = SimCLR_eval(train_config.lr, model=backbone, linear_eval=False)

# preprocessing and data loaders
transform_preprocess = Augment(train_config.img_size).test_transform
data_loader = get_cifar10_dataloader(train_config.batch_size, transform=transform_preprocess,split='train')
print("Train_dataset",len(data_loader))
data_loader_test = get_cifar10_dataloader(train_config.batch_size,train=False, transform=transform_preprocess,split='test')
print("test",len(data_loader_test))

# callbacks and trainer
accumulator = GradientAccumulationScheduler(scheduling={0: train_config.gradient_accumulation_steps})

checkpoint_callback = ModelCheckpoint(filename=filename, dirpath=save_model_path,save_last=True,save_top_k=2,
                                       monitor='Val Accuracy_epoch', mode='max')

trainer = Trainer(callbacks=[checkpoint_callback,accumulator],
                  gpus=available_gpus,
                  max_epochs=4)
#train_config.epochs
trainer.fit(model, data_loader,data_loader_test)
trainer.save_checkpoint(save_name)


available_gpus: 1
Files already downloaded and verified
Train_dataset 391
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | mlp   | Sequential       | 5.1 K 
1 | model | Sequential       | 11.2 M
2 | loss  | CrossEntropyLoss | 0     
-------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)


test 79
Epoch 3: 100%|██████████| 470/470 [00:03<00:00, 127.10it/s, loss=0.985, v_num=213, Cross Entropy loss_step=1.220, Val Accuracy_step=0.688, Val Accuracy_epoch=0.602, Cross Entropy loss_epoch=0.955, Train Acc=0.659]

`Trainer.fit` stopped: `max_epochs=4` reached.


Epoch 3: 100%|██████████| 470/470 [00:03<00:00, 119.03it/s, loss=0.985, v_num=213, Cross Entropy loss_step=1.220, Val Accuracy_step=0.688, Val Accuracy_epoch=0.602, Cross Entropy loss_epoch=0.955, Train Acc=0.659]


# Finetune from Imageget pretraining

In [57]:
# load model
resnet = models.resnet18(pretrained=False)
resnet.fc = nn.Identity()
print('imagenet weights, no pretraining')
model = SimCLR_eval(train_config.lr, model=resnet, linear_eval=False)

# preprocessing and data loaders
transform_preprocess = Augment(train_config.img_size).test_transform
data_loader = get_cifar10_dataloader(128, transform=transform_preprocess,split='train')
data_loader_test = get_cifar10_dataloader(128, transform=transform_preprocess,train=False,split='test')

checkpoint_callback = ModelCheckpoint(filename=filename, dirpath=save_model_path)

trainer = Trainer(callbacks=[checkpoint_callback],
                  gpus=available_gpus,
                  max_epochs=3)

trainer.fit(model, data_loader, data_loader_test)
trainer.save_checkpoint(save_name)

imagenet weights, no pretraining
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | mlp   | Sequential       | 5.1 K 
1 | model | Sequential       | 11.2 M
2 | loss  | CrossEntropyLoss | 0     
-------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)


Files already downloaded and verified
Epoch 2: 100%|██████████| 470/470 [00:03<00:00, 126.12it/s, loss=1.05, v_num=214, Cross Entropy loss_step=0.953, Val Accuracy_step=0.312, Val Accuracy_epoch=0.572, Cross Entropy loss_epoch=1.110, Train Acc=0.602]

`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch 2: 100%|██████████| 470/470 [00:03<00:00, 122.14it/s, loss=1.05, v_num=214, Cross Entropy loss_step=0.953, Val Accuracy_step=0.312, Val Accuracy_epoch=0.572, Cross Entropy loss_epoch=1.110, Train Acc=0.602]


In [58]:
import pytorch_lightning as pl
import torch
from torch.optim import SGD


class SimCLR_eval(pl.LightningModule):
    def __init__(self, lr=None, model=None, linear_eval=False):
        super().__init__()
        self.lr = lr
        self.linear_eval = linear_eval
        if self.linear_eval:
            model.eval()
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(512,10),
        )

        self.model = torch.nn.Sequential(
            model, self.mlp
        )
        self.loss = torch.nn.CrossEntropyLoss()

    def forward(self, x):
        x = self.model(x)
        return x

    


    def training_step(self, batch, batch_idx):
        x, y = batch
        z = self.forward(x)
        loss = self.loss(z, y)
        self.log('Cross Entropy loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        predicted = z.argmax(1)
        acc = (predicted == y).sum().item() / y.size(0)
        self.log('Train Acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        z = self.forward(x)
        loss = self.loss(z, y)
        self.log('Val CE loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)

        predicted = z.argmax(1)
        acc = (predicted == y).sum().item() / y.size(0)
        self.log('Val Accuracy', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def configure_optimizers(self):
        if self.linear_eval:
          print(f"\n\n Attention! Linear evaluation \n")
          optimizer = SGD(self.mlp.parameters(), lr=self.lr, momentum=0.9)
        else:
          optimizer = SGD(self.model.parameters(), lr=self.lr, momentum=0.9)
        return [optimizer]

class Hparams:
    def __init__(self):
        self.epochs = 5
        self.seed = 77777 
        self.cuda = True 
        self.img_size = 32 
        self.save = "./saved_models/" 
        self.gradient_accumulation_steps = 1 
        self.batch_size = 128
        self.lr = 1e-3 
        self.embedding_size= 128 
        self.temperature = 0.5 


import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import GradientAccumulationScheduler
import os
from pytorch_lightning.callbacks import ModelCheckpoint
from torchvision.models import resnet18


# general stuff
available_gpus = len([torch.cuda.device(i) for i in range(torch.cuda.device_count())])
train_config = Hparams()
save_model_path = os.path.join(os.getcwd(), "saved_models/")
print('available_gpus:', available_gpus)
filename = 'SimCLR_ResNet18_finetune_'
reproducibility(train_config)
save_name = filename+  str(train_config.epochs)+ '_Final.ckpt'

# load resnet backbone
backbone = models.resnet18(pretrained=False)
backbone.fc = nn.Identity()
checkpoint = torch.load('resnet18_backbone_weights.ckpt')
backbone.load_state_dict(checkpoint['model_state_dict'])
model = SimCLR_eval(train_config.lr, model=backbone, linear_eval=False)

# preprocessing and data loaders
transform_preprocess = Augment(train_config.img_size).test_transform
data_loader = get_cifar10_dataloader(train_config.batch_size, transform=transform_preprocess,split='train')
data_loader_test = get_cifar10_dataloader(train_config.batch_size, train=False,transform=transform_preprocess,split='test')


# callbacks and trainer
accumulator = GradientAccumulationScheduler(scheduling={0: train_config.gradient_accumulation_steps})

checkpoint_callback = ModelCheckpoint(filename=filename, dirpath=save_model_path,save_last=True,save_top_k=2,
                                       monitor='Val Accuracy_epoch', mode='max')

trainer = Trainer(callbacks=[checkpoint_callback,accumulator],
                  gpus=available_gpus,
                  max_epochs=train_config.epochs)

trainer.fit(model, data_loader,data_loader_test)
trainer.save_checkpoint(save_name)

     

available_gpus: 1
Files already downloaded and verified
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | mlp   | Sequential       | 5.1 K 
1 | model | Sequential       | 11.2 M
2 | loss  | CrossEntropyLoss | 0     
-------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)


Epoch 6: 100%|██████████| 470/470 [00:03<00:00, 129.16it/s, loss=0.644, v_num=215, Cross Entropy loss_step=0.459, Val Accuracy_step=0.812, Val Accuracy_epoch=0.630, Cross Entropy loss_epoch=0.569, Train Acc=0.802]

`Trainer.fit` stopped: `max_epochs=7` reached.


Epoch 6: 100%|██████████| 470/470 [00:03<00:00, 120.47it/s, loss=0.644, v_num=215, Cross Entropy loss_step=0.459, Val Accuracy_step=0.812, Val Accuracy_epoch=0.630, Cross Entropy loss_epoch=0.569, Train Acc=0.802]


fine tune


In [59]:
# load model
resnet = models.resnet18(pretrained=False)
resnet.fc = nn.Identity()
print('imagenet weights, no pretraining')
model = SimCLR_eval(train_config.lr, model=resnet, linear_eval=False)

# preprocessing and data loaders
transform_preprocess = Augment(train_config.img_size).test_transform
data_loader = get_cifar10_dataloader(128, transform=transform_preprocess,split='train')
data_loader_test = get_cifar10_dataloader(128, transform=transform_preprocess,train=False,split='test')

checkpoint_callback = ModelCheckpoint(filename=filename, dirpath=save_model_path)

trainer = Trainer(callbacks=[checkpoint_callback],
                  gpus=available_gpus,
                  max_epochs=train_config.epochs)

trainer.fit(model, data_loader, data_loader_test)
trainer.save_checkpoint(save_name)
     

imagenet weights, no pretraining
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | mlp   | Sequential       | 5.1 K 
1 | model | Sequential       | 11.2 M
2 | loss  | CrossEntropyLoss | 0     
-------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)


Files already downloaded and verified
Epoch 6: 100%|██████████| 470/470 [00:03<00:00, 125.61it/s, loss=0.564, v_num=216, Cross Entropy loss_step=0.585, Val Accuracy_step=0.625, Val Accuracy_epoch=0.593, Cross Entropy loss_epoch=0.518, Train Acc=0.823]

`Trainer.fit` stopped: `max_epochs=7` reached.


Epoch 6: 100%|██████████| 470/470 [00:03<00:00, 120.85it/s, loss=0.564, v_num=216, Cross Entropy loss_step=0.585, Val Accuracy_step=0.625, Val Accuracy_epoch=0.593, Cross Entropy loss_epoch=0.518, Train Acc=0.823]


In [60]:
# After training is complete
final_val_accuracy = trainer.callback_metrics['Val Accuracy_epoch']
print("Final validation accuracy:", final_val_accuracy)

Final validation accuracy: tensor(0.5934)


In [61]:
def evaluate_accuracy(model, dataloader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total
    return accuracy


In [62]:
print("train",len(data_loader))
print("test",len(data_loader_test))

train 391
test 79


In [63]:
# After training is complete
# Evaluation on training dataset
model.eval()  # Set the model to evaluation mode
train_accuracy = evaluate_accuracy(model, data_loader)  # Function to evaluate accuracy on training dataset
print("Final training accuracy:", train_accuracy)

# Evaluation on test dataset
test_accuracy = evaluate_accuracy(model, data_loader_test
                                  )  # Function to evaluate accuracy on test dataset
print("Final test accuracy:", test_accuracy)


Final training accuracy: 0.87148
Final test accuracy: 0.5931


In [64]:
import torch
from torchvision import transforms
from PIL import Image


image_path = "dog.jpeg"


transform = transforms.Compose([
    transforms.Resize((96, 96)),  
    transforms.ToTensor(),         
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  
])


image = Image.open(image_path)


input_tensor = transform(image).unsqueeze(0)  # Add batch dimension


with torch.no_grad():
    output = model(input_tensor)


probabilities = torch.softmax(output, dim=1)
predicted_class = torch.argmax(probabilities, dim=1).item()


print("Predicted class:", predicted_class)

Predicted class: 3
