<a href="https://colab.research.google.com/github/sthalles/SimCLR/blob/simclr-refactor/feature_eval/mini_batch_logistic_regression_evaluator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import sys
import numpy as np
import os
import yaml
import matplotlib.pyplot as plt
import torchvision
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_file_id_by_model(folder_name):
  file_id = {'resnet18_100-epochs_stl10': '14_nH2FkyKbt61cieQDiSbBVNP8-gtwgF',
             'resnet18_100-epochs_cifar10': '1lc2aoVtrAetGn0PnTkOyFzPCIucOJq7C',
             'resnet50_50-epochs_stl10': '1ByTKAUsdm_X7tLcii6oAEl5qFRqRMZSu'}
  return file_id.get(folder_name, "Model not found.")

In [3]:
folder_name = 'resnet50_50-epochs_stl10'
file_id = get_file_id_by_model(folder_name)
print(folder_name, file_id)

resnet50_50-epochs_stl10 1ByTKAUsdm_X7tLcii6oAEl5qFRqRMZSu


In [4]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import datasets

In [5]:
dataset_list = ['stl10','cifar10']
dataset = dataset_list[1]
models = ['rgb3000im_50ep','deptheuclidean3000im_50ep', 'brightness3000im_50ep', 'lowluminosity3000im_50ep', 'lowfov3000im_50ep', 'highfov3000im_50ep', 'highluminosity3000im_50ep']
model = models[6]
epochs = 100

if model == 'rgb3000im_50ep':
  model_weights_dest = './rgb/checkpoints/epoch=49-step=4800.ckpt'
elif model== 'deptheuclidean3000im_50ep':
  model_weights_dest = './depth/checkpoints/epoch=49-step=4800.ckpt'
elif model == 'brightness3000im_50ep':
  model_weights_dest = './brightened/checkpoints/epoch=49-step=4500.ckpt'
elif model == 'lowluminosity3000im_50ep':
  model_weights_dest = './low_luminosity/checkpoints/epoch=49-step=4500.ckpt'
elif model == 'lowfov3000im_50ep':
  model_weights_dest = './low_fov/checkpoints/epoch=49-step=3700.ckpt'
elif model == 'highfov3000im_50ep':
  model_weights_dest = './high_fov/checkpoints/epoch=49-step=5200.ckpt'
elif model == 'highluminosity3000im_50ep':
  model_weights_dest = './high_luminosity/checkpoints/epoch=49-step=2400.ckpt'
save_file_name = f'{model}_linear_regression_eval_{dataset}_Finetune{epochs}ep'

In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

Using device: cuda


In [7]:
def get_stl10_data_loaders(download, shuffle=False, batch_size=256):
  train_dataset = datasets.STL10('./data', split='train', download=download,
                                  transform=transforms.ToTensor())

  train_loader = DataLoader(train_dataset, batch_size=batch_size,
                            num_workers=0, drop_last=False, shuffle=shuffle)
  
  test_dataset = datasets.STL10('./data', split='test', download=download,
                                  transform=transforms.ToTensor())

  test_loader = DataLoader(test_dataset, batch_size=2*batch_size,
                            num_workers=8, drop_last=False, shuffle=shuffle)
  return train_loader, test_loader

def get_cifar10_data_loaders(download, shuffle=False, batch_size=256):
  train_dataset = datasets.CIFAR10('./data', train=True, download=download,
                                  transform=transforms.ToTensor())

  train_loader = DataLoader(train_dataset, batch_size=batch_size,
                            num_workers=0, drop_last=False, shuffle=shuffle)
  
  test_dataset = datasets.CIFAR10('./data', train=False, download=download,
                                  transform=transforms.ToTensor())

  test_loader = DataLoader(test_dataset, batch_size=2*batch_size,
                            num_workers=8, drop_last=False, shuffle=shuffle)
  return train_loader, test_loader

In [8]:
model = torchvision.models.resnet50(pretrained=False, num_classes=10).to(device)



In [9]:
MODEL_PATH = model_weights_dest
checkpoint = torch.load(MODEL_PATH, map_location=device)
state_dict = checkpoint['state_dict']

for k in list(state_dict.keys()):

  if k.startswith('backbone.'):
    if k.startswith('backbone') and not k.startswith('backbone.fc'):
      # remove prefix
      state_dict[k[len("backbone."):]] = state_dict[k]
  del state_dict[k]

  warn_missing_pkg("wandb")
  warn_missing_pkg("sklearn", pypi_name="scikit-learn")
  "lr_options": generate_power_seq(LEARNING_RATE_CIFAR, 11),
  contrastive_task: Union[FeatureMapContrastiveTask] = FeatureMapContrastiveTask("01, 02, 11"),
  self.nce_loss = AmdimNCELoss(tclip)
  warn_missing_pkg("gym")
  warn_missing_pkg("sklearn")


In [10]:
if dataset == 'cifar10':
  train_loader, test_loader = get_cifar10_data_loaders(download=True)
elif dataset == 'stl10':
  train_loader, test_loader = get_stl10_data_loaders(download=True)

Files already downloaded and verified
Files already downloaded and verified


In [11]:
# freeze all layers but the last fc
for name, param in model.named_parameters():
    if name not in ['fc.weight', 'fc.bias']:
        param.requires_grad = False

parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
assert len(parameters) == 2  # fc.weight, fc.bias

In [12]:
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=0.0008)
criterion = torch.nn.CrossEntropyLoss().to(device)

In [13]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [14]:
from tqdm import tqdm

In [15]:
data ={'Epoch':[],'Top1 Train Accuracy':[], 'Top1 Test Accuracy':[], 'Top5 Test Accuracy':[]}
desc = ""
for epoch in tqdm(range(epochs)):
  top1_train_accuracy = 0
  for counter, (x_batch, y_batch) in enumerate(train_loader):
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)

    logits = model(x_batch)
    loss = criterion(logits, y_batch)
    top1 = accuracy(logits, y_batch, topk=(1,))
    top1_train_accuracy += top1[0]

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  top1_train_accuracy /= (counter + 1)
  top1_accuracy = 0
  top5_accuracy = 0
  
  for counter, (x_batch, y_batch) in enumerate(test_loader):
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)

    logits = model(x_batch)
  
    top1, top5 = accuracy(logits, y_batch, topk=(1,5))
    top1_accuracy += top1[0]
    top5_accuracy += top5[0]
  
  top1_accuracy /= (counter + 1)
  top5_accuracy /= (counter + 1)
  desc = (f"Epoch {epoch}\tTop1 Train accuracy {top1_train_accuracy.item()}\tTop1 Test accuracy: {top1_accuracy.item()}\tTop5 test acc: {top5_accuracy.item()}")
  epoch_data = {'Epoch':epoch,'Top1 Train Accuracy':top1_train_accuracy.item(), 'Top1 Test Accuracy':top1_accuracy.item(), 'Top5 Test Accuracy':top5_accuracy.item()}
  for k, v in epoch_data.items():
        data[k].append(v)
df_testdata = pd.DataFrame(data)
df_testdata.to_csv(f'{save_file_name}.csv')
    

100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [32:17<00:00, 19.38s/it]


In [16]:
df_testdata

Unnamed: 0,Epoch,Top1 Train Accuracy,Top1 Test Accuracy,Top5 Test Accuracy
0,0,14.565130,16.673368,65.678429
1,1,19.707829,18.558134,67.876266
2,2,22.202248,19.461742,68.802849
3,3,23.938137,19.946577,69.240005
4,4,25.161032,20.266544,69.811577
...,...,...,...,...
95,95,31.180244,19.635225,69.316978
96,96,31.168287,19.635225,69.297447
97,97,31.176258,19.644991,69.307213
98,98,31.178251,19.635225,69.297447
