# Necessary imports

In [1]:
# Load IPython's autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2

In [2]:
!pip install wandb -Uq

[0m

In [3]:
import wandb

# Authenticate with Weights & Biases; as the last expression of the cell,
# the call's return value is displayed as the cell output.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mivanova3581321[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
!pip install -q lightly

[0m

In [6]:
# Number of epochs the training loop iterates over; also embedded in the
# filename of the saved backbone checkpoint.
N_EPOCHS = 20

In [7]:
import numpy as np
import pandas as pd
import copy
import torch
from torch import nn
import matplotlib.pyplot as plt
from PIL import Image

from tqdm import tqdm

import torch
import torchvision

from torch import nn
from torchvision import transforms

from lightly.loss import NTXentLoss
from lightly.models.modules import SimCLRProjectionHead
from lightly.transforms.simclr_transform import SimCLRTransform


from lightly.data import LightlyDataset
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize

In [8]:
# KNN is the evaluator applied to the model later in the notebook;
# both names are imported from the `knn` module.
from knn import KNN, reproducibility

# Called with the fixed value 42; presumably seeds the random number
# generators for repeatable runs — confirm against the knn module.
reproducibility(42)

# Define model

In [9]:
class SimCLR(nn.Module):
    """SimCLR network: a feature backbone followed by a projection head.

    Args:
        backbone: Module whose output, flattened from dimension 1 onward,
            has ``feature_dim`` values per sample.
        feature_dim: Input size of the projection head. Defaults to 512,
            the value previously hard-coded.
        hidden_dim: Hidden size of the projection head. Defaults to 512.
        out_dim: Output size of the projection head. Defaults to 128.
    """

    def __init__(self, backbone, feature_dim=512, hidden_dim=512, out_dim=128):
        super().__init__()
        self.backbone = backbone
        # Sizes are passed positionally in (input, hidden, output) order,
        # exactly as the original fixed call did.
        self.projection_head = SimCLRProjectionHead(feature_dim, hidden_dim, out_dim)

    def forward(self, x):
        """Run ``x`` through the backbone, flatten it, and project it.

        Returns:
            The projection head's output for the batch.
        """
        features = self.backbone(x).flatten(start_dim=1)
        return self.projection_head(features)

In [10]:
# Encoder: a freshly constructed ResNet-18 with its final child module dropped.
resnet = torchvision.models.resnet18()
backbone = nn.Sequential(*tuple(resnet.children())[:-1])
model = SimCLR(backbone)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
model.to(device)
# The trailing print() means the cell does not end on the model object,
# so its repr is not echoed as cell output.
print()




# Data (TinyImageNet)

In [11]:
def get_input_stats():
    """Return the hard-coded per-channel mean and standard deviation.

    Returns:
        tuple: ``(data_mean, data_std)``, each a 3-tuple of floats.
    """
    return (0.4802, 0.4481, 0.3975), (0.2770, 0.2691, 0.2821)

In [12]:
from torchvision import transforms as T
from torchvision import datasets

# Deterministic preprocessing: tensor conversion followed by channel-wise
# normalization with the constants listed here.
test_transform = T.Compose(
    [
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Validation split: batches of 32, incomplete final batch dropped.
tin_data_val = datasets.ImageFolder(
    root='datasets/tiny-imagenet-200/val',
    transform=test_transform,
)
tin_loader_val = torch.utils.data.DataLoader(
    tin_data_val,
    batch_size=32,
    drop_last=True,
)

# Training split with the same preprocessing and loader settings.
tin_data_train = datasets.ImageFolder(
    root='datasets/tiny-imagenet-200/train',
    transform=test_transform,
)
tin_loader_train = torch.utils.data.DataLoader(
    tin_data_train,
    batch_size=32,
    drop_last=True,
)

In [13]:
# SimCLR augmentation pipeline with input_size=32 and gaussian_blur set to 0.0.
transform = SimCLRTransform(input_size=32, gaussian_blur=0.0)
# transform = transforms.ToTensor()

# NOTE(review): normalize_dict is assembled here but is not passed to
# SimCLRTransform or used anywhere else in the visible notebook.
data_mean, data_std = get_input_stats()
normalize_dict = dict(mean=data_mean, std=data_std)

# Wrap the ImageFolder training set so the SimCLR transform is applied to it.
dataset = LightlyDataset.from_torch_dataset(tin_data_train, transform=transform)

# Shuffled training loader; each batch's first element unpacks into the two
# views consumed by the training loop.
dataloader = torch.utils.data.DataLoader(
    dataset, batch_size=128, shuffle=True, drop_last=True, num_workers=2, pin_memory=True
)

# Optimizer and Scheduler

In [14]:
# Contrastive loss plus SGD over all model parameters.
# NOTE(review): the training cell constructs a new SGD optimizer with the
# same learning rate before the loop starts.
criterion = NTXentLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.06)
# scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer,gamma=0.95)
# scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer,base_lr=0.06, max_lr=0.1)

# KNN and dataloaders for evaluation

In [15]:
from torchvision import transforms as T
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

# Batch size constant for evaluation loaders.
BATCH_SIZE = 32

# Evaluation preprocessing: tensor conversion, then channel-wise normalization.
test_transform = T.Compose(
    [
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# def get_loaders( batch_size, transform=test_transform,data_path='./'):
#     dst_train = datasets.ImageFolder(root=os.path.join(data_path, 'tiny-imagenet-200/train'), transform=test_transform)
#     dst_test = datasets.ImageFolder(root=os.path.join(data_path, 'tiny-imagenet-200/test'), transform=test_transform)
#     train_loader = DataLoader(dataset=dst_train, batch_size=batch_size, num_workers=1, drop_last=True)
#     test_loader = DataLoader(dataset=dst_test, batch_size=batch_size, num_workers=1, drop_last=True)
#     return train_loader, test_loader

# train_loader, test_loader = get_loaders(batch_size=BATCH_SIZE)

# Make directory for saving models

In [16]:
# Hyperparameters recorded with the W&B run. They mirror the values that
# training actually uses so the logged metadata cannot drift from the code.
config = dict(
    epochs=N_EPOCHS,      # same constant that bounds the training loop
    classes=200,
    learning_rate=0.06,   # matches the lr given to torch.optim.SGD (was logged as 0.006)
    dataset="TinyImageNet",
    model="SimCLR",
)

In [17]:
# Start a Weights & Biases run in the named project and attach the config dict to it.
wandb.init(project="SCLR_TIN_20epochs", config=config)

In [18]:
# Baseline KNN evaluation, run before the training cell.
# Fresh ImageFolder datasets and loaders are built with the deterministic
# test_transform: batches of 32, incomplete final batch dropped.
tin_data_val = datasets.ImageFolder(root='datasets/tiny-imagenet-200/val',
                                    transform=test_transform)
tin_loader_val = torch.utils.data.DataLoader(tin_data_val, batch_size=32, drop_last=True)
tin_data_train = datasets.ImageFolder(root='datasets/tiny-imagenet-200/train',
                                      transform=test_transform)
tin_loader_train = torch.utils.data.DataLoader(tin_data_train, batch_size=32, drop_last=True)

# Evaluate on the device the model was moved to, not a hard-coded 'cuda',
# so the cell also runs on CPU-only machines.
ssl_evaluator = KNN(model=model, k=1, device=device)
train_acc, val_acc = ssl_evaluator.fit(tin_loader_train, tin_loader_val)
print(train_acc, val_acc)

Evaluate on train data...
Evaluate on test data...
2.311 1.6225891944128619


In [None]:
model.to(device)
# Fresh optimizer for this run; the cyclic schedule is stepped once per batch.
optimizer = torch.optim.SGD(model.parameters(), lr=0.06)
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.06, max_lr=0.1)

# Threshold read only by the (currently commented-out) checkpointing code below.
best_val_acc = 1.68
print("Starting Training")
for epoch in tqdm(range(N_EPOCHS)):
    # Put the model in training mode explicitly: an evaluation pass runs
    # before this cell and may have left the modules in eval mode.
    model.train()
    total_loss = 0
    for batch in dataloader:
        # The first element of each batch holds the two augmented views.
        x0, x1 = batch[0]
        x0 = x0.to(device)
        x1 = x1.to(device)
        z0 = model(x0)
        z1 = model(x1)
        loss = criterion(z0, z1)
        # Accumulate a detached tensor so no autograd graph is retained.
        total_loss += loss.detach()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()
    # Convert to a Python float once per epoch so printing and W&B logging
    # receive a plain number rather than a tensor.
    avg_loss = (total_loss / len(dataloader)).item()
    print(f"\n epoch: {epoch:>02}, loss: {avg_loss:.5f}")
    wandb.log({'epoch': epoch, 'loss': avg_loss})
    # if epoch % 10 == 0:
    #     ssl_evaluator = KNN(model=model, k=1, device='cuda')
    #     train_acc, _ = ssl_evaluator.fit(tin_loader_train, tin_loader_val)
    #     if train_acc > best_val_acc:
    #         torch.save(model.state_dict(),f'SimCLR_{val_acc:.2f}_TIN.pt')
    #         best_val_acc = train_acc
    #     print(f'\n Train Accuracy:{train_acc:.1f}%')#,f' Val Accuracy:{val_acc:.1f}%')

Starting Training


  5%|▌         | 1/20 [05:36<1:46:31, 336.41s/it]


 epoch: 00, loss: 5.53936


# Load the best model we have got

In [None]:
# Display the training loop's variable, i.e. the index of the last epoch that was started.
epoch

In [None]:
print('Start evaluating')
# Evaluate on the device the model lives on, not a hard-coded 'cuda',
# so the cell also runs on CPU-only machines.
ssl_evaluator = KNN(model=model, k=1, device=device)
train_acc, val_acc = ssl_evaluator.fit(tin_loader_train, tin_loader_val)
print(train_acc, val_acc)

# Visual evaluation

# Save backbone model

In [None]:
# Save only the encoder's weights. The filename carries the most recent
# validation accuracy and the configured epoch count.
backbone = model.backbone
torch.save(
    backbone.state_dict(),
    f'SimCLR_TIN_acc{val_acc:.2f}_epoch{N_EPOCHS}.pt',
)