In [None]:
import time
import typing

import tqdm
import torch
import wandb
import random
import pandas
import torchscan
import torchvision
import sklearn.metrics
import matplotlib.pyplot as plt
import torch.utils.data as torchdata

# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    _backend = "cuda"
elif torch.backends.mps.is_available():
    _backend = "mps"
else:
    _backend = "cpu"
device = torch.device(_backend)
print(device)

# Seed shared by every cell that calls fix_random()
RANDOM_STATE = 42


def set_random_seed(seed):
    """Seed the random number generators used by this notebook.

    Seeds Python's `random` module and PyTorch (CPU generator and every CUDA
    device), and configures cuDNN for deterministic behaviour.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    torch.manual_seed(seed)
    # Seed all CUDA devices, not just the current one; this is a no-op without CUDA
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may pick different convolution algorithms between runs,
    # so it has to be off for the deterministic flag to be meaningful
    torch.backends.cudnn.benchmark = False


def fix_random():
    """Reset all generators to the notebook-wide `RANDOM_STATE` seed."""
    return set_random_seed(RANDOM_STATE)


fix_random()

In [None]:
# Log in to Weights & Biases up front; TorchModel.fit later opens a run with wandb.init.
# NOTE(review): `anonymous = "allow"` presumably permits logging without an account —
# confirm it is still supported by the installed wandb version.
wandb.login(anonymous = "allow")

# 1. Train the CNN based classifier

## Load the dataset


In [None]:
fix_random()

# Per-channel statistics used for normalization. They stay tensors because the
# denormalization step further down does arithmetic on the stored mean / std.
channel_mean = torch.tensor([ 0.491, 0.482, 0.447 ])
channel_std = torch.tensor([ 0.247, 0.244, 0.262 ])

# Transforms: PIL image -> tensor -> per-channel normalization
to_tensor = torchvision.transforms.ToTensor()
normalize = torchvision.transforms.Normalize(mean = channel_mean, std = channel_std)
transforms = torchvision.transforms.Compose([ to_tensor, normalize ])

# Both CIFAR-10 splits share the same root directory and preprocessing
cifar_kwargs = dict(root = 'cifar-10', download = True, transform = transforms)
full_train_set = torchvision.datasets.CIFAR10(train = True, **cifar_kwargs)
test_set = torchvision.datasets.CIFAR10(train = False, **cifar_kwargs)

# Randomly hold out 5000 training samples for validation
val_size = 5000
train_set, val_set = torchdata.random_split(
    full_train_set,
    [ len(full_train_set) - val_size, val_size ]
)

# Human-readable class names, indexed by class id
labels = test_set.classes

# Sanity check: split sizes and class names
print(len(train_set), len(val_set), len(test_set))
print(labels)

In [None]:
# Denormalization: invert `Normalize`, which computes (x - mean) / std.
# Step 1 divides by 1 / std (i.e. multiplies by std); step 2 subtracts -mean
# (i.e. adds mean), giving x * std + mean.
denormalize = torchvision.transforms.Compose([
    torchvision.transforms.Normalize(
        mean = [ 0., 0., 0. ],
        std = 1 / transforms.transforms[1].std
    ),
    torchvision.transforms.Normalize(
        mean = -transforms.transforms[1].mean,
        std = [ 1., 1., 1. ]
    )
])

# Display some samples: one row of the grid per split (train, validation, test)
SAMPLES_PER_ROW = 10
sample_sets = [ train_set, val_set, test_set ]
plt.rcParams["figure.figsize"] = (15, 5)
for row, sample_set in enumerate(sample_sets):
    for col in range(SAMPLES_PER_ROW):
        image, class_num = sample_set[col]
        # Subplot indices are 1-based and run row by row
        plt.subplot(len(sample_sets), SAMPLES_PER_ROW, row * SAMPLES_PER_ROW + col + 1)
        plt.axis('off')
        # Float round-off can push values marginally outside [0, 1];
        # clamp so imshow does not emit a clipping warning
        plt.imshow(denormalize(image).clamp(0, 1).permute(1, 2, 0))
        plt.title('{} ({})'.format(labels[class_num], str(class_num)))

## Function for accuracy checking

In [None]:
def calc_metrics(model, dataset = val_set) -> dict:
    """Evaluate a classifier on `dataset` and return its quality metrics.

    Args:
        model: object exposing `predict(images)` that returns a
            `(predictions, scores)` pair of tensors (e.g. `TorchModel`).
        dataset: dataset yielding `(images, labels)` pairs; defaults to the
            validation split.

    Returns:
        Dict mapping metric name to value: accuracy, top-k accuracy for every
        k from 2 up to (number of classes - 1), one-vs-one AUC-ROC and
        macro-averaged precision, recall and F1-score.

    Raises:
        ValueError: if `dataset` yields no batches.
    """
    loader = torchdata.DataLoader(dataset, batch_size = 512, shuffle = False)

    # Collect per-batch results and concatenate once at the end instead of
    # re-allocating the accumulated tensors on every iteration
    true_batches, score_batches, prediction_batches = [ ], [ ], [ ]
    for images, targets in loader:
        predictions, scores = model.predict(images)
        true_batches.append(targets.detach().cpu())
        score_batches.append(scores.detach().cpu())
        prediction_batches.append(predictions.detach().cpu())

    if not true_batches:
        raise ValueError('calc_metrics got an empty dataset')

    # Labels and predictions keep their integer dtype here
    all_true = torch.cat(true_batches).numpy()
    all_scores = torch.cat(score_batches).numpy()
    all_predictions = torch.cat(prediction_batches).numpy()

    # Number of classes comes from the score matrix rather than being hard-coded
    n_classes = all_scores.shape[1]

    metrics = { 'Accuracy': sklearn.metrics.accuracy_score(all_true, all_predictions) }
    for k in range(2, n_classes):
        metrics['TOP-{} Accuracy'.format(k)] = sklearn.metrics.top_k_accuracy_score(all_true, all_scores, k = k)
    metrics.update({
        'AUC-ROC':   sklearn.metrics.roc_auc_score  (all_true, all_scores, multi_class = 'ovo'),
        'Precision': sklearn.metrics.precision_score(all_true, all_predictions, average = 'macro'),
        'Recall':    sklearn.metrics.recall_score   (all_true, all_predictions, average = 'macro'),
        'F1-score':  sklearn.metrics.f1_score       (all_true, all_predictions, average = 'macro')
    })
    return metrics

## Implement CNN class for CIFAR10

**In constructor**

Define 2 - 3 convolutional layers

 https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

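with corresponding in/out dimensions $W_{out} = \left\lfloor \frac{W_{in} - F + 2P}{S} \right\rfloor + 1$, where $F$ is the kernel size, $P$ the padding and $S$ the stride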


Also define max pooling : https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html

and fully connected layers: https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear


**In forward**

Write code for forward pass.
Remember that the first dimension is the batch dimension.

In [None]:
class TorchModel:
    """Training / inference wrapper around a `torch.nn.Module` classifier.

    Holds the network, its optimizer and optional LR scheduler, trains with
    cross-entropy loss, evaluates after every epoch with the `metrics`
    callable and logs the results to Weights & Biases.
    """

    def __init__(
            self,
            name: str,
            model: torch.nn.Module,
            batch_size: int = 256,
            device: torch.device = device,
            metrics: typing.Callable = calc_metrics,
            optimizer: typing.Optional[torch.optim.Optimizer] = None,
            scheduler: typing.Optional[torch.optim.lr_scheduler.LRScheduler] = None,
        ):
        """Store the configuration and move the network to `device`.

        Args:
            name: run name passed to `wandb.init`.
            model: network to train; moved to `device`.
            batch_size: batch size of the training loader built in `fit`.
            device: device used for the network and for input batches.
            metrics: callable invoked as `metrics(self)` after each epoch;
                `fit` merges its result into the epoch's metrics dict.
            optimizer: optimizer over the network's parameters; AdamW with
                default settings is created when omitted.
            scheduler: optional LR scheduler, stepped once per epoch.
        """
        self.name = name
        self.history = [ ]  # One metrics dict per finished epoch
        self.device = device
        self.metrics = metrics
        self.scheduler = scheduler
        self.batch_size = batch_size
        self.model = model.to(self.device)
        if optimizer is None:
            optimizer = torch.optim.AdamW(self.model.parameters())
        self.optimizer = optimizer

    def train(self, images: torch.Tensor, labels: torch.Tensor) -> float:
        """Run one optimization step on a single batch and return its loss value."""
        self.model.train() # Enter train mode
        self.optimizer.zero_grad() # Zero gradients
        output = self.model(images.to(self.device)) # Get predictions
        loss = torch.nn.functional.cross_entropy(output, labels.to(self.device)) # Calculate loss
        loss.backward() # Calculate gradients
        self.optimizer.step() # Update weights
        return loss.item()

    def train_epoch(self, loader: torchdata.DataLoader) -> float:
        """Train on every batch of `loader` once and return the mean batch loss."""
        sum_loss = 0
        for images, labels in loader:
            sum_loss += self.train(images, labels)
        # The scheduler (if any) advances once per epoch, not per batch
        if self.scheduler is not None:
            self.scheduler.step()
        return sum_loss / len(loader)

    def fit(self, dataset: torchdata.Dataset, n_epochs: int = 1):
        """Train for `n_epochs` epochs, logging metrics to W&B after each one.

        Each epoch's dict (loss, train time, the values returned by
        `self.metrics`, validation time) is logged with `wandb.log` and
        appended to `self.history`.

        Args:
            dataset: training dataset; batches are shuffled.
            n_epochs: number of passes over `dataset`.

        Returns:
            `self`, so the call can be chained.
        """
        loader = torchdata.DataLoader(dataset, batch_size = self.batch_size, shuffle = True)
        wandb.init(project = "CV-HW-4", name = self.name, anonymous = "allow")
        try:
            wandb.watch(self.model, log = "all")
            for _ in tqdm.trange(n_epochs):
                train_start = time.perf_counter()
                loss = self.train_epoch(loader)
                train_time = time.perf_counter() - train_start
                metrics = { 'Loss': loss, 'Train time': train_time }

                val_start = time.perf_counter()
                metrics.update(self.metrics(self))
                val_time = time.perf_counter() - val_start
                metrics['Validation time'] = val_time

                wandb.log(metrics)
                self.history.append(metrics)
        except BaseException:
            # Close the run even when training fails or is interrupted (e.g. the
            # kernel is stopped), and mark it as failed instead of leaving it open
            wandb.finish(exit_code = 1)
            raise

        wandb.finish()
        return self

    def predict(self, images: torch.Tensor) -> typing.Tuple[torch.Tensor, torch.Tensor]:
        """Classify a batch of images without tracking gradients.

        Returns:
            `(predictions, scores)`: the argmax class index per sample and the
            softmax over dimension 1 of the network output. Both tensors are
            left on `self.device`.
        """
        self.model.eval() # Enter evaluation mode
        with torch.no_grad():
            outputs = self.model(images.to(self.device))
            scores = torch.softmax(outputs, dim = 1)
            predictions = torch.argmax(scores, dim = 1)
        return predictions, scores

**In constructor**

Define 2 - 3 convolutional layers

 https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

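with corresponding in/out dimensions $W_{out} = \left\lfloor \frac{W_{in} - F + 2P}{S} \right\rfloor + 1$, where $F$ is the kernel size, $P$ the padding and $S$ the stride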


Also define max pooling : https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html

and fully connected layers: https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear


**In forward**

Write code for forward pass.
Remember that the first dimension is the batch dimension.

In [None]:
from torch_receptive_field import receptive_field

# One training image with a leading batch dimension, placed on the working device
image = train_set[0][0].unsqueeze(0).to(device)
print(image.shape)

# Convolutional feature extractor only (no classifier head)
feature_layers = [
    # Stage 1: 7x7 conv -> batch norm -> 2x2 max pool -> ReLU
    torch.nn.Conv2d(3, 16, kernel_size = 7, padding = 1),
    torch.nn.BatchNorm2d(16),
    torch.nn.MaxPool2d(kernel_size = 2, stride = 2),
    torch.nn.ReLU(),

    # Stage 2: 5x5 conv -> batch norm -> 2x2 max pool -> ReLU
    torch.nn.Conv2d(16, 32, kernel_size = 5, padding = 1),
    torch.nn.BatchNorm2d(32),
    torch.nn.MaxPool2d(kernel_size = 2, stride = 2),
    torch.nn.ReLU(),

    # Stage 3: 3x3 conv -> batch norm -> ReLU (no pooling)
    torch.nn.Conv2d(32, 32, kernel_size = 3, padding = 1),
    torch.nn.BatchNorm2d(32),
    torch.nn.ReLU(),
]
model = torch.nn.Sequential(*feature_layers).to(device)

# Output shape of the extractor, then its receptive field for a (C, H, W) input
print(model(image).shape)
res = receptive_field(model, image.shape[1:])

In [None]:
fix_random()

# Layers are created in the same order as they are applied
layers = [
    # Feature extractor: three conv stages, the first two followed by 2x2 max pooling
    torch.nn.Conv2d(3, 16, kernel_size = 7, padding = 1),
    torch.nn.BatchNorm2d(16),
    torch.nn.MaxPool2d(kernel_size = 2, stride = 2),
    torch.nn.ReLU(),

    torch.nn.Conv2d(16, 32, kernel_size = 5, padding = 1),
    torch.nn.BatchNorm2d(32),
    torch.nn.MaxPool2d(kernel_size = 2, stride = 2),
    torch.nn.ReLU(),

    torch.nn.Conv2d(32, 32, kernel_size = 3, padding = 1),
    torch.nn.BatchNorm2d(32),
    torch.nn.ReLU(),

    # Classifier head on the flattened 32 x 6 x 6 feature map
    torch.nn.Flatten(),

    torch.nn.Dropout(p = 0.5),
    torch.nn.Linear(32 * 6 * 6, 250),
    torch.nn.BatchNorm1d(250),
    torch.nn.ReLU(),

    torch.nn.Dropout(p = 0.2),
    torch.nn.Linear(250, 64),
    torch.nn.BatchNorm1d(64),
    torch.nn.ReLU(),

    # One logit per class
    torch.nn.Linear(64, 10),
]
model = torch.nn.Sequential(*layers).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr = 1e-4)

# From here on `model` is the TorchModel wrapper, which is what calc_metrics expects
model = TorchModel(name = 'test', model = model, optimizer = optimizer)
model.fit(train_set, n_epochs = 100)
calc_metrics(model, test_set)

In [None]:
import torch
import torchscan
import torch_receptive_field

# Untrained copy of the architecture, used only to print the layer summary.
# It is bound to its own name so that the trained `model` (the TorchModel
# wrapper that calc_metrics needs) is not overwritten by this cell.
summary_model = torch.nn.Sequential(
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 1),
    torch.nn.BatchNorm2d(16),
    torch.nn.MaxPool2d(2, 2),
    torch.nn.ReLU(),
    
    torch.nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 5, padding = 1),
    torch.nn.BatchNorm2d(32),
    torch.nn.MaxPool2d(2, 2),
    torch.nn.ReLU(),
    
    torch.nn.Conv2d(in_channels = 32, out_channels = 32, kernel_size = 3, padding = 1),
    torch.nn.BatchNorm2d(32),
    torch.nn.ReLU(),
    
    torch.nn.Flatten(),

    torch.nn.Dropout(0.5),
    torch.nn.Linear(32 * 6 * 6, 250),
    torch.nn.BatchNorm1d(250),
    torch.nn.ReLU(),
    
    torch.nn.Dropout(0.2),
    torch.nn.Linear(250, 64),
    torch.nn.BatchNorm1d(64),
    torch.nn.ReLU(),
    
    torch.nn.Linear(64, 10),
).to(device)
# The module already sits on `device`; a hard-coded `.cuda()` would raise on
# machines without CUDA even though the notebook falls back to MPS / CPU
torchscan.summary(summary_model.eval(), (3, 32, 32), receptive_field = True)

## Train the model




### Validate results on test dataset

You must get accuracy above 0.65

In [None]:
# Final evaluation on the held-out test split. calc_metrics calls `model.predict`,
# so `model` here must be the trained TorchModel wrapper, not a bare torch.nn.Sequential.
calc_metrics(model, test_set)

# 2. Compare different Normalization methods

* Add extra conv layer to your model (3-7)
* Take three different normalization layers: BatchNorm, InstanceNorm, LayerNorm
* Train the model with each of them.
* Plot the loss curve for different normalization in same axis


*Because this task is time-consuming, it is recommended to perform the calculations on a small subset of the dataset.*

In [None]:
# Put your code here ...

# Place for brief conclusion:

....


# Ideas for extra work

---
1. Evaluate the impact of the number and size of filters in convolutional layers on the accuracy.

2. Evaluate the impact of the convolutional layers count on the accuracy.

3. Visualize something: filters, activations ...

---







