In [15]:
import numpy as np
import torch

from torch import nn
from cifar10_utils import get_cifar10, get_dataloader
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

# Setup

In [50]:
# Seed for reproduceability
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.determinstic = True
torch.backends.cudnn.benchmark = False

# Setup device-agnostic code
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Tensorboard
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Backbone Architecture

We will use ResNet50 as our backbone architecture. The code is taken from [this GitHub repository by Maciej Balawejder](https://github.com/maciejbalawejder/Deep-Learning-Collection/tree/main/ConvNets/ResNet).

In [9]:
# ConvBlock
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        self.c = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)
        self.bn = nn.BatchNorm2d(out_channels)
    
    def forward(self, x):
        return self.bn(self.c(x))

# Bottleneck ResidualBlock 
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, first=False):
        super().__init__()
        res_channels = in_channels // 4
        stride = 1

        self.projection = in_channels!=out_channels
        if self.projection:
            self.p = ConvBlock(in_channels, out_channels, 1, 2, 0)
            stride = 2
            res_channels = in_channels // 2

        if first:
            self.p = ConvBlock(in_channels, out_channels, 1, 1, 0)
            stride = 1
            res_channels = in_channels


        self.c1 = ConvBlock(in_channels, res_channels, 1, 1, 0) 
        self.c2 = ConvBlock(res_channels, res_channels, 3, stride, 1)
        self.c3 = ConvBlock(res_channels, out_channels, 1, 1, 0)
        self.relu = nn.ReLU()

    def forward(self, x):
        f = self.relu(self.c1(x))
        f = self.relu(self.c2(f))
        f = self.c3(f)

        if self.projection:
            x = self.p(x)

        h = self.relu(torch.add(f, x))
        return h

# ResNetx
class ResNet(nn.Module):
    def __init__(
        self, 
        config_name : int, 
        in_channels=3, 
        classes=10
        ):
        super().__init__()

        configurations = {
            50 : [3, 4, 6, 3],
            101 : [3, 4, 23, 3],
            152 : [3, 8, 36, 3]
        }

        no_blocks = configurations[config_name]

        out_features = [256, 512, 1024, 2048]
        self.blocks = nn.ModuleList([ResidualBlock(64, 256, True)])

        for i in range(len(out_features)):
            if i > 0:
                self.blocks.append(ResidualBlock(out_features[i-1], out_features[i]))
            for _ in range(no_blocks[i]-1):
                self.blocks.append(ResidualBlock(out_features[i], out_features[i]))
        
        self.conv1 = ConvBlock(in_channels, 64, 7, 2, 3)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(2048, classes)

        self.relu = nn.ReLU()

        self.init_weight()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.maxpool(x)
        for block in self.blocks:
            x = block(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

    def init_weight(self):
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight)

In [5]:
# Loading the dataset
cifar10 = get_cifar10()
cifar10_loader = get_dataloader(cifar10, batch_size=128,return_numpy=False)

Files already downloaded and verified
Files already downloaded and verified


In [26]:
def evaluate_model(model, data_loader):
    """
    Performs the evaluation of the MLP model on a given dataset.

    Args:
      model: An instance of 'MLP', the model to evaluate.
      data_loader: The data loader of the dataset to evaluate.
    Returns:
        accuracy
    """
    accuracies_per_batch = []
    
    # Get validation accuracy for epoch
    for batch in data_loader:
        
        # Get validation images and labels
        X = batch[0].to(device)
        y = batch[1].to(device)
        
        # Get predictions on validation set
        model.eval()
        with torch.no_grad():
            pred = torch.argmax(torch.softmax(model.forward(X), dim=1), axis=1)
        
        # Calculate accuracy := # of correct preds / total # of preds
        current_accuracy = torch.sum(pred == y) / pred.shape[0]
        accuracies_per_batch.append(current_accuracy.item())

    accuracy = np.average(accuracies_per_batch)

    return accuracy

In [36]:
def train(model, writer: SummaryWriter, epochs=10, lr=0.01, momentum=0, verbose=True):
    
    for epoch in tqdm(range(epochs)):
        
        model.train()
        
        # Loss module and optimizer
        loss_module = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
         
        for i, batch in enumerate(cifar10_loader['train']):
            
            # Get training images and labels
            X_train = batch[0].to(device)
            y_train = batch[1].to(device)
            
            # Forward pass
            train_pred = model.forward(X_train)
            
            # Calculate loss
            loss = loss_module(train_pred, y_train)
            writer.add_scalar('Loss/train', loss.item(), i)
            
            # Zero gradients
            optimizer.zero_grad()
            
            # Backward pass
            loss.backward()
            
            # Update parameters
            optimizer.step()
        
        # Log current LR
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)
        
        # Update LR
        scheduler.step(loss.item())
        
        # Get accuracy on test set
        test_accuracy = evaluate_model(model, cifar10_loader['test'])
        writer.add_scalar('Accuracy/test', test_accuracy, epoch)
        
        # Get accuracy on training set as well
        train_accuracy = evaluate_model(model, cifar10_loader['train'])
        writer.add_scalar('Accuracy/train', train_accuracy, epoch)
        
        
        if verbose:
            print(f'\n{epoch = }, '
                  f'{test_accuracy = }, '
                  f'{train_accuracy = }, '
                  f'{loss.item() = }')

# Vanilla ResNet

In [37]:
resnet50 = ResNet(50).to(device)
resnet50_writer = SummaryWriter('logs/resnet/')
train(resnet50, resnet50_writer)


 10%|█         | 1/10 [00:53<08:01, 53.49s/it]


epoch = 0, test_accuracy = 0.31269778481012656, train_accuracy = 0.3468883547008547, loss.item() = 2.26366925239563


 20%|██        | 2/10 [01:47<07:08, 53.59s/it]


epoch = 1, test_accuracy = 0.39566851265822783, train_accuracy = 0.458377849002849, loss.item() = 1.8115355968475342


 30%|███       | 3/10 [02:40<06:14, 53.56s/it]


epoch = 2, test_accuracy = 0.450751582278481, train_accuracy = 0.5289797008547008, loss.item() = 1.5195506811141968


 40%|████      | 4/10 [03:33<05:20, 53.42s/it]


epoch = 3, test_accuracy = 0.4830893987341772, train_accuracy = 0.5806623931623932, loss.item() = 1.5158103704452515


 50%|█████     | 5/10 [04:27<04:27, 53.41s/it]


epoch = 4, test_accuracy = 0.4777492088607595, train_accuracy = 0.6142717236467237, loss.item() = 1.7899606227874756


 60%|██████    | 6/10 [05:20<03:33, 53.46s/it]


epoch = 5, test_accuracy = 0.5014833860759493, train_accuracy = 0.6810452279202279, loss.item() = 1.3362033367156982


 70%|███████   | 7/10 [06:13<02:39, 53.32s/it]


epoch = 6, test_accuracy = 0.49268196202531644, train_accuracy = 0.6946892806267806, loss.item() = 1.2141482830047607


 80%|████████  | 8/10 [07:07<01:46, 53.34s/it]


epoch = 7, test_accuracy = 0.5038568037974683, train_accuracy = 0.7579460470085471, loss.item() = 0.932548999786377


 90%|█████████ | 9/10 [08:00<00:53, 53.40s/it]


epoch = 8, test_accuracy = 0.5134493670886076, train_accuracy = 0.7938479344729344, loss.item() = 0.8724949955940247


100%|██████████| 10/10 [08:54<00:00, 53.47s/it]


epoch = 9, test_accuracy = 0.5058346518987342, train_accuracy = 0.8198228276353277, loss.item() = 0.8018503785133362





In [58]:
# %reload_ext tensorboard
!lsof -i:6006


/bin/bash: line 1: lsof: command not found


In [59]:
%tensorboard --logdir '/kaggle/working/logs/resnet' --port=8008

In [44]:
%mkdir -p models
torch.save(resnet50, './models/resnet50.pt')