# Lab6 CNN Architectures and Transfer Learning

Mount Google Drive onto the Colab virtual machine

In [0]:
# Colab-specific module that exposes Google Drive to this notebook's runtime.
from google.colab import drive
# Mount Google Drive at /content/gdrive; the lab folder opened in the next
# cell lives underneath this mount point.
drive.mount('/content/gdrive')

Change the current directory to the Lab 6 folder

In [0]:
cd "gdrive/My Drive/UCCD3074_Lab6"

Load required libraries

In [0]:
%load_ext autoreload
%autoreload 2

import numpy as np
import torchvision.models as models

import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchsummary import summary

from cifar10 import CIFAR10

---
## Helper Functions

Define the train function

In [0]:
# Number of loss reports per epoch: the running loss is averaged, recorded and
# reset every ceil(len(trainloader) / loss_iter) batches.
loss_iter = 1

def train(net, num_epochs, lr=0.1, momentum=0.9, verbose=True):
    """Train `net` on the global `trainloader` using SGD and cross-entropy loss.

    The model is moved to the GPU when the global flag `use_gpu` is set and
    CUDA is available. Every batch is then sent to whatever device the model
    actually lives on, so model and data can never end up on different devices.

    Args:
        net: the model to train (must have at least one parameter).
        num_epochs: number of passes over `trainloader`.
        lr: learning rate passed to `optim.SGD`.
        momentum: momentum passed to `optim.SGD`.
        verbose: if True, print the averaged loss each time it is recorded.

    Returns:
        list of float: the averaged training loss of each reporting window,
        in chronological order across all epochs.
    """
    history = []

    num_batches = len(trainloader)

    # number of batches per reporting window
    loss_iterations = int(np.ceil(num_batches / loss_iter))

    # transfer model to GPU
    if use_gpu and torch.cuda.is_available():
        net = net.cuda()

    # set the optimizer
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

    # Batches must be on the same device as the model. Deriving the device from
    # the model (instead of re-checking torch.cuda.is_available()) keeps this
    # correct when use_gpu is False on a CUDA-capable machine.
    device = next(net.parameters()).device

    # set to training mode
    net.train()

    # train the network
    for e in range(num_epochs):

        running_loss = 0.0
        running_count = 0.0

        for i, (inputs, labels) in enumerate(trainloader):

            # Clear all the gradient to 0
            optimizer.zero_grad()

            # transfer data to the model's device
            inputs = inputs.to(device)
            labels = labels.to(device)

            # forward propagation to get h
            outs = net(inputs)

            # compute loss
            loss = F.cross_entropy(outs, labels)

            # backpropagation to get dw
            loss.backward()

            # update w
            optimizer.step()

            # get the loss
            running_loss += loss.item()
            running_count += 1

            # record (and optionally display) the averaged loss at the end of
            # each reporting window and at the last batch of the epoch
            if i % loss_iterations == loss_iterations - 1 or i == num_batches - 1:
                train_loss = running_loss / running_count
                running_loss = 0.
                running_count = 0.
                if verbose:
                    print(f'[Epoch {e+1:2d}/{num_epochs:d} Iter {i+1:5d}/{num_batches}]: train_loss = {train_loss:.4f}')

                history.append(train_loss)

    return history

Define the evaluate function

In [0]:
def evaluate(net):
    """Print the classification accuracy of `net` on the global `testloader`.

    The model is switched to evaluation mode and left on its current device.
    Each batch is sent to the device the model lives on, so a CPU model is
    evaluated correctly even when CUDA is available. Accuracy is the number of
    correct top-1 predictions divided by `len(testloader.dataset)`.

    Args:
        net: the model to evaluate.
    """
    # set to evaluation mode
    net.eval()

    # Follow the model's device rather than torch.cuda.is_available(); a
    # module without parameters has no device, so its batches are left as-is.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else None

    # number of correctly classified samples
    running_corrects = 0

    # perform prediction (no need to compute gradient)
    with torch.no_grad():
        for inputs, targets in testloader:

            # transfer the batch to the model's device
            if device is not None:
                inputs = inputs.to(device)
                targets = targets.to(device)

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)
            running_corrects += (targets == predicted).sum().item()

    print('Accuracy = {:.2f}%'.format(100*running_corrects/len(testloader.dataset)))

## 1. Load CIFAR10 dataset


## 2. The ResNet18 model


### Network Architecture of ResNet18


### Customizing ResNet18


---
### Model 1: Training from scratch

Build the network **without** loading the pretrained model

Train the model

Evaluate the model

---
### Model 2: Finetuning the pretrained model

Load the model 

Train the model

Evaluate the network

---
### Model 3: As a fixed feature extractor

Load the model

By default, all the layers are set to `requires_grad=True`

We set `requires_grad=False` for all parameters except for the newly replaced layer `fc`, i.e., the last two parameters in `resnet.parameters()`.

Train the model

Evaluate the model

---
### Model 4: Finetuning the top few layers

We can also tune only the top few layers of the network. The following tunes all the layers in the block `layer4` as well as the `fc` layer.


Then, we freeze all the layers except for the `layer4` and `fc` layers

Train the model

Evaluate the model

### Plotting training loss

Lastly, we plot the training loss history for each of the training schemes above.

## Conclusion

You can try different network architectures and compare their performance.