In [1]:
#model.py


import torch
import torch.nn as nn


class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        """
        My custom ResidualBlock

        [input]
        * in_channels  : input channel number
        * out_channels : output channel number
        * kernel_size  : kernel size
        * stride       : stride size

        [hint]
        * See the instruction PDF for details
        * Set the bias argument to False
        """
        
        ## Define all the layers
        # ----- TODO -----

        self.conv2d_1 = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size, stride, 1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU()
        )

        self.conv2d_2 = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, kernel_size, 1, 1),
            nn.BatchNorm2d(out_channels)
        )

        self.shortcut = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 1, stride, 0),
            nn.BatchNorm2d(out_channels)
        )

        self.Relu = nn.ReLU()

        

    def forward(self, x):
       
        # ----- TODO -----
        output = self.conv2d_1(x)
        output1 = self.conv2d_2(output)
        shortcut = self.shortcut(x)
        output2 = shortcut + output1
        output = self.Relu(output2)
        conv_layer = output1
        return output, conv_layer


class MyResnet(nn.Module):
    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()

        """
        My custom ResNet.

        [input]
        * in_channels  : input channel number
        * num_classes  : number of classes

        [hint]
        * See the instruction PDF for details
        * Set the bias argument to False
        """
        
        ## Define all the layers
        # ----- TODO -----

        self.conv_initial = nn.Conv2d(in_channels=in_channels,
                                      out_channels=64,
                                      kernel_size=3,
                                      stride = 1,
                                      padding = 1)
        
        self.batch_norm_initial = nn.BatchNorm2d(num_features=64)
        self.initial_Relu = nn.ReLU()

        self.block1 = ResidualBlock(in_channels=64,
                                    out_channels=128,
                                    kernel_size=3,
                                    stride =2)
        
        self.block2 = ResidualBlock(in_channels=128,
                                    out_channels=256,
                                    kernel_size=3,
                                    stride=2)
        
        self.block3 = ResidualBlock(in_channels=256,
                                    out_channels=512,
                                    kernel_size=3,
                                    stride=2)
                
        self.AvgPool_2d = nn.AvgPool2d(kernel_size=4)

        self.Linear_1d = nn.Linear(512, out_features=num_classes)
        self.Flatten = nn.Flatten()
        self.softmax = nn.Softmax()

        #raise NotImplementedError


    def forward(self, x, return_embed=False):
        """
        Forward path.

        [input]
        * x             : input data
        * return_embed  : whether return the feature map of the last conv layer or not

        [output]
        * output        : output data
        * embedding     : the feature map after the last conv layer (optional)
        
        [hint]
        * See the instruction PDF for network details
        * You want to set return_embed to True if you are dealing with CAM
        """

        # ----- TODO -----
        output = self.conv_initial(x)
        output = self.batch_norm_initial(output)
        output = self.initial_Relu(output)
        output, conv_layer = self.block1(output)
        output, conv_layer = self.block2(output)
        output, conv_layer = self.block3(output)
        output = self.AvgPool_2d(output)
        output = self.Flatten(output)
        #print(output.shape)
        output = self.Linear_1d(output)
        output =  self.softmax(output)
        
        if return_embed == True:
            return output, conv_layer
        else:
            return output


def init_weights_kaiming(m):

    """
    Kaming initialization.

    [input]
    * m : torch.nn.Module

    [hint]
    * Refer to the course slides/recitations for more details
    * Initialize the bias term in linear layer by a small constant, e.g., 0.01
    """

    if isinstance(m, nn.Conv2d):
        # ----- TODO -----
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

    elif isinstance(m, nn.Linear):
        # ----- TODO -----
        nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)


if __name__ == "__main__":

    # set model
    net = MyResnet(in_channels=3, num_classes=10)
    net.apply(init_weights_kaiming)
    
    # sanity check
    input = torch.randn((64, 3, 32, 32), requires_grad=True)
    output = net(input)
    print(output.shape)

torch.Size([64, 10])


  return self._call_impl(*args, **kwargs)


In [2]:
#train.py


import os
import time
import random
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
#from model import MyResnet, init_weights_kaiming
import torchvision.transforms as transforms

In [3]:
def setup_seed(seed):
    
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [4]:
# set param
setup_seed(18786)
batch_size = 128
num_epoch = 2
lr = 1e-3
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Set model
## Set the device to Cuda if needed
## Initialize all the parameters
# ----- TODO -----
net = MyResnet()


## Create the criterion and optimizer
# ----- TODO -----
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params = net.parameters(), lr=lr)

## Load dataset
normalize_param = dict(
    mean=[0.485, 0.456, 0.406], 
    std=[0.229, 0.224, 0.225]
    )

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(32, scale=(0.8, 1.0)), 
    transforms.RandomHorizontalFlip(), transforms.ToTensor(), 
    transforms.Normalize(**normalize_param,inplace=True)
    ])

val_transform = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize(**normalize_param,inplace=True)
    ])

# ----- TODO -----
trainset = torchvision.datasets.CIFAR10(root='handout/code/deliverable3-5',
                                        train=True,
                                        transform=train_transform,
                                        download=True)
trainloader = DataLoader(dataset=trainset, 
                         batch_size=batch_size,
                         shuffle=True)
valset = torchvision.datasets.CIFAR10(root='handout/code/deliverable3-5',
                                      train=False,
                                      transform=val_transform,
                                      download=True)
valloader = DataLoader(valset, batch_size, False)

classes = ('plane', 'car', 'bird', 'cat',
        'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

print(f"LOAD DATASET: TRAIN/VAL | {len(trainset)}/{len(valset)}")


net.to(DEVICE)
## Training and evaluation
## Feel free to record the loss and accuracy numbers
## Hint: you could separate the training and evaluation 
## process into 2 different functions for each epoch
for epoch in range(num_epoch): 

    # ----- TODO -----
    total_loss = 0
    correct = 0
    total_vals = 0
    for i, data in enumerate(trainloader):
        inputs, labels = data
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        output = net(inputs)
        values, prediction_output = output.max(1)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        correct_vals = torch.eq(prediction_output, labels)
        correct += torch.sum(correct_vals).item()
        total_vals+=labels.size(0)
        total_loss+=loss.item()

    val_loss = 0
    correct_val = 0
    total_vals_val = 0
    for i, data in enumerate(valloader):
        inputs, labels = data
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        output = net(inputs)
        vals, preds = output.max(1)
        true = torch.eq(preds, labels)
        correct_val += torch.sum(true).item()
        total_vals_val+=labels.size(0)

    print(f'train accuracy = {correct/total_vals}')
    print(f'val accuracy = {correct_val/total_vals_val}')

print('Finished Training')

Files already downloaded and verified
Files already downloaded and verified
LOAD DATASET: TRAIN/VAL | 50000/10000


  return self._call_impl(*args, **kwargs)


train accuracy = 0.42854
val accuracy = 0.5108
train accuracy = 0.56066
val accuracy = 0.6122
Finished Training


In [8]:
def CAM(net, inputs, labels, idx):
    
    """
    Calculate the CAM.

    [input]
    * net     : network
    * inputs  : input data
    * labels  : label data
    * idx     : the index of the chosen image in a minibatch, range: [0, batch_size-1]

    [output]
    * cam_img : CAM result
    * img     : raw image

    [hint]
    * Inputs and labels are in a minibatch form
    * You can choose one images from them for CAM by idx.
    """
    
    net.eval()
    net.to(DEVICE)
    output, conv_layer_final = net(inputs, return_embed = True)
    
    gap = nn.AvgPool2d(conv_layer_final.size()[2])
    conv_gap = gap(conv_layer_final) #performed global average pooling on the images
    conv_gap = conv_gap.view(conv_gap.size(0), -1) 
    
    output = net.Linear_1d(conv_gap)
    weights = net.Linear_1d.weight #weights for the linear layer
    
    #n=512, N=10, batch_size = 128
    cam = torch.matmul(conv_gap, weights.T)
    cam_idx = cam[idx]
    print(cam_idx.size())
    return cam_idx

In [9]:
import torch.nn.functional as F

## Fetch the test image for CAM
dataiter = iter(valloader)
inputs, labels = next(dataiter)
inputs, label = inputs.to(DEVICE), labels.to(DEVICE)
cam_idx = CAM(net, inputs, labels, idx=0) # idx could be changed



## Visualization
## Plot the loss and acc curves
# ----- TODO -----


## Plot the CAM resuls as well as raw images
## Hint: You will want to resize the CAM result.
# ----- TODO -----
'''cam = F.interpolate(cam_idx.unsqueeze(0).unsqueeze(0), size=(32,32), 
                            mode='bilinear', align_corners=False)
img = inputs[0].permute(1, 2, 0).cpu().numpy()
ax1.imshow(img)
ax1.axis('off')
ax1.set_title('Original Image')'''

torch.Size([10])


"cam = F.interpolate(cam_idx.unsqueeze(0).unsqueeze(0), size=(32,32), \n                            mode='bilinear', align_corners=False)\nimg = inputs[0].permute(1, 2, 0).cpu().numpy()\nax1.imshow(img)\nax1.axis('off')\nax1.set_title('Original Image')"