# ResNet CIFAR-10


**(a)** 

(4 pts) Implement the ResNet-20 architecture by following Section 4.2 of the ResNet paper [3]. This lab is designed to have you learn how to implement a DNN model yourself. DO NOT borrow any code from online resources.

In [150]:
import argparse
import os, sys
import time
import datetime

# Import pytorch dependencies
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import numpy as np

In [151]:
# import math

class Block_Regular(nn.Module):
    """Residual block with an un-projected (identity) shortcut.

    Computes ``swish(bn2(conv2(swish(bn1(conv1(x))))) + x)``. Both
    convolutions are 3x3 with stride 1 and padding 1, so the spatial size of
    the feature map is preserved.

    Args:
        in_channels: Channel count of the input feature map.
        out_channels: Channel count produced by both convolutions. The input
            is added to the output without projection; every use in this
            file passes ``out_channels == in_channels``.
        size: Stored on the instance; not used by the block's computation.
    """

    def __init__(self, in_channels, out_channels, size):
        super(Block_Regular, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.size = size

        # 1st conv of the block: 3x3, stride 1 (resolution unchanged)
        ### e.g. in  = [16 x 32 x 32]
        ###      out = [16 x 32 x 32]
        self.conv1 = nn.Conv2d(self.in_channels, self.out_channels, 3, stride = 1, padding = 1)
        self.bn1 = nn.BatchNorm2d(self.out_channels)

        # 2nd conv of the block: 3x3, stride 1.
        # It consumes conv1's output, so its input width is out_channels
        # (not in_channels).
        ### e.g. in  = [16 x 32 x 32]
        ###      out = [16 x 32 x 32]
        self.conv2 = nn.Conv2d(self.out_channels, self.out_channels, 3, stride = 1, padding = 1)
        self.bn2 = nn.BatchNorm2d(self.out_channels)

    def swish(self, x):
        """Return ``x * sigmoid(x)`` element-wise."""
        return x * torch.sigmoid(x)

    def forward(self, x):
        """Apply the two conv/BN layers and add the input as the shortcut."""
        out = self.swish(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Shortcut: add the block input before the final activation.
        out = self.swish(torch.add(out, x))
        return out
        
        
class Block_Identity(nn.Module):
    """Downsampling residual block with a projected shortcut.

    Main path: 3x3 conv (stride 2) -> BN -> swish -> 3x3 conv -> BN.
    Shortcut:  1x1 conv (stride 2) -> BN.
    The two paths are summed and passed through swish.

    Args:
        in_channels: Channel count of the input feature map.
        out_channels: Channel count of the output feature map.
        size: Stored on the instance; not used by the block's computation.
    """

    def __init__(self, in_channels, out_channels, size):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.size = size

        # Main path, layer 1: stride-2 3x3 conv halves the resolution and
        # changes the channel count, e.g. [16 x 32 x 32] -> [32 x 16 x 16].
        self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1)
        self.bn3 = nn.BatchNorm2d(out_channels)

        # Main path, layer 2: stride-1 3x3 conv keeps shape,
        # e.g. [32 x 16 x 16] -> [32 x 16 x 16].
        self.conv4 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(out_channels)

        # Shortcut: stride-2 1x1 conv so the input matches the main path's
        # shape, e.g. [16 x 32 x 32] -> [32 x 16 x 16].
        self.conv_down = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2)
        self.bn_down = nn.BatchNorm2d(out_channels)

    def swish(self, x):
        """Return ``x * sigmoid(x)`` element-wise."""
        return x * torch.sigmoid(x)

    def forward(self, x):
        """Run the main path and the projected shortcut, then sum and activate."""
        main = self.conv3(x)
        main = self.swish(self.bn3(main))
        main = self.bn4(self.conv4(main))

        shortcut = self.bn_down(self.conv_down(x))

        merged = torch.add(main, shortcut)
        return self.swish(merged)

# Create the neural network module: ResNet
class ResNet(nn.Module):
    """20-layer residual network: stem conv, 3 stages of 3 blocks, linear head.

    Layout (each residual block holds two 3x3 convolutions on its main path):
      * stem:    3x3 conv, 3 -> 16 channels, BN, swish
      * stage 1: 3 x Block_Regular(16 -> 16)
      * stage 2: Block_Identity(16 -> 32, stride 2) + 2 x Block_Regular(32)
      * stage 3: Block_Identity(32 -> 64, stride 2) + 2 x Block_Regular(64)
      * head:    adaptive average pool to 1x1, flatten, Linear(64 -> 10)

    ``forward`` takes a batch with 3 input channels and returns one row of
    10 scores per sample.
    """

    def __init__(self):
        super(ResNet, self).__init__()

        # First megablock which does not change the size or number of channels
        self.b1 = Block_Regular(16, 16, 32)
        self.b2 = Block_Regular(16, 16, 32)
        self.b3 = Block_Regular(16, 16, 32)

        # Second megablock which takes in 16 channels and outputs 32 channels, H/W reduced 32 --> 16
        self.b4 = Block_Identity(16, 32, 32)
        self.b5 = Block_Regular(32, 32, 16)
        self.b6 = Block_Regular(32, 32, 16)

        # Third megablock which takes in 32 channels and outputs 64 channels, H/W reduced 16--> 8
        self.b7 = Block_Identity(32, 64, 16)
        self.b8 = Block_Regular(64, 64, 8)
        self.b9 = Block_Regular(64, 64, 8)

        self.fc = nn.Linear(64, 10)

        # Initial Conv
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.bn1 = torch.nn.BatchNorm2d(16)
        self.aap = nn.AdaptiveAvgPool2d((1,1))

        # Must run after *every* sub-module has been registered; otherwise
        # modules created later (the stem conv/BN above) would silently keep
        # PyTorch's default initialization.
        self._initialize_weights()

    def swish(self, x):
        """Return ``x * sigmoid(x)`` element-wise."""
        return x * torch.sigmoid(x)

    def forward(self,x):
        """Map an image batch to class scores of shape ``(batch, 10)``."""
        # Initial conv
        x = self.swish(self.bn1(self.conv1(x)))

        # Blocks
        x = self.b1(x)
        x = self.b2(x)
        x = self.b3(x)
        x = self.b4(x)
        x = self.b5(x)
        x = self.b6(x)
        x = self.b7(x)
        x = self.b8(x)
        x = self.b9(x)

        # Pooling and FC
        x = self.aap(x)
        # Flatten [batch, 64, 1, 1] -> [batch, 64] for the linear layer
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def _initialize_weights(self):
        """Re-initialize every Conv2d, BatchNorm2d and Linear sub-module.

        Conv/Linear weights get Kaiming-normal (fan-in) values and zero bias;
        BatchNorm gets weight 1 and bias 0.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode = 'fan_in')
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode = 'fan_in')
                m.bias.data.zero_()

In [152]:
# Choose the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Build the network and move its parameters/buffers to that device.
net = ResNet().to(device)

print("Train on GPU..." if device == 'cuda' else "Train on CPU...")

Train on GPU...


In [153]:
# Sanity check
# Push one random batch (5 samples, 3 x 32 x 32 each) through the network and
# verify that the output has one row of 10 scores per sample.
data_check = torch.randn(size=[5,3,32,32])
data_check = data_check.to(device)
# Call the module itself rather than net.forward(): nn.Module.__call__ is the
# supported entry point and also runs any registered hooks.
out_check = net(data_check)
# Check output shape
assert out_check.detach().shape == (5, 10)
print("Forward pass successful")

Forward pass successful


In [154]:
# Preprocessing pipelines for the training and validation splits.
# Both end with the same per-channel normalization (mean, std); the values
# are presumably CIFAR-10 channel statistics -- confirm against the dataset.
transform_train = transforms.Compose(
    [
        # Augmentation (training only): random 32x32 crop after 4-pixel
        # padding, then a horizontal flip with probability 0.5.
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

# Validation: no augmentation, only tensor conversion + normalization.
transform_val = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

In [155]:
# You cannot change this line.
from tools.dataloader import CIFAR10

# Dataset location and batch sizes
DATAROOT = "./data"
TRAIN_BATCH_SIZE = 128
VAL_BATCH_SIZE = 100

# Training split: augmented samples, reshuffled every epoch.
trainset = CIFAR10(
    root=DATAROOT,
    train=True,
    download=True,
    transform=transform_train,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

# Validation split: fixed order, no augmentation.
valset = CIFAR10(
    root=DATAROOT,
    train=False,
    download=True,
    transform=transform_val,
)
valloader = torch.utils.data.DataLoader(
    valset,
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

Using downloaded and verified file: ./data/cifar10_trainval_F20.zip
Extracting ./data/cifar10_trainval_F20.zip to ./data
Files already downloaded and verified
Training dataset has 45000 examples!
Using downloaded and verified file: ./data/cifar10_trainval_F20.zip
Extracting ./data/cifar10_trainval_F20.zip to ./data
Files already downloaded and verified
Validation dataset has 5000 examples!


In [156]:
# ---- Training hyper-parameters ----
INITIAL_LR = 0.2     # learning rate used when training starts from scratch
MOMENTUM = 0.9       # SGD momentum
REG = 1e-5           # regularization strength (passed to SGD as weight_decay)
EPOCHS = 105         # total number of training epochs

# Step learning-rate schedule: the rate is multiplied by DECAY at every
# non-zero epoch index that is a multiple of DECAY_EPOCHS.
DECAY_EPOCHS = 25
DECAY = 0.1

In [157]:
# Directory that checkpoints are written to during training.
CHECKPOINT_PATH = "./saved_model"
# When True, any existing checkpoint is ignored and training starts at epoch 0.
TRAIN_FROM_SCRATCH = True
# Checkpoint file to resume from (model weights, epoch id and learning rate).
CKPT_PATH = CHECKPOINT_PATH + "/model.h5"
def get_checkpoint(ckpt_path):
    """Load a checkpoint file, returning ``None`` if it cannot be loaded.

    Args:
        ckpt_path: Path of a file previously written with ``torch.save``.

    Returns:
        The deserialized checkpoint object, or ``None`` when loading fails
        for any reason (the error is printed, not raised).
    """
    try:
        # Map all tensors to CPU while deserializing so that a checkpoint
        # written on a GPU machine can still be opened on a CPU-only one;
        # load_state_dict() later copies values onto the model's own device.
        ckpt = torch.load(ckpt_path, map_location='cpu')
    except Exception as e:
        # Best effort by design: the caller treats None as "no checkpoint".
        print(e)
        return None
    return ckpt

# Decide where training starts: resume from the checkpoint only when one was
# loaded AND a fresh start was not requested.
ckpt = get_checkpoint(CKPT_PATH)
if ckpt is not None and not TRAIN_FROM_SCRATCH:
    print("Successfully loaded checkpoint: %s" %CKPT_PATH)
    # Restore the weights and continue with the epoch after the saved one.
    net.load_state_dict(ckpt['net'])
    start_epoch = ckpt['epoch'] + 1
    current_learning_rate = ckpt['lr']
    print("Starting from epoch %d " %start_epoch)
else:
    # Only report a missing checkpoint if resuming was actually wanted.
    if not TRAIN_FROM_SCRATCH:
        print("Checkpoint not found.")
    print("Training from scratch ...")
    start_epoch = 0
    current_learning_rate = INITIAL_LR

print("Starting from learning rate %f:" %current_learning_rate)

[Errno 2] No such file or directory: './saved_model/model.h5'
Training from scratch ...
Starting from learning rate 0.200000:


In [158]:
# Loss: cross-entropy between the network's scores and the integer targets.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum; the regularization strength REG is applied
# through the optimizer's weight_decay term.
optimizer = optim.SGD(
    params=net.parameters(),
    lr=current_learning_rate,
    momentum=MOMENTUM,
    weight_decay=REG,
    nesterov=False,
)

In [159]:
# Start the training/validation process
# The process should take about 5 minutes on a GTX 1070-Ti
# if the code is written efficiently.
#
# Each epoch: one pass over trainloader (SGD updates), one pass over
# valloader (no gradients), a step learning-rate decay check, and a
# checkpoint whenever validation accuracy improves.

# Coefficient left over from a disabled L1-regularization experiment;
# the loop below does not use it.
lamb = 0.5

global_step = 0      # optimizer steps taken so far
best_val_acc = 0     # best validation accuracy seen in this run

# Per-epoch accuracy history (training, validation)
train_acc_l, val_acc_l = [],[]
from torch.autograd import Variable


for i in range(start_epoch, EPOCHS):
    print(datetime.datetime.now())
    # Switch to train mode
    net.train()
    print("Epoch %d:" %i)

    total_examples = 0
    correct_examples = 0

    train_loss = 0
    train_acc = 0

    # Train the training dataset for 1 epoch.
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Copy inputs to device
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Zero the gradient
        optimizer.zero_grad()
        # Generate output
        outputs = net(inputs)

        loss = criterion(outputs, targets)

        # Now backward loss
        loss.backward()
        # Print initial loss
        if batch_idx == 0:
            print(f'loss: {loss}')
        # Apply gradient
        optimizer.step()
        # Calculate predicted labels
        _, predicted = outputs.max(1)
        total_examples += predicted.size(0)
        correct_examples += predicted.eq(targets).sum().item()
        # Accumulate a plain Python float: summing the loss tensors
        # themselves would keep every batch's autograd history referenced
        # until the end of the epoch.
        train_loss += loss.item()
        global_step += 1

    avg_loss = train_loss / len(trainloader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))
    train_acc_l.append(avg_acc)
    print(datetime.datetime.now())
    # Validate on the validation dataset
    print("Validation...")
    total_examples = 0
    correct_examples = 0

    net.eval()

    val_loss = 0
    val_acc = 0
    # Disable gradient during validation
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            # Copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Generate output from the DNN.
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            # Calculate predicted labels
            _, predicted = outputs.max(1)
            total_examples += predicted.size(0)
            correct_examples += predicted.eq(targets).sum().item()
            val_loss += loss.item()

    avg_loss = val_loss / len(valloader)
    avg_acc = correct_examples / total_examples
    val_acc_l.append(avg_acc)

    print("Validation loss: %.4f, Validation accuracy: %.4f\n" % (avg_loss, avg_acc))

    # Handle the learning rate scheduler: multiply the rate by DECAY at every
    # non-zero epoch index that is a multiple of DECAY_EPOCHS.
    if i % DECAY_EPOCHS == 0 and i != 0:
        current_learning_rate = current_learning_rate * DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" %current_learning_rate)

    # Save for checkpoint (only when validation accuracy improved)
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        os.makedirs(CHECKPOINT_PATH, exist_ok=True)
        print("Saving ...\n")
        state = {'net': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_PATH, 'model.h5'))

print("Optimization finished.")

2020-09-20 20:31:47.100227
Epoch 0:
loss: 2.573115348815918
Training loss: 1.6330, Training accuracy: 0.3905
2020-09-20 20:32:12.840393
Validation...
Validation loss: 1.3850, Validation accuracy: 0.4878

Saving ...

2020-09-20 20:32:14.793291
Epoch 1:
loss: 1.3941386938095093
Training loss: 1.1344, Training accuracy: 0.5905
2020-09-20 20:32:39.330969
Validation...
Validation loss: 1.0271, Validation accuracy: 0.6454

Saving ...

2020-09-20 20:32:41.057869
Epoch 2:
loss: 1.1286301612854004
Training loss: 0.8829, Training accuracy: 0.6890
2020-09-20 20:33:04.294330
Validation...
Validation loss: 0.9909, Validation accuracy: 0.6788

Saving ...

2020-09-20 20:33:05.876150
Epoch 3:
loss: 0.7484288811683655
Training loss: 0.7459, Training accuracy: 0.7383
2020-09-20 20:33:29.538434
Validation...
Validation loss: 0.8309, Validation accuracy: 0.7262

Saving ...

2020-09-20 20:33:31.133917
Epoch 4:
loss: 0.6775314211845398
Training loss: 0.6582, Training accuracy: 0.7709
2020-09-20 20:33:53.528

## Running on Test Data

In [172]:
# Test-time preprocessing: tensor conversion followed by per-channel
# normalization (same constants as the validation pipeline in this file).
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

In [173]:
# NOTE: this rebinds the name CIFAR10 to the test-set loader class.
from tools.dataloader_test import CIFAR10

# Test split, served in fixed order in batches of 100.
testset = CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=100,
    shuffle=False,
    num_workers=4,
)

Downloading https://www.dropbox.com/s/vaeeg2zf6kovxpp/cifar10_test_F20.zip?dl=1 to ./data/cifar10_test_F20.zip


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar10_test_F20.zip to ./data
Files already downloaded and verified
Testing dataset has 10000 examples!


In [227]:
# Rebuild the architecture and restore the saved weights.
model = ResNet()
# map_location lets a checkpoint written on a GPU be restored on whatever
# device this session uses (including CPU-only machines).
model.load_state_dict(torch.load('saved_model/ResNet_best.h5', map_location=device)['net'])
model.to(device)

ResNet(
  (b1): Block_Regular(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (b2): Block_Regular(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (b3): Block_Regular(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), pa

In [299]:
# Predict labels for the test set with the restored model.
model.eval()
pred = []  # one array of predicted class indices per batch

# Inference only, so no autograd bookkeeping is needed.
with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(testloader):
        # Copy inputs to device
        inputs = inputs.to(device)
        # Generate output from the restored `model` (the network that was
        # loaded from the checkpoint and switched to eval mode above).
        outputs = model(inputs)
        # Calculate predicted labels: index of the largest score per sample
        _, predicted = outputs.max(1)
        pred.append(predicted.cpu().numpy())

In [300]:
# Join the per-batch predictions into one flat 1-D array. Concatenating
# (instead of np.array(...).flatten()) also works when the batches do not
# all have the same length, e.g. a smaller final batch.
pred = np.concatenate([np.ravel(p) for p in pred])

# Map each sample id (its position in test order) to its predicted label.
IDs = list(range(len(pred)))
dictionary = dict(zip(IDs, pred))

In [304]:
import pandas as pd

# One row per sample: the index holds the sample id, 'Label' the prediction.
df = pd.DataFrame(
    data=list(dictionary.values()),
    index=list(dictionary.keys()),
    columns=['Label'],
)

In [306]:
# Name the index 'Id', turn it into a regular column, and write the
# two-column (Id, Label) table to disk without the positional index.
df = df.rename_axis('Id').reset_index()
df.to_csv('sample.csv', index=False)