# CE-40719: Deep Learning
## HW3 - CNN / CNN Case Studies / CNN Applications
(18 points)

#### Name: Seyed Shayan Nazemi
#### Student No.: 98209037

In this exercise we are going to implement a simple residual network architecture to classify images from the CIFAR-10 dataset. Here we give you a suggested architecture, but you are allowed to make changes and experiment to get better results. Explain your ideas or reference the papers that you take the ideas from. You are allowed to use out-of-the-box PyTorch modules in `torch.nn`.

## Architecture

_All convolution layers have `3 * 3` kernel,  `padding=1`, batch normalization and relu activation_

__ResidualBlock:__ _in_channel, out_channel, stride_

- Conv(in_channel, out_channel, stride)
- Conv(out_channel, out_channel, stride=1)

`*` This block has a residual connection. To match the dimensions of the output and the residual, use a `1 * 1` convolution with the same stride.

__ResidualLayer:__ _in_channel, out_channel, stride_

- ResidualBlock(in_channel, out_channel, stride)
- ResidualBlock(out_channel, out_channel, stride=1)

__ResidualNetwork__:
- Conv(3, 64, stride=1)
- ResidualLayer(64, 64, stride=1)
- ResidualLayer(64, 128, stride=2)
- ResidualLayer(128, 256, stride=2)
- ResidualLayer(256, 512, stride=2)
- AveragePool(4, 4)
- Linear(512, 10)


In [0]:
import numpy as np
import torch
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms, utils
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

In [0]:
# Data-loading and training hyper-parameters.
BATCH_SIZE = 32      # samples per mini-batch
N_EPOCHS = 10        # full passes over the training split
VAL_SIZE = 0.1       # fraction of the training set held out for validation
NUM_WORKERS = 4      # DataLoader worker processes

# Use the current CUDA device when one is available, otherwise the CPU.
if not torch.cuda.is_available():
    DEVICE = torch.device('cpu', 0)
else:
    cuda_device = torch.cuda.current_device()
    DEVICE = torch.device('cuda', cuda_device)

In [3]:
# Convert images to tensors, then normalize every channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 train and test sets, downloaded into ./data when missing.
train_data = datasets.CIFAR10(root='data', train=True, download=True, transform=transform)
test_data = datasets.CIFAR10(root='data', train=False, download=True, transform=transform)

# Shuffle the training indices once and hold out the first VAL_SIZE fraction
# for validation; the remainder is used for training.
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(num_train * VAL_SIZE))
val_index = indices[:split]
train_index = indices[split:]
train_sampler = SubsetRandomSampler(train_index)
val_sampler = SubsetRandomSampler(val_index)

# Train and validation loaders both read from train_data, restricted to
# disjoint index subsets by their samplers.
train_loader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    sampler=train_sampler,
    num_workers=NUM_WORKERS,
)
val_loader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    sampler=val_sampler,
    num_workers=NUM_WORKERS,
)
test_loader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


In [0]:
class ConvolutionLayer(nn.Module):
    """Conv2d followed by BatchNorm2d and ReLU.

    Args:
        in_channel: number of input channels of the convolution.
        out_channel: number of output channels (also the batch-norm width).
        stride: stride of the convolution.
        kernel_size: convolution kernel size; defaults to ``(3, 3)``.
        padding: zero-padding passed to the convolution; defaults to ``1``.
    """

    def __init__(self, in_channel, out_channel, stride, kernel_size=(3, 3), padding=1):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channel,
            out_channel,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )
        self.batch = nn.BatchNorm2d(num_features=out_channel)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply convolution, batch normalization and ReLU, in that order."""
        return self.relu(self.batch(self.conv(x)))

In [0]:
class ResidualBlock(nn.Module):
    """Two stacked 3x3 ``ConvolutionLayer``s plus a projected shortcut.

    The main path is ``conv1`` (with the given stride) followed by ``conv2``
    (stride 1). The shortcut path passes the block input through a 1x1
    ``ConvolutionLayer`` with the same stride so that its output matches the
    main path in channels and spatial size. The two paths are summed.

    Args:
        in_channel: number of channels of the block input.
        out_channel: number of channels produced by the block.
        stride: stride applied by ``conv1`` and by the shortcut projection.
    """

    def __init__(self, in_channel, out_channel, stride):
        super(ResidualBlock, self).__init__()
        self.conv1 = ConvolutionLayer(in_channel, out_channel, stride=stride)
        self.conv2 = ConvolutionLayer(out_channel, out_channel, stride=1)

        # 1x1 projection applied to the input on the shortcut path.
        self.residual_conv = ConvolutionLayer(in_channel, out_channel, stride=stride, kernel_size=(1, 1), padding=0)

    def forward(self, x):
        """Return ``conv2(conv1(x)) + residual_conv(x)``."""
        out = self.conv1(x)
        out = self.conv2(out)

        x_res = self.residual_conv(x)

        # Out-of-place add on purpose: `out` is the output of a ReLU, and on
        # PyTorch versions where ReLU's backward reads that output an in-place
        # `out += x_res` makes backward() fail with "modified by an inplace
        # operation". The forward result is numerically the same.
        return out + x_res

In [0]:
class ResidualLayer(nn.Module):
    """Two ``ResidualBlock``s applied one after the other.

    Args:
        in_channel: number of channels of the layer input.
        out_channel: number of channels produced by both blocks.
        stride: stride of the first block; the second block uses stride 1.
    """

    def __init__(self, in_channel, out_channel, stride):
        super().__init__()
        self.res_block1 = ResidualBlock(in_channel, out_channel, stride=stride)
        self.res_block2 = ResidualBlock(out_channel, out_channel, stride=1)

    def forward(self, x):
        """Feed ``x`` through the first block and then the second."""
        return self.res_block2(self.res_block1(x))

In [0]:
class Model(nn.Module):
    """Residual CNN classifier assembled from the specs in ``self.config``.

    The network is a stem ``ConvolutionLayer``, four ``ResidualLayer``s,
    an average pool, and a linear layer with 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # Positional-argument tuples for each sub-module, keyed by attribute name.
        # For the conv/res entries the tuple is (in_channel, out_channel, stride).
        self.config = {
            'conv': (3, 64, 1),
            'res1': (64, 64, 1),
            'res2': (64, 128, 1),
            'res3': (128, 256, 2),
            'res4': (256, 512, 2),
            'pool': (4, 4),
            'linear': (2048, 10),
        }
        cfg = self.config

        self.conv = ConvolutionLayer(*cfg['conv'])
        self.res1 = ResidualLayer(*cfg['res1'])
        self.res2 = ResidualLayer(*cfg['res2'])
        self.res3 = ResidualLayer(*cfg['res3'])
        self.res4 = ResidualLayer(*cfg['res4'])
        self.pool = nn.AvgPool2d(kernel_size=cfg['pool'])
        self.linear = nn.Linear(*cfg['linear'])

    def forward(self, x):
        """Return the 10 class scores for each image in the batch ``x``."""
        features = x
        for stage in (self.conv, self.res1, self.res2, self.res3, self.res4, self.pool):
            features = stage(features)

        # Flatten to rows of 2048 features, the input width of self.linear.
        # NOTE(review): presumably 512 channels x 2 x 2 for 32x32 inputs - confirm.
        flat = features.view(-1, 2048)
        return self.linear(flat)

In [0]:
def _evaluate_validation(model, val_loader, criterion, device):
    """Return ``(mean_loss, accuracy)`` of ``model`` over ``val_loader``.

    The model is switched to eval mode for the pass and restored to whatever
    mode it was in before the call.
    """
    was_training = model.training
    model.eval()
    loss_sum = 0
    num_correct = 0
    with torch.no_grad():
        for data_val, label_val in val_loader:
            label_val = label_val.to(device)  # moved once, used for loss and accuracy
            output_val = model(data_val.to(device))
            batch_loss = criterion(output_val, label_val)
            # criterion's value is weighted by batch size so the final
            # division by the sample count gives a per-sample mean.
            loss_sum += batch_loss.item() * data_val.shape[0]
            num_correct += torch.sum(torch.argmax(output_val, dim=1) == label_val).item()
    model.train(mode=was_training)

    n_val = len(val_loader.sampler)
    return loss_sum / n_val, num_correct / n_val


def train(model, optimizer, train_loader, val_loader, criterion, device, n_epochs):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Every 100 training iterations (including iteration 0 of each epoch) the
    model is evaluated on ``val_loader``. If the validation loss is less than
    or equal to the best seen so far, a message is printed and the model's
    ``state_dict`` is saved to ``'model.pt'`` in the current directory. The
    mean training loss is printed at the end of every epoch.

    Args:
        model: the ``nn.Module`` to optimize; updated in place.
        optimizer: optimizer built over ``model``'s parameters.
        train_loader: loader yielding ``(data, label)`` training batches.
        val_loader: loader yielding ``(data, label)`` validation batches.
        criterion: callable ``criterion(output, label)`` returning a scalar loss.
        device: device that batches are moved to before the forward pass.
        n_epochs: number of passes over ``train_loader``.

    Returns:
        None.
    """
    # float('inf') rather than np.Inf: that alias was removed in NumPy 2.0.
    val_loss_min = float('inf')
    for epoch in range(n_epochs):
        train_loss = 0
        model.train() # set the model state to train state - specially for dropout and batchnorm

        for iteration, (data, label) in enumerate(train_loader):
            optimizer.zero_grad()
            output = model(data.to(device))
            loss = criterion(output, label.to(device))
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * data.shape[0]

            if iteration % 100 == 0:
                val_loss, val_acc = _evaluate_validation(model, val_loader, criterion, device)
                if val_loss <= val_loss_min:
                    print('Iteration : {:4d} \t Validation Accuracy : {:.6f} - Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(iteration, val_acc, val_loss_min, val_loss))
                    torch.save(model.state_dict(), 'model.pt')
                    val_loss_min = val_loss

        train_loss /= len(train_loader.sampler)
        print('Epoch : {} \t Training loss {:.6f}'.format(epoch + 1, train_loss))

In [0]:
def test(model, test_loader, device):
    """Print the classification accuracy of ``model`` over ``test_loader``.

    The model is put in eval mode and run without gradient tracking. A
    prediction is the arg-max over dimension 1 of the model output, and
    accuracy is the number of correct predictions divided by
    ``len(test_loader.sampler)``.

    Args:
        model: the ``nn.Module`` to evaluate; left in eval mode afterwards.
        test_loader: loader yielding ``(data, target)`` batches.
        device: device that batches are moved to before the forward pass.

    Returns:
        None; the accuracy is only printed, as a percentage.
    """
    num_correct = 0
    model.eval()
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data.to(device))
            pred = torch.argmax(output, dim=1)
            num_correct += torch.sum(pred == target.to(device)).item()

    test_acc = num_correct / len(test_loader.sampler)
    print('Test Accuracy: {:.2f}%\n'.format(test_acc * 100))

In [11]:
# Build a fresh network on the selected device.
model = Model().to(DEVICE)

# Cross-entropy loss, optimized with Adam at its default settings.
criterion = F.cross_entropy
optimizer = optim.Adam(model.parameters())

train(
    model,
    optimizer,
    train_loader,
    val_loader,
    criterion,
    DEVICE,
    N_EPOCHS,
)

Iteration :    0 	 Validation Accuracy : 0.100600 - Validation loss decreased (inf --> 2.302870). Saving model ...
Iteration :  100 	 Validation Accuracy : 0.282400 - Validation loss decreased (2.302870 --> 1.898354). Saving model ...
Iteration :  300 	 Validation Accuracy : 0.389600 - Validation loss decreased (1.898354 --> 1.750025). Saving model ...
Iteration :  500 	 Validation Accuracy : 0.425800 - Validation loss decreased (1.750025 --> 1.572589). Saving model ...
Iteration :  600 	 Validation Accuracy : 0.416600 - Validation loss decreased (1.572589 --> 1.563126). Saving model ...
Iteration :  700 	 Validation Accuracy : 0.443000 - Validation loss decreased (1.563126 --> 1.518974). Saving model ...
Iteration :  900 	 Validation Accuracy : 0.470400 - Validation loss decreased (1.518974 --> 1.431534). Saving model ...
Iteration : 1100 	 Validation Accuracy : 0.519000 - Validation loss decreased (1.431534 --> 1.352346). Saving model ...
Iteration : 1200 	 Validation Accuracy : 0.54

In [12]:
# Rebuild the network and restore the weights stored in 'model.pt'.
model = Model()
model.to(DEVICE)
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved
# on a GPU can still be restored on a CPU-only machine.
model.load_state_dict(torch.load('model.pt', map_location=DEVICE))
test(model, test_loader, DEVICE)

Test Accuracy: 83.66%

