# Problem 2
Convolutional Networks: Many techniques correspond to incorporating prior knowledge
about the structure of the data into the parameterization of the model. The convolution operation, for
example, is designed for visual imagery.


Instructions: For this part of the assignment we will train a convolutional network on MNIST
for 10 epochs using your favorite deep learning framework, such as PyTorch or TensorFlow. Plot the
train and validation errors at the end of each epoch for the model.
1. Come up with a CNN architecture with roughly the same number of parameters as the MLP
trained in Problem 1 and describe it.
2. Compare the performances of CNN vs MLP. Comment.
You may use the architecture described here as a reference: https://github.com/MaximumEntropy/welcome_tutorials/tree/pytorch/pytorch .

In [1]:
from __future__ import print_function
import time
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F

In [3]:
import torchvision
import torchvision.transforms

In [4]:
# The only preprocessing step applied to each MNIST image is ToTensor().
mnist_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

# Training split; downloaded into ./data if it is not already there.
mnist_train = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=mnist_transforms,
    download=True,
)

# Held-out split, built with the same transform pipeline.
mnist_test = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=mnist_transforms,
    download=True,
)

In [5]:
# Training batches are drawn in a fresh random order on every pass.
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=64, shuffle=True, num_workers=12)
# Evaluation only counts correct predictions, which does not depend on sample
# order, so the held-out loader is left unshuffled to keep evaluation
# deterministic and avoid a needless permutation on every pass.
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=64, shuffle=False, num_workers=12)

In [6]:
class Block(nn.Module):
    """Residual unit that keeps both channel count and spatial size unchanged.

    The residual branch ``self.c`` is conv3x3 -> ReLU -> conv3x3 (stride 1,
    padding 1). Its output is added to the input and passed through a final
    ReLU.

    Args:
        n_channels: Number of input (and output) channels of both convolutions.
    """

    def __init__(self, n_channels):
        super(Block, self).__init__()
        first_conv = nn.Conv2d(n_channels, n_channels, kernel_size=3, stride=1, padding=1)
        second_conv = nn.Conv2d(n_channels, n_channels, kernel_size=3, stride=1, padding=1)
        self.c = nn.Sequential(first_conv, nn.ReLU(True), second_conv)
        self.relu = nn.ReLU(True)

    def forward(self, x):
        """Return ``relu(c(x) + x)`` for an input ``x`` with ``n_channels`` channels."""
        residual = self.c(x)
        # The sum is a fresh tensor, so the in-place ReLU does not modify ``x``.
        return self.relu(residual + x)
      
class SuperBlock(nn.Module):
    """Stack of five ``Block`` modules applied one after another.

    Args:
        n_channels: Channel count passed to every ``Block`` in the stack.
    """

    def __init__(self, n_channels):
        super(SuperBlock, self).__init__()
        # Five identical residual units, registered under indices 0..4.
        residual_units = [Block(n_channels) for _ in range(5)]
        self.out = nn.Sequential(*residual_units)

    def forward(self, x):
        """Run ``x`` through the five blocks in order."""
        stacked = self.out(x)
        return stacked
        
class ConvLayer(nn.Module):
    """Channel-changing stage: conv3x3 -> dropout -> ReLU -> 2x2 max-pool.

    The convolution uses padding 1, so only the stride-2 max-pool changes the
    spatial size.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super(ConvLayer, self).__init__()
        stages = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(3, 3), padding=1),
            nn.Dropout(p=0.5),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
        ]
        self.out = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the four stages to ``x`` and return the pooled feature map."""
        pooled = self.out(x)
        return pooled
        
class Classifier(nn.Module):
    """Residual-style CNN producing 10 class logits per image.

    The feature extractor ``resnet_fake`` alternates ``ConvLayer`` stages
    (channels 1 -> 16 -> 32 -> 64 -> 128, each ending in a stride-2 max-pool)
    with ``SuperBlock`` stages. For 28x28 inputs the four pooling steps shrink
    the feature map to 1x1 (28 -> 14 -> 7 -> 3 -> 1), leaving 128 features
    per image for the final linear layer.

    Args:
        out_n: Accepted for backward compatibility but not used; the linear
            head is fixed at 128 inputs and 10 outputs.
    """

    def __init__(self, out_n):
        super(Classifier, self).__init__()

        self.resnet_fake = nn.Sequential(
            ConvLayer(1,16),
            SuperBlock(16),
            ConvLayer(16,32),
            SuperBlock(32),
            ConvLayer(32,64),
            SuperBlock(64),
            ConvLayer(64,128)
            )

        self.linear = nn.Linear(128,10)

    def forward(self, x):
        """Return logits of shape ``(batch, 10)`` for a batch of images ``x``."""
        x = self.resnet_fake(x)
        # Flatten everything except the batch axis. A bare ``squeeze()`` would
        # also remove the batch axis when the batch holds a single image,
        # yielding logits of shape (10,) instead of (1, 10).
        return self.linear(x.flatten(start_dim=1))

    
# Build the model and move it to the GPU when one is available.
cuda_available = torch.cuda.is_available()
clf = Classifier(128)
clf = clf.cuda() if cuda_available else clf

# Cross-entropy loss, optimised with Adam at a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=clf.parameters(), lr=1e-4)

def count_parameters(model):
    """Return the total number of scalar entries in the trainable parameters.

    Parameters whose ``requires_grad`` flag is False are not counted.
    """
    n_trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            n_trainable += param.numel()
    return n_trainable
# Report the number of trainable parameters of the classifier.
number_of_parameters = count_parameters(clf)
print('Number of parameters : %d' % number_of_parameters)

Number of parameters : 583402


In [7]:
def evalutate(clf, inputs, targets):
    """Score one batch and return ``(total, correct)``.

    The model is switched to evaluation mode and is left in that mode; the
    caller is responsible for calling ``clf.train()`` before training again.

    Args:
        clf: Model mapping ``inputs`` to per-class scores along dimension 1.
        inputs: Batch of inputs, on the same device as ``clf``.
        targets: Class indices, one per input.

    Returns:
        total: Number of targets in the batch (a Python int).
        correct: Zero-dimensional CPU tensor holding the number of inputs
            whose highest-scoring class equals the target.
    """
    clf.eval()
    # Evaluation needs no gradients; without no_grad() every forward pass
    # would build an autograd graph that is never used.
    with torch.no_grad():
        outputs = clf(inputs)
        _, predicted = torch.max(outputs, 1)
        total = targets.size(0)
        correct = predicted.eq(targets).cpu().sum()
    return total, correct


def evalutate_accuracy(clf, loader):
    """Return the accuracy of ``clf`` over ``loader`` as a percentage.

    Batches are moved to the GPU when the module-level ``cuda_available``
    flag is set. The model is left in evaluation mode afterwards.

    Args:
        clf: Model to evaluate.
        loader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        ``100 * correct / total`` as a float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    clf.eval()
    total = 0
    correct = 0
    # No gradients are needed for scoring; disabling autograd avoids building
    # a graph for every evaluation batch.
    with torch.no_grad():
        for inputs, targets in loader:
            if cuda_available:
                inputs, targets = inputs.cuda(), targets.cuda()

            partial_total, partial_correct = evalutate(clf, inputs, targets)
            total += partial_total
            # int() keeps the running count a plain Python integer whether the
            # per-batch count arrives as a tensor or as a number.
            correct += int(partial_correct)

    return 100*float(correct)/total

for epoch in range(10):
    # Enter training mode at the start of every epoch. Evaluation switches the
    # model to eval mode, so if this cell is re-run after being interrupted
    # during evaluation, training would otherwise proceed with dropout disabled.
    clf.train()
    losses = []
    # Train
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        if cuda_available:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = clf(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

        # Report the running mean of the loss over the current epoch.
        if batch_idx%200==0:
            print('Epoch : %d, Batch : %d, Loss : %.3f ' % (epoch, batch_idx, np.mean(losses)))


    # Evaluate
    train_acc = evalutate_accuracy(clf, train_loader)
    test_acc = evalutate_accuracy(clf, test_loader)

    print('Epoch : %d Train Acc : %.3f' % (epoch, train_acc))
    print('Epoch : %d Test Acc : %.3f' % (epoch, test_acc))
    print('--------------------------------------------------------------')
    # Leave the model in training mode after evaluation, as before.
    clf.train()

Epoch : 0, Batch : 0, Loss : 2.351 
Epoch : 0, Batch : 200, Loss : 1.553 
Epoch : 0, Batch : 400, Loss : 1.050 
Epoch : 0, Batch : 600, Loss : 0.816 
Epoch : 0, Batch : 800, Loss : 0.678 
Epoch : 0 Train Acc : 95.578
Epoch : 0 Test Acc : 95.790
--------------------------------------------------------------
Epoch : 1, Batch : 0, Loss : 0.231 
Epoch : 1, Batch : 200, Loss : 0.217 
Epoch : 1, Batch : 400, Loss : 0.198 
Epoch : 1, Batch : 600, Loss : 0.184 
Epoch : 1, Batch : 800, Loss : 0.175 
Epoch : 1 Train Acc : 97.315
Epoch : 1 Test Acc : 97.420
--------------------------------------------------------------
Epoch : 2, Batch : 0, Loss : 0.048 
Epoch : 2, Batch : 200, Loss : 0.137 
Epoch : 2, Batch : 400, Loss : 0.129 
Epoch : 2, Batch : 600, Loss : 0.130 
Epoch : 2, Batch : 800, Loss : 0.127 
Epoch : 2 Train Acc : 97.858
Epoch : 2 Test Acc : 97.980
--------------------------------------------------------------
Epoch : 3, Batch : 0, Loss : 0.206 
Epoch : 3, Batch : 200, Loss : 0.108 
Ep