# Problem 2
Convolutional Networks: Many techniques correspond to incorporating certain prior knowledge
of the structure of the data into the parameterization of the model. Convolution operation, for
example, is designed for visual imagery.


Instructions: For this part of the assignment we will train a convolutional network on MNIST
for 10 epochs using your favorite deep learning frameworks such as Pytorch of Tensorflow. Plot the
train and valid errors at the end of each epoch for the model.
1. Come up with a CNN architecture with more or less similar number of parameters as MLP
trained in Problem 1 and describe it.
2. Compare the performances of CNN vs MLP. Comment.
You could take reference from the architecture mentioned here https://github.com/MaximumEntropy/welcome_tutorials/tree/pytorch/pytorch .

In [1]:
from __future__ import print_function
import time
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F

In [3]:
import torchvision
import torchvision.transforms

In [4]:
mnist_transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
mnist_train = torchvision.datasets.MNIST(root='./data', train=True, transform=mnist_transforms, download=True)
mnist_test = torchvision.datasets.MNIST(root='./data', train=False, transform=mnist_transforms, download=True)

In [5]:
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=64, shuffle=True, num_workers=12)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=64, shuffle=True, num_workers=12)

In [6]:
class Classifier(nn.Module):
    """Convnet Classifier"""
    def __init__(self):
        super(Classifier, self).__init__()
        self.conv = nn.Sequential(
            # Layer 1
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=1),
            nn.Dropout(p=0.5),
            nn.LeakyReLU(negative_slope=0.2),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            
            # Layer 2
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=1),
            nn.Dropout(p=0.5),
            nn.LeakyReLU(negative_slope=0.2),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            
            # Layer 3
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), padding=1),
            nn.Dropout(p=0.5),
            nn.LeakyReLU(negative_slope=0.2),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            
            # Layer 4
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), padding=1),
            nn.Dropout(p=0.5),
            nn.LeakyReLU(negative_slope=0.2),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        )
        # Logistic Regression
        self.clf = nn.Linear(128, 10)

    def forward(self, x):
        return self.clf(self.conv(x).squeeze())

    
cuda_available = torch.cuda.is_available()
clf = Classifier()
if cuda_available:
    clf = clf.cuda()
optimizer = torch.optim.Adam(clf.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
number_of_parameters = count_parameters(clf)
print('Number of parameters : %d' % (number_of_parameters))

Number of parameters : 98442


In [11]:
for epoch in range(1):
    losses = []
    # Train
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        if cuda_available:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = clf(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        losses.append(loss.data.item())
        
        if batch_idx%200==0:
            print('Epoch : %d, Batch : %d, Loss : %.3f ' % (epoch, batch_idx, np.mean(losses)))
    
    # Evaluate
    clf.eval()
    total = 0
    correct = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        if cuda_available:
            inputs, targets = inputs.cuda(), targets.cuda()

        outputs = clf(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

    print('Epoch : %d Test Acc : %.3f' % (epoch, 100.*correct/total))
    print('--------------------------------------------------------------')
    clf.train()

Epoch : 0, Batch : 0, Loss : 0.288 
Epoch : 0, Batch : 200, Loss : 0.217 
Epoch : 0, Batch : 400, Loss : 0.212 
Epoch : 0, Batch : 600, Loss : 0.213 
Epoch : 0, Batch : 800, Loss : 0.207 
Epoch : 0 Test Acc : 96.000
--------------------------------------------------------------
