# Homework 7: Train MLP and CNN on MNIST for 10-class feature extraction and classification
Please implement the following three functions:
- MnistMLP() - Design a 2-layer MLP
- MnistCNN() - Design a 2-layer CNN 

Please train a 2-layer MLP and CNN on the Mnist dataset and print the training results for each epoch.

In [1]:
from torchvision.datasets import MNIST
from torchvision.transforms import Compose,ToTensor,Normalize
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import os
import torch
import numpy as np
 
BATCH_SIZE = 128
TEST_BATCH_SIZE = 1000

# dataloader for the dataset
def get_dataloader(train,batch_size=BATCH_SIZE):
    transform_fn = Compose([
        ToTensor(),
        Normalize(mean = (0.1307,),std = (0.3081,))
        ]) 
    dataset = MNIST(root = './data',train = train,transform = transform_fn, download = True)
    data_loader = DataLoader(dataset,batch_size = batch_size,shuffle = True)
    return data_loader

In [2]:
# 2-lyer MLP 
class MnistMLP(nn.Module):
    # implement a 2-layer MLP with 256 hidden units
    def __init__(self):
        super(MnistMLP,self).__init__()
        self.fc1 = nn.Linear(784,256)
        self.fc2 = nn.Linear(256,10)
       
    def forward(self,x):
        x = x.view(-1,784)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
    

In [8]:
# 2-lyer CNN
class MnistCNN(nn.Module):
    # implement a 2-layer CNN with 32 hidden units
    def __init__(self):
        super(MnistCNN,self).__init__()
        self.conv1 = nn.Conv2d(1,32,5)
        self.conv2 = nn.Conv2d(32,64,5)
        self.fc1 = nn.Linear(1024,10)
    
    def forward(self,x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x,2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x,2)
        x = x.view(-1,1024)
        x = self.fc1(x)
        return x

## Train the MLP model

In [3]:
model = MnistMLP()
optimizer = Adam(model.parameters(),lr = 0.001)

In [4]:
def train(epoch, num_epochs):
    data_loader = get_dataloader(True)
    total_step = len(data_loader)
    for idx, (input,target) in enumerate(data_loader):
        optimizer.zero_grad()
        output = model(input)
        loss = F.nll_loss(output,target)
        loss.backward()
        optimizer.step()
        if (idx+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, idx+1, total_step, loss.item()))

In [5]:
def test():
    loss_list = []
    acc_list = []
    test_dataloader = get_dataloader(train = False,batch_size=TEST_BATCH_SIZE)
    for idx,(input,target) in enumerate(test_dataloader):
        with torch.no_grad():
            output = model(input)
            cur_loss = F.nll_loss(output,target)
            loss_list.append(cur_loss)
            pred = output.max(dim = -1)[-1]
            cur_acc = pred.eq(target).float().mean()
            acc_list.append(cur_acc)
    print("Mean accuracy：",np.mean(acc_list),"Mean loss：",np.mean(loss_list))

In [6]:
test()
num_epochs = 3
for i in range(num_epochs):
    train(i, num_epochs)
test()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Mean accuracy： 0.13280001 Mean loss： 0.047826394
Epoch [1/3], Step [100/469], Loss: -1337.5470
Epoch [1/3], Step [200/469], Loss: -6875.2241
Epoch [1/3], Step [300/469], Loss: -18192.4336
Epoch [1/3], Step [400/469], Loss: -31483.2109
Epoch [2/3], Step [100/469], Loss: -66548.0625
Epoch [2/3], Step [200/469], Loss: -92688.9531
Epoch [2/3], Step [300/469], Loss: -121390.2656
Epoch [2/3], Step [400/469], Loss: -151889.6562
Epoch [3/3], Step [100/469], Loss: -217582.9062
Epoch [3/3], Step [200/469], Loss: -266575.4375
Epoch [3/3], Step [300/469], Loss: -304110.8750
Epoch [3/3], Step [400/469], Loss: -349140.0625
Mean accuracy： 0.1028 Mean loss： -397532.84


## Train the CNN model

In [9]:
model = MnistCNN()
optimizer = Adam(model.parameters(),lr = 0.001)

In [10]:
test()
num_epochs = 3
for i in range(num_epochs):
    train(i, num_epochs)
test()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Mean accuracy： 0.1003 Mean loss： 0.1282757
Epoch [1/3], Step [100/469], Loss: -47170.9414
Epoch [1/3], Step [200/469], Loss: -639579.0000
Epoch [1/3], Step [300/469], Loss: -2566493.5000
Epoch [1/3], Step [400/469], Loss: -6714652.5000
Epoch [2/3], Step [100/469], Loss: -21581188.0000
Epoch [2/3], Step [200/469], Loss: -35664700.0000
Epoch [2/3], Step [300/469], Loss: -52052776.0000
Epoch [2/3], Step [400/469], Loss: -79147312.0000
Epoch [3/3], Step [100/469], Loss: -127857608.0000
