# MNIST neural-network classifier with PyTorch

## 1. Download data

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

In [2]:
# Training split; download=True asks torchvision to fetch the files into ./data.
train_data = dsets.MNIST(
    root='./data', train=True, download=True, transform=transforms.ToTensor(),
)
# Test split, read from the same root directory (no download requested here).
test_data = dsets.MNIST(
    root='./data', train=False, transform=transforms.ToTensor(),
)

## 2. Make it iterable

In [3]:
print(train_data)
print(test_data)

batch_size = 100
n_iter = 3000
# Number of passes over the training set needed to reach roughly n_iter updates.
n_epoch = int(n_iter / (len(train_data) / batch_size))

# Training batches are reshuffled each epoch; drop_last=True discards a trailing
# partial batch so every training batch holds exactly batch_size samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)
# Evaluation should cover every test sample, so the trailing partial batch is
# kept (drop_last=False). With batch_size=100 and 10000 test images there is no
# partial batch, so this only matters if batch_size is changed.
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
)
print('Batches in one epoch : ' + str(len(train_loader)))

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
Batches in one epoch : 600


## 3. Build the model
Here I use a shallow model with 2 hidden layers.  
The number of nodes in each layer is a hyper-parameter that can be adjusted later.  

<img src='img/Net.jpeg' width=500>

In [4]:
# TODO:: refactor into layer function to call
class NeuralNet(nn.Module):
    """Fully connected network: two hidden ReLU layers, then a linear output layer.

    Args:
        in_dim: number of features in each input vector.
        hid_dim_1: width of the first hidden layer.
        hid_dim_2: width of the second hidden layer.
        out_dim: number of values produced per input vector.
    """

    def __init__(self, in_dim, hid_dim_1, hid_dim_2, out_dim):
        super().__init__()

        # Hidden layer 1: in_dim -> hid_dim_1, followed by ReLU.
        self.fc1 = nn.Linear(in_dim, hid_dim_1)
        self.relu1 = nn.ReLU()

        # Hidden layer 2: hid_dim_1 -> hid_dim_2, followed by ReLU.
        self.fc2 = nn.Linear(hid_dim_1, hid_dim_2)
        self.relu2 = nn.ReLU()

        # Output layer: hid_dim_2 -> out_dim, no activation applied.
        self.fc3 = nn.Linear(hid_dim_2, out_dim)

    def forward(self, x):
        """Run x through both hidden layers and return the output layer's raw values."""
        hidden_1 = self.relu1(self.fc1(x))
        hidden_2 = self.relu2(self.fc2(hidden_1))
        return self.fc3(hidden_2)
    

## 4. Instantiate model

In [5]:
# Network dimensions: a flattened 28x28 image in, 10 output values out.
in_dim = 28 * 28
hid_dim_1 = 500
hid_dim_2 = 100
out_dim = 10

model = NeuralNet(in_dim, hid_dim_1, hid_dim_2, out_dim)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over all model parameters
lr = 0.1
momentum = 0.4
optimizer = torch.optim.SGD(model.parameters(), momentum=momentum, lr=lr)


## 5. Train

In [6]:
# Train for n_epoch passes over train_loader. Every 50 parameter updates,
# measure accuracy over test_loader and print it together with the loss of
# the most recent training batch.
iteration = 0
for epoch in range(n_epoch):
    for images, labels in train_loader:
        # Flatten each 28x28 image into one row for the fully connected model.
        images = images.view(-1, 28 * 28)

        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        iteration += 1

        if iteration % 50 == 0:
            total = 0
            correct = 0

            model.eval()
            # Evaluation needs no gradients; no_grad avoids recording autograd
            # history for the test batches.
            with torch.no_grad():
                # Separate names so the training batch variables are not overwritten.
                for test_images, test_labels in test_loader:
                    # -1 lets the batch dimension follow the loader instead of
                    # assuming exactly 100 samples per batch.
                    test_output = model(test_images.view(-1, 28 * 28))
                    _, predicted = torch.max(test_output, 1)
                    total += test_labels.size(0)
                    # .item() keeps the running count a plain Python int.
                    correct += (predicted == test_labels).sum().item()
            # Restore training mode before the next optimisation step.
            model.train()

            # Plain Python numbers: accuracy is a float percentage regardless of
            # how the installed PyTorch version divides integer tensors.
            accuracy = 100 * correct / total
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))


Iteration: 50. Loss: 1.0377416610717773. Accuracy: 74
Iteration: 100. Loss: 0.5705192685127258. Accuracy: 83
Iteration: 150. Loss: 0.28889885544776917. Accuracy: 89
Iteration: 200. Loss: 0.3787156641483307. Accuracy: 89
Iteration: 250. Loss: 0.37976351380348206. Accuracy: 90
Iteration: 300. Loss: 0.20429667830467224. Accuracy: 91
Iteration: 350. Loss: 0.3439764380455017. Accuracy: 92
Iteration: 400. Loss: 0.36298462748527527. Accuracy: 91
Iteration: 450. Loss: 0.30877169966697693. Accuracy: 92
Iteration: 500. Loss: 0.22305038571357727. Accuracy: 93
Iteration: 550. Loss: 0.14639055728912354. Accuracy: 93
Iteration: 600. Loss: 0.21425558626651764. Accuracy: 93
Iteration: 650. Loss: 0.2670036852359772. Accuracy: 93
Iteration: 700. Loss: 0.17512629926204681. Accuracy: 94
Iteration: 750. Loss: 0.20291756093502045. Accuracy: 94
Iteration: 800. Loss: 0.1425827443599701. Accuracy: 95
Iteration: 850. Loss: 0.15749768912792206. Accuracy: 94
Iteration: 900. Loss: 0.1525595337152481. Accuracy: 95
