# MNIST NN classifier with pytorch

## 1. Check cuda

In [0]:
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

In [2]:
cuda_available = torch.cuda.is_available()
if cuda_available:
    print('Run in GPU')
else:
    print('Run in CPU')


Run in CPU


# 1.Download data

In [0]:

train_data = dsets.MNIST(root='./data',
                         train=True,
                         download=True,
                         transform=transforms.ToTensor()
                        )
test_data = dsets.MNIST(root='./data',
                        train=False,
                        transform=transforms.ToTensor()
                       )

## 2. Make it iterable

In [4]:
print(train_data)
print(test_data)

batch_size = 100
n_iter = 3000
n_epoch = int(n_iter / (len(train_data) / batch_size))

train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      drop_last=True
                                     )
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      drop_last=True
                                     )
print('Batches in one epoch : ' + str(len(train_loader)))

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
Batches in one epoch : 600


## 3.Build the model
Here I use a 2 hidden layer shallow model.  
Nodes in every layer can be adjusted as hyper-parameter later.  

<img src='img/Net.jpeg' width=500>

In [0]:
# TODO:: refactor into layer function to call
class NeuralNet(nn.Module):
    def __init__(self, in_dim, hid_dim_1, hid_dim_2, out_dim):
        super(NeuralNet, self).__init__()
        
        # Layer1
        self.fc1 = nn.Linear(in_features=in_dim, out_features=hid_dim_1, bias=True)
        self.relu1 = nn.ReLU()
        
        # Layer2
        self.fc2 = nn.Linear(in_features=hid_dim_1, out_features=hid_dim_2, bias=True)
        self.relu2 = nn.ReLU()
        
        # Layer3 out
        self.fc3 = nn.Linear(in_features=hid_dim_2, out_features=out_dim, bias=True)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        
        out = self.fc2(out)
        out = self.relu2(out)
        
        out = self.fc3(out)
        return out
    

## 4. instantiate model

In [0]:
in_dim = 28*28
out_dim = 10
hid_dim_1 = 500
hid_dim_2 = 100

model = NeuralNet(in_dim=in_dim, hid_dim_1=hid_dim_1, hid_dim_2=hid_dim_2, out_dim=out_dim)

if cuda_available:
    model.cuda()
    print('model in cuda')
# loss function
criterion = nn.CrossEntropyLoss()

# optimizer
momentum = 0.4
lr = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)


## 5.Train

In [7]:
# %%time
iteration = 0
for epoch in range(n_epoch):
    for i, (images, labels) in enumerate(train_loader):
        
        if cuda_available:
            images = images.view(-1,28*28).cuda()
            labels = labels.cuda()
        else:
            images = torch.Tensor(images.view(-1,28*28))    

        optimizer.zero_grad()

        output = model(images)

        loss = criterion(output, labels)

        loss.backward()

        optimizer.step()

        iteration +=1

        if iteration % 50 ==0:
            total = 0
            correct = 0
            for images, labels in test_loader:
                
                if cuda_available:
                    images = images.view(100, 28*28).cuda()
                else:
                    images = images.view(100, 28*28)

                output = model(images)
                _, predicted = torch.max(output.data, 1)
                total += labels.size(0)
                correct += (predicted.cpu() == labels.cpu()).sum()
                
            accuracy = 100.0 * correct / total
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.data.item(), accuracy))


Iteration: 50. Loss: 1.0482827425003052. Accuracy: 67
Iteration: 100. Loss: 0.6515190601348877. Accuracy: 82
Iteration: 150. Loss: 0.5181882381439209. Accuracy: 87
Iteration: 200. Loss: 0.34922507405281067. Accuracy: 88
Iteration: 250. Loss: 0.3440299928188324. Accuracy: 90
Iteration: 300. Loss: 0.34638649225234985. Accuracy: 90
Iteration: 350. Loss: 0.3191926181316376. Accuracy: 91
Iteration: 400. Loss: 0.2168610394001007. Accuracy: 92
Iteration: 450. Loss: 0.33199647068977356. Accuracy: 92
Iteration: 500. Loss: 0.31492283940315247. Accuracy: 92
Iteration: 550. Loss: 0.3508247137069702. Accuracy: 92
Iteration: 600. Loss: 0.21341241896152496. Accuracy: 93
Iteration: 650. Loss: 0.2924889326095581. Accuracy: 94
Iteration: 700. Loss: 0.22854205965995789. Accuracy: 94
Iteration: 750. Loss: 0.22336572408676147. Accuracy: 94
Iteration: 800. Loss: 0.17014899849891663. Accuracy: 94
Iteration: 850. Loss: 0.20003987848758698. Accuracy: 95
Iteration: 900. Loss: 0.16163089871406555. Accuracy: 95
I