# <center>Simple convolutional neural network with Pytorch</center>

>*Convolutional networks are simply neural networks that use convolution in place of general matrix multiplication in at least one of their layers.*

__Deep Learning__, I. Goodfellow et al.

##### Import PyTorch and other useful libraries

In [108]:
from matplotlib import pyplot
import math
import numpy as np
import pandas as pd

import torch
import torchvision.datasets as datasets
import torch.nn.functional as F
from torch import nn
from torch import optim

In [43]:
# Training criterion: PyTorch's functional cross-entropy.
loss_func = F.cross_entropy


def accuracy(Y_hat, Y):
    """Return the fraction of rows in ``Y_hat`` whose arg-max along dim 1 equals ``Y``.

    The result is a 0-dim float tensor (mean of the per-row hit/miss flags).
    """
    hits = Y_hat.argmax(dim=1).eq(Y)
    return hits.float().mean()

##### Load and preprocess dataset

In [92]:
# Fetch the MNIST train / test splits (downloaded into ../data when missing)
mnist_trainset = datasets.MNIST(root='../data', train=True, download=True, transform=None)
mnist_testset = datasets.MNIST(root='../data', train=False, download=True, transform=None)

# The loaders are only used here as a handle on the underlying dataset tensors
train_loader = torch.utils.data.DataLoader(mnist_trainset, batch_size=1, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_testset, batch_size=10000, shuffle=False)

X_train, Y_train = train_loader.dataset.data, train_loader.dataset.targets
X_test, Y_test = test_loader.dataset.data, test_loader.dataset.targets

# Convert to float32, then divide each split by its own maximum so values lie in [0, 1]
X_train = X_train.to(dtype=torch.float32)
X_train = X_train / X_train.max()
X_test = X_test.to(dtype=torch.float32)
X_test = X_test / X_test.max()

# Add a channel axis: (N, 28, 28) -> (N, 1, 28, 28), the layout Conv2d expects
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)

# NOTE: after the reshape above, shape[1] is the channel count (1), not a pixel count
for label, value in (
    ("Train examples : ", X_train.shape[0]),
    ("Test examples : ", X_test.shape[0]),
    ("Nb of features : ", X_train.shape[1]),
):
    print(label, value)

Train examples :  60000
Test examples :  10000
Nb of features :  1


##### Define the CNN architecture

In [96]:
class NeuralNet(nn.Module):
    """Small CNN mapping single-channel images to 10 class scores.

    Three stages of conv(5x5, padding 2) -> ReLU -> max-pool(2) -> batch-norm
    with 24, 48 and 64 channels, followed by a 256-unit linear layer with
    batch-norm and a final 10-unit linear layer.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 24, kernel_size=5, stride=1, padding=2)
        self.max1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.bn1 = nn.BatchNorm2d(24)
        self.conv2 = nn.Conv2d(24, 48, kernel_size=5, stride=1, padding=2)
        self.max2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.bn2 = nn.BatchNorm2d(48)
        self.conv3 = nn.Conv2d(48, 64, kernel_size=5, stride=1, padding=2)
        self.max3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.bn3 = nn.BatchNorm2d(64)
        # 64 channels * 3 * 3: a 28x28 input is halved three times (28 -> 14 -> 7 -> 3)
        self.linear4 = nn.Linear(64*3*3,256)
        self.bn4 = nn.BatchNorm1d(256)
        self.linear5 = nn.Linear(256,10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10).

        ``x`` is expected as (batch, 1, H, W) with H and W such that the
        three 2x2 poolings leave a 3x3 map (e.g. 28x28), because ``linear4``
        takes 64*3*3 input features.
        """
        x = self.bn1(self.max1(torch.relu(self.conv1(x))))
        x = self.bn2(self.max2(torch.relu(self.conv2(x))))
        x = self.bn3(self.max3(torch.relu(self.conv3(x))))

        # Flatten everything but the batch axis before the dense layers
        x = torch.relu(x.reshape(x.shape[0], -1))
        x = self.bn4(self.linear4(x))

        # NOTE(review): softmax is used as the hidden activation here and the
        # loss applied in fit() is cross-entropy on the output of linear5.
        # Kept as-is to preserve the trained behaviour; confirm it is intended.
        return self.linear5(torch.softmax(x, 1))

    def fit(self,optimizer,epochs,batch_size,lr,decay):
        """Train this network and print the test accuracy after every epoch.

        Reads the module-level ``X_train``, ``Y_train``, ``X_test``,
        ``Y_test``, ``loss_func`` and ``accuracy``. Mini-batches are taken as
        consecutive slices of ``X_train`` in the same order every epoch.

        Args:
            optimizer: optimizer built over this network's parameters.
            epochs: number of passes over the training set.
            batch_size: number of examples per mini-batch.
            lr: currently unused (the learning rate is the optimizer's own).
            decay: currently unused.

        The network is left in eval mode when the method returns.
        """
        n = X_train.shape[0]
        n_batches = (n - 1) // batch_size + 1

        for epoch in range(epochs):
            # Operate on this instance rather than on a global named `model`
            self.train()
            for i in range(n_batches):
                batch = slice(i * batch_size, (i + 1) * batch_size)

                optimizer.zero_grad()
                pred = self(X_train[batch])
                loss = loss_func(pred, Y_train[batch])

                loss.backward()
                optimizer.step()

            # Loss of the last mini-batch of the epoch
            print(epoch+1,"\t",loss.item())

            self.eval()
            with torch.no_grad():
                print("Test set \t", round(accuracy( self(X_test) , Y_test).item(),3))

In [97]:
# Instantiate the CNN defined above
model = NeuralNet()

In [98]:
# Hyper-parameters passed to fit(); fit() does not read learning_rate or decay,
# so the step size actually used is the one set on the optimizer itself
epochs, batch_size = 20, 600
learning_rate, decay = 1, 0.1

# Two candidate optimizers over the same parameters; only Adam (opt_2) is used below
opt_2 = optim.Adam(model.parameters(), lr=0.01)
opt = optim.SGD(model.parameters(), lr=1)

model.fit(opt_2, epochs, batch_size, learning_rate, decay)

1 	 0.08516699075698853
Test set 	 0.975
2 	 0.04562624916434288
Test set 	 0.984
3 	 0.032392021268606186
Test set 	 0.988
4 	 0.028965575620532036
Test set 	 0.987
5 	 0.02674502693116665
Test set 	 0.988
6 	 0.02047395147383213
Test set 	 0.99
7 	 0.023352866992354393
Test set 	 0.989
8 	 0.025886444374918938
Test set 	 0.99
9 	 0.027795525267720222
Test set 	 0.99
10 	 0.022772518917918205
Test set 	 0.987
11 	 0.0373346172273159
Test set 	 0.985
12 	 0.016209373250603676
Test set 	 0.992
13 	 0.021052422001957893
Test set 	 0.993
14 	 0.022053474560379982
Test set 	 0.993
15 	 0.018207136541604996
Test set 	 0.993
16 	 0.017679264768958092
Test set 	 0.989
17 	 0.02370826154947281
Test set 	 0.989
18 	 0.02704772911965847
Test set 	 0.991
19 	 0.040121354162693024
Test set 	 0.988
20 	 0.015256193466484547
Test set 	 0.992


###### Load, preprocess and predict the Kaggle test set

In [104]:
#Load data from CSV
test = pd.read_csv('../data/MNIST/test.csv')
test_tensor = torch.tensor(test.values)

#Preprocess: scale to [0, 1] as float32 and reshape each row to a (1, 28, 28) image
test_tensor = (test_tensor.to(dtype=torch.float32) / test_tensor.max().to(dtype=torch.float32))
test_tensor = test_tensor.reshape(test_tensor.shape[0],1,28,28)

#Predict
# Put the model in eval mode explicitly (batch-norm then uses its running
# statistics) and disable autograd so no graph is built for the whole test set.
model.eval()
with torch.no_grad():
    test_tensor = model(test_tensor).argmax(1)

In [105]:
# Inspect the shape of the predictions tensor
test_tensor.shape

torch.Size([28000])

In [107]:
# Inspect the dtype of the predictions tensor
test_tensor.dtype

torch.int64

##### Save predictions to a CSV file

In [111]:
#Convert to a numpy array
arr = test_tensor.numpy()

# write CSV
# fmt='%d' writes the integer class labels as plain integers; without it
# np.savetxt uses its default '%.18e' and emits scientific-notation floats.
np.savetxt('../data/MNIST/predictions.csv', arr, fmt='%d')