# <center>Neural network from scratch</center>

In this notebook we don't use any `torch.module` nor `torch.optimizer` to create our neural network. We train it on MNIST. 

In [15]:
from matplotlib import pyplot
import math
import numpy as np
import torch
import torchvision.datasets as datasets
import torch.nn.functional as F

##### Function definition

In [16]:
loss_func = F.cross_entropy

def model(x):
    x = torch.relu(x @ weights_1 + bias_1) #First activation function
    x = torch.sigmoid(x @ weights_2 + bias_2) #Second activation function
    return x

def accuracy(Y_hat, Y):
    preds = torch.argmax(Y_hat, dim=1)
    return (preds == Y).float().mean()

##### Loading dataset

In [17]:
#import data
mnist_trainset = datasets.MNIST(root='../data', train=True, download=True, transform=None)
mnist_testset = datasets.MNIST(root='../data', train=False, download=True, transform=None)

#load trainset into tensors
train_loader = torch.utils.data.DataLoader(mnist_trainset, batch_size=1, shuffle=True)
X_train = train_loader.dataset.data
Y_train = train_loader.dataset.targets

#load testset into tensors
test_loader = torch.utils.data.DataLoader(mnist_testset, batch_size=10000, shuffle=False)
X_test = test_loader.dataset.data
Y_test = test_loader.dataset.targets

##### Preprocessing

In [18]:
#scale data to [0:1] and convert to float32
X_train = (X_train.to(dtype=torch.float32) / X_train.max().to(dtype=torch.float32))
X_test = (X_test.to(dtype=torch.float32) / X_test.max().to(dtype=torch.float32))

#Flatten train and test data
X_train = X_train.reshape(X_train.shape[0],-1)
X_test = X_test.reshape(X_test.shape[0],-1)

print("Train examples : ",X_train.shape[0])
print("Test examples : ",X_test.shape[0])
print("Nb of features : ",X_train.shape[1])

Train examples :  60000
Test examples :  10000
Nb of features :  784


##### Initialization

In [19]:
weights_1 = (torch.rand((784, 64) , dtype = torch.float32 ) * 2 - 1 ) / math.sqrt(784)
weights_1.requires_grad_()
bias_1 = torch.zeros(64, requires_grad=True)

weights_2 = (torch.rand((64, 10) , dtype = torch.float32 ) * 2 - 1 ) / math.sqrt(64) 
weights_2.requires_grad_()
bias_2 = torch.zeros(10, requires_grad=True)

##### Gradient descent

In [20]:
lr_init = 1 #learning rate initialization
decay = 0.01  # learning rate
epochs = 100  # how many epochs to train for
n = X_train.shape[0] #number of training examples
batch_size = 600


for epoch in range(epochs):
    lr = lr_init / (1 + decay * epoch)
    for i in range(n // batch_size):
        X = X_train[i*batch_size: (i+1) *batch_size]
        Y = Y_train[i*batch_size: (i+1) *batch_size]
        X = model(X)

        loss = loss_func( X , Y )
            

        loss.backward()
        
        with torch.no_grad():
            weights_1 -= weights_1.grad * lr
            weights_2 -= weights_2.grad * lr
            bias_1 -= bias_1.grad * lr
            bias_2 -= bias_2.grad * lr
            
            weights_1.grad.zero_()
            weights_2.grad.zero_()
            bias_1.grad.zero_()
            bias_2.grad.zero_()    
            
    if((epoch+1)%10 == 0):
        print(epoch + 1 , "\t", loss.item())


10 	 1.5122243165969849
20 	 1.4995709657669067
30 	 1.4941699504852295
40 	 1.491289734840393
50 	 1.4891860485076904
60 	 1.487610936164856
70 	 1.4862712621688843
80 	 1.485148310661316
90 	 1.4843205213546753
100 	 1.4837138652801514


##### Predictions

In [22]:
preds = model(X_test)


print("Accuracy on test set : ", round(accuracy( preds , Y_test).item(),3))

Accuracy on test set :  0.966
