# 4 Simple Feed-Forward Neural Network
### Q4.2

In [1]:
import numpy as np
import pandas as pd
import torch
import torchvision
from torchvision import datasets, transforms
import os
import math
from dataclasses import dataclass

In [2]:
# Convert each image to a tensor, then normalize with mean 0.5 and std 0.5.
tform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST train / test splits, downloaded into ./data when not already present.
trainset = datasets.MNIST(
    root='./data',
    train=True,
    transform=tform,
    download=True,
)
testset = datasets.MNIST(
    root='./data',
    train=False,
    transform=tform,
    download=True,
)

In [3]:
@dataclass
class NeuralNet:
    """Two-layer feed-forward classifier: sigmoid hidden layer, softmax output.

    Neither layer has a bias term. Both weight matrices are drawn from a
    standard normal distribution scaled by 0.01 when the instance is created.
    """

    d: int  # num inputs
    d1: int  # num neurons in hidden layer
    k: int  # num classes

    def __post_init__(self):
        self.W1 = np.random.randn(self.d1, self.d) * 0.01  # (d1, d)
        self.W2 = np.random.randn(self.k, self.d1) * 0.01  # (k, d1)

    def sigmoid_fn(self, X: np.ndarray) -> np.ndarray:
        """Return the element-wise logistic sigmoid 1 / (1 + exp(-X)).

        exp() is only ever applied to non-positive values, so very negative
        inputs do not overflow.
        """
        decay = np.exp(-np.abs(X))
        # X >= 0: 1 / (1 + e^-X);  X < 0: e^X / (1 + e^X) -- the same function.
        return np.where(X >= 0, 1 / (1 + decay), decay / (1 + decay))

    def softmax_fn(self, X: np.ndarray) -> np.ndarray:
        """Return the softmax of X, normalized over all of its elements.

        X is expected to be a (k, 1) column of logits; the result has the
        same shape and sums to 1.
        """
        # Softmax is invariant to a constant shift; subtracting the maximum
        # keeps exp() from overflowing to inf (and the ratio from being nan).
        exps = np.exp(X - np.max(X))
        return exps / np.sum(exps)  # (k, 1)

    def forward_pass(self, X: np.ndarray) -> np.ndarray:
        """Run X (a (d, 1) column) through the network and return class probabilities.

        The hidden activations and the output are cached on the instance as
        ``self.A`` and ``self.Z`` so a caller can reuse them for backpropagation.
        """
        self.A = self.sigmoid_fn(self.W1 @ X)  # (d1, 1)
        self.Z = self.softmax_fn(self.W2 @ self.A)  # (k, 1)
        return self.Z

In [4]:
# 28x28 input pixels, 100 hidden neurons, 10 output classes.
nnet = NeuralNet(d=28 ** 2, d1=100, k=10)

In [5]:
# Training hyperparameters: step size, samples per mini-batch, passes over the data.
learn_rate, batch_size, epochs = 0.01, 10, 10

#trainset.data = trainset.data[:30000]
# Reshuffled mini-batches over the training set.
data_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
)

In [6]:
# Mini-batch gradient descent: accumulate per-sample gradients over a batch,
# then take one step along their average.
for epoch in range(epochs):
    for images, labels in data_loader:
        W1_grad = np.zeros(nnet.W1.shape)
        W2_grad = np.zeros(nnet.W2.shape)
        for image, label in zip(images, labels):
            X = np.array(image.reshape(nnet.d, 1))  # (d, 1)
            Y = np.zeros((nnet.k, 1))  # one-hot target
            Y[int(label)] = 1
            Yp = nnet.forward_pass(X)

            # Softmax + cross-entropy: gradient w.r.t. the output logits.
            delZ = Yp - Y  # (k, 1)
            delA = nnet.W2.T @ delZ  # (d1, 1)
            # Sigmoid derivative at the cached hidden activations.
            delSig = nnet.A * (1 - nnet.A)  # (d1, 1)
            W1_grad += (delA * delSig) @ X.T  # (d1, d)
            W2_grad += delZ @ nnet.A.T  # (k, d1)
        # Average over the samples actually in this batch; the final batch can
        # be shorter than batch_size when the dataset size is not a multiple.
        step = learn_rate / len(images)
        nnet.W1 -= step * W1_grad
        nnet.W2 -= step * W2_grad
    print('Epoch #' + str(epoch) + ' finished.')

Epoch #0 finished.
Epoch #1 finished.
Epoch #2 finished.
Epoch #3 finished.
Epoch #4 finished.
Epoch #5 finished.
Epoch #6 finished.
Epoch #7 finished.
Epoch #8 finished.
Epoch #9 finished.


In [7]:
# Score the trained network on the MNIST test split, one image per batch.
test_loader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False)
correct, incorrect = 0, 0
for images, labels in test_loader:
    for image, label in zip(images, labels):
        X = np.array(image.reshape(784, 1))
        # The predicted digit is the class with the highest probability.
        predicted = int(np.argmax(nnet.forward_pass(X)))
        if predicted != int(label):
            incorrect += 1
            continue
        correct += 1

In [8]:
# Fraction of evaluated test images whose prediction matched the label.
total = correct + incorrect
accuracy = correct / total
print(accuracy)

0.9539


In [9]:
# Echo the training set's summary (size, root, split, transform) as the cell output.
trainset

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )