# Mathematics for AI - Coursework Task 2

The second task is about classifying handwritten digits. We will use the MNIST dataset for training and testing. The point of this task is to develop a multi-layer neural network for classificationusing mostly Numpy:

• Implement sigmoid and relu layers (with forward and backward pass)

• Implement a softmax output layer

• Implement a fully parameterizable neural network (number and types of layers, number of units)

• Implement an optimizer(e.g. SGD or Adam)and a stopping criterionof your choosing
• Train your Neural Network using backpropagation. Evaluate different neural network architectures andcompare your different results. You can also compare withthe results presented inhttp://yann.lecun.com/exdb/mnist/

In [1]:
# Import limited libraries
from sklearn.datasets import load_digits
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [2]:
# Load MNIST dataset
df = load_digits()

# Divorce data and target 
X = df.data
y = df.target

In [3]:
# Splitting Dataset - Inspiration taken from Lab 04 
ratio = 0.75
length = len(X)

X_train = X[:int(length*ratio)]
X_test = X[int(length*ratio):]
y_train = y[:(length - int(length*ratio))]
y_test = y[(length - int(length*ratio)):]

In [4]:
print(X.shape)
print(y.shape)
print(y_train.shape)

(1797, 64)
(1797,)
(450,)


In [41]:
from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split
import time

x, y = fetch_openml('mnist_784', version=1, return_X_y=True)
x = (x/255).astype('float32')
y = to_categorical(y)

In [42]:
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, random_state=42)

https://mlfromscratch.com/neural-network-tutorial/#/

In [64]:
class DeepNeuralNetwork():
    def __init__(self, sizes, epochs=10, l_rate=0.01):
        self.sizes = sizes
        self.epochs = epochs
        self.l_rate = l_rate

        # Save parameters
        self.params = self.initialization()

    def sigmoid(self, x, derivative=False):
        if derivative:
            return (np.exp(-x))/((np.exp(-x)+1)**2)
        return 1/(1 + np.exp(-x))

    def softmax(self, x, derivative=False):
        exps = np.exp(x - x.max())
        if derivative:
            return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))
        return exps / np.sum(exps, axis=0)
    
    def relu(self, x, derivative=False):
        if derivative:
            copy = np.array(x, copy = True)
            copy[x <= 0] = 0
            copy[x > 0] = 1
            return copy
        return np.maximum(0,x)

    def initialization(self):
        # number of nodes in each layer
        input_layer=self.sizes[0]
        hidden_1=self.sizes[1]
        hidden_2=self.sizes[2]
        output_layer=self.sizes[3]

        params = {
            'W1':np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
            'W2':np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
            'W3':np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer)
        }

        return params

    def forward_pass(self, x_train):
        params = self.params

        # input layer activations becomes sample
        params['A0'] = x_train

        # input layer to hidden layer 1
        params['Z1'] = np.dot(params["W1"], params['A0'])
        # Select activation function
        params['A1'] = self.sigmoid(params['Z1'])

        # hidden layer 1 to hidden layer 2
        params['Z2'] = np.dot(params["W2"], params['A1'])
        # Select activation function
        params['A2'] = self.sigmoid(params['Z2'])
        

        # hidden layer 2 to output layer
        params['Z3'] = np.dot(params["W3"], params['A2'])
        # select activation funcion
        params['A3'] = self.softmax(params['Z3'])

        return params['A3']

    def backward_pass(self, y_train, output):
        params = self.params
        change_w = {}

        # Calculate W3 update
        error = 2 * (output - y_train) / output.shape[0] * self.sigmoid(params['Z3'], derivative=True)
        change_w['W3'] = np.outer(error, params['A2'])
        
        # Calculate W2 update
        error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative=True)
        change_w['W2'] = np.outer(error, params['A1'])

        # Calculate W1 update
        error = np.dot(params['W2'].T, error) * self.softmax(params['Z1'], derivative=True)
        change_w['W1'] = np.outer(error, params['A0'])

        return change_w

    def update_network_parameters(self, changes_to_w):

        for key, value in changes_to_w.items():
            self.params[key] -= self.l_rate * value

    def compute_accuracy(self, x_val, y_val):
        
        predictions = []

        for x, y in zip(x_val, y_val):
            output = self.forward_pass(x)
            pred = np.argmax(output)
            predictions.append(pred == np.argmax(y))
        
        return np.mean(predictions)

    def train(self, x_train, y_train, x_val, y_val):
        start_time = time.time()
        for iteration in range(self.epochs):
            for x,y in zip(x_train, y_train):
                output = self.forward_pass(x)
                changes_to_w = self.backward_pass(y, output)
                self.update_network_parameters(changes_to_w)
            
            accuracy = self.compute_accuracy(x_val, y_val)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
                iteration+1, time.time() - start_time, accuracy * 100
            ))
            
dnn = DeepNeuralNetwork(sizes=[784, 64, 64, 10])
dnn.train(x_train, y_train, x_val, y_val)

Epoch: 1, Time Spent: 17.50s, Accuracy: 61.90%
Epoch: 2, Time Spent: 39.14s, Accuracy: 70.60%
Epoch: 3, Time Spent: 59.47s, Accuracy: 74.01%
Epoch: 4, Time Spent: 80.59s, Accuracy: 75.72%
Epoch: 5, Time Spent: 100.69s, Accuracy: 76.88%
Epoch: 6, Time Spent: 120.73s, Accuracy: 77.83%
Epoch: 7, Time Spent: 141.10s, Accuracy: 78.71%
Epoch: 8, Time Spent: 161.27s, Accuracy: 79.53%
Epoch: 9, Time Spent: 182.08s, Accuracy: 80.15%
Epoch: 10, Time Spent: 202.14s, Accuracy: 80.58%
