# MNIST - Handwritten Digit Recognition, Deep Neural Network from scratch

**Author:** Niklas Wicklund

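**Description:** This notebook implements a fully connected deep neural network from scratch in NumPy (forward pass, backpropagation and gradient descent) and trains it to classify handwritten digits from the MNIST dataset. Keras is used only to load the data.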


In [1]:
import numpy as np
from keras.datasets import mnist


### Auxiliary functions

In [2]:
def softmax(x):
    """
    Column-wise softmax.

    x: array of scores; normalisation is performed along axis 0, so for a
       2-D input every column is turned into a probability distribution.
    Returns an array of the same shape as x whose entries along axis 0 sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (and inf/inf = nan)
    # when the scores are large.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

def ReLu(X):
    """Rectified linear unit: element-wise maximum of 0 and X."""
    lower_bound = 0
    activated = np.maximum(lower_bound, X)
    return activated

def one_hot_encoding(y, num_classes=10):
    """
    One-hot encode integer class labels.

    y: array of integer labels, one per sample
    num_classes: number of distinct classes (length of each one-hot vector)
    Returns an array of shape (num_classes, n_samples): column j has a single 1
    in row y[j] and zeros elsewhere.
    """
    n_samples = y.shape[0]
    encoded = np.zeros((n_samples, num_classes))
    sample_rows = np.arange(n_samples)
    # Fancy indexing sets exactly one entry per sample row.
    encoded[sample_rows, y] = 1
    return encoded.T

def loss(Y, P):
    """
    Summed cross-entropy between one-hot labels and predicted probabilities.

    Y: one-hot encoded labels
    P: predicted class probabilities, same shape as Y
    Returns the total (not averaged) cross-entropy over all entries. A small
    constant is added inside the logarithm so that a probability of exactly 0
    does not produce log(0).
    """
    stabiliser = 1e-12
    log_probs = np.log(P + stabiliser)
    return np.sum(-Y * log_probs)

### Defining our neural network class

In [3]:
class NeuralNetwork:
    """
    Fully connected feed-forward classifier trained with full-batch gradient descent.

    Hidden layers use a ReLU activation and the output layer uses softmax.
    Data is laid out column-wise: the input matrix has one sample per column
    and the predicted probabilities have one column per sample.
    """

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate=0.1, seed=None):
        '''
        Arguments:
            input_nodes: number of input nodes
            hidden_nodes: sequence with the number of nodes of each hidden layer
            output_nodes: number of output nodes
            learning_rate: step size used by gradient descent
            seed: optional seed for the weight initialisation; when None the
                global NumPy random state is used, so np.random.seed(...) still
                controls the result
        '''
        self.input_nodes = input_nodes
        # Copy into a list so tuples (or any other sequence) are accepted too.
        self.hidden_nodes = list(hidden_nodes)
        self.output_nodes = output_nodes
        self.lr = learning_rate
        # A dedicated generator gives reproducible weights without touching
        # the global random state; fall back to the global one when unseeded.
        self._rng = np.random.default_rng(seed) if seed is not None else np.random
        self.initialize_weights_bias()

    def initialize_weights_bias(self):
        '''
        Initialize the weights and biases of the neural network.

        Weights are drawn from a normal distribution with mean 0 and standard
        deviation 0.01; biases start at zero. Layer i gets a weight matrix of
        shape (nodes_out, nodes_in) and a bias column of shape (nodes_out, 1).
        '''
        rng = getattr(self, '_rng', np.random)
        self.W = []
        self.b = []
        # Chain all layer sizes so consecutive pairs describe one layer each.
        nodes = [self.input_nodes] + list(self.hidden_nodes) + [self.output_nodes]
        for fan_in, fan_out in zip(nodes[:-1], nodes[1:]):
            self.W.append(rng.normal(0, 0.01, (fan_out, fan_in)))
            self.b.append(np.zeros((fan_out, 1)))

    # Backward-compatible alias for the original (misspelled) method name.
    intialize_weights_bias = initialize_weights_bias

    def compute_accuracy(self, X, y):
        '''
        Fraction of samples whose most probable class equals the true label.

        Arguments:
            X: input data, one sample per column
            y: true integer labels, one per sample
        '''
        # P has one column of class probabilities per sample.
        P, _ = self.forward_pass(X)
        predictions = P.argmax(axis=0)
        return np.mean(predictions == y)

    def forward_pass(self, X):
        '''
        Propagate X through the network.

        Returns:
            P: softmax probabilities of the output layer
            Hs: list with the ReLU activations of every hidden layer, in order
        '''
        activation = X
        Hs = []
        # Every layer except the last one is a hidden ReLU layer.
        for W, b in zip(self.W[:-1], self.b[:-1]):
            activation = np.maximum(W @ activation + b, 0)
            Hs.append(activation)

        # The final layer produces the scores that are normalised by softmax.
        S = self.W[-1] @ activation + self.b[-1]
        P = softmax(S)
        return P, Hs

    def backward_pass(self, X, Y, P, Hs):
        '''
        Compute the gradients of the mean cross-entropy loss by backpropagation.

        Arguments:
            X: input data, one sample per column
            Y: true one hot encoded labels, one sample per column
            P: probabilities returned by forward_pass
            Hs: hidden activations returned by forward_pass
        Returns:
            dWs, dbs: lists of gradients matching self.W and self.b
        '''
        n = X.shape[1]  # number of samples
        dWs = [None] * len(self.W)
        dbs = [None] * len(self.b)
        # Gradient of softmax + cross-entropy with respect to the output scores.
        G = P - Y
        for i in range(len(self.W) - 1, 0, -1):
            dWs[i] = (G @ Hs[i - 1].T) / n
            # Row sum replaces a matmul with an explicit (n, 1) ones vector.
            dbs[i] = np.sum(G, axis=1, keepdims=True) / n

            # Push the gradient through the layer, then through the ReLU:
            # units that were inactive in the forward pass pass no gradient.
            G = self.W[i].T @ G
            G = G * (Hs[i - 1] > 0)
        dWs[0] = (G @ X.T) / n
        dbs[0] = np.sum(G, axis=1, keepdims=True) / n

        return dWs, dbs

    def update_weights_bias(self, dWs, dbs):
        '''
        Take one gradient descent step, updating the parameters in place.

        Arguments:
            dWs: list of gradients of weights
            dbs: list of gradients of biases
        '''
        for i in range(len(self.W)):
            self.W[i] -= self.lr * dWs[i]
            self.b[i] -= self.lr * dbs[i]

    def train(self, X, Y, y, settings):
        '''
        Train with full-batch gradient descent.

        Arguments:
            X: input data
            Y: true one hot encoded labels
            y: true labels
            settings: dictionary of hyperparameters; 'epochs' is required and
                'learning_rate' optionally replaces the current learning rate
        Returns:
            The lists of trained weights and biases (self.W, self.b).
        '''
        epochs = settings['epochs']
        self.lr = settings.get('learning_rate', self.lr)

        for epoch in range(epochs):
            P, Hs = self.forward_pass(X)
            dWs, dbs = self.backward_pass(X, Y, P, Hs)
            self.update_weights_bias(dWs, dbs)

            # Log the training accuracy (measured after this epoch's update).
            if epoch % 50 == 0:
                accuracy = self.compute_accuracy(X, y)
                print(f'Accuracy at epoch {epoch}: {accuracy}')
        return self.W, self.b

        
            
        


### Execution

In [4]:
# Import the dataset (training and test splits of images and integer labels)
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a single feature vector. The number of samples is
# read from the data instead of being hard-coded, so the cell keeps working
# if the splits ever have a different size.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# The network expects one sample per column: (n_features, n_samples)
X_train = X_train.T
X_test = X_test.T

# Normalize the data: scale pixel values by 255 (assumes 8-bit intensities)
X_train = X_train / 255
X_test = X_test / 255




In [5]:
# Seed NumPy's global random state so the random weight initialisation, and
# therefore the accuracy log printed during training, is reproducible.
np.random.seed(42)

nn = NeuralNetwork(784, [50], 10)

settings = {
    'epochs': 900,
    'learning_rate': 0.1
}

# Train on a subset of the training split to keep full-batch training fast.
n_train = 10000
X = X_train[:, :n_train]  # first 10000 samples (one per column)
y = y_train[:n_train]     # matching integer labels
Y = one_hot_encoding(y)
print(Y.shape)
print(X.shape)
print(y.shape)
nn.train(X, Y, y, settings);

(10, 10000)
(784, 10000)
(10000,)
Accuracy at epoch 0: 0.1998
Accuracy at epoch 50: 0.4425
Accuracy at epoch 100: 0.7259
Accuracy at epoch 150: 0.832
Accuracy at epoch 200: 0.8681
Accuracy at epoch 250: 0.8852
Accuracy at epoch 300: 0.8947
Accuracy at epoch 350: 0.8992
Accuracy at epoch 400: 0.9055
Accuracy at epoch 450: 0.9091
Accuracy at epoch 500: 0.9133
Accuracy at epoch 550: 0.917
Accuracy at epoch 600: 0.9191
Accuracy at epoch 650: 0.921
Accuracy at epoch 700: 0.9226
Accuracy at epoch 750: 0.9238
Accuracy at epoch 800: 0.9275
Accuracy at epoch 850: 0.9281


**Calculate accuracy**

In [6]:
# Evaluate the trained network on the test split
test_accuracy = nn.compute_accuracy(X_test, y_test)
print('Accuracy on test set: ', test_accuracy)

Accuracy on test set:  0.9122
