## Installing required packages

In [1]:
pip install numpy sklearn

Note: you may need to restart the kernel to use updated packages.



### Fetching the MNIST handwritten digits dataset

In [2]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# as_frame=False forces plain NumPy arrays. Recent scikit-learn versions return a
# pandas DataFrame for this dataset by default, and iterating a DataFrame yields
# column labels rather than samples, which would break the per-sample training loop.
x, y = fetch_openml('mnist_784', return_X_y=True, as_frame=False)
x = (x/255).astype('float64') # normalising values to [0, 1]

In [3]:
# One-hot encode the targets: row i of Y is all zeros except for a single 1
# in the column whose index equals the i-th label.
tmp = np.array(y, dtype=int)  # labels converted to integers
n = len(tmp)                  # number of samples
# Picking rows of the identity matrix by label yields the (n, max_label + 1) encoding.
Y = np.eye(np.max(tmp) + 1, dtype=int)[tmp]

### Splitting Dataset into train and test sets

In [4]:
# Hold out 10% of the samples for evaluation; the fixed random_state keeps the
# split identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    Y,
    test_size=0.1,
    random_state=7,
)

## Neural network with forward pass and back propagation on MNIST handwritten dataset

In [5]:
class NeuralNetwork():
    """Three-weight-layer, bias-free, sigmoid network trained one sample at a time.

    The layer widths are given by ``sizes`` (input, hidden 1, hidden 2, output).
    ``parameters`` holds the weight matrices ``W1``..``W3`` and, after a forward
    pass, also the cached pre-activations ``Z1``..``Z3`` and activations
    ``A0``..``A3`` of the most recent sample.
    """

    def __init__(self, sizes=(784, 128, 64, 10), epochs=50, learning_rate=0.001, seed=None):
        """Create the network and randomly initialise its weights.

        Args:
            sizes: four layer widths (input, hidden 1, hidden 2, output).
            epochs: number of passes over the training set performed by ``train``.
            learning_rate: step size used for every weight update.
            seed: optional integer; when given, weights are drawn from a private
                ``RandomState(seed)`` so the initialisation is reproducible.
                When ``None`` the global NumPy random state is used.

        Raises:
            ValueError: if ``sizes`` does not contain exactly four entries.
        """
        sizes = list(sizes)
        if len(sizes) != 4:
            raise ValueError('sizes must contain exactly 4 layer widths, got {0}'.format(len(sizes)))
        self.sizes = sizes
        self.epochs = epochs
        self.learning_rate = learning_rate
        # np.random (module) and RandomState both expose randn with the same signature.
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Each matrix has shape (units in next layer, units in previous layer) and is
        # scaled by sqrt(1 / units in next layer).
        self.parameters = {
            'W1' : rng.randn(sizes[1], sizes[0])*np.sqrt(1./sizes[1]),
            'W2' : rng.randn(sizes[2], sizes[1])*np.sqrt(1./sizes[2]),
            'W3' : rng.randn(sizes[3], sizes[2])*np.sqrt(1./sizes[3])
        }

    # activation function and its derivative
    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + exp(-x)) element-wise.

        Only exp(-|x|) is ever evaluated, so large-magnitude inputs cannot
        overflow; the two branches are algebraically the same function.
        """
        x = np.asarray(x)
        e = np.exp(-np.abs(x))
        return np.where(x >= 0, 1/(1+e), e/(1+e))

    def sigmoidDerivative(self, x):
        """Return the derivative of the sigmoid at x, computed as s * (1 - s).

        This equals exp(-x) / (1 + exp(-x))**2 but does not evaluate to
        inf / inf (NaN) for large negative x.
        """
        s = self.sigmoid(x)
        return s*(1-s)

    # calculating activations of each layer
    def forward_pass(self, x_train):
        """Propagate one input vector through the network.

        Args:
            x_train: a single sample, a 1-D vector of length ``sizes[0]``.

        Returns:
            The output-layer activations (``A3``). All intermediate ``Z``/``A``
            values are stored in ``self.parameters`` for ``back_propagation``.
        """
        params = self.parameters
        # layer 1 / input
        params['A0'] = x_train
        # layer 2
        params['Z1'] = np.dot(params['W1'], params['A0'])
        params['A1'] = self.sigmoid(params['Z1'])
        # layer 3
        params['Z2'] = np.dot(params['W2'], params['A1'])
        params['A2'] = self.sigmoid(params['Z2'])
        # layer 4 / output
        params['Z3'] = np.dot(params['W3'], params['A2'])
        params['A3'] = self.sigmoid(params['Z3'])
        # activation of last layer is our output
        return params['A3']

    # updating weights through back-propagation
    def back_propagation(self, y_train, output):
        """Compute the weight gradients for the sample last seen by ``forward_pass``.

        Args:
            y_train: target vector for the sample (same length as ``output``).
            output: the value returned by ``forward_pass`` for that sample.

        Returns:
            Dict mapping ``'W1'``, ``'W2'``, ``'W3'`` to arrays shaped like the
            corresponding weight matrices. The weights themselves are not modified.
        """
        params = self.parameters
        change_in_weights = {} # calculates weight changes in all weights

        # derivative of the mean squared error over the output units, times sigmoid'
        error3 = 2*(output-y_train)/output.shape[0] * self.sigmoidDerivative(params['Z3'])
        change_in_weights['W3'] = np.outer(error3, params['A2'])

        # push the error back through W3, then W2
        error2 = np.dot(error3, params['W3'] ) * self.sigmoidDerivative(params['Z2'])
        change_in_weights['W2'] = np.outer(error2, params['A1'])

        error1 = np.dot(error2, params['W2']) * self.sigmoidDerivative(params['Z1'])
        change_in_weights['W1'] = np.outer(error1, params['A0'])

        return change_in_weights

    def _accuracy(self, x_val, y_val):
        """Fraction of samples whose arg-max prediction equals the arg-max of the target."""
        return np.mean([np.argmax(self.forward_pass(x)) == np.argmax(y) for x,y in zip(x_val, y_val)])

    # training on train-set and testing accuracy on validation-set for epochs number of iterations
    def train(self, x_train, y_train, x_val, y_val):
        """Run per-sample gradient descent and print validation accuracy after every epoch.

        Args:
            x_train: iterable of training input vectors.
            y_train: iterable of matching one-hot target vectors.
            x_val: iterable of validation input vectors.
            y_val: iterable of matching one-hot validation targets.
        """
        print('Training...')
        for _iter in range(self.epochs):
            for x,y in zip(x_train, y_train):
                output = self.forward_pass(x)
                changes_in_weights = self.back_propagation(y, output)
                # updating weights depending on values returned by back propagation
                for k,v in changes_in_weights.items():
                    self.parameters[k] -= self.learning_rate * v

            # evaluate on the data passed in by the caller, not on module-level globals
            accuracy = self._accuracy(x_val, y_val)
            print('Iteration: {0}\t|\tAccuracy: {1}%'.format(_iter+1, accuracy * 100))
        print('\nTraining complete!')

In [6]:
# Instantiate the network and train it; the held-out test split is handed over
# as the validation data for the per-epoch accuracy report.
nn = NeuralNetwork()
nn.train(x_train, y_train, x_val=x_test, y_val=y_test)

Training...
Iteration: 1	|	Accuracy: 25.257142857142856%
Iteration: 2	|	Accuracy: 27.514285714285712%
Iteration: 3	|	Accuracy: 31.02857142857143%
Iteration: 4	|	Accuracy: 35.97142857142857%
Iteration: 5	|	Accuracy: 41.6%
Iteration: 6	|	Accuracy: 46.62857142857143%
Iteration: 7	|	Accuracy: 51.18571428571429%
Iteration: 8	|	Accuracy: 54.91428571428572%
Iteration: 9	|	Accuracy: 58.52857142857143%
Iteration: 10	|	Accuracy: 62.671428571428564%
Iteration: 11	|	Accuracy: 66.57142857142857%
Iteration: 12	|	Accuracy: 69.65714285714286%
Iteration: 13	|	Accuracy: 72.32857142857144%
Iteration: 14	|	Accuracy: 74.31428571428572%
Iteration: 15	|	Accuracy: 75.95714285714286%
Iteration: 16	|	Accuracy: 77.41428571428571%
Iteration: 17	|	Accuracy: 78.55714285714286%
Iteration: 18	|	Accuracy: 79.31428571428572%
Iteration: 19	|	Accuracy: 80.24285714285713%
Iteration: 20	|	Accuracy: 81.11428571428571%
Iteration: 21	|	Accuracy: 81.84285714285714%
Iteration: 22	|	Accuracy: 82.52857142857142%
Iteration: 23	|	A