In [2]:
#Importing the necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
%matplotlib inline

In [3]:
#loading and converting the CSV file to a pandas dataframe for easy manipulation
data = pd.read_csv('train.csv')

In [4]:
#Viewing the top five datasets
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
#Converting the dataset into an array
data = np.array(data)

m, n = data.shape

#Shuffle the datapoints in the dataset to remove biases
np.random.shuffle(data)

'''Transforming the array, where the individual datapoints are changed from being 
represented in rows to being represented in columns'''
data_dev = data[0:1000].T

In [6]:
#Spliting the dataset into the features, labels, train and test sets
y_dev = data_dev[0]
X_dev = (data_dev[1:n])/255

data_train = data[1000:m].T
y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train/255

In [7]:
def init_parameters():
    #creating and giving the dimensions of the placeholders of the parameters
    W1 = np.random.rand(10, 784) -0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) -0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1,b1,W2,b2

#Creating functions for the activation functions that will be used in the Neural network, Rectified Linear unit and softmax
def ReLU(z):
    return np.maximum(0, z)
def softmax(z):
    E= math.e
    exp = [E**i for i in z]
    expSum = sum(exp)
    prob = [i/expSum for i in exp]
    return prob

#Creating a function for the derivative of the rectified linear unit
def deriv_ReLU(z):
    return z > 0

#Creating Function for the forward propagation
def forward_propagation(W1,b1,W2,b2,x):
    z1= W1.dot(x) + b1
    A1 = ReLU(z1)
    z2 = W2.dot(A1) + b2
    A2 = softmax(z2)
    return z1, A1, z2, A2

#Creating a function that onehot encodes the label
def oneHot(y):
    one_hot= np.zeros((y.size, y.max()+1))
    one_hot[np.arange(y.size), y] = 1
    one_hot = one_hot.T
    return one_hot

#Creating a function that backpropagates through the network 
def backpropagation(z1, A1, z2, A2, W2, x, y):
    m = y.size
    one_hot_y = oneHot(y)
    dz2 = A2 - one_hot_y
    dW2 = 1/m * dz2.dot(A1.T)
    db2 = (1/m * np.sum(dz2,1)).reshape(10, 1)
    dz1 = W2.T.dot(dz2) * deriv_ReLU(z1)
    dW1 = 1/m * dz1.dot(x.T)
    db1 = (1/m * np.sum(dz1,1)).reshape(10, 1)
    return dW1, db1, dW2, db2

#Creating a function that Optimizes the Weights and biases
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha*dW1
    W2 = W2 - alpha*dW2
    b1 = b1 - alpha*db1
    b2 = b2 - alpha*db2
    return W1, b1, W2, b2 



In [8]:
def gradient_descent(x, y, iterations, alpha):
    W1, b1, W2, b2 = init_parameters()
    for i in range(iterations):
        z1, A1, z2, A2 = forward_propagation(W1,b1,W2,b2,x)
        dW1, db1, dW2, db2 = backpropagation(z1, A1, z2, A2, W2, x, y)
        W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        if i % 50 == 0:
            print('Iterations', i)
            print('Accuracy: ', accuracy(predictions(A2), y))
    return W1, b1, W2, b2
#Creating a function that returns the label withe highest softmax value
def predictions(A2):
    return np.argmax(A2, 0)
#A functioon that calculates the accuracy of the predictions
def accuracy(pred, y):
    print(pred, y)
    return np.sum(pred == y)/y.size

In [9]:
# Running the gradient descent function
W1, b1, W2, b2 = gradient_descent(X_train, y_train, 1000, 0.1)

Iterations 0
[3 2 8 ... 3 3 3] [9 0 2 ... 5 4 8]
Accuracy:  0.08553658536585367
Iterations 50
[9 0 2 ... 4 4 5] [9 0 2 ... 5 4 8]
Accuracy:  0.48641463414634145
Iterations 100
[9 0 2 ... 5 4 5] [9 0 2 ... 5 4 8]
Accuracy:  0.6604634146341464
Iterations 150
[9 0 2 ... 5 4 5] [9 0 2 ... 5 4 8]
Accuracy:  0.7373414634146341
Iterations 200
[9 0 2 ... 5 4 5] [9 0 2 ... 5 4 8]
Accuracy:  0.7755609756097561
Iterations 250
[9 0 2 ... 5 4 8] [9 0 2 ... 5 4 8]
Accuracy:  0.8004878048780488
Iterations 300
[9 0 2 ... 5 4 8] [9 0 2 ... 5 4 8]
Accuracy:  0.8174390243902439
Iterations 350
[9 0 2 ... 5 4 8] [9 0 2 ... 5 4 8]
Accuracy:  0.8298536585365853
Iterations 400
[9 0 2 ... 5 4 8] [9 0 2 ... 5 4 8]
Accuracy:  0.8395609756097561
Iterations 450
[9 0 2 ... 5 4 8] [9 0 2 ... 5 4 8]
Accuracy:  0.8459756097560975
Iterations 500
[9 0 2 ... 5 4 8] [9 0 2 ... 5 4 8]
Accuracy:  0.8519512195121951
Iterations 550
[9 0 2 ... 5 4 8] [9 0 2 ... 5 4 8]
Accuracy:  0.8574390243902439
Iterations 600
[9 0 2 ... 5 4

In [10]:
def testingNN(W1,b1,W2,b2,x,y):
    _, _, _, A2 = forward_propagation(W1,b1,W2,b2,x)
    print('Accuracy: ', accuracy(predictions(A2), y))

In [11]:
#Running the model with the test dataset and printing it's accuracy
testingNN(W1,b1,W2,b2,X_dev,y_dev)

[3 6 2 1 0 6 1 6 8 7 9 8 0 6 6 3 8 2 8 5 9 1 2 2 2 3 4 6 2 1 6 4 8 6 4 8 3
 6 6 6 5 0 0 6 7 7 1 3 8 4 6 4 6 4 0 4 6 6 0 2 1 4 8 3 9 9 3 5 1 6 2 6 0 5
 3 5 7 6 5 2 7 2 7 0 0 6 7 5 2 1 8 0 2 1 5 2 6 4 3 7 8 6 9 6 1 8 4 6 2 7 4
 1 7 9 3 4 9 1 6 6 8 4 3 6 0 3 9 0 0 6 3 2 4 6 6 0 0 7 4 3 6 1 4 9 1 3 4 3
 7 3 1 3 3 3 7 5 7 6 7 1 9 6 4 2 5 7 8 2 2 1 3 8 3 7 3 0 7 7 1 9 8 8 4 5 6
 9 1 9 7 1 8 4 3 2 3 9 3 2 8 9 8 4 4 2 4 8 2 9 0 0 2 0 7 6 7 7 9 6 1 1 5 8
 3 8 7 4 2 2 3 8 8 6 1 6 6 5 3 7 6 1 2 1 1 1 8 4 5 3 4 0 0 6 4 9 2 6 7 1 5
 1 5 6 2 4 2 6 3 9 0 5 7 6 5 6 5 1 3 4 5 6 9 4 5 3 0 4 6 7 3 2 7 2 4 2 3 5
 6 4 7 4 4 5 9 2 0 7 6 4 3 4 8 7 5 8 1 6 7 9 2 3 8 1 7 7 2 6 8 9 5 7 3 3 9
 3 0 5 7 8 4 1 7 4 7 6 2 0 8 1 3 2 0 7 5 6 8 4 5 7 6 7 2 4 5 0 5 6 6 7 7 3
 9 5 1 5 0 5 3 3 9 3 9 9 6 1 6 8 2 7 2 7 7 3 1 4 6 5 3 7 6 9 8 4 1 9 0 8 3
 5 2 7 9 0 7 2 0 2 4 0 4 6 0 6 1 4 0 4 5 5 0 7 4 9 7 1 7 4 9 8 7 7 5 8 6 8
 1 7 4 2 9 0 8 4 1 9 2 9 1 8 4 5 9 7 7 2 1 3 6 9 4 9 8 7 7 3 5 7 3 5 9 2 3
 2 7 0 6 0 1 9 9 0 7 6 7 