In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

data = pd.read_csv('classification_train.csv')
data

Unnamed: 0.1,Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0,8,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,4,0,0,0,0,0,0,0,0,...,70,27,0,0,0,0,0,0,0,0
2,2,1,0,0,0,0,0,0,0,0,...,35,0,0,0,0,0,0,0,0,0
3,3,8,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,2,0,0,0,0,1,0,3,0,...,2,0,0,91,117,7,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,29995,3,0,0,0,0,0,0,0,0,...,13,192,150,0,0,0,0,0,0,0
29996,29996,4,0,0,0,0,0,0,0,0,...,2,0,11,179,167,105,0,0,0,0
29997,29997,9,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29998,29998,4,0,0,0,0,1,0,0,0,...,1,1,0,106,163,170,67,0,0,0


In [3]:
data=data.drop("Unnamed: 0",axis=1)#dropping extra column on the df

In [5]:
data = np.array(data)
m, n = data.shape

np.random.shuffle(data) # shuffle before splitting into dev and training sets

data_dev = data[0:1000].T   #for training taking only first 1000 obs
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255
n,m = X_train.shape

In [10]:
#first,defining all layers w and b
def init_params():
    W1 = np.random.rand(50, 784) - 0.5
    b1 = np.random.rand(50, 1) - 0.5
    W2 = np.random.rand(40, 50) - 0.5
    b2 = np.random.rand(40, 1) - 0.5
    w3=np.random.rand(10,40)-0.5
    b3=np.random.rand(10,1)-0.5
    return W1, b1, W2, b2,w3,b3
#relu function                             
def ReLU(Z):
    return np.maximum(Z, 0)
#softmax fn
def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A
#forward propagating with 784,50,40,10 units as layers and taking x_dev as an input first
def forward_prop(W1, b1, W2, b2,w3,b3, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = ReLU(Z2)
    z3=w3.dot(A2)+b3
    a3=softmax(z3)
    return Z1, A1, Z2, A2,z3,a3
#derivative of relu for use in back prop
def ReLU_deriv(Z):
    return Z > 0
#creating an array in which in each obs , for every different value of y ,the entry at that index turns 1
def y_actual(Y):
    y_actual = np.zeros((Y.size, Y.max() + 1))
    y_actual[np.arange(Y.size), Y] = 1
    y_actual = y_actual.T
    return y_actual
#backward propagating
def backward_prop(Z1, A1, Z2, A2,z3,a3, W1, W2,w3, X, Y):
    y1 = y_actual(Y)
    dZ3 = a3 - y1
    dW3 = 1 / m * dZ3.dot(A2.T)
    db3 = 1 / m * np.sum(dZ3)
    dZ2 = w3.T.dot(dZ3) * ReLU_deriv(Z2)
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1=W2.T.dot(dZ2)*ReLU_deriv(Z1)
    dW1=1/m *dZ1.dot(X.T)
    db1=1/m * np.sum(dZ1)
    return dW1, db1, dW2, db2,dW3,db3

def update_params(W1, b1, W2, b2,W3,b3, dW1, db1, dW2, db2,dw3,db3, alpha):
    W1 = W1-alpha*dW1
    b1 = b1-alpha*db1    
    W2 = W2-alpha*dW2  
    b2 = b2-alpha*db2    
    W3=W3-alpha*dw3
    b3=b3-alpha*db3
    return W1, b1, W2, b2,W3,b3

In [8]:
#get_predictions used to get the index of the value with largest prob
def get_predictions(A3):
    return np.argmax(A3, 0)
#checking the accuracy of the model
def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2,w3,b3 = init_params()
    for i in range(iterations):
        Z1, A1, Z2, A2,z3,a3 = forward_prop(W1, b1, W2, b2,w3,b3, X)
        dW1, db1, dW2, db2,dw3,db3 = backward_prop(Z1, A1, Z2, A2,z3,a3, W1, W2,w3, X, Y)
        W1, b1, W2, b2,w3,b3 = update_params(W1, b1, W2, b2,w3,b3, dW1, db1, dW2, db2,dw3,db3, alpha)
        if i % 10 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(a3)
            print(get_accuracy(predictions, Y))
            
    return W1, b1, W2, b2,w3,b3

In [11]:
W1, b1, W2, b2,w3,b3 = gradient_descent(X_train, Y_train, 0.10, 5000)

Iteration:  0
[4 8 2 ... 2 2 8] [8 7 3 ... 1 3 7]
0.1153103448275862
Iteration:  10
[5 7 6 ... 1 3 7] [8 7 3 ... 1 3 7]
0.4362758620689655
Iteration:  20
[5 7 4 ... 1 3 7] [8 7 3 ... 1 3 7]
0.5233793103448275
Iteration:  30
[8 7 4 ... 1 3 7] [8 7 3 ... 1 3 7]
0.5750344827586207
Iteration:  40
[8 7 4 ... 1 3 7] [8 7 3 ... 1 3 7]
0.6261724137931034
Iteration:  50
[8 7 4 ... 1 3 7] [8 7 3 ... 1 3 7]
0.663896551724138
Iteration:  60
[8 7 0 ... 1 3 7] [8 7 3 ... 1 3 7]
0.6509655172413793
Iteration:  70
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.6701724137931034
Iteration:  80
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.6819655172413793
Iteration:  90
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.6922068965517242
Iteration:  100
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.7014827586206897
Iteration:  110
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.709551724137931
Iteration:  120
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.7153448275862069
Iteration:  130
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.722
Iteration:  140
[8 7 3 ... 1 3 7

Iteration:  1160
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8361379310344828
Iteration:  1170
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8363793103448276
Iteration:  1180
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8366896551724138
Iteration:  1190
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8369310344827586
Iteration:  1200
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.837103448275862
Iteration:  1210
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8372413793103448
Iteration:  1220
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8374827586206897
Iteration:  1230
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8382413793103448
Iteration:  1240
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8386206896551724
Iteration:  1250
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8387931034482758
Iteration:  1260
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.839
Iteration:  1270
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8392413793103448
Iteration:  1280
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8396896551724138
Iteration:  1290
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8399310344827586
Iterat

Iteration:  2310
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.857448275862069
Iteration:  2320
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8573448275862069
Iteration:  2330
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8576896551724138
Iteration:  2340
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8575172413793103
Iteration:  2350
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.857448275862069
Iteration:  2360
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8577931034482759
Iteration:  2370
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8575862068965517
Iteration:  2380
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8580344827586207
Iteration:  2390
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.858103448275862
Iteration:  2400
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8581379310344828
Iteration:  2410
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8583448275862069
Iteration:  2420
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8586206896551725
Iteration:  2430
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8588620689655172
Iteration:  2440
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.858931034482

Iteration:  3460
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8699655172413793
Iteration:  3470
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8699655172413793
Iteration:  3480
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8704827586206897
Iteration:  3490
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8705862068965518
Iteration:  3500
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8706551724137931
Iteration:  3510
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8707931034482759
Iteration:  3520
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8708275862068966
Iteration:  3530
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8708620689655172
Iteration:  3540
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.870551724137931
Iteration:  3550
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8709310344827587
Iteration:  3560
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8707586206896551
Iteration:  3570
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8708275862068966
Iteration:  3580
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8707241379310345
Iteration:  3590
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8702413793

Iteration:  4610
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8792068965517241
Iteration:  4620
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8795172413793103
Iteration:  4630
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8788965517241379
Iteration:  4640
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8790689655172413
Iteration:  4650
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8794827586206897
Iteration:  4660
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8796206896551724
Iteration:  4670
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8791379310344828
Iteration:  4680
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8789310344827587
Iteration:  4690
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8790344827586207
Iteration:  4700
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8789655172413793
Iteration:  4710
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8790344827586207
Iteration:  4720
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8794137931034482
Iteration:  4730
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.8793103448275862
Iteration:  4740
[8 7 3 ... 1 3 7] [8 7 3 ... 1 3 7]
0.879275862

In [12]:
def make_predictions(X, W1, b1, W2, b2,w3,b3):
    _, _, _, _,_,A3 = forward_prop(W1, b1, W2, b2,w3,b3, X)
    predictions = get_predictions(A3)
    return predictions



In [13]:
dev_predictions = make_predictions(X_dev, W1, b1, W2, b2,w3,b3)
get_accuracy(dev_predictions, Y_dev)


[5 2 2 6 9 7 2 7 5 8 2 7 6 5 0 5 4 3 5 6 3 9 1 4 4 0 7 9 8 8 8 5 5 3 2 3 7
 8 4 0 9 1 5 3 9 0 7 4 7 5 9 1 2 2 9 8 8 2 8 9 1 4 8 0 0 5 0 0 1 6 0 7 7 4
 6 1 2 5 7 7 8 3 1 7 9 2 4 5 4 1 6 4 2 5 5 8 7 1 5 1 9 3 9 7 2 7 9 0 4 1 9
 1 3 0 3 0 8 0 4 0 5 8 4 2 3 1 0 7 5 3 2 9 9 3 0 9 5 3 2 1 6 7 2 7 8 5 8 9
 8 0 4 3 1 7 9 2 9 7 9 4 9 0 0 1 0 2 3 3 0 0 2 1 3 3 6 0 4 5 8 9 5 4 9 2 6
 8 7 6 3 2 5 9 5 1 7 1 0 2 5 5 5 8 6 7 0 5 7 7 2 8 4 5 3 9 2 7 8 2 6 0 4 8
 5 8 0 0 2 6 0 6 3 4 8 0 9 9 6 3 0 3 4 1 1 7 7 5 9 1 9 6 7 4 4 7 3 4 1 6 5
 1 6 8 9 5 9 7 9 3 0 9 9 1 0 1 1 9 4 5 4 6 0 0 7 9 8 6 1 4 0 5 7 6 3 5 0 5
 3 6 6 4 7 2 4 5 8 2 6 9 5 8 6 8 2 2 6 4 4 4 8 5 9 8 6 7 1 8 6 0 2 3 8 7 3
 0 2 8 3 3 1 2 8 9 9 0 0 6 6 5 9 9 6 1 7 3 2 5 7 7 5 3 5 2 2 1 7 2 5 3 8 1
 2 5 6 8 4 1 0 6 6 9 1 7 1 9 9 9 7 4 8 6 6 4 1 9 5 6 7 6 8 0 4 9 3 7 7 3 3
 7 7 1 1 5 0 5 5 7 6 4 8 5 4 9 4 9 9 6 4 0 6 0 2 9 7 0 4 7 9 2 4 9 6 7 9 0
 9 8 6 1 4 7 8 2 1 4 1 2 0 3 1 9 9 4 7 1 4 4 4 1 6 4 9 9 5 8 7 5 3 8 9 9 7
 8 3 0 3 3 1 9 6 5 1 2 4 

0.853