<a href="https://www.kaggle.com/code/abdulrehmanikram/notebook80ac244295?scriptVersionId=118273575" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt

In [2]:
#read dataset from csv file
data = pd.read_csv('/kaggle/input/mnist-digit-recognizer/train.csv')

In [3]:
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
#convert dataframe into numpy array
data = np.array(data)


In [5]:
# m : no. of samples n: no. of features
m,n = data.shape
#randomize data
np.random.shuffle(data)

# split data for testing and training 
data_dev = data[0:1000].T
y_dev = data_dev[0]
x_dev = data_dev[1:n]
x_dev =x_dev/255

data_train = data[1000:m].T

y_train = data_train[0]
x_train = data_train[1:n]
x_train=x_train/255

In [6]:
#initialize paramters
def init_parameters():
    w1 = np.random.rand(10,784) -0.5
    b1 = np.random.rand(10,1) -0.5
    w2 = np.random.rand(10,10) -0.5
    b2 = np.random.rand(10,1) -0.5
    return w1,b1,w2,b2
#activation function for hidden layers
def ReLu(z):
    return np.maximum(z,0)

#activation function for output layer
def softmax(z):
    return np.exp(z)/sum(np.exp(z))

#forward progoation
def forward(w1,b1,w2,b2,x):
    z1 = w1.dot(x)+b1
    a1 = ReLu(z1)
    z2 = w2.dot(a1)+b2
    a2 = softmax(z2)
    
    return z1,a1,z2,a2

#this will return a 2d matrix where each column represents
# a sample and the element at the position of the corresponding 
# class is set to 1
def one_hot_enc(y):
    one_hot_y = np.zeros((y.size,y.max()+1))
    one_hot_y[np.arange(y.size),y]=1
    one_hot_y=one_hot_y.T
    return one_hot_y

#derivate of ReLu function
def dReLu(z):
    return z>0

#backwards propogation returns derivative of loss function
# with respect to the parameters, tells us how much to
# nudge each parameter by
def back_prop(z1,a1,z2,a2,w1,w2,x,y):
    one_hot_y = one_hot_enc(y)
    dz2 = a2-one_hot_y
    dw2 = 1/m * dz2.dot(a1.T)
    db2 = 1/m * np.sum(dz2)
    dz1 = w2.T.dot(dz2) * dReLu(z1)
    dw1 = 1/m * dz1.dot(x.T)
    db1 = 1/m * np.sum(dz1)
    return dw2,db2,dw1,db1

# updates the parameters based on the derivative and the
#learning rate to minimize loss
def update_params(w1,w2,b1,b2,dw1,dw2,db1,db2,alpha):
    w1 = w1 - alpha*dw1
    b1 = b1 - alpha*db1
    w2 = w2 - alpha*w2
    b2 = b2 - alpha*db2
    return w1,b1,w2,b2

#returns the index position of the class which the model
# beleives to be correct
def get_predictions(a2):
    return np.argmax(a2,0)

# gives back the accuracy of the model
def get_accuracy(predictions,y):
    print(predictions, y)
    return np.sum(predictions==y)/y.size
    
#finds the optimal values for the parameters and tries
# to minimize loss as much as possible 
# the more epochs or iterations the more accurate
def gradient_descent(x,y,alpha,epochs):
    w1,b1,w2,b2 = init_parameters()
    for i in range(epochs):
        z1,a1,z2,a2 = forward(w1,b1,w2,b2,x)
        dw2,db2,dw1,db1 = back_prop(z1,a1,z2,a2,w1,w2,x,y)
        w1,b1,w2,b2 = update_params(w1,w2,b1,b2,dw1,dw2,db1,db2,alpha)
        if i%10==0:
            print(f'iteration: {i}')
            predictions = get_predictions(a2)
            print(get_accuracy(predictions,y))
    return w1,b1,w2,b2
        
    
    
    
    
    

In [7]:
w1,b1,w2,b2 = gradient_descent(x_train,y_train,0.10,100)

iteration: 0
[4 4 4 ... 4 4 4] [6 9 3 ... 6 9 6]
0.11090243902439025
iteration: 10
[4 0 4 ... 4 9 3] [6 9 3 ... 6 9 6]
0.1364390243902439
iteration: 20
[4 4 4 ... 4 4 4] [6 9 3 ... 6 9 6]
0.10958536585365854
iteration: 30
[4 4 4 ... 4 4 4] [6 9 3 ... 6 9 6]
0.09729268292682927
iteration: 40
[4 4 4 ... 4 4 4] [6 9 3 ... 6 9 6]
0.09726829268292683
iteration: 50
[4 4 4 ... 4 4 4] [6 9 3 ... 6 9 6]
0.09726829268292683
iteration: 60
[4 4 4 ... 4 4 4] [6 9 3 ... 6 9 6]
0.09726829268292683
iteration: 70
[4 4 4 ... 4 4 4] [6 9 3 ... 6 9 6]
0.09726829268292683
iteration: 80
[4 4 4 ... 4 4 4] [6 9 3 ... 6 9 6]
0.09726829268292683
iteration: 90
[4 4 4 ... 4 4 4] [6 9 3 ... 6 9 6]
0.09726829268292683


In [8]:
print(y_train)

[6 9 3 ... 6 9 6]
