In [113]:
import pandas as pd
import numpy as np

# Importing dataset

In [2]:
df = pd.read_csv('dataset/mnist_test.csv')

In [3]:
df.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
df = np.array(df)

# Splitting data for NN dev and Training

In [103]:
m, n = df.shape
df_dev = df[0: 1000]
df_train = df[1000: m]
data = df_dev.T
y = data[0]
X = data[1: ]
y_train = df_train.T[0]
X_train = df_train.T[1: ]
X_train = X_train / 255

# Functions for NN dev

In [61]:
def one_hot(y, num_classes=10):
    one_hot_matrix = np.zeros((y.size, num_classes))
    one_hot_matrix[np.arange(y.size), y] = 1
    return one_hot_matrix.T

In [42]:
def init_params():
    w1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    w2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return w1, b1, w2, b2

In [46]:
def reLU(z):
    return np.maximum(0, z)

def der_reLU(z):
    return (z > 0)

In [111]:
def softMax(z):
    exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

In [45]:
def forward_prop(X, w1, b1, w2, b2):
    z1 = np.dot(w1, X) + b1
    a1 = reLU(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = softMax(z2)
    return z1, a1, z2, a2

In [106]:
def back_prop(X, y, z1, a1, w2, a2):
    dz2 = a2 - one_hot(y)
    dw2 = (1 / m) * np.dot(dz2, a1.T)
    db2 = (1 / m) * np.sum(dz2)
    dz1 = np.dot(w2.T, dz2) * der_reLU(z1)
    dw1 = (1 / m) * np.dot(dz1, X.T)
    db1 = (1 / m) * np.sum(dz1)
    return dw1, db1, dw2, db2

In [48]:
def update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha):
    w1 = w1 - alpha * dw1
    b1 = b1 - alpha * db1
    w2 = w2 - alpha * dw2
    b2 = b2 - alpha * db2
    return w1, b1, w2, b2

In [63]:
def pred(a):
    return np.argmax(a, 0)

In [97]:
def gradient_descent(X, y, iterations, alpha):
    w1, b1, w2, b2 = init_params()
    for i in range(iterations):
        z1, a1, z2, a2 = forward_prop(X, w1, b1, w2, b2)
        dw1, db1, dw2, db2 = back_prop(X, y, z1, a1, w2, a2)
        w1, b1, w2, b2 = update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha)
        print("Iteration: ", i)
        print("Accuracy:", np.mean(y == pred(a2)))
    return w1, b1, w2, b2

# Running NN

In [114]:
w1, b1, w2, b2 = gradient_descent(X_train, y_train, 500, 0.1)

Iteration:  0
Accuracy: 0.11377777777777778
Iteration:  1
Accuracy: 0.15
Iteration:  2
Accuracy: 0.15222222222222223
Iteration:  3
Accuracy: 0.16166666666666665
Iteration:  4
Accuracy: 0.16822222222222222
Iteration:  5
Accuracy: 0.17633333333333334
Iteration:  6
Accuracy: 0.18566666666666667
Iteration:  7
Accuracy: 0.19311111111111112
Iteration:  8
Accuracy: 0.1998888888888889
Iteration:  9
Accuracy: 0.2088888888888889
Iteration:  10
Accuracy: 0.21633333333333332
Iteration:  11
Accuracy: 0.22466666666666665
Iteration:  12
Accuracy: 0.23144444444444445
Iteration:  13
Accuracy: 0.23766666666666666
Iteration:  14
Accuracy: 0.24444444444444444
Iteration:  15
Accuracy: 0.25155555555555553
Iteration:  16
Accuracy: 0.2587777777777778
Iteration:  17
Accuracy: 0.26455555555555554
Iteration:  18
Accuracy: 0.271
Iteration:  19
Accuracy: 0.27944444444444444
Iteration:  20
Accuracy: 0.28555555555555556
Iteration:  21
Accuracy: 0.29444444444444445
Iteration:  22
Accuracy: 0.3016666666666667
Iteratio