In [36]:
import numpy as np
import pandas as pd

data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

In [37]:
data = np.array(data)
m, n = data.shape
np.random.shuffle(data)

data_test = data[0:1000].T
y_test = data_test[0]
x_test = data_test[1:n]
x_test = x_test / 255.

data_train = data[1000:m].T
y_train = data_train[0]
x_train = data_train[1:n]
x_train = x_train / 255.
m = x_train.shape[1]

print(x_train.shape)

(784, 41000)


In [56]:
# Architecture
n_input = x_train.shape[0]
n_hidden_1 = 32
n_out = 10

In [39]:
def relu(z):
    return np.maximum(z, 0)

def drel(z):
    return z > 0
    
def softmax(z):
    temp = np.exp(z) / sum(np.exp(z))
    return temp

def dsoft(z):
    return softmax(z) * (1 - softmax(z))

def one_hot(y):
    one_hot_y = np.zeros((y.size, y.max() + 1))
    one_hot_y[np.arange(y.size), y] = 1
    one_hot_y = one_hot_y.T
    return one_hot_y

In [53]:
def forward_prop(wh, bh, wo, bo, x):
    z1 = wh.dot(x) + bh
    a1 = relu(z1)
    z2 = wo.dot(a1) + bo
    a2 = softmax(z2)
    return z1, a1, z2, a2

def backward_prop(z1, a1, z2, a2, wh, wo, x, y):
    one_hot_y = one_hot(y)
    dz2 = a2 - one_hot_y
    dwo = 1 / m * dz2.dot(z1.T)
    dbo = 1 / m * np.sum(dz2)
    dz1 = wo.T.dot(dz2) * drel(z1)
    dwh = 1 / m * dz1.dot(x.T)
    dbh = 1 / m * np.sum(dz1)
    return dwh, dbh, dwo, dbo

In [85]:
def fit(x, y, iter, a):
    wh = np.random.rand(n_hidden_1, n_input) - 0.5
    bh = np.random.rand(n_hidden_1, 1) - 0.5
    wo = np.random.rand(n_out, n_hidden_1) - 0.5
    bo = np.random.rand(n_out, 1) - 0.5
    while iter:
        z1, a1, z2, a2 = forward_prop(wh, bh, wo, bo, x)
        dwh, dbh, dwo, dbo = backward_prop(z1, a1, z2, a2, wh, wo, x, y)
        wh -= (a * dwh)
        bh -= (a * dbh)
        wo -= (a * dwo)
        bo -= (a * dbo)
        iter -= 1

    return wh, bh, wo, bo

In [86]:
wh, bh, wo, bo = fit(x_train, y_train, 300, 0.1)

In [81]:
def predict_vals(a2):
    return np.argmax(a2, 0)

def predict(x, wh, bh, wo, bo):
    z1, a1, z2, a2 = forward_prop(wh, bh, wo, bo, x)
    return predict_vals(a2)

In [87]:
y_pred = predict(x_test, wh, bh, wo, bo)
print(y_pred)

[2 5 7 6 0 6 2 3 3 3 3 6 3 8 4 0 2 1 6 3 8 7 0 1 9 7 7 6 0 2 6 9 4 9 1 3 3
 9 7 9 8 4 2 6 0 6 4 2 6 7 1 4 4 1 5 7 8 2 7 2 4 8 6 3 0 0 2 5 6 2 5 6 4 3
 0 1 9 4 2 5 5 1 6 5 6 2 1 4 3 1 0 6 7 9 8 2 9 4 0 6 8 5 1 7 9 8 0 3 4 3 9
 1 6 9 9 5 4 4 0 4 1 5 0 7 1 9 0 9 9 3 3 0 6 0 6 4 8 4 1 6 2 0 1 0 6 4 1 4
 7 1 5 3 0 4 9 8 9 0 8 9 6 5 9 9 5 4 6 4 9 4 0 4 1 8 8 2 1 7 4 0 2 9 7 3 9
 8 7 3 4 5 4 6 9 7 8 2 1 6 1 1 8 7 1 0 6 6 2 7 8 4 8 1 1 4 5 5 2 6 2 6 8 5
 7 6 6 8 6 3 0 9 0 3 6 4 8 6 2 2 2 0 7 2 4 9 9 9 1 2 0 7 4 5 5 0 8 0 7 0 8
 3 5 7 0 6 1 3 4 4 0 5 1 4 4 6 3 5 2 5 0 4 0 6 8 1 8 5 5 3 3 5 3 8 3 7 7 4
 0 4 8 5 8 2 7 3 2 4 8 2 0 6 3 6 3 4 4 3 2 1 8 2 2 4 7 9 8 0 1 0 4 9 6 4 7
 9 2 9 2 9 8 4 4 0 1 2 0 5 1 2 0 9 3 4 7 7 8 1 2 8 1 8 2 5 9 3 0 5 5 2 3 2
 9 0 3 3 3 2 1 6 0 5 5 0 0 3 2 5 4 8 3 3 7 8 8 4 3 2 5 2 8 9 7 7 3 7 0 5 1
 5 0 9 2 2 3 9 0 8 3 7 9 2 2 1 1 4 2 2 6 4 1 9 2 3 3 2 1 2 1 7 7 9 8 2 8 1
 1 4 6 6 9 1 0 7 0 9 0 9 1 7 6 5 3 7 3 3 6 3 1 9 5 0 4 9 3 0 2 2 4 9 1 8 3
 7 4 5 4 3 0 1 4 3 6 1 4 

In [89]:
from sklearn.metrics import confusion_matrix, accuracy_score

confusion_matrix(y_test, y_pred), accuracy_score(y_test, y_pred)

(array([[ 93,   0,   0,   2,   2,   2,   2,   0,   2,   1],
        [  0, 109,   2,   0,   0,   0,   0,   0,   6,   0],
        [  1,   0,  89,   4,   3,   1,   5,   1,   6,   1],
        [  2,   0,   4,  73,   1,   3,   1,   2,   5,   1],
        [  0,   0,   2,   0,  98,   0,   0,   0,   0,   4],
        [  3,   1,   5,   3,   3,  74,   1,   0,   6,   0],
        [  1,   0,   4,   0,   0,   1,  89,   0,   1,   0],
        [  1,   2,   3,   1,   1,   0,   0,  85,   0,   5],
        [  0,   1,   3,   9,   0,   3,   0,   0,  70,   4],
        [  0,   0,   0,   1,   5,   2,   0,   4,   4,  76]]),
 0.856)

nearly 86 % accuracy achieved