In [1]:
import random
from sklearn.datasets import fetch_openml
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score 

In [2]:
x, y = fetch_openml('mnist_784', version=1, return_X_y=True) #importing data
x = (x/255).astype('float32')
y = tf.keras.utils.to_categorical(y)

x_train,x_test,y_train,y_test=train_test_split(x, y, test_size=0.15, random_state=38) #splitting it into training and testing data

In [3]:
print(y_train.shape[1]) #checking number of nodes in output

10


In [4]:
size_il=784
size_ol=10
size_hl=[256, 64]

In [5]:
#initialising random bias and weights
random.seed(1)
W1=np.random.randn(size_hl[0], size_il) * np.sqrt(1/size_hl[0])
W2=np.random.randn(size_hl[1], size_hl[0]) * np.sqrt(1/size_hl[1])
W3=np.random.randn(size_ol, size_hl[1]) * np.sqrt(1/size_ol)
B1=np.zeros(shape=(size_hl[0], 1))
B2=np.zeros(shape=(size_hl[1], 1))
B3=np.zeros(shape=(size_ol, 1))


In [6]:
#the expression
def sigmoid(x):
    return 1/(1 + np.exp(-x))
def dsigmoid(x):
    return (np.exp(-x))/((np.exp(-x)+1)**2)

In [7]:
def feet_forward_propogation():
    A0=x_train.T
    Z1=np.dot(W1,A0) + B1
    A1=sigmoid(Z1)
    Z2=np.dot(W2,A1) +B2
    A2=sigmoid(Z2)
    Z3=np.dot(W3,A2) +B3
    A3=sigmoid(Z3)
    return A0,A1,A2,A3,Z1,Z2,Z3

In [8]:
def backward_propogation(A3,A2,A1,A0,Z1,Z2,a):
    global W3,W2,W1
    dZ3=A3-y_train.T
    dW3 = np.dot(dZ3, A2.T)
    err = np.dot(W3.T, dZ3) * dsigmoid(Z2)
    dW2 = np.dot(err, A1.T)
    err = np.dot(W2.T, err) * dsigmoid(Z1)
    dW1 = np.dot(err, A0.T)
    dZ2 = np.multiply(np.dot(W3.T,dZ3), 1-np.power(A2,2))
    dZ1 = np.multiply(np.dot(W2.T,dZ2), 1-np.power(A1,2))
    dB3 = (1/784) * np.sum(dZ3,axis=1,keepdims=True)
    dB2 = (1/784) * np.sum(dZ2,axis=1,keepdims=True)
    dB1 = (1/784) * np.sum(dZ1,axis=1,keepdims=True)
    W3 -= a*dW3
    W2 -= a*dW2
    W1 -= a*dW1

In [9]:
#cost function 
def cross_entropy_cost(A3):
    num = y_train.shape[1]
    logprobs=np.multiply(np.log(A3),y_train.T) + np.multiply((1-y_train.T),np.log(1-A3))
    cost = - np.sum(logprobs) / num
    cost = float(np.squeeze(cost))
    return cost


In [10]:
#training
for i in range(1000):
    a=0.00003
    A0,A1,A2,A3,Z1,Z2,Z3=feet_forward_propogation()
    cost = cross_entropy_cost(A3)
    backward_propogation(A3,A2,A1,A0,Z1,Z2, a)
    print("Cost after iteration ",i," is ", cost)

Cost after iteration  0  is  41299.89833098615
Cost after iteration  1  is  73412.75504219029
Cost after iteration  2  is  47227.967416675965
Cost after iteration  3  is  58375.55675401234
Cost after iteration  4  is  44033.88921022885
Cost after iteration  5  is  33488.06979518232
Cost after iteration  6  is  27285.413736289884
Cost after iteration  7  is  25213.496376573567
Cost after iteration  8  is  21417.179463092692
Cost after iteration  9  is  20621.40438960467
Cost after iteration  10  is  21669.80429719117
Cost after iteration  11  is  20090.236794646957
Cost after iteration  12  is  20248.448011669596
Cost after iteration  13  is  20139.50705500091
Cost after iteration  14  is  21294.189559330018
Cost after iteration  15  is  21254.45633757957
Cost after iteration  16  is  23299.010637704567
Cost after iteration  17  is  19872.854083121165
Cost after iteration  18  is  20782.252080524395
Cost after iteration  19  is  20233.66222034832
Cost after iteration  20  is  22014.9549

In [27]:
#accuracy
A0=x_train.T
Z1=np.dot(W1,A0) + B1
A1=sigmoid(Z1)
Z2=np.dot(W2,A1) +B2
A2=sigmoid(Z2)
Z3=np.dot(W3,A2) +B3
A3=sigmoid(Z3)
acc1=0
acc2=0
for i in range(len(A3)):
    for j in range(len(A3[0])):
        if A3[i][j]==y_train[j][i]:
            acc1+=1
acc1/=len(A3)*len(A3[0])
A0=x_test.T
Z1=np.dot(W1,A0) + B1
A1=sigmoid(Z1)
Z2=np.dot(W2,A1) +B2
A2=sigmoid(Z2)
Z3=np.dot(W3,A2) +B3
A3=sigmoid(Z3)
for i in range(len(A3)):
    for j in range(len(A3[0])):
        if A3[i][j]==y_test[j][i]:
            acc2+=1
acc2/=len(A3)*len(A3[0])

In [28]:
print(acc1)
print(acc2)

0.0
0.0
