# *2-Layer Neural Network*

In [64]:
#Importing useful Libraries
import numpy as np
import pandas as pd

In [65]:
#Loading MNIST Training Data
df=pd.read_csv('/content/sample_data/mnist_train_small.csv',header=None)
df.shape

(20000, 785)

In [66]:
#Loading MNIST Testing Data
df1=pd.read_csv('/content/sample_data/mnist_test.csv',header=None)
df1.shape

(10000, 785)

In [92]:
#Splitting the Data into Training and Testing
df = np.array(df)
m,n = df.shape
data_train = df.T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255                                                         ##Normalisation
m_train = X_train.shape

df1 = np.array(df1)
m1,n1 = df1.shape
data_test=df1.T
Y_test = data_test[0]
X_test = data_test[1:n1]
X_test = X_test / 255                                                           ##Normalisation
m_test = X_test.shape

In [68]:
Y_train,Y_test

(array([6, 5, 7, ..., 2, 9, 5]), array([7, 2, 1, ..., 4, 5, 6]))

In [69]:
#Initialising Weights and Bias
def init_params():
    W1 = np.random.rand(10, 784) - 0.5                                          ##Since we are Implementing a 2-Layer we require 2-weights,bias
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

In [70]:
def f(Z):                                                                       ## This function passes only maximum values 
    return np.maximum(Z, 0)

In [71]:
#Sigmoid Function
def sigmoid(Z):
    A = 1 /(1+np.exp(-Z))-0.000001                                              ##Activations 'A'
    return A

In [72]:
#Forward Propagation Function
def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = f(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = sigmoid(Z2)
    return Z1, A1, Z2, A2

In [73]:
#Derivative Fuction of 'f'
def f_dash(Z):
    return Z>0

In [74]:
#One-Hot Encoding
def probability(Y):                                                             ##A Single columned matrix which contains only 0 or 1 for specified values in y
    probability_Y = np.zeros((Y.size, Y.max() + 1))
    probability_Y[np.arange(Y.size), Y] = 1                                     ##For each row go to the column specified Y and set it to 1
    probability_Y = probability_Y.T
    return probability_Y

In [75]:
#Back Propagation
def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    probability_Y = probability(Y)
    dZ2 = A2 - probability_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * f_dash(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2

In [76]:
#Simultaneous Updating of Parameters
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1    
    W2 = W2 - alpha * dW2  
    b2 = b2 - alpha * db2    
    return W1, b1, W2, b2

In [77]:
def get_predictions(A2):
    return np.argmax(A2, 0)

In [78]:
def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

In [79]:
def gradient_descent(X, Y, alpha, iterations):
  W1, b1, W2, b2 = init_params()
  for i in range(iterations):
      Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
      dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
      W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
      if i % 10 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A2)
            print(get_accuracy(predictions, Y))
  return W1, b1, W2, b2

In [91]:
#Implementing Accuracy on Training Data
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 0.1,600)

Iteration:  0
[2 2 2 ... 2 3 7] [6 5 7 ... 2 9 5]
0.07575
Iteration:  10
[0 0 0 ... 2 3 8] [6 5 7 ... 2 9 5]
0.18885
Iteration:  20
[0 0 0 ... 2 7 8] [6 5 7 ... 2 9 5]
0.31085
Iteration:  30
[0 0 0 ... 2 7 8] [6 5 7 ... 2 9 5]
0.3999
Iteration:  40
[6 0 0 ... 6 7 8] [6 5 7 ... 2 9 5]
0.45575
Iteration:  50
[6 0 0 ... 6 7 8] [6 5 7 ... 2 9 5]
0.49315
Iteration:  60
[6 0 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.5244
Iteration:  70
[6 0 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.5492
Iteration:  80
[6 0 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.5714
Iteration:  90
[6 0 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.59175
Iteration:  100
[6 0 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.6071
Iteration:  110
[6 0 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.62195
Iteration:  120
[6 0 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.63465
Iteration:  130
[6 0 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.6467
Iteration:  140
[6 5 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.6574
Iteration:  150
[6 5 4 ... 6 7 8] [6 5 7 ... 2 9 5]
0.6676
Iteration:  160
[2 5 4 ... 6 7 5] [6 5 7 ... 2 9 5]
0.67585

In [100]:
W1, b1, W2, b2 = gradient_descent(X_test, Y_test, 0.3,7000)

Iteration:  0
[8 8 8 ... 8 8 8] [7 2 1 ... 4 5 6]
0.0795
Iteration:  10
[2 8 8 ... 8 2 6] [7 2 1 ... 4 5 6]
0.1804
Iteration:  20
[7 8 8 ... 7 7 0] [7 2 1 ... 4 5 6]
0.2724
Iteration:  30
[7 8 8 ... 7 7 0] [7 2 1 ... 4 5 6]
0.3144
Iteration:  40
[7 0 8 ... 9 7 0] [7 2 1 ... 4 5 6]
0.3583
Iteration:  50
[7 0 8 ... 9 7 0] [7 2 1 ... 4 5 6]
0.3982
Iteration:  60
[7 0 8 ... 9 7 0] [7 2 1 ... 4 5 6]
0.4299
Iteration:  70
[7 0 8 ... 9 7 6] [7 2 1 ... 4 5 6]
0.4746
Iteration:  80
[7 0 8 ... 9 7 6] [7 2 1 ... 4 5 6]
0.5201
Iteration:  90
[7 0 8 ... 9 7 6] [7 2 1 ... 4 5 6]
0.5477
Iteration:  100
[7 0 8 ... 9 7 6] [7 2 1 ... 4 5 6]
0.5748
Iteration:  110
[7 0 8 ... 9 7 6] [7 2 1 ... 4 5 6]
0.6031
Iteration:  120
[7 0 8 ... 9 7 6] [7 2 1 ... 4 5 6]
0.6278
Iteration:  130
[7 0 1 ... 9 8 6] [7 2 1 ... 4 5 6]
0.6489
Iteration:  140
[7 0 1 ... 9 8 6] [7 2 1 ... 4 5 6]
0.6688
Iteration:  150
[7 2 1 ... 9 8 6] [7 2 1 ... 4 5 6]
0.707
Iteration:  160
[7 2 1 ... 9 8 6] [7 2 1 ... 4 5 6]
0.7287
Iteration

Final Accuracy of Testing Dataset is 96.42%