Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Dataset Creation

In [2]:
X = np.vstack([(np.random.rand(10,2)*5),(np.random.rand(10,2)*10)])
Y = np.hstack([[0]*10, [1]*10])
dataset = pd.DataFrame(X, columns = {"X1", "X2"})
dataset["Y"] = Y
dataset

Unnamed: 0,X2,X1,Y
0,4.691534,1.269821,0
1,4.633984,3.464855,0
2,0.5189,4.181024,0
3,4.395097,0.566331,0
4,1.744464,2.350135,0
5,2.430135,0.562324,0
6,1.181834,2.501829,0
7,2.383304,0.520892,0
8,2.966886,4.113989,0
9,4.692872,3.353119,0


Class Vector creation- One Hot Encoding 

In [3]:
Z = np.zeros((20,2))
for i in range(20):
  Z[i, Y[i]] = 1
Z  

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.]])

Random Weight & Bias Initialization

In [4]:
Wi_1 = np.random.rand(3,2)
Bi_1 = np.random.rand(3)
Wi_2 = np.random.rand(3,2)
Bi_2 = np.random.rand(2)

Forward Propagation

In [5]:
def forward_prop(X, Wi_1, Bi_1, Wi_2, Bi_2):
  #first layer
  M = 1/(1 + np.exp(-(X.dot(Wi_1.T) + Bi_1)))
  #second layer
  A = M.dot(Wi_2) + Bi_2
  expA = np.exp(A)
  Y = expA/expA.sum(axis = 1, keepdims = True)
  return Y, M

In [6]:
forward_prop(X, Wi_1, Bi_1, Wi_2, Bi_2)

(array([[0.80262123, 0.19737877],
        [0.80762967, 0.19237033],
        [0.78788658, 0.21211342],
        [0.79905204, 0.20094796],
        [0.78888434, 0.21111566],
        [0.78384766, 0.21615234],
        [0.78345663, 0.21654337],
        [0.78295801, 0.21704199],
        [0.80400639, 0.19599361],
        [0.80756247, 0.19243753],
        [0.80629658, 0.19370342],
        [0.81621322, 0.18378678],
        [0.81304127, 0.18695873],
        [0.81625224, 0.18374776],
        [0.8163601 , 0.1836399 ],
        [0.81635003, 0.18364997],
        [0.80327275, 0.19672725],
        [0.81645877, 0.18354123],
        [0.81295402, 0.18704598],
        [0.80475803, 0.19524197]]),
 array([[0.99098075, 0.99141883, 0.87984616],
        [0.99793261, 0.99638036, 0.91508389],
        [0.98290113, 0.90613128, 0.82776751],
        [0.9824379 , 0.98510903, 0.85758605],
        [0.97254825, 0.9300665 , 0.81994393],
        [0.94098559, 0.92036019, 0.79361409],
        [0.96489629, 0.89586891, 0.8046092

Back Propagation

In [54]:
#returning gradient for Wi_1
def diff_Wi_1(X, H, Z, output, Wi_2):
  dZ = (Z - output).dot(Wi_2.T) * H * (1 - H)
  return X.T.dot(dZ)

#returning gradient for Wi_2
def diff_Wi_2(H, Z, Y):
  return H.T.dot(Z-Y)

#returning gradient for bias
def diff_Bi_2(Z,Y):
  return(Z-Y).sum(axis = 0)

def diff_Bi_1(Z, Y, Wi_2, H):
  return ((Z-Y).dot(Wi_2.T)* H*(1-H)).sum(axis=0)  

Training

In [43]:
learning_rate = 1e-3
for epoch in range(5000):
  output, hidden = forward_prop(X, Wi_1, Bi_1, Wi_2, Bi_2)
  Wi_2+= learning_rate * diff_Wi_2(hidden, Z, output)
  Bi_2+= learning_rate * diff_Bi_2(Z, output)
  Wi_1+= learning_rate * diff_Wi_1(X,hidden, Z, output, Wi_2).T
  Bi_1+= learning_rate * diff_Bi_1(Z, output, Wi_2, hidden)[0]

Testing


In [56]:
X_test = np.array([5,7])
hidden_output = 1/(1+ np.exp(-(X_test.dot(Wi_1.T) + Bi_1)))
Outer_layer_output = hidden_output.dot(Wi_2) + Bi_2
expA  = np.exp(Outer_layer_output)
Y = expA/expA.sum()
print("Probability of class 0>>>>>>> {} \nprobability of class 1>>>>> {}".format(Y[0], Y[1]))

Probability of class 0>>>>>>> 0.4605096222112106 
probability of class 1>>>>> 0.5394903777887895
