#### Step 0: Read input and output

In [1]:
import numpy as np
X = np.array([[1,0,1,0],[1,0,1,1],[0,1,0,1]])
y = np.array([[1],[1],[0]])
print("X : \n", X)
print("y : \n", y)

X : 
 [[1 0 1 0]
 [1 0 1 1]
 [0 1 0 1]]
y : 
 [[1]
 [1]
 [0]]


#### Step 1: Initialize weights and biases with random values 

In [2]:
wh = np.random.rand(4,3)
print("Weights: \n", wh)
bh=np.random.rand(1,3)
print("baises: \n", bh)
wout=np.random.random((3,1))
print("Hidden layer Weights: \n", wout)
bout=np.random.random((1,1))
print("Hidden layer Bias:\n", bout)

Weights: 
 [[0.96301645 0.90691287 0.16381665]
 [0.12070335 0.01951863 0.18193523]
 [0.39742738 0.62332728 0.1114101 ]
 [0.13471936 0.86938414 0.61156002]]
baises: 
 [[0.2848115  0.72784961 0.56603158]]
Hidden layer Weights: 
 [[0.92378989]
 [0.29286508]
 [0.23150473]]
Hidden layer Bias:
 [[0.74998388]]


#### Step 2: Calculate hidden layer input:

hidden_layer_input = matrix_dot_product(X,wh) + bh

In [3]:
hidden_layer_input = (np.dot(X,wh))+bh
print("hidden_layer_input: \n",hidden_layer_input)

hidden_layer_input: 
 [[1.64525533 2.25808977 0.84125833]
 [1.77997468 3.1274739  1.45281836]
 [0.5402342  1.61675238 1.35952683]]



#### Step 3: Perform non-linear transformation on hidden linear input

hiddenlayer_activations = sigmoid(hidden_layer_input)

In [4]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

hiddenlayer_activations = sigmoid(hidden_layer_input)
print("hiddenlayer_activations: \n",hiddenlayer_activations)

hiddenlayer_activations: 
 [[0.83824876 0.90534606 0.69873017]
 [0.85569374 0.9580119  0.8104318 ]
 [0.6318669  0.83434676 0.79568278]]


#### Step 4: Perform linear and non-linear transformation of hidden layer activation at output layer

output_layer_input = matrix_dot_product (hiddenlayer_activations * wout ) + bout output = sigmoid(output_layer_input)

In [5]:
output_layer_input = hiddenlayer_activations.dot(wout) + bout
print("output_layer_input: \n", output_layer_input)

output_layer_input: 
 [[1.95125321]
 [2.00865214]
 [1.7622515 ]]


In [6]:
output = sigmoid(output_layer_input)
print("output: \n",output)

output: 
 [[0.87558323]
 [0.88170251]
 [0.85349142]]


#### Step 5: Calculate gradient of Error(E) at output layer

E = y-output

In [7]:
E = y - output
print("Error: \n",E)

Error: 
 [[ 0.12441677]
 [ 0.11829749]
 [-0.85349142]]


#### Step 6: Compute slope at output and hidden layer

Slope_output_layer= derivatives_sigmoid(output)

Slope_hidden_layer = derivatives_sigmoid(hiddenlayer_activations)

In [8]:
def derivatives_sigmoid(x):
    return x*(1-x)

Slope_output_layer=derivatives_sigmoid(output)
print("Slope_output_layer: \n", Slope_output_layer)

Slope_hidden_layer=derivatives_sigmoid(hiddenlayer_activations)
print("Slope_hidden_layer: \n", Slope_hidden_layer)

Slope_output_layer: 
 [[0.10893724]
 [0.1043032 ]
 [0.12504382]]
Slope_hidden_layer: 
 [[0.13558777 0.08569457 0.21050632]
 [0.12348196 0.0402251  0.1536321 ]
 [0.23261112 0.13821225 0.16257169]]


#### Step 7: Compute delta at output layer

d_output = E * slope_output_layer*lr

In [9]:
lr = 0.1

d_output = E * Slope_output_layer * lr
print("delta ouput: \n",d_output)

delta ouput: 
 [[ 0.00135536]
 [ 0.00123388]
 [-0.01067238]]


#### Step 8: Calculate Error at hidden layer

Error_at_hidden_layer = matrix_dot_product(d_output, wout.Transpose)

In [10]:
Error_at_hidden_layer=d_output * wout.transpose()
print("Error at hidden layer: \n",Error_at_hidden_layer)

Error at hidden layer: 
 [[ 0.00125207  0.00039694  0.00031377]
 [ 0.00113985  0.00036136  0.00028565]
 [-0.00985904 -0.00312557 -0.00247071]]


#### Step 9: Compute delta at hidden layer

d_hiddenlayer = Error_at_hidden_layer * slope_hidden_layer

In [11]:
d_hiddenlayer=Error_at_hidden_layer*Slope_hidden_layer
print("delta at hidden layer: \n",d_hiddenlayer)

delta at hidden layer: 
 [[ 1.69765345e-04  3.40154486e-05  6.60511393e-05]
 [ 1.40750479e-04  1.45357648e-05  4.38848862e-05]
 [-2.29332212e-03 -4.31991797e-04 -4.01667021e-04]]


#### Step 10: Update weight at both output and hidden layer

wout = wout + matrix_dot_product (hiddenlayer_activations.Transpose, d_output) * learning_rate

wh = wh+ matrix_dot_product (X.Transpose,d_hiddenlayer) * learning_rate

In [12]:
wout=wout+(hiddenlayer_activations.transpose().dot(d_output)) * lr
print("wout : \n", wout)

wh=wh+X.transpose().dot(d_hiddenlayer) * lr
print("wh: \n",wh)

wout : 
 [[0.92333474]
 [0.29221555]
 [0.23085025]]
wh: 
 [[0.9630475  0.90691773 0.16382765]
 [0.12047402 0.01947543 0.18189507]
 [0.39745843 0.62333214 0.1114211 ]
 [0.1345041  0.86934239 0.61152425]]


#### Step 11: Update biases at both output and hidden layer

bh = bh + sum(d_hiddenlayer, axis=0) * learning_rate

bout = bout + sum(d_output, axis=0)*learning_rate

In [13]:
bh = bh + np.sum(d_hiddenlayer, axis=0) * lr
print("bh: \n",bh)

bout = bout + np.sum(d_output, axis=0) * lr
print("bout: \n", bout)

bh: 
 [[0.28461322 0.72781127 0.5660024 ]]
bout: 
 [[0.74917557]]
