### Imports

In [95]:
import numpy as np

### Step 0: 
Read input and output

In [96]:
# Toy dataset: three samples (rows) described by four binary features (columns).
X = np.array([
    [1, 0, 1, 0],
    [1, 0, 1, 1],
    [0, 1, 0, 1],
])

# Target value for each sample, stored as a 3 x 1 column.
y = np.array([
    [1],
    [1],
    [0],
])

### Step 1: 
Initialize the weights and biases with random values. (Dedicated initialization schemes exist, but for now we simply draw uniform random values.)

In [97]:
# Draw every parameter uniformly from [0, 1). The draws happen in the order
# wh, bh, wout, bout.
wh = np.random.rand(4, 3)    # hidden-layer weights: 4 inputs x 3 hidden units
bh = np.random.rand(1, 3)    # hidden-layer bias: a single 1 x 3 row
wout = np.random.rand(3, 1)  # output-layer weights: 3 hidden units x 1 output
bout = np.random.rand(1, 1)  # output-layer bias: 1 x 1

# Display the parameters; every array except the last is followed by blank lines.
print(wh, '\n', sep='\n')
print(bh, '\n', sep='\n')
print(wout, '\n', sep='\n')
print(bout)

[[0.22060317 0.50605656 0.25878311]
 [0.31713256 0.86296086 0.40861328]
 [0.66894608 0.16998899 0.46500975]
 [0.6299352  0.38595048 0.88567273]]


[[0.48224255 0.3645863  0.13582952]]


[[0.49702365]
 [0.17177305]
 [0.84804526]]


[[0.02443264]]


### Step 2:
Calculate hidden layer input

hidden_layer_input = matrix_dot_product(X,wh) + bh

In [98]:
# Affine map into the hidden layer: matrix product of the inputs with the
# hidden weights, plus the bias row (broadcast across the rows of the product).
hidden_layer_input = X.dot(wh) + bh
print(hidden_layer_input)

[[1.37179179 1.04063185 0.85962238]
 [2.00172699 1.42658234 1.7452951 ]
 [1.42931031 1.61349765 1.43011552]]


### Step 3: 
Perform non-linear transformation on hidden linear input.

hiddenlayer_activations = sigmoid(hidden_layer_input)

In [99]:
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)).

    The computation uses ``np.exp`` and plain arithmetic, so it is applied
    elementwise when ``x`` is a NumPy array and also works for a scalar.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

# Pass every hidden-layer pre-activation through the sigmoid (elementwise).
hiddenlayer_activations = sigmoid(hidden_layer_input)
print(hiddenlayer_activations)

[[0.79766949 0.7389719  0.70258175]
 [0.88097828 0.80636825 0.85135839]
 [0.80679383 0.83389642 0.80691931]]


### Step 4: 

Perform linear and non-linear transformation of hidden layer activation at output layer

##### Output layer Summation:
output_layer_input = matrix_dot_product(hiddenlayer_activations, wout) + bout

##### Output layer Activation:
output = sigmoid(output_layer_input)


In [100]:
# Output layer: affine map of the hidden activations (matrix product with the
# output weights plus the output bias), followed by the sigmoid.
output_layer_input = hiddenlayer_activations.dot(wout) + bout
output = sigmoid(output_layer_input)
print(output)

[[0.75834912]
 [0.78964759]
 [0.77781413]]


### Step 5:
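Calculate the error (E) at the output layer: the target minus the prediction. (This is the negative gradient of the squared-error loss ½(y − output)² with respect to the output.)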

E = y-output

In [101]:
# Error signal: elementwise difference, target minus network prediction.
# A positive entry means the prediction was below the target.
E = y-output
print(E)

[[ 0.24165088]
 [ 0.21035241]
 [-0.77781413]]


### Step 6: 
Compute slope at output and hidden layer

Slope_output_layer= derivatives_sigmoid(output)

Slope_hidden_layer = derivatives_sigmoid(hiddenlayer_activations)

In [102]:
def derivatives_sigmoid(x):
    """Return x * (1 - x), the sigmoid derivative written in terms of its output.

    ``x`` is expected to already be a sigmoid activation s = sigmoid(z); the
    result s * (1 - s) then equals the slope of the sigmoid at z. Passing a raw
    pre-activation z here would NOT give the derivative.
    """
    complement = 1 - x
    return x * complement
# Sigmoid slopes at both layers, computed from the activations themselves
# (derivatives_sigmoid takes sigmoid outputs, not pre-activations).
slope_output_layer = derivatives_sigmoid(output)
slope_hidden_layer = derivatives_sigmoid(hiddenlayer_activations)

# Output-layer slopes are shown first, then the hidden-layer slopes.
print(slope_output_layer)
print(slope_hidden_layer)

[[0.18325573]
 [0.16610428]
 [0.17281931]]
[[0.16139287 0.19289243 0.20896063]
 [0.10485555 0.1561385  0.12654728]
 [0.15587755 0.13851318 0.15580053]]


### Step 7:
Compute delta at output layer

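d_output = E * slope_output_layer

The learning rate `lr` is applied once, in the update steps (Steps 10 and 11); multiplying by it here as well would scale every update by lr².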


In [103]:
# Learning rate (step size). It is applied exactly once, when the weights and
# biases are updated, so the effective step is lr rather than lr**2.
lr = 1

# Delta at the output layer: the error scaled elementwise by the sigmoid slope
# at the output. The learning rate is deliberately NOT folded in here, because
# the weight and bias updates already multiply by lr.
d_output = E * slope_output_layer
print(d_output)

[[ 0.04428391]
 [ 0.03494044]
 [-0.1344213 ]]


### Step 8: 
Calculate Error at hidden layer

Error_at_hidden_layer = matrix_dot_product(d_output, wout.Transpose)

In [104]:
# Send the output delta back through the output weights: the matrix product
# with wout transposed yields one error value per sample and hidden unit.
Error_at_hidden_layer = d_output.dot(wout.T)
print(Error_at_hidden_layer)

[[ 0.02201015  0.00760678  0.03755476]
 [ 0.01736622  0.00600183  0.02963107]
 [-0.06681057 -0.02308996 -0.11399535]]


### Step 9:
Compute delta at hidden layer

d_hiddenlayer = Error_at_hidden_layer * slope_hidden_layer

In [105]:
# Delta at the hidden layer: elementwise product of the error that reached
# each hidden unit with that unit's sigmoid slope.
d_hiddenlayer = Error_at_hidden_layer * slope_hidden_layer
print(d_hiddenlayer)

[[ 0.00355228  0.00146729  0.00784747]
 [ 0.00182094  0.00093712  0.00374973]
 [-0.01041427 -0.00319826 -0.01776054]]


### Step 10: 
Update weight at both output and hidden layer

In [106]:
# Weight updates. Each matrix product multiplies the transposed layer input by
# that layer's delta, which sums the per-sample contributions; the result is
# scaled by the learning rate and added to the current weights.
wout = wout + lr * hiddenlayer_activations.T.dot(d_output)
wh = wh + lr * X.T.dot(d_hiddenlayer)

# Updated output weights first, then the updated hidden weights.
print(wout, wh, sep='\n')

[[0.45467906]
 [0.12057903]
 [0.80043802]]
[[0.22597639 0.50846097 0.2703803 ]
 [0.3067183  0.8597626  0.39085274]
 [0.67431931 0.17239339 0.47660695]
 [0.62134188 0.38368934 0.87166192]]


### Step 11: 
Update biases at both output and hidden layer

bh = bh + sum(d_hiddenlayer, axis=0) * lr

bout = bout + sum(d_output, axis=0) * lr

In [107]:
# Bias updates: sum each layer's delta over axis 0 (the rows), scale by the
# learning rate and add it to the current bias.
bh = bh + lr * d_hiddenlayer.sum(axis=0)
bout = bout + lr * d_output.sum(axis=0)

# Updated hidden bias first, then the updated output bias.
print(bh, bout, sep='\n')

[[0.47720151 0.36379245 0.12966618]]
[[-0.03076431]]
