In [1]:
#Input Array
Input=[
    [1,0,1],
    [1,1,1],
    [0,0,1],
]

In [2]:
#Output Array
Output=[
    [1],
    [1],
    [0],
]

In [3]:
import numpy as np

In [4]:
np.random.seed(42)

In [5]:
X = np.array(Input)
y = np.array(Output)

In [6]:
X.shape, y.shape

((3, 3), (3, 1))

![../notebooks/images/title](../notebooks/images/graph.jpg)

In [7]:
# Define the number of neurons at input hidden and output layer
# Weights initialization for edges between input and hidden layer
# bias for hidden layer neurons
# Weights initialization for edges between hidden and output layer
# bias for output layer neuron(s)
# Define activation function for hidden layer and output layer neurons
# Define derivative of activation function for hidden and output layer neurons
# Forward Propagation
# Backward Propagation
# Train for more number of epochs

In [8]:
inputlayer_neurons = 3
hiddenlayer_neurons = 2
outputlayer_neuron = 1

In [9]:
wih = np.round(np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons)), decimals=1)

In [10]:
wih

array([[0.4, 1. ],
       [0.7, 0.6],
       [0.2, 0.2]])

In [11]:
bih = np.round(np.random.uniform(size=(1, hiddenlayer_neurons)), decimals=1)

In [12]:
bih

array([[0.1, 0.9]])

In [13]:
who = np.round(np.random.uniform(size=(hiddenlayer_neurons, outputlayer_neuron)), decimals=1)

In [14]:
bho = np.round(np.random.uniform(size=(1, outputlayer_neuron)), decimals=1)

In [15]:
who

array([[0.6],
       [0.7]])

In [16]:
bho

array([[0.]])

<img src="../notebooks/images/sigmoid-equation.png" alt="drawing" style="width:150px;"/>

In [17]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [18]:
def derivative_sigmoid(x):
    return x * (1 - x)

In [19]:
# Multiply input with weight matrix (input and hidden layer) and add bias matrix
# apply activation function on weighted input to generate hidden layer activations
# multiply hidden layer activations with weight matrix (hidden and output) and add bias matrix
# Apply activatoin function on weighted hidden layer activations

In [20]:
X.shape, wih.shape

((3, 3), (3, 2))

In [21]:
weighted_input = np.dot(X, wih) + bih

In [22]:
weighted_input.shape

(3, 2)

In [23]:
X

array([[1, 0, 1],
       [1, 1, 1],
       [0, 0, 1]])

In [24]:
wih

array([[0.4, 1. ],
       [0.7, 0.6],
       [0.2, 0.2]])

In [25]:
np.dot(X, wih)

array([[0.6, 1.2],
       [1.3, 1.8],
       [0.2, 0.2]])

In [26]:
1*0.4 + 0*0.7 + 1*0.2

0.6000000000000001

In [27]:
bih

array([[0.1, 0.9]])

In [28]:
bih.shape, np.dot(X, wih).shape

((1, 2), (3, 2))

In [29]:
weighted_input

array([[0.7, 2.1],
       [1.4, 2.7],
       [0.3, 1.1]])

In [30]:
sigmoid(0.7)

0.6681877721681662

In [31]:
sigmoid(2.1)

0.8909031788043871

In [32]:
hiddenlayer_activations = sigmoid(weighted_input)
hiddenlayer_activations

array([[0.66818777, 0.89090318],
       [0.80218389, 0.93702664],
       [0.57444252, 0.75026011]])

In [33]:
weighted_hidden_layer_activation = np.dot(hiddenlayer_activations, who) + bho
output = sigmoid(weighted_hidden_layer_activation)

In [34]:
output

array([[0.73585694],
       [0.75717051],
       [0.70471398]])

![title](../notebooks/images/all_matrices.png)

![title](../notebooks/images/graph.jpg)

In [41]:
# Backpropagation

# step 1 - calculate error
error = ((y - output)*(y - output)) / 2
error

array([[0.03488578],
       [0.02948308],
       [0.2483109 ]])

In [50]:
# step 2 - calculate change in error with respect to who
    # step 2.1 - find change in error with respect to output
    
change_error_wrt_output = y - output
    
    # step 2.2 - find change in output with respect to weighted hidden layer activations
    
change_output_wrt_weighted_hidden_layer_activations = derivative_sigmoid(output)
    
    # step 2.3 - find change in weighted hidden layer activations with respect to who
    
change_weighted_hidden_layer_activations_wrt_who = hiddenlayer_activations
    
    # step 2.4 - calculate change in error with respect to who
delta_who = np.dot(
    change_weighted_hidden_layer_activations_wrt_who.T,
    (change_error_wrt_output * change_output_wrt_weighted_hidden_layer_activations)
)

In [51]:
# step 3 - calculate change in error with respect to bho
    # step 3.1 - find change in error with respect to output
    # step 3.2 - find change in output with respect to weighted hidden layer activations
    # step 3.3 - find change in weighted hidden layer activations with respect to bho
    
change_weighted_hidden_layer_activations_wrt_bho = np.ones((3, 1))
    
    # step 3.4 - calculate change in error with respect to bho
delta_bho = np.dot(
    change_weighted_hidden_layer_activations_wrt_bho.T,
    (change_error_wrt_output * change_output_wrt_weighted_hidden_layer_activations)
)

In [52]:
delta_bho.shape, bho.shape

((1, 1), (1, 1))

In [64]:
# step 4 - calculate change in error with respect to wih
    # step 4.1 - find change in error with respect to output
change_error_wrt_output
    # step 4.2 - find change in output with respect to weighted hidden layer activations
change_output_wrt_weighted_hidden_layer_activations
    # step 4.3 - find change in weighted hidden layer activations with respect to hidden layer activations
change_weighted_hidden_layer_activations_wrt_hidden_layer_activations = who
    # step 4.4 - find change in hidden layer activations with respect to weighted input
change_hidden_layer_activations_wrt_weighted_input = derivative_sigmoid(hiddenlayer_activations)
    # step 4.5 - find change in weighted input with respect to wih
change_weighted_input_wrt_wih = X
    # step 4.6 - calculate change in error with respect to wih
delta_wih = np.dot(
    change_weighted_input_wrt_wih.T,
    change_hidden_layer_activations_wrt_weighted_input * 
    np.dot(
        (change_error_wrt_output * change_output_wrt_weighted_hidden_layer_activations),
        change_weighted_hidden_layer_activations_wrt_hidden_layer_activations.T
    )
)

In [65]:
# step 5 - calculate change in error with respect to bih
    # step 5.1 - find change in error with respect to output
    # step 5.2 - find change in output with respect to weighted hidden layer activations
    # step 5.3 - find change in weighted hidden layer activations with respect to hidden layer activations
    # step 5.4 - find change in hidden layer activations with respect to weighted input
    # step 5.5 - find change in weighted input with respect to bih
    
change_weighted_input_wrt_bih = np.ones((3, 1))
    
    # step 5.6 - calculate error with respect to bih
delta_bih = np.dot(
    change_weighted_input_wrt_bih.T,
    change_hidden_layer_activations_wrt_weighted_input * 
    np.dot(
        (change_error_wrt_output * change_output_wrt_weighted_hidden_layer_activations),
        change_weighted_hidden_layer_activations_wrt_hidden_layer_activations.T
    )
)

In [66]:
delta_who

array([[-0.01411792],
       [-0.02244576]])

In [67]:
delta_bho

array([[-0.05065615]])

In [68]:
delta_wih

array([[ 0.01108082,  0.00533729],
       [ 0.00425092,  0.00184418],
       [-0.0104284 , -0.01389657]])

In [69]:
delta_bih

array([[-0.0104284 , -0.01389657]])

In [70]:
# step 6 - update who, wih, bho, bih
who += delta_who * 0.1
wih += delta_wih * 0.1
bih += delta_bih * 0.1
bho += delta_bho * 0.1

In [71]:
# forward prop

#1) Multiply hidden layer activation with weight matrix (Input and Hidden Layer)
weighted_input=np.dot(X,wih) + bih

#2) Apply activation function on weighted_input
hiddenlayer_activations = sigmoid(weighted_input)

#3) Multiply hidden layer activation with weight matrix (Hidden Layer and Output)
weighted_hidden_layer_activation=np.dot(hiddenlayer_activations,who) + bho

#4) Apply activation function on weighted_hidden_layer_activation
new_output = sigmoid(weighted_hidden_layer_activation)
new_output

array([[0.73424229],
       [0.75561556],
       [0.70299861]])

In [72]:
output

array([[0.73585694],
       [0.75717051],
       [0.70471398]])

In [79]:
#Backpropagation

# step 1 - calculate error
error = ((y-output)*(y-output))/2

# step 2 - find change in error with respect to who
    # step 2.1 - change in error with respect to output
    
change_error_output = y-output

    # step 2.2 - change in output wrt weighted_hidden_layer_activation
    
change_output_wrt_weighted_hidden_layer_activation = derivative_sigmoid(output)


    # step 2.3 - change in weighted_hidden_layer_activation wrt who
    
change_weighted_hidden_layer_activation_wrt_who = hiddenlayer_activations


    # step 2.4 - calculate change in error with respect to who
    
delta_who = np.dot(change_weighted_hidden_layer_activation_wrt_who.T, change_error_output * change_output_wrt_weighted_hidden_layer_activation)

# step 3 - find change in error with respect to bho
    # step 3.1 - change in error with respect to output 
    
# (already calculated in step 1.1)

    # step 3.2 - change in output wrt weighted_hidden_layer_activation 
    
# (already calculated in step 1.2)


    
    # step 3.3 - change in weighted_hidden_layer_activation wrt bho
    
change_weighted_hidden_layer_activation_wrt_bho = np.array([[1], [1], [1]])



    # step 3.4 - calculate change in error with respect to who

delta_bho = np.dot(change_weighted_hidden_layer_activation_wrt_bho.T, change_error_output * change_output_wrt_weighted_hidden_layer_activation)


    
# step 4 - change in error wrt wih
    # step 4.1 - change in error with respect to output 
        
# (already calculated in step 1.1)

    # step 4.2 - change in output wrt weighted_hidden_layer_activation 
        
# (already calculated in step 1.2)


        
    # step 4.3 - change in weighted_hidden_layer_activation wrt hidden_layer_activation
        
change_weighted_hidden_layer_activation_wrt_hidden_layer_activation = who


    # step 4.4 - change in hidden_layer_activation wrt weighted_input
    
change_hidden_layer_activation_wrt_weighted_input = derivative_sigmoid(hiddenlayer_activations)
    

    # step 4.5 - change in weighted_input wrt wih
    
change_weighted_input_wrt_wih = X


    # step 4.6 - calculate error wrt wih
    
delta_wih = np.dot(change_weighted_input_wrt_wih.T, (change_hidden_layer_activation_wrt_weighted_input * (change_error_output * change_output_wrt_weighted_hidden_layer_activation).dot(change_weighted_hidden_layer_activation_wrt_hidden_layer_activation.T)))

# step 5 - change in error wrt bih
    # step 5.1 - change in error with respect to output 

# (already calculated in step 1.1)

        
        
    # step 5.2 - change in output wrt weighted_hidden_layer_activation 
    
# (already calculated in step 1.2)

    # step 5.3 - change in weighted_hidden_layer_activation wrt hidden_layer_activation 
        
# (already calculated in step 4.1.3)

    # step 5.4 - change in hidden_layer_activation wrt weighted_input
    
# (already calculated in step 4.2)
    
    # step 5.5 - change in weighted_input wrt bih
    
change_weighted_input_wrt_bih = np.array([[1.], [1.], [1.]])

    # step 5.6 - calculate error wrt bih
    
delta_bih = np.dot(change_weighted_input_wrt_bih.T, (change_hidden_layer_activation_wrt_weighted_input * (change_error_output * change_output_wrt_weighted_hidden_layer_activation).dot(change_weighted_hidden_layer_activation_wrt_hidden_layer_activation.T)))

# step 6 - update who, wih, bho, bih
    # step 6.1 - who is updated with a part of change in error wrt who
who += delta_who * 0.1

    # step 6.2 - wih is updated with a part of change in error wrt wih
wih += delta_wih * 0.1

    # step 6.3 - bho is updated with a part of change in error wrt bho
bho += delta_bho * 0.1

    # step 6.4 - bih is updated with a part of change in error wrt bih
bih += delta_bih * 0.1

# Forward Prop
#1) Multiply hidden layer activation with weight matrix (Input and Hidden Layer)
weighted_input=np.dot(X,wih) + bih

#2) Apply activation function on weighted_input
hiddenlayer_activations = sigmoid(weighted_input)

#3) Multiply hidden layer activation with weight matrix (Hidden Layer and Output)
weighted_hidden_layer_activation=np.dot(hiddenlayer_activations,who) + bho

#4) Apply activation function on weighted_hidden_layer_activation
output = sigmoid(weighted_hidden_layer_activation)
output

array([[0.72391394],
       [0.74569277],
       [0.69195775]])

In [80]:
for i in range(5000):
    # Forward Propagation

    #1) Multiply hidden layer activation with weight matrix (Input and Hidden Layer)
    weighted_input=np.dot(X,wih) + bih

    #2) Apply activation function on weighted_input
    hiddenlayer_activations = sigmoid(weighted_input)

    #3) Multiply hidden layer activation with weight matrix (Hidden Layer and Output)
    weighted_hidden_layer_activation=np.dot(hiddenlayer_activations,who) + bho

    #4) Apply activation function on weighted_hidden_layer_activation
    output = sigmoid(weighted_hidden_layer_activation)

    #Backpropagation

    # step 1 - calculate error
    error = ((y-output)*(y-output))/2

    # step 2 - find change in error with respect to who
        # step 2.1 - change in error with respect to output

    change_error_output = y-output

        # step 2.2 - change in output wrt weighted_hidden_layer_activation

    change_output_wrt_weighted_hidden_layer_activation = derivative_sigmoid(output)


        # step 2.3 - change in weighted_hidden_layer_activation wrt who

    change_weighted_hidden_layer_activation_wrt_who = hiddenlayer_activations


        # step 2.4 - calculate change in error with respect to who

    delta_who = np.dot(change_weighted_hidden_layer_activation_wrt_who.T, change_error_output * change_output_wrt_weighted_hidden_layer_activation)

    # step 3 - find change in error with respect to bho
        # step 3.1 - change in error with respect to output 

    # (already calculated in step 1.1)

        # step 3.2 - change in output wrt weighted_hidden_layer_activation 

    # (already calculated in step 1.2)



        # step 3.3 - change in weighted_hidden_layer_activation wrt bho

    change_weighted_hidden_layer_activation_wrt_bho = np.array([[1], [1], [1]])



        # step 3.4 - calculate change in error with respect to who

    delta_bho = np.dot(change_weighted_hidden_layer_activation_wrt_bho.T, change_error_output * change_output_wrt_weighted_hidden_layer_activation)



    # step 4 - change in error wrt wih
        # step 4.1 - change in error with respect to output 

    # (already calculated in step 1.1)

        # step 4.2 - change in output wrt weighted_hidden_layer_activation 

    # (already calculated in step 1.2)



        # step 4.3 - change in weighted_hidden_layer_activation wrt hidden_layer_activation

    change_weighted_hidden_layer_activation_wrt_hidden_layer_activation = who


        # step 4.4 - change in hidden_layer_activation wrt weighted_input

    change_hidden_layer_activation_wrt_weighted_input = derivative_sigmoid(hiddenlayer_activations)


        # step 4.5 - change in weighted_input wrt wih

    change_weighted_input_wrt_wih = X


        # step 4.6 - calculate error wrt wih

    delta_wih = np.dot(change_weighted_input_wrt_wih.T, (change_hidden_layer_activation_wrt_weighted_input * (change_error_output * change_output_wrt_weighted_hidden_layer_activation).dot(change_weighted_hidden_layer_activation_wrt_hidden_layer_activation.T)))

    # step 5 - change in error wrt bih
        # step 5.1 - change in error with respect to output 

    # (already calculated in step 1.1)



        # step 5.2 - change in output wrt weighted_hidden_layer_activation 

    # (already calculated in step 1.2)

        # step 5.3 - change in weighted_hidden_layer_activation wrt hidden_layer_activation 

    # (already calculated in step 4.1.3)

        # step 5.4 - change in hidden_layer_activation wrt weighted_input

    # (already calculated in step 4.2)

        # step 5.5 - change in weighted_input wrt bih

    change_weighted_input_wrt_bih = np.array([[1.], [1.], [1.]])

        # step 5.6 - calculate error wrt bih

    delta_bih = np.dot(change_weighted_input_wrt_bih.T, (change_hidden_layer_activation_wrt_weighted_input * (change_error_output * change_output_wrt_weighted_hidden_layer_activation).dot(change_weighted_hidden_layer_activation_wrt_hidden_layer_activation.T)))

    # step 6 - update who, wih, bho, bih
        # step 6.1 - who is updated with a part of change in error wrt who
    who += delta_who * 0.1

        # step 6.2 - wih is updated with a part of change in error wrt wih
    wih += delta_wih * 0.1

        # step 6.3 - bho is updated with a part of change in error wrt bho
    bho += delta_bho * 0.1

        # step 6.4 - bih is updated with a part of change in error wrt bih
    bih += delta_bih * 0.1
    
print(output)

[[0.95859347]
 [0.98253517]
 [0.05873176]]


In [81]:
y

array([[1],
       [1],
       [0]])

In [82]:
## Define the structure of neural network

# define the number of neurons at input, hidden and output layer
# initialize weights and biases for edges between input and hidden layer
# initialize weights and biases for edges between hidden and output layer
# define activation functions for hidden and output layer
# define derivative of activation functions for hidden and output layers

## Forward Propagation
# multiply input with weight matrix (input and hidden layer)
# apply activation function on weighted input
# multiply hidden layer activations with weight matrix (hidden and output layer)
# apply activation function on weighted hidden layer activations

## BackPropagation
# calculate error
# calculate change in error with respect to who
# calculate change in error with respect to bho
# calculate change in error with respect to wih
# calculate change in error with respect to bih
# update who, wih, bho, bih

# repeat forward and backward propagation for more iterations