**Importing Libraries**

In [1]:
import numpy as np

**Sigmoid Activation Function**

In [2]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of ``x``.

    The exponential is evaluated with ``np.exp``, so ``x`` may be anything
    that call accepts.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

**Derivative of Sigmoid Function (expressed in terms of the sigmoid's output)**

In [3]:
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This equals the sigmoid's slope only when ``x`` is already a sigmoid
    output (s' = s * (1 - s)), so pass the activation, not the pre-activation.
    """
    complement = 1 - x
    return complement * x

**Assumed Values (inputs x1, x2 and target outputs o1, o2)**

In [4]:
# Network inputs fed to the hidden layer.
x1 = 0.2
x2 = 0.1

# Target values that the network outputs o1_out / o2_out are compared against.
o1 = 0.09
o2 = 0.01

**Storing weights in key-value pairs**

In [5]:
weights = dict(
    # input -> hidden (w1, w2 feed h1; w3, w4 feed h2)
    w1=0.5,
    w2=0.3,
    w3=0.1,
    w4=0.4,
    # hidden -> output (w5, w6 feed o1; w7, w8 feed o2)
    w5=0.7,
    w6=0.2,
    w7=0.4,
    w8=0.8,
)

**Storing biases in key-value pairs**

In [6]:
# One bias per layer: each is shared by both neurons of that layer.
biases = dict(
    b1=0.3,  # for hidden layer
    b2=0.5,  # for output layer
)

**Initializing learning rate**

In [7]:
# Step size: every gradient is multiplied by this factor before it is
# subtracted from its weight in the update cells.
learning_rate = 0.1


**Calculating hidden layer activations**

In [8]:
# Pre-activations of the two hidden neurons: weighted sum of both inputs plus
# the hidden-layer bias b1 (the same bias is used for h1 and h2).
h1_in, h2_in = (
    weights["w1"] * x1 + weights["w2"] * x2 + biases["b1"],
    weights["w3"] * x1 + weights["w4"] * x2 + biases["b1"],
)

# Hidden activations.
h1_out, h2_out = sigmoid(h1_in), sigmoid(h2_in)

**Calculating output layer activations**

In [9]:
# Pre-activations of the two output neurons: weighted sum of both hidden
# activations plus the output-layer bias b2 (shared by o1 and o2).
o1_in, o2_in = (
    weights["w5"] * h1_out + weights["w6"] * h2_out + biases["b2"],
    weights["w7"] * h1_out + weights["w8"] * h2_out + biases["b2"],
)

# Network outputs.
o1_out, o2_out = sigmoid(o1_in), sigmoid(o2_in)


**Using Squared Error Loss (half the sum of squared errors, not a mean)**

In [10]:
# Half the sum of squared differences between each output and its target.
# The 1/2 factor makes the derivative w.r.t. an output simply (output - target).
error = 0.5 * (
    (o1_out - o1) ** 2
    + (o2_out - o2) ** 2
)

**Back Propagation Starts**

In [11]:
# Per-output residuals (output minus target); these are dE/d(o_out) for the
# half-squared-error loss above.
error_o1, error_o2 = o1_out - o1, o2_out - o2

**Output layer gradients**

In [12]:
# Chain rule per weight: residual * sigmoid slope at the output * the hidden
# activation that the weight multiplies.
# Weights into output neuron o1.
dE_dw5, dE_dw6 = (
    error_o1 * sigmoid_derivative(o1_out) * h1_out,
    error_o1 * sigmoid_derivative(o1_out) * h2_out,
)
# Weights into output neuron o2.
dE_dw7, dE_dw8 = (
    error_o2 * sigmoid_derivative(o2_out) * h1_out,
    error_o2 * sigmoid_derivative(o2_out) * h2_out,
)

**Update weights for the output layer**

In [13]:
weights["w5"] -= learning_rate * dE_dw5
weights["w6"] -= learning_rate * dE_dw6
weights["w7"] -= learning_rate * dE_dw7
weights["w8"] -= learning_rate * dE_dw8

**Hidden Layer gradients for weights w1,w2,w3,w4**

In [14]:
dE_dh1_out = (error_o1 * sigmoid_derivative(o1_out) * weights["w5"] +
              error_o2 * sigmoid_derivative(o2_out) * weights["w7"])
dE_dh2_out = (error_o1 * sigmoid_derivative(o1_out) * weights["w6"] +
              error_o2 * sigmoid_derivative(o2_out) * weights["w8"])

dE_dw1 = dE_dh1_out * sigmoid_derivative(h1_out) * x1
dE_dw2 = dE_dh1_out * sigmoid_derivative(h1_out) * x2
dE_dw3 = dE_dh2_out * sigmoid_derivative(h2_out) * x1
dE_dw4 = dE_dh2_out * sigmoid_derivative(h2_out) * x2

**Update weights for the hidden layer**

In [15]:
# Gradient-descent step on the input -> hidden weights: w <- w - lr * dE/dw.
# The dict is updated in place, so re-running this cell applies another step.
weights.update(
    w1=weights["w1"] - learning_rate * dE_dw1,
    w2=weights["w2"] - learning_rate * dE_dw2,
    w3=weights["w3"] - learning_rate * dE_dw3,
    w4=weights["w4"] - learning_rate * dE_dw4,
)


In [16]:
# Display all eight weights after the single gradient-descent step above.
weights

{'w1': 0.4993346585658121,
 'w2': 0.29966732928290607,
 'w3': 0.09936796905620592,
 'w4': 0.399683984528103,
 'w5': 0.6924173351299006,
 'w6': 0.19262800738978184,
 'w7': 0.3918584264618821,
 'w8': 0.7920846271084422}

In [17]:
# Display the loss. `error` was computed from the forward pass before any
# weight was updated, so this value does not reflect the updated weights.
error

0.5002522842154867