In [26]:
import math
import numpy as np

# sigmoid activation function
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e**-x) of the real number ``x``.

    ``math.exp`` raises ``OverflowError`` when its argument exceeds roughly
    709.78, which happens here for strongly negative ``x``. The sigmoid is
    indistinguishable from 0 in that range, so 0.0 is returned instead of
    letting the exception escape. Every input that previously worked yields
    the exact same value as before.
    """
    try:
        return 1 / (1 + math.exp(-x))
    except OverflowError:
        # e**-x is too large for a float; the limit of the sigmoid is 0.
        return 0.0

def sigmoid_d(x):
    """Return the derivative of the sigmoid at ``x``.

    Uses the identity s'(x) = s(x) * (1 - s(x)), where s is ``sigmoid``.
    """
    s = sigmoid(x)
    return s * (1 - s)

# tanh activation function
def tanh(x):
    """Return the hyperbolic tangent of the real number ``x``."""
    value = math.tanh(x)
    return value
    
def tanh_d(x):
    """Return the derivative of tanh at ``x``, i.e. 1 - tanh(x)**2."""
    t = tanh(x)
    return 1 - t**2

## CHOOSING THE ACTIVATION FUNCTION


In [27]:
# Activation applied to every hidden and output unit: sigmoid | tanh
act_fun = tanh

# Pair the activation with its derivative. Anything other than `sigmoid`
# falls through to the tanh derivative.
if act_fun == sigmoid:
    act_fun_d = sigmoid_d
else:
    act_fun_d = tanh_d

## INITIALIZATION OF PARAMETERS

In [28]:
# Weights. In the training loop w1/w2 feed hidden unit h1 and w3/w4 feed
# hidden unit h2 (both from inputs i1/i2); w5/w6 feed output y1 and w7/w8
# feed output y2 (both from the hidden activations).
w1, w2, w3, w4 = 0.35, 0.3, 0.35, 0.5
w5, w6, w7, w8 = 0.3, 0.45, 0.5, 0.4

# Biases: b1 is shared by both hidden units, b2 by both output units.
b1, b2 = 0.4, 0.65

# Initial inputs.
i1, i2 = 0.1, 0.15

# Targets (desired outputs) for y1 and y2 respectively.
target1, target2 = 0.02, 0.98

## EPOCHS

In [30]:
# Gradient-descent training of the 2-2-2 network, written out scalar by scalar.
#
# NOTE: this cell updates w1..w8, b1 and b2 in place, so running it again
# continues training from the already-updated values rather than from the
# initial parameters.

# Step size that scales every gradient in the update step below.
learning_rate = 0.1
# Number of forward/backward passes to perform.
iterations = 1
for _ in range (iterations):
  # Hidden layer forward pass: h* is the weighted input sum (pre-activation),
  # out_h* the activation. Both hidden units share the bias b1.
  h1 = i1 * w1 + i2 * w2 + b1
  out_h1 = act_fun(h1)

  h2 = i1 * w3 + i2 * w4 + b1
  out_h2 = act_fun(h2)

  # Output layer forward pass: same structure, fed by the hidden activations.
  # Both output units share the bias b2.
  y1 = out_h1 * w5 + out_h2 * w6 + b2
  out_y1 = act_fun(y1)

  y2 = out_h1 * w7 + out_h2 * w8 + b2
  out_y2 = act_fun(y2)

  # Output-layer deltas: error (output - target) times the activation's
  # derivative evaluated at the unit's pre-activation.
  d_out_y1 =  (out_y1 - target1) * act_fun_d(y1)
  d_out_y2 =  (out_y2 - target2) * act_fun_d(y2)

  # Hidden-layer deltas: each hidden unit collects the output deltas through
  # the weights leaving it (w5/w7 for h1, w6/w8 for h2), then scales by its own
  # activation derivative. The weights used here are the pre-update values,
  # because the update step only runs further down.
  d_out_h1 = (d_out_y1 * w5 + d_out_y2 * w7) * act_fun_d(h1)
  d_out_h2 = (d_out_y1 * w6 + d_out_y2 * w8) * act_fun_d(h2)

  # Weight gradients: value entering the weight times the delta of the unit
  # that the weight feeds.
  d_w1 = i1 * d_out_h1
  d_w2 = i2 * d_out_h1
  d_w3 = i1 * d_out_h2
  d_w4 = i2 * d_out_h2
  d_w5 = out_h1 * d_out_y1
  d_w6 = out_h2 * d_out_y1
  d_w7 = out_h1 * d_out_y2
  d_w8 = out_h2 * d_out_y2

  # Bias gradients: each bias is shared by the two units of its layer, so its
  # gradient is the sum of that layer's two deltas.
  d_b1 = d_out_h1 + d_out_h2
  d_b2 = d_out_y1 + d_out_y2

  # Update the weights and biases using gradient descent
  # (parameter -= learning_rate * gradient).
  w1 -= learning_rate * d_w1
  w2 -= learning_rate * d_w2
  w3 -= learning_rate * d_w3
  w4 -= learning_rate * d_w4
  w5 -= learning_rate * d_w5
  w6 -= learning_rate * d_w6
  w7 -= learning_rate * d_w7
  w8 -= learning_rate * d_w8

  b1 -= learning_rate * d_b1
  b2 -= learning_rate * d_b2

# After the loop, out_h1/out_h2/out_y1/out_y2 still hold the activations from
# the last forward pass, which was computed before that iteration's update.

## UPDATED PARAMETERS



In [31]:
# Activations left over from the last forward pass of the training loop.
print(
    f"out_h1:  {out_h1}",
    f"out_h2:  {out_h2}",
    sep="\n",
)

print(
    f"out_y1:  {out_y1}",
    f"out_y2:  {out_y2}",
    sep="\n",
)

# Weights after the gradient-descent updates.
print(
    "\nUpdated weights:",
    f"w1 = {w1}",
    f"w2 = {w2}",
    f"w3 = {w3}",
    f"w4 = {w4}",
    f"w5 = {w5}",
    f"w6 = {w6}",
    f"w7 = {w7}",
    f"w8 = {w8}",
    sep="\n",
)

# Biases after the gradient-descent updates.
print(
    "\nUpdated biases:",
    f"b1 = {b1}",
    f"b2 = {b2}",
    sep="\n",
)

out_h1:  0.4462436102487797
out_h2:  0.46994519893303754
out_y1:  0.7596336817333533
out_y2:  0.7860844568028507

Updated weights:
w1 = 0.3495450667048495
w2 = 0.29931760005727426
w3 = 0.34913405857237884
w4 = 0.4987010878585683
w5 = 0.28604002732407263
w6 = 0.43529856341779094
w7 = 0.5033061988041518
w8 = 0.4034818028069088

Updated biases:
b1 = 0.38679125277228416
b2 = 0.6261256550747336


## ERROR

In [32]:
# Squared error of each output against its target. out_y1/out_y2 are whatever
# the most recent forward pass left behind.
mse1 = np.square(np.subtract(target1, out_y1))
mse2 = np.square(np.subtract(target2, out_y2))

# Average the two squared errors into a single loss value.
mse = np.mean((mse1, mse2))

# Report with 20 digits after the decimal point.
print(f"Mean Squared Error: {mse:.20f}")

Mean Squared Error: 0.29233061052394038182
