In [1]:
import numpy as np

# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of sigmoid function
def sigmoid_derivative(x):
    return x * (1 - x)

# Forward pass function
def forward_pass(inputs, weights_input_hidden, weights_hidden_output):
    hidden_layer_input = np.dot(inputs, weights_input_hidden)
    hidden_layer_output = sigmoid(hidden_layer_input)

    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output)
    output_layer_output = sigmoid(output_layer_input)

    return hidden_layer_output, output_layer_output

# Backpropagation function
def backpropagation(inputs, targets, hidden_layer_output, output_layer_output, weights_input_hidden, weights_hidden_output, learning_rate):
    output_error = targets - output_layer_output
    output_delta = -1* sigmoid_derivative(output_layer_output)*hidden_layer_output
    hidden_delta = -1*sigmoid_derivative(output_layer_output)*(((sigmoid_derivative(hidden_layer_output).T)*weights_hidden_output)@(np.array([1,1]).reshape([1,2])))

    return output_delta, hidden_delta

# Analytical gradient calculation function
def analytical_gradient(inputs, targets, weights_input_hidden, weights_hidden_output):
    eps =1e-10
    for_w1 = np.array([[eps, 0],[0,0]])
    for_w2 = np.array([[0, eps],[0,0]])
    for_w3 = np.array([[0, 0],[eps,0]])
    for_w4 = np.array([[0, 0],[0,eps]])
    for_w5 = np.array([eps, 0]).reshape([2,1])
    for_w6 = np.array([0,eps]).reshape([2,1])
    dw1 =  ((-forward_pass(inputs, weights_input_hidden + for_w1, weights_hidden_output)[1][0][0] + forward_pass(inputs, weights_input_hidden, weights_hidden_output)[1][0][0]))/eps
    dw2 =  ((-forward_pass(inputs, weights_input_hidden + for_w2, weights_hidden_output)[1][0][0] + forward_pass(inputs, weights_input_hidden, weights_hidden_output)[1][0][0]))/eps
    dw3 =  ((-forward_pass(inputs, weights_input_hidden + for_w3, weights_hidden_output)[1][0][0] + forward_pass(inputs, weights_input_hidden, weights_hidden_output)[1][0][0]))/eps
    dw4 =  ((-forward_pass(inputs, weights_input_hidden + for_w4, weights_hidden_output)[1][0][0] + forward_pass(inputs, weights_input_hidden, weights_hidden_output)[1][0][0]))/eps
    dw5 =  ((-forward_pass(inputs, weights_input_hidden , weights_hidden_output + for_w6)[1][0][0] + forward_pass(inputs, weights_input_hidden, weights_hidden_output)[1][0][0]))/eps
    dw6 =  ((-forward_pass(inputs, weights_input_hidden , weights_hidden_output + for_w6)[1][0][0] + forward_pass(inputs, weights_input_hidden, weights_hidden_output)[1][0][0]))/eps
    output_delta = np.array([dw5, dw6])
    hidden_delta = np.array([[dw1, dw3],[dw2,dw4]])
    return  hidden_delta, output_delta
# Initialize random weights
np.random.seed(0)
weights_input_hidden = np.random.rand(2, 2)
weights_hidden_output = np.random.rand(2, 1)

# Input and target
inputs = np.array([[1, 1]])
targets = np.array([[1]])

# Learning rate
learning_rate = 0.1

# Training loop
hidden_layer_output, output_layer_output = forward_pass(inputs, weights_input_hidden, weights_hidden_output)
back_delta_out, back_delta_hidd = backpropagation(inputs, targets, hidden_layer_output, output_layer_output, weights_input_hidden, weights_hidden_output, learning_rate)

# Analytical gradients
analytical_gradients_input_hidden, analytical_gradients_hidden_output = analytical_gradient(inputs, targets, weights_input_hidden, weights_hidden_output)

# Print the gradients
print("Analytical gradients are")
print(analytical_gradients_input_hidden)
print(analytical_gradients_hidden_output)
print("============================================")
print("\nBackpropagated gradient are")
print(back_delta_hidd)
print(back_delta_out)


Analytical gradients are
[[-0.01637912 -0.01637912]
 [-0.02355449 -0.02355449]]
[-0.16504242 -0.16504242]

Backpropagated gradient are
[[-0.01638029 -0.01638029]
 [-0.02355442 -0.02355442]]
[[-0.16096605 -0.16504206]]
