In [None]:
import numpy as np

In [None]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)).

    `x` may be a scalar or a NumPy array; `np.exp` is applied element-wise,
    so an array input yields an array of the same shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [None]:
def sigmoid_derivative(x):
    """Return x * (1 - x).

    This equals the derivative of the sigmoid only when `x` is already the
    sigmoid *output* (the activation), not the pre-activation value; the
    training loop calls it with layer outputs accordingly.
    """
    complement = 1 - x
    return x * complement

In [None]:
# Network layout: number of units in the input, hidden and output layers.
input_size, hidden_size, output_size = 3, 2, 1

In [None]:
# Initial parameters. Rows of a weight matrix index the source unit,
# columns index the destination unit; biases are stored as (1, n) row vectors.
weights_input_hidden = np.array(
    [
        [0.2, -0.3],
        [0.4, 0.1],
        [-0.5, 0.2],
    ],
    dtype=float,
)
bias_hidden = np.array([[-0.4, 0.2]], dtype=float)
weights_hidden_output = np.array(
    [
        [-0.3],
        [-0.2],
    ],
    dtype=float,
)
bias_output = np.array([[0.1]], dtype=float)

In [None]:
# Single training example, kept 2-D: X is (1, 3) features, y is (1, 1) target.
X = np.array([1, 0, 1], ndmin=2)
y = np.array([1], ndmin=2)

In [None]:
# Training hyperparameters.
epochs = 2            # number of passes of the training loop
learning_rate = 0.9   # scale applied to each gradient step
momentum = 0.9        # fraction of the previous update carried into the next

In [None]:
# Momentum buffers: the update applied on the previous step, all starting at
# zero so the first step is a plain gradient step.
prev_weights_input_hidden = np.zeros(shape=(input_size, hidden_size))
prev_weights_hidden_output = np.zeros(shape=(hidden_size, output_size))
# The bias buffers start 1-D and broadcast against the (1, n) bias gradients
# the first time they are used in the training loop.
prev_bias_hidden = np.zeros(shape=hidden_size)
prev_bias_output = np.zeros(shape=output_size)

In [None]:
# Train the network with gradient descent plus momentum.
#
# Each update has the form
#     delta = momentum * previous_delta - learning_rate * gradient
# and is applied to all four parameter arrays.
#
# NOTE: the weight/bias arrays are modified in place and the prev_* buffers
# are rebound, so re-running this cell continues training from the current
# state. Re-run the initialisation cells first to start over.
for epoch in range(epochs):
    # Forward pass
    hidden_layer_activation = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_layer_output = sigmoid(hidden_layer_activation)
    output_layer_activation = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
    predicted_output = sigmoid(output_layer_activation)

    # Calculate the mean square error (kept for inspection; not used by the update)
    error = np.mean(np.square(y - predicted_output))

    # Backward pass: sigmoid_derivative takes the layer *outputs*.
    # The hidden-layer error is propagated through the output weights as they
    # were before this step's update.
    d_predicted_output = (predicted_output - y) * sigmoid_derivative(predicted_output)
    error_hidden_layer = d_predicted_output.dot(weights_hidden_output.T)
    d_hidden_layer = error_hidden_layer * sigmoid_derivative(hidden_layer_output)

    # Gradients of the loss with respect to each parameter array
    grad_weights_hidden_output = np.dot(hidden_layer_output.T, d_predicted_output)
    grad_bias_output = np.sum(d_predicted_output, axis=0, keepdims=True)
    grad_weights_input_hidden = np.dot(X.T, d_hidden_layer)
    grad_bias_hidden = np.sum(d_hidden_layer, axis=0, keepdims=True)

    # Update the weights and biases with Momentum for gradient descent.
    # Every gradient is scaled by learning_rate, including the hidden bias
    # (previously that term omitted the learning rate).
    delta_weights_hidden_output = prev_weights_hidden_output * momentum - grad_weights_hidden_output * learning_rate
    delta_bias_output = prev_bias_output * momentum - grad_bias_output * learning_rate
    delta_weights_input_hidden = prev_weights_input_hidden * momentum - grad_weights_input_hidden * learning_rate
    delta_bias_hidden = prev_bias_hidden * momentum - grad_bias_hidden * learning_rate

    weights_hidden_output += delta_weights_hidden_output
    bias_output += delta_bias_output
    weights_input_hidden += delta_weights_input_hidden
    bias_hidden += delta_bias_hidden

    # Update the momentum terms
    prev_weights_hidden_output = delta_weights_hidden_output
    prev_bias_output = delta_bias_output
    prev_weights_input_hidden = delta_weights_input_hidden
    prev_bias_hidden = delta_bias_hidden

In [None]:
# Report the input->hidden weights as w<input><hidden>, using 1-based indices
# and rounding to three decimals.
for source_idx, outgoing in enumerate(weights_input_hidden, start=1):
    for target_idx, weight in enumerate(outgoing, start=1):
        print(f"w{source_idx}{target_idx}:", round(weight, 3))

w11: 0.179
w12: -0.315
w21: 0.4
w22: 0.1
w31: -0.521
w32: 0.185


In [None]:
# Report the hidden->output weights as u<hidden>, using 1-based indices and
# rounding to three decimals.
for hidden_idx, weight in enumerate(weights_hidden_output[:, 0], start=1):
    print(f"u{hidden_idx}:", round(weight, 3))

u1: -0.19
u2: -0.026


In [None]:
# Report the two hidden-unit biases (b1, b2) followed by the output-unit
# bias (b3), each rounded to three decimals.
hidden_biases = bias_hidden[0]
print("b1:", round(hidden_biases[0], 3))
print("b2:", round(hidden_biases[1], 3))
print("b3:", round(bias_output[0, 0], 3))

b1: -0.424
b2: 0.183
b3: 0.433
