In [12]:
import math
import numpy as np 

In [13]:
def calculate_net_value(w1, x1, w2, x2, bias):
    """Return the net (pre-activation) input of a two-input neuron.

    Args:
        w1: Weight applied to the first input.
        x1: First input value.
        w2: Weight applied to the second input.
        x2: Second input value.
        bias: Constant term added to the weighted inputs.

    Returns:
        ``w1 * x1 + w2 * x2 + bias``.
    """
    weighted_inputs = w1 * x1 + w2 * x2
    return weighted_inputs + bias

def sigmoid_activation(weighted_sum):
    """Applies the sigmoid (logistic) activation function, 1 / (1 + e^-x).

    The value is computed in a numerically stable way: the exponential is
    always taken of a non-positive number, so strongly negative inputs return
    a value close to (or exactly) 0.0 instead of raising ``OverflowError``
    from ``math.exp``.

    Args:
        weighted_sum: Net input of the neuron (a real scalar).

    Returns:
        float: The activation, in the closed interval [0.0, 1.0].
    """
    if weighted_sum >= 0:
        # exp(-x) <= 1 for x >= 0, so this form cannot overflow.
        return 1.0 / (1.0 + math.exp(-weighted_sum))
    # For x < 0, exp(-x) may exceed the float range; use the algebraically
    # equivalent e^x / (1 + e^x), where e^x merely underflows towards 0.
    exp_x = math.exp(weighted_sum)
    return exp_x / (1.0 + exp_x)

def sigmoid_derivative(output_val):
    """Return the sigmoid's derivative expressed through its own output.

    Args:
        output_val: A value already produced by the sigmoid function.

    Returns:
        ``output_val * (1 - output_val)``.
    """
    complement = 1 - output_val
    return output_val * complement

In [14]:
def total_error_derivative(actual_output, target_output):
    """Return dE_total / d(actual_output) for one output neuron.

    With E_total = 0.5 * (target1 - actual1)^2 + 0.5 * (target2 - actual2)^2,
    only the term belonging to this neuron depends on its output, and
    differentiating that term gives ``actual - target``.

    Args:
        actual_output: Value produced by the output neuron.
        target_output: Desired value for that neuron.

    Returns:
        ``actual_output - target_output``.
    """
    gradient = actual_output - target_output
    return gradient

In [15]:
def evaluate_individual_error(target_output, actual_output):
    """Return half the squared difference between target and actual output.

    Args:
        target_output: Desired value for the output neuron.
        actual_output: Value the neuron actually produced.

    Returns:
        float: ``0.5 * (target_output - actual_output) ** 2``, evaluated
        with ``math.pow``.
    """
    residual = target_output - actual_output
    squared_residual = math.pow(residual, 2)
    return 0.5 * squared_residual

def evaluate_total_error(error_1, error_2):
    """Combine the two per-neuron errors into the total network error.

    Args:
        error_1: Error of the first output neuron.
        error_2: Error of the second output neuron.

    Returns:
        ``error_1 + error_2``.
    """
    combined_error = error_1 + error_2
    return combined_error

In [16]:
def differentiate_x_power_n(x, n): # This function isn't used in the main C++ logic, but translated
    """Calculates the derivative of x^n with respect to x, i.e. n * x^(n - 1).

    Args:
        x: Point at which the derivative is evaluated.
        n: Exponent of the power function.

    Returns:
        float: The value of ``n * x ** (n - 1)``. For ``n == 0`` the result is
        0.0 for every ``x``, because x^0 is a constant.

    Raises:
        ValueError: Propagated from ``math.pow`` for inputs outside its domain
            (e.g. ``x == 0`` with ``n < 1``, ``n != 0``).
        OverflowError: Propagated from ``math.pow`` when the power is too large
            to represent as a float.
    """
    if n == 0:
        # The derivative of a constant is zero. Short-circuiting also avoids
        # math.pow(0, -1), which raises ValueError even though the product
        # with n == 0 should be 0.
        return 0.0
    return float(n * math.pow(x, (n - 1)))

In [17]:
def calculate_hidden_layer_output(i1, i2, w1, w2, w3, w4, b1):
    """Run the forward pass through the two hidden neurons.

    Neuron h1 weights the inputs with (w1, w3) and neuron h2 with (w2, w4);
    both neurons add the same bias ``b1`` before the sigmoid is applied.

    Returns:
        tuple: (output_h1, output_h2)
    """
    # One (weight on i1, weight on i2) pair per hidden neuron, in h1, h2 order.
    weight_pairs = ((w1, w3), (w2, w4))
    output_h1, output_h2 = (
        sigmoid_activation(calculate_net_value(weight_i1, i1, weight_i2, i2, b1))
        for weight_i1, weight_i2 in weight_pairs
    )
    return output_h1, output_h2

def calculate_final_layer_output(h1, h2, w5, w6, w7, w8, b2):
    """Run the forward pass through the two output neurons.

    Neuron o1 weights the hidden outputs with (w5, w6) and neuron o2 with
    (w7, w8); both neurons add the same bias ``b2`` before the sigmoid is
    applied.

    Returns:
        tuple: (output_o1, output_o2)
    """
    # o1: net input from w5/w6, then squashed by the sigmoid.
    output_o1 = sigmoid_activation(calculate_net_value(w5, h1, w6, h2, b2))
    # o2: net input from w7/w8, then squashed by the sigmoid.
    output_o2 = sigmoid_activation(calculate_net_value(w7, h1, w8, h2, b2))
    return output_o1, output_o2

In [18]:
def calculate_output_layer_gradients(target_o1, target_o2, output_o1, output_o2,
                                     output_h1, output_h2):
    """Compute dE_total/dw for the four hidden-to-output weights.

    Each gradient is the chain-rule product
    dE_total/d(output_o) * d(output_o)/d(net_o) * d(net_o)/dw, where the last
    factor is the hidden output that the weight multiplies.

    Returns:
        tuple: (dE_total_dw5, dE_total_dw6, dE_total_dw7, dE_total_dw8)
    """
    # Output neuron o1 is fed through w5 (from h1) and w6 (from h2).
    delta_o1 = total_error_derivative(output_o1, target_o1) * sigmoid_derivative(output_o1)
    grad_w5 = delta_o1 * output_h1
    grad_w6 = delta_o1 * output_h2

    # Output neuron o2 is fed through w7 (from h1) and w8 (from h2).
    delta_o2 = total_error_derivative(output_o2, target_o2) * sigmoid_derivative(output_o2)
    grad_w7 = delta_o2 * output_h1
    grad_w8 = delta_o2 * output_h2

    return grad_w5, grad_w6, grad_w7, grad_w8

In [19]:
def calculate_hidden_layer_gradients(i1, i2, output_h1, output_h2, w5, w6, w7, w8,
                                     dE_total_doutput_o1, doutput_o1_dsum_o1,
                                     dE_total_doutput_o2, doutput_o2_dsum_o2):
    """Compute dE_total/dw for the four input-to-hidden weights.

    The error reaching a hidden neuron is the sum of the error signals of both
    output neurons, each scaled by the weight that connects the hidden neuron
    to that output. It is then multiplied by the hidden neuron's sigmoid
    derivative and by the input that the weight multiplies.

    Weight layout (as used by the forward pass):
        net_h1 = w1*i1 + w3*i2 + b1
        net_h2 = w2*i1 + w4*i2 + b1

    Returns:
        tuple: (dE_total_dw1, dE_total_dw2, dE_total_dw3, dE_total_dw4)
    """
    # Error signal of each output neuron: dE_total/d(net_o).
    delta_o1 = dE_total_doutput_o1 * doutput_o1_dsum_o1
    delta_o2 = dE_total_doutput_o2 * doutput_o2_dsum_o2

    # Hidden neuron h1 reaches o1 through w5 and o2 through w7.
    error_at_h1 = (delta_o1 * w5) + (delta_o2 * w7)
    delta_h1 = error_at_h1 * sigmoid_derivative(output_h1)
    grad_w1 = delta_h1 * i1  # d(net_h1)/dw1 = i1
    grad_w3 = delta_h1 * i2  # d(net_h1)/dw3 = i2

    # Hidden neuron h2 reaches o1 through w6 and o2 through w8.
    error_at_h2 = (delta_o1 * w6) + (delta_o2 * w8)
    delta_h2 = error_at_h2 * sigmoid_derivative(output_h2)
    grad_w2 = delta_h2 * i1  # d(net_h2)/dw2 = i1
    grad_w4 = delta_h2 * i2  # d(net_h2)/dw4 = i2

    return grad_w1, grad_w2, grad_w3, grad_w4


In [20]:
def update_weights(learning_rate, w1, w2, w3, w4, w5, w6, w7, w8,
                   dE_total_dw1, dE_total_dw2, dE_total_dw3, dE_total_dw4,
                   dE_total_dw5, dE_total_dw6, dE_total_dw7, dE_total_dw8):
    """Apply one gradient-descent step to all eight weights.

    Every weight is moved against its gradient:
    ``w_k -= learning_rate * dE_total_dw_k``.

    Returns:
        tuple: Updated weights (w1, w2, ..., w8)
    """
    current_weights = (w1, w2, w3, w4, w5, w6, w7, w8)
    gradients = (dE_total_dw1, dE_total_dw2, dE_total_dw3, dE_total_dw4,
                 dE_total_dw5, dE_total_dw6, dE_total_dw7, dE_total_dw8)

    stepped = []
    for weight, gradient in zip(current_weights, gradients):
        weight -= (learning_rate * gradient)
        stepped.append(weight)
    return tuple(stepped)

In [21]:
def main():
    """Run one backpropagation step on a fixed 2-2-2 sigmoid network.

    Performs a forward pass, prints the hidden/output activations and the
    total error, computes the gradients of all eight weights, applies a single
    gradient-descent update, prints the new weights, and finally repeats the
    forward pass to print the error obtained with the updated weights. The
    biases ``b1`` and ``b2`` are kept fixed throughout.
    """
    # Fixed training example and hyper-parameter.
    i1, i2 = 0.1, 0.5
    target_o1, target_o2 = 0.05, 0.95
    learning_rate = 0.6

    # Initial parameters: w1..w4 feed the hidden layer, w5..w8 the output layer.
    b1, b2 = 0.25, 0.35
    w1, w2, w3, w4 = 0.1, 0.2, 0.3, 0.4
    w5, w6, w7, w8 = 0.5, 0.6, 0.7, 0.8

    # Forward pass: inputs -> hidden layer -> output layer.
    output_h1, output_h2 = calculate_hidden_layer_output(i1, i2, w1, w2, w3, w4, b1)
    print(f"Hidden layer outputs: h1={output_h1:.4f}, h2={output_h2:.4f}")

    output_o1, output_o2 = calculate_final_layer_output(output_h1, output_h2, w5, w6, w7, w8, b2)
    print(f"Final layer outputs: o1={output_o1:.4f}, o2={output_o2:.4f}")

    # Total error before the update.
    E_total = evaluate_total_error(
        evaluate_individual_error(target_o1, output_o1),
        evaluate_individual_error(target_o2, output_o2),
    )
    print(f"Total Error: E_total={E_total:.4f}")

    # Backward pass, output layer: gradients for w5..w8.
    output_layer_grads = calculate_output_layer_gradients(
        target_o1, target_o2, output_o1, output_o2, output_h1, output_h2)

    # The hidden-layer gradients need, for each output neuron, the derivative
    # of the total error w.r.t. its output and of its output w.r.t. its net
    # input, in the order (o1 error, o1 sigmoid, o2 error, o2 sigmoid).
    output_neuron_terms = (
        total_error_derivative(output_o1, target_o1),
        sigmoid_derivative(output_o1),
        total_error_derivative(output_o2, target_o2),
        sigmoid_derivative(output_o2),
    )

    # Backward pass, hidden layer: gradients for w1..w4.
    hidden_layer_grads = calculate_hidden_layer_gradients(
        i1, i2, output_h1, output_h2, w5, w6, w7, w8, *output_neuron_terms)

    # Gradient-descent step; gradients are passed in w1..w8 order.
    w1, w2, w3, w4, w5, w6, w7, w8 = update_weights(
        learning_rate, w1, w2, w3, w4, w5, w6, w7, w8,
        *hidden_layer_grads, *output_layer_grads)

    print("\nNew weights after update:")
    print(f"w1: {w1:.4f}")
    print(f"w2: {w2:.4f}")
    print(f"w3: {w3:.4f}")
    print(f"w4: {w4:.4f}")
    print(f"w5: {w5:.4f}")
    print(f"w6: {w6:.4f}")
    print(f"w7: {w7:.4f}")
    print(f"w8: {w8:.4f}")

    # Second forward pass with the updated weights to show the error change.
    output_h1_new, output_h2_new = calculate_hidden_layer_output(i1, i2, w1, w2, w3, w4, b1)
    output_o1_new, output_o2_new = calculate_final_layer_output(
        output_h1_new, output_h2_new, w5, w6, w7, w8, b2)
    E_total_new = evaluate_total_error(
        evaluate_individual_error(target_o1, output_o1_new),
        evaluate_individual_error(target_o2, output_o2_new),
    )
    print(f"\nError after one update: E_total_new={E_total_new:.4f}")

In [22]:
# Run the demo only when this code is executed directly (as a script or a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Hidden layer outputs: h1=0.6011, h2=0.6154
Final layer outputs: o1=0.7349, o2=0.7796
Total Error: E_total=0.2491

New weights after update:
w1: 0.0993
w2: 0.1992
w3: 0.2967
w4: 0.3960
w5: 0.4519
w6: 0.5507
w7: 0.7106
w8: 0.8108

Error after one update: E_total_new=0.2407
