In [5]:
import numpy as np

# Network inputs.
x1, x2 = 0.10, 0.20

# Input -> hidden weights: (w1, w2) feed H1 and (w3, w4) feed H2.
w1 = 0.20
w2 = 0.25
w3 = 0.30
w4 = 0.35

# Hidden -> output weights: (w5, w6) feed y1 and (w7, w8) feed y2.
w5 = 0.45
w6 = 0.50
w7 = 0.55
w8 = 0.60

# Biases: b1 is shared by both hidden units, b2 by both output units.
b1, b2 = 0.40, 0.65

# Target values the two outputs are trained towards.
T1, T2 = 0.05, 0.95

# Learning rate applied to every gradient-descent update below.
lr = 0.3

def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), computed stably.

    The naive formula evaluates ``exp(-z)``, which overflows for large
    negative ``z``. Here the exponential is only ever taken of a
    non-positive number, so it cannot overflow:

    * ``z >= 0``: ``1 / (1 + exp(-z))`` (same arithmetic as the naive form)
    * ``z <  0``: ``exp(z) / (1 + exp(z))``

    Args:
        z: A real scalar or a NumPy array of pre-activation values.

    Returns:
        The element-wise sigmoid of ``z``: a NumPy scalar for scalar input,
        an array of the same shape for array input.
    """
    z = np.asarray(z)
    # exp of a non-positive argument lies in (0, 1] and never overflows.
    e = np.exp(-np.abs(z))
    # Numerator is 1 on the non-negative branch and exp(z) on the negative one.
    numerator = np.where(z >= 0, np.ones_like(e), e)
    return numerator / (1.0 + e)

def sigmoid_derivative_from_activation(a):
    """Return the sigmoid derivative expressed through its own output.

    Args:
        a: A sigmoid activation value (the output of the sigmoid, not its
            input).

    Returns:
        ``a * (1 - a)``.
    """
    complement = 1.0 - a
    return a * complement

def forward_pass(x1, x2, w1, w2, w3, w4, w5, w6, w7, w8, b1, b2):
    """Propagate one input pair through the 2-2-2 sigmoid network.

    Args:
        x1, x2: Input values.
        w1, w2: Weights from (x1, x2) into hidden unit H1.
        w3, w4: Weights from (x1, x2) into hidden unit H2.
        w5, w6: Weights from (H1, H2) into output y1.
        w7, w8: Weights from (H1, H2) into output y2.
        b1: Bias added to both hidden units.
        b2: Bias added to both output units.

    Returns:
        A dict with the pre-activations (``H1_net``, ``H2_net``, ``y1_net``,
        ``y2_net``) and the activations (``H1``, ``H2``, ``y1``, ``y2``).
    """
    # Hidden layer.
    net_h1 = x1 * w1 + x2 * w2 + b1
    act_h1 = sigmoid(net_h1)
    net_h2 = x1 * w3 + x2 * w4 + b1
    act_h2 = sigmoid(net_h2)

    # Output layer.
    net_y1 = act_h1 * w5 + act_h2 * w6 + b2
    act_y1 = sigmoid(net_y1)
    net_y2 = act_h1 * w7 + act_h2 * w8 + b2
    act_y2 = sigmoid(net_y2)

    return dict(
        H1_net=net_h1,
        H2_net=net_h2,
        H1=act_h1,
        H2=act_h2,
        y1_net=net_y1,
        y2_net=net_y2,
        y1=act_y1,
        y2=act_y2,
    )

# --- Forward pass with the initial parameters ------------------------------
# Compute everything first, then report it.
out = forward_pass(
    x1, x2,
    w1, w2, w3, w4,
    w5, w6, w7, w8,
    b1, b2,
)

# Half squared error per output, and their sum.
E1 = 0.5 * (T1 - out['y1'])**2
E2 = 0.5 * (T2 - out['y2'])**2
E_total = E1 + E2

print("\n=== FORWARD PASS (before weight update) ===")
print(f"Input values: x1 = {x1}, x2 = {x2}")
print(f"Target values: T1 = {T1}, T2 = {T2}")
print(f"Learning rate: {lr}\n")

print(f"H1_net = {out['H1_net']:.7f} => H1 = sigmoid(H1_net) = {out['H1']:.9f}")
print(f"H2_net = {out['H2_net']:.7f} => H2 = sigmoid(H2_net) = {out['H2']:.9f}")
print(f"y1_net = {out['y1_net']:.9f} => y1 = sigmoid(y1_net) = {out['y1']:.9f}")
print(f"y2_net = {out['y2_net']:.9f} => y2 = sigmoid(y2_net) = {out['y2']:.9f}")

print(f"\nE1 = 0.5*(T1 - y1)^2 = {E1:.9f}")
print(f"E2 = 0.5*(T2 - y2)^2 = {E2:.9f}")
print(f"Total error E_total = E1 + E2 = {E_total:.9f}")

# --- Backpropagation through the output layer ------------------------------
# delta_k = dE/dy_k * dy_k/dnet_k for each output unit k.
dE_dy1, dE_dy2 = -(T1 - out['y1']), -(T2 - out['y2'])
dy1_dnet, dy2_dnet = (
    sigmoid_derivative_from_activation(out[key]) for key in ('y1', 'y2')
)
delta1, delta2 = dE_dy1 * dy1_dnet, dE_dy2 * dy2_dnet

# Gradient of each hidden->output weight: output delta times the hidden
# activation feeding that weight.
dw5, dw6 = delta1 * out['H1'], delta1 * out['H2']
dw7, dw8 = delta2 * out['H1'], delta2 * out['H2']

# One gradient-descent step on the hidden->output weights.
w5_new, w6_new = w5 - lr * dw5, w6 - lr * dw6
w7_new, w8_new = w7 - lr * dw7, w8 - lr * dw8

print("\n=== BACKPROP: output layer ===")
print(f"dE/dy1 = {dE_dy1:.9f}, dy1/dnet = {dy1_dnet:.9f}, => delta1 = {delta1:.9f}")
print(f"dE/dy2 = {dE_dy2:.9f}, dy2/dnet = {dy2_dnet:.9f}, => delta2 = {delta2:.9f}")

print("\nGradients for hidden->output weights:")
print(f"dw5 (for w5) = delta1 * H1 = {dw5:.9f}")
print(f"dw6 (for w6) = delta1 * H2 = {dw6:.9f}")
print(f"dw7 (for w7) = delta2 * H1 = {dw7:.9f}")
print(f"dw8 (for w8) = delta2 * H2 = {dw8:.9f}")

print("\nUpdated hidden->output weights (one step):")
print(f"w5: {w5:.9f} -> {w5_new:.9f}")
print(f"w6: {w6:.9f} -> {w6_new:.9f}")
print(f"w7: {w7:.9f} -> {w7_new:.9f}")
print(f"w8: {w8:.9f} -> {w8_new:.9f}")

# --- Backpropagation through the hidden layer ------------------------------
# Each hidden delta sums the output deltas weighted by the ORIGINAL
# (pre-update) hidden->output weights, then scales by the sigmoid slope.
slope_H1 = sigmoid_derivative_from_activation(out['H1'])
slope_H2 = sigmoid_derivative_from_activation(out['H2'])
delta_H1 = (delta1 * w5 + delta2 * w7) * slope_H1
delta_H2 = (delta1 * w6 + delta2 * w8) * slope_H2

# Gradient of each input->hidden weight: hidden delta times its input.
dw1, dw2 = delta_H1 * x1, delta_H1 * x2
dw3, dw4 = delta_H2 * x1, delta_H2 * x2

# One gradient-descent step on the input->hidden weights.
w1_new, w2_new = w1 - lr * dw1, w2 - lr * dw2
w3_new, w4_new = w3 - lr * dw3, w4 - lr * dw4

# Each bias is shared by both units of its layer, so its gradient is the
# sum of that layer's two deltas.
b2_new = b2 - lr * (delta1 + delta2)
b1_new = b1 - lr * (delta_H1 + delta_H2)

print("\n=== BACKPROP: hidden layer ===")
print(f"delta_H1 = (delta1*w5 + delta2*w7) * sigmoid'(H1) = {delta_H1:.12f}")
print(f"delta_H2 = (delta1*w6 + delta2*w8) * sigmoid'(H2) = {delta_H2:.12f}")

print("\nGradients for input->hidden weights:")
print(f"dw1 (for w1) = delta_H1 * x1 = {dw1:.12f}")
print(f"dw2 (for w2) = delta_H1 * x2 = {dw2:.12f}")
print(f"dw3 (for w3) = delta_H2 * x1 = {dw3:.12f}")
print(f"dw4 (for w4) = delta_H2 * x2 = {dw4:.12f}")

print("\nUpdated input->hidden weights (one step):")
print(f"w1: {w1:.9f} -> {w1_new:.9f}")
print(f"w2: {w2:.9f} -> {w2_new:.9f}")
print(f"w3: {w3:.9f} -> {w3_new:.9f}")
print(f"w4: {w4:.9f} -> {w4_new:.9f}")

print("\nUpdated biases:")
print(f"b1: {b1:.9f} -> {b1_new:.9f}")
print(f"b2: {b2:.9f} -> {b2_new:.9f}")

# --- Forward pass with the updated parameters ------------------------------
# Re-run the network with every weight and bias replaced by its updated
# value, then compare the error against the pre-update figures.
updated_params = (
    w1_new, w2_new, w3_new, w4_new,
    w5_new, w6_new, w7_new, w8_new,
    b1_new, b2_new,
)
out_after = forward_pass(x1, x2, *updated_params)

E1_after = 0.5 * (T1 - out_after['y1'])**2
E2_after = 0.5 * (T2 - out_after['y2'])**2
E_total_after = E1_after + E2_after

print("\n=== FORWARD PASS (after weight update) ===")
print(f"H1 (after) = {out_after['H1']:.9f}")
print(f"H2 (after) = {out_after['H2']:.9f}")
print(f"y1 (after) = {out_after['y1']:.9f}")
print(f"y2 (after) = {out_after['y2']:.9f}")

print(f"\nE1 (before) = {E1:.9f}, E1 (after) = {E1_after:.9f}")
print(f"E2 (before) = {E2:.9f}, E2 (after) = {E2_after:.9f}")
print(f"\nE_total (before) = {E_total:.9f}")
print(f"E_total (after)  = {E_total_after:.9f}")
print(f"Error reduction  = {E_total - E_total_after:.9f}")


=== FORWARD PASS (before weight update) ===
Input values: x1 = 0.1, x2 = 0.2
Target values: T1 = 0.05, T2 = 0.95
Learning rate: 0.3

H1_net = 0.4700000 => H1 = sigmoid(H1_net) = 0.615383756
H2_net = 0.5000000 => H2 = sigmoid(H2_net) = 0.622459331
y1_net = 1.238152356 => y1 = sigmoid(y1_net) = 0.775242241
y2_net = 1.361936665 => y2 = sigmoid(y2_net) = 0.796074276

E1 = 0.5*(T1 - y1)^2 = 0.262988154
E2 = 0.5*(T2 - y2)^2 = 0.011846564
Total error E_total = E1 + E2 = 0.274834719

=== BACKPROP: output layer ===
dE/dy1 = 0.725242241, dy1/dnet = 0.174241709, => delta1 = 0.126367447
dE/dy2 = -0.153925724, dy2/dnet = 0.162340023, => delta2 = -0.024988306

Gradients for hidden->output weights:
dw5 (for w5) = delta1 * H1 = 0.077764474
dw6 (for w6) = delta1 * H2 = 0.078658597
dw7 (for w7) = delta2 * H1 = -0.015377397
dw8 (for w8) = delta2 * H2 = -0.015554204

Updated hidden->output weights (one step):
w5: 0.450000000 -> 0.426670658
w6: 0.500000000 -> 0.476402421
w7: 0.550000000 -> 0.554613219
w8: