In [None]:
import numpy as np

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The value is computed from ``exp(-|x|)`` instead of ``exp(-x)`` so that
    large-magnitude negative inputs do not overflow inside ``np.exp``.

    Args:
        x: A scalar or array-like of numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as *x*, with every value in the closed interval [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in [0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0 the algebraically equal form
    # e^x / (1 + e^x) is used, where e^x == decay.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array into a NumPy scalar and leaves
    # arrays of higher rank untouched, so scalar input yields scalar output.
    return result[()]

def sigmoid_derivative(y):
    """Return ``y * (1 - y)``, the sigmoid's slope written in terms of its output.

    Args:
        y: A value (scalar or array) that is already a sigmoid activation,
            not the pre-activation input.

    Returns:
        The element-wise product of *y* and its complement ``1 - y``.
    """
    complement = 1 - y
    return y * complement

# --- Training data and hyperparameters ---------------------------------
# One training sample with four input features and a single target value.
X = np.array([[1, 1, 0, 1]])
y = np.array([[1]])
lr = 0.8             # step size applied to every weight and bias update
tolerance = 0.001    # training stops once |y - o| falls below this
max_epochs = 10000   # upper bound on the number of training iterations

# --- Initial parameters --------------------------------------------------
# Input -> hidden weights (4 x 2) and hidden-layer biases (1 x 2).
W1 = np.array([
    [ 0.3,  0.1],
    [-0.2,  0.4],
    [ 0.2, -0.3],
    [ 0.1,  0.4]
])
b1 = np.array([[0.2, 0.1]])

# Hidden -> output weights (2 x 1) and output bias (1 x 1).
W2 = np.array([
    [-0.3],
    [ 0.2]
])
b2 = np.array([[-0.3]])


def _report(title, output, err):
    """Print the output, the error and the current module-level parameters.

    Args:
        title: Text placed between the ``---`` markers of the header line.
        output: Network output from the forward pass of the current epoch.
        err: ``y - output`` from that same forward pass.
    """
    print(f"\n--- {title} ---")
    print("Output (O):", output)
    print("Error (D - O):", err)
    print("Updated W1:\n", W1)
    print("Updated b1:\n", b1)
    print("Updated W2:\n", W2)
    print("Updated b2:\n", b2)


epoch = 0
first_epoch_printed = False

# The loop is bounded so that exactly ``max_epochs`` updates are performed at
# most; the ``else`` branch runs only when no ``break`` (convergence) occurred.
for epoch in range(1, max_epochs + 1):
    # Forward pass: input -> hidden -> output, sigmoid at both layers.
    z1 = np.dot(X, W1) + b1
    h = sigmoid(z1)
    z2 = np.dot(h, W2) + b2
    o = sigmoid(z2)

    # Backward pass. delta_hidden must be computed before W2 is modified
    # because it propagates the output delta through the current W2.
    error = y - o
    delta_out = error * sigmoid_derivative(o)
    delta_hidden = delta_out.dot(W2.T) * sigmoid_derivative(h)

    # In-place parameter updates (the arrays are mutated, not rebound).
    W2 += lr * h.T.dot(delta_out)
    b2 += lr * np.sum(delta_out, axis=0, keepdims=True)
    W1 += lr * X.T.dot(delta_hidden)
    b1 += lr * np.sum(delta_hidden, axis=0, keepdims=True)

    if not first_epoch_printed:
        _report(f"First Epoch ({epoch})", o, error)
        first_epoch_printed = True

    # NOTE: ``error`` comes from the forward pass made before this epoch's
    # update, while the parameters printed by _report are post-update.
    if abs(error[0][0]) < tolerance:
        _report(f"Last Epoch (Converged at {epoch})", o, error)
        break
else:
    print("\nReached maximum iterations without full convergence.")



--- First Epoch (1) ---
Output (O): [[0.4174149]]
Error (D - O): [[0.5825851]]
Updated W1:
 [[ 0.29183078  0.10445673]
 [-0.20816922  0.40445673]
 [ 0.2        -0.3       ]
 [ 0.09183078  0.40445673]]
Updated b1:
 [[0.19183078 0.10445673]]
Updated W2:
 [[-0.23214576]
 [ 0.28285693]]
Updated b2:
 [[-0.1866617]]

Reached maximum iterations without full convergence.
