In [4]:
import numpy as np

# XOR truth table, one input pair per row: (0,0), (0,1), (1,0), (1,1).
X = np.array([[left, right] for left in (0, 1) for right in (0, 1)])
# XOR target for each row of X, stored as a column so it lines up with the
# network output.
y = np.array([0, 1, 1, 0]).reshape(-1, 1)

# Seed NumPy's global RNG so the weight draw below is reproducible.
np.random.seed(42)

# Layer widths: 2 inputs -> 2 hidden units -> 1 output.
input_size, hidden_size, output_size = 2, 2, 1

# Weights come from a standard normal distribution.  W1 is drawn before W2 so
# the random stream is consumed in a fixed order; biases start at zero.
W1 = np.random.randn(input_size, hidden_size)
W2 = np.random.randn(hidden_size, output_size)
b1 = np.zeros((1, hidden_size))
b2 = np.zeros((1, output_size))

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The textbook formula evaluates ``np.exp(-x)``, which overflows to ``inf``
    (and emits a RuntimeWarning) for large negative ``x``.  This version only
    ever exponentiates a non-positive number, so the intermediate value stays
    in ``[0, 1]`` and cannot overflow.

    Args:
        x: A real scalar or array-like.

    Returns:
        The sigmoid of ``x`` with the same shape as ``x`` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))  # exp of a non-positive value: never overflows
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).  Both branches
    # are the same function, written so that only `decay` is needed.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple unwraps a 0-d result to a NumPy scalar and
    # leaves arrays of higher rank unchanged.
    return result[()]
def sigmoid_derivative(x):
    """Return the sigmoid's slope, given the sigmoid's *output*.

    NOTE: ``x`` is expected to be an already-activated value
    ``s = sigmoid(z)`` (the training loop passes ``y_pred`` and ``a1``), not
    the pre-activation ``z``.  The result is ``s * (1 - s)``.
    """
    complement = 1 - x
    return x * complement

# Hyper-parameters for plain full-batch gradient descent.
# NOTE(review): the run recorded with these settings ended at a loss of about
# 0.13 with two of the four outputs near 0.5, i.e. XOR was not learned.
# Consider more epochs, a larger lr, or a wider hidden layer -- TODO confirm
# by re-running.
lr = 0.08        # step size
epochs = 10000   # number of passes over the full dataset
losses = []      # MSE measured at the start of each epoch (before its update)

for epoch in range(epochs):
    # ---- Forward pass ----
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)

    z2 = np.dot(a1, W2) + b2
    y_pred = sigmoid(z2)

    loss = np.mean((y - y_pred) ** 2)
    losses.append(loss)

    # ---- Backward pass (chain rule, output layer first) ----
    # The exact MSE gradient is 2 * (y_pred - y) / y.size; the constant factor
    # is dropped here, which only rescales the effective learning rate.
    dL_dy = (y_pred - y)
    dy_dz2 = sigmoid_derivative(y_pred)  # takes the activation, not z2
    dL_dz2 = dL_dy * dy_dz2
    dW2 = np.dot(a1.T, dL_dz2)
    db2 = np.sum(dL_dz2, axis=0, keepdims=True)  # sum over the batch axis

    dL_da1 = np.dot(dL_dz2, W2.T)
    da1_dz1 = sigmoid_derivative(a1)
    dL_dz1 = dL_da1 * da1_dz1
    dW1 = np.dot(X.T, dL_dz1)
    db1 = np.sum(dL_dz1, axis=0, keepdims=True)

    # ---- Gradient-descent step (in place) ----
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    if epoch % 1000 == 0:
        print(f"Epoch {epoch:5d} | Loss: {loss:.6f}")

# The y_pred left over from the loop was computed *before* the last weight
# update (and would not exist at all if epochs were 0).  Run one more forward
# pass so the reported predictions reflect the final weights.
z1 = np.dot(X, W1) + b1
a1 = sigmoid(z1)
z2 = np.dot(a1, W2) + b2
y_pred = sigmoid(z2)

print("\nFinal Predictions:")
print(y_pred.round(3))

print("\nRounded Output (Binary):")
print(np.round(y_pred))

Epoch     0 | Loss: 0.255830
Epoch  1000 | Loss: 0.249607
Epoch  2000 | Loss: 0.248099
Epoch  3000 | Loss: 0.237724
Epoch  4000 | Loss: 0.190351
Epoch  5000 | Loss: 0.153196
Epoch  6000 | Loss: 0.140404
Epoch  7000 | Loss: 0.135142
Epoch  8000 | Loss: 0.132430
Epoch  9000 | Loss: 0.130812

Final Predictions:
[[0.066]
 [0.493]
 [0.939]
 [0.504]]

Rounded Output (Binary):
[[0.]
 [0.]
 [1.]
 [1.]]
