In [None]:
import numpy as np


def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of `x`.

    The exponential is evaluated with NumPy, so `x` may be a scalar or an
    array; arrays are processed element-wise.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def sigmoid_derivative(x):
    """Return x * (1 - x), the sigmoid slope expressed via the sigmoid output.

    NOTE: `x` is expected to be a value already produced by `sigmoid` (an
    activation), not the pre-activation input; the training loop passes the
    stored layer activations here.
    """
    complement = 1 - x
    return x * complement

# XOR truth table: each row of X is one input pair and the matching row of y
# is its expected output (1 only when exactly one input is 1).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Targets as a column so they line up row-for-row with X.
y = np.array([0, 1, 1, 0]).reshape(-1, 1)


# Network architecture and training hyperparameters
np.random.seed(42)  # fixed seed so the random initial weights are reproducible
layer_sizes = [2, 3, 3, 1]  # input=2, 2 hidden layers of 3, output=1
eta = 0.5  # step size used in the weight/bias updates
epochs = 100_000  # number of full passes over X

# One (n_in x n_out) weight matrix of standard-normal draws and one zero bias
# row (1 x n_out) for every pair of adjacent layers, in input-to-output order.
W = [
    np.random.randn(n_in, n_out)
    for n_in, n_out in zip(layer_sizes, layer_sizes[1:])
]
b = [np.zeros((1, n_out)) for n_out in layer_sizes[1:]]


def _forward(inputs):
    """Propagate `inputs` through every layer using the current W and b.

    Returns a list of activations: element 0 is `inputs` itself and element
    i + 1 is the sigmoid output of layer i, so the last element is the
    network's prediction.
    """
    activations = [inputs]
    for weights, bias in zip(W, b):
        z = np.dot(activations[-1], weights) + bias
        activations.append(sigmoid(z))
    return activations


for epoch in range(epochs):

    # ---- Forward pass ----
    a = _forward(X)  # activations per layer

    # ---- Compute error ----
    error = y - a[-1]

    # ---- Backward pass ----
    # Deltas are collected from the output layer backwards and flipped once
    # at the end so that deltas[i] belongs to layer i. Because `error` is
    # (target - prediction), each delta already points in the descent
    # direction of the squared error, which is why the update below adds.
    deltas = [error * sigmoid_derivative(a[-1])]
    for i in reversed(range(len(W) - 1)):
        # deltas[-1] is the delta of layer i + 1, the one just computed.
        deltas.append(np.dot(deltas[-1], W[i + 1].T) * sigmoid_derivative(a[i + 1]))
    deltas.reverse()

    # ---- Update weights and biases ----
    # zip stops at the shortest sequence, so the final entry of `a` (the
    # network output) is never used as a layer input here. The in-place +=
    # mutates the arrays stored in W and b.
    for weights, bias, layer_input, delta in zip(W, b, a, deltas):
        weights += eta * np.dot(layer_input.T, delta)
        bias += eta * np.sum(delta, axis=0, keepdims=True)

    # ---- Monitor loss ----
    if epoch % 2000 == 0:
        loss = np.mean(np.square(error))
        print(f"Epoch {epoch}, Loss: {loss:.6f}")


# Run one more forward pass so the reported predictions reflect the weights
# after the last update (and so `a` exists even when epochs == 0).
a = _forward(X)
print("\nFinal predictions:")
print(a[-1].round(3))


Epoch 0, Loss: 0.282525
Epoch 2000, Loss: 0.229151
Epoch 4000, Loss: 0.000676
Epoch 6000, Loss: 0.000272
Epoch 8000, Loss: 0.000167
Epoch 10000, Loss: 0.000120
Epoch 12000, Loss: 0.000093
Epoch 14000, Loss: 0.000076
Epoch 16000, Loss: 0.000064
Epoch 18000, Loss: 0.000056
Epoch 20000, Loss: 0.000049
Epoch 22000, Loss: 0.000044
Epoch 24000, Loss: 0.000039
Epoch 26000, Loss: 0.000036
Epoch 28000, Loss: 0.000033
Epoch 30000, Loss: 0.000030
Epoch 32000, Loss: 0.000028
Epoch 34000, Loss: 0.000026
Epoch 36000, Loss: 0.000025
Epoch 38000, Loss: 0.000023
Epoch 40000, Loss: 0.000022
Epoch 42000, Loss: 0.000021
Epoch 44000, Loss: 0.000020
Epoch 46000, Loss: 0.000019
Epoch 48000, Loss: 0.000018
Epoch 50000, Loss: 0.000017
Epoch 52000, Loss: 0.000017
Epoch 54000, Loss: 0.000016
Epoch 56000, Loss: 0.000015
Epoch 58000, Loss: 0.000015
Epoch 60000, Loss: 0.000014
Epoch 62000, Loss: 0.000014
Epoch 64000, Loss: 0.000013
Epoch 66000, Loss: 0.000013
Epoch 68000, Loss: 0.000012
Epoch 70000, Loss: 0.000012
