In [43]:
import math

In [44]:
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + e**-z) of a real number z.

    The result is computed in a numerically stable way: the exponential is
    only ever taken of a non-positive number, so very negative inputs
    return 0.0 (or a tiny positive value) instead of raising OverflowError
    from math.exp.
    """
    if z >= 0:
        # -z <= 0, so exp(-z) is in (0, 1] and cannot overflow.
        return 1 / (1 + math.exp(-z))
    # z < 0: use the algebraically equivalent form e**z / (1 + e**z) so the
    # exponent stays negative.
    exp_z = math.exp(z)
    return exp_z / (1 + exp_z)

In [55]:
def log_loss(y,y_hat):
    """Binary cross-entropy between a target y and a predicted probability y_hat.

    y_hat is first clamped to the interval [eps, 1 - eps] (eps = 1e-15) so
    that neither logarithm below is ever evaluated at zero.
    """
    eps = 1e-15
    upper = 1 - eps

    # Clamp the prediction: cap it from above first, then from below.
    clipped = y_hat
    if clipped > upper:
        clipped = upper
    if clipped < eps:
        clipped = eps

    positive_term = y * math.log(clipped)
    negative_term = (1 - y) * math.log(1 - clipped)
    return -(positive_term + negative_term)

In [46]:
# ---------- XOR dataset ----------
# Truth table for XOR; each row is (x1, x2, y), the order in which the
# training loop unpacks it.
data = [(0, 0, 0), (0, 1, 1), (1, 0, 1), (1, 1, 0)]

In [58]:
# ---------- Hyperparameters ----------
alpha = 0.1      # learning rate: step size multiplied into every gradient
epochs = 5_000   # number of full passes over the dataset

In [48]:
# ---------- Initialize weights ----------
# Fixed (non-random) starting values for every weight and bias.

# Hidden neuron 1: weights on x1 and x2, then its bias
w11 = 0.5
w12 = -0.5
b1 = 0.0

# Hidden neuron 2: weights on x1 and x2, then its bias
w21 = -0.5
w22 = 0.5
b2 = 0.0

# Output neuron: weights on the two hidden activations, then its bias
w31 = 0.5
w32 = 0.5
b3 = 0.0

In [59]:
# ---------- Training loop ----------
# Trains the 2-2-1 sigmoid network on `data`, applying a gradient-descent
# step after every individual sample.
# NOTE(review): the weights are module-level names that this cell updates in
# place, so re-running it continues from the current weights instead of the
# values set in the initialization cell. The recorded output already starts
# at a loss of 0.0099 at epoch 0, which suggests it was captured from such a
# re-run; re-execute the initialization cell first for a reproducible run.
for epoch in range(epochs):

    # Sum (not mean) of the per-sample log losses for this epoch.
    total_loss = 0

    for x1, x2, y in data:

        # ===== FORWARD PASS =====
        # Hidden neuron 1
        z1 = w11 * x1 + w12 * x2 + b1
        a1 = sigmoid(z1)

        # Hidden neuron 2
        z2 = w21 * x1 + w22 * x2 + b2
        a2 = sigmoid(z2)

        # Output neuron, fed by the two hidden activations
        z3 = w31 * a1 + w32 * a2 + b3
        y_hat = sigmoid(z3)

        loss = log_loss(y, y_hat)
        total_loss += loss

        # ===== BACKPROPAGATION =====
        # For a sigmoid output with log loss, d(loss)/d(z3) simplifies to
        # (y_hat - y) (ignoring the eps clamp inside log_loss).
        dz3 = y_hat - y

        dw31 = dz3 * a1
        dw32 = dz3 * a2
        db3 = dz3

        # Chain rule through the output weights and the hidden sigmoids
        # (sigmoid'(z) = a * (1 - a)). These must use w31/w32 as they were
        # during the forward pass, i.e. before the updates below.
        dz1 = dz3 * w31 * a1 * (1 - a1)
        dz2 = dz3 * w32 * a2 * (1 - a2)

        dw11 = dz1 * x1
        dw12 = dz1 * x2
        db1 = dz1

        dw21 = dz2 * x1
        dw22 = dz2 * x2
        db2 = dz2

        # ===== GRADIENT DESCENT =====
        # All gradients are already computed, so the update order is free.
        w31 -= alpha * dw31
        w32 -= alpha * dw32
        b3 -= alpha * db3

        w11 -= alpha * dw11
        w12 -= alpha * dw12
        b1 -= alpha * db1

        w21 -= alpha * dw21
        w22 -= alpha * dw22
        b2 -= alpha * db2

    # ===== MONITOR LEARNING =====
    # Report the summed epoch loss every 500 epochs (epoch 0 included).
    if epoch % 500 == 0:
        print(f"Epoch {epoch:04d} | Loss: {total_loss:.4f}")

Epoch 0000 | Loss: 0.0099
Epoch 0500 | Loss: 0.0094
Epoch 1000 | Loss: 0.0089
Epoch 1500 | Loss: 0.0085
Epoch 2000 | Loss: 0.0081
Epoch 2500 | Loss: 0.0078
Epoch 3000 | Loss: 0.0075
Epoch 3500 | Loss: 0.0072
Epoch 4000 | Loss: 0.0069
Epoch 4500 | Loss: 0.0067


In [50]:
# ---------- Test trained network ----------
# Runs a forward pass with the current weights for each XOR input and prints
# the network's output probability to three decimals.
print("\nXOR Predictions After Training:")
# The target y is unpacked from each row but not used in this cell.
for x1, x2, y in data:
    # Same forward pass as in the training loop, without the z intermediates.
    a1 = sigmoid(w11 * x1 + w12 * x2 + b1)
    a2 = sigmoid(w21 * x1 + w22 * x2 + b2)
    y_hat = sigmoid(w31 * a1 + w32 * a2 + b3)
    print(f"{x1} XOR {x2} → {y_hat:.3f}")


XOR Predictions After Training:
0 XOR 0 → 0.007
0 XOR 1 → 0.995
1 XOR 0 → 0.995
1 XOR 1 → 0.006
