## Mini Deep Neural Network to learn XOR (from scratch)

This notebook implements a mini deep neural network (a single hidden layer trained with backpropagation) from scratch and uses it to learn the XOR function. It only requires NumPy.

In [6]:
import numpy as np

## Specify input, activation function and initial parameters

In [7]:
# XOR truth table: every row of X is one (x1, x2) input pair
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Targets as a column vector, one row per row of X:
# 1 when exactly one of the two inputs is 1, otherwise 0
y = np.array([[0],
              [1],
              [1],
              [0]])

# Sigmoid activation and derivative
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    The textbook expression calls ``np.exp(-z)``, which overflows (a
    RuntimeWarning, or FloatingPointError under ``np.errstate(over="raise")``)
    once ``z`` is a large negative number. Here the exponential is only ever
    taken of a non-positive value, so it always lies in [0, 1].

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    Values in [0, 1] with the same shape as ``z``; a NumPy scalar when ``z``
    is a scalar.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # exponent is <= 0, so this cannot overflow
    # z >= 0: 1 / (1 + e^-z)   -- the textbook expression, unchanged
    # z <  0: e^z / (1 + e^z)  -- algebraically the same value
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # arrays with one or more dimensions as they are.
    return result[()]

def sigmoid_derivative(a):
    """Derivative of the sigmoid, expressed through its output.

    ``a`` is the sigmoid's output (the activation), not the pre-activation:
    if a = sigmoid(z), then d(sigmoid)/dz = a * (1 - a).
    """
    complement = 1 - a
    return a * complement

# Fix NumPy's global random state so the initial weights below are the same on every run
np.random.seed(42)

# Layer widths: input -> hidden -> output
input_size, hidden_size, output_size = 2, 4, 1

# Step size used by the gradient-descent updates
lr = 0.1

# Weights are drawn from a standard normal distribution, biases start at zero.
# W1 must be drawn before W2: both consume the same seeded random stream.
W1 = np.random.randn(input_size, hidden_size)
W2 = np.random.randn(hidden_size, output_size)
b1 = np.zeros(shape=(1, hidden_size))
b2 = np.zeros(shape=(1, output_size))

## Training loop

In [None]:
# Training loop: every epoch runs the whole data set (all rows of X) through the
# network, back-propagates the error and takes one gradient-descent step.
for epoch in range(10000):

    # --- Feedforward pass ---
    z1 = X @ W1 + b1
    a1 = sigmoid(z1)          # hidden-layer activations
    z2 = a1 @ W2 + b2
    y_hat = sigmoid(z2)       # network prediction

    # --- Loss (mean squared error), used for reporting only ---
    loss = np.mean((y - y_hat) ** 2)

    # --- Backpropagation ---
    # Error signal at the output pre-activation z2. The factor -(y - y_hat)
    # is the gradient of 0.5 * sum((y - y_hat)^2); it differs from the exact
    # gradient of the mean-squared loss above only by a constant positive
    # factor, which acts like a rescaling of lr.
    delta2 = -(y - y_hat) * sigmoid_derivative(y_hat)
    # Error signal at the hidden pre-activation z1: push delta2 back through
    # W2, then through the hidden sigmoid.
    delta1 = (delta2 @ W2.T) * sigmoid_derivative(a1)

    dW2 = a1.T @ delta2
    db2 = delta2.sum(axis=0, keepdims=True)
    dW1 = X.T @ delta1
    db1 = delta1.sum(axis=0, keepdims=True)

    # --- Gradient descent update ---
    # All gradients were computed above from the pre-update weights, so the
    # arrays can now be modified in place, in the order W2, b2, W1, b1.
    for param, grad in ((W2, dW2), (b2, db2), (W1, dW1), (b1, db1)):
        param -= lr * grad

    # Report progress every 1000 epochs (the loss is the one measured
    # before this epoch's update).
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.5f}")

Epoch 0, Loss: 0.2832
Epoch 1000, Loss: 0.2452
Epoch 2000, Loss: 0.2124
Epoch 3000, Loss: 0.1503
Epoch 4000, Loss: 0.0572
Epoch 5000, Loss: 0.0209
Epoch 6000, Loss: 0.0107
Epoch 7000, Loss: 0.0067
Epoch 8000, Loss: 0.0047
Epoch 9000, Loss: 0.0035


## Prediction

In [9]:
# Final predictions
# Run a fresh forward pass with the trained parameters. No variable named `a2`
# is defined anywhere above, and the loop's last `y_hat` was computed before
# the final weight update, so recomputing is both runnable on a fresh kernel
# and reflects the final weights.
hidden = sigmoid(X @ W1 + b1)
predictions = sigmoid(hidden @ W2 + b2)

print("\nPredictions after training:")
print(np.round(predictions, 3))


Predictions after training:
[[0.037]
 [0.949]
 [0.945]
 [0.064]]
