In [1]:
import numpy as np

Below is an example of a tiny neural network trained on the XOR problem using only NumPy.

In [7]:
# --- Activation and derivative ---
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    Evaluated in a numerically stable form: the exponential is only ever
    taken of a non-positive number, so large-magnitude negative inputs no
    longer overflow inside np.exp (which emitted a RuntimeWarning before).

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of z.
    """
    z = np.asarray(z)
    # exp(-|z|) lies in (0, 1], so it cannot overflow.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    out = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # np.where turns scalars into 0-d arrays; unwrap so scalar in -> scalar out.
    return out[()] if out.ndim == 0 else out

def d_sigmoid(a):
    """Sigmoid derivative expressed through the sigmoid's own output.

    `a` must already be an activation, i.e. a = sigmoid(z), not the raw
    pre-activation z. Returns a * (1 - a), element-wise for arrays.
    """
    complement = 1 - a
    return a * complement

# --- Data (XOR problem) ---
# Each row of X is one input pair; y holds the matching XOR target as a column.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]]) # XOR outputs

# --- Initialize weights ---
# Fixed seed so the random initial weights (and hence the run) are repeatable.
np.random.seed(42)
W1 = np.random.randn(2, 2)  # input (2) -> hidden (2)
b1 = np.zeros((1, 2))
W2 = np.random.randn(2, 1)  # hidden (2) -> output (1)
b2 = np.zeros((1, 1))

# --- Training ---
lr = 0.1           # gradient-descent step size
n_epochs = 10000   # number of full-batch updates
log_every = 1000   # report the loss every `log_every` epochs
# NOTE(review): the output saved with this notebook ends at a loss of about
# 0.14 with two predictions near 0.5, i.e. XOR is not learned yet. Tuning
# lr / n_epochs may help -- untested.
for epoch in range(n_epochs):
    # Forward pass
    z1 = X @ W1 + b1
    a1 = sigmoid(z1)
    z2 = a1 @ W2 + b2
    a2 = sigmoid(z2) # predictions

    # Loss (MSE)
    loss = np.mean((a2 - y)**2)

    # Backpropagation
    # np.mean averages over every element of (a2 - y)**2, so the gradient is
    # normalised by y.size (equal to the sample count for a single output).
    d_a2 = 2 * (a2 - y) / y.size # dL/da2
    d_z2 = d_a2 * d_sigmoid(a2) # dL/dz2
    d_W2 = a1.T @ d_z2 # dL/dW2
    d_b2 = np.sum(d_z2, axis=0, keepdims=True)

    # W2 is read here before it is updated below, as backprop requires.
    d_a1 = d_z2 @ W2.T # dL/da1
    d_z1 = d_a1 * d_sigmoid(a1) # dL/dz1
    d_W1 = X.T @ d_z1 # dL/dW1
    d_b1 = np.sum(d_z1, axis=0, keepdims=True)

    # Gradient descent update
    W1 -= lr * d_W1
    b1 -= lr * d_b1
    W2 -= lr * d_W2
    b2 -= lr * d_b2

    if epoch % log_every == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Final predictions
# The a2 left over from the loop was computed *before* the last weight update,
# so run one more forward pass to report what the trained weights predict.
z1 = X @ W1 + b1
a1 = sigmoid(z1)
z2 = a1 @ W2 + b2
a2 = sigmoid(z2)
print("Predictions:", np.round(a2, 3))

Epoch 0, Loss: 0.2558
Epoch 1000, Loss: 0.2498
Epoch 2000, Loss: 0.2494
Epoch 3000, Loss: 0.2484
Epoch 4000, Loss: 0.2454
Epoch 5000, Loss: 0.2342
Epoch 6000, Loss: 0.2046
Epoch 7000, Loss: 0.1720
Epoch 8000, Loss: 0.1532
Epoch 9000, Loss: 0.1438
Predictions: [[0.124]
 [0.481]
 [0.891]
 [0.508]]
