In [7]:
# 1. Activations and Derivatives

In [23]:
import numpy as np

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1.0, 0.0)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1 - s)

In [8]:
# 2. Loss Function

In [24]:
def binary_cross_entropy(y_true, y_pred):
    epsilon = 1e-8
    return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))

In [9]:
# 3. Forward Pass

In [25]:
def forward(W1, W2, W3, b1, b2, b3, X):
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    return A1, A2, A3, Z1, Z2, Z3

In [11]:
# 4. Backward Pass

In [26]:
def back_prop(X, Y, Z1, A1, Z2, A2, Z3, A3, W2, W3):
    m = X.shape[1]

    dZ3 = A3 - Y
    dW3 = np.dot(dZ3, A2.T) / m
    db3 = np.sum(dZ3) / m

    dZ2 = np.dot(W3.T, dZ3) * relu_derivative(Z2)
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2) / m

    dZ1 = np.dot(W2.T, dZ2) * relu_derivative(Z1)
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1) / m

    return dW1, db1, dW2, db2, dW3, db3

In [13]:
# 5. Training Loop

In [27]:
def train(X, Y, epochs=1000, learning_rate=0.01):
    np.random.seed(0)
    W1 = np.random.randn(4, X.shape[0])
    b1 = np.random.randn(4, 1)
    W2 = np.random.randn(3, 4)
    b2 = np.random.randn(3, 1)
    W3 = np.random.randn(1, 3)
    b3 = np.random.randn(1, 1)

    for epoch in range(epochs):
        A1, A2, A3, Z1, Z2, Z3 = forward(W1, W2, W3, b1, b2, b3, X)
        loss = binary_cross_entropy(Y, A3)

        dW1, db1, dW2, db2, dW3, db3 = back_prop(X, Y, Z1, A1, Z2, A2, Z3, A3, W2, W3)

        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2
        W3 -= learning_rate * dW3
        b3 -= learning_rate * db3

        if epoch % 100 == 0 or epoch == epochs - 1:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")

    return W1, b1, W2, b2, W3, b3

In [15]:
# 6. Main Script

In [29]:
X = np.random.randn(3, 1)
Y = np.array([[1]])

trained_params = train(X, Y)

Epoch 0, Loss: 6.1254
Epoch 100, Loss: 0.0381
Epoch 200, Loss: 0.0148
Epoch 300, Loss: 0.0085
Epoch 400, Loss: 0.0058
Epoch 500, Loss: 0.0043
Epoch 600, Loss: 0.0034
Epoch 700, Loss: 0.0028
Epoch 800, Loss: 0.0023
Epoch 900, Loss: 0.0020
Epoch 999, Loss: 0.0017
