<a href="https://colab.research.google.com/github/Therushi14/paper-replications/blob/main/MLP_with_Backprop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import numpy as np

In [12]:
class Sigmoid:
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied element-wise."""

    def forward(self, x):
        """Return the sigmoid of ``x`` without overflowing for large ``|x|``.

        The direct form ``1 / (1 + np.exp(-x))`` evaluates ``exp`` of a large
        positive number when ``x`` is very negative; that overflows to ``inf``
        and emits a RuntimeWarning. Here ``exp`` is only ever applied to a
        non-positive argument, and the two algebraically equivalent forms are
        selected per element:

            x >= 0:  1 / (1 + exp(-x))
            x <  0:  exp(x) / (1 + exp(x))

        Args:
            x: NumPy array (or NumPy/Python scalar) of pre-activations.

        Returns:
            Sigmoid values in [0, 1] with the same shape as ``x``.
        """
        # exp(-|x|) lies in [0, 1], so it can never overflow.
        decay = np.exp(-np.abs(x))
        # Numerator is 1 on the non-negative side and exp(x) == decay otherwise.
        numerator = np.where(x >= 0, 1.0, decay)
        return numerator / (1.0 + decay)

    def prime(self, x, out):
        """Return the sigmoid derivative using the already-computed output.

        Args:
            x: Pre-activation values. Unused here; present so the signature
                matches the other activation classes.
            out: ``forward(x)``, i.e. the sigmoid values.

        Returns:
            ``out * (1 - out)``, element-wise.
        """
        return out * (1 - out)

class ReLU:
    """Rectified linear unit: keeps positive values, zeroes everything else."""

    def forward(self, x):
        """Return the element-wise maximum of 0 and ``x``."""
        rectified = np.maximum(0, x)
        return rectified

    def prime(self, x, out):
        """Return the derivative w.r.t. the pre-activation ``x``.

        The result is 1.0 where ``x > 0`` and 0.0 elsewhere. ``out`` is not
        used; it is accepted so the signature matches the other activations.
        """
        is_active = x > 0
        return is_active.astype(float)

class MSE:
    """Half mean-squared-error loss and its gradient w.r.t. the prediction."""

    def loss(self, y_true, y_pred):
        """Return the mean over all elements of ``0.5 * (y_pred - y_true)**2``."""
        residual = y_pred - y_true
        half_squared = 0.5 * residual**2
        return np.mean(half_squared)

    def prime(self, y_true, y_pred):
        """Return the element-wise residual ``y_pred - y_true``.

        No averaging is applied here; the result has the same shape as the
        inputs.
        """
        residual = y_pred - y_true
        return residual

In [13]:
class NeuralNetwork:
    """Fully connected feed-forward network trained by full-batch gradient descent.

    Samples are stored column-wise: inputs have shape ``(layers[0], m)`` and
    targets ``(layers[-1], m)``, where ``m`` is the number of samples. Hidden
    layers use ReLU, the output layer uses Sigmoid, and the loss is MSE.
    """

    def __init__(self, layers, lr=0.1):
        """Create the network and initialise its parameters.

        Args:
            layers: Unit count per layer, input first, e.g. ``[2, 3, 1]``.
            lr: Learning rate used by ``update``.
        """
        self.layers = layers  # e.g., [2, 3, 1]
        self.lr = lr
        # Maps 'W{i}' / 'b{i}' (1-based layer index) to the layer's weights / biases.
        self.params = {}
        # acts[i-1] is the activation of layer i: ReLU for hidden layers, Sigmoid last.
        self.acts = [ReLU() for _ in range(len(layers) - 2)] + [Sigmoid()]
        self.loss_fn = MSE()

        # Xavier (Glorot) uniform weights, zero biases.
        for i in range(1, len(layers)):
            n_in, n_out = layers[i - 1], layers[i]
            limit = np.sqrt(6 / (n_in + n_out))
            self.params[f'W{i}'] = np.random.uniform(-limit, limit, (n_out, n_in))
            self.params[f'b{i}'] = np.zeros((n_out, 1))

    def forward(self, X):
        """Run a forward pass.

        Args:
            X: Input array of shape ``(layers[0], m)``.

        Returns:
            A tuple ``(output, cache)``. ``output`` is the last layer's
            activation. ``cache`` maps ``'A0'`` to ``X`` and, for every layer
            ``i``, ``'Z{i}'`` to the pre-activation and ``'A{i}'`` to the
            activation; ``backward`` consumes it.
        """
        cache = {'A0': X}
        for i in range(1, len(self.layers)):
            # b{i} has shape (n_out, 1) and broadcasts across the sample columns.
            Z = self.params[f'W{i}'] @ cache[f'A{i-1}'] + self.params[f'b{i}']
            A = self.acts[i - 1].forward(Z)
            cache[f'Z{i}'], cache[f'A{i}'] = Z, A
        return cache[f'A{len(self.layers) - 1}'], cache

    def backward(self, cache, Y):
        """Compute parameter gradients by backpropagation.

        Args:
            cache: The cache returned by ``forward`` for the same batch.
            Y: Targets of shape ``(layers[-1], m)``.

        Returns:
            Dict mapping ``'dW{i}'`` / ``'db{i}'`` to the gradients for each
            layer, each divided by the number of samples ``m``.
        """
        L = len(self.layers) - 1
        m = Y.shape[1]
        gradients = {}

        # Gradient of the loss w.r.t. the network output.
        A_final = cache[f'A{L}']
        dA = self.loss_fn.prime(Y, A_final)

        # Walk from the output layer back to the first layer.
        for i in range(L, 0, -1):
            # Chain rule through the activation of layer i.
            dZ = dA * self.acts[i - 1].prime(cache[f'Z{i}'], cache[f'A{i}'])
            gradients[f'dW{i}'] = (dZ @ cache[f'A{i-1}'].T) / m
            gradients[f'db{i}'] = np.sum(dZ, axis=1, keepdims=True) / m
            # The gradient w.r.t. the raw input (layer 0) is never needed.
            if i > 1:
                dA = self.params[f'W{i}'].T @ dZ
        return gradients

    def update(self, grads):
        """Apply one gradient-descent step in place.

        Args:
            grads: Gradient dict as returned by ``backward``.
        """
        for i in range(1, len(self.layers)):
            self.params[f'W{i}'] -= self.lr * grads[f'dW{i}']
            self.params[f'b{i}'] -= self.lr * grads[f'db{i}']

    def train(self, X, Y, epochs=1000):
        """Train on the full batch for ``epochs`` iterations, printing progress.

        The loss is printed about ten times over the run (every
        ``epochs // 10`` epochs, starting at epoch 0).

        Args:
            X: Inputs of shape ``(layers[0], m)``.
            Y: Targets of shape ``(layers[-1], m)``.
            epochs: Number of gradient-descent steps.
        """
        print(f"Training on input shape: {X.shape}")
        # Clamp to 1: for epochs < 10, ``epochs // 10`` is 0 and the modulo
        # below would raise ZeroDivisionError.
        log_every = max(1, epochs // 10)
        for ep in range(epochs):
            Y_hat, cache = self.forward(X)
            # Loss is measured before this epoch's parameter update.
            loss = self.loss_fn.loss(Y, Y_hat)
            grads = self.backward(cache, Y)
            self.update(grads)
            if ep % log_every == 0:
                print(f"Epoch {ep} | Loss: {loss:.5f}")

In [14]:
if __name__ == "__main__":
    # Seed NumPy's global RNG so the random weight initialisation is repeatable.
    np.random.seed(42)

    # XOR truth table, one sample per column: X is (2, 4), Y is (1, 4).
    X = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
    Y = np.array([[0, 1, 1, 0]])

    # Architecture: 2 inputs -> 3 hidden neurons -> 1 output.
    nn = NeuralNetwork([2, 3, 1], lr=0.5)
    nn.train(X, Y, epochs=5000)

    # Evaluate on the training inputs; the cache from forward() is not needed.
    prediction, _ = nn.forward(X)
    print("\nFinal Predictions (Rounded):", np.round(prediction), sep="\n")

Training on input shape: (2, 4)
Epoch 0 | Loss: 0.13990
Epoch 500 | Loss: 0.01249
Epoch 1000 | Loss: 0.00316
Epoch 1500 | Loss: 0.00162
Epoch 2000 | Loss: 0.00105
Epoch 2500 | Loss: 0.00076
Epoch 3000 | Loss: 0.00060
Epoch 3500 | Loss: 0.00048
Epoch 4000 | Loss: 0.00041
Epoch 4500 | Loss: 0.00035

Final Predictions (Rounded):
[[0. 1. 1. 0.]]
