In [None]:
import numpy as np
import pandas as pd

In [None]:
class NeuralNetwork:
    """Fully connected feed-forward network trained by full-batch gradient descent.

    The configured activation is applied after every layer, including the
    output layer. Training uses a squared-error loss.
    """

    def __init__(self, layer_sizes, activation='relu', learning_rate=0.01):
        """
        Initialize the network.
        :param layer_sizes: List of layer sizes, e.g., [input_size, hidden1_size, ..., output_size]
        :param activation: 'relu' or 'sigmoid'; any other value leaves every layer linear
        :param learning_rate: Learning rate for gradient descent
        """
        self.layer_sizes = layer_sizes
        self.activation = activation
        self.learning_rate = learning_rate
        self.weights = []
        self.biases = []

        # Initialize weights and biases (He initialization for ReLU, Xavier otherwise).
        # Weights are drawn from NumPy's global RNG; seed it for reproducible runs.
        for i in range(len(layer_sizes) - 1):
            if activation == 'relu':
                scale = np.sqrt(2.0 / layer_sizes[i])
            else:  # sigmoid
                scale = np.sqrt(1.0 / layer_sizes[i])
            self.weights.append(np.random.randn(layer_sizes[i], layer_sizes[i+1]) * scale)
            self.biases.append(np.zeros((1, layer_sizes[i+1])))  # Row vector

    def _activate(self, x):
        """Apply the configured activation function element-wise to pre-activations x."""
        if self.activation == 'relu':
            return np.maximum(0, x)
        elif self.activation == 'sigmoid':
            return 1 / (1 + np.exp(-x))
        return x  # Linear fallback for any other activation name

    def _activate_derivative(self, x):
        """
        Derivative of the activation function, evaluated at PRE-activation values x.

        For sigmoid this re-applies the sigmoid to x internally, so passing
        already-activated values would give a wrong result.
        """
        if self.activation == 'relu':
            return (x > 0).astype(float)
        elif self.activation == 'sigmoid':
            s = self._activate(x)
            return s * (1 - s)
        return np.ones_like(x)  # Linear fallback for any other activation name

    @staticmethod
    def _match_targets(y_true, y_pred):
        """
        Return y_true reshaped to y_pred's shape when both hold the same number of elements.

        Without this, e.g. targets of shape (m,) against predictions of shape
        (m, 1) would silently broadcast to (m, m). Inputs that already match,
        or whose sizes differ, are returned unchanged (as an ndarray).
        """
        y_true = np.asarray(y_true)
        if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
            return y_true.reshape(y_pred.shape)
        return y_true

    def forward(self, X):
        """
        Forward pass through all layers.

        Caches `self.layer_outputs` (the input followed by each layer's
        activated output) and `self.pre_activations` (each layer's value
        before the activation) for use by `backward`.
        :param X: Input array with one sample per row
        :return: Activated output of the last layer
        """
        self.layer_outputs = [X]  # Store outputs of each layer (including input)
        self.pre_activations = []  # pre_activations[l] feeds layer_outputs[l + 1]
        for w, b in zip(self.weights, self.biases):
            z = np.dot(X, w) + b
            X = self._activate(z)
            self.pre_activations.append(z)
            self.layer_outputs.append(X)
        return X

    def backward(self, X, y_true):
        """
        Backpropagation to compute gradients.

        Must be called after `forward`, whose cached values it reads; X is only
        used for the number of samples.
        :param X: Input array that was passed to `forward`
        :param y_true: Target values; reshaped to the prediction shape when sizes match
        :return: Tuple (dW, db) of per-layer gradient lists for weights and biases
        """
        m = X.shape[0]  # Number of samples
        y_pred = self.layer_outputs[-1]
        y_true = self._match_targets(y_true, y_pred)

        # Initialize gradients
        dW = [np.zeros_like(w) for w in self.weights]
        db = [np.zeros_like(b) for b in self.biases]

        # Output layer error: gradient of sum((y_pred - y_true)**2) / (2 * m)
        # with respect to the network output.
        error = (y_pred - y_true) / m
        # NOTE(review): with 'relu' the output layer is also ReLU-activated in
        # forward(), yet no ReLU derivative is applied to the output delta here.
        # Left as-is to preserve existing training behaviour.
        if self.activation == 'sigmoid' and self.pre_activations:
            # Chain rule through the output sigmoid, evaluated at the
            # pre-activation (not at the already-activated output).
            error = error * self._activate_derivative(self.pre_activations[-1])

        # Backpropagate through layers
        for l in range(len(self.weights)-1, -1, -1):
            # Gradient of weights/biases
            dW[l] = np.dot(self.layer_outputs[l].T, error)
            db[l] = np.sum(error, axis=0, keepdims=True)

            # Propagate error backward (skip for input layer)
            if l > 0:
                # layer_outputs[l] was produced from pre_activations[l - 1]
                error = np.dot(error, self.weights[l].T) * self._activate_derivative(self.pre_activations[l - 1])

        return dW, db

    def update_weights(self, dW, db):
        """Apply one in-place gradient-descent step to all weights and biases."""
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * dW[i]
            self.biases[i] -= self.learning_rate * db[i]

    def train(self, X, y_true, epochs=1000):
        """
        Train the network with full-batch gradient descent.
        :param X: Input array with one sample per row
        :param y_true: Target values
        :param epochs: Number of forward/backward/update iterations
        """
        for epoch in range(epochs):
            y_pred = self.forward(X)
            dW, db = self.backward(X, y_true)
            self.update_weights(dW, db)

            # Print loss (MSE) every 100 epochs; y_pred predates this epoch's update
            if epoch % 100 == 0:
                loss = np.mean((y_pred - self._match_targets(y_true, y_pred))**2)
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

In [None]:
# Example: XOR problem (2 inputs, 1 output)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Seed NumPy's global RNG, which NeuralNetwork draws its initial weights from,
# so that a fresh kernel reproduces the same training run.
np.random.seed(42)

# Create network (2 inputs, 2 hidden layers of 4 neurons, 1 output)
nn = NeuralNetwork(layer_sizes=[2, 4, 4, 1], activation='relu', learning_rate=0.01)

# Train
nn.train(X, y, epochs=1000)

# Test
print("Predictions:", nn.forward(X))

Epoch 0, Loss: 0.6128
Epoch 100, Loss: 0.2299
Epoch 200, Loss: 0.1791
Epoch 300, Loss: 0.1443
Epoch 400, Loss: 0.1074
Epoch 500, Loss: 0.0760
Epoch 600, Loss: 0.0508
Epoch 700, Loss: 0.0322
Epoch 800, Loss: 0.0200
Epoch 900, Loss: 0.0151
Predictions: [[0.18066033]
 [0.89307042]
 [0.98364135]
 [0.02154583]]
