In [None]:
import numpy as np


# Neural Network with Sigmoid

In [None]:
import numpy as np

# Activation functions and their derivatives
def sigmoid(Z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-Z))``.

    The value is evaluated as ``exp(-log(1 + exp(-Z)))`` via ``np.logaddexp``
    so that large negative inputs do not overflow ``np.exp`` (the naive
    formula emits a RuntimeWarning there). Results agree with the naive
    formula up to floating-point rounding.

    Z: scalar or array of pre-activations.
    Returns: sigmoid of Z, same shape as Z.
    """
    # logaddexp(0, -Z) == log(exp(0) + exp(-Z)) == log(1 + exp(-Z)), computed
    # without ever forming the potentially huge exp(-Z).
    return np.exp(-np.logaddexp(0, -Z))

def sigmoid_derivative(A):
    """Return the sigmoid derivative expressed through the sigmoid output.

    A: value(s) already passed through the sigmoid (not the raw input).
    Returns: A * (1 - A), element-wise.
    """
    complement = 1 - A
    return A * complement

def Relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and Z."""
    floor = 0
    rectified = np.maximum(floor, Z)
    return rectified

def Relu_derivative(Z):
    """Return the ReLU slope for each entry of Z as floats.

    Z: array of pre-activations.
    Returns: 1.0 where Z is strictly positive, 0.0 elsewhere (including Z == 0).
    """
    is_active = Z > 0
    return is_active.astype(float)

class MultiLayerPerception:
    """Two-layer network for binary classification: ReLU hidden layer, sigmoid output.

    Trained with full-batch gradient descent on the binary cross-entropy
    (summed over outputs, averaged over samples). ``forward`` caches its
    input and activations on the instance; ``compute_loss`` and ``backward``
    read that cache, so ``forward`` must run first (``train`` does this on
    every epoch).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.01):
        """
        Create the weights and biases of both layers.

        input_dim: number of input features
        hidden_dim: number of hidden units
        output_dim: number of sigmoid outputs
        learning_rate: step size used by update_parameters
        """
        self.learning_rate = learning_rate

        # He initialization: standard-normal draws scaled by sqrt(2 / fan_in)
        self.W1 = np.random.randn(input_dim, hidden_dim) * np.sqrt(2. / input_dim)
        self.b1 = np.zeros((1, hidden_dim))

        self.W2 = np.random.randn(hidden_dim, output_dim) * np.sqrt(2. / hidden_dim)
        self.b2 = np.zeros((1, output_dim))

    def _label_matrix(self, Y):
        """Return Y as an array shaped like the cached output A2, or raise ValueError."""
        Y = np.asarray(Y)
        # A flat label vector is unambiguous for a single-output network. Left
        # 1-D it would broadcast against the (n_samples, 1) output into an
        # (n_samples, n_samples) matrix and silently corrupt loss and gradients.
        if Y.ndim == 1 and self.A2.shape[1] == 1:
            Y = Y.reshape(-1, 1)
        if Y.shape != self.A2.shape:
            raise ValueError(
                f"Y has shape {Y.shape}, expected {self.A2.shape} to match the network output"
            )
        return Y

    def forward(self, X):
        """
        Run the network on X and cache every intermediate for backward().

        X: inputs, shape (n_samples, input_dim)
        Returns the sigmoid outputs, shape (n_samples, output_dim)
        """
        self.X = X  # Shape: (n_samples, input_dim)
        self.Z1 = np.dot(self.X, self.W1) + self.b1  # Shape: (n_samples, hidden_dim)
        self.A1 = Relu(self.Z1)  # Shape: (n_samples, hidden_dim)

        self.Z2 = np.dot(self.A1, self.W2) + self.b2  # Shape: (n_samples, output_dim)
        self.A2 = sigmoid(self.Z2)  # Shape: (n_samples, output_dim)
        return self.A2

    def compute_loss(self, Y):
        """
        Compute Binary Cross-Entropy loss for the most recent forward pass.

        Y: true labels, shape (n_samples, output_dim); a flat (n_samples,)
           vector is also accepted when output_dim == 1
        Returns the loss (also stored as self.loss).
        Raises ValueError if Y does not match the shape of the network output.
        """
        Y = self._label_matrix(Y)
        m = Y.shape[0]
        epsilon = 1e-15  # To prevent log(0)
        # Binary Cross-Entropy Loss
        self.loss = - (1 / m) * np.sum(Y * np.log(self.A2 + epsilon) + (1 - Y) * np.log(1 - self.A2 + epsilon))
        return self.loss

    def backward(self, Y):
        """
        Perform backpropagation and store the gradients on the instance.

        Must be called after forward(). Stores self.dW1, self.db1, self.dW2
        and self.db2, each averaged over the batch.
        Y: true labels, shape (n_samples, output_dim); a flat (n_samples,)
           vector is also accepted when output_dim == 1
        Raises ValueError if Y does not match the shape of the network output.
        """
        Y = self._label_matrix(Y)
        n = Y.shape[0]

        # Step 1: derivative of the loss w.r.t. the pre-activation Z2.
        # dL/dA2 = -(Y/A2) + (1-Y)/(1-A2); multiplying by the sigmoid
        # derivative A2 * (1 - A2) simplifies the product to A2 - Y.
        dZ2 = self.A2 - Y  # Shape: (n_samples, output_dim)

        # Step 2: gradients for W2 and b2
        dW2 = np.dot(self.A1.T, dZ2) / n  # Shape: (hidden_dim, output_dim)
        db2 = np.sum(dZ2, axis=0, keepdims=True) / n  # Shape: (1, output_dim)

        # Step 3: derivative of the loss w.r.t. A1
        dA1 = np.dot(dZ2, self.W2.T)  # Shape: (n_samples, hidden_dim)

        # Step 4: derivative of the loss w.r.t. Z1 (gate by the ReLU slope)
        dZ1 = dA1 * Relu_derivative(self.Z1)  # Shape: (n_samples, hidden_dim)

        # Step 5: gradients for W1 and b1
        dW1 = np.dot(self.X.T, dZ1) / n  # Shape: (input_dim, hidden_dim)
        db1 = np.sum(dZ1, axis=0, keepdims=True) / n  # Shape: (1, hidden_dim)

        # Step 6: store gradients for update_parameters()
        self.dW2 = dW2
        self.db2 = db2
        self.dW1 = dW1
        self.db1 = db1

    def update_parameters(self):
        """
        Update parameters in place with one gradient-descent step,
        using the gradients stored by backward().
        """
        self.W1 -= self.learning_rate * self.dW1
        self.b1 -= self.learning_rate * self.db1
        self.W2 -= self.learning_rate * self.dW2
        self.b2 -= self.learning_rate * self.db2

    def train(self, X, Y, epochs=1000, print_loss=False):
        """
        Train the MLP on the given data with full-batch gradient descent.

        X: inputs, shape (n_samples, input_dim)
        Y: true labels, see compute_loss
        epochs: number of passes over the whole data set
        print_loss: when true, print the loss every 100 epochs
        """
        for epoch in range(epochs):
            # Forward pass (result is cached on self for the steps below)
            self.forward(X)

            # Compute loss before this epoch's update
            loss = self.compute_loss(Y)

            # Backward pass
            self.backward(Y)

            # Update parameters
            self.update_parameters()

            if print_loss and epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X, threshold=0.5):
        """
        Make predictions with the trained model.

        X: inputs, shape (n_samples, input_dim)
        threshold: outputs strictly greater than this are labelled 1
        Returns an int array of 0/1 labels, shape (n_samples, output_dim)
        """
        A2 = self.forward(X)
        return (A2 > threshold).astype(int)

# Example usage:
if __name__ == "__main__":
    # Toy binary problem: 100 two-feature samples, labelled 1 when the two
    # features sum to a positive number. Seeding first fixes both the data
    # and the network's random weight initialization.
    np.random.seed(42)
    X_train = np.random.randn(100, 2)
    Y_train = ((X_train[:, 0] + X_train[:, 1]) > 0).astype(int)[:, np.newaxis]

    # Build the network and fit it, reporting the loss every 100 epochs
    mlp = MultiLayerPerception(
        input_dim=2,
        hidden_dim=4,
        output_dim=1,
        learning_rate=0.1,
    )
    mlp.train(X_train, Y_train, epochs=1000, print_loss=True)

    # Score the fitted model on the data it was trained on
    predictions = mlp.predict(X_train)
    accuracy = (predictions == Y_train).mean()
    print(f"Training Accuracy: {accuracy * 100:.2f}%")


Epoch 0, Loss: 1.2973
Epoch 100, Loss: 0.1985
Epoch 200, Loss: 0.1177
Epoch 300, Loss: 0.0847
Epoch 400, Loss: 0.0657
Epoch 500, Loss: 0.0541
Epoch 600, Loss: 0.0471
Epoch 700, Loss: 0.0423
Epoch 800, Loss: 0.0387
Epoch 900, Loss: 0.0359
Training Accuracy: 100.00%


# Neural Network with Softmax

In [None]:
import numpy as np

# Activation functions and their derivatives
def sigmoid(Z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-Z))``.

    Computed as ``exp(-logaddexp(0, -Z))``, which equals the textbook formula
    but never forms ``exp(-Z)`` directly, so large negative inputs do not
    overflow (the naive form emits a RuntimeWarning there). Results agree
    with the naive formula up to floating-point rounding.

    Z: scalar or array of pre-activations.
    Returns: sigmoid of Z, same shape as Z.
    """
    # log(1 + exp(-Z)) written as logaddexp(0, -Z) for numerical stability.
    log_denominator = np.logaddexp(0, -Z)
    return np.exp(-log_denominator)

def sigmoid_derivative(A):
    """Derivative of the sigmoid, written in terms of its output A.

    A: sigmoid activation(s), i.e. values already passed through sigmoid.
    Returns: A * (1 - A), element-wise.
    """
    one_minus_a = 1 - A
    return A * one_minus_a

def relu(Z):
    """Rectified linear unit: replace negative entries of Z with 0."""
    lower_bound = 0
    return np.maximum(lower_bound, Z)

def relu_derivative(Z):
    """Slope of ReLU at each entry of Z, as floats.

    Z: array of pre-activations.
    Returns: 1.0 where Z > 0 and 0.0 everywhere else (including Z == 0).
    """
    positive = Z > 0
    return positive.astype(float)

def softmax(Z):
    """Row-wise softmax of a 2-D array of scores.

    Z: scores, one sample per row.
    Returns: array of the same shape whose rows are non-negative and sum to 1.
    """
    # Shifting every row by its own maximum leaves the result unchanged but
    # keeps the exponentials from overflowing.
    row_max = np.max(Z, axis=1, keepdims=True)
    exp_Z = np.exp(Z - row_max)
    row_total = np.sum(exp_Z, axis=1, keepdims=True)
    return exp_Z / row_total

class MultiLayerPerceptron:
    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.01):
        self.learning_rate = learning_rate
        
        # Initialize weights and biases
        self.W1 = np.random.randn(input_dim, hidden_dim) * np.sqrt(2. / input_dim)  # He initialization
        self.b1 = np.zeros((1, hidden_dim))
        
        self.W2 = np.random.randn(hidden_dim, output_dim) * np.sqrt(2. / hidden_dim)  # He initialization
        self.b2 = np.zeros((1, output_dim))
    
    def forward(self, X):
        """
        Run the network on X, caching every intermediate for backward().

        X: inputs, shape (m, input_dim)
        Returns the softmax class probabilities, shape (m, output_dim)
        """
        self.X = X  # Shape: (m, input_dim)

        # Hidden layer: affine map followed by ReLU
        self.Z1 = hidden_pre = np.dot(X, self.W1) + self.b1  # Shape: (m, hidden_dim)
        self.A1 = hidden_act = relu(hidden_pre)  # Shape: (m, hidden_dim)

        # Output layer: affine map followed by a row-wise softmax
        self.Z2 = logits = np.dot(hidden_act, self.W2) + self.b2  # Shape: (m, output_dim)
        self.A2 = probabilities = softmax(logits)  # Shape: (m, output_dim)
        return probabilities
    
    def compute_loss(self, Y):
        """
        Compute Categorical Cross-Entropy loss
        Y: true labels, shape (m, output_dim) - one-hot encoded
        """
        m = Y.shape[0]
        epsilon = 1e-15  # To prevent log(0)
        # Categorical Cross-Entropy Loss
        self.loss = - (1 / m) * np.sum(Y * np.log(self.A2 + epsilon))
        return self.loss
    
    def backward(self, Y):
        """
        Backpropagate the loss through the cached forward pass.

        Must be called after forward(). Stores the batch-averaged gradients
        as self.dldW1, self.dldb1, self.dldW2 and self.dldb2.
        Y: true labels, shape (m, output_dim) - one-hot encoded
        """
        batch_size = Y.shape[0]

        # Output layer: for softmax + categorical cross-entropy, dL/dZ2 = A2 - Y
        delta_out = self.A2 - Y  # Shape: (m, output_dim)
        grad_W2 = np.dot(self.A1.T, delta_out) / batch_size  # Shape: (hidden_dim, output_dim)
        grad_b2 = np.sum(delta_out, axis=0, keepdims=True) / batch_size  # Shape: (1, output_dim)

        # Hidden layer: carry the error back through W2, then gate it by the
        # ReLU slope to obtain dL/dZ1
        error_at_hidden = np.dot(delta_out, self.W2.T)  # Shape: (m, hidden_dim)
        delta_hidden = error_at_hidden * relu_derivative(self.Z1)  # Shape: (m, hidden_dim)
        grad_W1 = np.dot(self.X.T, delta_hidden) / batch_size  # Shape: (input_dim, hidden_dim)
        grad_b1 = np.sum(delta_hidden, axis=0, keepdims=True) / batch_size  # Shape: (1, hidden_dim)

        # Publish the gradients only once all of them have been computed
        self.dldW2, self.dldb2 = grad_W2, grad_b2
        self.dldW1, self.dldb1 = grad_W1, grad_b1
    
    def update_parameters(self):
        """
        Update parameters using gradient descent
        """
        self.W1 -= self.learning_rate * self.dldW2
        self.b1 -= self.learning_rate * self.dldb2
        self.W2 -= self.learning_rate * self.dldW1
        self.b2 -= self.learning_rate * self.dldb1
    
    def train(self, X, Y, epochs=1000, print_loss=False):
        """
        Fit the MLP with full-batch gradient descent.

        X: inputs, shape (m, input_dim)
        Y: true labels, shape (m, output_dim) - one-hot encoded
        epochs: number of passes over the whole data set
        print_loss: when true, print the loss every 100 epochs
        """
        for epoch in range(epochs):
            # One optimisation step: forward pass (cached on self), loss
            # before the update, backward pass, then the parameter update.
            self.forward(X)
            loss = self.compute_loss(Y)
            self.backward(Y)
            self.update_parameters()

            # Report only when asked, and only on every 100th epoch
            if not print_loss or epoch % 100 != 0:
                continue
            print(f"Epoch {epoch}, Loss: {loss:.4f}")
    
    def predict(self, X):
        """
        Return the most probable class for every row of X.

        Runs a forward pass (which also refreshes the cached activations) and
        picks, per sample, the column with the largest softmax output.
        """
        class_probabilities = self.forward(X)
        return class_probabilities.argmax(axis=1)  # Class indices, one per sample
    
    def accuracy(self, X, Y_true):
        """
        Compute accuracy of the model
        X: inputs, shape (m, input_dim)
        Y_true: true labels, shape (m,) - integer encoded
        Returns the fraction of samples whose predicted class equals Y_true.
        """
        Y_pred = self.predict(X)
        # predict() yields class indices, so they compare element-wise with
        # integer-encoded labels; the mean of the matches is the accuracy.
        return np.mean(Y_pred == Y_true)
