In [1]:
import numpy as np

class MLP:
    """Feed-forward multilayer perceptron: ReLU hidden layers, softmax output.

    Only the forward pass is implemented in this class. Samples are treated as
    row vectors, so every layer computes ``z = a . W + b``.

    Attributes created by ``__init__``:
        layers:  layer widths, ``[inputSize, *hiddenSizes, outputSize]``.
        weights: one ``(layers[i], layers[i+1])`` matrix per pair of
                 consecutive layers, drawn from a standard normal distribution.
        biases:  one ``(1, layers[i+1])`` row vector of zeros per pair of
                 consecutive layers.

    Attributes created by ``forward`` (overwritten on every call):
        hiddenInputs:  pre-activations ``z`` of each hidden layer.
        hiddenOutputs: post-activations ``ReLU(z)`` of each hidden layer.
    """

    def __init__(self, inputSize, hiddenSizes, outputSize):
        """Build the layer list and initialise all weights and biases.

        Args:
            inputSize:   width of the input layer.
            hiddenSizes: sequence of hidden-layer widths (list, tuple, ...);
                         may be empty, giving a single input->output layer.
            outputSize:  width of the output layer.
        """
        #list() makes any sequence acceptable; a tuple cannot be concatenated to a list directly.
        self.layers = [inputSize] + list(hiddenSizes) + [outputSize]

        #One weight matrix and one bias row per pair of consecutive layers.
        self.weights = []
        self.biases = []
        for fanIn, fanOut in zip(self.layers[:-1], self.layers[1:]):
            self.weights.append(np.random.randn(fanIn, fanOut))  #Connects a layer of width fanIn to the next of width fanOut.
            self.biases.append(np.zeros((1, fanOut)))            #Broadcasts over the rows (samples) of a batch.

    @staticmethod
    def ReLU(x):
        """Return the element-wise rectified linear unit, ``max(0, x)``."""
        return np.maximum(0, x)

    @staticmethod
    def softmax(x):
        """Return the softmax of ``x`` taken along its last axis.

        For the 2-D ``(samples, classes)`` arrays produced by ``forward`` the
        last axis is axis 1; using ``-1`` additionally lets a single 1-D score
        vector be passed.
        """
        #Subtracting the maximum leaves the result unchanged but keeps exp() from overflowing.
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)  #Computed once and reused for numerator and denominator.
        return exps / exps.sum(axis=-1, keepdims=True)

    def forward(self, X):
        """Run a forward pass and return the softmax output.

        Args:
            X: input array whose last dimension equals ``inputSize``
               (typically of shape ``(samples, inputSize)``).

        Returns:
            Array of softmax outputs, one row per sample, each row summing to 1.

        Side effects:
            Resets and fills ``self.hiddenInputs`` and ``self.hiddenOutputs``
            with the pre- and post-activation of every hidden layer.
        """
        a = X  #Activation of the current layer (initially the input itself).
        self.hiddenInputs = []   #Pre-activations of the hidden layers.
        self.hiddenOutputs = []  #Post-activations of the hidden layers.

        #Hidden layers: every weight/bias pair except the last one.
        for W, b in zip(self.weights[:-1], self.biases[:-1]):
            z = np.dot(a, W) + b  #pre-activation  (z = a.W + b)
            a = self.ReLU(z)      #post-activation (a = ReLU(z))
            self.hiddenInputs.append(z)
            self.hiddenOutputs.append(a)

        #Output layer: the last weight/bias pair, followed by softmax.
        z = np.dot(a, self.weights[-1]) + self.biases[-1]
        return self.softmax(z)