In [1]:
import numpy as np

def init_params(nx, nh, ny):
    """
    Build the initial parameter set of an MLP with a single hidden layer.

    Arguments:
    - nx: number of input neurons
    - nh: number of hidden neurons
    - ny: number of output neurons

    Weight matrices are sampled from a normal distribution (mean=0, std=0.3)
    using NumPy's global random state; bias vectors start at zero.

    Returns:
        A dictionary with the freshly created parameters:
        - W1: input-to-hidden weights, shape (nh, nx)
        - b1: hidden-layer biases, shape (nh, 1)
        - W2: hidden-to-output weights, shape (ny, nh)
        - b2: output-layer biases, shape (ny, 1)
    """
    mean, std = 0.0, 0.3

    # Draw order matters for reproducibility under a fixed seed:
    # the hidden-layer weights are sampled before the output-layer weights.
    hidden_weights = np.random.normal(loc=mean, scale=std, size=(nh, nx))
    output_weights = np.random.normal(loc=mean, scale=std, size=(ny, nh))

    return {
        'W1': hidden_weights,
        'b1': np.zeros((nh, 1)),
        'W2': output_weights,
        'b2': np.zeros((ny, 1)),
    }


In [2]:
import numpy as np

def forward(params, X):
    """
    Perform a forward pass through the MLP (tanh hidden layer, softmax output).

    Arguments:
    - params: dictionary containing the parameters ('W1', 'b1', 'W2', 'b2')
    - X: input data of shape (n_batch, nx), where n_batch is the batch size
      and nx is the number of input neurons.

    Returns:
    - Y_hat: output predictions (after softmax), of shape (n_batch, ny);
      each row sums to 1.
    - cache: dictionary containing intermediate values for backpropagation:
        - Z1: hidden-layer pre-activations, X @ W1^T + b1^T
        - A1: hidden-layer activations, tanh(Z1)
        - Z2: output-layer pre-activations (raw logits), A1 @ W2^T + b2^T
        - A2: softmax(Z2), the same array as Y_hat
    """

    # Retrieve parameters from the dictionary
    W1, b1 = params['W1'], params['b1']
    W2, b2 = params['W2'], params['b2']

    # Hidden layer: samples are rows, so weights are applied transposed and the
    # column-vector bias is transposed to broadcast across the batch.
    Z1 = np.dot(X, W1.T) + b1.T
    A1 = np.tanh(Z1)

    # Output layer (raw logits)
    Z2 = np.dot(A1, W2.T) + b2.T

    # Numerically stable softmax: subtracting the per-row maximum leaves the
    # result mathematically unchanged but keeps every exponent <= 0, so
    # np.exp cannot overflow to inf (which would turn the division into nan).
    shifted = Z2 - np.max(Z2, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    A2 = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    # Store intermediate values for backpropagation; Z2 is kept unshifted.
    cache = {
        'Z1': Z1,
        'A1': A1,
        'Z2': Z2,
        'A2': A2
    }

    # The output of the network is A2 (the softmax output)
    Y_hat = A2

    return Y_hat, cache


In [3]:
import numpy as np

def loss_accuracy(Y_hat, Y):
    """
    Compute the loss (cross-entropy) and accuracy of the model.

    Arguments:
    - Y_hat: predicted values (after softmax), of shape (n_batch, ny)
    - Y: true labels, of shape (n_batch, ny), one-hot encoded for multi-class classification.

    Returns:
    - loss: the cross-entropy loss averaged over the batch. Predicted
      probabilities are floored at 1e-12 before taking the log, so the loss
      is always finite (a true class predicted with probability 0 contributes
      -log(1e-12), about 27.6, instead of inf/nan).
    - accuracy: the computed accuracy as a percentage (0-100) of correct predictions.
    """

    # Number of examples in the batch
    n_batch = Y.shape[0]

    # Floor probabilities before the log: log(0) is -inf, and the 0 * -inf
    # produced for every non-target class with probability exactly 0 is nan,
    # which would poison the whole sum even for a perfect prediction.
    eps = 1e-12
    log_probs = np.log(np.maximum(Y_hat, eps))

    # Cross-entropy loss, averaged over the batch
    loss = -np.sum(Y * log_probs) / n_batch

    # Accuracy: compare the most probable predicted class with the true class
    predictions = np.argmax(Y_hat, axis=1)
    true_labels = np.argmax(Y, axis=1)
    correct_predictions = np.sum(predictions == true_labels)

    # Accuracy as the percentage of correct predictions
    accuracy = correct_predictions / n_batch * 100

    return loss, accuracy
