# LECTURE 04: Perceptron and Generalized Linear Model

### Boilerplate

In [157]:
import torch # type: ignore
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt # type: ignore
from typing import Callable

from sklearn.datasets import make_classification

### GML with hypothesis as input

In [None]:

def updateParameterMatrix(Y: np.ndarray,
                          X: np.ndarray,
                          theta: np.ndarray,
                          hypothesisMatrixFunction: Callable[[np.ndarray, np.ndarray], np.ndarray | float],
                          learningRate: float) -> np.ndarray:
    
    """ Compute theta prime using GLM update rule

    Update parameter matrix using GLM update rule with the given hypothesis function

    Args:
        Y: Output vector where each element is output of one sample
        X: Input matrix of shape (m_sample, n_features) where each row
           represents the input of one sample.
        theta: parameter vector containing weight of each feature
        hypothesis: function that maps parameter matrix θ and input matrix X to a
                    hypothesis output
    
    Returns:
        numpy.ndarray: Array of updated parameter vector theta(θ') of size (n_features,)
    """
    m = Y.shape[0]
    eta = theta.T @ X.T
    prediction = hypothesisMatrixFunction(eta)
    error = prediction - Y
    gradient = X.T @ error.T

    return theta - learningRate / m * gradient




### Softmax Regression

In [159]:
def hypothesisMatrixSoftmax(eta: np.ndarray) -> np.ndarray:

    """ Computes hypothesis matrix for softmax function with given eta (η = θ.T dot X.T)
    
    Args:
        eta: η = θ.T dot X.T, input for hypothesis function, shape (k_classes, m_samples)
    
    Returns:
        numpy.ndarray: Softmax hypothesis matrix, shape (k_classes, m_samples)
    """

    etaExp              = np.exp(eta)

    # Denominator: sum over classes (axis=1), shape (m,)
    denominator = np.sum(etaExp, axis=0, keepdims=True)  # shape: (1, m)

    # Element-wise division, broadcasted over each row
    hypothesis = etaExp / denominator  # shape: (m, k)
    
    return hypothesis

def softmaxRegression(X:np.ndarray, Y: np.ndarray, alpha: float, epochs: int) -> np.ndarray:
    n, k = X.shape[1], Y.shape[0]
    theta = np.zeros((n, k))
    
    for epoch in range(epochs):
        theta = updateParameterMatrix(Y, X, theta, hypothesisMatrixSoftmax, alpha)
    return theta

In [160]:
# Function to generate synthetic dataset
def generate_synthetic_data(n_samples: int = 100, n_features: int = 5, n_classes: int = 3):
    X, Y = make_classification(n_samples=n_samples, n_features=n_features, n_classes=n_classes, n_informative=3, random_state=42)
    Y = np.eye(n_classes)[Y]  # One-hot encoding of the labels
    return X, Y

# Test softmax regression
def test_softmax_regression():
    # Generate synthetic data
    X, Y = generate_synthetic_data(n_samples=200, n_features=5, n_classes=3)

    # Hyperparameters for testing
    alpha = 0.01  # Learning rate
    epochs = 1000  # Number of iterations

    # Run softmax regression
    theta = softmaxRegression(X, Y, alpha, epochs)

    # Print the output (theta values)
    print(f"Trained theta values: \n{theta}")

    # Test: Check if the theta matrix is of the expected shape (n_features, n_classes)
    assert theta.shape == (X.shape[1], Y.shape[1]), f"Expected shape {(X.shape[1], Y.shape[1])}, but got {theta.shape}"

    # Optionally: Check if the model has converged or if there's any improvement in the error
    eta = X @ theta  # compute eta = X * theta
    prediction = hypothesisMatrixSoftmax(eta.T)  # hypothesis matrix
    error = np.mean(np.argmax(prediction, axis=0) != np.argmax(Y, axis=1))  # Classification error
    print(f"Final classification error: {error}")

    # Assert that the error is below a threshold
    assert error < 0.1, f"Expected error to be below 0.1, but got {error}"

# Run the test
test_softmax_regression()

ValueError: operands could not be broadcast together with shapes (200,200) (3,200) 