# LECTURE 04: Perceptron and Generalized Linear Model

### Boilerplate

In [132]:
import torch # type: ignore
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt # type: ignore
from typing import Callable

from sklearn.datasets import make_classification

### GML with hypothesis as input

In [133]:

def hypothesisMatrix(X: np.ndarray,
                     theta: np.ndarray,
                     hypothesis: Callable[[np.ndarray | float], np.ndarray | float]) -> np.ndarray:

    """Computes hypothesis values for each sample in the input matrix.

    Applies the given hypothesis function to the linear combination (dot product)
    of input features and parameters for each sample in the dataset.

    Args:
        X: Input feature matrix of shape (m_samples, n_features) where each row
           represents one sample/observation.
        theta: Parameter vector of shape (n_features,) containing the weights
               for each feature.
        hypothesis: Function that maps the linear predictor (η = θ.T dot X.T) to the
                   hypothesis/prediction space. Should accept either array or
                   float input and return corresponding output.

    Returns:
        numpy.ndarray: Array of hypothesis values for each sample, with shape
                      (n_samples,). The i-th element corresponds to the
                      hypothesis value for the i-th sample in X.

    Example:
        >>> X = np.array([[1, 2], [1, 3]])
        >>> theta = np.array([0.5, 1.0])
        >>> sigmoid = lambda eta: 1/(1+np.exp(-eta))
        >>> hypothesisMatrix(X, theta, sigmoid)
        array([0.88079708, 0.95257413])  # sigmoid(2.5), sigmoid(3.5)
    """

    etaArr =  theta.T @ X.T
    return np.array([hypothesis(eta) for eta in etaArr])



def updateParameterMatrix(Y: np.ndarray,
                          X: np.ndarray,
                          theta: np.ndarray,
                          hypothesisMatrixFunction: Callable[[np.ndarray, np.ndarray], np.ndarray | float],
                          learningRate: float) -> np.ndarray:
    
    """ Compute theta prime using GLM update rule

    Update parameter matrix using GLM update rule with the given hypothesis function

    Args:
        Y: Output vector where each element is output of one sample
        X: Input matrix of shape (m_sample, n_features) where each row
           represents the input of one sample.
        theta: parameter vector containing weight of each feature
        hypothesis: function that maps parameter matrix θ and input matrix X to a
                    hypothesis output
    
    Returns:
        numpy.ndarray: Array of updated parameter vector theta(θ') of size (n_features,)
    """
    m = Y.size
    prediction = hypothesisMatrix(X= X, theta= theta, hypothesis= hypothesisMatrixFunction)
    error = prediction - Y
    gradient = X.T @ error.T

    return theta - learningRate / m * gradient




### Hypothesis functions

In [134]:
def hypothesisMatrixSoftmax(eta: np.ndarray) -> np.ndarray:

    """ Computes hypothesis matrix for softmax function with given eta (η = θ.T dot X.T)
    
    Args:
        eta: η = θ.T dot X.T, input for hypothesis function, shape (k_classes, m_samples)
    
    Returns:
        numpy.ndarray: Softmax hypothesis matrix, shape (k_classes, m_samples)
    """

    etaExp              = np.exp(eta)

    # Denominator: sum over classes (axis=1), shape (m,)
    denominator = np.sum(etaExp, axis=0, keepdims=True)  # shape: (1, m)

    # Element-wise division, broadcasted over each row
    hypothesis = etaExp / denominator  # shape: (m, k)
    
    return hypothesis

In [136]:
def test_glm_softmax_classification():
    # Set random seed for reproducibility
    np.random.seed(42)
    
    # Generate synthetic data with 3 classes and some overlap
    n_samples = 500
    n_features = 2
    n_classes = 3
    
    # Generate data with some overlap between classes
    X, y = make_classification(n_samples=n_samples, 
                              n_features=n_features,
                              n_informative=2,
                              n_redundant=0,
                              n_classes=n_classes,
                              n_clusters_per_class=1,
                              class_sep=1.0,
                              random_state=42)
    
    # Add bias term to X
    X_with_bias = np.c_[np.ones((n_samples, 1)), X]  # shape (500, 3)
    
    # Initialize parameters (theta) - shape (n_classes, n_features + 1)
    theta = np.random.randn(n_classes, n_features + 1) * 0.01  # shape (3, 3)
    
    # Learning parameters
    learning_rate = 0.1
    n_iterations = 1000
    
    # Create one-hot encoded y matrix - shape (n_samples, n_classes)
    y_onehot = np.eye(n_classes)[y]  # shape (500, 3)
    
    # Training loop
    for iteration in range(n_iterations):
        # Compute logits (scores)
        logits = X_with_bias @ theta.T  # shape (500, 3)
        
        # Compute softmax probabilities
        exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
        
        # Compute loss (cross-entropy)
        loss = -np.mean(np.sum(y_onehot * np.log(probs + 1e-15), axis=1))
        
        if iteration % 100 == 0:
            print(f"Iteration {iteration}, Loss: {loss:.4f}")
        
        # Compute gradient
        error = probs - y_onehot  # shape (500, 3)
        gradient = error.T @ X_with_bias  # shape (3, 3)
        
        # Update parameters

test_glm_softmax_classification()

Iteration 0, Loss: 1.1039
Iteration 100, Loss: 1.1039
Iteration 200, Loss: 1.1039
Iteration 300, Loss: 1.1039
Iteration 400, Loss: 1.1039
Iteration 500, Loss: 1.1039
Iteration 600, Loss: 1.1039
Iteration 700, Loss: 1.1039
Iteration 800, Loss: 1.1039
Iteration 900, Loss: 1.1039
