In [1]:
import numpy as np

class SoftmaxRegressionScratch:
    """
    Multinomial Logistic Regression (Softmax Regression) from scratch using gradient descent.
    Supports multi-class classification.

    Training is full-batch: every iteration uses all samples to compute the
    gradient ``X.T @ (probs - one_hot) / n_samples`` and takes one step of
    size ``learning_rate``.

    Attributes:
    -----------
    weights : numpy.ndarray or None
        Shape (n_features, n_classes); None until ``fit`` has been called.
    bias : numpy.ndarray or None
        Shape (n_classes,); None until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.1, iterations=1000):
        """
        Parameters:
        -----------
        learning_rate : float
            Gradient descent step size.
        iterations : int
            Number of iterations for gradient descent.
        """
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.weights = None  # Shape: (n_features, n_classes)
        self.bias = None     # Shape: (n_classes,)

    def _softmax(self, z):
        """
        Compute softmax values for each set of scores in z.
        Parameters:
        -----------
        z : numpy.ndarray
            Input array of shape (n_samples, n_classes).
        Returns:
        --------
        numpy.ndarray
            Softmax probabilities of shape (n_samples, n_classes).
        """
        # Subtracting the row-wise max leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def _one_hot(self, y, n_classes):
        """
        Convert labels to one-hot encoding.

        Parameters:
        -----------
        y : numpy.ndarray
            Integer labels of shape (n_samples,). Callers must ensure the
            values are in [0, n_classes); negative values would wrap around
            under NumPy indexing.
        n_classes : int
            Number of columns in the encoding.

        Returns:
        --------
        numpy.ndarray
            Array of shape (n_samples, n_classes) with a single 1 per row.
        """
        one_hot = np.zeros((y.shape[0], n_classes))
        one_hot[np.arange(y.shape[0]), y] = 1
        return one_hot

    def _validate_labels(self, y):
        """
        Return y as a 1-D integer array, rejecting anything that is not a
        vector of non-negative, integer-valued class labels.

        Raises:
        -------
        ValueError
            If y is not 1-D, contains non-integer or non-finite values, or
            contains negative labels.
        """
        y = np.asarray(y)
        if y.ndim != 1:
            raise ValueError(
                f"y must be 1-D with shape (n_samples,); got shape {y.shape}"
            )
        if not np.issubdtype(y.dtype, np.integer):
            # Accept e.g. float arrays such as [0., 1., 2.] as long as every
            # value is a whole number; they are cast to int for indexing.
            try:
                y_float = y.astype(float)
            except (TypeError, ValueError) as exc:
                raise ValueError("y must contain integer class labels") from exc
            if not np.all(np.isfinite(y_float)) or np.any(y_float != np.floor(y_float)):
                raise ValueError("y must contain integer class labels")
            y = y_float.astype(int)
        if y.size and y.min() < 0:
            # Negative indices would silently wrap to the last classes.
            raise ValueError("y must contain non-negative class labels (0...n_classes-1)")
        return y

    def fit(self, X, y):
        """
        Fit the softmax regression model using gradient descent.

        Parameters:
        -----------
        X : array-like
            Training data of shape (n_samples, n_features).
        y : array-like
            Target values of shape (n_samples,), integer labels (0...n_classes-1).
            The number of classes is taken to be ``max(y) + 1``.

        Raises:
        -------
        ValueError
            If X is not 2-D, the dataset is empty, X and y disagree on the
            number of samples, or y is not a vector of non-negative integers.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D with shape (n_samples, n_features); got shape {X.shape}"
            )
        y = self._validate_labels(y)

        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            # Without this check a length-1 y would silently broadcast
            # against the (n_samples, n_classes) probability matrix.
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} vs {y.shape[0]}"
            )

        n_classes = int(y.max()) + 1
        y_one_hot = self._one_hot(y, n_classes)

        # Initialize weights and bias
        self.weights = np.zeros((n_features, n_classes))
        self.bias = np.zeros(n_classes)

        for _ in range(self.iterations):
            # Linear logits
            logits = X @ self.weights + self.bias  # Shape: (n_samples, n_classes)
            probs = self._softmax(logits)          # Shape: (n_samples, n_classes)

            # Compute gradients
            error = probs - y_one_hot              # Shape: (n_samples, n_classes)
            dw = (X.T @ error) / n_samples         # Shape: (n_features, n_classes)
            db = np.mean(error, axis=0)            # Shape: (n_classes,)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict_proba(self, X):
        """
        Predict class probabilities for samples in X.

        Parameters:
        -----------
        X : array-like
            Input data of shape (n_samples, n_features).

        Returns:
        --------
        numpy.ndarray
            Predicted class probabilities of shape (n_samples, n_classes).

        Raises:
        -------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("Model is not fitted yet; call fit() before predicting.")
        X = np.asarray(X, dtype=float)
        logits = X @ self.weights + self.bias
        return self._softmax(logits)

    def predict(self, X):
        """
        Predict class labels for samples in X.

        Parameters:
        -----------
        X : array-like
            Input data of shape (n_samples, n_features).

        Returns:
        --------
        numpy.ndarray
            Predicted class labels (integers) of shape (n_samples,).

        Raises:
        -------
        RuntimeError
            If the model has not been fitted yet.
        """
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)

> ## Example usage: training on a synthetic 3-class, 2-feature dataset

In [2]:
# Synthetic demo data: three 2-D Gaussian blobs (unit variance), one per class
np.random.seed(42)
n_samples = 120
n_per_class = n_samples // 3
class_centers = [np.array([2, 2]), np.array([-2, -2]), np.array([2, -2])]

# Blobs are drawn in class order (0, 1, 2) so the seeded random stream is
# consumed in a fixed sequence and the data is reproducible.
X_demo = np.vstack([
    np.random.randn(n_per_class, 2) + center
    for center in class_centers
])
y_demo = np.repeat(np.arange(len(class_centers)), n_per_class)

# Fit the from-scratch softmax model on the full demo set
model = SoftmaxRegressionScratch(learning_rate=0.1, iterations=1000)
model.fit(X_demo, y_demo)

# Training-set accuracy: fraction of samples whose predicted label matches
preds = model.predict(X_demo)
accuracy = (preds == y_demo).mean()
print("Softmax Regression accuracy:", accuracy)

Softmax Regression accuracy: 0.95
