02_softmax_regression_multiclass.ipynb: 1 layer (input → output)

In [1]:
import numpy as np

# ----------------------------
# 1) Synthetic Multiclass Dataset
# ----------------------------
def generate_multiclass_data(n_per_class=200, seed=0):
    """
    Sample a shuffled three-class toy dataset of 2D Gaussian blobs.

    Classes 0, 1 and 2 are centred at (-1, 0), (1, 0) and (0, 1.5) and
    share the diagonal covariance 0.3 * I. NumPy's global RNG is seeded
    with `seed`, so the same arguments always give the same arrays.

    Returns:
      X: shape (3*n_per_class, 2) sample points
      Y: shape (3*n_per_class, 3) one-hot labels, row-aligned with X
    """
    np.random.seed(seed)
    centers = ([-1, 0], [1, 0], [0, 1.5])  # position in tuple == class id
    shared_cov = [[0.3, 0], [0, 0.3]]

    # Blobs are drawn in class order so the random stream is consumed in a
    # fixed sequence for a given seed.
    blobs = [
        np.random.multivariate_normal(mean=center, cov=shared_cov, size=n_per_class)
        for center in centers
    ]
    points = np.vstack(blobs)  # (3N, 2)

    # Each identity row repeated n_per_class times yields the one-hot labels
    # in the same class-by-class layout as `points`.
    onehot = np.repeat(np.eye(len(centers)), n_per_class, axis=0)  # (3N, 3)

    # One shared permutation keeps points and labels aligned.
    order = np.random.permutation(len(centers) * n_per_class)
    return points[order], onehot[order]

# ----------------------------
# 2) Model: Softmax Regression
# ----------------------------
class SoftmaxRegression:
    """
    Single-layer multiclass classifier: softmax(X @ W + b).

    Parameters are updated by plain gradient descent on the average
    cross-entropy between the predicted distribution and one-hot targets.

    Attributes:
      W:  weights, shape (in_dim, num_classes), initialised to zeros
      b:  biases, shape (1, num_classes), initialised to zeros
      lr: learning rate applied in update_params
    """

    def __init__(self, in_dim, num_classes, lr=0.1):
        self.W = np.zeros((in_dim, num_classes))
        self.b = np.zeros((1, num_classes))
        self.lr = lr

    def _logits(self, X):
        """Return the pre-softmax scores X @ W + b, shape (batch, num_classes)."""
        return X.dot(self.W) + self.b

    def softmax(self, z):
        """
        Row-wise softmax.

        z: shape (batch, num_classes)
        returns: shape (batch, num_classes), each row sums to 1
        """
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        z_shift = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(z_shift)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def log_softmax(self, z):
        """
        Row-wise log(softmax(z)) computed with the log-sum-exp trick.

        z: shape (batch, num_classes)
        returns: shape (batch, num_classes), finite for finite z
        """
        z_shift = z - np.max(z, axis=1, keepdims=True)
        # The shifted maximum contributes exp(0) = 1, so the sum is >= 1 and
        # the log never sees zero.
        return z_shift - np.log(np.sum(np.exp(z_shift), axis=1, keepdims=True))

    def forward(self, X):
        """
        X: shape (batch, in_dim)
        returns: shape (batch, num_classes), probability distribution per row
        """
        return self.softmax(self._logits(X))

    def compute_loss_and_grad(self, X, Y_onehot):
        """
        Average cross-entropy and its gradients at the current parameters.

        X:        shape (batch, in_dim)
        Y_onehot: shape (batch, num_classes)
        Returns:
          loss: scalar (average CE)
          dW: shape (in_dim, num_classes)
          db: shape (1, num_classes)
        """
        m = X.shape[0]
        z = self._logits(X)   # (batch, num_classes)
        P = self.softmax(z)   # (batch, num_classes)

        # Cross-entropy from exact log-probabilities. Taking log of clipped
        # probabilities would cap the per-sample loss near -log(1e-8) ~ 18.42
        # and disagree with the (unclipped) gradient below.
        loss = -np.sum(Y_onehot * self.log_softmax(z)) / m

        # Gradient of softmax+CE w.r.t. the logits: dZ = (P - Y)/m
        dZ = (P - Y_onehot) / m
        dW = X.T.dot(dZ)
        db = np.sum(dZ, axis=0, keepdims=True)
        return loss, dW, db

    def update_params(self, dW, db):
        """Take one gradient-descent step of size lr, in place."""
        self.W -= self.lr * dW
        self.b -= self.lr * db

    def predict(self, X):
        """Return the most probable class index per row, shape (batch,)."""
        P = self.forward(X)
        return np.argmax(P, axis=1)

# ----------------------------
# 3) Training Loop
# ----------------------------
if __name__ == "__main__":
    # Shuffled dataset: Y holds one-hot rows, Y_int the matching class ids.
    X, Y = generate_multiclass_data(n_per_class=200, seed=0)
    Y_int = Y.argmax(axis=1)

    # The first 80% of the (already shuffled) rows train; the rest validate.
    split = int(0.8 * X.shape[0])
    X_train, X_val = X[:split], X[split:]
    Y_train, Y_val = Y[:split], Y[split:]
    Y_train_int, Y_val_int = Y_int[:split], Y_int[split:]

    model = SoftmaxRegression(in_dim=2, num_classes=3, lr=0.1)
    epochs = 200

    # Full-batch gradient descent: one parameter update per epoch.
    for epoch in range(1, epochs + 1):
        loss, dW, db = model.compute_loss_and_grad(X_train, Y_train)
        model.update_params(dW, db)

        # Report only on the first epoch and on every 50th.
        if epoch != 1 and epoch % 50 != 0:
            continue

        # `loss` was measured before this epoch's update; the accuracies
        # are measured after it.
        train_preds = model.predict(X_train)
        val_preds = model.predict(X_val)
        train_acc = (train_preds == Y_train_int).mean()
        val_acc = (val_preds == Y_val_int).mean()
        print(f"Epoch {epoch:3d} | Loss: {loss:.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

    # Accuracy of the final parameters on both splits.
    train_acc = (model.predict(X_train) == Y_train_int).mean()
    val_acc = (model.predict(X_val) == Y_val_int).mean()
    print(f"\nFinal Train Acc: {train_acc:.4f} | Final Val Acc: {val_acc:.4f}")

Epoch   1 | Loss: 1.0986 | Train Acc: 0.8917 | Val Acc: 0.8750
Epoch  50 | Loss: 0.4417 | Train Acc: 0.9167 | Val Acc: 0.9250
Epoch 100 | Loss: 0.3257 | Train Acc: 0.9313 | Val Acc: 0.9333
Epoch 150 | Loss: 0.2785 | Train Acc: 0.9292 | Val Acc: 0.9417
Epoch 200 | Loss: 0.2526 | Train Acc: 0.9250 | Val Acc: 0.9417

Final Train Acc: 0.9250 | Final Val Acc: 0.9417
