In [2]:
import numpy as np

# Toy dataset: 6 samples with 2 features each, two samples per class
X = np.array(
    [
        [150, 7],  # class 0
        [170, 6],  # class 0
        [120, 8],  # class 1
        [110, 9],  # class 1
        [180, 5],  # class 2
        [200, 4],  # class 2
    ]
)

# Integer class label for each row of X (classes 0, 1, 2)
y = np.array([0, 0, 1, 1, 2, 2])

# Dataset dimensions, derived from the arrays rather than hard-coded
n_samples = X.shape[0]
n_features = X.shape[1]
n_classes = np.unique(y).size

# One-hot encode the labels
def one_hot(y, num_classes):
    """Return the one-hot encoding of integer class labels.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix, so an array of labels of shape ``(n,)`` yields a float
    array of shape ``(n, num_classes)``.
    """
    identity = np.eye(num_classes)
    encoded = identity[y]
    return encoded

# Turn the integer labels into one row of 0/1 indicators per sample
Y = one_hot(y, n_classes)  # Shape: (6, 3)

# Initialize weights and bias
# Starting from all zeros makes every logit equal, so the first softmax
# output is uniform over the classes (initial loss = ln(3) ~= 1.0986).
W = np.zeros((n_classes, n_features))  # Shape: (3, 2)
b = np.zeros((n_classes,))            # Shape: (3,)

# Softmax function
def softmax(z):
    """Return the row-wise softmax of a batch of logits.

    Parameters
    ----------
    z : array-like with at least two dimensions
        Logits; normalization is performed along axis 1 (one row per sample).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``z`` whose entries along axis 1 are
        non-negative and sum to 1.

    Notes
    -----
    The input is never modified: the shifted logits are written to a new
    array instead of being subtracted in place from the caller's data.
    """
    z = np.asarray(z)
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large logits.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

# Cross-entropy loss
def cross_entropy(y_true, y_pred):
    """Return the mean categorical cross-entropy over all samples.

    ``y_true`` holds the target distribution per row (one-hot in this file)
    and ``y_pred`` the predicted probabilities with the same shape. A small
    constant is added to the predictions before the logarithm so that a
    probability of exactly zero does not produce ``-inf``.
    """
    log_probs = np.log(y_pred + 1e-9)
    per_sample = (y_true * log_probs).sum(axis=1)
    return -per_sample.mean()

# Training loop
def train(X, Y, W, b, lr=0.0005, epochs=1):
    """Fit softmax-regression parameters with full-batch gradient descent.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix.
    Y : array-like, shape (n_samples, n_classes)
        One-hot encoded targets.
    W : array-like, shape (n_classes, n_features)
        Initial weights. Not modified.
    b : array-like, shape (n_classes,)
        Initial bias. Not modified.
    lr : float
        Learning rate applied to every gradient step.
    epochs : int
        Number of full passes (gradient steps) over the data.

    Returns
    -------
    tuple
        ``(W, b, losses)``: the updated weights and bias as new float arrays,
        and a list with the loss measured before each update.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    # Work on float copies: the caller's initial parameters stay untouched,
    # and integer-typed initial values no longer break the in-place updates.
    W = np.array(W, dtype=float)
    b = np.array(b, dtype=float)
    # Use the batch actually passed in, not a module-level sample count.
    n_samples = X.shape[0]

    losses = []
    for epoch in range(epochs):
        # Linear logits
        Z = X @ W.T + b  # Shape: (n_samples, n_classes)

        # Softmax output
        y_pred = softmax(Z)

        # Loss
        loss = cross_entropy(Y, y_pred)
        losses.append(loss)

        # Gradients of the mean cross-entropy w.r.t. the logits, W and b
        error = y_pred - Y  # Shape: (n_samples, n_classes)
        dW = (error.T @ X) / n_samples
        db = np.mean(error, axis=0)

        # Update weights
        W -= lr * dW
        b -= lr * db

        # Report every 50th epoch and always the final one
        if epoch % 50 == 0 or epoch == epochs - 1:
            print(f"Epoch {epoch}: Loss = {loss:.4f}")

    return W, b, losses

# Train the model
# NOTE(review): no lr/epochs are passed, so train() runs with its defaults
# (a single epoch); the recorded output predicts class 2 for every sample,
# so more epochs (and possibly feature scaling) are probably needed - confirm.
W_trained, b_trained, losses = train(X, Y, W, b)

# Predict function
def predict(X, W, b):
    """Classify samples with a softmax-regression model.

    Computes the logits ``X @ W.T + b``, converts them to class
    probabilities with ``softmax`` and picks the most probable class per row.

    Returns a tuple ``(labels, probs)``: the predicted class index for each
    sample and the full probability matrix.
    """
    probs = softmax(X @ W.T + b)
    labels = probs.argmax(axis=1)
    return labels, probs

# Test prediction
# The model is evaluated on the same samples it was trained on, so this only
# shows how well the training data is fit.
pred_labels, pred_probs = predict(X, W_trained, b_trained)

# Print labels side by side for comparison; probabilities rounded to 3 decimals
print("\nTrue Labels:     ", y)
print("Predicted Labels:", pred_labels)
print("Predicted Probs:\n", np.round(pred_probs, 3))


Epoch 0: Loss = 1.0986

True Labels:      [0 0 1 1 2 2]
Predicted Labels: [2 2 2 2 2 2]
Predicted Probs:
 [[0.291 0.095 0.614]
 [0.277 0.077 0.646]
 [0.31  0.126 0.564]
 [0.316 0.139 0.546]
 [0.269 0.07  0.661]
 [0.254 0.057 0.689]]
