<a href="https://colab.research.google.com/github/SrijaniBasu/ConceptualPractice_ArtificialNeuralSystem/blob/main/SoftmaxClassifier_With_CrossEntropy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Here's a complete, self-contained softmax classifier in Python, with:

1.   Softmax function
2.   Cross-entropy loss
3.   Synthetic test data generation
4.   Training loop to test it end-to-end


In [None]:
import numpy as np


Helper functions: softmax & cross-entropy

In [None]:
def softmax(z):
    """
    Compute softmax over the class axis of z.

    z: array-like of logits, shape (n_samples, n_classes), or a single
       1-D vector of shape (n_classes,)
    returns: array with the same shape as z; for 2-D input every row sums
             to 1, for 1-D input the whole vector sums to 1
    """
    z = np.asarray(z)

    # A lone logit vector is normalized over its only axis; anything with
    # two or more dimensions keeps the original per-row (axis=1) behavior.
    axis = 0 if z.ndim == 1 else 1

    # For numerical stability: subtract the max along the class axis so the
    # largest exponent passed to np.exp is 0.
    z_shift = z - np.max(z, axis=axis, keepdims=True)
    exp_z = np.exp(z_shift)
    probs = exp_z / np.sum(exp_z, axis=axis, keepdims=True)
    return probs


def cross_entropy_loss(y_true, y_pred):
    """
    Mean cross-entropy between one-hot targets and predicted probabilities.

    y_true: one-hot labels, shape (n_samples, n_classes)
    y_pred: predicted probabilities, shape (n_samples, n_classes)
    returns: the per-sample cross-entropy averaged over all samples
    """
    # Keep probabilities inside [tiny, 1 - tiny] so the log never sees 0.
    tiny = 1e-15
    safe_probs = np.clip(y_pred, tiny, 1 - tiny)

    # Only the log-probability of the true class survives the product with
    # the one-hot row; summing over classes picks it out for each sample.
    weighted_log_probs = y_true * np.log(safe_probs)
    per_sample = -weighted_log_probs.sum(axis=1)
    return per_sample.mean()


Generate synthetic test data

In [None]:
def generate_data(n_per_class=100, random_state=0):
    """
    Build a synthetic 3-class, 2-feature dataset of Gaussian blobs.

    Seeds NumPy's global random generator with random_state, then draws
    n_per_class standard-normal points around each class center:
    class 0 at (0, 0), class 1 at (3, 3), class 2 at (0, 4).

    returns: (X, y) where X has shape (3*n_per_class, 2) and y holds the
             integer labels 0/1/2 with shape (3*n_per_class,), grouped by
             class in that order
    """
    np.random.seed(random_state)

    # Position in this tuple is the class label.
    centers = ([0, 0], [3, 3], [0, 4])

    blobs = []
    labels = []
    for label, center in enumerate(centers):
        # Classes are drawn in label order, one block of samples per class.
        blobs.append(np.random.randn(n_per_class, 2) + np.array(center))
        labels.append(np.full(n_per_class, label, dtype=int))

    X = np.vstack(blobs)         # shape: (3*n_per_class, 2)
    y = np.concatenate(labels)   # shape: (3*n_per_class,)
    return X, y


One-hot encoder for labels:

In [None]:
def to_one_hot(y, num_classes):
    """
    Convert integer class labels into a one-hot matrix.

    y: integer labels, array-like of shape (n_samples,)
    num_classes: number of columns of the result
    returns: one-hot matrix (n_samples, num_classes) with 1 in column y[i]
             of row i and 0 elsewhere
    raises: IndexError if any label lies outside [0, num_classes)
    """
    # Accept plain lists/tuples as well as NumPy arrays.
    y = np.asarray(y)
    n_samples = y.shape[0]
    one_hot = np.zeros((n_samples, num_classes))

    if y.size == 0:
        # Nothing to mark; also avoids indexing with an empty array whose
        # dtype is not an integer type (e.g. one built from []).
        return one_hot

    # Negative labels would otherwise be treated as "count from the end"
    # and silently set the wrong column, so reject them together with
    # labels that are too large.
    if y.min() < 0 or y.max() >= num_classes:
        raise IndexError(
            f"labels must lie in [0, {num_classes}); "
            f"got min={y.min()}, max={y.max()}"
        )

    one_hot[np.arange(n_samples), y] = 1
    return one_hot


# **SOFTMAX CLASSIFIER MODEL**

In [None]:
class SoftmaxClassifier:
    """
    Linear multi-class classifier: probabilities = softmax(X @ W + b).

    Parameters are adjusted with plain gradient-descent steps on the mean
    cross-entropy loss.
    """

    def __init__(self, n_features, n_classes, learning_rate=0.1):
        """
        n_features: number of input features (rows of W)
        n_classes: number of output classes (columns of W and b)
        learning_rate: step size applied in update_params
        """
        self.n_features = n_features
        self.n_classes = n_classes
        self.lr = learning_rate

        # Small random weights drawn from NumPy's global generator;
        # the bias starts at zero.
        self.W = np.random.randn(n_features, n_classes) * 0.01
        self.b = np.zeros((1, n_classes))

    def forward(self, X):
        """
        Map inputs to class probabilities.

        X: input data of shape (n_samples, n_features)
        returns: probabilities (n_samples, n_classes)
        """
        scores = X @ self.W + self.b  # (n_samples, n_classes)
        return softmax(scores)

    def compute_gradients(self, X, y_true_one_hot):
        """
        Gradients of the mean cross-entropy loss w.r.t. W and b.

        X: shape (n_samples, n_features)
        y_true_one_hot: shape (n_samples, n_classes)
        returns: (dW, db) with shapes (n_features, n_classes) and
                 (1, n_classes)
        """
        batch_size = X.shape[0]
        predicted = self.forward(X)  # (n_samples, n_classes)

        # For softmax followed by cross-entropy the gradient w.r.t. the
        # logits reduces to (prediction - target); dividing by the batch
        # size accounts for the mean over samples.
        residual = predicted - y_true_one_hot
        grad_logits = residual / batch_size  # (n_samples, n_classes)

        grad_W = X.T @ grad_logits                       # (n_features, n_classes)
        grad_b = grad_logits.sum(axis=0, keepdims=True)  # (1, n_classes)
        return grad_W, grad_b

    def update_params(self, dW, db):
        """Take one gradient-descent step, modifying W and b in place."""
        self.W -= self.lr * dW
        self.b -= self.lr * db

    def predict(self, X):
        """
        Returns predicted class indices (the most probable class per row).
        """
        return np.argmax(self.forward(X), axis=1)

    def accuracy(self, X, y_true):
        """Fraction of rows of X whose predicted class equals y_true."""
        hits = self.predict(X) == y_true
        return np.mean(hits)


Train the Classifier and Evaluate It on the Training Data

In [None]:
# --- Data -----------------------------------------------------------------
# 200 points per class; the fixed seed makes the run repeatable.
X, y = generate_data(n_per_class=200, random_state=42)
n_samples, n_features = X.shape
n_classes = np.unique(y).size

# --- Targets --------------------------------------------------------------
# The loss and the gradients expect one-hot rows rather than integer labels.
y_one_hot = to_one_hot(y, n_classes)

# --- Model ----------------------------------------------------------------
model = SoftmaxClassifier(n_features=n_features, n_classes=n_classes, learning_rate=0.5)

# --- Training: full-batch gradient descent ----------------------------------
n_epochs = 1000

for epoch in range(1, n_epochs + 1):
    # The loss is measured with the parameters as they are BEFORE this
    # epoch's update.
    probs = model.forward(X)
    loss = cross_entropy_loss(y_one_hot, probs)

    # One gradient step over the whole dataset.
    dW, db = model.compute_gradients(X, y_one_hot)
    model.update_params(dW, db)

    # Report on the first epoch and then every 100th; the accuracy shown
    # is computed AFTER the update above.
    if epoch == 1 or epoch % 100 == 0:
        acc = model.accuracy(X, y)
        print(f"Epoch {epoch:4d} | Loss: {loss:.4f} | Accuracy: {acc:.4f}")

# --- Final evaluation (on the same data used for training) ------------------
final_acc = model.accuracy(X, y)
print("\nFinal training accuracy:", final_acc)


Epoch    1 | Loss: 1.1060 | Accuracy: 0.7350
Epoch  100 | Loss: 0.2124 | Accuracy: 0.9317
Epoch  200 | Loss: 0.1948 | Accuracy: 0.9283
Epoch  300 | Loss: 0.1889 | Accuracy: 0.9250
Epoch  400 | Loss: 0.1860 | Accuracy: 0.9217
Epoch  500 | Loss: 0.1844 | Accuracy: 0.9217
Epoch  600 | Loss: 0.1834 | Accuracy: 0.9233
Epoch  700 | Loss: 0.1827 | Accuracy: 0.9267
Epoch  800 | Loss: 0.1822 | Accuracy: 0.9250
Epoch  900 | Loss: 0.1819 | Accuracy: 0.9267
Epoch 1000 | Loss: 0.1817 | Accuracy: 0.9267

Final training accuracy: 0.9266666666666666
