<a href="https://colab.research.google.com/github/BoboSamson/BoboSamson/blob/main/Softmax_bacpropagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Softmax implementation (forward pass)

import numpy as np

def softmax(X_in, axis=-1):
    """Compute a numerically stable softmax of ``X_in`` along ``axis``.

    Args:
        X_in: Array-like of raw scores (logits). May be a single vector or a
            batch of vectors.
        axis: Axis along which the probabilities are normalised. Defaults to
            the last axis, so a 2-D input of shape (batch, classes) yields one
            probability distribution per row. For a 1-D input this is the
            same as normalising over the whole vector.

    Returns:
        numpy.ndarray with the same shape as ``X_in`` whose entries along
        ``axis`` are non-negative and sum to 1.
    """
    X_in = np.asarray(X_in)
    # Stability trick: shifting by the maximum leaves the result unchanged
    # mathematically but keeps np.exp from overflowing on large scores.
    shifted = X_in - np.max(X_in, axis=axis, keepdims=True)
    exp_values = np.exp(shifted)
    # keepdims=True lets the denominator broadcast against each slice.
    return exp_values / np.sum(exp_values, axis=axis, keepdims=True)

# Demonstrate the forward pass on a small vector of raw scores.
X_in = np.asarray([2.0, 1.0, 0.1])
X_out = softmax(X_in)  # probabilities, reused by the loss and gradient cells

print("Softmax Output:", X_out)


Softmax Output: [0.65900114 0.24243297 0.09856589]


In [2]:
# Compute the cross-entropy loss

def cross_entropy_loss(y_true, X_out, eps=1e-12):
    """Cross-entropy between target distribution ``y_true`` and ``X_out``.

    Args:
        y_true: Target weights per class (a one-hot vector in the example
            that follows).
        X_out: Predicted probabilities, same shape as ``y_true``.
        eps: Lower bound applied to the probabilities before taking the log.
            Without it, a probability of exactly 0 produces ``log(0) = -inf``
            and ``0 * -inf = nan``, which poisons the whole sum.

    Returns:
        The scalar ``-sum(y_true * log(X_out))`` with probabilities clipped
        to ``[eps, 1.0]``.
    """
    # Clip so the log stays finite even for saturated predictions.
    probs = np.clip(X_out, eps, 1.0)
    return -np.sum(y_true * np.log(probs))

# Example: the true class is index 0, encoded as a one-hot vector.
y_true = np.asarray([1, 0, 0])
loss = cross_entropy_loss(y_true=y_true, X_out=X_out)

print("Loss:", loss)


Loss: 0.4170300162778335


In [3]:
# Compute the gradient (backpropagation)
def softmax_backward(X_out, y_true):
    """Return the element-wise difference ``X_out - y_true``.

    When ``X_out`` is a softmax output and ``y_true`` is a one-hot target,
    this difference is the simplified gradient of the cross-entropy loss
    with respect to the softmax input.
    """
    grad_input = X_out - y_true
    return grad_input

# Gradient for the example above: predicted probabilities minus the one-hot target.
grad_X_in = softmax_backward(X_out=X_out, y_true=y_true)
print("Gradient w.r.t Input:", grad_X_in)


Gradient w.r.t Input: [-0.34099886  0.24243297  0.09856589]


In [4]:
# PyTorch implementation

import torch
import torch.nn as nn
import torch.optim as optim

# ----- STEP 1: Initialize data as tensors -----
# Input: one sample with three features, shape (1, 3).
X_in = torch.tensor([[2.0, 1.0, 0.1]], dtype=torch.float32)
# Target: a class index rather than a one-hot vector.
y_true = torch.tensor([0], dtype=torch.long)

num_classes = 3
num_features = X_in.size(1)  # feature count taken from the input's second dimension

# ----- STEP 2: Define model -----
class SoftmaxRegression(nn.Module):
    """Single linear layer that maps input features to per-class logits."""

    def __init__(self, num_features, num_classes):
        """Create the linear layer with ``num_features`` inputs and ``num_classes`` outputs."""
        super().__init__()
        self.linear = nn.Linear(num_features, num_classes)

    def forward(self, x):
        """Return raw logits for ``x``.

        Softmax is intentionally not applied here; the notebook pairs this
        model with CrossEntropyLoss, which applies it.
        """
        logits = self.linear(x)
        return logits

# Seed PyTorch's RNG before the model is built so the random weight
# initialisation -- and therefore every printed loss -- is identical on each
# Restart & Run All. 1234 matches the seed value declared later in the notebook.
torch.manual_seed(1234)

# Initialize model
model = SoftmaxRegression(num_features, num_classes)

# ----- STEP 3: Define loss function and optimizer -----
criterion = nn.CrossEntropyLoss()  # Automatically applies softmax + cross-entropy
optimizer = optim.SGD(model.parameters(), lr=0.1)

# ----- STEP 4: Training loop -----
epochs = 100
for epoch in range(epochs):
    # Forward pass
    logits = model(X_in)  # Compute raw scores (logits)

    loss = criterion(logits, y_true)  # Compute loss

    # Backward pass
    optimizer.zero_grad()  # Reset gradients accumulated by the previous step
    loss.backward()  # Compute gradients
    optimizer.step()  # Update weights

    # Report every 10th epoch to keep the output short.
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# ----- STEP 5: Final prediction -----
# Inference only: no_grad avoids building an autograd graph for this pass.
with torch.no_grad():
    logits_final = model(X_in)
    X_out_final = torch.softmax(logits_final, dim=1)  # Convert logits to probabilities

print("\nFinal Softmax Probabilities:", X_out_final.detach().numpy())
# Take the argmax over the class dimension; without dim, argmax indexes into
# the flattened tensor, which is only correct for a batch of one.
print("Predicted Class:", torch.argmax(X_out_final, dim=1).item())


Epoch 0, Loss: 1.9270
Epoch 10, Loss: 0.1381
Epoch 20, Loss: 0.0630
Epoch 30, Loss: 0.0405
Epoch 40, Loss: 0.0298
Epoch 50, Loss: 0.0235
Epoch 60, Loss: 0.0194
Epoch 70, Loss: 0.0166
Epoch 80, Loss: 0.0144
Epoch 90, Loss: 0.0128

Final Softmax Probabilities: [[0.9886149  0.00519783 0.00618721]]
Predicted Class: 0


In [5]:
# Set device type: use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# NOTE(review): `seed` is not consumed in the visible cells -- presumably used
# further down to seed random number generators; confirm.
seed = 1234