# In [None]:
import numpy as np

class CategoricalCrossEntropyLoss:
    """Mean categorical cross-entropy between predicted probabilities and labels."""

    def calculate(self, y_pred, y_true):
        """Return the mean negative log-likelihood of the true classes.

        Args:
            y_pred: Array-like of predicted class probabilities, indexed as
                ``[sample, class]``.
            y_true: Array-like of targets. Either 1-D integer class indices
                (one per sample) or a 2-D one-hot matrix that is multiplied
                element-wise with ``y_pred``.

        Returns:
            The per-sample losses averaged with ``np.mean``.
        """
        # Accept plain lists/tuples as well as ndarrays, so that the
        # dimensionality check below works for any array-like input.
        y_pred = np.asarray(y_pred)
        y_true = np.asarray(y_true)

        # Clip into [1e-7, 1 - 1e-7] so that log(0) = -inf cannot occur.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Class indices: pick, for each row, the probability at the
            # column given by that row's label.
            row_indices = np.arange(len(y_pred_clipped))
            correct_confidences = y_pred_clipped[row_indices, y_true]
        else:
            # One-hot rows: the mask zeroes every column except the true one.
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        # Negative log-likelihood per sample, averaged over the batch.
        losses = -np.log(correct_confidences)
        return np.mean(losses)

# Dry Run Example 1: One-Hot Encoded Labels
# Input data
# Predicted class probabilities: one row per sample, one column per class.
softmax_outputs = np.array(
    [
        [0.7, 0.1, 0.2],    # sample 1
        [0.1, 0.5, 0.4],    # sample 2
        [0.02, 0.9, 0.08],  # sample 3
    ]
)

# One-hot ground truth: the 1 in each row marks that sample's true class.
class_targets = np.array(
    [
        [1, 0, 0],  # sample 1 -> class 0
        [0, 1, 0],  # sample 2 -> class 1
        [0, 1, 0],  # sample 3 -> class 1
    ]
)

# Step-by-step dry run for one-hot encoded labels
# 1. Clip predictions so that log(0) (which would be -inf) can never occur
# y_pred_clipped = np.clip(softmax_outputs, 1e-7, 1-1e-7)
# y_pred_clipped ≈ [[0.7, 0.1, 0.2],
#                   [0.1, 0.5, 0.4],
#                   [0.02, 0.9, 0.08]] (no significant change since values are within bounds)

# 2. Check y_true shape: class_targets.shape = (3, 3), so it's 2D (one-hot encoded)
# Use: correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

# 3. Calculate correct confidences (element-wise multiplication and sum along axis 1)
# Sample 1: [0.7, 0.1, 0.2] * [1, 0, 0] = [0.7, 0, 0] → sum = 0.7
# Sample 2: [0.1, 0.5, 0.4] * [0, 1, 0] = [0, 0.5, 0] → sum = 0.5
# Sample 3: [0.02, 0.9, 0.08] * [0, 1, 0] = [0, 0.9, 0] → sum = 0.9
# correct_confidences = [0.7, 0.5, 0.9]

# 4. Calculate negative log likelihoods
# losses = -np.log([0.7, 0.5, 0.9])
#        = [-np.log(0.7), -np.log(0.5), -np.log(0.9)]
#        ≈ [0.3567, 0.6931, 0.1054]

# 5. Calculate mean loss
# mean_loss = np.mean([0.3567, 0.6931, 0.1054]) ≈ 0.3851

# Example 1 Output:
# Build the loss object once; it is reused for the second example below.
loss_obj = CategoricalCrossEntropyLoss()
# Evaluate the loss against the one-hot targets.
loss = loss_obj.calculate(y_pred=softmax_outputs, y_true=class_targets)
# loss ≈ 0.3851

# Dry Run Example 2: Categorical Labels
# Input data (same softmax_outputs, but class_targets as categorical indices)
# Integer class index of the true class for each of the three samples.
class_targets_categorical = np.array((0, 1, 1))

# Step-by-step dry run for categorical labels
# 1. Clip predictions (same as above)
# y_pred_clipped ≈ [[0.7, 0.1, 0.2],
#                   [0.1, 0.5, 0.4],
#                   [0.02, 0.9, 0.08]]

# 2. Check y_true shape: class_targets_categorical.shape = (3,), so it's 1D (categorical)
# Use: correct_confidences = y_pred_clipped[range(len(y_pred)), y_true]

# 3. Calculate correct confidences (select probabilities at true class indices)
# range(len(y_pred)) = [0, 1, 2]
# y_true = [0, 1, 1]
# correct_confidences = [y_pred_clipped[0,0], y_pred_clipped[1,1], y_pred_clipped[2,1]]
#                    = [0.7, 0.5, 0.9]

# 4. Calculate negative log likelihoods (same as above)
# losses = -np.log([0.7, 0.5, 0.9])
#        ≈ [0.3567, 0.6931, 0.1054]

# 5. Calculate mean loss
# mean_loss = np.mean([0.3567, 0.6931, 0.1054]) ≈ 0.3851

# Example 2 Output:
# Evaluate the loss against the integer class-index targets.
loss = loss_obj.calculate(
    y_pred=softmax_outputs,
    y_true=class_targets_categorical,
)
# loss ≈ 0.3851