In [14]:
import numpy as np

# Ground truth labels (one-hot encoded).
# Row k of the 5x5 identity matrix is the one-hot vector for class k+1, so
# indexing the identity by each sample's class index stacks the label rows.
y_true = np.eye(5, dtype=int)[[
    0,  # Sample 1: Class 1
    1,  # Sample 2: Class 2
    2,  # Sample 3: Class 3
    3,  # Sample 4: Class 4
    4,  # Sample 5: Class 5
]]

# Model output logits: one row per sample, one column per class
logits = np.array(
    [
        [2.0, 1.0, 0.1, 2.0, 2.5],  # Sample 1
        [0.5, 2.5, 0.2, 2.5, 0.5],  # Sample 2
        [0.1, 0.2, 3.0, 1.0, 2.0],  # Sample 3
        [0.5, 2.0, 0.2, 2.0, 1.0],  # Sample 4
        [2.0, 0.2, 3.0, 1.0, 2.0],  # Sample 5
    ],
    dtype=float,
)


In [15]:
# Softmax function (stable implementation)
def softmax(x, axis=-1):
    """Convert logits into probabilities along ``axis``.

    Parameters
    ----------
    x : array_like
        Logits.
    axis : int, default -1
        Axis over which the probabilities are normalized. The default
        normalizes over the last axis, i.e. each row of a
        ``(samples, classes)`` array.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries along ``axis``
        sum to 1.
    """
    # Subtracting the per-slice maximum cancels in the ratio below, but keeps
    # every exponent <= 0 so np.exp cannot overflow for large logits.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)


In [16]:
# Compute softmax probabilities for each sample
probs = softmax(logits)

# Sanity checks before the probabilities are combined with the labels:
# there must be one probability per label entry, and every row must be a
# valid distribution. This fails fast when `logits` and `y_true` were defined
# by different (out-of-order) cell executions.
assert probs.shape == y_true.shape, (
    f"probs has shape {probs.shape} but y_true has shape {y_true.shape}"
)
np.testing.assert_allclose(probs.sum(axis=-1), 1.0)

print("Softmax Probabilities:\n", probs)


Softmax Probabilities:
 [[0.24002865 0.08830161 0.03590075 0.24002865 0.39574034]
 [0.05708111 0.42177553 0.04228673 0.42177553 0.05708111]
 [0.03398492 0.03755915 0.61764691 0.08358942 0.2272196 ]
 [0.08095254 0.36280409 0.05997111 0.36280409 0.13346817]
 [0.19042323 0.03147675 0.517624   0.07005279 0.19042323]]


In [17]:
# Compute log of the softmax probabilities
# A probability can underflow to exactly 0 when its logit is far below the row
# maximum. np.log(0) is -inf, and the later `y_true * log_probs` would turn
# 0 * -inf into nan, poisoning the loss even for non-target classes. Flooring
# at the smallest positive normal float keeps every log finite; probabilities
# at or above that floor are left unchanged.
log_probs = np.log(np.maximum(probs, np.finfo(probs.dtype).tiny))
print("\nLog of Softmax Probabilities:\n", log_probs)



Log of Softmax Probabilities:
 [[-1.42699699 -2.42699699 -3.32699699 -1.42699699 -0.92699699]
 [-2.86328203 -0.86328203 -3.16328203 -0.86328203 -2.86328203]
 [-3.38183833 -3.28183833 -0.48183833 -2.48183833 -1.48183833]
 [-2.51389228 -1.01389228 -2.81389228 -1.01389228 -2.01389228]
 [-1.65850616 -3.45850616 -0.65850616 -2.65850616 -1.65850616]]


In [18]:
# Mask the log-probabilities with the one-hot labels: the element-wise product
# zeroes every entry except the target class's log-prob in each row
y_true_log_probs = log_probs * y_true
print(f"\ny_true * log(probs):\n {y_true_log_probs}")



y_true * log(probs):
 [[-1.42699699 -0.         -0.         -0.         -0.        ]
 [-0.         -0.86328203 -0.         -0.         -0.        ]
 [-0.         -0.         -0.48183833 -0.         -0.        ]
 [-0.         -0.         -0.         -1.01389228 -0.        ]
 [-0.         -0.         -0.         -0.         -1.65850616]]


In [22]:
# Per-sample cross-entropy: each row's sum is just the target class's
# log-probability, so negating it gives that sample's loss
ce_loss = -y_true_log_probs.sum(axis=-1)
print(f"\nCE Loss per Sample: {ce_loss}")



CE Loss per Sample: [1.42699699 0.86328203 0.48183833 1.01389228 1.65850616]


In [23]:
# Dataset-level loss: the mean of the per-sample cross-entropy values
avg_ce_loss = ce_loss.mean()
print("\nAverage CE Loss for the Dataset:", avg_ce_loss)



Average CE Loss for the Dataset: 1.0889031585882822


In [29]:
import numpy as np

# Ground truth labels (one-hot encoded).
# Row k of the 3x3 identity matrix is the one-hot vector for class k+1, so
# indexing the identity by each sample's class index stacks the label rows.
y_true = np.eye(3, dtype=int)[[
    0,  # Sample 1: Class 1
    1,  # Sample 2: Class 2
    2,  # Sample 3: Class 3
    0,  # Sample 4: Class 1
    1,  # Sample 5: Class 2
]]

# Model predictions (logits): one row per sample, one column per class
logits = np.array(
    [
        [2.0, 1.0, 0.1],  # Sample 1
        [0.5, 2.5, 0.2],  # Sample 2
        [0.1, 0.2, 3.0],  # Sample 3
        [0.2, 0.1, 2.0],  # Sample 4
        [2.0, 0.1, 0.2],  # Sample 5
    ],
    dtype=float,
)


In [28]:
# Softmax function
def softmax(x, axis=-1):
    """Convert logits into probabilities along ``axis``.

    Parameters
    ----------
    x : array_like
        Logits.
    axis : int, default -1
        Axis over which the probabilities are normalized. The default
        normalizes over the last axis, i.e. each row of a
        ``(samples, classes)`` array.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries along ``axis``
        sum to 1.
    """
    # Numerical stability: subtracting the per-slice maximum cancels in the
    # ratio below, but keeps every exponent <= 0 so np.exp cannot overflow.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

# Compute softmax probabilities
probs = softmax(logits)

# Sanity checks before the probabilities are combined with the labels:
# there must be one probability per label entry, and every row must be a
# valid distribution. This fails fast when `logits` and `y_true` were defined
# by different (out-of-order) cell executions.
assert probs.shape == y_true.shape, (
    f"probs has shape {probs.shape} but y_true has shape {y_true.shape}"
)
np.testing.assert_allclose(probs.sum(axis=-1), 1.0)

print("Softmax Probabilities:\n", probs)


Softmax Probabilities:
 [[0.24002865 0.08830161 0.03590075 0.24002865 0.39574034]
 [0.05708111 0.42177553 0.04228673 0.42177553 0.05708111]
 [0.03398492 0.03755915 0.61764691 0.08358942 0.2272196 ]
 [0.08095254 0.36280409 0.05997111 0.36280409 0.13346817]
 [0.19042323 0.03147675 0.517624   0.07005279 0.19042323]]


In [27]:
# Compute log(probs)
# A probability can underflow to exactly 0 when its logit is far below the row
# maximum. np.log(0) is -inf, and the later `y_true * log_probs` would turn
# 0 * -inf into nan, poisoning the loss even for non-target classes. Flooring
# at the smallest positive normal float keeps every log finite; probabilities
# at or above that floor are left unchanged.
log_probs = np.log(np.maximum(probs, np.finfo(probs.dtype).tiny))
print("\nLog of Softmax Probabilities:\n", log_probs)



Log of Softmax Probabilities:
 [[-1.42699699 -2.42699699 -3.32699699 -1.42699699 -0.92699699]
 [-2.86328203 -0.86328203 -3.16328203 -0.86328203 -2.86328203]
 [-3.38183833 -3.28183833 -0.48183833 -2.48183833 -1.48183833]
 [-2.51389228 -1.01389228 -2.81389228 -1.01389228 -2.01389228]
 [-1.65850616 -3.45850616 -0.65850616 -2.65850616 -1.65850616]]


In [26]:
# Mask the log-probabilities with the one-hot labels: the element-wise product
# zeroes every entry except the target class's log-prob in each row
y_true_log_probs = log_probs * y_true
print(f"\ny_true * log(probs):\n {y_true_log_probs}")



y_true * log(probs):
 [[-1.42699699 -0.         -0.         -0.         -0.        ]
 [-0.         -0.86328203 -0.         -0.         -0.        ]
 [-0.         -0.         -0.48183833 -0.         -0.        ]
 [-0.         -0.         -0.         -1.01389228 -0.        ]
 [-0.         -0.         -0.         -0.         -1.65850616]]


In [25]:
# Per-sample cross-entropy: each row's sum is just the target class's
# log-probability, so negating it gives that sample's loss
ce_loss = -y_true_log_probs.sum(axis=-1)
print(f"\nCE Loss per Sample: {ce_loss}")



CE Loss per Sample: [1.42699699 0.86328203 0.48183833 1.01389228 1.65850616]


In [24]:
# Dataset-level loss: the mean of the per-sample cross-entropy values
avg_ce_loss = ce_loss.mean()
print("\nAverage CE Loss for the Dataset:", avg_ce_loss)



Average CE Loss for the Dataset: 1.0889031585882822
