In [4]:
# Implementing Cross-Entropy Loss from Scratch
import numpy as np
import torch

# Raw, unnormalized class scores (logits), i.e. what a model emits before softmax.
# Layout: one row per sample, one column per class -> 2 samples x 3 classes.
logits = torch.tensor(
    [
        [2.0, 1.0, 0.1],  # sample 0
        [0.5, 2.5, 0.3],  # sample 1
    ]
)
print(f'logits:{logits}')

# Interpretation
# Each row holds the scores of one sample; each entry is the score of one class.
# Logits are not probabilities: they are unbounded and do not sum to 1.
# Sample 0:
#   class 0 -> 2.0 (highest score for this sample)
#   class 1 -> 1.0
#   class 2 -> 0.1
# Sample 1:
#   class 1 has the highest logit (2.5).

logits:tensor([[2.0000, 1.0000, 0.1000],
        [0.5000, 2.5000, 0.3000]])


In [9]:
# Implementing Cross-Entropy Loss from Scratch

# Turn the logits into probabilities: softmax is taken along dim=1 (the class axis),
# so every row (sample) is normalized independently.
softmax = logits.softmax(dim=1)

print("\nSoftmax Probabilities:", softmax, sep="\n")

# Interpretation
# After softmax every row is a probability distribution over the classes (it sums to 1).
# Sample 0:
#   class 0 -> 0.6590
#   class 1 -> 0.2424
#   class 2 -> 0.0986
#
# Sample 1:
#   class 1 receives the highest probability, 0.8025.


Softmax Probabilities:
tensor([[0.6590, 0.2424, 0.0986],
        [0.1086, 0.8025, 0.0889]])


In [6]:
# Implementing Cross-Entropy Loss from Scratch
# Take the logarithm of the softmax probabilities.
# log_softmax(logits) is mathematically log(softmax(logits)), but it is computed as a
# single numerically stable operation. Calling torch.log on an already-computed softmax
# returns -inf as soon as a probability underflows to 0 (very negative relative logit),
# which would turn the final loss into inf/nan.
log_probs = torch.nn.functional.log_softmax(logits, dim=1)
print(log_probs)

# Interpretation
# These are the log-probabilities of every class for every sample.
# The log of a number between 0 and 1 is never positive (it is 0 only for probability 1).
# These log values tell us how confident the model is in its predictions.
# More negative values (e.g., -2.3170, -2.4200) correspond to low probabilities.
# Values closer to 0 (e.g., -0.2200) correspond to higher probabilities.

tensor([[-0.4170, -1.4170, -2.3170],
        [-2.2200, -0.2200, -2.4200]])


In [7]:
# Implementing Cross-Entropy Loss from Scratch

# Ground-truth classes: sample 0 belongs to class 0, sample 1 belongs to class 1.
true_labels = torch.tensor([0,1])

# Negative log likelihood of the true class, one value per sample:
# for each row, gather the log probability sitting in the column named by that row's
# true label (index shape (N, 1) -> result (N, 1) -> squeezed to (N,)), then flip the sign.
negative_log_likelihood = -log_probs.gather(1, true_labels.unsqueeze(1)).squeeze(1)

print("\nNegative Log Likelihood for True Labels:", negative_log_likelihood, sep="\n")


# Interpretation
# Each entry is -log(p), where p is the probability the model gave to the correct class.
# Sample 0 (true class 0): -log(0.6590) = 0.4170
# Sample 1 (true class 1): -log(0.8025) = 0.2200
# The closer p is to 1, the closer the value is to 0; sample 1 has the smaller value
# because the model assigned its correct class a higher probability.



Negative Log Likelihood for True Labels:
tensor([0.4170, 0.2200])


In [8]:
# Implementing Cross-Entropy Loss from Scratch

# The batch cross-entropy loss is the mean of the per-sample negative log likelihoods.
cross_entropy_loss = torch.mean(negative_log_likelihood)

# .item() unwraps the 0-dim tensor into a plain Python float for display.
print("\nCross-Entropy Loss:", cross_entropy_loss.item(), sep="\n")

# Interpretation
# Averaging the two per-sample values gives the loss for the batch:
# (0.4170 + 0.2200) / 2 = 0.3185
#
# This single number is what training tries to minimize.
# A lower cross-entropy loss means the model is making better predictions
# (i.e., assigning higher probabilities to the correct classes).
#
# A loss of 0.3185 suggests that the model is doing reasonably well, but there is still room for improvement.


Cross-Entropy Loss:
0.3185397684574127


In [10]:
# Implementing Cross-Entropy Loss from Scratch
import torch

# End-to-end recap: the whole cross-entropy computation in one self-contained cell.

# Step 1: Create a tensor of logits (raw scores before softmax)
logits = torch.tensor([[2.0, 1.0, 0.1],
                       [0.5, 2.5, 0.3]])  # 2 samples, 3 classes
print("Logits:")
print(logits)

# Step 2: Apply softmax to logits to get probabilities (shown for illustration only;
# the loss below is derived from log_softmax, not from this tensor)
softmax = torch.nn.functional.softmax(logits, dim=1)
print("\nSoftmax Probabilities:")
print(softmax)

# Step 3: Take the log of the softmax probabilities.
# log_softmax(logits) is mathematically log(softmax(logits)) but is computed as one
# numerically stable operation; torch.log(softmax) would return -inf whenever a
# probability underflows to 0.
log_probs = torch.nn.functional.log_softmax(logits, dim=1)
print("\nLog of Softmax Probabilities:")
print(log_probs)

# Step 4: True labels (0 and 1)
true_labels = torch.tensor([0, 1])

# Extract the log probability of the true class for each sample (row i, column
# true_labels[i]) and negate it to get the per-sample negative log likelihood.
negative_log_likelihood = -log_probs[range(len(true_labels)), true_labels]
print("\nNegative Log Likelihood for True Labels:")
print(negative_log_likelihood)

# Step 5: Compute the cross-entropy loss (mean of negative log likelihoods)
cross_entropy_loss = negative_log_likelihood.mean()
print("\nCross-Entropy Loss:")
print(cross_entropy_loss.item())

# Sanity check: the from-scratch value must agree with PyTorch's built-in loss,
# which takes raw logits and class indices and averages over the batch by default.
assert torch.isclose(
    cross_entropy_loss,
    torch.nn.functional.cross_entropy(logits, true_labels),
), "from-scratch cross-entropy disagrees with torch.nn.functional.cross_entropy"

Logits:
tensor([[2.0000, 1.0000, 0.1000],
        [0.5000, 2.5000, 0.3000]])

Softmax Probabilities:
tensor([[0.6590, 0.2424, 0.0986],
        [0.1086, 0.8025, 0.0889]])

Log of Softmax Probabilities:
tensor([[-0.4170, -1.4170, -2.3170],
        [-2.2200, -0.2200, -2.4200]])

Negative Log Likelihood for True Labels:
tensor([0.4170, 0.2200])

Cross-Entropy Loss:
0.3185397684574127
