In [None]:
# To train a model we tweak its weights and biases to improve the model's accuracy.
# To do this we have to calculate how much error the model has.
# The algorithm that quantifies how wrong a model is is referred to as a loss function or
# a cost function. Ideally we want this loss to be 0.
# The more confident a model is in the correct answer, the lower the loss.

In [None]:
# Loss function used with a neural network that does regression: squared error
# (or mean squared error with neural networks)


# Categorical cross-entropy is explicitly used to compare a "ground-truth" probability (y-targets)
# to some predicted distribution (y-hat or predictions), so it makes sense to use cross-entropy here.
# It is also one of the most commonly used loss functions with a softmax activation on the output
# layer.

In [None]:
# The formula for calculating the categorical cross-entropy of y and y-hat is
# L_i = - Summation(y_i,j * log(y-hat_i,j))[summation over j]

# Where L_i denotes the sample loss value, i is the i-th sample in the set, j is the label/output
# index, y denotes the target values, and y-hat denotes the predicted values.

# Once we start coding the solution, we’ll simplify it further to -log(correct_class_confidence), the
# formula for which is:
# L_i = -log(y-hat_i,k) ; where k is an index of "true" probability

# Where L_i denotes sample loss value, i is the i-th sample in a set, k is the index of the target label
# (ground-truth label), y denotes the target values and y-hat denotes the predicted values.

In [2]:
# Example code to find the cross-entropy loss given the ground-truth result
import numpy as np

softmax_output = [0.7, 0.1, 0.2]  # example predicted probabilities for one sample

target_output = [1, 0, 0]  # one-hot ground truth: the first class is the correct one

# Cross-entropy of a single sample: weight each log-probability by its target
# value, add the three terms up and negate the total.
weighted_logs = [np.log(probability) * target
                 for probability, target in zip(softmax_output, target_output)]
loss = -(weighted_logs[0] + weighted_logs[1] + weighted_logs[2])
print(loss)

0.35667494393873245


In [2]:
# Categorical cross-entropy loss outputs a larger loss the lower the model's confidence
# in the correct class is.
# We’ve printed different log values for a few example confidences. When the confidence level
# equals 1, meaning the model is 100% “sure” about its prediction, the loss value for this sample
# equals 0. The loss value rises as the confidence level falls, and approaches 0 as the
# confidence approaches 1.

In [None]:
# The loss calculation for sparse labels is
# L = -1/N * Summation i=1 to N (log(y_i,t_i))
# where t_i is the target class index for sample i and y_i,t_i is the predicted value
# (confidence) for that class.

In [3]:
import numpy as np

# Softmax output for a batch of 3 samples; each row holds the predicted
# probabilities for the classes [dog, cat, human].
softmax_outputs_raw = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.0, 0.9, 0.08],
])
# A softmax output may contain an exact 0, and log(0) is -inf, which would make
# the loss infinite. Clipping every probability into [1e-7, 1 - 1e-7] avoids
# that while changing the result only insignificantly.
softmax_outputs = np.clip(softmax_outputs_raw, 1e-7, 1 - 1e-7)
print(softmax_outputs)

# Sparse labels: one class index per sample (0 = dog, 1 = cat, 2 = human).
# This batch is a dog followed by two cats; it contains no human sample.
class_targets = [0, 1, 1]  # dog, cat1, cat2

# Confidence the model assigned to the target class of each sample.
output = [softmax_outputs[row][label] for row, label in enumerate(class_targets)]

# One confidence per sample, in batch order.
print(output)

# Per-sample loss is the negative log of that confidence.
loss_list = -np.log(output)

print(loss_list)
# The batch loss is the mean of the per-sample losses.
avg_loss = loss_list.mean()
print("Avg Loss = ", avg_loss)

[[7.e-01 1.e-01 2.e-01]
 [1.e-01 5.e-01 4.e-01]
 [1.e-07 9.e-01 8.e-02]]
[0.7, 0.5, 0.9]
[0.35667494 0.69314718 0.10536052]
Avg Loss =  0.38506088005216804


In [4]:
# Loss calculation for one-hot encoded labels (i.e. each class is encoded using only 0s and 1s),
# e.g. dog = [1,0,0], cat = [0,1,0] and human = [0,0,1]

In [5]:
# For One Hot Encoded Labels the Loss calculation formula is 
# L = -1/N(Summation i=1 to N (Summation k=1 to K(t_i,k*log(y_i,k))))
# t_i,k = target value for sample i and class k and y_i,k is the predicted value or confidence.

In [6]:
import numpy as np

# Softmax output for a batch of 3 samples; each row holds the predicted
# probabilities for the classes [dog, cat, human].
softmax_outputs_raw = np.array([[0.7, 0.1, 0.2],
                                [0.1, 0.5, 0.4],
                                [0.0, 0.9, 0.08]])
# A softmax output may contain an exact 0, and log(0) is -inf, which would make
# the loss infinite. Clipping every probability into [1e-7, 1 - 1e-7] avoids
# that while changing the result only insignificantly.
softmax_outputs = np.clip(softmax_outputs_raw, 1e-7, 1 - 1e-7)
print(softmax_outputs)

# One-hot targets, one row per sample: [1,0,0] means dog, [0,1,0] means cat
# and [0,0,1] means human. This batch is a dog followed by two cats.
class_targets = np.array([[1, 0, 0],
                          [0, 1, 0],
                          [0, 1, 0]])

# Number of samples in the batch.
N = len(softmax_outputs)

# For every sample i: Summation over k of t_i,k * log(y_i,k). The zeros in the
# one-hot row cancel every term except the target class. Computed with array
# operations so the builtin `sum` is not overwritten by a notebook-level variable.
outputs = np.sum(class_targets * np.log(softmax_outputs), axis=1)

# L = -1/N * Summation over the samples of the per-sample sums.
avg_loss = -1/N * np.sum(outputs)
print("Avg Loss = ",avg_loss)

[[7.e-01 1.e-01 2.e-01]
 [1.e-01 5.e-01 4.e-01]
 [1.e-07 9.e-01 8.e-02]]
Avg Loss =  0.38506088005216804


In [7]:
# Categorical Cross-Entropy Loss Class

In [8]:
import numpy as np
class Loss_CategoricalCrossEntropy():
    """Categorical cross-entropy loss over a batch of predicted class probabilities."""

    #Forward Pass

    def Forward(self, y_pred, y_true):
        """Compute, print and return the average cross-entropy loss of a batch.

        Parameters
        ----------
        y_pred : array-like, shape (samples, classes)
            Predicted probabilities, one row per sample (e.g. softmax output).
        y_true : array-like
            Ground-truth labels, either sparse (1-D, one class index per
            sample) or one-hot encoded (2-D, same shape as ``y_pred``).

        Returns
        -------
        float
            Mean of the per-sample losses ``-log(confidence of target class)``.

        Raises
        ------
        ValueError
            If ``y_true`` is neither 1-D nor 2-D.
        """
        # No of samples in a batch
        samples = len(y_pred)

        # Accept plain lists as well as arrays for the labels.
        y_true = np.asarray(y_true)

        # clip the data to avoid log(0), which would make the loss infinite
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # checking whether the class target/real values are sparse labels or one hot encoded
        if y_true.ndim == 1:
            # Sparse labels: pick the confidence at each sample's target index.
            correct_confidences = [distribution[targ_idx]
                                   for targ_idx, distribution in zip(y_true, y_pred_clipped)]
            avg_loss = np.mean(-np.log(correct_confidences))
        elif y_true.ndim == 2:
            # One-hot labels: the zeros cancel every term except the target class.
            per_sample = np.sum(y_true * np.log(y_pred_clipped), axis=1)
            # Divide by the batch size of this call, not by a notebook-level global.
            avg_loss = -1/samples * np.sum(per_sample)
        else:
            raise ValueError(
                "y_true must be 1-D (sparse labels) or 2-D (one-hot labels), "
                "got %d dimensions" % y_true.ndim
            )

        print("Avg Loss = ", avg_loss)
        return avg_loss

def test():
    """Run Loss_CategoricalCrossEntropy on a 3-sample batch with one-hot targets and print the loss."""
    # Predicted probabilities, one row per sample.
    predictions = np.array([
        [0.7, 0.1, 0.2],
        [0.1, 0.5, 0.4],
        [0.0, 0.9, 0.08],
    ])
    # One-hot ground truth for the same three samples.
    one_hot_targets = np.array([
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
    ])
    loss = Loss_CategoricalCrossEntropy().Forward(predictions, one_hot_targets)
    print("Loss = ",loss)

test()

Avg Loss =  0.38506088005216804
Loss =  0.38506088005216804
