# Categorical Cross Entropy Loss

## Case 1
#### Class targets are just integer class indices, e.g. [0,1,2]

## CROSS ENTROPY LOSS BUILDING BLOCKS IN PYTHON

In [1]:
import numpy as np

In [2]:
# Softmax confidences: one row per sample, one column per class.
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])
# Sparse targets: the index of the correct class for each sample.
class_targets = [0, 1, 1]
# Pairing every row index with its target class picks, per sample, the
# confidence that was assigned to the correct class.
print(softmax_outputs[np.arange(len(softmax_outputs)), class_targets])

[0.7 0.5 0.9]


In [6]:
# Indexing with the row indices alone selects every row, so this prints the
# negative log of the whole confidence matrix.
print(-np.log(softmax_outputs[np.arange(len(softmax_outputs))]))

# Adding the target class as the column index keeps only the confidence of
# the correct class for each sample.
target_confidences = softmax_outputs[
    np.arange(len(softmax_outputs)), class_targets
]
# Per-sample loss is the negative log of that confidence.
neg_log = -np.log(target_confidences)
# The batch loss is the mean of the per-sample losses.
average_loss = np.mean(neg_log)
print(average_loss)

[[0.35667494 2.30258509 1.60943791]
 [2.30258509 0.69314718 0.91629073]
 [3.91202301 0.10536052 2.52572864]]
0.38506088005216804


## Case 2

#### Targets are one-hot encoded
| Class  | R | G | B |
|--------|---|---|---|
| Target | 1 | 0 | 0 |
|        | 0 | 1 | 0 |
|        | 0 | 1 | 0 |




### Step 1: Element-wise Multiplication

| Class  | R   | G   | B   |
|--------|-----|-----|-----|
| Target | 1   | 0   | 0   |
|        | 0   | 1   | 0   |
|        | 0   | 1   | 0   |

**Multiplied by:**

| Prediction | R    | G    | B    |
|------------|------|------|------|
|            | 0.7  | 0.1  | 0.2  |
|            | 0.1  | 0.5  | 0.4  |
|            | 0.02 | 0.9  | 0.08 |

**Results of element-wise multiplication:**

| Result     | R    | G    | B    |
|------------|------|------|------|
|            | 0.7  | 0.0  | 0.0  |
|            | 0.0  | 0.5  | 0.0  |
|            | 0.0  | 0.9  | 0.0  |

### Step 2: Summing across each row

The summation is performed row-wise after element-wise multiplication:

| Summation  |
|------------|
| 0.7        |
| 0.5        |
| 0.9        |

This final result is the output of summing each row of the element-wise multiplied matrix.

In [13]:
# One-hot targets: row i has a 1 in the column of sample i's correct class.
y_true_check = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
])

# Predicted confidences, one row per sample. The first row is
# [0.7, 0.1, 0.2] so that it matches the prediction table and the
# `softmax_outputs` array used in the other cells.
y_pred_clipped_check = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])

# Element-wise multiplication zeroes every confidence except the one that
# belongs to the correct class.
A = y_true_check * y_pred_clipped_check
# Summing each row collapses it to that single surviving confidence.
B = np.sum(A, axis=1)

# Per-sample loss: negative log of the correct-class confidence.
C = -np.log(B)

print(C)

# Batch loss: mean of the per-sample losses.
print(np.mean(C))


[0.35667494 0.69314718 0.10536052]
0.38506088005216804


#### IMPLEMENTING THE LOG LOSS CLASS

In [2]:
# Common loss class

class Loss:
    """Base class for loss functions.

    This class does not define ``forward`` itself; ``calculate`` calls
    ``self.forward(output, y)`` and expects it to return the per-sample
    losses, so a subclass has to provide it.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        Args:
            output: Model output, passed unchanged to ``forward``.
            y: Ground-truth values, passed unchanged to ``forward``.

        Returns:
            The mean of the values returned by ``self.forward(output, y)``.
        """
        # Average the per-sample losses into a single data loss.
        return np.mean(self.forward(output, y))

In [3]:
# Cross-entropy loss

class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy loss.

    Targets may be given either as sparse class indices (1-D) or as
    one-hot rows (2-D), and either as NumPy arrays or as plain Python
    sequences.
    """

    # Confidences are clipped to [_EPSILON, 1 - _EPSILON] before taking the
    # log or dividing, so that an exact 0 never produces inf/nan. Both ends
    # are clipped so the mean is not dragged towards either value.
    _EPSILON = 1e-7

    # Forward pass
    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of every sample.

        Args:
            y_pred: Predicted confidences, one row per sample and one
                column per class.
            y_true: Either 1-D class indices or 2-D one-hot rows.

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: If ``y_true`` is neither 1-D nor 2-D.
        """
        # Accept lists as well as arrays; lists have no ``.shape``/``.ndim``.
        y_true = np.asarray(y_true)

        # Number of samples in a batch
        samples = len(y_pred)

        # Clip data to prevent log(0), which would give an infinite loss
        y_pred_clipped = np.clip(y_pred, self._EPSILON, 1 - self._EPSILON)

        if y_true.ndim == 1:
            # Sparse labels: pick, per row, the confidence at the target index
            correct_confidences = y_pred_clipped[
                range(samples),
                y_true
            ]
        elif y_true.ndim == 2:
            # One-hot labels: the mask zeroes everything but the target
            # confidence, and the row sum extracts it
            correct_confidences = np.sum(
                y_pred_clipped * y_true,
                axis=1
            )
        else:
            # Without this branch correct_confidences would be unbound below
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimension(s)"
            )

        # Losses
        return -np.log(correct_confidences)

    # Backward pass
    def backward(self, dvalues, y_true):
        """Compute the gradient of the loss and store it in ``self.dinputs``.

        Args:
            dvalues: Values the gradient is taken with respect to, one row
                per sample and one column per class.
            y_true: Either 1-D class indices or 2-D one-hot rows.
        """
        dvalues = np.asarray(dvalues)
        y_true = np.asarray(y_true)

        # Number of samples
        samples = len(dvalues)
        # Number of labels in every sample;
        # the first sample is used to count them
        labels = len(dvalues[0])

        # If labels are sparse, turn them into one-hot vectors
        if y_true.ndim == 1:
            y_true = np.eye(labels)[y_true]

        # Clip with the same bounds as forward(): an exact 0 in dvalues
        # would otherwise give -0/0 = nan (or -1/0 = -inf) in the gradient
        safe_dvalues = np.clip(dvalues, self._EPSILON, 1 - self._EPSILON)

        # Calculate gradient, then normalize it by the number of samples
        self.dinputs = -y_true / safe_dvalues / samples

In [18]:
# Softmax confidences: one row per sample, one column per class.
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])

# One-hot targets: row i has a 1 in the column of sample i's correct class.
class_targets = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
])

# calculate() runs forward() to get the per-sample losses and averages them.
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.calculate(softmax_outputs, class_targets)
print(loss)

0.38506088005216804
