# Calculate Loss

- A loss function calculates the difference between the predicted value and the actual value.
- In other words, it measures how wrong the model is.
- Loss function == Cost function == Objective function

## Loss function for regression problems: Mean Squared Error (MSE)
$$
MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y_i})^2
$$

where $y_i$ is the actual value and $\hat{y_i}$ is the predicted value.

## Loss function for classification problems: Cross Entropy Loss
$$
\text{Cross Entropy Loss} = - \sum_{i=1}^{n} y_i \log(\hat{y_i})
$$

where $y_i$ is the actual value and $\hat{y_i}$ is the predicted value.

- if we take a softmax output of [0.7, 0.1, 0.2] and targets of [1, 0, 0], we can apply the calculations as follows:

$$
- \sum_{i=1}^{n} y_i \log(\hat{y_i}) = -(1 \log(0.7) + 0 \log(0.1) + 0 \log(0.2)) = - \log(0.7) = 0.35667494393873245
$$

In [2]:
import math

# Predicted class probabilities (softmax output) for a single sample
softmax_output = [0.7, 0.1, 0.2]

# One-hot ground truth for the same sample: class 0 is the correct one
target_output = [1, 0, 0]

# Categorical cross-entropy: negated sum of target * log(prediction) over
# all classes; classes whose target is 0 contribute nothing to the sum
weighted_logs = [
    math.log(predicted) * expected
    for predicted, expected in zip(softmax_output, target_output)
]
loss = -(weighted_logs[0] + weighted_logs[1] + weighted_logs[2])

print(loss)

0.35667494393873245


In [4]:
# With a one-hot target only the correct class (index 0) contributes,
# so the whole sum collapses to a single negative log
target_confidence = softmax_output[0]
loss = -math.log(target_confidence)
loss

0.35667494393873245

In [5]:
import math
# When the confidence level equals 1, meaning the model is 100% “sure” about its prediction,
# the loss value for this sample equals 0.
# The values printed here are log(confidence); the loss is their negation,
# so it grows as the confidence drops.
for confidence in (1., 0.95, 0.9, 0.8):
    print(math.log(confidence))
print('...')
for confidence in (0.2, 0.1, 0.05, 0.01):
    print(math.log(confidence))


0.0
-0.05129329438755058
-0.10536051565782628
-0.2231435513142097
...
-1.6094379124341003
-2.3025850929940455
-2.995732273553991
-4.605170185988091


$$
e^x = 5.2 \implies x = \log(5.2)
$$

In [6]:
import numpy as np

# Solve e**x = b for x using the natural logarithm
b = 5.2
x = np.log(b)
print(x)


1.6486586255873816


In [7]:
import math
# Raising e to the logarithm computed above recovers the original value
# (5.2), up to floating-point rounding
print(math.e ** 1.6486586255873816)


5.199999999999999


In [8]:
# Batch of softmax outputs: each row is one sample, each column is the
# confidence produced by one output neuron (one class)
softmax_outputs = [[0.7, 0.1, 0.2],
                    [0.1, 0.5, 0.4],
                    [0.02, 0.9, 0.08]]

# Target class index for each sample, in the same order as the rows above
class_targets = [0, 1, 1] # dog, cat, cat

In [10]:
# For every sample, pick the confidence the model gave to the correct class
for target_index, sample_probs in zip(class_targets, softmax_outputs):
    print(sample_probs[target_index])

0.7
0.5
0.9


In [12]:
# A NumPy array can be indexed with two index lists at once
softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])
class_targets = [0, 1, 1]

# Pair each row index with that sample's target class to select
# one confidence per sample
sample_rows = [0, 1, 2]
print(softmax_outputs[sample_rows, class_targets])

[0.7 0.5 0.9]


In [15]:
# Derive the row indices from the batch size instead of hard-coding them
sample_rows = range(len(softmax_outputs))
print(softmax_outputs[sample_rows, class_targets])

[0.7 0.5 0.9]


In [22]:
# Per-sample loss: negative log of the confidence given to the correct class
target_confidences = softmax_outputs[range(len(softmax_outputs)), class_targets]
neg_log = -np.log(target_confidences)
neg_log


array([0.35667494, 0.69314718, 0.10536052])

In [23]:
# Batch loss is the arithmetic mean of the per-sample losses
average_loss = neg_log.mean()
average_loss

0.38506088005216804

In [24]:
import numpy as np
# Batch of softmax outputs: one row per sample, one column per class
softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])

# One hot encoded targets: one row per sample with a single 1 in the
# column of the correct class
class_targets = np.array([[1, 0, 0],
                            [0, 1, 0],
                            [0, 1, 0]])

# Sparse alternative: one class index per sample
# class_targets = [0, 1, 1]

# Normalise to an ndarray so that a plain Python list (such as the sparse
# alternative) can be inspected for its number of dimensions as well
class_targets = np.asarray(class_targets)

# Probabilities for target values -
# only if categorical labels
if class_targets.ndim == 1:
    correct_confidences = softmax_outputs[
        range(len(softmax_outputs)),
        class_targets
    ]

# Mask values - only for one-hot encoded labels: multiplying by the
# one-hot rows zeroes every confidence except the correct class
elif class_targets.ndim == 2:
    correct_confidences = np.sum(
        softmax_outputs * class_targets,
        axis=1
    )

# Any other rank is not a supported label format; fail with a clear message
# instead of hitting an undefined variable below
else:
    raise ValueError(
        'class_targets must be 1-D (class indices) or 2-D (one-hot), '
        f'got {class_targets.ndim} dimensions'
    )

# Losses
neg_log = -np.log(correct_confidences)

average_loss = np.mean(neg_log)

print(average_loss)


0.38506088005216804


$$
y = \log(0) \implies e^y = 0 \implies \text{undefined}
$$

In [25]:
import numpy as np
# np.log(0) evaluates to -inf (NumPy also emits a warning), so the negated
# value - the loss for a confidence of 0 - is infinite
-np.log(0)

  -np.log(0)


inf

In [26]:
# A confidence of exactly 1 gives a loss of zero (displayed as -0.0
# because of the negation)
-np.log(1)


-0.0

In [29]:
# Since the log of 0 is negative infinity, a confidence of 0 has to be
# replaced by a small positive value (here 1e-7) to keep the loss finite
-np.log(1e-7)


16.11809565095832

In [30]:
# Shifting a confidence of 1 upward by the same 1e-7 would produce a
# slightly negative loss
-np.log(1+1e-7)


-9.999999505838704e-08

In [31]:
# Bounding the confidence at 1 - 1e-7 instead keeps the loss small and positive
-np.log(1-1e-7)


1.0000000494736474e-07

# Loss Function

In [33]:
class Loss:
    """Base class for loss functions; subclasses supply ``forward``."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        ``output`` (model predictions) and ``y`` (ground-truth targets) are
        handed unchanged to ``self.forward``, which must return one loss
        value per sample.
        """
        per_sample_losses = self.forward(output, y)
        return np.mean(per_sample_losses)

In [47]:
# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy loss (tracing version).

    Every intermediate value is printed on purpose so that each step of the
    calculation can be inspected in the cell output.
    """

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the correct class per sample.

        y_pred: predicted class probabilities, one row per sample.
        y_true: either class indices (1-D) or one-hot rows (2-D); plain
            lists are accepted as well as arrays.

        Raises ValueError when y_true is neither 1-D nor 2-D.
        """
        # Number of samples in a batch
        samples = len(y_pred)
        print(y_pred)

        # Clip data so that log() never receives 0 (which would give an
        # infinite loss)
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)
        print(y_pred_clipped)

        # Plain lists have no ndim/shape, so normalise to an array first
        y_true = np.asarray(y_true)

        # categorical labels: pick one confidence per row by class index
        if y_true.ndim == 1:
            correct_confidences = y_pred_clipped[range(samples), y_true]
        # one-hot encoded labels: the mask zeroes all but the correct class
        elif y_true.ndim == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        # anything else is not a supported label format
        else:
            raise ValueError(
                'y_true must be 1-D (class indices) or 2-D (one-hot), '
                f'got {y_true.ndim} dimensions'
            )

        print(correct_confidences)

        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        print(negative_log_likelihoods)
        return negative_log_likelihoods

In [48]:
# The first sample is predicted with a confidence of exactly 1.0 and the
# other two classes with 0, which makes the effect of clipping visible
softmax_outputs = np.array([[1.0, 0, 0],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])

# Target class index of each sample
class_targets = np.array([0, 1, 1])

# Mean categorical cross-entropy over the batch
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.calculate(output=softmax_outputs, y=class_targets)
print(loss)

[[1.   0.   0.  ]
 [0.1  0.5  0.4 ]
 [0.02 0.9  0.08]]
[[9.999999e-01 1.000000e-07 1.000000e-07]
 [1.000000e-01 5.000000e-01 4.000000e-01]
 [2.000000e-02 9.000000e-01 8.000000e-02]]
[0.9999999 0.5       0.9      ]
[1.00000005e-07 6.93147181e-01 1.05360516e-01]
0.2661692654059255


# Full Code

In [50]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()


# Dense layer
class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Weights get shape (n_inputs, n_neurons) and are drawn from
        ``np.random.randn`` scaled by 0.01; biases are a (1, n_neurons)
        row of zeros.
        """
        weight_shape = (n_inputs, n_neurons)
        self.weights = 0.01 * np.random.randn(*weight_shape)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Store the layer output for ``inputs`` in ``self.output``."""
        # The single bias row is broadcast over every sample of the batch
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases


# ReLU activation
class Activation_ReLU:
    """Rectified linear unit: negative inputs become 0, the rest pass through."""

    def forward(self, inputs):
        """Store the element-wise ``max(0, inputs)`` in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified



# Softmax activation
class Activation_Softmax:
    """Softmax over axis 1: turns each row of scores into probabilities."""

    def forward(self, inputs):
        """Store the row-wise softmax of ``inputs`` in ``self.output``."""
        # Subtract each row's maximum before exponentiating so the largest
        # exponent is 0; the normalised result is unchanged by this shift
        row_max = np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(inputs - row_max)

        # Divide every row by its own sum so that it adds up to 1
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals


# Common loss class
class Loss:
    """Base class for loss functions; subclasses implement ``forward``."""

    def forward(self, y_pred, y_true):
        """Return one loss value per sample; must be overridden."""
        raise NotImplementedError(
            f'{type(self).__name__} must implement forward()'
        )

    # Calculates the data loss
    # given model output and ground truth values
    def calculate(self, output, y):
        """Return the mean of the per-sample losses from ``self.forward``."""

        # Calculate sample losses
        sample_losses = self.forward(output, y)

        # Calculate mean loss
        data_loss = np.mean(sample_losses)

        # Return loss
        return data_loss


# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy for class-index or one-hot targets."""

    # Forward pass
    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the correct class per sample.

        y_pred: predicted class probabilities, one row per sample.
        y_true: either class indices (1-D) or one-hot rows (2-D); plain
            lists are accepted as well as arrays.

        Raises ValueError when y_true is neither 1-D nor 2-D.
        """

        # Number of samples in a batch
        samples = len(y_pred)

        # Clip data so that log() never receives 0 (which would give an
        # infinite loss)
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Plain lists have no ndim/shape, so normalise to an array first
        y_true = np.asarray(y_true)

        # Probabilities for target values -
        # only if categorical labels
        if y_true.ndim == 1:
            correct_confidences = y_pred_clipped[
                range(samples),
                y_true
            ]

        # Mask values - only for one-hot encoded labels
        elif y_true.ndim == 2:
            correct_confidences = np.sum(
                y_pred_clipped*y_true,
                axis=1
            )

        # Anything else is not a supported label format
        else:
            raise ValueError(
                'y_true must be 1-D (class indices) or 2-D (one-hot), '
                f'got {y_true.ndim} dimensions'
            )

        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods



# Create dataset
X, y = spiral_data(samples=100, classes=3)

# Build the network pieces in the order the data flows through them:
# a Dense layer with 2 input features and 3 output values ...
dense1 = Layer_Dense(2, 3)
# ... its ReLU activation ...
activation1 = Activation_ReLU()
# ... a second Dense layer with 3 input features (the outputs of the
# previous layer) and 3 output values ...
dense2 = Layer_Dense(3, 3)
# ... and a Softmax activation producing the class probabilities
activation2 = Activation_Softmax()

# Create loss function
loss_function = Loss_CategoricalCrossentropy()

# Forward pass: every stage consumes the output of the stage before it,
# starting from the training data
stage_input = X
for stage in (dense1, activation1, dense2, activation2):
    stage.forward(stage_input)
    stage_input = stage.output

# Let's see output of the first few samples:
print(activation2.output[:5])

# Perform a forward pass through loss function
# it takes the output of the softmax activation and the targets,
# and returns the mean loss
loss = loss_function.calculate(activation2.output, y)

# Print loss value
print('loss:', loss)

# Calculate accuracy from output of activation2 and targets
# argmax along axis 1 gives the predicted class of every sample
predictions = np.argmax(activation2.output, axis=1)

# One-hot targets are reduced to class indices before comparing
if y.ndim == 2:
    y = np.argmax(y, axis=1)

# Share of samples whose predicted class matches the target
accuracy = np.mean(predictions == y)

# Print accuracy
print('acc:', accuracy)


[[0.33333334 0.33333334 0.33333334]
 [0.3333332  0.3333332  0.33333364]
 [0.3333329  0.33333293 0.3333342 ]
 [0.3333326  0.33333263 0.33333477]
 [0.33333233 0.3333324  0.33333528]]
loss: 1.0986104
acc: 0.34


# Accuracy Calculation

In [49]:
import numpy as np

# Probabilities of 3 samples
softmax_outputs = np.array([[0.7, 0.2, 0.1],
                            [0.5, 0.1, 0.4],
                            [0.02, 0.9, 0.08]])

# Target (ground-truth) labels for 3 samples
class_targets = np.array([0, 1, 1])

# Predicted class of each sample: index of the largest probability
# along the second axis (axis of index 1)
predictions = np.argmax(softmax_outputs, axis=1)
print(predictions)

# If targets are one-hot encoded - convert them to single values
if class_targets.ndim == 2:
    class_targets = np.argmax(class_targets, axis=1)

# True evaluates to 1; False to 0, so the mean of the comparison
# is the share of correct predictions
is_correct = predictions == class_targets
accuracy = np.mean(is_correct)


print('acc:', accuracy)


[0 0 1]
acc: 0.6666666666666666
