# Building a Neural Network from Scratch: Part 7

### Calculating Network Error With Loss

In [1]:
# Importing Numpy
import numpy as np
from nnfs.datasets import spiral_data
import nnfs
nnfs.init()

### Case - 1: When The Class Targets are Just Numbers

Each target is a class index, so with 0 = Red and 1 = Green, [0, 1, 1] means [Red, Green, Green].

In [2]:
# Softmax probabilities for a batch of three samples:
# one row per sample, one column per class
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],    # Sample 0: class 0 is the most likely
    [0.1, 0.5, 0.4],    # Sample 1: class 1 is the most likely
    [0.02, 0.9, 0.08]   # Sample 2: class 1 is the most likely
])

# Ground-truth labels as integer class indices, one entry per sample
# (a 1-D array, i.e. len(class_targets.shape) == 1)
class_targets = np.array([0, 1, 1])

# Pairing row i with column class_targets[i] picks, for every sample,
# the probability the network assigned to the correct class:
#   softmax_outputs[0, 0], softmax_outputs[1, 1], softmax_outputs[2, 1]
row_indices = [0, 1, 2]
print(softmax_outputs[row_indices, class_targets])

[0.7 0.5 0.9]


In [3]:
# Same selection as before, but the row indices are generated from the
# batch size instead of being typed out by hand
sample_indices = range(len(softmax_outputs))
correct_confidences = softmax_outputs[sample_indices, class_targets]

# Per-sample cross-entropy loss: negative log of the correct-class probability
neg_log = -np.log(correct_confidences)
print(neg_log)

# Mean cross-entropy loss over the whole batch
average_loss = neg_log.mean()
print(average_loss)

[0.35667494 0.69314718 0.10536052]
0.38506088005216804


### Case - 2: When The Class Targets are One Hot Encoded

In [4]:
# One-hot encoded targets: each row holds a single 1 in the column of the
# correct class (a 2-D array, i.e. len(true_y_value.shape) == 2)
true_y_value = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0]
])

# Predicted probabilities, one row per sample
y_pred_clipped_value = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]
])


# Multiplying by the one-hot mask zeroes every wrong-class probability, so
# the row-wise sum leaves only the confidence of the correct class
masked_probabilities = true_y_value * y_pred_clipped_value
correct_confidences = masked_probabilities.sum(axis = 1)
neg_log = -np.log(correct_confidences)
print(neg_log)

# Mean cross-entropy loss over the whole batch
average_loss = neg_log.mean()
print(average_loss)

[0.35667494 0.69314718 0.10536052]
0.38506088005216804


### Implementing The Categorical Cross Entropy Class

In [5]:
# Common parent for loss functions: subclasses supply forward(), this class
# turns the per-sample losses into a single batch loss
class Loss:
    """Base class that averages the per-sample losses of a concrete loss."""

    def calculate(self, output, y):
        """Return the mean loss over a batch.

        Parameters:
            output: model predictions, passed unchanged to ``self.forward``.
            y: target values, passed unchanged to ``self.forward``.

        Returns:
            The result of ``np.mean`` over whatever ``self.forward`` returns.

        ``forward`` is not defined on this class; a derived class
        (e.g. Loss_CategoricalCrossEntropy) must provide it.
        """
        return np.mean(self.forward(output, y))


In [6]:
# Cross-entropy loss for classification tasks
# Inherits from a base Loss class
class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy loss.

    Targets may be given either as integer class indices (1-D) or as
    one-hot encoded rows (2-D). ``forward`` returns one loss value per
    sample.
    """

    # Forward pass to compute the loss for each sample
    def forward(self, y_pred, y_true):
        """Compute the per-sample negative log-likelihood.

        Parameters:
            y_pred: predicted probabilities, one row per sample and one
                column per class (array or nested sequence).
            y_true: either a 1-D collection of integer class indices, or a
                2-D one-hot collection laid out like ``y_pred``.

        Returns:
            1-D array holding ``-log(p_correct)`` for every sample
            (not averaged).

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # Accept plain Python sequences as well as NumPy arrays; arrays are
        # passed through unchanged
        y_pred = np.asarray(y_pred)
        y_true = np.asarray(y_true)

        # Get the number of samples in the batch
        no_samples = len(y_pred)

        # Clip predictions into [1e-7, 1 - 1e-7] so log(0) can never occur;
        # clipping both ends keeps the adjustment symmetric
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Case 1: Labels are integer class indices (e.g., [0, 2, 1])
        if y_true.ndim == 1:
            # For each sample index `i`, pick y_pred_clipped[i, y_true[i]]
            correct_confidences = y_pred_clipped[range(no_samples), y_true]

        # Case 2: Labels are one-hot encoded (e.g., [[1, 0, 0], [0, 1, 0]])
        elif y_true.ndim == 2:
            # Only the correct class has a 1, so the masked row-sum is the
            # confidence assigned to the correct class
            correct_confidences = np.sum(y_pred_clipped * y_true, axis = 1)

        # Any other rank used to fall through and fail later with an
        # UnboundLocalError; report the real problem instead
        else:
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got an array with {y_true.ndim} dimension(s)"
            )

        # Negative log of the correct-class probability is the cross-entropy
        # loss of each sample
        negative_log_likelihoods = -np.log(correct_confidences)

        # Return the loss per sample (not averaged)
        return negative_log_likelihoods

In [7]:
# Example network predictions: every row is the predicted probability
# distribution over the three classes for one sample
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],    # Sample 0: highest probability on class 0
    [0.1, 0.5, 0.4],    # Sample 1: highest probability on class 1
    [0.02, 0.9, 0.08]   # Sample 2: highest probability on class 1
])

# Matching ground truth in one-hot form: the 1 marks the correct class
class_targets = np.array([
    [1, 0, 0],  # Sample 0 belongs to class 0
    [0, 1, 0],  # Sample 1 belongs to class 1
    [0, 1, 0]   # Sample 2 belongs to class 1
])

# Instantiate the categorical cross-entropy loss
loss_function = Loss_CategoricalCrossEntropy()

# calculate() runs forward() to get the per-sample losses and then
# averages them into a single value
total_loss = loss_function.calculate(softmax_outputs, class_targets)

# Show the mean cross-entropy loss of the batch
print(total_loss)

0.38506088005216804


### Full Code Up to This Point

In [8]:
class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer parameters.

        Parameters:
            n_inputs: number of input features per sample.
            n_neurons: number of neurons (outputs) in the layer.
        """
        # Small random starting weights, shape (n_inputs, n_neurons)
        random_weights = np.random.randn(n_inputs, n_neurons)
        self.weights = random_weights * 0.01
        # One zero bias per neuron, stored as a single row so it broadcasts
        # over every sample of a batch
        self.biases = np.zeros((1, n_neurons))

    def forward(self, batch_inputs):
        """Run the forward pass and store the result in ``self.output``.

        Parameters:
            batch_inputs: batch of samples, one row per sample.
        """
        weighted_sum = np.dot(batch_inputs, self.weights)
        self.output = weighted_sum + self.biases

In [9]:
# Rectified Linear Unit activation
class Activation_ReLU:
    """ReLU activation: negative values become 0, the rest pass through."""

    def forward(self, inputs):
        """Apply ReLU element-wise and store the result in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

In [10]:
# Softmax activation: converts each row of scores into probabilities
class Activation_Softmax:
    """Row-wise softmax over a batch of score vectors."""

    def forward(self, inputs):
        """Compute the softmax of every row and store it in ``self.output``."""
        # Subtract each row's maximum before exponentiating so the largest
        # exponent is 0 and np.exp cannot overflow
        row_max = np.max(inputs, axis = 1, keepdims = True)
        exp_values = np.exp(inputs - row_max)

        # Divide by the row totals so every row sums to 1
        row_totals = np.sum(exp_values, axis = 1, keepdims = True)
        self.output = exp_values / row_totals

In [11]:
# Build the spiral dataset (samples = 100, classes = 3)
X, Y = spiral_data(samples = 100, classes = 3)

# First dense layer: 2 input features -> 3 outputs
dense_1 = Layer_Dense(2, 3)
# ReLU activation applied to the first dense layer's output
activation_1 = Activation_ReLU()

# Second dense layer: 3 inputs (the previous layer's outputs) -> 3 outputs
dense_2 = Layer_Dense(3, 3)
# Softmax activation applied to the second dense layer's output
activation_2 = Activation_Softmax()

# Categorical cross-entropy loss
loss_function = Loss_CategoricalCrossEntropy()


# Forward pass: dense 1 -> ReLU -> dense 2 -> softmax,
# each stage consuming the `.output` of the stage before it
dense_1.forward(X)
activation_1.forward(dense_1.output)
dense_2.forward(activation_1.output)
activation_2.forward(dense_2.output)

# activation_2.output[: 5] holds the predicted probabilities of the
# first five samples, should you want to inspect them

# Mean loss of the softmax probabilities against the targets
loss = loss_function.calculate(activation_2.output, Y)
print("Loss: ", loss)

# Predicted class = column with the highest probability in each row
# (argmax along axis 1, the class axis)
predictions = np.argmax(activation_2.output, axis = 1)
# One-hot targets are converted to class indices before comparing
if Y.ndim == 2:
    Y = np.argmax(Y, axis = 1)

# Fraction of samples whose predicted class matches the target
accuracy = np.mean(predictions == Y)

print("Accuracy: ", accuracy)

Loss:  1.0986104
Accuracy:  0.34


### Introducing Accuracy

In [12]:
# Predicted probabilities for three samples (one row per sample)
softmax_outputs = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]
])

# Ground-truth class indices for the same three samples
class_targets = np.array([0, 1, 1])

# The predicted class is the column holding the largest probability,
# so take the argmax along the second axis (axis index 1)
predictions = np.argmax(softmax_outputs, axis = 1)
# One-hot targets would be 2-D; reduce them to class indices first
if class_targets.ndim == 2:
    class_targets = np.argmax(class_targets, axis = 1)

# Comparing gives booleans (True counts as 1, False as 0), so their mean
# is the fraction of correct predictions
matches = predictions == class_targets
accuracy = matches.mean()

# Print accuracy
print("Accuracy: ", accuracy)

Accuracy:  1.0
