## Categorical Cross Entropy

In [1]:
import math
import numpy as np

In [2]:
# Example softmax output of the network for a single sample
softmax_output = [0.7, 0.1, 0.2]
# One-hot ground truth: class 0 is the correct class
target_output = [1, 0, 0]
# Categorical cross-entropy: the negated sum over classes of
# target * log(prediction). With a one-hot target only the term of the
# correct class contributes to the sum.
loss = -sum(math.log(predicted) * target
            for predicted, target in zip(softmax_output, target_output))
print(loss)

0.35667494393873245


In [3]:
# Softmax outputs for a batch of three samples (one row per sample)
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])
# Sparse targets: the index of the correct class for each sample
class_targets = [0, 1, 1]  # dog, cat, cat

# Pair every row index (0, 1, 2) with its target class to pick out the
# confidence assigned to the correct class, then take the negative log
# to obtain one loss value per sample.
print(-np.log(softmax_outputs[np.arange(len(softmax_outputs)), class_targets]))


[0.35667494 0.69314718 0.10536052]


## Merging Everything

In [4]:
import numpy as np

In [5]:
class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer parameters.

        Weights have shape (n_inputs, n_neurons) and are drawn from a
        standard normal distribution scaled by 0.01; biases have shape
        (1, n_neurons) and start at zero.
        """
        random_weights = np.random.randn(n_inputs, n_neurons)
        self.weights = random_weights * 0.01
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute the layer output for `inputs` and store it in `self.output`."""
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases


In [6]:
class Activation_ReLU:
    """Rectified linear unit activation."""

    def forward(self, inputs):
        """Replace negative entries of `inputs` with 0; result goes to `self.output`."""
        rectified = np.maximum(0, inputs)
        self.output = rectified


In [7]:
# Softmax activation
class Activation_Softmax:
    """Softmax activation applied independently to every row of the input."""

    def forward(self, inputs):
        """Turn each row of `inputs` into values that sum to 1.

        The result is stored in `self.output`.
        """
        # Shift every row by its own maximum before exponentiating, so the
        # largest exponent in a row is exp(0) = 1
        row_max = np.max(inputs, axis=1, keepdims=True)
        unnormalized = np.exp(inputs - row_max)
        # Divide each row by its total to normalize it
        row_totals = np.sum(unnormalized, axis=1, keepdims=True)
        self.output = unnormalized / row_totals

In [8]:
class Loss:
    """Common base for loss classes.

    Subclasses are expected to provide ``forward(output, y)`` returning
    the per-sample losses.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses from `self.forward(output, y)`."""
        return np.mean(self.forward(output, y))


In [9]:
class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy loss for predicted class probabilities."""

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the correct class per sample.

        Parameters:
            y_pred: 2-D array-like of predicted probabilities, one row per
                sample and one column per class.
            y_true: ground truth, either a 1-D sequence of integer class
                indices (one per sample) or a 2-D one-hot encoded array
                with the same shape as `y_pred`. Plain Python lists are
                accepted as well as NumPy arrays.

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: if `y_true` is neither 1-D nor 2-D.
        """
        # Accept lists as well as arrays; without this a list of labels
        # fails on the dimensionality check below
        y_pred = np.asarray(y_pred)
        y_true = np.asarray(y_true)

        # Number of samples in the batch
        samples = len(y_pred)

        # Clip so that log() never sees exactly 0 (which would give inf);
        # clip both sides so the mean is not dragged towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Sparse labels: pick the predicted probability of the target
            # class in each row
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            # One-hot labels: mask out everything but the target class
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D class indices or 2-D one-hot labels, "
                f"got {y_true.ndim} dimensions"
            )

        # Per-sample losses
        return -np.log(correct_confidences)


In [10]:
# We will use the spiral dataset
def create_data(samples, classes):
    """Generate a 2-D spiral dataset with `samples` points per class.

    Returns a tuple (X, y): X is a float array of shape
    (samples * classes, 2) holding the point coordinates and y is a
    uint8 array of shape (samples * classes,) holding the class number
    of every point. The points of one class occupy a contiguous run of
    rows. Angles are perturbed with noise from `np.random.randn`.
    """
    total_points = samples * classes
    X = np.zeros((total_points, 2))
    y = np.zeros(total_points, dtype='uint8')
    for class_number in range(classes):
        first_row = samples * class_number
        ix = range(first_row, first_row + samples)
        # Radius grows linearly from 0 to 1 along the arm
        radius = np.linspace(0.0, 1, samples)
        # Each class covers its own angular segment, plus Gaussian noise
        noise = np.random.randn(samples) * 0.2
        t = np.linspace(class_number * 4, (class_number + 1) * 4, samples) + noise
        angle = t * 2.5
        X[ix] = np.c_[radius * np.sin(angle), radius * np.cos(angle)]
        y[ix] = class_number
    return X, y

In [11]:
# Spiral dataset: 100 points for each of 3 classes
X, y = create_data(samples=100, classes=3)

# --- Build the network ---
# First Dense layer: 2 input features, 3 output values
dense1 = Layer_Dense(2, 3)
# Second Dense layer: 3 inputs (the outputs of the previous layer) and
# 3 output values
dense2 = Layer_Dense(3, 3)
# ReLU follows the first Dense layer, Softmax follows the second
activation1 = Activation_ReLU()
activation2 = Activation_Softmax()
# Loss function
loss_function = Loss_CategoricalCrossentropy()

# --- Forward pass ---
# Training data -> first Dense layer -> ReLU
dense1.forward(X)
activation1.forward(dense1.output)
# ReLU output -> second Dense layer -> Softmax
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Softmax output for the first few samples
print(activation2.output[:5])

# Pass the Softmax output and the targets through the loss function,
# which returns the loss
loss = loss_function.calculate(activation2.output, y)
# Print loss value
print('loss:', loss)


[[0.33333333 0.33333333 0.33333333]
 [0.33333341 0.33333309 0.3333335 ]
 [0.33333312 0.33333305 0.33333384]
 [0.33333258 0.33333318 0.33333425]
 [0.33333242 0.33333304 0.33333454]]
loss: 1.0986181849640213
