[[Neural Networks from Scratch]]

##### Computing the Categorical Cross-Entropy Loss on a batch of Softmax outputs

In [None]:
# Softmax confidences for a batch of three samples (one row per sample).
softmax_outputs = np.array([
	[0.7, 0.1, 0.2],
	[0.1, 0.5, 0.4],
	[0.02, 0.9, 0.08],
])

# Index of the correct class for each sample in the batch.
class_targets = [0, 1, 1]

'''
Hard coding the range of however long our values are

print(softmax_outputs[[0, 1, 2], [0, 1, 1]])
'''

# Pair every row index with that row's target class to pick out the
# confidence assigned to the correct class, then take its negative log.
target_confidences = softmax_outputs[range(len(softmax_outputs)), class_targets]
neg_log = -np.log(target_confidences)

# The batch loss is the mean of the per-sample losses.
average_loss = neg_log.mean()
print(average_loss)


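If the model assigns a confidence of exactly 0 to the correct class, that sample's loss is -log(0), which is infinite, so the average loss over the whole batch becomes infinite as well.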

To prevent this infinity problem, we clip the predicted confidences so that they never get closer than 1e-7 to 0 (or, symmetrically, to 1).

##### What does clipping do?
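Clipping replaces predicted probabilities less than 1 x 10^-7 with 1 x 10^-7, and those greater than 1 - 1 x 10^-7 with 1 - 1 x 10^-7. The lower bound keeps an exact zero from being fed into the logarithm, where -log(0) would give an infinite loss. The upper bound is applied so that both ends are clipped by the same amount; -log(1) = 0 is itself well defined.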

##### Implementing the Loss and Categorical Cross-Entropy Loss (Inherited from Loss) Classes

In [None]:
# Initialise the nnfs helper package before building the network.
# NOTE(review): presumably this fixes the random seed and default dtype so
# that results are reproducible — confirm against the nnfs documentation.
nnfs.init()

class Layer_Dense:
	"""Fully connected layer computing ``inputs . weights + biases``."""

	def __init__(self, n_inputs, n_neurons):
		"""Create the layer's parameters.

		Args:
			n_inputs: Number of values each input sample carries.
			n_neurons: Number of neurons (outputs) in this layer.
		"""
		# Weights are random normal draws scaled by 0.10, stored as
		# (n_inputs, n_neurons) so forward() needs no transpose.
		weight_shape = (n_inputs, n_neurons)
		self.weights = 0.10 * np.random.randn(*weight_shape)
		# One zero bias per neuron, kept as a single row so it broadcasts
		# over every sample in a batch.
		self.biases = np.zeros((1, n_neurons))

	def forward(self, inputs):
		"""Compute the layer output for ``inputs`` and store it in ``self.output``."""
		weighted_sum = np.dot(inputs, self.weights)
		self.output = weighted_sum + self.biases

class Activation_ReLU:
	"""Rectified linear unit activation."""

	def forward(self, inputs):
		"""Store the element-wise ``max(0, x)`` of ``inputs`` in ``self.output``."""
		rectified = np.maximum(0, inputs)
		self.output = rectified

class Activation_Softmax:
	"""Softmax activation applied independently to every row of a batch."""

	def forward(self, inputs):
		"""Store the row-wise softmax of ``inputs`` in ``self.output``."""
		# Subtract each row's maximum before exponentiating so the largest
		# exponent is 0; the normalised result is unchanged by this shift.
		row_max = np.max(inputs, axis=1, keepdims=True)
		exp_values = np.exp(inputs - row_max)
		# Divide by the row totals so every row sums to 1.
		row_totals = np.sum(exp_values, axis=1, keepdims=True)
		self.output = exp_values / row_totals

class Loss:
	"""Common base for loss functions.

	Subclasses implement ``forward`` to produce one loss value per sample;
	``calculate`` reduces those values to their mean.
	"""

	def forward(self, output, y):
		"""Return the per-sample losses; must be overridden by subclasses.

		Raises:
			NotImplementedError: always, when called on the base class.
		"""
		raise NotImplementedError(
			f"{type(self).__name__} must implement forward(output, y)"
		)

	def calculate(self, output, y):
		"""Return the mean of the per-sample losses produced by ``forward``.

		Args:
			output: Model predictions, passed through to ``forward``.
			y: Target values, passed through to ``forward``.

		Returns:
			The mean of the values returned by ``forward``.
		"""
		sample_losses = self.forward(output, y)
		data_loss = np.mean(sample_losses)
		return data_loss


class Loss_CategoricalCrossentropy(Loss):
	"""Categorical cross-entropy: ``-log`` of the confidence given to the correct class."""

	def forward(self, y_pred, y_true):
		"""Return the negative log-likelihood of every sample.

		Args:
			y_pred: Predicted confidences, one row per sample.
			y_true: Either a 1-D sequence of class indices (e.g. ``[0, 1, 1]``)
				or a 2-D one-hot array (e.g. ``[[1, 0, 0], [0, 1, 0]]``).
				Plain Python lists are accepted as well as NumPy arrays.

		Returns:
			A 1-D array holding one loss value per sample.

		Raises:
			ValueError: if ``y_true`` is neither 1-D nor 2-D.
		"""
		# Plain lists have no ``.shape``; converting up front lets callers
		# pass targets such as ``[0, 1, 1]`` directly.
		y_pred = np.asarray(y_pred)
		y_true = np.asarray(y_true)

		samples = len(y_pred)

		# Keep confidences away from exactly 0 so that -log() stays finite;
		# the upper end is clipped by the same amount.
		epsilon = 1e-7
		y_pred_clipped = np.clip(y_pred, epsilon, 1 - epsilon)

		if y_true.ndim == 1:
			# Sparse labels: pick each row's confidence at its target index.
			correct_confidences = y_pred_clipped[range(samples), y_true]
		elif y_true.ndim == 2:
			# One-hot labels: the mask zeroes every entry except the target's.
			correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
		else:
			raise ValueError(
				"y_true must be 1-D (class indices) or 2-D (one-hot), "
				f"got {y_true.ndim} dimension(s)"
			)

		negative_log_likelihoods = -np.log(correct_confidences)
		return negative_log_likelihoods


# Build the spiral dataset with 3 classes, one per arm of the three-armed
# spiral (each coordinate must belong to exactly one of the 3 arms).
X, y = spiral_data(samples=100, classes=3)
# The first layer takes 2 inputs because each sample has only 2 features:
# its x and y coordinates.
dense1 = Layer_Dense(2, 3)
activation1 = Activation_ReLU()

# Second layer: 3 inputs (the 3 outputs of dense1) and 3 outputs, followed by softmax.
# NOTE(review): presumably the 3 outputs correspond to the 3 classes — confirm.
dense2 = Layer_Dense(3, 3)
activation2 = Activation_Softmax()

# Forward pass through the first layer and its ReLU activation.
dense1.forward(X)
activation1.forward(dense1.output)

# Forward pass through the second layer and the softmax activation.
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Show the softmax output for the first 5 samples.
print(activation2.output[:5])

# Mean categorical cross-entropy of the softmax output against the targets y.
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.calculate(activation2.output, y)

print("Loss:", loss)


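Given the targets and the Softmax outputs, we can easily calculate accuracy for classification tasks: take the index of the largest confidence in each row as the prediction (`np.argmax(softmax_outputs, axis=1)`), compare the predictions with the targets, and take the mean of those equality checks (`np.mean(predictions == class_targets)`).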

##### Calculating the accuracy

In [None]:
# Softmax confidences for a batch of three samples (one row per sample).
softmax_outputs = np.array([
	[0.7, 0.2, 0.1],
	[0.5, 0.1, 0.4],
	[0.02, 0.9, 0.08],
])

# Index of the correct class for each sample in the batch.
class_targets = [0, 1, 1]

# The predicted class of a sample is the column holding its largest confidence.
predictions = softmax_outputs.argmax(axis=1)
print(f"predictions: {predictions}")
print(f"class targets: {class_targets}")

# Element-wise comparison gives True/False per sample; the mean of those
# booleans is the fraction of correct predictions.
matches = predictions == class_targets
accuracy = np.mean(matches)

print('acc:', accuracy)


Our goal now is to decrease this loss.