In [1]:
import numpy as np
import nnfs # package by the book's author to generate random data
from nnfs.datasets import spiral_data
import matplotlib.pyplot as plt
import math

nnfs.init()

# Full network with categorical cross entropy loss

In [2]:
# Dense layer

class Layer_Dense():
  
  # layer initialization
  def __init__(self, n_inputs, n_neurons):
      self.weights = 0.01 * np.random.rand(n_inputs, n_neurons)
      self.biases = np.zeros((1, n_neurons))
      
      
      
  # forward pass
  def forward(self, inputs):
    # calculate output values from inputs, weights and biases
    self.output = np.dot(inputs, self.weights) + self.biases
    
    
  
# ReLU activation

class Activation_Relu():
   
   # forward pass
   def forward(self, inputs):
     # calculate output values from inputs
     self.output = np.maximum(0,inputs)
    
    
    
# Softmax activation

class Activation_Softmax():
  
  # forward pass
  def forward(self, inputs):
    
    # get unnormalized probabilities
    exp_values = np.exp(inputs - np.max(inputs, axis = 1, keepdims = True))
    
    # normalize probabilities
    probabilities = exp_values / np.sum(exp_values, axis = 1, keepdims = True)
    
    self.output = probabilities
    
    

# Common loss class

class Loss():
  
  # calculates the data and regularization losses
  # given model output and ground truth values
  
  def calculate(self, output, y):
    # calculate sample loss
    sample_losses = self.forward(output, y)
    
    # calculate mean loss
    data_loss = np.mean(sample_losses)
    
    # return loss
    return data_loss
  

  
  
# Cross-entropy loss

class Loss_CategoricalCrossentropy(Loss):
  
  # forward pass
  def forward(self, y_pred, y_true):
    
    # number of samples in a batch
    samples = len(y_pred)
    
    # clip data to prevent division by 0
    # clip both sides to not drag the mean towards any value
    y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)
    
    
    # probabilites for target values
    # only if categorical labels
    
    if len(y_true.shape) == 1:
      correct_confidences = y_pred_clipped[range(samples), y_true]
      
    # mask values - only for one hot encoded labels
    elif len(y_true.shape) == 2:
      correct_confidences = np.sum(y_pred_clipped * y_true, axis = 1)
    
    
    # Losses
    negative_log_likelihoods = -np.log(correct_confidences)
    return negative_log_likelihoods

In [3]:
# create dataset
X, y = spiral_data(samples = 100, classes = 3)

In [4]:
# create dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# create ReLU activation which will be used with Dense layer
activation1 = Activation_Relu()

# create second dense layer with 3 input features from the previous layer and 3 output values
dense2 = Layer_Dense(3,3)

# create softmax activation which will be used with dense layer
activation2 = Activation_Softmax()

# create a loss function
loss_function = Loss_CategoricalCrossentropy()

In [5]:
dense1.weights.shape

(2, 3)

In [6]:
# perform the first pass of input data to the dense layer
dense1.forward(X)
print(dense1.output[:5])

[[0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [5.97437393e-05 3.68895635e-05 8.37819971e-05]
 [1.46999708e-04 9.15808050e-05 1.40212578e-04]
 [2.07372344e-04 1.30936343e-04 5.65641167e-05]
 [2.86790368e-04 1.80765084e-04 1.03854705e-04]]


In [7]:
# perform pass through the first activation function which takes the inputs from above - the layer before
activation1.forward(dense1.output)
print(activation1.output[:5])

[[0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [5.97437393e-05 3.68895635e-05 8.37819971e-05]
 [1.46999708e-04 9.15808050e-05 1.40212578e-04]
 [2.07372344e-04 1.30936343e-04 5.65641167e-05]
 [2.86790368e-04 1.80765084e-04 1.03854705e-04]]


In [8]:
# perfrom the second pass to the dense layer which input is the output from the activation function
dense2.forward(activation1.output)
print(dense2.output[:5])

[[0.0000000e+00 0.0000000e+00 0.0000000e+00]
 [4.8766827e-07 1.3575345e-06 3.6410142e-07]
 [9.3824906e-07 2.7276390e-06 8.3170107e-07]
 [7.6309243e-07 2.5356903e-06 1.0358194e-06]
 [1.1570428e-06 3.7448992e-06 1.4574538e-06]]


In [9]:
# perform second activation function which takes inputs from the second dense layer
activation2.forward(dense2.output)
print(activation2.output[:5])

[[0.33333334 0.33333334 0.33333334]
 [0.33333325 0.33333355 0.33333322]
 [0.33333313 0.33333373 0.3333331 ]
 [0.3333331  0.3333337  0.33333322]
 [0.333333   0.33333385 0.3333331 ]]


In [10]:
# perfrom a forward pass through loss function it takes the output of a second dense layer and returns loss
loss = loss_function.calculate(activation2.output, y)
print(f'Loss: {loss}')

Loss: 1.0986113548278809


In [12]:
# Adding accuracy, which describes how often the largest confidence if the correct class in terms of a fraction
predictions = np.argmax(activation2.output, axis = 1)
if len(y.shape) == 2:
  y = np.argmax(y, axis = 1)
accuracy = np.mean(predictions == y)

print(f'Accuracy : {accuracy}')

Accuracy : 0.36
