In [2]:
import numpy as np
import nnfs # package by the book's author to generate random data
from nnfs.datasets import spiral_data
import matplotlib.pyplot as plt
import math

nnfs.init()

# Full network with categorical cross entropy loss

In [189]:
# Dense layer

class Layer_Dense():
  
  # layer initialization
  def __init__(self, n_inputs, n_neurons):
      self.weights = 0.01 * np.random.rand(n_inputs, n_neurons)
      self.biases = np.zeros((1, n_neurons))
      
      
      
  # forward pass
  def forward(self, inputs):
    # calculate output values from inputs, weights and biases
    self.output = np.dot(inputs, self.weights) + self.biases
    
    
  
# ReLU activation

class Activation_Relu():
   
   # forward pass
   def forward(self, inputs):
     # calculate output values from inputs
     self.output = np.maximum(0,inputs)
    
    
    
# Softmax activation

class Activation_Softmax():
  
  # forward pass
  def forward(self, inputs):
    
    # get unnormalized probabilities
    exp_values = np.exp(inputs - np.max(inputs, axis = 1, keepdims = True))
    
    # normalize probabilities
    probabilities = exp_values / np.sum(exp_values, axis = 1, keepdims = True)
    
    self.output = probabilities
    
    

# Common loss class

class Loss():
  
  # calculates the data and regularization losses
  # given model output and ground truth values
  
  def calculate(self, output, y):
    # calculate sample loss
    sample_losses = self.forward(output, y)
    
    # calculate mean loss
    data_loss = np.mean(sample_losses)
    
    # return loss
    return data_loss
  

  
  
# Cross-entropy loss

class Loss_CategoricalCrossentropy(Loss):
  
  # forward pass
  def forward(self, y_pred, y_true):
    
    # number of samples in a batch
    samples = len(y_pred)
    
    # clip data to prevent division by 0
    # clip both sides to not drag the mean towards any value
    y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)
    
    
    # probabilites for target values
    # only if categorical labels
    
    if len(y_true.shape) == 1:
      correct_confidences = y_pred_clipped[range(samples), y_true]
      
    # mask values - only for one hot encoded labels
    elif len(y_true.shape) == 2:
      correct_confidences = np.sum(y_pred_clipped * y_true, axis = 1)
    
    
    # Losses
    negative_log_likelihoods = -np.log(correct_confidences)
    return negative_log_likelihoods

In [190]:
# create dataset
X, y = spiral_data(samples = 100, classes = 3)

In [281]:
# create dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# create ReLU activation which will be used with Dense layer
activation1 = Activation_Relu()

# create second dense layer with 3 input features from the previous layer and 3 output values
dense2 = Layer_Dense(3,3)

# create softmax activation which will be used with dense layer
activation2 = Activation_Softmax()

# create a loss function
loss_function = Loss_CategoricalCrossentropy()

In [286]:
dense1.weights.shape

(2, 3)

In [282]:
# perform the first pass of input data to the dense layer
dense1.forward(X)
print(dense1.output[:5])

[[0.0000000e+00 0.0000000e+00 0.0000000e+00]
 [8.0659374e-05 4.3710388e-05 6.5012209e-05]
 [1.5923499e-04 6.9124777e-05 1.0470775e-04]
 [2.3033096e-04 1.9152602e-04 2.7749798e-04]
 [1.9318146e-04 3.1980115e-04 4.5189835e-04]]


In [82]:
# perform pass through the first activation function which takes the inputs from above - the layer before
activation1.forward(dense1.output)
print(activation1.output[:5])

[[0.0000000e+00 0.0000000e+00 0.0000000e+00]
 [2.6297315e-05 9.2926544e-05 5.1812236e-05]
 [0.0000000e+00 1.4744070e-04 0.0000000e+00]
 [2.3272418e-04 6.6075481e-05 3.0338956e-04]
 [0.0000000e+00 3.4931320e-04 0.0000000e+00]]


In [83]:
# perfrom the second pass to the dense layer which input is the output from the activation function
dense2.forward(activation1.output)
print(dense2.output[:5])

[[0.0000000e+00 0.0000000e+00 0.0000000e+00]
 [1.0993491e-06 6.8928142e-07 4.6805695e-07]
 [1.1044792e-06 7.8488006e-07 1.6948626e-07]
 [3.4528434e-06 1.4912927e-06 2.3913783e-06]
 [2.6167072e-06 1.8595201e-06 4.0154302e-07]]


In [84]:
# perform second activation function which takes inputs from the second dense layer
activation2.forward(dense2.output)
print(activation2.output[:5])

[[0.33333334 0.33333334 0.33333334]
 [0.33333343 0.3333333  0.33333325]
 [0.33333346 0.33333334 0.33333316]
 [0.33333367 0.33333302 0.33333334]
 [0.33333364 0.3333334  0.33333293]]


In [85]:
# perfrom a forward pass through loss function it takes the output of a second dense layer and returns loss
loss = loss_function.calculate(activation2.output, y)
print(f'Loss: {loss}')

Loss: 1.0986117124557495
