In [1]:
!pip install nnfs

In [6]:
import sys
import numpy as np
import matplotlib.pyplot as plt
import nnfs

In [None]:
# neuronA
# a single neuron: weighted sum of its inputs plus a bias

inputs = [1, 2, 3]           # one value from each of three neurons in the previous layer
weights = [0.2, 0.8, -0.5]   # one weight per incoming connection
bias = 2

# multiply every input by its weight, add the products up, then add the bias
output = sum(value * weight for value, weight in zip(inputs, weights)) + bias
output

In [None]:
# neuronB
# a layer of three neurons that all read the same four inputs

inputs = [1, 2, 3, 2.5]  # four input values (e.g. from the previous layer)

# one weight list per neuron, one weight per input
weights1 = [0.2, 0.8, -0.5, 1.0]
weights2 = [0.5, -0.91, 0.26, -0.5]
weights3 = [-0.26, -0.27, 0.17, 0.87]

# one bias per neuron
bias1 = 2
bias2 = 3
bias3 = 0.5

# same recipe as before (sum of input * weight, plus bias),
# but every neuron applies its own weights and bias to the shared inputs
neuron_params = [(weights1, bias1), (weights2, bias2), (weights3, bias3)]

output = [
    sum(value * weight for value, weight in zip(inputs, neuron_weights)) + neuron_bias
    for neuron_weights, neuron_bias in neuron_params
]
output

In [None]:
# neuronB simplified via lists & loops

inputs = [1, 2, 3, 2.5]  # four input values (e.g. from the previous layer)

# row i holds the weights of neuron i; biases[i] is that neuron's bias
weights = [[0.2, 0.8, -0.5, 1.0],
           [0.5, -0.91, 0.26, -0.5],
           [-0.26, -0.27, 0.17, 0.87]]

biases = [2, 3, 0.5]

layer_outputs = []  # one entry per neuron of the current layer

# walk over the neurons, pairing each weight row with its bias
for neuron_weights, neuron_bias in zip(weights, biases):
    # weighted sum of the shared inputs for this neuron
    weighted_sum = sum(n_input * weight for n_input, weight in zip(inputs, neuron_weights))
    layer_outputs.append(weighted_sum + neuron_bias)

layer_outputs

In [None]:
# neuronB simplified via numpy's dot product

inputs = [1, 2, 3, 2.5]  # four input values (e.g. from the previous layer)

# row i holds the weights of neuron i
weights = [[0.2, 0.8, -0.5, 1.0],
           [0.5, -0.91, 0.26, -0.5],
           [-0.26, -0.27, 0.17, 0.87]]

biases = [2, 3, 0.5]

# matrix (3x4) dot vector (4) gives one weighted sum per neuron;
# adding the biases list is done element by element
weighted_sums = np.dot(weights, inputs)
output = weighted_sums + biases
output

In [None]:
# adding an additional layer (of weights and biases) to neuronB

# batch of three samples with four values each
inputs = [[1, 2, 3, 2.5],
          [2.0, 5.0, -1.0, 2.0],
          [-1.5, 2.7, 3.3, -0.8]]

# first layer: three neurons, four weights each
weights1 = [[0.2, 0.8, -0.5, 1.0],
            [0.5, -0.91, 0.26, -0.5],
            [-0.26, -0.27, 0.17, 0.87]]

biases1 = [2, 3, 0.5]

# second layer: three neurons, three weights each (one per first-layer neuron)
weights2 = [[0.1, -0.14, 0.5],
            [-0.5, 0.12, -0.33],
            [-0.44, 0.73, -0.13]]

biases2 = [-1, 2, -0.5]

# Each weight row belongs to one neuron, so the weight matrix is transposed to
# line up with the rows (samples) of the batch.
# Uses this cell's own weights1/biases1 rather than names left over from an earlier cell.
layer1_outputs = np.dot(inputs, np.array(weights1).T) + biases1

# input of layer 2 is the output of layer 1
layer2_outputs = np.dot(layer1_outputs, np.array(weights2).T) + biases2

layer2_outputs

In [None]:
# Multilayer neural network, now built from layer objects so it can be assembled step by step

# seed numpy's global random generator before any weights are drawn
np.random.seed(0)

# the input becomes the X dataset (3 samples); it will not change
X = [
    [1, 2, 3, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

# loading a saved model essentially means loading pre-defined weights and biases;
# here the weights and biases are initialised directly instead
class Layer_Dense:
    """Fully connected layer holding a weight matrix and a row of biases."""

    def __init__(self, n_inputs, n_neurons):
        """Create random weights scaled by 0.10 and all-zero biases.

        Weights have shape (n_inputs, n_neurons), so forward() needs no transpose;
        biases have shape (1, n_neurons).
        """
        shape = (n_inputs, n_neurons)
        self.weights = np.random.randn(*shape) * 0.10
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute inputs . weights + biases and keep it on ``self.output``."""
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases

# build both layers first: 4 inputs -> 5 neurons, then 5 inputs -> 2 neurons
layer1, layer2 = Layer_Dense(4, 5), Layer_Dense(5, 2)

# push the batch through the layers in order, printing what each one produces
signal = X
for layer in (layer1, layer2):
    layer.forward(signal)
    signal = layer.output
    print(signal)

In [None]:
# Activation functions in long-hand code (ReLU)

inputs = [0, 2, -1, 3.3, -2.7, 1.2, 2.2, -100]

# Rule: keep a value when it is greater than 0, otherwise use 0.
# Written out that is "append i if i > 0, else append 0";
# max(0, i) expresses the same rule in a single call.
output = [max(0, i) for i in inputs]

output

In [None]:
# Adding ReLU to my neural network framework
from nnfs.datasets import spiral_data

# replaces the random seed and sets the default datatype for numpy
nnfs.init()

# spiral dataset requested with 100 samples and 3 classes
X, y = spiral_data(samples=100, classes=3)

class Layer_Dense:
    """Fully connected layer holding a weight matrix and a row of biases."""

    def __init__(self, n_inputs, n_neurons):
        """Create random weights scaled by 0.10 and all-zero biases.

        Weights have shape (n_inputs, n_neurons), so forward() needs no transpose;
        biases have shape (1, n_neurons).
        """
        shape = (n_inputs, n_neurons)
        self.weights = np.random.randn(*shape) * 0.10
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute inputs . weights + biases and keep it on ``self.output``."""
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases

class Activation_ReLU:
    """Rectified linear activation: negative values become zero, the rest pass through."""

    def forward(self, inputs):
        """Apply the element-wise rectifier and keep the result on ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

# the spiral dataset is just x,y coordinates, hence 2 inputs per sample
layer1, activation1 = Layer_Dense(2, 5), Activation_ReLU()

# dense layer first, then ReLU on whatever the dense layer produced
layer1.forward(X)
activation1.forward(layer1.output)
activation1.output

In [None]:
# basics of the Softmax function of the output layer, in long-hand code
import math

layer_outputs = [4.8, 1.21, 2.385]

E = math.e

# exponentiate every raw output
exp_values = [E**raw for raw in layer_outputs]

# now normalise: dividing by the total turns the values into probabilities
norm_base = sum(exp_values)
norm_values = [value / norm_base for value in exp_values]

norm_values, sum(norm_values)


In [None]:
# Converting the softmax calculation above into numpy calls

layer_outputs = [4.8, 1.21, 2.385]

# np.exp exponentiates every element at once, so no separate constant e
# (and no import of math) is needed in this cell
exp_values = np.exp(layer_outputs)

# divide by the total so the values add up to 1
norm_values = exp_values / np.sum(exp_values)

norm_values, np.sum(norm_values)


In [None]:
# Softmax on batches of data (not just a single output vector)

layer_outputs = [[4.8, 1.21, 2.385],
                 [8.9, -1.81, 0.2],
                 [1.41, 1.051, 0.026]]

# numpy applies exp to every element of the batch
exp_values = np.exp(layer_outputs)

# axis=1 sums within each row (one total per sample) instead of one grand total;
# keepdims=True keeps the totals as a (3, 1) column so the division below
# lines up row by row with exp_values
row_sums = np.sum(exp_values, axis=1, keepdims=True)

norm_values = exp_values / row_sums
norm_values



In [None]:
# Adding a softmax activation as the output layer of our dense network
from nnfs.datasets import spiral_data

# replaces the random seed and sets the default datatype for numpy
nnfs.init()

class Layer_Dense:
    """Fully connected layer holding a weight matrix and a row of biases."""

    def __init__(self, n_inputs, n_neurons):
        """Create random weights scaled by 0.10 and all-zero biases.

        Weights have shape (n_inputs, n_neurons), so forward() needs no transpose;
        biases have shape (1, n_neurons).
        """
        shape = (n_inputs, n_neurons)
        self.weights = np.random.randn(*shape) * 0.10
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute inputs . weights + biases and keep it on ``self.output``."""
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases

class Activation_ReLU:
    """Rectified linear activation: negative values become zero, the rest pass through."""

    def forward(self, inputs):
        """Apply the element-wise rectifier and keep the result on ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified
        
class Activation_Softmax:
    """Softmax activation: turns every row of scores into values that sum to 1."""

    def forward(self, inputs):
        """Normalise row by row and keep the probabilities on ``self.output``."""
        # subtract each row's maximum first so the largest exponent is exp(0)
        row_max = np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(inputs - row_max)
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals

# setting the data scene
X, y = spiral_data(samples=100, classes=3)

# dense1 -> ReLU -> dense2 -> softmax
# dense2 takes 3 inputs because dense1 produces 3 outputs
dense1, activation1 = Layer_Dense(2, 3), Activation_ReLU()
dense2, activation2 = Layer_Dense(3, 3), Activation_Softmax()

# running our network on X: each stage consumes the previous stage's output
signal = X
for stage in (dense1, activation1, dense2, activation2):
    stage.forward(signal)
    signal = stage.output

activation2.output[:5]


In [None]:
# Coding the categorical cross-entropy (CCE) loss function without numpy

import math

softmax_output = [0.7, 0.1, 0.2]
target_output = [1, 0, 0]

# CCE is the negative sum of log(prediction) * target over all classes
loss = -sum(
    math.log(confidence) * target
    for confidence, target in zip(softmax_output, target_output)
)

# or... since the target is zero for every incorrect class,
# only the term of the correct class is left
loss = -math.log(softmax_output[0])
loss1 = -math.log(0.7)

loss, loss1

In [None]:
# Coding the CCE as confidence of each prediction from a batch (without numpy)

# Classes: 0-->Dog, 1-->Cat, 2-->Human
# class_targets in words: [dog, cat, cat]

# typically a batch of outputs, as we run a batch of inputs
softmax_outputs = [[0.7, 0.1, 0.2],    #<--dog, 0.7
                   [0.1, 0.5, 0.4],    #<--cat, 0.5
                   [0.02, 0.9, 0.08]]  #<--cat, 0.9

class_targets = [0, 1, 1]

# zip walks through the two lists in step, yielding the pairs
#   (0, [0.7, 0.1, 0.2])
#   (1, [0.1, 0.5, 0.4])
#   (1, [0.02, 0.9, 0.08])
# and the first element of each pair is used as the index into the second.
# (These notes are real comments: a bare string at the end of a cell would be
# echoed back as the cell's output.)
for target_index, distribution in zip(class_targets, softmax_outputs):
    print(distribution[target_index])

In [None]:
# Coding the CCE as confidence of each prediction WITH numpy

# with softmax_outputs as a numpy array the same task becomes a single indexing
# expression, and individual elements are easier to pick out
softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])

class_targets = [0, 1, 1]

# row i is paired with column class_targets[i], i.e. the elements
#   softmax_outputs[0, 0], softmax_outputs[1, 1], softmax_outputs[2, 1]
sample_rows = range(len(softmax_outputs))
correct_confidences = softmax_outputs[sample_rows, class_targets]

# one loss value per sample of the batch
neg_log = -np.log(correct_confidences)

# the average loss gives an idea of how the network performs as a whole
# (the losses could also be summed instead)
average_loss = np.mean(neg_log)
average_loss

In [None]:
# Coding the CCE as confidence of each prediction WITH numpy, protected against log(0)

# The code above works, but runs into trouble when the confidence of the correct
# class is zero: -log(0) is infinite.
# A single zero is technically correct (it is infinitely wrong!), but one infinite
# loss makes the averaged loss infinite as well.

softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])

class_targets = [0, 1, 1]

# Handle the zero/infinity problem by clipping every predicted value into
# [1e-7, 1 - 1e-7]: close to 0 and 1, but never exactly 0.
# The clip has to happen BEFORE the log is taken, otherwise it protects nothing.
y_pred_clip = np.clip(softmax_outputs, 1e-7, 1 - 1e-7)

neg_log = -np.log(y_pred_clip[range(len(y_pred_clip)), class_targets])

In [8]:
# Introducing the categorical cross-entropy 'loss function' to our dense network

from nnfs.datasets import spiral_data

# replaces the random seed and sets the default datatype for numpy
nnfs.init()

class Layer_Dense:
    """Fully connected layer holding a weight matrix and a row of biases."""

    def __init__(self, n_inputs, n_neurons):
        """Create random weights scaled by 0.10 and all-zero biases.

        Weights have shape (n_inputs, n_neurons), so forward() needs no transpose;
        biases have shape (1, n_neurons).
        """
        shape = (n_inputs, n_neurons)
        self.weights = np.random.randn(*shape) * 0.10
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute inputs . weights + biases and keep it on ``self.output``."""
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases

class Activation_ReLU:
    """Rectified linear activation: negative values become zero, the rest pass through."""

    def forward(self, inputs):
        """Apply the element-wise rectifier and keep the result on ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified
        
class Activation_Softmax:
    """Softmax activation: turns every row of scores into values that sum to 1."""

    def forward(self, inputs):
        """Normalise row by row and keep the probabilities on ``self.output``."""
        # subtract each row's maximum first so the largest exponent is exp(0)
        row_max = np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(inputs - row_max)
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals
        
class Loss:
    """Common part of every loss: averages the per-sample losses of a subclass."""

    def calculate(self, output, y):
        """Return the mean loss over a batch.

        output: output of the model.
        y: the intended target values.
        The per-sample losses come from ``self.forward``, which a subclass provides.
        """
        sample_losses = self.forward(output, y)
        data_loss = np.mean(sample_losses)
        return data_loss


class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy (CCE) loss, inheriting ``calculate`` from Loss.

    Accepts target classes either as scalar labels, a 1-D array such as [0, 1],
    or one-hot encoded, a 2-D array such as [[1, 0], [0, 1]].
    See youtube sentdex 8/9 11:06 - 13:48 for further explanation.
    """

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the correct class for each sample.

        y_pred: model output, one row of class confidences per sample.
        y_true: target training classes (scalar labels or one-hot rows);
            plain lists are accepted as well as numpy arrays.
        Raises ValueError when y_true is neither 1-D nor 2-D.
        """
        y_true = np.asarray(y_true)
        samples = len(y_pred)

        # keep every confidence away from exactly 0 so the log stays finite
        y_pred_clip = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # scalar labels: pick, for every row, the column named by its label
            correct_confidences = y_pred_clip[range(samples), y_true]
        elif y_true.ndim == 2:
            # one-hot rows: multiplying zeroes out every incorrect class
            correct_confidences = np.sum(y_pred_clip * y_true, axis=1)
        else:
            raise ValueError(
                f"y_true must be 1-D (class labels) or 2-D (one-hot), got {y_true.ndim}-D"
            )

        neg_log_likelihoods = -np.log(correct_confidences)
        return neg_log_likelihoods
      
# setting the data scene
X, y = spiral_data(samples=100, classes=3)

# dense1 -> ReLU -> dense2 -> softmax
# dense2 takes 3 inputs because dense1 produces 3 outputs
dense1, activation1 = Layer_Dense(2, 3), Activation_ReLU()
dense2, activation2 = Layer_Dense(3, 3), Activation_Softmax()

# running our network on X: each stage consumes the previous stage's output
signal = X
for stage in (dense1, activation1, dense2, activation2):
    stage.forward(signal)
    signal = stage.output

# mean categorical cross-entropy of the softmax output against the targets y
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.calculate(activation2.output, y)

print(f"Loss: {loss}")

In [10]:
# quick look at the array shapes of the dataset X and its targets y
X.shape, y.shape