In [1]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

In [2]:
nnfs.init()

In [3]:
# Dense Layer
class Layer_Dense:

    # Layer init
    def __init__(self, n_inputs, n_neurons):
        # init wts and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

    # Forward Pass
    def forward(self, inputs):

        # inputs init
        self.inputs= inputs
        # calulating the forward pass
        self.output = np.dot(inputs, self.weights) + self.biases

    # Backward Pass

    def backward(self, dvalues):

        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)

        # gradient on values to be passed to prev layer
        self.dinputs = np.dot(dvalues, self.weights.T)

# ReLU Activation
class Activation_ReLU:

    # Forward Pass
    def forward(self, inputs):

        self.inputs = inputs

        # ReLU gives max of 0, x. Inputs is the output of the first layer and np.maximum performs this for every value in inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):

        # since we need to modify dvalues itself we make a copy
        self.dinputs = dvalues.copy()

        # for values <= 0 the value will be 0
        self.dinputs[self.inputs <= 0] = 0

# Softmax Activation
class Activation_Softmax:

    # Forward Pass
    def forward(self, inputs):
        # exp(x) / sum(exp(x))
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True)) #subtraction is done to ensure there is no explosion of bigger values
        probablitites = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probablitites

    # Backward pass
    def backward(self, dvalues):

        # Create uninitialized array
        self.dinputs = np.empty_like(dvalues)

        # Enumerate outputs and gradients
        for index, (single_output, single_dvalues) in enumerate(zip(self.output, dvalues)):
            # Flatten output array
            single_output = single_output.reshape(-1, 1)
            # Calculate Jacobian matrix of the output
            jacobian_matrix = np.diagflat(single_output) - np.dot(single_output, single_output.T)

            # Calculate sample-wise gradient
            # and add it to the array of sample gradients
            self.dinputs[index] = np.dot(jacobian_matrix, single_dvalues)


# Loss
class Loss:
    # calculate loss from y and output
    def calculate(self, output, y):

        # apply forward function
        sample_losses = self.forward(output, y)
        print("Loss: ", sample_losses)

        # Calculate mean loss
        mean_loss = np.mean(sample_losses)

        return mean_loss
    
# Cross Entropy Loss
class Loss_CategoricalCrossEntropy(Loss):

    # Forward function
    def forward(self, y_pred, y_true):

        # No. of samples in a batch
        samples = len(y_pred)

        # Clip data to prevent division by 0
        # Clip data from both sides to prevent change in mean
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Calculate probabilities
        # Only category labels
        if(len(y_true.shape) == 1):
            correct_confidences = y_pred_clipped[range(samples), y_true]

        # Y_true is one hot encoded
        if(len(y_true.shape) == 2):
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        # loss
        negative_log_likelihoods = -np.log(correct_confidences)

        return negative_log_likelihoods
    
    def backward(self, dvalues, y_true):

        # no. of samples
        samples = len(dvalues)

        # no. of labels
        labels = len(dvalues[0])

        if(len(y_true.shape) == 1):
            # converting to one hot
            y_true = np.eye(labels)[y_true]

        # derivative
        self.dinputs = -y_true / dvalues

        # normalising
        self.dinputs = self.dinputs / samples


# Softmax classifier - combined Softmax activation
# and cross-entropy loss for faster backward step
class Activation_Softmax_Loss_CategoricalCrossentropy():

    # Creates activation and loss function objects
    def __init__(self):
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossEntropy()

    # Forward pass
    def forward(self, inputs, y_true):
        # Output layer's activation function
        self.activation.forward(inputs)
        # Set the output
        self.output = self.activation.output
        # Calculate and return loss value
        return self.loss.calculate(self.output, y_true)


    # Backward pass
    def backward(self, dvalues, y_true):

        # Number of samples
        samples = len(dvalues)

        # If labels are one-hot encoded,
        # turn them into discrete values
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)

        # Copy so we can safely modify
        self.dinputs = dvalues.copy()
        # Calculate gradient
        self.dinputs[range(samples), y_true] -= 1
        # Normalize gradient
        self.dinputs = self.dinputs / samples


In [4]:
X, y = spiral_data(samples=100, classes=3)

# Create a dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# Create the ReLU Actuvation 
activation1 = Activation_ReLU()

# Create the 2nd dense layer with 3 input features as the previous layer gave 3 outputs ans 3 output as we have 3 classes
dense2 = Layer_Dense(3, 3)

# # Create the Softmax Activation
# activation2 = Activation_Softmax()

# # Create the loss function
# loss_function= Loss_CategoricalCrossEntropy()

# Create Softmax classifier's combined loss and activation
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()


In [5]:
# Forward pass
dense1.forward(X)
# print(X.shape)
# Apply the first activation to the ouput of 1st layer
activation1.forward(dense1.output)
# print(dense1.output.shape)
# Forward pass to the 2nd layer
dense2.forward(activation1.output)

# # Applying the 2nd activation
# activation2.forward(dense2.output)

# print(activation2.output[:5])

# Perform a forward pass through the activation/loss function
# takes the output of second dense layer here and returns loss
loss = loss_activation.forward(dense2.output, y)
# Let's see output of the first few samples:
print(loss_activation.output[:5])

Loss:  [1.0986123 1.0986127 1.0986136 1.0986145 1.0986153 1.0986137 1.0986168
 1.098617  1.0986178 1.0986192 1.0986199 1.098619  1.0986207 1.0986221
 1.098622  1.0986223 1.0986185 1.0986241 1.0986226 1.098626  1.098626
 1.0986195 1.098617  1.0986255 1.0986354 1.0986278 1.0986179 1.0986184
 1.0986398 1.0986419 1.0986304 1.098637  1.0986211 1.0986254 1.0986376
 1.0986471 1.0986526 1.098653  1.0986507 1.098654  1.0986497 1.0986483
 1.0986478 1.0986323 1.0986618 1.098662  1.098664  1.0986414 1.0986664
 1.0986533 1.0986621 1.0986348 1.0986497 1.0986599 1.0986315 1.0986141
 1.0986186 1.0986125 1.0986273 1.0986137 1.0986171 1.098616  1.0986123
 1.0986311 1.0986419 1.0986344 1.0986123 1.0986576 1.0986377 1.0986589
 1.0986654 1.0986648 1.0986633 1.0986542 1.0986674 1.0986677 1.0986686
 1.0986708 1.0986696 1.0986639 1.0986731 1.0986443 1.0986497 1.0986748
 1.0986497 1.0986748 1.0986547 1.0986434 1.0986588 1.0986766 1.0986344
 1.0986977 1.0986686 1.0987039 1.0986919 1.098709  1.0986958 1.0987172


In [6]:
# Print loss value
print('loss:', loss)

loss: 1.0986104


In [7]:
# Calculate accuracy from output of activation2 and targets
# calculate values along first axis
predictions = np.argmax(loss_activation.output, axis=1)
if len(y.shape) == 2:
    y = np.argmax(y, axis=1)
accuracy = np.mean(predictions==y)

# Print accuracy
print('acc:', accuracy)

acc: 0.34


In [8]:
# Backward pass
loss_activation.backward(loss_activation.output, y)
dense2.backward(loss_activation.dinputs)
activation1.backward(dense2.dinputs)
dense1.backward(activation1.dinputs)

# Print gradients
print(dense1.dweights)
print(dense1.dbiases)
print(dense2.dweights)
print(dense2.dbiases)

[[ 1.5766357e-04  7.8368583e-05  4.7324400e-05]
 [ 1.8161038e-04  1.1045573e-05 -3.3096312e-05]]
[[-3.60553473e-04  9.66117223e-05 -1.03671395e-04]]
[[ 5.44109462e-05  1.07411419e-04 -1.61822361e-04]
 [-4.07913431e-05 -7.16780924e-05  1.12469446e-04]
 [-5.30112993e-05  8.58172934e-05 -3.28059905e-05]]
[[-1.0729185e-05 -9.4610732e-06  2.0027859e-05]]
