In [15]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data
import matplotlib.pyplot as plt

# Dense Layer

In [16]:
# Dense (fully connected) layer class with forward pass and backpropagation
class Dense:
    """Fully connected layer computing ``inputs @ weights + biases``.

    Attributes set by ``__init__``:
        weights: array of shape ``(n_inputs, n_neurons)``, small random values.
        biases:  array of shape ``(1, n_neurons)``, all zeros.

    Attributes set by ``forward``:
        inputs:  the array passed in, kept for the backward pass.
        outputs: result of the affine transform.
        output:  the same array as ``outputs``; provided so this layer
                 exposes the attribute name used by ReLU and Softmax.

    Attributes set by ``backward``:
        dweights, dbiases, dinputs: gradients of the loss with respect to
        the weights, the biases and the layer inputs.
    """

    # layer initialization
    def __init__(self,n_inputs,n_neurons):
        """Create a layer mapping ``n_inputs`` features to ``n_neurons`` outputs."""
        super().__init__()
        # small random weights (standard normal scaled by 0.01), zero biases
        self.weights =  0.01*np.random.randn(n_inputs,n_neurons)
        self.biases = np.zeros((1,n_neurons))

    # forward pass of Dense Layer
    def forward(self,inputs):
        """Compute the layer output for ``inputs``.

        ``inputs`` is expected to be 2-D with ``n_inputs`` columns so that
        the matrix product with ``weights`` is defined.
        """
        # remember input values, they are needed to compute dweights
        self.inputs = inputs
        # affine transform: inputs . weights + biases
        self.outputs = np.dot(inputs,self.weights) + self.biases
        # alias under the name the activation classes use; `outputs` is kept
        # so existing callers keep working
        self.output = self.outputs

    # backward method or backpropagation
    def backward(self,dvalues):
        """Compute gradients from ``dvalues``, the gradient of the loss with
        respect to this layer's outputs. ``forward`` must have been called
        first because the stored ``inputs`` are used here.
        """
        # gradients on parameters
        # loss with respect to weights
        self.dweights = np.dot(self.inputs.T,dvalues)
        # loss with respect to biases (summed over the sample axis)
        self.dbiases = np.sum(dvalues,axis=0,keepdims=True)
        # loss with respect to the inputs, passed on to the previous layer
        self.dinputs = np.dot(dvalues,self.weights.T)

# ReLU Class

In [17]:
# ReLU activation class with forward pass and backpropagation
class ReLU:
    """Rectified linear activation: negative values become zero."""

    def forward(self,inputs):
        """Store ``inputs`` and set ``self.output`` to ``max(0, inputs)`` element-wise."""
        # kept because backward needs to know where the inputs were not positive
        self.inputs = inputs
        rectified = np.maximum(0,inputs)
        self.output = rectified

    def backward(self,dvalues):
        """Set ``self.dinputs`` to ``dvalues`` with entries zeroed wherever the
        stored forward inputs were less than or equal to zero.
        """
        # work on a copy so the caller's gradient array is left untouched
        grad = dvalues.copy()
        blocked = self.inputs <= 0
        grad[blocked] = 0
        self.dinputs = grad

# SoftMax Layer

In [18]:
# creating softmax class
class Softmax:
    """Row-wise softmax activation."""

    def forward(self,inputs):
        """Set ``self.output`` to the softmax of each row of ``inputs``."""
        # subtract each row's maximum before exponentiating so the largest
        # exponent is exp(0) = 1
        row_max = np.max(inputs,axis=1,keepdims=True)
        exp_values = np.exp(inputs - row_max)
        # divide by the row sums so every row adds up to 1
        row_totals = np.sum(exp_values,axis=1,keepdims=True)
        self.output = exp_values / row_totals

# CategoricalCrossEntropyLoss and Loss Class

In [19]:
class Loss:
    """Base class for losses; subclasses supply ``forward(output, y)``."""

    def calculate(self,output,y):
        """Return the mean of the per-sample losses produced by ``self.forward``."""
        return np.mean(self.forward(output,y))

In [20]:
class CrossEntropyLoss(Loss):
    """Categorical cross-entropy loss.

    Labels may be given either as class indices (1-D) or as one-hot rows
    (2-D). Any other label rank raises ``ValueError``.
    """

    def forward(self,y_pred,y_true):
        """Return the negative log-likelihood of the target class per sample.

        Args:
            y_pred: predicted probabilities, one row per sample.
            y_true: class indices (1-D) or one-hot rows (2-D).

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # storing y_pred and y_true
        self.y_true = y_true
        self.y_pred = y_pred
        # accept lists as well as arrays
        targets = np.asarray(y_true)
        # number of samples in a batch
        samples = len(y_pred)
        # clip on both sides so log(0) never occurs and the mean is not
        # dragged towards either end
        y_pred_clipped = np.clip(y_pred,1e-7,1-1e-7)
        if targets.ndim == 1:
            # sparse labels: pick the predicted probability of the target class
            correct_confidences = y_pred_clipped[range(samples),targets]
        elif targets.ndim == 2:
            # one-hot labels: the mask keeps only the target-class probability
            correct_confidences = np.sum(y_pred_clipped*targets,axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D class indices or 2-D one-hot rows, "
                f"got {targets.ndim} dimension(s)"
            )
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

    # backward pass or backpropagation
    def backward(self,dvalues,y_true):
        """Set ``self.dinputs`` to the gradient of the loss w.r.t. its inputs.

        Args:
            dvalues: the values the loss was evaluated on (predicted
                probabilities), one row per sample.
            y_true: class indices (1-D) or one-hot rows (2-D).

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.

        NOTE: unlike ``forward`` this does not clip ``dvalues``, so a zero
        entry in ``dvalues`` produces a division by zero.
        """
        # number of samples
        samples = len(dvalues)
        # number of labels in every sample
        # we will use the first sample to count them
        labels = len(dvalues[0])
        y_true = np.asarray(y_true)
        if y_true.ndim == 1:
            # sparse labels: turn them into one-hot vectors
            y_true = np.eye(labels)[y_true]
        elif y_true.ndim != 2:
            raise ValueError(
                "y_true must be 1-D class indices or 2-D one-hot rows, "
                f"got {y_true.ndim} dimension(s)"
            )
        # calculate gradient
        self.dinputs = -y_true / dvalues
        # normalize gradient by the batch size
        self.dinputs = self.dinputs/samples

# Combined Softmax and Categorical Cross-Entropy Class

In [21]:
# classifier - Softmax Classifier
# Loss Function - Categorical Cross Entropy Loss Function
class Activation_Softmax_Loss_CategoricalCrossEntropy:
    """Softmax activation fused with categorical cross-entropy loss."""

    def __init__(self):
        """Create the softmax activation and the cross-entropy loss objects."""
        self.activation = Softmax()
        self.loss = CrossEntropyLoss()

    def forward(self,inputs,y_true):
        """Run softmax on ``inputs``, keep the probabilities in ``self.output``
        and return the loss computed from them and ``y_true``.
        """
        softmax = self.activation
        softmax.forward(inputs)
        # the softmax result is both this object's output and the loss input
        self.output = softmax.output
        return self.loss.calculate(self.output,y_true)

    def backward(self, d_values,y_true):
        """Set ``self.dinputs`` to the combined gradient.

        One is subtracted from the entry of the target class in every row of
        a copy of ``d_values`` and the result is divided by the number of rows.
        """
        batch_size = len(d_values)
        # one-hot labels are converted to class indices
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true,axis=1)
        # copy so the caller's array is not modified
        grad = d_values.copy()
        grad[range(batch_size),y_true] -= 1
        # normalize by the batch size
        self.dinputs = grad/batch_size

# Generating Dataset

In [22]:
# create dataset
# Seed NumPy's global RNG first so a fresh Restart & Run All reproduces the
# same numbers: Dense draws its weights from np.random.randn, and
# spiral_data presumably uses the same global generator (confirm against nnfs).
np.random.seed(0)
X,y = spiral_data(samples=100,classes=3)

# Forward Pass

In [23]:
# Network: Dense(2 -> 3) -> ReLU -> Dense(3 -> 3) -> softmax + cross-entropy.
# Each stage is created and then immediately fed the previous stage's result.

# first dense layer: 2 input features, 3 neurons
dense_1 = Dense(n_inputs=2, n_neurons=3)
dense_1.forward(X)

# ReLU activation applied to the first dense layer's result
relu = ReLU()
relu.forward(dense_1.outputs)

# second dense layer: 3 inputs (one per neuron of the first layer), 3 outputs
dense_2 = Dense(n_inputs=3, n_neurons=3)
dense_2.forward(relu.output)

# combined softmax classifier and cross-entropy loss; forward returns the loss
loss_activation = Activation_Softmax_Loss_CategoricalCrossEntropy()
loss = loss_activation.forward(dense_2.outputs, y)

In [24]:
# show the softmax probabilities of the first five samples
print(loss_activation.output[:5])
# show the mean loss returned by the forward pass
print('Loss: ',loss)

[[0.33333333 0.33333333 0.33333333]
 [0.33333342 0.33333347 0.33333311]
 [0.33333351 0.33333361 0.33333288]
 [0.3333336  0.33333376 0.33333265]
 [0.33333372 0.33333352 0.33333276]]
Loss:  1.0986116893754867


In [25]:
# Accuracy: fraction of samples whose highest-probability class equals the target
predictions = np.argmax(loss_activation.output, axis=1)
# one-hot targets are reduced to class indices before comparing
if y.ndim == 2:
    y = y.argmax(axis=1)
hits = predictions == y
accuracy = np.mean(hits)
print(f"Accuracy: {accuracy}")

Accuracy: 0.27666666666666667


# Backward Pass

In [26]:
# Backpropagate from the loss to the first layer. The calls run in the
# reverse of the forward order; each one consumes the dinputs set by the
# call before it.
# combined softmax / cross-entropy gradient, computed from the softmax output
loss_activation.backward(loss_activation.output,y)
# second dense layer: sets dweights, dbiases and dinputs
dense_2.backward(loss_activation.dinputs)
# ReLU: zeroes the gradient where its forward inputs were <= 0
relu.backward(dense_2.dinputs)
# first dense layer: sets dweights, dbiases and dinputs
dense_1.backward(relu.dinputs)

In [27]:
# print the weight and bias gradients computed by the backward pass
# for both dense layers
print(f"Dweights - 1: {dense_1.dweights}")
print(f"DBiases -1: {dense_1.dbiases}")
print(f"DWeights - 2: {dense_2.dweights}")
print(f"Dbiases -2: {dense_2.dbiases}")

Dweights - 1: [[-1.34979178e-05 -5.68233055e-05 -1.09172517e-04]
 [-1.19384749e-04 -7.02433129e-05  2.16113387e-04]]
DBiases -1: [[-4.11988091e-04 -5.41601404e-05  6.58477145e-04]]
DWeights - 2: [[ 7.67654087e-05 -2.68928067e-04  1.92162658e-04]
 [-7.69842139e-05 -1.38042323e-05  9.07884462e-05]
 [ 8.36841060e-05 -1.70436887e-04  8.67527812e-05]]
Dbiases -2: [[ 1.77704472e-06  1.15997915e-06 -2.93702387e-06]]
