# Gradient Descent Optimizer

In this lecture we will code a gradient descent optimizer for a neural network.

In [1]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data
import matplotlib.pyplot as plt

In [2]:
# creating Dense layer class with backpropagation
class Dense:
    """Fully connected layer computing ``inputs . weights + biases``.

    Attributes populated by the methods:
        weights: array of shape ``(n_inputs, n_neurons)``.
        biases: array of shape ``(1, n_neurons)``.
        inputs: the value last passed to ``forward``.
        outputs / output: result of the last forward pass (same object).
        dweights, dbiases, dinputs: gradients from the last backward pass.
    """

    def __init__(self, n_inputs, n_neurons):
        """Create the layer parameters.

        Args:
            n_inputs: number of input features per sample.
            n_neurons: number of neurons (output features) in the layer.
        """
        super().__init__()
        # standard-normal weights scaled by 0.01, biases start at zero
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Run the forward pass and store the result on the layer.

        Args:
            inputs: array whose last dimension matches ``n_inputs``.
        """
        # keep the inputs: backward() needs them for the weight gradient
        self.inputs = inputs
        self.outputs = np.dot(inputs, self.weights) + self.biases
        # expose the result as ``output`` as well, the attribute name used by
        # the activation classes in this file; ``outputs`` is kept so that
        # existing callers keep working
        self.output = self.outputs

    def backward(self, dvalues):
        """Compute gradients of the loss for this layer.

        Args:
            dvalues: gradient of the loss with respect to this layer's
                outputs, as produced by the following layer.
        """
        # gradient with respect to the weights
        self.dweights = np.dot(self.inputs.T, dvalues)
        # gradient with respect to the biases: summed over the sample axis
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # gradient with respect to the inputs, passed to the previous layer
        self.dinputs = np.dot(dvalues, self.weights.T)

In [3]:
# creating ReLU class with backpropagation
class ReLU:
    """Rectified linear activation: ``max(0, x)`` applied element-wise."""

    def forward(self, inputs):
        """Store ``inputs`` and write the activated values to ``self.output``."""
        # the inputs are needed later to build the gradient mask
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Write the gradient with respect to the inputs to ``self.dinputs``.

        Args:
            dvalues: gradient arriving from the next layer; it is copied,
                so the caller's array is not modified.
        """
        grad = dvalues.copy()
        # the gradient is zero wherever the forward input was not positive
        grad[self.inputs <= 0] = 0
        self.dinputs = grad

In [4]:
# creating softmax class
class Softmax:
    """Softmax activation normalising each row into a probability vector."""

    def forward(self, inputs):
        """Compute row-wise softmax of ``inputs`` and store it in ``self.output``.

        Args:
            inputs: array reduced along ``axis=1``, so it needs at least
                two dimensions.
        """
        # subtract the row maximum before exponentiating
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exponentials = np.exp(shifted)
        row_totals = np.sum(exponentials, axis=1, keepdims=True)
        self.output = exponentials / row_totals

In [5]:
class Loss:
    """Base class for losses; subclasses supply ``forward(output, y)``."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        Args:
            output: model predictions, forwarded unchanged to ``forward``.
            y: target values, forwarded unchanged to ``forward``.

        Returns:
            ``np.mean`` of whatever ``self.forward(output, y)`` returns.
        """
        return np.mean(self.forward(output, y))

In [6]:
class CrossEntropyLoss(Loss):
    """Categorical cross-entropy loss for sparse or one-hot targets."""

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of each sample.

        Args:
            y_pred: predicted values, one row per sample.
            y_true: targets, either a 1-D array of class indices or a 2-D
                array weighting each class (e.g. one-hot rows).

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # keep both arguments on the instance
        self.y_true = y_true
        self.y_pred = y_pred
        # number of samples in the batch
        samples = len(y_pred)
        # clip on both sides so log(0) cannot occur and the mean is not
        # dragged towards either end
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        label_rank = len(y_true.shape)
        if label_rank == 1:
            # sparse labels: pick the predicted value of the target class
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif label_rank == 2:
            # one-hot labels: mask out everything except the target class
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # previously this fell through and failed with an
            # UnboundLocalError on the next line
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {label_rank} dimensions"
            )
        return -np.log(correct_confidences)

    def backward(self, dvalues, y_true):
        """Store the gradient of the loss in ``self.dinputs``.

        Args:
            dvalues: values the loss is differentiated with respect to,
                one row per sample.
            y_true: targets, either 1-D class indices or 2-D one-hot rows.
        """
        # number of samples
        samples = len(dvalues)
        # number of labels per sample, counted on the first sample
        labels = len(dvalues[0])
        # sparse labels are expanded into one-hot vectors
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]
        # gradient, then normalised by the number of samples.
        # NOTE: dvalues is not clipped here, so exact zeros in it produce
        # inf/nan entries in the gradient.
        self.dinputs = -y_true / dvalues
        self.dinputs = self.dinputs / samples

In [7]:
# classifier - Softmax Classifier
# Loss Function - Categorical Cross Entropy Loss Function
class Activation_Softmax_Loss_CategoricalCrossEntropy:
    """Softmax activation and categorical cross-entropy loss in one object."""

    def __init__(self):
        """Create the softmax activation and the cross-entropy loss."""
        self.activation = Softmax()
        self.loss = CrossEntropyLoss()

    def forward(self, inputs, y_true):
        """Apply the activation, then return the loss against ``y_true``.

        The activation's output is also kept in ``self.output``.
        """
        self.activation.forward(inputs)
        # the activation result is both the public output and the loss input
        self.output = self.activation.output
        return self.loss.calculate(self.output, y_true)

    def backward(self, d_values, y_true):
        """Store the combined gradient in ``self.dinputs``.

        Args:
            d_values: one row of values per sample; it is copied, so the
                caller's array stays untouched.
            y_true: targets, either 1-D class indices or 2-D one-hot rows.
        """
        n_samples = len(d_values)
        # one-hot targets are reduced to class indices
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)
        grad = d_values.copy()
        # subtract 1 at the target class of every sample
        grad[range(n_samples), y_true] -= 1
        # normalise by the number of samples
        self.dinputs = grad / n_samples

In [8]:
# Gradient Descent Optimizer
class GradientDescent:
    """Plain gradient descent: parameters move against their gradient."""

    def __init__(self, learning_rate=1):
        """Store the step size; the default learning rate is 1."""
        self.learning_rate = learning_rate

    def update_parameters(self, layer):
        """Update ``layer.weights`` and ``layer.biases`` in place.

        Args:
            layer: object providing ``weights``, ``biases``, ``dweights``
                and ``dbiases``.
        """
        step = self.learning_rate
        layer.weights -= step * layer.dweights
        layer.biases -= step * layer.dbiases

In [9]:
# build the training set with nnfs' spiral_data (samples=100, classes=3)
X, y = spiral_data(samples=100, classes=3)

In [10]:
# first dense layer: 2 input features -> 64 neurons
dense_1 = Dense(2, 64)
# ReLU activation, applied to the output of the first dense layer
relu = ReLU()
# second dense layer: 64 inputs (the previous layer's outputs) -> 3 output values
dense_2 = Dense(64, 3)
# combined softmax activation and categorical cross-entropy loss
loss_activation = Activation_Softmax_Loss_CategoricalCrossEntropy()
# gradient descent optimizer with a learning rate of 0.1
optimizer = GradientDescent(0.1)

# training loop
for epoch in range(10001):
    # ---- forward pass ----
    # training data through the first dense layer
    dense_1.forward(X)
    # ReLU on the first dense layer's output
    relu.forward(dense_1.outputs)
    # second dense layer on the ReLU output
    dense_2.forward(relu.output)
    # softmax + loss on the second dense layer's output; returns the loss
    loss = loss_activation.forward(dense_2.outputs, y)

    # ---- accuracy ----
    # predicted class = index of the largest softmax output in each row
    predictions = np.argmax(loss_activation.output, axis=1)

    # one-hot targets are reduced to class indices before comparing
    if len(y.shape) == 2:
        y = np.argmax(y, axis=1)
    accuracy = np.mean(predictions == y)

    # report progress every 100 epochs
    if epoch % 100 == 0:
        print(f'epoch: {epoch}, acc: {accuracy:.3f}, loss: {loss:.3f}')

    # ---- backward pass (reverse order of the forward pass) ----
    loss_activation.backward(loss_activation.output, y)
    dense_2.backward(loss_activation.dinputs)
    relu.backward(dense_2.dinputs)
    dense_1.backward(relu.dinputs)

    # ---- parameter update ----
    optimizer.update_parameters(dense_1)
    optimizer.update_parameters(dense_2)

epoch: 0, acc: 0.340, loss: 1.099
epoch: 100, acc: 0.433, loss: 1.098
epoch: 200, acc: 0.427, loss: 1.098
epoch: 300, acc: 0.420, loss: 1.098
epoch: 400, acc: 0.413, loss: 1.098
epoch: 500, acc: 0.413, loss: 1.097
epoch: 600, acc: 0.413, loss: 1.096
epoch: 700, acc: 0.413, loss: 1.094
epoch: 800, acc: 0.413, loss: 1.092
epoch: 900, acc: 0.413, loss: 1.089
epoch: 1000, acc: 0.417, loss: 1.086
epoch: 1100, acc: 0.420, loss: 1.083
epoch: 1200, acc: 0.417, loss: 1.081
epoch: 1300, acc: 0.417, loss: 1.079
epoch: 1400, acc: 0.430, loss: 1.078
epoch: 1500, acc: 0.427, loss: 1.077
epoch: 1600, acc: 0.427, loss: 1.076
epoch: 1700, acc: 0.423, loss: 1.075
epoch: 1800, acc: 0.417, loss: 1.074
epoch: 1900, acc: 0.420, loss: 1.074
epoch: 2000, acc: 0.413, loss: 1.073
epoch: 2100, acc: 0.420, loss: 1.073
epoch: 2200, acc: 0.420, loss: 1.072
epoch: 2300, acc: 0.420, loss: 1.072
epoch: 2400, acc: 0.420, loss: 1.072
epoch: 2500, acc: 0.427, loss: 1.072
epoch: 2600, acc: 0.423, loss: 1.072
epoch: 2700, 

In [11]:
# show the softmax outputs of the first five samples
print(loss_activation.output[:5])
# show the loss value left over from the last training iteration
print('Loss: ', loss)

[[0.32816587 0.3306806  0.34115352]
 [0.3270962  0.33379876 0.33910503]
 [0.32518862 0.33720453 0.33760685]
 [0.32717808 0.33787271 0.3349492 ]
 [0.32766006 0.3392009  0.33313904]]
Loss:  1.0386800661036162


In [12]:
# accuracy of the softmax outputs against the targets:
# predicted class = index of the largest value along axis 1
predictions = np.argmax(loss_activation.output, axis=1)
# one-hot targets are reduced to class indices before comparing
if len(y.shape) == 2:
    y = np.argmax(y, axis=1)
# fraction of samples whose prediction equals the target
accuracy = np.mean(predictions == y)
print(f"Accuracy: {accuracy}")

Accuracy: 0.48
