<a href="https://colab.research.google.com/github/Saikat-too/Neural_Network_From_Scratch/blob/main/NeuralNetworkSGD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Saikat Singha

In [1]:
import numpy as np

In [2]:
# Dense Layer with backward

class Layer_Dense:
  """Fully connected layer computing ``inputs . weight + biases``.

  Attributes written by the methods:
    weight, biases             -- trainable parameters (set in ``__init__``)
    output, inputs             -- set by ``forward``
    dweights, dbiases, dinputs -- set by ``backward``
  """

  def __init__(self , inputs , neurons):
    """Create parameters for ``inputs`` input features and ``neurons`` units."""
    # Gaussian draws scaled by 0.01; shape is (inputs, neurons)
    gaussian_draws = np.random.randn(inputs , neurons)
    self.weight = 0.01 * gaussian_draws
    # A single row of zeros, one entry per neuron
    self.biases = np.zeros(shape=(1 , neurons))

  def forward(self , inputs):
    """Compute ``self.output`` and remember ``inputs`` for the backward pass."""
    weighted_sum = np.dot(inputs , self.weight)
    self.output = weighted_sum + self.biases
    self.inputs = inputs

  def backward(self , dvalues):
    """Store parameter and input gradients given ``dvalues``.

    ``dvalues`` is the gradient of the loss with respect to this layer's
    output, as handed back by the next layer.
    """
    cached_inputs_t = self.inputs.T
    # Gradients with respect to the parameters
    self.dweights = np.dot(cached_inputs_t , dvalues)
    self.dbiases = np.sum(dvalues , axis=0 , keepdims=True)
    # Gradient with respect to the inputs, passed on to the previous layer
    self.dinputs = np.dot(dvalues , self.weight.T)




In [3]:
# ReLu Activation
class Activation_ReLu:
  """Rectified linear activation: element-wise ``max(0, x)``.

  ``forward`` stores the result under both ``self.outputs`` (the name this
  class has always used) and ``self.output`` (the name used by the other
  layers in this notebook), so either spelling works for callers.
  """

  # Forward Pass
  def forward(self , inputs):
    """Apply ReLU to ``inputs`` and cache ``inputs`` for the backward pass."""
    # Remember input values; backward() needs them to build the mask
    self.inputs = inputs
    self.outputs = np.maximum(0 , inputs)
    # Alias for consistency with the `output` attribute of the other layers
    self.output = self.outputs

  # Backward Pass
  def backward(self , dvalues):
    """Store in ``self.dinputs`` the gradient with respect to the inputs.

    ``dvalues`` is the gradient with respect to this activation's output.
    """
    # Work on a copy so the caller's gradient array is left untouched
    self.dinputs = dvalues.copy()
    # No gradient flows where the input was zero or negative
    self.dinputs[self.inputs <= 0] = 0


In [4]:
# Softmax Activation Function

class Softmax_Activation:
  """Row-wise softmax activation.

  ``forward`` writes the normalized probabilities to ``self.output``;
  ``backward`` writes the gradient with respect to the inputs to
  ``self.dinputs``.
  """

  # Forward Pass
  def forward(self , input):
    """Turn each row of ``input`` into probabilities that sum to 1.

    The parameter name shadows the builtin ``input`` but is kept because it
    is part of the method's signature.
    """
    # Subtract each row's maximum before exponentiating so np.exp cannot overflow
    exp_values = np.exp(input - np.max(input , axis=1 , keepdims=True))
    # Normalize per sample
    self.output = exp_values / np.sum(exp_values , axis=1 , keepdims=True)

  # Backward Pass
  def backward(self , dvalues):
    """Store the gradient with respect to the softmax inputs in ``self.dinputs``.

    For one sample with output ``s`` and upstream gradient ``d`` the result
    is ``(diag(s) - s s^T) d``, which simplifies to ``s * d - s * sum(s * d)``.
    Evaluating that form for all rows at once avoids building a Jacobian per
    sample in a Python loop. The result always takes a floating-point dtype
    from the arithmetic, so integer ``dvalues`` are no longer truncated (the
    previous ``np.empty_like(dvalues)`` buffer inherited their dtype).
    """
    # s * d for every sample
    weighted = self.output * dvalues
    # sum(s * d) per sample, kept as a column so it broadcasts across the row
    row_totals = np.sum(weighted , axis=1 , keepdims=True)
    self.dinputs = weighted - self.output * row_totals


In [5]:
# Common Loss

class Loss:
  """Base class for losses.

  Subclasses provide ``forward(output, y)`` returning the per-sample losses;
  ``calculate`` reduces them to a single number.
  """

  def calculate(self , output , y ):
    """Return the mean of the per-sample losses for ``output`` against ``y``."""
    # Per-sample losses come from the subclass; average them over the batch
    return np.mean(self.forward(output , y))


In [6]:
#Cross Entropy Loss

class Loss_CategoricalCrossEntropy(Loss):
  """Categorical cross-entropy loss.

  Labels may be sparse class indices (1-D) or one-hot rows (2-D).
  """

  # Predictions are clipped to [_CLIP, 1 - _CLIP] before taking a logarithm
  # or dividing by them.
  _CLIP = 1e-7

  # Forward Pass
  def forward(self, y_pred, y_true):
    """Return the negative log-likelihood of the true class for each sample.

    Raises:
      ValueError: if ``y_true`` is neither 1-D nor 2-D.
    """
    y_true = np.asarray(y_true)
    # Number of samples in a batch
    samples = len(y_pred)

    # Clip to avoid log(0); both sides are clipped so the mean is not
    # dragged towards either end
    y_pred_clipped = np.clip(y_pred, self._CLIP, 1 - self._CLIP)

    if y_true.ndim == 1:
      # Sparse labels: pick the predicted probability of the target class
      correct_confidences = y_pred_clipped[range(samples), y_true]
    elif y_true.ndim == 2:
      # One-hot labels: mask out everything except the target class
      correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
    else:
      # Previously this fell through and failed on an unbound local
      raise ValueError(
          f'y_true must be 1-D (class indices) or 2-D (one-hot), got {y_true.ndim}-D')

    return -np.log(correct_confidences)

  # Backward Pass
  def backward(self , dvalues , y_true):
    """Store the gradient of the loss with respect to the predictions.

    ``dvalues`` holds the predicted probabilities. The result, already
    divided by the number of samples, is written to ``self.dinputs``.
    """
    y_true = np.asarray(y_true)
    # Number of samples
    samples = len(dvalues)
    # Number of labels per sample, counted on the first sample
    labels = len(dvalues[0])

    # Sparse labels are expanded to one-hot rows
    if y_true.ndim == 1:
      y_true = np.eye(labels)[y_true]

    # Clip exactly as forward() does: a predicted probability of 0 would
    # otherwise produce inf (or nan for 0 / 0) here
    safe_predictions = np.clip(dvalues, self._CLIP, 1 - self._CLIP)

    # Gradient, normalized by the batch size
    self.dinputs = (-y_true / safe_predictions) / samples



In [7]:
# Softmax Classifier -> Combined softmax activation and cross entropy loss for faster backward step

class Activation_Softmax_Loss_CategoricalCrossentropy():
  """Softmax activation fused with categorical cross-entropy loss.

  Handling the two together lets ``backward`` use the short combined
  gradient (predictions minus one-hot targets) instead of chaining the two
  separate backward passes.
  """

  def __init__(self):
    """Create the wrapped activation and loss objects."""
    self.activation = Softmax_Activation()
    self.loss = Loss_CategoricalCrossEntropy()

  def forward(self ,inputs , y_true):
    """Run softmax on ``inputs``, keep the result in ``self.output`` and return the loss."""
    softmax = self.activation
    softmax.forward(inputs)
    # Expose the probabilities so callers can compute predictions
    self.output = softmax.output
    return self.loss.calculate(self.output , y_true)

  def backward(self , dvalues , y_true):
    """Store the combined softmax + cross-entropy gradient in ``self.dinputs``.

    ``dvalues`` holds the softmax outputs; ``y_true`` may be class indices
    (1-D) or one-hot rows (2-D).
    """
    batch_size = len(dvalues)
    # One-hot rows are reduced to discrete class indices
    is_one_hot = len(y_true.shape) == 2
    targets = np.argmax(y_true , axis=1) if is_one_hot else y_true
    # Copy first so the caller's array is not modified
    gradient = dvalues.copy()
    # Subtract 1 at the true class of every sample
    gradient[range(batch_size), targets] -= 1
    # Normalize by the batch size
    self.dinputs = gradient / batch_size

In [49]:
class Optimizer_SGD:
  """Plain gradient-descent optimizer with a fixed learning rate."""

  def __init__(self , learning_rate=1.0):
    """Store the step size; 1.0 is the default."""
    self.learning_rate = learning_rate

  def update_params(self , layer):
    """Move ``layer.weight`` and ``layer.biases`` against their gradients, in place.

    ``layer`` must already carry ``dweights`` and ``dbiases``.
    """
    step = self.learning_rate
    layer.weight -= step * layer.dweights
    layer.biases -= step * layer.dbiases

In [8]:
!pip install nnfs


Collecting nnfs
  Downloading nnfs-0.5.1-py3-none-any.whl (9.1 kB)
Installing collected packages: nnfs
Successfully installed nnfs-0.5.1


In [9]:
import numpy as np
import nnfs

In [10]:
from nnfs.datasets import spiral_data

In [27]:
# Seed NumPy's global RNG before the first random draw so the weight
# initialisation (and any later code drawing from the same generator) gives
# the same numbers on every Restart & Run All.
np.random.seed(0)
# First dense layer: 2 input features -> 64 neurons
dense1 = Layer_Dense(2 , 64)

In [28]:
# ReLU activation applied to the output of the first dense layer
activation1 = Activation_ReLu()

In [29]:
# Second dense layer: 64 inputs (the first layer's neurons) -> 3 outputs
dense2 = Layer_Dense(inputs=64 , neurons=3)

In [30]:

# Combined softmax activation + categorical cross-entropy loss for the output layer
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()

In [51]:
# SGD optimizer; 1.0 is the class default, spelled out here for the reader
optimizer = Optimizer_SGD(learning_rate=1.0)

In [52]:
# Spiral data set: 100 samples for each of the 3 classes
X, y = spiral_data(samples=100, classes=3)

In [53]:
# Training loop

EPOCHS = 10001     # number of full passes over the training data
LOG_EVERY = 100    # print accuracy / loss once every this many epochs

# The class indices used for the accuracy check and the backward pass do not
# change between epochs, so one-hot labels are converted once here instead of
# being re-checked on every iteration. `y` itself is left untouched.
y_class = np.argmax(y , axis=1) if len(y.shape) == 2 else y

for epoch in range(EPOCHS):
    # Forward pass: dense1 -> ReLU -> dense2 -> softmax + loss
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.outputs)
    loss = loss_activation.forward(dense2.output , y)

    # Accuracy: share of samples whose most probable class is the target
    predictions = np.argmax(loss_activation.output , axis=1)
    accuracy = np.mean(predictions == y_class)

    if epoch % LOG_EVERY == 0:
        print (f'epoch : {epoch},' + f'acc : {accuracy:.3f},' + f'loss : {loss:.3f}')

    # Backward pass, in reverse order of the forward pass
    loss_activation.backward(loss_activation.output , y_class)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    # Update weights and biases
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)










epoch : 0,acc : 0.343,loss : 1.099
epoch : 100,acc : 0.430,loss : 1.078
epoch : 200,acc : 0.430,loss : 1.067
epoch : 300,acc : 0.430,loss : 1.066
epoch : 400,acc : 0.433,loss : 1.065
epoch : 500,acc : 0.457,loss : 1.064
epoch : 600,acc : 0.460,loss : 1.062
epoch : 700,acc : 0.457,loss : 1.060
epoch : 800,acc : 0.460,loss : 1.054
epoch : 900,acc : 0.460,loss : 1.042
epoch : 1000,acc : 0.470,loss : 1.027
epoch : 1100,acc : 0.520,loss : 1.012
epoch : 1200,acc : 0.493,loss : 0.994
epoch : 1300,acc : 0.453,loss : 0.993
epoch : 1400,acc : 0.477,loss : 0.983
epoch : 1500,acc : 0.477,loss : 0.978
epoch : 1600,acc : 0.470,loss : 0.975
epoch : 1700,acc : 0.507,loss : 0.956
epoch : 1800,acc : 0.480,loss : 0.962
epoch : 1900,acc : 0.487,loss : 0.918
epoch : 2000,acc : 0.547,loss : 0.903
epoch : 2100,acc : 0.537,loss : 0.883
epoch : 2200,acc : 0.587,loss : 0.875
epoch : 2300,acc : 0.527,loss : 0.886
epoch : 2400,acc : 0.607,loss : 0.850
epoch : 2500,acc : 0.647,loss : 0.817
epoch : 2600,acc : 0.617

Gradient Descent -> https://youtu.be/IHZwWFHWa-w?si=32k-mu7MSUSj_iUI


SGD -> https://youtu.be/UmathvAKj80?si=LsSjXg_bcUjPTvu3