<a href="https://colab.research.google.com/github/Saikat-too/Neural_Network_From_Scratch/blob/main/NeuralNetworkSGD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Saikat Singha

In [19]:
import numpy as np

In [20]:
# Dense Layer with backward

class Layer_Dense:
    """Fully connected layer computing ``inputs . weight + biases``.

    ``weight`` has shape ``(inputs, neurons)`` and is initialised with small
    random values (standard normal samples scaled by 0.01); ``biases`` has
    shape ``(1, neurons)`` and starts at zero.
    """

    def __init__(self , inputs , neurons):
        # Small random weights, zero biases
        self.weight = np.random.randn(inputs, neurons) * 0.01
        self.biases = np.zeros((1, neurons))

    def forward(self , inputs):
        """Store ``inputs`` and the layer result in ``self.output``."""
        # Keep the inputs around: backward() needs them for the weight gradient
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weight)
        self.output = weighted_sum + self.biases

    def backward(self , dvalues):
        """Compute gradients from ``dvalues``, the gradient w.r.t. the output.

        Sets ``self.dweights``, ``self.dbiases`` (parameter gradients) and
        ``self.dinputs`` (gradient passed on to the previous layer).
        """
        inputs_transposed = self.inputs.T
        weight_transposed = self.weight.T
        # Gradients on the parameters
        self.dweights = np.dot(inputs_transposed, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # Gradient on the input values
        self.dinputs = np.dot(dvalues, weight_transposed)




In [21]:
# ReLu Activation
class Activation_ReLu:
    """Rectified linear unit activation: ``max(0, x)`` element-wise."""

    def forward(self , inputs):
        """Apply ReLU to ``inputs``.

        The result is stored in ``self.outputs`` and, under the same name the
        other layers in this file use, in ``self.output`` (both refer to the
        same array).
        """
        # Remember the input values; backward() needs them for the mask
        self.inputs = inputs
        self.outputs = np.maximum(0, inputs)
        # Alias so this layer exposes `output` like the dense/softmax layers
        self.output = self.outputs

    def backward(self , dvalues):
        """Store the gradient w.r.t. the inputs in ``self.dinputs``.

        ``dvalues`` is the gradient w.r.t. this layer's output; it is not
        modified.
        """
        # Work on a copy because the gradient is edited in place
        self.dinputs = dvalues.copy()
        # Zero the gradient wherever the input was not positive
        self.dinputs[self.inputs <= 0] = 0


In [22]:
# Softmax Activation Function

class Softmax_Activation:
    """Row-wise softmax activation."""

    def forward(self , input):
        """Compute softmax probabilities for each row of ``input``.

        ``input`` must be 2-D (the reductions run over ``axis=1``). The
        parameter name is kept as-is for backward compatibility even though
        it shadows the builtin. The result is stored in ``self.output``.
        """
        # Subtract the row maximum before exponentiating so np.exp cannot
        # overflow; this does not change the normalised result.
        exp_values = np.exp(input - np.max(input, axis=1, keepdims=True))

        # Normalise each row so it sums to 1
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self , dvalues):
        """Store the gradient w.r.t. the softmax inputs in ``self.dinputs``.

        ``dvalues`` is the gradient w.r.t. ``self.output``. For one sample
        with output ``s`` and upstream gradient ``g`` the Jacobian product is
        ``(diag(s) - s s^T) g = s * (g - sum(s * g))``, which is evaluated
        here for all rows at once instead of building each Jacobian matrix.

        The result takes its dtype from the floating-point softmax output,
        so integer ``dvalues`` no longer truncate the gradient to integers.
        """
        # Per-sample dot product s . g, kept as a column for broadcasting
        weighted_sum = np.sum(self.output * dvalues, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - weighted_sum)


In [23]:
# Common Loss

class Loss:
    """Base class for losses; subclasses provide ``forward(output, y)``."""

    def calculate(self , output , y ):
        """Return the mean of the per-sample losses from ``self.forward``."""
        # Per-sample losses come from the subclass; the data loss is their mean
        return np.mean(self.forward(output, y))


In [24]:
#Cross Entropy Loss

class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy loss for sparse or one-hot labels."""

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the true class per sample.

        ``y_true`` may be 1-D (class indices) or 2-D (one-hot rows).

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # Number of samples in a batch
        samples = len(y_pred)

        # Clip so that log(0) cannot occur; clipping both ends keeps the
        # mean from being dragged towards either value.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        label_rank = len(y_true.shape)
        if label_rank == 1:
            # Sparse labels: pick the predicted probability of the target class
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif label_rank == 2:
            # One-hot labels: mask out everything but the target class
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `correct_confidences`.
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got shape {y_true.shape}"
            )

        return -np.log(correct_confidences)

    def backward(self , dvalues , y_true):
        """Store the gradient w.r.t. the predictions in ``self.dinputs``.

        ``dvalues`` holds the predictions the loss was evaluated on. The
        gradient is averaged over the number of samples.
        """
        # Number of samples
        samples = len(dvalues)
        # Number of labels per sample, counted on the first sample
        labels = len(dvalues[0])

        # If labels are sparse, turn them into one-hot vectors
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        # NOTE: dvalues is used unclipped here, so an entry that is exactly 0
        # produces inf/nan in the gradient.
        self.dinputs = -y_true / dvalues
        # Normalise the gradient by the batch size
        self.dinputs = self.dinputs / samples



In [25]:
# Softmax Classifier -> Combined softmax activation and cross entropy loss for faster backward step

class Activation_Softmax_Loss_CategoricalCrossentropy:
    """Softmax activation fused with categorical cross-entropy loss.

    Combining the two lets ``backward`` use the simplified gradient
    ``predictions - one_hot(labels)`` instead of chaining both derivatives.
    """

    def __init__(self):
        # Activation and loss objects used by forward()
        self.activation = Softmax_Activation()
        self.loss = Loss_CategoricalCrossEntropy()

    def forward(self ,inputs , y_true):
        """Run softmax on ``inputs``, keep the result in ``self.output`` and
        return the loss value computed against ``y_true``."""
        softmax = self.activation
        softmax.forward(inputs)
        self.output = softmax.output
        return self.loss.calculate(self.output, y_true)

    def backward(self , dvalues , y_true):
        """Store the gradient w.r.t. the softmax inputs in ``self.dinputs``.

        ``dvalues`` holds the softmax outputs; ``y_true`` may be class
        indices (1-D) or one-hot rows (2-D).
        """
        batch_size = len(dvalues)
        # One-hot labels are reduced to discrete class indices
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)
        # Copy so the caller's array is left untouched
        gradient = dvalues.copy()
        # Subtract 1 at each sample's true class
        gradient[range(batch_size), y_true] -= 1
        # Normalise by the number of samples
        self.dinputs = gradient / batch_size

In [26]:
class Optimizer_SGD:
    """Stochastic gradient descent with optional learning-rate decay.

    With a non-zero ``decay`` the effective rate for each step is
    ``learning_rate / (1 + decay * iterations)``.

    Expected call order per training step: ``pre_update_params()``, then
    ``update_params(layer)`` for every layer, then ``post_update_params()``.
    """

    # Initialize optimizer settings; a learning rate of 1 is the default
    def __init__(self , learning_rate=1. , decay=0.):
        self.learning_rate = learning_rate
        # Rate actually applied by update_params(); refreshed when decaying
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0

    # Call once before any parameter updates
    def pre_update_params(self):
        """Recompute ``current_learning_rate`` when decay is enabled."""
        if self.decay:
            self.current_learning_rate = self.learning_rate * (1. / (1 + self.decay * self.iterations))

    # Update parameters
    def update_params(self , layer):
        """Move ``layer.weight`` and ``layer.biases`` against their gradients.

        Uses ``current_learning_rate`` so that the decay computed in
        ``pre_update_params`` takes effect (the initial ``learning_rate``
        was used here before, which made ``decay`` a no-op).
        """
        layer.weight += -self.current_learning_rate * layer.dweights
        layer.biases += -self.current_learning_rate * layer.dbiases

    # Call once after every round of parameter updates
    def post_update_params(self):
        """Advance the step counter used by the decay schedule."""
        self.iterations += 1

In [27]:
!pip install nnfs




In [28]:
import numpy as np
import nnfs

In [29]:
from nnfs.datasets import spiral_data

In [30]:
# First dense layer: 2 inputs, 64 neurons
dense1 = Layer_Dense(2 ,64)

In [31]:
# ReLU activation, applied to the output of dense1 in the training loop
activation1 =Activation_ReLu()

In [32]:
# Second dense layer: 64 inputs (one per dense1 neuron), 3 neurons
dense2 = Layer_Dense(64,3)

In [33]:

# Combined softmax activation and categorical cross-entropy loss
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()

In [34]:
# SGD optimizer with its defaults (learning_rate=1., decay=0.)
optimizer = Optimizer_SGD()

In [35]:
# Spiral dataset from nnfs: X is fed to dense1, y is used as the labels
X , y = spiral_data(samples = 100 , classes=3)

In [36]:
# Training loop

for epoch in range(10001):
    # ---- Forward pass: dense1 -> ReLU -> dense2 -> softmax + loss ----
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.outputs)
    loss = loss_activation.forward(dense2.output, y)

    # ---- Accuracy: predicted class vs. target class ----
    predictions = np.argmax(loss_activation.output, axis=1)
    # One-hot targets are reduced to class indices before comparing
    if len(y.shape) == 2:
        y = np.argmax(y, axis=1)
    accuracy = np.mean(predictions == y)

    # Report progress every 100 epochs
    if epoch % 100 == 0:
        print (f'epoch : {epoch},' + f'acc : {accuracy:.3f},' + f'loss : {loss:.3f},' + f'lr : {optimizer.current_learning_rate}')

    # ---- Backward pass, in reverse order of the forward pass ----
    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    # ---- Update weights and biases ----
    optimizer.pre_update_params()
    for layer in (dense1, dense2):
        optimizer.update_params(layer)
    optimizer.post_update_params()










epoch : 0,acc : 0.333,loss : 1.099,lr : 1.0
epoch : 100,acc : 0.460,loss : 1.069,lr : 1.0
epoch : 200,acc : 0.450,loss : 1.061,lr : 1.0
epoch : 300,acc : 0.460,loss : 1.059,lr : 1.0
epoch : 400,acc : 0.460,loss : 1.058,lr : 1.0
epoch : 500,acc : 0.457,loss : 1.056,lr : 1.0
epoch : 600,acc : 0.457,loss : 1.052,lr : 1.0
epoch : 700,acc : 0.447,loss : 1.043,lr : 1.0
epoch : 800,acc : 0.397,loss : 1.047,lr : 1.0
epoch : 900,acc : 0.380,loss : 1.035,lr : 1.0
epoch : 1000,acc : 0.440,loss : 1.016,lr : 1.0
epoch : 1100,acc : 0.450,loss : 0.999,lr : 1.0
epoch : 1200,acc : 0.470,loss : 0.992,lr : 1.0
epoch : 1300,acc : 0.457,loss : 1.000,lr : 1.0
epoch : 1400,acc : 0.463,loss : 0.996,lr : 1.0
epoch : 1500,acc : 0.477,loss : 0.997,lr : 1.0
epoch : 1600,acc : 0.513,loss : 0.990,lr : 1.0
epoch : 1700,acc : 0.480,loss : 0.992,lr : 1.0
epoch : 1800,acc : 0.460,loss : 0.982,lr : 1.0
epoch : 1900,acc : 0.463,loss : 0.971,lr : 1.0
epoch : 2000,acc : 0.487,loss : 0.967,lr : 1.0
epoch : 2100,acc : 0.470,

Learning rate reference (video): https://youtu.be/jWT-AX9677k?si=KJoqyZZyExJKqL7U

Discussion on choosing a learning rate: https://www.reddit.com/r/datascience/comments/tgualj/is_there_any_solid_scientific_way_of_choosing/