<a href="https://colab.research.google.com/github/Saikat-too/Neural_Network_From_Scratch/blob/main/NeuralNetworkBackpropagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Saikat Singha

In [55]:
import numpy as np

In [56]:
# Dense Layer with backward

class Layer_Dense:
  """Fully connected layer computing ``np.dot(inputs, weight) + biases``.

  Attributes written by the methods:
    weight, biases             -- parameters created in ``__init__``
    output, inputs             -- cached by ``forward``
    dweights, dbiases, dinputs -- gradients produced by ``backward``
  """

  def __init__(self, inputs, neurons):
    """Create an ``(inputs, neurons)`` weight matrix and a ``(1, neurons)`` bias row."""
    # Weights are standard-normal draws scaled by 0.01; biases start at zero.
    weight_shape = (inputs, neurons)
    self.weight = 0.01 * np.random.randn(*weight_shape)
    self.biases = np.zeros((1, neurons))

  def forward(self, inputs):
    """Compute the layer output and keep ``inputs`` for the backward pass."""
    weighted_sum = np.dot(inputs, self.weight)
    self.output = weighted_sum + self.biases
    # Remembered because ``backward`` needs it for the weight gradient.
    self.inputs = inputs

  def backward(self, dvalues):
    """Turn the upstream gradient ``dvalues`` into parameter and input gradients."""
    upstream = dvalues
    # Parameter gradients.
    self.dweights = np.dot(self.inputs.T, upstream)
    self.dbiases = np.sum(upstream, axis=0, keepdims=True)
    # Gradient handed to the previous layer.
    self.dinputs = np.dot(upstream, self.weight.T)




In [94]:
# ReLu Activation
class Activation_ReLu:
  """Rectified linear activation: element-wise ``max(0, x)``.

  Attributes written by the methods:
    inputs   -- the array passed to ``forward``
    output   -- activation result (same attribute name the other layers use)
    outputs  -- alias of ``output``, kept for code that reads the old name
    dinputs  -- gradient produced by ``backward``
  """

  # Forward Pass
  def forward(self, inputs):
    """Apply ReLU to ``inputs`` and cache both the inputs and the result."""
    # Remember input values; ``backward`` uses them to build the mask.
    self.inputs = inputs
    self.output = np.maximum(0, inputs)
    # Backward-compatible alias: this class historically exposed ``outputs``.
    self.outputs = self.output

  # Backward Pass
  def backward(self, dvalues):
    """Pass ``dvalues`` through where the input was positive, zero elsewhere."""
    # Work on a copy so the caller's gradient array is left untouched.
    self.dinputs = dvalues.copy()
    # Zero the gradient wherever the forward input was zero or negative.
    self.dinputs[self.inputs <= 0] = 0


In [95]:
# Softmax Activation Function

class Softmax_Activation:
  """Row-wise softmax activation.

  ``forward`` stores the normalized probabilities in ``output``;
  ``backward`` stores the gradient with respect to the inputs in ``dinputs``.
  """

  def forward(self, input):
    """Exponentiate each row (shifted by its maximum) and normalize it to sum to 1."""
    # Subtracting the row maximum makes the largest exponent in each row 0.
    row_peak = np.max(input, axis=1, keepdims=True)
    shifted_exp = np.exp(input - row_peak)
    row_total = np.sum(shifted_exp, axis=1, keepdims=True)
    self.output = shifted_exp / row_total

  def backward(self, dvalues):
    """Apply each sample's softmax Jacobian to its row of ``dvalues``."""
    # Uninitialized buffer; every paired row is filled in the loop below.
    self.dinputs = np.empty_like(dvalues)

    paired_rows = zip(self.output, dvalues)
    for row, (probs, upstream) in enumerate(paired_rows):
      # Column vector of this sample's probabilities.
      column = probs.reshape(-1, 1)
      # Jacobian: diag(s) - s s^T
      jacobian = np.diagflat(column) - np.dot(column, column.T)
      self.dinputs[row] = np.dot(jacobian, upstream)


In [96]:
# Common Loss

class Loss:
  """Base class for losses; subclasses supply ``forward(output, y)``."""

  def calculate(self, output, y):
    """Return the mean of the per-sample losses produced by ``self.forward``.

    Only the data loss is computed here; no regularization term is added.
    """
    per_sample = self.forward(output, y)
    return np.mean(per_sample)


In [97]:
#Cross Entropy Loss

class Loss_CategoricalCrossEntropy(Loss):
  """Categorical cross-entropy over per-sample class scores.

  Labels may be given either as a 1-D array of class indices or as a 2-D
  one-hot array with one row per sample.
  """

  # Forward Pass
  def forward(self, y_pred, y_true):
    """Return the negative log of the value predicted for each sample's target.

    Raises:
      ValueError: if ``y_true`` is neither 1-D nor 2-D.
    """
    # Number of samples in a batch
    samples = len(y_pred)

    # Clip on both sides so log() never sees exactly 0 and the mean is not
    # dragged towards either end.
    y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

    label_rank = len(y_true.shape)
    if label_rank == 1:
      # Sparse labels: pick the predicted value at each target index.
      correct_confidences = y_pred_clipped[range(samples), y_true]
    elif label_rank == 2:
      # One-hot labels: mask and sum along the class axis.
      correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
    else:
      # Previously this fell through and failed later with an
      # UnboundLocalError on `correct_confidences`.
      raise ValueError(
          "y_true must be 1-D (class indices) or 2-D (one-hot); "
          "got %d dimensions" % label_rank)

    # Losses
    negative_log_likelihoods = -np.log(correct_confidences)

    return negative_log_likelihoods

  # Backward Pass
  def backward(self, dvalues, y_true):
    """Store ``-y_true / dvalues`` divided by the batch size in ``self.dinputs``.

    ``dvalues`` is used as the denominator, so it must not contain zeros.
    """
    # Number of samples
    samples = len(dvalues)
    # Number of labels in every sample; the first sample is used to count them.
    labels = len(dvalues[0])

    # If labels are sparse, turn them into one-hot vectors.
    if len(y_true.shape) == 1:
      y_true = np.eye(labels)[y_true]
    # Calculate gradient
    self.dinputs = -y_true / dvalues
    # Normalize gradient by the number of samples
    self.dinputs = self.dinputs / samples



In [98]:
# Softmax Classifier -> Combined softmax activation and cross entropy loss for faster backward step

class Activation_Softmax_Loss_CategoricalCrossentropy:
  """Softmax activation and categorical cross-entropy loss handled as one unit.

  ``forward`` runs the softmax and returns the loss; ``backward`` computes the
  gradient of the combined pair directly.
  """

  def __init__(self):
    """Create the softmax activation and the cross-entropy loss objects."""
    self.activation = Softmax_Activation()
    self.loss = Loss_CategoricalCrossEntropy()

  def forward(self, inputs, y_true):
    """Run softmax on ``inputs``, expose it as ``output`` and return the loss."""
    softmax = self.activation
    softmax.forward(inputs)
    self.output = softmax.output
    return self.loss.calculate(self.output, y_true)

  def backward(self, dvalues, y_true):
    """Store ``(dvalues - one_hot(y_true)) / n_samples`` in ``self.dinputs``."""
    batch_size = len(dvalues)
    # One-hot labels are reduced to class indices so they can be used for indexing.
    if len(y_true.shape) == 2:
      y_true = np.argmax(y_true, axis=1)
    # Work on a copy; the caller's array stays untouched.
    gradient = dvalues.copy()
    # Subtract 1 at each sample's target class.
    gradient[range(batch_size), y_true] -= 1
    # Normalize by the number of samples.
    self.dinputs = gradient / batch_size

In [62]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q nnfs




In [63]:
import numpy as np
import nnfs

In [64]:
from nnfs.datasets import spiral_data

In [99]:
# First dense layer: 2 inputs -> 3 neurons.
dense1 = Layer_Dense(2 ,3)

In [100]:
# ReLU activation applied to the first dense layer's output.
activation1 =Activation_ReLu()

In [101]:
# Second dense layer: 3 inputs (the first layer's neurons) -> 3 neurons.
dense2 = Layer_Dense(3,3)

In [102]:
# Combined softmax activation + categorical cross-entropy loss.
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()

In [103]:
# Generate the nnfs spiral dataset with 3 classes: X holds features, y the labels.
X , y = spiral_data(samples = 100 , classes=3)

In [104]:
# Forward pass through the first dense layer; the result is stored on dense1.output.
dense1.forward(X)

In [105]:
# Apply ReLU to the first layer's output.
activation1.forward(dense1.output)

In [106]:
# Feed the ReLU result into the second dense layer.
# Note: Activation_ReLu exposes its result as `outputs` (plural).
dense2.forward(activation1.outputs)

In [107]:
# Softmax + cross-entropy on the second layer's output; returns the mean loss.
loss = loss_activation.forward(dense2.output , y)

In [108]:
# Show the softmax output for the first 5 samples only.
print(loss_activation.output[:5])

[[0.33333334 0.33333334 0.33333334]
 [0.3333329  0.33333465 0.33333248]
 [0.33333355 0.33333308 0.3333334 ]
 [0.3333335  0.33333316 0.33333337]
 [0.33333436 0.333332   0.33333364]]


In [109]:



# Report the loss returned by the forward pass.
print ('loss' , loss)

loss 1.0986216


In [110]:
# Predicted class = index of the largest softmax output in each row.
predictions = np.argmax(loss_activation.output, axis=1)

# One-hot labels are collapsed to class indices before comparing.
if len(y.shape) == 2:
  y = np.argmax(y, axis=1)

# Fraction of samples whose predicted class equals the label.
accuracy = np.mean(predictions == y)

In [111]:
# Report the accuracy computed in the previous cell.
print('acc' , accuracy)

acc 0.38


In [112]:
# Backward pass: propagate gradients from the loss back to the first layer.
# Each step consumes the `dinputs` produced by the step before it, so the
# order of these calls matters.

loss_activation.backward(loss_activation.output , y)
dense2.backward(loss_activation.dinputs)
activation1.backward(dense2.dinputs)
dense1.backward(activation1.dinputs)

In [116]:
# Show the parameter gradients of both dense layers, one array after another
# (weights then biases for dense1, then the same for dense2).

print(
    dense1.dweights,
    dense1.dbiases,
    dense2.dweights,
    dense2.dbiases,
    sep="\n",
)

[[-5.0960352e-05 -1.8986563e-04  3.7796417e-05]
 [ 3.1350408e-04  1.7625358e-04 -1.1716461e-03]]
[[ 0.00063679  0.00028276 -0.00094657]]
[[ 1.4502851e-04 -2.9231978e-04  1.4729128e-04]
 [ 6.7305977e-05  1.6400444e-04 -2.3131041e-04]
 [ 2.6439058e-04  1.2619779e-04 -3.9058836e-04]]
[[-7.4376585e-06  2.7806498e-05 -2.0474428e-05]]


Backpropagation references (3Blue1Brown):

1. https://youtu.be/Ilg3gGewQ5U?si=f9setGMTBWCSH7M6
2. https://youtu.be/tIeHLnjs5U8?si=IN8dfdzQKab6SuOu
