<a href="https://colab.research.google.com/github/MuleHakim/Deep-Learning-Pytorch/blob/main/DL_BackProp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install torchvision
import numpy as np
import torch
from torchvision import datasets, transforms


# Seed both RNGs so that Restart & Run All reproduces the same batch and the
# same random weight initialisation: torch drives the DataLoader shuffling,
# NumPy drives np.random.randn in Layer_Dense further down the notebook.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

# Tunable settings used by this cell
DATA_ROOT = '~/.pytorch/MNIST_data/'
BATCH_SIZE = 64
NUM_CLASSES = 10

# Convert images to tensors, then normalize with mean 0.5 and std 0.5
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Load the MNIST train and test splits using torchvision (downloaded if missing)
trainset = datasets.MNIST(DATA_ROOT, download=True, train=True, transform=transform)
testset = datasets.MNIST(DATA_ROOT, download=True, train=False, transform=transform)

# Create DataLoaders for the training and testing sets
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=True)

# Extract exactly one batch of training data
images, labels = next(iter(trainloader))

# Flatten every image to a single row and one-hot encode the integer labels
X_train = images.view(images.shape[0], -1).numpy()
y_train_onehot = np.eye(NUM_CLASSES)[labels.numpy()]




In [None]:
# Dense layer
class Layer_Dense:
    """Fully connected layer computing ``inputs · weights + biases``.

    Attributes set by the methods:
        weights: array of shape (n_inputs, n_neurons), small random values.
        biases: array of shape (1, n_neurons), initially all zeros.
        inputs, output: stored by ``forward``.
        dweights, dbiases, dinputs: stored by ``backward``.
    """

    def __init__(self, n_inputs, n_neurons):
        """Create the parameters for a layer with the given fan-in and width."""
        # Standard-normal samples scaled down to keep initial outputs small
        self.weights = np.random.randn(n_inputs, n_neurons) * 0.01
        self.biases = np.zeros(shape=(1, n_neurons))

    def forward(self, inputs):
        """Store ``inputs`` and compute the layer output into ``self.output``."""
        # The inputs are needed again in backward() for the weight gradient
        self.inputs = inputs
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases

    def backward(self, dvalues):
        """Compute gradients from ``dvalues``, the gradient w.r.t. the output.

        Sets ``dweights`` and ``dbiases`` (gradients on the parameters) and
        ``dinputs`` (gradient to hand to the previous layer).
        """
        # Gradient flowing back to whatever produced the inputs
        self.dinputs = np.dot(dvalues, self.weights.T)
        # Parameter gradients; the bias gradient sums over the first axis
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dweights = np.dot(self.inputs.T, dvalues)



In [None]:
# ReLU activation
class Activation_ReLU:
    """Rectified linear unit: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Store ``inputs`` and write the rectified values to ``self.output``."""
        # Kept for backward(), which needs to know where the input was <= 0
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Pass ``dvalues`` through wherever the forward input was positive.

        The result is stored in ``self.dinputs``; ``dvalues`` itself is left
        untouched because the masking is applied to a copy.
        """
        grad = dvalues.copy()
        # Positions whose forward input was zero or negative get zero gradient
        blocked = self.inputs <= 0
        grad[blocked] = 0
        self.dinputs = grad



In [None]:
# Softmax activation
class Activation_Softmax:
    """Row-wise softmax activation.

    ``forward`` turns each row of scores into probabilities that sum to 1;
    ``backward`` applies the softmax Jacobian to an upstream gradient.
    """

    # Forward pass
    def forward(self, inputs):
        """Store ``inputs`` and write per-row probabilities to ``self.output``.

        Args:
            inputs: 2-D array; the softmax is taken along axis 1.
        """
        # Remember input values
        self.inputs = inputs
        # Subtract each row's maximum before exponentiating so the largest
        # exponent is exp(0); this leaves the normalized result unchanged
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        # Normalize them for each sample
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    # Backward pass
    def backward(self, dvalues):
        """Compute the gradient w.r.t. the softmax inputs into ``self.dinputs``.

        For one sample with output ``s`` and upstream gradient ``d`` the
        Jacobian is ``diag(s) - s sᵀ``, so the product with ``d`` is
        ``s * (d - sum(s * d))``. Evaluating that closed form for all rows at
        once avoids building a Jacobian matrix per sample.

        The result takes its dtype from the arithmetic with ``self.output``
        rather than from ``dvalues``, so an integer-typed ``dvalues`` no
        longer truncates the gradient.

        Args:
            dvalues: upstream gradient, same shape as ``self.output``.
        """
        # Per-row dot product of the output with the upstream gradient
        weighted_sum = np.sum(self.output * dvalues, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - weighted_sum)



In [None]:
# Common loss class
class Loss:
    """Base class for losses; subclasses provide ``forward(output, y)``."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses from ``self.forward``.

        Args:
            output: model output passed through to ``forward``.
            y: ground-truth values passed through to ``forward``.
        """
        # Calculate sample losses
        sample_losses = self.forward(output, y)
        # Mean over the samples is the data loss
        return np.mean(sample_losses)


# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy for sparse (1-D) or one-hot (2-D) labels."""

    # Predictions are clipped to [_EPSILON, 1 - _EPSILON] before being used in
    # log() or as a divisor, so exact zeros cannot produce inf/NaN.
    _EPSILON = 1e-7

    # Forward pass
    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the true class per sample.

        Args:
            y_pred: 2-D array of predicted probabilities, one row per sample.
            y_true: 1-D array of integer class indices, or a 2-D one-hot
                array with the same shape as ``y_pred``.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        y_pred = np.asarray(y_pred)
        y_true = np.asarray(y_true)
        # Number of samples in a batch
        samples = len(y_pred)
        # Clip both sides so the mean is not dragged towards either bound
        y_pred_clipped = np.clip(y_pred, self._EPSILON, 1 - self._EPSILON)
        if y_true.ndim == 1:
            # Sparse labels: pick the predicted probability of the true class
            correct_confidences = y_pred_clipped[np.arange(samples), y_true]
        elif y_true.ndim == 2:
            # One-hot labels: the mask keeps only the true-class probability
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                'y_true must be 1-D (class indices) or 2-D (one-hot), '
                'got %d dimensions' % y_true.ndim
            )
        # Losses
        return -np.log(correct_confidences)

    # Backward pass
    def backward(self, dvalues, y_true):
        """Store the gradient of the loss w.r.t. the predictions in ``dinputs``.

        Args:
            dvalues: 2-D array of predicted probabilities (the values that
                were passed to ``forward``).
            y_true: 1-D class indices or 2-D one-hot labels.
        """
        dvalues = np.asarray(dvalues)
        y_true = np.asarray(y_true)
        # Number of samples
        samples = len(dvalues)
        # Number of labels in every sample, counted on the first sample
        labels = len(dvalues[0])
        # If labels are sparse, turn them into one-hot vectors
        if y_true.ndim == 1:
            y_true = np.eye(labels)[y_true]
        # Same clipping as in forward(): avoids division by zero
        safe_dvalues = np.clip(dvalues, self._EPSILON, 1 - self._EPSILON)
        # Gradient, normalized by the number of samples
        self.dinputs = -y_true / safe_dvalues / samples



In [None]:
# Softmax classifier - combined Softmax activation
# and cross-entropy loss for faster backward step
class Activation_Softmax_Loss_CategoricalCrossentropy():
    """Softmax activation and categorical cross-entropy loss as one unit.

    Treating the two together lets ``backward`` use the simple combined
    gradient instead of chaining the two separate backward steps.
    """

    def __init__(self):
        """Instantiate the wrapped activation and loss objects."""
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossentropy()

    def forward(self, inputs, y_true):
        """Run the activation on ``inputs`` and return the loss against ``y_true``.

        The activation's output is also exposed as ``self.output``.
        """
        softmax = self.activation
        softmax.forward(inputs)
        self.output = softmax.output
        return self.loss.calculate(self.output, y_true)

    def backward(self, dvalues, y_true):
        """Store the gradient w.r.t. the activation inputs in ``self.dinputs``.

        Args:
            dvalues: array with one row per sample; 1 is subtracted at each
                row's true-class position and the result is divided by the
                number of rows.
            y_true: class indices (1-D) or one-hot rows (2-D).
        """
        n_samples = len(dvalues)
        # One-hot rows are reduced to the index of their largest entry
        class_ids = np.argmax(y_true, axis=1) if len(y_true.shape) == 2 else y_true
        # Work on a copy so the caller's array is not modified
        grad = dvalues.copy()
        grad[range(n_samples), class_ids] -= 1
        # Average over the batch
        self.dinputs = grad / n_samples

In [None]:
# Layer sizes: the input width and the number of classes are read from the
# data prepared earlier, so this cell stays correct if the batch changes shape;
# the hidden width is the one free choice.
n_features = X_train.shape[1]
n_classes = y_train_onehot.shape[1]
N_HIDDEN = 64

# Create the first Dense layer: one input per feature, N_HIDDEN outputs
dense1 = Layer_Dense(n_features, N_HIDDEN)

# Create ReLU activation (to be used with the first Dense layer)
activation1 = Activation_ReLU()

# Create the second Dense layer: it takes the N_HIDDEN outputs of the previous
# layer and produces one value per class
dense2 = Layer_Dense(N_HIDDEN, n_classes)

# Create Softmax classifier's combined loss and activation
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()

# Forward pass of the training batch through the first Dense layer
dense1.forward(X_train)

# Forward pass through the activation function,
# which takes the output of the first Dense layer
activation1.forward(dense1.output)

# Forward pass through the second Dense layer,
# which takes the output of the activation function
dense2.forward(activation1.output)

# Forward pass through the combined activation/loss,
# which takes the output of the second Dense layer and returns the loss
loss = loss_activation.forward(dense2.output, y_train_onehot)



In [None]:
# Show the softmax output for the first five samples of the batch: one row per
# sample, one predicted class probability per column.
print(loss_activation.output[:5])



[[0.10111312 0.10091501 0.10065762 0.10011536 0.09881388 0.09770232
  0.10018043 0.09959669 0.10178134 0.09912422]
 [0.10082201 0.09948633 0.10008114 0.1007011  0.0996671  0.09969102
  0.10205675 0.09843529 0.09929668 0.0997626 ]
 [0.10119645 0.10270662 0.10202328 0.09993858 0.09920311 0.09740989
  0.10138436 0.09867394 0.100573   0.09689078]
 [0.10105929 0.10072766 0.10134901 0.10048848 0.09819008 0.09871686
  0.10044242 0.09919836 0.10169274 0.09813509]
 [0.10086254 0.10078634 0.10061818 0.10069984 0.09770212 0.09946223
  0.10114631 0.09855743 0.10244274 0.09772225]]


In [None]:
# Print the loss returned by the forward pass above: the mean of the
# per-sample cross-entropy losses over the batch.
print('loss:', loss)



loss: 2.3023723824524893


In [None]:
# Calculate accuracy from the softmax output of loss_activation and the targets.
# The predicted class is the index of the largest probability in each row.
predictions = np.argmax(loss_activation.output, axis=1)

# Targets as class indices: one-hot rows are collapsed with argmax, while
# already-sparse labels are used as they are. y_train is assigned on both
# paths so the comparison below works for either label format.
if y_train_onehot.ndim == 2:
    y_train = np.argmax(y_train_onehot, axis=1)
else:
    y_train = y_train_onehot

# Fraction of samples whose predicted class equals the target class
accuracy = np.mean(predictions == y_train)

# Print accuracy
print('acc:', accuracy)

acc: 0.109375


In [None]:
# Backpropagate from the loss towards the input, in reverse layer order.
# The combined softmax/loss step receives the predicted probabilities and the
# targets; every earlier step consumes the dinputs of the step that follows it
# in the forward direction.
loss_activation.backward(loss_activation.output, y_train_onehot)
dense2.backward(loss_activation.dinputs)
activation1.backward(dense2.dinputs)
dense1.backward(activation1.dinputs)

# Show the parameter gradients of the first dense layer, one item per line
print("Gradients for dense1:", dense1.dweights, dense1.dbiases, sep="\n")


Gradients for dense1:
[[ 9.65431940e-06 -9.20002167e-05  7.43459472e-05 ...  2.26045102e-05
  -1.32089985e-03  1.36385469e-04]
 [ 9.65431940e-06 -9.20002167e-05  7.43459472e-05 ...  2.26045102e-05
  -1.32089985e-03  1.36385469e-04]
 [ 9.65431940e-06 -9.20002167e-05  7.43459472e-05 ...  2.26045102e-05
  -1.32089985e-03  1.36385469e-04]
 ...
 [ 9.65431940e-06 -9.20002167e-05  7.43459472e-05 ...  2.26045102e-05
  -1.32089985e-03  1.36385469e-04]
 [ 9.65431940e-06 -9.20002167e-05  7.43459472e-05 ...  2.26045102e-05
  -1.32089985e-03  1.36385469e-04]
 [ 9.65431940e-06 -9.20002167e-05  7.43459472e-05 ...  2.26045102e-05
  -1.32089985e-03  1.36385469e-04]]
[[-9.65431940e-06  9.20002167e-05 -7.43459472e-05  2.28283573e-04
   5.71201096e-04  6.65137772e-04 -1.27935077e-03  8.42257423e-04
  -2.03235627e-04 -2.52281249e-05 -1.32723750e-03 -4.05878173e-04
   1.09031355e-03  0.00000000e+00 -6.22409527e-04  5.76867796e-05
   9.95725165e-04 -4.17804313e-04 -3.87026180e-04  0.00000000e+00
  -1.4757354

In [None]:
# Show the parameter gradients of the second dense layer, one item per line
print("Gradients for dense2:", dense2.dweights, dense2.dbiases, sep="\n")

Gradients for dense2:
[[-9.02533441e-03  1.65711735e-03  3.21885182e-03  3.35283241e-03
   2.79906366e-03  1.50049864e-03 -6.09835165e-03  1.28364283e-03
   2.14723035e-04  1.09695632e-03]
 [-1.07281648e-03  1.35648662e-03 -4.81198385e-03 -1.30135048e-03
   7.46620543e-04  1.34299721e-03  1.12170715e-03  3.25758673e-04
   1.36333012e-03  9.29250493e-04]
 [ 1.94002476e-04  1.91374804e-04 -1.38721888e-03 -1.53955121e-04
   1.90918741e-04  1.93065761e-04  1.93148502e-04  1.93216860e-04
   1.97638904e-04  1.87807957e-04]
 [ 6.18731830e-05  6.22104842e-05  6.15499764e-05  6.10389899e-05
  -5.53375903e-04  5.99911813e-05  6.24230186e-05  6.09227996e-05
   6.18362548e-05  6.15300153e-05]
 [ 2.16549823e-02  9.81743255e-03 -3.60243385e-02 -5.98276643e-03
   1.32352053e-02 -3.28660721e-03 -1.27918864e-02  1.60798294e-02
  -7.65117457e-04 -1.93673364e-03]
 [ 7.58190159e-03  1.26973592e-02 -2.21478534e-02 -1.85631611e-02
   1.50471298e-02 -2.07814735e-03 -5.82052747e-03  1.49126044e-02
   9.265793