<a href="https://colab.research.google.com/github/Mr-MaNia7/deep-learning/blob/main/DL_Lab_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Backward Propagation
## Abdulkarim Getachew UGR/7992/12


In [3]:
!pip install nnfs

Collecting nnfs
  Downloading nnfs-0.5.1-py3-none-any.whl (9.1 kB)
Installing collected packages: nnfs
Successfully installed nnfs-0.5.1


In [4]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()

# Dense layer


In [5]:
class DenseLayer:
    """Fully connected layer computing ``inputs . weights + biases``.

    Parameters
    ----------
    n_inputs : int
        Number of input features (rows of the weight matrix).
    n_neurons : int
        Number of neurons (columns of the weight matrix).
    weight_initializer : str, default 'random'
        ``'random'`` scales standard-normal draws by 0.01;
        ``'xavier'`` scales them by ``sqrt(2 / (n_inputs + n_neurons))``.

    Raises
    ------
    ValueError
        If ``weight_initializer`` is neither ``'random'`` nor ``'xavier'``.
    """

    def __init__(self, n_inputs, n_neurons, weight_initializer='random'):
        # Reject unknown schemes up front, before any random numbers are drawn.
        if weight_initializer not in ('random', 'xavier'):
            raise ValueError("Invalid weight initializer")

        if weight_initializer == 'xavier':
            fan_scale = np.sqrt(2 / (n_inputs + n_neurons))
            self.weights = np.random.randn(n_inputs, n_neurons) * fan_scale
        else:
            self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)

        # One bias per neuron, stored as a row vector so it broadcasts over the batch.
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Store ``inputs`` and set ``self.output`` to the affine transform of them."""
        # The inputs are kept because backward() needs them for the weight gradient.
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

    def backward(self, dvalues):
        """Set ``dweights``, ``dbiases`` and ``dinputs`` from the upstream gradient ``dvalues``."""
        # Gradient passed on to the previous layer.
        self.dinputs = np.dot(dvalues, self.weights.T)
        # Parameter gradients: the bias gradient sums the upstream gradient over the batch axis.
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dweights = np.dot(self.inputs.T, dvalues)

# ReLU activation


In [6]:
class ReLUActivation:
    """Rectified linear unit: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Store ``inputs`` and set ``self.output`` to their element-wise maximum with 0."""
        # The inputs are kept so backward() knows which units were inactive.
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Set ``self.dinputs`` to ``dvalues`` with entries zeroed where the input was <= 0."""
        inactive = self.inputs <= 0
        # Work on a copy so the caller's gradient array is left untouched.
        grad = dvalues.copy()
        # Units whose input was non-positive pass no gradient back.
        grad[inactive] = 0
        self.dinputs = grad

# Softmax activation


In [7]:
class SoftmaxActivation:
    """Row-wise softmax: turns each row of scores into a probability distribution."""

    def forward(self, inputs):
        """Store ``inputs`` and set ``self.output`` to the softmax of each row.

        ``inputs`` is indexed along axis 1 for the per-row max and sum, so it
        must be at least two-dimensional (one row per sample).
        """
        # Save input values for the backward pass
        self.inputs = inputs

        # Subtracting the row maximum keeps exp() from overflowing; the shift
        # cancels out in the normalization below.
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))

        # Normalize so every row sums to 1
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, dvalues):
        """Set ``self.dinputs`` to the gradient of the loss w.r.t. the softmax inputs.

        For one sample with output ``s`` and upstream gradient ``g`` the softmax
        Jacobian is ``J = diag(s) - s s^T``, so ``J @ g = s * (g - s . g)``.
        That closed form is evaluated for all rows at once rather than building
        a Jacobian matrix per sample in a Python loop.

        The result takes its dtype from the floating-point product with
        ``self.output``, so integer-typed ``dvalues`` are not truncated.
        """
        # Per-row dot product s . g, kept as a column so it broadcasts over the row
        weighted = np.sum(self.output * dvalues, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - weighted)


# Common loss class


In [9]:
class Loss:
    """Base class for losses.

    Subclasses supply ``forward(output, y)``, which ``calculate`` calls to
    obtain the per-sample losses.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses from ``self.forward(output, y)``.

        Only the data loss is computed here; no regularization term is added.
        """
        per_sample = self.forward(output, y)
        return np.mean(per_sample)


# Cross-entropy loss


In [10]:
class CategoricalCrossentropyLoss(Loss):
    """Categorical cross-entropy for sparse (1-D) or one-hot (2-D) labels."""

    # Predictions are clipped to [_CLIP, 1 - _CLIP] before log() and before
    # division, so an exact 0 cannot produce inf or nan.
    _CLIP = 1e-7

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the target class for each sample.

        Parameters
        ----------
        y_pred : array
            Predicted probabilities, one row per sample.
        y_true : array
            Either a 1-D array of class indices or a 2-D one-hot array.

        Raises
        ------
        ValueError
            If ``y_true`` is neither 1-D nor 2-D.
        """
        # Number of samples in a batch
        samples = len(y_pred)

        # Clip so that log(0) cannot occur
        y_pred_clipped = np.clip(y_pred, self._CLIP, 1 - self._CLIP)

        label_rank = len(y_true.shape)
        if label_rank == 1:
            # Sparse labels: pick the predicted probability of the target class
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif label_rank == 2:
            # One-hot labels: mask out everything but the target class
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Without this branch the code below would fail with an
            # UnboundLocalError that hides the real problem.
            raise ValueError(
                "y_true must be 1-D class indices or 2-D one-hot labels, "
                f"got an array with {label_rank} dimensions"
            )

        return -np.log(correct_confidences)

    def backward(self, dvalues, y_true):
        """Set ``self.dinputs`` to the batch-averaged gradient ``-y_true / dvalues``.

        ``dvalues`` holds the predicted probabilities. They are clipped to the
        same range as in ``forward`` so that a probability of exactly 0 yields
        a large finite gradient instead of ``inf`` or ``nan``.
        """
        # Number of samples
        samples = len(dvalues)
        # Number of labels in every sample, counted from the first sample
        labels = len(dvalues[0])

        # If labels are sparse, turn them into one-hot vectors
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        # Guard the division against zero probabilities
        safe_dvalues = np.clip(dvalues, self._CLIP, 1 - self._CLIP)

        # Gradient of -log(p_target), divided by the batch size
        self.dinputs = -y_true / safe_dvalues
        self.dinputs = self.dinputs / samples


# Softmax classifier - combined Softmax activation and cross-entropy loss



In [11]:
class SoftmaxCrossentropyActivation:
    """Softmax output activation paired with categorical cross-entropy loss.

    Combining the two lets ``backward`` use the simplified gradient
    ``(predictions - one_hot_targets) / samples`` directly.
    """

    def __init__(self):
        self.activation = SoftmaxActivation()
        self.loss = CategoricalCrossentropyLoss()

    def forward(self, inputs, y_true):
        """Run the softmax on ``inputs``, expose it as ``self.output``, and return the loss."""
        self.activation.forward(inputs)
        probabilities = self.activation.output
        self.output = probabilities
        return self.loss.calculate(probabilities, y_true)

    def backward(self, dvalues, y_true):
        """Set ``self.dinputs`` from predictions ``dvalues`` and targets ``y_true``.

        ``y_true`` may be a 1-D array of class indices or a 2-D one-hot array.
        """
        batch_size = len(dvalues)

        # One-hot targets are reduced to class indices for the fancy indexing below.
        if y_true.ndim == 2:
            y_true = np.argmax(y_true, axis=1)

        # Work on a copy so the caller's array is not modified.
        grad = dvalues.copy()
        # Subtract 1 at each sample's target class: predictions minus one-hot targets.
        grad[np.arange(batch_size), y_true] -= 1
        # Average over the batch.
        self.dinputs = grad / batch_size


# Driver code

In [16]:
class NeuralNetwork:
    """Two-layer classifier: Dense -> ReLU -> Dense -> Softmax with cross-entropy loss.

    Parameters
    ----------
    n_inputs : int, default 2
        Number of input features.
    n_hidden : int, default 3
        Number of neurons in the hidden layer.
    n_classes : int, default 3
        Number of output classes.
    """

    def __init__(self, n_inputs=2, n_hidden=3, n_classes=3):
        self.dense1 = DenseLayer(n_inputs, n_hidden)
        self.activation1 = ReLUActivation()
        self.dense2 = DenseLayer(n_hidden, n_classes)
        self.loss_activation = SoftmaxCrossentropyActivation()

    def forward(self, X, y=None):
        """Run the forward pass on ``X`` and return the loss against targets ``y``.

        ``y`` is taken as an argument instead of being read from a
        module-level variable, so the result no longer depends on whatever
        ``y`` happens to be defined in the notebook.

        Raises
        ------
        ValueError
            If ``y`` is not supplied.
        """
        if y is None:
            raise ValueError("forward() needs the target labels y to compute the loss")

        # Forward pass through the layers
        self.dense1.forward(X)
        self.activation1.forward(self.dense1.output)
        self.dense2.forward(self.activation1.output)
        return self.loss_activation.forward(self.dense2.output, y)

    def backward(self, y):
        """Backpropagate from the loss to the first layer; requires a prior ``forward`` call."""
        # The combined softmax/loss step starts from its own stored predictions.
        self.loss_activation.backward(self.loss_activation.output, y)
        self.dense2.backward(self.loss_activation.dinputs)
        self.activation1.backward(self.dense2.dinputs)
        self.dense1.backward(self.activation1.dinputs)

    def train(self, X, y, learning_rate=0.01, epochs=1000):
        """Train with full-batch gradient descent, printing the loss every 100 epochs.

        The printed loss is the one computed before that epoch's parameter update.
        """
        for epoch in range(epochs):
            # Forward and backward pass
            loss = self.forward(X, y)
            self.backward(y)

            # Update weights and biases using gradient descent
            for layer in (self.dense1, self.dense2):
                layer.weights -= learning_rate * layer.dweights
                layer.biases -= learning_rate * layer.dbiases

            # Print loss for every 100 epochs
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')

# Spiral dataset: 100 samples for each of 3 classes
X, y = spiral_data(samples=100, classes=3)

# Build the network and train it with the default learning rate and epoch count
model = NeuralNetwork()
model.train(X, y)

print()

# These are the gradients left over from the last epoch's backward pass
print("Gradients after training:")
for heading, gradient in (
    ("Dense1 Weights:\n", model.dense1.dweights),
    ("Dense1 Biases:\n", model.dense1.dbiases),
    ("Dense2 Weights:\n", model.dense2.dweights),
    ("Dense2 Biases:\n", model.dense2.dbiases),
):
    print(heading, gradient)


Epoch 0, Loss: 1.0986077785491943
Epoch 100, Loss: 1.0986071825027466
Epoch 200, Loss: 1.0986064672470093
Epoch 300, Loss: 1.0986056327819824
Epoch 400, Loss: 1.0986040830612183
Epoch 500, Loss: 1.0986028909683228
Epoch 600, Loss: 1.0986019372940063
Epoch 700, Loss: 1.09860098361969
Epoch 800, Loss: 1.0986000299453735
Epoch 900, Loss: 1.0985991954803467

Gradients after training:
Dense1 Weights:
 [[2.7538810e-05 2.0965304e-04 6.2896207e-04]
 [1.5314254e-05 1.3425304e-04 3.2094587e-04]]
Dense1 Biases:
 [[ 9.1978975e-05 -8.1915859e-05 -2.4086129e-04]]
Dense2 Weights:
 [[ 3.70227790e-05 -1.97678135e-04  1.60655341e-04]
 [ 5.33945713e-05  1.21198376e-04 -1.74592948e-04]
 [ 1.42504388e-04  1.88320948e-04 -3.30825336e-04]]
Dense2 Biases:
 [[-5.1407842e-06 -4.2991014e-06  9.2568807e-06]]
