<a href="https://colab.research.google.com/github/SimonMekonnen/DeepLearing/blob/main/DL%20LAB5%20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


*   Simon Mekonnen
*   UGR/9508/12




In [3]:
import torch

In [4]:
class DenseLayer():
    def __init__(self, input_features, output_neurons):
        # Initialize weights with small random values and biases with zeros
        self.weights = 0.01 * torch.rand(output_neurons, input_features)
        self.biases = torch.zeros(1, output_neurons)

    def forward(self, input_data):
        # Compute the weighted sum and add biases for the forward pass
        self.input_data = input_data
        self.output = torch.matmul(input_data, self.weights.T) + self.biases

    def backward(self, gradient_values):
        # Calculate gradients for weights, biases, and input values during backward pass
        self.dweights = torch.matmul(self.input_data.T, gradient_values)
        self.dbiases = torch.sum(gradient_values, axis=0, keepdims=True)
        self.dinputs = torch.matmul(gradient_values, self.weights.T)

In [5]:
class ReLUActivation:

    def forward(self, input_values):
        # Apply ReLU activation: if input_values < 0, set to 0, else keep the value
        output_values = torch.max(input_values, torch.tensor(0.0))
        return output_values

    def backward(self, gradient_values):
        # Clone the gradient values to avoid in-place modification
        self.dinputs = gradient_values.clone()

        # Derivative of ReLU activation on the inputs
        self.dinputs[self.input_values <= 0] = 0
        # Note: Assuming self.input_values is the input data during the forward pass

In [6]:
class SoftmaxActivation:
    def forward(self, input_values):
        # Calculate exponentials of input values
        exp_values = torch.exp(input_values)
        # Get the shape of the input
        input_shape = input_values.shape
        # Calculate the sum of exponentials along the second axis
        sum_exp_values = torch.sum(exp_values, axis=1, keepdims=True)
        # Compute the softmax result by dividing each element by the sum
        result = exp_values / sum_exp_values
        self.output = result
        return result

    def backward(self, gradient_values):
        # Initialize an empty tensor with the same shape as gradient_values
        self.dinputs = torch.empty_like(gradient_values)

        for index, (single_output, single_gradient) in enumerate(zip(self.output, gradient_values)):
            # Reshape the single_output to a column vector
            single_output = single_output.reshape(-1, 1)
            # Calculate the Jacobian matrix for the softmax
            jacobian_matrix = torch.diagflat(single_output) - torch.matmul(single_output, single_output.T)
            # Multiply the Jacobian matrix by the gradient to get the derivative
            self.dinputs[index] = torch.matmul(jacobian_matrix, single_gradient)


In [7]:
relu = ReLUActivation()

softmax = SoftmaxActivation()

In [9]:
manual_seed = 42
torch.manual_seed(manual_seed)
features = 2
neuron_1 = 2
output_class = 2
samples = 10
lower_bound = 0
upper_bound = 10000
input_layer = (upper_bound - lower_bound) * torch.rand(samples, features) + lower_bound
layer_1 = DenseLayer(features, neuron_1)
layer_1.forward(input_layer)
output_1 = relu.forward(layer_1.output)
output_layer = DenseLayer(output_1.shape[1], output_class)
output_layer.forward(output_1)
final_output_1 = softmax.forward(output_layer.output)
print(final_output_1)

tensor([[0.4773, 0.5227],
        [0.4826, 0.5174],
        [0.4873, 0.5127],
        [0.4863, 0.5137],
        [0.4870, 0.5130],
        [0.4810, 0.5190],
        [0.4821, 0.5179],
        [0.4854, 0.5146],
        [0.4818, 0.5182],
        [0.4884, 0.5116]])


In [10]:
class Loss:
  def calculate(self, predicted_values, ground_truth):
    sample_losses = self.forward(predicted_values, ground_truth)
    data_loss = torch.mean(sample_losses)
    return data_loss


In [12]:
class CategoricalCrossEntropyLoss(Loss):
  def forward(self, predictions, true_labels):
    num_samples = len(predictions)

    predictions_clipped = torch.clip(predictions, 1e-8, 1 - 1e-8)

    if len(true_labels.shape) == 1:
      correct_confidences = predictions_clipped[
      range(num_samples),
      true_labels
      ]
    elif len(true_labels.shape) == 2:
      correct_confidences = torch.sum(
      predictions_clipped * true_labels,
      axis=1
      )
    negative_log_likelihoods = -torch.log(correct_confidences)
    return negative_log_likelihoods

  def backward(self, gradients, true_labels):
    num_samples = len(gradients)
    num_labels = len(gradients[0])
    if len(true_labels.shape) == 1:
      true_labels = torch.eye(num_labels)[true_labels]
    self.deltas = -true_labels / gradients
    self.deltas = self.deltas / num_samples


In [13]:
class SoftmaxCategoricalCrossEntropy():
  def __init__(self):
    self.softmax_activation = SoftmaxActivation()
    self.categorical_crossentropy_loss = CategoricalCrossEntropyLoss()

  def forward(self, inputs, true_labels):
    self.softmax_activation.forward(inputs)
    self.output = self.softmax_activation.output
    return self.categorical_crossentropy_loss.calculate(self.output, true_labels)

  def backward(self, gradients, true_labels):
    num_samples = len(gradients)
    if len(true_labels.shape) == 2:
      true_labels = torch.argmax(true_labels, axis=1)

    self.deltas = gradients.clone()
    self.deltas[range(num_samples), true_labels] -= 1
    self.deltas = self.deltas / num_samples


In [18]:
activation_loss = SoftmaxCategoricalCrossEntropy()


In [21]:
target = torch.randint(output_class, (samples,))
loss = activation_loss.forward(final_output_1,target)


In [26]:
activation_loss.backward(activation_loss.output, target)
layer_1.backward(activation_loss.deltas)
print(layer_1.dweights)
print(layer_1.biases)


tensor([[  752.4683,  -752.4681],
        [ 1116.2562, -1116.2561]])
tensor([[0., 0.]])
