### Name: REDIET FEREW
### ID No.: UGR/1415/12

In [None]:
import torch

### Dense Layer

In [None]:

class DenseLayer():
  """Fully connected layer computing ``inputs @ weights.T + biases``.

  ``weights`` has shape (neurons, features) and ``biases`` has shape
  (1, neurons).
  """

  def __init__(self, features, neurons):
    """Create the layer parameters.

    Args:
      features: number of input features per sample.
      neurons: number of output units of the layer.
    """
    # torch.rand samples uniformly from [0, 1); scale down to small weights.
    self.weights = 0.01 * torch.rand(neurons, features)
    self.biases = torch.zeros(1, neurons)

  def forward(self, inputs):
    """Run the forward pass on a (samples, features) batch.

    The inputs are cached in ``self.inputs`` for ``backward`` and the
    (samples, neurons) result is stored in ``self.output``.
    """
    self.inputs = inputs
    self.output = torch.matmul(inputs, self.weights.T) + self.biases

  def backward(self, dvalues):
    """Compute parameter and input gradients.

    Args:
      dvalues: gradient of the loss w.r.t. ``self.output``,
        shape (samples, neurons).

    Sets ``self.dweights`` (same shape as ``self.weights``),
    ``self.dbiases`` (same shape as ``self.biases``) and ``self.dinputs``
    (same shape as the cached inputs).
    """
    # weights is (neurons, features), so its gradient is dvalues.T @ inputs;
    # inputs.T @ dvalues would produce the transposed (features, neurons).
    self.dweights = torch.matmul(dvalues.T, self.inputs)
    self.dbiases = torch.sum(dvalues, dim=0, keepdim=True)
    # output = inputs @ weights.T, hence d(output)/d(inputs) uses weights
    # itself (not its transpose): (samples, neurons) @ (neurons, features).
    self.dinputs = torch.matmul(dvalues, self.weights)


### ReLU Activation

In [None]:
#Relu Activation Function

class ActivationRelu:
  """Rectified linear unit: element-wise ``max(0, x)``."""

  def forward(self, inputs):
    """Apply ReLU to ``inputs`` and return the result.

    The inputs are cached in ``self.inputs`` because ``backward`` needs to
    know which entries were non-positive; the result is also kept in
    ``self.output``.
    """
    self.inputs = inputs
    # Negative entries become 0, everything else passes through unchanged.
    self.output = torch.max(inputs, torch.tensor(0.0))
    return self.output

  def backward(self, dvalues):
    """Compute the gradient w.r.t. the inputs of the last ``forward`` call.

    Args:
      dvalues: gradient of the loss w.r.t. the ReLU output; same shape as
        the cached inputs.

    Sets ``self.dinputs``.
    """
    # Work on a copy so the caller's gradient tensor is not modified.
    self.dinputs = dvalues.clone()

    # The derivative of ReLU is 0 wherever the input was <= 0 and 1 elsewhere.
    self.dinputs[self.inputs <= 0] = 0



### Softmax Activation

In [None]:
#softmax activation function
class Activation_SoftMax:
  """Row-wise softmax over a (samples, classes) batch of scores."""

  def forward(self, inputs):
    """Return the softmax probabilities of each row of ``inputs``.

    The result is also stored in ``self.output`` for ``backward``.
    """
    # Subtracting the row maximum leaves the softmax unchanged but keeps
    # exp() from overflowing to inf (and the result to NaN) on large scores.
    shifted = inputs - torch.max(inputs, dim=1, keepdim=True).values
    exp_values = torch.exp(shifted)
    self.output = exp_values / torch.sum(exp_values, dim=1, keepdim=True)
    return self.output

  def backward(self, dvalues):
    """Compute the gradient w.r.t. the softmax inputs.

    Args:
      dvalues: gradient of the loss w.r.t. ``self.output``; same shape as
        ``self.output``.

    Sets ``self.dinputs``.
    """
    # For one sample with output s the Jacobian is diag(s) - s s^T, so
    # J @ d = s * d - s * (s . d). Evaluating that form for all rows at once
    # avoids building a Jacobian matrix per sample.
    weighted_sum = torch.sum(dvalues * self.output, dim=1, keepdim=True)
    self.dinputs = self.output * (dvalues - weighted_sum)

In [None]:
# Initialize the activation objects used by the forward pass
softmax = Activation_SoftMax()
relu = ActivationRelu()

### Forward Pass


In [None]:
# Seed the RNG so the random inputs and weights below are reproducible
manual_seed = 42
torch.manual_seed(manual_seed)

# Problem dimensions
features = 2      # input features per sample
neuron_1 = 2      # units in the first (hidden) layer
output_class = 2  # number of output classes
samples = 10      # number of samples in the batch

# Draw the inputs uniformly from [lower_bound, upper_bound).
# NOTE: the name `input` shadows the Python built-in of the same name.
lower_bound, upper_bound = 0, 10000
input = lower_bound + (upper_bound - lower_bound) * torch.rand(samples, features)
# input = torch.rand(samples, features)

# First dense layer followed by ReLU
layer_1 = DenseLayer(features, neuron_1)
layer_1.forward(input)
output_1 = relu.forward(layer_1.output)
print(output_1.shape)

# Output dense layer followed by softmax
output_layer = DenseLayer(output_1.shape[1], output_class)
output_layer.forward(output_1)
print(output_layer.output.shape)
final_output_1 = softmax.forward(output_layer.output)
print(final_output_1, final_output_1.shape, sep="\n")

torch.Size([10, 2])
torch.Size([10, 2])
tensor([[0.4773, 0.5227],
        [0.4826, 0.5174],
        [0.4873, 0.5127],
        [0.4863, 0.5137],
        [0.4870, 0.5130],
        [0.4810, 0.5190],
        [0.4821, 0.5179],
        [0.4854, 0.5146],
        [0.4818, 0.5182],
        [0.4884, 0.5116]])
torch.Size([10, 2])


### Loss Calculation

In [None]:
class Loss:
  """Base class for losses; ``forward`` is expected to be supplied elsewhere
  (e.g. by a subclass) and to return one loss value per sample."""

  def calculate(self, output, y):
    """Return the mean of the per-sample losses given by ``self.forward``.

    Args:
      output: model predictions, passed through to ``self.forward``.
      y: ground-truth targets, passed through to ``self.forward``.
    """
    return torch.mean(self.forward(output, y))

In [None]:
class Loss_CategoricalCrossentropy(Loss):
  """Categorical cross-entropy loss for class-index or one-hot targets."""

  def forward(self, y_pred, y_true):
    """Return the negative log-likelihood of the true class per sample.

    Args:
      y_pred: predicted probabilities, indexed as [sample, class].
      y_true: 1-D tensor of class indices, or a 2-D tensor that is
        multiplied element-wise with the predictions (e.g. one-hot rows).
    """
    n_samples = len(y_pred)

    # Keep probabilities strictly inside (0, 1) so the log stays finite.
    clipped = torch.clip(y_pred, 1e-7, 1 - 1e-7)

    target_rank = len(y_true.shape)
    if target_rank == 1:
      # Class indices: pick the predicted probability of the true class.
      confidences = clipped[range(n_samples), y_true]
    elif target_rank == 2:
      # 2-D targets: weight the predictions and sum over the classes.
      confidences = torch.sum(clipped * y_true, axis=1)

    return -torch.log(confidences)

  def backward(self, dvalues, y_true):
    """Compute the gradient of the loss w.r.t. the predictions.

    Args:
      dvalues: the predicted probabilities the loss was evaluated on.
      y_true: class indices (1-D) or 2-D targets, as in ``forward``.

    Sets ``self.dinputs``, averaged over the number of samples.
    """
    n_samples = len(dvalues)
    n_labels = len(dvalues[0])

    # Expand class indices into one-hot rows.
    if len(y_true.shape) == 1:
      y_true = torch.eye(n_labels)[y_true]

    self.dinputs = (-y_true / dvalues) / n_samples




In [None]:
class Activation_Softmax_Loss_CategoricalCrossentropy():
  """Softmax activation combined with categorical cross-entropy loss."""

  def __init__(self):
    """Create the wrapped loss and softmax activation objects."""
    self.loss = Loss_CategoricalCrossentropy()
    self.activation = Activation_SoftMax()

  def forward(self, inputs, y_true):
    """Apply softmax to ``inputs`` and return the mean loss against ``y_true``.

    The softmax output is stored in ``self.output``.
    """
    softmax_layer = self.activation
    softmax_layer.forward(inputs)
    self.output = softmax_layer.output
    return self.loss.calculate(self.output, y_true)

  def backward(self, dvalues, y_true):
    """Compute the gradient of the combined softmax + loss.

    Args:
      dvalues: softmax probabilities, indexed as [sample, class].
      y_true: class indices (1-D) or 2-D targets whose per-row argmax is
        taken as the class index.

    Sets ``self.dinputs``: the probabilities with 1 subtracted at the true
    class of each sample, divided by the number of samples.
    """
    n_samples = len(dvalues)

    # Reduce 2-D targets to class indices.
    labels = torch.argmax(y_true, axis=1) if len(y_true.shape) == 2 else y_true

    # Work on a copy so the caller's tensor is left untouched.
    grad = dvalues.clone()
    grad[range(n_samples), labels] -= 1
    self.dinputs = grad / n_samples


In [None]:
# Combined softmax activation + categorical cross-entropy loss object
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()


In [None]:
# One random integer class label in [0, output_class) per sample
target = torch.randint(low=0, high=output_class, size=(samples,))

In [None]:
# loss_activation.forward applies softmax itself, so it must receive the raw
# output-layer scores; passing final_output_1 would apply softmax twice.
loss = loss_activation.forward(output_layer.output, target)


### Backward Propagation

In [None]:

# Gradient of the loss w.r.t. the output-layer scores (softmax + cross-entropy)
loss_activation.backward(loss_activation.output, target)

# Propagate through the layers in the reverse order of the forward pass:
# output layer -> ReLU -> first layer.
output_layer.backward(loss_activation.dinputs)

# ReLU lets the gradient through only where its input (layer_1.output) was > 0
relu_dinputs = output_layer.dinputs.clone()
relu_dinputs[layer_1.output <= 0] = 0

layer_1.backward(relu_dinputs)


In [None]:
# Show the gradients computed for the first layer's weights and biases
print(layer_1.dweights, layer_1.dbiases, sep="\n")


tensor([[ 1564.9775, -1564.9774],
        [  594.1116,  -594.1115]])
tensor([[ 0.0920, -0.0920]])
