<a href="https://colab.research.google.com/github/GauravKanwat/DL_Assignment_1/blob/main/DL_Assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [107]:
import numpy as np
import pandas as pd
from tensorflow import keras
import matplotlib.pyplot as plt
from time import sleep
from tqdm import tqdm, trange

class NeuralNetwork:
    def __init__(self, x_input, y_input, num_of_pixels, hidden_neurons_list, num_hidden_layers, output_neurons):
      self.x_input = x_input
      self.y_input = y_input
      self.num_of_pixels = num_of_pixels
      self.hidden_neurons_list = hidden_neurons_list
      self.num_hidden_layers = num_hidden_layers
      self.output_neurons = output_neurons


    def initialize_parameters(self, num_of_pixels, hidden_neurons_list, num_hidden_layers, output_neurons):
        weights = {}
        biases = {}
        for l in range(num_hidden_layers):
          weights[l] = np.random.rand(hidden_neurons_list[l], num_of_pixels if l == 0 else hidden_neurons_list[l-1]) - 0.5
          biases[l] = np.random.rand(hidden_neurons_list[l], 1) - 0.5
        weights[num_hidden_layers] = np.random.rand(output_neurons, hidden_neurons_list[-1]) - 0.5
        biases[num_hidden_layers] = np.random.rand(output_neurons, 1) - 0.5
        return weights, biases

    def xavier_intialization(self, num_of_pixels, hidden_neurons_list, output_neurons):
        num_layers = len(hidden_neurons_list) + 1
        weights = {}
        biases = {}

        weights[0] = np.random.randn(hidden_neurons_list[0], num_of_pixels) * np.sqrt(1 / num_of_pixels)
        biases[0] = np.zeros((hidden_neurons_list[0], 1))

        # Initialize weights and biases for hidden layers
        for l in range(1, len(hidden_neurons_list)):
            weights[l] = np.random.randn(hidden_neurons_list[l], hidden_neurons_list[l-1]) * np.sqrt(1 / hidden_neurons_list[l-1])
            biases[l] = np.zeros((hidden_neurons_list[l], 1))

        # Initialize weights for last hidden layer to output layer
        weights[len(hidden_neurons_list)] = np.random.randn(output_neurons, hidden_neurons_list[-1]) * np.sqrt(1 / hidden_neurons_list[-1])
        biases[len(hidden_neurons_list)] = np.zeros((output_neurons, 1))
        return weights, biases

    def sigmoid(self, x):
        #return 1 / (1 + np.exp(-x))
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def reLU(self, Z):
        return np.maximum(0, Z)

    def tanh(self, x):
      return np.tanh(x)

    def softmax(self, x):
        max_x = np.max(x, axis=0)
        exp_x = np.exp(x - max_x)  # avoiding overflow
        return exp_x / np.sum(exp_x, axis=0)

    def feedforward_propagation(self, X, weights, biases, num_hidden_layers, activation_function):
        a = {}
        h = {}

        for k in range(num_hidden_layers):
            if k == 0:
              a[k] = np.dot(weights[k], X) + biases[k]
              if(activation_function == "reLU"):
                h[k] = self.reLU(a[k])
              elif(activation_function == "sigmoid"):
                h[k] = self.sigmoid(a[k])
              elif(activation_function == "tanh"):
                h[k] = self.tanh(a[k])
            else:
              a[k] = np.dot(weights[k], h[k-1]) + biases[k]
              if(activation_function == "reLU"):
                h[k] = self.reLU(a[k])
              elif(activation_function == "sigmoid"):
                h[k] = self.sigmoid(a[k])
              elif(activation_function == "tanh"):
                h[k] = self.tanh(a[k])

        a[num_hidden_layers] = np.dot(weights[num_hidden_layers], h[num_hidden_layers - 1]) + biases[num_hidden_layers]
        y_hat = self.softmax(a[num_hidden_layers])
        return a, h, y_hat

    def one_hot(self, Y):
      if Y.max() != 9:
        one_hot_Y = np.zeros((Y.size, 10))
      else:
        one_hot_Y = np.zeros((Y.size, Y.max() + 1))
      one_hot_Y[np.arange(Y.size), Y] = 1
      one_hot_Y = one_hot_Y.T
      return one_hot_Y

    def deriv_sigmoid(self, Z):
      func = self.sigmoid(Z)
      return func * (1 - func)

    def deriv_reLU(self, Z):
      return Z > 0

    def deriv_tanh(self, x):
      sechX = 1 / np.cosh(x)
      return sechX ** 2

    def back_propagation(self, Y, fwd_A, fwd_H, weights, biases, pred_output, num_hidden_layers, activation_function):
      one_hot_Y = self.one_hot(Y)
      dA = {}
      dH = {}
      dW = {}
      dB = {}

      dA[num_hidden_layers] = pred_output - one_hot_Y

      for k in range(num_hidden_layers, 0, -1):
        dW[k] = np.dot(dA[k], fwd_H[k-1].T)
        dB[k] = np.mean(dA[k], axis=1, keepdims=True)

        dH[k-1] = np.dot(weights[k].T, dA[k])
        if(activation_function == "reLU"):
          dA[k-1] = np.multiply(dH[k-1], self.deriv_reLU(fwd_A[k-1]))
        elif(activation_function == "sigmoid"):
          dA[k-1] = np.multiply(dH[k-1], self.deriv_sigmoid(fwd_A[k-1]))
        elif(activation_function == "tanh"):
          dA[k-1] = np.multiply(dH[k-1], self.deriv_tanh(fwd_A[k-1]))
      return dW, dB

    def get_predictions(self, pred_output):
      return np.argmax(pred_output, axis = 0)

    def get_accuracy(self, y_pred, y_true):
      return np.sum(y_pred == y_true) / y_true.size

    def gradient_descent(self, epochs, eta, activation_function, initialization):
      if(initialization == "normal"):
        weights, biases = self.initialize_parameters(self.num_of_pixels, self.hidden_neurons_list, self.num_hidden_layers, self.output_neurons)
      elif(initialization == "xavier"):
        weights, biases = self.xavier_intialization(self.num_of_pixels, self.hidden_neurons_list, self.output_neurons)

      for epoch in tqdm(range(epochs)):
        fwd_a, fwd_h, pred_output = self.feedforward_propagation(self.x_input, weights, biases, self.num_hidden_layers, activation_function)
        del_w, del_b = self.back_propagation(self.y_input, fwd_a, fwd_h, weights, biases, pred_output, self.num_hidden_layers, activation_function)

        # Update weights and biases
        for l in range(1, self.num_hidden_layers + 1):
          weights[l] -= eta * del_w[l]
          biases[l] -= eta * del_b[l]

        if epoch % 10 == 0:
          accuracy = self.get_accuracy(self.get_predictions(pred_output), self.y_input)
          print(f"Iteration: {epoch}, Accuracy: {accuracy}")
      return weights, biases

    def stochastic_gradient_descent(self, epochs, eta, activation_function, initialization, batch_size):
      if(initialization == "normal"):
        weights, biases = self.initialize_parameters(self.num_of_pixels, self.hidden_neurons_list, self.num_hidden_layers, self.output_neurons)
      elif(initialization == "xavier"):
        weights, biases = self.xavier_intialization(self.num_of_pixels, self.hidden_neurons_list, self.output_neurons)

      num_samples = self.x_input.shape[1]

      for epoch in tqdm(range(epochs)):
        shuffled_indices = np.random.permutation(num_samples)
        for i in range(0, num_samples, batch_size):

          batch_indices = shuffled_indices[i:i+batch_size]
          x_batch = self.x_input[:, batch_indices]
          y_batch = self.y_input[batch_indices]

          fwd_a, fwd_h, pred_output = self.feedforward_propagation(x_batch, weights, biases, self.num_hidden_layers, activation_function)
          del_w, del_b = self.back_propagation(y_batch, fwd_a, fwd_h, weights, biases, pred_output, self.num_hidden_layers, activation_function)

          # Update weights and biases
          for l in range(1, self.num_hidden_layers + 1):
            weights[l] -= eta * del_w[l]
            biases[l] -= eta * del_b[l]

        if epoch % 1 == 0:
          accuracy = self.get_accuracy(self.get_predictions(pred_output), y_batch)
          print(f"Iteration: {epoch}, Accuracy: {accuracy}")

      return weights, biases


    def momentum_based_gradient_descent(self, epochs, eta, beta, activation_function, initialization):
      if(initialization == "normal"):
        weights, biases = self.initialize_parameters(self.num_of_pixels, self.hidden_neurons_list, self.num_hidden_layers, self.output_neurons)
      elif(initialization == "xavier"):
        weights, biases = self.xavier_intialization(self.num_of_pixels, self.hidden_neurons_list, self.output_neurons)

      prev_uw = {}
      prev_ub = {}

      for l in range(1, self.num_hidden_layers + 1):
        prev_uw[l] = 0
        prev_ub[l] = 0

      for epoch in tqdm(range(epochs)):
        fwd_a, fwd_h, pred_output = self.feedforward_propagation(self.x_input, weights, biases, self.num_hidden_layers, activation_function)
        del_w, del_b = self.back_propagation(self.y_input, fwd_a, fwd_h, weights, biases, pred_output, self.num_hidden_layers, activation_function)

        # Update weights and biases
        for l in range(1, self.num_hidden_layers + 1):
          uw = beta * prev_uw[l] + eta * del_w[l]
          ub = beta * prev_ub[l] + eta * del_b[l]
          weights[l] -= uw
          biases[l] -= ub
          prev_uw[l] = uw
          prev_ub[l] = ub

        if epoch % 10 == 0:
          accuracy = self.get_accuracy(self.get_predictions(pred_output), self.y_input)
          print(f"Iteration: {epoch}, Accuracy: {accuracy}")
      return weights, biases

    def nesterov_accelerated_gradient_descent(self, epochs, eta, beta, activation_function, initialization):
      if(initialization == "normal"):
        weights, biases = self.initialize_parameters(self.num_of_pixels, self.hidden_neurons_list, self.num_hidden_layers, self.output_neurons)
      elif(initialization == "xavier"):
        weights, biases = self.xavier_intialization(self.num_of_pixels, self.hidden_neurons_list, self.output_neurons)

      prev_vw = 0
      prev_vb = 0

      for epoch in tqdm(range(epochs)):

        fwd_a, fwd_h, pred_output = self.feedforward_propagation(self.x_input, weights, biases, self.num_hidden_layers, activation_function)
        del_w, del_b = self.back_propagation(self.y_input, fwd_a, fwd_h, weights, biases, pred_output, self.num_hidden_layers, activation_function)

        v_w = beta*prev_vw
        v_b = beta*prev_vb

        # Update weights and biases
        for l in range(1, self.num_hidden_layers + 1):
          vw = beta * prev_vw + eta * del_w[l]
          vb = beta * prev_vb + eta * del_b[l]
          weights[l] -= vw
          biases[l] -= vb
          prev_uw = vw
          prev_ub = vb

        if epoch % 10 == 0:
          accuracy = self.get_accuracy(self.get_predictions(pred_output), self.y_input)
          print(f"Iteration: {epoch}, Accuracy: {accuracy}")
      return weights, biases

    def adagrad_gradient_descent(self, epochs, eta, eps, activation_function, initialization):
      if(initialization == "normal"):
        weights, biases = self.initialize_parameters(self.num_of_pixels, self.hidden_neurons_list, self.num_hidden_layers, self.output_neurons)
      elif(initialization == "xavier"):
        weights, biases = self.xavier_intialization(self.num_of_pixels, self.hidden_neurons_list, self.output_neurons)

      v_w = {}
      v_b = {}

      for l in range(1, self.num_hidden_layers + 1):
        v_w[l] = 0
        v_b[l] = 0

      for epoch in tqdm(range(epochs)):
        fwd_a, fwd_h, pred_output = self.feedforward_propagation(self.x_input, weights, biases, self.num_hidden_layers, activation_function)
        del_w, del_b = self.back_propagation(self.y_input, fwd_a, fwd_h, weights, biases, pred_output, self.num_hidden_layers, activation_function)

        # Update weights and biases
        for l in range(1, self.num_hidden_layers + 1):
          v_w[l] = v_w[l] + del_w[l]**2
          v_b[l] = v_b[l] + del_b[l]**2

          weights[l] -= eta * del_w[l] / (np.sqrt(v_w[l]) + eps)
          biases[l] -= eta * del_b[l] / (np.sqrt(v_b[l]) + eps)

        if epoch % 10 == 0:
          accuracy = self.get_accuracy(self.get_predictions(pred_output), self.y_input)
          print(f"Iteration: {epoch}, Accuracy: {accuracy}")
      return weights, biases

    def rmsProp_gradient_descent(self, epochs, eta, eps, beta, activation_function, initialization):
      if(initialization == "normal"):
        weights, biases = self.initialize_parameters(self.num_of_pixels, self.hidden_neurons_list, self.num_hidden_layers, self.output_neurons)
      elif(initialization == "xavier"):
        weights, biases = self.xavier_intialization(self.num_of_pixels, self.hidden_neurons_list, self.output_neurons)

      v_w = {}
      v_b = {}

      for l in range(1, self.num_hidden_layers + 1):
        v_w[l] = 0
        v_b[l] = 0

      for epoch in tqdm(range(epochs)):
        fwd_a, fwd_h, pred_output = self.feedforward_propagation(self.x_input, weights, biases, self.num_hidden_layers, activation_function)
        del_w, del_b = self.back_propagation(self.y_input, fwd_a, fwd_h, weights, biases, pred_output, self.num_hidden_layers, activation_function)

        # Update weights and biases
        for l in range(1, self.num_hidden_layers + 1):
          v_w[l] = (beta * v_w[l]) + ((1-beta) * del_w[l] ** 2)
          v_b[l] = (beta * v_b[l]) + ((1-beta) * del_b[l] ** 2)

          weights[l] -= eta * del_w[l] / (np.sqrt(v_w[l]) + eps)
          biases[l] -= eta * del_b[l] / (np.sqrt(v_b[l]) + eps)

        if epoch % 10 == 0:
          accuracy = self.get_accuracy(self.get_predictions(pred_output), self.y_input)
          print(f"Iteration: {epoch}, Accuracy: {accuracy}")
      return weights, biases

    def adam_gradient_descent(self, epochs, eta, eps, beta1, beta2, activation_function, initialization):
      if(initialization == "normal"):
        weights, biases = self.initialize_parameters(self.num_of_pixels, self.hidden_neurons_list, self.num_hidden_layers, self.output_neurons)
      elif(initialization == "xavier"):
        weights, biases = self.xavier_intialization(self.num_of_pixels, self.hidden_neurons_list, self.output_neurons)

      m_w = {}
      m_b = {}
      v_w = {}
      v_b = {}
      m_w_hat = {}
      m_b_hat = {}
      v_w_hat = {}
      v_b_hat = {}

      for l in range(1, self.num_hidden_layers + 1):
        m_w[l] = 0
        m_b[l] = 0
        v_w[l] = 0
        v_b[l] = 0
        m_w_hat[l] = 0
        m_b_hat[l] = 0
        v_w_hat[l] = 0
        v_b_hat[l] = 0

      for epoch in tqdm(range(epochs)):
        fwd_a, fwd_h, pred_output = self.feedforward_propagation(self.x_input, weights, biases, self.num_hidden_layers, activation_function)
        del_w, del_b = self.back_propagation(self.y_input, fwd_a, fwd_h, weights, biases, pred_output, self.num_hidden_layers, activation_function)

        # Update weights and biases
        for l in range(1, self.num_hidden_layers + 1):
          m_w[l] = (beta1 * m_w[l]) + (1-beta1) * del_w[l]
          m_b[l] = (beta1 * m_b[l]) + (1-beta1) * del_b[l]

          v_w[l] = beta2 * v_w[l] + (1 - beta2) * (del_w[l] ** 2)
          v_b[l] = beta2 * v_b[l] + (1 - beta2) * (del_b[l] ** 2)

          m_w_hat[l] = m_w[l]/(1-np.power(beta1, l+1))
          m_b_hat[l] = m_b[l]/(1-np.power(beta1, l+1))
          v_w_hat[l] = v_w[l]/(1-np.power(beta2, l+1))
          v_b_hat[l] = v_b[l]/(1-np.power(beta2, l+1))

          #update parameters
          weights[l] -= eta*m_w_hat[l]/(np.sqrt(v_w_hat[l])+eps)
          biases[l] -= eta*m_b_hat[l]/(np.sqrt(v_b_hat[l])+eps)

        if epoch % 10 == 0:
          accuracy = self.get_accuracy(self.get_predictions(pred_output), self.y_input)
          print(f"Iteration: {epoch}, Accuracy: {accuracy}")
      return weights, biases


def main():

  fashion_mnist = keras.datasets.fashion_mnist
  (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
  classes = {0:"T-shirt/top", 1:"Trouser", 2:"Pullover", 3:"Dress", 4:"Coat", 5:"Sandal", 6:"Shirt", 7:"Sneaker", 8:"Bag", 9:"Ankle Boot"}
  x_train_norm = x_train / 255
  x_test_norm = x_test / 255

  # Define hyperparameters
  num_of_pixels = 28 * 28                                                         #28 * 28 = 784 pixels
  hidden_neurons_list = [128, 128]
  num_hidden_layers = len(hidden_neurons_list)
  output_neurons = 10
  eta = 1e-2
  epochs = 51
  activation_function = "sigmoid"
  initialization = "normal"
  batch_size = 1
  beta1 = 0.999
  beta2 = 0.9
  eps = 1e-10

  #Taking pixels as inputs
  x_train_input = x_train_norm.reshape(len(x_train_norm), num_of_pixels)                      #flattening the image into 1d array
  x_test_input = x_test_norm.reshape(len(x_test_norm), num_of_pixels)                         #same thing
  x_train_input = x_train_input.T
  x_test_input = x_test_input.T

  # Neural network class -> nn object
  nn = NeuralNetwork(x_test_input, y_test, num_of_pixels, hidden_neurons_list, num_hidden_layers, output_neurons)

  # Call the gradient_descent method
  #weights, biases = nn.gradient_descent(epochs, eta, activation_function, initialization)
  #weights, biases = nn.stochastic_gradient_descent(epochs, eta, activation_function, initialization, batch_size)
  #weights, biases = nn.momentum_based_gradient_descent(epochs, eta, beta, activation_function, initialization)
  #weights, biases = nn.nesterov_accelerated_gradient_descent(epochs, eta, beta, activation_function, initialization)
  #weights, biases = nn.adagrad_gradient_descent(epochs, eta, eps, activation_function, initialization)
  #weights, biases = nn.rmsProp_gradient_descent(epochs, eta, eps, beta, activation_function, initialization)
  weights, biases = nn.adam_gradient_descent(epochs, eta, eps, beta1, beta2, activation_function, initialization)

if __name__ == "__main__":
    main()

  2%|▏         | 1/51 [00:00<00:12,  3.92it/s]

Iteration: 0, Accuracy: 0.1044


 22%|██▏       | 11/51 [00:03<00:14,  2.78it/s]

Iteration: 10, Accuracy: 0.5163


 41%|████      | 21/51 [00:06<00:09,  3.33it/s]

Iteration: 20, Accuracy: 0.6134


 61%|██████    | 31/51 [00:09<00:05,  3.77it/s]

Iteration: 30, Accuracy: 0.7036


 80%|████████  | 41/51 [00:12<00:02,  3.80it/s]

Iteration: 40, Accuracy: 0.7022


100%|██████████| 51/51 [00:14<00:00,  3.44it/s]

Iteration: 50, Accuracy: 0.723



