<a href="https://colab.research.google.com/github/TirthVamja/cs6910_assignment1/blob/main/assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Library

In [1]:
!pip install wandb



In [2]:
from keras.datasets import fashion_mnist, mnist
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
import wandb
import seaborn as sn

**Plotting various Categories of data**

In [3]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [4]:
# MetaData of Fashion_Mnist dataset ...
CLASSES = {
  0:'T-shirt/top',
  1:'Trouser',
  2:'Pullover',
  3:'Dress',
  4:'Coat',
  5:'Sandal',
  6:'Shirt',
  7:'Sneaker',
  8:'Bag',
  9:'Ankle boot'
}

In [5]:
# ind_of_first_occurance = np.argsort(y_train)
# ind = ind_of_first_occurance[np.searchsorted(y_train, np.arange(0,10,1), sorter=ind_of_first_occurance)]
# fig, ax = plt.subplots(nrows=2, ncols=5)
# for i in range(10):
#   ax[i//5, i%5].imshow(x_train[ind[i]], cmap='gray')
#   ax[i//5, i%5].set_title(CLASSES[i])


# # wandb.init(project="cs6910_assignment1")
# # wandb.run.name = f'List of Categories'
# # wandb.log({'List of Categories':plt})
# # wandb.finish()

# # fig.suptitle('List of Categories')
# plt.show()

In [6]:
def load_data(dataset='fashion_mnist', purpose='train'):
  dataset=dataset.lower()
  purpose=purpose.lower()
  x,x_t,y,y_t = None,None,None,None

  if dataset == 'fashion_mnist':
    (x, y), (x_t, y_t) = fashion_mnist.load_data()
  elif dataset == 'mnist':
    (x, y), (x_t, y_t) = mnist.load_data()

  if purpose == 'train':
    x = x.reshape(x.shape[0], 784) / 255
    y = np.eye(10)[y]
    return x, y
  elif purpose == 'test':
    x_t = x_t.reshape(x_t.shape[0], 784) / 255
    y_t = np.eye(10)[y_t]
    return x_t, y_t

**Feed Forward Neural Net**

In [7]:
class FF_NN:

  def __init__(self, param):
    self.hidden_layers = param['hidden_lyrs']
    self.neurons = param['neurons']
    self.input_neurons = param['inpt_sz']
    self.output_neurons = param['oupt_sz']
    self.weights = []
    self.bias = []
    self.activation = param['activation']
    self.output_activation = param['oupt_activation']
    self.weight_initialisation = param['weight_initialisation']

    self.get_weights()
    self.get_bias()


  def get_bias(self):
    for _ in range(self.hidden_layers):
      self.bias.append(np.random.randn(self.neurons))
    self.bias.append(np.random.randn(self.output_neurons))

  def get_weights(self):
    if self.weight_initialisation.lower() == 'random':
      self.weights.append(np.random.randn(self.input_neurons, self.neurons))
      for _ in range(self.hidden_layers-1):
        self.weights.append(np.random.randn(self.neurons, self.neurons))
      self.weights.append(np.random.randn(self.neurons, self.output_neurons))

    else:
      limit = np.sqrt(6/(self.input_neurons + self.neurons))
      self.weights.append(np.random.uniform(low=-limit, high=limit, size=(self.input_neurons, self.neurons)))
      limit = np.sqrt(6/(self.neurons + self.neurons))
      for _ in range(self.hidden_layers-1):
        self.weights.append(np.random.uniform(low=-limit, high=limit, size=(self.neurons, self.neurons)))
      limit = np.sqrt(6/(self.neurons + self.output_neurons))
      self.weights.append(np.random.uniform(low=-limit, high=limit, size=(self.neurons, self.output_neurons)))


  def apply_activation(self, data):
    act = self.activation.lower()
    if act == 'sigmoid':
      data = np.maximum(data, -500)
      data = np.minimum(data, 500)
      return 1/(1+np.exp(-data))
    elif act == 'relu':
      return np.maximum(0,data)
    elif act == 'tanh':
      return np.tanh(data)
    elif act == 'identity':
      return data


  def apply_output_activation(self, data):
    if self.output_activation.lower() == 'softmax':
      data = np.maximum(data, -500)
      data = np.minimum(data, 500)
      data = np.exp(data)
      return data/np.sum(data,axis=1).reshape(data.shape[0],1)


  def feed_forward(self, input):
    self.A = [input]
    self.H = [input]

    # hidden layer calculations...
    for i in range(self.hidden_layers):
      self.A.append(self.bias[i] + np.matmul(self.H[-1], self.weights[i]))
      self.H.append(self.apply_activation(self.A[-1]))

    # output layer calculations...
    self.A.append(self.bias[-1] + np.matmul(self.H[-1], self.weights[-1]))
    self.H.append(self.apply_output_activation(self.A[-1]))

    return self.H[-1] # shape of H[-1] = 60000,10   shape of H = layers, 60000, neurons in each layer


In [8]:
# nn = FF_NN(PARAMETERS)
# x_train, y_train = load_data(PARAMETERS['dataset'], 'train')
# prediction = nn.feed_forward(x_train) # shape of xtrain -> 60000,784
# print(prediction[0])

**Back_Propagation**

In [9]:
class BP_NN:

  def __init__(
      self,
      ff_nn:FF_NN,
      param):
    self.ff_nn, self.loss, self.activation, self.output_activation = ff_nn, param['loss_function'], param['activation'], param['oupt_activation']


  def der_actvtn(self, x):
    act = self.activation.lower()
    if act == "sigmoid":
      return x * (1 - x)
    elif act == "tanh":
      return 1 - x ** 2
    elif act == "relu":
      return (x > 0).astype(int)
    elif act == "identity":
      return np.ones(x.shape)

  def der_ls(self, y, yp):
    ls = self.loss.lower()
    if ls == "mean_squared_error":
      return yp-y
    elif ls == "cross_entropy":
      return -y/yp

  def der_outpt_actvtn(self, yp):
    act = self.output_activation.lower()
    if act == "softmax":
      return np.diag(yp)-np.outer(yp, yp)


  def propogate_backward(self, y, y_pred):  # y=60000,10   y_pred=60000,10
    self.d_h, self.d_a, self.delta_weights, self.delta_bias = [], [], [], []
    der_outpt_mat = []

    self.d_h.append(self.der_ls(y, y_pred))
    for i in range(y_pred.shape[0]):
        der_outpt_mat.append(np.matmul(self.der_ls(y[i], y_pred[i]), self.der_outpt_actvtn(y_pred[i])))
    der_outpt_arr = np.array(der_outpt_mat)
    self.d_a.append(der_outpt_arr)
    # self.d_a.append(y_pred-y)

    for i in range(self.ff_nn.hidden_layers, 0, -1):
      self.delta_weights.append(np.matmul(self.ff_nn.H[i].T, self.d_a[-1]))
      self.delta_bias.append(np.sum(self.d_a[-1], axis=0))
      self.d_h.append(np.matmul(self.d_a[-1], self.ff_nn.weights[i].T))
      self.d_a.append(self.d_h[-1] * self.der_actvtn(self.ff_nn.H[i]))

    self.delta_weights.append(np.matmul(self.ff_nn.H[0].T, self.d_a[-1]))
    self.delta_weights.reverse()
    self.delta_bias.append(np.sum(self.d_a[-1], axis=0))
    self.delta_bias.reverse()

    for i in range(len(self.delta_bias)):
      self.delta_weights[i] = self.delta_weights[i] / y.shape[0]
      self.delta_bias[i] = self.delta_bias[i] / y.shape[0]

    return self.delta_weights, self.delta_bias

**Optimizers**

In [10]:
class Optimizer():
  def __init__(
      self,
      ff_nn: FF_NN,
      bp_nn: BP_NN,
      param
  ):
    self.ff_nn, self.bp_nn, self.lr, self.optimizer, self.momentum, self.decay = ff_nn, bp_nn, param['learning_rate'], param['optimizer'], param['momentum'], param['decay']
    self.B1, self.B2, self.eps, self.t = param['beta1'], param['beta2'], param['epsilon'], 0
    self.b_history = [np.zeros_like(i) for i in self.ff_nn.bias]
    self.b_hm = [np.zeros_like(i) for i in self.ff_nn.bias]
    self.w_history = [np.zeros_like(i) for i in self.ff_nn.weights]
    self.w_hm = [np.zeros_like(i) for i in self.ff_nn.weights]


  def optimize(self, delta_weights, delta_bias):
    opt = self.optimizer.lower()
    if(opt == "sgd"):
      self.SGD(delta_weights, delta_bias)
    elif(opt == "momentum"):
      self.MGD(delta_weights, delta_bias)
    elif(opt == "nesterov"):
      self.NAG(delta_weights, delta_bias)
    elif(opt == "rmsprop"):
      self.RMSPROP(delta_weights, delta_bias)
    elif(opt == "adam"):
      self.ADAM(delta_weights, delta_bias)
    elif(opt == "nadam"):
      self.NADAM(delta_weights, delta_bias)


  def SGD(self, delta_weights, delta_bias):
    for i in range(self.ff_nn.hidden_layers + 1):
      self.ff_nn.weights[i] -= self.lr * (delta_weights[i] + self.ff_nn.weights[i]*self.decay)
      self.ff_nn.bias[i] -= self.lr * (delta_bias[i] + self.ff_nn.bias[i]*self.decay)

  def MGD(self, delta_weights, delta_bias):
    for i in range(self.ff_nn.hidden_layers + 1):
      self.w_history[i] = self.momentum * self.w_history[i] + delta_weights[i]
      self.ff_nn.weights[i] -= self.lr * (self.w_history[i] + self.ff_nn.weights[i]*self.decay)
      self.b_history[i] = self.momentum * self.b_history[i] + delta_bias[i]
      self.ff_nn.bias[i] -= self.lr * (self.b_history[i] + self.ff_nn.bias[i]*self.decay)

  def NAG(self, delta_weights, delta_bias):
    for i in range(self.ff_nn.hidden_layers + 1):
      self.w_history[i] = self.momentum * self.w_history[i] + delta_weights[i]
      self.ff_nn.weights[i] -= self.lr * (self.momentum * self.w_history[i] + delta_weights[i] + self.ff_nn.weights[i]*self.decay)
      self.b_history[i] = self.momentum * self.b_history[i] + delta_bias[i]
      self.ff_nn.bias[i] -= self.lr * (self.momentum * self.b_history[i] + delta_bias[i] + self.ff_nn.bias[i]*self.decay)


  def RMSPROP(self, delta_weights, delta_bias):
    for i in range(self.ff_nn.hidden_layers + 1):
      self.w_history[i] = self.w_history[i]*self.momentum + (1-self.momentum)*delta_weights[i]**2
      self.ff_nn.weights[i] -= delta_weights[i]*(self.lr / (np.sqrt(self.w_history[i]) + self.eps)) + self.decay * self.ff_nn.weights[i] * self.lr
      self.b_history[i] = self.b_history[i]*self.momentum + (1-self.momentum)*delta_bias[i]**2
      self.ff_nn.bias[i] -= delta_bias[i]*(self.lr / (np.sqrt(self.b_history[i]) + self.eps)) + self.decay * self.ff_nn.bias[i] * self.lr


  def ADAM(self, delta_weights, delta_bias):
    for i in range(self.ff_nn.hidden_layers + 1):
      self.w_hm[i] = self.B1 * self.w_hm[i] + (1 - self.B1) * delta_weights[i]
      self.w_history[i] = self.B2 * self.w_history[i] + (1 - self.B2) * delta_weights[i]**2
      self.w_hat_hm = self.w_hm[i] / (1 - self.B1**(self.t + 1))
      self.w_history_hat = self.w_history[i] / (1 - self.B2**(self.t + 1))
      self.ff_nn.weights[i] -= self.lr * (self.w_hat_hm / ((np.sqrt(self.w_history_hat)) + self.eps) + self.decay * self.ff_nn.weights[i])

      self.b_hm[i] = self.B1 * self.b_hm[i] + (1 - self.B1) * delta_bias[i]
      self.b_history[i] = self.B2 * self.b_history[i] + (1 - self.B2) * delta_bias[i]**2
      self.b_hat_hm = self.b_hm[i] / (1 - self.B1**(1+self.t))
      self.h_hat_b = self.b_history[i] / (1 - self.B2**(1+self.t))
      self.ff_nn.bias[i] -= self.lr * (self.b_hat_hm / ((np.sqrt(self.h_hat_b)) + self.eps) + self.decay * self.ff_nn.bias[i])


  def NADAM(self, delta_weights, delta_bias):
    for i in range(self.ff_nn.hidden_layers + 1):
      self.w_hm[i] = self.B1 * self.w_hm[i] + (1 - self.B1) * delta_weights[i]
      self.w_hat_hm = self.w_hm[i] / (1 - self.B1 ** (self.t + 1))
      self.w_history[i] = self.B2 * self.w_history[i] + (1 - self.B2) * delta_weights[i]**2
      self.w_history_hat = self.w_history[i] / (1 - self.B2 ** (self.t + 1))
      w_temp = self.B1 * self.w_hat_hm + ((1 - self.B1) / (1 - self.B1 ** (self.t + 1))) * delta_weights[i]
      self.ff_nn.weights[i] -= self.lr * (w_temp / ((np.sqrt(self.w_history_hat)) + self.eps) + self.decay * self.ff_nn.weights[i])


      self.b_hm[i] = self.B1 * self.b_hm[i] + (1 - self.B1) * delta_bias[i]
      self.b_hat_hm = self.b_hm[i] / (1 - self.B1 ** (self.t + 1))
      self.b_history[i] = self.B2 * self.b_history[i] + (1 - self.B2) * delta_bias[i]**2
      self.h_hat_b = self.b_history[i] / (1 - self.B2 ** (self.t + 1))
      b_temp = self.B1 * self.b_hat_hm + ((1 - self.B1) / (1 - self.B1 ** (self.t + 1))) * delta_bias[i]
      self.ff_nn.bias[i] -= self.lr * (b_temp / ((np.sqrt(self.h_hat_b)) + self.eps) + self.decay * self.ff_nn.bias[i])


In [11]:
def calculate_loss(y, y_pred, loss_function):
  ls_fn = loss_function.lower()
  if ls_fn == "mean_squared_error":
    return np.sum((y_pred-y) ** 2) / y.shape[0]
  elif ls_fn == "cross_entropy":
    return (-np.sum(y * np.log(y_pred))) / y.shape[0]


In [12]:
# def train(PARAMETERS):
#   # wandb.init()
#   # PARAMETERS = wandb.config
#   # wandb.run.name = f'hl_{PARAMETERS.hidden_lyrs}_bs_{PARAMETERS.batch_sz}_ac_{PARAMETERS.activation}'

#   x_train, y_train = load_data(PARAMETERS['dataset'], 'train')
#   np.random.seed(7)
#   ff_nn = FF_NN(PARAMETERS)
#   bp_nn = BP_NN(ff_nn, PARAMETERS)
#   opt = Optimizer(ff_nn, bp_nn, PARAMETERS)
#   print("Initial Accuracy: {}".format(np.sum(np.argmax(ff_nn.feed_forward(x_train), axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]))
#   batch_size = PARAMETERS['batch_sz']

#   x_train, x_train_t, y_train, y_train_t = train_test_split(x_train, y_train, test_size=0.1, random_state=7)

#   for epoch in range(PARAMETERS['epochs']):
#     for i in range(0, x_train.shape[0], batch_size):
#       y_batch = y_train[i:i+batch_size]
#       x_batch = x_train[i:i+batch_size]
#       opt.optimize(*bp_nn.propogate_backward(y_batch, ff_nn.feed_forward(x_batch)))

#     opt.t += 1
#     y_pred = ff_nn.feed_forward(x_train)
#     y_pred_t = ff_nn.feed_forward(x_train_t)
#     print("epoch-",epoch+1)
#     print("accuracy-",np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0])
#     print("loss-", calculate_loss(y_train, y_pred, PARAMETERS['loss_function']))
#     print("validation-",np.sum(np.argmax(y_pred_t, axis=1) == np.argmax(y_train_t, axis=1)) / y_train_t.shape[0])


#     # lg={
#     #     'accuracy':np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0],
#     #     'val_accuracy':np.sum(np.argmax(y_pred_t, axis=1) == np.argmax(y_train_t, axis=1)) / y_train_t.shape[0],
#     #     'epoch':epoch+1,
#     #     'loss':calculate_loss(y_train, y_pred, PARAMETERS['loss_function']),
#     #     'validation_loss':calculate_loss(y_train_t, y_pred_t, PARAMETERS['loss_function'])
#     # }
#     # wandb.log(lg)


#   return ff_nn


In [13]:
# # Best Validation Accuracy Parameters ...
# PARAMETERS = {
#     'inpt_sz' : 784,
#     'oupt_sz' : 10,
#     'neurons' : 32,
#     'hidden_lyrs' : 4,
#     'activation' : 'tanh',
#     'oupt_activation' : 'softmax',
#     'dataset' : 'fashion_mnist',
#     'loss_function': 'cross_entropy',
#     'learning_rate': 0.001,
#     'optimizer': 'adam',
#     'momentum': 0.9,
#     'batch_sz': 16,
#     'epochs': 10,
#     'weight_initialisation': 'xavier',
#     'decay': 0.0005,
#     'beta': 0.9,
#     'beta1': 0.9,
#     'beta2': 0.999,
#     'epsilon': 1e-8
# }

In [14]:
# model = train(PARAMETERS)

Initial Accuracy: 0.1
epoch- 1
accuracy- 0.8487037037037037
loss- 0.4249131213636829
validation- 0.8406666666666667
epoch- 2
accuracy- 0.8653518518518518
loss- 0.37346304950517567
validation- 0.8536666666666667
epoch- 3
accuracy- 0.8734259259259259
loss- 0.34932005610715183
validation- 0.8615
epoch- 4
accuracy- 0.8787222222222222
loss- 0.33325438217264247
validation- 0.8636666666666667
epoch- 5
accuracy- 0.8824814814814815
loss- 0.3210413166754241
validation- 0.8665
epoch- 6
accuracy- 0.8860370370370371
loss- 0.31085882898893347
validation- 0.8703333333333333
epoch- 7
accuracy- 0.8890555555555556
loss- 0.302108374585719
validation- 0.8721666666666666
epoch- 8
accuracy- 0.8919814814814815
loss- 0.2946252693403678
validation- 0.8726666666666667
epoch- 9
accuracy- 0.8945185185185185
loss- 0.28811345852583975
validation- 0.8736666666666667
epoch- 10
accuracy- 0.8966851851851851
loss- 0.2823468789652041
validation- 0.8733333333333333


In [15]:
# def plot_confusion_mat(y, y_pred):
#   import pandas as pd
#   import seaborn as sn
#   mp = np.zeros((len(CLASSES),len(CLASSES)))
#   for i,j in zip(y, y_pred):
#     mp[np.argmax(i)][np.argmax(j)]+=1

#   df_cm = pd.DataFrame(mp, [i for i in CLASSES.values()], [i for i in CLASSES.values()])
#   plt.figure(figsize=(12,12))
#   sn.set(font_scale=1) # for label size
#   sn.heatmap(df_cm, annot=True, annot_kws={"size": 9}, cmap='crest', fmt='g') # font size
#   plt.xlabel('Prediction')
#   plt.ylabel('Actual')

#   wandb.init(project="cs6910_assignment1")
#   wandb.run.name = f'Q7 Confusion Matrix'
#   wandb.log({"plot":wandb.Image(plt)})
#   wandb.finish()


In [16]:
# x_test, y_test = load_data(PARAMETERS['dataset'], 'test')
# prediction = model.feed_forward(x_test)
# plot_confusion_mat(y_test, prediction)

In [None]:
def train():
  wandb.init()
  PARAMETERS = wandb.config
  wandb.run.name = f'hl_{PARAMETERS.hidden_lyrs}_bs_{PARAMETERS.batch_sz}_ac_{PARAMETERS.activation}'

  x_train, y_train = load_data(PARAMETERS['dataset'], 'train')
  np.random.seed(7)
  ff_nn = FF_NN(PARAMETERS)
  bp_nn = BP_NN(ff_nn, PARAMETERS)
  opt = Optimizer(ff_nn, bp_nn, PARAMETERS)
  print("Initial Accuracy: {}".format(np.sum(np.argmax(ff_nn.feed_forward(x_train), axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]))
  batch_size = PARAMETERS['batch_sz']

  x_train, x_train_t, y_train, y_train_t = train_test_split(x_train, y_train, test_size=0.1, random_state=7)

  for epoch in range(PARAMETERS['epochs']):
    for i in range(0, x_train.shape[0], batch_size):
      y_batch = y_train[i:i+batch_size]
      x_batch = x_train[i:i+batch_size]
      opt.optimize(*bp_nn.propogate_backward(y_batch, ff_nn.feed_forward(x_batch)))

    opt.t += 1
    y_pred = ff_nn.feed_forward(x_train)
    y_pred_t = ff_nn.feed_forward(x_train_t)
    print("epoch-",epoch+1)
    print("accuracy-",np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0])
    print("loss-", calculate_loss(y_train, y_pred, PARAMETERS['loss_function']))
    print("validation-",np.sum(np.argmax(y_pred_t, axis=1) == np.argmax(y_train_t, axis=1)) / y_train_t.shape[0])


    lg={
        'accuracy':np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0],
        'val_accuracy':np.sum(np.argmax(y_pred_t, axis=1) == np.argmax(y_train_t, axis=1)) / y_train_t.shape[0],
        'epoch':epoch+1,
        'loss':calculate_loss(y_train, y_pred, PARAMETERS['loss_function']),
        'validation_loss':calculate_loss(y_train_t, y_pred_t, PARAMETERS['loss_function'])
    }
    wandb.log(lg)


  return ff_nn


In [None]:
sweep_config = {
    "method": "bayes",
    "name": "Q4 WandB sweep",
    "metric": {"goal": "maximize", "name": "accuracy"},
    "parameters": {
        "inpt_sz": {"values": [784]},
        "oupt_sz": {"values": [10]},
        "oupt_activation": {"values": ["softmax"]},
        "dataset": {"values": ["fashion_mnist"]},
        "loss_function": {"values": ["cross_entropy"]},
        "beta": {"values": [0.9]},
        "beta1": {"values": [0.9]},
        "beta2": {"values": [0.999]},
        "neurons": {"values": [32, 64, 128]},
        "hidden_lyrs": {"values": [3, 4, 5]},
        "activation": {"values": ["relu", "tanh", "sigmoid"]},
        "learning_rate": {"values": [1e-3, 1e-4]},
        "optimizer": {"values": ['adam', 'sgd', 'nesterov', 'rmsprop', 'momentum', 'nadam']},
        "momentum": {"values": [0.8, 0.9]},
        "batch_sz": {"values": [16, 32, 64]},
        "epochs": {"values": [5, 10]},
        "weight_initialisation": {"values": ["random", "xavier"]},
        "decay": {"values": [0, 0.0005, 0.5]},
        "epsilon": {"values": [1e-8, 1e-10]},
    }
}


In [None]:
sweep_id = wandb.sweep(sweep_config, project="cs6910_assignment1")

Create sweep with ID: e00roqbx
Sweep URL: https://wandb.ai/cs23m070/cs6910_assignment1/sweeps/e00roqbx


In [None]:
wandb.agent(sweep_id, function=train, count=5)
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: 3e8nuudj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_sz: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dataset: fashion_mnist
[34m[1mwandb[0m: 	decay: 0.5
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-10
[34m[1mwandb[0m: 	hidden_lyrs: 3
[34m[1mwandb[0m: 	inpt_sz: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_function: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	oupt_activation: softmax
[34m[1mwandb[0m: 	oupt_sz: 10
[34m[1mwandb[0m: 	weight_initialisation: xavier


Initial Accuracy: 0.1
epoch- 1
accuracy- 0.8342037037037037
loss- 0.4525486255549258
validation- 0.8253333333333334
epoch- 2
accuracy- 0.8455185185185186
loss- 0.42245035412416165
validation- 0.8393333333333334
epoch- 3
accuracy- 0.8507962962962963
loss- 0.4074257018619091
validation- 0.8438333333333333
epoch- 4
accuracy- 0.8537962962962963
loss- 0.4017881462482137
validation- 0.8445
epoch- 5
accuracy- 0.8547407407407407
loss- 0.3998902835524814
validation- 0.8456666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▅▇██
epoch,▁▃▅▆█
loss,█▄▂▁▁
val_accuracy,▁▆▇██
validation_loss,█▄▂▁▁

0,1
accuracy,0.85474
epoch,5.0
loss,0.39989
val_accuracy,0.84567
validation_loss,0.43108


[34m[1mwandb[0m: Agent Starting Run: qdsxb1ht with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_sz: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dataset: fashion_mnist
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-10
[34m[1mwandb[0m: 	hidden_lyrs: 3
[34m[1mwandb[0m: 	inpt_sz: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_function: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	oupt_activation: softmax
[34m[1mwandb[0m: 	oupt_sz: 10
[34m[1mwandb[0m: 	weight_initialisation: xavier


Initial Accuracy: 0.09976666666666667
epoch- 1
accuracy- 0.6279074074074074
loss- 1.457781354610751
validation- 0.6265
epoch- 2
accuracy- 0.7006481481481481
loss- 1.035833762756186
validation- 0.6953333333333334
epoch- 3
accuracy- 0.725462962962963
loss- 0.8553180117756503
validation- 0.7231666666666666
epoch- 4
accuracy- 0.7407592592592592
loss- 0.7632519039518232
validation- 0.7376666666666667
epoch- 5
accuracy- 0.7516111111111111
loss- 0.7082380802807317
validation- 0.7448333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▅▇▇█
epoch,▁▃▅▆█
loss,█▄▂▂▁
val_accuracy,▁▅▇██
validation_loss,█▄▂▂▁

0,1
accuracy,0.75161
epoch,5.0
loss,0.70824
val_accuracy,0.74483
validation_loss,0.72009


[34m[1mwandb[0m: Agent Starting Run: i3bdl45l with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_sz: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dataset: fashion_mnist
[34m[1mwandb[0m: 	decay: 0.5
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-10
[34m[1mwandb[0m: 	hidden_lyrs: 3
[34m[1mwandb[0m: 	inpt_sz: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_function: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	oupt_activation: softmax
[34m[1mwandb[0m: 	oupt_sz: 10
[34m[1mwandb[0m: 	weight_initialisation: xavier


Initial Accuracy: 0.09976666666666667
epoch- 1
accuracy- 0.7989444444444445
loss- 0.5960040475473005
validation- 0.7876666666666666
epoch- 2
accuracy- 0.8130555555555555
loss- 0.5536537265567235
validation- 0.803
epoch- 3
accuracy- 0.8211296296296297
loss- 0.5373877037144031
validation- 0.811
epoch- 4
accuracy- 0.8252962962962963
loss- 0.5299124677466451
validation- 0.8168333333333333
epoch- 5
accuracy- 0.8273518518518519
loss- 0.525869378679475
validation- 0.8203333333333334


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▄▆▇█
epoch,▁▃▅▆█
loss,█▄▂▁▁
val_accuracy,▁▄▆▇█
validation_loss,█▄▂▁▁

0,1
accuracy,0.82735
epoch,5.0
loss,0.52587
val_accuracy,0.82033
validation_loss,0.53899


[34m[1mwandb[0m: Agent Starting Run: ny6kx6nz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_sz: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dataset: fashion_mnist
[34m[1mwandb[0m: 	decay: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_lyrs: 3
[34m[1mwandb[0m: 	inpt_sz: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_function: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	oupt_activation: softmax
[34m[1mwandb[0m: 	oupt_sz: 10
[34m[1mwandb[0m: 	weight_initialisation: random


Initial Accuracy: 0.1001


  return -y/yp
  return -y/yp
  der_outpt_mat.append(np.matmul(self.der_ls(y[i], y_pred[i]), self.der_outpt_actvtn(y_pred[i])))


epoch- 1
accuracy- 0.0995
loss- nan
validation- 0.1045
epoch- 2
accuracy- 0.0995
loss- nan
validation- 0.1045
epoch- 3
accuracy- 0.0995
loss- nan
validation- 0.1045
epoch- 4
accuracy- 0.0995
loss- nan
validation- 0.1045
epoch- 5
accuracy- 0.0995
loss- nan
validation- 0.1045
epoch- 6
accuracy- 0.0995
loss- nan
validation- 0.1045
epoch- 7
accuracy- 0.0995
loss- nan
validation- 0.1045
epoch- 8
accuracy- 0.0995
loss- nan
validation- 0.1045
epoch- 9
accuracy- 0.0995
loss- nan
validation- 0.1045
epoch- 10
accuracy- 0.0995
loss- nan
validation- 0.1045


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.0995
epoch,10.0
loss,
val_accuracy,0.1045
validation_loss,


[34m[1mwandb[0m: Agent Starting Run: x5wmz4iz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_sz: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dataset: fashion_mnist
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-10
[34m[1mwandb[0m: 	hidden_lyrs: 3
[34m[1mwandb[0m: 	inpt_sz: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_function: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	oupt_activation: softmax
[34m[1mwandb[0m: 	oupt_sz: 10
[34m[1mwandb[0m: 	weight_initialisation: xavier


Initial Accuracy: 0.1
epoch- 1
accuracy- 0.8231481481481482
loss- 0.5046904593161642
validation- 0.8151666666666667
epoch- 2
accuracy- 0.8437037037037037
loss- 0.4394058257624958
validation- 0.8358333333333333
epoch- 3
accuracy- 0.8541666666666666
loss- 0.4102047170998284
validation- 0.8446666666666667
epoch- 4
accuracy- 0.8603148148148149
loss- 0.39099279978819507
validation- 0.8513333333333334
epoch- 5
accuracy- 0.8654259259259259
loss- 0.37686430909720636
validation- 0.8545


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▄▆▇█
epoch,▁▃▅▆█
loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
validation_loss,█▄▃▂▁

0,1
accuracy,0.86543
epoch,5.0
loss,0.37686
val_accuracy,0.8545
validation_loss,0.40443
