In [1]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.13.11-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m33.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.16.0-py2.py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting GitPython!=3.1.29,>=1.0.0
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setproctitle
  Downloading setproctitle-1.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_6

In [1]:
from tensorflow import keras
from keras.datasets import fashion_mnist
from keras.datasets import mnist
import numpy as np
from matplotlib import pyplot as plt
import random
import wandb
import argparse
from datetime import datetime
import copy

class FeedForward:

    def __init__(self):
        """Load the data, hold out a validation split and prepare empty model state.

        Steps, in order:
          1. set the default hyper-parameters in ``self.parameters``;
          2. load fashion_mnist (or mnist for any other ``dataset`` value) and
             divide the pixel values by 255;
          3. shuffle the training set and move its first tenth into
             ``x_validate`` / ``y_validate``;
          4. mirror the hyper-parameters into instance attributes and create the
             empty dictionaries that other methods fill with weights, biases,
             optimizer history and cached activations.
        """
        # default hyper-parameters
        self.parameters = dict(
            wandb_project="DL Final Assignment 1",
            wandb_entity="cs22m019",
            dataset="fashion_mnist",
            epochs=5,
            batch_size=32,
            loss="mean_squared_error",
            optimizer="gd",
            learning_rate=0.1,
            momentum=0.01,
            beta=0.5,
            beta1=0.5,
            beta2=0.5,
            epsilon=0.000001,
            weight_decay=0.0,
            weight_init="random",
            num_layers=3,
            hidden_size=128,
            activation="sigmoid",
            output_function="softmax",
        )

        # command-line overrides are switched off here
        # self.update_parameters()

        # fashion_mnist when asked for, mnist for every other dataset value
        source = fashion_mnist if self.parameters["dataset"] == "fashion_mnist" else mnist
        (self.x_train, self.y_train), (self.x_test, self.y_test) = source.load_data()

        # scale the pixel values by 1/255
        self.x_train = self.x_train / 255
        self.x_test = self.x_test / 255

        # sample counts before the validation split
        self.train_n_samples = self.x_train.shape[0]
        self.test_n_samples = self.x_test.shape[0]

        # shuffle, then split 10% validation / 90% train
        shuffled = np.random.permutation(self.train_n_samples)
        holdout = self.train_n_samples // 10
        self.x_train, self.y_train = self.x_train[shuffled], self.y_train[shuffled]
        self.x_validate, self.y_validate = self.x_train[:holdout], self.y_train[:holdout]
        self.x_train, self.y_train = self.x_train[holdout:], self.y_train[holdout:]
        self.train_n_samples = self.x_train.shape[0]

        # class names; this list is used whichever dataset was loaded
        self.title = ["T-shirt/top", "Trouser", "PullOver", "Dress",
                      "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle Boot"]
        self.no_of_label = len(self.title)

        # hyper-parameters mirrored as attributes
        cfg = self.parameters
        self.epoch = cfg["epochs"]
        self.batch_size = cfg["batch_size"]
        self.lossFunction = cfg["loss"]
        self.optimizer = cfg["optimizer"]
        self.learningRate = cfg["learning_rate"]
        self.weightInitialization = cfg["weight_init"]
        self.hl = cfg["num_layers"]          # number of hidden layers
        self.L = self.hl + 1                 # hidden layers plus the output layer
        self.nnl = cfg["hidden_size"]        # neurons per hidden layer
        self.activationFunction = cfg["activation"]
        self.outputFunction = cfg["output_function"]
        self.weight_decay = cfg["weight_decay"]

        # problem dimensions: classes, flattened input size, training samples
        self.k = len(self.title)
        self.d = self.x_train.shape[1] * self.x_train.shape[2]
        self.n = self.train_n_samples

        # containers filled by the initializers and by the training methods
        self.weights, self.bias = {}, {}
        self.wHistory, self.bHistory = {}, {}
        self.wMomentum, self.bMomentum = {}, {}
        self.prev_wHistory, self.prev_bHistory = {}, {}
        self.pre_activation, self.post_activation = {}, {}
        
    # updates the default parameters with the parameters given on the command line
    def update_parameters(self):

        parser = argparse.ArgumentParser(description='DL Assignment 1 Parser')

        parser.add_argument('-wp', '--wandb_project',
                            type=str, metavar='', help='wandb project')
        parser.add_argument('-we', '--wandb_entity', type=str,
                            metavar='', help='wandb entity')
        parser.add_argument('-d', '--dataset', type=str,
                            metavar='', help='dataset')
        parser.add_argument('-e', '--epochs', type=int,
                            metavar='', help='epochs')
        parser.add_argument('-b', '--batch_size', type=int,
                            metavar='', help='batch size')
        parser.add_argument('-l', '--loss', type=str, 
                            metavar='', help='loss')
        parser.add_argument('-o', '--optimizer', type=str,
                            metavar='', help='optimizer')
        parser.add_argument('-lr', '--learning_rate',
                            type=float, metavar='', help='learning rate')
        parser.add_argument('-m', '--momentum', type=float,
                            metavar='', help='momentum')
        parser.add_argument('-beta', '--beta', type=float,
                            metavar='', help='beta')
        parser.add_argument('-beta1', '--beta1', type=float,
                            metavar='', help='beta1')
        parser.add_argument('-beta2', '--beta2', type=float,
                            metavar='', help='beta2')
        parser.add_argument('-eps', '--epsilon', type=float,
                            metavar='', help='epsilon')
        parser.add_argument('-w_d', '--weight_decay',
                            type=float, metavar='', help='weight decay')
        parser.add_argument('-w_i', '--weight_init', type=str,
                            metavar='', help='weight init')
        parser.add_argument('-nhl', '--num_layers', type=int,
                            metavar='', help='num layers')
        parser.add_argument('-sz', '--hidden_size', type=int,
                            metavar='', help='hidden size')
        parser.add_argument('-a', '--activation', type=str,
                            metavar='', help='activation')
        parser.add_argument('-of', '--output_function',
                            type=str, metavar='', help='output function')
        args = parser.parse_args()

        if (args.wandb_project != None):
            self.parameters["wandb_project"] = args.wandb_project
        if (args.wandb_entity != None):
            self.parameters["wandb_entity"] = args.wandb_entity
        if (args.dataset != None):
            self.parameters["dataset"] = args.dataset
        if (args.epochs != None):
            self.parameters["epochs"] = args.epochs
        if (args.batch_size != None):
            self.parameters["batch_size"] = args.batch_size
        if (args.loss != None):
            self.parameters["loss"] = args.loss
        if (args.optimizer != None):
            self.parameters["optimizer"] = args.optimizer
        if (args.learning_rate != None):
            self.parameters["learning_rate"] = args.learning_rate
        if (args.momentum != None):
            self.parameters["momentum"] = args.momentum
        if (args.beta != None):
            self.parameters["beta"] = args.beta
        if (args.beta1 != None):
            self.parameters["beta1"] = args.beta1
        if (args.beta2 != None):
            self.parameters["beta2"] = args.beta2
        if (args.epsilon != None):
            self.parameters["epsilon"] = args.epsilon
        if (args.weight_decay != None):
            self.parameters["weight_decay"] = args.weight_decay
        if (args.weight_init != None):
            self.parameters["weight_init"] = args.weight_init
        if (args.num_layers != None):
            self.parameters["num_layers"] = args.num_layers
        if (args.hidden_size != None):
            self.parameters["hidden_size"] = args.hidden_size
        if (args.activation != None):
            self.parameters["activation"] = args.activation

    # function to initialize weights and bias based on type -> random or Xavier initialization
    def weightsAndBiasInitializer(self):
        if self.weightInitialization == "Xavier":

            # first and last matrix 
            self.weights["w1"] = np.random.uniform(-np.sqrt(6 / (self.nnl + self.d)), np.sqrt(6 / (self.nnl + self.d)), (self.nnl, self.d))
            self.weights["w" + str(self.L)] = np.random.uniform(-np.sqrt(6 / (self.k + self.nnl)),np.sqrt(6 / (self.k + self.nnl)), (self.k, self.nnl))

            # Intermediate Matrices
            for i in range(2, self.L):
                self.weights["w" + str(i)] = np.random.uniform(-np.sqrt(6 / (self.nnl + self.nnl)), np.sqrt(6 / (self.nnl + self.nnl)), (self.nnl, self.nnl))

            # Last Vector
            self.bias["b" + str(self.L)] = np.random.uniform(-np.sqrt(6 / (self.k + 1)),np.sqrt(6 / (self.k + 1)), (self.k))

            for i in range(1, self.L):
                self.bias["b" + str(i)] = np.random.uniform(-np.sqrt(6 / (self.nnl + 1)),np.sqrt(6 / (self.nnl + 1)), (self.nnl))

        if self.weightInitialization == "random":
            # initailzation of weights
            '''
                  W1 = (d,nnl)
                  W2,..,W(L - 1) = (nnl,nnl)
                  WL = (k,nnl)
            '''
            w1 = np.random.normal(0, 0.5, size=(self.nnl, self.d))
            self.weights["w1"] = w1
            for i in range(2, self.L):
                self.weights["w" + str(i)] = np.random.normal(0,0.5, size=(self.nnl, self.nnl))
            self.weights["w" + str(self.L)] = np.random.normal(0,0.5, size=(self.k, self.nnl))

            # initialization of bias
            for i in range(1, self.L):
                self.bias["b" + str(i)] = np.random.normal(0,0.5, size=(self.nnl))
            self.bias["b" + str(self.L)] = np.random.normal(0,0.5, size=(self.k))

    # function to initialize momentum for weights and bias
    def momentumInitializer(self):

        # initializing momentum for weights
        w1 = np.zeros((self.nnl, self.d))
        self.wMomentum["w1"] = w1
        for i in range(2, self.L):
            self.wMomentum["w" + str(i)] = np.zeros((self.nnl, self.nnl))
        self.wMomentum["w" + str(self.L)] = np.zeros((self.k, self.nnl))

        # initializing momentum for bais
        for i in range(1, self.L):
            self.bMomentum["b" + str(i)] = np.zeros((self.nnl))
        self.bMomentum["b" + str(self.L)] = np.zeros((self.k))

    # function to initialize history for weights and bias
    def historyInitializer(self):

        # initializing history for weights
        w1 = np.zeros((self.nnl, self.d))
        self.wHistory["w1"] = w1
        for i in range(2, self.L):
            self.wHistory["w" + str(i)] = np.zeros((self.nnl, self.nnl))
        self.wHistory["w" + str(self.L)] = np.zeros((self.k, self.nnl))

        # initializing history for bais
        for i in range(1, self.L):
            self.bHistory["b" + str(i)] = np.zeros((self.nnl))
        self.bHistory["b" + str(self.L)] = np.zeros((self.k))

    # function used to implement different activation functions
    def activation_func(self, vector):
        if self.activationFunction == "sigmoid":
            return 1.0 / (1 + np.exp(-(vector)))
        
        if self.activationFunction == "tanh":
            return np.tanh(vector)
        
        if self.activationFunction == "ReLU":
            return np.maximum(0,vector)

    # function used to implement different output functions
    def output_func(self, vector):
        if self.outputFunction == "softmax":

            vector = vector - vector[np.argmax(vector)]

            return np.exp(vector) / np.sum(np.exp(vector))

    # function generating one-hot vector
    def oneHotVector(self, size, index):
        """Return a zero vector of length ``size`` with 1.0 at position ``index``."""
        encoded = np.zeros(size)
        encoded[index] = 1.0
        return encoded

    # function returning the differentiation of activation function
    def differentiation(self, vector):

        if self.activationFunction == "sigmoid":
            return (1.0 / (1 + np.exp(-(vector)))) * (1 - 1.0 / (1 + np.exp(-(vector))))

        if self.activationFunction == "tanh":
            return 1 - (np.tanh(vector)) ** 2

        if self.activationFunction == "ReLU":
            t = np.maximum(0,vector)
            t[t > 0] = 1
            return t

    # regularization
    def regularize(self):
        reg_term = 0
        validation_size = self.y_validate.shape[0]
        
        for (key,value) in self.weights.items():
          reg_term += (np.sum(self.weights[key] ** 2))
        reg_term = (self.weight_decay / (2 * validation_size)) * reg_term

        return reg_term

    # function returning the loss function value
    def loss_function(self, y_predicted, index):

        if self.lossFunction == "cross_entropy":
            t = 1e-8
            return (-1)*np.log(y_predicted[index] + t)

        if self.lossFunction == "mean_squared_error":
            y = self.oneHotVector(size=self.no_of_label, index=index)
            return np.sum((y_predicted - y) ** 2)

    # forward propagation - computes pre_activation vector,post_activation vector for each layer and predicts y at last layer
    def forward_propagation(self, input, index):

        # Populating pre_activation and post_activation vectors to dictionary in each layer for input[index]
        for k in range(1, self.L):

            # for first layer,post activation will be input
            if (k == 1):
                ''' flattening the input: 
                    -input(60000,28,28)
                    -input[index] size = (28,28)
                    -flattening input[index] gives size (784,1) = (d,1) where d is dimension of input
                    post_activation[h0] size = (d,1)
                    bias[b1] size = (nnl,1)
                    weights[w1] size = (nnl,d)
                    Therefore we get pre_activation[a1] size = (nnl,1) for all layer except last layer
                '''
                self.post_activation["h" + str(k - 1)] = input[index].flatten()

            # computing a(k) = b(k) + w(k)*h(k - 1) for each input[index]
            self.pre_activation["a" + str(k)] = self.bias["b" + str(k)] + np.dot(self.weights["w" + str(k)], self.post_activation["h" + str(k - 1)])
           
            # computing h(k) = g(a(k)) where g is activation function
            self.post_activation["h" + str(k)] = self.activation_func(self.pre_activation["a" + str(k)])

        # computing pre_activation for last layer
        self.pre_activation["a" + str(self.L)] = self.bias["b" + str(self.L)] + np.dot(self.weights["w" + str(self.L)], self.post_activation["h" + str(self.L - 1)])

        # prediction y (y_hat) = O(a(L)) where O is output function
        self.post_activation["h" +str(self.L)] = self.output_func(self.pre_activation["a" + str(self.L)])

    # performs back propagation and returns gradients of weights and bias
    def backward_propagation(self, index, actual_y):

        grad_pre_activation = {}
        grad_post_activation = {}
        grad_weights = {}
        grad_bias = {}

        predicted_y = self.post_activation["h" + str(self.L)]

        # Computing output gradient
        one_hot_vector = self.oneHotVector(self.no_of_label, actual_y[index])
        if self.lossFunction == "cross_entropy" :
          grad_pre_activation["a" + str(self.L)] = (predicted_y - one_hot_vector)
        else :
          grad_pre_activation["a" + str(self.L)] = -2 * (one_hot_vector - predicted_y) * (predicted_y * (np.ones(self.no_of_label) - predicted_y))
       
        
        k = self.L
        while k > 0:

            # Computing gradient w.r.t parameters - weight and bais
            '''
              np.reshape(grad_pre_activation["a" + str(L)],(-1,1)) = (k,1)
              np.reshape(post_activation["h" + str(L - 1)],(1,-1)) = (1,nnl)
            '''
            grad_weights["w" + str(k)] = np.dot(np.reshape(grad_pre_activation["a" + str(k)], (-1, 1)), np.reshape(self.post_activation["h" + str(k - 1)], (1, -1)))
            grad_bias["b" + str(k)] = grad_pre_activation["a" + str(k)]

            if k != 1:
                # Computing gradient w.r.t layer below (post_activation)
                grad_post_activation["h" + str(k - 1)] = np.dot(self.weights["w" + str(k)].T, np.reshape(grad_pre_activation["a" + str(k)], (-1, 1))).flatten()

                # Computing gradient w.r.t layer below (pre_activation)
                g_dash = self.differentiation(self.pre_activation["a" + str(k - 1)])
                grad_pre_activation["a" +str(k - 1)] = grad_post_activation["h" + str(k - 1)] * g_dash

            k = k - 1
        return grad_weights, grad_bias

    # function returning zero-initialized accumulators for the weight and bias gradients
    def make_accumalate_zero(self):

        acc_grad_weights = {}
        acc_grad_bias = {}

        # accumalated weights are set to zero
        acc_grad_weights["w1"] = np.zeros((self.nnl, self.d))
        for i in range(2, self.L):
            acc_grad_weights["w" + str(i)] = np.zeros((self.nnl, self.nnl))
        acc_grad_weights["w" + str(self.L)] = np.zeros((self.k, self.nnl))

        # accumalated bias are set to zero
        for i in range(1, self.L):
            acc_grad_bias["b" + str(i)] = np.zeros((self.nnl))
        acc_grad_bias["b" + str(self.L)] = np.zeros((self.k))

        return acc_grad_weights, acc_grad_bias

    # runs stochastic gradient descent for one epoch
    def oneEpochSGD(self, epoch):
        ''' Executes A Single Epoch for Stochastic Gradient Descent Algorithm.
            Returns the training loss,training accuracy,validaiton loss and validation accuracy,averaged over all points. '''
        
        n = self.train_n_samples
        
        # randomizing batches
        idx = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[idx]
        self.y_train = self.y_train[idx]

        input = self.x_train
        actual_y = self.y_train

        # total Loss for epoch
        loss_input = 0
        count = 0

        # execute one epoch for all datapoints in train set
        for index in range(n):

            # perform forward propagation
            self.forward_propagation(input, index)
            predicted_y = self.post_activation["h" + str(self.L)]

            # compute loss
            loss_input += self.loss_function(predicted_y, actual_y[index])

            # perform backward propagation
            grad_weights, grad_bias = self.backward_propagation(index, actual_y)

            # compute the number of datapoints which are correctly classified
            indexWithMaxProb = np.argmax(predicted_y)
            if (actual_y[index] == (indexWithMaxProb)):
                count = count + 1

            # update weights and bias if the number of datapoints in batch_size are divisble by batch_size
            if ((index + 1) % self.batch_size == 0):
                # update weights
                for (key, value) in self.weights.items():
                    self.weights[key] = self.weights[key] - ((self.learningRate / self.batch_size) * grad_weights[key])

                # update bias
                for (key, value) in self.bias.items():
                    self.bias[key] = self.bias[key] - ((self.learningRate / self.batch_size) * grad_bias[key])

        # if the number of datapoints in batch is not divisible by batch_size update weights and bias 
        if n % self.batch_size != 0:
            # update weights
            for (key, value) in self.weights.items():
                self.weights[key] = self.weights[key] - ((self.learningRate / self.batch_size) * grad_weights[key])

            # update bias
            for (key, value) in self.bias.items():
                self.bias[key] = self.bias[key] - ((self.learningRate / self.batch_size) * grad_bias[key])

        # compute trainAccuracy,trainLoss averaged over train size
        trainAccuracy = count / n
        trainLoss = loss_input / n + self.regularize()

        # compute validationAccuracy,validationLoss avergaed over test size 
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()
        
        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs momentum gradient descent for one epoch
    def oneEpochMOMENTUM(self, epoch):
        ''' Executes A Single Epoch for Momentum Gradient Descent Algorithm.
            Returns the training loss,training accuracy,validaiton loss and validation accuracy,averaged over all points. '''

        n = self.train_n_samples
        
        # randomizing batches
        idx = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[idx]
        self.y_train = self.y_train[idx]

        input = self.x_train
        actual_y = self.y_train

        # maintaining previous history for weights and bias
        self.prev_wHistory, self.prev_bHistory = self.wHistory, self.bHistory

        # Total Loss for epoch
        loss_input = 0
        count = 0  
        beta = self.parameters["momentum"]

        # set accumalated gradients to zero
        acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # execute one epoch for all datapoints in train set
        for index in range(n):

            # perform forward propagation
            self.forward_propagation(input, index)
            predicted_y = self.post_activation["h" + str(self.L)]

            # compute loss
            loss_input += self.loss_function(predicted_y, actual_y[index])

            # perform backward propagation
            grad_weights, grad_bias = self.backward_propagation(
                index, actual_y)

            # compute the number of datapoints which are correctly classified
            indexWithMaxProb = np.argmax(predicted_y)
            if (actual_y[index] == (indexWithMaxProb)):
                count = count + 1

            # accumulate grad_weights and grad_bais for each input
            for (key, value) in grad_weights.items():
                acc_grad_weights[key] = acc_grad_weights[key] + grad_weights[key]

            for (key, value) in grad_bias.items():
                acc_grad_bias[key] = acc_grad_bias[key] + grad_bias[key]


            # update weights and bias if the number of datapoints in batch_size are divisble by batch_size
            if ((index + 1) % self.batch_size == 0):

                # update weight history
                for (key, value) in self.wHistory.items():
                    self.wHistory[key] = beta * self.prev_wHistory[key] + \
                        ((self.learningRate / self.batch_size) * acc_grad_weights[key])

                # update bias history
                for (key, value) in self.bHistory.items():
                    self.bHistory[key] = beta * self.prev_bHistory[key] + \
                        ((self.learningRate / self.batch_size) * acc_grad_bias[key])

                # update weights
                for (key, value) in self.weights.items():
                    self.weights[key] = self.weights[key] - self.wHistory[key]

                # update bias
                for (key, value) in self.bias.items():
                    self.bias[key] = self.bias[key] - self.bHistory[key]

                # updating histroy for weights and bias
                self.prev_wHistory = self.wHistory
                self.prev_bHistory = self.bHistory

                # set accumalated gradients to zero
                acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

            # if the number of datapoints in batch is not divisible by batch_size update weights and bias 
        if n % self.batch_size != 0:
            
            # update weight history
            for (key, value) in self.wHistory.items():
                self.wHistory[key] = beta * self.prev_wHistory[key] + \
                    ((self.learningRate / self.batch_size) * acc_grad_weights[key])

            # update bias history
            for (key, value) in self.bHistory.items():
                self.bHistory[key] = beta * self.prev_bHistory[key] + \
                    ((self.learningRate / self.batch_size) * acc_grad_bias[key])

            # update weights
            for (key, value) in self.weights.items():
                self.weights[key] = self.weights[key] - self.wHistory[key]

            # update bias
            for (key, value) in self.bias.items():
                self.bias[key] = self.bias[key] - self.bHistory[key]

            # updating histroy for weights and bias
            self.prev_wHistory = self.wHistory
            self.prev_bHistory = self.bHistory

            # set accumalated gradients to zero
            acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # compute trainAccuracy,trainLoss averaged over train size
        trainAccuracy = count / n
        trainLoss = loss_input / n + self.regularize()

        # compute validationAccuracy,validationLoss averaged over test size 
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()
        
        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs Nesterov accelerated gradient descent for one epoch
    def oneEpochNAG(self, epoch):
        ''' Executes A Single Epoch for Nesterov Accelerated Gradient Descent Algorithm.
            Returns the training loss,training accuracy,validaiton loss and validation accuracy,averaged over all points. '''
        
        n = self.train_n_samples
        
        # randomizing batches
        idx = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[idx]
        self.y_train = self.y_train[idx]

        input = self.x_train
        actual_y = self.y_train

        # maintaining previous history for weights and bias
        self.prev_wHistory, self.prev_bHistory = self.wHistory, self.bHistory
        
        # Total Loss for epoch
        loss_input = 0
        count = 0
        beta = self.parameters["momentum"]

        # set accumalated gradients to zero
        acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()
        
        # computing partial values
        self.partial_wHistory = {}
        self.partial_bHistory = {}

        for (key, value) in self.wHistory.items():
            self.partial_wHistory[key] = beta * self.prev_wHistory[key]

        for (key, value) in self.bHistory.items():
            self.partial_bHistory[key] = beta * self.prev_bHistory[key]

        # execute one epoch for all datapoints in train set
        for index in range(n):

            # perfrom forward propagation
            self.forward_propagation(input, index)
            predicted_y = self.post_activation["h" + str(self.L)]

            # storing weights and bias in temperory values
            temp_weights = copy.deepcopy(self.weights)
            temp_bias = copy.deepcopy(self.bias)

            # update weights and bias
            for (key, value) in self.weights.items():
                self.weights[key] = self.weights[key] -  self.partial_wHistory[key]

            for (key, value) in self.bias.items():
                self.bias[key] = self.bias[key] - self.partial_bHistory[key]

            # perform backward propagation
            grad_weights, grad_bias = self.backward_propagation(index,actual_y)

            # update weights and bias 
            self.weights = temp_weights
            self.bias = temp_bias

            # accumulate grad_weights and grad_bais for each input
            for (key, value) in grad_weights.items():
                acc_grad_weights[key] = acc_grad_weights[key] + grad_weights[key]

            for (key, value) in grad_bias.items():
                acc_grad_bias[key] = acc_grad_bias[key] + grad_bias[key]

            # compute loss
            loss_input += self.loss_function(predicted_y, self.y_train[index])

            # compute the number of datapoints which are correctly classified
            indexWithMaxProb = np.argmax(predicted_y)
            if (actual_y[index] == (indexWithMaxProb)):
                count = count + 1

            # update weights and bias if the number of datapoints in batch_size are divisble by batch_size
            if ((index + 1) % self.batch_size == 0):
    
                # update weight history
                for (key, value) in self.wHistory.items():
                    self.wHistory[key] = beta * self.prev_wHistory[key] + ((self.learningRate / self.batch_size) * acc_grad_weights[key])

                # update bias history
                for (key, value) in self.bHistory.items():
                    self.bHistory[key] = beta * self.prev_bHistory[key] + ((self.learningRate / self.batch_size) * acc_grad_bias[key])

                # update weights
                for (key, value) in self.weights.items():
                    self.weights[key] = self.weights[key] - self.wHistory[key]

                for (key, value) in self.bias.items():
                    self.bias[key] = self.bias[key] - self.bHistory[key]

                # updating histroy for weights and bias
                self.prev_wHistory = self.wHistory
                self.prev_bHistory = self.bHistory
                
                # set accumalated gradients to zero
                acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()
            
        # if the number of datapoints in batch is not divisible by batch_size update weights and bias 
        if n % self.batch_size != 0:

            # update weight history
            for (key, value) in self.wHistory.items():
                self.wHistory[key] = beta * self.prev_wHistory[key] + ((self.learningRate / self.batch_size) * acc_grad_weights[key])

            # update bias history
            for (key, value) in self.bHistory.items():
                self.bHistory[key] = beta * self.prev_bHistory[key] + ((self.learningRate / self.batch_size) * acc_grad_bias[key])

            # update weights
            for (key, value) in self.weights.items():
                self.weights[key] = self.weights[key] - self.wHistory[key]

            for (key, value) in self.bias.items():
                self.bias[key] = self.bias[key] - self.bHistory[key]

            # updating histroy for weights and bias
            self.prev_wHistory = self.wHistory
            self.prev_bHistory = self.bHistory
            
            # set accumalated gradients to zero
            acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()
                
        # compute trainAccuracy,trainLoss averaged over train size
        trainAccuracy = count / n
        trainLoss = loss_input / n + self.regularize()

        # compute validationAccuracy,validationLoss averaged over test size 
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()
        
        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs RMSPROP for one epoch
    def oneEpochRMSPROP(self,epoch):
        ''' Runs one epoch of mini-batch RMSPROP over the freshly shuffled training set.

            Per-sample gradients are summed; after every `batch_size` samples, and once
            more for a leftover partial batch, the squared-gradient history is refreshed
            and the weights and biases are stepped. `epoch` is accepted for a uniform
            optimizer signature and is not read here.
            Returns (trainLoss, trainAccuracy, validationLoss, validationAccuracy).
        '''
        total = self.train_n_samples

        # shuffle features and labels with one shared permutation
        order = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[order]
        self.y_train = self.y_train[order]

        features = self.x_train
        labels = self.y_train

        # these names alias the live history dicts, so reading through them
        # always yields the value written by the previous update
        hist_w, hist_b = self.wHistory, self.bHistory

        decay = self.parameters["beta"]
        eps = self.parameters["epsilon"]

        def rmsprop_step(sum_w, sum_b):
            # one RMSPROP update from the summed gradients; hands back zeroed accumulators
            for name in self.wHistory:
                self.wHistory[name] = decay * hist_w[name] + (1 - decay) * sum_w[name] ** 2
            for name in self.bHistory:
                self.bHistory[name] = decay * hist_b[name] + (1 - decay) * sum_b[name] ** 2

            # the divisor is always the configured batch size, also for a partial batch
            rate = self.learningRate / self.batch_size
            for name in self.weights:
                self.weights[name] = self.weights[name] - rate * sum_w[name] / (np.sqrt(self.wHistory[name] + eps))
            for name in self.bias:
                self.bias[name] = self.bias[name] - rate * sum_b[name] / (np.sqrt(self.bHistory[name] + eps))

            # keep the instance-level "previous history" references in sync
            self.prev_wHistory = self.wHistory
            self.prev_bHistory = self.bHistory

            return self.make_accumalate_zero()

        running_loss = 0
        hits = 0
        sum_w, sum_b = self.make_accumalate_zero()

        for i in range(total):

            # forward pass; the network output is the last post-activation
            self.forward_propagation(features, i)
            output = self.post_activation["h" + str(self.L)]

            # backward pass for this sample
            grads_w, grads_b = self.backward_propagation(i, labels)

            running_loss += self.loss_function(output, self.y_train[i])

            # count the sample as correct when the arg-max class equals its label
            if labels[i] == np.argmax(output):
                hits = hits + 1

            # add this sample's gradients to the batch totals
            for name, grad in grads_w.items():
                sum_w[name] = sum_w[name] + grad
            for name, grad in grads_b.items():
                sum_b[name] = sum_b[name] + grad

            # a full batch has been accumulated
            if (i + 1) % self.batch_size == 0:
                sum_w, sum_b = rmsprop_step(sum_w, sum_b)

        # leftover samples that did not fill a complete batch
        if total % self.batch_size != 0:
            sum_w, sum_b = rmsprop_step(sum_w, sum_b)

        # training metrics averaged over the training set
        trainAccuracy = hits / total
        trainLoss = running_loss / total + self.regularize()

        # validation metrics
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()

        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs ADAM gradient descent for one epoch
    def oneEpochADAM(self,epoch):
        ''' Runs one epoch of mini-batch ADAM over the freshly shuffled training set.

            Per-sample gradients are summed; after every `batch_size` samples, and once
            more for a leftover partial batch, both moment estimates are refreshed and the
            parameters are stepped. The bias-correction exponent is `epoch + 1`.
            Returns (trainLoss, trainAccuracy, validationLoss, validationAccuracy).
        '''
        total = self.train_n_samples

        # shuffle features and labels with one shared permutation
        order = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[order]
        self.y_train = self.y_train[order]

        features = self.x_train
        labels = self.y_train

        # aliases of the live moment dicts: reads through them see the latest values
        mom_w, mom_b = self.wMomentum, self.bMomentum
        hist_w, hist_b = self.wHistory, self.bHistory

        beta1 = self.parameters["beta1"]
        beta2 = self.parameters["beta2"]
        epsilon = self.parameters["epsilon"]

        def adam_step(sum_w, sum_b):
            # one ADAM update from the summed gradients; hands back zeroed accumulators

            # first-moment (momentum) estimates
            for name in self.wMomentum:
                self.wMomentum[name] = beta1*mom_w[name] + (1 - beta1) * sum_w[name]
            for name in self.bMomentum:
                self.bMomentum[name] = beta1*mom_b[name] + (1 - beta1) * sum_b[name]

            # second-moment (squared-gradient) estimates
            for name in self.wHistory:
                self.wHistory[name] = beta2 * hist_w[name] + (1 - beta2) * sum_w[name] ** 2
            for name in self.bHistory:
                self.bHistory[name] = beta2 * hist_b[name] + (1 - beta2) * sum_b[name] ** 2

            # bias-correction denominators, identical for every parameter
            fix1 = 1 - np.power(beta1, epoch + 1)
            fix2 = 1 - np.power(beta2, epoch + 1)

            # the divisor is always the configured batch size, also for a partial batch
            rate = self.learningRate / self.batch_size

            for name in self.weights:
                m_hat = self.wMomentum[name] / fix1
                v_hat = self.wHistory[name] / fix2
                delta = rate * m_hat / (np.sqrt(v_hat + epsilon))
                self.weights[name] = self.weights[name] - delta

            for name in self.bias:
                m_hat = self.bMomentum[name] / fix1
                v_hat = self.bHistory[name] / fix2
                delta = rate * m_hat / (np.sqrt(v_hat + epsilon))
                self.bias[name] = self.bias[name] - delta

            return self.make_accumalate_zero()

        running_loss = 0
        hits = 0
        sum_w, sum_b = self.make_accumalate_zero()

        for i in range(total):

            # forward pass; the network output is the last post-activation
            self.forward_propagation(features, i)
            output = self.post_activation["h" + str(self.L)]

            # backward pass for this sample
            grads_w, grads_b = self.backward_propagation(i, labels)

            running_loss += self.loss_function(output, labels[i])

            # count the sample as correct when the arg-max class equals its label
            if labels[i] == np.argmax(output):
                hits = hits + 1

            # add this sample's gradients to the batch totals
            for name, grad in grads_w.items():
                sum_w[name] = sum_w[name] + grad
            for name, grad in grads_b.items():
                sum_b[name] = sum_b[name] + grad

            # a full batch has been accumulated
            if (i + 1) % self.batch_size == 0:
                sum_w, sum_b = adam_step(sum_w, sum_b)

        # leftover samples that did not fill a complete batch
        if total % self.batch_size != 0:
            sum_w, sum_b = adam_step(sum_w, sum_b)

        # training metrics averaged over the training set
        trainAccuracy = hits / total
        trainLoss = running_loss / total + self.regularize()

        # validation metrics
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()

        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs NADAM gradient descent for one epoch
    def oneEpochNADAM(self,epoch):
        ''' Runs one epoch of mini-batch NADAM over the freshly shuffled training set.

            Per-sample gradients are summed; after every `batch_size` samples, and once
            more for a leftover partial batch, both moment estimates are refreshed and the
            parameters are stepped with a look-ahead term built from the current batch
            gradient. The bias-correction exponent is `epoch + 1`.
            Returns (trainLoss, trainAccuracy, validationLoss, validationAccuracy).
        '''
        total = self.train_n_samples

        # shuffle features and labels with one shared permutation
        order = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[order]
        self.y_train = self.y_train[order]

        features = self.x_train
        labels = self.y_train

        # aliases of the live moment dicts: reads through them see the latest values
        mom_w, mom_b = self.wMomentum, self.bMomentum
        hist_w, hist_b = self.wHistory, self.bHistory

        beta1 = self.parameters["beta1"]
        beta2 = self.parameters["beta2"]
        epsilon = self.parameters["epsilon"]

        def nadam_step(sum_w, sum_b):
            # one NADAM update from the summed gradients; hands back zeroed accumulators

            # first-moment (momentum) estimates
            for name in self.wMomentum:
                self.wMomentum[name] = beta1*mom_w[name] + (1 - beta1) * sum_w[name]
            for name in self.bMomentum:
                self.bMomentum[name] = beta1*mom_b[name] + (1 - beta1) * sum_b[name]

            # second-moment (squared-gradient) estimates
            for name in self.wHistory:
                self.wHistory[name] = beta2 * hist_w[name] + (1 - beta2) * sum_w[name] ** 2
            for name in self.bHistory:
                self.bHistory[name] = beta2 * hist_b[name] + (1 - beta2) * sum_b[name] ** 2

            # bias-correction denominators, identical for every parameter
            fix1 = 1 - np.power(beta1, epoch + 1)
            fix2 = 1 - np.power(beta2, epoch + 1)
            lookahead_fix = 1 - beta1 ** (epoch + 1)

            # the divisor is always the configured batch size, also for a partial batch
            rate = self.learningRate / self.batch_size

            for name in self.weights:
                m_hat = self.wMomentum[name] / fix1
                v_hat = self.wHistory[name] / fix2
                scale = (rate / np.sqrt(v_hat + epsilon))
                direction = beta1 * m_hat + ((1 - beta1) * sum_w[name] / lookahead_fix)
                self.weights[name] = self.weights[name] - scale*direction

            for name in self.bias:
                m_hat = self.bMomentum[name] / fix1
                v_hat = self.bHistory[name] / fix2
                scale = (rate / np.sqrt(v_hat + epsilon))
                direction = beta1 * m_hat + ((1 - beta1) * sum_b[name] / lookahead_fix)
                self.bias[name] = self.bias[name] - scale*direction

            return self.make_accumalate_zero()

        running_loss = 0
        hits = 0
        sum_w, sum_b = self.make_accumalate_zero()

        for i in range(total):

            # forward pass; the network output is the last post-activation
            self.forward_propagation(features, i)
            output = self.post_activation["h" + str(self.L)]

            # backward pass for this sample
            grads_w, grads_b = self.backward_propagation(i, labels)

            running_loss += self.loss_function(output, labels[i])

            # count the sample as correct when the arg-max class equals its label
            if labels[i] == np.argmax(output):
                hits = hits + 1

            # add this sample's gradients to the batch totals
            for name, grad in grads_w.items():
                sum_w[name] = sum_w[name] + grad
            for name, grad in grads_b.items():
                sum_b[name] = sum_b[name] + grad

            # a full batch has been accumulated
            if (i + 1) % self.batch_size == 0:
                sum_w, sum_b = nadam_step(sum_w, sum_b)

        # leftover samples that did not fill a complete batch
        if total % self.batch_size != 0:
            sum_w, sum_b = nadam_step(sum_w, sum_b)

        # training metrics averaged over the training set
        trainAccuracy = hits / total
        trainLoss = running_loss / total + self.regularize()

        # validation metrics
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()
        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs gradient descent for one epoch
    def oneEpochGD(self, epoch):
        ''' Runs one epoch of mini-batch vanilla gradient descent over the freshly
            shuffled training set.

            Per-sample gradients are summed; after every `batch_size` samples, and once
            more for a leftover partial batch, the weights and biases are stepped.
            `epoch` is accepted for a uniform optimizer signature and is not read here.
            Returns (trainLoss, trainAccuracy, validationLoss, validationAccuracy).
        '''
        total = self.train_n_samples

        # shuffle features and labels with one shared permutation
        order = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[order]
        self.y_train = self.y_train[order]

        features = self.x_train
        labels = self.y_train

        def descend(sum_w, sum_b):
            # one plain gradient step from the summed gradients; hands back zeroed accumulators
            # the divisor is always the configured batch size, also for a partial batch
            rate = self.learningRate / self.batch_size
            for name in self.weights:
                self.weights[name] = self.weights[name] - (rate * sum_w[name])
            for name in self.bias:
                self.bias[name] = self.bias[name] - (rate * sum_b[name])
            return self.make_accumalate_zero()

        running_loss = 0
        hits = 0
        sum_w, sum_b = self.make_accumalate_zero()

        for i in range(total):

            # forward pass; the network output is the last post-activation
            self.forward_propagation(features, i)
            output = self.post_activation["h" + str(self.L)]

            running_loss += self.loss_function(output, labels[i])

            # backward pass for this sample
            grads_w, grads_b = self.backward_propagation(i, labels)

            # count the sample as correct when the arg-max class equals its label
            if labels[i] == np.argmax(output):
                hits = hits + 1

            # add this sample's gradients to the batch totals
            for name, grad in grads_w.items():
                sum_w[name] = sum_w[name] + grad
            for name, grad in grads_b.items():
                sum_b[name] = sum_b[name] + grad

            # a full batch has been accumulated
            if (i + 1) % self.batch_size == 0:
                sum_w, sum_b = descend(sum_w, sum_b)

        # leftover samples that did not fill a complete batch
        if total % self.batch_size != 0:
            sum_w, sum_b = descend(sum_w, sum_b)

        # training metrics averaged over the training set
        trainAccuracy = hits / total
        trainLoss = running_loss / total + self.regularize()

        # validation metrics
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()

        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # executes a single epoch of the FeedForward NN according to the optimizer function. 
    def executeOneEpoch(self,epoch):
        ''' Runs one training epoch with the routine selected by self.optimizer.

            epoch : epoch number, forwarded unchanged to the selected routine.
            Returns whatever the selected routine returns.
            Raises ValueError when self.optimizer is not a supported name. Before this
            check an unknown name fell through and returned None, which only surfaced
            later as an unpacking error in the caller.
        '''
        # optimizer name -> name of the method that runs one epoch of it;
        # resolved with getattr so only the selected routine is looked up
        epoch_routines = {
            "sgd": "oneEpochSGD",
            "momentum": "oneEpochMOMENTUM",
            "nestrov": "oneEpochNAG",
            "rmsprop": "oneEpochRMSPROP",
            "adam": "oneEpochADAM",
            "nadam": "oneEpochNADAM",
            "gd": "oneEpochGD",
        }

        if self.optimizer not in epoch_routines:
            raise ValueError(
                "unsupported optimizer {!r}; expected one of: {}".format(
                    self.optimizer, ", ".join(epoch_routines)))

        return getattr(self, epoch_routines[self.optimizer])(epoch)

    # computes validation loss and validation accuracy 
    def computeTestLossAndAccuracy(self):
        ''' Evaluates the current network on the validation split.

            Every validation sample is pushed through forward_propagation; its loss is
            accumulated and it counts as correct when the arg-max of the network output
            equals its label.
            Returns (validationLoss, validationAccuracy): the mean loss plus the
            regularization term, and the fraction of correctly classified samples.
        '''
        features = self.x_validate
        labels = self.y_validate
        sample_total = labels.shape[0]

        # the network output is stored under the key of the last layer
        output_key = "h" + str(self.L)

        loss_sum = 0
        hits = 0
        for i in range(sample_total):
            self.forward_propagation(features, i)
            output = self.post_activation[output_key]

            loss_sum += self.loss_function(output, labels[i])

            if labels[i] == np.argmax(output):
                hits = hits + 1

        # averages over the validation set
        validationAccuracy = hits / sample_total
        validationLoss = loss_sum / sample_total + self.regularize()

        return validationLoss, validationAccuracy

    '''<----------------------------Question 1------------------------------------->'''
    def question_1(self):
        ''' Logs one sample image per class label to Weights & Biases.

            Starts a wandb run in the project named by self.parameters["wandb_project"],
            draws random training indices, keeps the first sample seen for each label and
            logs the collected images under the key "Images". Each image is captioned
            with self.title[label].
        '''
        wandb.init(
                # set the wandb project where this run will be logged;
                # read from this instance rather than from a module-level object
                project = self.parameters["wandb_project"],
        )

        # label -> feature of the first sample drawn with that label
        labels_added = {}

        # Draw as many random indices as there are training samples. A label that
        # was already collected is skipped, so every label contributes one image.
        images = []
        for i in range(self.train_n_samples):
            index = random.randrange(self.train_n_samples)
            feature = self.x_train[index]
            label = self.y_train[index]
            if (label in labels_added.keys()):
                continue
            labels_added[label] = feature
            image = wandb.Image(
                labels_added[label], caption=f"{self.title[label]}")
            images.append(image)
        wandb.log({"Images": images})
    
    '''<----------------------------Question 2------------------------>'''

    def feed_forward_q2(self): 
        ''' Trains the network for self.epoch epochs, then prints the network output
            for one randomly chosen validation sample.
        '''
        # fresh weights and biases
        self.weightsAndBiasInitializer()

        # epochs are numbered from 1; the per-epoch metrics are not used here
        epoch_no = 1
        last_epoch = self.epoch
        while epoch_no <= last_epoch:
            (_train_loss, _train_acc, _val_loss,
             _val_acc) = self.executeOneEpoch(epoch_no)
            epoch_no += 1

        # forward one random validation sample and show the output layer
        sample = np.random.randint(self.y_validate.shape[0])
        features = self.x_validate
        self.forward_propagation(features, sample)
        output = self.post_activation["h" + str(self.L)]
        print(output)

    '''<----------------------------Question 3-4------------------------------------->'''
    def feed_forward_q3_4(self):
        ''' Trains the network from scratch for self.epoch epochs.

            Parameter, history and momentum dictionaries are reset and re-initialized,
            then each epoch's metrics are printed and logged to wandb. The latest
            validation accuracy is kept in self.validation_Accuracy.
        '''
        # drop any state left over from an earlier run
        self.weights, self.bias = {}, {}
        self.wHistory, self.bHistory = {}, {}
        self.wMomentum, self.bMomentum = {}, {}

        # weights and bias, then their history and momentum
        self.weightsAndBiasInitializer()
        self.historyInitializer()
        self.momentumInitializer()

        self.validation_Accuracy = 0

        report = "epoch:{epoch}, train loss:{train_l}, train accuracy:{train_ac}, validation loss:{validation_l}, validation accuracy:{validation_ac}"

        # epochs are numbered from 1
        for epoch_no in range(1, self.epoch + 1):
            (train_Loss, train_Accuracy, validation_Loss,self.validation_Accuracy) = self.executeOneEpoch(epoch_no)

            print(report.format(
                epoch = epoch_no,
                train_l = train_Loss,
                train_ac = train_Accuracy,
                validation_l = validation_Loss,
                validation_ac = self.validation_Accuracy))

            wandb.log({
                'train loss':train_Loss,
                'train accuracy':train_Accuracy,
                'validation loss':validation_Loss,
                'validation accuracy':self.validation_Accuracy,
            })
            

In [2]:
# Module-level network instance; the sweep cell reads its parameters and
# train() reconfigures and retrains this same object for every sweep run.
feed_forward = FeedForward()


In [3]:
# Hyper-parameter search space for the wandb sweep.
# The keys under 'parameters' are the names train() reads from wandb.config.
sweep_config = {

        'method' : 'random', # search strategy; wandb also offers 'grid' and 'bayes'
        'name' : 'random_sweep mse',
        # metric the sweep optimizes; 'validation accuracy' is the key logged per epoch by feed_forward_q3_4
        'metric' : {
            'name' : 'validation accuracy',
            'goal' : 'maximize'
        },
        'parameters':{
                'epochs' : {
                    'values' : [5,10]
                },
                'number_of_hidden_layer':{
                    'values' : [3,4,5]
                },
                'size_of_hidden_layer' : {
                    'values' :[32,64,128]
                },
                'weight_decay' : {
                    'values' : [0,0.0005,0.5]
                },
                'learning_rate' : {
                    'values' : [1e-3,1e-4]
                },
                # names must match the strings executeOneEpoch dispatches on
                'optimizer' : {
                    'values' : ['sgd','momentum','nestrov','rmsprop','adam','nadam']
                },
                'batch_size' : {
                        'values' : [16,32,64]
                },
                'weight_initialization' :{
                    'values' : ['random','Xavier']
                },
                'activation' : {
                    'values' : ['sigmoid','tanh','ReLU']
                }
        }
}
# register the sweep in the configured project; the returned id is passed to wandb.agent
sweep_id = wandb.sweep(sweep = sweep_config,project= feed_forward.parameters["wandb_project"])

Create sweep with ID: hl8fpi30
Sweep URL: https://wandb.ai/cs22m019/DL%20Final%20Assignment%201/sweeps/hl8fpi30


In [4]:
def train():
    """Run one sweep trial with the hyperparameters W&B sampled for it.

    Starts a W&B run, copies the sampled values from ``wandb.config`` onto the
    shared module-level ``feed_forward`` object, gives the run a descriptive
    name, and finally calls ``feed_forward.feed_forward_q3_4()``.

    Takes no arguments and returns ``None``; it is meant to be handed to
    ``wandb.agent`` as the ``function`` callback.
    """
    # The project is not passed here; the sweep definition dict is supplied as
    # the run's config.
    wandb.init(config=sweep_config)
    sampled = wandb.config

    # (attribute on feed_forward, key in wandb.config), applied in this order.
    direct_copies = (
        ("epoch", "epochs"),
        ("nnl", "size_of_hidden_layer"),
        ("weightDecay", "weight_decay"),
        ("learningRate", "learning_rate"),
        ("optimizer", "optimizer"),
        ("batch_size", "batch_size"),
        ("weightInitialization", "weight_initialization"),
        ("activationFunction", "activation"),
    )
    for attribute, config_key in direct_copies:
        setattr(feed_forward, attribute, getattr(sampled, config_key))

    # L is the configured number of hidden layers plus one.
    feed_forward.L = sampled.number_of_hidden_layer + 1
    # Weight decay is stored under both spellings (weightDecay above).
    # NOTE(review): confirm which one FeedForward actually reads.
    feed_forward.weight_decay = sampled.weight_decay

    # Readable run name built from four of the sampled hyperparameters.
    wandb.run.name = (
        f"optimizer_{sampled.optimizer!s}"
        f"_hl_{sampled.number_of_hidden_layer!s}"
        f"_bs_{sampled.batch_size!s}"
        f"_ac_{sampled.activation!s}"
    )
    feed_forward.feed_forward_q3_4()

In [None]:
# Start a sweep agent in this kernel: it pulls configurations for `sweep_id`
# from W&B and calls `train` for each one, stopping after at most 100 runs.
wandb.agent(sweep_id=sweep_id,function = train,count = 100)

[34m[1mwandb[0m: Agent Starting Run: bdrj5ovc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier
[34m[1mwandb[0m: Currently logged in as: [33mcs22m019[0m. Use [1m`wandb login --relogin`[0m to force relogin


epoch:1, train loss:0.927165388652894, train accuracy:0.10031481481481481, validation loss:0.9161960841437223, validation accuracy:0.09983333333333333
epoch:2, train loss:0.9124516756557705, train accuracy:0.10337037037037038, validation loss:0.9088522957948203, validation accuracy:0.09983333333333333
epoch:3, train loss:0.9074925278220668, train accuracy:0.11040740740740741, validation loss:0.9057520745352943, validation accuracy:0.15483333333333332
epoch:4, train loss:0.9051262502624003, train accuracy:0.11212962962962963, validation loss:0.9040933839838856, validation accuracy:0.10066666666666667
epoch:5, train loss:0.9036505071742047, train accuracy:0.12496296296296296, validation loss:0.9028726372289254, validation accuracy:0.136


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▄▄█
train loss,█▄▂▁▁
validation accuracy,▁▁█▁▆
validation loss,█▄▃▂▁

0,1
train accuracy,0.12496
train loss,0.90365
validation accuracy,0.136
validation loss,0.90287


[34m[1mwandb[0m: Agent Starting Run: emcr40b3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8216885556579229, train accuracy:0.3443703703703704, validation loss:0.7279213011246519, validation accuracy:0.48783333333333334
epoch:2, train loss:0.6707726320160337, train accuracy:0.5514814814814815, validation loss:0.6140853886532305, validation accuracy:0.6071666666666666
epoch:3, train loss:0.577303876778874, train accuracy:0.6468518518518519, validation loss:0.5354249577767946, validation accuracy:0.6731666666666667
epoch:4, train loss:0.5094832336615671, train accuracy:0.6886666666666666, validation loss:0.47765335662142716, validation accuracy:0.7033333333333334
epoch:5, train loss:0.45917963716616883, train accuracy:0.7144259259259259, validation loss:0.4348375755536872, validation accuracy:0.732
epoch:6, train loss:0.42174869358847916, train accuracy:0.7370740740740741, validation loss:0.4032720700396119, validation accuracy:0.7531666666666667
epoch:7, train loss:0.393227069619491, train accuracy:0.7524259259259259, validation loss:0.3787522558797738, 

VBox(children=(Label(value='0.001 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.044564…

0,1
train accuracy,▁▄▆▇▇▇████
train loss,█▆▄▃▃▂▂▁▁▁
validation accuracy,▁▄▅▆▇▇████
validation loss,█▆▅▄▃▂▂▁▁▁

0,1
train accuracy,0.77665
train loss,0.33911
validation accuracy,0.7825
validation loss,0.33179


[34m[1mwandb[0m: Agent Starting Run: 5mf6blwr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9223107264331781, train accuracy:0.12498148148148149, validation loss:0.8984687673988595, validation accuracy:0.22316666666666668
epoch:2, train loss:0.8887727364263928, train accuracy:0.27, validation loss:0.8767225933678542, validation accuracy:0.30616666666666664
epoch:3, train loss:0.8696824819267702, train accuracy:0.3296296296296296, validation loss:0.8586524738996959, validation accuracy:0.3615
epoch:4, train loss:0.8523603864154659, train accuracy:0.37394444444444447, validation loss:0.8407899642392606, validation accuracy:0.39166666666666666
epoch:5, train loss:0.8354599506081863, train accuracy:0.3923888888888889, validation loss:0.8240267902893564, validation accuracy:0.4026666666666667
epoch:6, train loss:0.8191322267801787, train accuracy:0.40475925925925926, validation loss:0.8076264530942546, validation accuracy:0.4103333333333333
epoch:7, train loss:0.8026410007537645, train accuracy:0.4114814814814815, validation loss:0.790741336151209, validation

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▄▆▇▇▇████
train loss,█▇▆▅▄▄▃▂▂▁
validation accuracy,▁▄▅▆▇▇▇███
validation loss,█▇▆▅▅▄▃▂▂▁

0,1
train accuracy,0.4293
train loss,0.75895
validation accuracy,0.43917
validation loss,0.74753


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f6l8dzih with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8884474081689535, train accuracy:0.17062962962962963, validation loss:0.8690245344039136, validation accuracy:0.25666666666666665
epoch:2, train loss:0.8493495029142814, train accuracy:0.35338888888888886, validation loss:0.8260585143878161, validation accuracy:0.4315
epoch:3, train loss:0.8036016012751955, train accuracy:0.44075925925925924, validation loss:0.7759168017367223, validation accuracy:0.4648333333333333
epoch:4, train loss:0.7522206991257886, train accuracy:0.4675, validation loss:0.7208248852769034, validation accuracy:0.5
epoch:5, train loss:0.6970058574792343, train accuracy:0.5115, validation loss:0.6644181753901652, validation accuracy:0.5423333333333333


0,1
train accuracy,▁▅▇▇█
train loss,█▇▅▃▁
validation accuracy,▁▅▆▇█
validation loss,█▇▅▃▁

0,1
train accuracy,0.5115
train loss,0.69701
validation accuracy,0.54233
validation loss,0.66442


[34m[1mwandb[0m: Agent Starting Run: 93t0mb67 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.0523750367359646, train accuracy:0.12498148148148149, validation loss:0.9982682339575646, validation accuracy:0.15216666666666667
epoch:2, train loss:0.9741920700428479, train accuracy:0.1635925925925926, validation loss:0.9555390321641734, validation accuracy:0.17666666666666667
epoch:3, train loss:0.9413227746882046, train accuracy:0.20222222222222222, validation loss:0.9285756934997792, validation accuracy:0.22516666666666665
epoch:4, train loss:0.918974491399178, train accuracy:0.2487962962962963, validation loss:0.9083887801321119, validation accuracy:0.2623333333333333
epoch:5, train loss:0.8998149939418477, train accuracy:0.2782777777777778, validation loss:0.889183689121147, validation accuracy:0.28883333333333333


0,1
train accuracy,▁▃▅▇█
train loss,█▄▃▂▁
validation accuracy,▁▂▅▇█
validation loss,█▅▄▂▁

0,1
train accuracy,0.27828
train loss,0.89981
validation accuracy,0.28883
validation loss,0.88918


[34m[1mwandb[0m: Agent Starting Run: 82n2z3xk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9105669870161389, train accuracy:0.131, validation loss:0.8959282865081363, validation accuracy:0.3365
epoch:2, train loss:0.8906397139650967, train accuracy:0.28429629629629627, validation loss:0.884736779595426, validation accuracy:0.26716666666666666
epoch:3, train loss:0.8784848158868901, train accuracy:0.2609444444444444, validation loss:0.8712159539851873, validation accuracy:0.25883333333333336
epoch:4, train loss:0.8629259801185166, train accuracy:0.26296296296296295, validation loss:0.8538452083934682, validation accuracy:0.26716666666666666
epoch:5, train loss:0.8441791916598423, train accuracy:0.27390740740740743, validation loss:0.8341836457269591, validation accuracy:0.2816666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁█▇▇█
train loss,█▆▅▃▁
validation accuracy,█▂▁▂▃
validation loss,█▇▅▃▁

0,1
train accuracy,0.27391
train loss,0.84418
validation accuracy,0.28167
validation loss,0.83418


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6lgq8pbx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.394506448572295, train accuracy:0.0852037037037037, validation loss:1.3780576645150793, validation accuracy:0.09166666666666666
epoch:2, train loss:1.3795016614292774, train accuracy:0.08837037037037038, validation loss:1.3649626211387236, validation accuracy:0.0955
epoch:3, train loss:1.366460331613402, train accuracy:0.08977777777777778, validation loss:1.3541632179915721, validation accuracy:0.09783333333333333
epoch:4, train loss:1.354700220901361, train accuracy:0.09244444444444444, validation loss:1.3454748654283404, validation accuracy:0.0965
epoch:5, train loss:1.3442485784590061, train accuracy:0.09364814814814815, validation loss:1.3372450768079638, validation accuracy:0.09816666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▄▅▇█
train loss,█▆▄▂▁
validation accuracy,▁▅█▆█
validation loss,█▆▄▂▁

0,1
train accuracy,0.09365
train loss,1.34425
validation accuracy,0.09817
validation loss,1.33725


[34m[1mwandb[0m: Agent Starting Run: k3lm7aar with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.0083622746181513, train accuracy:0.10011111111111111, validation loss:1.00524871443141, validation accuracy:0.099
epoch:2, train loss:1.0012426645839463, train accuracy:0.10011111111111111, validation loss:0.9983429872957188, validation accuracy:0.099
epoch:3, train loss:0.9948302924069834, train accuracy:0.10011111111111111, validation loss:0.9924641726064656, validation accuracy:0.099
epoch:4, train loss:0.9893074440090015, train accuracy:0.10011111111111111, validation loss:0.9872137398776756, validation accuracy:0.099
epoch:5, train loss:0.9842564367262822, train accuracy:0.10011111111111111, validation loss:0.9822722419379576, validation accuracy:0.099
epoch:6, train loss:0.9797450149978288, train accuracy:0.10011111111111111, validation loss:0.9780650029289553, validation accuracy:0.099
epoch:7, train loss:0.9756973853227218, train accuracy:0.10011111111111111, validation loss:0.9741113260028514, validation accuracy:0.099
epoch:8, train loss:0.97194818408448

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁▁▁▁▁▁
train loss,█▇▆▅▄▃▃▂▁▁
validation accuracy,▁▁▁▁▁▁▁▁▁▁
validation loss,█▇▆▅▄▃▃▂▁▁

0,1
train accuracy,0.10011
train loss,0.96565
validation accuracy,0.099
validation loss,0.96462


[34m[1mwandb[0m: Agent Starting Run: vzpwkioc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

epoch:1, train loss:0.9753154716620319, train accuracy:0.10011111111111111, validation loss:0.9238910213856679, validation accuracy:0.099
epoch:2, train loss:0.9108004919119489, train accuracy:0.10011111111111111, validation loss:0.9031215454126819, validation accuracy:0.099
epoch:3, train loss:0.9005599901423821, train accuracy:0.10061111111111111, validation loss:0.8986319059596648, validation accuracy:0.099
epoch:4, train loss:0.8975374171436822, train accuracy:0.18627777777777776, validation loss:0.8964138539017926, validation accuracy:0.20433333333333334
epoch:5, train loss:0.8951397526699798, train accuracy:0.25775925925925924, validation loss:0.8934947843099672, validation accuracy:0.25716666666666665


VBox(children=(Label(value='0.001 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.045411…

0,1
train accuracy,▁▁▁▅█
train loss,█▂▁▁▁
validation accuracy,▁▁▁▆█
validation loss,█▃▂▂▁

0,1
train accuracy,0.25776
train loss,0.89514
validation accuracy,0.25717
validation loss,0.89349


[34m[1mwandb[0m: Agent Starting Run: zp54h21z with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.7426564920324707, train accuracy:0.11675925925925926, validation loss:1.7990147274776565, validation accuracy:0.10066666666666667
epoch:2, train loss:1.8007125815500835, train accuracy:0.09992592592592593, validation loss:1.7992931077709748, validation accuracy:0.10066666666666667
epoch:3, train loss:1.800774589234314, train accuracy:0.09992592592592593, validation loss:1.799293107771008, validation accuracy:0.10066666666666667
epoch:4, train loss:1.8007745892343463, train accuracy:0.09992592592592593, validation loss:1.799293107771041, validation accuracy:0.10066666666666667
epoch:5, train loss:1.8007745892343783, train accuracy:0.09992592592592593, validation loss:1.799293107771074, validation accuracy:0.10066666666666667


0,1
train accuracy,█▁▁▁▁
train loss,▁████
validation accuracy,▁▁▁▁▁
validation loss,▁████

0,1
train accuracy,0.09993
train loss,1.80077
validation accuracy,0.10067
validation loss,1.79929


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p5s6yuag with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.958395538663084, train accuracy:0.10051851851851852, validation loss:0.9389854650728144, validation accuracy:0.09533333333333334
epoch:2, train loss:0.928920372735753, train accuracy:0.10051851851851852, validation loss:0.9229854460014559, validation accuracy:0.09533333333333334
epoch:3, train loss:0.9181035692313654, train accuracy:0.105, validation loss:0.9152393450477301, validation accuracy:0.096
epoch:4, train loss:0.9121140434669285, train accuracy:0.10575925925925926, validation loss:0.9104191239961938, validation accuracy:0.10516666666666667
epoch:5, train loss:0.9081933824144789, train accuracy:0.11287037037037037, validation loss:0.9071227756730109, validation accuracy:0.1175
epoch:6, train loss:0.9054862548668666, train accuracy:0.11988888888888889, validation loss:0.9048114353004982, validation accuracy:0.12583333333333332
epoch:7, train loss:0.9035915872229615, train accuracy:0.13312962962962963, validation loss:0.9031722677599823, validation accuracy

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▂▂▃▄▇██▇
train loss,█▄▃▂▂▂▁▁▁▁
validation accuracy,▁▁▁▂▄▅▆▇▇█
validation loss,█▅▄▃▂▂▁▁▁▁

0,1
train accuracy,0.13709
train loss,0.90069
validation accuracy,0.144
validation loss,0.90062


[34m[1mwandb[0m: Agent Starting Run: te22yi9e with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9023684356877893, train accuracy:0.10525925925925926, validation loss:0.8997017269232856, validation accuracy:0.09433333333333334
epoch:2, train loss:0.8930936653044723, train accuracy:0.17827777777777779, validation loss:0.8668364455985348, validation accuracy:0.19783333333333333
epoch:3, train loss:0.815138339667077, train accuracy:0.2609814814814815, validation loss:0.7680091502397763, validation accuracy:0.33216666666666667
epoch:4, train loss:0.7342469027151866, train accuracy:0.37392592592592594, validation loss:0.702437147593549, validation accuracy:0.44866666666666666
epoch:5, train loss:0.6681381460706419, train accuracy:0.48807407407407405, validation loss:0.6254217935861468, validation accuracy:0.5531666666666667
epoch:6, train loss:0.5951174584215023, train accuracy:0.5777222222222222, validation loss:0.5607820754891794, validation accuracy:0.6061666666666666
epoch:7, train loss:0.5381279419838977, train accuracy:0.6234259259259259, validation loss:0.5

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▃▄▆▇▇███
train loss,██▇▅▄▃▂▂▁▁
validation accuracy,▁▂▄▅▇▇████
validation loss,██▆▅▄▃▂▂▁▁

0,1
train accuracy,0.67313
train loss,0.43943
validation accuracy,0.66967
validation loss,0.42584


[34m[1mwandb[0m: Agent Starting Run: k1tp0225 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9479798860051066, train accuracy:0.09931481481481481, validation loss:0.9422228081117675, validation accuracy:0.10616666666666667
epoch:2, train loss:0.9418058256448395, train accuracy:0.09931481481481481, validation loss:0.9373637997331774, validation accuracy:0.10616666666666667
epoch:3, train loss:0.9374411352225215, train accuracy:0.09931481481481481, validation loss:0.933817084119801, validation accuracy:0.10616666666666667
epoch:4, train loss:0.934088482859703, train accuracy:0.09931481481481481, validation loss:0.9309029582226268, validation accuracy:0.10616666666666667
epoch:5, train loss:0.9313492644644158, train accuracy:0.09931481481481481, validation loss:0.9287152358409821, validation accuracy:0.10616666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁
train loss,█▅▄▂▁
validation accuracy,▁▁▁▁▁
validation loss,█▅▄▂▁

0,1
train accuracy,0.09931
train loss,0.93135
validation accuracy,0.10617
validation loss,0.92872


[34m[1mwandb[0m: Agent Starting Run: on9ypz0o with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.5159458703070819, train accuracy:0.6450185185185185, validation loss:0.33611388866808056, validation accuracy:0.7798333333333334
epoch:2, train loss:0.3012031885086937, train accuracy:0.7953333333333333, validation loss:0.2830355068573019, validation accuracy:0.8076666666666666
epoch:3, train loss:0.2706362352995023, train accuracy:0.8154814814814815, validation loss:0.2618668733888682, validation accuracy:0.8171666666666667
epoch:4, train loss:0.2536795628896406, train accuracy:0.8276666666666667, validation loss:0.25795959162768817, validation accuracy:0.826
epoch:5, train loss:0.24136282345206914, train accuracy:0.8347407407407408, validation loss:0.23594560292565242, validation accuracy:0.8371666666666666


0,1
train accuracy,▁▇▇██
train loss,█▃▂▁▁
validation accuracy,▁▄▆▇█
validation loss,█▄▃▃▁

0,1
train accuracy,0.83474
train loss,0.24136
validation accuracy,0.83717
validation loss,0.23595


[34m[1mwandb[0m: Agent Starting Run: f37c8qa9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.4207176022091277, train accuracy:0.1297037037037037, validation loss:1.287153682697163, validation accuracy:0.18533333333333332
epoch:2, train loss:1.1650764699147405, train accuracy:0.22818518518518519, validation loss:1.0385819413742348, validation accuracy:0.2718333333333333
epoch:3, train loss:0.9451945051794632, train accuracy:0.31222222222222223, validation loss:0.8394308449973038, validation accuracy:0.37383333333333335
epoch:4, train loss:0.786631811012436, train accuracy:0.4036851851851852, validation loss:0.7363271603658362, validation accuracy:0.43383333333333335
epoch:5, train loss:0.7004908325251976, train accuracy:0.4653703703703704, validation loss:0.6688062113855355, validation accuracy:0.4886666666666667
epoch:6, train loss:0.6458599211090337, train accuracy:0.5092777777777778, validation loss:0.620512597479431, validation accuracy:0.5255
epoch:7, train loss:0.6074949710894907, train accuracy:0.5398888888888889, validation loss:0.5888645240452021,

0,1
train accuracy,▁▂▄▅▆▇▇▇██
train loss,█▆▄▃▂▂▂▁▁▁
validation accuracy,▁▂▄▅▆▇▇███
validation loss,█▆▄▃▂▂▂▁▁▁

0,1
train accuracy,0.59635
train loss,0.53559
validation accuracy,0.60483
validation loss,0.52503


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pmgnsp21 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9001556620449559, train accuracy:0.11212962962962963, validation loss:0.8963687792543281, validation accuracy:0.10066666666666667
epoch:2, train loss:0.8156877302787642, train accuracy:0.25683333333333336, validation loss:0.7388696185589124, validation accuracy:0.32516666666666666
epoch:3, train loss:0.6835824650994705, train accuracy:0.4164074074074074, validation loss:0.6291360469140108, validation accuracy:0.4801666666666667
epoch:4, train loss:0.5911549096951942, train accuracy:0.5255185185185185, validation loss:0.5567443933355858, validation accuracy:0.5618333333333333
epoch:5, train loss:0.5387892901085874, train accuracy:0.5731666666666667, validation loss:0.5216006452751288, validation accuracy:0.5793333333333334
epoch:6, train loss:0.5089543844828519, train accuracy:0.5885370370370371, validation loss:0.5059403863303623, validation accuracy:0.5873333333333334
epoch:7, train loss:0.4852227657330948, train accuracy:0.6199259259259259, validation loss:0.475

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▅▆▇▇▇███
train loss,█▇▅▄▃▂▂▂▁▁
validation accuracy,▁▄▅▆▇▇▇███
validation loss,█▆▄▃▃▃▂▂▁▁

0,1
train accuracy,0.68893
train loss,0.40819
validation accuracy,0.704
validation loss,0.39477


[34m[1mwandb[0m: Agent Starting Run: glu2zrg0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.753412376597776, train accuracy:0.46025925925925926, validation loss:0.5813479927685602, validation accuracy:0.6328333333333334
epoch:2, train loss:0.5046205182596644, train accuracy:0.6769259259259259, validation loss:0.4490814614008179, validation accuracy:0.7143333333333334
epoch:3, train loss:0.4213028355383654, train accuracy:0.736462962962963, validation loss:0.3951081930946781, validation accuracy:0.7536666666666667
epoch:4, train loss:0.3801531885771814, train accuracy:0.7616851851851851, validation loss:0.3651262114234256, validation accuracy:0.7705
epoch:5, train loss:0.35526595095494773, train accuracy:0.7755185185185185, validation loss:0.3454119786870474, validation accuracy:0.7816666666666666
epoch:6, train loss:0.337744159063919, train accuracy:0.7862962962962963, validation loss:0.330829740684988, validation accuracy:0.7903333333333333
epoch:7, train loss:0.32428932441414504, train accuracy:0.795537037037037, validation loss:0.31867996096927736, va

0,1
train accuracy,▁▅▆▇▇▇████
train loss,█▄▃▂▂▂▁▁▁▁
validation accuracy,▁▄▆▆▇▇████
validation loss,█▅▃▃▂▂▂▁▁▁

0,1
train accuracy,0.8132
train loss,0.297
validation accuracy,0.81033
validation loss,0.29538


[34m[1mwandb[0m: Agent Starting Run: tyq68grv with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.7003545084255205, train accuracy:0.147, validation loss:1.6888209099636249, validation accuracy:0.15233333333333332
epoch:2, train loss:1.6899219664182137, train accuracy:0.1492037037037037, validation loss:1.6953203422636898, validation accuracy:0.14516666666666667
epoch:3, train loss:1.65333401897107, train accuracy:0.16525925925925927, validation loss:1.5697311090639117, validation accuracy:0.20683333333333334
epoch:4, train loss:1.6375012022941413, train accuracy:0.17464814814814814, validation loss:1.8114631215742298, validation accuracy:0.09166666666666666
epoch:5, train loss:1.784710446412018, train accuracy:0.10727777777777778, validation loss:1.8113333333333332, validation accuracy:0.09433333333333334
epoch:6, train loss:1.7987407407407408, train accuracy:0.10062962962962962, validation loss:1.8113333333333332, validation accuracy:0.09433333333333334
epoch:7, train loss:1.7987407407407408, train accuracy:0.10062962962962962, validation loss:1.811333333333

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▅▆▇█▂▁▁▁▁▁
train loss,▄▃▂▁▇█████
validation accuracy,▅▄█▁▁▁▁▁▁▁
validation loss,▄▅▁███████

0,1
train accuracy,0.10063
train loss,1.79874
validation accuracy,0.09433
validation loss,1.81133


[34m[1mwandb[0m: Agent Starting Run: r01k1vsf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9454925858167769, train accuracy:0.10001851851851852, validation loss:0.9447149522541978, validation accuracy:0.09983333333333333
epoch:2, train loss:0.9453828062810413, train accuracy:0.10001851851851852, validation loss:0.9446034526343672, validation accuracy:0.09983333333333333
epoch:3, train loss:0.9452710822171505, train accuracy:0.10001851851851852, validation loss:0.9444905975017077, validation accuracy:0.09983333333333333
epoch:4, train loss:0.9451587058779405, train accuracy:0.10001851851851852, validation loss:0.9443772978362508, validation accuracy:0.09983333333333333
epoch:5, train loss:0.9450479565610986, train accuracy:0.10001851851851852, validation loss:0.9442728441197644, validation accuracy:0.09983333333333333
epoch:6, train loss:0.9449475904475865, train accuracy:0.10001851851851852, validation loss:0.9441710342124789, validation accuracy:0.09983333333333333
epoch:7, train loss:0.944839904050323, train accuracy:0.10001851851851852, validation lo

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁▁▁▁▁▁
train loss,█▇▆▆▅▄▃▂▂▁
validation accuracy,▁▁▁▁▁▁▁▁▁▁
validation loss,█▇▆▆▅▄▃▂▂▁

0,1
train accuracy,0.10002
train loss,0.94453
validation accuracy,0.09983
validation loss,0.94375


[34m[1mwandb[0m: Agent Starting Run: a1x63ibj with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9010772335227293, train accuracy:0.11848148148148148, validation loss:0.8773545330789977, validation accuracy:0.1885
epoch:2, train loss:0.8430474446523534, train accuracy:0.3207962962962963, validation loss:0.79603112503552, validation accuracy:0.41583333333333333
epoch:3, train loss:0.7365025629974195, train accuracy:0.452, validation loss:0.6708814160718618, validation accuracy:0.5168333333333334
epoch:4, train loss:0.6211398243736231, train accuracy:0.5556481481481481, validation loss:0.568134747321318, validation accuracy:0.5923333333333334
epoch:5, train loss:0.5314073475938264, train accuracy:0.6234259259259259, validation loss:0.502135666841556, validation accuracy:0.6481666666666667
epoch:6, train loss:0.4866429417044573, train accuracy:0.6477037037037037, validation loss:0.4742150082416483, validation accuracy:0.6528333333333334
epoch:7, train loss:0.4505437923547245, train accuracy:0.6619074074074074, validation loss:0.425033600048334, validation accura

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▅▆▇▇▇███
train loss,█▇▆▄▃▂▂▁▁▁
validation accuracy,▁▄▅▆▇▇▇▇██
validation loss,█▇▅▄▃▂▂▂▁▁

0,1
train accuracy,0.70781
train loss,0.40418
validation accuracy,0.73567
validation loss,0.38037


[34m[1mwandb[0m: Agent Starting Run: phf87g5d with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9500206769338082, train accuracy:0.09972222222222223, validation loss:0.9490061879954242, validation accuracy:0.1025
epoch:2, train loss:0.9484537606827695, train accuracy:0.09972222222222223, validation loss:0.9475294577000164, validation accuracy:0.1025
epoch:3, train loss:0.9470116522862999, train accuracy:0.09972222222222223, validation loss:0.9461678026911485, validation accuracy:0.1025
epoch:4, train loss:0.9456787329186207, train accuracy:0.09972222222222223, validation loss:0.9449080870783261, validation accuracy:0.1025
epoch:5, train loss:0.9444448745657124, train accuracy:0.09972222222222223, validation loss:0.9437392108120202, validation accuracy:0.1025
epoch:6, train loss:0.9432973017233898, train accuracy:0.09972222222222223, validation loss:0.9426512413139599, validation accuracy:0.1025
epoch:7, train loss:0.9422279544948625, train accuracy:0.09972222222222223, validation loss:0.9416358245300791, validation accuracy:0.1025
epoch:8, train loss:0.94122

0,1
train accuracy,▁▁▁▁▁▁▁▁▁▁
train loss,█▇▆▅▄▄▃▂▂▁
validation accuracy,▁▁▁▁▁▁▁▁▁▁
validation loss,█▇▆▅▄▄▃▂▂▁

0,1
train accuracy,0.09972
train loss,0.93941
validation accuracy,0.1025
validation loss,0.93896


[34m[1mwandb[0m: Agent Starting Run: p9l2pnnf with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.7101825127015605, train accuracy:0.5218518518518519, validation loss:0.4986190165962344, validation accuracy:0.6625
epoch:2, train loss:0.4302372359206809, train accuracy:0.7275, validation loss:0.38161972874774563, validation accuracy:0.77
epoch:3, train loss:0.3632520765502077, train accuracy:0.7826851851851852, validation loss:0.3479251209343482, validation accuracy:0.7896666666666666
epoch:4, train loss:0.33489786156279433, train accuracy:0.798537037037037, validation loss:0.32746837430320036, validation accuracy:0.8043333333333333
epoch:5, train loss:0.31863200348679543, train accuracy:0.8085925925925926, validation loss:0.3160587979905969, validation accuracy:0.8058333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇██
train loss,█▃▂▁▁
validation accuracy,▁▆▇██
validation loss,█▄▂▁▁

0,1
train accuracy,0.80859
train loss,0.31863
validation accuracy,0.80583
validation loss,0.31606


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xnvj7duz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.067082264258892, train accuracy:0.16974074074074075, validation loss:1.0681229313069365, validation accuracy:0.169
epoch:2, train loss:1.0667774915575727, train accuracy:0.17012962962962963, validation loss:1.0678156248311774, validation accuracy:0.1695
epoch:3, train loss:1.0664755806169157, train accuracy:0.1703888888888889, validation loss:1.0675228843601272, validation accuracy:0.17016666666666666
epoch:4, train loss:1.066171773657005, train accuracy:0.17083333333333334, validation loss:1.0672125559514916, validation accuracy:0.17066666666666666
epoch:5, train loss:1.0658676748848015, train accuracy:0.17127777777777778, validation loss:1.0669160531677102, validation accuracy:0.17083333333333334


0,1
train accuracy,▁▃▄▆█
train loss,█▆▅▃▁
validation accuracy,▁▃▅▇█
validation loss,█▆▅▃▁

0,1
train accuracy,0.17128
train loss,1.06587
validation accuracy,0.17083
validation loss,1.06692


[34m[1mwandb[0m: Agent Starting Run: 0rb1nrp6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.0785551692807425, train accuracy:0.09931481481481481, validation loss:1.0730255376792905, validation accuracy:0.10616666666666667
epoch:2, train loss:1.0762830067025995, train accuracy:0.09931481481481481, validation loss:1.0708300730386073, validation accuracy:0.10616666666666667
epoch:3, train loss:1.0740632443314633, train accuracy:0.09931481481481481, validation loss:1.068725868870294, validation accuracy:0.10616666666666667
epoch:4, train loss:1.0719468576862552, train accuracy:0.09931481481481481, validation loss:1.0667054939927714, validation accuracy:0.10616666666666667
epoch:5, train loss:1.0698601002297514, train accuracy:0.09931481481481481, validation loss:1.0646568875225777, validation accuracy:0.10616666666666667
epoch:6, train loss:1.0677609960417676, train accuracy:0.09931481481481481, validation loss:1.0625598084553736, validation accuracy:0.10616666666666667
epoch:7, train loss:1.0656306608999169, train accuracy:0.09931481481481481, validation lo

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁▁▁▁▁▁
train loss,█▇▆▆▅▄▃▂▂▁
validation accuracy,▁▁▁▁▁▁▁▁▁▁
validation loss,█▇▆▆▅▄▃▂▂▁

0,1
train accuracy,0.09931
train loss,1.0597
validation accuracy,0.10617
validation loss,1.0548


[34m[1mwandb[0m: Agent Starting Run: u3o2t8c6 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9438782926700721, train accuracy:0.06872222222222223, validation loss:0.9330857088091, validation accuracy:0.0725
epoch:2, train loss:0.9240279297856174, train accuracy:0.08209259259259259, validation loss:0.9180721355244501, validation accuracy:0.09766666666666667
epoch:3, train loss:0.9115123218975024, train accuracy:0.10685185185185185, validation loss:0.9073904041556206, validation accuracy:0.12816666666666668
epoch:4, train loss:0.9020407500253761, train accuracy:0.14881481481481482, validation loss:0.8989032973998288, validation accuracy:0.17283333333333334
epoch:5, train loss:0.8942612872254999, train accuracy:0.202, validation loss:0.8917191419350122, validation accuracy:0.2315


0,1
train accuracy,▁▂▃▅█
train loss,█▅▃▂▁
validation accuracy,▁▂▃▅█
validation loss,█▅▄▂▁

0,1
train accuracy,0.202
train loss,0.89426
validation accuracy,0.2315
validation loss,0.89172


[34m[1mwandb[0m: Agent Starting Run: 65iqo1l0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.4803360079207661, train accuracy:0.09733333333333333, validation loss:1.4856006163611755, validation accuracy:0.09133333333333334
epoch:2, train loss:1.4789731862837039, train accuracy:0.0972962962962963, validation loss:1.4842553620603824, validation accuracy:0.09133333333333334
epoch:3, train loss:1.4776529564614254, train accuracy:0.09731481481481481, validation loss:1.482952117171982, validation accuracy:0.09116666666666666
epoch:4, train loss:1.4763527697293746, train accuracy:0.0972962962962963, validation loss:1.4816260274796547, validation accuracy:0.09116666666666666
epoch:5, train loss:1.475072872509631, train accuracy:0.09725925925925925, validation loss:1.480324967696792, validation accuracy:0.09116666666666666


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,█▅▆▅▁
train loss,█▆▄▃▁
validation accuracy,██▁▁▁
validation loss,█▆▄▃▁

0,1
train accuracy,0.09726
train loss,1.47507
validation accuracy,0.09117
validation loss,1.48032


[34m[1mwandb[0m: Agent Starting Run: a9vowx10 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.017183333333317327, max=1.0…

epoch:1, train loss:0.9230884170540389, train accuracy:0.1371851851851852, validation loss:0.9052568363816892, validation accuracy:0.2025
epoch:2, train loss:0.8881174579051724, train accuracy:0.22753703703703704, validation loss:0.8711722706883053, validation accuracy:0.24316666666666667
epoch:3, train loss:0.8456859788171378, train accuracy:0.2775925925925926, validation loss:0.8151615714569629, validation accuracy:0.29183333333333333
epoch:4, train loss:0.772073145004808, train accuracy:0.3464074074074074, validation loss:0.7255055996999802, validation accuracy:0.4141666666666667
epoch:5, train loss:0.6880259479605024, train accuracy:0.46205555555555555, validation loss:0.6464137650847808, validation accuracy:0.5343333333333333


0,1
train accuracy,▁▃▄▆█
train loss,█▇▆▄▁
validation accuracy,▁▂▃▅█
validation loss,█▇▆▃▁

0,1
train accuracy,0.46206
train loss,0.68803
validation accuracy,0.53433
validation loss,0.64641


[34m[1mwandb[0m: Agent Starting Run: f5xxapsu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.913909662819724, train accuracy:0.1237037037037037, validation loss:0.9050764512736488, validation accuracy:0.14683333333333334
epoch:2, train loss:0.8955152464047826, train accuracy:0.18624074074074073, validation loss:0.8884967021687964, validation accuracy:0.20283333333333334
epoch:3, train loss:0.8784974351927786, train accuracy:0.2328148148148148, validation loss:0.8709965763663451, validation accuracy:0.23933333333333334
epoch:4, train loss:0.8600157130463681, train accuracy:0.25316666666666665, validation loss:0.8517653602419775, validation accuracy:0.245
epoch:5, train loss:0.8402897768533224, train accuracy:0.27316666666666667, validation loss:0.8319944472941285, validation accuracy:0.2831666666666667
epoch:6, train loss:0.8213444778675516, train accuracy:0.3177962962962963, validation loss:0.8139555296264377, validation accuracy:0.3365
epoch:7, train loss:0.8043339098406024, train accuracy:0.3646111111111111, validation loss:0.7973306743931772, validatio

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▃▄▄▅▆▇▇█
train loss,█▇▆▆▅▄▃▂▂▁
validation accuracy,▁▂▃▃▄▅▆▇▇█
validation loss,█▇▆▆▅▄▃▂▂▁

0,1
train accuracy,0.46296
train loss,0.75689
validation accuracy,0.4775
validation loss,0.74933


[34m[1mwandb[0m: Agent Starting Run: ohlfvnxk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8428814886571734, train accuracy:0.27725925925925926, validation loss:0.7618239258349966, validation accuracy:0.43216666666666664
epoch:2, train loss:0.6790347705344182, train accuracy:0.5390370370370371, validation loss:0.5974969834473679, validation accuracy:0.6255
epoch:3, train loss:0.5290747954635995, train accuracy:0.6721111111111111, validation loss:0.4644276677734794, validation accuracy:0.7023333333333334
epoch:4, train loss:0.4255352762638605, train accuracy:0.7121296296296297, validation loss:0.3903976158542204, validation accuracy:0.7211666666666666
epoch:5, train loss:0.3744276682737725, train accuracy:0.7324444444444445, validation loss:0.355099415908277, validation accuracy:0.752
epoch:6, train loss:0.3484807332055412, train accuracy:0.7511851851851852, validation loss:0.3334367090832295, validation accuracy:0.7641666666666667
epoch:7, train loss:0.3294432950323225, train accuracy:0.764962962962963, validation loss:0.3160016395795758, validation acc

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇▇▇████
train loss,█▆▄▃▂▂▂▁▁▁
validation accuracy,▁▅▆▆▇▇████
validation loss,█▆▄▃▂▂▂▁▁▁

0,1
train accuracy,0.79543
train loss,0.288
validation accuracy,0.8
validation loss,0.27937


[34m[1mwandb[0m: Agent Starting Run: hkgj013d with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.2563484406757495, train accuracy:0.10407407407407407, validation loss:1.2567010215225367, validation accuracy:0.10183333333333333
epoch:2, train loss:1.255993966346995, train accuracy:0.10405555555555555, validation loss:1.2563792819140411, validation accuracy:0.102
epoch:3, train loss:1.2556455823358774, train accuracy:0.10403703703703704, validation loss:1.256030801581497, validation accuracy:0.10183333333333333
epoch:4, train loss:1.2553121489825063, train accuracy:0.10412962962962963, validation loss:1.2556888950251062, validation accuracy:0.102
epoch:5, train loss:1.2549525895485827, train accuracy:0.10427777777777777, validation loss:1.2553348363810022, validation accuracy:0.10183333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▂▂▁▄█
train loss,█▆▄▃▁
validation accuracy,▁█▁█▁
validation loss,█▆▅▃▁

0,1
train accuracy,0.10428
train loss,1.25495
validation accuracy,0.10183
validation loss,1.25533


[34m[1mwandb[0m: Agent Starting Run: st7ns539 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.7180416360387354, train accuracy:0.1341111111111111, validation loss:1.7096635786306291, validation accuracy:0.13466666666666666
epoch:2, train loss:1.6800760880850403, train accuracy:0.14046296296296296, validation loss:1.6818380418373424, validation accuracy:0.12433333333333334
epoch:3, train loss:1.6607984901192618, train accuracy:0.134, validation loss:1.6596585414292078, validation accuracy:0.13183333333333333
epoch:4, train loss:1.627699281824635, train accuracy:0.14787037037037037, validation loss:1.633500888318764, validation accuracy:0.14433333333333334
epoch:5, train loss:1.5942926466742082, train accuracy:0.16227777777777777, validation loss:1.591902528767765, validation accuracy:0.161


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▁▄█
train loss,█▆▅▃▁
validation accuracy,▃▁▂▅█
validation loss,█▆▅▃▁

0,1
train accuracy,0.16228
train loss,1.59429
validation accuracy,0.161
validation loss,1.5919


[34m[1mwandb[0m: Agent Starting Run: omevjvgj with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8282329035804301, train accuracy:0.289462962962963, validation loss:0.7052175592907488, validation accuracy:0.43766666666666665
epoch:2, train loss:0.59489559072213, train accuracy:0.5883333333333334, validation loss:0.49763686628372095, validation accuracy:0.6808333333333333
epoch:3, train loss:0.45700046604276123, train accuracy:0.7046851851851852, validation loss:0.4234993646198652, validation accuracy:0.7221666666666666
epoch:4, train loss:0.40230911764114136, train accuracy:0.7358888888888889, validation loss:0.3771380512408927, validation accuracy:0.7503333333333333
epoch:5, train loss:0.35672265128865444, train accuracy:0.7550370370370371, validation loss:0.34047777169437615, validation accuracy:0.7683333333333333


VBox(children=(Label(value='0.001 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.045355…

0,1
train accuracy,▁▅▇██
train loss,█▅▂▂▁
validation accuracy,▁▆▇██
validation loss,█▄▃▂▁

0,1
train accuracy,0.75504
train loss,0.35672
validation accuracy,0.76833
validation loss,0.34048


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hjz1dcw0 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9383944139610346, train accuracy:0.10092592592592593, validation loss:0.9366195822392638, validation accuracy:0.1065
epoch:2, train loss:0.9351260200042556, train accuracy:0.10481481481481482, validation loss:0.9335008590423941, validation accuracy:0.11316666666666667
epoch:3, train loss:0.932122876511615, train accuracy:0.109, validation loss:0.930609628366728, validation accuracy:0.1175
epoch:4, train loss:0.9293140358462822, train accuracy:0.1145, validation loss:0.927880538386144, validation accuracy:0.12283333333333334
epoch:5, train loss:0.9266498342383898, train accuracy:0.12081481481481482, validation loss:0.9252757322299717, validation accuracy:0.12866666666666668
epoch:6, train loss:0.9240999857368336, train accuracy:0.1272962962962963, validation loss:0.9227638942390454, validation accuracy:0.13233333333333333
epoch:7, train loss:0.9216325398610521, train accuracy:0.1328888888888889, validation loss:0.9203194648812508, validation accuracy:0.1375
epoch:8

VBox(children=(Label(value='0.001 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.044608…

0,1
train accuracy,▁▂▂▃▄▅▆▇▇█
train loss,█▇▆▅▅▄▃▂▂▁
validation accuracy,▁▂▃▃▄▅▅▆▇█
validation loss,█▇▆▅▅▄▃▂▂▁

0,1
train accuracy,0.14861
train loss,0.9145
validation accuracy,0.1575
validation loss,0.91319


[34m[1mwandb[0m: Agent Starting Run: i7tuefan with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.8005017150133795, train accuracy:0.09946296296296296, validation loss:1.8152822978673746, validation accuracy:0.09183333333333334
epoch:2, train loss:1.7778713293599306, train accuracy:0.10703703703703704, validation loss:1.7599550950609033, validation accuracy:0.11416666666666667
epoch:3, train loss:1.6890953979894956, train accuracy:0.1496111111111111, validation loss:1.6841889606114677, validation accuracy:0.15333333333333332
epoch:4, train loss:1.7319671207773242, train accuracy:0.13290740740740742, validation loss:1.6491064962880229, validation accuracy:0.17566666666666667
epoch:5, train loss:1.7940001180682472, train accuracy:0.10348148148148148, validation loss:1.8106673519445697, validation accuracy:0.09533333333333334
epoch:6, train loss:1.803515045351411, train accuracy:0.09888888888888889, validation loss:1.8060506360699582, validation accuracy:0.09766666666666667
epoch:7, train loss:1.8023682095328963, train accuracy:0.0995, validation loss:1.800081783

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂█▆▂▁▁▁▁▁
train loss,█▆▁▄▇█████
validation accuracy,▁▃▆█▁▁▂▂▂▂
validation loss,█▆▂▁██▇▇▇▇

0,1
train accuracy,0.09987
train loss,1.80167
validation accuracy,0.10067
validation loss,1.80008


[34m[1mwandb[0m: Agent Starting Run: 0cmku8ma with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:0.9652608844554927, train accuracy:0.16603703703703704, validation loss:0.8651678311514138, validation accuracy:0.26566666666666666
epoch:2, train loss:0.8242133309784114, train accuracy:0.41174074074074074, validation loss:0.7874197588414082, validation accuracy:0.4593333333333333
epoch:3, train loss:0.7579535999091224, train accuracy:0.45831481481481484, validation loss:0.7306850487580601, validation accuracy:0.473
epoch:4, train loss:0.7091984839772336, train accuracy:0.47738888888888886, validation loss:0.6882512350781806, validation accuracy:0.5023333333333333
epoch:5, train loss:0.6714481689571351, train accuracy:0.5058888888888889, validation loss:0.6536471381786143, validation accuracy:0.5418333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇▇█
train loss,█▅▃▂▁
validation accuracy,▁▆▆▇█
validation loss,█▅▄▂▁

0,1
train accuracy,0.50589
train loss,0.67145
validation accuracy,0.54183
validation loss,0.65365


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2c9l1a3r with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.757533401945344, train accuracy:0.4081666666666667, validation loss:0.6064249774069325, validation accuracy:0.6066666666666667
epoch:2, train loss:0.5267770999798596, train accuracy:0.671, validation loss:0.46445999672279153, validation accuracy:0.7085
epoch:3, train loss:0.4289852263775771, train accuracy:0.7230740740740741, validation loss:0.39796747093319074, validation accuracy:0.744
epoch:4, train loss:0.37837971604288434, train accuracy:0.7507777777777778, validation loss:0.3598400647471303, validation accuracy:0.7636666666666667
epoch:5, train loss:0.347153388769563, train accuracy:0.7696851851851851, validation loss:0.3360641798913351, validation accuracy:0.7771666666666667


0,1
train accuracy,▁▆▇██
train loss,█▄▂▂▁
validation accuracy,▁▅▇▇█
validation loss,█▄▃▂▁

0,1
train accuracy,0.76969
train loss,0.34715
validation accuracy,0.77717
validation loss,0.33606


[34m[1mwandb[0m: Agent Starting Run: yfwh5y41 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.43152768459922974, train accuracy:0.6961296296296297, validation loss:0.2954094881657837, validation accuracy:0.8041666666666667
epoch:2, train loss:0.2783766625249043, train accuracy:0.8188888888888889, validation loss:0.2632175524626577, validation accuracy:0.8221666666666667
epoch:3, train loss:0.2550457888400874, train accuracy:0.835, validation loss:0.24857115956863585, validation accuracy:0.8336666666666667
epoch:4, train loss:0.24294348199145455, train accuracy:0.8435, validation loss:0.2473041826744234, validation accuracy:0.8401666666666666
epoch:5, train loss:0.23504339835745477, train accuracy:0.8489074074074074, validation loss:0.2312238207560365, validation accuracy:0.8511666666666666
epoch:6, train loss:0.22929721060737568, train accuracy:0.8537037037037037, validation loss:0.22880314163208937, validation accuracy:0.8516666666666667
epoch:7, train loss:0.22673660615288363, train accuracy:0.8555925925925926, validation loss:0.22429437259914678, valida

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▆▇██████▇▁
train loss,▂▁▁▁▁▁▁▁▂█
validation accuracy,████████▇▁
validation loss,▁▁▁▁▁▁▁▁▂█

0,1
train accuracy,0.34781
train loss,1.31292
validation accuracy,0.10617
validation loss,1.85049


[34m[1mwandb[0m: Agent Starting Run: qdkvczcd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.915716567239909, train accuracy:0.1187037037037037, validation loss:0.9081993970552628, validation accuracy:0.15383333333333332
epoch:2, train loss:0.903569268051562, train accuracy:0.2329074074074074, validation loss:0.8993462546282665, validation accuracy:0.26033333333333336
epoch:3, train loss:0.8957963178932045, train accuracy:0.25483333333333336, validation loss:0.8922490490120452, validation accuracy:0.23116666666666666
epoch:4, train loss:0.8889203954840108, train accuracy:0.22824074074074074, validation loss:0.8853298938779924, validation accuracy:0.23116666666666666
epoch:5, train loss:0.8818020730381896, train accuracy:0.22537037037037036, validation loss:0.8777758926081216, validation accuracy:0.215


0,1
train accuracy,▁▇█▇▆
train loss,█▅▄▂▁
validation accuracy,▁█▆▆▅
validation loss,█▆▄▃▁

0,1
train accuracy,0.22537
train loss,0.8818
validation accuracy,0.215
validation loss,0.87778


[34m[1mwandb[0m: Agent Starting Run: 5iq8h36n with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9344730043741791, train accuracy:0.11203703703703703, validation loss:0.9327545996061992, validation accuracy:0.11783333333333333
epoch:2, train loss:0.9296606039994539, train accuracy:0.12503703703703703, validation loss:0.9283223411746765, validation accuracy:0.13783333333333334
epoch:3, train loss:0.9256161221768098, train accuracy:0.14253703703703705, validation loss:0.9245520984408635, validation accuracy:0.154
epoch:4, train loss:0.9220738727233221, train accuracy:0.16242592592592592, validation loss:0.9211860076470345, validation accuracy:0.17066666666666666
epoch:5, train loss:0.9187713843890941, train accuracy:0.18005555555555555, validation loss:0.9180517203977694, validation accuracy:0.18166666666666667


VBox(children=(Label(value='0.001 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.045857…

0,1
train accuracy,▁▂▄▆█
train loss,█▆▄▂▁
validation accuracy,▁▃▅▇█
validation loss,█▆▄▂▁

0,1
train accuracy,0.18006
train loss,0.91877
validation accuracy,0.18167
validation loss,0.91805


[34m[1mwandb[0m: Agent Starting Run: bo0g7xsp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8863970937519049, train accuracy:0.18303703703703703, validation loss:0.8514558457152661, validation accuracy:0.2901666666666667
epoch:2, train loss:0.8130879455692251, train accuracy:0.3409074074074074, validation loss:0.7745813355858824, validation accuracy:0.4073333333333333
epoch:3, train loss:0.7412154579551296, train accuracy:0.4204074074074074, validation loss:0.7075303225409562, validation accuracy:0.441
epoch:4, train loss:0.6816170083898392, train accuracy:0.44146296296296295, validation loss:0.6538955763383661, validation accuracy:0.4593333333333333
epoch:5, train loss:0.6332245780329581, train accuracy:0.4732037037037037, validation loss:0.6097845399111692, validation accuracy:0.5011666666666666
epoch:6, train loss:0.5906732319762709, train accuracy:0.5478518518518518, validation loss:0.5691626719779415, validation accuracy:0.5908333333333333
epoch:7, train loss:0.5514160319452409, train accuracy:0.6243518518518518, validation loss:0.5311926666707154, 

0,1
train accuracy,▁▃▄▅▅▆▇███
train loss,█▇▆▅▄▃▃▂▁▁
validation accuracy,▁▃▄▄▅▆▇███
validation loss,█▇▆▅▄▃▂▂▁▁

0,1
train accuracy,0.69131
train loss,0.45886
validation accuracy,0.69467
validation loss,0.44623


[34m[1mwandb[0m: Agent Starting Run: s4bm6g3m with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9381441920529202, train accuracy:0.09983333333333333, validation loss:0.925332535345308, validation accuracy:0.11466666666666667
epoch:2, train loss:0.9214204213486633, train accuracy:0.09933333333333333, validation loss:0.918870830623433, validation accuracy:0.1245
epoch:3, train loss:0.9174911936636226, train accuracy:0.10729629629629629, validation loss:0.9164201491858032, validation accuracy:0.066
epoch:4, train loss:0.9154074627266029, train accuracy:0.09433333333333334, validation loss:0.9146536097605517, validation accuracy:0.10533333333333333
epoch:5, train loss:0.9141350201688664, train accuracy:0.09894444444444445, validation loss:0.9138378527488291, validation accuracy:0.1285
epoch:6, train loss:0.9137020921041715, train accuracy:0.10679629629629629, validation loss:0.9136460392148228, validation accuracy:0.087
epoch:7, train loss:0.913617528804399, train accuracy:0.0917037037037037, validation loss:0.9136034720643536, validation accuracy:0.099
epoch:8,

0,1
train accuracy,▃▃▅▂▃▅▁▅█▇
train loss,█▃▂▂▁▁▁▁▁▁
validation accuracy,▆█▁▅█▃▅▃▅▅
validation loss,█▄▃▂▁▁▁▁▁▁

0,1
train accuracy,0.11394
train loss,0.91358
validation accuracy,0.1025
validation loss,0.91359


[34m[1mwandb[0m: Agent Starting Run: 5vzxba2y with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9556212447158853, train accuracy:0.0999074074074074, validation loss:0.919253602243069, validation accuracy:0.10083333333333333
epoch:2, train loss:0.9074262239407791, train accuracy:0.09994444444444445, validation loss:0.9021514434357761, validation accuracy:0.10083333333333333
epoch:3, train loss:0.9006748576199195, train accuracy:0.09983333333333333, validation loss:0.900057435387646, validation accuracy:0.10083333333333333
epoch:4, train loss:0.8999005693834647, train accuracy:0.10298148148148148, validation loss:0.8998067836358842, validation accuracy:0.10083333333333333
epoch:5, train loss:0.8997778048163892, train accuracy:0.10781481481481482, validation loss:0.8997490132497786, validation accuracy:0.09433333333333334
epoch:6, train loss:0.8996713045274713, train accuracy:0.11912962962962963, validation loss:0.8995849759694375, validation accuracy:0.14383333333333334
epoch:7, train loss:0.8994998190782509, train accuracy:0.16585185185185186, validation loss

0,1
train accuracy,▁▁▁▁▁▂▄▄▆█
train loss,█▂▁▁▁▁▁▁▁▁
validation accuracy,▁▁▁▁▁▃█▆▅▆
validation loss,█▃▂▂▂▂▂▂▁▁

0,1
train accuracy,0.23237
train loss,0.89775
validation accuracy,0.23467
validation loss,0.89691


[34m[1mwandb[0m: Agent Starting Run: 7mlz96zd with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333333266395, max=1.0…

epoch:1, train loss:0.9591776477662097, train accuracy:0.07861111111111112, validation loss:0.960587079164963, validation accuracy:0.07733333333333334
epoch:2, train loss:0.9590455023734225, train accuracy:0.0787037037037037, validation loss:0.9604582971685328, validation accuracy:0.07783333333333334
epoch:3, train loss:0.9589173579424985, train accuracy:0.07875925925925926, validation loss:0.9603306264665431, validation accuracy:0.07783333333333334
epoch:4, train loss:0.9587821050384556, train accuracy:0.0787962962962963, validation loss:0.9601870657353986, validation accuracy:0.07816666666666666
epoch:5, train loss:0.9586443406642625, train accuracy:0.079, validation loss:0.9600525772597626, validation accuracy:0.078
epoch:6, train loss:0.9585081017829511, train accuracy:0.07914814814814815, validation loss:0.9599153988872061, validation accuracy:0.07833333333333334
epoch:7, train loss:0.9583771304973694, train accuracy:0.07927777777777778, validation loss:0.9597824659414383, validat

0,1
train accuracy,▁▂▂▂▃▄▅▆▇█
train loss,█▇▆▆▅▄▃▃▂▁
validation accuracy,▁▃▃▅▄▆▆███
validation loss,█▇▇▆▅▄▃▃▂▁

0,1
train accuracy,0.07987
train loss,0.95797
validation accuracy,0.07883
validation loss,0.95937


[34m[1mwandb[0m: Agent Starting Run: 9x3piq6d with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:2.1018269630849153, train accuracy:0.10064814814814815, validation loss:2.116061751210596, validation accuracy:0.094
epoch:2, train loss:2.1017851288412235, train accuracy:0.10064814814814815, validation loss:2.1156252893693175, validation accuracy:0.09416666666666666
epoch:3, train loss:2.1016718003112747, train accuracy:0.1007037037037037, validation loss:2.115452306408458, validation accuracy:0.09416666666666666
epoch:4, train loss:2.1016349857812924, train accuracy:0.1007037037037037, validation loss:2.1151227633902474, validation accuracy:0.094
epoch:5, train loss:2.1015676724708916, train accuracy:0.10083333333333333, validation loss:2.11493160246131, validation accuracy:0.094
epoch:6, train loss:2.101406255964885, train accuracy:0.10081481481481482, validation loss:2.114752127722278, validation accuracy:0.09366666666666666
epoch:7, train loss:2.101473549933669, train accuracy:0.10068518518518518, validation loss:2.1142454896183125, validation accuracy:0.09416

0,1
train accuracy,▁▁▃▃█▇▂▂▁▂
train loss,█▇▅▅▄▁▂▁▁▂
validation accuracy,▆██▆▆▁███▃
validation loss,█▆▆▅▄▃▂▁▂▃

0,1
train accuracy,0.10069
train loss,2.10149
validation accuracy,0.09383
validation loss,2.11448


[34m[1mwandb[0m: Agent Starting Run: h333aog9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.0531902509089761, train accuracy:0.1050925925925926, validation loss:0.9887202478909527, validation accuracy:0.1255
epoch:2, train loss:0.9762998137026091, train accuracy:0.1275925925925926, validation loss:0.9626010240391708, validation accuracy:0.13416666666666666
epoch:3, train loss:0.9608224555998964, train accuracy:0.13125925925925927, validation loss:0.9535364029117571, validation accuracy:0.13666666666666666
epoch:4, train loss:0.9534954935817129, train accuracy:0.13335185185185186, validation loss:0.9479208453422557, validation accuracy:0.13633333333333333
epoch:5, train loss:0.9482599558233789, train accuracy:0.1356111111111111, validation loss:0.9434859769317719, validation accuracy:0.138
epoch:6, train loss:0.9439357289543939, train accuracy:0.13733333333333334, validation loss:0.9396754492557473, validation accuracy:0.13916666666666666
epoch:7, train loss:0.9401454895005618, train accuracy:0.1383888888888889, validation loss:0.9362862536563463, validat

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▆▇▇█████
train loss,█▄▃▂▂▂▂▁▁▁
validation accuracy,▁▅▆▆▇▇▇███
validation loss,█▅▄▃▃▂▂▂▁▁

0,1
train accuracy,0.13881
train loss,0.93078
validation accuracy,0.14033
validation loss,0.92768


[34m[1mwandb[0m: Agent Starting Run: x94kbnyl with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:2.466659411051028, train accuracy:0.06074074074074074, validation loss:2.3998387711127585, validation accuracy:0.09283333333333334
epoch:2, train loss:2.350848844147719, train accuracy:0.11716666666666667, validation loss:2.291289617360402, validation accuracy:0.1475
epoch:3, train loss:2.2610720432983373, train accuracy:0.16124074074074074, validation loss:2.227250541758126, validation accuracy:0.178
epoch:4, train loss:2.2050874768369946, train accuracy:0.1877962962962963, validation loss:2.1753685516236945, validation accuracy:0.20433333333333334
epoch:5, train loss:2.1544084823020824, train accuracy:0.2132962962962963, validation loss:2.1331221012925563, validation accuracy:0.22466666666666665
epoch:6, train loss:2.12280145717732, train accuracy:0.23064814814814816, validation loss:2.1068552093685486, validation accuracy:0.23916666666666667
epoch:7, train loss:2.0939203716612482, train accuracy:0.24551851851851852, validation loss:2.081636850543228, validation a

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▄▅▅▆▆▇██
train loss,█▆▅▄▃▃▃▂▁▁
validation accuracy,▁▃▄▅▅▆▆▇██
validation loss,█▆▅▄▃▃▃▂▁▁

0,1
train accuracy,0.30017
train loss,1.98573
validation accuracy,0.2995
validation loss,1.98626


[34m[1mwandb[0m: Agent Starting Run: vd5wolgu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9664414906098919, train accuracy:0.09951851851851852, validation loss:0.9676592499601642, validation accuracy:0.10033333333333333
epoch:2, train loss:0.9661340073730609, train accuracy:0.0995, validation loss:0.9673653582575086, validation accuracy:0.10033333333333333
epoch:3, train loss:0.9658255182262397, train accuracy:0.0995, validation loss:0.9670475591787762, validation accuracy:0.10033333333333333
epoch:4, train loss:0.9655143057561653, train accuracy:0.0995, validation loss:0.9667571858913487, validation accuracy:0.10033333333333333
epoch:5, train loss:0.9652186042329411, train accuracy:0.0995, validation loss:0.9664463479493146, validation accuracy:0.10033333333333333
epoch:6, train loss:0.9649176852960483, train accuracy:0.09946296296296296, validation loss:0.9661695926482172, validation accuracy:0.10033333333333333
epoch:7, train loss:0.9646243125501448, train accuracy:0.09946296296296296, validation loss:0.9658665832898916, validation accuracy:0.100333

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,█▇▇▇▇▅▅▄▂▁
train loss,█▇▆▆▅▄▃▃▂▁
validation accuracy,▁▁▁▁▁▁▁▁▁▁
validation loss,█▇▆▆▅▄▃▃▂▁

0,1
train accuracy,0.09939
train loss,0.96373
validation accuracy,0.10033
validation loss,0.96499


[34m[1mwandb[0m: Agent Starting Run: nlganwbn with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.3244901456581717, train accuracy:0.09972222222222223, validation loss:1.3075803685296132, validation accuracy:0.1025
epoch:2, train loss:1.3001039352947488, train accuracy:0.09972222222222223, validation loss:1.2881403547608878, validation accuracy:0.1025
epoch:3, train loss:1.2839728951630203, train accuracy:0.09972222222222223, validation loss:1.2745208009883482, validation accuracy:0.1025
epoch:4, train loss:1.271982026408203, train accuracy:0.09972222222222223, validation loss:1.263881303950304, validation accuracy:0.1025
epoch:5, train loss:1.2620386588898518, train accuracy:0.09972222222222223, validation loss:1.2547121287816414, validation accuracy:0.1025


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁
train loss,█▅▃▂▁
validation accuracy,▁▁▁▁▁
validation loss,█▅▄▂▁

0,1
train accuracy,0.09972
train loss,1.26204
validation accuracy,0.1025
validation loss,1.25471


[34m[1mwandb[0m: Agent Starting Run: fzfqjnkx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8964135156170683, train accuracy:0.15787037037037038, validation loss:0.8583137380866599, validation accuracy:0.3115
epoch:2, train loss:0.8293168572837368, train accuracy:0.3789814814814815, validation loss:0.799635802603253, validation accuracy:0.43
epoch:3, train loss:0.7761920545181914, train accuracy:0.454, validation loss:0.7501706869131565, validation accuracy:0.4891666666666667
epoch:4, train loss:0.7315998921450575, train accuracy:0.5080925925925925, validation loss:0.7088753689371169, validation accuracy:0.5408333333333334
epoch:5, train loss:0.6949155803728697, train accuracy:0.5462777777777778, validation loss:0.6746881908147462, validation accuracy:0.5708333333333333
epoch:6, train loss:0.663968552990659, train accuracy:0.5699074074074074, validation loss:0.6459110624260694, validation accuracy:0.5853333333333334
epoch:7, train loss:0.6372715181647326, train accuracy:0.584462962962963, validation loss:0.6207930843662064, validation accuracy:0.59783333

0,1
train accuracy,▁▄▅▆▇▇▇▇██
train loss,█▇▅▄▄▃▂▂▁▁
validation accuracy,▁▃▅▆▆▇▇▇██
validation loss,█▇▅▄▄▃▂▂▁▁

0,1
train accuracy,0.64443
train loss,0.57201
validation accuracy,0.65417
validation loss,0.55984


[34m[1mwandb[0m: Agent Starting Run: mjrxxl96 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.201726351070633, train accuracy:0.10590740740740741, validation loss:1.1032298228855253, validation accuracy:0.1105
epoch:2, train loss:1.0562340354455877, train accuracy:0.11137037037037037, validation loss:1.0205087640857051, validation accuracy:0.1125
epoch:3, train loss:1.0047972743877975, train accuracy:0.10887037037037037, validation loss:0.9883754901259173, validation accuracy:0.10833333333333334
epoch:4, train loss:0.9805828391736096, train accuracy:0.10412962962962963, validation loss:0.9700273306176403, validation accuracy:0.10533333333333333
epoch:5, train loss:0.9656061975499625, train accuracy:0.10162962962962963, validation loss:0.9581251412649133, validation accuracy:0.1065


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▄█▆▃▁
train loss,█▄▂▁▁
validation accuracy,▆█▄▁▂
validation loss,█▄▂▂▁

0,1
train accuracy,0.10163
train loss,0.96561
validation accuracy,0.1065
validation loss,0.95813


[34m[1mwandb[0m: Agent Starting Run: icyfv912 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8875770599580602, train accuracy:0.17407407407407408, validation loss:0.8605249137004289, validation accuracy:0.26816666666666666
epoch:2, train loss:0.8378007463602306, train accuracy:0.33924074074074073, validation loss:0.8152180695629128, validation accuracy:0.3893333333333333
epoch:3, train loss:0.7960608580298575, train accuracy:0.4121666666666667, validation loss:0.7775830393268517, validation accuracy:0.43483333333333335
epoch:4, train loss:0.7625562644106312, train accuracy:0.44155555555555553, validation loss:0.7471651153216304, validation accuracy:0.46
epoch:5, train loss:0.734585090789797, train accuracy:0.46655555555555556, validation loss:0.7203392902231563, validation accuracy:0.48783333333333334
epoch:6, train loss:0.7087609549577794, train accuracy:0.49807407407407406, validation loss:0.694598810985119, validation accuracy:0.522
epoch:7, train loss:0.6833575130655705, train accuracy:0.5374629629629629, validation loss:0.6688796808620513, validation

0,1
train accuracy,▁▄▅▅▆▆▇▇██
train loss,█▇▆▅▄▄▃▂▂▁
validation accuracy,▁▃▄▅▅▆▆▇██
validation loss,█▇▆▅▄▄▃▂▂▁

0,1
train accuracy,0.62652
train loss,0.60854
validation accuracy,0.645
validation loss,0.59325


[34m[1mwandb[0m: Agent Starting Run: wmbe4gpm with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.7076136981770437, train accuracy:0.4592037037037037, validation loss:0.5019778693517948, validation accuracy:0.6663333333333333
epoch:2, train loss:0.4303984739458596, train accuracy:0.6973518518518519, validation loss:0.37827368420855045, validation accuracy:0.7463333333333333
epoch:3, train loss:0.34805569704745803, train accuracy:0.7749814814814815, validation loss:0.32916917034130705, validation accuracy:0.7846666666666666
epoch:4, train loss:0.3116650303964427, train accuracy:0.7972962962962963, validation loss:0.30115904366920865, validation accuracy:0.7985
epoch:5, train loss:0.29073450129536904, train accuracy:0.8092962962962963, validation loss:0.28398362222683793, validation accuracy:0.809


0,1
train accuracy,▁▆▇██
train loss,█▃▂▁▁
validation accuracy,▁▅▇▇█
validation loss,█▄▂▂▁

0,1
train accuracy,0.8093
train loss,0.29073
validation accuracy,0.809
validation loss,0.28398


[34m[1mwandb[0m: Agent Starting Run: 50alr74i with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.49151455122847687, train accuracy:0.6755185185185185, validation loss:0.3233167578509746, validation accuracy:0.7963333333333333
epoch:2, train loss:0.2867890061340037, train accuracy:0.8098888888888889, validation loss:0.26600495804202323, validation accuracy:0.8173333333333334
epoch:3, train loss:0.25059624965335375, train accuracy:0.8275740740740741, validation loss:0.2435280699187844, validation accuracy:0.8285
epoch:4, train loss:0.23373415328553582, train accuracy:0.8366296296296296, validation loss:0.23388960575199094, validation accuracy:0.8388333333333333
epoch:5, train loss:0.22278619093280558, train accuracy:0.8444074074074074, validation loss:0.22808010619656371, validation accuracy:0.8381666666666666


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▇▇██
train loss,█▃▂▁▁
validation accuracy,▁▄▆██
validation loss,█▄▂▁▁

0,1
train accuracy,0.84441
train loss,0.22279
validation accuracy,0.83817
validation loss,0.22808


[34m[1mwandb[0m: Agent Starting Run: q4eafvlp with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8673434015055355, train accuracy:0.3079074074074074, validation loss:0.8205074557630502, validation accuracy:0.37616666666666665
epoch:2, train loss:0.775455375918288, train accuracy:0.44022222222222224, validation loss:0.7314069230401749, validation accuracy:0.482
epoch:3, train loss:0.6862245688256035, train accuracy:0.5213888888888889, validation loss:0.6426344415975352, validation accuracy:0.5911666666666666
epoch:4, train loss:0.6024737317720913, train accuracy:0.6036481481481482, validation loss:0.5623200181579452, validation accuracy:0.6236666666666667
epoch:5, train loss:0.5325731716982278, train accuracy:0.6229814814814815, validation loss:0.5026162371304099, validation accuracy:0.6333333333333333
epoch:6, train loss:0.48575350415800844, train accuracy:0.6388703703703704, validation loss:0.4633844907789131, validation accuracy:0.6586666666666666
epoch:7, train loss:0.45276578851667315, train accuracy:0.6761851851851852, validation loss:0.43356102259134066

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▄▆▆▆▇▇██
train loss,█▇▅▄▃▃▂▂▁▁
validation accuracy,▁▃▅▅▆▆▇███
validation loss,█▇▅▄▃▃▂▂▁▁

0,1
train accuracy,0.76063
train loss,0.3692
validation accuracy,0.76683
validation loss,0.35359


[34m[1mwandb[0m: Agent Starting Run: hr6zj144 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.2131690017410652, train accuracy:0.2047037037037037, validation loss:0.8620803160748037, validation accuracy:0.3473333333333333
epoch:2, train loss:0.704555018294119, train accuracy:0.4594259259259259, validation loss:0.5872411286459807, validation accuracy:0.547
epoch:3, train loss:0.526850196517663, train accuracy:0.6022222222222222, validation loss:0.48279572814209765, validation accuracy:0.641
epoch:4, train loss:0.458911383954518, train accuracy:0.6617407407407407, validation loss:0.43538149441964225, validation accuracy:0.6835
epoch:5, train loss:0.41605024463292933, train accuracy:0.6985740740740741, validation loss:0.40696417750561514, validation accuracy:0.7053333333333334
epoch:6, train loss:0.3892137606114928, train accuracy:0.7215925925925926, validation loss:0.3877412504919275, validation accuracy:0.7238333333333333
epoch:7, train loss:0.36840197120631296, train accuracy:0.7381296296296296, validation loss:0.36778076852434344, validation accuracy:0.73

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▄▆▇▇▇████
train loss,█▄▃▂▂▁▁▁▁▁
validation accuracy,▁▄▆▇▇▇████
validation loss,█▄▃▂▂▂▁▁▁▁

0,1
train accuracy,0.77157
train loss,0.32734
validation accuracy,0.75883
validation loss,0.34513


[34m[1mwandb[0m: Agent Starting Run: m0qttrdc with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666656966, max=1.0…

epoch:1, train loss:0.9344126353485402, train accuracy:0.10062962962962962, validation loss:0.9348005948612634, validation accuracy:0.09433333333333334
epoch:2, train loss:0.930892549847637, train accuracy:0.10064814814814815, validation loss:0.9314664391982679, validation accuracy:0.09433333333333334
epoch:3, train loss:0.9278984110736584, train accuracy:0.10066666666666667, validation loss:0.9285991707459967, validation accuracy:0.09433333333333334
epoch:4, train loss:0.9253046078616479, train accuracy:0.10066666666666667, validation loss:0.9260842723120647, validation accuracy:0.09466666666666666
epoch:5, train loss:0.923018286444854, train accuracy:0.10072222222222223, validation loss:0.9238517433338548, validation accuracy:0.09466666666666666


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▄▄█
train loss,█▆▄▂▁
validation accuracy,▁▁▁██
validation loss,█▆▄▂▁

0,1
train accuracy,0.10072
train loss,0.92302
validation accuracy,0.09467
validation loss,0.92385


[34m[1mwandb[0m: Agent Starting Run: q8842t12 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:0.8937469572730351, train accuracy:0.339, validation loss:0.6131553606254638, validation accuracy:0.5465
epoch:2, train loss:0.5438910711894785, train accuracy:0.5962222222222222, validation loss:0.4915806775337961, validation accuracy:0.6371666666666667
epoch:3, train loss:0.4558755919436141, train accuracy:0.6692962962962963, validation loss:0.4395431814968683, validation accuracy:0.6771666666666667
epoch:4, train loss:0.4119746968375407, train accuracy:0.7037037037037037, validation loss:0.40651594819781867, validation accuracy:0.7075
epoch:5, train loss:0.38384774713061753, train accuracy:0.7267037037037037, validation loss:0.38799062000409557, validation accuracy:0.7158333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇██
train loss,█▃▂▁▁
validation accuracy,▁▅▆██
validation loss,█▄▃▂▁

0,1
train accuracy,0.7267
train loss,0.38385
validation accuracy,0.71583
validation loss,0.38799


[34m[1mwandb[0m: Agent Starting Run: q2qyr9bj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.5083689269853389, train accuracy:0.10372222222222222, validation loss:1.5092388954483875, validation accuracy:0.09883333333333333
epoch:2, train loss:1.5081753349307883, train accuracy:0.10366666666666667, validation loss:1.5090191132340443, validation accuracy:0.09883333333333333
epoch:3, train loss:1.5079828719632444, train accuracy:0.10374074074074074, validation loss:1.5087894816273149, validation accuracy:0.09933333333333333
epoch:4, train loss:1.507795412771528, train accuracy:0.10368518518518519, validation loss:1.508563973975965, validation accuracy:0.09916666666666667
epoch:5, train loss:1.5076103812181423, train accuracy:0.1039074074074074, validation loss:1.50841117436813, validation accuracy:0.09916666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▃▁▃▂█
train loss,█▆▄▃▁
validation accuracy,▁▁█▆▆
validation loss,█▆▄▂▁

0,1
train accuracy,0.10391
train loss,1.50761
validation accuracy,0.09917
validation loss,1.50841


[34m[1mwandb[0m: Agent Starting Run: vbpoekc8 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.910513363500539, train accuracy:0.11385185185185186, validation loss:0.9034277816910922, validation accuracy:0.11166666666666666
epoch:2, train loss:0.8995092743859464, train accuracy:0.11731481481481482, validation loss:0.8934536203734811, validation accuracy:0.12616666666666668
epoch:3, train loss:0.8896986619858243, train accuracy:0.13425925925925927, validation loss:0.8837122024652216, validation accuracy:0.14933333333333335
epoch:4, train loss:0.8797788829440361, train accuracy:0.16720370370370372, validation loss:0.8734504580513309, validation accuracy:0.189
epoch:5, train loss:0.8691210494293832, train accuracy:0.2175740740740741, validation loss:0.8621429502300018, validation accuracy:0.2495
epoch:6, train loss:0.8572386910810132, train accuracy:0.2768148148148148, validation loss:0.8492513209327689, validation accuracy:0.30633333333333335
epoch:7, train loss:0.8435847794048196, train accuracy:0.3256296296296296, validation loss:0.8342581517391354, validat

0,1
train accuracy,▁▁▁▂▃▅▆▇▇█
train loss,█▇▇▆▆▅▄▃▂▁
validation accuracy,▁▁▂▃▄▅▆▇██
validation loss,█▇▇▆▆▅▄▃▂▁

0,1
train accuracy,0.40504
train loss,0.79117
validation accuracy,0.4275
validation loss,0.77833


[34m[1mwandb[0m: Agent Starting Run: eegp3gc2 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.6703646355570662, train accuracy:0.48714814814814816, validation loss:0.4677450645301122, validation accuracy:0.7006666666666667
epoch:2, train loss:0.39618028044029574, train accuracy:0.7393703703703703, validation loss:0.34757928707054664, validation accuracy:0.7658333333333334
epoch:3, train loss:0.32520329247630014, train accuracy:0.7786481481481482, validation loss:0.30685672016987664, validation accuracy:0.79
epoch:4, train loss:0.2961787007960412, train accuracy:0.7999074074074074, validation loss:0.28781514472288344, validation accuracy:0.8095
epoch:5, train loss:0.278951034278948, train accuracy:0.8126481481481481, validation loss:0.27334665814432607, validation accuracy:0.8183333333333334


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇██
train loss,█▃▂▁▁
validation accuracy,▁▅▆▇█
validation loss,█▄▂▂▁

0,1
train accuracy,0.81265
train loss,0.27895
validation accuracy,0.81833
validation loss,0.27335


[34m[1mwandb[0m: Agent Starting Run: wa7ak1h2 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:2.4533509708737475, train accuracy:0.10024074074074074, validation loss:2.453486690726802, validation accuracy:0.10016666666666667
epoch:2, train loss:2.4533234424572203, train accuracy:0.10024074074074074, validation loss:2.453455686642024, validation accuracy:0.10016666666666667
epoch:3, train loss:2.4532243124751067, train accuracy:0.10025925925925926, validation loss:2.4534734944399905, validation accuracy:0.10016666666666667
epoch:4, train loss:2.4531477940369815, train accuracy:0.10027777777777777, validation loss:2.453148054694993, validation accuracy:0.10016666666666667
epoch:5, train loss:2.453022760722115, train accuracy:0.10035185185185186, validation loss:2.4524910352074767, validation accuracy:0.10033333333333333


0,1
train accuracy,▁▁▂▃█
train loss,█▇▅▄▁
validation accuracy,▁▁▁▁█
validation loss,███▆▁

0,1
train accuracy,0.10035
train loss,2.45302
validation accuracy,0.10033
validation loss,2.45249


[34m[1mwandb[0m: Agent Starting Run: p95ov502 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8185176051359834, train accuracy:0.35394444444444445, validation loss:0.7139536871228939, validation accuracy:0.5028333333333334
epoch:2, train loss:0.6557342028323908, train accuracy:0.5255, validation loss:0.6049789508448257, validation accuracy:0.5651666666666667
epoch:3, train loss:0.5742495240219413, train accuracy:0.5864074074074074, validation loss:0.5412220443003639, validation accuracy:0.632
epoch:4, train loss:0.521164380359723, train accuracy:0.6396851851851851, validation loss:0.49569042598014346, validation accuracy:0.6675
epoch:5, train loss:0.4801711153735378, train accuracy:0.670962962962963, validation loss:0.4589287655432563, validation accuracy:0.692


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇█
train loss,█▅▃▂▁
validation accuracy,▁▃▆▇█
validation loss,█▅▃▂▁

0,1
train accuracy,0.67096
train loss,0.48017
validation accuracy,0.692
validation loss,0.45893


[34m[1mwandb[0m: Agent Starting Run: e5mtus7y with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:3.3690222185078, train accuracy:0.10131481481481482, validation loss:3.3699331119617977, validation accuracy:0.101
epoch:2, train loss:3.3690449387848678, train accuracy:0.10131481481481482, validation loss:3.3698580306509514, validation accuracy:0.101
epoch:3, train loss:3.368812803011748, train accuracy:0.1014074074074074, validation loss:3.369574739505914, validation accuracy:0.101
epoch:4, train loss:3.368742892093339, train accuracy:0.10142592592592592, validation loss:3.3696463396374776, validation accuracy:0.101
epoch:5, train loss:3.3686439468258254, train accuracy:0.10146296296296296, validation loss:3.369634902173961, validation accuracy:0.101


0,1
train accuracy,▁▁▅▆█
train loss,██▄▃▁
validation accuracy,▁▁▁▁▁
validation loss,█▇▁▂▂

0,1
train accuracy,0.10146
train loss,3.36864
validation accuracy,0.101
validation loss,3.36963


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mkacdh7u with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.797129333366898, train accuracy:0.10046296296296296, validation loss:1.778792756967969, validation accuracy:0.1085
epoch:2, train loss:1.7672059377097191, train accuracy:0.10844444444444444, validation loss:1.6761627841424593, validation accuracy:0.14366666666666666
epoch:3, train loss:1.6228705927001508, train accuracy:0.17062962962962963, validation loss:1.584549453450093, validation accuracy:0.18833333333333332
epoch:4, train loss:1.5408092757284102, train accuracy:0.21107407407407408, validation loss:1.4856355182961254, validation accuracy:0.24016666666666667
epoch:5, train loss:1.4433470128489194, train accuracy:0.26114814814814813, validation loss:1.4092019532166749, validation accuracy:0.2793333333333333


0,1
train accuracy,▁▁▄▆█
train loss,█▇▅▃▁
validation accuracy,▁▂▄▆█
validation loss,█▆▄▂▁

0,1
train accuracy,0.26115
train loss,1.44335
validation accuracy,0.27933
validation loss,1.4092


[34m[1mwandb[0m: Agent Starting Run: ydk3vcd9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.4534993891585348, train accuracy:0.09972222222222223, validation loss:1.394396355143806, validation accuracy:0.1025
epoch:2, train loss:1.3609869440550164, train accuracy:0.09972222222222223, validation loss:1.3236657676695223, validation accuracy:0.1025
epoch:3, train loss:1.3059481023324087, train accuracy:0.09972222222222223, validation loss:1.2824690535888617, validation accuracy:0.1025
epoch:4, train loss:1.2732651820391383, train accuracy:0.09972222222222223, validation loss:1.257173685169198, validation accuracy:0.1025
epoch:5, train loss:1.2522659177231188, train accuracy:0.09972222222222223, validation loss:1.2404126855494568, validation accuracy:0.1025


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁
train loss,█▅▃▂▁
validation accuracy,▁▁▁▁▁
validation loss,█▅▃▂▁

0,1
train accuracy,0.09972
train loss,1.25227
validation accuracy,0.1025
validation loss,1.24041


[34m[1mwandb[0m: Agent Starting Run: geuget3n with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:1.1437371338967912, train accuracy:0.10051851851851852, validation loss:1.0966868548222952, validation accuracy:0.09533333333333334
epoch:2, train loss:1.0509766545383452, train accuracy:0.10051851851851852, validation loss:1.0196674694394963, validation accuracy:0.09533333333333334
epoch:3, train loss:0.9923648651331234, train accuracy:0.10051851851851852, validation loss:0.9755702147377929, validation accuracy:0.09533333333333334
epoch:4, train loss:0.9600146571280179, train accuracy:0.10051851851851852, validation loss:0.9509479374900361, validation accuracy:0.09533333333333334
epoch:5, train loss:0.9412425728098214, train accuracy:0.10051851851851852, validation loss:0.9356440959095049, validation accuracy:0.09533333333333334
epoch:6, train loss:0.9288077972128947, train accuracy:0.10055555555555555, validation loss:0.9248279696597788, validation accuracy:0.0955
epoch:7, train loss:0.9197729249461368, train accuracy:0.12351851851851851, validation loss:0.9167969

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁▁▂▅▇█
train loss,█▅▄▃▂▂▁▁▁▁
validation accuracy,▁▁▁▁▁▁▄▆▇█
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.23719
train loss,0.9033
validation accuracy,0.241
validation loss,0.90183


[34m[1mwandb[0m: Agent Starting Run: qe5vmp86 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9560082030259942, train accuracy:0.09935185185185186, validation loss:0.9560073595121569, validation accuracy:0.0995
epoch:2, train loss:0.9558215078250412, train accuracy:0.09937037037037037, validation loss:0.9558228522585509, validation accuracy:0.0995
epoch:3, train loss:0.9556454158698887, train accuracy:0.09938888888888889, validation loss:0.9556447934880673, validation accuracy:0.09966666666666667
epoch:4, train loss:0.9554670190678529, train accuracy:0.09938888888888889, validation loss:0.9554684704528762, validation accuracy:0.09983333333333333
epoch:5, train loss:0.9552956330668935, train accuracy:0.09937037037037037, validation loss:0.955295344849154, validation accuracy:0.1
epoch:6, train loss:0.9551252114653702, train accuracy:0.09937037037037037, validation loss:0.9551266981041526, validation accuracy:0.1
epoch:7, train loss:0.9549387177847996, train accuracy:0.09938888888888889, validation loss:0.9549347915745175, validation accuracy:0.1001666666666

0,1
train accuracy,▃▄▆▆▄▄▆▃▁█
train loss,█▇▆▆▅▄▃▃▂▁
validation accuracy,▁▁▃▅▆▆██▅▁
validation loss,█▇▆▆▅▄▃▃▂▁

0,1
train accuracy,0.09943
train loss,0.95438
validation accuracy,0.0995
validation loss,0.95439


[34m[1mwandb[0m: Agent Starting Run: 5afrwsmc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9772673131516826, train accuracy:0.09972222222222223, validation loss:0.9640316449038221, validation accuracy:0.1025
epoch:2, train loss:0.9567110572424191, train accuracy:0.09972222222222223, validation loss:0.9495847035213463, validation accuracy:0.1025


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


epoch:3, train loss:0.9452685170455666, train accuracy:0.09972222222222223, validation loss:0.9409417846221136, validation accuracy:0.1025
epoch:4, train loss:0.938094596809259, train accuracy:0.09972222222222223, validation loss:0.9352627809212446, validation accuracy:0.1025
epoch:5, train loss:0.9332380828313057, train accuracy:0.09972222222222223, validation loss:0.9312876801930622, validation accuracy:0.1025


0,1
train accuracy,▁▁▁▁▁
train loss,█▅▃▂▁
validation accuracy,▁▁▁▁▁
validation loss,█▅▃▂▁

0,1
train accuracy,0.09972
train loss,0.93324
validation accuracy,0.1025
validation loss,0.93129


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x9agak2l with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.906267604467005, train accuracy:0.10033333333333333, validation loss:0.9007645842325842, validation accuracy:0.09433333333333334
epoch:2, train loss:0.9001554587896127, train accuracy:0.0976111111111111, validation loss:0.9000985791510114, validation accuracy:0.10066666666666667
epoch:3, train loss:0.9000509055759877, train accuracy:0.09464814814814815, validation loss:0.9000936920367884, validation accuracy:0.09433333333333334
epoch:4, train loss:0.9000563081556443, train accuracy:0.09651851851851852, validation loss:0.9000474187915609, validation accuracy:0.09433333333333334
epoch:5, train loss:0.9000486991007915, train accuracy:0.09603703703703703, validation loss:0.9000642453225541, validation accuracy:0.0915
epoch:6, train loss:0.900047597351778, train accuracy:0.09759259259259259, validation loss:0.9000494387176233, validation accuracy:0.10066666666666667
epoch:7, train loss:0.9000443192882487, train accuracy:0.09651851851851852, validation loss:0.9000315820

0,1
train accuracy,█▅▁▃▃▅▃▂▃▄
train loss,█▁▁▁▁▁▁▁▁▁
validation accuracy,▅▆▅▅▅▆▆▆▁█
validation loss,█▂▂▁▁▁▁▁▁▁

0,1
train accuracy,0.0973
train loss,0.90005
validation accuracy,0.113
validation loss,0.90003


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sj16ge60 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random
