In [None]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.13.11-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.16.0-py2.py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
Collecting appdirs>=1.4.3
  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle
  Downloading setproctitle-1.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86

In [None]:
from tensorflow import keras
from keras.datasets import fashion_mnist
from keras.datasets import mnist
import numpy as np
from matplotlib import pyplot as plt
import random
import wandb
import argparse
from datetime import datetime
import copy

class FeedForward:

    def __init__(self):
        """Set the default hyper-parameters, load the data and create empty model state.

        The chosen dataset is loaded through keras, pixel values are divided by
        255, the training set is shuffled once and its first tenth is held out
        as a validation split. Weights, biases, optimizer histories and the
        activation caches start out as empty dictionaries.
        """
        # default hyper-parameters
        self.parameters = {
            "wandb_project": "DL Final Assignment 1",
            "wandb_entity": "cs22m019",
            "dataset": "fashion_mnist",
            "epochs": 5,
            "batch_size": 32,
            "loss": "cross_entropy",
            "optimizer": "gd",
            "learning_rate": 0.1,
            "momentum": 0.01,
            "beta": 0.5,
            "beta1": 0.5,
            "beta2": 0.5,
            "epsilon": 0.000001,
            "weight_decay": 0.0,
            "weight_init": "random",
            "num_layers": 3,
            "hidden_size": 128,
            "activation": "sigmoid",
            "output_function": "softmax"
        }

        # command-line overrides are currently disabled
        # self.update_parameters()

        # any dataset name other than "fashion_mnist" falls back to mnist
        source = fashion_mnist if self.parameters["dataset"] == "fashion_mnist" else mnist
        (self.x_train, self.y_train), (self.x_test, self.y_test) = source.load_data()

        # scale the pixel values
        self.x_train = self.x_train / 255
        self.x_test = self.x_test / 255

        # sample counts before the validation split is carved out
        self.train_n_samples = self.x_train.shape[0]
        self.test_n_samples = self.x_test.shape[0]

        # shuffle once, then hold out the first 10% of the training data for validation
        shuffle = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[shuffle]
        self.y_train = self.y_train[shuffle]

        holdout = self.train_n_samples // 10
        self.x_validate, self.x_train = self.x_train[:holdout], self.x_train[holdout:]
        self.y_validate, self.y_train = self.y_train[:holdout], self.y_train[holdout:]
        self.train_n_samples = self.x_train.shape[0]

        # human-readable class names; only their count is used to size the output layer
        self.title = ["T-shirt/top", "Trouser", "PullOver", "Dress",
                      "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle Boot"]
        self.no_of_label = len(self.title)

        # expose the hyper-parameters as attributes
        cfg = self.parameters
        self.epoch = cfg["epochs"]
        self.batch_size = cfg["batch_size"]
        self.lossFunction = cfg["loss"]
        self.optimizer = cfg["optimizer"]
        self.learningRate = cfg["learning_rate"]
        self.weightInitialization = cfg["weight_init"]
        self.hl = cfg["num_layers"]          # number of hidden layers
        self.L = cfg["num_layers"] + 1       # hidden layers plus the output layer
        self.nnl = cfg["hidden_size"]        # neurons per hidden layer
        self.activationFunction = cfg["activation"]
        self.outputFunction = cfg["output_function"]
        self.weight_decay = cfg["weight_decay"]

        # network dimensions: classes, flattened input size, training samples
        self.k = len(self.title)
        self.d = self.x_train.shape[1] * self.x_train.shape[2]
        self.n = self.train_n_samples

        # model parameters, optimizer state and per-sample activation caches
        for attribute in ("weights", "bias",
                          "wHistory", "bHistory",
                          "wMomentum", "bMomentum",
                          "prev_wHistory", "prev_bHistory",
                          "pre_activation", "post_activation"):
            setattr(self, attribute, {})
        
    # updates the default parameters with the parameters given on the command line
    def update_parameters(self):

        parser = argparse.ArgumentParser(description='DL Assignment 1 Parser')

        parser.add_argument('-wp', '--wandb_project',
                            type=str, metavar='', help='wandb project')
        parser.add_argument('-we', '--wandb_entity', type=str,
                            metavar='', help='wandb entity')
        parser.add_argument('-d', '--dataset', type=str,
                            metavar='', help='dataset')
        parser.add_argument('-e', '--epochs', type=int,
                            metavar='', help='epochs')
        parser.add_argument('-b', '--batch_size', type=int,
                            metavar='', help='batch size')
        parser.add_argument('-l', '--loss', type=str, 
                            metavar='', help='loss')
        parser.add_argument('-o', '--optimizer', type=str,
                            metavar='', help='optimizer')
        parser.add_argument('-lr', '--learning_rate',
                            type=float, metavar='', help='learning rate')
        parser.add_argument('-m', '--momentum', type=float,
                            metavar='', help='momentum')
        parser.add_argument('-beta', '--beta', type=float,
                            metavar='', help='beta')
        parser.add_argument('-beta1', '--beta1', type=float,
                            metavar='', help='beta1')
        parser.add_argument('-beta2', '--beta2', type=float,
                            metavar='', help='beta2')
        parser.add_argument('-eps', '--epsilon', type=float,
                            metavar='', help='epsilon')
        parser.add_argument('-w_d', '--weight_decay',
                            type=float, metavar='', help='weight decay')
        parser.add_argument('-w_i', '--weight_init', type=str,
                            metavar='', help='weight init')
        parser.add_argument('-nhl', '--num_layers', type=int,
                            metavar='', help='num layers')
        parser.add_argument('-sz', '--hidden_size', type=int,
                            metavar='', help='hidden size')
        parser.add_argument('-a', '--activation', type=str,
                            metavar='', help='activation')
        parser.add_argument('-of', '--output_function',
                            type=str, metavar='', help='output function')
        args = parser.parse_args()

        if (args.wandb_project != None):
            self.parameters["wandb_project"] = args.wandb_project
        if (args.wandb_entity != None):
            self.parameters["wandb_entity"] = args.wandb_entity
        if (args.dataset != None):
            self.parameters["dataset"] = args.dataset
        if (args.epochs != None):
            self.parameters["epochs"] = args.epochs
        if (args.batch_size != None):
            self.parameters["batch_size"] = args.batch_size
        if (args.loss != None):
            self.parameters["loss"] = args.loss
        if (args.optimizer != None):
            self.parameters["optimizer"] = args.optimizer
        if (args.learning_rate != None):
            self.parameters["learning_rate"] = args.learning_rate
        if (args.momentum != None):
            self.parameters["momentum"] = args.momentum
        if (args.beta != None):
            self.parameters["beta"] = args.beta
        if (args.beta1 != None):
            self.parameters["beta1"] = args.beta1
        if (args.beta2 != None):
            self.parameters["beta2"] = args.beta2
        if (args.epsilon != None):
            self.parameters["epsilon"] = args.epsilon
        if (args.weight_decay != None):
            self.parameters["weight_decay"] = args.weight_decay
        if (args.weight_init != None):
            self.parameters["weight_init"] = args.weight_init
        if (args.num_layers != None):
            self.parameters["num_layers"] = args.num_layers
        if (args.hidden_size != None):
            self.parameters["hidden_size"] = args.hidden_size
        if (args.activation != None):
            self.parameters["activation"] = args.activation

    # function to initialize weights and bias based on type -> random or Xavier initialization
    def weightsAndBiasInitializer(self):
        if self.weightInitialization == "Xavier":

            # first and last matrix 
            self.weights["w1"] = np.random.uniform(-np.sqrt(6 / (self.nnl + self.d)), np.sqrt(6 / (self.nnl + self.d)), (self.nnl, self.d))
            self.weights["w" + str(self.L)] = np.random.uniform(-np.sqrt(6 / (self.k + self.nnl)),np.sqrt(6 / (self.k + self.nnl)), (self.k, self.nnl))

            # Intermediate Matrices
            for i in range(2, self.L):
                self.weights["w" + str(i)] = np.random.uniform(-np.sqrt(6 / (self.nnl + self.nnl)), np.sqrt(6 / (self.nnl + self.nnl)), (self.nnl, self.nnl))

            # Last Vector
            self.bias["b" + str(self.L)] = np.random.uniform(-np.sqrt(6 / (self.k + 1)),np.sqrt(6 / (self.k + 1)), (self.k))

            for i in range(1, self.L):
                self.bias["b" + str(i)] = np.random.uniform(-np.sqrt(6 / (self.nnl + 1)),np.sqrt(6 / (self.nnl + 1)), (self.nnl))

        if self.weightInitialization == "random":
            # initailzation of weights
            '''
                  W1 = (d,nnl)
                  W2,..,W(L - 1) = (nnl,nnl)
                  WL = (k,nnl)
            '''
            w1 = np.random.normal(0, 0.5, size=(self.nnl, self.d))
            self.weights["w1"] = w1
            for i in range(2, self.L):
                self.weights["w" + str(i)] = np.random.normal(0,0.5, size=(self.nnl, self.nnl))
            self.weights["w" + str(self.L)] = np.random.normal(0,0.5, size=(self.k, self.nnl))

            # initialization of bias
            for i in range(1, self.L):
                self.bias["b" + str(i)] = np.random.normal(0,0.5, size=(self.nnl))
            self.bias["b" + str(self.L)] = np.random.normal(0,0.5, size=(self.k))

    # function to initialize momentum for weights and bias
    def momentumInitializer(self):

        # initializing momentum for weights
        w1 = np.zeros((self.nnl, self.d))
        self.wMomentum["w1"] = w1
        for i in range(2, self.L):
            self.wMomentum["w" + str(i)] = np.zeros((self.nnl, self.nnl))
        self.wMomentum["w" + str(self.L)] = np.zeros((self.k, self.nnl))

        # initializing momentum for bais
        for i in range(1, self.L):
            self.bMomentum["b" + str(i)] = np.zeros((self.nnl))
        self.bMomentum["b" + str(self.L)] = np.zeros((self.k))

    # function to initialize history for weights and bias
    def historyInitializer(self):

        # initializing history for weights
        w1 = np.zeros((self.nnl, self.d))
        self.wHistory["w1"] = w1
        for i in range(2, self.L):
            self.wHistory["w" + str(i)] = np.zeros((self.nnl, self.nnl))
        self.wHistory["w" + str(self.L)] = np.zeros((self.k, self.nnl))

        # initializing history for bais
        for i in range(1, self.L):
            self.bHistory["b" + str(i)] = np.zeros((self.nnl))
        self.bHistory["b" + str(self.L)] = np.zeros((self.k))

    # function used to implement different activation functions
    def activation_func(self, vector):
        if self.activationFunction == "sigmoid":
            return 1.0 / (1 + np.exp(-(vector)))
        
        if self.activationFunction == "tanh":
            return np.tanh(vector)
        
        if self.activationFunction == "ReLU":
            return np.maximum(0,vector)

    # function used to implement different output functions
    def output_func(self, vector):
        if self.outputFunction == "softmax":

            vector = vector - vector[np.argmax(vector)]

            return np.exp(vector) / np.sum(np.exp(vector))

    # function generating one-hot vector
    def oneHotVector(self, size, index):
        oneHot = np.zeros(size)
        oneHot[index] = 1.0
        return oneHot

    # function returning the differentiation of activation function
    def differentiation(self, vector):

        if self.activationFunction == "sigmoid":
            return (1.0 / (1 + np.exp(-(vector)))) * (1 - 1.0 / (1 + np.exp(-(vector))))

        if self.activationFunction == "tanh":
            return 1 - (np.tanh(vector)) ** 2

        if self.activationFunction == "ReLU":
            t = np.maximum(0,vector)
            t[t > 0] = 1
            return t

    # regularization
    def regularize(self):
        reg_term = 0
        validation_size = self.y_validate.shape[0]
        
        for (key,value) in self.weights.items():
          reg_term += (np.sum(self.weights[key] ** 2))
        reg_term = (self.weight_decay / (2 * validation_size)) * reg_term

        return reg_term

    # function returning the loss function value
    def loss_function(self, y_predicted, index):

        if self.lossFunction == "cross_entropy":
            t = 1e-8
            return (-1)*np.log(y_predicted[index] + t)

        if self.lossFunction == "mean_squared_error":
            y = self.oneHotVector(size=self.no_of_label, index=index)
            return np.sum((y_predicted - y) ** 2)

    # forward propagation - computes pre_activation vector,post_activation vector for each layer and predicts y at last layer
    def forward_propagation(self, input, index):

        # Populating pre_activation and post_activation vectors to dictionary in each layer for input[index]
        for k in range(1, self.L):

            # for first layer,post activation will be input
            if (k == 1):
                ''' flattening the input: 
                    -input(60000,28,28)
                    -input[index] size = (28,28)
                    -flattening input[index] gives size (784,1) = (d,1) where d is dimension of input
                    post_activation[h0] size = (d,1)
                    bias[b1] size = (nnl,1)
                    weights[w1] size = (nnl,d)
                    Therefore we get pre_activation[a1] size = (nnl,1) for all layer except last layer
                '''
                self.post_activation["h" + str(k - 1)] = input[index].flatten()

            # computing a(k) = b(k) + w(k)*h(k - 1) for each input[index]
            self.pre_activation["a" + str(k)] = self.bias["b" + str(k)] + np.dot(self.weights["w" + str(k)], self.post_activation["h" + str(k - 1)])
           
            # computing h(k) = g(a(k)) where g is activation function
            self.post_activation["h" + str(k)] = self.activation_func(self.pre_activation["a" + str(k)])

        # computing pre_activation for last layer
        self.pre_activation["a" + str(self.L)] = self.bias["b" + str(self.L)] + np.dot(self.weights["w" + str(self.L)], self.post_activation["h" + str(self.L - 1)])

        # prediction y (y_hat) = O(a(L)) where O is output function
        # self.pre_activation["a" + str(self.L)] = self.pre_activation["a" + str(self.L)] / np.linalg.norm(self.pre_activation["a" + str(self.L)])
        self.post_activation["h" +str(self.L)] = self.output_func(self.pre_activation["a" + str(self.L)])

    # performs back propagation and returns gradients of weights and bias
    def backward_propagation(self, index, actual_y):

        grad_pre_activation = {}
        grad_post_activation = {}
        grad_weights = {}
        grad_bias = {}

        predicted_y = self.post_activation["h" + str(self.L)]

        # Computing output gradient
        one_hot_vector = self.oneHotVector(self.no_of_label, actual_y[index])
        if self.lossFunction == "cross_entropy" :
          grad_pre_activation["a" + str(self.L)] = (predicted_y - one_hot_vector)
        else :
          grad_pre_activation["a" + str(self.L)] = -2 * (one_hot_vector - predicted_y) * (predicted_y * (np.ones(self.no_of_label) - predicted_y))
       
        
        k = self.L
        while k > 0:

            # Computing gradient w.r.t parameters - weight and bais
            '''
              np.reshape(grad_pre_activation["a" + str(L)],(-1,1)) = (k,1)
              np.reshape(post_activation["h" + str(L - 1)],(1,-1)) = (1,nnl)
            '''
            grad_weights["w" + str(k)] = np.dot(np.reshape(grad_pre_activation["a" + str(k)], (-1, 1)), np.reshape(self.post_activation["h" + str(k - 1)], (1, -1)))
            grad_bias["b" + str(k)] = grad_pre_activation["a" + str(k)]

            if k != 1:
                # Computing gradient w.r.t layer below (post_activation)
                grad_post_activation["h" + str(k - 1)] = np.dot(self.weights["w" + str(k)].T, np.reshape(grad_pre_activation["a" + str(k)], (-1, 1))).flatten()

                # Computing gradient w.r.t layer below (pre_activation)
                g_dash = self.differentiation(self.pre_activation["a" + str(k - 1)])
                grad_pre_activation["a" +str(k - 1)] = grad_post_activation["h" + str(k - 1)] * g_dash

            k = k - 1
        return grad_weights, grad_bias

    # function returning zero-initialized accumulators for the gradients
    def make_accumalate_zero(self):

        acc_grad_weights = {}
        acc_grad_bias = {}

        # accumalated weights are set to zero
        acc_grad_weights["w1"] = np.zeros((self.nnl, self.d))
        for i in range(2, self.L):
            acc_grad_weights["w" + str(i)] = np.zeros((self.nnl, self.nnl))
        acc_grad_weights["w" + str(self.L)] = np.zeros((self.k, self.nnl))

        # accumalated bias are set to zero
        for i in range(1, self.L):
            acc_grad_bias["b" + str(i)] = np.zeros((self.nnl))
        acc_grad_bias["b" + str(self.L)] = np.zeros((self.k))

        return acc_grad_weights, acc_grad_bias

    # runs stochastic gradient descent for one epoch
    def oneEpochSGD(self, epoch):
        ''' Executes A Single Epoch for Stochastic Gradient Descent Algorithm.
            Returns the training loss,training accuracy,validaiton loss and validation accuracy,averaged over all points. '''
        
        n = self.train_n_samples
        
        # randomizing batches
        idx = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[idx]
        self.y_train = self.y_train[idx]

        input = self.x_train
        actual_y = self.y_train

        # total Loss for epoch
        loss_input = 0
        count = 0

        # execute one epoch for all datapoints in train set
        for index in range(n):

            # perform forward propagation
            self.forward_propagation(input, index)
            predicted_y = self.post_activation["h" + str(self.L)]

            # compute loss
            loss_input += self.loss_function(predicted_y, actual_y[index])

            # perform backward propagation
            grad_weights, grad_bias = self.backward_propagation(index, actual_y)

            # compute the number of datapoints which are correctly classified
            indexWithMaxProb = np.argmax(predicted_y)
            if (actual_y[index] == (indexWithMaxProb)):
                count = count + 1

            # update weights and bias if the number of datapoints in batch_size are divisble by batch_size
            if ((index + 1) % self.batch_size == 0):
                # update weights
                for (key, value) in self.weights.items():
                    self.weights[key] = self.weights[key] - ((self.learningRate / self.batch_size) * grad_weights[key])

                # update bias
                for (key, value) in self.bias.items():
                    self.bias[key] = self.bias[key] - ((self.learningRate / self.batch_size) * grad_bias[key])

        # if the number of datapoints in batch is not divisible by batch_size update weights and bias 
        if n % self.batch_size != 0:
            # update weights
            for (key, value) in self.weights.items():
                self.weights[key] = self.weights[key] - ((self.learningRate / self.batch_size) * grad_weights[key])

            # update bias
            for (key, value) in self.bias.items():
                self.bias[key] = self.bias[key] - ((self.learningRate / self.batch_size) * grad_bias[key])

        # compute trainAccuracy,trainLoss averaged over train size
        trainAccuracy = count / n
        trainLoss = loss_input / n + self.regularize()

        # compute validationAccuracy,validationLoss avergaed over test size 
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()
        
        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs momentum gradient descent for one epoch
    def oneEpochMOMENTUM(self, epoch):
        ''' Executes A Single Epoch for Momentum Gradient Descent Algorithm.
            Returns the training loss,training accuracy,validaiton loss and validation accuracy,averaged over all points. '''

        n = self.train_n_samples
        
        # randomizing batches
        idx = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[idx]
        self.y_train = self.y_train[idx]

        input = self.x_train
        actual_y = self.y_train

        # maintaining previous history for weights and bias
        self.prev_wHistory, self.prev_bHistory = self.wHistory, self.bHistory

        # Total Loss for epoch
        loss_input = 0
        count = 0  
        beta = self.parameters["momentum"]

        # set accumalated gradients to zero
        acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # execute one epoch for all datapoints in train set
        for index in range(n):

            # perform forward propagation
            self.forward_propagation(input, index)
            predicted_y = self.post_activation["h" + str(self.L)]

            # compute loss
            loss_input += self.loss_function(predicted_y, actual_y[index])

            # perform backward propagation
            grad_weights, grad_bias = self.backward_propagation(
                index, actual_y)

            # compute the number of datapoints which are correctly classified
            indexWithMaxProb = np.argmax(predicted_y)
            if (actual_y[index] == (indexWithMaxProb)):
                count = count + 1

            # accumulate grad_weights and grad_bais for each input
            for (key, value) in grad_weights.items():
                acc_grad_weights[key] = acc_grad_weights[key] + grad_weights[key]

            for (key, value) in grad_bias.items():
                acc_grad_bias[key] = acc_grad_bias[key] + grad_bias[key]


            # update weights and bias if the number of datapoints in batch_size are divisble by batch_size
            if ((index + 1) % self.batch_size == 0):

                # update weight history
                for (key, value) in self.wHistory.items():
                    self.wHistory[key] = beta * self.prev_wHistory[key] + \
                        ((self.learningRate / self.batch_size) * acc_grad_weights[key])

                # update bias history
                for (key, value) in self.bHistory.items():
                    self.bHistory[key] = beta * self.prev_bHistory[key] + \
                        ((self.learningRate / self.batch_size) * acc_grad_bias[key])

                # update weights
                for (key, value) in self.weights.items():
                    self.weights[key] = self.weights[key] - self.wHistory[key]

                # update bias
                for (key, value) in self.bias.items():
                    self.bias[key] = self.bias[key] - self.bHistory[key]

                # updating histroy for weights and bias
                self.prev_wHistory = self.wHistory
                self.prev_bHistory = self.bHistory

                # set accumalated gradients to zero
                acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

            # if the number of datapoints in batch is not divisible by batch_size update weights and bias 
        if n % self.batch_size != 0:
            
            # update weight history
            for (key, value) in self.wHistory.items():
                self.wHistory[key] = beta * self.prev_wHistory[key] + \
                    ((self.learningRate / self.batch_size) * acc_grad_weights[key])

            # update bias history
            for (key, value) in self.bHistory.items():
                self.bHistory[key] = beta * self.prev_bHistory[key] + \
                    ((self.learningRate / self.batch_size) * acc_grad_bias[key])

            # update weights
            for (key, value) in self.weights.items():
                self.weights[key] = self.weights[key] - self.wHistory[key]

            # update bias
            for (key, value) in self.bias.items():
                self.bias[key] = self.bias[key] - self.bHistory[key]

            # updating histroy for weights and bias
            self.prev_wHistory = self.wHistory
            self.prev_bHistory = self.bHistory

            # set accumalated gradients to zero
            acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # compute trainAccuracy,trainLoss averaged over train size
        trainAccuracy = count / n
        trainLoss = loss_input / n + self.regularize()

        # compute validationAccuracy,validationLoss averaged over test size 
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()
        
        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs Nesterov accelerated gradient descent for one epoch
    def oneEpochNAG(self, epoch):
        ''' Executes A Single Epoch for Nesterov Accelerated Gradient Descent Algorithm.
            Returns the training loss,training accuracy,validaiton loss and validation accuracy,averaged over all points. '''
        
        n = self.train_n_samples
        
        # randomizing batches
        idx = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[idx]
        self.y_train = self.y_train[idx]

        input = self.x_train
        actual_y = self.y_train

        # maintaining previous history for weights and bias
        self.prev_wHistory, self.prev_bHistory = self.wHistory, self.bHistory
        
        # Total Loss for epoch
        loss_input = 0
        count = 0
        beta = self.parameters["momentum"]

        # set accumalated gradients to zero
        acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()
        
        # computing partial values
        self.partial_wHistory = {}
        self.partial_bHistory = {}

        for (key, value) in self.wHistory.items():
            self.partial_wHistory[key] = beta * self.prev_wHistory[key]

        for (key, value) in self.bHistory.items():
            self.partial_bHistory[key] = beta * self.prev_bHistory[key]

        # execute one epoch for all datapoints in train set
        for index in range(n):

            # perfrom forward propagation
            self.forward_propagation(input, index)
            predicted_y = self.post_activation["h" + str(self.L)]

            # storing weights and bias in temperory values
            temp_weights = copy.deepcopy(self.weights)
            temp_bias = copy.deepcopy(self.bias)

            # update weights and bias
            for (key, value) in self.weights.items():
                self.weights[key] = self.weights[key] -  self.partial_wHistory[key]

            for (key, value) in self.bias.items():
                self.bias[key] = self.bias[key] - self.partial_bHistory[key]

            # perform backward propagation
            grad_weights, grad_bias = self.backward_propagation(index,actual_y)

            # update weights and bias 
            self.weights = temp_weights
            self.bias = temp_bias

            # accumulate grad_weights and grad_bais for each input
            for (key, value) in grad_weights.items():
                acc_grad_weights[key] = acc_grad_weights[key] + grad_weights[key]

            for (key, value) in grad_bias.items():
                acc_grad_bias[key] = acc_grad_bias[key] + grad_bias[key]

            # compute loss
            loss_input += self.loss_function(predicted_y, self.y_train[index])

            # compute the number of datapoints which are correctly classified
            indexWithMaxProb = np.argmax(predicted_y)
            if (actual_y[index] == (indexWithMaxProb)):
                count = count + 1

            # update weights and bias if the number of datapoints in batch_size are divisble by batch_size
            if ((index + 1) % self.batch_size == 0):
    
                # update weight history
                for (key, value) in self.wHistory.items():
                    self.wHistory[key] = beta * self.prev_wHistory[key] + ((self.learningRate / self.batch_size) * acc_grad_weights[key])

                # update bias history
                for (key, value) in self.bHistory.items():
                    self.bHistory[key] = beta * self.prev_bHistory[key] + ((self.learningRate / self.batch_size) * acc_grad_bias[key])

                # update weights
                for (key, value) in self.weights.items():
                    self.weights[key] = self.weights[key] - self.wHistory[key]

                for (key, value) in self.bias.items():
                    self.bias[key] = self.bias[key] - self.bHistory[key]

                # updating histroy for weights and bias
                self.prev_wHistory = self.wHistory
                self.prev_bHistory = self.bHistory
                
                # set accumalated gradients to zero
                acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()
            
        # if the number of datapoints in batch is not divisible by batch_size update weights and bias 
        if n % self.batch_size != 0:

            # update weight history
            for (key, value) in self.wHistory.items():
                self.wHistory[key] = beta * self.prev_wHistory[key] + ((self.learningRate / self.batch_size) * acc_grad_weights[key])

            # update bias history
            for (key, value) in self.bHistory.items():
                self.bHistory[key] = beta * self.prev_bHistory[key] + ((self.learningRate / self.batch_size) * acc_grad_bias[key])

            # update weights
            for (key, value) in self.weights.items():
                self.weights[key] = self.weights[key] - self.wHistory[key]

            for (key, value) in self.bias.items():
                self.bias[key] = self.bias[key] - self.bHistory[key]

            # updating histroy for weights and bias
            self.prev_wHistory = self.wHistory
            self.prev_bHistory = self.bHistory
            
            # set accumalated gradients to zero
            acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()
                
        # compute trainAccuracy,trainLoss averaged over train size
        trainAccuracy = count / n
        trainLoss = loss_input / n + self.regularize()

        # compute validationAccuracy,validationLoss averaged over test size 
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()
        
        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs RMSPROP for one epoch
    def oneEpochRMSPROP(self,epoch):
        ''' Runs one RMSPROP epoch over the (freshly shuffled) training set.

            Per-sample gradients are summed; after every self.batch_size samples, and once
            more for a trailing partial batch, the squared-gradient history is refreshed and
            the weights and biases are moved.

            epoch : epoch index supplied by the caller (not used by this optimizer).

            Returns (trainLoss, trainAccuracy, validationLoss, validationAccuracy); the
            training figures are averaged over all training samples.
        '''
        sample_total = self.train_n_samples

        # one shared permutation keeps features and labels aligned
        shuffle_order = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[shuffle_order]
        self.y_train = self.y_train[shuffle_order]

        features = self.x_train
        targets = self.y_train

        # aliases of the live history dicts (not copies): reads through them
        # always see the most recent running averages
        w_hist_ref, b_hist_ref = self.wHistory, self.bHistory

        decay = self.parameters["beta"]
        stabiliser = self.parameters["epsilon"]
        running_loss = 0
        n_correct = 0

        def rmsprop_step(grad_w_sum, grad_b_sum):
            ''' Refreshes the squared-gradient history, then moves weights and biases. '''
            for name in self.wHistory:
                self.wHistory[name] = decay * w_hist_ref[name] + (1 - decay) * grad_w_sum[name] ** 2

            for name in self.bHistory:
                self.bHistory[name] = decay * b_hist_ref[name] + (1 - decay) * grad_b_sum[name] ** 2

            for name in self.weights:
                scaled_grad = (self.learningRate / self.batch_size) * grad_w_sum[name]
                self.weights[name] = self.weights[name] - scaled_grad / (np.sqrt(self.wHistory[name] + stabiliser))

            for name in self.bias:
                scaled_grad = (self.learningRate / self.batch_size) * grad_b_sum[name]
                self.bias[name] = self.bias[name] - scaled_grad / (np.sqrt(self.bHistory[name] + stabiliser))

            # keep the "previous history" attributes pointing at the current history
            self.prev_wHistory = self.wHistory
            self.prev_bHistory = self.bHistory

        # start with empty gradient accumulators
        grad_w_sum, grad_b_sum = self.make_accumalate_zero()

        for position in range(sample_total):

            # forward pass; the output layer activation is the prediction
            self.forward_propagation(features, position)
            prediction = self.post_activation["h" + str(self.L)]

            # backward pass for this sample
            sample_grad_w, sample_grad_b = self.backward_propagation(position, targets)

            running_loss += self.loss_function(prediction, self.y_train[position])

            # a sample is correct when the most probable class equals its label
            if targets[position] == np.argmax(prediction):
                n_correct = n_correct + 1

            # add this sample's gradients to the batch totals
            for name in sample_grad_w:
                grad_w_sum[name] = grad_w_sum[name] + sample_grad_w[name]

            for name in sample_grad_b:
                grad_b_sum[name] = grad_b_sum[name] + sample_grad_b[name]

            # a full batch has been accumulated: update and start a new batch
            if (position + 1) % self.batch_size == 0:
                rmsprop_step(grad_w_sum, grad_b_sum)
                grad_w_sum, grad_b_sum = self.make_accumalate_zero()

        # leftover samples that did not fill a whole batch still get one update
        if sample_total % self.batch_size != 0:
            rmsprop_step(grad_w_sum, grad_b_sum)
            grad_w_sum, grad_b_sum = self.make_accumalate_zero()

        # training metrics averaged over the training set (loss includes the regularization term)
        trainAccuracy = n_correct / sample_total
        trainLoss = running_loss / sample_total + self.regularize()

        # validation metrics
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()

        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs ADAM gradient descent for one epoch
    def oneEpochADAM(self,epoch):
        ''' Executes a single epoch of the ADAM optimizer on the (freshly shuffled) training set.

            Per-sample gradients are summed; after every self.batch_size samples, and once more
            for a trailing partial batch, the first/second moment estimates are refreshed,
            bias-corrected and used to move the weights and biases.

            epoch : epoch number. The training loops in this class count epochs from 1; the
                    value is only used to derive the global update index for bias correction.

            Returns (trainLoss, trainAccuracy, validationLoss, validationAccuracy); the training
            figures are averaged over all training samples.
        '''
        n = self.train_n_samples

        # randomizing batches: one shared permutation keeps features and labels aligned
        idx = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[idx]
        self.y_train = self.y_train[idx]

        inputs = self.x_train
        actual_y = self.y_train

        beta1 = self.parameters["beta1"]
        beta2 = self.parameters["beta2"]
        epsilon = self.parameters["epsilon"]

        # Bias correction needs t = number of parameter updates performed so far, not the
        # epoch number: with mini-batches there are many updates per epoch. t is rebuilt from
        # the epoch number so that no extra state has to survive between epochs.
        updates_per_epoch = -(-n // self.batch_size)  # ceil(n / batch_size)
        update_count = max(epoch - 1, 0) * updates_per_epoch

        def apply_update(acc_grad_weights, acc_grad_bias, t):
            ''' Performs ADAM update number t from the summed batch gradients. '''
            # moment estimates are updated in place in self.wMomentum / self.wHistory etc.
            for key in self.wMomentum:
                self.wMomentum[key] = beta1 * self.wMomentum[key] + (1 - beta1) * acc_grad_weights[key]

            for key in self.bMomentum:
                self.bMomentum[key] = beta1 * self.bMomentum[key] + (1 - beta1) * acc_grad_bias[key]

            for key in self.wHistory:
                self.wHistory[key] = beta2 * self.wHistory[key] + (1 - beta2) * acc_grad_weights[key] ** 2

            for key in self.bHistory:
                self.bHistory[key] = beta2 * self.bHistory[key] + (1 - beta2) * acc_grad_bias[key] ** 2

            # bias-correction denominators for this update
            momentum_correction = 1 - np.power(beta1, t)
            history_correction = 1 - np.power(beta2, t)

            # The moments are built from the summed (not averaged) batch gradient, so
            # learningRate / batch_size acts as the step size of this implementation.
            step_size = self.learningRate / self.batch_size

            for key in self.weights:
                momentum_hat = self.wMomentum[key] / momentum_correction
                history_hat = self.wHistory[key] / history_correction
                self.weights[key] = self.weights[key] - step_size * momentum_hat / (np.sqrt(history_hat + epsilon))

            for key in self.bias:
                momentum_hat = self.bMomentum[key] / momentum_correction
                history_hat = self.bHistory[key] / history_correction
                self.bias[key] = self.bias[key] - step_size * momentum_hat / (np.sqrt(history_hat + epsilon))

        # Total Loss and number of correctly classified samples for the epoch
        loss_input = 0
        count = 0

        # set accumulated gradients to zero
        acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # execute one epoch for all datapoints in train set
        for index in range(n):

            # perform forward propagation; the output layer activation is the prediction
            self.forward_propagation(inputs, index)
            predicted_y = self.post_activation["h" + str(self.L)]

            # perform backward propagation
            grad_weights, grad_bias = self.backward_propagation(index, actual_y)

            # compute loss
            loss_input += self.loss_function(predicted_y, actual_y[index])

            # a sample is correctly classified when the most probable class equals its label
            if actual_y[index] == np.argmax(predicted_y):
                count = count + 1

            # accumulate grad_weights and grad_bias for each input
            for key in grad_weights:
                acc_grad_weights[key] = acc_grad_weights[key] + grad_weights[key]

            for key in grad_bias:
                acc_grad_bias[key] = acc_grad_bias[key] + grad_bias[key]

            # a full batch has been accumulated: update and start a new batch
            if (index + 1) % self.batch_size == 0:
                update_count += 1
                apply_update(acc_grad_weights, acc_grad_bias, update_count)
                acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # leftover samples that did not fill a whole batch still get one update
        if n % self.batch_size != 0:
            update_count += 1
            apply_update(acc_grad_weights, acc_grad_bias, update_count)

        # compute trainAccuracy, trainLoss averaged over train size (loss includes regularization)
        trainAccuracy = count / n
        trainLoss = loss_input / n + self.regularize()

        # compute validationLoss, validationAccuracy
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()

        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs NADAM gradient descent for one epoch
    def oneEpochNADAM(self,epoch):
        ''' Executes a single epoch of the NADAM optimizer on the (freshly shuffled) training set.

            Per-sample gradients are summed; after every self.batch_size samples, and once more
            for a trailing partial batch, the first/second moment estimates are refreshed,
            bias-corrected and combined with a look-ahead gradient term to move the weights
            and biases.

            epoch : epoch number. The training loops in this class count epochs from 1; the
                    value is only used to derive the global update index for bias correction.

            Returns (trainLoss, trainAccuracy, validationLoss, validationAccuracy); the training
            figures are averaged over all training samples.
        '''
        n = self.train_n_samples

        # randomizing batches: one shared permutation keeps features and labels aligned
        idx = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[idx]
        self.y_train = self.y_train[idx]

        inputs = self.x_train
        actual_y = self.y_train

        beta1 = self.parameters["beta1"]
        beta2 = self.parameters["beta2"]
        epsilon = self.parameters["epsilon"]

        # Bias correction needs t = number of parameter updates performed so far, not the
        # epoch number: with mini-batches there are many updates per epoch. t is rebuilt from
        # the epoch number so that no extra state has to survive between epochs.
        updates_per_epoch = -(-n // self.batch_size)  # ceil(n / batch_size)
        update_count = max(epoch - 1, 0) * updates_per_epoch

        def apply_update(acc_grad_weights, acc_grad_bias, t):
            ''' Performs NADAM update number t from the summed batch gradients. '''
            # moment estimates are updated in place in self.wMomentum / self.wHistory etc.
            for key in self.wMomentum:
                self.wMomentum[key] = beta1 * self.wMomentum[key] + (1 - beta1) * acc_grad_weights[key]

            for key in self.bMomentum:
                self.bMomentum[key] = beta1 * self.bMomentum[key] + (1 - beta1) * acc_grad_bias[key]

            for key in self.wHistory:
                self.wHistory[key] = beta2 * self.wHistory[key] + (1 - beta2) * acc_grad_weights[key] ** 2

            for key in self.bHistory:
                self.bHistory[key] = beta2 * self.bHistory[key] + (1 - beta2) * acc_grad_bias[key] ** 2

            # bias-correction denominators for this update
            momentum_correction = 1 - np.power(beta1, t)
            history_correction = 1 - np.power(beta2, t)

            # The moments are built from the summed (not averaged) batch gradient, so
            # learningRate / batch_size acts as the step size of this implementation.
            step_size = self.learningRate / self.batch_size

            for key in self.weights:
                momentum_hat = self.wMomentum[key] / momentum_correction
                history_hat = self.wHistory[key] / history_correction
                num1 = step_size / np.sqrt(history_hat + epsilon)
                # corrected momentum plus the bias-corrected current gradient (look-ahead term)
                num2 = beta1 * momentum_hat + ((1 - beta1) * acc_grad_weights[key] / momentum_correction)
                self.weights[key] = self.weights[key] - num1 * num2

            for key in self.bias:
                momentum_hat = self.bMomentum[key] / momentum_correction
                history_hat = self.bHistory[key] / history_correction
                num1 = step_size / np.sqrt(history_hat + epsilon)
                num2 = beta1 * momentum_hat + ((1 - beta1) * acc_grad_bias[key] / momentum_correction)
                self.bias[key] = self.bias[key] - num1 * num2

        # Total Loss and number of correctly classified samples for the epoch
        loss_input = 0
        count = 0

        # set accumulated gradients to zero
        acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # execute one epoch for all datapoints in train set
        for index in range(n):

            # perform forward propagation; the output layer activation is the prediction
            self.forward_propagation(inputs, index)
            predicted_y = self.post_activation["h" + str(self.L)]

            # perform backward propagation
            grad_weights, grad_bias = self.backward_propagation(index, actual_y)

            # compute loss
            loss_input += self.loss_function(predicted_y, actual_y[index])

            # a sample is correctly classified when the most probable class equals its label
            if actual_y[index] == np.argmax(predicted_y):
                count = count + 1

            # accumulate grad_weights and grad_bias for each input
            for key in grad_weights:
                acc_grad_weights[key] = acc_grad_weights[key] + grad_weights[key]

            for key in grad_bias:
                acc_grad_bias[key] = acc_grad_bias[key] + grad_bias[key]

            # a full batch has been accumulated: update and start a new batch
            if (index + 1) % self.batch_size == 0:
                update_count += 1
                apply_update(acc_grad_weights, acc_grad_bias, update_count)
                acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # leftover samples that did not fill a whole batch still get one update
        if n % self.batch_size != 0:
            update_count += 1
            apply_update(acc_grad_weights, acc_grad_bias, update_count)

        # compute trainAccuracy, trainLoss averaged over train size (loss includes regularization)
        trainAccuracy = count / n
        trainLoss = loss_input / n + self.regularize()

        # compute validationLoss, validationAccuracy
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()
        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # runs gradient descent for one epoch
    def oneEpochGD(self, epoch):
        ''' Executes a single epoch of vanilla (mini-batch) gradient descent on the freshly
            shuffled training set.

            Per-sample gradients are summed; after every self.batch_size samples, and once more
            for a trailing partial batch, the weights and biases are moved along the average
            gradient of the samples in that batch.

            epoch : epoch index supplied by the caller (not used by this optimizer).

            Returns (trainLoss, trainAccuracy, validationLoss, validationAccuracy); the training
            figures are averaged over all training samples.
        '''
        n = self.train_n_samples

        # randomizing batches: one shared permutation keeps features and labels aligned
        idx = np.random.permutation(self.train_n_samples)
        self.x_train = self.x_train[idx]
        self.y_train = self.y_train[idx]

        inputs = self.x_train
        actual_y = self.y_train

        def apply_update(acc_grad_weights, acc_grad_bias, samples_in_batch):
            ''' Moves weights and biases along the mean gradient of the accumulated samples. '''
            # divide by the number of samples actually accumulated so that a short trailing
            # batch is averaged correctly instead of being scaled down by the full batch size
            step_size = self.learningRate / samples_in_batch

            for key in self.weights:
                self.weights[key] = self.weights[key] - (step_size * acc_grad_weights[key])

            for key in self.bias:
                self.bias[key] = self.bias[key] - (step_size * acc_grad_bias[key])

        # Total Loss and number of correctly classified samples for the epoch
        loss_input = 0
        count = 0

        # set accumulated gradients to zero
        acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # execute one epoch for all datapoints in train set
        for index in range(n):

            # perform forward propagation; the output layer activation is the prediction
            self.forward_propagation(inputs, index)
            predicted_y = self.post_activation["h" + str(self.L)]

            # compute loss
            loss_input += self.loss_function(predicted_y, actual_y[index])

            # perform backward propagation
            grad_weights, grad_bias = self.backward_propagation(index, actual_y)

            # a sample is correctly classified when the most probable class equals its label
            if actual_y[index] == np.argmax(predicted_y):
                count = count + 1

            # accumulate grad_weights and grad_bias for each input
            for key in grad_weights:
                acc_grad_weights[key] = acc_grad_weights[key] + grad_weights[key]

            for key in grad_bias:
                acc_grad_bias[key] = acc_grad_bias[key] + grad_bias[key]

            # a full batch has been accumulated: update and start a new batch
            if (index + 1) % self.batch_size == 0:
                apply_update(acc_grad_weights, acc_grad_bias, self.batch_size)
                acc_grad_weights, acc_grad_bias = self.make_accumalate_zero()

        # leftover samples that did not fill a whole batch still get one update
        leftover = n % self.batch_size
        if leftover != 0:
            apply_update(acc_grad_weights, acc_grad_bias, leftover)

        # compute trainAccuracy, trainLoss averaged over train size (loss includes regularization)
        trainAccuracy = count / n
        trainLoss = loss_input / n + self.regularize()

        # compute validationLoss, validationAccuracy
        validationLoss, validationAccuracy = self.computeTestLossAndAccuracy()

        return trainLoss, trainAccuracy, validationLoss, validationAccuracy

    # executes a single epoch of the FeedForward NN according to the optimizer function. 
    def executeOneEpoch(self,epoch):
        ''' Runs one training epoch with the optimizer named by self.optimizer.

            epoch : epoch number, passed through unchanged to the optimizer routine.

            Returns the (trainLoss, trainAccuracy, validationLoss, validationAccuracy) tuple
            produced by the selected routine.

            Raises ValueError when self.optimizer is not a known optimizer name, instead of
            silently returning None (which callers would then fail to unpack).
        '''
        # optimizer name -> name of the method implementing one epoch of it;
        # the method is looked up lazily so only the selected routine is touched
        epoch_routines = {
            "sgd": "oneEpochSGD",
            "momentum": "oneEpochMOMENTUM",
            "nestrov": "oneEpochNAG",    # spelling used by the existing sweep configuration
            "nesterov": "oneEpochNAG",   # correctly spelled alias
            "nag": "oneEpochNAG",
            "rmsprop": "oneEpochRMSPROP",
            "adam": "oneEpochADAM",
            "nadam": "oneEpochNADAM",
            "gd": "oneEpochGD",
        }

        routine_name = epoch_routines.get(self.optimizer)
        if routine_name is None:
            raise ValueError(
                "unknown optimizer {!r}; expected one of: {}".format(
                    self.optimizer, ", ".join(sorted(epoch_routines))))

        return getattr(self, routine_name)(epoch)

    # computes validation loss and validation accuracy 
    def computeTestLossAndAccuracy(self):
        ''' Evaluates the current network on the validation split.

            Every validation sample is pushed through forward propagation; its loss is summed
            and it counts as correct when the most probable output class equals its label.

            Returns (validationLoss, validationAccuracy): the mean loss plus the
            regularization term, and the fraction of correctly classified samples.
        '''
        features = self.x_validate
        targets = self.y_validate
        total = self.y_validate.shape[0]

        loss_sum = 0
        hits = 0

        for position in range(total):
            # forward pass; the output layer activation is the prediction
            self.forward_propagation(features, position)
            prediction = self.post_activation["h" + str(self.L)]

            loss_sum += self.loss_function(prediction, targets[position])

            if targets[position] == np.argmax(prediction):
                hits = hits + 1

        # averages over the validation set
        accuracy = hits / total
        mean_loss = loss_sum / total + self.regularize()

        return mean_loss, accuracy

    '''<----------------------------Question 1------------------------------------->'''
    def question_1(self):
        ''' Logs one randomly chosen training image per class label to wandb.

            A wandb run is started in the project named by self.parameters["wandb_project"].
            For every distinct label in self.y_train one sample carrying that label is drawn at
            random, wrapped in a wandb.Image captioned with self.title[label], and all images
            are logged together under the key "Images" (ordered by label).
        '''
        wandb.init(
                # set the wandb project where this run will be logged;
                # read from this instance rather than from a module-level variable
                project = self.parameters["wandb_project"],
        )

        labels = np.asarray(self.y_train)

        images = []
        # visiting each distinct label once guarantees exactly one image per class,
        # instead of hoping that random draws eventually hit every class
        for label in np.unique(labels):
            # positions of all training samples carrying this label
            candidates = np.flatnonzero(labels == label)
            index = candidates[random.randrange(len(candidates))]
            image = wandb.Image(
                self.x_train[index], caption=f"{self.title[label]}")
            images.append(image)
        wandb.log({"Images": images})
    
    '''<----------------------------Question 2------------------------>'''

    def feed_forward_q2(self): 
        ''' Trains the network for self.epoch epochs, then prints the output-layer
            activations for one randomly chosen validation sample. '''
        # start from freshly initialized weights and biases
        self.weightsAndBiasInitializer()

        # training: the per-epoch metrics are not used here
        for epoch_no in range(1, self.epoch + 1):
            _train_loss, _train_acc, _val_loss, _val_acc = self.executeOneEpoch(epoch_no)

        # run a single random validation sample through the trained network
        sample_position = np.random.randint(self.y_validate.shape[0])
        validation_features = self.x_validate
        self.forward_propagation(validation_features, sample_position)
        output_key = "h" + str(self.L)
        print(self.post_activation[output_key])

    '''<----------------------------Question 3-4------------------------------------->'''
    def feed_forward_q3_4(self):
        ''' Trains the network from scratch for self.epoch epochs.

            All parameter, history and momentum dictionaries are reset and re-initialized;
            after every epoch the train/validation loss and accuracy are printed and logged
            to wandb. The latest validation accuracy is kept in self.validation_Accuracy.
        '''
        # drop whatever state an earlier run left behind
        for state_name in ("weights", "bias", "wHistory", "bHistory", "wMomentum", "bMomentum"):
            setattr(self, state_name, {})

        # weights/bias first, then optimizer history, then optimizer momentum
        self.weightsAndBiasInitializer()
        self.historyInitializer()
        self.momentumInitializer()

        self.validation_Accuracy = 0

        report = "epoch:{epoch}, train loss:{train_l}, train accuracy:{train_ac}, validation loss:{validation_l}, validation accuracy:{validation_ac}"

        # epochs are counted from 1
        for epoch_no in range(1, self.epoch + 1):
            train_loss, train_acc, val_loss, val_acc = self.executeOneEpoch(epoch_no)
            self.validation_Accuracy = val_acc

            print(report.format(
                epoch = epoch_no,
                train_l = train_loss,
                train_ac = train_acc,
                validation_l = val_loss,
                validation_ac = self.validation_Accuracy))

            wandb.log({
                'train loss': train_loss,
                'train accuracy': train_acc,
                'validation loss': val_loss,
                'validation accuracy': self.validation_Accuracy,
            })
          

In [None]:
# Shared network instance: train() below reconfigures and reuses this object for every sweep run.
feed_forward = FeedForward()

In [None]:
sweep_config = {
    # search strategy for the sweep; wandb also offers 'grid' and 'bayes'
    'method': 'random',
    'name': 'random_sweep cross_entropy',
    # the sweep maximizes the 'validation accuracy' value logged by each run
    'metric': {'name': 'validation accuracy', 'goal': 'maximize'},
    # every hyperparameter is drawn from a discrete list of candidate values
    'parameters': {
        option: {'values': candidates}
        for option, candidates in {
            'epochs': [5, 10],
            'number_of_hidden_layer': [3, 4, 5],
            'size_of_hidden_layer': [32, 64, 128],
            'weight_decay': [0, 0.0005, 0.5],
            'learning_rate': [1e-3, 1e-4],
            'optimizer': ['sgd', 'momentum', 'nestrov', 'rmsprop', 'adam', 'nadam'],
            'batch_size': [16, 32, 64],
            'weight_initialization': ['random', 'Xavier'],
            'activation': ['sigmoid', 'tanh', 'ReLU'],
        }.items()
    },
}

# register the sweep in the configured wandb project; the returned id is what agents attach to
sweep_id = wandb.sweep(sweep=sweep_config, project=feed_forward.parameters["wandb_project"])

Create sweep with ID: eayjxkwm
Sweep URL: https://wandb.ai/cs22m019/DL%20Final%20Assignment%201/sweeps/eayjxkwm


In [None]:
def train():
    """Run one sweep trial: configure the shared model from W&B and train it.

    Intended to be used as the ``function`` of ``wandb.agent``. The agent
    injects the hyperparameters sampled for this trial into ``wandb.config``,
    so ``wandb.init()`` is called without a ``config`` argument. Passing the
    sweep definition there would only add its ``method`` / ``metric`` /
    ``parameters`` keys to every run's config without supplying any
    hyperparameter value.

    Side effects:
        * starts a W&B run and renames it after its key hyperparameters;
        * overwrites hyperparameter attributes on the module-level
          ``feed_forward`` object;
        * calls ``feed_forward.feed_forward_q3_4()``.

    NOTE(review): the same ``feed_forward`` instance is reused for every
    trial; confirm that ``feed_forward_q3_4`` re-initialises all training
    state so nothing carries over between runs.
    """
    # Project/entity and the sampled hyperparameters come from the sweep agent.
    wandb.init()
    config = wandb.config

    feed_forward.epoch = config.epochs
    feed_forward.nnl = config.size_of_hidden_layer
    feed_forward.learningRate = config.learning_rate
    feed_forward.optimizer = config.optimizer
    feed_forward.batch_size = config.batch_size
    feed_forward.weightInitialization = config.weight_initialization
    feed_forward.activationFunction = config.activation
    # presumably L also counts the output layer, hence the +1 - TODO confirm
    feed_forward.L = config.number_of_hidden_layer + 1
    # Both attribute spellings are assigned because it is not visible here
    # which one FeedForward actually reads.
    feed_forward.weightDecay = config.weight_decay
    feed_forward.weight_decay = config.weight_decay

    # Human-readable run name so trials can be told apart in the W&B UI.
    wandb.run.name = "optimizer_{opt}_hl_{layers}_bs_{batch}_ac_{act}".format(
        opt=config.optimizer,
        layers=config.number_of_hidden_layer,
        batch=config.batch_size,
        act=config.activation,
    )
    feed_forward.feed_forward_q3_4()

In [None]:
# Start a sweep agent in this kernel: it requests configurations for the sweep
# registered above and calls train() once per configuration, for at most 100 runs.
wandb.agent(sweep_id=sweep_id,function = train,count = 100)

[34m[1mwandb[0m: Agent Starting Run: cyzitirc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random
[34m[1mwandb[0m: Currently logged in as: [33mcs22m019[0m. Use [1m`wandb login --relogin`[0m to force relogin


epoch:1, train loss:3.4646655317465602, train accuracy:0.4955555555555556, validation loss:2.5479093518138494, validation accuracy:0.7005
epoch:2, train loss:2.3773906763168635, train accuracy:0.7407962962962963, validation loss:2.275765051386854, validation accuracy:0.7626666666666667
epoch:3, train loss:2.2209347964083976, train accuracy:0.7772222222222223, validation loss:2.188720368789107, validation accuracy:0.7843333333333333
epoch:4, train loss:2.1591311133235083, train accuracy:0.7943518518518519, validation loss:2.1441496096834376, validation accuracy:0.796
epoch:5, train loss:2.1231219431827646, train accuracy:0.8037777777777778, validation loss:2.1155363947294337, validation accuracy:0.804
epoch:6, train loss:2.099002168302847, train accuracy:0.8112962962962963, validation loss:2.0959157467005443, validation accuracy:0.8135
epoch:7, train loss:2.0806891663055342, train accuracy:0.8176111111111111, validation loss:2.0798355514074185, validation accuracy:0.8198333333333333
epo

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇▇▇█████
train loss,█▃▂▂▁▁▁▁▁▁
validation accuracy,▁▄▆▆▇▇████
validation loss,█▄▃▂▂▂▁▁▁▁

0,1
train accuracy,0.83002
train loss,2.04358
validation accuracy,0.82833
validation loss,2.0484


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i7m64zdt with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.516526195172978, train accuracy:0.09981481481481481, validation loss:2.4031773372882097, validation accuracy:0.10166666666666667
epoch:2, train loss:2.3579558982215674, train accuracy:0.09981481481481481, validation loss:2.332381770845464, validation accuracy:0.10166666666666667
epoch:3, train loss:2.318490593299313, train accuracy:0.09981481481481481, validation loss:2.308714840013463, validation accuracy:0.10166666666666667
epoch:4, train loss:2.3058995437951437, train accuracy:0.09898148148148148, validation loss:2.3047594933854674, validation accuracy:0.09733333333333333
epoch:5, train loss:2.303837470487052, train accuracy:0.09927777777777778, validation loss:2.3037648481941657, validation accuracy:0.0985
epoch:6, train loss:2.3033101194247734, train accuracy:0.09981481481481481, validation loss:2.3026443391031464, validation accuracy:0.101
epoch:7, train loss:2.3034662937566774, train accuracy:0.09812962962962964, validation loss:2.3030635130676496, validati

0,1
train accuracy,▆▆▆▃▄▆▁▂█▇
train loss,█▃▂▁▁▁▁▁▁▁
validation accuracy,███▁▃▇▁▂▂█
validation loss,█▃▁▁▁▁▁▁▁▁

0,1
train accuracy,0.10046
train loss,2.30288
validation accuracy,0.10167
validation loss,2.30272


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wumrxwtc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:4.310795188824373, train accuracy:0.2649444444444444, validation loss:3.099555981944458, validation accuracy:0.37633333333333335
epoch:2, train loss:2.6056349775420293, train accuracy:0.43575925925925924, validation loss:2.322498266123775, validation accuracy:0.4723333333333333
epoch:3, train loss:2.0702701917190063, train accuracy:0.5075, validation loss:1.9412264763272051, validation accuracy:0.5196666666666667
epoch:4, train loss:1.7778961285009258, train accuracy:0.5487037037037037, validation loss:1.7056289503974862, validation accuracy:0.5576666666666666
epoch:5, train loss:1.58903944810929, train accuracy:0.5786296296296296, validation loss:1.5522619157910997, validation accuracy:0.584


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇█
train loss,█▄▂▁▁
validation accuracy,▁▄▆▇█
validation loss,█▄▃▂▁

0,1
train accuracy,0.57863
train loss,1.58904
validation accuracy,0.584
validation loss,1.55226


[34m[1mwandb[0m: Agent Starting Run: mtj7ydle with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:2.8290512481780894, train accuracy:0.11638888888888889, validation loss:2.514173143109429, validation accuracy:0.1545
epoch:2, train loss:2.4131067235706967, train accuracy:0.16190740740740742, validation loss:2.3459861762517034, validation accuracy:0.17066666666666666
epoch:3, train loss:2.3144409780229522, train accuracy:0.1774074074074074, validation loss:2.291779711914448, validation accuracy:0.18416666666666667
epoch:4, train loss:2.2788259045678743, train accuracy:0.18748148148148147, validation loss:2.26914165938633, validation accuracy:0.18716666666666668
epoch:5, train loss:2.261886052129084, train accuracy:0.18851851851851853, validation loss:2.2562513603103063, validation accuracy:0.18633333333333332
epoch:6, train loss:2.2502314170408573, train accuracy:0.20096296296296295, validation loss:2.2458008657585804, validation accuracy:0.20533333333333334
epoch:7, train loss:2.2396713451695773, train accuracy:0.22872222222222222, validation loss:2.2354861202848

0,1
train accuracy,▁▃▃▄▄▄▅▆▇█
train loss,█▃▂▂▂▁▁▁▁▁
validation accuracy,▁▂▂▂▂▃▄▆▇█
validation loss,█▄▃▂▂▂▂▂▁▁

0,1
train accuracy,0.30417
train loss,2.20659
validation accuracy,0.30933
validation loss,2.20267


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gpaxubnw with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.348943569671913, train accuracy:0.16742592592592592, validation loss:2.2364740116138346, validation accuracy:0.21583333333333332
epoch:2, train loss:2.1127918593523183, train accuracy:0.31294444444444447, validation loss:2.0074516821657715, validation accuracy:0.33316666666666667
epoch:3, train loss:1.8930694265878776, train accuracy:0.3680925925925926, validation loss:1.7949629460487817, validation accuracy:0.3908333333333333
epoch:4, train loss:1.6875533534821265, train accuracy:0.42924074074074076, validation loss:1.5990277795172883, validation accuracy:0.4696666666666667
epoch:5, train loss:1.4991921993149164, train accuracy:0.5230555555555556, validation loss:1.4225221764016347, validation accuracy:0.5653333333333334
epoch:6, train loss:1.3340490540817234, train accuracy:0.5881481481481482, validation loss:1.2724074289263487, validation accuracy:0.612
epoch:7, train loss:1.1986832788893533, train accuracy:0.6268333333333334, validation loss:1.1528955015863307

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▄▄▆▇▇▇██
train loss,█▇▆▅▄▃▂▂▁▁
validation accuracy,▁▃▄▅▆▇▇███
validation loss,█▇▆▅▄▃▂▂▁▁

0,1
train accuracy,0.69352
train loss,0.94879
validation accuracy,0.69917
validation loss,0.93392


[34m[1mwandb[0m: Agent Starting Run: gg64hfxg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01718333333337796, max=1.0)…

epoch:1, train loss:3.8477568106924775, train accuracy:0.1287962962962963, validation loss:3.8217363666158572, validation accuracy:0.137
epoch:2, train loss:3.727130290213669, train accuracy:0.1392962962962963, validation loss:3.7074827392317036, validation accuracy:0.147
epoch:3, train loss:3.617048166668471, train accuracy:0.14831481481481482, validation loss:3.5983297714212004, validation accuracy:0.1565
epoch:4, train loss:3.511323560966938, train accuracy:0.15772222222222224, validation loss:3.4917293294479452, validation accuracy:0.1665
epoch:5, train loss:3.409960766942312, train accuracy:0.1671111111111111, validation loss:3.386742538936665, validation accuracy:0.174
epoch:6, train loss:3.3126933591135286, train accuracy:0.17642592592592593, validation loss:3.2848733586250054, validation accuracy:0.18616666666666667
epoch:7, train loss:3.220084876835263, train accuracy:0.1847962962962963, validation loss:3.186725698959893, validation accuracy:0.19483333333333333
epoch:8, train 

0,1
train accuracy,▁▂▃▃▄▅▆▆▇█
train loss,█▇▆▅▅▄▃▂▂▁
validation accuracy,▁▂▃▄▄▅▆▇▇█
validation loss,█▇▆▅▅▄▃▂▂▁

0,1
train accuracy,0.21256
train loss,2.96613
validation accuracy,0.218
validation loss,2.92445


[34m[1mwandb[0m: Agent Starting Run: uuetym6i with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:13.842913611646214, train accuracy:0.13112962962962962, validation loss:11.894514711020648, validation accuracy:0.14233333333333334
epoch:2, train loss:9.663614192417326, train accuracy:0.1792037037037037, validation loss:7.54954754121756, validation accuracy:0.23433333333333334
epoch:3, train loss:6.464175905341793, train accuracy:0.2828333333333333, validation loss:5.430511114397283, validation accuracy:0.3318333333333333
epoch:4, train loss:4.941593743362424, train accuracy:0.36227777777777775, validation loss:4.347766261289536, validation accuracy:0.3978333333333333
epoch:5, train loss:4.074216697769403, train accuracy:0.41555555555555557, validation loss:3.692230678912875, validation accuracy:0.4405
epoch:6, train loss:3.5149645694099223, train accuracy:0.4551851851851852, validation loss:3.2552772250970277, validation accuracy:0.4708333333333333
epoch:7, train loss:3.1214084101341193, train accuracy:0.4852962962962963, validation loss:2.943241960163979, valida

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▄▅▆▆▇▇██
train loss,█▅▃▃▂▂▁▁▁▁
validation accuracy,▁▃▄▅▆▇▇▇██
validation loss,█▅▃▂▂▂▁▁▁▁

0,1
train accuracy,0.54587
train loss,2.4555
validation accuracy,0.54933
validation loss,2.37849


[34m[1mwandb[0m: Agent Starting Run: 4nuyc05e with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:15.198419391291157, train accuracy:0.16127777777777777, validation loss:12.017122215482166, validation accuracy:0.324
epoch:2, train loss:10.413269169982343, train accuracy:0.4080740740740741, validation loss:9.008316092785739, validation accuracy:0.48183333333333334
epoch:3, train loss:8.294774028153016, train accuracy:0.5215, validation loss:7.603201494881662, validation accuracy:0.5565
epoch:4, train loss:7.196939505456183, train accuracy:0.5811666666666667, validation loss:6.831999779330433, validation accuracy:0.6015
epoch:5, train loss:6.544271799674202, train accuracy:0.616574074074074, validation loss:6.317180809969057, validation accuracy:0.6251666666666666
epoch:6, train loss:6.105513865649647, train accuracy:0.6382777777777778, validation loss:5.936896479717863, validation accuracy:0.6448333333333334
epoch:7, train loss:5.76679056765485, train accuracy:0.6563518518518519, validation loss:5.717375599690552, validation accuracy:0.6595
epoch:8, train loss:5.

0,1
train accuracy,▁▄▆▇▇▇████
train loss,█▅▃▂▂▂▁▁▁▁
validation accuracy,▁▄▅▆▇▇▇███
validation loss,█▅▃▃▂▂▁▁▁▁

0,1
train accuracy,0.69054
train loss,5.12947
validation accuracy,0.68583
validation loss,5.23414


[34m[1mwandb[0m: Agent Starting Run: 0p2ukyxx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:7.172381468214971, train accuracy:0.11672222222222223, validation loss:6.791726872016612, validation accuracy:0.12566666666666668
epoch:2, train loss:6.3711508821875364, train accuracy:0.14024074074074075, validation loss:6.098828679748419, validation accuracy:0.149
epoch:3, train loss:5.750926136150524, train accuracy:0.16605555555555557, validation loss:5.556688809284944, validation accuracy:0.17033333333333334
epoch:4, train loss:5.252161907159053, train accuracy:0.1925, validation loss:5.117775501553132, validation accuracy:0.19483333333333333
epoch:5, train loss:4.836441739107748, train accuracy:0.21914814814814815, validation loss:4.756274801348675, validation accuracy:0.22183333333333333


0,1
train accuracy,▁▃▄▆█
train loss,█▆▄▂▁
validation accuracy,▁▃▄▆█
validation loss,█▆▄▂▁

0,1
train accuracy,0.21915
train loss,4.83644
validation accuracy,0.22183
validation loss,4.75627


[34m[1mwandb[0m: Agent Starting Run: 24e7dnmt with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

epoch:1, train loss:2.4890993823376633, train accuracy:0.10025925925925926, validation loss:2.4963504765750475, validation accuracy:0.09766666666666667
epoch:2, train loss:2.4809526940112923, train accuracy:0.10025925925925926, validation loss:2.4877552538003744, validation accuracy:0.09766666666666667
epoch:3, train loss:2.473098972801817, train accuracy:0.10025925925925926, validation loss:2.4802722830601773, validation accuracy:0.09766666666666667
epoch:4, train loss:2.466418124215065, train accuracy:0.10025925925925926, validation loss:2.473915886355697, validation accuracy:0.09766666666666667
epoch:5, train loss:2.4602549857669063, train accuracy:0.10025925925925926, validation loss:2.4676792212993295, validation accuracy:0.09766666666666667


VBox(children=(Label(value='0.001 MB of 0.017 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.064932…

0,1
train accuracy,▁▁▁▁▁
train loss,█▆▄▂▁
validation accuracy,▁▁▁▁▁
validation loss,█▆▄▃▁

0,1
train accuracy,0.10026
train loss,2.46025
validation accuracy,0.09767
validation loss,2.46768


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kju4r54k with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.8265066150182916, train accuracy:0.3804074074074074, validation loss:1.4836440160347109, validation accuracy:0.5758333333333333
epoch:2, train loss:1.2910164281132213, train accuracy:0.6243703703703704, validation loss:1.151966794588502, validation accuracy:0.6526666666666666
epoch:3, train loss:1.0394137142429118, train accuracy:0.6838518518518518, validation loss:0.9602049753857101, validation accuracy:0.6995
epoch:4, train loss:0.8851259912419361, train accuracy:0.7222222222222222, validation loss:0.835449454644767, validation accuracy:0.7355
epoch:5, train loss:0.7809055208476896, train accuracy:0.7487962962962963, validation loss:0.7484574528844655, validation accuracy:0.7583333333333333
epoch:6, train loss:0.707653390036331, train accuracy:0.7697777777777778, validation loss:0.6862702962022694, validation accuracy:0.7793333333333333
epoch:7, train loss:0.6543697037620375, train accuracy:0.7853518518518519, validation loss:0.6399877498537672, validation accur

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇▇▇████
train loss,█▅▄▃▂▂▂▁▁▁
validation accuracy,▁▃▅▆▆▇▇███
validation loss,█▆▄▃▂▂▂▁▁▁

0,1
train accuracy,0.81352
train loss,0.55742
validation accuracy,0.8125
validation loss,0.55479


[34m[1mwandb[0m: Agent Starting Run: 3f2248yj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.9045824071543623, train accuracy:0.7260555555555556, validation loss:0.5764394379614922, validation accuracy:0.8063333333333333
epoch:2, train loss:0.5270189789214371, train accuracy:0.818962962962963, validation loss:0.48881911539007855, validation accuracy:0.8298333333333333
epoch:3, train loss:0.47267785338572826, train accuracy:0.8348333333333333, validation loss:0.4609502006923699, validation accuracy:0.8373333333333334
epoch:4, train loss:0.4474272620768006, train accuracy:0.8426296296296296, validation loss:0.4391730446995484, validation accuracy:0.8433333333333334
epoch:5, train loss:0.42987315794529185, train accuracy:0.8486666666666667, validation loss:0.43190533388829955, validation accuracy:0.8471666666666666


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇██
train loss,█▂▂▁▁
validation accuracy,▁▅▆▇█
validation loss,█▄▂▁▁

0,1
train accuracy,0.84867
train loss,0.42987
validation accuracy,0.84717
validation loss,0.43191


[34m[1mwandb[0m: Agent Starting Run: juyqouud with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01718333333337796, max=1.0)…

epoch:1, train loss:2.35788450384889, train accuracy:0.09438888888888888, validation loss:2.307167149546513, validation accuracy:0.098
epoch:2, train loss:2.3029204613530676, train accuracy:0.08146296296296296, validation loss:2.3014109767321456, validation accuracy:0.0865
epoch:3, train loss:2.3004076495319867, train accuracy:0.10551851851851853, validation loss:2.3001056472989747, validation accuracy:0.11716666666666667
epoch:4, train loss:2.298986428996012, train accuracy:0.16816666666666666, validation loss:2.298547080670346, validation accuracy:0.17566666666666667
epoch:5, train loss:2.2976532778210883, train accuracy:0.19618518518518518, validation loss:2.297338090157325, validation accuracy:0.15066666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▂▁▂▆█
train loss,█▂▁▁▁
validation accuracy,▂▁▃█▆
validation loss,█▄▃▂▁

0,1
train accuracy,0.19619
train loss,2.29765
validation accuracy,0.15067
validation loss,2.29734


[34m[1mwandb[0m: Agent Starting Run: le8f6erc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.3348283948764093, train accuracy:0.16461111111111112, validation loss:2.292149201908639, validation accuracy:0.18016666666666667
epoch:2, train loss:2.26754559924863, train accuracy:0.21109259259259258, validation loss:2.2482892131898873, validation accuracy:0.29033333333333333
epoch:3, train loss:2.228939420225654, train accuracy:0.3989259259259259, validation loss:2.2125288606362807, validation accuracy:0.4558333333333333
epoch:4, train loss:2.1924811198685172, train accuracy:0.45281481481481484, validation loss:2.1747641294308004, validation accuracy:0.43016666666666664
epoch:5, train loss:2.1521861591550473, train accuracy:0.4310185185185185, validation loss:2.132114833502227, validation accuracy:0.4195
epoch:6, train loss:2.106868436538925, train accuracy:0.42177777777777775, validation loss:2.084392343332688, validation accuracy:0.423
epoch:7, train loss:2.0562666957094082, train accuracy:0.42853703703703705, validation loss:2.0316504551686685, validation ac

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▆▇▇▆▆▆▆█
train loss,█▇▆▆▅▄▄▃▂▁
validation accuracy,▁▃▇▇▆▆▆▆▇█
validation loss,█▇▇▆▅▅▄▃▂▁

0,1
train accuracy,0.50061
train loss,1.88472
validation accuracy,0.49017
validation loss,1.85804


[34m[1mwandb[0m: Agent Starting Run: qw3pv2iw with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.375958223118448, train accuracy:0.13692592592592592, validation loss:2.3177462723168722, validation accuracy:0.19633333333333333
epoch:2, train loss:2.30278704836169, train accuracy:0.23387037037037037, validation loss:2.288769634254276, validation accuracy:0.20383333333333334
epoch:3, train loss:2.27282152620576, train accuracy:0.23057407407407407, validation loss:2.252483662801646, validation accuracy:0.25816666666666666
epoch:4, train loss:2.2227425049245535, train accuracy:0.26172222222222224, validation loss:2.1873970472425164, validation accuracy:0.22
epoch:5, train loss:2.1410643130211757, train accuracy:0.22361111111111112, validation loss:2.091152291740963, validation accuracy:0.21333333333333335


0,1
train accuracy,▁▆▆█▆
train loss,█▆▅▃▁
validation accuracy,▁▂█▄▃
validation loss,█▇▆▄▁

0,1
train accuracy,0.22361
train loss,2.14106
validation accuracy,0.21333
validation loss,2.09115


[34m[1mwandb[0m: Agent Starting Run: xzs2z8tv with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.2862627410068863, train accuracy:0.1332037037037037, validation loss:2.2485058674458944, validation accuracy:0.17366666666666666
epoch:2, train loss:2.217472721742001, train accuracy:0.20859259259259258, validation loss:2.184735112309971, validation accuracy:0.24766666666666667
epoch:3, train loss:2.1582572420342614, train accuracy:0.2771111111111111, validation loss:2.1270050250549026, validation accuracy:0.31683333333333336
epoch:4, train loss:2.102043692543632, train accuracy:0.34025925925925926, validation loss:2.0711500414732096, validation accuracy:0.3735
epoch:5, train loss:2.04664201697291, train accuracy:0.39325925925925925, validation loss:2.0157250618924287, validation accuracy:0.4201666666666667
epoch:6, train loss:1.9907270351944766, train accuracy:0.4446111111111111, validation loss:1.9593912233522988, validation accuracy:0.4661666666666667
epoch:7, train loss:1.9331962983964357, train accuracy:0.48844444444444446, validation loss:1.901149096428332, 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▃▄▅▆▇▇██
train loss,█▇▆▆▅▄▃▃▂▁
validation accuracy,▁▂▃▄▅▆▇▇██
validation loss,█▇▆▆▅▄▃▃▂▁

0,1
train accuracy,0.58
train loss,1.74899
validation accuracy,0.58433
validation loss,1.71619


[34m[1mwandb[0m: Agent Starting Run: fvalbir5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:5.109666034674665, train accuracy:0.10766666666666666, validation loss:4.633067021834829, validation accuracy:0.11916666666666667
epoch:2, train loss:4.2577011660296264, train accuracy:0.14631481481481481, validation loss:3.965286249413629, validation accuracy:0.1595
epoch:3, train loss:3.7066499196556277, train accuracy:0.1870925925925926, validation loss:3.526913460890357, validation accuracy:0.20466666666666666
epoch:4, train loss:3.3378125021615634, train accuracy:0.22566666666666665, validation loss:3.2171860499022094, validation accuracy:0.24083333333333334
epoch:5, train loss:3.067671410829698, train accuracy:0.2612037037037037, validation loss:2.9767266932217895, validation accuracy:0.2748333333333333
epoch:6, train loss:2.8549528265371147, train accuracy:0.292462962962963, validation loss:2.781559378828194, validation accuracy:0.3025
epoch:7, train loss:2.6810737918020418, train accuracy:0.3195925925925926, validation loss:2.6206259810479944, validation acc

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▃▄▅▆▆▇▇█
train loss,█▆▅▄▃▂▂▂▁▁
validation accuracy,▁▂▃▄▅▆▆▇▇█
validation loss,█▆▅▄▃▃▂▂▁▁

0,1
train accuracy,0.38617
train loss,2.28941
validation accuracy,0.3915
validation loss,2.25278


[34m[1mwandb[0m: Agent Starting Run: p1iitxwo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.30163111564936, train accuracy:0.08944444444444444, validation loss:2.1785270504160756, validation accuracy:0.17766666666666667
epoch:2, train loss:2.093626796799813, train accuracy:0.2749814814814815, validation loss:2.019405595323854, validation accuracy:0.3466666666666667
epoch:3, train loss:1.9513880944271824, train accuracy:0.38692592592592595, validation loss:1.8922025153509305, validation accuracy:0.41733333333333333
epoch:4, train loss:1.8361217197764121, train accuracy:0.42677777777777776, validation loss:1.788646732003574, validation accuracy:0.4385
epoch:5, train loss:1.7415904518408614, train accuracy:0.45025925925925925, validation loss:1.7030234833847093, validation accuracy:0.45816666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▇██
train loss,█▅▄▂▁
validation accuracy,▁▅▇██
validation loss,█▆▄▂▁

0,1
train accuracy,0.45026
train loss,1.74159
validation accuracy,0.45817
validation loss,1.70302


[34m[1mwandb[0m: Agent Starting Run: 56pghqpm with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.549327112189583, train accuracy:0.10016666666666667, validation loss:2.508057653060533, validation accuracy:0.0985
epoch:2, train loss:2.4671590774854324, train accuracy:0.10016666666666667, validation loss:2.4403762614576734, validation accuracy:0.0985
epoch:3, train loss:2.412633541019467, train accuracy:0.10016666666666667, validation loss:2.394603493489624, validation accuracy:0.0985
epoch:4, train loss:2.3755259956109755, train accuracy:0.10016666666666667, validation loss:2.3632145392082036, validation accuracy:0.0985
epoch:5, train loss:2.3501647392937928, train accuracy:0.10016666666666667, validation loss:2.341855871494745, validation accuracy:0.0985
epoch:6, train loss:2.3330389872083153, train accuracy:0.10016666666666667, validation loss:2.3275374699290623, validation accuracy:0.0985
epoch:7, train loss:2.3216933008893696, train accuracy:0.10016666666666667, validation loss:2.318177162923713, validation accuracy:0.0985
epoch:8, train loss:2.31436180508

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,████████▇▁
train loss,█▆▄▃▂▂▁▁▁▁
validation accuracy,▁▁▁▁▁▁▁▁▁▁
validation loss,█▆▄▃▂▂▁▁▁▁

0,1
train accuracy,0.09869
train loss,2.30687
validation accuracy,0.0985
validation loss,2.30618


[34m[1mwandb[0m: Agent Starting Run: eqx5qgwr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:3.4077912097236642, train accuracy:0.11025925925925927, validation loss:3.300402213088289, validation accuracy:0.1205
epoch:2, train loss:3.199502834357332, train accuracy:0.11537037037037037, validation loss:3.128511906794199, validation accuracy:0.12516666666666668
epoch:3, train loss:3.047060569481769, train accuracy:0.1262037037037037, validation loss:2.9967490392459495, validation accuracy:0.14066666666666666
epoch:4, train loss:2.9213071027860513, train accuracy:0.13720370370370372, validation loss:2.8830820408839104, validation accuracy:0.15133333333333332
epoch:5, train loss:2.814547125467131, train accuracy:0.14881481481481482, validation loss:2.78625054838437, validation accuracy:0.1605
epoch:6, train loss:2.7203475425115924, train accuracy:0.1627222222222222, validation loss:2.696190917810016, validation accuracy:0.1705
epoch:7, train loss:2.6368478222544103, train accuracy:0.1764814814814815, validation loss:2.619184589350095, validation accuracy:0.18183

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▂▃▃▄▅▆▇█
train loss,█▇▅▅▄▃▂▂▁▁
validation accuracy,▁▁▂▃▄▄▅▆▇█
validation loss,█▇▆▅▄▃▃▂▁▁

0,1
train accuracy,0.21904
train loss,2.43431
validation accuracy,0.22683
validation loss,2.42454


[34m[1mwandb[0m: Agent Starting Run: 7lyg8ari with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.5175655099801006, train accuracy:0.1002962962962963, validation loss:2.5020653903742156, validation accuracy:0.09733333333333333
epoch:2, train loss:2.485786343296177, train accuracy:0.1002962962962963, validation loss:2.4734267797246883, validation accuracy:0.09733333333333333
epoch:3, train loss:2.459771921422025, train accuracy:0.1002962962962963, validation loss:2.4492557432252675, validation accuracy:0.09733333333333333
epoch:4, train loss:2.4376299750483357, train accuracy:0.1002962962962963, validation loss:2.428501429155858, validation accuracy:0.09733333333333333
epoch:5, train loss:2.4184864629827545, train accuracy:0.1002962962962963, validation loss:2.4103799419114456, validation accuracy:0.09733333333333333
epoch:6, train loss:2.4018099657116214, train accuracy:0.1002962962962963, validation loss:2.394704111673766, validation accuracy:0.09733333333333333
epoch:7, train loss:2.387227692066835, train accuracy:0.1002962962962963, validation loss:2.380889

0,1
train accuracy,▁▁▁▁▁▁▁▁▁▁
train loss,█▇▆▅▄▃▂▂▁▁
validation accuracy,▁▁▁▁▁▁▁▁▁▁
validation loss,█▇▆▅▄▃▂▂▁▁

0,1
train accuracy,0.1003
train loss,2.35367
validation accuracy,0.09733
validation loss,2.34925


[34m[1mwandb[0m: Agent Starting Run: ryud70n3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.3364126528604565, train accuracy:0.09985185185185186, validation loss:2.301373506162104, validation accuracy:0.09816666666666667
epoch:2, train loss:2.2981551861953338, train accuracy:0.12651851851851853, validation loss:2.2946966282272645, validation accuracy:0.25283333333333335
epoch:3, train loss:2.2889821627809295, train accuracy:0.3351296296296296, validation loss:2.280731797487777, validation accuracy:0.2733333333333333
epoch:4, train loss:2.2664844169384453, train accuracy:0.40114814814814814, validation loss:2.245352419496103, validation accuracy:0.4875
epoch:5, train loss:2.2087520796733813, train accuracy:0.4426111111111111, validation loss:2.158821084612278, validation accuracy:0.4735


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▆▇█
train loss,█▆▅▄▁
validation accuracy,▁▄▄██
validation loss,██▇▅▁

0,1
train accuracy,0.44261
train loss,2.20875
validation accuracy,0.4735
validation loss,2.15882


[34m[1mwandb[0m: Agent Starting Run: pkvkgbwz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.3951208787268123, train accuracy:0.5912592592592593, validation loss:1.003768449542712, validation accuracy:0.706
epoch:2, train loss:0.8579767966163608, train accuracy:0.7354074074074074, validation loss:0.7638807540001508, validation accuracy:0.7516666666666667
epoch:3, train loss:0.7036876468748892, train accuracy:0.7666666666666667, validation loss:0.6629032079734053, validation accuracy:0.7736666666666666
epoch:4, train loss:0.6254635777385608, train accuracy:0.7890925925925926, validation loss:0.6021940424162284, validation accuracy:0.7953333333333333
epoch:5, train loss:0.5749947934993026, train accuracy:0.8049814814814815, validation loss:0.5595235712488359, validation accuracy:0.8053333333333333
epoch:6, train loss:0.5392370272508553, train accuracy:0.8158518518518518, validation loss:0.5303805913703924, validation accuracy:0.8146666666666667
epoch:7, train loss:0.5135679622050263, train accuracy:0.8236851851851852, validation loss:0.5088737974511774, val

0,1
train accuracy,▁▅▆▇▇▇████
train loss,█▄▃▂▂▂▁▁▁▁
validation accuracy,▁▄▅▆▆▇▇███
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.837
train loss,0.46548
validation accuracy,0.833
validation loss,0.46934


[34m[1mwandb[0m: Agent Starting Run: owyhkc2x with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:4.762405424373785, train accuracy:0.17685185185185184, validation loss:3.3980915799772164, validation accuracy:0.2855
epoch:2, train loss:2.848721665816979, train accuracy:0.37457407407407406, validation loss:2.6516386896266706, validation accuracy:0.4115
epoch:3, train loss:2.3901697625319485, train accuracy:0.4711296296296296, validation loss:2.3430194451117896, validation accuracy:0.4811666666666667
epoch:4, train loss:2.1687376118033352, train accuracy:0.5162222222222222, validation loss:2.162919886147529, validation accuracy:0.5211666666666667
epoch:5, train loss:2.0338130036399074, train accuracy:0.549074074074074, validation loss:2.0460095766947655, validation accuracy:0.5476666666666666


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▇▇█
train loss,█▃▂▁▁
validation accuracy,▁▄▆▇█
validation loss,█▄▃▂▁

0,1
train accuracy,0.54907
train loss,2.03381
validation accuracy,0.54767
validation loss,2.04601


[34m[1mwandb[0m: Agent Starting Run: mesduglx with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.3848758362328972, train accuracy:0.14637037037037037, validation loss:2.276275342100096, validation accuracy:0.1785
epoch:2, train loss:2.190019428147666, train accuracy:0.21912962962962962, validation loss:2.104203359208313, validation accuracy:0.25933333333333336
epoch:3, train loss:2.0281246625552787, train accuracy:0.32614814814814813, validation loss:1.949683451962451, validation accuracy:0.37933333333333336
epoch:4, train loss:1.8761012940950048, train accuracy:0.4064814814814815, validation loss:1.8014707462001895, validation accuracy:0.42933333333333334
epoch:5, train loss:1.7293804893478308, train accuracy:0.4491851851851852, validation loss:1.6588720953604952, validation accuracy:0.47783333333333333
epoch:6, train loss:1.5889046463801542, train accuracy:0.5153703703703704, validation loss:1.5230561723253906, validation accuracy:0.5483333333333333
epoch:7, train loss:1.4571830761149842, train accuracy:0.576925925925926, validation loss:1.3987272208521293,

0,1
train accuracy,▁▂▃▅▅▆▇▇██
train loss,█▇▆▅▄▄▃▂▁▁
validation accuracy,▁▂▄▅▅▆▇▇██
validation loss,█▇▆▅▄▃▃▂▁▁

0,1
train accuracy,0.66306
train loss,1.14648
validation accuracy,0.66833
validation loss,1.11364


[34m[1mwandb[0m: Agent Starting Run: bop79nyt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:3.843439837514582, train accuracy:0.10429629629629629, validation loss:3.177562330726887, validation accuracy:0.16816666666666666
epoch:2, train loss:2.7515835473462085, train accuracy:0.24674074074074073, validation loss:2.4742347127300137, validation accuracy:0.30666666666666664
epoch:3, train loss:2.243244537765749, train accuracy:0.3618703703703704, validation loss:2.127491356823681, validation accuracy:0.38466666666666666
epoch:4, train loss:1.9877026568415297, train accuracy:0.43096296296296294, validation loss:1.9400943291960155, validation accuracy:0.43283333333333335
epoch:5, train loss:1.8371756573160418, train accuracy:0.47224074074074074, validation loss:1.8174511433841825, validation accuracy:0.4696666666666667
epoch:6, train loss:1.7314520880645596, train accuracy:0.5022592592592593, validation loss:1.7240323168989853, validation accuracy:0.502
epoch:7, train loss:1.6497551999977491, train accuracy:0.5253888888888889, validation loss:1.6487416783840123

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▅▆▆▇▇▇██
train loss,█▅▃▃▂▂▂▁▁▁
validation accuracy,▁▃▅▆▆▇▇▇██
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.58104
train loss,1.47927
validation accuracy,0.576
validation loss,1.48694


[34m[1mwandb[0m: Agent Starting Run: ma86ew2f with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.40655739518439, train accuracy:0.09775925925925925, validation loss:2.394841952010057, validation accuracy:0.104
epoch:2, train loss:2.392646950300214, train accuracy:0.09833333333333333, validation loss:2.3820202693122408, validation accuracy:0.10483333333333333
epoch:3, train loss:2.3805832712207864, train accuracy:0.09922222222222223, validation loss:2.370025743407411, validation accuracy:0.10666666666666667
epoch:4, train loss:2.3689930609988803, train accuracy:0.1004074074074074, validation loss:2.3594087582186707, validation accuracy:0.10816666666666666
epoch:5, train loss:2.357951983097815, train accuracy:0.10161111111111111, validation loss:2.3486571765328033, validation accuracy:0.11066666666666666


0,1
train accuracy,▁▂▄▆█
train loss,█▆▄▃▁
validation accuracy,▁▂▄▅█
validation loss,█▆▄▃▁

0,1
train accuracy,0.10161
train loss,2.35795
validation accuracy,0.11067
validation loss,2.34866


[34m[1mwandb[0m: Agent Starting Run: okbby18i with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01718333333337796, max=1.0)…

epoch:1, train loss:2.451725196600499, train accuracy:0.09916666666666667, validation loss:2.373397543053271, validation accuracy:0.1075
epoch:2, train loss:2.3496825762869338, train accuracy:0.10042592592592593, validation loss:2.313349524123999, validation accuracy:0.11283333333333333
epoch:3, train loss:2.298152120551395, train accuracy:0.11422222222222222, validation loss:2.268544090411716, validation accuracy:0.12816666666666668
epoch:4, train loss:2.2513571862614037, train accuracy:0.13418518518518519, validation loss:2.2245187614982633, validation accuracy:0.15433333333333332
epoch:5, train loss:2.205776014593563, train accuracy:0.1745925925925926, validation loss:2.18222067784589, validation accuracy:0.19833333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▂▄█
train loss,█▅▄▂▁
validation accuracy,▁▁▃▅█
validation loss,█▆▄▃▁

0,1
train accuracy,0.17459
train loss,2.20578
validation accuracy,0.19833
validation loss,2.18222


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9byphqcn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:4.323431007524274, train accuracy:0.10825925925925926, validation loss:3.489414162619208, validation accuracy:0.145
epoch:2, train loss:3.223352540078132, train accuracy:0.1738888888888889, validation loss:2.9903436437869417, validation accuracy:0.20333333333333334
epoch:3, train loss:2.850834654733572, train accuracy:0.22127777777777777, validation loss:2.711653230841746, validation accuracy:0.24166666666666667
epoch:4, train loss:2.6034536887427215, train accuracy:0.25677777777777777, validation loss:2.4916155048735193, validation accuracy:0.2785
epoch:5, train loss:2.409566104188269, train accuracy:0.29453703703703704, validation loss:2.3273352720015814, validation accuracy:0.31783333333333336
epoch:6, train loss:2.2613397443648475, train accuracy:0.3315740740740741, validation loss:2.2008366885082906, validation accuracy:0.3516666666666667
epoch:7, train loss:2.143783640984242, train accuracy:0.3663888888888889, validation loss:2.1003709838945337, validation acc

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▃▄▅▆▇▇██
train loss,█▅▄▃▂▂▂▁▁▁
validation accuracy,▁▂▃▄▅▆▇▇██
validation loss,█▆▅▄▃▂▂▂▁▁

0,1
train accuracy,0.43567
train loss,1.91471
validation accuracy,0.442
validation loss,1.898


[34m[1mwandb[0m: Agent Starting Run: ngsekokh with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.096072531638647, train accuracy:0.2826111111111111, validation loss:1.7949046446005217, validation accuracy:0.496
epoch:2, train loss:1.5415501556016276, train accuracy:0.5852962962962963, validation loss:1.3360009112418616, validation accuracy:0.6265
epoch:3, train loss:1.1671787489490482, train accuracy:0.6623888888888889, validation loss:1.0528546945040358, validation accuracy:0.6831666666666667
epoch:4, train loss:0.9538080210496311, train accuracy:0.7106296296296296, validation loss:0.8972778644904948, validation accuracy:0.7223333333333334
epoch:5, train loss:0.8369346362926013, train accuracy:0.7407037037037038, validation loss:0.8071754989090365, validation accuracy:0.747
epoch:6, train loss:0.7655884319597458, train accuracy:0.7581111111111111, validation loss:0.7480199492867999, validation accuracy:0.7645
epoch:7, train loss:0.7164422530102206, train accuracy:0.7709074074074074, validation loss:0.7034254225919949, validation accuracy:0.7738333333333334
e

VBox(children=(Label(value='0.001 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.044755…

0,1
train accuracy,▁▅▆▇▇█████
train loss,█▅▄▃▂▂▁▁▁▁
validation accuracy,▁▄▅▆▇▇████
validation loss,█▅▄▃▂▂▁▁▁▁

0,1
train accuracy,0.79387
train loss,0.62645
validation accuracy,0.795
validation loss,0.62127


[34m[1mwandb[0m: Agent Starting Run: eoq8u4t4 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.3474455285450517, train accuracy:0.10894444444444444, validation loss:2.302859604253967, validation accuracy:0.14183333333333334
epoch:2, train loss:2.2572247236022154, train accuracy:0.15757407407407406, validation loss:2.2111799890176953, validation accuracy:0.17766666666666667
epoch:3, train loss:2.1565314827180164, train accuracy:0.21924074074074074, validation loss:2.0997399778474333, validation accuracy:0.295
epoch:4, train loss:2.034402998616943, train accuracy:0.37183333333333335, validation loss:1.965762131502812, validation accuracy:0.43166666666666664
epoch:5, train loss:1.8904190889550965, train accuracy:0.4786666666666667, validation loss:1.8123558555160457, validation accuracy:0.5268333333333334
epoch:6, train loss:1.7326011227220373, train accuracy:0.5566296296296296, validation loss:1.653131409724841, validation accuracy:0.5803333333333334
epoch:7, train loss:1.5756204318547022, train accuracy:0.595574074074074, validation loss:1.5024608309702965, 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▂▄▆▇▇███
train loss,█▇▇▆▅▄▃▂▂▁
validation accuracy,▁▁▃▅▆▇▇███
validation loss,█▇▇▆▅▄▃▂▂▁

0,1
train accuracy,0.64752
train loss,1.20193
validation accuracy,0.64967
validation loss,1.16239


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h2vkh6u2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:3.741459322868261, train accuracy:0.17807407407407408, validation loss:2.6451878267648743, validation accuracy:0.288
epoch:2, train loss:2.1996123864531123, train accuracy:0.3695185185185185, validation loss:1.9051459099281507, validation accuracy:0.4205
epoch:3, train loss:1.680915092702389, train accuracy:0.4742037037037037, validation loss:1.5483737092728427, validation accuracy:0.5033333333333333
epoch:4, train loss:1.406512805533691, train accuracy:0.5436481481481481, validation loss:1.350994665099773, validation accuracy:0.5598333333333333
epoch:5, train loss:1.2441979158253371, train accuracy:0.5876481481481481, validation loss:1.2383680851981214, validation accuracy:0.5925
epoch:6, train loss:1.1387269617484768, train accuracy:0.6177037037037038, validation loss:1.1503511574471241, validation accuracy:0.619
epoch:7, train loss:1.0660890024980527, train accuracy:0.6414074074074074, validation loss:1.0902941864528164, validation accuracy:0.6365
epoch:8, train 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▄▅▆▇▇▇███
train loss,█▄▃▂▂▂▁▁▁▁
validation accuracy,▁▃▅▆▆▇▇███
validation loss,█▅▃▃▂▂▂▁▁▁

0,1
train accuracy,0.68735
train loss,0.9256
validation accuracy,0.67733
validation loss,0.9691


[34m[1mwandb[0m: Agent Starting Run: gww2ddpy with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.4467728303602234, train accuracy:0.10025925925925926, validation loss:2.4163412544430636, validation accuracy:0.09766666666666667
epoch:2, train loss:2.3902280704685404, train accuracy:0.10025925925925926, validation loss:2.3705343292501704, validation accuracy:0.09766666666666667
epoch:3, train loss:2.3551707578254977, train accuracy:0.10025925925925926, validation loss:2.342614737581399, validation accuracy:0.09766666666666667
epoch:4, train loss:2.3335503660055927, train accuracy:0.10025925925925926, validation loss:2.3254616743975918, validation accuracy:0.09766666666666667
epoch:5, train loss:2.3203604231765116, train accuracy:0.10025925925925926, validation loss:2.3153624665342902, validation accuracy:0.09766666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁
train loss,█▅▃▂▁
validation accuracy,▁▁▁▁▁
validation loss,█▅▃▂▁

0,1
train accuracy,0.10026
train loss,2.32036
validation accuracy,0.09767
validation loss,2.31536


[34m[1mwandb[0m: Agent Starting Run: 2pikg7lg with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:13.288304550665778, train accuracy:0.2078888888888889, validation loss:12.118529448444614, validation accuracy:0.231
epoch:2, train loss:11.015883912223881, train accuracy:0.25033333333333335, validation loss:10.091632316087493, validation accuracy:0.2625
epoch:3, train loss:9.251884720770283, train accuracy:0.28853703703703704, validation loss:8.409768456074639, validation accuracy:0.29583333333333334
epoch:4, train loss:7.878333887668035, train accuracy:0.33037037037037037, validation loss:7.302071378456556, validation accuracy:0.35083333333333333
epoch:5, train loss:7.143665046480614, train accuracy:0.3431296296296296, validation loss:7.664473080528257, validation accuracy:0.31283333333333335
epoch:6, train loss:6.396528654368937, train accuracy:0.36177777777777775, validation loss:6.201067676053049, validation accuracy:0.38483333333333336
epoch:7, train loss:5.8856544832577695, train accuracy:0.38024074074074077, validation loss:5.574585996705699, validation acc

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▄▅▆▆▇███
train loss,█▆▅▄▃▂▂▂▁▁
validation accuracy,▁▂▄▆▄▇▇▅██
validation loss,█▆▅▄▄▃▂▃▁▁

0,1
train accuracy,0.40507
train loss,4.71527
validation accuracy,0.40433
validation loss,4.4395


[34m[1mwandb[0m: Agent Starting Run: s0qiygt3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.5977072075592471, train accuracy:0.5012962962962964, validation loss:1.158342340123844, validation accuracy:0.6396666666666667
epoch:2, train loss:0.9792677249729006, train accuracy:0.6904074074074074, validation loss:0.8683213700688287, validation accuracy:0.7123333333333334
epoch:3, train loss:0.794282426179416, train accuracy:0.7455185185185185, validation loss:0.743167879876516, validation accuracy:0.759
epoch:4, train loss:0.701184411995598, train accuracy:0.7759259259259259, validation loss:0.6697126187599095, validation accuracy:0.7825
epoch:5, train loss:0.6432238668562004, train accuracy:0.7933148148148148, validation loss:0.6225798940598464, validation accuracy:0.7951666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇██
train loss,█▃▂▁▁
validation accuracy,▁▄▆▇█
validation loss,█▄▃▂▁

0,1
train accuracy,0.79331
train loss,0.64322
validation accuracy,0.79517
validation loss,0.62258


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ek4kdcu7 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:15.845626528555288, train accuracy:0.13033333333333333, validation loss:15.748550295793475, validation accuracy:0.135
epoch:2, train loss:15.575977752143986, train accuracy:0.14335185185185184, validation loss:15.543860579479745, validation accuracy:0.1455
epoch:3, train loss:15.364758404880941, train accuracy:0.15303703703703703, validation loss:15.367069568698515, validation accuracy:0.15233333333333332
epoch:4, train loss:15.193250812990554, train accuracy:0.16085185185185186, validation loss:15.234543309668354, validation accuracy:0.15816666666666668
epoch:5, train loss:15.022328373103841, train accuracy:0.16866666666666666, validation loss:15.09740843601582, validation accuracy:0.16333333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▅▇█
train loss,█▆▄▂▁
validation accuracy,▁▄▅▇█
validation loss,█▆▄▂▁

0,1
train accuracy,0.16867
train loss,15.02233
validation accuracy,0.16333
validation loss,15.09741


[34m[1mwandb[0m: Agent Starting Run: utbk8xis with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


epoch:1, train loss:1.8515177251465336, train accuracy:0.421462962962963, validation loss:1.3722504983118426, validation accuracy:0.5521666666666667
epoch:2, train loss:1.1620972756152266, train accuracy:0.6239629629629629, validation loss:1.0079137463221732, validation accuracy:0.6645
epoch:3, train loss:0.8934457571616466, train accuracy:0.6915740740740741, validation loss:0.8092887234746327, validation accuracy:0.702
epoch:4, train loss:0.7474427976392886, train accuracy:0.7218518518518519, validation loss:0.7039508247682755, validation accuracy:0.7256666666666667
epoch:5, train loss:0.6683702433769881, train accuracy:0.7463333333333333, validation loss:0.6416460269123171, validation accuracy:0.7628333333333334


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▇▇█
train loss,█▄▂▁▁
validation accuracy,▁▅▆▇█
validation loss,█▅▃▂▁

0,1
train accuracy,0.74633
train loss,0.66837
validation accuracy,0.76283
validation loss,0.64165


[34m[1mwandb[0m: Agent Starting Run: yg7wfdlm with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.3357569806465786, train accuracy:0.18853703703703703, validation loss:2.2619104380510704, validation accuracy:0.268
epoch:2, train loss:2.183069977217907, train accuracy:0.2751296296296296, validation loss:2.083287075514385, validation accuracy:0.27466666666666667
epoch:3, train loss:1.9266044575145047, train accuracy:0.3473333333333333, validation loss:1.7406982935018565, validation accuracy:0.3973333333333333
epoch:4, train loss:1.506340775359178, train accuracy:0.4845555555555556, validation loss:1.301494297354987, validation accuracy:0.5268333333333334
epoch:5, train loss:1.1489709637048577, train accuracy:0.5678703703703704, validation loss:1.047792083699323, validation accuracy:0.6048333333333333
epoch:6, train loss:0.9774166406630918, train accuracy:0.6269814814814815, validation loss:0.9382655556934517, validation accuracy:0.6538333333333334
epoch:7, train loss:0.8933906552241165, train accuracy:0.6678703703703703, validation loss:0.8733716695738873, valid

VBox(children=(Label(value='0.001 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.059890…

0,1
train accuracy,▁▂▃▅▆▇▇▇██
train loss,█▇▆▄▃▂▂▁▁▁
validation accuracy,▁▁▃▅▆▇▇▇██
validation loss,█▇▆▄▂▂▂▁▁▁

0,1
train accuracy,0.73731
train loss,0.75467
validation accuracy,0.74267
validation loss,0.74813


[34m[1mwandb[0m: Agent Starting Run: 2xkhj06z with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.3622834591072537, train accuracy:0.12055555555555555, validation loss:2.3162004604857125, validation accuracy:0.13483333333333333
epoch:2, train loss:2.2721682566759998, train accuracy:0.13092592592592592, validation loss:2.2310035394479324, validation accuracy:0.12216666666666667
epoch:3, train loss:2.185379573613461, train accuracy:0.13140740740740742, validation loss:2.1411271736130315, validation accuracy:0.1345
epoch:4, train loss:2.094897936243834, train accuracy:0.1464074074074074, validation loss:2.049659279478402, validation accuracy:0.15983333333333333
epoch:5, train loss:2.0010897493550526, train accuracy:0.1759074074074074, validation loss:1.9554272403638837, validation accuracy:0.1875


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▂▄█
train loss,█▆▅▃▁
validation accuracy,▂▁▂▅█
validation loss,█▆▅▃▁

0,1
train accuracy,0.17591
train loss,2.00109
validation accuracy,0.1875
validation loss,1.95543


[34m[1mwandb[0m: Agent Starting Run: zlj5q826 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.379501259298146, train accuracy:0.10025925925925926, validation loss:2.3002254805189715, validation accuracy:0.09766666666666667
epoch:2, train loss:2.2772081930159427, train accuracy:0.17909259259259258, validation loss:2.2467600832953223, validation accuracy:0.2575
epoch:3, train loss:2.1892227408530824, train accuracy:0.2750925925925926, validation loss:2.1128895386269773, validation accuracy:0.2831666666666667
epoch:4, train loss:2.016864285820821, train accuracy:0.2965925925925926, validation loss:1.9235001470632576, validation accuracy:0.284
epoch:5, train loss:1.8519501281283628, train accuracy:0.2670740740740741, validation loss:1.7969310653384538, validation accuracy:0.28633333333333333


0,1
train accuracy,▁▄▇█▇
train loss,█▇▅▃▁
validation accuracy,▁▇███
validation loss,█▇▅▃▁

0,1
train accuracy,0.26707
train loss,1.85195
validation accuracy,0.28633
validation loss,1.79693


[34m[1mwandb[0m: Agent Starting Run: zmukzfjf with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.342779618037152, train accuracy:0.11101851851851852, validation loss:2.3062718278980623, validation accuracy:0.12183333333333334
epoch:2, train loss:2.2817511538936976, train accuracy:0.14601851851851852, validation loss:2.262424712247633, validation accuracy:0.16116666666666668
epoch:3, train loss:2.2421957829104984, train accuracy:0.18285185185185185, validation loss:2.2253402806501983, validation accuracy:0.193
epoch:4, train loss:2.204314239306621, train accuracy:0.21357407407407408, validation loss:2.1863681120781004, validation accuracy:0.21983333333333333
epoch:5, train loss:2.163893374383154, train accuracy:0.2380925925925926, validation loss:2.14502672150283, validation accuracy:0.2475


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▅▇█
train loss,█▆▄▃▁
validation accuracy,▁▃▅▆█
validation loss,█▆▄▃▁

0,1
train accuracy,0.23809
train loss,2.16389
validation accuracy,0.2475
validation loss,2.14503


[34m[1mwandb[0m: Agent Starting Run: vdh6qxrq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.4022619557207876, train accuracy:0.06964814814814815, validation loss:2.321557857039888, validation accuracy:0.09316666666666666
epoch:2, train loss:2.2405146127382882, train accuracy:0.12603703703703703, validation loss:2.1763325236254114, validation accuracy:0.165
epoch:3, train loss:2.113341220835038, train accuracy:0.21768518518518518, validation loss:2.065154424010532, validation accuracy:0.26216666666666666
epoch:4, train loss:2.017257403604035, train accuracy:0.30133333333333334, validation loss:1.9843591360690993, validation accuracy:0.3471666666666667
epoch:5, train loss:1.9388052437725478, train accuracy:0.38885185185185184, validation loss:1.9101435380824698, validation accuracy:0.4201666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▄▆█
train loss,█▆▄▂▁
validation accuracy,▁▃▅▆█
validation loss,█▆▄▂▁

0,1
train accuracy,0.38885
train loss,1.93881
validation accuracy,0.42017
validation loss,1.91014


[34m[1mwandb[0m: Agent Starting Run: xg3q57kk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:5.233760419516087, train accuracy:0.1466851851851852, validation loss:4.487444590518825, validation accuracy:0.18916666666666668
epoch:2, train loss:3.803482605170405, train accuracy:0.23344444444444445, validation loss:3.4041395383733817, validation accuracy:0.26516666666666666
epoch:3, train loss:2.9514992179738995, train accuracy:0.3112037037037037, validation loss:2.733405286627455, validation accuracy:0.33616666666666667
epoch:4, train loss:2.416653016156378, train accuracy:0.3805925925925926, validation loss:2.30990549858255, validation accuracy:0.3998333333333333
epoch:5, train loss:2.074837067058369, train accuracy:0.4376481481481482, validation loss:2.0213757773721936, validation accuracy:0.45016666666666666
epoch:6, train loss:1.8373128939667274, train accuracy:0.48212962962962963, validation loss:1.8173881465479846, validation accuracy:0.4855
epoch:7, train loss:1.663095922724312, train accuracy:0.5176481481481482, validation loss:1.6662712636949437, vali

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▄▅▆▆▇▇██
train loss,█▅▄▃▂▂▂▁▁▁
validation accuracy,▁▂▄▅▆▆▇▇██
validation loss,█▆▄▃▂▂▂▁▁▁

0,1
train accuracy,0.58702
train loss,1.34364
validation accuracy,0.577
validation loss,1.37229


[34m[1mwandb[0m: Agent Starting Run: 8t0zb0mj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:5.089236087724614, train accuracy:0.1393148148148148, validation loss:4.499549046257074, validation accuracy:0.174
epoch:2, train loss:4.103857182558066, train accuracy:0.2207222222222222, validation loss:3.8472311750658523, validation accuracy:0.24783333333333332
epoch:3, train loss:3.5729184034140067, train accuracy:0.290962962962963, validation loss:3.4445753660055884, validation accuracy:0.305
epoch:4, train loss:3.2205359270018894, train accuracy:0.3469814814814815, validation loss:3.159360765154394, validation accuracy:0.355
epoch:5, train loss:2.9630491408685455, train accuracy:0.39122222222222225, validation loss:2.94102901793814, validation accuracy:0.3933333333333333


0,1
train accuracy,▁▃▅▇█
train loss,█▅▃▂▁
validation accuracy,▁▃▅▇█
validation loss,█▅▃▂▁

0,1
train accuracy,0.39122
train loss,2.96305
validation accuracy,0.39333
validation loss,2.94103


[34m[1mwandb[0m: Agent Starting Run: aq7dd0zi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.457155803316664, train accuracy:0.10077777777777777, validation loss:2.4478401194178185, validation accuracy:0.10366666666666667
epoch:2, train loss:2.449833064403763, train accuracy:0.10244444444444445, validation loss:2.440210685696269, validation accuracy:0.10633333333333334
epoch:3, train loss:2.442053196020216, train accuracy:0.10374074074074074, validation loss:2.4327337960626516, validation accuracy:0.10716666666666666
epoch:4, train loss:2.4347039385135405, train accuracy:0.10494444444444445, validation loss:2.4257184277718706, validation accuracy:0.108
epoch:5, train loss:2.427837049338262, train accuracy:0.1058888888888889, validation loss:2.4188684922305406, validation accuracy:0.108


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▅▇█
train loss,█▆▄▃▁
validation accuracy,▁▅▇██
validation loss,█▆▄▃▁

0,1
train accuracy,0.10589
train loss,2.42784
validation accuracy,0.108
validation loss,2.41887


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lz0p40af with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.0271567557132246, train accuracy:0.6796111111111112, validation loss:0.6261505650247209, validation accuracy:0.7943333333333333
epoch:2, train loss:0.5562074564738859, train accuracy:0.8092962962962963, validation loss:0.5112464578980891, validation accuracy:0.82
epoch:3, train loss:0.4874112473901241, train accuracy:0.8283888888888888, validation loss:0.4734734732365749, validation accuracy:0.8305
epoch:4, train loss:0.4569643329758631, train accuracy:0.8377592592592593, validation loss:0.45396080022418067, validation accuracy:0.8365
epoch:5, train loss:0.43829597554806765, train accuracy:0.844037037037037, validation loss:0.43553663968135536, validation accuracy:0.8413333333333334
epoch:6, train loss:0.4243796320798082, train accuracy:0.8495925925925926, validation loss:0.4275000402765189, validation accuracy:0.8445
epoch:7, train loss:0.41288840003547, train accuracy:0.852925925925926, validation loss:0.4176260623507241, validation accuracy:0.8493333333333334
e

0,1
train accuracy,▁▆▇▇▇█████
train loss,█▃▂▂▂▁▁▁▁▁
validation accuracy,▁▄▅▆▆▇▇▇██
validation loss,█▄▃▃▂▂▂▁▁▁

0,1
train accuracy,0.86222
train loss,0.38814
validation accuracy,0.85717
validation loss,0.39665


[34m[1mwandb[0m: Agent Starting Run: t103z3ch with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:6.313606201520925, train accuracy:0.15394444444444444, validation loss:4.743921392801082, validation accuracy:0.223
epoch:2, train loss:3.962634966821459, train accuracy:0.29348148148148145, validation loss:3.5358775828963287, validation accuracy:0.33316666666666667
epoch:3, train loss:3.092473927879412, train accuracy:0.38951851851851854, validation loss:2.9286205227656494, validation accuracy:0.4085
epoch:4, train loss:2.603490439723596, train accuracy:0.4513888888888889, validation loss:2.5790725396916296, validation accuracy:0.456
epoch:5, train loss:2.2982697113280324, train accuracy:0.4937962962962963, validation loss:2.366526153743036, validation accuracy:0.4861666666666667
epoch:6, train loss:2.066256550466783, train accuracy:0.5266666666666666, validation loss:2.211393972824133, validation accuracy:0.5126666666666667
epoch:7, train loss:1.8918455722620724, train accuracy:0.5524259259259259, validation loss:2.101873339743165, validation accuracy:0.525
epoch:

VBox(children=(Label(value='0.001 MB of 0.025 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.044298…

0,1
train accuracy,▁▃▅▆▆▇▇▇██
train loss,█▅▃▃▂▂▂▁▁▁
validation accuracy,▁▃▅▆▆▇▇███
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.60678
train loss,1.53156
validation accuracy,0.5585
validation loss,1.82921


[34m[1mwandb[0m: Agent Starting Run: 623gdafj with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.5925864521694355, train accuracy:0.5255, validation loss:1.002966284048888, validation accuracy:0.6873333333333334
epoch:2, train loss:0.8546964839864766, train accuracy:0.7302962962962963, validation loss:0.7734224153685121, validation accuracy:0.7546666666666667
epoch:3, train loss:0.7220357952066052, train accuracy:0.7697962962962963, validation loss:0.6853931441481504, validation accuracy:0.7831666666666667
epoch:4, train loss:0.6540872242287099, train accuracy:0.7921666666666667, validation loss:0.6313512315052706, validation accuracy:0.7958333333333333
epoch:5, train loss:0.6074715973457316, train accuracy:0.8053333333333333, validation loss:0.5930215913305248, validation accuracy:0.8078333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇██
train loss,█▃▂▁▁
validation accuracy,▁▅▇▇█
validation loss,█▄▃▂▁

0,1
train accuracy,0.80533
train loss,0.60747
validation accuracy,0.80783
validation loss,0.59302


[34m[1mwandb[0m: Agent Starting Run: 1x4b5v5t with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:11.53761517369531, train accuracy:0.14196296296296296, validation loss:7.4445949496184705, validation accuracy:0.23816666666666667
epoch:2, train loss:5.625795181546251, train accuracy:0.2951666666666667, validation loss:4.565232952363553, validation accuracy:0.3238333333333333
epoch:3, train loss:4.055168678658599, train accuracy:0.33866666666666667, validation loss:3.6369329715500767, validation accuracy:0.3581666666666667
epoch:4, train loss:3.365691614431596, train accuracy:0.35755555555555557, validation loss:3.121817944699374, validation accuracy:0.36783333333333335
epoch:5, train loss:2.94600737874055, train accuracy:0.3701111111111111, validation loss:2.7888805486316253, validation accuracy:0.374
epoch:6, train loss:2.664122700688449, train accuracy:0.3764074074074074, validation loss:2.551561983354039, validation accuracy:0.387
epoch:7, train loss:2.458566586344847, train accuracy:0.3862037037037037, validation loss:2.368972903995162, validation accuracy:0.

0,1
train accuracy,▁▅▆▇▇▇▇▇██
train loss,█▄▂▂▂▁▁▁▁▁
validation accuracy,▁▄▅▆▆▇▇▇██
validation loss,█▄▃▂▂▂▁▁▁▁

0,1
train accuracy,0.41567
train loss,2.0622
validation accuracy,0.42617
validation loss,2.01804


[34m[1mwandb[0m: Agent Starting Run: aycmjj94 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:5.232025210623972, train accuracy:0.21572222222222223, validation loss:3.816683933046608, validation accuracy:0.31416666666666665
epoch:2, train loss:3.2297284685459173, train accuracy:0.3748888888888889, validation loss:2.904319652816501, validation accuracy:0.4096666666666667
epoch:3, train loss:2.537850944901987, train accuracy:0.46316666666666667, validation loss:2.450394141855014, validation accuracy:0.466
epoch:4, train loss:2.1549137066772257, train accuracy:0.5090740740740741, validation loss:2.12355733263511, validation accuracy:0.5085
epoch:5, train loss:1.8930939838287775, train accuracy:0.5461296296296296, validation loss:1.9464334500655371, validation accuracy:0.5383333333333333
epoch:6, train loss:1.726189364367999, train accuracy:0.5656296296296296, validation loss:1.8305590075649194, validation accuracy:0.5448333333333333
epoch:7, train loss:1.582941565525009, train accuracy:0.586462962962963, validation loss:1.7525060527647975, validation accuracy:0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▄▅▆▇▇▇███
train loss,█▄▃▂▂▂▁▁▁▁
validation accuracy,▁▃▅▆▇▇▇███
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.6253
train loss,1.31605
validation accuracy,0.59183
validation loss,1.58159


In [None]:
# Start a W&B sweep agent: it pulls hyperparameter configurations for
# `sweep_id` from the sweep server and calls `train` once per configuration,
# stopping after 50 runs.
wandb.agent(
    sweep_id=sweep_id,
    function=train,
    count=50,
)

[34m[1mwandb[0m: Agent Starting Run: qg01qmg4 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.8053795534416206, train accuracy:0.4621296296296296, validation loss:1.2928978666354087, validation accuracy:0.6206666666666667
epoch:2, train loss:1.0547027763203662, train accuracy:0.6679444444444445, validation loss:0.8970867606029355, validation accuracy:0.7021666666666667
epoch:3, train loss:0.8102002831978022, train accuracy:0.7321851851851852, validation loss:0.7460311328267183, validation accuracy:0.7541666666666667
epoch:4, train loss:0.7033200166607706, train accuracy:0.7661666666666667, validation loss:0.6675164080224869, validation accuracy:0.7771666666666667
epoch:5, train loss:0.6414647027433853, train accuracy:0.7871666666666667, validation loss:0.61845440539408, validation accuracy:0.7926666666666666


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▇██
train loss,█▃▂▁▁
validation accuracy,▁▄▆▇█
validation loss,█▄▂▂▁

0,1
train accuracy,0.78717
train loss,0.64146
validation accuracy,0.79267
validation loss,0.61845


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vhclu1mj with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.4011265805869986, train accuracy:0.09964814814814815, validation loss:2.338328025879505, validation accuracy:0.10316666666666667
epoch:2, train loss:2.320015585971715, train accuracy:0.09964814814814815, validation loss:2.310520925883134, validation accuracy:0.10316666666666667
epoch:3, train loss:2.306137628947415, train accuracy:0.09964814814814815, validation loss:2.304539712810183, validation accuracy:0.10316666666666667
epoch:4, train loss:2.3034185753237395, train accuracy:0.09964814814814815, validation loss:2.3032858621062307, validation accuracy:0.10316666666666667
epoch:5, train loss:2.302900091961673, train accuracy:0.08444444444444445, validation loss:2.3029882420570043, validation accuracy:0.10316666666666667


VBox(children=(Label(value='0.001 MB of 0.017 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.064872…

0,1
train accuracy,████▁
train loss,█▂▁▁▁
validation accuracy,▁▁▁▁▁
validation loss,█▂▁▁▁

0,1
train accuracy,0.08444
train loss,2.3029
validation accuracy,0.10317
validation loss,2.30299


[34m[1mwandb[0m: Agent Starting Run: spainrg6 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.8616233183545612, train accuracy:0.36414814814814817, validation loss:1.3052747141450634, validation accuracy:0.6416666666666667
epoch:2, train loss:1.0445267115177699, train accuracy:0.6918333333333333, validation loss:0.8856326214883542, validation accuracy:0.7216666666666667
epoch:3, train loss:0.8051044689493412, train accuracy:0.7369629629629629, validation loss:0.7445593705986882, validation accuracy:0.7556666666666667
epoch:4, train loss:0.7037543162315352, train accuracy:0.762537037037037, validation loss:0.6733924002937796, validation accuracy:0.77
epoch:5, train loss:0.6455779807046045, train accuracy:0.7802777777777777, validation loss:0.6276727459673253, validation accuracy:0.7865
epoch:6, train loss:0.6082490659970148, train accuracy:0.7935925925925926, validation loss:0.5907470866960035, validation accuracy:0.8023333333333333
epoch:7, train loss:0.5815000300259521, train accuracy:0.8012407407407407, validation loss:0.5684417154335438, validation accu

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇▇▇█████
train loss,█▄▂▂▂▁▁▁▁▁
validation accuracy,▁▄▅▆▇▇▇███
validation loss,█▄▃▂▂▂▁▁▁▁

0,1
train accuracy,0.81707
train loss,0.53475
validation accuracy,0.8235
validation loss,0.52732


[34m[1mwandb[0m: Agent Starting Run: nkxyihe3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.5182470599387607, train accuracy:0.09916666666666667, validation loss:2.5017821041898674, validation accuracy:0.1075
epoch:2, train loss:2.5159024222699635, train accuracy:0.09916666666666667, validation loss:2.499732091502128, validation accuracy:0.1075
epoch:3, train loss:2.5137829059071524, train accuracy:0.09916666666666667, validation loss:2.497707460600966, validation accuracy:0.1075
epoch:4, train loss:2.5116592452719995, train accuracy:0.09916666666666667, validation loss:2.4955770861763726, validation accuracy:0.1075
epoch:5, train loss:2.5095256759322657, train accuracy:0.09916666666666667, validation loss:2.4935883630497506, validation accuracy:0.1075


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁
train loss,█▆▄▃▁
validation accuracy,▁▁▁▁▁
validation loss,█▆▅▃▁

0,1
train accuracy,0.09917
train loss,2.50953
validation accuracy,0.1075
validation loss,2.49359


[34m[1mwandb[0m: Agent Starting Run: xjem4row with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:2.347657330873966, train accuracy:0.38775925925925925, validation loss:1.5080252139990633, validation accuracy:0.5296666666666666
epoch:2, train loss:1.2240339239024811, train accuracy:0.5976851851851852, validation loss:1.1192261947231272, validation accuracy:0.6255
epoch:3, train loss:0.9794248288377243, train accuracy:0.6682407407407407, validation loss:0.9500160084835586, validation accuracy:0.6751666666666667
epoch:4, train loss:0.8670783756218997, train accuracy:0.7047962962962963, validation loss:0.8555294441414777, validation accuracy:0.703
epoch:5, train loss:0.7999305751134134, train accuracy:0.7272592592592593, validation loss:0.8030098932067326, validation accuracy:0.7238333333333333
epoch:6, train loss:0.7526100231139937, train accuracy:0.7432037037037037, validation loss:0.7636034548221105, validation accuracy:0.7333333333333333
epoch:7, train loss:0.7177263479501275, train accuracy:0.7551296296296296, validation loss:0.7400898253129322, validation acc

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇▇▇████
train loss,█▃▂▂▂▁▁▁▁▁
validation accuracy,▁▄▅▆▇▇▇███
validation loss,█▅▃▂▂▂▁▁▁▁

0,1
train accuracy,0.77852
train loss,0.64971
validation accuracy,0.76083
validation loss,0.68846


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5a6ayofs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:6.280001687396365, train accuracy:0.2462777777777778, validation loss:4.8764138387591505, validation accuracy:0.36566666666666664
epoch:2, train loss:4.31737089292088, train accuracy:0.43342592592592594, validation loss:4.060514100828444, validation accuracy:0.46016666666666667
epoch:3, train loss:3.756861050119973, train accuracy:0.5086666666666667, validation loss:3.7091357324075895, validation accuracy:0.5143333333333333
epoch:4, train loss:3.4645416479570823, train accuracy:0.550537037037037, validation loss:3.4990357595071937, validation accuracy:0.5456666666666666
epoch:5, train loss:3.2747659866733287, train accuracy:0.5792037037037037, validation loss:3.3562498661610904, validation accuracy:0.5618333333333333
epoch:6, train loss:3.13924101447944, train accuracy:0.6007962962962963, validation loss:3.2503362735602943, validation accuracy:0.5795
epoch:7, train loss:3.04051080701848, train accuracy:0.6170740740740741, validation loss:3.186352575913431, validatio

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▄▆▆▇▇▇███
train loss,█▄▃▂▂▂▁▁▁▁
validation accuracy,▁▄▅▆▇▇▇███
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.65017
train loss,2.8436
validation accuracy,0.61533
validation loss,3.03267


[34m[1mwandb[0m: Agent Starting Run: lxh0ww6y with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.6174840875392946, train accuracy:0.09942592592592593, validation loss:2.5752971673838876, validation accuracy:0.10516666666666667
epoch:2, train loss:2.55608105420746, train accuracy:0.09942592592592593, validation loss:2.522245218723309, validation accuracy:0.10516666666666667
epoch:3, train loss:2.508223045118707, train accuracy:0.09942592592592593, validation loss:2.4808259883853196, validation accuracy:0.10516666666666667
epoch:4, train loss:2.470622175235105, train accuracy:0.09942592592592593, validation loss:2.448225506904744, validation accuracy:0.10516666666666667
epoch:5, train loss:2.4408542778237616, train accuracy:0.09942592592592593, validation loss:2.422379701440955, validation accuracy:0.10516666666666667
epoch:6, train loss:2.417115093378932, train accuracy:0.09942592592592593, validation loss:2.401758226506291, validation accuracy:0.10516666666666667
epoch:7, train loss:2.3980506346163604, train accuracy:0.09942592592592593, validation loss:2.385

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁▁▁▁▁▁
train loss,█▆▅▄▃▃▂▂▁▁
validation accuracy,▁▁▁▁▁▁▁▁▁▁
validation loss,█▆▅▄▃▃▂▂▁▁

0,1
train accuracy,0.09943
train loss,2.35996
validation accuracy,0.10517
validation loss,2.35213


[34m[1mwandb[0m: Agent Starting Run: rpv6rmfe with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.008434458888477, train accuracy:0.31475925925925924, validation loss:1.7762711819365393, validation accuracy:0.442
epoch:2, train loss:1.6176383630136937, train accuracy:0.5136481481481482, validation loss:1.493867370014857, validation accuracy:0.5693333333333334
epoch:3, train loss:1.3753283623853456, train accuracy:0.6175555555555555, validation loss:1.2877981568172596, validation accuracy:0.6443333333333333
epoch:4, train loss:1.1958193146888567, train accuracy:0.6617037037037037, validation loss:1.1333884866087058, validation accuracy:0.6703333333333333
epoch:5, train loss:1.0630639493818905, train accuracy:0.6810925925925926, validation loss:1.0200958652268224, validation accuracy:0.6875


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▇██
train loss,█▅▃▂▁
validation accuracy,▁▅▇██
validation loss,█▅▃▂▁

0,1
train accuracy,0.68109
train loss,1.06306
validation accuracy,0.6875
validation loss,1.0201


[34m[1mwandb[0m: Agent Starting Run: 8jjka6zr with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:7.342732343721093, train accuracy:0.5428333333333333, validation loss:5.916178920852496, validation accuracy:0.6008333333333333
epoch:2, train loss:5.124453815776008, train accuracy:0.6179814814814815, validation loss:4.598717972108851, validation accuracy:0.6373333333333333
epoch:3, train loss:4.230571068554207, train accuracy:0.6376481481481482, validation loss:3.868438075551492, validation accuracy:0.6531666666666667
epoch:4, train loss:3.6408662557390223, train accuracy:0.6495, validation loss:3.4505112537907863, validation accuracy:0.6561666666666667
epoch:5, train loss:3.196446117142419, train accuracy:0.6545185185185185, validation loss:3.0004764105538153, validation accuracy:0.6648333333333334
epoch:6, train loss:2.842889625790889, train accuracy:0.6612037037037037, validation loss:2.670509566168139, validation accuracy:0.6636666666666666
epoch:7, train loss:2.565170433149013, train accuracy:0.6652777777777777, validation loss:2.284270440206807, validation a

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇▇▇████
train loss,█▅▄▃▃▂▂▁▁▁
validation accuracy,▁▄▅▆▆▆▇█▇█
validation loss,█▆▄▄▃▂▂▂▁▁

0,1
train accuracy,0.6737
train loss,2.00477
validation accuracy,0.68333
validation loss,1.8775


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iqugk5q7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.563209561687526, train accuracy:0.09916666666666667, validation loss:2.5277030599162504, validation accuracy:0.1075
epoch:2, train loss:2.5132755236836424, train accuracy:0.09916666666666667, validation loss:2.48301303962576, validation accuracy:0.1075
epoch:3, train loss:2.470720705613719, train accuracy:0.09916666666666667, validation loss:2.4454305998385357, validation accuracy:0.1075
epoch:4, train loss:2.4354200851222574, train accuracy:0.09916666666666667, validation loss:2.4142455643276532, validation accuracy:0.1075
epoch:5, train loss:2.405967946521343, train accuracy:0.09916666666666667, validation loss:2.388489070743362, validation accuracy:0.1075


0,1
train accuracy,▁▁▁▁▁
train loss,█▆▄▂▁
validation accuracy,▁▁▁▁▁
validation loss,█▆▄▂▁

0,1
train accuracy,0.09917
train loss,2.40597
validation accuracy,0.1075
validation loss,2.38849


[34m[1mwandb[0m: Agent Starting Run: 949ca2oh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.5999166024344227, train accuracy:0.10016666666666667, validation loss:2.553715755354979, validation accuracy:0.0985
epoch:2, train loss:2.513007439862275, train accuracy:0.10016666666666667, validation loss:2.4771531777414424, validation accuracy:0.0985
epoch:3, train loss:2.4472886978919917, train accuracy:0.10016666666666667, validation loss:2.420280846702573, validation accuracy:0.0985
epoch:4, train loss:2.399177836304816, train accuracy:0.10016666666666667, validation loss:2.380307752979057, validation accuracy:0.0985
epoch:5, train loss:2.3666207873562968, train accuracy:0.10016666666666667, validation loss:2.353784429539059, validation accuracy:0.0985
epoch:6, train loss:2.3455083074897214, train accuracy:0.10016666666666667, validation loss:2.3375920186662227, validation accuracy:0.0985
epoch:7, train loss:2.332756414577171, train accuracy:0.10016666666666667, validation loss:2.327977927784999, validation accuracy:0.0985
epoch:8, train loss:2.3252553575853

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁▁▁▁▁█
train loss,█▆▄▃▂▂▁▁▁▁
validation accuracy,▁▁▁▁▁▁▁▁▂█
validation loss,█▆▄▃▂▂▁▁▁▁

0,1
train accuracy,0.13911
train loss,2.31736
validation accuracy,0.15617
validation loss,2.31611


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iwu1o4vg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.326381195702258, train accuracy:0.12048148148148148, validation loss:2.2946397572472415, validation accuracy:0.17983333333333335
epoch:2, train loss:2.254986257612845, train accuracy:0.2121851851851852, validation loss:2.16755700600363, validation accuracy:0.198
epoch:3, train loss:1.991780447647674, train accuracy:0.20287037037037037, validation loss:1.8408874697788329, validation accuracy:0.20666666666666667
epoch:4, train loss:1.7691591628625216, train accuracy:0.21164814814814814, validation loss:1.7187578745070013, validation accuracy:0.2821666666666667
epoch:5, train loss:1.6608569855550817, train accuracy:0.29944444444444446, validation loss:1.6059113463244645, validation accuracy:0.32016666666666665


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▄▅█
train loss,█▇▄▂▁
validation accuracy,▁▂▂▆█
validation loss,█▇▃▂▁

0,1
train accuracy,0.29944
train loss,1.66086
validation accuracy,0.32017
validation loss,1.60591


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jtr8yxp2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.3275624209157852, train accuracy:0.10074074074074074, validation loss:2.3028605165660103, validation accuracy:0.09733333333333333
epoch:2, train loss:2.303018940432217, train accuracy:0.09757407407407408, validation loss:2.3029997800808686, validation accuracy:0.09433333333333334
epoch:3, train loss:2.3030623593056614, train accuracy:0.09801851851851852, validation loss:2.3030678887783225, validation accuracy:0.10166666666666667
epoch:4, train loss:2.3030138768026975, train accuracy:0.09764814814814815, validation loss:2.3027775462034628, validation accuracy:0.0985
epoch:5, train loss:2.303021345667703, train accuracy:0.09662962962962963, validation loss:2.3029983579574536, validation accuracy:0.09816666666666667


0,1
train accuracy,█▃▃▃▁
train loss,█▁▁▁▁
validation accuracy,▄▁█▅▅
validation loss,▃▆█▁▆

0,1
train accuracy,0.09663
train loss,2.30302
validation accuracy,0.09817
validation loss,2.303


[34m[1mwandb[0m: Agent Starting Run: eu0rna0f with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:3.268907856650145, train accuracy:0.28629629629629627, validation loss:2.230402002889449, validation accuracy:0.4191666666666667
epoch:2, train loss:1.8526118812380044, train accuracy:0.48862962962962964, validation loss:1.603229376632136, validation accuracy:0.5355
epoch:3, train loss:1.4534281917244114, train accuracy:0.570462962962963, validation loss:1.3413570388901843, validation accuracy:0.5941666666666666
epoch:4, train loss:1.2514663236184804, train accuracy:0.6162962962962963, validation loss:1.1938354376934044, validation accuracy:0.6278333333333334
epoch:5, train loss:1.120907028958186, train accuracy:0.6469444444444444, validation loss:1.093970462652309, validation accuracy:0.6583333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▇▇█
train loss,█▃▂▁▁
validation accuracy,▁▄▆▇█
validation loss,█▄▃▂▁

0,1
train accuracy,0.64694
train loss,1.12091
validation accuracy,0.65833
validation loss,1.09397


[34m[1mwandb[0m: Agent Starting Run: s2fw7y04 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:18.207073444080443, train accuracy:0.09659259259259259, validation loss:18.095112753597927, validation accuracy:0.10266666666666667
epoch:2, train loss:18.309228135012678, train accuracy:0.09055555555555556, validation loss:18.207679781874287, validation accuracy:0.0955
epoch:3, train loss:18.312127916192047, train accuracy:0.08957407407407407, validation loss:18.265330622170378, validation accuracy:0.092
epoch:4, train loss:18.045658253750485, train accuracy:0.10342592592592592, validation loss:17.860106218063752, validation accuracy:0.11266666666666666
epoch:5, train loss:17.454345394307936, train accuracy:0.1347962962962963, validation loss:17.285357175052102, validation accuracy:0.14133333333333334


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▂▁▁▃█
train loss,▇██▆▁
validation accuracy,▃▁▁▄█
validation loss,▇██▅▁

0,1
train accuracy,0.1348
train loss,17.45435
validation accuracy,0.14133
validation loss,17.28536


[34m[1mwandb[0m: Agent Starting Run: bombnay3 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.2268117087757062, train accuracy:0.5930555555555556, validation loss:0.7324532595223541, validation accuracy:0.7483333333333333
epoch:2, train loss:0.6523133297462188, train accuracy:0.7826851851851852, validation loss:0.5903949980909616, validation accuracy:0.808
epoch:3, train loss:0.5638403125624808, train accuracy:0.8146296296296296, validation loss:0.5366487591505334, validation accuracy:0.818
epoch:4, train loss:0.524036637706117, train accuracy:0.8251296296296297, validation loss:0.506323928109196, validation accuracy:0.8278333333333333
epoch:5, train loss:0.5022875011461173, train accuracy:0.8318703703703704, validation loss:0.4951142698249425, validation accuracy:0.8355


VBox(children=(Label(value='0.001 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.045559…

0,1
train accuracy,▁▇▇██
train loss,█▂▂▁▁
validation accuracy,▁▆▇▇█
validation loss,█▄▂▁▁

0,1
train accuracy,0.83187
train loss,0.50229
validation accuracy,0.8355
validation loss,0.49511


[34m[1mwandb[0m: Agent Starting Run: mrfo3otd with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:4.643631148668726, train accuracy:0.26587037037037037, validation loss:2.370543482885009, validation accuracy:0.35133333333333333
epoch:2, train loss:2.024513859129343, train accuracy:0.40796296296296297, validation loss:1.8352575487606673, validation accuracy:0.43233333333333335
epoch:3, train loss:1.697166220090992, train accuracy:0.4605740740740741, validation loss:1.6287761117399562, validation accuracy:0.4653333333333333
epoch:4, train loss:1.5431866941162573, train accuracy:0.48924074074074075, validation loss:1.5126694348894896, validation accuracy:0.49
epoch:5, train loss:1.4481013499748738, train accuracy:0.5099814814814815, validation loss:1.4347144414982451, validation accuracy:0.5065


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▇▇█
train loss,█▂▂▁▁
validation accuracy,▁▅▆▇█
validation loss,█▄▂▂▁

0,1
train accuracy,0.50998
train loss,1.4481
validation accuracy,0.5065
validation loss,1.43471


[34m[1mwandb[0m: Agent Starting Run: 9r6zplyp with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.5213410257545577, train accuracy:0.10433333333333333, validation loss:2.417328400936573, validation accuracy:0.1455
epoch:2, train loss:2.329732043501748, train accuracy:0.28024074074074073, validation loss:2.2826366729174663, validation accuracy:0.4085
epoch:3, train loss:2.2259415083837872, train accuracy:0.4677777777777778, validation loss:2.188316891360885, validation accuracy:0.4683333333333333
epoch:4, train loss:2.140099410427271, train accuracy:0.5257037037037037, validation loss:2.1016767561297285, validation accuracy:0.5433333333333333
epoch:5, train loss:2.049041094229509, train accuracy:0.5433148148148148, validation loss:2.0048222017814465, validation accuracy:0.5283333333333333


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▄▇██
train loss,█▅▄▂▁
validation accuracy,▁▆▇██
validation loss,█▆▄▃▁

0,1
train accuracy,0.54331
train loss,2.04904
validation accuracy,0.52833
validation loss,2.00482


[34m[1mwandb[0m: Agent Starting Run: sbq3vmma with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

epoch:1, train loss:2.574462437751123, train accuracy:0.045425925925925925, validation loss:2.4447769607125434, validation accuracy:0.0785
epoch:2, train loss:2.3437003360794466, train accuracy:0.09838888888888889, validation loss:2.2462712872122177, validation accuracy:0.129
epoch:3, train loss:2.1749877962457487, train accuracy:0.18485185185185185, validation loss:2.108755459141239, validation accuracy:0.24583333333333332
epoch:4, train loss:2.056096164221487, train accuracy:0.27770370370370373, validation loss:2.0046586484440048, validation accuracy:0.30983333333333335
epoch:5, train loss:1.9599728928239486, train accuracy:0.32655555555555554, validation loss:1.9164667558583346, validation accuracy:0.35033333333333333
epoch:6, train loss:1.8772246474956542, train accuracy:0.3607962962962963, validation loss:1.8388644939162784, validation accuracy:0.37916666666666665
epoch:7, train loss:1.8033096376516977, train accuracy:0.3897962962962963, validation loss:1.7689613333925953, validat

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▃▅▆▆▇▇██
train loss,█▆▅▄▃▃▂▂▁▁
validation accuracy,▁▂▄▅▆▆▇▇██
validation loss,█▆▅▄▄▃▂▂▁▁

0,1
train accuracy,0.48074
train loss,1.62226
validation accuracy,0.49433
validation loss,1.59807


[34m[1mwandb[0m: Agent Starting Run: xv87g6cj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:6.271291040479149, train accuracy:0.09651851851851852, validation loss:6.167389465964502, validation accuracy:0.09416666666666666
epoch:2, train loss:5.967416413252413, train accuracy:0.10462962962962963, validation loss:5.8891544833827885, validation accuracy:0.10316666666666667
epoch:3, train loss:5.701336771661403, train accuracy:0.11335185185185186, validation loss:5.636239440716554, validation accuracy:0.11066666666666666
epoch:4, train loss:5.459133060078896, train accuracy:0.1247962962962963, validation loss:5.401868280296413, validation accuracy:0.12183333333333334
epoch:5, train loss:5.235519447786837, train accuracy:0.13598148148148148, validation loss:5.1867262294714305, validation accuracy:0.13416666666666666


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▄▆█
train loss,█▆▄▃▁
validation accuracy,▁▃▄▆█
validation loss,█▆▄▃▁

0,1
train accuracy,0.13598
train loss,5.23552
validation accuracy,0.13417
validation loss,5.18673


[34m[1mwandb[0m: Agent Starting Run: oncsu3bl with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.2058655855399394, train accuracy:0.2072037037037037, validation loss:1.9228904666929822, validation accuracy:0.22133333333333333
epoch:2, train loss:1.7096156793536237, train accuracy:0.35474074074074075, validation loss:1.5435631583052327, validation accuracy:0.3988333333333333
epoch:3, train loss:1.4033180491994701, train accuracy:0.4368148148148148, validation loss:1.3014316793406033, validation accuracy:0.4638333333333333
epoch:4, train loss:1.2225619746486696, train accuracy:0.5454814814814815, validation loss:1.1582296192541557, validation accuracy:0.579
epoch:5, train loss:1.0922482216427123, train accuracy:0.6105185185185186, validation loss:1.0368690730968952, validation accuracy:0.6081666666666666


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▄▅▇█
train loss,█▅▃▂▁
validation accuracy,▁▄▅▇█
validation loss,█▅▃▂▁

0,1
train accuracy,0.61052
train loss,1.09225
validation accuracy,0.60817
validation loss,1.03687


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gustixjv with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:8.17700950468422, train accuracy:0.3290925925925926, validation loss:5.5147123323035325, validation accuracy:0.416
epoch:2, train loss:4.376798856125634, train accuracy:0.4177222222222222, validation loss:3.829433640100108, validation accuracy:0.45166666666666666
epoch:3, train loss:3.341668899359643, train accuracy:0.4557222222222222, validation loss:3.038602852399537, validation accuracy:0.4771666666666667
epoch:4, train loss:2.75129304646247, train accuracy:0.47403703703703703, validation loss:2.6524207092995598, validation accuracy:0.4693333333333333
epoch:5, train loss:2.369018640824917, train accuracy:0.4925925925925926, validation loss:2.2410853231904406, validation accuracy:0.4995


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇█
train loss,█▃▂▁▁
validation accuracy,▁▄▆▅█
validation loss,█▄▃▂▁

0,1
train accuracy,0.49259
train loss,2.36902
validation accuracy,0.4995
validation loss,2.24109


[34m[1mwandb[0m: Agent Starting Run: a6jg9ikk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:4.423111927834371, train accuracy:0.13807407407407407, validation loss:3.8832801364666607, validation accuracy:0.163
epoch:2, train loss:3.461468121668868, train accuracy:0.19, validation loss:3.1511104403036896, validation accuracy:0.2225
epoch:3, train loss:2.9052787607157304, train accuracy:0.2529814814814815, validation loss:2.7299112473857026, validation accuracy:0.2816666666666667
epoch:4, train loss:2.547606963027914, train accuracy:0.31016666666666665, validation loss:2.4517543331634015, validation accuracy:0.32416666666666666
epoch:5, train loss:2.2990842195750347, train accuracy:0.3516296296296296, validation loss:2.2430877822548605, validation accuracy:0.3606666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▃▅▇█
train loss,█▅▃▂▁
validation accuracy,▁▃▅▇█
validation loss,█▅▃▂▁

0,1
train accuracy,0.35163
train loss,2.29908
validation accuracy,0.36067
validation loss,2.24309


[34m[1mwandb[0m: Agent Starting Run: qh5x74b4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:0.8207656060326195, train accuracy:0.748462962962963, validation loss:0.5488149149404719, validation accuracy:0.8165
epoch:2, train loss:0.5121170576404851, train accuracy:0.8279074074074074, validation loss:0.487484133450127, validation accuracy:0.8383333333333334
epoch:3, train loss:0.47078308568151667, train accuracy:0.8423703703703703, validation loss:0.46369492386802996, validation accuracy:0.8448333333333333
epoch:4, train loss:0.4489598766158118, train accuracy:0.8501481481481481, validation loss:0.4484013541291773, validation accuracy:0.8521666666666666
epoch:5, train loss:0.4338649350139555, train accuracy:0.8546666666666667, validation loss:0.4352116728172983, validation accuracy:0.8581666666666666
epoch:6, train loss:0.4226714760200267, train accuracy:0.8591481481481481, validation loss:0.4282077338773069, validation accuracy:0.8576666666666667
epoch:7, train loss:0.41340190348651146, train accuracy:0.8621481481481481, validation loss:0.42562686206638073,

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▆▇▇▇████
train loss,█▃▂▂▂▁▁▁▁▁
validation accuracy,▁▄▅▆▇▇▇▇██
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.86939
train loss,0.39337
validation accuracy,0.86617
validation loss,0.40719


[34m[1mwandb[0m: Agent Starting Run: qhe45b2x with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.5373169332369567, train accuracy:0.10062962962962962, validation loss:2.535452741897114, validation accuracy:0.09433333333333334
epoch:2, train loss:2.528616107042852, train accuracy:0.10062962962962962, validation loss:2.5267437479288914, validation accuracy:0.09433333333333334
epoch:3, train loss:2.5203643415169843, train accuracy:0.10062962962962962, validation loss:2.5186323299306097, validation accuracy:0.09433333333333334
epoch:4, train loss:2.5122941039811546, train accuracy:0.10062962962962962, validation loss:2.5103832078068424, validation accuracy:0.09433333333333334
epoch:5, train loss:2.5047059653255928, train accuracy:0.10062962962962962, validation loss:2.503012224740562, validation accuracy:0.09433333333333334


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁
train loss,█▆▄▃▁
validation accuracy,▁▁▁▁▁
validation loss,█▆▄▃▁

0,1
train accuracy,0.10063
train loss,2.50471
validation accuracy,0.09433
validation loss,2.50301


[34m[1mwandb[0m: Agent Starting Run: 0o3ay8dw with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:2.2619093203259126, train accuracy:0.5490555555555555, validation loss:1.3590177214681627, validation accuracy:0.6266666666666667
epoch:2, train loss:1.2562010050087347, train accuracy:0.6593148148148148, validation loss:1.1720030462621915, validation accuracy:0.664
epoch:3, train loss:1.1421641541660932, train accuracy:0.6897962962962964, validation loss:1.105452945433611, validation accuracy:0.6896666666666667
epoch:4, train loss:1.0900483728541128, train accuracy:0.7057037037037037, validation loss:1.0626809881117887, validation accuracy:0.705
epoch:5, train loss:1.0566413035345787, train accuracy:0.7147962962962963, validation loss:1.0325045743593453, validation accuracy:0.7221666666666666
epoch:6, train loss:1.0316401966121238, train accuracy:0.7227777777777777, validation loss:1.0264187089301573, validation accuracy:0.7226666666666667
epoch:7, train loss:1.01182435679487, train accuracy:0.732925925925926, validation loss:1.003045616256673, validation accuracy:

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇▇▇▇███
train loss,█▃▂▂▁▁▁▁▁▁
validation accuracy,▁▃▄▅▆▆▇▇▇█
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.74778
train loss,0.96853
validation accuracy,0.75417
validation loss,0.95946


[34m[1mwandb[0m: Agent Starting Run: rs28rwzj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:6.904753775264803, train accuracy:0.2177037037037037, validation loss:5.101933639550905, validation accuracy:0.33666666666666667
epoch:2, train loss:4.430318132693298, train accuracy:0.418, validation loss:4.0219705033022315, validation accuracy:0.4663333333333333
epoch:3, train loss:3.6844496511863714, train accuracy:0.5184814814814814, validation loss:3.5428363553300914, validation accuracy:0.5303333333333333
epoch:4, train loss:3.325876748018163, train accuracy:0.5737407407407408, validation loss:3.289577254240397, validation accuracy:0.573
epoch:5, train loss:3.0991658618020557, train accuracy:0.6117777777777778, validation loss:3.1294448882763244, validation accuracy:0.596


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇█
train loss,█▃▂▁▁
validation accuracy,▁▄▆▇█
validation loss,█▄▂▂▁

0,1
train accuracy,0.61178
train loss,3.09917
validation accuracy,0.596
validation loss,3.12944


[34m[1mwandb[0m: Agent Starting Run: t3jpita7 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

epoch:1, train loss:2.3192822775080364, train accuracy:0.10711111111111112, validation loss:2.247085002236906, validation accuracy:0.18233333333333332
epoch:2, train loss:2.1541142359551833, train accuracy:0.27011111111111114, validation loss:2.0497010597258076, validation accuracy:0.2966666666666667
epoch:3, train loss:1.8672829251504524, train accuracy:0.3537962962962963, validation loss:1.6738951095746895, validation accuracy:0.43033333333333335
epoch:4, train loss:1.4955108842604226, train accuracy:0.4980185185185185, validation loss:1.3436930805132936, validation accuracy:0.543
epoch:5, train loss:1.2246881373175782, train accuracy:0.5803888888888888, validation loss:1.126313809287739, validation accuracy:0.6118333333333333


VBox(children=(Label(value='0.001 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.062306…

0,1
train accuracy,▁▃▅▇█
train loss,█▇▅▃▁
validation accuracy,▁▃▅▇█
validation loss,█▇▄▂▁

0,1
train accuracy,0.58039
train loss,1.22469
validation accuracy,0.61183
validation loss,1.12631


[34m[1mwandb[0m: Agent Starting Run: 2li7l65j with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.3952098830082194, train accuracy:0.10038888888888889, validation loss:2.3214168405518913, validation accuracy:0.0965
epoch:2, train loss:2.3096159304052932, train accuracy:0.10038888888888889, validation loss:2.3037894201479694, validation accuracy:0.0965
epoch:3, train loss:2.303069452877403, train accuracy:0.09116666666666666, validation loss:2.3029018255272087, validation accuracy:0.09266666666666666
epoch:4, train loss:2.302712865618344, train accuracy:0.09674074074074074, validation loss:2.3027475702700566, validation accuracy:0.09933333333333333
epoch:5, train loss:2.3026897984914982, train accuracy:0.09942592592592593, validation loss:2.302713273119261, validation accuracy:0.0965


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,██▁▅▇
train loss,█▂▁▁▁
validation accuracy,▅▅▁█▅
validation loss,█▁▁▁▁

0,1
train accuracy,0.09943
train loss,2.30269
validation accuracy,0.0965
validation loss,2.30271


[34m[1mwandb[0m: Agent Starting Run: imok1vu0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.1594346176842425, train accuracy:0.6467962962962963, validation loss:0.7416324269983023, validation accuracy:0.7638333333333334
epoch:2, train loss:0.6411748115072089, train accuracy:0.7962222222222223, validation loss:0.5770365772733332, validation accuracy:0.8108333333333333
epoch:3, train loss:0.543901024075331, train accuracy:0.8207962962962962, validation loss:0.5183450316545175, validation accuracy:0.8258333333333333
epoch:4, train loss:0.5030512885515749, train accuracy:0.8300740740740741, validation loss:0.4893821372498596, validation accuracy:0.8346666666666667
epoch:5, train loss:0.47909367286114823, train accuracy:0.8366296296296296, validation loss:0.47472277184136397, validation accuracy:0.8363333333333334
epoch:6, train loss:0.4618207160302802, train accuracy:0.8418888888888889, validation loss:0.45720528546817824, validation accuracy:0.8433333333333334
epoch:7, train loss:0.44856929634463955, train accuracy:0.8463888888888889, validation loss:0.4538

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇▇▇█████
train loss,█▃▂▂▂▁▁▁▁▁
validation accuracy,▁▅▆▇▇▇▇███
validation loss,█▄▃▂▂▂▁▁▁▁

0,1
train accuracy,0.85641
train loss,0.42119
validation accuracy,0.85267
validation loss,0.43229


[34m[1mwandb[0m: Agent Starting Run: w4aryy3u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.5636493394579225, train accuracy:0.10025925925925926, validation loss:2.5478392758320325, validation accuracy:0.09766666666666667
epoch:2, train loss:2.525375413874292, train accuracy:0.10025925925925926, validation loss:2.512859442314389, validation accuracy:0.09766666666666667
epoch:3, train loss:2.4942217536718934, train accuracy:0.10025925925925926, validation loss:2.4842412805266587, validation accuracy:0.09766666666666667
epoch:4, train loss:2.469399532442453, train accuracy:0.10025925925925926, validation loss:2.460843575756484, validation accuracy:0.09766666666666667
epoch:5, train loss:2.44630488067471, train accuracy:0.10025925925925926, validation loss:2.4381516936721814, validation accuracy:0.09766666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁
train loss,█▆▄▂▁
validation accuracy,▁▁▁▁▁
validation loss,█▆▄▂▁

0,1
train accuracy,0.10026
train loss,2.4463
validation accuracy,0.09767
validation loss,2.43815


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 648yq6cz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666656966, max=1.0…

epoch:1, train loss:7.217223025119752, train accuracy:0.11081481481481481, validation loss:6.242635174185044, validation accuracy:0.1445
epoch:2, train loss:5.529683477478802, train accuracy:0.18505555555555556, validation loss:5.081142033425685, validation accuracy:0.21333333333333335
epoch:3, train loss:4.60243736584688, train accuracy:0.24944444444444444, validation loss:4.366633593640764, validation accuracy:0.2708333333333333
epoch:4, train loss:4.0002215927301386, train accuracy:0.29894444444444446, validation loss:3.8964206850603844, validation accuracy:0.317
epoch:5, train loss:3.5912642725884707, train accuracy:0.33674074074074073, validation loss:3.5676684244725867, validation accuracy:0.35
epoch:6, train loss:3.297915589954475, train accuracy:0.37066666666666664, validation loss:3.323135942391556, validation accuracy:0.375
epoch:7, train loss:3.068395765595817, train accuracy:0.3967222222222222, validation loss:3.1165611832813522, validation accuracy:0.3985
epoch:8, train lo

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▄▅▆▆▇▇██
train loss,█▅▄▃▃▂▂▁▁▁
validation accuracy,▁▃▄▅▆▆▇▇██
validation loss,█▆▄▃▃▂▂▂▁▁

0,1
train accuracy,0.45844
train loss,2.58147
validation accuracy,0.447
validation loss,2.67487


[34m[1mwandb[0m: Agent Starting Run: oml3kp1l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:7.86766253936071, train accuracy:0.09916666666666667, validation loss:7.732500045092812, validation accuracy:0.09866666666666667
epoch:2, train loss:7.645394436101618, train accuracy:0.10325925925925926, validation loss:7.536640659365413, validation accuracy:0.10083333333333333
epoch:3, train loss:7.442715059265649, train accuracy:0.10762962962962963, validation loss:7.3530248547740875, validation accuracy:0.10616666666666667
epoch:4, train loss:7.2532012349981, train accuracy:0.11337037037037037, validation loss:7.1792904918759035, validation accuracy:0.11133333333333334
epoch:5, train loss:7.074048810494599, train accuracy:0.11768518518518518, validation loss:7.013181261197829, validation accuracy:0.11816666666666667
epoch:6, train loss:6.902788533580917, train accuracy:0.12190740740740741, validation loss:6.854303446221848, validation accuracy:0.12416666666666666
epoch:7, train loss:6.739428763327768, train accuracy:0.12718518518518518, validation loss:6.70156529

0,1
train accuracy,▁▂▂▃▄▅▅▆▇█
train loss,█▇▆▅▄▄▃▂▂▁
validation accuracy,▁▁▂▃▄▅▆▆▇█
validation loss,█▇▆▅▅▄▃▂▂▁

0,1
train accuracy,0.14346
train loss,6.2853
validation accuracy,0.144
validation loss,6.27814


[34m[1mwandb[0m: Agent Starting Run: 4ldsuho9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.105748168185566, train accuracy:0.671962962962963, validation loss:0.6904574010617053, validation accuracy:0.7688333333333334
epoch:2, train loss:0.5940922462173021, train accuracy:0.7978333333333333, validation loss:0.5333394662833808, validation accuracy:0.8166666666666667
epoch:3, train loss:0.505148665353876, train accuracy:0.8231851851851851, validation loss:0.48108818343464654, validation accuracy:0.8305
epoch:4, train loss:0.4688681363280223, train accuracy:0.8333703703703703, validation loss:0.46054217145643817, validation accuracy:0.8343333333333334
epoch:5, train loss:0.44869783975655847, train accuracy:0.8410555555555556, validation loss:0.447028324429163, validation accuracy:0.8381666666666666


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▆▇██
train loss,█▃▂▁▁
validation accuracy,▁▆▇██
validation loss,█▃▂▁▁

0,1
train accuracy,0.84106
train loss,0.4487
validation accuracy,0.83817
validation loss,0.44703


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gg76qs44 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666656966, max=1.0…

epoch:1, train loss:2.3992675538885853, train accuracy:0.12968518518518518, validation loss:2.379015569217583, validation accuracy:0.137
epoch:2, train loss:2.388385169324054, train accuracy:0.13294444444444445, validation loss:2.3686159632450674, validation accuracy:0.14066666666666666
epoch:3, train loss:2.3781908600028476, train accuracy:0.1366851851851852, validation loss:2.359274968861321, validation accuracy:0.14366666666666666
epoch:4, train loss:2.3689300650628646, train accuracy:0.13996296296296296, validation loss:2.3506574243478737, validation accuracy:0.14916666666666667
epoch:5, train loss:2.360462673373256, train accuracy:0.14437037037037037, validation loss:2.34308734113815, validation accuracy:0.15416666666666667
epoch:6, train loss:2.35289687251549, train accuracy:0.15051851851851852, validation loss:2.3360767716335182, validation accuracy:0.15833333333333333
epoch:7, train loss:2.346003454768872, train accuracy:0.15518518518518518, validation loss:2.329666245573556, v

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▂▃▃▄▅▆▇█
train loss,█▇▆▅▄▄▃▂▂▁
validation accuracy,▁▂▂▃▄▄▅▆▇█
validation loss,█▇▆▅▄▄▃▂▂▁

0,1
train accuracy,0.17483
train loss,2.3265
validation accuracy,0.18483
validation loss,2.31116


[34m[1mwandb[0m: Agent Starting Run: trzzg0kz with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666656966, max=1.0…

epoch:1, train loss:2.4813696122610187, train accuracy:0.10292592592592592, validation loss:2.4594318703141904, validation accuracy:0.1325
epoch:2, train loss:2.433739107608591, train accuracy:0.1638148148148148, validation loss:2.4141274891379054, validation accuracy:0.1925
epoch:3, train loss:2.388189807159964, train accuracy:0.22025925925925927, validation loss:2.368312026358645, validation accuracy:0.24033333333333334
epoch:4, train loss:2.343724040494995, train accuracy:0.25833333333333336, validation loss:2.3246688024919533, validation accuracy:0.2668333333333333
epoch:5, train loss:2.2985342653942737, train accuracy:0.27505555555555555, validation loss:2.275470821429088, validation accuracy:0.2778333333333333
epoch:6, train loss:2.245920532492272, train accuracy:0.2828148148148148, validation loss:2.220045550622263, validation accuracy:0.2828333333333333
epoch:7, train loss:2.191134334785648, train accuracy:0.2869074074074074, validation loss:2.16405236291415, validation accurac

0,1
train accuracy,▁▃▅▆▇▇▇▇▇█
train loss,█▇▇▆▅▄▃▃▂▁
validation accuracy,▁▃▅▆▆▆▇▇▇█
validation loss,█▇▇▆▅▄▃▃▂▁

0,1
train accuracy,0.31891
train loss,2.03056
validation accuracy,0.32767
validation loss,2.00266


[34m[1mwandb[0m: Agent Starting Run: uv3ar3x0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:5.998213635102006, train accuracy:0.1660925925925926, validation loss:4.456606847904178, validation accuracy:0.23633333333333334
epoch:2, train loss:3.8797046679859974, train accuracy:0.30214814814814817, validation loss:3.350594154462787, validation accuracy:0.3496666666666667
epoch:3, train loss:3.10432245984005, train accuracy:0.3849814814814815, validation loss:2.8211329851178113, validation accuracy:0.4206666666666667
epoch:4, train loss:2.676611159819857, train accuracy:0.43633333333333335, validation loss:2.4913446543788127, validation accuracy:0.46116666666666667
epoch:5, train loss:2.3845769468228495, train accuracy:0.47503703703703704, validation loss:2.250663976839915, validation accuracy:0.49083333333333334
epoch:6, train loss:2.171878747341731, train accuracy:0.5017037037037037, validation loss:2.0723366621178507, validation accuracy:0.5123333333333333
epoch:7, train loss:2.013139656332505, train accuracy:0.5238703703703703, validation loss:1.9358888518

VBox(children=(Label(value='0.001 MB of 0.025 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.044202…

0,1
train accuracy,▁▃▅▆▆▇▇▇██
train loss,█▅▃▃▂▂▂▁▁▁
validation accuracy,▁▃▅▆▆▇▇███
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.57093
train loss,1.69907
validation accuracy,0.561
validation loss,1.66615


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h7ysa1r0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:3.2226724437878365, train accuracy:0.09981481481481481, validation loss:3.085129126553256, validation accuracy:0.10166666666666667
epoch:2, train loss:3.0216382334332454, train accuracy:0.09842592592592593, validation loss:2.9284535742601263, validation accuracy:0.09516666666666666
epoch:3, train loss:2.887141832223005, train accuracy:0.08792592592592592, validation loss:2.8193498177086913, validation accuracy:0.0855
epoch:4, train loss:2.7903919001175654, train accuracy:0.08727777777777777, validation loss:2.7388992430045027, validation accuracy:0.09733333333333333
epoch:5, train loss:2.717216602321056, train accuracy:0.10142592592592592, validation loss:2.6772387634188783, validation accuracy:0.1125
epoch:6, train loss:2.6601535930967763, train accuracy:0.11381481481481481, validation loss:2.6286477023253405, validation accuracy:0.12716666666666668
epoch:7, train loss:2.6141205354149757, train accuracy:0.12764814814814815, validation loss:2.5891833179067647, valid

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▁▁▁▁▂▂▄▆█
train loss,█▆▅▄▃▂▂▂▁▁
validation accuracy,▂▁▁▁▂▂▃▅▇█
validation loss,█▆▅▄▃▃▂▂▁▁

0,1
train accuracy,0.29343
train loss,2.51626
validation accuracy,0.307
validation loss,2.50296


[34m[1mwandb[0m: Agent Starting Run: w1fcz1pk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.334327193032957, train accuracy:0.13314814814814815, validation loss:2.2986337477037777, validation accuracy:0.199
epoch:2, train loss:2.2881573188333313, train accuracy:0.1973148148148148, validation loss:2.2785170308078904, validation accuracy:0.21616666666666667
epoch:3, train loss:2.266038308682467, train accuracy:0.31537037037037036, validation loss:2.251493508881219, validation accuracy:0.30083333333333334
epoch:4, train loss:2.230610991123755, train accuracy:0.25735185185185183, validation loss:2.2061176960987674, validation accuracy:0.22216666666666668
epoch:5, train loss:2.174243125274672, train accuracy:0.2024074074074074, validation loss:2.138144416312107, validation accuracy:0.2085
epoch:6, train loss:2.0945699912551374, train accuracy:0.2046851851851852, validation loss:2.0477580681725405, validation accuracy:0.2185
epoch:7, train loss:1.997985405759847, train accuracy:0.21214814814814814, validation loss:1.948185767977008, validation accuracy:0.23333

0,1
train accuracy,▁▃▆▅▃▃▃▅▇█
train loss,█▇▇▇▆▅▄▃▂▁
validation accuracy,▁▂▅▂▁▂▂▆██
validation loss,██▇▇▆▅▄▃▂▁

0,1
train accuracy,0.37356
train loss,1.7173
validation accuracy,0.37683
validation loss,1.67996


[34m[1mwandb[0m: Agent Starting Run: 0mettzql with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:2.995844844962447, train accuracy:0.24003703703703705, validation loss:1.8997171597780453, validation accuracy:0.37483333333333335
epoch:2, train loss:1.7318775614464408, train accuracy:0.4414444444444444, validation loss:1.6047574346205968, validation accuracy:0.4825
epoch:3, train loss:1.4852503998671451, train accuracy:0.5312777777777777, validation loss:1.397017442951867, validation accuracy:0.5576666666666666
epoch:4, train loss:1.3039647330819115, train accuracy:0.5890555555555556, validation loss:1.2421933864729267, validation accuracy:0.6043333333333333
epoch:5, train loss:1.1676589086227016, train accuracy:0.6287037037037037, validation loss:1.125863331862587, validation accuracy:0.6388333333333334


VBox(children=(Label(value='0.001 MB of 0.017 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.064918…

0,1
train accuracy,▁▅▆▇█
train loss,█▃▂▂▁
validation accuracy,▁▄▆▇█
validation loss,█▅▃▂▁

0,1
train accuracy,0.6287
train loss,1.16766
validation accuracy,0.63883
validation loss,1.12586


[34m[1mwandb[0m: Agent Starting Run: u6qbwhu6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.4197478502734704, train accuracy:0.1002962962962963, validation loss:2.3415358037021865, validation accuracy:0.09733333333333333
epoch:2, train loss:2.3225903656203544, train accuracy:0.1002962962962963, validation loss:2.3087870189399977, validation accuracy:0.09733333333333333
epoch:3, train loss:2.3062414435349647, train accuracy:0.10031481481481481, validation loss:2.3034780021940207, validation accuracy:0.09733333333333333
epoch:4, train loss:2.303328372970446, train accuracy:0.10051851851851852, validation loss:2.3027961332126896, validation accuracy:0.0965
epoch:5, train loss:2.302787049150886, train accuracy:0.09911111111111111, validation loss:2.302718979399558, validation accuracy:0.1
epoch:6, train loss:2.3026960199230664, train accuracy:0.10053703703703704, validation loss:2.30276845325773, validation accuracy:0.0965
epoch:7, train loss:2.3026646218629248, train accuracy:0.0992962962962963, validation loss:2.3028144217379336, validation accuracy:0.0943

0,1
train accuracy,████▇█▇▇▁▅
train loss,█▂▁▁▁▁▁▁▁▁
validation accuracy,▅▅▅▄█▄▁▁▁▁
validation loss,█▂▁▁▁▁▁▁▁▁

0,1
train accuracy,0.09609
train loss,2.30266
validation accuracy,0.09433
validation loss,2.30281


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ht6czeg0 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:6.570953374421467, train accuracy:0.3459814814814815, validation loss:2.941842027904912, validation accuracy:0.49816666666666665
epoch:2, train loss:2.226631037217091, train accuracy:0.5575555555555556, validation loss:1.827706043873266, validation accuracy:0.593
epoch:3, train loss:1.5671668954969245, train accuracy:0.6255555555555555, validation loss:1.4398442177408641, validation accuracy:0.6353333333333333
epoch:4, train loss:1.2855763011026602, train accuracy:0.6600555555555555, validation loss:1.2398343163789602, validation accuracy:0.6636666666666666
epoch:5, train loss:1.118236044426667, train accuracy:0.6855925925925926, validation loss:1.111241728216603, validation accuracy:0.6881666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▇▇█
train loss,█▂▂▁▁
validation accuracy,▁▄▆▇█
validation loss,█▄▂▁▁

0,1
train accuracy,0.68559
train loss,1.11824
validation accuracy,0.68817
validation loss,1.11124


[34m[1mwandb[0m: Agent Starting Run: pizspxin with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.324501662769083, train accuracy:0.12807407407407406, validation loss:2.171327773013351, validation accuracy:0.17783333333333334
epoch:2, train loss:2.085224933926416, train accuracy:0.25996296296296295, validation loss:2.013597612709889, validation accuracy:0.32616666666666666
epoch:3, train loss:1.9476314817906462, train accuracy:0.3845, validation loss:1.8927748099382862, validation accuracy:0.4246666666666667
epoch:4, train loss:1.8347283701825843, train accuracy:0.4650185185185185, validation loss:1.7891488672198446, validation accuracy:0.4895
epoch:5, train loss:1.7364661605747103, train accuracy:0.5152777777777777, validation loss:1.6977754873143924, validation accuracy:0.5265
epoch:6, train loss:1.6495870428357828, train accuracy:0.5493518518518519, validation loss:1.6167695928634558, validation accuracy:0.5553333333333333
epoch:7, train loss:1.5727010575116267, train accuracy:0.5732222222222222, validation loss:1.5451841257019288, validation accuracy:0.574

0,1
train accuracy,▁▃▅▆▆▇▇███
train loss,█▆▅▄▄▃▂▂▁▁
validation accuracy,▁▃▅▆▆▇▇███
validation loss,█▇▆▅▄▃▂▂▁▁

0,1
train accuracy,0.62639
train loss,1.39165
validation accuracy,0.6225
validation loss,1.37618


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dippvkev with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:5.071097246847524, train accuracy:0.23664814814814814, validation loss:3.376475893502702, validation accuracy:0.35333333333333333
epoch:2, train loss:2.848630087857752, train accuracy:0.42388888888888887, validation loss:2.4096673196267453, validation accuracy:0.48233333333333334
epoch:3, train loss:2.163052429971678, train accuracy:0.5178518518518519, validation loss:1.9967274234067238, validation accuracy:0.541
epoch:4, train loss:1.8117526291410906, train accuracy:0.5673148148148148, validation loss:1.7443339006960044, validation accuracy:0.5805
epoch:5, train loss:1.6016969246312933, train accuracy:0.6012407407407407, validation loss:1.5655146011997445, validation accuracy:0.6065
epoch:6, train loss:1.4501380443831182, train accuracy:0.6272777777777778, validation loss:1.4431153770251632, validation accuracy:0.6235
epoch:7, train loss:1.3394750527327626, train accuracy:0.644537037037037, validation loss:1.354831154929026, validation accuracy:0.6363333333333333
e

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▄▅▆▇▇▇███
train loss,█▄▃▂▂▂▁▁▁▁
validation accuracy,▁▄▅▆▇▇▇███
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.68561
train loss,1.10894
validation accuracy,0.666
validation loss,1.17644


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: swz1oklj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:3.928239767482705, train accuracy:0.177, validation loss:3.3722060036776065, validation accuracy:0.2125
epoch:2, train loss:3.0885221575511728, train accuracy:0.23837037037037037, validation loss:2.8277848471426927, validation accuracy:0.2605
epoch:3, train loss:2.662973420676093, train accuracy:0.2846666666666667, validation loss:2.520756963124085, validation accuracy:0.30916666666666665
epoch:4, train loss:2.4012579801435847, train accuracy:0.32357407407407407, validation loss:2.315625188760219, validation accuracy:0.3421666666666667
epoch:5, train loss:2.2324312629872027, train accuracy:0.35774074074074075, validation loss:2.1756829154059263, validation accuracy:0.37033333333333335
epoch:6, train loss:2.1107268359385523, train accuracy:0.3894444444444444, validation loss:2.0731866531910987, validation accuracy:0.4036666666666667
epoch:7, train loss:2.013508302907423, train accuracy:0.41688888888888886, validation loss:1.9871774781650786, validation accuracy:0.432

0,1
train accuracy,▁▂▃▄▅▆▆▇██
train loss,█▅▄▃▂▂▂▁▁▁
validation accuracy,▁▂▃▄▅▆▆▇██
validation loss,█▆▄▃▃▂▂▂▁▁

0,1
train accuracy,0.49102
train loss,1.80199
validation accuracy,0.49483
validation loss,1.79805


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: v1oj7kil with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:7.491819580878889, train accuracy:0.11937037037037038, validation loss:6.752988256319971, validation accuracy:0.1265
epoch:2, train loss:6.117642245484619, train accuracy:0.14825925925925926, validation loss:5.6528520712105434, validation accuracy:0.16066666666666668
epoch:3, train loss:5.170565261042307, train accuracy:0.1874074074074074, validation loss:4.882881201549451, validation accuracy:0.19916666666666666
epoch:4, train loss:4.5299817197689, train accuracy:0.22742592592592592, validation loss:4.3527480991971546, validation accuracy:0.2385
epoch:5, train loss:4.085960318193108, train accuracy:0.2665925925925926, validation loss:3.9756651664896867, validation accuracy:0.27566666666666667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▂▄▆█
train loss,█▅▃▂▁
validation accuracy,▁▃▄▆█
validation loss,█▅▃▂▁

0,1
train accuracy,0.26659
train loss,4.08596
validation accuracy,0.27567
validation loss,3.97567


[34m[1mwandb[0m: Agent Starting Run: 43d172oy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:1.498814671809957, train accuracy:0.5339444444444444, validation loss:1.0642586936756449, validation accuracy:0.6716666666666666
epoch:2, train loss:0.9180022118806388, train accuracy:0.709462962962963, validation loss:0.8206269122866364, validation accuracy:0.736
epoch:3, train loss:0.7584688810454766, train accuracy:0.7552962962962962, validation loss:0.7134772563729227, validation accuracy:0.7665
epoch:4, train loss:0.678054335273757, train accuracy:0.7787592592592593, validation loss:0.6512965468765702, validation accuracy:0.7823333333333333
epoch:5, train loss:0.6279178247721706, train accuracy:0.7935, validation loss:0.6107272409160803, validation accuracy:0.7965
epoch:6, train loss:0.5934774125064692, train accuracy:0.8038518518518518, validation loss:0.5800987895635525, validation accuracy:0.8036666666666666
epoch:7, train loss:0.5680713276853724, train accuracy:0.8103888888888889, validation loss:0.558557778490425, validation accuracy:0.8098333333333333
epo

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▁▅▆▇▇█████
train loss,█▄▃▂▂▂▁▁▁▁
validation accuracy,▁▄▅▆▇▇▇███
validation loss,█▅▄▃▂▂▂▁▁▁

0,1
train accuracy,0.82317
train loss,0.51995
validation accuracy,0.82233
validation loss,0.5159


[34m[1mwandb[0m: Agent Starting Run: tablwsl9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


epoch:1, train loss:4.1973742576639665, train accuracy:0.12261111111111112, validation loss:3.334376571914317, validation accuracy:0.18016666666666667
epoch:2, train loss:2.8860579661189325, train accuracy:0.24996296296296297, validation loss:2.564480800759343, validation accuracy:0.30316666666666664
epoch:3, train loss:2.3678298441653105, train accuracy:0.33446296296296296, validation loss:2.216962828396242, validation accuracy:0.362
epoch:4, train loss:2.1076403291113683, train accuracy:0.3867407407407407, validation loss:2.0163798497089296, validation accuracy:0.4105
epoch:5, train loss:1.9392973372742175, train accuracy:0.4315555555555556, validation loss:1.8725013043845915, validation accuracy:0.45


0,1
train accuracy,▁▄▆▇█
train loss,█▄▂▂▁
validation accuracy,▁▄▆▇█
validation loss,█▄▃▂▁

0,1
train accuracy,0.43156
train loss,1.9393
validation accuracy,0.45
validation loss,1.8725


[34m[1mwandb[0m: Agent Starting Run: m18zb2jy with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 3
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666656966, max=1.0…

epoch:1, train loss:2.3397719568456674, train accuracy:0.10433333333333333, validation loss:2.3137796876769494, validation accuracy:0.09433333333333334
epoch:2, train loss:2.311905550391048, train accuracy:0.13596296296296295, validation loss:2.3111905145820892, validation accuracy:0.133
epoch:3, train loss:2.3102004317950344, train accuracy:0.15974074074074074, validation loss:2.3094935376484838, validation accuracy:0.18816666666666668
epoch:4, train loss:2.308512156952198, train accuracy:0.16324074074074074, validation loss:2.3079884204789884, validation accuracy:0.212
epoch:5, train loss:2.3067480842523627, train accuracy:0.21344444444444444, validation loss:2.3061690589376607, validation accuracy:0.29833333333333334


0,1
train accuracy,▁▃▅▅█
train loss,█▂▂▁▁
validation accuracy,▁▂▄▅█
validation loss,█▆▄▃▁

0,1
train accuracy,0.21344
train loss,2.30675
validation accuracy,0.29833
validation loss,2.30617


[34m[1mwandb[0m: Agent Starting Run: xzqcyfxg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_hidden_layer: 5
[34m[1mwandb[0m: 	optimizer: nestrov
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


epoch:1, train loss:2.335017821758593, train accuracy:0.09951851851851852, validation loss:2.3030432748213805, validation accuracy:0.0965
epoch:2, train loss:2.3027997501126403, train accuracy:0.09953703703703703, validation loss:2.3030052798092724, validation accuracy:0.0965
epoch:3, train loss:2.302774558442306, train accuracy:0.09933333333333333, validation loss:2.3028949656728313, validation accuracy:0.09433333333333334
epoch:4, train loss:2.302757287096922, train accuracy:0.09864814814814815, validation loss:2.302702168749957, validation accuracy:0.10166666666666667
epoch:5, train loss:2.3027606262349583, train accuracy:0.0995925925925926, validation loss:2.302889129324374, validation accuracy:0.09433333333333334
epoch:6, train loss:2.3027275888026986, train accuracy:0.0990925925925926, validation loss:2.3026822970726735, validation accuracy:0.10166666666666667
epoch:7, train loss:2.30275652896597, train accuracy:0.10011111111111111, validation loss:2.3028684913376014, validation 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train accuracy,▇▇▆▄▇▆█▁▅▃
train loss,█▁▁▁▁▁▁▁▁▁
validation accuracy,▂▂▁▆▁▆▇▁█▄
validation loss,█▇▅▁▅▁▅▅▁▂

0,1
train accuracy,0.09813
train loss,2.30274
validation accuracy,0.0985
validation loss,2.30273
