<a href="https://colab.research.google.com/github/RahulSundar/CS6910-DeepLearningFundamentals/blob/main/Assignment1/Assignment1_training_sweep_Fashion_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install wandb




In [3]:
import numpy as np


def sigmoid(z):
    return 1.0 / (1 + np.exp(-(z)))


def tanh(z):
    return np.tanh(z)


def sin(z):
    return np.sin(z)


def relu(z):
    return (z>0)*(z) + ((z<0)*(z)*0.01)
    #return np.maximum(z,0)
    #return np.where(z<0, 0.01*z, z)

def softmax(Z):
    return np.exp(Z) / np.sum(np.exp(Z))


def der_sigmoid(z):
    return  (1.0 / (1 + np.exp(-(z))))*(1 -  1.0 / (1 + np.exp(-(z))))

def der_tanh(z):
    return 1 - np.tanh(z) ** 2


def der_relu(z):
    return (z>0)*np.ones(z.shape) + (z<0)*(0.01*np.ones(z.shape) )

In [9]:
import numpy as np
import scipy as sp
import wandb
import time

#import tensorflow.keras as tfkeras

class FeedForwardNeuralNetwork:
    def __init__(
        self, 
        num_hidden_layers, 
        num_hidden_neurons, 
        X_train_raw, 
        Y_train_raw,  
        N_train, 
        X_val_raw, 
        Y_val_raw, 
        N_val,
        X_test_raw, 
        Y_test_raw, 
        N_test,        
        optimizer,
        batch_size,
        weight_decay,
        learning_rate,
        max_epochs,
        activation,
        initializer,
        loss

    ):

        """
        Here, we initialize the FeedForwardNeuralNetwork class with the number of hidden layers, number of hidden neurons, raw training data. 
        """
        
        self.num_classes = np.max(Y_train_raw) + 1  # NUM_CLASSES
        self.num_hidden_layers = num_hidden_layers
        self.num_hidden_neurons = num_hidden_neurons
        self.output_layer_size = self.num_classes
        self.img_height = X_train_raw.shape[1]
        self.img_width = X_train_raw.shape[2]
        self.img_flattened_size = self.img_height * self.img_width

        # self.layers = layers
        self.layers = (
            [self.img_flattened_size]
            + num_hidden_layers * [num_hidden_neurons]
            + [self.output_layer_size]
        )

        self.N_train = N_train
        self.N_val = N_val
        self.N_test = N_test
        


        self.X_train = np.transpose(
            X_train_raw.reshape(
                X_train_raw.shape[0], X_train_raw.shape[1] * X_train_raw.shape[2]
            )
        )  # [IMG_HEIGHT*IMG_WIDTH X NTRAIN]
        self.X_test = np.transpose(
            X_test_raw.reshape(
                X_test_raw.shape[0], X_test_raw.shape[1] * X_test_raw.shape[2]
            )
        )  # [IMG_HEIGHT*IMG_WIDTH X NTRAIN]
        self.X_val = np.transpose(
            X_val_raw.reshape(
                X_val_raw.shape[0], X_val_raw.shape[1] * X_val_raw.shape[2]
            )
        )  # [IMG_HEIGHT*IMG_WIDTH X NTRAIN]


        self.X_train = self.X_train / 255
        self.X_test = self.X_test / 255
        self.X_val = self.X_val / 255
        
        self.Y_train = self.oneHotEncode(Y_train_raw)  # [NUM_CLASSES X NTRAIN]
        self.Y_val = self.oneHotEncode(Y_val_raw)
        self.Y_test = self.oneHotEncode(Y_test_raw)
        #self.Y_shape = self.Y_train.shape




        # self.weights, self.biases = self.initializeNeuralNet(self.layers)



        self.Activations_dict = {"SIGMOID": sigmoid, "TANH": tanh, "RELU": relu}
        self.DerActivation_dict = {
            "SIGMOID": der_sigmoid,
            "TANH": der_tanh,
            "RELU": der_relu,
        }

        self.Initializer_dict = {
            "XAVIER": self.Xavier_initializer,
            "RANDOM": self.random_initializer,
            "HE": self.He_initializer
        }

        self.Optimizer_dict = {
            "SGD": self.sgdMiniBatch,
            "MGD": self.mgd,
            "NAG": self.nag,
            "RMSPROP": self.rmsProp,
            "ADAM": self.adam,
            "NADAM": self.nadam,
        }
        
        self.activation = self.Activations_dict[activation]
        
        self.der_activation = self.DerActivation_dict[activation]
        
        self.optimizer = self.Optimizer_dict[optimizer]
        self.initializer = self.Initializer_dict[initializer]
        self.loss_function = loss
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        
        self.weights, self.biases = self.initializeNeuralNet(self.layers)


        
        
    # helper functions
    def oneHotEncode(self, Y_train_raw):
        Ydata = np.zeros((self.num_classes, Y_train_raw.shape[0]))
        for i in range(Y_train_raw.shape[0]):
            value = Y_train_raw[i]
            Ydata[int(value)][i] = 1.0
        return Ydata

    # Loss functions
    def meanSquaredErrorLoss(self, Y_true, Y_pred):
        MSE = np.mean((Y_true - Y_pred) ** 2)
        return MSE

    def crossEntropyLoss(self, Y_true, Y_pred):
        CE = [-Y_true[i] * np.log(Y_pred[i]) for i in range(len(Y_pred))]
        crossEntropy = np.mean(CE)
        return crossEntropy

    def L2RegularisationLoss(self, weight_decay):
        ALPHA = weight_decay
        return ALPHA * np.sum(
            [
                np.linalg.norm(self.weights[str(i + 1)]) ** 2
                for i in range(len(self.weights))
            ]
        )


    def accuracy(self, Y_true, Y_pred, data_size):
        Y_true_label = []
        Y_pred_label = []
        ctr = 0
        for i in range(data_size):
            Y_true_label.append(np.argmax(Y_true[:, i]))
            Y_pred_label.append(np.argmax(Y_pred[:, i]))
            if Y_true_label[i] == Y_pred_label[i]:
                ctr += 1
        accuracy = ctr / data_size
        return accuracy, Y_true_label, Y_pred_label

    def Xavier_initializer(self, size):
        in_dim = size[1]
        out_dim = size[0]
        xavier_stddev = np.sqrt(2 / (in_dim + out_dim))
        return np.random.normal(0, xavier_stddev, size=(out_dim, in_dim))

    #def Xavier_initializer(self, size):
    #    in_dim = size[1]
    #    out_dim = size[0]
    #    #xavier_stddev = np.sqrt(2 / (in_dim + out_dim))
    #    initializer = tfkeras.initializers.GlorotNormal()
    #    return initializer(shape=(out_dim, in_dim)).numpy()

    #def random_initializer(self, size):
    #    in_dim = size[1]
    #    out_dim = size[0]
    #    #xavier_stddev = np.sqrt(2 / (in_dim + out_dim))
    #    initializer = tfkeras.initializers.RandomNormal()
    #    return initializer(shape=(out_dim, in_dim)).numpy()

    def random_initializer(self, size):
        in_dim = size[1]
        out_dim = size[0]
        return np.random.normal(0, 1, size=(out_dim, in_dim))


    def He_initializer(self,size):
        in_dim = size[1]
        out_dim = size[0]
        He_stddev = np.sqrt(2 / (in_dim))
        return np.random.normal(0, 1, size=(out_dim, in_dim)) * He_stddev


    def initializeNeuralNet(self, layers):
        weights = {}
        biases = {}
        num_layers = len(layers)
        for l in range(0, num_layers - 1):
            W = self.initializer(size=[layers[l + 1], layers[l]])
            b = np.zeros((layers[l + 1], 1))
            weights[str(l + 1)] = W
            biases[str(l + 1)] = b
        return weights, biases

    def forwardPropagate(self, X_train_batch, weights, biases):
        """
        Returns the neural network given input data, weights, biases.
        Arguments:
                 : X - input matrix
                 : Weights  - Weights matrix
                 : biases - Bias vectors 
        """
        # Number of layers = length of weight matrix + 1
        num_layers = len(weights) + 1
        # A - Preactivations
        # H - Activations
        X = X_train_batch
        H = {}
        A = {}
        H["0"] = X
        A["0"] = X
        for l in range(0, num_layers - 2):
            if l == 0:
                W = weights[str(l + 1)]
                b = biases[str(l + 1)]
                A[str(l + 1)] = np.add(np.matmul(W, X), b)
                H[str(l + 1)] = self.activation(A[str(l + 1)])
            else:
                W = weights[str(l + 1)]
                b = biases[str(l + 1)]
                A[str(l + 1)] = np.add(np.matmul(W, H[str(l)]), b)
                H[str(l + 1)] = self.activation(A[str(l + 1)])

        # Here the last layer is not activated as it is a regression problem
        W = weights[str(num_layers - 1)]
        b = biases[str(num_layers - 1)]
        A[str(num_layers - 1)] = np.add(np.matmul(W, H[str(num_layers - 2)]), b)
        # Y = softmax(A[-1])
        Y = softmax(A[str(num_layers - 1)])
        H[str(num_layers - 1)] = Y
        return Y, H, A

    def backPropagate(
        self, Y, H, A, Y_train_batch, weight_decay=0
    ):

        ALPHA = weight_decay
        gradients_weights = []
        gradients_biases = []
        num_layers = len(self.layers)

        # Gradient with respect to the output layer is absolutely fine.
        if self.loss_function == "CROSS":
            globals()["grad_a" + str(num_layers - 1)] = -(Y_train_batch - Y)
        elif self.loss_function == "MSE":
            globals()["grad_a" + str(num_layers - 1)] = np.multiply(
                2 * (Y - Y_train_batch), np.multiply(Y, (1 - Y))
            )

        for l in range(num_layers - 2, -1, -1):

            if ALPHA != 0:
                globals()["grad_W" + str(l + 1)] = (
                    np.outer(globals()["grad_a" + str(l + 1)], H[str(l)])
                    + ALPHA * self.weights[str(l + 1)]
                )
            elif ALPHA == 0:
                globals()["grad_W" + str(l + 1)] = np.outer(
                    globals()["grad_a" + str(l + 1)], H[str(l)]
                )
            globals()["grad_b" + str(l + 1)] = globals()["grad_a" + str(l + 1)]
            gradients_weights.append(globals()["grad_W" + str(l + 1)])
            gradients_biases.append(globals()["grad_b" + str(l + 1)])
            if l != 0:
                globals()["grad_h" + str(l)] = np.matmul(
                    self.weights[str(l + 1)].transpose(),
                    globals()["grad_a" + str(l + 1)],
                )
                globals()["grad_a" + str(l)] = np.multiply(
                    globals()["grad_h" + str(l)], self.der_activation(A[str(l)])
                )
            elif l == 0:

                globals()["grad_h" + str(l)] = np.matmul(
                    self.weights[str(l + 1)].transpose(),
                    globals()["grad_a" + str(l + 1)],
                )
                globals()["grad_a" + str(l)] = np.multiply(
                    globals()["grad_h" + str(l)], (A[str(l)])
                )
        return gradients_weights, gradients_biases


    def predict(self,X,length_dataset):
        Y_pred = []        
        for i in range(length_dataset):

            Y, H, A = self.forwardPropagate(
                X[:, i].reshape(self.img_flattened_size, 1),
                self.weights,
                self.biases,
            )

            Y_pred.append(Y.reshape(self.num_classes,))
        Y_pred = np.array(Y_pred).transpose()
        return Y_pred

    def sgd(self, epochs, length_dataset, learning_rate, weight_decay=0):
        
        trainingloss = []
        trainingaccuracy = []
        validationaccuracy = []
        
        num_layers = len(self.layers)

        X_train = self.X_train[:, :length_dataset]
        Y_train = self.Y_train[:, :length_dataset]

        for epoch in range(epochs):
            start_time = time.time()
            # perm = np.random.permutation(N)
            idx = np.random.shuffle(np.arange(length_dataset))
            X_train = X_train[:, idx].reshape(self.img_flattened_size, length_dataset)
            Y_train = Y_train[:, idx].reshape(self.num_classes, length_dataset)

            CE = []
            #Y_pred = []
            deltaw = [
                np.zeros((self.layers[l + 1], self.layers[l]))
                for l in range(0, len(self.layers) - 1)
            ]
            deltab = [
                np.zeros((self.layers[l + 1], 1))
                for l in range(0, len(self.layers) - 1)
            ]

            for i in range(length_dataset):

                Y, H, A = self.forwardPropagate(
                    X_train[:, i].reshape(self.img_flattened_size, 1),
                    self.weights,
                    self.biases,
                )
                grad_weights, grad_biases = self.backPropagate(
                    Y, H, A, Y_train[:, i].reshape(self.num_classes, 1)
                )
                deltaw = [
                    grad_weights[num_layers - 2 - i] for i in range(num_layers - 1)
                ]
                deltab = [
                    grad_biases[num_layers - 2 - i] for i in range(num_layers - 1)
                ]

                #Y_pred.append(Y.reshape(self.num_classes,))

                CE.append(
                    self.crossEntropyLoss(
                        self.Y_train[:, i].reshape(self.num_classes, 1), Y
                    )
                    + self.L2RegularisationLoss(weight_decay)
                )

                # print(num_points_seen)
                self.weights = {
                    str(i + 1): (self.weights[str(i + 1)] - learning_rate * deltaw[i])
                    for i in range(len(self.weights))
                }
                self.biases = {
                    str(i + 1): (self.biases[str(i + 1)] - learning_rate * deltab[i])
                    for i in range(len(self.biases))
                }

            elapsed = time.time() - start_time
            #Y_pred = np.array(Y_pred).transpose()
            Y_pred = self.predict(self.X_train, self.N_train)
            trainingloss.append(np.mean(CE))
            trainingaccuracy.append(self.accuracy(Y_train, Y_pred, length_dataset)[0])
            validationaccuracy.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.N_val), self.N_val)[0])
            
            print(
                        "Epoch: %d, Loss: %.3e, Training accuracy:%.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                        % (
                            epoch,
                            trainingloss[epoch],
                            trainingaccuracy[epoch],
                            validationaccuracy[epoch],
                            elapsed,
                            self.learning_rate,
                        )
                    )

            wandb.log({'loss':np.mean(CE), 'trainingaccuracy':trainingaccuracy[epoch], 'validationaccuracy':validationaccuracy[epoch],'epoch':epoch, })
        # data = [[epoch, loss[epoch]] for epoch in range(epochs)]
        # table = wandb.Table(data=data, columns = ["Epoch", "Loss"])
        # wandb.log({'loss':wandb.plot.line(table, "Epoch", "Loss", title="Loss vs Epoch Line Plot")})
        return trainingloss, trainingaccuracy, validationaccuracy, Y_pred


      
    def sgdMiniBatch(self, epochs,length_dataset, batch_size, learning_rate, weight_decay = 0):

        X_train = self.X_train[:, :length_dataset]
        Y_train = self.Y_train[:, :length_dataset]        

        trainingloss = []
        trainingaccuracy = []
        validationaccuracy = []
        
        num_layers = len(self.layers)
        num_points_seen = 0


        for epoch in range(epochs):
            start_time = time.time()
            idx = np.random.shuffle(np.arange(length_dataset))
            X_train = X_train[:, idx].reshape(self.img_flattened_size, length_dataset)
            Y_train = Y_train[:, idx].reshape(self.num_classes, length_dataset)
            
            CE = []
            #Y_pred = []
            
            deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
            deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]

            for i in range(length_dataset):
                
                Y,H,A = self.forwardPropagate(X_train[:,i].reshape(self.img_flattened_size,1), self.weights, self.biases) 
                grad_weights, grad_biases = self.backPropagate(Y,H,A,Y_train[:,i].reshape(self.num_classes,1))
                
                deltaw = [grad_weights[num_layers-2 - i] + deltaw[i] for i in range(num_layers - 1)]
                deltab = [grad_biases[num_layers-2 - i] + deltab[i] for i in range(num_layers - 1)]
                
                #Y_pred.append(Y.reshape(self.num_classes,))
                CE.append(self.crossEntropyLoss(self.Y_train[:,i].reshape(self.num_classes,1), Y) + self.L2RegularisationLoss(weight_decay))
                
                num_points_seen +=1
                
                if int(num_points_seen) % batch_size == 0:
                    
                    
                    self.weights = {str(i+1):(self.weights[str(i+1)] - learning_rate*deltaw[i]/batch_size) for i in range(len(self.weights))} 
                    self.biases = {str(i+1):(self.biases[str(i+1)] - learning_rate*deltab[i]) for i in range(len(self.biases))}
                    
                    #resetting gradient updates
                    deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
                    deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
            
            elapsed = time.time() - start_time
            #Y_pred = np.array(Y_pred).transpose()
            Y_pred = self.predict(self.X_train, self.N_train)
            trainingloss.append(np.mean(CE))
            trainingaccuracy.append(self.accuracy(Y_train, Y_pred, length_dataset)[0])
            validationaccuracy.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.N_val), self.N_val)[0])

            print(
                        "Epoch: %d, Loss: %.3e, Training accuracy:%.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                        % (
                            epoch,
                            trainingloss[epoch],
                            trainingaccuracy[epoch],
                            validationaccuracy[epoch],
                            elapsed,
                            self.learning_rate,
                        )
                    )
                    
            wandb.log({'loss':np.mean(CE), 'trainingaccuracy':trainingaccuracy[epoch], 'validationaccuracy':validationaccuracy[epoch],'epoch':epoch })
            
        return trainingloss, trainingaccuracy, validationaccuracy, Y_pred



    def mgd(self, epochs,length_dataset, batch_size, learning_rate, weight_decay = 0):
        GAMMA = 0.9

        X_train = self.X_train[:, :length_dataset]
        Y_train = self.Y_train[:, :length_dataset]        

        
        trainingloss = []
        trainingaccuracy = []
        validationaccuracy = []
        
        num_layers = len(self.layers)
        prev_v_w = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        prev_v_b = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
        num_points_seen = 0
        for epoch in range(epochs):
            start_time = time.time()
            idx = np.random.shuffle(np.arange(length_dataset))
            X_train = X_train[:, idx].reshape(self.img_flattened_size, length_dataset)
            Y_train = Y_train[:, idx].reshape(self.num_classes, length_dataset)

            CE = []
            #Y_pred = []
            deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
            deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
            

            for i in range(length_dataset):
                Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(self.img_flattened_size,1), self.weights, self.biases) 
                grad_weights, grad_biases = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(self.num_classes,1))
                
                deltaw = [grad_weights[num_layers-2 - i] + deltaw[i] for i in range(num_layers - 1)]
                deltab = [grad_biases[num_layers-2 - i] + deltab[i] for i in range(num_layers - 1)]

                #Y_pred.append(Y.reshape(self.num_classes,))
                CE.append(self.crossEntropyLoss(self.Y_train[:,i].reshape(self.num_classes,1), Y) + self.L2RegularisationLoss(weight_decay))
                
                num_points_seen +=1
                
                if int(num_points_seen) % batch_size == 0:

                    v_w = [GAMMA*prev_v_w[i] + learning_rate*deltaw[i]/batch_size for i in range(num_layers - 1)]
                    v_b = [GAMMA*prev_v_b[i] + learning_rate*deltab[i]/batch_size for i in range(num_layers - 1)]
                    
                    self.weights = {str(i+1) : (self.weights[str(i+1)] - v_w[i]) for i in range(len(self.weights))}
                    self.biases = {str(i+1): (self.biases[str(i+1)] - v_b[i]) for i in range(len(self.biases))}

                    prev_v_w = v_w
                    prev_v_b = v_b

                    #resetting gradient updates
                    deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
                    deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]

            elapsed = time.time() - start_time
            #Y_pred = np.array(Y_pred).transpose()
            Y_pred = self.predict(self.X_train, self.N_train)
            trainingloss.append(np.mean(CE))
            trainingaccuracy.append(self.accuracy(Y_train, Y_pred, length_dataset)[0])
            validationaccuracy.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.N_val), self.N_val)[0])

            print(
                        "Epoch: %d, Loss: %.3e, Training accuracy:%.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                        % (
                            epoch,
                            trainingloss[epoch],
                            trainingaccuracy[epoch],
                            validationaccuracy[epoch],
                            elapsed,
                            self.learning_rate,
                        )
                    )

            wandb.log({'loss':np.mean(CE), 'trainingaccuracy':trainingaccuracy[epoch], 'validationaccuracy':validationaccuracy[epoch],'epoch':epoch })


        return trainingloss, trainingaccuracy, validationaccuracy, Y_pred


 
 
    def stochasticNag(self,epochs,length_dataset, learning_rate, weight_decay = 0):
        GAMMA = 0.9

        X_train = self.X_train[:, :length_dataset]
        Y_train = self.Y_train[:, :length_dataset]        

        trainingloss = []
        trainingaccuracy = []
        validationaccuracy = []
        
        num_layers = len(self.layers)
        
        prev_v_w = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        prev_v_b = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
        
        for epoch in range(epochs):
            start_time = time.time()
            idx = np.random.shuffle(np.arange(length_dataset))
            X_train = X_train[:, idx].reshape(self.img_flattened_size, length_dataset)
            Y_train = Y_train[:, idx].reshape(self.num_classes, length_dataset)

            CE = []
            #Y_pred = []  
            
            deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
            deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
            
            v_w = [GAMMA*prev_v_w[i] for i in range(0, len(self.layers)-1)]  
            v_b = [GAMMA*prev_v_b[i] for i in range(0, len(self.layers)-1)]
                        
            for i in range(length_dataset):
                winter = {str(i+1) : self.weights[str(i+1)] - v_w[i] for i in range(0, len(self.layers)-1)}
                binter = {str(i+1) : self.biases[str(i+1)] - v_b[i] for i in range(0, len(self.layers)-1)}
                
                Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(self.img_flattened_size,1), winter, binter) 
                grad_weights, grad_biases = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(self.num_classes,1))
                
                deltaw = [grad_weights[num_layers-2 - i] for i in range(num_layers - 1)]
                deltab = [grad_biases[num_layers-2 - i] for i in range(num_layers - 1)]

                #Y_pred.append(Y.reshape(self.num_classes,))
                CE.append(self.crossEntropyLoss(self.Y_train[:,i].reshape(self.num_classes,1), Y) + self.L2RegularisationLoss(weight_decay))
                            
                v_w = [GAMMA*prev_v_w[i] + learning_rate*deltaw[i] for i in range(num_layers - 1)]
                v_b = [GAMMA*prev_v_b[i] + learning_rate*deltab[i] for i in range(num_layers - 1)]
        
                self.weights = {str(i+1):self.weights[str(i+1)] - v_w[i] for i in range(len(self.weights))} 
                self.biases = {str(i+1):self.biases[str(i+1)] - v_b[i] for i in range(len(self.biases))}
                
                prev_v_w = v_w
                prev_v_b = v_b
    
            
            elapsed = time.time() - start_time
            #Y_pred = np.array(Y_pred).transpose()
            Y_pred = self.predict(self.X_train, self.N_train)
            trainingloss.append(np.mean(CE))
            trainingaccuracy.append(self.accuracy(Y_train, Y_pred, length_dataset)[0])
            validationaccuracy.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.N_val), self.N_val)[0])

            print(
                        "Epoch: %d, Loss: %.3e, Training accuracy:%.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                        % (
                            epoch,
                            trainingloss[epoch],
                            trainingaccuracy[epoch],
                            validationaccuracy[epoch],
                            elapsed,
                            self.learning_rate,
                        )
                    )
                    
            wandb.log({'loss':np.mean(CE), 'trainingaccuracy':trainingaccuracy[epoch], 'validationaccuracy':validationaccuracy[epoch],'epoch':epoch })
        
        return trainingloss, trainingaccuracy, validationaccuracy, Y_pred
    

    def nag(self,epochs,length_dataset, batch_size,learning_rate, weight_decay = 0):
        GAMMA = 0.9

        X_train = self.X_train[:, :length_dataset]
        Y_train = self.Y_train[:, :length_dataset]        


        trainingloss = []
        trainingaccuracy = []
        validationaccuracy = []
        
        num_layers = len(self.layers)
        
        prev_v_w = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        prev_v_b = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
        
        num_points_seen = 0
        for epoch in range(epochs):
            start_time = time.time()
            idx = np.random.shuffle(np.arange(length_dataset))
            X_train = X_train[:, idx].reshape(self.img_flattened_size, length_dataset)
            Y_train = Y_train[:, idx].reshape(self.num_classes, length_dataset)

            CE = []
            #Y_pred = []  
            
            deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
            deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
            
            v_w = [GAMMA*prev_v_w[i] for i in range(0, len(self.layers)-1)]  
            v_b = [GAMMA*prev_v_b[i] for i in range(0, len(self.layers)-1)]

            for i in range(length_dataset):
                winter = {str(i+1) : self.weights[str(i+1)] - v_w[i] for i in range(0, len(self.layers)-1)}
                binter = {str(i+1) : self.biases[str(i+1)] - v_b[i] for i in range(0, len(self.layers)-1)}
                
                Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(self.img_flattened_size,1), winter, binter) 
                grad_weights, grad_biases = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(self.num_classes,1))
                
                deltaw = [grad_weights[num_layers-2 - i] + deltaw[i] for i in range(num_layers - 1)]
                deltab = [grad_biases[num_layers-2 - i] + deltab[i] for i in range(num_layers - 1)]

                #Y_pred.append(Y.reshape(self.num_classes,))
                CE.append(self.crossEntropyLoss(self.Y_train[:,i].reshape(self.num_classes,1), Y) + self.L2RegularisationLoss(weight_decay))

                num_points_seen +=1
                
                if int(num_points_seen) % batch_size == 0:                            

                    v_w = [GAMMA*prev_v_w[i] + learning_rate*deltaw[i]/batch_size for i in range(num_layers - 1)]
                    v_b = [GAMMA*prev_v_b[i] + learning_rate*deltab[i]/batch_size for i in range(num_layers - 1)]
        
                    self.weights ={str(i+1):self.weights[str(i+1)]  - v_w[i] for i in range(len(self.weights))}
                    self.biases = {str(i+1):self.biases[str(i+1)]  - v_b[i] for i in range(len(self.biases))}
                
                    prev_v_w = v_w
                    prev_v_b = v_b

                    deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
                    deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]

    
            
            elapsed = time.time() - start_time
            #Y_pred = np.array(Y_pred).transpose()
            Y_pred = self.predict(self.X_train, self.N_train)
            trainingloss.append(np.mean(CE))
            trainingaccuracy.append(self.accuracy(Y_train, Y_pred, length_dataset)[0])
            validationaccuracy.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.N_val), self.N_val)[0])

            print(
                        "Epoch: %d, Loss: %.3e, Training accuracy:%.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                        % (
                            epoch,
                            trainingloss[epoch],
                            trainingaccuracy[epoch],
                            validationaccuracy[epoch],
                            elapsed,
                            self.learning_rate,
                        )
                    )

            wandb.log({'loss':np.mean(CE), 'trainingaccuracy':trainingaccuracy[epoch], 'validationaccuracy':validationaccuracy[epoch],'epoch':epoch })
        
        return trainingloss, trainingaccuracy, validationaccuracy, Y_pred
    

    
    def rmsProp(self, epochs,length_dataset, batch_size, learning_rate, weight_decay = 0):


        X_train = self.X_train[:, :length_dataset]
        Y_train = self.Y_train[:, :length_dataset]        

        
        trainingloss = []
        trainingaccuracy = []
        validationaccuracy = []
        
        num_layers = len(self.layers)
        EPS, BETA = 1e-8, 0.9
        
        v_w = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        v_b = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
        
        num_points_seen = 0        
        for epoch in range(epochs):
            start_time = time.time()
            idx = np.random.shuffle(np.arange(length_dataset))
            X_train = X_train[:, idx].reshape(self.img_flattened_size, length_dataset)
            Y_train = Y_train[:, idx].reshape(self.num_classes, length_dataset)


            CE = []
            #Y_pred = []
                        
            deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
            deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]

            for i in range(length_dataset):
            
                Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(self.img_flattened_size,1), self.weights, self.biases) 
                grad_weights, grad_biases = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(self.num_classes,1))
            
                deltaw = [grad_weights[num_layers-2 - i] + deltaw[i] for i in range(num_layers - 1)]
                deltab = [grad_biases[num_layers-2 - i] + deltab[i] for i in range(num_layers - 1)]
                
                #Y_pred.append(Y.reshape(self.num_classes,))
                CE.append(self.crossEntropyLoss(self.Y_train[:,i].reshape(self.num_classes,1), Y) + self.L2RegularisationLoss(weight_decay))            
                num_points_seen +=1
                
                if int(num_points_seen) % batch_size == 0:
                
                    v_w = [BETA*v_w[i] + (1-BETA)*(deltaw[i])**2 for i in range(num_layers - 1)]
                    v_b = [BETA*v_b[i] + (1-BETA)*(deltab[i])**2 for i in range(num_layers - 1)]

                    self.weights = {str(i+1):self.weights[str(i+1)]  - deltaw[i]*(learning_rate/np.sqrt(v_w[i]+EPS)) for i in range(len(self.weights))} 
                    self.biases = {str(i+1):self.biases[str(i+1)]  - deltab[i]*(learning_rate/np.sqrt(v_b[i]+EPS)) for i in range(len(self.biases))}

                    deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
                    deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
    
            
            elapsed = time.time() - start_time
            #Y_pred = np.array(Y_pred).transpose()
            Y_pred = self.predict(self.X_train, self.N_train)
            trainingloss.append(np.mean(CE))
            trainingaccuracy.append(self.accuracy(Y_train, Y_pred, length_dataset)[0])
            validationaccuracy.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.N_val), self.N_val)[0])

            print(
                        "Epoch: %d, Loss: %.3e, Training accuracy:%.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                        % (
                            epoch,
                            trainingloss[epoch],
                            trainingaccuracy[epoch],
                            validationaccuracy[epoch],
                            elapsed,
                            self.learning_rate,
                        )
                    )
                    
            wandb.log({'loss':np.mean(CE), 'trainingaccuracy':trainingaccuracy[epoch], 'validationaccuracy':validationaccuracy[epoch],'epoch':epoch })
        
        return trainingloss, trainingaccuracy, validationaccuracy, Y_pred  



    def adam(self, epochs,length_dataset, batch_size, learning_rate, weight_decay = 0):
        
        X_train = self.X_train[:, :length_dataset]
        Y_train = self.Y_train[:, :length_dataset]        

        trainingloss = []
        trainingaccuracy = []
        validationaccuracy = []
        num_layers = len(self.layers)
        EPS, BETA1, BETA2 = 1e-8, 0.9, 0.99
        
        m_w = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        m_b = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
        
        v_w = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        v_b = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]        
        
        m_w_hat = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        m_b_hat = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
        
        v_w_hat = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        v_b_hat = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]   
        
        num_points_seen = 0 
        for epoch in range(epochs):
            start_time = time.time()
            idx = np.random.shuffle(np.arange(length_dataset))
            X_train = X_train[:, idx].reshape(self.img_flattened_size, length_dataset)
            Y_train = Y_train[:, idx].reshape(self.num_classes, length_dataset)


            CE = []
            #Y_pred = []
            
            deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
            deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
            
           
            for i in range(length_dataset):
                Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(self.img_flattened_size,1), self.weights, self.biases) 
                grad_weights, grad_biases = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(self.num_classes,1))
                
                deltaw = [grad_weights[num_layers-2 - i] + deltaw[i] for i in range(num_layers - 1)]
                deltab = [grad_biases[num_layers-2 - i] + deltab[i] for i in range(num_layers - 1)]

                #Y_pred.append(Y.reshape(self.num_classes,))
                CE.append(self.crossEntropyLoss(self.Y_train[:,i].reshape(self.num_classes,1), Y) + self.L2RegularisationLoss(weight_decay))                 

                num_points_seen += 1
                ctr = 0
                if int(num_points_seen) % batch_size == 0:
                    ctr += 1
                
                    m_w = [BETA1*m_w[i] + (1-BETA1)*deltaw[i] for i in range(num_layers - 1)]
                    m_b = [BETA1*m_b[i] + (1-BETA1)*deltab[i] for i in range(num_layers - 1)]
                
                    v_w = [BETA2*v_w[i] + (1-BETA2)*(deltaw[i])**2 for i in range(num_layers - 1)]
                    v_b = [BETA2*v_b[i] + (1-BETA2)*(deltab[i])**2 for i in range(num_layers - 1)]
                    
                    m_w_hat = [m_w[i]/(1-BETA1**(epoch+1)) for i in range(num_layers - 1)]
                    m_b_hat = [m_b[i]/(1-BETA1**(epoch+1)) for i in range(num_layers - 1)]            
                
                    v_w_hat = [v_w[i]/(1-BETA2**(epoch+1)) for i in range(num_layers - 1)]
                    v_b_hat = [v_b[i]/(1-BETA2**(epoch+1)) for i in range(num_layers - 1)]
                
                    self.weights = {str(i+1):self.weights[str(i+1)] - (learning_rate/np.sqrt(v_w[i]+EPS))*m_w_hat[i] for i in range(len(self.weights))} 
                    self.biases = {str(i+1):self.biases[str(i+1)] - (learning_rate/np.sqrt(v_b[i]+EPS))*m_b_hat[i] for i in range(len(self.biases))}

                    deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
                    deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]


            elapsed = time.time() - start_time
            #Y_pred = np.array(Y_pred).transpose()
            Y_pred = self.predict(self.X_train, self.N_train)
            trainingloss.append(np.mean(CE))
            trainingaccuracy.append(self.accuracy(Y_train, Y_pred, length_dataset)[0])
            validationaccuracy.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.N_val), self.N_val)[0])

            print(
                        "Epoch: %d, Loss: %.3e, Training accuracy:%.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                        % (
                            epoch,
                            trainingloss[epoch],
                            trainingaccuracy[epoch],
                            validationaccuracy[epoch],
                            elapsed,
                            self.learning_rate,
                        )
                    )
                    
            wandb.log({'loss':np.mean(CE), 'trainingaccuracy':trainingaccuracy[epoch], 'validationaccuracy':validationaccuracy[epoch],'epoch':epoch })
        
        return trainingloss, trainingaccuracy, validationaccuracy, Y_pred


    
    def nadam(self, epochs,length_dataset, batch_size, learning_rate, weight_decay = 0):

        X_train = self.X_train[:, :length_dataset]
        Y_train = self.Y_train[:, :length_dataset]        

        
        trainingloss = []
        trainingaccuracy = []
        validationaccuracy = []
        num_layers = len(self.layers)
        
        GAMMA, EPS, BETA1, BETA2 = 0.9, 1e-8, 0.9, 0.99

        m_w = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        m_b = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
        
        v_w = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        v_b = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]        

        m_w_hat = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        m_b_hat = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
        
        v_w_hat = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
        v_b_hat = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)] 

        num_points_seen = 0 
        
        
        for epoch in range(epochs):
            start_time = time.time()
            idx = np.random.shuffle(np.arange(length_dataset))
            X_train = X_train[:, idx].reshape(self.img_flattened_size, length_dataset)
            Y_train = Y_train[:, idx].reshape(self.num_classes, length_dataset)

            CE = []
            #Y_pred = []

            deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
            deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]

            for i in range(length_dataset):

                Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(self.img_flattened_size,1), self.weights, self.biases) 
                grad_weights, grad_biases = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(self.num_classes,1))

                deltaw = [grad_weights[num_layers-2 - i] + deltaw[i] for i in range(num_layers - 1)]
                deltab = [grad_biases[num_layers-2 - i] + deltab[i] for i in range(num_layers - 1)]

                #Y_pred.append(Y.reshape(self.num_classes,))
                CE.append(self.crossEntropyLoss(self.Y_train[:,i].reshape(self.num_classes,1), Y) + self.L2RegularisationLoss(weight_decay))   
                num_points_seen += 1
                
                if num_points_seen % batch_size == 0:
                    
                    m_w = [BETA1*m_w[i] + (1-BETA1)*deltaw[i] for i in range(num_layers - 1)]
                    m_b = [BETA1*m_b[i] + (1-BETA1)*deltab[i] for i in range(num_layers - 1)]
                    
                    v_w = [BETA2*v_w[i] + (1-BETA2)*(deltaw[i])**2 for i in range(num_layers - 1)]
                    v_b = [BETA2*v_b[i] + (1-BETA2)*(deltab[i])**2 for i in range(num_layers - 1)]
                    
                    m_w_hat = [m_w[i]/(1-BETA1**(epoch+1)) for i in range(num_layers - 1)]
                    m_b_hat = [m_b[i]/(1-BETA1**(epoch+1)) for i in range(num_layers - 1)]            
                    
                    v_w_hat = [v_w[i]/(1-BETA2**(epoch+1)) for i in range(num_layers - 1)]
                    v_b_hat = [v_b[i]/(1-BETA2**(epoch+1)) for i in range(num_layers - 1)]
                    
                    self.weights = {str(i+1):self.weights[str(i+1)] - (learning_rate/(np.sqrt(v_w_hat[i])+EPS))*(BETA1*m_w_hat[i]+ (1-BETA1)*deltaw[i]) for i in range(len(self.weights))} 
                    self.biases = {str(i+1):self.biases[str(i+1)] - (learning_rate/(np.sqrt(v_b_hat[i])+EPS))*(BETA1*m_b_hat[i] + (1-BETA1)*deltab[i]) for i in range(len(self.biases))}

                    deltaw = [np.zeros((self.layers[l+1], self.layers[l])) for l in range(0, len(self.layers)-1)]
                    deltab = [np.zeros((self.layers[l+1], 1)) for l in range(0, len(self.layers)-1)]
             
            elapsed = time.time() - start_time

            #Y_pred = np.array(Y_pred).transpose()
            Y_pred = self.predict(self.X_train, self.N_train)
            trainingloss.append(np.mean(CE))
            trainingaccuracy.append(self.accuracy(Y_train, Y_pred, length_dataset)[0])
            validationaccuracy.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.N_val), self.N_val)[0])

            print(
                        "Epoch: %d, Loss: %.3e, Training accuracy:%.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                        % (
                            epoch,
                            trainingloss[epoch],
                            trainingaccuracy[epoch],
                            validationaccuracy[epoch],
                            elapsed,
                            self.learning_rate,
                        )
                    )
            wandb.log({'loss':np.mean(CE), 'trainingaccuracy':trainingaccuracy[epoch], 'validationaccuracy':validationaccuracy[epoch],'epoch':epoch })
            
        return trainingloss, trainingaccuracy, validationaccuracy, Y_pred  


In [10]:
import wandb

import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist


(trainIn, trainOut), (testIn, testOut) = fashion_mnist.load_data()

N_train_full = trainOut.shape[0]
N_train = int(0.9*N_train_full)
N_validation = int(0.1 * trainOut.shape[0])
N_test = testOut.shape[0]


idx  = np.random.choice(trainOut.shape[0], N_train_full, replace=False)
idx2 = np.random.choice(testOut.shape[0], N_test, replace=False)

trainInFull = trainIn[idx, :]
trainOutFull = trainOut[idx]

trainIn = trainInFull[:N_train,:]
trainOut = trainOutFull[:N_train]

validIn = trainInFull[N_train:, :]
validOut = trainOutFull[N_train:]    

testIn = testIn[idx2, :]
testOut = testOut[idx2]


sweep_config = {
  "name": "Bayesian Sweep",
  "method": "bayes",
  "metric":{
  "name": "validationaccuracy",
  "goal": "maximize"
  },
  "parameters": {
        "max_epochs": {
            "values": [5, 10]
        },

        "initializer": {
            "values": ["RANDOM", "XAVIER", "HE"]
        },

        "num_layers": {
            "values": [2, 3, 4]
        },
        
        
        "num_hidden_neurons": {
            "values": [32, 64, 128]
        },
        
        "activation": {
            "values": [ 'TANH',  'SIGMOID', 'RELU']
        },
        
        "learning_rate": {
            "values": [0.001, 0.0001]
        },
        
        
        "weight_decay": {
            "values": [0, 0.0005,0.5]
        },
        
        "optimizer": {
            "values": ["SGD", "MGD", "NAG", "RMSPROP", "ADAM","NADAM"]
        },
                    
        "batch_size": {
            "values": [16, 32, 64]
        }
        
        
    }
}

sweep_id = wandb.sweep(sweep_config,project='CS6910-DeepLearningFundamentals-Assignment1', entity='rahulsundar')



Create sweep with ID: vny2mdtz
Sweep URL: https://wandb.ai/rahulsundar/CS6910-DeepLearningFundamentals-Assignment1/sweeps/vny2mdtz


In [11]:
def train():    
    config_defaults = dict(
            max_epochs=5,
            num_hidden_layers=3,
            num_hidden_neurons=32,
            weight_decay=0,
            learning_rate=1e-3,
            optimizer="MGD",
            batch_size=16,
            activation="TANH",
            initializer="XAVIER",
            loss="CROSS",
        )
        
    wandb.init(config = config_defaults)
    #wandb.init(project='CS6910-DeepLearningFundamentals-Assignment1', entity='rahulsundar',config = config_defaults)


    wandb.run.name = "hl_" + str(wandb.config.num_hidden_layers) + "_hn_" + str(wandb.config.num_hidden_neurons) + "_opt_" + wandb.config.optimizer + "_act_" + wandb.config.activation + "_lr_" + str(wandb.config.learning_rate) + "_bs_"+str(wandb.config.batch_size) + "_init_" + wandb.config.initializer + "_ep_"+ str(wandb.config.max_epochs)+ "_l2_" + str(wandb.config.weight_decay) 
    CONFIG = wandb.config


    
    #sweep_id = wandb.sweep(sweep_config)
  

    FFNN = FeedForwardNeuralNetwork(
        num_hidden_layers=CONFIG.num_hidden_layers,
        num_hidden_neurons=CONFIG.num_hidden_neurons,
        X_train_raw=trainIn,
        Y_train_raw=trainOut,
        N_train = N_train,
        X_val_raw = validIn,
        Y_val_raw = validOut,
        N_val = N_validation,
        X_test_raw = testIn,
        Y_test_raw = testOut,
        N_test = N_test,
        optimizer = CONFIG.optimizer,
        batch_size = CONFIG.batch_size,
        weight_decay = CONFIG.weight_decay,
        learning_rate = CONFIG.learning_rate,
        max_epochs = CONFIG.max_epochs,
        activation = CONFIG.activation,
        initializer = CONFIG.initializer,
        loss = CONFIG.loss
        )



    training_loss, trainingaccuracy, validationaccuracy, Y_pred_train = FFNN.optimizer(FFNN.max_epochs, FFNN.N_train, FFNN.batch_size, FFNN.learning_rate)
 

In [None]:
wandb.agent(sweep_id, train, count = 100)

[34m[1mwandb[0m: Agent Starting Run: coxe11jq with config:
[34m[1mwandb[0m: 	activation: TANH
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 4.748e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 125.35, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.568e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 124.21, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.140e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 125.99, Learning Rate: 1.000e-03
Epoch: 3, Loss: 2.854e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 126.38, Learning Rate: 1.000e-03
Epoch: 4, Loss: 2.642e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 127.55, Learning Rate: 1.000e-03
Epoch: 5, Loss: 2.470e-02, Training accuracy:0.91, Validation Accuracy: 0.88, Time: 128.32, Learning Rate: 1.000e-03
Epoch: 6, Loss: 2.321e-02, Training accuracy:0.91, Validation Accuracy: 0.88, Time: 122.75, Learning Rate: 1.000e-03
Epoch: 7, Loss: 2.182e-02, Training accuracy:0.91, Validation Accuracy: 0.88, Time: 127.48, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.063e-02, Training accuracy:0.91, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.0194
trainingaccuracy,0.91854
validationaccuracy,0.88067
epoch,9.0
_runtime,1365.0
_timestamp,1615724254.0
_step,9.0


0,1
loss,█▅▄▃▃▂▂▂▁▁
trainingaccuracy,▁▄▅▆▆▇▇▇██
validationaccuracy,▁▅▇██▇▇▇▇█
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 5o8ovqdz with config:
[34m[1mwandb[0m: 	activation: TANH
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: NAG
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 7.377e-02, Training accuracy:0.82, Validation Accuracy: 0.81, Time: 137.08, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.851e-02, Training accuracy:0.84, Validation Accuracy: 0.84, Time: 135.80, Learning Rate: 1.000e-03
Epoch: 2, Loss: 4.411e-02, Training accuracy:0.85, Validation Accuracy: 0.85, Time: 139.80, Learning Rate: 1.000e-03
Epoch: 3, Loss: 4.168e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 137.29, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.996e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 139.54, Learning Rate: 1.000e-03
Epoch: 5, Loss: 3.862e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 138.22, Learning Rate: 1.000e-03
Epoch: 6, Loss: 3.750e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 138.50, Learning Rate: 1.000e-03
Epoch: 7, Loss: 3.656e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 139.45, Learning Rate: 1.000e-03
Epoch: 8, Loss: 3.572e-02, Training accuracy:0.88, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.03497
trainingaccuracy,0.87952
validationaccuracy,0.86367
epoch,9.0
_runtime,1488.0
_timestamp,1615725748.0
_step,9.0


0,1
loss,█▃▃▂▂▂▁▁▁▁
trainingaccuracy,▁▃▅▆▆▇▇▇██
validationaccuracy,▁▄▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: f0vu3efx with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 64
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: RMSPROP
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 9.084e-02, Training accuracy:0.82, Validation Accuracy: 0.82, Time: 68.64, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.645e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 70.94, Learning Rate: 1.000e-03
Epoch: 2, Loss: 4.050e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 70.18, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.766e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 70.14, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.567e-02, Training accuracy:0.88, Validation Accuracy: 0.86, Time: 68.92, Learning Rate: 1.000e-03
Epoch: 5, Loss: 3.415e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 70.88, Learning Rate: 1.000e-03
Epoch: 6, Loss: 3.284e-02, Training accuracy:0.88, Validation Accuracy: 0.86, Time: 69.76, Learning Rate: 1.000e-03
Epoch: 7, Loss: 3.183e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 68.37, Learning Rate: 1.000e-03
Epoch: 8, Loss: 3.088e-02, Training accuracy:0.89, Validation Accuracy: 

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.03007
trainingaccuracy,0.89515
validationaccuracy,0.8715
epoch,9.0
_runtime,774.0
_timestamp,1615726530.0
_step,9.0


0,1
loss,█▃▂▂▂▁▁▁▁▁
trainingaccuracy,▁▄▅▆▇▇▇▇██
validationaccuracy,▁▅▅▆▇▇▆▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: xca8nxbz with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 6.490e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 120.62, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.013e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 111.46, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.609e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 112.68, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.378e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 114.68, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.211e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 118.13, Learning Rate: 1.000e-03
Epoch: 5, Loss: 3.079e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 118.46, Learning Rate: 1.000e-03
Epoch: 6, Loss: 2.967e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 118.97, Learning Rate: 1.000e-03
Epoch: 7, Loss: 2.870e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 119.69, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.783e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02704
trainingaccuracy,0.90674
validationaccuracy,0.88567
epoch,9.0
_runtime,1283.0
_timestamp,1615727822.0
_step,9.0


0,1
loss,█▃▃▂▂▂▁▁▁▁
trainingaccuracy,▁▄▅▅▆▇▇▇██
validationaccuracy,▁▄▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▂▃▄▅▆▆▇█
_timestamp,▁▂▂▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: glplzk6l with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 7.391e-02, Training accuracy:0.84, Validation Accuracy: 0.84, Time: 115.85, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.268e-02, Training accuracy:0.86, Validation Accuracy: 0.86, Time: 114.73, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.825e-02, Training accuracy:0.87, Validation Accuracy: 0.87, Time: 115.06, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.573e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 116.65, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.396e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 114.75, Learning Rate: 1.000e-03
Epoch: 5, Loss: 3.260e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 116.11, Learning Rate: 1.000e-03
Epoch: 6, Loss: 3.147e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 114.13, Learning Rate: 1.000e-03
Epoch: 7, Loss: 3.052e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 114.00, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.968e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02891
trainingaccuracy,0.90146
validationaccuracy,0.88117
epoch,9.0
_runtime,1248.0
_timestamp,1615729079.0
_step,9.0


0,1
loss,█▃▂▂▂▂▁▁▁▁
trainingaccuracy,▁▄▅▆▆▇▇▇██
validationaccuracy,▁▄▆▇▇▇████
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▇▇█
_timestamp,▁▂▃▃▄▅▆▇▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: xjscyae1 with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 5.991e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 135.30, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.198e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 136.22, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.838e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 138.44, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.623e-02, Training accuracy:0.88, Validation Accuracy: 0.86, Time: 137.01, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.465e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 136.98, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.339e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 136.28, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.235e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 136.13, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.147e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 137.50, Learning Rate: 1.000e-04
Epoch: 8, Loss: 3.069e-02, Training accuracy:0.89, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02999
trainingaccuracy,0.89689
validationaccuracy,0.87967
epoch,9.0
_runtime,1494.0
_timestamp,1615730581.0
_step,9.0


0,1
loss,█▄▃▂▂▂▂▁▁▁
trainingaccuracy,▁▃▄▅▆▆▇▇██
validationaccuracy,▁▃▄▅▆▆▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: f9wne3pl with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 6.218e-02, Training accuracy:0.83, Validation Accuracy: 0.82, Time: 122.99, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.357e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 120.80, Learning Rate: 1.000e-03




Epoch: 2, Loss: nan, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 121.61, Learning Rate: 1.000e-03
Epoch: 3, Loss: nan, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 118.91, Learning Rate: 1.000e-03
Epoch: 4, Loss: nan, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 119.30, Learning Rate: 1.000e-03
Epoch: 5, Loss: nan, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 118.98, Learning Rate: 1.000e-03
Epoch: 6, Loss: nan, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 113.84, Learning Rate: 1.000e-03
Epoch: 7, Loss: nan, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 120.96, Learning Rate: 1.000e-03
Epoch: 8, Loss: nan, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 121.16, Learning Rate: 1.000e-03
Epoch: 9, Loss: nan, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 121.42, Learning Rate: 1.000e-03


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,
trainingaccuracy,0.89254
validationaccuracy,0.87433
epoch,9.0
_runtime,1325.0
_timestamp,1615731914.0
_step,9.0


0,1
loss,█▁
trainingaccuracy,▁▃▄▆▆▇▇▇██
validationaccuracy,▁▃▅▆▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: uc5fd8yw with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 5.886e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 127.50, Learning Rate: 1.000e-04
Epoch: 1, Loss: 3.841e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 127.49, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.464e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 127.74, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.255e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 131.07, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.114e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 128.48, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.010e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 127.80, Learning Rate: 1.000e-04
Epoch: 6, Loss: 2.927e-02, Training accuracy:0.90, Validation Accuracy: 0.89, Time: 125.56, Learning Rate: 1.000e-04
Epoch: 7, Loss: 2.858e-02, Training accuracy:0.90, Validation Accuracy: 0.89, Time: 127.91, Learning Rate: 1.000e-04
Epoch: 8, Loss: 2.799e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02746
trainingaccuracy,0.90389
validationaccuracy,0.88717
epoch,9.0
_runtime,1381.0
_timestamp,1615733304.0
_step,9.0


0,1
loss,█▃▃▂▂▂▁▁▁▁
trainingaccuracy,▁▃▄▅▆▇▇▇██
validationaccuracy,▁▃▅▆▇█████
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: inuq9i9e with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 1.343e-01, Training accuracy:0.68, Validation Accuracy: 0.67, Time: 126.82, Learning Rate: 1.000e-04
Epoch: 1, Loss: 7.467e-02, Training accuracy:0.76, Validation Accuracy: 0.76, Time: 130.15, Learning Rate: 1.000e-04
Epoch: 2, Loss: 6.124e-02, Training accuracy:0.78, Validation Accuracy: 0.78, Time: 130.29, Learning Rate: 1.000e-04
Epoch: 3, Loss: 5.569e-02, Training accuracy:0.80, Validation Accuracy: 0.80, Time: 131.56, Learning Rate: 1.000e-04
Epoch: 4, Loss: 5.203e-02, Training accuracy:0.82, Validation Accuracy: 0.82, Time: 129.85, Learning Rate: 1.000e-04
Epoch: 5, Loss: 4.933e-02, Training accuracy:0.83, Validation Accuracy: 0.82, Time: 127.12, Learning Rate: 1.000e-04
Epoch: 6, Loss: 4.723e-02, Training accuracy:0.84, Validation Accuracy: 0.83, Time: 129.92, Learning Rate: 1.000e-04
Epoch: 7, Loss: 4.555e-02, Training accuracy:0.84, Validation Accuracy: 0.84, Time: 127.93, Learning Rate: 1.000e-04
Epoch: 8, Loss: 4.417e-02, Training accuracy:0.85, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.04299
trainingaccuracy,0.84974
validationaccuracy,0.84767
epoch,9.0
_runtime,1402.0
_timestamp,1615734715.0
_step,9.0


0,1
loss,█▃▂▂▂▁▁▁▁▁
trainingaccuracy,▁▄▅▆▇▇▇███
validationaccuracy,▁▄▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: r8cdv13f with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 5.714e-02, Training accuracy:0.85, Validation Accuracy: 0.85, Time: 128.99, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.070e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 132.69, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.708e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 133.08, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.487e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 132.99, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.328e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 131.59, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.201e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 133.03, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.094e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 138.08, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.001e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 133.21, Learning Rate: 1.000e-04
Epoch: 8, Loss: 2.921e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02849
trainingaccuracy,0.90193
validationaccuracy,0.88567
epoch,9.0
_runtime,1453.0
_timestamp,1615736177.0
_step,9.0


0,1
loss,█▄▃▃▂▂▂▁▁▁
trainingaccuracy,▁▃▄▅▆▇▇▇██
validationaccuracy,▁▃▅▅▆▆▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: xgivsx26 with config:
[34m[1mwandb[0m: 	activation: TANH
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 1.123e-01, Training accuracy:0.75, Validation Accuracy: 0.74, Time: 44.33, Learning Rate: 1.000e-03
Epoch: 1, Loss: 7.390e-02, Training accuracy:0.78, Validation Accuracy: 0.78, Time: 44.18, Learning Rate: 1.000e-03
Epoch: 2, Loss: 6.341e-02, Training accuracy:0.80, Validation Accuracy: 0.80, Time: 45.37, Learning Rate: 1.000e-03
Epoch: 3, Loss: 5.742e-02, Training accuracy:0.82, Validation Accuracy: 0.82, Time: 44.37, Learning Rate: 1.000e-03
Epoch: 4, Loss: 5.340e-02, Training accuracy:0.83, Validation Accuracy: 0.82, Time: 43.81, Learning Rate: 1.000e-03


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.0534
trainingaccuracy,0.82593
validationaccuracy,0.82217
epoch,4.0
_runtime,254.0
_timestamp,1615736440.0
_step,4.0


0,1
loss,█▃▂▁▁
trainingaccuracy,▁▄▆▇█
validationaccuracy,▁▅▆▇█
epoch,▁▃▅▆█
_runtime,▁▃▅▆█
_timestamp,▁▃▅▆█
_step,▁▃▅▆█


[34m[1mwandb[0m: Agent Starting Run: vpmjp0gx with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 1.645e-01, Training accuracy:0.63, Validation Accuracy: 0.63, Time: 114.31, Learning Rate: 1.000e-04
Epoch: 1, Loss: 9.974e-02, Training accuracy:0.71, Validation Accuracy: 0.70, Time: 112.37, Learning Rate: 1.000e-04
Epoch: 2, Loss: 7.831e-02, Training accuracy:0.75, Validation Accuracy: 0.75, Time: 111.00, Learning Rate: 1.000e-04
Epoch: 3, Loss: 6.818e-02, Training accuracy:0.76, Validation Accuracy: 0.77, Time: 113.94, Learning Rate: 1.000e-04
Epoch: 4, Loss: 6.253e-02, Training accuracy:0.78, Validation Accuracy: 0.78, Time: 114.33, Learning Rate: 1.000e-04
Epoch: 5, Loss: 5.881e-02, Training accuracy:0.79, Validation Accuracy: 0.80, Time: 117.41, Learning Rate: 1.000e-04
Epoch: 6, Loss: 5.601e-02, Training accuracy:0.80, Validation Accuracy: 0.81, Time: 118.89, Learning Rate: 1.000e-04
Epoch: 7, Loss: 5.376e-02, Training accuracy:0.81, Validation Accuracy: 0.81, Time: 117.31, Learning Rate: 1.000e-04
Epoch: 8, Loss: 5.189e-02, Training accuracy:0.82, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.05029
trainingaccuracy,0.82222
validationaccuracy,0.823
epoch,9.0
_runtime,1268.0
_timestamp,1615737717.0
_step,9.0


0,1
loss,█▄▃▂▂▂▁▁▁▁
trainingaccuracy,▁▄▅▆▆▇▇███
validationaccuracy,▁▄▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: gugi6umg with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 6.850e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 117.73, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.111e-02, Training accuracy:0.86, Validation Accuracy: 0.86, Time: 112.55, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.721e-02, Training accuracy:0.87, Validation Accuracy: 0.87, Time: 114.74, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.495e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 113.83, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.331e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 114.27, Learning Rate: 1.000e-03
Epoch: 5, Loss: 3.203e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 115.01, Learning Rate: 1.000e-03
Epoch: 6, Loss: 3.095e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 111.77, Learning Rate: 1.000e-03
Epoch: 7, Loss: 3.002e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 113.60, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.918e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02843
trainingaccuracy,0.90254
validationaccuracy,0.88617
epoch,9.0
_runtime,1250.0
_timestamp,1615738976.0
_step,9.0


0,1
loss,█▃▃▂▂▂▁▁▁▁
trainingaccuracy,▁▃▅▆▆▇▇▇██
validationaccuracy,▁▃▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: uloyui9j with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 5.977e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 127.56, Learning Rate: 1.000e-04
Epoch: 1, Loss: 3.864e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 123.46, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.474e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 129.31, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.262e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 127.75, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.119e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 122.99, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.012e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 129.09, Learning Rate: 1.000e-04
Epoch: 6, Loss: 2.926e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 128.15, Learning Rate: 1.000e-04
Epoch: 7, Loss: 2.855e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 128.64, Learning Rate: 1.000e-04
Epoch: 8, Loss: 2.793e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02739
trainingaccuracy,0.90413
validationaccuracy,0.88667
epoch,9.0
_runtime,1379.0
_timestamp,1615740382.0
_step,9.0


0,1
loss,█▃▃▂▂▂▁▁▁▁
trainingaccuracy,▁▃▅▆▆▇▇▇██
validationaccuracy,▁▃▅▆▆▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: b5bd7d2z with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 6.146e-02, Training accuracy:0.85, Validation Accuracy: 0.85, Time: 118.64, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.964e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 118.06, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.584e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 110.82, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.354e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 119.63, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.185e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 118.60, Learning Rate: 1.000e-03
Epoch: 5, Loss: 3.052e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 122.01, Learning Rate: 1.000e-03
Epoch: 6, Loss: 2.939e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 122.70, Learning Rate: 1.000e-03
Epoch: 7, Loss: 2.840e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 121.20, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.751e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02671
trainingaccuracy,0.90696
validationaccuracy,0.88567
epoch,9.0
_runtime,1300.0
_timestamp,1615741692.0
_step,9.0


0,1
loss,█▄▃▂▂▂▂▁▁▁
trainingaccuracy,▁▃▄▅▆▇▇▇██
validationaccuracy,▁▃▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▂▃▄▅▆▆▇█
_timestamp,▁▂▂▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 7wxthqpo with config:
[34m[1mwandb[0m: 	activation: TANH
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 4.677e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 111.78, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.538e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 110.78, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.172e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 110.72, Learning Rate: 1.000e-03
Epoch: 3, Loss: 2.935e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 111.07, Learning Rate: 1.000e-03
Epoch: 4, Loss: 2.759e-02, Training accuracy:0.91, Validation Accuracy: 0.88, Time: 114.74, Learning Rate: 1.000e-03
Epoch: 5, Loss: 2.615e-02, Training accuracy:0.91, Validation Accuracy: 0.88, Time: 113.19, Learning Rate: 1.000e-03
Epoch: 6, Loss: 2.490e-02, Training accuracy:0.91, Validation Accuracy: 0.89, Time: 113.96, Learning Rate: 1.000e-03
Epoch: 7, Loss: 2.375e-02, Training accuracy:0.92, Validation Accuracy: 0.89, Time: 113.90, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.270e-02, Training accuracy:0.92, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02169
trainingaccuracy,0.92154
validationaccuracy,0.88583
epoch,9.0
_runtime,1238.0
_timestamp,1615742939.0
_step,9.0


0,1
loss,█▅▄▃▃▂▂▂▁▁
trainingaccuracy,▁▄▅▆▆▇▇▇██
validationaccuracy,▁▄▆▇▇▇████
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: jab3rpp7 with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 6.860e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 111.32, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.111e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 114.62, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.726e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 110.52, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.501e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 109.89, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.336e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 112.67, Learning Rate: 1.000e-03
Epoch: 5, Loss: 3.208e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 112.51, Learning Rate: 1.000e-03
Epoch: 6, Loss: 3.098e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 109.68, Learning Rate: 1.000e-03
Epoch: 7, Loss: 3.004e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 112.20, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.919e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02842
trainingaccuracy,0.90309
validationaccuracy,0.88083
epoch,9.0
_runtime,1227.0
_timestamp,1615744176.0
_step,9.0


0,1
loss,█▃▃▂▂▂▁▁▁▁
trainingaccuracy,▁▃▄▅▆▆▇▇██
validationaccuracy,▁▄▅▆▆▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: food6oeb with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 6.009e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 120.93, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.266e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 122.36, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.916e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 121.91, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.703e-02, Training accuracy:0.88, Validation Accuracy: 0.86, Time: 123.00, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.546e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 123.24, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.421e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 122.44, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.318e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 123.58, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.228e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 121.92, Learning Rate: 1.000e-04
Epoch: 8, Loss: 3.149e-02, Training accuracy:0.89, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.03078
trainingaccuracy,0.89496
validationaccuracy,0.87417
epoch,9.0
_runtime,1356.0
_timestamp,1615745542.0
_step,9.0


0,1
loss,█▄▃▂▂▂▂▁▁▁
trainingaccuracy,▁▃▅▅▆▇▇▇██
validationaccuracy,▁▄▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: x6ptuxb0 with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 6.346e-02, Training accuracy:0.84, Validation Accuracy: 0.84, Time: 118.32, Learning Rate: 1.000e-04
Epoch: 1, Loss: 3.940e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 117.01, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.589e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 116.58, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.399e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 117.23, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.271e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 116.40, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.175e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 119.72, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.097e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 114.26, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.032e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 117.70, Learning Rate: 1.000e-04
Epoch: 8, Loss: 2.976e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02926
trainingaccuracy,0.89835
validationaccuracy,0.88083
epoch,9.0
_runtime,1284.0
_timestamp,1615746836.0
_step,9.0


0,1
loss,█▃▂▂▂▂▁▁▁▁
trainingaccuracy,▁▄▆▆▇▇▇███
validationaccuracy,▁▅▆▇▇█████
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: vezc5j2n with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 6.423e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 118.89, Learning Rate: 1.000e-04
Epoch: 1, Loss: 3.914e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 116.87, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.569e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 117.96, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.381e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 118.34, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.252e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 118.54, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.154e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 117.66, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.076e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 115.71, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.010e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 118.00, Learning Rate: 1.000e-04
Epoch: 8, Loss: 2.954e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02904
trainingaccuracy,0.89859
validationaccuracy,0.8825
epoch,9.0
_runtime,1275.0
_timestamp,1615748120.0
_step,9.0


0,1
loss,█▃▂▂▂▁▁▁▁▁
trainingaccuracy,▁▄▅▆▇▇▇███
validationaccuracy,▁▅▆▇▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 0i8gldxu with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 64
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 5.314e-02, Training accuracy:0.85, Validation Accuracy: 0.85, Time: 74.49, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.839e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 75.22, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.445e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 74.90, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.199e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 76.00, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.026e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 75.77, Learning Rate: 1.000e-03
Epoch: 5, Loss: 2.877e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 75.71, Learning Rate: 1.000e-03
Epoch: 6, Loss: 2.757e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 75.48, Learning Rate: 1.000e-03
Epoch: 7, Loss: 2.646e-02, Training accuracy:0.91, Validation Accuracy: 0.89, Time: 74.22, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.558e-02, Training accuracy:0.91, Validation Accuracy: 

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02472
trainingaccuracy,0.90983
validationaccuracy,0.88717
epoch,9.0
_runtime,845.0
_timestamp,1615748974.0
_step,9.0


0,1
loss,█▄▃▃▂▂▂▁▁▁
trainingaccuracy,▁▄▅▆▆▇▇███
validationaccuracy,▁▄▆▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 41xi5lzw with config:
[34m[1mwandb[0m: 	activation: TANH
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 5.733e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 114.85, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.135e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 115.28, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.807e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 115.35, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.616e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 115.07, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.479e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 116.09, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.372e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 113.48, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.284e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 116.69, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.207e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 114.23, Learning Rate: 1.000e-04
Epoch: 8, Loss: 3.139e-02, Training accuracy:0.89, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.03078
trainingaccuracy,0.89417
validationaccuracy,0.87883
epoch,9.0
_runtime,1255.0
_timestamp,1615750238.0
_step,9.0


0,1
loss,█▄▃▂▂▂▂▁▁▁
trainingaccuracy,▁▃▄▅▆▇▇▇██
validationaccuracy,▁▃▅▆▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: i5jyzepr with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 6.087e-02, Training accuracy:0.84, Validation Accuracy: 0.83, Time: 114.08, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.050e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 114.56, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.574e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 109.86, Learning Rate: 1.000e-03
Epoch: 3, Loss: nan, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 114.33, Learning Rate: 1.000e-03
Epoch: 4, Loss: nan, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 114.72, Learning Rate: 1.000e-03
Epoch: 5, Loss: nan, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 116.38, Learning Rate: 1.000e-03
Epoch: 6, Loss: nan, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 118.97, Learning Rate: 1.000e-03
Epoch: 7, Loss: nan, Training accuracy:0.90, Validation Accuracy: 0.87, Time: 111.16, Learning Rate: 1.000e-03
Epoch: 8, Loss: nan, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 116.62, Learning

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,
trainingaccuracy,0.90319
validationaccuracy,0.87867
epoch,9.0
_runtime,1268.0
_timestamp,1615751516.0
_step,9.0


0,1
loss,█▂▁
trainingaccuracy,▁▃▅▆▆▇▇███
validationaccuracy,▁▄▇▆▆▇█▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 17xg5nqm with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 7.454e-02, Training accuracy:0.84, Validation Accuracy: 0.84, Time: 109.43, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.177e-02, Training accuracy:0.86, Validation Accuracy: 0.86, Time: 111.79, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.801e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 113.96, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.607e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 110.27, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.479e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 112.68, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.385e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 112.94, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.311e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 111.33, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.249e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 109.65, Learning Rate: 1.000e-04
Epoch: 8, Loss: 3.196e-02, Training accuracy:0.88, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.03149
trainingaccuracy,0.88539
validationaccuracy,0.87383
epoch,9.0
_runtime,1226.0
_timestamp,1615752753.0
_step,9.0


0,1
loss,█▃▂▂▂▁▁▁▁▁
trainingaccuracy,▁▄▆▆▇▇▇███
validationaccuracy,▁▄▆▆▇▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: iyt9c7ko with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 7.044e-02, Training accuracy:0.83, Validation Accuracy: 0.83, Time: 118.20, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.663e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 118.06, Learning Rate: 1.000e-04
Epoch: 2, Loss: 4.301e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 118.37, Learning Rate: 1.000e-04
Epoch: 3, Loss: 4.090e-02, Training accuracy:0.86, Validation Accuracy: 0.86, Time: 118.95, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.936e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 115.51, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.815e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 117.60, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.716e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 114.19, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.631e-02, Training accuracy:0.88, Validation Accuracy: 0.86, Time: 117.09, Learning Rate: 1.000e-04
Epoch: 8, Loss: 3.556e-02, Training accuracy:0.88, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.0349
trainingaccuracy,0.87957
validationaccuracy,0.867
epoch,9.0
_runtime,1298.0
_timestamp,1615754061.0
_step,9.0


0,1
loss,█▃▃▂▂▂▁▁▁▁
trainingaccuracy,▁▃▅▅▆▆▇▇██
validationaccuracy,▁▃▅▆▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: ez8r87pf with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 64
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 8.752e-02, Training accuracy:0.82, Validation Accuracy: 0.82, Time: 71.23, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.623e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 70.61, Learning Rate: 1.000e-03
Epoch: 2, Loss: 4.066e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 71.94, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.776e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 70.33, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.583e-02, Training accuracy:0.88, Validation Accuracy: 0.86, Time: 71.40, Learning Rate: 1.000e-03
Epoch: 5, Loss: 3.442e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 71.24, Learning Rate: 1.000e-03
Epoch: 6, Loss: 3.329e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 69.78, Learning Rate: 1.000e-03
Epoch: 7, Loss: 3.235e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 70.77, Learning Rate: 1.000e-03
Epoch: 8, Loss: 3.152e-02, Training accuracy:0.89, Validation Accuracy: 

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.03078
trainingaccuracy,0.89165
validationaccuracy,0.87617
epoch,9.0
_runtime,788.0
_timestamp,1615754859.0
_step,9.0


0,1
loss,█▃▂▂▂▁▁▁▁▁
trainingaccuracy,▁▄▅▆▆▇▇███
validationaccuracy,▁▄▅▆▇▇████
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: y77dqkcc with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 4.860e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 117.79, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.561e-02, Training accuracy:0.88, Validation Accuracy: 0.86, Time: 115.72, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.172e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 111.84, Learning Rate: 1.000e-03
Epoch: 3, Loss: 2.904e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 113.21, Learning Rate: 1.000e-03
Epoch: 4, Loss: 2.699e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 113.33, Learning Rate: 1.000e-03
Epoch: 5, Loss: 2.533e-02, Training accuracy:0.91, Validation Accuracy: 0.88, Time: 111.94, Learning Rate: 1.000e-03
Epoch: 6, Loss: 2.392e-02, Training accuracy:0.91, Validation Accuracy: 0.88, Time: 114.79, Learning Rate: 1.000e-03
Epoch: 7, Loss: 2.257e-02, Training accuracy:0.92, Validation Accuracy: 0.88, Time: 114.18, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.142e-02, Training accuracy:0.92, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02043
trainingaccuracy,0.92248
validationaccuracy,0.88117
epoch,9.0
_runtime,1272.0
_timestamp,1615756141.0
_step,9.0


0,1
loss,█▅▄▃▃▂▂▂▁▁
trainingaccuracy,▁▃▅▆▆▇▇▇██
validationaccuracy,▁▄▆▇██▇▇█▇
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 2s5m3p3k with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 5.911e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 132.70, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.239e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 129.58, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.879e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 133.70, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.661e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 134.91, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.501e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 134.27, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.370e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 132.46, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.260e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 134.99, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.166e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 133.57, Learning Rate: 1.000e-04
Epoch: 8, Loss: 3.083e-02, Training accuracy:0.89, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.03008
trainingaccuracy,0.89604
validationaccuracy,0.88083
epoch,9.0
_runtime,1452.0
_timestamp,1615757603.0
_step,9.0


0,1
loss,█▄▃▃▂▂▂▁▁▁
trainingaccuracy,▁▃▅▅▆▇▇▇██
validationaccuracy,▁▃▄▅▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 9i41mhfr with config:
[34m[1mwandb[0m: 	activation: SIGMOID
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 6.482e-02, Training accuracy:0.85, Validation Accuracy: 0.85, Time: 114.15, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.030e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 114.41, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.637e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 116.79, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.401e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 112.86, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.229e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 115.96, Learning Rate: 1.000e-03
Epoch: 5, Loss: 3.092e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 115.90, Learning Rate: 1.000e-03
Epoch: 6, Loss: 2.978e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 115.33, Learning Rate: 1.000e-03
Epoch: 7, Loss: 2.878e-02, Training accuracy:0.90, Validation Accuracy: 0.88, Time: 117.08, Learning Rate: 1.000e-03
Epoch: 8, Loss: 2.789e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02706
trainingaccuracy,0.90757
validationaccuracy,0.88483
epoch,9.0
_runtime,1262.0
_timestamp,1615758876.0
_step,9.0


0,1
loss,█▃▃▂▂▂▂▁▁▁
trainingaccuracy,▁▃▄▅▆▇▇▇██
validationaccuracy,▁▄▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 3ecnftvk with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: XAVIER
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: RMSPROP
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 6.092e-02, Training accuracy:0.85, Validation Accuracy: 0.84, Time: 119.31, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.214e-02, Training accuracy:0.87, Validation Accuracy: 0.85, Time: 120.53, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.816e-02, Training accuracy:0.88, Validation Accuracy: 0.86, Time: 121.47, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.565e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 120.63, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.387e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 119.19, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.249e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 122.96, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.139e-02, Training accuracy:0.90, Validation Accuracy: 0.87, Time: 111.29, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.044e-02, Training accuracy:0.90, Validation Accuracy: 0.87, Time: 118.36, Learning Rate: 1.000e-04
Epoch: 8, Loss: 2.959e-02, Training accuracy:0.90, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.02885
trainingaccuracy,0.90456
validationaccuracy,0.8775
epoch,9.0
_runtime,1316.0
_timestamp,1615760203.0
_step,9.0


0,1
loss,█▄▃▂▂▂▂▁▁▁
trainingaccuracy,▁▃▅▅▆▇▇▇██
validationaccuracy,▁▄▅▆▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: axfvzn74 with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 6.092e-02, Training accuracy:0.84, Validation Accuracy: 0.84, Time: 118.10, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.290e-02, Training accuracy:0.86, Validation Accuracy: 0.85, Time: 113.36, Learning Rate: 1.000e-04
Epoch: 2, Loss: 3.944e-02, Training accuracy:0.87, Validation Accuracy: 0.86, Time: 116.26, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.733e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 119.16, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.579e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 119.49, Learning Rate: 1.000e-04
Epoch: 5, Loss: 3.456e-02, Training accuracy:0.88, Validation Accuracy: 0.87, Time: 121.11, Learning Rate: 1.000e-04
Epoch: 6, Loss: 3.351e-02, Training accuracy:0.89, Validation Accuracy: 0.87, Time: 118.71, Learning Rate: 1.000e-04
Epoch: 7, Loss: 3.260e-02, Training accuracy:0.89, Validation Accuracy: 0.88, Time: 120.23, Learning Rate: 1.000e-04
Epoch: 8, Loss: 3.179e-02, Training accuracy:0.89, Validation Ac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.03107
trainingaccuracy,0.89469
validationaccuracy,0.87933
epoch,9.0
_runtime,1312.0
_timestamp,1615761525.0
_step,9.0


0,1
loss,█▄▃▂▂▂▂▁▁▁
trainingaccuracy,▁▃▄▅▆▇▇▇██
validationaccuracy,▁▃▅▆▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: tc3c9hsp with config:
[34m[1mwandb[0m: 	activation: RELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: HE
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: NADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 6.215e-02, Training accuracy:0.84, Validation Accuracy: 0.84, Time: 117.92, Learning Rate: 1.000e-04


In [8]:
wandb.finish()