<div style='text-align: center;'>
    <h1>CS6910 Fundamentals of Deep Learning</h1>
    <h2>Assignment-1</h2>
</div>

In [1]:
# Load all necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
import wandb
from sklearn.model_selection import train_test_split

In [2]:
#Random seed to initiate initial weights and bias.
#This decides the overall convergence of the model.
np.random.seed(27)

In [3]:
class ObjectiveFunction:
    def __init__(self, method):
        self.method = method

    def get_loss(self, y, y_hat):
        if self.method == "cel":
            return self.cross_entropy_loss(y, y_hat)
        elif self.method == "mse":
            return self.mean_square_error(y, y_hat)

    def get_derivative(self, y, y_hat):
        if self.method == "cel":
            return self.cross_entropy_loss_derivative(y, y_hat)
        elif self.method == "mse":
            return self.mean_square_error_derivative(y, y_hat)

    def mean_square_error(self, y, y_hat):
        return np.sum((y - y_hat) ** 2) / 2

    def mean_square_error_derivative(self, y, y_hat):
        return y_hat - y

    def cross_entropy_loss(self, y, y_hat):
        return -np.sum(y * np.log(y_hat))

    def cross_entropy_loss_derivative(self, y, y_hat):
        return -y/y_hat

In [4]:
class NeuralLayer:
    def __init__(self, index, n_input, n_neurons, function=None, weights=None, bias=None, method="random"):
        self.index = index
        self.function = function if function is not None else 'sigmoid'
        self.weights = weights if weights is not None else self.initialize_weights(method, n_input, n_neurons)
        self.bias = bias if bias is not None else np.random.randn(n_neurons)
        self.activation = None

        self.error = None
        self.delta = None

        self.d_weights = np.zeros([n_input, n_neurons])
        self.d_bias = np.zeros(n_neurons)

        self.h_weights = np.zeros([n_input, n_neurons])
        self.h_bias = np.zeros(n_neurons)
        self.m_weights = np.zeros([n_input, n_neurons])
        self.m_bias = np.zeros(n_neurons)

    def initialize_weights(self, method, n_input, n_neurons):
        if method == "xavier":
            limit = np.sqrt(2 / (n_input + n_neurons))
            return np.random.randn(n_input, n_neurons) * limit
        return np.random.randn(n_input, n_neurons)

    def activate(self, x):
        z = np.dot(x, self.weights) + self.bias
        self.activation = self._apply_activation(z)
        return self.activation

    def _apply_activation(self, r):
        if self.function == 'sigmoid':
            return 1 / (1 + np.exp(-r))
        elif self.function == 'tanh':
            return np.tanh(r)
        elif self.function == 'relu':
            return np.maximum(0, r)
        elif self.function == 'softmax':
            max_r = np.max(r, axis=1)
            max_r = max_r.reshape(max_r.shape[0], 1)
            exp_r = np.exp(r - max_r)
            return exp_r / np.sum(exp_r, axis=1).reshape(exp_r.shape[0], 1)
        return r

    def apply_activation_derivative(self, z):
        if self.function == 'sigmoid':
            return z * (1 - z)
        elif self.function == 'tanh':
            return (z - z**2)
        elif self.function == 'relu':
            return np.where(z > 0, 1, 0)
        elif self.function == 'softmax':
            return np.diag(z) - np.outer(z, z)
        return np.ones(z.shape)

    def __str__(self):
        return f'Neural Layer: {self.index}, {self.weights.shape} , {self.function}'

In [5]:
class NeuralNetwork:
    def __init__(self, config):
        def get_value(key, default):
            return config[key] if key in config else default

        self.layers = []

        self.criterion = get_value('criterion', 'cel')
        self.weight_initialization = get_value('weight_initialization', 'random')

        self.c = ObjectiveFunction(method=self.criterion)

        self.add_layers(config['input_size'],
                         config['hidden_layers'],
                         config['output_size'],
                         config['neurons'],
                         config['activation'],
                         config['output_activation']
                        )

    def forward(self, x):
        for layer in self.layers:
            x = layer.activate(x)
        return x

    def backward(self, x, y, y_hat):
        for i in reversed(range(len(self.layers))):
            layer = self.layers[i]
            if layer == self.layers[-1]:
                layer.error = self.c.get_derivative(y, y_hat)
                output_derivative_matrix = []
                for i in range(y_hat.shape[0]):
                    output_derivative_matrix.append(np.matmul(
                        self.c.get_derivative(y[i], y_hat[i]),
                        layer.apply_activation_derivative(y_hat[i])
                    ))
                layer.delta = np.array(output_derivative_matrix)
            else:
                next_layer = self.layers[i + 1]
                layer.error = np.matmul(next_layer.delta, next_layer.weights.T)
                layer.delta = layer.error * layer.apply_activation_derivative(layer.activation)


        for i in range(len(self.layers)):
            layer = self.layers[i]
            activation = np.atleast_2d(x if i == 0 else self.layers[i - 1].activation)
            layer.d_weights = np.matmul(activation.T, layer.delta)/y.shape[0]
            layer.d_bias = np.sum(layer.delta, axis=0)/y.shape[0]

    def add_layers(self, input_size, hidden_layers, output_size, neurons, activation, output_activation):
        for i in range(0, hidden_layers+1):
            n_input = input_size if i==0 else neurons
            n_neurons = output_size if i==hidden_layers else neurons
            self.layers.append(NeuralLayer(
                index=i+1,
                n_input=n_input,
                n_neurons=n_neurons,
                function= output_activation if i==hidden_layers else activation,
                method=self.weight_initialization
                )
            )

In [6]:
class Optimizer:
    def __init__(self, nn:NeuralNetwork, config=None):
        self.nn, self.lr, self.optimizer = nn, config['learning_rate'], config['optimizer']
        self.beta, self.epsilon, self.beta1, self.beta2= config['beta'], config['epsilon'], config['beta1'], config['beta2']
        self.timestep = 0
        self.decay = config['decay']

    def step(self):
        if(self.optimizer == "sgd"):
            self.sgd()
        elif(self.optimizer == "momentum"):
            self.momentum()
        elif(self.optimizer == "nag"):
            self.nag()
        elif(self.optimizer == "rmsprop"):
            self.rmsprop()
        elif(self.optimizer == "adam"):
            self.adam()
        elif (self.optimizer == "nadam"):
            self.nadam()

    def sgd(self):
        for layer in self.nn.layers:
            layer.weights -= self.lr*(layer.d_weights + self.decay*layer.weights)
            layer.bias -= self.lr*(layer.d_bias + self.decay*layer.bias)

    def momentum(self):
        for layer in self.nn.layers:
            layer.h_weights = self.beta*layer.h_weights + layer.d_weights
            layer.h_bias = self.beta*layer.h_bias + layer.d_bias
            layer.weights -= self.lr*(layer.h_weights + self.decay*layer.weights)
            layer.bias -= self.lr*(layer.h_bias + self.decay*layer.bias)

    def nag(self):
        for layer in self.nn.layers:
            layer.h_weights = self.beta*layer.h_weights + layer.d_weights
            layer.h_bias = self.beta*layer.h_bias + layer.d_bias
            layer.weights -= self.lr * (self.beta * layer.h_weights + layer.d_weights + self.decay * layer.weights)
            layer.bias -= self.lr * (self.beta * layer.h_bias + layer.d_bias + self.decay * layer.bias)

    def rmsprop(self):
        for layer in self.nn.layers:
            layer.h_weights = self.beta * layer.h_weights + (1 - self.beta) * layer.d_weights**2
            layer.h_bias = self.beta * layer.h_bias + (1 - self.beta) * layer.d_bias**2
            layer.weights -= (self.lr / (np.sqrt(layer.h_weights) + self.epsilon)) * layer.d_weights + self.decay * layer.weights * self.lr
            layer.bias -= (self.lr / (np.sqrt(layer.h_bias) + self.epsilon)) * layer.d_bias + self.decay * layer.bias * self.lr

    def adam(self):
        for layer in self.nn.layers:
            layer.m_weights = self.beta1 * layer.m_weights + (1 - self.beta1) * layer.d_weights
            layer.m_bias = self.beta1 * layer.m_bias + (1 - self.beta1) * layer.d_bias
            layer.h_weights = self.beta2 * layer.h_weights + (1 - self.beta2) * layer.d_weights**2
            layer.h_bias = self.beta2 * layer.h_bias + (1 - self.beta2) * layer.d_bias**2
            correction_term1 = 1/(1 - self.beta1**(self.timestep + 1))
            correction_term2 = 1/(1 - self.beta2**(self.timestep + 1))
            weights_hat1 = layer.m_weights * correction_term1
            bias_hat1 = layer.m_bias * correction_term1
            weights_hat2 = layer.h_weights * correction_term2
            bias_hat2 = layer.h_bias * correction_term2
            layer.weights -= self.lr * (weights_hat1 / ((np.sqrt(weights_hat2)) + self.epsilon)) + self.decay * layer.weights * self.lr
            layer.bias -= self.lr * (bias_hat1 / ((np.sqrt(bias_hat2)) + self.epsilon)) + self.decay * layer.bias * self.lr

    def nadam(self):
        for layer in self.nn.layers:
            layer.m_weights = self.beta1 * layer.m_weights + (1 - self.beta1) * layer.d_weights
            layer.m_bias = self.beta1 * layer.m_bias + (1 - self.beta1) * layer.d_bias
            layer.h_weights = self.beta2 * layer.h_weights + (1 - self.beta2) * layer.d_weights**2
            layer.h_bias = self.beta2 * layer.h_bias + (1 - self.beta2) * layer.d_bias**2
            correction_term1 = 1/(1 - self.beta1**(self.timestep + 1))
            correction_term2 = 1/(1 - self.beta2**(self.timestep + 1))
            weights_hat1 = layer.m_weights * correction_term1
            bias_hat1 = layer.m_bias * correction_term1
            weights_hat2 = layer.h_weights * correction_term2
            bias_hat2 = layer.h_bias * correction_term2
            combined_weight_update = self.beta1 * weights_hat1 + ((1 - self.beta1) / (1 - self.beta1 ** (self.timestep + 1))) * layer.d_weights
            combined_bias_update = self.beta1 * bias_hat1 + ((1 - self.beta1) / (1 - self.beta1 ** (self.timestep + 1))) * layer.d_bias
            layer.weights -= self.lr * (combined_weight_update / ((np.sqrt(weights_hat2)) + self.epsilon)) + self.decay * layer.weights * self.lr
            layer.bias -= self.lr * (combined_bias_update / ((np.sqrt(bias_hat2)) + self.epsilon)) + self.decay * layer.bias * self.lr

In [7]:
# Load Input Data
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten the images
train_images = train_images.reshape(train_images.shape[0], 784) / 255
X_test = test_images.reshape(test_images.shape[0], 784) / 255

# Encode the labels
train_labels = np.eye(10)[train_labels]
Y_test = np.eye(10)[test_labels]

# Prepare data for training and validation
X_train, X_val, Y_train, Y_val = train_test_split(train_images, train_labels, test_size=0.1, shuffle=True, random_state=27)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [8]:
def accuracy(y, y_hat):
    accuracy = np.mean(np.argmax(y, axis=1)==np.argmax(y_hat, axis=1))
    return accuracy

In [10]:
def wandb_sweep():
    train_loss_hist = []
    train_accuracy_hist = []
    val_loss_hist = []
    val_accuracy_hist = []

    run = wandb.init()
    config = wandb.config
    run.name = f"hl_{config['hidden_layers']}_nu_{config['neurons']}_ac_{config['activation']}_lr_{config['learning_rate']}_bs_{config['batch_size']}_opt_{config['optimizer']}_de_{config['decay']}_init_{config['weight_initialization']}"


    nn = NeuralNetwork(config)
    optimizer = Optimizer(nn=nn, config=config)

    batch_size = config['batch_size']
    criterion = ObjectiveFunction(method = config['criterion'])

    for epoch in range(config['epochs']):
        for batch in range(0, X_train.shape[0], batch_size):
            # Get the batch of data
            X_batch = X_train[batch:batch+batch_size]
            Y_batch = Y_train[batch:batch+batch_size]

            Y_hat_batch = nn.forward(X_batch)
            nn.backward(X_batch, Y_batch, Y_hat_batch)
            optimizer.step()

        optimizer.timestep += 1

        # Training
        Y_hat_train = nn.forward(X_train)
        train_loss = criterion.get_loss(Y_train, Y_hat_train)
        train_accuracy = accuracy(Y_train, Y_hat_train)

        # Validation
        Y_hat_val = nn.forward(X_val)
        val_loss = criterion.get_loss(Y_val, Y_hat_val)
        val_accuracy = accuracy(Y_val, Y_hat_val)

        wandb.log({
            "epochs": epoch + 1,
            "train_loss": train_loss,
            "train_accuracy": train_accuracy,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy
        })

        train_loss_hist.append(train_loss)
        train_accuracy_hist.append(train_accuracy)
        val_loss_hist.append(val_loss)
        val_accuracy_hist.append(val_accuracy)

    # Testing
    Y_hat_test = nn.forward(X_test)
    test_loss = criterion.get_loss(Y_test, Y_hat_test)
    test_accuracy = accuracy(Y_test, Y_hat_test)
    wandb.log({
        "test_loss": test_loss,
        "test_accuracy": test_accuracy
    })

    return nn, train_loss_hist, train_accuracy_hist, val_loss_hist, val_accuracy_hist

In [11]:
sweep_config = {
    'method': 'random',
    'name': 'Q9_SWEEP',
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize',
    },
    'parameters': {
        'input_size': {
            'value': 784
        },
        'output_size': {
            'value': 10
        },
        'hidden_layers': {
            'values': [3, 4, 5]
        },
        'neurons': {
            'values': [32, 64, 128]
        },
        'activation': {
            'values': ['sigmoid', 'tanh', 'relu']
        },
        'output_activation': {
            'value': 'softmax'
        },
        'learning_rate': {
            'values': [1e-2, 1e-3, 1e-4]
        },
        'decay': {
            'values': [0, 0.0005, 0.000005]
        },
        'epochs': {
            'values': [5, 10, 15]
        },
        'optimizer': {
            'values': ['sgd', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'weight_initialization': {
            'values': ['xavier', 'random']
        },
        'beta': {
            'values': [0.7, 0.8, 0.9]
        },
        'beta1':{
            'value': 0.9
        },
        'beta2':{
            'value': 0.9999
        },
        'epsilon': {
            'value': 1e-8
        },
        'criterion': {
            'value': 'cel'
        },
    }
}

In [12]:
# Setup Wandb
wandb.login(key='5da0c161a9c9720f15195bb6e9f05e44c45112d1')
wandb.init(project="CS6910_AS1", entity='ed23s037')

# Do Sweep
wandb_id = wandb.sweep(sweep_config, project="CS6910_AS1")
wandb.agent(wandb_id, function=wandb_sweep, count=100)

# Finish
wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33med23s037[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\ravig\.netrc




Create sweep with ID: wm0ztjmf
Sweep URL: https://wandb.ai/ed23s037/CS6910_AS1/sweeps/wm0ztjmf


[34m[1mwandb[0m: Agent Starting Run: 93r9m5sv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random
Exception in thread IntMsgThr:
Exception in thread Traceback (most recent call last):
ChkStopThr  File "E:\Anaconda\lib\threading.py", line 932, in _bootstrap_inner
:
Traceback (most recent call last):
  File "E:\Anaconda\lib\threading.py", line 932, in _bootstrap_inner
Exception in thread NetStatThr:


  return -y/y_hat
  return -y/y_hat
  return -y/y_hat
  output_derivative_matrix.append(np.matmul(


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11814236111111111, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: pj85vpsn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11958527370178368, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▆▆▆▆▆█▇▇
train_loss,▄▇█▇█▆▆▁▂▄
val_accuracy,▁▇▇▇▇▇▇█▇▇
val_loss,▁▆█▆▂▅▅▆▁▂

0,1
epochs,10.0
test_accuracy,0.1011
test_loss,23119.49581
train_accuracy,0.10309
train_loss,124745.0098
val_accuracy,0.09633
val_loss,13868.36413


[34m[1mwandb[0m: Agent Starting Run: nv6cfh4q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11964835164835165, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▃██▇▆▆▆▆▆▆▆
train_loss,█▄▃▂▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▃▇█▇▇▇▇▇▇▇▇
val_loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.1542
test_loss,22820.41886
train_accuracy,0.1483
train_loss,123339.90566
val_accuracy,0.15317
val_loss,13704.25533


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fb3rl2qp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▇▇▇▇▇███
train_loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁
val_accuracy,▁▃▄▅▅▆▆▇▇▇▇████
val_loss,█▅▄▄▃▃▃▂▂▂▂▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.9502
test_loss,1633.14753
train_accuracy,0.95739
train_loss,7833.59466
val_accuracy,0.95033
val_loss,1049.58935


[34m[1mwandb[0m: Agent Starting Run: ilw8i5h0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▇▇█
train_loss,█▆▃▄▁
val_accuracy,▁▃▇██
val_loss,█▅▁▄▂

0,1
epochs,5.0
test_accuracy,0.9455
test_loss,3439.67283
train_accuracy,0.95344
train_loss,13803.1716
val_accuracy,0.94233
val_loss,2153.36832


[34m[1mwandb[0m: Agent Starting Run: 3q3jnrft with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11972202674173117, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▄▅▅▆▆▆▇▇▇███
train_loss,█▆▄▄▃▃▂▂▂▂▂▁▁▁▁
val_accuracy,▁▂▄▄▅▅▆▆▆▇▇▇███
val_loss,█▆▄▄▃▃▂▂▂▂▂▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.58
test_loss,12548.24581
train_accuracy,0.57302
train_loss,68928.98452
val_accuracy,0.5615
val_loss,7890.60245


[34m[1mwandb[0m: Agent Starting Run: tc7ly6gl with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12311171415649028, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▆▅▇▇▁██▇▇▅
train_loss,▅▇▄▂▁▃▂▂▅█
val_accuracy,▆▅▆█▁█▇▆▇▅
val_loss,▄▇▄▂▁▃▃▃▆█

0,1
epochs,10.0
test_accuracy,0.0958
test_loss,23057.47943
train_accuracy,0.09844
train_loss,124460.05563
val_accuracy,0.10033
val_loss,13829.56981


[34m[1mwandb[0m: Agent Starting Run: 0rsrb6xt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▇▃▁▁▂▂▃▁▁
train_loss,▁▂▆███▇▇██
val_accuracy,█▇▃▁▁▂▂▃▁▁
val_loss,▁▂▆███▇▇██

0,1
epochs,10.0
test_accuracy,0.1418
test_loss,22396.55282
train_accuracy,0.14976
train_loss,120514.62143
val_accuracy,0.14917
val_loss,13398.42831


[34m[1mwandb[0m: Agent Starting Run: 3zgwt93u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12312285145648634, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁██
train_loss,▁▅█▄▄
val_accuracy,▁▁▁▁▁
val_loss,▁▅█▃▃

0,1
epochs,5.0
test_accuracy,0.101
test_loss,23136.68717
train_accuracy,0.10285
train_loss,124858.20104
val_accuracy,0.09617
val_loss,13884.17902


[34m[1mwandb[0m: Agent Starting Run: p85g5jfy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12289896981745889, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,█▁▁▁▁
val_accuracy,▁▁▁▁▁
val_loss,█▁▁▁▁

0,1
epochs,5.0
test_accuracy,0.0958
test_loss,23043.48939
train_accuracy,0.09844
train_loss,124402.41574
val_accuracy,0.10033
val_loss,13821.80188


[34m[1mwandb[0m: Agent Starting Run: ocbkdphu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11958527370178368, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▃▂██▃▄▁█████
train_loss,█▃▁▂▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▃▃▄██▄▃▁█████
val_loss,█▃▁▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.1135
test_loss,23013.19003
train_accuracy,0.11206
train_loss,124293.88634
val_accuracy,0.11517
val_loss,13805.29803


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rveog2j1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
epochs,5.0
test_accuracy,0.9663
test_loss,1127.58159
train_accuracy,0.97433
train_loss,4654.15213
val_accuracy,0.96367
val_loss,766.73467


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jqd4732e with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇▇█▇████
train_loss,█▅▄▃▃▂▂▂▂▁▂▁▁▁▁
val_accuracy,▁▅▆▇▇▇▇▇▇█▇████
val_loss,█▃▂▂▂▁▁▁▁▁▂▁▁▂▂

0,1
epochs,15.0
test_accuracy,0.9759
test_loss,997.7004
train_accuracy,0.99446
train_loss,908.26295
val_accuracy,0.974
val_loss,697.24912


[34m[1mwandb[0m: Agent Starting Run: wevvm4yt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▃▂▁▃▄▆█▂▅
train_loss,█▆▆▇▆▃▄▁▅▂
val_accuracy,▂▃▂▁▂▅▆█▂▆
val_loss,█▆▆▇▆▃▄▁▅▂

0,1
epochs,10.0
test_accuracy,0.2548
test_loss,20803.41803
train_accuracy,0.24613
train_loss,112987.26643
val_accuracy,0.24967
val_loss,12576.22405


[34m[1mwandb[0m: Agent Starting Run: i2rrxrae with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11999294159167108, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epochs,5.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: gpxvk2s1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8666490625825192, max=1.0…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆███
train_loss,█▄▂▁▁
val_accuracy,▁▆███
val_loss,█▄▁▁▁

0,1
epochs,5.0
test_accuracy,0.7947
test_loss,7030.47598
train_accuracy,0.78546
train_loss,38687.21129
val_accuracy,0.7795
val_loss,4370.90992


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gqvd6uo9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▅▆▆▇▇▇▇▇████
train_loss,█▆▄▄▃▃▂▂▂▂▂▁▁▁▁
val_accuracy,▁▃▆▇▇▇▇████████
val_loss,█▄▂▁▁▁▂▂▂▃▃▃▃▄▄

0,1
epochs,15.0
test_accuracy,0.9665
test_loss,1526.50714
train_accuracy,0.98837
train_loss,2332.49574
val_accuracy,0.96317
val_loss,1018.63029


[34m[1mwandb[0m: Agent Starting Run: 7u36brg9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▃▁▁▁
train_loss,▁▇███
val_accuracy,█▃▁▁▁
val_loss,▁▇███

0,1
epochs,5.0
test_accuracy,0.1007
test_loss,23251.59777
train_accuracy,0.10067
train_loss,125404.72957
val_accuracy,0.10583
val_loss,13932.53628


[34m[1mwandb[0m: Agent Starting Run: q98me8tu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.5287490287490287, max=1.0…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▃▃▁▂
train_loss,▁▅▄▆█
val_accuracy,█▃▄▁▂
val_loss,▁▅▄▆█

0,1
epochs,5.0
test_accuracy,0.2157
test_loss,24847.09384
train_accuracy,0.21867
train_loss,133690.75813
val_accuracy,0.22033
val_loss,14914.23926


[34m[1mwandb[0m: Agent Starting Run: wpd0zn69 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▄█
train_loss,██▇▆▁
val_accuracy,▁▁▁▄█
val_loss,██▇▆▁

0,1
epochs,5.0
test_accuracy,0.4071
test_loss,21221.49433
train_accuracy,0.411
train_loss,114812.09806
val_accuracy,0.40967
val_loss,12756.04575


[34m[1mwandb[0m: Agent Starting Run: 4zh2mifq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.972124516355962, max=1.0)…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▆▄▂▁
train_loss,▁▂▃▇█
val_accuracy,█▆▄▂▁
val_loss,▁▂▃▇█

0,1
epochs,5.0
test_accuracy,0.2254
test_loss,21378.45358
train_accuracy,0.22494
train_loss,115603.50127
val_accuracy,0.22067
val_loss,12892.27742


[34m[1mwandb[0m: Agent Starting Run: lysxug17 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: h4vh4o8h with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22692476902771666, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▇█
train_loss,█▆▄▃▁
val_accuracy,▁▃▅▇█
val_loss,█▆▄▃▁

0,1
epochs,5.0
test_accuracy,0.4714
test_loss,15964.08836
train_accuracy,0.46244
train_loss,87189.38577
val_accuracy,0.4695
val_loss,9768.14968


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: goqfbfbc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▄▅██▇▆▅▅▅▅▅▆▆
train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▁▄▅██▇▆▅▅▅▅▆▆▇
val_loss,█▄▃▃▂▂▂▂▂▂▂▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.2301
test_loss,22584.41435
train_accuracy,0.22704
train_loss,121959.74069
val_accuracy,0.233
val_loss,13539.83068


[34m[1mwandb[0m: Agent Starting Run: y709kwvq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epochs,5.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: l205cuqo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11957095129242132, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▂▂▂▂▃▄▄▅▅▆▇▇█
train_loss,██████▇▇▆▅▄▃▃▂▁
val_accuracy,▁▂▂▂▂▂▃▄▄▅▅▆▇▇█
val_loss,██████▇▇▆▅▄▃▃▂▁

0,1
epochs,15.0
test_accuracy,0.6417
test_loss,13914.49609
train_accuracy,0.63167
train_loss,75557.21052
val_accuracy,0.62517
val_loss,8429.59585


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: v8404msk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▆███▇▂▁▁▁▁
train_loss,▅▂▁▁▂█████
val_accuracy,▆███▇▂▁▁▁▁
val_loss,▅▂▁▁▂█████

0,1
epochs,10.0
test_accuracy,0.101
test_loss,23118.45778
train_accuracy,0.10285
train_loss,124744.13187
val_accuracy,0.09617
val_loss,13868.49103


[34m[1mwandb[0m: Agent Starting Run: lq42pm8a with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1196062922928201, max=1.0…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▁████████
train_loss,█▃▁▁▁▁▁▁▁▁
val_accuracy,▂▁████████
val_loss,█▃▁▁▁▁▁▁▁▁

0,1
epochs,10.0
test_accuracy,0.1135
test_loss,23001.19193
train_accuracy,0.11206
train_loss,124214.027
val_accuracy,0.11517
val_loss,13798.8062


[34m[1mwandb[0m: Agent Starting Run: zryc2ojs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12252464056424632, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃▆▇██████████
train_loss,██▇▇▆▅▄▄▃▃▂▂▂▁▁
val_accuracy,▁▁▃▅▇███████▇▇█
val_loss,██▇▇▆▅▄▄▃▃▂▂▂▁▁

0,1
epochs,15.0
test_accuracy,0.3468
test_loss,19266.02764
train_accuracy,0.33974
train_loss,104180.16225
val_accuracy,0.34717
val_loss,11576.44046


[34m[1mwandb[0m: Agent Starting Run: t5gomqit with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇▇██
train_loss,█▇▅▄▄▃▂▂▁▁
val_accuracy,▁▄▅▆▇▇▇▇██
val_loss,█▇▅▄▄▃▂▂▁▁

0,1
epochs,10.0
test_accuracy,0.6399
test_loss,14205.88
train_accuracy,0.6347
train_loss,77318.42433
val_accuracy,0.64233
val_loss,8571.14766


[34m[1mwandb[0m: Agent Starting Run: oxic9n7k with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇████
train_loss,█▆▅▄▃▃▃▂▂▂▂▁▁▁▁
val_accuracy,▁▄▅▅▆▆▇▇▇▇█████
val_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.9479
test_loss,2059.81262
train_accuracy,0.99206
train_loss,1859.80252
val_accuracy,0.94933
val_loss,1245.87437


[34m[1mwandb[0m: Agent Starting Run: tnqbk5ca with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▃▅█▄▇██▃▁▂
train_loss,█▇▂▆▃▂▁▅█▇
val_accuracy,▃▆▇▅▆█▇▄▁▃
val_loss,█▆▂▅▂▁▁▅█▆

0,1
epochs,10.0
test_accuracy,0.2039
test_loss,21808.12649
train_accuracy,0.20652
train_loss,117864.42793
val_accuracy,0.21433
val_loss,13028.29387


[34m[1mwandb[0m: Agent Starting Run: g7hs8t0n with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epochs,10.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 63ic9hvq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,▂▂▂▁█
val_accuracy,▁▁▁▁▁
val_loss,▂▂▂▁█

0,1
epochs,5.0
test_accuracy,0.0974
test_loss,24508.81484
train_accuracy,0.09759
train_loss,132306.84779
val_accuracy,0.09683
val_loss,14773.81189


[34m[1mwandb[0m: Agent Starting Run: xgwc06th with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epochs,10.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 9mo9qrki with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▅▆▄▃▃▄█▃▄▁
train_loss,▄▄█▆▇▅▃▁▂▇
val_accuracy,▅▅▃▃▃▄█▂▃▁
val_loss,▄▄█▆▇▅▃▁▂▇

0,1
epochs,10.0
test_accuracy,0.0847
test_loss,106029.91462
train_accuracy,0.08674
train_loss,573842.52248
val_accuracy,0.09483
val_loss,64007.92539


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2rfv2tps with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▄▆█
train_loss,█▇▆▄▁
val_accuracy,▁▁▄▆█
val_loss,█▇▆▄▁

0,1
epochs,5.0
test_accuracy,0.334
test_loss,21496.43174
train_accuracy,0.32902
train_loss,116055.38804
val_accuracy,0.32833
val_loss,12885.12925


[34m[1mwandb[0m: Agent Starting Run: brhxpgg4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,█▃▃▃▁
val_accuracy,▁▁▁▁▁
val_loss,█▃▃▃▁

0,1
epochs,5.0
test_accuracy,0.101
test_loss,23522.31518
train_accuracy,0.10285
train_loss,126906.94806
val_accuracy,0.09617
val_loss,14137.75545


[34m[1mwandb[0m: Agent Starting Run: 7uoaaevd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12286566085463908, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▃▄▅▅▆▆▆▇▇▇██
train_loss,█▆▆▅▅▄▄▄▃▃▂▂▂▁▁
val_accuracy,▁▁▂▃▄▅▅▆▆▆▇▇▇██
val_loss,█▆▆▅▅▄▄▄▃▃▂▂▂▁▁

0,1
epochs,15.0
test_accuracy,0.3423
test_loss,19584.98568
train_accuracy,0.3413
train_loss,106061.40248
val_accuracy,0.34067
val_loss,11802.9118


[34m[1mwandb[0m: Agent Starting Run: pw1k95bs with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▂▂▁
val_accuracy,▁▅▇██
val_loss,█▃▂▁▁

0,1
epochs,5.0
test_accuracy,0.9498
test_loss,1691.13658
train_accuracy,0.95957
train_loss,7090.71469
val_accuracy,0.94417
val_loss,1112.57066


[34m[1mwandb[0m: Agent Starting Run: jggm2q4l with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11978524907586692, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃▅▆▆▆▇▇█
train_loss,█▅▄▄▃▃▂▂▁▁
val_accuracy,▁▁▃▅▆▆▆▇▇█
val_loss,█▅▄▄▃▃▂▂▁▁

0,1
epochs,10.0
test_accuracy,0.3869
test_loss,21424.75578
train_accuracy,0.39244
train_loss,115750.55048
val_accuracy,0.3925
val_loss,12859.45645


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6s39fc39 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅█▄
train_loss,▃▄▅█▁
val_accuracy,▁▃▅█▃
val_loss,▃▄▅█▁

0,1
epochs,5.0
test_accuracy,0.1519
test_loss,23825.30155
train_accuracy,0.14748
train_loss,129244.66079
val_accuracy,0.14067
val_loss,14470.98795


[34m[1mwandb[0m: Agent Starting Run: htsy918a with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▅▆▆▆▇▇▇███
train_loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁
val_accuracy,▁▃▄▅▅▅▆▆▆▇▇▇▇██
val_loss,█▆▅▄▃▃▃▂▂▂▂▂▁▁▁

0,1
epochs,15.0
test_accuracy,0.8341
test_loss,6637.4008
train_accuracy,0.82669
train_loss,36211.51136
val_accuracy,0.81783
val_loss,4169.32059


[34m[1mwandb[0m: Agent Starting Run: nb72po2v with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▆▆▆▇▇▇█
train_loss,█▆▅▄▄▄▃▃▂▁
val_accuracy,▁▃▅▆▆▇▇▇▇█
val_loss,█▆▅▄▄▄▃▃▂▁

0,1
epochs,10.0
test_accuracy,0.3312
test_loss,20284.58047
train_accuracy,0.33154
train_loss,109917.3829
val_accuracy,0.3295
val_loss,12219.15365


[34m[1mwandb[0m: Agent Starting Run: 58gmdilz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12306718509811014, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▇▇▇▇████
train_loss,█▇▆▅▄▃▃▃▂▂▂▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▇▇▇▇████
val_loss,█▇▆▅▄▃▃▃▂▂▂▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.7898
test_loss,6467.8255
train_accuracy,0.79007
train_loss,35442.62444
val_accuracy,0.789
val_loss,3956.54908


[34m[1mwandb[0m: Agent Starting Run: kof4hkfd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▅▅▆▆▇▇▇▇▇█▇▇█
train_loss,█▂▅▆▄▃▂▃▃▁▄▂▁▃▂
val_accuracy,▁▇▅▅▆▆▇▆▇█▇▇▇▇█
val_loss,▃▁▄▄▅▄▆▆▆▃▇▇▅▆█

0,1
epochs,15.0
test_accuracy,0.9672
test_loss,4708.67505
train_accuracy,0.98028
train_loss,10603.8416
val_accuracy,0.96483
val_loss,3325.23016


[34m[1mwandb[0m: Agent Starting Run: mi00wsha with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2261695392191347, max=1.0…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█▃▄▃
train_loss,█▅▃▂▁
val_accuracy,▁▇█▇▅
val_loss,█▅▃▂▁

0,1
epochs,5.0
test_accuracy,0.1021
test_loss,73289.15414
train_accuracy,0.09991
train_loss,393429.24716
val_accuracy,0.10433
val_loss,43785.40402


[34m[1mwandb[0m: Agent Starting Run: nlxjf50z with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197325591624879, max=1.0…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epochs,5.0
test_accuracy,0.9364
test_loss,2174.80024
train_accuracy,0.93856
train_loss,11477.81522
val_accuracy,0.935
val_loss,1396.58823


[34m[1mwandb[0m: Agent Starting Run: 38ul7uym with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12306718509811014, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▁▁▁▁
train_loss,▁█▆▆▆
val_accuracy,▁▁▁▁▁
val_loss,█▁▂▂▂

0,1
epochs,5.0
test_accuracy,0.0974
test_loss,26477.89078
train_accuracy,0.09759
train_loss,143176.48217
val_accuracy,0.09683
val_loss,15932.31482


[34m[1mwandb[0m: Agent Starting Run: ba2bww7h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12010236498411578, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 3iin6ndf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▂▆█▆▆▇
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▁▁▁▂▆█▆▆▇
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epochs,10.0
test_accuracy,0.2202
test_loss,22524.23887
train_accuracy,0.21781
train_loss,121693.32412
val_accuracy,0.21317
val_loss,13515.26521


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 56h77lb1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11969044059449477, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epochs,5.0
test_accuracy,0.2645
test_loss,22298.14069
train_accuracy,0.27356
train_loss,120427.92752
val_accuracy,0.2675
val_loss,13442.98799


[34m[1mwandb[0m: Agent Starting Run: 0zlm9hv6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▅▇█
val_loss,█▅▃▂▁

0,1
epochs,5.0
test_accuracy,0.2726
test_loss,20453.47831
train_accuracy,0.27187
train_loss,111065.12956
val_accuracy,0.27333
val_loss,12298.4578


[34m[1mwandb[0m: Agent Starting Run: li178ywu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197325591624879, max=1.0…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▃▄▄▄▇█▆▅▅▅▅▅
train_loss,████▇▇▇▆▆▅▄▃▃▂▁
val_accuracy,▁▂▂▃▄▄▄▇█▆▆▅▅▅▅
val_loss,████▇▇▇▆▆▅▄▃▃▂▁

0,1
epochs,15.0
test_accuracy,0.2219
test_loss,20280.14105
train_accuracy,0.22093
train_loss,109428.66946
val_accuracy,0.2285
val_loss,12156.43729


[34m[1mwandb[0m: Agent Starting Run: qa6l45oa with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▄▅▆▆▆▇▇▇▇███
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▁▁▁
val_accuracy,▁▃▃▅▅▆▆▇▇▇▇▇▇██
val_loss,█▆▅▄▄▃▃▂▂▂▂▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.9556
test_loss,1522.70036
train_accuracy,0.96078
train_loss,7073.36004
val_accuracy,0.95383
val_loss,997.40582


[34m[1mwandb[0m: Agent Starting Run: ncsar9ou with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11963783403656822, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▅▅▁▄█
train_loss,▇▂▃█▁
val_accuracy,▆▅▁▄█
val_loss,▇▂▃█▁

0,1
epochs,5.0
test_accuracy,0.105
test_loss,202253.13619
train_accuracy,0.10019
train_loss,1098301.53139
val_accuracy,0.10167
val_loss,120998.44522


[34m[1mwandb[0m: Agent Starting Run: 5e5d18ro with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297676100913284, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇████
train_loss,█▆▅▄▃▃▃▂▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇▇██████
val_loss,█▆▄▄▃▃▂▂▂▂▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.9661
test_loss,1101.46904
train_accuracy,0.9767
train_loss,4414.0646
val_accuracy,0.96317
val_loss,764.37158


[34m[1mwandb[0m: Agent Starting Run: ydr59or8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11956426249670561, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epochs,5.0
test_accuracy,0.3784
test_loss,19575.30684
train_accuracy,0.38759
train_loss,104822.81012
val_accuracy,0.37617
val_loss,11726.03526


[34m[1mwandb[0m: Agent Starting Run: 08yynwt6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▂▃▄▅▆▆▇▇▇███
train_loss,█▆▆▅▅▄▄▄▄▃▃▂▂▂▁
val_accuracy,▁▁▂▂▄▄▅▆▆▇▇▇███
val_loss,█▆▆▅▅▄▄▄▄▃▃▂▂▂▁

0,1
epochs,15.0
test_accuracy,0.5327
test_loss,19399.00679
train_accuracy,0.52796
train_loss,105198.51734
val_accuracy,0.52633
val_loss,11686.95837


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6wj2cq29 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▄█▄▅▄▄▂▂▁▃▃▂▁▃
train_loss,▅█▁▆▃▄▄▆▆▆▆▆▅▇▆
val_accuracy,▄▄█▄▅▄▄▂▂▁▃▃▂▁▃
val_loss,▅█▁▆▃▄▄▆▆▆▆▆▅▇▆

0,1
epochs,15.0
test_accuracy,0.2024
test_loss,22297.4698
train_accuracy,0.19107
train_loss,120948.22088
val_accuracy,0.19433
val_loss,13434.6086


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: y5uonsm5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅█▆▇
val_loss,█▄▄▂▁

0,1
epochs,5.0
test_accuracy,0.0965
test_loss,157908.5267
train_accuracy,0.09759
train_loss,847880.59738
val_accuracy,0.09383
val_loss,94012.96719


[34m[1mwandb[0m: Agent Starting Run: rw98un59 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11962731827371012, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄█▇▅▄▃▃▄▄
train_loss,█▅▃▂▂▁▁▁▁▁
val_accuracy,▁▄█▇▅▃▃▃▃▃
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epochs,10.0
test_accuracy,0.1266
test_loss,22980.70838
train_accuracy,0.12526
train_loss,124199.56795
val_accuracy,0.12633
val_loss,13792.18667


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d0liwifo with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: ite9u830 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▃▄▅▆▆▇▇██
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
epochs,10.0
test_accuracy,0.7103
test_loss,8755.96621
train_accuracy,0.70443
train_loss,46781.96407
val_accuracy,0.69133
val_loss,5341.68523


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: clicsuaq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇▇█
train_loss,█▆▄▂▁
val_accuracy,▁▅▇▇█
val_loss,█▆▄▂▁

0,1
epochs,5.0
test_accuracy,0.5444
test_loss,18179.46831
train_accuracy,0.54196
train_loss,98263.19937
val_accuracy,0.53717
val_loss,10912.51676


[34m[1mwandb[0m: Agent Starting Run: nvu2mkw0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11978524907586692, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epochs,10.0
test_accuracy,0.8429
test_loss,5067.70084
train_accuracy,0.83639
train_loss,28433.13753
val_accuracy,0.8335
val_loss,3244.26852


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2ix2km0b with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11971149617380597, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▃▃▃▃▄▆▆▆▇▇▇██
train_loss,██▇▇▆▆▅▅▄▃▃▂▂▁▁
val_accuracy,▁▃▃▃▃▃▄▅▆▆▆▇▇██
val_loss,██▇▇▆▆▅▅▄▄▃▂▂▁▁

0,1
epochs,15.0
test_accuracy,0.4918
test_loss,16464.40193
train_accuracy,0.47804
train_loss,89853.94857
val_accuracy,0.4845
val_loss,9970.30524


[34m[1mwandb[0m: Agent Starting Run: 0m0m4ip9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12003880754983243, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epochs,10.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e8m9yno6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12299900515510537, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▇▆▆▆▇▅▆▆█▅▆▃▅▁
train_loss,▅▇▇█▂▃▂▁▁▁▁▁▁▁▁
val_accuracy,▂▇▆▆▆▆▆▆▆█▅▆▄▅▁
val_loss,▅▇▇█▂▃▂▁▁▁▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.0978
test_loss,23280.48009
train_accuracy,0.09969
train_loss,125440.51405
val_accuracy,0.09883
val_loss,13987.25388


[34m[1mwandb[0m: Agent Starting Run: qq9le8i0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epochs,5.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wnn0a8u9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇███
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇███
val_loss,█▆▅▄▃▃▃▂▂▂▂▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.8879
test_loss,3698.84407
train_accuracy,0.91885
train_loss,14444.21736
val_accuracy,0.888
val_loss,2255.65155


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pz86cwy2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇██
train_loss,█▃▂▁▁
val_accuracy,▁▆███
val_loss,█▃▁▁▁

0,1
epochs,5.0
test_accuracy,0.9672
test_loss,1165.48025
train_accuracy,0.97581
train_loss,4246.60363
val_accuracy,0.9625
val_loss,786.41447


[34m[1mwandb[0m: Agent Starting Run: f3qjrmnf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


  return -np.sum(y * np.log(y_hat))
  return -np.sum(y * np.log(y_hat))


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12032534700733799, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
train_accuracy,██████▁▁▁▁▁▁▁▁▁
train_loss,█▄▂▁▁
val_accuracy,██████▁▁▁▁▁▁▁▁▁
val_loss,▂▁▁▃▆█

0,1
epochs,15.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5vqwiumx with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▂▄█▇
train_loss,█████████████▇▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▂▄█▇
val_loss,█████████████▇▁

0,1
epochs,15.0
test_accuracy,0.2507
test_loss,20104.78845
train_accuracy,0.25287
train_loss,108770.31474
val_accuracy,0.25
val_loss,12090.09174


[34m[1mwandb[0m: Agent Starting Run: 6uahlhfz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epochs,15.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: sahrmp2q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11963783403656822, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▇█▇▃▃▃▃▃▇▁█▄▇
train_loss,▆▄▃▁▁▄█▅▅▄▆█▇█▅
val_accuracy,▁▃▇█▇▄▃▃▃▃▇▂█▄▇
val_loss,▆▄▃▁▁▄█▅▅▄▇▇▇█▅

0,1
epochs,15.0
test_accuracy,0.1711
test_loss,94118.35563
train_accuracy,0.1628
train_loss,511413.0361
val_accuracy,0.16133
val_loss,56224.94212


[34m[1mwandb[0m: Agent Starting Run: kkqs9awt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,▅██▅▁
val_accuracy,▁▁▁▁▁
val_loss,███▅▁

0,1
epochs,5.0
test_accuracy,0.0974
test_loss,23974.20098
train_accuracy,0.09759
train_loss,129363.03394
val_accuracy,0.09683
val_loss,14437.64164


[34m[1mwandb[0m: Agent Starting Run: 3by609pt with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,█▆▅▃▁
val_accuracy,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epochs,5.0
test_accuracy,0.1135
test_loss,22991.03916
train_accuracy,0.11206
train_loss,124159.45827
val_accuracy,0.11517
val_loss,13792.21011


[34m[1mwandb[0m: Agent Starting Run: 9s3jxchc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epochs,5.0
test_accuracy,0.4749
test_loss,15853.65479
train_accuracy,0.46883
train_loss,86237.85559
val_accuracy,0.45783
val_loss,9775.45049


[34m[1mwandb[0m: Agent Starting Run: lkl554kj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▂█▁▇▅
train_loss,█▁▅▁▁
val_accuracy,▁█▁▇▆
val_loss,█▂▄▂▁

0,1
epochs,5.0
test_accuracy,0.2196
test_loss,21351.88011
train_accuracy,0.21756
train_loss,115313.44857
val_accuracy,0.22033
val_loss,12796.24434


[34m[1mwandb[0m: Agent Starting Run: z7dw1f5m with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11965887110954809, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
epochs,5.0
test_accuracy,0.938
test_loss,2091.20443
train_accuracy,0.96007
train_loss,7753.88247
val_accuracy,0.93683
val_loss,1370.74573


[34m[1mwandb[0m: Agent Starting Run: 7ukfbzya with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22691733286479837, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▆▆▇▇▇███
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
epochs,10.0
test_accuracy,0.9437
test_loss,2151.44797
train_accuracy,0.97472
train_loss,4756.42348
val_accuracy,0.93817
val_loss,1477.76293


[34m[1mwandb[0m: Agent Starting Run: 06guwfxn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epochs,10.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: vw5xh0nh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epochs,5.0
test_accuracy,0.8028
test_loss,6208.4768
train_accuracy,0.80652
train_loss,32720.3519
val_accuracy,0.7945
val_loss,3865.69925


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 60tzr3sz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11961680435928985, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▆▇▇█▇▇▆▆▇▅▁▄▄
train_loss,█▆▄▃▂▁▂▂▃▂▂▃▇▅▅
val_accuracy,▁▃▅▆▇█▇▇▆▆▇▅▁▄▄
val_loss,█▆▄▃▂▁▂▂▃▂▂▃▇▅▅

0,1
epochs,15.0
test_accuracy,0.3391
test_loss,19063.45837
train_accuracy,0.32883
train_loss,103433.60939
val_accuracy,0.33367
val_loss,11423.66949


[34m[1mwandb[0m: Agent Starting Run: fwsr921o with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11959198030249736, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇███
train_loss,█▆▅▄▃▃▃▂▂▂▂▂▁▁▁
val_accuracy,▁▅▆▆▆▆▇▇▇▇█▇▇▇▇
val_loss,█▄▂▁▁▁▁▁▂▃▃▃▅▅▆

0,1
epochs,15.0
test_accuracy,0.9635
test_loss,1697.88464
train_accuracy,0.98604
train_loss,2356.34545
val_accuracy,0.96167
val_loss,1013.89031


[34m[1mwandb[0m: Agent Starting Run: lr7p44v2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇██
train_loss,█▃▂▁▁
val_accuracy,▁▅▇██
val_loss,█▃▂▁▁

0,1
epochs,5.0
test_accuracy,0.8919
test_loss,3802.64124
train_accuracy,0.88724
train_loss,20818.89983
val_accuracy,0.88
val_loss,2394.55652


[34m[1mwandb[0m: Agent Starting Run: vjm7zmsp with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epochs,5.0
test_accuracy,0.4513
test_loss,15916.72133
train_accuracy,0.43811
train_loss,88011.56894
val_accuracy,0.42867
val_loss,9877.43013


[34m[1mwandb[0m: Agent Starting Run: nehcchpg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇██
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▁▁▁
val_accuracy,▁▃▅▅▆▆▇▇▇▇▇████
val_loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁

0,1
epochs,15.0
test_accuracy,0.9575
test_loss,1401.17864
train_accuracy,0.96567
train_loss,6396.47025
val_accuracy,0.95683
val_loss,961.79514


[34m[1mwandb[0m: Agent Starting Run: fkypst0s with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▇▆▇███▆████▇
train_loss,▅▃▄▂▁▃▃▁▁█▃█▄▃█
val_accuracy,▁▃▅▆▅▆███▇████▆
val_loss,▂▁▂▃▁▃▂▁▂▅▃▅▄▃█

0,1
epochs,15.0
test_accuracy,0.9393
test_loss,6874.29081
train_accuracy,0.94585
train_loss,32054.4781
val_accuracy,0.93383
val_loss,5360.83683


[34m[1mwandb[0m: Agent Starting Run: if1ckzpg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epochs,5.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 9yhb7ido with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epochs,5.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: z6mu33o4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12003880754983243, max=1.…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
train_accuracy,█████████████▁▁
train_loss,█▃▂▂▁▂▃▄▅▃▁▁
val_accuracy,█████████████▁▁
val_loss,▂▁▁▂▂▃▄▅▅▅▅▅█

0,1
epochs,15.0
test_accuracy,0.098
test_loss,
train_accuracy,0.09911
train_loss,
val_accuracy,0.09517
val_loss,


[34m[1mwandb[0m: Agent Starting Run: z4apwhni with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12307831434255742, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▃▃▁▆█
train_loss,█▅▃▂▁
val_accuracy,▃▃▁▆█
val_loss,█▅▃▂▁

0,1
epochs,5.0
test_accuracy,0.1848
test_loss,24065.28239
train_accuracy,0.18048
train_loss,129780.39259
val_accuracy,0.18317
val_loss,14365.66137


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mm68ge1k with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9732606209868941, max=1.0…

0,1
epochs,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▁▁█▁▁▁▁▁▁▁▁▁
train_loss,█▇▁█▇▁▇████████
val_accuracy,▁▁▂▁▁█▁▁▁▁▁▁▁▁▁
val_loss,█▇▁█▇▁▇████████

0,1
epochs,15.0
test_accuracy,0.1032
test_loss,26823.98973
train_accuracy,0.09944
train_loss,145286.313
val_accuracy,0.098
val_loss,16134.50356


[34m[1mwandb[0m: Agent Starting Run: 204w9txc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,███▇▅▃▂▁▂▃
train_loss,█▇▆▅▄▃▃▂▁▁
val_accuracy,███▇▆▃▁▁▂▃
val_loss,█▇▆▅▄▃▃▂▁▁

0,1
epochs,10.0
test_accuracy,0.0796
test_loss,25402.0738
train_accuracy,0.08176
train_loss,136735.87219
val_accuracy,0.08333
val_loss,15210.49259


[34m[1mwandb[0m: Agent Starting Run: s8o868bb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294340987163262, max=1.…

0,1
epochs,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▄█
train_loss,███▅▁
val_accuracy,▁▁▁▄█
val_loss,███▅▁

0,1
epochs,5.0
test_accuracy,0.7215
test_loss,8467.0769
train_accuracy,0.71696
train_loss,45973.67437
val_accuracy,0.708
val_loss,5228.1831


[34m[1mwandb[0m: Agent Starting Run: jz3hvx72 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12295452490733207, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▆▆▇▇███
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epochs,10.0
test_accuracy,0.9535
test_loss,1620.11454
train_accuracy,0.96361
train_loss,6372.09959
val_accuracy,0.94967
val_loss,1102.67313


[34m[1mwandb[0m: Agent Starting Run: bm160st5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▃▅▅▁▄█▁▂▅
train_loss,▆▆▄▃▃▂▁▃▄█
val_accuracy,▃▃▄▅▁▄█▁▂▆
val_loss,▆▆▄▃▃▁▁▃▄█

0,1
epochs,10.0
test_accuracy,0.1354
test_loss,91317.69102
train_accuracy,0.1315
train_loss,493178.08866
val_accuracy,0.133
val_loss,54610.02254


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k26bzzwc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11961680435928985, max=1.…

0,1
epochs,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▃▁▄▆▇█▇▆▄
train_loss,▅▅█▅▃▂▁▁▂▄
val_accuracy,▅▃▁▅▆▇█▇▆▄
val_loss,▅▅█▅▃▂▁▁▂▅

0,1
epochs,10.0
test_accuracy,0.3063
test_loss,18825.42103
train_accuracy,0.31989
train_loss,101394.15483
val_accuracy,0.31783
val_loss,11326.28699


Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x0000016B180EF9D0>> (for post_run_cell):


ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine