<div style='text-align: center;'>
    <h1>CS6910 Fundamentals of Deep Learning</h1>
    <h2>Assignment-1</h2>
</div>

In [1]:
# Load all necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist
import wandb
from sklearn.model_selection import train_test_split

In [2]:
#Random seed to initiate initial weights and bias.
#This decides the overall convergence of the model.
np.random.seed(27)

In [3]:
class ObjectiveFunction:
    def __init__(self, method):
        self.method = method

    def get_loss(self, y, y_hat):
        if self.method == "cel":
            return self.cross_entropy_loss(y, y_hat)
        elif self.method == "mse":
            return self.mean_square_error(y, y_hat)

    def get_derivative(self, y, y_hat):
        if self.method == "cel":
            return self.cross_entropy_loss_derivative(y, y_hat)
        elif self.method == "mse":
            return self.mean_square_error_derivative(y, y_hat)

    def mean_square_error(self, y, y_hat):
        return np.sum((y - y_hat) ** 2) / 2

    def mean_square_error_derivative(self, y, y_hat):
        return y_hat - y

    def cross_entropy_loss(self, y, y_hat):
        return -np.sum(y * np.log(y_hat))

    def cross_entropy_loss_derivative(self, y, y_hat):
        return -y/y_hat

In [4]:
class NeuralLayer:
    def __init__(self, index, n_input, n_neurons, function=None, weights=None, bias=None, method="random"):
        self.index = index
        self.function = function if function is not None else 'sigmoid'
        self.weights = weights if weights is not None else self.initialize_weights(method, n_input, n_neurons)
        self.bias = bias if bias is not None else np.random.randn(n_neurons)
        self.activation = None

        self.error = None
        self.delta = None

        self.d_weights = np.zeros([n_input, n_neurons])
        self.d_bias = np.zeros(n_neurons)

        self.h_weights = np.zeros([n_input, n_neurons])
        self.h_bias = np.zeros(n_neurons)
        self.m_weights = np.zeros([n_input, n_neurons])
        self.m_bias = np.zeros(n_neurons)

    def initialize_weights(self, method, n_input, n_neurons):
        if method == "xavier":
            limit = np.sqrt(2 / (n_input + n_neurons))
            return np.random.randn(n_input, n_neurons) * limit
        return np.random.randn(n_input, n_neurons)

    def activate(self, x):
        z = np.dot(x, self.weights) + self.bias
        self.activation = self._apply_activation(z)
        return self.activation

    def _apply_activation(self, r):
        if self.function == 'sigmoid':
            return 1 / (1 + np.exp(-r))
        elif self.function == 'tanh':
            return np.tanh(r)
        elif self.function == 'relu':
            return np.maximum(0, r)
        elif self.function == 'softmax':
            max_r = np.max(r, axis=1)
            max_r = max_r.reshape(max_r.shape[0], 1)
            exp_r = np.exp(r - max_r)
            return exp_r / np.sum(exp_r, axis=1).reshape(exp_r.shape[0], 1)
        return r

    def apply_activation_derivative(self, z):
        if self.function == 'sigmoid':
            return z * (1 - z)
        elif self.function == 'tanh':
            return (z - z**2)
        elif self.function == 'relu':
            return np.where(z > 0, 1, 0)
        elif self.function == 'softmax':
            return np.diag(z) - np.outer(z, z)
        return np.ones(z.shape)

    def __str__(self):
        return f'Neural Layer: {self.index}, {self.weights.shape} , {self.function}'

In [5]:
class NeuralNetwork:
    def __init__(self, config):
        def get_value(key, default):
            return config[key] if key in config else default

        self.layers = []

        self.criterion = get_value('criterion', 'cel')
        self.weight_initialization = get_value('weight_initialization', 'random')

        self.c = ObjectiveFunction(method=self.criterion)

        self.add_layers(config['input_size'],
                         config['hidden_layers'],
                         config['output_size'],
                         config['neurons'],
                         config['activation'],
                         config['output_activation']
                        )

    def forward(self, x):
        for layer in self.layers:
            x = layer.activate(x)
        return x

    def backward(self, x, y, y_hat):
        for i in reversed(range(len(self.layers))):
            layer = self.layers[i]
            if layer == self.layers[-1]:
                layer.error = self.c.get_derivative(y, y_hat)
                output_derivative_matrix = []
                for i in range(y_hat.shape[0]):
                    output_derivative_matrix.append(np.matmul(
                        self.c.get_derivative(y[i], y_hat[i]),
                        layer.apply_activation_derivative(y_hat[i])
                    ))
                layer.delta = np.array(output_derivative_matrix)
            else:
                next_layer = self.layers[i + 1]
                layer.error = np.matmul(next_layer.delta, next_layer.weights.T)
                layer.delta = layer.error * layer.apply_activation_derivative(layer.activation)


        for i in range(len(self.layers)):
            layer = self.layers[i]
            activation = np.atleast_2d(x if i == 0 else self.layers[i - 1].activation)
            layer.d_weights = np.matmul(activation.T, layer.delta)/y.shape[0]
            layer.d_bias = np.sum(layer.delta, axis=0)/y.shape[0]

    def add_layers(self, input_size, hidden_layers, output_size, neurons, activation, output_activation):
        for i in range(0, hidden_layers+1):
            n_input = input_size if i==0 else neurons
            n_neurons = output_size if i==hidden_layers else neurons
            self.layers.append(NeuralLayer(
                index=i+1,
                n_input=n_input,
                n_neurons=n_neurons,
                function= output_activation if i==hidden_layers else activation,
                method=self.weight_initialization
                )
            )

In [6]:
class Optimizer:
    def __init__(self, nn:NeuralNetwork, config=None):
        self.nn, self.lr, self.optimizer = nn, config['learning_rate'], config['optimizer']
        self.beta, self.epsilon, self.beta1, self.beta2= config['beta'], config['epsilon'], config['beta1'], config['beta2']
        self.timestep = 0
        self.decay = config['decay']

    def step(self):
        if(self.optimizer == "sgd"):
            self.sgd()
        elif(self.optimizer == "momentum"):
            self.momentum()
        elif(self.optimizer == "nag"):
            self.nag()
        elif(self.optimizer == "rmsprop"):
            self.rmsprop()
        elif(self.optimizer == "adam"):
            self.adam()
        elif (self.optimizer == "nadam"):
            self.nadam()

    def sgd(self):
        for layer in self.nn.layers:
            layer.weights -= self.lr*(layer.d_weights + self.decay*layer.weights)
            layer.bias -= self.lr*(layer.d_bias + self.decay*layer.bias)

    def momentum(self):
        for layer in self.nn.layers:
            layer.h_weights = self.beta*layer.h_weights + layer.d_weights
            layer.h_bias = self.beta*layer.h_bias + layer.d_bias
            layer.weights -= self.lr*(layer.h_weights + self.decay*layer.weights)
            layer.bias -= self.lr*(layer.h_bias + self.decay*layer.bias)

    def nag(self):
        for layer in self.nn.layers:
            layer.h_weights = self.beta*layer.h_weights + layer.d_weights
            layer.h_bias = self.beta*layer.h_bias + layer.d_bias
            layer.weights -= self.lr * (self.beta * layer.h_weights + layer.d_weights + self.decay * layer.weights)
            layer.bias -= self.lr * (self.beta * layer.h_bias + layer.d_bias + self.decay * layer.bias)

    def rmsprop(self):
        for layer in self.nn.layers:
            layer.h_weights = self.beta * layer.h_weights + (1 - self.beta) * layer.d_weights**2
            layer.h_bias = self.beta * layer.h_bias + (1 - self.beta) * layer.d_bias**2
            layer.weights -= (self.lr / (np.sqrt(layer.h_weights) + self.epsilon)) * layer.d_weights + self.decay * layer.weights * self.lr
            layer.bias -= (self.lr / (np.sqrt(layer.h_bias) + self.epsilon)) * layer.d_bias + self.decay * layer.bias * self.lr

    def adam(self):
        for layer in self.nn.layers:
            layer.m_weights = self.beta1 * layer.m_weights + (1 - self.beta1) * layer.d_weights
            layer.m_bias = self.beta1 * layer.m_bias + (1 - self.beta1) * layer.d_bias
            layer.h_weights = self.beta2 * layer.h_weights + (1 - self.beta2) * layer.d_weights**2
            layer.h_bias = self.beta2 * layer.h_bias + (1 - self.beta2) * layer.d_bias**2
            correction_term1 = 1/(1 - self.beta1**(self.timestep + 1))
            correction_term2 = 1/(1 - self.beta2**(self.timestep + 1))
            weights_hat1 = layer.m_weights * correction_term1
            bias_hat1 = layer.m_bias * correction_term1
            weights_hat2 = layer.h_weights * correction_term2
            bias_hat2 = layer.h_bias * correction_term2
            layer.weights -= self.lr * (weights_hat1 / ((np.sqrt(weights_hat2)) + self.epsilon)) + self.decay * layer.weights * self.lr
            layer.bias -= self.lr * (bias_hat1 / ((np.sqrt(bias_hat2)) + self.epsilon)) + self.decay * layer.bias * self.lr

    def nadam(self):
        for layer in self.nn.layers:
            layer.m_weights = self.beta1 * layer.m_weights + (1 - self.beta1) * layer.d_weights
            layer.m_bias = self.beta1 * layer.m_bias + (1 - self.beta1) * layer.d_bias
            layer.h_weights = self.beta2 * layer.h_weights + (1 - self.beta2) * layer.d_weights**2
            layer.h_bias = self.beta2 * layer.h_bias + (1 - self.beta2) * layer.d_bias**2
            correction_term1 = 1/(1 - self.beta1**(self.timestep + 1))
            correction_term2 = 1/(1 - self.beta2**(self.timestep + 1))
            weights_hat1 = layer.m_weights * correction_term1
            bias_hat1 = layer.m_bias * correction_term1
            weights_hat2 = layer.h_weights * correction_term2
            bias_hat2 = layer.h_bias * correction_term2
            combined_weight_update = self.beta1 * weights_hat1 + ((1 - self.beta1) / (1 - self.beta1 ** (self.timestep + 1))) * layer.d_weights
            combined_bias_update = self.beta1 * bias_hat1 + ((1 - self.beta1) / (1 - self.beta1 ** (self.timestep + 1))) * layer.d_bias
            layer.weights -= self.lr * (combined_weight_update / ((np.sqrt(weights_hat2)) + self.epsilon)) + self.decay * layer.weights * self.lr
            layer.bias -= self.lr * (combined_bias_update / ((np.sqrt(bias_hat2)) + self.epsilon)) + self.decay * layer.bias * self.lr

In [7]:
# Load Input Data
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Flatten the images
train_images = train_images.reshape(train_images.shape[0], 784) / 255
X_test = test_images.reshape(test_images.shape[0], 784) / 255

# Encode the labels
train_labels = np.eye(10)[train_labels]
Y_test = np.eye(10)[test_labels]

# Prepare data for training and validation
X_train, X_val, Y_train, Y_val = train_test_split(train_images, train_labels, test_size=0.1, shuffle=True, random_state=27)

In [8]:
def accuracy(y, y_hat):
    accuracy = np.mean(np.argmax(y, axis=1)==np.argmax(y_hat, axis=1))
    return accuracy

In [9]:
def wandb_sweep():
    train_loss_hist = []
    train_accuracy_hist = []
    val_loss_hist = []
    val_accuracy_hist = []

    run = wandb.init()
    config = wandb.config
    run.name = f"hl_{config['hidden_layers']}_nu_{config['neurons']}_ac_{config['activation']}_lr_{config['learning_rate']}_bs_{config['batch_size']}_opt_{config['optimizer']}_de_{config['decay']}_init_{config['weight_initialization']}"


    nn = NeuralNetwork(config)
    optimizer = Optimizer(nn=nn, config=config)

    batch_size = config['batch_size']
    criterion = ObjectiveFunction(method = config['criterion'])

    for epoch in range(config['epochs']):
        for batch in range(0, X_train.shape[0], batch_size):
            # Get the batch of data
            X_batch = X_train[batch:batch+batch_size]
            Y_batch = Y_train[batch:batch+batch_size]

            Y_hat_batch = nn.forward(X_batch)
            nn.backward(X_batch, Y_batch, Y_hat_batch)
            optimizer.step()

        optimizer.timestep += 1

        # Training
        Y_hat_train = nn.forward(X_train)
        train_loss = criterion.get_loss(Y_train, Y_hat_train)
        train_accuracy = accuracy(Y_train, Y_hat_train)

        # Validation
        Y_hat_val = nn.forward(X_val)
        val_loss = criterion.get_loss(Y_val, Y_hat_val)
        val_accuracy = accuracy(Y_val, Y_hat_val)

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_accuracy": train_accuracy,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy
        })

        train_loss_hist.append(train_loss)
        train_accuracy_hist.append(train_accuracy)
        val_loss_hist.append(val_loss)
        val_accuracy_hist.append(val_accuracy)

    # Testing
    Y_hat_test = nn.forward(X_test)
    test_loss = criterion.get_loss(Y_test, Y_hat_test)
    test_accuracy = accuracy(Y_test, Y_hat_test)
    wandb.log({
        "test_loss": test_loss,
        "test_accuracy": test_accuracy
    })

    return nn, train_loss_hist, train_accuracy_hist, val_loss_hist, val_accuracy_hist

In [10]:
sweep_config = {
    'method': 'random',
    'name': 'Q4_SWEEP_3',
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize',
    },
    'parameters': {
        'input_size': {
            'value': 784
        },
        'output_size': {
            'value': 10
        },
        'hidden_layers': {
            'values': [3, 4, 5]
        },
        'neurons': {
            'values': [32, 64, 128]
        },
        'activation': {
            'values': ['sigmoid', 'tanh', 'relu']
        },
        'output_activation': {
            'value': 'softmax'
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'decay': {
            'values': [0, 0.0005, 0.000005]
        },
        'epochs': {
            'values': [5, 10, 20]
        },
        'optimizer': {
            'values': ['sgd', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'weight_initialization': {
            'values': ['xavier', 'random']
        },
        'beta': {
            'values': [0.7, 0.8, 0.9]
        },
        'beta1':{
            'value': 0.9
        },
        'beta2':{
            'value': 0.9999
        },
        'epsilon': {
            'value': 1e-8
        },
        'criterion': {
            'value': 'cel'
        },
    }
}

In [None]:
# Setup Wandb
wandb.login(key='5da0c161a9c9720f15195bb6e9f05e44c45112d1')
wandb.init(project="CS6910_AS1", entity='ed23s037')

# Do Sweep
wandb_id = wandb.sweep(sweep_config, project="CS6910_AS1")
wandb.agent(wandb_id, function=wandb_sweep, count=1000)

# Finish
wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33med23s037[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\ravig\.netrc




Create sweep with ID: mpyawpak
Sweep URL: https://wandb.ai/ed23s037/CS6910_AS1/sweeps/mpyawpak


[34m[1mwandb[0m: Agent Starting Run: 68o8q7sv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random
Exception in thread Exception in thread ChkStopThr:
Traceback (most recent call last):
  File "E:\Anaconda\lib\threading.py", line 932, in _bootstrap_inner
NetStatThr:
Traceback (most recent call last):
  File "E:\Anaconda\lib\threading.py", line 932, in _bootstrap_inner
    self.run()
      File "E

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▄▅▅▅▆▆▆▇▇▇▇▇▇████
train_loss,█▆▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇█████
val_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7972
test_loss,5692.47612
train_accuracy,0.8108
train_loss,28383.44488
val_accuracy,0.8015
val_loss,3334.06975


[34m[1mwandb[0m: Agent Starting Run: 8cjiadeh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▇▇█▂▆▆▅▃▁▄
train_loss,▄▂▁▇▃▄▄▇█▇
val_accuracy,▇▇█▂▆▆▅▃▁▄
val_loss,▄▂▁▇▄▄▄▇█▇

0,1
epoch,10.0
test_accuracy,0.3222
test_loss,19114.80012
train_accuracy,0.33013
train_loss,103129.28852
val_accuracy,0.32683
val_loss,11467.46626


[34m[1mwandb[0m: Agent Starting Run: o0avp9ft with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.003 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.25334389299542415, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▇██
train_loss,█▅▃▂▁
val_accuracy,▁▄▇██
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.5325
test_loss,14619.84554
train_accuracy,0.52819
train_loss,78775.92286
val_accuracy,0.5185
val_loss,8802.04007


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yenvfulz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.5051
test_loss,14523.58273
train_accuracy,0.50206
train_loss,77721.93784
val_accuracy,0.50433
val_loss,8592.10733


[34m[1mwandb[0m: Agent Starting Run: wb8qz8ce with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▁▁▁▁
train_loss,█▁▁▁▁
val_accuracy,▁████
val_loss,█▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23029.18551
train_accuracy,0.09981
train_loss,124356.36058
val_accuracy,0.10167
val_loss,13818.75838


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9i0t49on with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


  return -y/y_hat
  return -y/y_hat
  output_derivative_matrix.append(np.matmul(


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: dxqxh0kg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12007057785619762, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 8iht8jby with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197430934365652, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▂▃▃█▂▁▃▆▃▂▅▅▃▄▂▅▇▂▂▅
train_loss,█▂▂▁▂▂▂▁▂▂▁▁▂▁▂▁▁▂▂▁
val_accuracy,▂▃▃█▂▁▃▆▃▃▆▅▃▄▂▅▇▂▂▅
val_loss,█▂▂▁▂▂▂▁▂▂▁▁▂▁▂▁▁▂▂▁

0,1
epoch,20.0
test_accuracy,0.2478
test_loss,20277.01765
train_accuracy,0.251
train_loss,109237.59444
val_accuracy,0.24783
val_loss,12171.70364


[34m[1mwandb[0m: Agent Starting Run: gh88ftpg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇██
train_loss,█▆▄▃▁
val_accuracy,▁▅▇██
val_loss,█▆▄▃▁

0,1
epoch,5.0
test_accuracy,0.5579
test_loss,16673.32136
train_accuracy,0.55841
train_loss,89890.11183
val_accuracy,0.5555
val_loss,9998.87891


[34m[1mwandb[0m: Agent Starting Run: 0ed94jm7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▂▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.7619
test_loss,6606.11219
train_accuracy,0.77837
train_loss,33763.13867
val_accuracy,0.76717
val_loss,3921.93964


[34m[1mwandb[0m: Agent Starting Run: dc1dgbuh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11962731827371012, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23046.66812
train_accuracy,0.1003
train_loss,124448.34119
val_accuracy,0.09733
val_loss,13831.85241


[34m[1mwandb[0m: Agent Starting Run: x7c9qrh6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▆▇▇▇▇▇▇██████
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▆▆▇▇▇▇▇▇▇██████
val_loss,█▅▃▂▂▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂

0,1
epoch,20.0
test_accuracy,0.8503
test_loss,4996.76564
train_accuracy,0.88444
train_loss,18652.59262
val_accuracy,0.8555
val_loss,2922.03813


[34m[1mwandb[0m: Agent Starting Run: r502pwii with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11969044059449477, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▅▆▆▇▇█
train_loss,█▇▆▅▄▄▃▂▂▁
val_accuracy,▁▂▃▄▅▆▆▇▇█
val_loss,█▇▆▅▄▄▃▂▂▁

0,1
epoch,10.0
test_accuracy,0.3443
test_loss,18534.10803
train_accuracy,0.35035
train_loss,99693.87788
val_accuracy,0.342
val_loss,11113.47423


[34m[1mwandb[0m: Agent Starting Run: orz0alu5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▆▅▃▂▂▂▂▁▁
train_loss,█▃▂▂▁▁▁▁▁▁
val_accuracy,█▆▅▂▁▂▂▂▁▁
val_loss,█▃▂▂▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.0402
test_loss,23268.44316
train_accuracy,0.03857
train_loss,125657.44941
val_accuracy,0.04117
val_loss,13951.90483


[34m[1mwandb[0m: Agent Starting Run: vofp6ap8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇█████
train_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇▇██████
val_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6964
test_loss,8907.21565
train_accuracy,0.70904
train_loss,45550.71938
val_accuracy,0.68833
val_loss,5368.40876


[34m[1mwandb[0m: Agent Starting Run: x4z2u2p9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11957476717624319, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇▇▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▄▆▇▇▇▇▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.5934
test_loss,12034.19571
train_accuracy,0.60344
train_loss,64511.12637
val_accuracy,0.59717
val_loss,7214.79998


[34m[1mwandb[0m: Agent Starting Run: 5lmdc4qg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195289154508701, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8544
test_loss,4084.58933
train_accuracy,0.87296
train_loss,19033.91966
val_accuracy,0.85167
val_loss,2426.35238


[34m[1mwandb[0m: Agent Starting Run: 4cfq38uw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2264698128130767, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▄▅▄▅▄▄▄▄▄▅▅▇▆▇██
train_loss,█▇▇▇▇▇▆▆▆▅▅▅▅▅▄▃▃▂▂▁
val_accuracy,▁▁▁▁▄▅▄▅▄▄▄▄▄▅▅▇▆▇██
val_loss,█▇▇▇▇▇▆▆▆▅▅▅▅▅▄▃▃▂▂▁

0,1
epoch,20.0
test_accuracy,0.3988
test_loss,18343.52177
train_accuracy,0.39811
train_loss,98769.06645
val_accuracy,0.39483
val_loss,11028.25598


[34m[1mwandb[0m: Agent Starting Run: dlnphvzs with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11962731827371012, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▅▆▆▇▇████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8655
test_loss,3891.41346
train_accuracy,0.89009
train_loss,16450.05881
val_accuracy,0.86933
val_loss,2241.26693


[34m[1mwandb[0m: Agent Starting Run: bxanhc49 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▄▄▆▇▇▆▆▆▇███▇▇▇█▇
train_loss,███▇▅▅▄▄▃▃▂▂▂▂▂▂▂▂▁▂
val_accuracy,▁▁▃▄▄▆▆▇▆▇▆▇███▇▇▇█▇
val_loss,███▇▅▅▄▄▄▃▃▂▂▂▂▂▃▂▁▂

0,1
epoch,20.0
test_accuracy,0.571
test_loss,12132.37686
train_accuracy,0.57194
train_loss,65224.71157
val_accuracy,0.57233
val_loss,7264.89275


[34m[1mwandb[0m: Agent Starting Run: 0pl8t352 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅█▇
train_loss,█▅▃▂▁
val_accuracy,▂▇▄█▁
val_loss,█▅▃▁▁

0,1
epoch,5.0
test_accuracy,0.0853
test_loss,109830.24402
train_accuracy,0.0885
train_loss,583027.76855
val_accuracy,0.0785
val_loss,64915.49112


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: or854pty with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8650500790722193, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▄▄▅▅▅▅▆▆▆▇▇▇▇█████
train_loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▄▄▄▅▅▅▅▆▆▆▇▇▇▇█████
val_loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7139
test_loss,8725.0265
train_accuracy,0.71865
train_loss,46457.40732
val_accuracy,0.71183
val_loss,5185.19371


[34m[1mwandb[0m: Agent Starting Run: r4plu1o4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▃▄▅▆▇▇▇██
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.5859
test_loss,11888.36376
train_accuracy,0.59272
train_loss,63412.71599
val_accuracy,0.5835
val_loss,7018.1102


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nsy5d3nq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1196062922928201, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▃▃▃▃▃▃▃▃▃▃▃▁▂▂██████
train_loss,▆▄▃▂▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁
val_accuracy,███████████▇██▁▁▁▁▁▁
val_loss,█▆▄▃▂▂▂▁▁▁▁▇▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23077.82438
train_accuracy,0.1003
train_loss,124616.32952
val_accuracy,0.09733
val_loss,13849.41408


[34m[1mwandb[0m: Agent Starting Run: xhd8kgzd with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22626582278481014, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▆▁▇██▂▆▆▆▆
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▁█▁▁▁█▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23055.17344
train_accuracy,0.10033
train_loss,124493.16937
val_accuracy,0.09733
val_loss,13837.08259


[34m[1mwandb[0m: Agent Starting Run: 9ik5xnmc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11965887110954809, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁█
train_loss,██▆▂▁
val_accuracy,████▁
val_loss,██▆▂▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23057.29948
train_accuracy,0.1003
train_loss,124504.34089
val_accuracy,0.09733
val_loss,13839.45544


[34m[1mwandb[0m: Agent Starting Run: x5ypmd3x with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.7613
test_loss,6556.34882
train_accuracy,0.77594
train_loss,33518.38112
val_accuracy,0.76383
val_loss,3907.01585


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iqkrdy3a with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9722173377879374, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▇██
train_loss,█▆▄▂▁
val_accuracy,▁▅███
val_loss,█▆▄▂▁

0,1
epoch,5.0
test_accuracy,0.3062
test_loss,19616.66669
train_accuracy,0.30957
train_loss,105749.45166
val_accuracy,0.30583
val_loss,11775.22711


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1chglaz4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11972202674173117, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇▇██████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇▇██████
val_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7744
test_loss,6342.27517
train_accuracy,0.79181
train_loss,31675.09452
val_accuracy,0.7785
val_loss,3766.79507


[34m[1mwandb[0m: Agent Starting Run: seru62lr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▄▅▅▅▆██
train_loss,█████▇▆▃▂▁
val_accuracy,▁▂▄▄▅▅▅▆██
val_loss,█████▇▆▃▂▁

0,1
epoch,10.0
test_accuracy,0.439
test_loss,16301.08985
train_accuracy,0.43989
train_loss,87939.95899
val_accuracy,0.431
val_loss,9775.4939


[34m[1mwandb[0m: Agent Starting Run: k735xjq1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9719683655536028, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,██▆▂▁
train_loss,█▃▂▁▁
val_accuracy,▇█▅▁▁
val_loss,█▃▁▁▁

0,1
epoch,5.0
test_accuracy,0.0524
test_loss,23123.79246
train_accuracy,0.05044
train_loss,124831.81579
val_accuracy,0.05183
val_loss,13863.17383


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 44srx6uv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9727376659924369, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇▇█
train_loss,█▅▄▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▄▂▁

0,1
epoch,5.0
test_accuracy,0.4797
test_loss,15861.3301
train_accuracy,0.48407
train_loss,85528.75679
val_accuracy,0.47967
val_loss,9510.60503


[34m[1mwandb[0m: Agent Starting Run: yzxhyodv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12307831434255742, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▆▇▆▄▁▆▄▅▄▂█▂█▇▂▅▃▃▆▃
train_loss,▂▁▂▂▃▂▅▂▄▄▂▄▂▃▄▅▇█▃█
val_accuracy,▆▇▆▄▁▆▃▅▄▂▇▂█▇▂▅▃▃▆▃
val_loss,▂▁▂▂▃▂▅▂▄▄▂▄▂▃▄▅▇█▃█

0,1
epoch,20.0
test_accuracy,0.0904
test_loss,303262.43628
train_accuracy,0.0885
train_loss,1635529.06116
val_accuracy,0.09333
val_loss,182811.06756


[34m[1mwandb[0m: Agent Starting Run: ew8o7bdl with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12009890498057224, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: k5ekc2y6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▃▇▁▆█
train_loss,█▄▂▁▁
val_accuracy,▂▇▁▆█
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.1255
test_loss,22823.83195
train_accuracy,0.12576
train_loss,123209.36135
val_accuracy,0.12967
val_loss,13685.62868


[34m[1mwandb[0m: Agent Starting Run: r4pywrmq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12306718509811014, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: f2h8y3m7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11969044059449477, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▅▅▆▆▆▇▇▇▇▇███████
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇▇██████████
val_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.853
test_loss,4131.55105
train_accuracy,0.87093
train_loss,19436.88869
val_accuracy,0.85183
val_loss,2393.27927


[34m[1mwandb[0m: Agent Starting Run: 3tmxzh7p with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11962731827371012, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇▇█
train_loss,█▃▂▁▁
val_accuracy,▁▆▇▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.8023
test_loss,5384.69695
train_accuracy,0.81752
train_loss,27561.3429
val_accuracy,0.80883
val_loss,3162.59564


[34m[1mwandb[0m: Agent Starting Run: qv6oz4th with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22661744022503516, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▆▃▂▁
train_loss,▁▃▆▇█
val_accuracy,█▅▃▂▁
val_loss,▁▃▆▇█

0,1
epoch,5.0
test_accuracy,0.103
test_loss,123064.97169
train_accuracy,0.10315
train_loss,664420.95336
val_accuracy,0.10017
val_loss,74235.8632


[34m[1mwandb[0m: Agent Starting Run: 66r5esz6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11956043956043956, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▅█▆▂▁
train_loss,▄▁▂█▆
val_accuracy,▅█▆▂▁
val_loss,▄▁▂█▆

0,1
epoch,5.0
test_accuracy,0.1127
test_loss,22733.204
train_accuracy,0.11306
train_loss,122887.43575
val_accuracy,0.10833
val_loss,13651.71176


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lapn83to with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▆██▄▄▅▆▄▁▂▃▄▅▅▆▅▂▃▃▃
train_loss,▃▁▁▄▃▃▄▅█▆▅▄▃▃▃▃▄▅▄▅
val_accuracy,▆██▄▄▅▆▅▁▂▃▄▅▆▆▅▂▃▃▃
val_loss,▃▁▁▄▃▃▃▅█▆▅▄▃▃▃▃▄▅▄▅

0,1
epoch,20.0
test_accuracy,0.2641
test_loss,21336.51705
train_accuracy,0.26859
train_loss,115002.13356
val_accuracy,0.25733
val_loss,12863.27385


[34m[1mwandb[0m: Agent Starting Run: emsx6pga with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11969044059449477, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▃▂▁▁
val_accuracy,▁▅▆▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.7911
test_loss,5780.1429
train_accuracy,0.80306
train_loss,29691.72179
val_accuracy,0.79817
val_loss,3433.78047


[34m[1mwandb[0m: Agent Starting Run: fwbii76g with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,██▇█▆▃▁▁▃▃
train_loss,▃▁▂▁▃▅█▆▅▄
val_accuracy,██▇█▆▃▁▁▃▃
val_loss,▃▁▂▁▃▅█▆▅▄

0,1
epoch,10.0
test_accuracy,0.2644
test_loss,18532.07195
train_accuracy,0.2643
train_loss,100220.88638
val_accuracy,0.26433
val_loss,11184.53118


[34m[1mwandb[0m: Agent Starting Run: vwv10akg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

  return -y/y_hat


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12194247827255622, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 215b7vaq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8475
test_loss,4268.13113
train_accuracy,0.86452
train_loss,20598.42832
val_accuracy,0.85467
val_loss,2468.95896


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hyinn5bo with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12285301030555053, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▆▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▃▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.685
test_loss,8623.36234
train_accuracy,0.68902
train_loss,45956.8366
val_accuracy,0.67533
val_loss,5310.37425


[34m[1mwandb[0m: Agent Starting Run: z5lqa8zb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,██████▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▃▄█
val_accuracy,██████▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▂▃▄█

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: hrvxds9j with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9722271049393566, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▂▂▁▂
train_loss,█▃▂▁▁
val_accuracy,█▃▂▁▂
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.0587
test_loss,23193.40356
train_accuracy,0.05922
train_loss,125170.73399
val_accuracy,0.06533
val_loss,13920.13873


[34m[1mwandb[0m: Agent Starting Run: p97cl3a0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22743269399964808, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.6989
test_loss,8517.30579
train_accuracy,0.71398
train_loss,43746.28911
val_accuracy,0.7105
val_loss,5001.71416


[34m[1mwandb[0m: Agent Starting Run: jwq5on9i with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: avo84avi with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▂▂▃▃▃▃▄▆▇▇▇▇████
train_loss,███████▇▆▅▅▄▄▃▃▂▂▂▁▁
val_accuracy,▁▁▁▁▂▂▃▃▃▃▄▆▇▇▇▇████
val_loss,███████▇▆▅▅▄▄▃▃▂▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.5827
test_loss,11456.43019
train_accuracy,0.58885
train_loss,61439.61849
val_accuracy,0.5825
val_loss,6864.55122


[34m[1mwandb[0m: Agent Starting Run: 9aje2meb with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11965887110954809, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▁▂▂▄▅▇▇▇█
train_loss,██▇▇▅▄▃▃▂▁
val_accuracy,▂▁▂▂▄▅▇▇▇█
val_loss,██▇▇▅▄▃▃▂▁

0,1
epoch,10.0
test_accuracy,0.4658
test_loss,16532.15329
train_accuracy,0.46852
train_loss,89112.34487
val_accuracy,0.46783
val_loss,9921.26896


[34m[1mwandb[0m: Agent Starting Run: 9dbym0ps with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 0dxloduh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.003 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.253606615059817, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▃▃▄▄▅▇█
train_loss,█▆▅▄▃▂▂▁▁▁
val_accuracy,▂▁▁▂▂▃▄▅▆█
val_loss,█▆▅▄▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.1156
test_loss,25742.95262
train_accuracy,0.11719
train_loss,139030.48892
val_accuracy,0.1175
val_loss,15377.78621


[34m[1mwandb[0m: Agent Starting Run: wzw7f8dc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁███
train_loss,█▅▅▂▁
val_accuracy,██▁▁▁
val_loss,█▅▅▂▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,74755.33177
train_accuracy,0.10061
train_loss,403084.38614
val_accuracy,0.0945
val_loss,45447.82546


[34m[1mwandb[0m: Agent Starting Run: yqzyryzs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.003 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2534752771423544, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▂█▁█
train_loss,█▃▂▁▁
val_accuracy,▇▇▁█▁
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,24993.89727
train_accuracy,0.10061
train_loss,134944.56475
val_accuracy,0.0945
val_loss,15018.81895


[34m[1mwandb[0m: Agent Starting Run: orijpg98 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9724835164835165, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇▇██████
train_loss,█▆▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▄▅▅▆▆▇▇▇▇▇▇████████
val_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8408
test_loss,4478.15617
train_accuracy,0.87178
train_loss,19329.50271
val_accuracy,0.84233
val_loss,2635.97334


[34m[1mwandb[0m: Agent Starting Run: ncf9rk4e with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22672884040119656, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇████████
train_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇████████
val_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7914
test_loss,5823.27383
train_accuracy,0.80157
train_loss,29955.56198
val_accuracy,0.7885
val_loss,3430.27992


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: brkjrdn5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▆▄▂▅▃▆▃▁▄▁▁▅▄▃▃▅▅█
train_loss,█▆▃▄▇▄▅▄▄▃▂▃▄▃▂▁▁▂▁▁
val_accuracy,▁▁▆▅▃▅▃▇▃▂▄▂▂▅▄▃▃▅▅█
val_loss,█▆▃▄▇▄▅▄▄▃▂▃▄▃▂▁▁▂▁▁

0,1
epoch,20.0
test_accuracy,0.2529
test_loss,33686.34267
train_accuracy,0.26159
train_loss,179458.48635
val_accuracy,0.25533
val_loss,20008.42542


[34m[1mwandb[0m: Agent Starting Run: hdpjn17m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▂▅▅▆▄▂▂▁▂▂▂▃▅▇▇▆▇█
train_loss,█▆▅▅▄▄▄▅▆▅▅▅▅▄▃▂▂▂▂▁
val_accuracy,▁▅▂▄▅▆▅▂▂▁▂▂▃▃▆▇▇▆▆█
val_loss,█▆▅▅▄▄▄▅▆▅▅▅▅▄▃▂▂▂▂▁

0,1
epoch,20.0
test_accuracy,0.217
test_loss,47462.89291
train_accuracy,0.2087
train_loss,260433.09083
val_accuracy,0.206
val_loss,28973.04069


[34m[1mwandb[0m: Agent Starting Run: 17u0ikfg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9725810703928289, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇██
train_loss,█▅▃▂▁
val_accuracy,▁▅▇██
val_loss,█▅▂▁▁

0,1
epoch,5.0
test_accuracy,0.8492
test_loss,4444.21879
train_accuracy,0.87033
train_loss,20609.3324
val_accuracy,0.85433
val_loss,2609.37327


[34m[1mwandb[0m: Agent Starting Run: gtcui50b with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▂▃▂▂▂▂▃▁▃▁▂▂▁▃▃▃▃█▇▇
train_loss,█▆▇▇▅▅▆▄▄▄▂▃▃▄▃▃▂▁▁▁
val_accuracy,▂▃▂▂▂▂▃▁▃▁▂▂▁▃▃▃▃█▇█
val_loss,█▆▇▇▅▅▆▄▄▄▂▃▃▄▃▃▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.2839
test_loss,21365.86045
train_accuracy,0.28322
train_loss,115486.27067
val_accuracy,0.28917
val_loss,12830.66026


[34m[1mwandb[0m: Agent Starting Run: oc3atmeu with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▆▅▄▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8749
test_loss,3486.0892
train_accuracy,0.90209
train_loss,14284.60391
val_accuracy,0.87533
val_loss,2013.29873


[34m[1mwandb[0m: Agent Starting Run: 8k50oper with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8389
test_loss,4640.46563
train_accuracy,0.86757
train_loss,20106.30985
val_accuracy,0.847
val_loss,2688.52972


[34m[1mwandb[0m: Agent Starting Run: 2hcvtat2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1198274344074661, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▄▅▆▆▇▇█
train_loss,█▇▇▅▄▃▃▂▁▁
val_accuracy,▁▂▄▄▅▆▆▇▇█
val_loss,█▇▇▅▄▃▃▂▁▁

0,1
epoch,10.0
test_accuracy,0.6075
test_loss,13132.32719
train_accuracy,0.61719
train_loss,70414.76092
val_accuracy,0.6185
val_loss,7854.51603


[34m[1mwandb[0m: Agent Starting Run: xrud0dtg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▆▇▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▃▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8471
test_loss,4378.91074
train_accuracy,0.86391
train_loss,21057.51816
val_accuracy,0.852
val_loss,2564.1163


[34m[1mwandb[0m: Agent Starting Run: d6y0ghr0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇▇██
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8326
test_loss,4737.3264
train_accuracy,0.84811
train_loss,23658.38178
val_accuracy,0.83683
val_loss,2800.55227


[34m[1mwandb[0m: Agent Starting Run: k4sx0djq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1196062922928201, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇██
train_loss,█▅▃▂▁
val_accuracy,▁▅▇██
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.8153
test_loss,5559.15503
train_accuracy,0.83133
train_loss,27515.93198
val_accuracy,0.82617
val_loss,3260.89313


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jqq5sssn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▆▇▇▇███
train_loss,█▅▃▂▂▁▁▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.7323
test_loss,7315.66542
train_accuracy,0.73528
train_loss,38718.5654
val_accuracy,0.72533
val_loss,4396.20331


[34m[1mwandb[0m: Agent Starting Run: 9mi7lgkz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777286247, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8022
test_loss,5545.68154
train_accuracy,0.81576
train_loss,27544.58664
val_accuracy,0.803
val_loss,3263.11228


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 48ucfl2m with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: h4qupev3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇██
train_loss,█▄▂▂▁
val_accuracy,▁▅▇██
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.685
test_loss,8509.02146
train_accuracy,0.69243
train_loss,44453.48536
val_accuracy,0.68633
val_loss,5075.11568


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g0powv81 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11969044059449477, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇████████
train_loss,█▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▆▆▆▇▇▇▇▇▇▇▇███████
val_loss,█▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8173
test_loss,5228.12038
train_accuracy,0.83148
train_loss,26215.38978
val_accuracy,0.82217
val_loss,3073.29962


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: z0ko5vme with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,█████▁▁▁▁▁
train_loss,▂▁▃▄█
val_accuracy,█████▁▁▁▁▁
val_loss,▁▁▃▄█

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 837vykgd with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.5703
test_loss,14289.69047
train_accuracy,0.57698
train_loss,76931.8891
val_accuracy,0.56617
val_loss,8576.31676


[34m[1mwandb[0m: Agent Starting Run: de60s01s with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294489611562782, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▄▄▅▆▆▆▆▇▇████████
train_loss,██▆▅▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▄▅▅▆▆▆▆▇▇████████
val_loss,██▆▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8063
test_loss,5933.651
train_accuracy,0.81765
train_loss,30266.07247
val_accuracy,0.80283
val_loss,3490.04692


[34m[1mwandb[0m: Agent Starting Run: 1ddq45pq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,█▆▅▃▁
val_accuracy,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23043.59711
train_accuracy,0.1003
train_loss,124432.38673
val_accuracy,0.09733
val_loss,13829.22073


[34m[1mwandb[0m: Agent Starting Run: 0w6rxl06 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wbe73mbb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: al5vffgc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1196062922928201, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▄▅▅▆▆▆▆▇▇▇▇▇█████
train_loss,█▆▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▂▄▅▅▆▆▆▇▇▇▇▇▇██████
val_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8013
test_loss,5475.24268
train_accuracy,0.83131
train_loss,25022.04953
val_accuracy,0.80617
val_loss,3271.0995


[34m[1mwandb[0m: Agent Starting Run: ugstcdvf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8653541923009316, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇████████
val_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7624
test_loss,6604.81226
train_accuracy,0.77426
train_loss,34793.30285
val_accuracy,0.76167
val_loss,3946.45268


[34m[1mwandb[0m: Agent Starting Run: 7cmyo42g with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▃▄▄▅▆▇██
train_loss,█████████████▇▄▃▃▂▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▄▄▄▅▆▇██
val_loss,█████████████▇▄▃▃▂▁▁

0,1
epoch,20.0
test_accuracy,0.5453
test_loss,12962.9693
train_accuracy,0.54846
train_loss,69487.98415
val_accuracy,0.53767
val_loss,7785.18114


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ghsroz6z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22706885937912233, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▄▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8314
test_loss,4687.82315
train_accuracy,0.84763
train_loss,23127.05949
val_accuracy,0.8345
val_loss,2750.1618


[34m[1mwandb[0m: Agent Starting Run: kqk9dftq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12310057887120116, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇████████
train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▆▇▇▇▇▇█████████
val_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8271
test_loss,4887.7936
train_accuracy,0.8415
train_loss,24219.10371
val_accuracy,0.83183
val_loss,2864.78305


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l909erk9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12004589990290405, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: kc9cuw1t with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▃▄▅▅▅▅▆▇▇▇▇▆▇▇▇██
train_loss,██▇▇▆▅▅▅▅▄▄▃▃▂▃▂▂▂▁▁
val_accuracy,▁▂▃▃▄▅▅▅▅▆▇▇▇▇▆▆▇▇██
val_loss,██▇▇▆▆▅▅▅▄▄▃▃▂▃▂▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.6477
test_loss,11451.95812
train_accuracy,0.6513
train_loss,61339.79519
val_accuracy,0.649
val_loss,6843.24722


[34m[1mwandb[0m: Agent Starting Run: n66b3bwg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▇▇█
train_loss,█▆▄▃▁
val_accuracy,▁▄▇▇█
val_loss,█▆▄▃▁

0,1
epoch,5.0
test_accuracy,0.5178
test_loss,17864.88218
train_accuracy,0.52783
train_loss,96194.75841
val_accuracy,0.51317
val_loss,10688.17184


[34m[1mwandb[0m: Agent Starting Run: mblkure1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▇▇▇████
train_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▄▆▇▇█████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.7477
test_loss,7113.03561
train_accuracy,0.75589
train_loss,37182.68235
val_accuracy,0.7525
val_loss,4191.63591


[34m[1mwandb[0m: Agent Starting Run: igkhstj8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12012356575463372, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wpoejnd4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1198274344074661, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▆▇█
train_loss,█▃▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▂▂▁▁

0,1
epoch,5.0
test_accuracy,0.865
test_loss,4002.52524
train_accuracy,0.88713
train_loss,16577.13131
val_accuracy,0.865
val_loss,2274.53756


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xfqrv9vj with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇▇▇▇███
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▄▇▇▇▇▇███
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.5804
test_loss,11835.17014
train_accuracy,0.58576
train_loss,63486.45539
val_accuracy,0.58033
val_loss,7082.08472


[34m[1mwandb[0m: Agent Starting Run: yt66cii1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11964835164835165, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.7624
test_loss,6733.40719
train_accuracy,0.76935
train_loss,35602.93091
val_accuracy,0.75867
val_loss,4042.16013


[34m[1mwandb[0m: Agent Starting Run: crdgffdg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▃▄▅▅▅▅▅▆▆▅▆▇▇▇▇██
train_loss,██▇▇▇▆▆▅▅▅▄▄▃▃▃▂▂▂▁▁
val_accuracy,▁▁▂▃▄▅▅▅▅▅▆▆▆▆▇▇▇▇██
val_loss,█▇▇▇▇▆▆▅▅▅▄▄▃▃▂▂▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.5695
test_loss,14506.06022
train_accuracy,0.57528
train_loss,77979.78833
val_accuracy,0.56767
val_loss,8707.98472


[34m[1mwandb[0m: Agent Starting Run: afzeqm3y with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.227437141596824, max=1.0)…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: um84r6yk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12292118582791034, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.5773
test_loss,12542.15173
train_accuracy,0.5808
train_loss,67321.2893
val_accuracy,0.58017
val_loss,7588.68646


[34m[1mwandb[0m: Agent Starting Run: f60wg9ea with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▆▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▃▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.5369
test_loss,14957.13002
train_accuracy,0.54881
train_loss,79890.54076
val_accuracy,0.537
val_loss,8937.27769


[34m[1mwandb[0m: Agent Starting Run: alxzvpk1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11965511173675876, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.7094
test_loss,8255.0789
train_accuracy,0.71615
train_loss,43208.87687
val_accuracy,0.7045
val_loss,4996.84231


[34m[1mwandb[0m: Agent Starting Run: g1hcfhmi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11972202674173117, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▁▄▅█▃▆▂▃▄
train_loss,█▆▃▁▁▁▁▁▁▁
val_accuracy,▄▁▅▅█▄▆▂▃▅
val_loss,█▆▃▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1344
test_loss,22722.43126
train_accuracy,0.13189
train_loss,122424.78844
val_accuracy,0.131
val_loss,13662.93565


[34m[1mwandb[0m: Agent Starting Run: flzucgxg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296564195298372, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇▇█
train_loss,█▄▂▂▁
val_accuracy,▁▆▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.7334
test_loss,7697.427
train_accuracy,0.74113
train_loss,40813.13248
val_accuracy,0.7385
val_loss,4595.32174


[34m[1mwandb[0m: Agent Starting Run: aqy5y7z7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11975362956445226, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8232
test_loss,4848.23495
train_accuracy,0.83911
train_loss,24417.69936
val_accuracy,0.831
val_loss,2851.02561


[34m[1mwandb[0m: Agent Starting Run: 2irasin5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294340987163262, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 6pizb2j5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.003 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.25354095187824405, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▁▁▁▁
train_loss,▁██▅▅
val_accuracy,█▁▁▁▁
val_loss,▁█▇▅▅

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23132.82908
train_accuracy,0.10026
train_loss,124904.08296
val_accuracy,0.09767
val_loss,13892.9164


[34m[1mwandb[0m: Agent Starting Run: ymqw45gc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197009674582234, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.4138
test_loss,18418.77691
train_accuracy,0.41876
train_loss,99259.33382
val_accuracy,0.40683
val_loss,11067.34728


[34m[1mwandb[0m: Agent Starting Run: oegbipkt with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.976182074805928, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: srjyltsi with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇██
train_loss,█▆▅▄▄▃▃▂▂▁
val_accuracy,▁▃▄▅▆▆▆▇▇█
val_loss,█▆▅▄▄▃▃▂▂▁

0,1
epoch,10.0
test_accuracy,0.3593
test_loss,18611.18619
train_accuracy,0.36976
train_loss,99814.73073
val_accuracy,0.37333
val_loss,11092.36351


[34m[1mwandb[0m: Agent Starting Run: 9gpk6rkz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.5273149941882991, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁███████████████████
train_loss,█▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
val_accuracy,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23022.49554
train_accuracy,0.1003
train_loss,124320.46349
val_accuracy,0.09733
val_loss,13814.06932


[34m[1mwandb[0m: Agent Starting Run: x0nat0g2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃▅▅▆▇▇▇▇██████████
train_loss,█▇▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁
val_accuracy,▁▁▃▄▅▆▇▇▇███████████
val_loss,█▇▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.55
test_loss,13110.12429
train_accuracy,0.55835
train_loss,70401.8449
val_accuracy,0.55467
val_loss,7853.13673


[34m[1mwandb[0m: Agent Starting Run: dc1j6a8z with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22671353251318102, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23026.23564
train_accuracy,0.1003
train_loss,124340.73136
val_accuracy,0.09733
val_loss,13816.54772


[34m[1mwandb[0m: Agent Starting Run: xsj29ts8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

[34m[1mwandb[0m: Network error resolved after 0:00:11.754580, resuming normal operation.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▆▇▇▇▇████████████
train_loss,█▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▅▆▇▇▇▇▇▇███████████
val_loss,█▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8283
test_loss,4812.6617
train_accuracy,0.84341
train_loss,23680.42127
val_accuracy,0.83417
val_loss,2784.84092


[34m[1mwandb[0m: Agent Starting Run: nox4o6dt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12299900515510537, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▂▃▄▄▅▅▅▆▆▇▇▇█████
train_loss,███▇▇▇▆▆▅▅▄▄▃▃▃▂▂▂▁▁
val_accuracy,▁▂▃▃▃▄▄▅▅▅▆▆▇▇▇█████
val_loss,███▇▇▆▆▆▅▅▄▄▃▃▃▂▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.6337
test_loss,13354.29436
train_accuracy,0.64333
train_loss,71728.21606
val_accuracy,0.63533
val_loss,8012.16125


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a6wwpmtr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22650475184794086, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▂▃▄▅▆█▆▄▄▄▄
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▂▃▄▅▇█▆▄▄▄▄
val_loss,█▆▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1737
test_loss,21290.35602
train_accuracy,0.18206
train_loss,114788.64588
val_accuracy,0.1855
val_loss,12761.585


[34m[1mwandb[0m: Agent Starting Run: 2863z16e with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▁█▄▄
train_loss,▃▁█▁▁
val_accuracy,▄▁█▄▄
val_loss,▃▁█▁▂

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23083.11245
train_accuracy,0.10026
train_loss,124642.68741
val_accuracy,0.09767
val_loss,13859.94472


[34m[1mwandb[0m: Agent Starting Run: c4ofrcd6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22665728855283981, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.704
test_loss,10931.94859
train_accuracy,0.70459
train_loss,58537.32115
val_accuracy,0.70133
val_loss,6529.24642


[34m[1mwandb[0m: Agent Starting Run: mr7i2yts with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11946591707659873, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8457
test_loss,4279.4175
train_accuracy,0.86461
train_loss,20707.68332
val_accuracy,0.85317
val_loss,2483.6363


[34m[1mwandb[0m: Agent Starting Run: rz28908m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8651260873385467, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▆▆█▅▁
train_loss,▆█▂▁▁
val_accuracy,▆▆█▅▁
val_loss,▆█▂▁▁

0,1
epoch,5.0
test_accuracy,0.0761
test_loss,239094.73301
train_accuracy,0.07609
train_loss,1285005.98507
val_accuracy,0.07717
val_loss,143140.21244


[34m[1mwandb[0m: Agent Starting Run: bwzkb8hp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1228197017623136, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8614
test_loss,3864.77248
train_accuracy,0.88276
train_loss,17451.69782
val_accuracy,0.8685
val_loss,2227.03235


[34m[1mwandb[0m: Agent Starting Run: dhgnpbj2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▇▇███
train_loss,█▇▅▄▃▃▂▂▁▁
val_accuracy,▁▃▄▅▆▆▇███
val_loss,█▇▅▄▄▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.6892
test_loss,9653.62066
train_accuracy,0.69539
train_loss,51457.008
val_accuracy,0.68633
val_loss,5787.95209


[34m[1mwandb[0m: Agent Starting Run: lgo0obfr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9724956063268893, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▅▆▆▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▂▄▄▅▆▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.4173
test_loss,16741.49211
train_accuracy,0.41907
train_loss,90123.47242
val_accuracy,0.42633
val_loss,9949.75668


[34m[1mwandb[0m: Agent Starting Run: vxk9eyqt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2265096246813747, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█████████
train_loss,█▅▁▁▁▁▁▁▁▁
val_accuracy,▁▇████████
val_loss,█▅▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23045.84922
train_accuracy,0.09965
train_loss,124413.97135
val_accuracy,0.10333
val_loss,13826.20025


[34m[1mwandb[0m: Agent Starting Run: b4w9hz9u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▇▆▂▃▁█▆▅█▆
train_loss,▅▄▅▇█▂▁▃▂▁
val_accuracy,▆▆▂▃▁█▆▅▇▆
val_loss,▅▄▅▇█▂▁▃▂▁

0,1
epoch,10.0
test_accuracy,0.1121
test_loss,71064.61737
train_accuracy,0.11487
train_loss,385664.80619
val_accuracy,0.1065
val_loss,42927.29386


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: n3omfati with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12292118582791034, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▇▆▅▄▄▃▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▇▆▅▄▄▃▂▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23053.08727
train_accuracy,0.1003
train_loss,124481.33312
val_accuracy,0.09733
val_loss,13837.2379


[34m[1mwandb[0m: Agent Starting Run: p3e6e28a with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8665727864812534, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▅▆▆▆▇▇▇▇▇▇▇███████
train_loss,█▆▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▅▆▆▆▇▇▇▇▇▇████████
val_loss,█▅▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.84
test_loss,4695.97302
train_accuracy,0.85563
train_loss,22792.01083
val_accuracy,0.848
val_loss,2733.41638


[34m[1mwandb[0m: Agent Starting Run: jdvanhj8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12007057785619762, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 34xnoi4h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▆▇▇▇▇▇███████████
train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▅▆▆▇▇▇▇▇███████████
val_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8329
test_loss,4777.20136
train_accuracy,0.84876
train_loss,22940.31493
val_accuracy,0.83383
val_loss,2790.91405


[34m[1mwandb[0m: Agent Starting Run: nsmnml2a with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▅█▆▇▃▁▂▅▄▃▅▄▁▅▃▂▄▄▆▂
train_loss,▃▁▃▂▄▇▅▃▃▄▃▄▅▄█▆▄▃▂▅
val_accuracy,▅█▆▇▃▁▂▅▄▃▅▄▁▅▃▂▄▄▆▂
val_loss,▃▁▃▂▄▇▅▃▃▄▃▄▅▄█▆▄▃▂▅

0,1
epoch,20.0
test_accuracy,0.2449
test_loss,19300.23456
train_accuracy,0.24617
train_loss,104073.86159
val_accuracy,0.24183
val_loss,11604.31476


[34m[1mwandb[0m: Agent Starting Run: ce88r45w with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22646825746235802, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▁▅█▁
train_loss,▇█▄▁▅
val_accuracy,▄▁▆█▁
val_loss,██▄▁▅

0,1
epoch,5.0
test_accuracy,0.0875
test_loss,109152.8951
train_accuracy,0.08794
train_loss,587329.57809
val_accuracy,0.081
val_loss,66098.10874


[34m[1mwandb[0m: Agent Starting Run: mb1hdvtj with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197325591624879, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▆▇█
train_loss,█▇▅▃▁
val_accuracy,▁▃▆▇█
val_loss,█▇▅▃▁

0,1
epoch,5.0
test_accuracy,0.1972
test_loss,22830.66882
train_accuracy,0.19633
train_loss,123285.93383
val_accuracy,0.1945
val_loss,13699.80755


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 08pmnevf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.6814
test_loss,9632.66219
train_accuracy,0.68643
train_loss,50771.8516
val_accuracy,0.67983
val_loss,5768.8261


[34m[1mwandb[0m: Agent Starting Run: mbjbqqz5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.5283385093167702, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▂▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.7991
test_loss,5516.17523
train_accuracy,0.812
train_loss,28345.10711
val_accuracy,0.80167
val_loss,3257.65558


[34m[1mwandb[0m: Agent Starting Run: 4sdyu2i3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12286566085463908, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.6538
test_loss,9633.2762
train_accuracy,0.66546
train_loss,49882.24812
val_accuracy,0.64483
val_loss,5857.14468


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 87u44i2n with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5en166lk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,█▁██████████████████
train_loss,▅▅▆▆▅█▃▂▂▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▇▁▇▇████████████▇███
val_loss,▅▅▆▆▅█▃▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23018.25878
train_accuracy,0.10031
train_loss,124296.68177
val_accuracy,0.09733
val_loss,13811.27984


[34m[1mwandb[0m: Agent Starting Run: row1zoyb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

[34m[1mwandb[0m: Network error (ReadTimeout), entering retry loop.
[34m[1mwandb[0m: [32m[41mERROR[0m Run row1zoyb errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "E:\Anaconda\lib\site-packages\wandb\agents\pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-9-1355c4795445>", line 7, in wandb_sweep
[34m[1mwandb[0m: [32m[41mERROR[0m     run = wandb.init()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "E:\Anaconda\lib\site-packages\wandb\sdk\wandb_init.py", line 1195, in init
[34m[1mwandb[0m: [32m[41mERROR[0m     raise e
[34m[1mwandb[0m: [32m[41mERROR[0m   File "E:\Anaconda\lib\site-packages\wandb\sdk\wandb_init.py", line 1176, in init
[34m[1mwandb[0m: [32m[41mERROR[0m     run = wi.init()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "E:\Anaconda\lib\site-packages\wan

Problem at: <ipython-input-9-1355c4795445> 7 wandb_sweep


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0vhqpday with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▅█▆▃▁
train_loss,▅▁▂▆█
val_accuracy,▄▃█▅▁
val_loss,▄▁▃██

0,1
epoch,5.0
test_accuracy,0.0807
test_loss,126462.90749
train_accuracy,0.07835
train_loss,691124.14508
val_accuracy,0.079
val_loss,76610.59331


[34m[1mwandb[0m: Agent Starting Run: h55o1sxp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1196867024553375, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▃▁▂▅▇▇███
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▂▃▁▂▅▇▇███
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.4974
test_loss,17913.04181
train_accuracy,0.50209
train_loss,96480.9697
val_accuracy,0.494
val_loss,10755.67687


[34m[1mwandb[0m: Agent Starting Run: hwq5nxvz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11958527370178368, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▇▆▇▇▇▇███████
train_loss,█▅▄▃▂▂▂▂▂▂▂▁▂▁▂▂▂▂▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇▇▆▇▇▇████▇█
val_loss,█▅▄▃▂▁▃▃▄▅▄▃▄▃▂▄▃▅▅▄

0,1
epoch,20.0
test_accuracy,0.8534
test_loss,5863.78695
train_accuracy,0.88111
train_loss,22570.80693
val_accuracy,0.85683
val_loss,3301.58908


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6j2j6msr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▅▆▆▇▇▇▇▇▇████████
train_loss,█▆▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▂▄▅▆▇▇▇▇▇▇█████████
val_loss,█▆▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7904
test_loss,5896.45448
train_accuracy,0.79957
train_loss,30516.72236
val_accuracy,0.78467
val_loss,3497.70407


[34m[1mwandb[0m: Agent Starting Run: w88vkdle with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇████████
train_loss,█▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇▇▇▇█████████
val_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.827
test_loss,4860.27636
train_accuracy,0.84178
train_loss,24271.46343
val_accuracy,0.83067
val_loss,2865.27161


[34m[1mwandb[0m: Agent Starting Run: xgm61cx5 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▂▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.81
test_loss,5107.30162
train_accuracy,0.823
train_loss,25812.73023
val_accuracy,0.814
val_loss,3031.93196


[34m[1mwandb[0m: Agent Starting Run: lpbbnuoy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12307831434255742, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▅▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▆▆▇▇███
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.8209
test_loss,5141.78381
train_accuracy,0.83283
train_loss,26148.51929
val_accuracy,0.825
val_loss,3043.72974


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: q35wfh5t with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▃█
train_loss,▁▃▅▇█
val_accuracy,▁▁▅▁█
val_loss,▁▃▄▇█

0,1
epoch,5.0
test_accuracy,0.125
test_loss,116786.34349
train_accuracy,0.1302
train_loss,620032.55846
val_accuracy,0.13283
val_loss,68957.35012


[34m[1mwandb[0m: Agent Starting Run: 8rx9n2u6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8652781439493804, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▄▁█▅
train_loss,█▂▅▁▄
val_accuracy,▂▄▁█▅
val_loss,█▂▅▁▄

0,1
epoch,5.0
test_accuracy,0.1815
test_loss,38535.44761
train_accuracy,0.18669
train_loss,206586.58061
val_accuracy,0.18433
val_loss,22946.60128


[34m[1mwandb[0m: Agent Starting Run: 4sjtmf8b with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8195
test_loss,4973.71191
train_accuracy,0.83278
train_loss,25294.96592
val_accuracy,0.82233
val_loss,2961.33319


[34m[1mwandb[0m: Agent Starting Run: s7y91qjv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▅▆▇▇▇███
train_loss,█▆▄▃▂▂▁▁▁▁
val_accuracy,▁▂▅▆▇▇▇███
val_loss,█▆▄▃▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.757
test_loss,6657.08466
train_accuracy,0.76419
train_loss,35169.53351
val_accuracy,0.75683
val_loss,4019.16146


[34m[1mwandb[0m: Agent Starting Run: w3lasw09 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▇▇█
train_loss,█▇▆▄▁
val_accuracy,▁▄▇▇█
val_loss,█▇▆▄▁

0,1
epoch,5.0
test_accuracy,0.3144
test_loss,21125.40218
train_accuracy,0.31435
train_loss,114051.56167
val_accuracy,0.30883
val_loss,12680.5964


[34m[1mwandb[0m: Agent Starting Run: iboyedli with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11962731827371012, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8673
test_loss,3959.29842
train_accuracy,0.89104
train_loss,16875.28074
val_accuracy,0.86933
val_loss,2317.17444


[34m[1mwandb[0m: Agent Starting Run: 7x600dj4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▃▄▄▅▅▆▆▆▇▇▇▇▇████
train_loss,█▆▅▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▂▃▃▃▄▅▅▆▆▆▇▇▇▇█████
val_loss,█▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7497
test_loss,6825.29681
train_accuracy,0.75317
train_loss,36004.83468
val_accuracy,0.749
val_loss,4054.46947


[34m[1mwandb[0m: Agent Starting Run: 6td28tem with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12295452490733207, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8204
test_loss,5074.56733
train_accuracy,0.84194
train_loss,23584.92275
val_accuracy,0.8195
val_loss,2983.06237


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rlo7fth6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12306718509811014, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t5dovgl7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p3s3f5hb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: s5dy31a2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▂▃▅▆▇██████
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▂▄▅▇▇██████
val_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1571
test_loss,23106.53558
train_accuracy,0.15522
train_loss,124789.61561
val_accuracy,0.15717
val_loss,13870.91297


[34m[1mwandb[0m: Agent Starting Run: 5o5cfqlc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▄▃▅██▆▅▆▄▅▃▂▂▁▂▃▂▂▂▂
train_loss,▇▆▄▂▁▃▅▂▅▅▆███▇▇▇███
val_accuracy,▄▃▅██▆▅▆▄▆▃▂▂▁▂▃▂▂▂▂
val_loss,▇▆▄▂▁▃▅▂▅▅▆███▇▇▇███

0,1
epoch,20.0
test_accuracy,0.1032
test_loss,22944.49471
train_accuracy,0.10467
train_loss,123826.03755
val_accuracy,0.10233
val_loss,13766.59448


[34m[1mwandb[0m: Agent Starting Run: zva36dn9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


  return -np.sum(y * np.log(y_hat))
  return -np.sum(y * np.log(y_hat))


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█▆█▆
train_loss,█▁▃▂
val_accuracy,▁▇▆█▆
val_loss,▅▁▃▄█

0,1
epoch,5.0
test_accuracy,0.8504
test_loss,5179.2615
train_accuracy,0.86811
train_loss,
val_accuracy,0.8505
val_loss,2983.90621


[34m[1mwandb[0m: Agent Starting Run: e6qlk5sw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2261894292498461, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▄▅▅▅▅▁▅▃▅
train_loss,▁▄▃▂▃▃█▃▄▂
val_accuracy,█▄▅▅▅▅▁▅▃▅
val_loss,▁▄▃▂▃▃█▃▄▂

0,1
epoch,10.0
test_accuracy,0.4136
test_loss,15234.8592
train_accuracy,0.41656
train_loss,81951.16773
val_accuracy,0.40833
val_loss,9142.8621


[34m[1mwandb[0m: Agent Starting Run: bc98deqz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1228878648233487, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▅▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.6421
test_loss,10009.27015
train_accuracy,0.65761
train_loss,52047.28879
val_accuracy,0.65217
val_loss,5940.06002


[34m[1mwandb[0m: Agent Starting Run: yteadbzv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇█▆
train_loss,█▆▆▄▁
val_accuracy,▁▆▇█▆
val_loss,█▆▆▄▁

0,1
epoch,5.0
test_accuracy,0.1524
test_loss,21954.46184
train_accuracy,0.1507
train_loss,118448.70525
val_accuracy,0.15183
val_loss,13167.14741


[34m[1mwandb[0m: Agent Starting Run: 3ok20pv8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11954325867369346, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▄█▅▃▅▅▇▅▅▁
train_loss,▇▁▁▁▂▃▇█▅▄
val_accuracy,▅█▆▄▆▅█▆▅▁
val_loss,▇▂▂▁▂▄██▅▅

0,1
epoch,10.0
test_accuracy,0.0643
test_loss,177673.51485
train_accuracy,0.06559
train_loss,953341.01757
val_accuracy,0.05667
val_loss,106685.65375


[34m[1mwandb[0m: Agent Starting Run: damrhttl with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.8255
test_loss,5133.29832
train_accuracy,0.84211
train_loss,24458.18052
val_accuracy,0.82517
val_loss,3008.86971


[34m[1mwandb[0m: Agent Starting Run: f6trzvhn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9720537832849987, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▅▇▇█▅▃▃▃▁▂▂▂▂▂▂▃▂▃▂▂
train_loss,▄▃▂▁▂▄▅▃█▆▆▅▅▅▅▅▅▅▅▅
val_accuracy,▅▇▇█▅▃▃▃▁▂▂▂▂▂▂▃▂▃▂▂
val_loss,▄▃▂▁▂▄▅▃█▆▆▅▅▅▅▅▅▅▅▅

0,1
epoch,20.0
test_accuracy,0.1075
test_loss,22951.99834
train_accuracy,0.10722
train_loss,123945.35056
val_accuracy,0.10867
val_loss,13772.69423


[34m[1mwandb[0m: Agent Starting Run: zqen5209 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11963783403656822, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▂▄▅▄▄▅▅▆▆▆▇▇▇▇███
train_loss,██▇▇▇▇▇▆▆▅▄▄▃▃▂▂▂▁▁▁
val_accuracy,▁▁▂▂▄▅▄▄▅▅▅▆▆▆▇▇▇███
val_loss,██▇▇▇▇▇▆▆▅▄▄▃▃▂▂▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.5487
test_loss,15287.23987
train_accuracy,0.55252
train_loss,82400.12037
val_accuracy,0.54717
val_loss,9189.76536


[34m[1mwandb[0m: Agent Starting Run: s9vdibid with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2268648838845883, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.6171
test_loss,11042.7506
train_accuracy,0.62513
train_loss,58530.59143
val_accuracy,0.6195
val_loss,6613.75234


[34m[1mwandb[0m: Agent Starting Run: 2g5vqdac with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12018721299894031, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: my2m4ew7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11961680435928985, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▇██
train_loss,█▄▂▁▁
val_accuracy,▁▃▆▇█
val_loss,█▅▂▁▁

0,1
epoch,5.0
test_accuracy,0.8544
test_loss,4233.80702
train_accuracy,0.8787
train_loss,18178.40756
val_accuracy,0.86333
val_loss,2413.87888


[34m[1mwandb[0m: Agent Starting Run: paijy8f7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8462
test_loss,4375.98993
train_accuracy,0.86213
train_loss,21319.2727
val_accuracy,0.84817
val_loss,2578.15966


[34m[1mwandb[0m: Agent Starting Run: xiwn862g with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11962731827371012, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃▄▅▆▆▆▆▆▇▇▇▇█████▇
train_loss,███▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▂
val_accuracy,▁▁▃▄▅▆▆▆▆▇▇▇▇▇██████
val_loss,███▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▂

0,1
epoch,20.0
test_accuracy,0.6253
test_loss,11792.59583
train_accuracy,0.62769
train_loss,63301.9569
val_accuracy,0.63017
val_loss,7011.38684


[34m[1mwandb[0m: Agent Starting Run: row1zoyb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8661154116819142, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.7636
test_loss,6533.80535
train_accuracy,0.77239
train_loss,34339.73407
val_accuracy,0.76367
val_loss,3893.91304


[34m[1mwandb[0m: Agent Starting Run: 0jlxtbda with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11955375966268447, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▃▃▄▅▅▆▆▆▇▇▇▇▇████
train_loss,█▆▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▁▂▃▄▄▅▅▆▆▆▇▇▇▇▇████
val_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.5041
test_loss,15899.30067
train_accuracy,0.51381
train_loss,85156.3415
val_accuracy,0.51233
val_loss,9487.61819


[34m[1mwandb[0m: Agent Starting Run: yq48nk39 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9760959689512216, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,▁
val_accuracy,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 1y5fptel with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23198.59156
train_accuracy,0.0997
train_loss,125275.35186
val_accuracy,0.10267
val_loss,13916.15263


[34m[1mwandb[0m: Agent Starting Run: 7v7gu7zq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22664557518235345, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▇▇███
train_loss,█▅▄▃▂▂▁▁▁▁
val_accuracy,▁▃▄▅▇▇▇███
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8137
test_loss,5242.53168
train_accuracy,0.8288
train_loss,26732.27403
val_accuracy,0.81717
val_loss,3083.66336


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6l7wagt8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.119493537325244, max=1.0)…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇████
train_loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▄▅▅▆▆▆▆▇▇▇▇▇█████
val_loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7743
test_loss,6130.4978
train_accuracy,0.78767
train_loss,30763.248
val_accuracy,0.78117
val_loss,3610.75136


[34m[1mwandb[0m: Agent Starting Run: hdpzj85j with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▂▂▂▃▃▄▅▅▆▆▇▇▇████
train_loss,██▇▇▇▇▇▇▆▆▅▅▄▄▃▃▂▂▁▁
val_accuracy,▁▁▁▁▂▂▃▃▄▄▅▆▆▇▇▇████
val_loss,██▇▇▇▇▇▇▆▆▅▅▄▄▃▃▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.5722
test_loss,15398.31098
train_accuracy,0.58057
train_loss,83104.14222
val_accuracy,0.57117
val_loss,9266.97487


[34m[1mwandb[0m: Agent Starting Run: i90utnpg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,████▁
train_loss,█▅▁
val_accuracy,████▁
val_loss,▃▃▁█

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mnjst7xz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▃▃▆▃▃▃▁▃▃█▃▃▃▃▃▃▃▃▃▃
train_loss,█▇▂▃▁▄▄▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▃▃▆▃▃▃▁▄▄█▃▃▃▃▃▃▃▃▃▃
val_loss,█▇▂▃▁▄▄▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.0999
test_loss,23036.45413
train_accuracy,0.10011
train_loss,124367.07987
val_accuracy,0.09667
val_loss,13827.29301


[34m[1mwandb[0m: Agent Starting Run: 1nd5yaho with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22699169757993287, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 08nu2snq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.14622766088564135, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▂▂▁▂▃▅▇█▆▅▆▅▅▆▆▇▇
train_loss,█▆▅▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁
val_accuracy,▁▁▁▂▁▁▁▃▄▇█▆▅▆▅▆▆▆▇▆
val_loss,█▆▅▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.2325
test_loss,21238.81657
train_accuracy,0.22802
train_loss,114552.51227
val_accuracy,0.2205
val_loss,12732.31151


[34m[1mwandb[0m: Agent Starting Run: wulb7889 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11958527370178368, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.3725
test_loss,18621.24268
train_accuracy,0.37613
train_loss,100449.3137
val_accuracy,0.38883
val_loss,11103.4914


[34m[1mwandb[0m: Agent Starting Run: b9g48zzs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11964835164835165, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▁█▃▄
train_loss,▆▅▅▁█
val_accuracy,▁▁█▂▄
val_loss,▆▅▅▁█

0,1
epoch,5.0
test_accuracy,0.3458
test_loss,18170.63295
train_accuracy,0.34339
train_loss,98192.85545
val_accuracy,0.34117
val_loss,10922.50653


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tsitvoja with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197325591624879, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█▁▃▄
train_loss,█▁▁▁▁
val_accuracy,▁█▂▃▄
val_loss,█▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1267
test_loss,23021.66759
train_accuracy,0.12744
train_loss,124316.34848
val_accuracy,0.1235
val_loss,13813.57903


[34m[1mwandb[0m: Agent Starting Run: jcmys4n2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22696530522617478, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▆▆▆▇▇▇▇▇███████
train_loss,█▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇███████
val_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8103
test_loss,5293.23521
train_accuracy,0.82476
train_loss,26865.60514
val_accuracy,0.81383
val_loss,3135.29484


[34m[1mwandb[0m: Agent Starting Run: whg8zq7v with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇▇█
train_loss,█▃▂▁▁
val_accuracy,▁▆▇▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.7324
test_loss,7248.198
train_accuracy,0.73644
train_loss,38417.76658
val_accuracy,0.73333
val_loss,4326.57232


[34m[1mwandb[0m: Agent Starting Run: 3o8tspg2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12306718509811014, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▃█▇▃▁
train_loss,▁▃▄▆█
val_accuracy,██▅▁▁
val_loss,▁▃▆▇█

0,1
epoch,5.0
test_accuracy,0.0843
test_loss,179258.71736
train_accuracy,0.08489
train_loss,965807.49467
val_accuracy,0.08067
val_loss,108613.57137


[34m[1mwandb[0m: Agent Starting Run: j2yatil1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22680866044710438, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8428
test_loss,4415.74222
train_accuracy,0.85963
train_loss,21524.55163
val_accuracy,0.8445
val_loss,2593.54853


[34m[1mwandb[0m: Agent Starting Run: zn3k4kye with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22674980240625275, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▂▃▄▅▆▇▇█
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▂▂▃▄▅▆▆▇█
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.2902
test_loss,20557.89336
train_accuracy,0.30093
train_loss,109997.70046
val_accuracy,0.30267
val_loss,12197.7125


[34m[1mwandb[0m: Agent Starting Run: eyhnphyy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1200811716957826, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: gwn3v7ir with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294340987163262, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁█▁
train_loss,█▁▁▁▁
val_accuracy,▁▁▁█▁
val_loss,█▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.0998
test_loss,23022.34608
train_accuracy,0.10078
train_loss,124319.45616
val_accuracy,0.0965
val_loss,13813.96418


[34m[1mwandb[0m: Agent Starting Run: twdci3ge with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▅▅▆▆▆▆▇▇▇▇▇▇████
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▂▃▄▅▅▆▆▆▆▇▇▇▇▇█████
val_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7987
test_loss,5617.40844
train_accuracy,0.80863
train_loss,29259.18391
val_accuracy,0.79917
val_loss,3360.08995


[34m[1mwandb[0m: Agent Starting Run: lm0k7v5k with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11959198030249736, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇██
train_loss,█▄▂▂▁
val_accuracy,▁▆▇██
val_loss,█▄▂▁▁

0,1
epoch,5.0
test_accuracy,0.8007
test_loss,5172.80793
train_accuracy,0.81481
train_loss,26074.3628
val_accuracy,0.81017
val_loss,3010.17289


[34m[1mwandb[0m: Agent Starting Run: dx2dyaxa with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12014477401129943, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bmrbgk3v with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12306718509811014, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇▇██████
train_loss,█▇▅▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇▇██████
val_loss,█▇▅▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6708
test_loss,8981.59023
train_accuracy,0.68017
train_loss,47302.69265
val_accuracy,0.67317
val_loss,5322.27615


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ka8gielp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11963783403656822, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▂▂▃▄▄▅▅▆▇███▆▄▄▂▂▂
train_loss,▅▇█▇▇██▇▅▅▄▃▂▁▂▂▂▂▂▁
val_accuracy,▁▄▂▂▄▄▅▅▆▇████▆▅▄▃▃▃
val_loss,▅▇█▇▇██▇▅▄▄▃▂▁▂▂▁▂▂▁

0,1
epoch,20.0
test_accuracy,0.0877
test_loss,76753.25883
train_accuracy,0.08291
train_loss,417929.75488
val_accuracy,0.08083
val_loss,46634.5483


[34m[1mwandb[0m: Agent Starting Run: qxbvzppp with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11953275952924644, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▅▇▇▇█████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8087
test_loss,5317.25301
train_accuracy,0.82698
train_loss,26363.10809
val_accuracy,0.81367
val_loss,3163.16255


[34m[1mwandb[0m: Agent Starting Run: 5t3hhogq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8656585194302796, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▇▄▁▃▃█▂▃▁▆
train_loss,▁▅▃██▅▆█▄▁
val_accuracy,▇▄▁▂▂█▁▃▁▆
val_loss,▁▅▃██▅▆█▄▁

0,1
epoch,10.0
test_accuracy,0.1324
test_loss,66253.05333
train_accuracy,0.13285
train_loss,357896.56668
val_accuracy,0.12933
val_loss,39970.04119


[34m[1mwandb[0m: Agent Starting Run: bwz7z3i7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11957095129242132, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▅▄▆▁█▇▃▂▅▄
train_loss,█▆▄▅▁▃▄▄▂▁
val_accuracy,▆▅▆▁██▃▂▆▅
val_loss,█▆▃▅▁▃▄▄▂▁

0,1
epoch,10.0
test_accuracy,0.096
test_loss,67122.95524
train_accuracy,0.09585
train_loss,362856.42992
val_accuracy,0.09933
val_loss,39755.56816


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kasu092w with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃▄▄▄▄▅▅▄▄▅▅▆▆▇▇███
train_loss,█▇▇▇▆▆▆▅▅▄▄▄▃▃▂▂▂▂▁▁
val_accuracy,▁▁▃▄▄▄▄▅▅▄▄▅▅▆▆▇▇███
val_loss,█▇▇▇▆▆▆▅▅▄▄▃▃▃▂▂▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.3548
test_loss,18821.978
train_accuracy,0.35761
train_loss,101226.00339
val_accuracy,0.353
val_loss,11301.43385


[34m[1mwandb[0m: Agent Starting Run: blb9kbzg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8662560492740871, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.8198
test_loss,4934.00441
train_accuracy,0.85246
train_loss,22063.81224
val_accuracy,0.835
val_loss,2843.60357


[34m[1mwandb[0m: Agent Starting Run: o7otn3zr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.868560338743825, max=1.0)…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: or5t63t4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▅█▄▁▇
train_loss,▂▁▅█▁
val_accuracy,▅▇▃▁█
val_loss,▄▂▆█▁

0,1
epoch,5.0
test_accuracy,0.1512
test_loss,91538.01876
train_accuracy,0.15631
train_loss,494081.71395
val_accuracy,0.16117
val_loss,54614.01135


[34m[1mwandb[0m: Agent Starting Run: 1o3pwmca with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2278458689709902, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 9uqk4ji5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇██
train_loss,█▄▃▂▁
val_accuracy,▁▅▇██
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.7475
test_loss,7413.3949
train_accuracy,0.75031
train_loss,39155.57926
val_accuracy,0.7435
val_loss,4433.746


[34m[1mwandb[0m: Agent Starting Run: xdtriw3b with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11961680435928985, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇▇██████
val_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7449
test_loss,7078.54458
train_accuracy,0.75615
train_loss,36689.29086
val_accuracy,0.74983
val_loss,4184.04731


[34m[1mwandb[0m: Agent Starting Run: ecq7dpyf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇████
train_loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇████
val_loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7208
test_loss,7795.60119
train_accuracy,0.72639
train_loss,41356.88094
val_accuracy,0.71967
val_loss,4630.19465


[34m[1mwandb[0m: Agent Starting Run: p2uyop2w with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▅▁█▄
train_loss,▄▁█▁▇
val_accuracy,█▆▁█▄
val_loss,▄▂█▁▇

0,1
epoch,5.0
test_accuracy,0.2031
test_loss,21328.83953
train_accuracy,0.20485
train_loss,114741.8041
val_accuracy,0.20733
val_loss,12706.51311


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ehwaftm6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▆▇▇▇▇▇▇▇█████
train_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
val_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8576
test_loss,4004.46117
train_accuracy,0.88202
train_loss,18328.38119
val_accuracy,0.86683
val_loss,2355.08167


[34m[1mwandb[0m: Agent Starting Run: ykdcehcz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.972485935302391, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇▇▇████
train_loss,█▇▆▄▃▃▂▂▁▁
val_accuracy,▁▄▆▇▇▇▇███
val_loss,█▇▆▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.7202
test_loss,8511.31566
train_accuracy,0.72552
train_loss,45457.55499
val_accuracy,0.72533
val_loss,5093.91006


[34m[1mwandb[0m: Agent Starting Run: h0sdjyfk with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1200141193081539, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: p3f7hk2e with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▄▅▅▆▆▆▆▇▇▇▇▇▇████
train_loss,█▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁
val_accuracy,▁▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇███
val_loss,█▆▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.804
test_loss,5282.26865
train_accuracy,0.83657
train_loss,24213.24224
val_accuracy,0.81533
val_loss,3027.92128


[34m[1mwandb[0m: Agent Starting Run: tyh7v0bn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11949740796063615, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▁▅▃▇▄▃▆▆█
train_loss,▆█▅▅▄▄▄▁▁▁
val_accuracy,▅▁▆▃▇▅▃▆▆█
val_loss,▆█▅▅▄▄▄▂▁▁

0,1
epoch,10.0
test_accuracy,0.109
test_loss,87056.22469
train_accuracy,0.10552
train_loss,472810.46804
val_accuracy,0.10483
val_loss,52309.94854


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 52hx2m7z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22740910695375927, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 8hwfmjqj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12003880754983243, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7qd0ntmc with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 45x988ws with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2265563088843885, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▃▁▂█
train_loss,▆▁▆█▅
val_accuracy,▁▆▄▆█
val_loss,▇▁▃█▇

0,1
epoch,5.0
test_accuracy,0.1051
test_loss,119642.67644
train_accuracy,0.10091
train_loss,644819.71317
val_accuracy,0.104
val_loss,71770.33388


[34m[1mwandb[0m: Agent Starting Run: ckeplfwd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.972485935302391, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▅▆▇▇███
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.8494
test_loss,4231.67243
train_accuracy,0.86713
train_loss,20386.7679
val_accuracy,0.85133
val_loss,2488.78158


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d908vjof with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11973939073780596, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▆▇▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▄▆▆▇▇▇███
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.6759
test_loss,12059.35645
train_accuracy,0.67987
train_loss,64751.50868
val_accuracy,0.675
val_loss,7237.23781


[34m[1mwandb[0m: Agent Starting Run: ppakmgmn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12016598975807875, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: nbx1g2f3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9724077328646749, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇▇██████
train_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇▇██████
val_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6381
test_loss,9847.54328
train_accuracy,0.65019
train_loss,51617.99026
val_accuracy,0.64883
val_loss,5776.8695


[34m[1mwandb[0m: Agent Starting Run: q4wwpski with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12018721299894031, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bo8jbyro with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,█▆▄▃▁
val_accuracy,▁▁▁▁▁
val_loss,█▆▄▃▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23027.13054
train_accuracy,0.1003
train_loss,124346.24355
val_accuracy,0.09733
val_loss,13816.61386


[34m[1mwandb[0m: Agent Starting Run: suv7yxdj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: uk6g3n3t with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12291007681879801, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.7087
test_loss,8513.57431
train_accuracy,0.71252
train_loss,45349.44425
val_accuracy,0.711
val_loss,5086.70406


[34m[1mwandb[0m: Agent Starting Run: unu9rzrv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12293229684534032, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,█▆▄▃▁
val_accuracy,▁▁▁▁▁
val_loss,█▆▄▃▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23078.90934
train_accuracy,0.10026
train_loss,124616.46459
val_accuracy,0.09767
val_loss,13856.68895


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e662xncc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11980633802816902, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▂▅▃▅▄▅█▅
train_loss,▂▁▂▃▃▃▄▂█▂
val_accuracy,▁▄▁▅▃▅▃▅█▅
val_loss,▂▁▂▃▃▃▄▂█▂

0,1
epoch,10.0
test_accuracy,0.1346
test_loss,131908.87559
train_accuracy,0.13341
train_loss,707172.61835
val_accuracy,0.1265
val_loss,80066.37595


[34m[1mwandb[0m: Agent Starting Run: ogzw8dvo with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11971149617380597, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▅▆▇▇██
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▂▃▄▅▆▇▇██
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.418
test_loss,16885.12845
train_accuracy,0.41276
train_loss,91262.71728
val_accuracy,0.411
val_loss,10113.01808


[34m[1mwandb[0m: Agent Starting Run: 3bvv0ddf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▃▄▅▅▅▆▆▆▇▇▇▇▇▇███
train_loss,█▅▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▂▃▃▄▄▅▅▆▆▆▆▇▇▇▇▇███
val_loss,█▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.4593
test_loss,16076.84784
train_accuracy,0.45726
train_loss,87097.96079
val_accuracy,0.45367
val_loss,9727.10044


[34m[1mwandb[0m: Agent Starting Run: jsphuh6q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▅▁█
train_loss,█▄▁▆█
val_accuracy,▁▂▄▁█
val_loss,█▄▁▅█

0,1
epoch,5.0
test_accuracy,0.1825
test_loss,114075.48522
train_accuracy,0.18457
train_loss,616636.21713
val_accuracy,0.187
val_loss,67955.26941


[34m[1mwandb[0m: Agent Starting Run: 0fp0ob48 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1227498869289914, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 2iz0zl2m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,█▇▇████▇▆▄▅▄▂▄▄▁▃▃▄▅
train_loss,▁▂▂▁▁▁▁▂▃▃▃▄▅▄▅█▆▄▄▃
val_accuracy,█▇▇████▇▆▄▅▄▂▄▄▁▃▃▄▅
val_loss,▁▂▂▁▁▁▁▂▃▃▃▄▆▄▅█▆▅▄▃

0,1
epoch,20.0
test_accuracy,0.4513
test_loss,15511.08315
train_accuracy,0.45785
train_loss,83035.6879
val_accuracy,0.4565
val_loss,9265.66012


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: oe4h1asi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▄█▇▁▇
train_loss,█▃▁▁▁
val_accuracy,▆▇▇▁█
val_loss,█▃▁▁▁

0,1
epoch,5.0
test_accuracy,0.1532
test_loss,22712.20078
train_accuracy,0.15485
train_loss,122117.8489
val_accuracy,0.15467
val_loss,13624.08127


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xzqm9iyi with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2270807672004223, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▆▅▃▁
val_accuracy,▁▄▅▇█
val_loss,█▆▅▃▁

0,1
epoch,5.0
test_accuracy,0.4717
test_loss,17072.54105
train_accuracy,0.4755
train_loss,91902.65657
val_accuracy,0.46883
val_loss,10279.3191


[34m[1mwandb[0m: Agent Starting Run: yn0tcu4p with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197430934365652, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▆█
train_loss,█▆▅▃▁
val_accuracy,▁▂▄▆█
val_loss,█▆▅▃▁

0,1
epoch,5.0
test_accuracy,0.2096
test_loss,22898.09712
train_accuracy,0.21428
train_loss,123641.32561
val_accuracy,0.207
val_loss,13740.23106


[34m[1mwandb[0m: Agent Starting Run: swdb5a3i with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁█▆▆▆▆▅▅▄▄▄▄▄▄▄▄
train_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,█████▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.0998
test_loss,23025.5736
train_accuracy,0.10011
train_loss,124337.58994
val_accuracy,0.09733
val_loss,13815.90897


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2lw93gno with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22657760590613465, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,█▄▆▅▄▁▃▆▇▅▅▄▄▅▅▆▅▄▅▃
train_loss,▁▄▃▄▄█▄▃▂▃▃▅▄▃▃▃▃▄▄▆
val_accuracy,█▄▆▅▄▁▃▆▇▅▆▄▄▅▅▆▅▄▅▃
val_loss,▁▄▃▃▄█▄▃▂▃▃▅▄▃▃▃▃▄▄▆

0,1
epoch,20.0
test_accuracy,0.271
test_loss,21036.07024
train_accuracy,0.28028
train_loss,112926.10329
val_accuracy,0.27867
val_loss,12521.02545


[34m[1mwandb[0m: Agent Starting Run: qu4ytxhk with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2261894292498461, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.8441
test_loss,4333.89547
train_accuracy,0.86378
train_loss,20821.12349
val_accuracy,0.85033
val_loss,2534.95571


[34m[1mwandb[0m: Agent Starting Run: 3kcpaxe9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12307831434255742, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃▅█
train_loss,█▅▃▂▁
val_accuracy,▁▂▄▅█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.1662
test_loss,22457.57977
train_accuracy,0.16228
train_loss,121361.40576
val_accuracy,0.15683
val_loss,13471.0437


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yfqmji1q with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12295600325232632, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▄▅▆▆▆▇▇▇▇▇███████
train_loss,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▄▅▅▆▆▇▇▇▇▇███████
val_loss,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7132
test_loss,7756.02326
train_accuracy,0.72065
train_loss,40633.62029
val_accuracy,0.71867
val_loss,4580.24086


[34m[1mwandb[0m: Agent Starting Run: 8gvsr4wr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8655824175824176, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▅▄▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23042.74448
train_accuracy,0.1003
train_loss,124427.34899
val_accuracy,0.09733
val_loss,13829.11873


[34m[1mwandb[0m: Agent Starting Run: 4o7g91pu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,█▆▅▃▁
val_accuracy,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23020.37141
train_accuracy,0.09978
train_loss,124309.24554
val_accuracy,0.102
val_loss,13812.80791


[34m[1mwandb[0m: Agent Starting Run: dr3wk16u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▂▇▁▇
train_loss,▁▄█▅▁
val_accuracy,█▃▇▁▇
val_loss,▁▄█▅▁

0,1
epoch,5.0
test_accuracy,0.1134
test_loss,138663.28974
train_accuracy,0.11344
train_loss,750407.20834
val_accuracy,0.104
val_loss,83500.42419


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ngv6pfmh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294489611562782, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇█████████
train_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇▇▇█████████
val_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8261
test_loss,5347.23627
train_accuracy,0.83952
train_loss,26497.85344
val_accuracy,0.832
val_loss,3115.87641


[34m[1mwandb[0m: Agent Starting Run: gz3yxoqq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11958527370178368, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▃▁▂▄▃▂▇█▇
train_loss,█▆▅▅▄▄▃▂▂▁
val_accuracy,▄▄▁▂▃▃▂▇█▇
val_loss,█▆▅▅▄▄▃▂▂▁

0,1
epoch,10.0
test_accuracy,0.3073
test_loss,19795.12674
train_accuracy,0.30587
train_loss,106809.54795
val_accuracy,0.30117
val_loss,11879.51592


[34m[1mwandb[0m: Agent Starting Run: prweapa8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12292118582791034, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▆▂▇▇▄▄▄█
train_loss,█▃▄▂▁▁▁▁▁▁
val_accuracy,▁▁▆▂▆▇▄▄▄█
val_loss,█▃▄▂▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.2832
test_loss,20391.70837
train_accuracy,0.28517
train_loss,109846.18125
val_accuracy,0.28283
val_loss,12304.2647


[34m[1mwandb[0m: Agent Starting Run: 5w6k0hv4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▃▁▁▂▃▃▅▄▄▃▅▇▆▆▆▇▇██▅
train_loss,█▇▅▃▃▂▂▂▃▄▃▃▂▁▁▁▁▁▁▁
val_accuracy,▃▁▁▂▃▃▅▄▄▃▅▇▆▇▇█▇██▆
val_loss,█▇▅▃▃▂▂▂▃▄▃▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.2086
test_loss,25867.95894
train_accuracy,0.20885
train_loss,140560.70385
val_accuracy,0.20817
val_loss,15515.96113


[34m[1mwandb[0m: Agent Starting Run: i46555ms with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▂▂▁
val_accuracy,▁▆▇▇█
val_loss,█▃▁▂▁

0,1
epoch,5.0
test_accuracy,0.8661
test_loss,3975.54633
train_accuracy,0.88863
train_loss,16811.90461
val_accuracy,0.86417
val_loss,2347.93272


[34m[1mwandb[0m: Agent Starting Run: rfear43w with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: rnmqzhcy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11961680435928985, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,███▁█
train_loss,█▁▁▁▁
val_accuracy,▇█▅▁▇
val_loss,█▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1004
test_loss,23045.23786
train_accuracy,0.10031
train_loss,124487.06511
val_accuracy,0.10183
val_loss,13836.76134


[34m[1mwandb[0m: Agent Starting Run: p93wrejq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11986964946274441, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▇▇▇▆█
train_loss,█████████████████▇▄▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▇▇▇▆█
val_loss,█████████████████▇▄▁

0,1
epoch,20.0
test_accuracy,0.2819
test_loss,17577.73107
train_accuracy,0.279
train_loss,94754.01572
val_accuracy,0.2735
val_loss,10536.83733


[34m[1mwandb[0m: Agent Starting Run: vw7tp10c with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294340987163262, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▂▄▁▄
train_loss,▂█▂▂▁
val_accuracy,█▆▂▁▆
val_loss,▅█▂▁▂

0,1
epoch,5.0
test_accuracy,0.0853
test_loss,70228.8115
train_accuracy,0.08557
train_loss,377377.95182
val_accuracy,0.09067
val_loss,41416.67185


[34m[1mwandb[0m: Agent Starting Run: b3gohw8r with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.6261
test_loss,11223.85186
train_accuracy,0.64161
train_loss,58912.36084
val_accuracy,0.63183
val_loss,6684.10779


[34m[1mwandb[0m: Agent Starting Run: 5t6epsdf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1200811716957826, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: yu3johe6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11964835164835165, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▂▂▃▇▆█▇▆
train_loss,█▇▅▅▄▂▁▁▁▂
val_accuracy,▁▃▂▂▃▇▇██▇
val_loss,█▇▅▅▄▂▁▁▁▂

0,1
epoch,10.0
test_accuracy,0.1226
test_loss,84436.13591
train_accuracy,0.11919
train_loss,457429.52826
val_accuracy,0.12017
val_loss,50837.67806


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b02pkarr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.527723924001551, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▂▃▄▆█▇▇
train_loss,█▇▅▅▄▃▂▁▁▁
val_accuracy,▁▂▃▂▃▄▇█▇▇
val_loss,█▇▅▅▄▃▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.147
test_loss,80996.54365
train_accuracy,0.14931
train_loss,438390.66192
val_accuracy,0.1485
val_loss,49253.84912


[34m[1mwandb[0m: Agent Starting Run: o8h58any with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▅█▁▄▇▇▇▁██
train_loss,█▅▄▄▃▃▂▃▂▁
val_accuracy,▄▆▁▄▆▇▆▁██
val_loss,█▅▄▄▃▃▂▃▂▁

0,1
epoch,10.0
test_accuracy,0.1269
test_loss,31658.90929
train_accuracy,0.12724
train_loss,171550.31071
val_accuracy,0.12983
val_loss,18990.51797


[34m[1mwandb[0m: Agent Starting Run: 1aq6eo7f with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁█▁▁▁
train_loss,▁▁▁▁▂▂▂▂▂█
val_accuracy,▁▁▁▁▁▁████
val_loss,▁▁▂▂▃▃▄▅▆█

0,1
epoch,10.0
test_accuracy,0.1001
test_loss,23033.80716
train_accuracy,0.09981
train_loss,124388.08961
val_accuracy,0.102
val_loss,13822.43108


[34m[1mwandb[0m: Agent Starting Run: in12uawd with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8651260873385467, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▅▆▆█▁▃▅▄▄▆
train_loss,▁▃▅▇▅▄▃█▂█
val_accuracy,▅▆▆█▁▃▄▃▄▅
val_loss,▁▂▅▇▅▄▃█▂█

0,1
epoch,10.0
test_accuracy,0.1383
test_loss,224834.09353
train_accuracy,0.14004
train_loss,1215295.87998
val_accuracy,0.13867
val_loss,134388.48851


[34m[1mwandb[0m: Agent Starting Run: a0enumrj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,███▃▁
train_loss,▄▂▁▇█
val_accuracy,███▃▁
val_loss,▄▂▁▇█

0,1
epoch,5.0
test_accuracy,0.1004
test_loss,23063.75657
train_accuracy,0.1007
train_loss,124536.60681
val_accuracy,0.09833
val_loss,13844.14478


[34m[1mwandb[0m: Agent Starting Run: kn8imn1t with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆████▇▇▇
train_loss,▁▃▅▆▇▇▇███
val_accuracy,▁▅▇███▇▇▇█
val_loss,▁▃▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.1105
test_loss,191123.68545
train_accuracy,0.10746
train_loss,1035442.16798
val_accuracy,0.11183
val_loss,114887.37191


[34m[1mwandb[0m: Agent Starting Run: 1r1ixcef with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▂▃▂▁
train_loss,█▅▃▁▃
val_accuracy,█▂▃▂▁
val_loss,█▄▃▁▂

0,1
epoch,5.0
test_accuracy,0.0347
test_loss,169721.19881
train_accuracy,0.03426
train_loss,906713.4872
val_accuracy,0.0345
val_loss,100687.3169


[34m[1mwandb[0m: Agent Starting Run: evgby53l with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fw8wefkn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11977470738361348, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁███▁▁▁▁▁▁
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,█▁▁▁██████
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23034.13883
train_accuracy,0.09978
train_loss,124383.09786
val_accuracy,0.102
val_loss,13821.73512


[34m[1mwandb[0m: Agent Starting Run: eupakbrv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11980633802816902, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,█▇▆▅▅▅▄▄▃▃▃▂▁▁▁▂▃▄▅▇
train_loss,▁▃▅▇▇▇▇███▇▆▆▅▅▅▄▃▃▂
val_accuracy,██▆▆▅▅▅▄▃▃▃▂▁▂▂▁▂▄▅▇
val_loss,▁▃▅▇▇▇▇▇██▇▆▆▆▅▅▅▄▃▂

0,1
epoch,20.0
test_accuracy,0.0999
test_loss,132899.66273
train_accuracy,0.10056
train_loss,712890.98919
val_accuracy,0.099
val_loss,81129.87837


[34m[1mwandb[0m: Agent Starting Run: 9c6x4xjq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12310057887120116, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄█▆▅
train_loss,█▄▂▁▂
val_accuracy,▁▄█▆▅
val_loss,█▄▂▁▂

0,1
epoch,5.0
test_accuracy,0.2095
test_loss,22322.34065
train_accuracy,0.20996
train_loss,120487.67412
val_accuracy,0.2025
val_loss,13398.81915


[34m[1mwandb[0m: Agent Starting Run: j72mxk86 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197009674582234, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▂▃▅▆▇██
train_loss,████▇▆▄▃▂▁
val_accuracy,▁▁▁▂▃▅▆▇██
val_loss,████▇▆▄▃▂▁

0,1
epoch,10.0
test_accuracy,0.524
test_loss,14922.33667
train_accuracy,0.5263
train_loss,80576.37336
val_accuracy,0.51483
val_loss,8989.84692


[34m[1mwandb[0m: Agent Starting Run: adblzyb2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▃▁▃▆
train_loss,▁█▂▃▅
val_accuracy,█▃▁▃▆
val_loss,▁█▂▃▅

0,1
epoch,5.0
test_accuracy,0.1451
test_loss,163574.77408
train_accuracy,0.14711
train_loss,883022.14641
val_accuracy,0.1535
val_loss,98211.5449


[34m[1mwandb[0m: Agent Starting Run: 1dj0obc4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12310057887120116, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▆▄▂▁
val_accuracy,▁▄▆▇█
val_loss,█▆▄▂▁

0,1
epoch,5.0
test_accuracy,0.6397
test_loss,10611.88005
train_accuracy,0.64552
train_loss,56728.1228
val_accuracy,0.64383
val_loss,6335.66962


[34m[1mwandb[0m: Agent Starting Run: ipbzf4yt with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j8t10k60 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃▃▄▄▆▇██
train_loss,██▇▆▆▅▄▃▂▁
val_accuracy,▁▁▃▃▄▅▆▇██
val_loss,██▇▆▆▅▄▃▂▁

0,1
epoch,10.0
test_accuracy,0.4985
test_loss,15906.29596
train_accuracy,0.49915
train_loss,85954.30366
val_accuracy,0.49367
val_loss,9558.41675


[34m[1mwandb[0m: Agent Starting Run: 9cgrrp2m with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294340987163262, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▆▆▇▇▇▇▇▇████
train_loss,█▆▅▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▄▄▅▆▆▆▆▆▇▇▇▇▇▇█████
val_loss,█▆▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8763
test_loss,3397.46946
train_accuracy,0.9093
train_loss,13222.01397
val_accuracy,0.88417
val_loss,1958.66933


[34m[1mwandb[0m: Agent Starting Run: 7y4ywmaq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11972202674173117, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.844
test_loss,4334.70094
train_accuracy,0.86185
train_loss,21104.65013
val_accuracy,0.84533
val_loss,2566.09067


[34m[1mwandb[0m: Agent Starting Run: d1a0c51n with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22668541794849256, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▆▇▇▇▇▇█
train_loss,█▅▄▃▃▂▃▃▂▁
val_accuracy,▁▄▆▇▇▇▇▇▇█
val_loss,█▅▄▃▂▂▂▃▂▁

0,1
epoch,10.0
test_accuracy,0.8507
test_loss,4787.71346
train_accuracy,0.87311
train_loss,21071.85235
val_accuracy,0.85367
val_loss,2783.87235


[34m[1mwandb[0m: Agent Starting Run: vvftz4c6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12307831434255742, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▄▅▄▅▄▇▆▄▁▂▄▄▃▅▅▆▆▆▅█
train_loss,▇█▇▇▇▆▅▆▇▅▅▅▅▄▄▃▃▂▂▁
val_accuracy,▄▅▄▅▄▆▆▃▁▂▄▄▃▄▄▆▆▇▅█
val_loss,▇█▇▇▇▅▅▆▇▅▄▅▅▄▄▃▃▂▂▁

0,1
epoch,20.0
test_accuracy,0.1331
test_loss,48526.5272
train_accuracy,0.13617
train_loss,262182.16806
val_accuracy,0.13433
val_loss,29680.83013


[34m[1mwandb[0m: Agent Starting Run: djlu20pe with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: jlky7sd2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11962731827371012, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▂▃▃▃▄▄▅▆▆▇██
train_loss,████████████▇▆▄▃▂▂▁▁
val_accuracy,▁▁▁▁▁▁▁▁▂▃▂▃▄▄▅▆▇▇██
val_loss,████████████▇▆▄▃▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.4359
test_loss,13704.78249
train_accuracy,0.43806
train_loss,73491.31292
val_accuracy,0.43333
val_loss,8221.53103


[34m[1mwandb[0m: Agent Starting Run: sh5jggwn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: gm82czcq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9723956043956044, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.8442
test_loss,4358.36422
train_accuracy,0.86322
train_loss,21033.51063
val_accuracy,0.84717
val_loss,2591.93903


[34m[1mwandb[0m: Agent Starting Run: cq03u0ws with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8655705996131529, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▅▃▄▁▁▆▆█▃▃
train_loss,██▆▅▅▂▂▂▁▂
val_accuracy,▇▄▅▁▂▆▆█▃▄
val_loss,██▆▅▅▂▂▂▁▂

0,1
epoch,10.0
test_accuracy,0.1532
test_loss,44472.90935
train_accuracy,0.15631
train_loss,239286.19297
val_accuracy,0.15267
val_loss,26586.79906


[34m[1mwandb[0m: Agent Starting Run: k7s9899c with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22738105746314768, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: m1mfs2qp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11958146487294469, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▇█
train_loss,█▇▆▄▁
val_accuracy,▁▃▆██
val_loss,█▇▆▃▁

0,1
epoch,5.0
test_accuracy,0.4282
test_loss,17684.88963
train_accuracy,0.42859
train_loss,95318.70918
val_accuracy,0.41467
val_loss,10638.43252


[34m[1mwandb[0m: Agent Starting Run: 4dl7ndbn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▆▇▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▆▆▇▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8593
test_loss,3924.82728
train_accuracy,0.88004
train_loss,17831.69783
val_accuracy,0.86567
val_loss,2254.92119


[34m[1mwandb[0m: Agent Starting Run: xkeysgta with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇▇▇▇█████
val_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8548
test_loss,4172.97617
train_accuracy,0.87533
train_loss,19221.91685
val_accuracy,0.86183
val_loss,2394.95732


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ml7dedcs with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11956426249670561, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇▇██████
train_loss,█▆▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▇▇▇▇▇▇███████
val_loss,█▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7833
test_loss,6038.20081
train_accuracy,0.79876
train_loss,30182.40049
val_accuracy,0.78733
val_loss,3589.59486


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l87z4vui with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▇██▅▅▅▅▄▄▁
train_loss,▄▇█▇▆▆▄▃▂▁
val_accuracy,▇▆█▆▆▅▄▃▄▁
val_loss,▄▇█▇▆▅▄▃▂▁

0,1
epoch,10.0
test_accuracy,0.0819
test_loss,162286.06253
train_accuracy,0.08304
train_loss,880151.40654
val_accuracy,0.08333
val_loss,97298.81098


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hpcyp930 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: ema2d79q with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▄▅▆▆▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▃▃▄▅▆▆▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.7706
test_loss,6367.68817
train_accuracy,0.77913
train_loss,33027.87644
val_accuracy,0.78183
val_loss,3733.5156


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gfu1n6f5 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12306718509811014, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.7672
test_loss,6613.56615
train_accuracy,0.77839
train_loss,33937.29036
val_accuracy,0.77467
val_loss,3903.84835


[34m[1mwandb[0m: Agent Starting Run: e3ymo2wa with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 11a41j0m with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▂▁▁
val_accuracy,▁▅▆▇█
val_loss,█▄▂▁▁

0,1
epoch,5.0
test_accuracy,0.7453
test_loss,6448.63861
train_accuracy,0.7542
train_loss,33805.78575
val_accuracy,0.7485
val_loss,3852.48233


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4tbnpac7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11984853821768228, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▇▁█▁
train_loss,▄▂▁▃█
val_accuracy,▂█▂█▁
val_loss,▄▂▁▃█

0,1
epoch,5.0
test_accuracy,0.1
test_loss,194790.04567
train_accuracy,0.10063
train_loss,1050446.11667
val_accuracy,0.0945
val_loss,117780.95763


[34m[1mwandb[0m: Agent Starting Run: 1ji32tby with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11952226222885746, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▄█▇▅▅▃▅▅▅▆▄▅▄▆▃▅▆▆▁▅
train_loss,▇▅█▇▆▃▅▂▂▁▁▁▂▁▁▁▁▁▁▁
val_accuracy,▄█▇▅▅▃▅▆▅▆▄▄▄▆▃▅▆▅▁▅
val_loss,█▅█▇▆▃▅▂▂▁▁▁▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.0895
test_loss,23022.12966
train_accuracy,0.08969
train_loss,124355.41471
val_accuracy,0.08583
val_loss,13818.03301


[34m[1mwandb[0m: Agent Starting Run: tquvedni with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇██
train_loss,█▃▂▁▁
val_accuracy,▁▆▇██
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.8126
test_loss,5319.8702
train_accuracy,0.82389
train_loss,27126.90862
val_accuracy,0.81217
val_loss,3154.22561


[34m[1mwandb[0m: Agent Starting Run: dg2zr1az with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇██▆▇▆█
train_loss,█▆▃▂▁▁▃▂▂▁
val_accuracy,▁▃▆▇██▆▇▆█
val_loss,█▆▃▂▁▁▃▂▂▁

0,1
epoch,10.0
test_accuracy,0.7405
test_loss,7477.80076
train_accuracy,0.74926
train_loss,39394.21041
val_accuracy,0.73983
val_loss,4502.08813


[34m[1mwandb[0m: Agent Starting Run: ad585zb0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22654602951510894, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▂▄▁▂▁▄▆▄▇█▇▃▄▆▇▅▆▆█▇
train_loss,█▆██▇▅▅▄▃▃▄▃▃▂▂▂▂▂▁▁
val_accuracy,▂▄▁▂▁▃▆▅██▇▄▄▇▆▅▇▆█▇
val_loss,█▆██▇▆▅▄▃▃▄▃▃▃▂▂▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.1586
test_loss,29615.94102
train_accuracy,0.15426
train_loss,160364.89931
val_accuracy,0.15983
val_loss,17715.13796


[34m[1mwandb[0m: Agent Starting Run: c17r3dp3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22648144891858624, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▂▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.4193
test_loss,14838.02028
train_accuracy,0.41772
train_loss,79919.16498
val_accuracy,0.40717
val_loss,8912.07589


[34m[1mwandb[0m: Agent Starting Run: y763uyk8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▅▅▆▆▆▇▇▇▇█████████
val_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.85
test_loss,4149.02227
train_accuracy,0.87137
train_loss,19427.65037
val_accuracy,0.8575
val_loss,2385.62078


[34m[1mwandb[0m: Agent Starting Run: 86u3ze9g with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▁▁▁▁▁▁▁▁▁
train_loss,▁▃▆██▅▆▄▃▂
val_accuracy,▁█████████
val_loss,▁▃▆██▅▆▄▃▂

0,1
epoch,10.0
test_accuracy,0.1
test_loss,185243.24308
train_accuracy,0.0997
train_loss,1000819.12443
val_accuracy,0.10267
val_loss,110536.83915


[34m[1mwandb[0m: Agent Starting Run: 6faq5dt0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11957476717624319, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▅▆▆▆▇▇▇▇▇▇▇████
train_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇███████
val_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8207
test_loss,5038.95294
train_accuracy,0.83089
train_loss,25609.70798
val_accuracy,0.82117
val_loss,2995.32611


[34m[1mwandb[0m: Agent Starting Run: gu0s3unr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1200811716957826, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: y5zmgpzg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8659630606860158, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇█▆
train_loss,█▅▃▁▄
val_accuracy,▁▆▇█▆
val_loss,█▅▃▁▄

0,1
epoch,5.0
test_accuracy,0.3844
test_loss,16970.88767
train_accuracy,0.38885
train_loss,91050.8019
val_accuracy,0.38083
val_loss,10157.00425


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rj9maxtk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,▁▄▆▆▆▆▇▇▇▇▇█████████
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▁▄▆▆▆▆▇▇▇▇▇▇████████

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23054.23335
train_accuracy,0.1003
train_loss,124490.34019
val_accuracy,0.09733
val_loss,13835.05974


[34m[1mwandb[0m: Agent Starting Run: 14hp1seb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: a6bxas6e with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197009674582234, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▄▇█
train_loss,█▇▄▂▁
val_accuracy,▁▅▄▇█
val_loss,█▇▄▂▁

0,1
epoch,5.0
test_accuracy,0.3216
test_loss,16392.22676
train_accuracy,0.32576
train_loss,88489.19652
val_accuracy,0.326
val_loss,9832.70785


[34m[1mwandb[0m: Agent Starting Run: surs4p8t with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11963783403656822, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▆▆▆▇▇██
train_loss,█▇▆▄▃▃▂▂▁▁
val_accuracy,▁▂▄▆▆▆▇▇██
val_loss,█▇▆▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.7016
test_loss,8608.06908
train_accuracy,0.70935
train_loss,45848.30232
val_accuracy,0.69867
val_loss,5136.98839


[34m[1mwandb[0m: Agent Starting Run: 5ghbyeof with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22695285010555946, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,█▆▆▁▁▁▃▂▂▂▄▂▅▃▂▂▂▃▃▁
train_loss,▁▃▂█▅▅▄▅▄▄▃▄▃▃▄▄▅▄▃▄
val_accuracy,█▆▆▁▂▁▃▁▂▂▄▂▅▃▃▂▂▃▃▁
val_loss,▁▃▂█▅▅▄▆▄▄▃▄▃▃▄▄▅▄▃▄

0,1
epoch,20.0
test_accuracy,0.1633
test_loss,21480.37345
train_accuracy,0.16085
train_loss,115807.43607
val_accuracy,0.151
val_loss,12949.83419


[34m[1mwandb[0m: Agent Starting Run: d85z05jg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12292118582791034, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▅▆▆▆▇▇▇▇▇▇▇████
train_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8004
test_loss,5552.68807
train_accuracy,0.81387
train_loss,27919.71748
val_accuracy,0.80767
val_loss,3285.99795


[34m[1mwandb[0m: Agent Starting Run: ns03qk8g with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296564195298372, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8195
test_loss,5109.93647
train_accuracy,0.83226
train_loss,25952.97241
val_accuracy,0.82367
val_loss,3040.92031


[34m[1mwandb[0m: Agent Starting Run: lbfzazk4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11965887110954809, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇██
train_loss,█▄▂▂▁
val_accuracy,▁▆▇██
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.7427
test_loss,7582.39208
train_accuracy,0.7482
train_loss,39998.57395
val_accuracy,0.74517
val_loss,4488.59309


[34m[1mwandb[0m: Agent Starting Run: a6xrn398 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11961680435928985, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▂▂▃▄▆▇▇█
train_loss,█▄▆▆▅▄▂▂▂▁
val_accuracy,▁▄▂▃▃▄▆▆▇█
val_loss,█▄▇▆▅▄▂▂▂▁

0,1
epoch,10.0
test_accuracy,0.7935
test_loss,6096.80062
train_accuracy,0.80485
train_loss,31395.22042
val_accuracy,0.79517
val_loss,3618.05365


[34m[1mwandb[0m: Agent Starting Run: f7si58z7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12017660044150111, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 67q42g2g with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1200811716957826, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: ss3rif2l with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▄▅▆▆▆▆▇▇▇▇▇██████
train_loss,█▇▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇█████
val_loss,█▇▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6195
test_loss,10840.17668
train_accuracy,0.62044
train_loss,57903.80644
val_accuracy,0.61333
val_loss,6510.21547


[34m[1mwandb[0m: Agent Starting Run: w8vjkt4d with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.003 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2545246881040239, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▄▅▆▆▆▇▇▇▇▇▇█████
train_loss,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▂▃▄▄▅▆▆▆▇▇▇▇▇██████
val_loss,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6264
test_loss,10458.98648
train_accuracy,0.63519
train_loss,55754.99765
val_accuracy,0.62833
val_loss,6215.90718


[34m[1mwandb[0m: Agent Starting Run: 4heweinw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22612976965007914, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▆▆▇▇▇███
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.8668
test_loss,3688.23504
train_accuracy,0.88976
train_loss,16272.83086
val_accuracy,0.86817
val_loss,2145.80803


[34m[1mwandb[0m: Agent Starting Run: kkgrvja7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22742917659518136, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i2lbptwy with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12295600325232632, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃▄▅▆▇▇██
train_loss,███▇▅▄▃▂▁▁
val_accuracy,▁▁▃▄▅▆▇▇██
val_loss,███▇▅▄▃▂▁▁

0,1
epoch,10.0
test_accuracy,0.6277
test_loss,10055.74801
train_accuracy,0.63283
train_loss,53835.02773
val_accuracy,0.62
val_loss,6024.80996


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vwkxqp5m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11926524872561083, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▃▆█▃▂▅▃▃▁▁▂▄▃▄▂▁▃▃▄▂
train_loss,▆▄▁▆█▆▅▄█▇▅▃▄▅▇▆▆▄▄▆
val_accuracy,▃▆█▃▂▅▃▃▁▂▂▄▃▃▂▁▃▃▄▂
val_loss,▆▄▁▆█▆▅▄█▇▅▃▄▅▇▆▆▄▄▆

0,1
epoch,20.0
test_accuracy,0.1227
test_loss,22566.41821
train_accuracy,0.12341
train_loss,121890.52572
val_accuracy,0.12433
val_loss,13532.86286


[34m[1mwandb[0m: Agent Starting Run: ep3mdxsd with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▄▁▁▅▆▄▃▅▃
train_loss,█▃▂▂▂▂▂▂▁▁
val_accuracy,█▄▁▂▅▆▄▃▅▃
val_loss,█▃▂▂▂▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.045
test_loss,22978.94906
train_accuracy,0.04276
train_loss,124135.72345
val_accuracy,0.04183
val_loss,13791.89564


[34m[1mwandb[0m: Agent Starting Run: qvpjp9l9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11978524907586692, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▅▆▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▃▅▆▆▇▇▇██
val_loss,█▆▄▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.5637
test_loss,12750.43684
train_accuracy,0.57228
train_loss,68136.61125
val_accuracy,0.5745
val_loss,7517.36523


[34m[1mwandb[0m: Agent Starting Run: c19dp7t7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12269606071557644, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▇▆▄▄▃▂▂▁▁
val_accuracy,▁▃▄▅▆▇▇▇██
val_loss,█▇▆▅▄▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.5634
test_loss,12809.93568
train_accuracy,0.57159
train_loss,68583.20832
val_accuracy,0.56883
val_loss,7638.11568


[34m[1mwandb[0m: Agent Starting Run: iyto5z68 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11963783403656822, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_accuracy,▁▃▄▅▆▆▇▇██
val_loss,█▆▄▄▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.441
test_loss,16434.30589
train_accuracy,0.45096
train_loss,87978.8721
val_accuracy,0.44083
val_loss,9813.84471


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l2cs5383 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22717678100263852, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8083
test_loss,5347.28845
train_accuracy,0.82091
train_loss,27491.74437
val_accuracy,0.8115
val_loss,3173.12969


[34m[1mwandb[0m: Agent Starting Run: uxaax94t with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197325591624879, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▆▁▂▁▄▄▅▃▄
train_loss,▁▂█▄▃▃▂▁▃▂
val_accuracy,█▆▁▂▁▄▅▆▃▄
val_loss,▁▁█▄▃▃▂▁▃▂

0,1
epoch,10.0
test_accuracy,0.3029
test_loss,17371.65615
train_accuracy,0.31107
train_loss,93095.78951
val_accuracy,0.3065
val_loss,10321.3594


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6fiw2o81 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11953942164015119, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.6123
test_loss,11536.69237
train_accuracy,0.62256
train_loss,60539.86616
val_accuracy,0.61783
val_loss,6832.05592


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 91sx6cux with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22742917659518136, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: u5gsextd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇▇▇█
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.8762
test_loss,3503.21267
train_accuracy,0.89831
train_loss,15076.28274
val_accuracy,0.87667
val_loss,2052.89888


[34m[1mwandb[0m: Agent Starting Run: udy1st3f with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12010236498411578, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: xijfcqtx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11978524907586692, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇█▂
train_loss,█▄▂▁▅
val_accuracy,▁▆██▁
val_loss,█▄▂▁▅

0,1
epoch,5.0
test_accuracy,0.4068
test_loss,15831.21156
train_accuracy,0.40781
train_loss,85019.78228
val_accuracy,0.391
val_loss,9587.94193


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 17anj5xo with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22659274903203097, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▃▄▅▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.5057
test_loss,13792.50438
train_accuracy,0.50861
train_loss,73729.75011
val_accuracy,0.5105
val_loss,8318.01817


[34m[1mwandb[0m: Agent Starting Run: l84hxhhd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11975362956445226, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8722
test_loss,3604.17011
train_accuracy,0.89137
train_loss,16053.99471
val_accuracy,0.8705
val_loss,2093.19856


[34m[1mwandb[0m: Agent Starting Run: 6gsgqk58 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8652021089630931, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▃▁▆██
train_loss,██▇▃▁
val_accuracy,▃▁▆██
val_loss,██▇▄▁

0,1
epoch,5.0
test_accuracy,0.326
test_loss,18370.46119
train_accuracy,0.33131
train_loss,99024.90156
val_accuracy,0.32733
val_loss,10998.96428


[34m[1mwandb[0m: Agent Starting Run: lw6vxpga with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11962731827371012, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁█▁▁▅▅▅▅▅
train_loss,█▃▃▁▁▁▁▁▁▁
val_accuracy,██▁██▄▄▄▄▄
val_loss,█▃▃▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23028.07636
train_accuracy,0.1003
train_loss,124350.37731
val_accuracy,0.09733
val_loss,13818.08085


[34m[1mwandb[0m: Agent Starting Run: 4vxn72vd with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▆▅██▁
train_loss,█▄▁▁▅
val_accuracy,▅▅█▇▁
val_loss,█▄▁▁▅

0,1
epoch,5.0
test_accuracy,0.0763
test_loss,90803.92524
train_accuracy,0.0777
train_loss,491956.42118
val_accuracy,0.0745
val_loss,54467.02898


[34m[1mwandb[0m: Agent Starting Run: mompocg2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22766163888153831, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: p58p64ll with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8690203000882613, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: up6q8tuc with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11963783403656822, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▇██
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.8406
test_loss,4465.38148
train_accuracy,0.85367
train_loss,21881.67243
val_accuracy,0.83783
val_loss,2654.3754


[34m[1mwandb[0m: Agent Starting Run: y90amm3v with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▄▅▅▆▆▇▇▇▇▇▇█████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▂▃▄▄▅▅▆▆▇▇▇▇▇██████
val_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.5658
test_loss,12289.73878
train_accuracy,0.57439
train_loss,65673.20869
val_accuracy,0.566
val_loss,7383.99955


[34m[1mwandb[0m: Agent Starting Run: ygrvmcoz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▃▄▅▆▆▇█
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▁▂▃▄▄▅▆▇█
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.2954
test_loss,20703.96962
train_accuracy,0.29881
train_loss,111544.99008
val_accuracy,0.301
val_loss,12433.59151


[34m[1mwandb[0m: Agent Starting Run: r9podn4x with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,▁▂▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇██
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,██▇▆▆▅▅▄▄▃▃▃▂▂▂▂▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23048.56987
train_accuracy,0.1003
train_loss,124459.55888
val_accuracy,0.09733
val_loss,13831.86092


[34m[1mwandb[0m: Agent Starting Run: uazibb2v with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11961680435928985, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.7952
test_loss,5635.39118
train_accuracy,0.81063
train_loss,28347.27519
val_accuracy,0.80617
val_loss,3274.64253


[34m[1mwandb[0m: Agent Starting Run: mrmi205x with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.7585
test_loss,6632.69224
train_accuracy,0.77293
train_loss,34612.38801
val_accuracy,0.76867
val_loss,3909.0076


[34m[1mwandb[0m: Agent Starting Run: xwetfc6b with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8647461795187071, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▂▂▂▂▃▃▄▄▅▅▆▆▇██
train_loss,██▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▂▂▁
val_accuracy,▁▁▁▁▂▂▂▂▂▃▃▃▄▅▅▆▆▇██
val_loss,██▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▂▂▁

0,1
epoch,20.0
test_accuracy,0.3155
test_loss,22850.10928
train_accuracy,0.31774
train_loss,123385.26068
val_accuracy,0.31333
val_loss,13711.50109


[34m[1mwandb[0m: Agent Starting Run: 7n1svfw0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294489611562782, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇▇▇█████
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇████
val_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7879
test_loss,5941.38308
train_accuracy,0.80724
train_loss,28978.79188
val_accuracy,0.7835
val_loss,3629.38648


[34m[1mwandb[0m: Agent Starting Run: wae4oyr5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.226999823415151, max=1.0)…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: zgapeylw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 13s2g2dk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22664557518235345, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▅▆▆▇█▇▆
train_loss,▁▇█▇▆▄▃▂▂▂
val_accuracy,▁▁▃▅▅▆▇█▆▆
val_loss,▁▇█▇▆▅▃▁▂▂

0,1
epoch,10.0
test_accuracy,0.0947
test_loss,168641.2294
train_accuracy,0.09435
train_loss,911747.63962
val_accuracy,0.09867
val_loss,100346.04725


[34m[1mwandb[0m: Agent Starting Run: s7a7xf43 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▅▆▆▇▇▇██
train_loss,█▇▆▆▅▄▃▂▂▁
val_accuracy,▁▂▅▆▆▇▇▇██
val_loss,█▇▆▆▅▄▃▂▂▁

0,1
epoch,10.0
test_accuracy,0.5372
test_loss,14058.66154
train_accuracy,0.5403
train_loss,75585.12322
val_accuracy,0.5395
val_loss,8426.34592


[34m[1mwandb[0m: Agent Starting Run: vkgojclo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197430934365652, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▇██▆▇▇▆▆▇▇▆▆▇▇▅▃▁▁▁▂
train_loss,▃▁▁▂▂▁▂▂▂▁▂▂▂▂▃█▇▇▇▇
val_accuracy,▇██▆▇▇▆▆▆▇▆▆▇▇▅▃▁▁▁▂
val_loss,▃▂▁▂▂▁▂▂▂▁▂▂▂▂▃█▇▇▇▇

0,1
epoch,20.0
test_accuracy,0.2441
test_loss,22152.16635
train_accuracy,0.23965
train_loss,119195.36068
val_accuracy,0.233
val_loss,13291.11925


[34m[1mwandb[0m: Agent Starting Run: 5b6c5f3n with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22740910695375927, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 555ezhbo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11958527370178368, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▁▃▄▃▂▂▃▅█
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▄▆▆▅▁▄▅▅▆█
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.1008
test_loss,80831.43067
train_accuracy,0.09583
train_loss,439386.63617
val_accuracy,0.09867
val_loss,49078.32955


[34m[1mwandb[0m: Agent Starting Run: 4wrjrdn1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▇▆▅▄▄▃▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▇▆▅▄▄▃▂▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23018.93767
train_accuracy,0.09978
train_loss,124300.38284
val_accuracy,0.102
val_loss,13812.79346


[34m[1mwandb[0m: Agent Starting Run: ti3xxbsz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11958527370178368, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▇█▁███████
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,█▄▁▄▄▄▄▄▄▄
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23023.78301
train_accuracy,0.10031
train_loss,124327.66285
val_accuracy,0.09733
val_loss,13814.79745


[34m[1mwandb[0m: Agent Starting Run: 3n4mi0hw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: cnq8t6n2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: zj7yk11f with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11972202674173117, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▃▅▃▅▄▃▇█▄▃▃▆▇▁▆▁▅▃
train_loss,█▄▃▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▃▃▅▃▅▄▄▇█▄▄▃▆▇▂▆▁▅▃
val_loss,█▄▃▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1295
test_loss,22853.73592
train_accuracy,0.13365
train_loss,123116.87296
val_accuracy,0.1305
val_loss,13696.99789


[34m[1mwandb[0m: Agent Starting Run: gljp22o0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▁█▃▄▃▂▄
train_loss,█▂▅▆▄▄▅▃▁▃
val_accuracy,▁▄▆▁█▃▄▃▂▄
val_loss,█▂▅▆▄▄▅▃▁▃

0,1
epoch,10.0
test_accuracy,0.1046
test_loss,55395.82474
train_accuracy,0.10461
train_loss,298736.94887
val_accuracy,0.10183
val_loss,33408.28952


[34m[1mwandb[0m: Agent Starting Run: xh92oshc with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 43wi22uo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▇▆▅▆█▅▆▅▁▃
train_loss,█▃▂▁▁▁▁▁▁▁
val_accuracy,▄▇█▇▄▇▇▂▁▂
val_loss,█▃▂▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.0983
test_loss,23028.61012
train_accuracy,0.09887
train_loss,124351.19648
val_accuracy,0.09483
val_loss,13819.72009


[34m[1mwandb[0m: Agent Starting Run: duzra8ln with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11963783403656822, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▂▂▁
val_accuracy,▁▅▆██
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.733
test_loss,7695.10432
train_accuracy,0.73587
train_loss,40687.4927
val_accuracy,0.72983
val_loss,4563.018


[34m[1mwandb[0m: Agent Starting Run: s4qaqgtu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁██▇█
train_loss,█▂▂▁▁
val_accuracy,▁██▇▇
val_loss,█▂▂▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23013.44719
train_accuracy,0.09956
train_loss,124273.28091
val_accuracy,0.09517
val_loss,13808.81339


[34m[1mwandb[0m: Agent Starting Run: imqdlex7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8664202745512144, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▇▇▇███
train_loss,█▅▃▃▂▁▁▁▁▁
val_accuracy,▁▃▅▆▇▇▇▇▇█
val_loss,█▄▃▂▁▁▂▂▂▂

0,1
epoch,10.0
test_accuracy,0.8372
test_loss,5345.34511
train_accuracy,0.86126
train_loss,23029.05962
val_accuracy,0.839
val_loss,3022.19409


[34m[1mwandb[0m: Agent Starting Run: mcd65kqe with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11985909291061206, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█████████▁
train_loss,█▄▂▁▁▂▂▃▄▅
val_accuracy,▁▁▁▁▁▁▁▁▁█
val_loss,▃▂▁▁▂▃▄▅▇█

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23034.85893
train_accuracy,0.09978
train_loss,124387.31486
val_accuracy,0.102
val_loss,13821.84126


[34m[1mwandb[0m: Agent Starting Run: yayiyo46 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22699278550061588, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▅▆▆▆▇▇▇▇▇▇▇██████
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇▇██████
val_loss,█▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7211
test_loss,7966.79239
train_accuracy,0.73124
train_loss,40671.16702
val_accuracy,0.7295
val_loss,4693.98184


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jp315ymq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▇▇▇███
train_loss,█▆▅▅▄▃▃▂▂▁
val_accuracy,▁▃▅▆▇▇████
val_loss,█▆▆▅▄▃▃▂▂▁

0,1
epoch,10.0
test_accuracy,0.5588
test_loss,15097.91237
train_accuracy,0.56189
train_loss,81275.32867
val_accuracy,0.54867
val_loss,9066.61643


[34m[1mwandb[0m: Agent Starting Run: d05sdg10 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22680866044710438, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▂▁███▁▂▆▆▆▆▆▆▆▆▆▆▆▆▆
train_loss,█▇▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▇▇▁▁▁█▇▄▄▄▄▄▄▄▄▄▄▄▄▄
val_loss,█▇▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23025.70055
train_accuracy,0.10031
train_loss,124341.64295
val_accuracy,0.0975
val_loss,13816.56267


[34m[1mwandb[0m: Agent Starting Run: u7rsv5ee with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197641675466385, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▇▇████
train_loss,█▇▅▄▃▂▂▁▁▁
val_accuracy,▁▃▅▆▇▇▇███
val_loss,█▇▅▄▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.7371
test_loss,7700.34965
train_accuracy,0.74331
train_loss,40916.87992
val_accuracy,0.7395
val_loss,4599.35288


[34m[1mwandb[0m: Agent Starting Run: j2s250tf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.003 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.253606615059817, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▇▁▇▇█▁▁▁▁▁
train_loss,▁▂▂▁▁█▃▃▃▃
val_accuracy,▇▁▇▇█▂▁▁▂▂
val_loss,▁▂▂▁▁█▃▃▃▃

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23037.47244
train_accuracy,0.09978
train_loss,124401.5094
val_accuracy,0.102
val_loss,13823.48059


[34m[1mwandb[0m: Agent Starting Run: ouiw86br with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22697281604645025, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇████████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇████████
val_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6851
test_loss,8459.27058
train_accuracy,0.69913
train_loss,44487.25973
val_accuracy,0.689
val_loss,5112.00223


[34m[1mwandb[0m: Agent Starting Run: vstzeb5d with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12012356575463372, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 0fxihvxr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▅▆▆▆▇▇▇█
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▅▅▆▆▆▆▇▇█
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.4794
test_loss,13059.54895
train_accuracy,0.48463
train_loss,69941.20158
val_accuracy,0.48067
val_loss,7822.98826


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dig8hw6c with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12009176740492368, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 5zg0o5if with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11947641219362207, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▇▇▇███
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▃▅▆▇▇▇███
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.6753
test_loss,9382.87556
train_accuracy,0.68513
train_loss,49355.58365
val_accuracy,0.67683
val_loss,5612.09288


[34m[1mwandb[0m: Agent Starting Run: 3e73u9ng with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22659752131493363, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▅▆▆▇▇▇▇▇▇████████
val_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8284
test_loss,4855.78595
train_accuracy,0.84335
train_loss,24288.39997
val_accuracy,0.82783
val_loss,2877.74958


[34m[1mwandb[0m: Agent Starting Run: urccig0i with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: iegoe91k with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.869238919300724, max=1.0)…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: tz1ptqw4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22658929042469006, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▃▂▁▁
val_accuracy,▁▅▇▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.7668
test_loss,6260.28627
train_accuracy,0.77302
train_loss,32809.91772
val_accuracy,0.76683
val_loss,3750.02227


[34m[1mwandb[0m: Agent Starting Run: f7qsld8e with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12301013024602026, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 26r7ddjs with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22668541794849256, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█████████
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,█▁▁▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23026.59637
train_accuracy,0.1003
train_loss,124342.99049
val_accuracy,0.09733
val_loss,13816.57842


[34m[1mwandb[0m: Agent Starting Run: e2mmp7k0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▄▄▄▅███
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▁▁▄▄▄▆██▇
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.3286
test_loss,20497.51902
train_accuracy,0.3273
train_loss,110540.87805
val_accuracy,0.31167
val_loss,12317.69097


[34m[1mwandb[0m: Agent Starting Run: 3g1fve96 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8436
test_loss,4410.39465
train_accuracy,0.86041
train_loss,21169.54016
val_accuracy,0.8455
val_loss,2573.96253


[34m[1mwandb[0m: Agent Starting Run: 3ugmt173 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▆▅█
train_loss,█▅▂▂▁
val_accuracy,▁▅▆▆█
val_loss,█▅▂▂▁

0,1
epoch,5.0
test_accuracy,0.1492
test_loss,53338.82616
train_accuracy,0.14515
train_loss,289231.81231
val_accuracy,0.13967
val_loss,32694.60061


[34m[1mwandb[0m: Agent Starting Run: fmw19iev with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.119816885289198, max=1.0)…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇▇▇▇█████████
train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇▇▇▇██████████
val_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8536
test_loss,4132.34668
train_accuracy,0.8765
train_loss,18991.00206
val_accuracy,0.8635
val_loss,2352.78638


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9rm93p1t with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.5131
test_loss,15961.34905
train_accuracy,0.52011
train_loss,85971.87141
val_accuracy,0.50783
val_loss,9608.8404


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hmcyi3lj with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11971149617380597, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃█▇
train_loss,█▅▄▂▁
val_accuracy,▁▁▃█▇
val_loss,█▅▄▃▁

0,1
epoch,5.0
test_accuracy,0.1755
test_loss,22332.87274
train_accuracy,0.17737
train_loss,120594.94437
val_accuracy,0.1775
val_loss,13398.69286


[34m[1mwandb[0m: Agent Starting Run: 5z8vnu4n with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇▇█
train_loss,█▄▂▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.7128
test_loss,7758.48466
train_accuracy,0.72265
train_loss,40328.43648
val_accuracy,0.72467
val_loss,4515.67115


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3mr6nan0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11965887110954809, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▃▂▁▁
val_accuracy,▁▅▆▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.2424
test_loss,21468.32011
train_accuracy,0.2402
train_loss,116134.51466
val_accuracy,0.23983
val_loss,12861.46652


[34m[1mwandb[0m: Agent Starting Run: mmakwuum with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇▇████▇
train_loss,█▄▂▁▁▁▂▂▂▄
val_accuracy,▁▄▆▇▇███▇▇
val_loss,▇▃▁▁▁▂▃▄▅█

0,1
epoch,10.0
test_accuracy,0.8522
test_loss,5324.70167
train_accuracy,0.87417
train_loss,22248.77365
val_accuracy,0.85817
val_loss,3075.49738


[34m[1mwandb[0m: Agent Starting Run: y68ye19c with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9725714285714285, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8364
test_loss,4858.20701
train_accuracy,0.85607
train_loss,22137.49918
val_accuracy,0.83867
val_loss,2823.86365


[34m[1mwandb[0m: Agent Starting Run: fknfla2q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11965511173675876, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▄▅▅▇▅▅▄▆█▇▅▅▇▇▇█
train_loss,█▇▅▅▅▄▃▃▃▃▃▃▂▂▂▂▁▁▁▁
val_accuracy,▁▂▃▄▄▅▅▇▅▅▄▆▇▇▅▆█▇▇█
val_loss,█▇▅▅▅▄▃▃▃▃▃▃▂▂▂▂▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1746
test_loss,49899.08407
train_accuracy,0.17431
train_loss,270502.58731
val_accuracy,0.177
val_loss,29828.79489


[34m[1mwandb[0m: Agent Starting Run: vplsgzbe with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11984853821768228, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▇▇▇▇▇▇████████
train_loss,█▇▆▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇▇▇█████████
val_loss,█▇▆▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.793
test_loss,5884.35252
train_accuracy,0.80087
train_loss,30587.2415
val_accuracy,0.79
val_loss,3511.73719


[34m[1mwandb[0m: Agent Starting Run: wa1tptq1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▇▇█▅
train_loss,█▁▁▁▄
val_accuracy,▁▇▇█▅
val_loss,▇▁▂▃█

0,1
epoch,5.0
test_accuracy,0.8425
test_loss,4879.68046
train_accuracy,0.85889
train_loss,21496.24383
val_accuracy,0.84517
val_loss,2785.56953


[34m[1mwandb[0m: Agent Starting Run: lcnvwfs0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22682948256171484, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▇▇████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8608
test_loss,3839.37046
train_accuracy,0.88456
train_loss,17133.1034
val_accuracy,0.86617
val_loss,2226.27334


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fmxra5ey with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12278481012658228, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▂▂▂▂▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▅▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23050.09742
train_accuracy,0.1003
train_loss,124466.63527
val_accuracy,0.09733
val_loss,13833.94928


[34m[1mwandb[0m: Agent Starting Run: iefv971q with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11971149617380597, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇████
train_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇████
val_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8577
test_loss,3980.78735
train_accuracy,0.87607
train_loss,18704.66595
val_accuracy,0.85783
val_loss,2363.96399


[34m[1mwandb[0m: Agent Starting Run: 0dg70mmq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12015538094817692, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 2mj13568 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22663736263736264, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▆▃▂▁
val_accuracy,▁▅▆██
val_loss,█▆▃▂▁

0,1
epoch,5.0
test_accuracy,0.7071
test_loss,10517.90758
train_accuracy,0.70963
train_loss,56134.39788
val_accuracy,0.70067
val_loss,6299.27775


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 68goy0t4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11947252747252747, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.7275
test_loss,7379.97764
train_accuracy,0.74102
train_loss,37871.54063
val_accuracy,0.73233
val_loss,4403.09086


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dppi44y4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▇██
train_loss,█▅▃▂▁
val_accuracy,▁▄███
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.4199
test_loss,12957.49332
train_accuracy,0.428
train_loss,69304.25137
val_accuracy,0.42183
val_loss,7771.98


[34m[1mwandb[0m: Agent Starting Run: wh90by18 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11963406052076003, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8352
test_loss,4748.63806
train_accuracy,0.84739
train_loss,23847.94117
val_accuracy,0.83717
val_loss,2803.75577


[34m[1mwandb[0m: Agent Starting Run: ythl48n2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.865506329113924, max=1.0)…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇██████
train_loss,██▆▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇██████
val_loss,██▆▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7288
test_loss,7717.05098
train_accuracy,0.73213
train_loss,40904.12132
val_accuracy,0.72633
val_loss,4598.22715


[34m[1mwandb[0m: Agent Starting Run: 1ml5ctkh with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12005998588567396, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 6lyfy3gd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22690921873626857, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.6011
test_loss,12767.9703
train_accuracy,0.6083
train_loss,66989.18451
val_accuracy,0.60233
val_loss,7858.32057


[34m[1mwandb[0m: Agent Starting Run: hc1n5mfq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.7969
test_loss,5776.27477
train_accuracy,0.80919
train_loss,29758.82134
val_accuracy,0.80067
val_loss,3382.10789


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6q8op1de with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8684837258534004, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 5atcvxih with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12291007681879801, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇██
train_loss,█▆▃▂▁
val_accuracy,▁▅▇██
val_loss,█▆▃▂▁

0,1
epoch,5.0
test_accuracy,0.6572
test_loss,9053.4717
train_accuracy,0.66459
train_loss,48127.18376
val_accuracy,0.66117
val_loss,5433.30455


[34m[1mwandb[0m: Agent Starting Run: mreew2sy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12004939578371704, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: sjplkjcp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1198274344074661, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▇▅▃▁
val_accuracy,▁▅▆▇█
val_loss,█▇▅▃▁

0,1
epoch,5.0
test_accuracy,0.5916
test_loss,12816.61558
train_accuracy,0.59202
train_loss,69046.58441
val_accuracy,0.587
val_loss,7689.4684


[34m[1mwandb[0m: Agent Starting Run: z0uou6x0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▇▁▃▇
train_loss,█▃▃▁▃
val_accuracy,██▁▃█
val_loss,█▃▃▁▂

0,1
epoch,5.0
test_accuracy,0.1185
test_loss,51934.60794
train_accuracy,0.11783
train_loss,282153.2383
val_accuracy,0.1165
val_loss,30816.49764


[34m[1mwandb[0m: Agent Starting Run: k3ims5pk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.7786
test_loss,6000.29513
train_accuracy,0.80078
train_loss,29591.48605
val_accuracy,0.78617
val_loss,3507.97157


[34m[1mwandb[0m: Agent Starting Run: ih4qp30k with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇▇█
train_loss,█▄▂▁▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▁▁

0,1
epoch,5.0
test_accuracy,0.7294
test_loss,7219.65028
train_accuracy,0.7335
train_loss,38425.70688
val_accuracy,0.73
val_loss,4294.7599


[34m[1mwandb[0m: Agent Starting Run: fqvx1ovj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: jp767435 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11983398092546803, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: t1xg9wir with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8607
test_loss,3854.46104
train_accuracy,0.88146
train_loss,17779.40332
val_accuracy,0.86433
val_loss,2241.51275


[34m[1mwandb[0m: Agent Starting Run: m02ytqoz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▆███████
train_loss,▁▃▄▆▇███▇█
val_accuracy,▁▃▆███████
val_loss,▁▃▄▆▇▇████

0,1
epoch,10.0
test_accuracy,0.1005
test_loss,386177.7731
train_accuracy,0.09998
train_loss,2083158.83186
val_accuracy,0.10167
val_loss,232327.95678


[34m[1mwandb[0m: Agent Starting Run: kjiuqy16 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22704081632653061, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▃▆█▅▁
train_loss,█▃▂▁▁
val_accuracy,▂▅█▅▁
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.1863
test_loss,22575.5181
train_accuracy,0.18376
train_loss,121933.77441
val_accuracy,0.182
val_loss,13550.34147


[34m[1mwandb[0m: Agent Starting Run: 4m5guqjz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▇█
train_loss,█▅▄▂▁
val_accuracy,▁▃▅▇█
val_loss,█▅▄▂▁

0,1
epoch,5.0
test_accuracy,0.3164
test_loss,20159.80094
train_accuracy,0.32078
train_loss,108464.41613
val_accuracy,0.31333
val_loss,12114.43972


[34m[1mwandb[0m: Agent Starting Run: 2x4irqz9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22646153846153846, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▁▂▆█
train_loss,█▃▂▁▁
val_accuracy,▄▁▂▆█
val_loss,█▃▂▁▁

0,1
epoch,5.0
test_accuracy,0.128
test_loss,23059.98861
train_accuracy,0.12993
train_loss,124485.46848
val_accuracy,0.12917
val_loss,13850.54157


[34m[1mwandb[0m: Agent Starting Run: 4ojnqaq7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▃▁▁▁▁▁▃▄▆▇▇████▇▇▇▇▇
train_loss,█▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▃▁▁▁▁▁▃▄▆▇▇███▇▇▇▇▇▇
val_loss,█▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.141
test_loss,22778.02202
train_accuracy,0.14076
train_loss,123063.73898
val_accuracy,0.13833
val_loss,13665.93513


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jwhnszym with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.5503
test_loss,13296.09133
train_accuracy,0.55544
train_loss,70564.41151
val_accuracy,0.55167
val_loss,7909.46079


[34m[1mwandb[0m: Agent Starting Run: alrj9orq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11956426249670561, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▃▄▅▆▇▇▇██
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.4582
test_loss,15498.89745
train_accuracy,0.46406
train_loss,83183.47583
val_accuracy,0.46417
val_loss,9291.12045


[34m[1mwandb[0m: Agent Starting Run: qeqwhkg3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.1326
test_loss,40560.73641
train_accuracy,0.12969
train_loss,217283.10754
val_accuracy,0.12217
val_loss,24362.59077


[34m[1mwandb[0m: Agent Starting Run: 6qzc72vf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12010236498411578, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: esqhl6ls with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22689739985945187, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▄█▄▁▅
train_loss,▁▅▁█▆
val_accuracy,▄█▄▁▅
val_loss,▁▅▁█▆

0,1
epoch,5.0
test_accuracy,0.0903
test_loss,250427.42188
train_accuracy,0.09246
train_loss,1361274.83384
val_accuracy,0.09383
val_loss,152817.60507


[34m[1mwandb[0m: Agent Starting Run: zd02odsc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11961680435928985, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▂▂▁▁▃▄▆▆▇█▇██▇▇▅▅▆▆█
train_loss,██▇▇▇▇▇▇▇▇▇▆▆▅▄▄▃▂▂▁
val_accuracy,▂▂▁▁▃▄▆▆▇█▇██▇▇▅▅▆▇█
val_loss,██▇▇▇▇▇▇▇▇▇▆▆▅▄▄▃▂▂▁

0,1
epoch,20.0
test_accuracy,0.2605
test_loss,19795.96996
train_accuracy,0.25643
train_loss,106956.45594
val_accuracy,0.25167
val_loss,11905.80546


[34m[1mwandb[0m: Agent Starting Run: zilufgmf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12310057887120116, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁████
train_loss,█▂▁▁▁
val_accuracy,█▁▁▁▁
val_loss,█▂▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23038.01448
train_accuracy,0.1003
train_loss,124403.61672
val_accuracy,0.09733
val_loss,13824.46892


[34m[1mwandb[0m: Agent Starting Run: eoj1u7a5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9723149938477764, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▇▇▇▇▇▇▇███████
train_loss,█▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇▇▇▇▇███████
val_loss,█▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.761
test_loss,6596.6227
train_accuracy,0.76967
train_loss,34661.31902
val_accuracy,0.76667
val_loss,3892.46677


[34m[1mwandb[0m: Agent Starting Run: 4hivwnly with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12295600325232632, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▃▃▄▅▆▆▆▇▇▇▇▇▇▇████
train_loss,███▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▁▃▃▄▅▆▆▆▇▇▇▇▇▇▇████
val_loss,███▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7294
test_loss,7277.19609
train_accuracy,0.73911
train_loss,38234.43791
val_accuracy,0.73033
val_loss,4288.16598


[34m[1mwandb[0m: Agent Starting Run: str7flzy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11937274248964849, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇▇▇▇▇███████
val_loss,█▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8463
test_loss,4271.52138
train_accuracy,0.86889
train_loss,20234.3629
val_accuracy,0.856
val_loss,2477.99047


[34m[1mwandb[0m: Agent Starting Run: agglswig with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11970777220315112, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▂▂▂▃▂▄▆▆▅▅▇▇▅▄▆██
train_loss,██▇▅▄▅▄▄▃▃▄▄▃▂▂▂▂▁▁▁
val_accuracy,▁▂▃▂▂▂▃▃▄▆▆▅▅▆▇▅▄▆█▇
val_loss,██▇▅▄▅▄▄▃▃▄▄▃▂▂▂▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.2446
test_loss,24472.22643
train_accuracy,0.24606
train_loss,131315.2516
val_accuracy,0.2355
val_loss,14504.9598


[34m[1mwandb[0m: Agent Starting Run: jgzsqk3i with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197009674582234, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▅▆▆▆▆▆▇▇▇▇▇████
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▅▅▆▆▆▆▆▇▇▇▇█████
val_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7524
test_loss,6809.67126
train_accuracy,0.7585
train_loss,35876.2616
val_accuracy,0.75683
val_loss,4044.76731


[34m[1mwandb[0m: Agent Starting Run: 34opjykw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 4tiogj9v with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22612137203166227, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
val_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8274
test_loss,4882.95362
train_accuracy,0.84194
train_loss,24484.95113
val_accuracy,0.83217
val_loss,2870.23524


[34m[1mwandb[0m: Agent Starting Run: j1ulknt0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█▂▇▇▇▇▇▇▆
train_loss,█▂▁▁▁▁▁▁▁▁
val_accuracy,█▁█▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.0998
test_loss,23028.35006
train_accuracy,0.10009
train_loss,124352.36726
val_accuracy,0.09733
val_loss,13817.42035


[34m[1mwandb[0m: Agent Starting Run: za1wyr3k with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297676100913284, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.7437
test_loss,7088.98534
train_accuracy,0.7597
train_loss,36585.35551
val_accuracy,0.753
val_loss,4134.83932


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ub5sb3nw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12310057887120116, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▂█
train_loss,█▁▁▂▂
val_accuracy,▁▃▅▁█
val_loss,█▁▁▂▂

0,1
epoch,5.0
test_accuracy,0.0987
test_loss,23353.57814
train_accuracy,0.10031
train_loss,126039.58792
val_accuracy,0.09567
val_loss,14024.41591


[34m[1mwandb[0m: Agent Starting Run: szoz597v with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22707308503162332, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▇▆▅▄▄▃▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▇▆▅▄▄▃▂▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23028.19762
train_accuracy,0.1003
train_loss,124351.45348
val_accuracy,0.09733
val_loss,13817.81194


[34m[1mwandb[0m: Agent Starting Run: fbeg6do6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12289744981009224, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,██████▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,▄▁█▆▃
val_accuracy,██████▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▂▁▅▄▃█

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: ei992r5z with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9724125812686698, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇▇██████
train_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇▇██████
val_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6768
test_loss,8960.96149
train_accuracy,0.69354
train_loss,46222.78312
val_accuracy,0.68817
val_loss,5273.95558


[34m[1mwandb[0m: Agent Starting Run: f1d135fm with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12004939578371704, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: ukvp8agt with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12293229684534032, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆██
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.8168
test_loss,5154.98266
train_accuracy,0.83207
train_loss,26247.65056
val_accuracy,0.819
val_loss,3043.80211


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7uses09n with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22633383141425684, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▆▇▇▇████
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▆▇▇█████
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.6092
test_loss,11311.08442
train_accuracy,0.61302
train_loss,60537.46735
val_accuracy,0.61117
val_loss,6773.60186


[34m[1mwandb[0m: Agent Starting Run: 4k4pfokf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3by86zqw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▇█▆▇▆▆▄▅▅▁
train_loss,▂▁▃▂▃▃▅▄▄█
val_accuracy,██▆▇▆▆▄▆▅▁
val_loss,▂▁▃▂▃▃▅▄▄█

0,1
epoch,10.0
test_accuracy,0.0869
test_loss,27453.63057
train_accuracy,0.09065
train_loss,148070.96856
val_accuracy,0.08283
val_loss,16570.52058


[34m[1mwandb[0m: Agent Starting Run: kymtc05a with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: hhflf49v with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▂▂▃▃▄▄▅▅▆▆▇▇▇▇███
train_loss,█▆▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▂▂▂▃▃▄▄▅▅▆▆▆▇▇▇███
val_loss,█▆▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.4429
test_loss,17209.11635
train_accuracy,0.45231
train_loss,91296.57347
val_accuracy,0.45183
val_loss,10274.56635


[34m[1mwandb[0m: Agent Starting Run: kc1hig79 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▅▄▄▄▃▃▃▂▁▁▂▂▃▄▄▅▆▆▇█
train_loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▄▃▄▃▄▃▃▂▁▁▂▃▃▄▅▅▆▇▇█
val_loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1534
test_loss,22822.39073
train_accuracy,0.15324
train_loss,123224.96543
val_accuracy,0.14983
val_loss,13696.30331


[34m[1mwandb[0m: Agent Starting Run: fff85orx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▆▆▇▇▇▇██▅▆▆▅▂▅▆▆▆▆
train_loss,█▃▃▃▂▂▂▂▁▁▄▃▃▄▅▄▃▃▃▃
val_accuracy,▁▆▆▆▆▇█▇██▅▆▅▅▁▄▆▆▆▆
val_loss,█▃▃▃▂▂▂▂▁▁▄▃▃▄▆▄▃▃▃▂

0,1
epoch,20.0
test_accuracy,0.768
test_loss,6718.93702
train_accuracy,0.77378
train_loss,35455.65607
val_accuracy,0.76933
val_loss,4034.21705


[34m[1mwandb[0m: Agent Starting Run: wf90f58t with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇▇▇▇▇▇██████
val_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6585
test_loss,9749.80666
train_accuracy,0.66928
train_loss,51543.60253
val_accuracy,0.65367
val_loss,5854.7018


[34m[1mwandb[0m: Agent Starting Run: cn5hr9is with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,█▅▂▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▂▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23056.77756
train_accuracy,0.1003
train_loss,124501.19862
val_accuracy,0.09733
val_loss,13839.46192


[34m[1mwandb[0m: Agent Starting Run: po3kd1j8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.7265
test_loss,7614.06097
train_accuracy,0.73965
train_loss,38851.23423
val_accuracy,0.72567
val_loss,4573.70869


[34m[1mwandb[0m: Agent Starting Run: 79r2sy6d with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9726593406593407, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▃▅▅▆▇▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.553
test_loss,12999.60388
train_accuracy,0.55885
train_loss,68744.00544
val_accuracy,0.55017
val_loss,7860.14765


[34m[1mwandb[0m: Agent Starting Run: nrt0ks47 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230338094377147, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▃▂▁▂
train_loss,▁▄▇█▇
val_accuracy,█▃▂▁▂
val_loss,▁▄▇█▇

0,1
epoch,5.0
test_accuracy,0.2923
test_loss,18976.68373
train_accuracy,0.298
train_loss,102097.35146
val_accuracy,0.29433
val_loss,11334.92194


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5ka8i4rw with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇██
train_loss,█▄▃▂▁
val_accuracy,▁▅▇██
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.6697
test_loss,10007.55308
train_accuracy,0.68189
train_loss,52997.15655
val_accuracy,0.67217
val_loss,5983.00671


[34m[1mwandb[0m: Agent Starting Run: yudlcbbe with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁██▇▆
train_loss,█▆▅▃▁
val_accuracy,▁██▇▆
val_loss,█▆▄▃▁

0,1
epoch,5.0
test_accuracy,0.2078
test_loss,21611.63373
train_accuracy,0.20896
train_loss,116663.66389
val_accuracy,0.2075
val_loss,12967.85352


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: erpnm5pt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2268648838845883, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▇▇▇█▇▇▇▅▇
train_loss,█▂▂▂▁▁▅▃▅▃
val_accuracy,▁▇▇▇█▇▆▇▅▇
val_loss,█▂▂▂▁▁▅▃▅▃

0,1
epoch,10.0
test_accuracy,0.7029
test_loss,9615.26824
train_accuracy,0.70935
train_loss,50615.76952
val_accuracy,0.7055
val_loss,5689.92908


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hk9fxxeb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12009176740492368, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 8ssa0pox with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12307831434255742, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▆▅▆▄▃▄▄▆█▇▄▂▁▂▂▁▃▆▇▇
train_loss,██▇▆▆▅▄▄▄▅▆▅▄▃▃▃▃▂▁▁
val_accuracy,▆▅▆▅▄▄▆▄▇▆▅▄▃▂▁▁▃▆██
val_loss,██▇▇▆▅▄▄▄▅▆▅▄▃▃▃▃▂▂▁

0,1
epoch,20.0
test_accuracy,0.1004
test_loss,51867.11293
train_accuracy,0.10824
train_loss,279309.85352
val_accuracy,0.11067
val_loss,30766.5573


[34m[1mwandb[0m: Agent Starting Run: ob4zddlh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8648221343873518, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▂█▅▅▁
train_loss,█▁▁▁▁
val_accuracy,█▁▄▄▃
val_loss,█▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.0995
test_loss,23021.43494
train_accuracy,0.09967
train_loss,124315.1853
val_accuracy,0.09717
val_loss,13813.45981


[34m[1mwandb[0m: Agent Starting Run: 40euygqh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11957476717624319, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▃▁████████
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▆█▁▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23121.07983
train_accuracy,0.10026
train_loss,124851.80213
val_accuracy,0.09733
val_loss,13876.60971


[34m[1mwandb[0m: Agent Starting Run: e6wam2w1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▇█
train_loss,█▄▂▁▁
val_accuracy,▁▃▅▇█
val_loss,█▄▂▁▁

0,1
epoch,5.0
test_accuracy,0.8055
test_loss,5560.41176
train_accuracy,0.81589
train_loss,28919.03962
val_accuracy,0.81267
val_loss,3320.71459


[34m[1mwandb[0m: Agent Starting Run: 8kakfoqk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8661154116819142, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅████████
train_loss,█▇▇▆▅▄▃▂▂▁
val_accuracy,▁▆████████
val_loss,█▇▇▆▅▄▃▂▂▁

0,1
epoch,10.0
test_accuracy,0.2978
test_loss,18321.54075
train_accuracy,0.30167
train_loss,98847.81889
val_accuracy,0.295
val_loss,10986.66652


[34m[1mwandb[0m: Agent Starting Run: m7y2w5ax with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11958527370178368, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▆▆▇▇▇███
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8461
test_loss,4396.27078
train_accuracy,0.86283
train_loss,21106.97886
val_accuracy,0.84917
val_loss,2564.24645


[34m[1mwandb[0m: Agent Starting Run: 0h506ham with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇▇▇██████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇▇▇▇▇████
val_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.861
test_loss,4079.80014
train_accuracy,0.88583
train_loss,17566.9231
val_accuracy,0.8655
val_loss,2317.287


[34m[1mwandb[0m: Agent Starting Run: wvg5af55 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▇█
train_loss,█▃▂▁▁
val_accuracy,▂▁▁▆█
val_loss,█▃▂▂▁

0,1
epoch,5.0
test_accuracy,0.2483
test_loss,16836.53206
train_accuracy,0.25231
train_loss,90723.37908
val_accuracy,0.24367
val_loss,10095.74023


[34m[1mwandb[0m: Agent Starting Run: v1seoqxn with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.119816885289198, max=1.0)…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▆█
train_loss,█▅▃▂▁
val_accuracy,▁▂▄▆█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.21
test_loss,22337.74019
train_accuracy,0.20622
train_loss,121011.83681
val_accuracy,0.2055
val_loss,13468.47583


[34m[1mwandb[0m: Agent Starting Run: bul2mkog with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12295600325232632, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.4659
test_loss,16026.90164
train_accuracy,0.46615
train_loss,86537.64085
val_accuracy,0.4535
val_loss,9731.46348


[34m[1mwandb[0m: Agent Starting Run: 7gg7g8jp with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8664085188770572, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇▇██████
train_loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇███████
val_loss,█▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6836
test_loss,9014.2837
train_accuracy,0.69207
train_loss,47145.66691
val_accuracy,0.68
val_loss,5373.36421


[34m[1mwandb[0m: Agent Starting Run: bxa4a20n with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197325591624879, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▆▇▇▇▇▇▇▇█████████
train_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▆▆▇▇▇▇▇▇▇█████████
val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8241
test_loss,4872.10813
train_accuracy,0.84237
train_loss,24271.81687
val_accuracy,0.8325
val_loss,2844.19003


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x8o8qh5f with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9736008447729673, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁█
train_loss,█▅▃▂▁
val_accuracy,▁▁▁▁█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.1072
test_loss,23784.70677
train_accuracy,0.10724
train_loss,128419.42456
val_accuracy,0.111
val_loss,14288.61222


[34m[1mwandb[0m: Agent Starting Run: wubfh0zg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇██
train_loss,█▄▂▁▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▁▁

0,1
epoch,5.0
test_accuracy,0.7549
test_loss,6612.74637
train_accuracy,0.7577
train_loss,34810.60572
val_accuracy,0.75667
val_loss,3921.03784


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0phfawwc with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12012356575463372, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 6pralr3z with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12306718509811014, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▄█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23025.54415
train_accuracy,0.1003
train_loss,124337.25517
val_accuracy,0.09733
val_loss,13815.88601


[34m[1mwandb[0m: Agent Starting Run: azg95egf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12308944560007236, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▇▇█
train_loss,█▅▃▂▁
val_accuracy,▁▃▅▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.1286
test_loss,70511.01545
train_accuracy,0.1275
train_loss,379819.89448
val_accuracy,0.13183
val_loss,41527.46629


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nqp0uwp1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2265096246813747, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▇▁▁▄▃▅▆▅▅▅▇▁▆▅▁▃█▆▆▄
train_loss,▂▄▃▃▄▂▃▄▃▃█▅▃▄▂▄▁▂▂▃
val_accuracy,▇▁▁▄▃▅▆▅▅▅▆▁▆▅▁▃█▆▆▄
val_loss,▂▄▃▃▄▂▃▄▃▃█▅▃▄▂▄▁▂▃▃

0,1
epoch,20.0
test_accuracy,0.0994
test_loss,66242.35212
train_accuracy,0.09954
train_loss,357117.61891
val_accuracy,0.10333
val_loss,39405.79328


[34m[1mwandb[0m: Agent Starting Run: a68cbyof with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297676100913284, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▅▅▆▇▇█
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▂▃▄▄▅▆▇▇█
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.2284
test_loss,23029.28429
train_accuracy,0.2307
train_loss,123422.71693
val_accuracy,0.22767
val_loss,13655.67523


[34m[1mwandb[0m: Agent Starting Run: ryllgmn5 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8651142355008787, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▄▅▆▇▇▇▇▇▇▇████████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▅▆▇▇▇▇▇▇▇▇███████
val_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8644
test_loss,3855.3585
train_accuracy,0.88454
train_loss,17430.65259
val_accuracy,0.86983
val_loss,2216.19713


[34m[1mwandb[0m: Agent Starting Run: 7je5wm44 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11964458520277997, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▇▇▇███
train_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▆▇▇████
val_loss,█▆▄▃▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8456
test_loss,4384.10069
train_accuracy,0.8625
train_loss,21233.36052
val_accuracy,0.84683
val_loss,2581.08077


[34m[1mwandb[0m: Agent Starting Run: bo7138cd with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8667253521126761, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▄▆▅▄▃▄▃▅▅▅▆▆▇▇███
train_loss,█▇▆▅▄▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁
val_accuracy,▁▁▃▄▆▅▄▃▄▃▅▅▅▆▆▇▇███
val_loss,█▇▆▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.1907
test_loss,44435.21476
train_accuracy,0.19394
train_loss,238908.19371
val_accuracy,0.193
val_loss,26380.895


[34m[1mwandb[0m: Agent Starting Run: 6vu7j2vf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.866877971473851, max=1.0)…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▂▁▂█
train_loss,█▄▂▁▁
val_accuracy,▂▂▁▂█
val_loss,█▄▂▁▁

0,1
epoch,5.0
test_accuracy,0.148
test_loss,22983.97805
train_accuracy,0.14643
train_loss,124108.75921
val_accuracy,0.156
val_loss,13780.7138


[34m[1mwandb[0m: Agent Starting Run: 3ujorvb4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195881731784583, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▂▃▃▄▅▆▅▅▆▆▇▇█████
train_loss,███████▇▇▆▅▄▃▃▂▂▂▁▁▁
val_accuracy,▁▁▂▂▃▃▄▅▅▅▅▆▆▇▇▇████
val_loss,███████▇▇▆▅▄▃▃▂▂▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.5952
test_loss,11115.71852
train_accuracy,0.5958
train_loss,59652.27625
val_accuracy,0.5895
val_loss,6684.96233


[34m[1mwandb[0m: Agent Starting Run: exa3ncod with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12010236498411578, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 06oea1mb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8115
test_loss,5250.87596
train_accuracy,0.82217
train_loss,26889.92309
val_accuracy,0.81567
val_loss,3109.32919


[34m[1mwandb[0m: Agent Starting Run: e2eixav7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01128888888957186, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12009176740492368, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j2rnc1ai with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22648144891858624, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▅▁▄▂
train_loss,▁▄█▅▅
val_accuracy,█▅▁▄▂
val_loss,▁▄█▅▅

0,1
epoch,5.0
test_accuracy,0.3615
test_loss,14574.81316
train_accuracy,0.37415
train_loss,77840.02406
val_accuracy,0.37617
val_loss,8694.17473


[34m[1mwandb[0m: Agent Starting Run: 1fwi9csp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▇██▁▁
train_loss,█▂▁▁▂
val_accuracy,▂▁▁██
val_loss,█▂▁▁▂

0,1
epoch,5.0
test_accuracy,0.0999
test_loss,29185.87517
train_accuracy,0.0998
train_loss,157847.35043
val_accuracy,0.10167
val_loss,17267.00535


[34m[1mwandb[0m: Agent Starting Run: azluhqzm with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▆▇▇▇▇▇▇▇█████████
train_loss,█▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▆▆▇▇▇▇▇███████████
val_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8342
test_loss,4645.59966
train_accuracy,0.85181
train_loss,22357.44961
val_accuracy,0.83533
val_loss,2736.22198


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: moan36p5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11965887110954809, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▇██
train_loss,█▄▂▁▁
val_accuracy,▁▄▇██
val_loss,█▄▂▁▁

0,1
epoch,5.0
test_accuracy,0.7612
test_loss,7131.43745
train_accuracy,0.76833
train_loss,37489.39833
val_accuracy,0.76333
val_loss,4231.32986


[34m[1mwandb[0m: Agent Starting Run: 8hszulu3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1200811716957826, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: f527ay3r with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197430934365652, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,▁█▇█▇▇▇▄▃▃
val_accuracy,█████████▁
val_loss,▁█▇█▇▇▇▄▃▃

0,1
epoch,10.0
test_accuracy,0.1
test_loss,287667.58891
train_accuracy,0.09981
train_loss,1553964.32339
val_accuracy,0.1015
val_loss,172229.50089


[34m[1mwandb[0m: Agent Starting Run: ifeu328m with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▆▇▇▇▇▇▇▇█████████
train_loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▅▆▆▇▇▇▇▇███████████
val_loss,█▅▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8071
test_loss,5452.42386
train_accuracy,0.81776
train_loss,27999.21878
val_accuracy,0.80517
val_loss,3219.78732


[34m[1mwandb[0m: Agent Starting Run: 6iwpxmwq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2268914981901651, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 6ondkj4y with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01128888888957186, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302125734961555, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▂▁▂▁▁▂▂▂▂▂▄▇▆▆▆▆▆▆▇█
train_loss,█▇▇▇▇▇▇▆▆▆▅▅▄▄▃▃▂▂▂▁
val_accuracy,▂▁▂▁▁▂▂▂▂▂▄▆▆▆▆▆▆▆▇█
val_loss,█▇▇▇▇▇▇▆▆▆▅▅▄▄▃▃▂▂▂▁

0,1
epoch,20.0
test_accuracy,0.3841
test_loss,18396.60363
train_accuracy,0.3837
train_loss,99287.11651
val_accuracy,0.37917
val_loss,11057.42697


[34m[1mwandb[0m: Agent Starting Run: ug4gasoc with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1200811716957826, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: jiuzxidq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.119816885289198, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▅▇▇█▇████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8511
test_loss,4115.58537
train_accuracy,0.86976
train_loss,19781.7933
val_accuracy,0.848
val_loss,2432.94692


[34m[1mwandb[0m: Agent Starting Run: mg9a4ulm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▇█▆▆▅▆▅▆▇
train_loss,█▆▅▄▂▁▁▁▁▂
val_accuracy,▂▃█▅▂▄▃▆▄▁
val_loss,█▇▄▃▁▁▂▁▂▂

0,1
epoch,10.0
test_accuracy,0.1062
test_loss,158725.25442
train_accuracy,0.11043
train_loss,852735.11439
val_accuracy,0.09867
val_loss,95374.42471


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pnyz7qh3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11971149617380597, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆██▆
train_loss,█▃▁▁▂
val_accuracy,▁▆██▆
val_loss,█▃▁▁▂

0,1
epoch,5.0
test_accuracy,0.4899
test_loss,14189.89148
train_accuracy,0.49131
train_loss,76647.92899
val_accuracy,0.49367
val_loss,8572.15668


[34m[1mwandb[0m: Agent Starting Run: 5xb72n1b with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9723295853829936, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▅▄▄▃▃▂▂▂▁▁▂▂▁▁▁▁▁▁▁
val_accuracy,▁▄▄▅▆▆▆▇▇▇▇▇▇▇▇█████
val_loss,█▅▃▃▃▂▂▁▁▁▁▂▂▂▃▃▃▃▃▄

0,1
epoch,20.0
test_accuracy,0.849
test_loss,5356.08449
train_accuracy,0.88504
train_loss,19584.80522
val_accuracy,0.86067
val_loss,3004.38549


[34m[1mwandb[0m: Agent Starting Run: 6xi6zj0e with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▅▆▆▆▆▇▇▇▇▇█████
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▅▅▅▆▆▆▆▇▇▇▇▇██████
val_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8212
test_loss,5226.50826
train_accuracy,0.83987
train_loss,26255.6422
val_accuracy,0.82617
val_loss,3080.18463


[34m[1mwandb[0m: Agent Starting Run: gnrthjn8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.226073187895848, max=1.0)…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▇▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▅▂▂▁

0,1
epoch,5.0
test_accuracy,0.8331
test_loss,4785.12603
train_accuracy,0.84194
train_loss,23716.28928
val_accuracy,0.82533
val_loss,2899.89354


[34m[1mwandb[0m: Agent Starting Run: 4lywg7l6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22701326073592693, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▂▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▁▂▂▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
val_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.3163
test_loss,18980.13218
train_accuracy,0.32256
train_loss,102220.90501
val_accuracy,0.32717
val_loss,11285.13889


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8gson5hs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22646153846153846, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▃█▁▄
train_loss,█▅▃▁▅
val_accuracy,▄▃█▁▄
val_loss,█▅▃▁▆

0,1
epoch,5.0
test_accuracy,0.105
test_loss,109999.93281
train_accuracy,0.10381
train_loss,594995.80303
val_accuracy,0.10217
val_loss,65722.87369


[34m[1mwandb[0m: Agent Starting Run: xj7pgr48 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12307831434255742, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▃▃▃▁▂▂▃▂▃
train_loss,▆█▇▄▃▂▂▁▁▁
val_accuracy,█▃▆▃▂▃▄▁▂▇
val_loss,▆█▇▅▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.0809
test_loss,78139.55503
train_accuracy,0.08317
train_loss,421159.99979
val_accuracy,0.08683
val_loss,46677.13876


[34m[1mwandb[0m: Agent Starting Run: ny1c4jre with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22670534458509142, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▆██
train_loss,█▅▃▂▁
val_accuracy,▁▃▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.7252
test_loss,7086.12096
train_accuracy,0.72889
train_loss,37038.55418
val_accuracy,0.72633
val_loss,4194.69513


[34m[1mwandb[0m: Agent Starting Run: u7yhakvn with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11954325867369346, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,██▇▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,██▇▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23027.94179
train_accuracy,0.1003
train_loss,124350.01423
val_accuracy,0.09733
val_loss,13817.72906


[34m[1mwandb[0m: Agent Starting Run: c6g1hw1o with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▇█▁▂▇
train_loss,▂▁▆█▅
val_accuracy,▇█▁▂▇
val_loss,▂▁▆█▅

0,1
epoch,5.0
test_accuracy,0.3773
test_loss,17227.20135
train_accuracy,0.37922
train_loss,92893.20746
val_accuracy,0.36983
val_loss,10345.81058


[34m[1mwandb[0m: Agent Starting Run: yiozbvai with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2267733145820515, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▄▅▆▆▆▇▇▇▇▇██████
train_loss,█▇▇▆▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁
val_accuracy,▁▂▃▄▄▅▆▆▆▇▇▇▇▇██████
val_loss,█▇▇▆▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.6727
test_loss,9049.65191
train_accuracy,0.6762
train_loss,47929.48778
val_accuracy,0.67517
val_loss,5374.38711


[34m[1mwandb[0m: Agent Starting Run: mqxhhjtv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▆▄▃▁
val_accuracy,▁▃▆▆█
val_loss,█▆▄▃▁

0,1
epoch,5.0
test_accuracy,0.0613
test_loss,36812.1764
train_accuracy,0.05837
train_loss,198915.02065
val_accuracy,0.06417
val_loss,22254.41654


[34m[1mwandb[0m: Agent Starting Run: v8p8qdlh with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,▁
val_accuracy,██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▁█

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: o28dqrzi with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█████████
train_loss,█▅▅▄▄▃▃▂▂▁
val_accuracy,▁█████████
val_loss,█▅▅▄▄▃▃▂▂▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23023.60326
train_accuracy,0.1003
train_loss,124326.214
val_accuracy,0.09733
val_loss,13814.86043


[34m[1mwandb[0m: Agent Starting Run: 5e58ok83 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▃▅▆▆▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8337
test_loss,4745.48945
train_accuracy,0.84537
train_loss,23404.40085
val_accuracy,0.83267
val_loss,2813.19942


[34m[1mwandb[0m: Agent Starting Run: 9shwepkb with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1196867024553375, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▄▄▄▁█
train_loss,█▅▃▂▁
val_accuracy,▃▃▃▁█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.0995
test_loss,23760.47741
train_accuracy,0.10022
train_loss,128316.18884
val_accuracy,0.102
val_loss,14212.69931


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: q1ykp7fp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▆▇▇▇▇▇▇▇███████
train_loss,█▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▆▇▇▇▇▇█████████
val_loss,█▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8501
test_loss,4281.99994
train_accuracy,0.87078
train_loss,20032.6886
val_accuracy,0.85233
val_loss,2559.32751


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cwqpob6b with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄█▃
train_loss,█▅▂▁▁
val_accuracy,▁▂▄█▃
val_loss,█▅▂▁▁

0,1
epoch,5.0
test_accuracy,0.1377
test_loss,23529.72924
train_accuracy,0.13996
train_loss,127010.99413
val_accuracy,0.1385
val_loss,14128.73354


[34m[1mwandb[0m: Agent Starting Run: 0el7wbdm with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.8332
test_loss,4649.30623
train_accuracy,0.85043
train_loss,22953.73664
val_accuracy,0.84083
val_loss,2689.91919


[34m[1mwandb[0m: Agent Starting Run: g9gtxe58 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1196062922928201, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▂▂▃▃▃▄▄▅▅▆▇▇▇▇██
train_loss,███████▆▅▄▄▄▃▃▂▂▂▁▁▁
val_accuracy,▁▁▁▁▂▂▃▃▃▄▄▅▅▆▆▇▇▇██
val_loss,███████▆▅▄▄▄▃▃▂▂▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.6628
test_loss,9541.52488
train_accuracy,0.66656
train_loss,50533.20046
val_accuracy,0.66267
val_loss,5685.2203


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ra84fqcu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1195957820738137, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▇▇▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23026.89353
train_accuracy,0.1003
train_loss,124344.43818
val_accuracy,0.09733
val_loss,13816.78521


[34m[1mwandb[0m: Agent Starting Run: ao1nkivx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.865506329113924, max=1.0)…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▆▇▂▁▆▃▄▅▇█▆▇▇▅▄▄▂▄▂▃
train_loss,█▄▄▆▅▆▅▃▃▁▁▂▂▃▃▄▆▆▅▄
val_accuracy,▇▇▂▁▇▄▅▅▇█▆▆▇▅▄▄▂▄▂▃
val_loss,█▄▅▇▅▅▅▃▂▁▁▂▂▃▃▄▅▆▅▃

0,1
epoch,20.0
test_accuracy,0.07
test_loss,83869.20051
train_accuracy,0.06372
train_loss,455527.62837
val_accuracy,0.06383
val_loss,50040.53976


[34m[1mwandb[0m: Agent Starting Run: l45tcxds with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9722246637953766, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆█▆▇
train_loss,█▂▁▁▁
val_accuracy,▁▇█▆▇
val_loss,█▂▁▁▁

0,1
epoch,5.0
test_accuracy,0.1743
test_loss,22805.97311
train_accuracy,0.17065
train_loss,123164.86913
val_accuracy,0.17567
val_loss,13683.32229


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4bea644f with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▆▆▆▇▇▇▇▇▇██████
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████
val_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8233
test_loss,5218.83315
train_accuracy,0.83969
train_loss,24721.42946
val_accuracy,0.82183
val_loss,3082.38064


[34m[1mwandb[0m: Agent Starting Run: 2vthsvoh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01128888888957186, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11953275952924644, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▆▂▄▅█▁▄▆▁▁
train_loss,▁▇▅█▅▄▅▄▃▂
val_accuracy,▆▃▄▅█▁▄▆▁▁
val_loss,▁▇▄█▅▄▅▃▃▂

0,1
epoch,10.0
test_accuracy,0.061
test_loss,149437.11837
train_accuracy,0.06241
train_loss,811278.60928
val_accuracy,0.05967
val_loss,89848.12051


[34m[1mwandb[0m: Agent Starting Run: 0u0xs2vr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2267687434002112, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▇▇▇██
train_loss,█▅▄▄▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▆▇▇██
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.8702
test_loss,3720.54095
train_accuracy,0.8942
train_loss,15894.69724
val_accuracy,0.871
val_loss,2156.22083


[34m[1mwandb[0m: Agent Starting Run: dtwxjj1o with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8668428005284016, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▅▆▆▇▇▇▇▇▇▇▇██████
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇▇███████
val_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8465
test_loss,4457.74688
train_accuracy,0.8665
train_loss,21064.75196
val_accuracy,0.8495
val_loss,2599.71801


[34m[1mwandb[0m: Agent Starting Run: sw4ausgj with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12295600325232632, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▇█
train_loss,█▆▄▃▁
val_accuracy,▁▃▅▆█
val_loss,█▆▄▃▁

0,1
epoch,5.0
test_accuracy,0.4962
test_loss,15763.84916
train_accuracy,0.49833
train_loss,84567.56025
val_accuracy,0.48217
val_loss,9539.68706


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c220o5ch with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22696471002376134, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▆█
train_loss,█▅▃▂▁
val_accuracy,▁▃▄▆█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.169
test_loss,23258.64011
train_accuracy,0.16652
train_loss,125299.11043
val_accuracy,0.163
val_loss,13903.66435


[34m[1mwandb[0m: Agent Starting Run: eq2tw03r with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298788207632483, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
train_loss,█▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁
val_accuracy,▁▁▂▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇██
val_loss,█▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁

0,1
epoch,20.0
test_accuracy,0.4447
test_loss,16889.38622
train_accuracy,0.45278
train_loss,90771.01783
val_accuracy,0.437
val_loss,10157.91408


[34m[1mwandb[0m: Agent Starting Run: vabz3qja with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22643334505803728, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇█████
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇████
val_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7954
test_loss,5807.9179
train_accuracy,0.80598
train_loss,30199.89478
val_accuracy,0.78967
val_loss,3544.4368


[34m[1mwandb[0m: Agent Starting Run: 9edkinna with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12307831434255742, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▂▃▁▄▆▇█▄
train_loss,█▄▅▇▄▂▂▁▁▂
val_accuracy,▂▄▂▄▁▄▅▆█▄
val_loss,█▄▅▇▃▂▂▁▁▂

0,1
epoch,10.0
test_accuracy,0.1179
test_loss,77895.8808
train_accuracy,0.11126
train_loss,426837.0742
val_accuracy,0.10917
val_loss,47363.11914


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h1cs96os with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11967991558213155, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█▆▆▆
train_loss,▂▁▇██
val_accuracy,█▁▁▁▁
val_loss,▂▁▇██

0,1
epoch,5.0
test_accuracy,0.1
test_loss,394769.96628
train_accuracy,0.1003
train_loss,2131116.22024
val_accuracy,0.09733
val_loss,237322.95729


[34m[1mwandb[0m: Agent Starting Run: vzdy8gjq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 411qxrby with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11984853821768228, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▅▆▆▇███
train_loss,█▇▆▅▅▄▃▂▁▁
val_accuracy,▁▃▄▅▆▇▇███
val_loss,█▇▆▅▅▄▃▂▁▁

0,1
epoch,10.0
test_accuracy,0.6964
test_loss,9775.02884
train_accuracy,0.70556
train_loss,52202.86512
val_accuracy,0.695
val_loss,5837.14517


[34m[1mwandb[0m: Agent Starting Run: 6j76lhpy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11957476717624319, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▅▄▃▂▂▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▄▂▂▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23057.51445
train_accuracy,0.1003
train_loss,124508.49213
val_accuracy,0.09733
val_loss,13836.59458


[34m[1mwandb[0m: Agent Starting Run: u0t60cy9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,██▇▇▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,██▇▇▇▆▆▆▆▅▅▄▄▄▃▃▂▂▂▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23048.31094
train_accuracy,0.1003
train_loss,124457.42975
val_accuracy,0.09733
val_loss,13832.24031


[34m[1mwandb[0m: Agent Starting Run: f8pspx44 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2267814779017661, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.7684
test_loss,6436.89341
train_accuracy,0.78302
train_loss,33090.38193
val_accuracy,0.77083
val_loss,3858.23205


[34m[1mwandb[0m: Agent Starting Run: jh96yzuw with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▄▄▅▅▅▆▆▆▇▇▇▇▇████
train_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▂▃▃▄▅▅▅▆▆▆▆▇▇▇▇████
val_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.53
test_loss,13534.01775
train_accuracy,0.53187
train_loss,72159.48127
val_accuracy,0.52817
val_loss,8094.38027


[34m[1mwandb[0m: Agent Starting Run: 6c0yomak with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12003530450132392, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: f53px4c8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▄▆█
train_loss,▅▁▂▇█
val_accuracy,▇▂▁▂█
val_loss,▃▁▃▇█

0,1
epoch,5.0
test_accuracy,0.1088
test_loss,111453.54741
train_accuracy,0.11083
train_loss,603528.36321
val_accuracy,0.113
val_loss,67459.67045


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u1rxjpqf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12304493264623452, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▇█
train_loss,█▆▄▂▁
val_accuracy,▁▃▅▇█
val_loss,█▆▄▂▁

0,1
epoch,5.0
test_accuracy,0.5397
test_loss,15847.01466
train_accuracy,0.53469
train_loss,85487.66277
val_accuracy,0.53117
val_loss,9501.55315


[34m[1mwandb[0m: Agent Starting Run: f4v7xwys with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12302268824007954, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▇█▃▄▁
train_loss,█▄▁▃▄
val_accuracy,▄█▂▃▁
val_loss,█▃▁▂▃

0,1
epoch,5.0
test_accuracy,0.0895
test_loss,89664.21683
train_accuracy,0.09111
train_loss,482430.75415
val_accuracy,0.0885
val_loss,53712.58387


[34m[1mwandb[0m: Agent Starting Run: j7147t3p with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.972485935302391, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▄▆▇▇▅▁▄▄▄
train_loss,▁▇▅▃▃▆█▅▄▄
val_accuracy,█▄▅▇▇▅▁▄▄▄
val_loss,▁▇▅▃▃▆█▅▄▄

0,1
epoch,10.0
test_accuracy,0.6354
test_loss,9531.6712
train_accuracy,0.64985
train_loss,50554.27593
val_accuracy,0.65233
val_loss,5582.82444


[34m[1mwandb[0m: Agent Starting Run: w7yza370 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22697281604645025, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▂█
train_loss,█▃▂▁▁
val_accuracy,▁▁▁▂█
val_loss,█▄▂▁▁

0,1
epoch,5.0
test_accuracy,0.1041
test_loss,23060.35872
train_accuracy,0.10465
train_loss,124526.51861
val_accuracy,0.102
val_loss,13838.32425


[34m[1mwandb[0m: Agent Starting Run: w79rfj8s with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▆▇██████▇▇▇▇▇▇▇▇▇
train_loss,██▇▇▇▆▆▅▅▅▄▄▄▃▃▃▂▂▁▁
val_accuracy,▁▂▃▆▇█████▇▇▆▆▆▆▆▆▆▆
val_loss,██▇▇▇▆▆▅▅▅▄▄▄▃▃▃▂▂▁▁

0,1
epoch,20.0
test_accuracy,0.1177
test_loss,23026.92978
train_accuracy,0.11763
train_loss,124345.17821
val_accuracy,0.11583
val_loss,13816.28808


[34m[1mwandb[0m: Agent Starting Run: 2k5p6sc7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,█▄▇▆▄▂▂▂▁▁▁▂▃▃▁▁▁▁▁▁
train_loss,▁█▅▅▅▅▅▅▄▄▄▄▄▄▄▄▄▄▄▄
val_accuracy,█▂▅▂▂▂▂▂▁▂▁▁▂▁▁▁▁▁▁▂
val_loss,▁█▅▅▅▅▅▅▄▄▃▃▃▃▃▃▃▃▃▃

0,1
epoch,20.0
test_accuracy,0.1002
test_loss,23145.71133
train_accuracy,0.10033
train_loss,124935.17335
val_accuracy,0.09783
val_loss,13897.5351


[34m[1mwandb[0m: Agent Starting Run: 6jko0kdw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197325591624879, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▄▄▅▅▆▇▇██
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.8783
test_loss,3442.95718
train_accuracy,0.90394
train_loss,14238.27524
val_accuracy,0.879
val_loss,2012.88747


[34m[1mwandb[0m: Agent Starting Run: 16ovnecu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1197325591624879, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
test_accuracy,0.6696
test_loss,8992.71171
train_accuracy,0.68156
train_loss,47425.0812
val_accuracy,0.675
val_loss,5395.07555


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xx6vto6i with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294192147638863, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▇▅▄▁
val_accuracy,▁▄▆▇█
val_loss,█▇▅▄▁

0,1
epoch,5.0
test_accuracy,0.3841
test_loss,19810.41896
train_accuracy,0.38811
train_loss,106721.74794
val_accuracy,0.39383
val_loss,11847.36024


[34m[1mwandb[0m: Agent Starting Run: fe01oijv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11956426249670561, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█████████
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▁█████████
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1001
test_loss,23041.04177
train_accuracy,0.10031
train_loss,124428.53363
val_accuracy,0.09733
val_loss,13828.25719


[34m[1mwandb[0m: Agent Starting Run: o98nv9oh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11975362956445226, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▁▁▇▇▇█▇█▇
train_loss,██▇▅▅▄▂▃▂▁
val_accuracy,▂▁▁▇▆▇█▇█▇
val_loss,██▇▅▅▄▂▃▂▁

0,1
epoch,10.0
test_accuracy,0.3848
test_loss,17276.33096
train_accuracy,0.3858
train_loss,93078.16386
val_accuracy,0.375
val_loss,10388.69766


[34m[1mwandb[0m: Agent Starting Run: abjy07bo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▆█▆▁▃
train_loss,▄▁▄█▆
val_accuracy,▆█▅▁▃
val_loss,▄▁▄█▆

0,1
epoch,5.0
test_accuracy,0.2152
test_loss,20602.98794
train_accuracy,0.21728
train_loss,111028.84522
val_accuracy,0.2095
val_loss,12388.25109


[34m[1mwandb[0m: Agent Starting Run: a1px5qme with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11964835164835165, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▆██
train_loss,█▅▄▂▁
val_accuracy,▁▂▆█▇
val_loss,█▅▄▂▁

0,1
epoch,5.0
test_accuracy,0.1254
test_loss,22729.1954
train_accuracy,0.12793
train_loss,122767.82749
val_accuracy,0.12767
val_loss,13640.49141


[34m[1mwandb[0m: Agent Starting Run: 8dea6g7q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.010 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8925336382024448, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▄▄▂█▁▁▂▅▂
train_loss,█▃▁▄▃▄▃▁▂▃
val_accuracy,▂▄▄▂█▁▁▁▅▂
val_loss,█▃▁▄▃▄▃▁▂▃

0,1
epoch,10.0
test_accuracy,0.1064
test_loss,53369.69902
train_accuracy,0.10622
train_loss,287549.04156
val_accuracy,0.1065
val_loss,32017.95194


[34m[1mwandb[0m: Agent Starting Run: qyq9hsxq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▅▆▇▇█▇▇▇█
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8609
test_loss,3827.85656
train_accuracy,0.88774
train_loss,16041.41889
val_accuracy,0.861
val_loss,2255.10196


[34m[1mwandb[0m: Agent Starting Run: w45b6unz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2277821904509752, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: r9am2cgp with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12294489611562782, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▂▃▅▇▇▇▇█
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▂▂▃▅▇▇▇▇█
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.6444
test_loss,8862.56362
train_accuracy,0.65313
train_loss,46834.41305
val_accuracy,0.64333
val_loss,5270.25047


[34m[1mwandb[0m: Agent Starting Run: tgno5f35 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11966939242064539, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
train_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▇▇█▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23028.8053
train_accuracy,0.1003
train_loss,124354.64963
val_accuracy,0.09733
val_loss,13818.19669


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9t9wpino with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▁▁▁▁▁▁▁▁▁
train_loss,█▂▁▁▁▁▁▁▁▁
val_accuracy,█▁▁▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23024.23026
train_accuracy,0.1003
train_loss,124329.97304
val_accuracy,0.09733
val_loss,13815.28181


[34m[1mwandb[0m: Agent Starting Run: e9cvr8oj with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁█▁▁▁▁▁▁▁▁
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23020.88832
train_accuracy,0.1003
train_loss,124311.4179
val_accuracy,0.09733
val_loss,13812.98094


[34m[1mwandb[0m: Agent Starting Run: lfazpli8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cw4cisd4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22655769399771508, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▃▅▅▅▅▄▅▆▆▆▇█▂▃▅▅▅▅
train_loss,▆▅▅▄▄▃▂▂▂▂▂▂▁▁█▆▃▂▂▁
val_accuracy,▁▃▃▅▅▅▅▄▅▆▆▆▇█▂▃▅▅▅▅
val_loss,▆▅▅▄▄▃▂▂▂▂▂▂▁▁█▆▃▂▁▁

0,1
epoch,20.0
test_accuracy,0.3031
test_loss,19180.29138
train_accuracy,0.30261
train_loss,103484.21918
val_accuracy,0.30617
val_loss,11538.92597


[34m[1mwandb[0m: Agent Starting Run: lv7lj0th with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01128888888957186, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1228878648233487, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇█████
train_loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▃▄▅▅▅▆▆▇▇▇▇▇▇▇█████
val_loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.672
test_loss,9420.01196
train_accuracy,0.67948
train_loss,49886.14152
val_accuracy,0.671
val_loss,5659.30382


[34m[1mwandb[0m: Agent Starting Run: m7v3f9j1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22638193162843834, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▇█▇▆▂▄▅▂▁▃▃▂▁▂▂▂▂
train_loss,▇▄▅▄▁▁▂▇▄▃▄█▆▆▆▇▆▇▆▆
val_accuracy,▂▃▅▇█▇▆▂▄▅▃▂▃▄▂▁▂▂▂▂
val_loss,▇▄▅▄▁▁▂▇▄▃▄█▆▆▆▇▆▇▆▆

0,1
epoch,20.0
test_accuracy,0.1069
test_loss,22954.4465
train_accuracy,0.10661
train_loss,124002.09654
val_accuracy,0.10433
val_loss,13778.10888


[34m[1mwandb[0m: Agent Starting Run: 4jpvd1sp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1230115690527838, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▂▇█▅▅█▇
train_loss,██▅▃▂▁▁▁▁▁
val_accuracy,▂▂▁▂▇█▄▅█▆
val_loss,██▅▃▂▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1313
test_loss,66491.32246
train_accuracy,0.13719
train_loss,357394.37434
val_accuracy,0.131
val_loss,39924.8096


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 80rdsoqh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▃▃▅▆▇███
train_loss,█▇▆▅▅▄▃▂▂▁
val_accuracy,▁▂▃▃▅▆▇███
val_loss,█▇▆▅▅▄▃▂▂▁

0,1
epoch,10.0
test_accuracy,0.5109
test_loss,17518.01733
train_accuracy,0.51657
train_loss,94343.12402
val_accuracy,0.50633
val_loss,10522.80855


[34m[1mwandb[0m: Agent Starting Run: 7w0nf1q7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12296711239609685, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▃▁█▇▇█▆█████████████
train_loss,▆▄█▃▅▇▆▇▇▇▅▄▂▂▁▁▁▁▁▁
val_accuracy,▃▁█▇▇█▇█████▇███████
val_loss,▆▄█▄▅▇▆▇▇▇▅▄▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,35396.86012
train_accuracy,0.0998
train_loss,191253.37641
val_accuracy,0.10183
val_loss,21127.78061


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jou4u4ka with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁█▂▂▄▄▄▇▇
train_loss,▇▆▁▁▃▇▅▆▆█
val_accuracy,▁▁█▂▂▄▄▄▇▇
val_loss,▇▆▁▁▃▇▅▆▆█

0,1
epoch,10.0
test_accuracy,0.1914
test_loss,122118.77862
train_accuracy,0.19237
train_loss,659026.63632
val_accuracy,0.18867
val_loss,73352.54016


[34m[1mwandb[0m: Agent Starting Run: fa3xn3pn with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,█████▇█▁████████████
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,█████▇▇▁▇▇▇▇▇▇▇▇▇▇▇▇
val_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23025.70896
train_accuracy,0.1003
train_loss,124338.32362
val_accuracy,0.09733
val_loss,13815.97729


[34m[1mwandb[0m: Agent Starting Run: quistvdu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,███▅▄▆▆▃▂▁▁▁▂▂▂▃▃▃▄▄
train_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,███▅▄▆▅▃▂▁▁▁▁▂▂▂▃▃▄▄
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.0714
test_loss,23015.93328
train_accuracy,0.07246
train_loss,124281.33756
val_accuracy,0.072
val_loss,13809.13331


[34m[1mwandb[0m: Agent Starting Run: kxx2cswg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.009 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.8654302540212709, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▅▆▆▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.6593
test_loss,9591.32914
train_accuracy,0.67219
train_loss,50570.1725
val_accuracy,0.66217
val_loss,5789.15783


[34m[1mwandb[0m: Agent Starting Run: zrucz9lu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11964835164835165, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▅▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▆▆▇▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.8005
test_loss,5694.29136
train_accuracy,0.81922
train_loss,27023.38428
val_accuracy,0.79767
val_loss,3450.12462


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9dz0kad4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11941217881027807, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
test_accuracy,0.837
test_loss,4584.36341
train_accuracy,0.85319
train_loss,22400.32175
val_accuracy,0.84183
val_loss,2655.36265


[34m[1mwandb[0m: Agent Starting Run: jwutadgv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1196062922928201, max=1.0…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▆██▃▁
train_loss,█▆▅▄▁
val_accuracy,▆██▄▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
test_accuracy,0.0893
test_loss,51827.80457
train_accuracy,0.09252
train_loss,276789.70572
val_accuracy,0.09383
val_loss,30670.91959


[34m[1mwandb[0m: Agent Starting Run: uh1ni6bu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9721539002108222, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▅▆▆▆▆▇▇▇▇▇▇▇█████
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇▇▇█████████
val_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8547
test_loss,4151.11636
train_accuracy,0.8717
train_loss,19776.94611
val_accuracy,0.85817
val_loss,2419.70773


[34m[1mwandb[0m: Agent Starting Run: 4fb5wb95 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12297822354748351, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▃▄▁▂▃▂▂▃▄▃▄▄▂▂▅█▄▁▆▆
train_loss,██▇▆▆▇▇█▅▇▄▃▅▄▂▁▂▃▂▂
val_accuracy,▄▅▁▂▂▁▁▃▄▃▄▄▂▁▆█▃▁▆▆
val_loss,██▇▆▆▇▇█▅▇▄▃▅▄▂▁▂▃▂▂

0,1
epoch,20.0
test_accuracy,0.1118
test_loss,94744.69738
train_accuracy,0.11248
train_loss,511632.47381
val_accuracy,0.10967
val_loss,57881.26023


[34m[1mwandb[0m: Agent Starting Run: 7vzedqu9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: oea0s9lq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▁██▄
train_loss,█▆▁▁▁
val_accuracy,▂▁██▄
val_loss,█▆▁▁▁

0,1
epoch,5.0
test_accuracy,0.5005
test_loss,13451.88324
train_accuracy,0.50648
train_loss,72290.57392
val_accuracy,0.50817
val_loss,8016.27862


[34m[1mwandb[0m: Agent Starting Run: vdbde6ko with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22644831836590948, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▆▅▁▁
train_loss,▁▃▃██
val_accuracy,█▆▆▁▁
val_loss,▁▃▃██

0,1
epoch,5.0
test_accuracy,0.3174
test_loss,19609.96166
train_accuracy,0.32013
train_loss,105986.57133
val_accuracy,0.316
val_loss,11717.59414


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d9ln2ad2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇███████
train_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇████████
val_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.7993
test_loss,5614.38773
train_accuracy,0.80922
train_loss,28909.75972
val_accuracy,0.79933
val_loss,3316.70666


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hi9gxo6m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,▇█▄▄▃▃▃▃▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▄▄▃▃▃▂▁

0,1
epoch,10.0
test_accuracy,0.1
test_loss,23075.69393
train_accuracy,0.10026
train_loss,124601.07402
val_accuracy,0.09767
val_loss,13853.08959


[34m[1mwandb[0m: Agent Starting Run: o8880wdu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22695720938406116, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
test_accuracy,0.8431
test_loss,4796.29066
train_accuracy,0.86667
train_loss,20538.98512
val_accuracy,0.84367
val_loss,2787.30431


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wetmttw5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11971149617380597, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▂▄▄▁▂▄▆▇█▆
train_loss,█▆▅▅▅▃▂▂▁▂
val_accuracy,▂▃▄▁▂▄▇▇█▇
val_loss,█▆▄▅▅▃▁▂▁▂

0,1
epoch,10.0
test_accuracy,0.1083
test_loss,119106.04492
train_accuracy,0.10898
train_loss,640179.50682
val_accuracy,0.1105
val_loss,72016.07799


[34m[1mwandb[0m: Agent Starting Run: qjvnvss3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11964835164835165, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇████
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.745
test_loss,7162.56527
train_accuracy,0.75943
train_loss,36854.7872
val_accuracy,0.75333
val_loss,4206.93077


[34m[1mwandb[0m: Agent Starting Run: t1nq5a39 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11964835164835165, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇█████
train_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇█████
val_loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.668
test_loss,9725.80767
train_accuracy,0.67876
train_loss,50559.24414
val_accuracy,0.66433
val_loss,5784.21234


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i4lnu561 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.22668074621612108, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▆▆▇▇▇▇▇▇██████████
train_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▅▆▆▇▇▇▇▇███████████
val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8353
test_loss,4708.46867
train_accuracy,0.85028
train_loss,23085.76839
val_accuracy,0.834
val_loss,2768.59557


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5ynvadsq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▅▆▆▆▆▇▇▇▇▇▇▇█████
train_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▄▅▅▆▆▇▇▇▇▇▇▇▇██████
val_loss,█▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8664
test_loss,3699.43566
train_accuracy,0.89385
train_loss,15796.04233
val_accuracy,0.87517
val_loss,2139.36596


[34m[1mwandb[0m: Agent Starting Run: a91yivmw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12010236498411578, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6pnts7ob with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12307831434255742, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▁▃▅▆▅▄▃▅▅
train_loss,▁█▇▅▅▆▆▆▅▄
val_accuracy,█▁▃▅▆▅▄▃▅▅
val_loss,▁█▇▅▅▆▆▆▅▄

0,1
epoch,10.0
test_accuracy,0.5794
test_loss,11933.43787
train_accuracy,0.58269
train_loss,64076.69503
val_accuracy,0.58067
val_loss,7174.29616


[34m[1mwandb[0m: Agent Starting Run: hqlvo7xn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,████▁▁▁▁▁▁
train_loss,█▅█▁
val_accuracy,████▁▁▁▁▁▁
val_loss,▁▁█▄

0,1
epoch,10.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: j6iqajj8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11961680435928985, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,▁▅▆▆▇▇▇▇▇▇▇█████████
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▁▅▄▄▄▅▅▅▆▆▆▆▇▇▇▇████

0,1
epoch,20.0
test_accuracy,0.1
test_loss,23117.45186
train_accuracy,0.10026
train_loss,124821.30594
val_accuracy,0.09767
val_loss,13883.40522


[34m[1mwandb[0m: Agent Starting Run: 045z6nww with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▄▇█
train_loss,█▅▃▃▁
val_accuracy,▁▄▄▇█
val_loss,█▅▃▃▁

0,1
epoch,5.0
test_accuracy,0.6274
test_loss,12336.98564
train_accuracy,0.62811
train_loss,66207.23936
val_accuracy,0.625
val_loss,7403.95155


[34m[1mwandb[0m: Agent Starting Run: u0kf5hqt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▂█
train_loss,▁█▆▂▁
val_accuracy,▁▁▁▂█
val_loss,▁█▆▂▁

0,1
epoch,5.0
test_accuracy,0.1894
test_loss,87270.47314
train_accuracy,0.19044
train_loss,470107.20627
val_accuracy,0.181
val_loss,52574.89246


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rr6pf9fe with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12287676183592339, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇▇▇█████
train_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇▇██████
val_loss,█▆▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
test_accuracy,0.8011
test_loss,5529.7647
train_accuracy,0.81698
train_loss,27375.93563
val_accuracy,0.80033
val_loss,3312.88428


[34m[1mwandb[0m: Agent Starting Run: uzgdbu7z with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▆▃▄▅▂▅▆▆▁█▁▅▃▇▃▃
train_loss,█▄▂▃▂▃▃▃▄▂▂▂▄▂▄▃▂▁▃▄
val_accuracy,▁▃▄▅▆▃▄▄▂▄▆▆▁█▁▅▃▇▃▃
val_loss,█▄▂▃▂▃▃▃▄▂▂▂▄▂▄▂▂▁▃▄

0,1
epoch,20.0
test_accuracy,0.1947
test_loss,22724.87105
train_accuracy,0.18837
train_loss,122836.21625
val_accuracy,0.18017
val_loss,13673.0295


[34m[1mwandb[0m: Agent Starting Run: 5h0h0tdp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▅██▇▅▃▆▂
train_loss,█▅▃▃▁▁▂▂▁▁
val_accuracy,▁▅▅██▇▄▃▆▂
val_loss,█▅▃▃▁▁▂▂▁▁

0,1
epoch,10.0
test_accuracy,0.1397
test_loss,31124.20771
train_accuracy,0.14452
train_loss,168151.8111
val_accuracy,0.1415
val_loss,18837.42558


[34m[1mwandb[0m: Agent Starting Run: tfw8l2f3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9718953100298612, max=1.0…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▅▃▆█▄▂▅▂▃▄▆▄▄▃▄▃▃▄
train_loss,█▃▂▂▁▁▂▃▂▂▂▂▂▂▂▂▂▂▂▂
val_accuracy,▁▅▅▃▆█▄▂▅▂▃▄▆▄▄▃▄▃▄▅
val_loss,█▃▂▂▁▁▂▃▂▂▂▂▂▂▂▂▂▂▂▂

0,1
epoch,20.0
test_accuracy,0.2459
test_loss,21125.99609
train_accuracy,0.23687
train_loss,114381.36751
val_accuracy,0.23683
val_loss,12696.5399


[34m[1mwandb[0m: Agent Starting Run: cg002y6r with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.7
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.9731679422890824, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,█▁▂▂▄▇▂▂▃▇
train_loss,█▃▁▁▁▁▁▁▁▁
val_accuracy,▁█▇▇▇▄▇▇▇▃
val_loss,█▃▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1001
test_loss,23033.97848
train_accuracy,0.10048
train_loss,124384.43215
val_accuracy,0.0975
val_loss,13836.80355


[34m[1mwandb[0m: Agent Starting Run: gs7ciay2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12300045187528243, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▂▄▅▆▇▇▇██
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▂▄▅▆▇▇▇██
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.5467
test_loss,12955.7088
train_accuracy,0.55487
train_loss,69263.65598
val_accuracy,0.5655
val_loss,7601.19622


[34m[1mwandb[0m: Agent Starting Run: xw14ifl9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11954992967651196, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▆▆██▂▄▁▂▃▇
train_loss,██▆▄▇▄▁▁▃▂
val_accuracy,▇▅██▂▃▁▂▂▄
val_loss,██▅▄▇▄▂▁▃▂

0,1
epoch,10.0
test_accuracy,0.1295
test_loss,69851.45306
train_accuracy,0.13541
train_loss,372915.44288
val_accuracy,0.12283
val_loss,41558.92975


[34m[1mwandb[0m: Agent Starting Run: a4lxq8tx with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2266654948145544, max=1.0…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▆▆▇▇████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,0.7537
test_loss,6872.17717
train_accuracy,0.76333
train_loss,35531.33838
val_accuracy,0.75067
val_loss,4038.54235


[34m[1mwandb[0m: Agent Starting Run: onnae9ht with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12011296443385402, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,
train_accuracy,0.09981
train_loss,
val_accuracy,0.10167
val_loss,


[34m[1mwandb[0m: Agent Starting Run: hk6hrp8e with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12298933670703055, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▁▁▁▁
train_loss,▁████
val_accuracy,█▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
test_accuracy,0.1005
test_loss,23217.8426
train_accuracy,0.10074
train_loss,125336.41677
val_accuracy,0.09783
val_loss,13951.08


[34m[1mwandb[0m: Agent Starting Run: 45sullge with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0.0005
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: xavier


VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
test_accuracy,▁
test_loss,▁
train_accuracy,█▇▅▇▆▇▆▅▆▃▅▆▇▇▇▆▂▁▃▃
train_loss,▁▁▃▂▂▂▂▂▂▃▂▂▂▂▂▂▄█▃▃
val_accuracy,█▇▆▇▆▇▆▅▆▄▅▆▇▇▇▆▂▁▃▄
val_loss,▁▁▂▂▂▂▂▂▂▃▂▂▂▂▂▂▄█▃▃

0,1
epoch,20.0
test_accuracy,0.35
test_loss,17522.81861
train_accuracy,0.34587
train_loss,95140.41322
val_accuracy,0.35267
val_loss,10643.84155


[34m[1mwandb[0m: Agent Starting Run: tkl45j3s with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.11969044059449477, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,█▁███
train_loss,█▄▁▁▁
val_accuracy,▁█▁▁▁
val_loss,█▄▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
test_loss,23028.59021
train_accuracy,0.1003
train_loss,124354.78672
val_accuracy,0.09733
val_loss,13818.06912


[34m[1mwandb[0m: Agent Starting Run: 3mjpy40u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.12305605786618445, max=1.…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
test_loss,▁
train_accuracy,▆▇█▄▁
train_loss,█▂▁▁▁
val_accuracy,▆▇█▃▁
val_loss,█▂▁▁▁

0,1
epoch,5.0
test_accuracy,0.0995
test_loss,23252.62294
train_accuracy,0.09983
train_loss,125505.02455
val_accuracy,0.104
val_loss,13923.35506


[34m[1mwandb[0m: Agent Starting Run: 2gnvhet2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9999
[34m[1mwandb[0m: 	criterion: cel
[34m[1mwandb[0m: 	decay: 5e-06
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_initialization: random
