In [None]:
import numpy as np
import wandb
from keras.datasets import fashion_mnist


# Neural Network Class: feed_forward_NN_4

class feed_forward_NN_4:
    """Fully connected classifier with a softmax output layer.

    Hidden layers share one activation (sigmoid, tanh or relu); the output
    layer is always softmax and the training loss is cross-entropy plus an
    L2 penalty on the weights (biases are not penalised).

    Supported optimizers: ``sgd``, ``momentum``, ``nesterov``, ``rmsprop``,
    ``adam`` and ``nadam`` (case-insensitive).

    Attributes set by the constructor:
        weights, biases: per-layer parameter arrays, ``weights[i]`` has shape
            ``(layers[i], layers[i+1])`` and ``biases[i]`` has shape
            ``(1, layers[i+1])``.
        v_w, v_b: velocity / second-moment buffers (all optimizers except sgd).
        m_w, m_b, t: first-moment buffers and step counter (adam, nadam).
    """

    # Optimizer names that have an update rule implemented in this class.
    _SUPPORTED_OPTIMIZERS = ("sgd", "momentum", "nesterov", "rmsprop", "adam", "nadam")

    def __init__(self,
                 layers,
                 optimizer,
                 learning_rate,
                 momentum,
                 beta1,
                 beta2,
                 beta_rms,
                 epsilon,
                 weight_decay,
                 init_type,
                 activation
                 ):
        """Store the hyperparameters and allocate parameters and optimizer state.

        Args:
            layers: layer widths, input first and output last (at least two).
            optimizer: one of the names in ``_SUPPORTED_OPTIMIZERS``.
            learning_rate: step size used by every optimizer.
            momentum: velocity decay for ``momentum`` and ``nesterov``.
            beta1, beta2: moment decay rates for ``adam`` and ``nadam``.
            beta_rms: squared-gradient decay rate for ``rmsprop``.
            epsilon: constant added to the denominator of adaptive updates.
            weight_decay: L2 coefficient applied to the weights.
            init_type: ``"xavier"`` scales standard-normal draws by
                ``sqrt(1 / fan_in)``; any other value (e.g. ``"random"``)
                scales them by ``sqrt(2 / fan_in)``.
            activation: ``"sigmoid"``, ``"tanh"`` or ``"relu"``; any other
                value falls back to sigmoid.

        Raises:
            ValueError: if fewer than two layer sizes are given or the
                optimizer name is not supported.
        """
        if len(layers) < 2:
            raise ValueError("layers must contain at least an input and an output size")

        self.layers = layers
        self.layer_n = len(layers)
        self.optimizer = optimizer.lower()
        # Without this check an unknown optimizer name would make every
        # training step a silent no-op.
        if self.optimizer not in self._SUPPORTED_OPTIMIZERS:
            raise ValueError(
                f"unsupported optimizer {optimizer!r}; expected one of {self._SUPPORTED_OPTIMIZERS}"
            )
        self.lr = learning_rate
        self.momentum = momentum
        self.beta1 = beta1
        self.beta2 = beta2
        self.beta_rms = beta_rms
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.init_type = init_type.lower()
        self.activation = activation.lower()

        # Initialize weights and biases.
        self.weights = []
        self.biases = []
        for i in range(self.layer_n - 1):
            if self.init_type == "xavier":
                # Standard-normal draws scaled by sqrt(1 / fan_in).
                w = np.random.randn(layers[i], layers[i+1]) * np.sqrt(1.0 / layers[i])
            else:
                # NOTE: the "random" option is also scaled, by sqrt(2 / fan_in).
                w = np.random.randn(layers[i], layers[i+1]) * np.sqrt(2.0 / layers[i])
            b = np.zeros((1, layers[i+1]))
            self.weights.append(w)
            self.biases.append(b)

        # Optimizer state: velocity / second-moment buffers ...
        if self.optimizer in ["momentum", "nesterov", "rmsprop", "adam", "nadam"]:
            self.v_w = [np.zeros_like(w) for w in self.weights]
            self.v_b = [np.zeros_like(b) for b in self.biases]
        # ... plus first-moment buffers and a step counter for Adam-style rules.
        if self.optimizer in ["adam", "nadam"]:
            self.m_w = [np.zeros_like(w) for w in self.weights]
            self.m_b = [np.zeros_like(b) for b in self.biases]
            self.t = 0

    # Activations
    def sigmoid(self, x):
        """Logistic sigmoid; the input is clipped to [-500, 500] before exp."""
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def tanh(self, x):
        """Hyperbolic tangent."""
        return np.tanh(x)

    def relu(self, x):
        """Rectified linear unit."""
        return np.maximum(0, x)

    def activate(self, x):
        """Apply the configured hidden activation (sigmoid when unrecognised)."""
        if self.activation == "sigmoid":
            return self.sigmoid(x)
        elif self.activation == "tanh":
            return self.tanh(x)
        elif self.activation == "relu":
            return self.relu(x)
        else:
            return self.sigmoid(x)

    # Derivatives
    def derivative(self, a):
        """Derivative of the hidden activation, expressed in terms of its OUTPUT ``a``.

        The caller passes stored activations (``self.h[i]``), not
        pre-activations, so sigmoid uses ``a * (1 - a)`` and tanh ``1 - a**2``.
        """
        if self.activation == "sigmoid":
            return a * (1 - a)
        elif self.activation == "tanh":
            return 1 - a**2
        elif self.activation == "relu":
            return (a > 0).astype(float)
        else:
            return a * (1 - a)

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted before exponentiating."""
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    # Forward pass
    def forward_pass(self, x):
        """Run the network on ``x`` and cache every layer's activations.

        Returns:
            ``self.h``: a list whose first entry is ``x``, followed by each
            hidden activation and finally the softmax output.
        """
        self.h = [x]
        # Hidden layers
        for i in range(self.layer_n - 2):
            z = np.dot(self.h[i], self.weights[i]) + self.biases[i]
            act = self.activate(z)
            self.h.append(act)
        # Output layer: softmax
        z_out = np.dot(self.h[-1], self.weights[-1]) + self.biases[-1]
        out = self.softmax(z_out)
        self.h.append(out)
        return self.h

    # Backward pass
    def backward_prop(self, y_true):
        """Back-propagate through the activations cached by ``forward_pass``.

        Args:
            y_true: targets with the same shape as the softmax output.

        Returns:
            ``(dw, db)``: per-layer gradients averaged over the batch, without
            the weight-decay term.
        """
        m = y_true.shape[0]
        dw = [None] * (self.layer_n - 1)
        db = [None] * (self.layer_n - 1)

        # Softmax + cross-entropy gives this simple output-layer error.
        delta = self.h[-1] - y_true

        for i in reversed(range(self.layer_n - 1)):
            dw[i] = np.dot(self.h[i].T, delta) / m
            db[i] = np.sum(delta, axis=0, keepdims=True) / m
            if i > 0:
                # Push the error through layer i's weights and the activation
                # of the layer below (h[i] is that layer's output).
                delta = np.dot(delta, self.weights[i].T) * self.derivative(self.h[i])
        return dw, db

    # Loss helpers
    @staticmethod
    def _cross_entropy(probs, y_true):
        """Mean cross-entropy; 1e-10 is added inside the log to avoid log(0)."""
        return -np.mean(np.sum(y_true * np.log(probs + 1e-10), axis=1))

    def _regularized_loss(self, probs, y_true):
        """Cross-entropy plus ``weight_decay / 2`` times the squared weight norm."""
        l2_norm_weights = 0
        for w in self.weights:
            l2_norm_weights += np.sum(w ** 2)
        return self._cross_entropy(probs, y_true) + (self.weight_decay / 2) * l2_norm_weights

    # Parameter updates for every optimizer except Nesterov
    def _update_params(self, dw, db):
        """Apply one optimizer step given raw gradients.

        The weight-decay term is added to the weight gradients here. The
        ``nesterov`` optimizer is handled entirely in ``_train_step`` and has
        no branch in this method.

        Args:
            dw, db: per-layer gradients as returned by ``backward_prop``;
                they are not modified.
        """
        # Build new arrays so the caller's gradient arrays stay untouched.
        dw = [g + self.weight_decay * w for g, w in zip(dw, self.weights)]

        if self.optimizer == "sgd":
            for i in range(self.layer_n - 1):
                self.weights[i] -= self.lr * dw[i]
                self.biases[i] -= self.lr * db[i]

        elif self.optimizer == "momentum":
            for i in range(self.layer_n - 1):
                self.v_w[i] = self.momentum * self.v_w[i] + dw[i]
                self.v_b[i] = self.momentum * self.v_b[i] + db[i]
                self.weights[i] -= self.lr * self.v_w[i]
                self.biases[i] -= self.lr * self.v_b[i]

        elif self.optimizer == "rmsprop":
            for i in range(self.layer_n - 1):
                self.v_w[i] = self.beta_rms * self.v_w[i] + (1 - self.beta_rms) * (dw[i] ** 2)
                self.v_b[i] = self.beta_rms * self.v_b[i] + (1 - self.beta_rms) * (db[i] ** 2)
                self.weights[i] -= self.lr * dw[i] / (np.sqrt(self.v_w[i]) + self.epsilon)
                self.biases[i] -= self.lr * db[i] / (np.sqrt(self.v_b[i]) + self.epsilon)

        elif self.optimizer == "adam":
            self.t += 1
            corr1 = 1 - self.beta1 ** self.t
            corr2 = 1 - self.beta2 ** self.t
            for i in range(self.layer_n - 1):
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                # Bias correction
                m_w_hat = self.m_w[i] / corr1
                m_b_hat = self.m_b[i] / corr1
                v_w_hat = self.v_w[i] / corr2
                v_b_hat = self.v_b[i] / corr2

                self.weights[i] -= self.lr * m_w_hat / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i] -= self.lr * m_b_hat / (np.sqrt(v_b_hat) + self.epsilon)

        elif self.optimizer == "nadam":
            self.t += 1
            # self.t is already the 1-based step count after the increment, so
            # it is the bias-correction exponent (same convention as the adam
            # branch). Using self.t + 1 here would under-correct early steps.
            corr1 = 1 - self.beta1 ** self.t
            corr2 = 1 - self.beta2 ** self.t
            for i in range(self.layer_n - 1):
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                # Bias correction
                m_w_hat = self.m_w[i] / corr1
                m_b_hat = self.m_b[i] / corr1
                v_w_hat = self.v_w[i] / corr2
                v_b_hat = self.v_b[i] / corr2

                # Nesterov-style numerator: corrected momentum plus the
                # corrected current gradient.
                grad_term_w = self.beta1 * m_w_hat + (1 - self.beta1) * dw[i] / corr1
                grad_term_b = self.beta1 * m_b_hat + (1 - self.beta1) * db[i] / corr1

                self.weights[i] -= self.lr * grad_term_w / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i] -= self.lr * grad_term_b / (np.sqrt(v_b_hat) + self.epsilon)

    # Single training step (Nesterov is handled here)
    def _train_step(self, x_batch, y_batch):
        """Run forward, backward and one parameter update on a mini-batch.

        Returns:
            The regularised loss of the batch. For ``nesterov`` it is
            evaluated at the look-ahead parameters.
        """
        if self.optimizer != "nesterov":
            self.forward_pass(x_batch)
            loss = self._regularized_loss(self.h[-1], y_batch)
            dW, dB = self.backward_prop(y_batch)
            self._update_params(dW, dB)
            return loss

        n_param_layers = self.layer_n - 1
        look_ahead = self.lr * self.momentum

        # Move to the look-ahead point: w_look = w - lr * momentum * v
        for i in range(n_param_layers):
            self.weights[i] -= look_ahead * self.v_w[i]
            self.biases[i] -= look_ahead * self.v_b[i]

        # Loss and gradients at the look-ahead position.
        self.forward_pass(x_batch)
        loss = self._regularized_loss(self.h[-1], y_batch)
        dW, dB = self.backward_prop(y_batch)

        # Weight decay is also evaluated at the look-ahead weights.
        for i in range(n_param_layers):
            dW[i] += self.weight_decay * self.weights[i]

        # Step back to w_t before applying the real update.
        for i in range(n_param_layers):
            self.weights[i] += look_ahead * self.v_w[i]
            self.biases[i] += look_ahead * self.v_b[i]

        # Velocity update u_t = momentum * u_{t-1} + grad, then w -= lr * u_t
        for i in range(n_param_layers):
            self.v_w[i] = self.momentum * self.v_w[i] + dW[i]
            self.v_b[i] = self.momentum * self.v_b[i] + dB[i]
            self.weights[i] -= self.lr * self.v_w[i]
            self.biases[i] -= self.lr * self.v_b[i]

        return loss

    # Outer training loop
    def training(self, x_train, y_train, x_val, y_val, epochs, batch_size):
        """Train for ``epochs`` epochs and log metrics to wandb after each one.

        The training set is reshuffled every epoch and consumed in batches of
        ``batch_size``; a final shorter batch is used when the sample count is
        not a multiple of ``batch_size``, so every sample is seen each epoch
        and a ``batch_size`` larger than the data set still trains.

        Args:
            x_train, y_train: training inputs and targets (same leading size;
                targets must match the softmax output shape).
            x_val, y_val: validation inputs and targets in the same format.
            epochs: number of passes over the training set.
            batch_size: positive integer mini-batch size.

        Raises:
            ValueError: if ``x_train`` is empty or ``batch_size`` is not positive.
        """
        n_samples = x_train.shape[0]
        if n_samples == 0:
            raise ValueError("x_train must contain at least one sample")
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        for ep in range(epochs):
            idx = np.random.permutation(n_samples)
            x_train_shuff = x_train[idx]
            y_train_shuff = y_train[idx]

            epoch_loss = 0.0
            n_batches = 0
            for start in range(0, n_samples, batch_size):
                end = start + batch_size
                epoch_loss += self._train_step(x_train_shuff[start:end], y_train_shuff[start:end])
                n_batches += 1
            # Mean of the per-batch losses.
            avg_loss = epoch_loss / n_batches

            # Validation: one forward pass feeds both accuracy and loss.
            val_outputs = self.forward_pass(x_val)[-1]
            preds = np.argmax(val_outputs, axis=1)
            val_labels = np.argmax(y_val, axis=1)
            val_acc = np.mean(preds == val_labels)

            # Plain cross-entropy (no L2 term) on the validation set.
            val_loss = self._cross_entropy(val_outputs, y_val)

            # Log metrics to wandb
            wandb.log({"epoch": ep+1, "training_loss": avg_loss, "validation_accuracy": val_acc, "validation loss": val_loss})
            print(f"Epoch {ep+1}/{epochs} - loss={avg_loss:.4f}, val_acc={val_acc:.4f}, val_loss={val_loss}" )

    # Prediction
    def predict(self, X):
        """Return the index of the highest softmax output for each row of ``X``."""
        self.forward_pass(X)
        return np.argmax(self.h[-1], axis=1)




# (x_train_full, y_train_full), (x_test, y_test) = fashion_mnist.load_data()
# x_train_full = x_train_full.reshape(x_train_full.shape[0], -1) / 255.0
# x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

# np.random.seed(42)
# idx = np.arange(x_train_full.shape[0])
# np.random.shuffle(idx)
# x_train_full = x_train_full[idx]
# y_train_full = y_train_full[idx]

# # 90% training, 10% validation 
# train_size=int(.9*len(x_train_full))

# x_train, y_train=x_train_full[:train_size],y_train_full[:train_size]
# x_val, y_val=x_train_full[train_size:], y_train_full[train_size:]

# num_classes = 10
# y_train_1h = np.eye(num_classes)[y_train]
# y_val_1h = np.eye(num_classes)[y_val]
# y_test_1h = np.eye(num_classes)[y_test]

# # model
# model = feed_forward_NN_4(
#     layers=[784] + [32] *3 + [10],
# optimizer="nesterov",
# learning_rate=0.01,
# momentum=0.9,
# beta1=0.9,
# beta2=0.999,
# beta_rms=0.9,
# epsilon=1e-4,
# weight_decay=0.0005,
# init_type="xavier",
# activation="relu")

#     # Train the model
# model.training(
#         x_train=x_train,
#         y_train=y_train_1h,
#         x_val=x_val,
#         y_val=y_val_1h,
#         epochs=10,
#         batch_size=32
#     )

#     #Evaluation on test set
# test_preds = model.predict(x_test)
# test_labels = np.argmax(y_test_1h, axis=1)
# test_acc = np.mean(test_preds == test_labels)
# print("test accuracy ",test_acc)
# #wandb.log({"test_accuracy": test_acc})




# train_sweep() function

def train_sweep():
    """Execute a single W&B sweep trial on Fashion-MNIST.

    Reads the trial's hyperparameters from ``wandb.config``, trains a
    ``feed_forward_NN_4`` on a 90/10 train/validation split of the shuffled
    training images, then logs and prints the accuracy on the test split.
    """
    wandb.init()
    config = wandb.config

    # Give the run a readable name built from its key hyperparameters.
    wandb.run.name = f"hl_{config.num_hidden_layers}_bs_{config.batch_size}_ac_{config.activation}_opt_{config.optimizer}"

    # Fetch the data set; flatten each image to a vector and scale by 1/255.
    (images, labels), (x_test, y_test) = fashion_mnist.load_data()
    images = images.reshape(len(images), -1) / 255.0
    x_test = x_test.reshape(len(x_test), -1) / 255.0

    # Fixed-seed shuffle so every trial sees the same split.
    np.random.seed(42)
    order = np.arange(len(images))
    np.random.shuffle(order)
    images, labels = images[order], labels[order]

    # First 90% of the shuffled samples train the model, the rest validate it.
    split = int(.9 * len(images))
    x_train, x_val = images[:split], images[split:]
    y_train, y_val = labels[:split], labels[split:]

    # One-hot encode the integer labels by indexing rows of an identity matrix.
    one_hot_rows = np.eye(10)
    y_train_1h = one_hot_rows[y_train]
    y_val_1h = one_hot_rows[y_val]
    y_test_1h = one_hot_rows[y_test]

    # Build the network described by this trial's configuration.
    hidden_widths = [config.hidden_size] * config.num_hidden_layers
    hyperparams = dict(
        optimizer=config.optimizer,
        learning_rate=config.learning_rate,
        momentum=config.momentum,
        beta1=config.beta1,
        beta2=config.beta2,
        beta_rms=config.beta_rms,
        epsilon=config.epsilon,
        weight_decay=config.weight_decay,
        init_type=config.init_type,
        activation=config.activation,
    )
    model = feed_forward_NN_4(layers=[784, *hidden_widths, 10], **hyperparams)

    # Fit the model; per-epoch metrics are logged inside training().
    model.training(
        x_train=x_train,
        y_train=y_train_1h,
        x_val=x_val,
        y_val=y_val_1h,
        epochs=config.epochs,
        batch_size=config.batch_size,
    )

    # Score the trained model on the held-out test split.
    test_acc = np.mean(model.predict(x_test) == np.argmax(y_test_1h, axis=1))

    wandb.log({"test_accuracy": test_acc})
    print("test accuracy ",test_acc)


# sweep configuration
# Random-search sweep that maximises the logged "validation_accuracy".
# Each entry below is (hyperparameter name, list of candidate values).
sweep_config = {
    "method": "random",
    "metric": dict(name="validation_accuracy", goal="maximize"),
    "parameters": {
        name: {"values": choices}
        for name, choices in (
            ("epochs", [10]),
            ("num_hidden_layers", [3, 5]),
            ("hidden_size", [64, 128]),
            ("weight_decay", [0.0, 0.0005]),
            ("learning_rate", [1e-3, 1e-4]),
            ("optimizer", ["momentum", "nesterov", "rmsprop", "adam", "nadam"]),
            ("batch_size", [32]),
            ("init_type", ["random", "xavier"]),
            ("activation", ["sigmoid", "tanh", "relu"]),
            ("momentum", [0.8, 0.9]),
            ("beta1", [0.9]),
            ("beta2", [0.999]),
            ("beta_rms", [0.9]),
            ("epsilon", [1e-8]),
        )
    },
}

# Running the sweep

if __name__ == "__main__":
    # Register the sweep definition under the project and keep its id ...
    sweep_id = wandb.sweep(sweep=sweep_config, project="q4_sweep_project")
    # ... then start an agent that calls train_sweep once per sampled configuration.
    wandb.agent(sweep_id=sweep_id, function=train_sweep)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: p60nv46x
Sweep URL: https://wandb.ai/ed24s401-indian-institute-of-technology-madras/q4_sweep_project/sweeps/p60nv46x


[34m[1mwandb[0m: Agent Starting Run: 983zh0zp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: Currently logged in as: [33med24s401[0m ([33med24s401-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/10 - loss=0.5553, val_acc=0.8282, val_loss=0.4502716316380152
Epoch 2/10 - loss=0.4163, val_acc=0.8480, val_loss=0.42432164110251913
Epoch 3/10 - loss=0.3919, val_acc=0.8623, val_loss=0.3812924125999896
Epoch 4/10 - loss=0.3840, val_acc=0.8690, val_loss=0.4012169075061982
Epoch 5/10 - loss=0.3837, val_acc=0.8687, val_loss=0.39931507821164314
Epoch 6/10 - loss=0.3857, val_acc=0.8615, val_loss=0.40627183501358344
Epoch 7/10 - loss=0.3884, val_acc=0.8632, val_loss=0.4016074682796713
Epoch 8/10 - loss=0.3928, val_acc=0.8398, val_loss=0.5069759598151149
Epoch 9/10 - loss=0.3916, val_acc=0.8578, val_loss=0.4444773724894061
Epoch 10/10 - loss=0.3890, val_acc=0.8718, val_loss=0.42694463804726085
test accuracy  0.8656


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▁▁▁▁▁▁▁▁
validation loss,▅▃▁▂▂▂▂█▅▄
validation_accuracy,▁▄▆█▇▆▇▃▆█

0,1
epoch,10.0
test_accuracy,0.8656
training_loss,0.38904
validation loss,0.42694
validation_accuracy,0.87183


[34m[1mwandb[0m: Agent Starting Run: uovgr4vw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8834, val_acc=0.8267, val_loss=0.47538230296875705
Epoch 2/10 - loss=0.6570, val_acc=0.8558, val_loss=0.39825404014473625
Epoch 3/10 - loss=0.6084, val_acc=0.8575, val_loss=0.39168677438194544
Epoch 4/10 - loss=0.5783, val_acc=0.8637, val_loss=0.36085865300128966
Epoch 5/10 - loss=0.5563, val_acc=0.8707, val_loss=0.3545870458846383
Epoch 6/10 - loss=0.5377, val_acc=0.8703, val_loss=0.34473827226874093
Epoch 7/10 - loss=0.5217, val_acc=0.8723, val_loss=0.3401226195617471
Epoch 8/10 - loss=0.5085, val_acc=0.8760, val_loss=0.33950222180277234
Epoch 9/10 - loss=0.4963, val_acc=0.8757, val_loss=0.3220967424473837
Epoch 10/10 - loss=0.4843, val_acc=0.8688, val_loss=0.343127294585632
test accuracy  0.8675


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▄▄▃▂▂▂▂▁▂
validation_accuracy,▁▅▅▆▇▇▇██▇

0,1
epoch,10.0
test_accuracy,0.8675
training_loss,0.48426
validation loss,0.34313
validation_accuracy,0.86883


[34m[1mwandb[0m: Agent Starting Run: q3c3z15h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.3440, val_acc=0.7082, val_loss=0.9073662886684932
Epoch 2/10 - loss=0.7750, val_acc=0.7633, val_loss=0.7138991853294984
Epoch 3/10 - loss=0.6476, val_acc=0.7920, val_loss=0.6288609369578053
Epoch 4/10 - loss=0.5838, val_acc=0.7983, val_loss=0.5806358211738426
Epoch 5/10 - loss=0.5445, val_acc=0.8093, val_loss=0.5485756062703193
Epoch 6/10 - loss=0.5190, val_acc=0.8168, val_loss=0.5264923467970231
Epoch 7/10 - loss=0.4991, val_acc=0.8190, val_loss=0.5119154577780011
Epoch 8/10 - loss=0.4842, val_acc=0.8245, val_loss=0.4939587734489917
Epoch 9/10 - loss=0.4717, val_acc=0.8273, val_loss=0.48385537928223477
Epoch 10/10 - loss=0.4610, val_acc=0.8283, val_loss=0.477346511452069
test accuracy  0.8235


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▆▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8235
training_loss,0.46102
validation loss,0.47735
validation_accuracy,0.82833


[34m[1mwandb[0m: Agent Starting Run: 9he8lidr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.6901, val_acc=0.6608, val_loss=1.0849220550864034
Epoch 2/10 - loss=1.0849, val_acc=0.7203, val_loss=0.8209892621538978
Epoch 3/10 - loss=0.9088, val_acc=0.7500, val_loss=0.7014003415657878
Epoch 4/10 - loss=0.8141, val_acc=0.7800, val_loss=0.6265385215766032
Epoch 5/10 - loss=0.7525, val_acc=0.7970, val_loss=0.5769467214846653
Epoch 6/10 - loss=0.7115, val_acc=0.8092, val_loss=0.5440950830985022
Epoch 7/10 - loss=0.6837, val_acc=0.8172, val_loss=0.520259758321714
Epoch 8/10 - loss=0.6639, val_acc=0.8195, val_loss=0.5060898067384088
Epoch 9/10 - loss=0.6491, val_acc=0.8228, val_loss=0.4920164784575944
Epoch 10/10 - loss=0.6372, val_acc=0.8255, val_loss=0.4804385771582166
test accuracy  0.8225


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▂▁▁▁▁
validation_accuracy,▁▄▅▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8225
training_loss,0.63718
validation loss,0.48044
validation_accuracy,0.8255


[34m[1mwandb[0m: Agent Starting Run: 06fllqqi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6100, val_acc=0.8407, val_loss=0.44123077796022897
Epoch 2/10 - loss=0.4132, val_acc=0.8550, val_loss=0.4002993521066746
Epoch 3/10 - loss=0.3807, val_acc=0.8495, val_loss=0.40426051903768817
Epoch 4/10 - loss=0.3577, val_acc=0.8613, val_loss=0.3784133103676986
Epoch 5/10 - loss=0.3424, val_acc=0.8652, val_loss=0.35747088755349204
Epoch 6/10 - loss=0.3284, val_acc=0.8642, val_loss=0.3588343176087859
Epoch 7/10 - loss=0.3184, val_acc=0.8747, val_loss=0.33800688875213775
Epoch 8/10 - loss=0.3071, val_acc=0.8792, val_loss=0.33343892668284936
Epoch 9/10 - loss=0.3005, val_acc=0.8750, val_loss=0.33113935830790064
Epoch 10/10 - loss=0.2907, val_acc=0.8742, val_loss=0.34139463457914676
test accuracy  0.8719


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▆▄▃▃▁▁▁▂
validation_accuracy,▁▄▃▅▅▅▇█▇▇

0,1
epoch,10.0
test_accuracy,0.8719
training_loss,0.29069
validation loss,0.34139
validation_accuracy,0.87417


[34m[1mwandb[0m: Agent Starting Run: c283qtl2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5316, val_acc=0.8408, val_loss=0.4262506810276712
Epoch 2/10 - loss=0.3910, val_acc=0.8612, val_loss=0.38029229947575316
Epoch 3/10 - loss=0.3584, val_acc=0.8673, val_loss=0.35333921797827467
Epoch 4/10 - loss=0.3346, val_acc=0.8703, val_loss=0.35265591099771987
Epoch 5/10 - loss=0.3207, val_acc=0.8672, val_loss=0.3440293440400351
Epoch 6/10 - loss=0.3072, val_acc=0.8673, val_loss=0.3479996395814301
Epoch 7/10 - loss=0.2944, val_acc=0.8773, val_loss=0.3374566103018656
Epoch 8/10 - loss=0.2859, val_acc=0.8805, val_loss=0.3280997443162233
Epoch 9/10 - loss=0.2771, val_acc=0.8798, val_loss=0.3222661801201401
Epoch 10/10 - loss=0.2679, val_acc=0.8777, val_loss=0.33938835640725173
test accuracy  0.8716


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▃▃▂▃▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▇██▇

0,1
epoch,10.0
test_accuracy,0.8716
training_loss,0.26789
validation loss,0.33939
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: 87e7mxie with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.6882, val_acc=0.6713, val_loss=0.9829690042692873
Epoch 2/10 - loss=1.0144, val_acc=0.7610, val_loss=0.7127878521082422
Epoch 3/10 - loss=0.8510, val_acc=0.7903, val_loss=0.6126208839811959
Epoch 4/10 - loss=0.7720, val_acc=0.8055, val_loss=0.556748979053276
Epoch 5/10 - loss=0.7221, val_acc=0.8183, val_loss=0.5202245598817438
Epoch 6/10 - loss=0.6901, val_acc=0.8305, val_loss=0.4956543569604034
Epoch 7/10 - loss=0.6667, val_acc=0.8338, val_loss=0.4803127923235173
Epoch 8/10 - loss=0.6491, val_acc=0.8340, val_loss=0.46663277474079934
Epoch 9/10 - loss=0.6347, val_acc=0.8390, val_loss=0.45545805654147564
Epoch 10/10 - loss=0.6227, val_acc=0.8425, val_loss=0.44658549176739265
test accuracy  0.836


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▂▂▂▁▁▁▁▁
validation loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▅▆▆▇█████

0,1
epoch,10.0
test_accuracy,0.836
training_loss,0.62273
validation loss,0.44659
validation_accuracy,0.8425


[34m[1mwandb[0m: Agent Starting Run: pxrx0b96 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.4830, val_acc=0.8525, val_loss=0.3915567499634833
Epoch 2/10 - loss=0.3673, val_acc=0.8642, val_loss=0.3709101547896698
Epoch 3/10 - loss=0.3353, val_acc=0.8743, val_loss=0.3340325507608215
Epoch 4/10 - loss=0.3147, val_acc=0.8802, val_loss=0.32716974497891876
Epoch 5/10 - loss=0.2985, val_acc=0.8735, val_loss=0.3279797500329235
Epoch 6/10 - loss=0.2853, val_acc=0.8817, val_loss=0.3249539008688805
Epoch 7/10 - loss=0.2745, val_acc=0.8837, val_loss=0.32202964045447513
Epoch 8/10 - loss=0.2651, val_acc=0.8832, val_loss=0.30925817110188275
Epoch 9/10 - loss=0.2554, val_acc=0.8857, val_loss=0.3108576969021752
Epoch 10/10 - loss=0.2466, val_acc=0.8797, val_loss=0.3196521121128617
test accuracy  0.8726


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▁▁
validation loss,█▆▃▃▃▂▂▁▁▂
validation_accuracy,▁▃▆▇▅▇█▇█▇

0,1
epoch,10.0
test_accuracy,0.8726
training_loss,0.24655
validation loss,0.31965
validation_accuracy,0.87967


[34m[1mwandb[0m: Agent Starting Run: gag8ms48 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.4132, val_acc=0.1768, val_loss=2.292150293765403
Epoch 2/10 - loss=2.3905, val_acc=0.2853, val_loss=2.2861204554435686
Epoch 3/10 - loss=2.3842, val_acc=0.2873, val_loss=2.2802184838898456
Epoch 4/10 - loss=2.3777, val_acc=0.4022, val_loss=2.2732270674223356
Epoch 5/10 - loss=2.3706, val_acc=0.4557, val_loss=2.2657527965506956
Epoch 6/10 - loss=2.3627, val_acc=0.4705, val_loss=2.2576536299632615
Epoch 7/10 - loss=2.3540, val_acc=0.4965, val_loss=2.2483936568820377
Epoch 8/10 - loss=2.3440, val_acc=0.4177, val_loss=2.2378449647951
Epoch 9/10 - loss=2.3327, val_acc=0.4445, val_loss=2.2254009518067543
Epoch 10/10 - loss=2.3195, val_acc=0.4220, val_loss=2.2110843586417293
test accuracy  0.4219


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▆▆▅▅▄▄▃▂▁
validation loss,█▇▇▆▆▅▄▃▂▁
validation_accuracy,▁▃▃▆▇▇█▆▇▆

0,1
epoch,10.0
test_accuracy,0.4219
training_loss,2.31946
validation loss,2.21108
validation_accuracy,0.422


[34m[1mwandb[0m: Agent Starting Run: 8np5n7m6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5553, val_acc=0.8282, val_loss=0.4502716316380152
Epoch 2/10 - loss=0.4163, val_acc=0.8480, val_loss=0.42432164110251913
Epoch 3/10 - loss=0.3919, val_acc=0.8623, val_loss=0.3812924125999896
Epoch 4/10 - loss=0.3840, val_acc=0.8690, val_loss=0.4012169075061982
Epoch 5/10 - loss=0.3837, val_acc=0.8687, val_loss=0.39931507821164314
Epoch 6/10 - loss=0.3857, val_acc=0.8615, val_loss=0.40627183501358344
Epoch 7/10 - loss=0.3884, val_acc=0.8632, val_loss=0.4016074682796713
Epoch 8/10 - loss=0.3928, val_acc=0.8398, val_loss=0.5069759598151149
Epoch 9/10 - loss=0.3916, val_acc=0.8578, val_loss=0.4444773724894061
Epoch 10/10 - loss=0.3890, val_acc=0.8718, val_loss=0.42694463804726085
test accuracy  0.8656


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▁▁▁▁▁▁▁▁
validation loss,▅▃▁▂▂▂▂█▅▄
validation_accuracy,▁▄▆█▇▆▇▃▆█

0,1
epoch,10.0
test_accuracy,0.8656
training_loss,0.38904
validation loss,0.42694
validation_accuracy,0.87183


[34m[1mwandb[0m: Agent Starting Run: d7kc72pl with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.8201, val_acc=0.6285, val_loss=1.4182794127681342
Epoch 2/10 - loss=1.2176, val_acc=0.6615, val_loss=1.0795495502435692
Epoch 3/10 - loss=0.9810, val_acc=0.6913, val_loss=0.916333560681336
Epoch 4/10 - loss=0.8531, val_acc=0.7242, val_loss=0.8176308782402324
Epoch 5/10 - loss=0.7706, val_acc=0.7383, val_loss=0.7493140426217068
Epoch 6/10 - loss=0.7109, val_acc=0.7557, val_loss=0.6978554089214497
Epoch 7/10 - loss=0.6642, val_acc=0.7702, val_loss=0.6565216775726654
Epoch 8/10 - loss=0.6268, val_acc=0.7792, val_loss=0.6239664273975669
Epoch 9/10 - loss=0.5962, val_acc=0.7862, val_loss=0.5977701306216459
Epoch 10/10 - loss=0.5711, val_acc=0.7950, val_loss=0.5747982165197526
test accuracy  0.7966


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▃▃▂▂▂▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▂▄▅▆▆▇▇██

0,1
epoch,10.0
test_accuracy,0.7966
training_loss,0.57108
validation loss,0.5748
validation_accuracy,0.795


[34m[1mwandb[0m: Agent Starting Run: b2u3g9u7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.4049, val_acc=0.6580, val_loss=0.8802240088826195
Epoch 2/10 - loss=0.9893, val_acc=0.6897, val_loss=0.7898220231309623
Epoch 3/10 - loss=0.9177, val_acc=0.7102, val_loss=0.7576589613292878
Epoch 4/10 - loss=0.8454, val_acc=0.7783, val_loss=0.6096843799087401
Epoch 5/10 - loss=0.7688, val_acc=0.8012, val_loss=0.5732428096530953
Epoch 6/10 - loss=0.7143, val_acc=0.8263, val_loss=0.51107325815624
Epoch 7/10 - loss=0.6963, val_acc=0.8280, val_loss=0.48082160792455714
Epoch 8/10 - loss=0.6796, val_acc=0.8390, val_loss=0.4740562301790807
Epoch 9/10 - loss=0.6668, val_acc=0.8388, val_loss=0.4716351772123521
Epoch 10/10 - loss=0.6625, val_acc=0.8365, val_loss=0.4660809467256131
test accuracy  0.8336


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▁▁▁▁▁
validation loss,█▆▆▃▃▂▁▁▁▁
validation_accuracy,▁▂▃▆▇█████

0,1
epoch,10.0
test_accuracy,0.8336
training_loss,0.66249
validation loss,0.46608
validation_accuracy,0.8365


[34m[1mwandb[0m: Agent Starting Run: rzj40vla with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8189, val_acc=0.8330, val_loss=0.47288786305722436
Epoch 2/10 - loss=0.5876, val_acc=0.8490, val_loss=0.42048123166841916
Epoch 3/10 - loss=0.5446, val_acc=0.8553, val_loss=0.40074556277352696
Epoch 4/10 - loss=0.5177, val_acc=0.8617, val_loss=0.37286030160037376
Epoch 5/10 - loss=0.4973, val_acc=0.8620, val_loss=0.3725171362276269
Epoch 6/10 - loss=0.4816, val_acc=0.8667, val_loss=0.3578653649249147
Epoch 7/10 - loss=0.4685, val_acc=0.8707, val_loss=0.34814295280925867
Epoch 8/10 - loss=0.4573, val_acc=0.8785, val_loss=0.3339394323350977
Epoch 9/10 - loss=0.4464, val_acc=0.8735, val_loss=0.3362757898576429
Epoch 10/10 - loss=0.4362, val_acc=0.8832, val_loss=0.3229743905907827
test accuracy  0.876


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▆▅▃▃▃▂▂▂▁
validation_accuracy,▁▃▄▅▅▆▆▇▇█

0,1
epoch,10.0
test_accuracy,0.876
training_loss,0.43622
validation loss,0.32297
validation_accuracy,0.88317


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1ybr11n1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6495, val_acc=0.8287, val_loss=0.4716594105169914
Epoch 2/10 - loss=0.4239, val_acc=0.8508, val_loss=0.4094628426307846
Epoch 3/10 - loss=0.3844, val_acc=0.8553, val_loss=0.4017706956811508
Epoch 4/10 - loss=0.3606, val_acc=0.8672, val_loss=0.36979783960650636
Epoch 5/10 - loss=0.3414, val_acc=0.8645, val_loss=0.3633434222988479
Epoch 6/10 - loss=0.3276, val_acc=0.8720, val_loss=0.3461213650769512
Epoch 7/10 - loss=0.3138, val_acc=0.8745, val_loss=0.339501928620412
Epoch 8/10 - loss=0.3028, val_acc=0.8777, val_loss=0.32613300740102114
Epoch 9/10 - loss=0.2913, val_acc=0.8803, val_loss=0.32298694665596944
Epoch 10/10 - loss=0.2815, val_acc=0.8817, val_loss=0.31899116400933514
test accuracy  0.8767


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8767
training_loss,0.28153
validation loss,0.31899
validation_accuracy,0.88167


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nglt2wjb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.8667, val_acc=0.8015, val_loss=0.5542136418987664
Epoch 2/10 - loss=0.4980, val_acc=0.8322, val_loss=0.46923135082256345
Epoch 3/10 - loss=0.4471, val_acc=0.8422, val_loss=0.4511293157104777
Epoch 4/10 - loss=0.4176, val_acc=0.8525, val_loss=0.4181017991411145
Epoch 5/10 - loss=0.3947, val_acc=0.8477, val_loss=0.4115344210252111
Epoch 6/10 - loss=0.3807, val_acc=0.8617, val_loss=0.3815295543177438
Epoch 7/10 - loss=0.3656, val_acc=0.8640, val_loss=0.3686995210007295
Epoch 8/10 - loss=0.3543, val_acc=0.8678, val_loss=0.35666497829869853
Epoch 9/10 - loss=0.3424, val_acc=0.8685, val_loss=0.357329174166745
Epoch 10/10 - loss=0.3330, val_acc=0.8713, val_loss=0.34871448679526496
test accuracy  0.8641


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8641
training_loss,0.33302
validation loss,0.34871
validation_accuracy,0.87133


[34m[1mwandb[0m: Agent Starting Run: 4nee0p0z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.6906, val_acc=0.6823, val_loss=0.9048527284554221
Epoch 2/10 - loss=0.9201, val_acc=0.7538, val_loss=0.6892007252587247
Epoch 3/10 - loss=0.7928, val_acc=0.7907, val_loss=0.6004321967752192
Epoch 4/10 - loss=0.7314, val_acc=0.8097, val_loss=0.5544426119523772
Epoch 5/10 - loss=0.6948, val_acc=0.8142, val_loss=0.5367966445109027
Epoch 6/10 - loss=0.6701, val_acc=0.8202, val_loss=0.5019612522504148
Epoch 7/10 - loss=0.6502, val_acc=0.8268, val_loss=0.4903386301804005
Epoch 8/10 - loss=0.6349, val_acc=0.8333, val_loss=0.47504550103156346
Epoch 9/10 - loss=0.6222, val_acc=0.8348, val_loss=0.46388152454563153
Epoch 10/10 - loss=0.6111, val_acc=0.8340, val_loss=0.4662693691200872
test accuracy  0.8249


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▄▆▇▇▇████

0,1
epoch,10.0
test_accuracy,0.8249
training_loss,0.6111
validation loss,0.46627
validation_accuracy,0.834


[34m[1mwandb[0m: Agent Starting Run: sfanokm3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.6146, val_acc=0.5805, val_loss=1.0941827307565963
Epoch 2/10 - loss=0.9099, val_acc=0.7280, val_loss=0.794222893856428
Epoch 3/10 - loss=0.6987, val_acc=0.7647, val_loss=0.6553949047708305
Epoch 4/10 - loss=0.6043, val_acc=0.7820, val_loss=0.5938219521646441
Epoch 5/10 - loss=0.5539, val_acc=0.7912, val_loss=0.5514526221307969
Epoch 6/10 - loss=0.5190, val_acc=0.8093, val_loss=0.5189406094148027
Epoch 7/10 - loss=0.4893, val_acc=0.8210, val_loss=0.4933283969571248
Epoch 8/10 - loss=0.4646, val_acc=0.8292, val_loss=0.47173530711730616
Epoch 9/10 - loss=0.4451, val_acc=0.8373, val_loss=0.4527234083301381
Epoch 10/10 - loss=0.4291, val_acc=0.8402, val_loss=0.4391439940109482
test accuracy  0.84


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▅▆▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.84
training_loss,0.4291
validation loss,0.43914
validation_accuracy,0.84017


[34m[1mwandb[0m: Agent Starting Run: 4i0s7br5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6652, val_acc=0.8530, val_loss=0.40190003921103834
Epoch 2/10 - loss=0.5329, val_acc=0.8558, val_loss=0.38664011381065483
Epoch 3/10 - loss=0.4909, val_acc=0.8675, val_loss=0.36779681542488407
Epoch 4/10 - loss=0.4683, val_acc=0.8720, val_loss=0.34719066558647804
Epoch 5/10 - loss=0.4491, val_acc=0.8630, val_loss=0.36899379085688844
Epoch 6/10 - loss=0.4389, val_acc=0.8733, val_loss=0.35014310054714476
Epoch 7/10 - loss=0.4329, val_acc=0.8735, val_loss=0.349062959071303
Epoch 8/10 - loss=0.4259, val_acc=0.8763, val_loss=0.3396814481277801
Epoch 9/10 - loss=0.4161, val_acc=0.8660, val_loss=0.3612485302096555
Epoch 10/10 - loss=0.4126, val_acc=0.8693, val_loss=0.34604696393793954
test accuracy  0.8623


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▆▄▂▄▂▂▁▃▂
validation_accuracy,▁▂▅▇▄▇▇█▅▆

0,1
epoch,10.0
test_accuracy,0.8623
training_loss,0.41263
validation loss,0.34605
validation_accuracy,0.86933


[34m[1mwandb[0m: Agent Starting Run: l1cwwmqk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5235, val_acc=0.8253, val_loss=0.4633600436704045
Epoch 2/10 - loss=0.3967, val_acc=0.8488, val_loss=0.40709307504086384
Epoch 3/10 - loss=0.3610, val_acc=0.8640, val_loss=0.3750663852004295
Epoch 4/10 - loss=0.3397, val_acc=0.8732, val_loss=0.3487636518233484
Epoch 5/10 - loss=0.3226, val_acc=0.8778, val_loss=0.33550856993749373
Epoch 6/10 - loss=0.3093, val_acc=0.8700, val_loss=0.3507626464132797
Epoch 7/10 - loss=0.2964, val_acc=0.8770, val_loss=0.3380899762526547
Epoch 8/10 - loss=0.2882, val_acc=0.8720, val_loss=0.35602823451083776
Epoch 9/10 - loss=0.2785, val_acc=0.8805, val_loss=0.3330456849676104
Epoch 10/10 - loss=0.2719, val_acc=0.8753, val_loss=0.34319352018095023
test accuracy  0.8643


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▃▂▁▂▁▂▁▂
validation_accuracy,▁▄▆▇█▇█▇█▇

0,1
epoch,10.0
test_accuracy,0.8643
training_loss,0.27193
validation loss,0.34319
validation_accuracy,0.87533


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qxmrbax7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5989, val_acc=0.8478, val_loss=0.41970906043412487
Epoch 2/10 - loss=0.4915, val_acc=0.8545, val_loss=0.392838231025159
Epoch 3/10 - loss=0.4646, val_acc=0.8608, val_loss=0.37799624343090227
Epoch 4/10 - loss=0.4496, val_acc=0.8642, val_loss=0.36305117779936236
Epoch 5/10 - loss=0.4352, val_acc=0.8605, val_loss=0.3680567154066666
Epoch 6/10 - loss=0.4299, val_acc=0.8705, val_loss=0.35336949691059216
Epoch 7/10 - loss=0.4217, val_acc=0.8715, val_loss=0.35497743564564316
Epoch 8/10 - loss=0.4179, val_acc=0.8727, val_loss=0.3404028179054465
Epoch 9/10 - loss=0.4078, val_acc=0.8598, val_loss=0.3717643313340888
Epoch 10/10 - loss=0.4033, val_acc=0.8638, val_loss=0.357059538077422
test accuracy  0.8601


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▁▁
validation loss,█▆▄▃▃▂▂▁▄▂
validation_accuracy,▁▃▅▆▅▇██▄▆

0,1
epoch,10.0
test_accuracy,0.8601
training_loss,0.40332
validation loss,0.35706
validation_accuracy,0.86383


[34m[1mwandb[0m: Agent Starting Run: bq0xhfvs with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7384, val_acc=0.8442, val_loss=0.44929709339667245
Epoch 2/10 - loss=0.4116, val_acc=0.8608, val_loss=0.39029753459721195
Epoch 3/10 - loss=0.3683, val_acc=0.8682, val_loss=0.3662082821915141
Epoch 4/10 - loss=0.3435, val_acc=0.8710, val_loss=0.3527548945868662
Epoch 5/10 - loss=0.3247, val_acc=0.8728, val_loss=0.3428160543798559
Epoch 6/10 - loss=0.3120, val_acc=0.8805, val_loss=0.33990519086044596
Epoch 7/10 - loss=0.2998, val_acc=0.8745, val_loss=0.3311118536134983
Epoch 8/10 - loss=0.2903, val_acc=0.8793, val_loss=0.3258548674831543
Epoch 9/10 - loss=0.2794, val_acc=0.8792, val_loss=0.3244729602770525
Epoch 10/10 - loss=0.2721, val_acc=0.8835, val_loss=0.3205575965837035
test accuracy  0.8758


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▆▇▇█

0,1
epoch,10.0
test_accuracy,0.8758
training_loss,0.2721
validation loss,0.32056
validation_accuracy,0.8835


[34m[1mwandb[0m: Agent Starting Run: 85yffuw1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.0524, val_acc=0.6217, val_loss=1.4913677175989664
Epoch 2/10 - loss=1.2682, val_acc=0.6715, val_loss=0.9849910493824618
Epoch 3/10 - loss=0.9801, val_acc=0.7198, val_loss=0.8226347576227293
Epoch 4/10 - loss=0.8603, val_acc=0.7480, val_loss=0.7358059700852158
Epoch 5/10 - loss=0.7883, val_acc=0.7708, val_loss=0.6771725064066416
Epoch 6/10 - loss=0.7386, val_acc=0.7870, val_loss=0.6339038920989798
Epoch 7/10 - loss=0.7001, val_acc=0.7927, val_loss=0.6034520612606704
Epoch 8/10 - loss=0.6714, val_acc=0.8025, val_loss=0.573628052082561
Epoch 9/10 - loss=0.6487, val_acc=0.8082, val_loss=0.5555827518895411
Epoch 10/10 - loss=0.6310, val_acc=0.8082, val_loss=0.5431267292891337
test accuracy  0.8037


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8037
training_loss,0.63097
validation loss,0.54313
validation_accuracy,0.80817


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ukqf25xa with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.0587, val_acc=0.3055, val_loss=1.7334853526327958
Epoch 2/10 - loss=1.5013, val_acc=0.4195, val_loss=1.2980689043578804
Epoch 3/10 - loss=1.1921, val_acc=0.5462, val_loss=1.1103010869695438
Epoch 4/10 - loss=1.0272, val_acc=0.6357, val_loss=0.969085243009809
Epoch 5/10 - loss=0.8923, val_acc=0.7052, val_loss=0.8468564462964443
Epoch 6/10 - loss=0.7658, val_acc=0.7503, val_loss=0.7294345944194273
Epoch 7/10 - loss=0.6720, val_acc=0.7675, val_loss=0.6637225598858318
Epoch 8/10 - loss=0.6176, val_acc=0.7807, val_loss=0.6171374223996476
Epoch 9/10 - loss=0.5803, val_acc=0.7902, val_loss=0.5846230379331021
Epoch 10/10 - loss=0.5521, val_acc=0.7958, val_loss=0.5613758420505413
test accuracy  0.793


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▃▄▆▇▇████

0,1
epoch,10.0
test_accuracy,0.793
training_loss,0.55205
validation loss,0.56138
validation_accuracy,0.79583


[34m[1mwandb[0m: Agent Starting Run: yfr81jrj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7868, val_acc=0.8167, val_loss=0.5217478342201655
Epoch 2/10 - loss=0.4816, val_acc=0.8377, val_loss=0.4585569569009179
Epoch 3/10 - loss=0.4367, val_acc=0.8437, val_loss=0.43797609699630824
Epoch 4/10 - loss=0.4112, val_acc=0.8547, val_loss=0.40707383224604626
Epoch 5/10 - loss=0.3929, val_acc=0.8593, val_loss=0.39868272907460245
Epoch 6/10 - loss=0.3794, val_acc=0.8622, val_loss=0.38730276633180966
Epoch 7/10 - loss=0.3672, val_acc=0.8595, val_loss=0.383965068278298
Epoch 8/10 - loss=0.3567, val_acc=0.8680, val_loss=0.36583353707208216
Epoch 9/10 - loss=0.3465, val_acc=0.8702, val_loss=0.35970920867590317
Epoch 10/10 - loss=0.3392, val_acc=0.8698, val_loss=0.3568636316582727
test accuracy  0.8618


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8618
training_loss,0.33918
validation loss,0.35686
validation_accuracy,0.86983


[34m[1mwandb[0m: Agent Starting Run: tewg7q73 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.8951, val_acc=0.7687, val_loss=0.6317323622913114
Epoch 2/10 - loss=0.5409, val_acc=0.8318, val_loss=0.4904202835232726
Epoch 3/10 - loss=0.4743, val_acc=0.8278, val_loss=0.49711713173726596
Epoch 4/10 - loss=0.4353, val_acc=0.8400, val_loss=0.4547748386550395
Epoch 5/10 - loss=0.4084, val_acc=0.8448, val_loss=0.42994919418965144
Epoch 6/10 - loss=0.3877, val_acc=0.8645, val_loss=0.38962028736764415
Epoch 7/10 - loss=0.3719, val_acc=0.8695, val_loss=0.3749814437184869
Epoch 8/10 - loss=0.3560, val_acc=0.8727, val_loss=0.36309485865762303
Epoch 9/10 - loss=0.3429, val_acc=0.8627, val_loss=0.38277883014484254
Epoch 10/10 - loss=0.3326, val_acc=0.8598, val_loss=0.3912337574388703
test accuracy  0.8539


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▄▃▃▂▁▁▂▂
validation_accuracy,▁▅▅▆▆▇██▇▇

0,1
epoch,10.0
test_accuracy,0.8539
training_loss,0.33263
validation loss,0.39123
validation_accuracy,0.85983


[34m[1mwandb[0m: Agent Starting Run: lr95q2c4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5553, val_acc=0.8282, val_loss=0.4502716316380152
Epoch 2/10 - loss=0.4163, val_acc=0.8480, val_loss=0.42432164110251913
Epoch 3/10 - loss=0.3919, val_acc=0.8623, val_loss=0.3812924125999896
Epoch 4/10 - loss=0.3840, val_acc=0.8690, val_loss=0.4012169075061982
Epoch 5/10 - loss=0.3837, val_acc=0.8687, val_loss=0.39931507821164314
Epoch 6/10 - loss=0.3857, val_acc=0.8615, val_loss=0.40627183501358344
Epoch 7/10 - loss=0.3884, val_acc=0.8632, val_loss=0.4016074682796713
Epoch 8/10 - loss=0.3928, val_acc=0.8398, val_loss=0.5069759598151149
Epoch 9/10 - loss=0.3916, val_acc=0.8578, val_loss=0.4444773724894061
Epoch 10/10 - loss=0.3890, val_acc=0.8718, val_loss=0.42694463804726085
test accuracy  0.8656


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▁▁▁▁▁▁▁▁
validation loss,▅▃▁▂▂▂▂█▅▄
validation_accuracy,▁▄▆█▇▆▇▃▆█

0,1
epoch,10.0
test_accuracy,0.8656
training_loss,0.38904
validation loss,0.42694
validation_accuracy,0.87183


[34m[1mwandb[0m: Agent Starting Run: psoodmf7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.9458, val_acc=0.8307, val_loss=0.4717492848556397
Epoch 2/10 - loss=0.7505, val_acc=0.8492, val_loss=0.4149236866866515
Epoch 3/10 - loss=0.7053, val_acc=0.8497, val_loss=0.4047650881879877
Epoch 4/10 - loss=0.6761, val_acc=0.8537, val_loss=0.3858373777010057
Epoch 5/10 - loss=0.6542, val_acc=0.8605, val_loss=0.3692215402204152
Epoch 6/10 - loss=0.6355, val_acc=0.8732, val_loss=0.34359989486151715
Epoch 7/10 - loss=0.6203, val_acc=0.8643, val_loss=0.3541446593248155
Epoch 8/10 - loss=0.6051, val_acc=0.8683, val_loss=0.3538595572741048
Epoch 9/10 - loss=0.5931, val_acc=0.8763, val_loss=0.32662779599426184
Epoch 10/10 - loss=0.5808, val_acc=0.8738, val_loss=0.3445214310005897
test accuracy  0.8681


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▄▃▂▂▂▁▂
validation_accuracy,▁▄▄▅▆█▆▇██

0,1
epoch,10.0
test_accuracy,0.8681
training_loss,0.58082
validation loss,0.34452
validation_accuracy,0.87383


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x3e7fk3x with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.5374, val_acc=0.0985, val_loss=2.3121771854705244
Epoch 2/10 - loss=2.4683, val_acc=0.0565, val_loss=2.3028506409185963
Epoch 3/10 - loss=2.4657, val_acc=0.0985, val_loss=2.3025417670037673
Epoch 4/10 - loss=2.4655, val_acc=0.0985, val_loss=2.302450844586812
Epoch 5/10 - loss=2.4653, val_acc=0.1008, val_loss=2.3024113084779354
Epoch 6/10 - loss=2.4652, val_acc=0.1000, val_loss=2.3023695339661865
Epoch 7/10 - loss=2.4650, val_acc=0.1340, val_loss=2.302327853384874
Epoch 8/10 - loss=2.4649, val_acc=0.1265, val_loss=2.3022962478471762
Epoch 9/10 - loss=2.4647, val_acc=0.1000, val_loss=2.3022770195152655
Epoch 10/10 - loss=2.4646, val_acc=0.1513, val_loss=2.3022897082031553
test accuracy  0.1551


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▁▁▁▁▁▁▁▁▁
validation loss,█▁▁▁▁▁▁▁▁▁
validation_accuracy,▄▁▄▄▄▄▇▆▄█

0,1
epoch,10.0
test_accuracy,0.1551
training_loss,2.46456
validation loss,2.30229
validation_accuracy,0.15133


[34m[1mwandb[0m: Agent Starting Run: bm327x0t with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.3081, val_acc=0.2553, val_loss=2.1058923593780623
Epoch 2/10 - loss=1.9266, val_acc=0.4692, val_loss=1.5164345320150387
Epoch 3/10 - loss=1.3078, val_acc=0.6187, val_loss=1.0185960674297476
Epoch 4/10 - loss=0.9976, val_acc=0.6745, val_loss=0.855118582710457
Epoch 5/10 - loss=0.8832, val_acc=0.7007, val_loss=0.7821122982786574
Epoch 6/10 - loss=0.8224, val_acc=0.7300, val_loss=0.7270636160485401
Epoch 7/10 - loss=0.7783, val_acc=0.7520, val_loss=0.687869736322436
Epoch 8/10 - loss=0.7408, val_acc=0.7727, val_loss=0.6551448341213724
Epoch 9/10 - loss=0.7082, val_acc=0.7858, val_loss=0.6188450660197422
Epoch 10/10 - loss=0.6775, val_acc=0.7843, val_loss=0.6039336734814228
test accuracy  0.7819


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▆▄▂▂▂▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▄▆▇▇▇████

0,1
epoch,10.0
test_accuracy,0.7819
training_loss,0.67755
validation loss,0.60393
validation_accuracy,0.78433


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kc11tucu with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.9537, val_acc=0.6215, val_loss=1.4895556826599456
Epoch 2/10 - loss=1.1672, val_acc=0.6715, val_loss=0.9832608663296118
Epoch 3/10 - loss=0.8780, val_acc=0.7203, val_loss=0.8208723950380293
Epoch 4/10 - loss=0.7574, val_acc=0.7483, val_loss=0.7339664405128069
Epoch 5/10 - loss=0.6849, val_acc=0.7712, val_loss=0.6752168550212373
Epoch 6/10 - loss=0.6346, val_acc=0.7877, val_loss=0.6319089450692479
Epoch 7/10 - loss=0.5957, val_acc=0.7935, val_loss=0.6014234778546999
Epoch 8/10 - loss=0.5667, val_acc=0.8033, val_loss=0.5716297730551
Epoch 9/10 - loss=0.5439, val_acc=0.8087, val_loss=0.5536474427258639
Epoch 10/10 - loss=0.5260, val_acc=0.8082, val_loss=0.541292236940221
test accuracy  0.8042


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8042
training_loss,0.526
validation loss,0.54129
validation_accuracy,0.80817


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h087iat4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3062, val_acc=0.0977, val_loss=2.303974915229481
Epoch 2/10 - loss=2.3041, val_acc=0.1013, val_loss=2.303352820200559
Epoch 3/10 - loss=2.3038, val_acc=0.0985, val_loss=2.3053607235277527
Epoch 4/10 - loss=2.3041, val_acc=0.0977, val_loss=2.3034478026941754
Epoch 5/10 - loss=2.3038, val_acc=0.1013, val_loss=2.304703166190963
Epoch 6/10 - loss=2.3038, val_acc=0.1000, val_loss=2.303048407506977
Epoch 7/10 - loss=2.3039, val_acc=0.1013, val_loss=2.3030503863014564
Epoch 8/10 - loss=2.3037, val_acc=0.1000, val_loss=2.302993919066203
Epoch 9/10 - loss=2.3039, val_acc=0.1018, val_loss=2.303747821363612
Epoch 10/10 - loss=2.3036, val_acc=0.1000, val_loss=2.3042547144259826
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▂▁
validation loss,▄▂█▂▆▁▁▁▃▅
validation_accuracy,▁▇▂▁▇▅▇▅█▅

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30355
validation loss,2.30425
validation_accuracy,0.1


[34m[1mwandb[0m: Agent Starting Run: b280jmqr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3069, val_acc=0.0977, val_loss=2.3039294933728303
Epoch 2/10 - loss=2.3041, val_acc=0.1145, val_loss=2.3036475947900055
Epoch 3/10 - loss=2.3041, val_acc=0.0977, val_loss=2.3040859922048162
Epoch 4/10 - loss=2.3041, val_acc=0.0985, val_loss=2.302757747155341
Epoch 5/10 - loss=2.3041, val_acc=0.1023, val_loss=2.3030441769201704
Epoch 6/10 - loss=2.3040, val_acc=0.0985, val_loss=2.3032501931688043
Epoch 7/10 - loss=2.3039, val_acc=0.1000, val_loss=2.3035614843626755
Epoch 8/10 - loss=2.3039, val_acc=0.0977, val_loss=2.3051427433363156
Epoch 9/10 - loss=2.3038, val_acc=0.1000, val_loss=2.303022491236831
Epoch 10/10 - loss=2.3038, val_acc=0.0995, val_loss=2.3036126237574437
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▂▂▂▂▁▁▁▁
validation loss,▄▄▅▁▂▂▃█▂▄
validation_accuracy,▁█▁▁▃▁▂▁▂▂

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30375
validation loss,2.30361
validation_accuracy,0.0995


[34m[1mwandb[0m: Agent Starting Run: d1v9slx6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.4495, val_acc=0.6555, val_loss=1.1057567094704246
Epoch 2/10 - loss=0.9782, val_acc=0.7152, val_loss=0.8925417709375482
Epoch 3/10 - loss=0.8300, val_acc=0.7400, val_loss=0.7911863218934891
Epoch 4/10 - loss=0.7487, val_acc=0.7555, val_loss=0.7268907522606611
Epoch 5/10 - loss=0.6951, val_acc=0.7690, val_loss=0.6820783325512757
Epoch 6/10 - loss=0.6563, val_acc=0.7782, val_loss=0.6492531203908745
Epoch 7/10 - loss=0.6266, val_acc=0.7862, val_loss=0.623056697578619
Epoch 8/10 - loss=0.6028, val_acc=0.7905, val_loss=0.6022458178492165
Epoch 9/10 - loss=0.5833, val_acc=0.7955, val_loss=0.5840648765722177
Epoch 10/10 - loss=0.5671, val_acc=0.7995, val_loss=0.5694665725577399
test accuracy  0.7986


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.7986
training_loss,0.56709
validation loss,0.56947
validation_accuracy,0.7995


[34m[1mwandb[0m: Agent Starting Run: 0t1d1fpq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6975, val_acc=0.8257, val_loss=0.4909688508627646
Epoch 2/10 - loss=0.4701, val_acc=0.8468, val_loss=0.4404039728104688
Epoch 3/10 - loss=0.4268, val_acc=0.8507, val_loss=0.4185324033728051
Epoch 4/10 - loss=0.4017, val_acc=0.8615, val_loss=0.3897991543178661
Epoch 5/10 - loss=0.3832, val_acc=0.8570, val_loss=0.39360127803452094
Epoch 6/10 - loss=0.3675, val_acc=0.8635, val_loss=0.376634273848217
Epoch 7/10 - loss=0.3563, val_acc=0.8657, val_loss=0.3630551852859084
Epoch 8/10 - loss=0.3461, val_acc=0.8677, val_loss=0.3570034831621371
Epoch 9/10 - loss=0.3355, val_acc=0.8712, val_loss=0.35884386898329995
Epoch 10/10 - loss=0.3280, val_acc=0.8713, val_loss=0.34810187685248656
test accuracy  0.8635


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▆▄▃▃▂▂▁▂▁
validation_accuracy,▁▄▅▆▆▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8635
training_loss,0.32804
validation loss,0.3481
validation_accuracy,0.87133


[34m[1mwandb[0m: Agent Starting Run: 38kjwemk with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8068, val_acc=0.8245, val_loss=0.48460313163559043
Epoch 2/10 - loss=0.5657, val_acc=0.8507, val_loss=0.42441486305631687
Epoch 3/10 - loss=0.5233, val_acc=0.8438, val_loss=0.4250030600844261
Epoch 4/10 - loss=0.4951, val_acc=0.8500, val_loss=0.39805016474225263
Epoch 5/10 - loss=0.4755, val_acc=0.8645, val_loss=0.37296798847230667
Epoch 6/10 - loss=0.4583, val_acc=0.8717, val_loss=0.3495343054376769
Epoch 7/10 - loss=0.4457, val_acc=0.8670, val_loss=0.35775600224671744
Epoch 8/10 - loss=0.4335, val_acc=0.8725, val_loss=0.34672425176180693
Epoch 9/10 - loss=0.4234, val_acc=0.8738, val_loss=0.33049233409171475
Epoch 10/10 - loss=0.4134, val_acc=0.8697, val_loss=0.3574495601553445
test accuracy  0.8669


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▄▃▂▂▂▁▂
validation_accuracy,▁▅▄▅▇█▇██▇

0,1
epoch,10.0
test_accuracy,0.8669
training_loss,0.41336
validation loss,0.35745
validation_accuracy,0.86967


[34m[1mwandb[0m: Agent Starting Run: o0q3vah8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6319, val_acc=0.8482, val_loss=0.41032161635779246
Epoch 2/10 - loss=0.3863, val_acc=0.8690, val_loss=0.3621803123398466
Epoch 3/10 - loss=0.3482, val_acc=0.8668, val_loss=0.36053820623089133
Epoch 4/10 - loss=0.3248, val_acc=0.8713, val_loss=0.3367102290093226
Epoch 5/10 - loss=0.3062, val_acc=0.8798, val_loss=0.33028242139786046
Epoch 6/10 - loss=0.2934, val_acc=0.8838, val_loss=0.3103834000682259
Epoch 7/10 - loss=0.2790, val_acc=0.8847, val_loss=0.3073644912756381
Epoch 8/10 - loss=0.2690, val_acc=0.8900, val_loss=0.29842738446706546
Epoch 9/10 - loss=0.2588, val_acc=0.8828, val_loss=0.3122128336770696
Epoch 10/10 - loss=0.2490, val_acc=0.8815, val_loss=0.3131750293829135
test accuracy  0.8809


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▂▂
validation_accuracy,▁▄▄▅▆▇▇█▇▇

0,1
epoch,10.0
test_accuracy,0.8809
training_loss,0.24904
validation loss,0.31318
validation_accuracy,0.8815


[34m[1mwandb[0m: Agent Starting Run: uichwsbf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.8016, val_acc=0.6790, val_loss=0.9804535881290728
Epoch 2/10 - loss=1.1582, val_acc=0.7352, val_loss=0.7594119776884702
Epoch 3/10 - loss=1.0158, val_acc=0.7695, val_loss=0.6610090867275565
Epoch 4/10 - loss=0.9390, val_acc=0.7933, val_loss=0.6011567021805156
Epoch 5/10 - loss=0.8901, val_acc=0.8023, val_loss=0.5623255178529079
Epoch 6/10 - loss=0.8583, val_acc=0.8102, val_loss=0.5341619190918158
Epoch 7/10 - loss=0.8363, val_acc=0.8175, val_loss=0.5149791021784477
Epoch 8/10 - loss=0.8186, val_acc=0.8195, val_loss=0.510186877484347
Epoch 9/10 - loss=0.8054, val_acc=0.8258, val_loss=0.4900451325941546
Epoch 10/10 - loss=0.7940, val_acc=0.8318, val_loss=0.48176495180232237
test accuracy  0.8226


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▂▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8226
training_loss,0.79401
validation loss,0.48176
validation_accuracy,0.83183


[34m[1mwandb[0m: Agent Starting Run: 9d1nyypy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5110, val_acc=0.8445, val_loss=0.41731685878230323
Epoch 2/10 - loss=0.3877, val_acc=0.8582, val_loss=0.4026808516989839
Epoch 3/10 - loss=0.3543, val_acc=0.8688, val_loss=0.3576988630079578
Epoch 4/10 - loss=0.3370, val_acc=0.8590, val_loss=0.3982048896122742
Epoch 5/10 - loss=0.3188, val_acc=0.8745, val_loss=0.3381999577785887
Epoch 6/10 - loss=0.3059, val_acc=0.8737, val_loss=0.3341030155522551
Epoch 7/10 - loss=0.2964, val_acc=0.8718, val_loss=0.34447520389734365
Epoch 8/10 - loss=0.2848, val_acc=0.8815, val_loss=0.32981068640999844
Epoch 9/10 - loss=0.2774, val_acc=0.8793, val_loss=0.33755768932031066
Epoch 10/10 - loss=0.2727, val_acc=0.8813, val_loss=0.34274082961877667
test accuracy  0.8747


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▇▃▆▂▁▂▁▂▂
validation_accuracy,▁▄▆▄▇▇▆███

0,1
epoch,10.0
test_accuracy,0.8747
training_loss,0.27268
validation loss,0.34274
validation_accuracy,0.88133


[34m[1mwandb[0m: Agent Starting Run: 6yoa9a6l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7681, val_acc=0.8290, val_loss=0.46376235509071556
Epoch 2/10 - loss=0.5850, val_acc=0.8490, val_loss=0.4136026527884151
Epoch 3/10 - loss=0.5490, val_acc=0.8470, val_loss=0.409510880212974
Epoch 4/10 - loss=0.5252, val_acc=0.8608, val_loss=0.3777728595698547
Epoch 5/10 - loss=0.5077, val_acc=0.8640, val_loss=0.36355409927132026
Epoch 6/10 - loss=0.4927, val_acc=0.8675, val_loss=0.357906690732336
Epoch 7/10 - loss=0.4810, val_acc=0.8693, val_loss=0.35458540147957424
Epoch 8/10 - loss=0.4691, val_acc=0.8705, val_loss=0.3494467253523409
Epoch 9/10 - loss=0.4596, val_acc=0.8643, val_loss=0.35960365520510756
Epoch 10/10 - loss=0.4504, val_acc=0.8675, val_loss=0.35112915327788824
test accuracy  0.8652


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▃▂▂▁▁▂▁
validation_accuracy,▁▄▄▆▇▇██▇▇

0,1
epoch,10.0
test_accuracy,0.8652
training_loss,0.45037
validation loss,0.35113
validation_accuracy,0.8675


[34m[1mwandb[0m: Agent Starting Run: wy4t3qli with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.9438, val_acc=0.8075, val_loss=0.5450329698841433
Epoch 2/10 - loss=0.6564, val_acc=0.8317, val_loss=0.4744028034904157
Epoch 3/10 - loss=0.6028, val_acc=0.8443, val_loss=0.4328132859490495
Epoch 4/10 - loss=0.5741, val_acc=0.8453, val_loss=0.4180453489393723
Epoch 5/10 - loss=0.5547, val_acc=0.8497, val_loss=0.3998252193053268
Epoch 6/10 - loss=0.5398, val_acc=0.8603, val_loss=0.38186287685823267
Epoch 7/10 - loss=0.5271, val_acc=0.8600, val_loss=0.38820815938232256
Epoch 8/10 - loss=0.5167, val_acc=0.8648, val_loss=0.36643093084671075
Epoch 9/10 - loss=0.5070, val_acc=0.8662, val_loss=0.3686045723369192
Epoch 10/10 - loss=0.4991, val_acc=0.8640, val_loss=0.3709058891053292
test accuracy  0.8578


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8578
training_loss,0.49913
validation loss,0.37091
validation_accuracy,0.864


[34m[1mwandb[0m: Agent Starting Run: 33ifnyay with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8068, val_acc=0.8245, val_loss=0.48460313163559043
Epoch 2/10 - loss=0.5657, val_acc=0.8507, val_loss=0.42441486305631687
Epoch 3/10 - loss=0.5233, val_acc=0.8438, val_loss=0.4250030600844261
Epoch 4/10 - loss=0.4951, val_acc=0.8500, val_loss=0.39805016474225263
Epoch 5/10 - loss=0.4755, val_acc=0.8645, val_loss=0.37296798847230667
Epoch 6/10 - loss=0.4583, val_acc=0.8717, val_loss=0.3495343054376769
Epoch 7/10 - loss=0.4457, val_acc=0.8670, val_loss=0.35775600224671744
Epoch 8/10 - loss=0.4335, val_acc=0.8725, val_loss=0.34672425176180693
Epoch 9/10 - loss=0.4234, val_acc=0.8738, val_loss=0.33049233409171475
Epoch 10/10 - loss=0.4134, val_acc=0.8697, val_loss=0.3574495601553445
test accuracy  0.8669


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▄▃▂▂▂▁▂
validation_accuracy,▁▅▄▅▇█▇██▇

0,1
epoch,10.0
test_accuracy,0.8669
training_loss,0.41336
validation loss,0.35745
validation_accuracy,0.86967


[34m[1mwandb[0m: Agent Starting Run: gwt7n34g with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8769, val_acc=0.8278, val_loss=0.49833848913031953
Epoch 2/10 - loss=0.6218, val_acc=0.8465, val_loss=0.43311498432685586
Epoch 3/10 - loss=0.5752, val_acc=0.8533, val_loss=0.40044057168346026
Epoch 4/10 - loss=0.5463, val_acc=0.8623, val_loss=0.3835845182125025
Epoch 5/10 - loss=0.5253, val_acc=0.8540, val_loss=0.3945468502034772
Epoch 6/10 - loss=0.5109, val_acc=0.8655, val_loss=0.36056368987632437
Epoch 7/10 - loss=0.4971, val_acc=0.8623, val_loss=0.3672698172302763
Epoch 8/10 - loss=0.4844, val_acc=0.8680, val_loss=0.353410330872395
Epoch 9/10 - loss=0.4744, val_acc=0.8667, val_loss=0.35411812229755574
Epoch 10/10 - loss=0.4652, val_acc=0.8643, val_loss=0.35689214784333245
test accuracy  0.8639


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▃▂▃▁▂▁▁▁
validation_accuracy,▁▄▅▇▆█▇██▇

0,1
epoch,10.0
test_accuracy,0.8639
training_loss,0.46523
validation loss,0.35689
validation_accuracy,0.86433


[34m[1mwandb[0m: Agent Starting Run: xlhea1wh with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7868, val_acc=0.8167, val_loss=0.5217478342201655
Epoch 2/10 - loss=0.4816, val_acc=0.8377, val_loss=0.4585569569009179
Epoch 3/10 - loss=0.4367, val_acc=0.8437, val_loss=0.43797609699630824
Epoch 4/10 - loss=0.4112, val_acc=0.8547, val_loss=0.40707383224604626
Epoch 5/10 - loss=0.3929, val_acc=0.8593, val_loss=0.39868272907460245
Epoch 6/10 - loss=0.3794, val_acc=0.8622, val_loss=0.38730276633180966
Epoch 7/10 - loss=0.3672, val_acc=0.8595, val_loss=0.383965068278298
Epoch 8/10 - loss=0.3567, val_acc=0.8680, val_loss=0.36583353707208216
Epoch 9/10 - loss=0.3465, val_acc=0.8702, val_loss=0.35970920867590317
Epoch 10/10 - loss=0.3392, val_acc=0.8698, val_loss=0.3568636316582727
test accuracy  0.8618


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8618
training_loss,0.33918
validation loss,0.35686
validation_accuracy,0.86983


[34m[1mwandb[0m: Agent Starting Run: kzgeivlg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8317, val_acc=0.8258, val_loss=0.4757579152108262
Epoch 2/10 - loss=0.5906, val_acc=0.8502, val_loss=0.41328346409049
Epoch 3/10 - loss=0.5434, val_acc=0.8562, val_loss=0.40813697109285646
Epoch 4/10 - loss=0.5149, val_acc=0.8628, val_loss=0.3739574488702956
Epoch 5/10 - loss=0.4938, val_acc=0.8658, val_loss=0.3692313030829563
Epoch 6/10 - loss=0.4785, val_acc=0.8693, val_loss=0.3513944451690005
Epoch 7/10 - loss=0.4640, val_acc=0.8740, val_loss=0.3431881958227336
Epoch 8/10 - loss=0.4528, val_acc=0.8783, val_loss=0.330285510888617
Epoch 9/10 - loss=0.4400, val_acc=0.8770, val_loss=0.33109005835320926
Epoch 10/10 - loss=0.4306, val_acc=0.8795, val_loss=0.32264725908270125
test accuracy  0.8748


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8748
training_loss,0.43059
validation loss,0.32265
validation_accuracy,0.8795


[34m[1mwandb[0m: Agent Starting Run: cz40h8yp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5077, val_acc=0.8492, val_loss=0.40737801492036063
Epoch 2/10 - loss=0.3722, val_acc=0.8715, val_loss=0.3396064193370988
Epoch 3/10 - loss=0.3384, val_acc=0.8583, val_loss=0.366624837734801
Epoch 4/10 - loss=0.3145, val_acc=0.8763, val_loss=0.3379841200786454
Epoch 5/10 - loss=0.2955, val_acc=0.8787, val_loss=0.3256897137472036
Epoch 6/10 - loss=0.2820, val_acc=0.8867, val_loss=0.31204421024490914
Epoch 7/10 - loss=0.2691, val_acc=0.8903, val_loss=0.31161403607265575
Epoch 8/10 - loss=0.2601, val_acc=0.8832, val_loss=0.31869695173533424
Epoch 9/10 - loss=0.2483, val_acc=0.8870, val_loss=0.32339436454480425
Epoch 10/10 - loss=0.2376, val_acc=0.8873, val_loss=0.32405208778690026
test accuracy  0.8852


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▁▁
validation loss,█▃▅▃▂▁▁▂▂▂
validation_accuracy,▁▅▃▆▆▇█▇▇▇

0,1
epoch,10.0
test_accuracy,0.8852
training_loss,0.23757
validation loss,0.32405
validation_accuracy,0.88733


[34m[1mwandb[0m: Agent Starting Run: uavnpf8p with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3505, val_acc=0.1033, val_loss=2.303114840127441
Epoch 2/10 - loss=2.3030, val_acc=0.0977, val_loss=2.302991830490144
Epoch 3/10 - loss=2.3029, val_acc=0.1017, val_loss=2.302877073553554
Epoch 4/10 - loss=2.3029, val_acc=0.1390, val_loss=2.3027453588016695
Epoch 5/10 - loss=2.3029, val_acc=0.0588, val_loss=2.3026822158058344
Epoch 6/10 - loss=2.3028, val_acc=0.0977, val_loss=2.3028041736631644
Epoch 7/10 - loss=2.3028, val_acc=0.1023, val_loss=2.302529165400271
Epoch 8/10 - loss=2.3028, val_acc=0.0950, val_loss=2.3026551629448933
Epoch 9/10 - loss=2.3027, val_acc=0.0995, val_loss=2.3026615993658277
Epoch 10/10 - loss=2.3027, val_acc=0.0995, val_loss=2.3024827947628483
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▁▁▁▁▁▁▁▁▁
validation loss,█▇▅▄▃▅▂▃▃▁
validation_accuracy,▅▄▅█▁▄▅▄▅▅

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30265
validation loss,2.30248
validation_accuracy,0.0995


[34m[1mwandb[0m: Agent Starting Run: cg0mzruy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5329, val_acc=0.8527, val_loss=0.4092622362524991
Epoch 2/10 - loss=0.3801, val_acc=0.8627, val_loss=0.37678530092138246
Epoch 3/10 - loss=0.3484, val_acc=0.8715, val_loss=0.3536841316474912
Epoch 4/10 - loss=0.3250, val_acc=0.8775, val_loss=0.33737200769110104
Epoch 5/10 - loss=0.3100, val_acc=0.8733, val_loss=0.3366078429537517
Epoch 6/10 - loss=0.2979, val_acc=0.8770, val_loss=0.3347923308453668
Epoch 7/10 - loss=0.2882, val_acc=0.8792, val_loss=0.3343706348091062
Epoch 8/10 - loss=0.2760, val_acc=0.8788, val_loss=0.3289205366126509
Epoch 9/10 - loss=0.2691, val_acc=0.8818, val_loss=0.3333668671057646
Epoch 10/10 - loss=0.2616, val_acc=0.8867, val_loss=0.3194607393409948
test accuracy  0.8772


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▄▂▂▂▂▂▂▁
validation_accuracy,▁▃▅▆▅▆▆▆▇█

0,1
epoch,10.0
test_accuracy,0.8772
training_loss,0.26155
validation loss,0.31946
validation_accuracy,0.88667


[34m[1mwandb[0m: Agent Starting Run: hp6rfeb7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6682, val_acc=0.8212, val_loss=0.4960866594166533
Epoch 2/10 - loss=0.4613, val_acc=0.8437, val_loss=0.43831543837941583
Epoch 3/10 - loss=0.4184, val_acc=0.8458, val_loss=0.41931714840191103
Epoch 4/10 - loss=0.3938, val_acc=0.8570, val_loss=0.39253523318361644
Epoch 5/10 - loss=0.3747, val_acc=0.8622, val_loss=0.37704187077881884
Epoch 6/10 - loss=0.3616, val_acc=0.8610, val_loss=0.37511828601601077
Epoch 7/10 - loss=0.3498, val_acc=0.8590, val_loss=0.3763203924808063
Epoch 8/10 - loss=0.3389, val_acc=0.8700, val_loss=0.35616856559083326
Epoch 9/10 - loss=0.3295, val_acc=0.8705, val_loss=0.35277462155179856
Epoch 10/10 - loss=0.3230, val_acc=0.8763, val_loss=0.33946002539413217
test accuracy  0.8677


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▃▃▂▂▁
validation_accuracy,▁▄▄▆▆▆▆▇▇█

0,1
epoch,10.0
test_accuracy,0.8677
training_loss,0.323
validation loss,0.33946
validation_accuracy,0.87633


[34m[1mwandb[0m: Agent Starting Run: vpvdawoy with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7300, val_acc=0.8450, val_loss=0.4435416368954084
Epoch 2/10 - loss=0.4073, val_acc=0.8620, val_loss=0.3860047900777413
Epoch 3/10 - loss=0.3647, val_acc=0.8710, val_loss=0.36089079914533434
Epoch 4/10 - loss=0.3395, val_acc=0.8740, val_loss=0.3447158601452188
Epoch 5/10 - loss=0.3219, val_acc=0.8697, val_loss=0.3428662401267571
Epoch 6/10 - loss=0.3082, val_acc=0.8785, val_loss=0.3388292589004029
Epoch 7/10 - loss=0.2960, val_acc=0.8803, val_loss=0.3249173169138768
Epoch 8/10 - loss=0.2861, val_acc=0.8807, val_loss=0.32296377960348327
Epoch 9/10 - loss=0.2752, val_acc=0.8800, val_loss=0.32424029818772904
Epoch 10/10 - loss=0.2681, val_acc=0.8812, val_loss=0.32541383176483074
test accuracy  0.8759


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▄▆▇▆▇████

0,1
epoch,10.0
test_accuracy,0.8759
training_loss,0.26814
validation loss,0.32541
validation_accuracy,0.88117


[34m[1mwandb[0m: Agent Starting Run: 3sdyw555 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8478, val_acc=0.8262, val_loss=0.4783358270358508
Epoch 2/10 - loss=0.5955, val_acc=0.8448, val_loss=0.4267881054358972
Epoch 3/10 - loss=0.5490, val_acc=0.8573, val_loss=0.38842400722619513
Epoch 4/10 - loss=0.5226, val_acc=0.8627, val_loss=0.3748567832569485
Epoch 5/10 - loss=0.5039, val_acc=0.8683, val_loss=0.35971185546307394
Epoch 6/10 - loss=0.4890, val_acc=0.8708, val_loss=0.3480860320712212
Epoch 7/10 - loss=0.4764, val_acc=0.8732, val_loss=0.34536249464667107
Epoch 8/10 - loss=0.4656, val_acc=0.8803, val_loss=0.332780731479129
Epoch 9/10 - loss=0.4552, val_acc=0.8767, val_loss=0.3356068055435803
Epoch 10/10 - loss=0.4470, val_acc=0.8777, val_loss=0.33938896037424465
test accuracy  0.8696


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▆▄▃▂▂▂▁▁▁
validation_accuracy,▁▃▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8696
training_loss,0.44701
validation loss,0.33939
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: 5lhugntd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5947, val_acc=0.8550, val_loss=0.40004481059941327
Epoch 2/10 - loss=0.4679, val_acc=0.8622, val_loss=0.37814290566354974
Epoch 3/10 - loss=0.4399, val_acc=0.8645, val_loss=0.36416750451199126
Epoch 4/10 - loss=0.4206, val_acc=0.8682, val_loss=0.3552099781524033
Epoch 5/10 - loss=0.4065, val_acc=0.8658, val_loss=0.3512040931536227
Epoch 6/10 - loss=0.3992, val_acc=0.8727, val_loss=0.34745390163508033
Epoch 7/10 - loss=0.3919, val_acc=0.8737, val_loss=0.3455926828922082
Epoch 8/10 - loss=0.3844, val_acc=0.8797, val_loss=0.32220983431652583
Epoch 9/10 - loss=0.3765, val_acc=0.8645, val_loss=0.35710395703557146
Epoch 10/10 - loss=0.3705, val_acc=0.8757, val_loss=0.33232230117787737
test accuracy  0.8715


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▆▅▄▄▃▃▁▄▂
validation_accuracy,▁▃▄▅▄▆▆█▄▇

0,1
epoch,10.0
test_accuracy,0.8715
training_loss,0.37045
validation loss,0.33232
validation_accuracy,0.87567


[34m[1mwandb[0m: Agent Starting Run: mtgq3vgw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8068, val_acc=0.8245, val_loss=0.48460313163559043
Epoch 2/10 - loss=0.5657, val_acc=0.8507, val_loss=0.42441486305631687
Epoch 3/10 - loss=0.5233, val_acc=0.8438, val_loss=0.4250030600844261
Epoch 4/10 - loss=0.4951, val_acc=0.8500, val_loss=0.39805016474225263
Epoch 5/10 - loss=0.4755, val_acc=0.8645, val_loss=0.37296798847230667
Epoch 6/10 - loss=0.4583, val_acc=0.8717, val_loss=0.3495343054376769
Epoch 7/10 - loss=0.4457, val_acc=0.8670, val_loss=0.35775600224671744
Epoch 8/10 - loss=0.4335, val_acc=0.8725, val_loss=0.34672425176180693
Epoch 9/10 - loss=0.4234, val_acc=0.8738, val_loss=0.33049233409171475
Epoch 10/10 - loss=0.4134, val_acc=0.8697, val_loss=0.3574495601553445
test accuracy  0.8669


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▄▃▂▂▂▁▂
validation_accuracy,▁▅▄▅▇█▇██▇

0,1
epoch,10.0
test_accuracy,0.8669
training_loss,0.41336
validation loss,0.35745
validation_accuracy,0.86967


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ntbfx4rg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.9141, val_acc=0.5212, val_loss=1.4870541357989566
Epoch 2/10 - loss=1.2436, val_acc=0.5907, val_loss=1.0779374447568588
Epoch 3/10 - loss=0.9484, val_acc=0.7010, val_loss=0.8659524161128677
Epoch 4/10 - loss=0.7803, val_acc=0.7482, val_loss=0.7361761990778463
Epoch 5/10 - loss=0.6729, val_acc=0.7758, val_loss=0.6440259042713036
Epoch 6/10 - loss=0.5975, val_acc=0.7853, val_loss=0.5844261903681399
Epoch 7/10 - loss=0.5498, val_acc=0.7987, val_loss=0.5450699133784428
Epoch 8/10 - loss=0.5166, val_acc=0.8165, val_loss=0.5162399172010155
Epoch 9/10 - loss=0.4913, val_acc=0.8250, val_loss=0.49525224920313965
Epoch 10/10 - loss=0.4718, val_acc=0.8282, val_loss=0.48056949609841
test accuracy  0.8253


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8253
training_loss,0.47178
validation loss,0.48057
validation_accuracy,0.82817


[34m[1mwandb[0m: Agent Starting Run: 65nmpiws with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.8761, val_acc=0.6513, val_loss=1.316037991346341
Epoch 2/10 - loss=1.2056, val_acc=0.7128, val_loss=0.9040826516923129
Epoch 3/10 - loss=0.9383, val_acc=0.7622, val_loss=0.7225658606613665
Epoch 4/10 - loss=0.8121, val_acc=0.7795, val_loss=0.6235027085348667
Epoch 5/10 - loss=0.7422, val_acc=0.8090, val_loss=0.5672073469666594
Epoch 6/10 - loss=0.7001, val_acc=0.8177, val_loss=0.5329385233129775
Epoch 7/10 - loss=0.6723, val_acc=0.8230, val_loss=0.5114268878926068
Epoch 8/10 - loss=0.6519, val_acc=0.8297, val_loss=0.493962479744864
Epoch 9/10 - loss=0.6368, val_acc=0.8370, val_loss=0.47897376754557264
Epoch 10/10 - loss=0.6250, val_acc=0.8388, val_loss=0.4703022834206467
test accuracy  0.8299


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8299
training_loss,0.62499
validation loss,0.4703
validation_accuracy,0.83883


[34m[1mwandb[0m: Agent Starting Run: 620js3bq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7256, val_acc=0.8273, val_loss=0.4673318918451395
Epoch 2/10 - loss=0.4105, val_acc=0.8628, val_loss=0.3770169245390811
Epoch 3/10 - loss=0.3641, val_acc=0.8613, val_loss=0.3767116555124935
Epoch 4/10 - loss=0.3379, val_acc=0.8707, val_loss=0.3454709405758556
Epoch 5/10 - loss=0.3194, val_acc=0.8790, val_loss=0.3303277145976799
Epoch 6/10 - loss=0.3057, val_acc=0.8832, val_loss=0.32077597623888987
Epoch 7/10 - loss=0.2898, val_acc=0.8817, val_loss=0.3210067616496181
Epoch 8/10 - loss=0.2809, val_acc=0.8857, val_loss=0.31057743511941827
Epoch 9/10 - loss=0.2690, val_acc=0.8770, val_loss=0.32625332220643044
Epoch 10/10 - loss=0.2597, val_acc=0.8867, val_loss=0.3107681353720471
test accuracy  0.877


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▄▄▃▂▁▁▁▂▁
validation_accuracy,▁▅▅▆▇█▇█▇█

0,1
epoch,10.0
test_accuracy,0.877
training_loss,0.25972
validation loss,0.31077
validation_accuracy,0.88667


[34m[1mwandb[0m: Agent Starting Run: 5z0nn6c1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5942, val_acc=0.8420, val_loss=0.44596152307589726
Epoch 2/10 - loss=0.4044, val_acc=0.8578, val_loss=0.39627172246780173
Epoch 3/10 - loss=0.3661, val_acc=0.8518, val_loss=0.3968671534591622
Epoch 4/10 - loss=0.3402, val_acc=0.8640, val_loss=0.37183326977802345
Epoch 5/10 - loss=0.3212, val_acc=0.8665, val_loss=0.3551449524236282
Epoch 6/10 - loss=0.3045, val_acc=0.8750, val_loss=0.34028447478192164
Epoch 7/10 - loss=0.2924, val_acc=0.8770, val_loss=0.3289861793354293
Epoch 8/10 - loss=0.2795, val_acc=0.8777, val_loss=0.3363487796620868
Epoch 9/10 - loss=0.2705, val_acc=0.8817, val_loss=0.3184104231206639
Epoch 10/10 - loss=0.2607, val_acc=0.8748, val_loss=0.34898257163335467
test accuracy  0.874


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▄▃▂▂▂▁▃
validation_accuracy,▁▄▃▅▅▇▇▇█▇

0,1
epoch,10.0
test_accuracy,0.874
training_loss,0.26066
validation loss,0.34898
validation_accuracy,0.87483


[34m[1mwandb[0m: Agent Starting Run: ybldhux1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.8761, val_acc=0.6513, val_loss=1.316037991346341
Epoch 2/10 - loss=1.2056, val_acc=0.7128, val_loss=0.9040826516923129
Epoch 3/10 - loss=0.9383, val_acc=0.7622, val_loss=0.7225658606613665
Epoch 4/10 - loss=0.8121, val_acc=0.7795, val_loss=0.6235027085348667
Epoch 5/10 - loss=0.7422, val_acc=0.8090, val_loss=0.5672073469666594
Epoch 6/10 - loss=0.7001, val_acc=0.8177, val_loss=0.5329385233129775
Epoch 7/10 - loss=0.6723, val_acc=0.8230, val_loss=0.5114268878926068
Epoch 8/10 - loss=0.6519, val_acc=0.8297, val_loss=0.493962479744864
Epoch 9/10 - loss=0.6368, val_acc=0.8370, val_loss=0.47897376754557264
Epoch 10/10 - loss=0.6250, val_acc=0.8388, val_loss=0.4703022834206467
test accuracy  0.8299


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8299
training_loss,0.62499
validation loss,0.4703
validation_accuracy,0.83883


[34m[1mwandb[0m: Agent Starting Run: r8grrtcu with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.8599, val_acc=0.6513, val_loss=1.1987273039564865
Epoch 2/10 - loss=1.2025, val_acc=0.7112, val_loss=0.9021251713397124
Epoch 3/10 - loss=1.0153, val_acc=0.7480, val_loss=0.7824109813660479
Epoch 4/10 - loss=0.9231, val_acc=0.7665, val_loss=0.7115170394301333
Epoch 5/10 - loss=0.8639, val_acc=0.7787, val_loss=0.6632367558781346
Epoch 6/10 - loss=0.8228, val_acc=0.7938, val_loss=0.6273316985588631
Epoch 7/10 - loss=0.7915, val_acc=0.7975, val_loss=0.6018952852474583
Epoch 8/10 - loss=0.7678, val_acc=0.8017, val_loss=0.578607197903012
Epoch 9/10 - loss=0.7487, val_acc=0.8050, val_loss=0.5623222930651237
Epoch 10/10 - loss=0.7330, val_acc=0.8105, val_loss=0.5480784291519131
test accuracy  0.8047


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8047
training_loss,0.73301
validation loss,0.54808
validation_accuracy,0.8105


[34m[1mwandb[0m: Agent Starting Run: gq0x0u3d with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.4993, val_acc=0.8443, val_loss=0.41945589139294776
Epoch 2/10 - loss=0.3727, val_acc=0.8545, val_loss=0.39162822897714317
Epoch 3/10 - loss=0.3404, val_acc=0.8620, val_loss=0.3831670579721599
Epoch 4/10 - loss=0.3181, val_acc=0.8758, val_loss=0.3345850011258963
Epoch 5/10 - loss=0.3034, val_acc=0.8820, val_loss=0.3260498270446626
Epoch 6/10 - loss=0.2871, val_acc=0.8802, val_loss=0.3297582839121178
Epoch 7/10 - loss=0.2796, val_acc=0.8787, val_loss=0.3357156125080656
Epoch 8/10 - loss=0.2656, val_acc=0.8783, val_loss=0.32369610975977975
Epoch 9/10 - loss=0.2582, val_acc=0.8912, val_loss=0.2994587853592412
Epoch 10/10 - loss=0.2485, val_acc=0.8852, val_loss=0.32994052340796465
test accuracy  0.8753


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▁▁▁
validation loss,█▆▆▃▃▃▃▂▁▃
validation_accuracy,▁▃▄▆▇▆▆▆█▇

0,1
epoch,10.0
test_accuracy,0.8753
training_loss,0.24853
validation loss,0.32994
validation_accuracy,0.88517


[34m[1mwandb[0m: Agent Starting Run: 49alveq3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5922, val_acc=0.8113, val_loss=0.5043562626204777
Epoch 2/10 - loss=0.4644, val_acc=0.8443, val_loss=0.3974883205851049
Epoch 3/10 - loss=0.4377, val_acc=0.8562, val_loss=0.38812089701454655
Epoch 4/10 - loss=0.4251, val_acc=0.8570, val_loss=0.3896627008795613
Epoch 5/10 - loss=0.4189, val_acc=0.8562, val_loss=0.37790959477597114
Epoch 6/10 - loss=0.4136, val_acc=0.8667, val_loss=0.3705262971707586
Epoch 7/10 - loss=0.4081, val_acc=0.8645, val_loss=0.3780838841254187
Epoch 8/10 - loss=0.4073, val_acc=0.8767, val_loss=0.3311754603970795
Epoch 9/10 - loss=0.4035, val_acc=0.8627, val_loss=0.3709741485020921
Epoch 10/10 - loss=0.4028, val_acc=0.8653, val_loss=0.37166232446883063
test accuracy  0.8612


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▄▃▃▃▃▃▁▃▃
validation_accuracy,▁▅▆▆▆▇▇█▆▇

0,1
epoch,10.0
test_accuracy,0.8612
training_loss,0.40276
validation loss,0.37166
validation_accuracy,0.86533


[34m[1mwandb[0m: Agent Starting Run: ieo7f0qq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.3880, val_acc=0.3647, val_loss=2.2662633768522396
Epoch 2/10 - loss=2.3434, val_acc=0.3467, val_loss=2.2117564417877613
Epoch 3/10 - loss=2.2608, val_acc=0.3825, val_loss=2.090290051393923
Epoch 4/10 - loss=2.0824, val_acc=0.4097, val_loss=1.8523027272635275
Epoch 5/10 - loss=1.8321, val_acc=0.4957, val_loss=1.610647242289025
Epoch 6/10 - loss=1.6316, val_acc=0.5457, val_loss=1.4439653101964014
Epoch 7/10 - loss=1.4906, val_acc=0.5735, val_loss=1.3209292701186024
Epoch 8/10 - loss=1.3847, val_acc=0.5895, val_loss=1.2271299187580171
Epoch 9/10 - loss=1.3033, val_acc=0.6132, val_loss=1.1533030920336462
Epoch 10/10 - loss=1.2387, val_acc=0.6248, val_loss=1.0941803954639535
test accuracy  0.6302


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,██▇▆▅▃▃▂▁▁
validation loss,██▇▆▄▃▂▂▁▁
validation_accuracy,▁▁▂▃▅▆▇▇██

0,1
epoch,10.0
test_accuracy,0.6302
training_loss,1.23868
validation loss,1.09418
validation_accuracy,0.62483


[34m[1mwandb[0m: Agent Starting Run: 463qk3ek with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6876, val_acc=0.8270, val_loss=0.4787191735082048
Epoch 2/10 - loss=0.4325, val_acc=0.8442, val_loss=0.42907133896595706
Epoch 3/10 - loss=0.3866, val_acc=0.8567, val_loss=0.38695662726556185
Epoch 4/10 - loss=0.3614, val_acc=0.8628, val_loss=0.37818805991411075
Epoch 5/10 - loss=0.3438, val_acc=0.8693, val_loss=0.35755855983633744
Epoch 6/10 - loss=0.3301, val_acc=0.8725, val_loss=0.34532276138696427
Epoch 7/10 - loss=0.3183, val_acc=0.8748, val_loss=0.3420171888445994
Epoch 8/10 - loss=0.3085, val_acc=0.8808, val_loss=0.3306732243814953
Epoch 9/10 - loss=0.2983, val_acc=0.8778, val_loss=0.3324914830052198
Epoch 10/10 - loss=0.2912, val_acc=0.8827, val_loss=0.33369341382379875
test accuracy  0.8718


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▆▄▃▂▂▂▁▁▁
validation_accuracy,▁▃▅▆▆▇▇█▇█

0,1
epoch,10.0
test_accuracy,0.8718
training_loss,0.29121
validation loss,0.33369
validation_accuracy,0.88267


[34m[1mwandb[0m: Agent Starting Run: o2zvu9sr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7315, val_acc=0.8188, val_loss=0.4880425732511726
Epoch 2/10 - loss=0.5547, val_acc=0.8465, val_loss=0.40241880272532565
Epoch 3/10 - loss=0.5030, val_acc=0.8517, val_loss=0.40850838401592515
Epoch 4/10 - loss=0.4645, val_acc=0.8688, val_loss=0.3510237060304163
Epoch 5/10 - loss=0.4460, val_acc=0.8567, val_loss=0.37143277883929915
Epoch 6/10 - loss=0.4275, val_acc=0.8658, val_loss=0.3515097596625312
Epoch 7/10 - loss=0.4186, val_acc=0.8677, val_loss=0.35268084748600664
Epoch 8/10 - loss=0.4051, val_acc=0.8610, val_loss=0.38150908009891416
Epoch 9/10 - loss=0.4029, val_acc=0.8743, val_loss=0.33542419715914423
Epoch 10/10 - loss=0.3981, val_acc=0.8595, val_loss=0.38772148775174037
test accuracy  0.8546


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▄▂▃▂▂▃▁▃
validation_accuracy,▁▄▅▇▆▇▇▆█▆

0,1
epoch,10.0
test_accuracy,0.8546
training_loss,0.3981
validation loss,0.38772
validation_accuracy,0.8595


[34m[1mwandb[0m: Agent Starting Run: m8opp22r with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8789, val_acc=0.8293, val_loss=0.49250414618965943
Epoch 2/10 - loss=0.5854, val_acc=0.8375, val_loss=0.4468011026558781
Epoch 3/10 - loss=0.5343, val_acc=0.8603, val_loss=0.39557882862400495
Epoch 4/10 - loss=0.5054, val_acc=0.8645, val_loss=0.38861329140122386
Epoch 5/10 - loss=0.4858, val_acc=0.8583, val_loss=0.38268895460734803
Epoch 6/10 - loss=0.4713, val_acc=0.8703, val_loss=0.36325237012540734
Epoch 7/10 - loss=0.4573, val_acc=0.8702, val_loss=0.35968322778334694
Epoch 8/10 - loss=0.4475, val_acc=0.8745, val_loss=0.34606613432327094
Epoch 9/10 - loss=0.4380, val_acc=0.8747, val_loss=0.34645284002661697
Epoch 10/10 - loss=0.4286, val_acc=0.8657, val_loss=0.37188148841247753
test accuracy  0.8599


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▆▃▃▃▂▂▁▁▂
validation_accuracy,▁▂▆▆▅▇▇██▇

0,1
epoch,10.0
test_accuracy,0.8599
training_loss,0.42855
validation loss,0.37188
validation_accuracy,0.86567


[34m[1mwandb[0m: Agent Starting Run: hmfhqpkf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.8788, val_acc=0.6522, val_loss=1.3177682148110759
Epoch 2/10 - loss=1.2067, val_acc=0.7120, val_loss=0.9047966936328766
Epoch 3/10 - loss=0.9388, val_acc=0.7613, val_loss=0.7227805061010943
Epoch 4/10 - loss=0.8124, val_acc=0.7797, val_loss=0.6237953468683043
Epoch 5/10 - loss=0.7424, val_acc=0.8088, val_loss=0.5673309979330844
Epoch 6/10 - loss=0.7002, val_acc=0.8175, val_loss=0.5331097811639605
Epoch 7/10 - loss=0.6725, val_acc=0.8228, val_loss=0.511762578390429
Epoch 8/10 - loss=0.6520, val_acc=0.8290, val_loss=0.49436417143052025
Epoch 9/10 - loss=0.6369, val_acc=0.8378, val_loss=0.4789121611083826
Epoch 10/10 - loss=0.6252, val_acc=0.8390, val_loss=0.4703528555415023
test accuracy  0.8298


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8298
training_loss,0.62519
validation loss,0.47035
validation_accuracy,0.839


[34m[1mwandb[0m: Agent Starting Run: t1yoaghb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7026, val_acc=0.8180, val_loss=0.508315750043568
Epoch 2/10 - loss=0.4560, val_acc=0.8465, val_loss=0.43375187156670525
Epoch 3/10 - loss=0.4096, val_acc=0.8480, val_loss=0.41586965491770206
Epoch 4/10 - loss=0.3787, val_acc=0.8563, val_loss=0.3794809978940059
Epoch 5/10 - loss=0.3566, val_acc=0.8663, val_loss=0.36566807975099647
Epoch 6/10 - loss=0.3398, val_acc=0.8697, val_loss=0.35057483950521356
Epoch 7/10 - loss=0.3257, val_acc=0.8738, val_loss=0.34777300476303435
Epoch 8/10 - loss=0.3150, val_acc=0.8755, val_loss=0.34171287501286696
Epoch 9/10 - loss=0.3055, val_acc=0.8770, val_loss=0.3292644956300795
Epoch 10/10 - loss=0.2956, val_acc=0.8735, val_loss=0.3481889219222678
test accuracy  0.8657


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▂
validation_accuracy,▁▄▅▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8657
training_loss,0.29565
validation loss,0.34819
validation_accuracy,0.8735


[34m[1mwandb[0m: Agent Starting Run: ju6bxvc5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5747, val_acc=0.8473, val_loss=0.3992222247935671
Epoch 2/10 - loss=0.4687, val_acc=0.8583, val_loss=0.3770528426234996
Epoch 3/10 - loss=0.4409, val_acc=0.8645, val_loss=0.35510538620169807
Epoch 4/10 - loss=0.4217, val_acc=0.8745, val_loss=0.34657719251007013
Epoch 5/10 - loss=0.4096, val_acc=0.8645, val_loss=0.3512953437946029
Epoch 6/10 - loss=0.3996, val_acc=0.8742, val_loss=0.33506414142979746
Epoch 7/10 - loss=0.3929, val_acc=0.8782, val_loss=0.33141834739733556
Epoch 8/10 - loss=0.3879, val_acc=0.8713, val_loss=0.33343671491373866
Epoch 9/10 - loss=0.3826, val_acc=0.8825, val_loss=0.31466370280752504
Epoch 10/10 - loss=0.3786, val_acc=0.8737, val_loss=0.33103923583679284
test accuracy  0.8658


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▆▄▄▄▃▂▃▁▂
validation_accuracy,▁▃▄▆▄▆▇▆█▆

0,1
epoch,10.0
test_accuracy,0.8658
training_loss,0.37856
validation loss,0.33104
validation_accuracy,0.87367


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nkwtwi76 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5235, val_acc=0.8253, val_loss=0.4633600436704045
Epoch 2/10 - loss=0.3967, val_acc=0.8488, val_loss=0.40709307504086384
Epoch 3/10 - loss=0.3610, val_acc=0.8640, val_loss=0.3750663852004295
Epoch 4/10 - loss=0.3397, val_acc=0.8732, val_loss=0.3487636518233484
Epoch 5/10 - loss=0.3226, val_acc=0.8778, val_loss=0.33550856993749373
Epoch 6/10 - loss=0.3093, val_acc=0.8700, val_loss=0.3507626464132797
Epoch 7/10 - loss=0.2964, val_acc=0.8770, val_loss=0.3380899762526547
Epoch 8/10 - loss=0.2882, val_acc=0.8720, val_loss=0.35602823451083776
Epoch 9/10 - loss=0.2785, val_acc=0.8805, val_loss=0.3330456849676104
Epoch 10/10 - loss=0.2719, val_acc=0.8753, val_loss=0.34319352018095023
test accuracy  0.8643


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▃▂▁▂▁▂▁▂
validation_accuracy,▁▄▆▇█▇█▇█▇

0,1
epoch,10.0
test_accuracy,0.8643
training_loss,0.27193
validation loss,0.34319
validation_accuracy,0.87533


[34m[1mwandb[0m: Agent Starting Run: zizv6nlz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.4726, val_acc=0.6570, val_loss=0.9519804150382182
Epoch 2/10 - loss=0.9799, val_acc=0.7383, val_loss=0.7338354562689723
Epoch 3/10 - loss=0.8237, val_acc=0.7828, val_loss=0.6260389744172502
Epoch 4/10 - loss=0.7495, val_acc=0.7940, val_loss=0.5875338393049072
Epoch 5/10 - loss=0.7053, val_acc=0.8138, val_loss=0.5460062106976246
Epoch 6/10 - loss=0.6845, val_acc=0.8380, val_loss=0.48720995592278965
Epoch 7/10 - loss=0.6708, val_acc=0.8500, val_loss=0.4626496005102085
Epoch 8/10 - loss=0.6641, val_acc=0.8445, val_loss=0.4686090572145925
Epoch 9/10 - loss=0.6573, val_acc=0.8338, val_loss=0.4788795738823623
Epoch 10/10 - loss=0.6506, val_acc=0.8448, val_loss=0.46297848261177615
test accuracy  0.8389


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▂▂▁▁▁▁▁▁
validation loss,█▅▃▃▂▁▁▁▁▁
validation_accuracy,▁▄▆▆▇███▇█

0,1
epoch,10.0
test_accuracy,0.8389
training_loss,0.65062
validation loss,0.46298
validation_accuracy,0.84483


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1ius3xv7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7485, val_acc=0.8303, val_loss=0.447105174906459
Epoch 2/10 - loss=0.5571, val_acc=0.8510, val_loss=0.4130422023035232
Epoch 3/10 - loss=0.5067, val_acc=0.8547, val_loss=0.3934090925138971
Epoch 4/10 - loss=0.4816, val_acc=0.8717, val_loss=0.3547327091415729
Epoch 5/10 - loss=0.4706, val_acc=0.8673, val_loss=0.37859030541784017
Epoch 6/10 - loss=0.4603, val_acc=0.8688, val_loss=0.35613618301641714
Epoch 7/10 - loss=0.4512, val_acc=0.8732, val_loss=0.35672510944922337
Epoch 8/10 - loss=0.4473, val_acc=0.8673, val_loss=0.365690321140569
Epoch 9/10 - loss=0.4436, val_acc=0.8670, val_loss=0.3712182849876734
Epoch 10/10 - loss=0.4378, val_acc=0.8630, val_loss=0.38359008977498776
test accuracy  0.8566


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▁▃▁▁▂▂▃
validation_accuracy,▁▄▅█▇▇█▇▇▆

0,1
epoch,10.0
test_accuracy,0.8566
training_loss,0.4378
validation loss,0.38359
validation_accuracy,0.863


[34m[1mwandb[0m: Agent Starting Run: 5vdkzq7e with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8666, val_acc=0.8412, val_loss=0.43212140525758547
Epoch 2/10 - loss=0.7057, val_acc=0.8603, val_loss=0.38636569547142124
Epoch 3/10 - loss=0.6638, val_acc=0.8575, val_loss=0.38550247465227
Epoch 4/10 - loss=0.6352, val_acc=0.8730, val_loss=0.3472397827945906
Epoch 5/10 - loss=0.6125, val_acc=0.8745, val_loss=0.33837453411464297
Epoch 6/10 - loss=0.5921, val_acc=0.8783, val_loss=0.3328322062261781
Epoch 7/10 - loss=0.5759, val_acc=0.8755, val_loss=0.33230527076001437
Epoch 8/10 - loss=0.5600, val_acc=0.8770, val_loss=0.3348858830022946
Epoch 9/10 - loss=0.5470, val_acc=0.8782, val_loss=0.33055211116781713
Epoch 10/10 - loss=0.5345, val_acc=0.8758, val_loss=0.3352300001152131
test accuracy  0.8703


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▁▁
validation loss,█▅▅▂▂▁▁▁▁▁
validation_accuracy,▁▅▄▇▇█▇███

0,1
epoch,10.0
test_accuracy,0.8703
training_loss,0.53447
validation loss,0.33523
validation_accuracy,0.87583


[34m[1mwandb[0m: Agent Starting Run: z6yr2fnf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7485, val_acc=0.8303, val_loss=0.447105174906459
Epoch 2/10 - loss=0.5571, val_acc=0.8510, val_loss=0.4130422023035232
Epoch 3/10 - loss=0.5067, val_acc=0.8547, val_loss=0.3934090925138971
Epoch 4/10 - loss=0.4816, val_acc=0.8717, val_loss=0.3547327091415729
Epoch 5/10 - loss=0.4706, val_acc=0.8673, val_loss=0.37859030541784017
Epoch 6/10 - loss=0.4603, val_acc=0.8688, val_loss=0.35613618301641714
Epoch 7/10 - loss=0.4512, val_acc=0.8732, val_loss=0.35672510944922337
Epoch 8/10 - loss=0.4473, val_acc=0.8673, val_loss=0.365690321140569
Epoch 9/10 - loss=0.4436, val_acc=0.8670, val_loss=0.3712182849876734
Epoch 10/10 - loss=0.4378, val_acc=0.8630, val_loss=0.38359008977498776
test accuracy  0.8566


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▁▃▁▁▂▂▃
validation_accuracy,▁▄▅█▇▇█▇▇▆

0,1
epoch,10.0
test_accuracy,0.8566
training_loss,0.4378
validation loss,0.38359
validation_accuracy,0.863


[34m[1mwandb[0m: Agent Starting Run: 419bhuoo with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3069, val_acc=0.0977, val_loss=2.3039294933728303
Epoch 2/10 - loss=2.3041, val_acc=0.1145, val_loss=2.3036475947900055
Epoch 3/10 - loss=2.3041, val_acc=0.0977, val_loss=2.3040859922048162
Epoch 4/10 - loss=2.3041, val_acc=0.0985, val_loss=2.302757747155341
Epoch 5/10 - loss=2.3041, val_acc=0.1023, val_loss=2.3030441769201704
Epoch 6/10 - loss=2.3040, val_acc=0.0985, val_loss=2.3032501931688043
Epoch 7/10 - loss=2.3039, val_acc=0.1000, val_loss=2.3035614843626755
Epoch 8/10 - loss=2.3039, val_acc=0.0977, val_loss=2.3051427433363156
Epoch 9/10 - loss=2.3038, val_acc=0.1000, val_loss=2.303022491236831
Epoch 10/10 - loss=2.3038, val_acc=0.0995, val_loss=2.3036126237574437
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▂▂▂▂▁▁▁▁
validation loss,▄▄▅▁▂▂▃█▂▄
validation_accuracy,▁█▁▁▃▁▂▁▂▂

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30375
validation loss,2.30361
validation_accuracy,0.0995


[34m[1mwandb[0m: Agent Starting Run: t4a9qf8h with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6876, val_acc=0.8270, val_loss=0.4787191735082048
Epoch 2/10 - loss=0.4325, val_acc=0.8442, val_loss=0.42907133896595706
Epoch 3/10 - loss=0.3866, val_acc=0.8567, val_loss=0.38695662726556185
Epoch 4/10 - loss=0.3614, val_acc=0.8628, val_loss=0.37818805991411075
Epoch 5/10 - loss=0.3438, val_acc=0.8693, val_loss=0.35755855983633744
Epoch 6/10 - loss=0.3301, val_acc=0.8725, val_loss=0.34532276138696427
Epoch 7/10 - loss=0.3183, val_acc=0.8748, val_loss=0.3420171888445994
Epoch 8/10 - loss=0.3085, val_acc=0.8808, val_loss=0.3306732243814953
Epoch 9/10 - loss=0.2983, val_acc=0.8778, val_loss=0.3324914830052198
Epoch 10/10 - loss=0.2912, val_acc=0.8827, val_loss=0.33369341382379875
test accuracy  0.8718


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▆▄▃▂▂▂▁▁▁
validation_accuracy,▁▃▅▆▆▇▇█▇█

0,1
epoch,10.0
test_accuracy,0.8718
training_loss,0.29121
validation loss,0.33369
validation_accuracy,0.88267


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tyman2y2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.4185, val_acc=0.7175, val_loss=0.903268396775767
Epoch 2/10 - loss=0.9493, val_acc=0.7670, val_loss=0.7112956152764559
Epoch 3/10 - loss=0.8182, val_acc=0.7890, val_loss=0.6235152465222219
Epoch 4/10 - loss=0.7500, val_acc=0.8022, val_loss=0.5704385096973644
Epoch 5/10 - loss=0.7076, val_acc=0.8145, val_loss=0.5373902335502819
Epoch 6/10 - loss=0.6783, val_acc=0.8222, val_loss=0.5109826591433704
Epoch 7/10 - loss=0.6567, val_acc=0.8293, val_loss=0.49414232137867914
Epoch 8/10 - loss=0.6399, val_acc=0.8323, val_loss=0.4769013480420701
Epoch 9/10 - loss=0.6260, val_acc=0.8375, val_loss=0.4648432700650765
Epoch 10/10 - loss=0.6143, val_acc=0.8338, val_loss=0.4587649205499406
test accuracy  0.8308


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8308
training_loss,0.61432
validation loss,0.45876
validation_accuracy,0.83383


[34m[1mwandb[0m: Agent Starting Run: ycxscwhp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7774, val_acc=0.8352, val_loss=0.4505209033881386
Epoch 2/10 - loss=0.6017, val_acc=0.8532, val_loss=0.4015291694116428
Epoch 3/10 - loss=0.5639, val_acc=0.8595, val_loss=0.39001280072844285
Epoch 4/10 - loss=0.5393, val_acc=0.8632, val_loss=0.36751491984000145
Epoch 5/10 - loss=0.5197, val_acc=0.8642, val_loss=0.3689570585937188
Epoch 6/10 - loss=0.5048, val_acc=0.8670, val_loss=0.357397397275442
Epoch 7/10 - loss=0.4911, val_acc=0.8723, val_loss=0.3422021594207085
Epoch 8/10 - loss=0.4801, val_acc=0.8755, val_loss=0.3342321336477077
Epoch 9/10 - loss=0.4698, val_acc=0.8702, val_loss=0.3365545053120305
Epoch 10/10 - loss=0.4597, val_acc=0.8773, val_loss=0.32682267719413166
test accuracy  0.8722


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▃▃▃▂▁▂▁
validation_accuracy,▁▄▅▆▆▆▇█▇█

0,1
epoch,10.0
test_accuracy,0.8722
training_loss,0.45969
validation loss,0.32682
validation_accuracy,0.87733


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d9t5vwds with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7281, val_acc=0.8330, val_loss=0.4615748313493885
Epoch 2/10 - loss=0.4249, val_acc=0.8530, val_loss=0.4002088556795746
Epoch 3/10 - loss=0.3834, val_acc=0.8630, val_loss=0.3787910719320583
Epoch 4/10 - loss=0.3590, val_acc=0.8643, val_loss=0.3766657126518925
Epoch 5/10 - loss=0.3431, val_acc=0.8615, val_loss=0.3605342476256158
Epoch 6/10 - loss=0.3295, val_acc=0.8745, val_loss=0.3527585037339373
Epoch 7/10 - loss=0.3189, val_acc=0.8773, val_loss=0.3437619093296629
Epoch 8/10 - loss=0.3097, val_acc=0.8793, val_loss=0.3354996242404891
Epoch 9/10 - loss=0.3000, val_acc=0.8748, val_loss=0.33925472334358364
Epoch 10/10 - loss=0.2928, val_acc=0.8733, val_loss=0.3440719290222493
test accuracy  0.8665


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▂▁▁▁▁
validation_accuracy,▁▄▆▆▅▇██▇▇

0,1
epoch,10.0
test_accuracy,0.8665
training_loss,0.2928
validation loss,0.34407
validation_accuracy,0.87333


[34m[1mwandb[0m: Agent Starting Run: 4wpjv8g2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6513, val_acc=0.8468, val_loss=0.42662459365402333
Epoch 2/10 - loss=0.4885, val_acc=0.8437, val_loss=0.4208390784462202
Epoch 3/10 - loss=0.4611, val_acc=0.8543, val_loss=0.41541329688275014
Epoch 4/10 - loss=0.4501, val_acc=0.8623, val_loss=0.37901689566703034
Epoch 5/10 - loss=0.4393, val_acc=0.8737, val_loss=0.35594904653691484
Epoch 6/10 - loss=0.4346, val_acc=0.8718, val_loss=0.3620761252648114
Epoch 7/10 - loss=0.4311, val_acc=0.8732, val_loss=0.3490335207206748
Epoch 8/10 - loss=0.4266, val_acc=0.8440, val_loss=0.4198555955240446
Epoch 9/10 - loss=0.4226, val_acc=0.8720, val_loss=0.3559292248727048
Epoch 10/10 - loss=0.4204, val_acc=0.8588, val_loss=0.3682957115237183
test accuracy  0.8569


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▇▇▄▂▂▁▇▂▃
validation_accuracy,▂▁▃▅███▁█▅

0,1
epoch,10.0
test_accuracy,0.8569
training_loss,0.42044
validation loss,0.3683
validation_accuracy,0.85883


[34m[1mwandb[0m: Agent Starting Run: ksb7z26n with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.7396, val_acc=0.5857, val_loss=1.1343396418943155
Epoch 2/10 - loss=1.0911, val_acc=0.7187, val_loss=0.8451706600971275
Epoch 3/10 - loss=0.8888, val_acc=0.7545, val_loss=0.69240641572341
Epoch 4/10 - loss=0.7896, val_acc=0.7782, val_loss=0.6241208809667945
Epoch 5/10 - loss=0.7362, val_acc=0.7935, val_loss=0.5767993497445661
Epoch 6/10 - loss=0.6964, val_acc=0.8112, val_loss=0.5409533337233089
Epoch 7/10 - loss=0.6650, val_acc=0.8198, val_loss=0.5171653298828971
Epoch 8/10 - loss=0.6430, val_acc=0.8260, val_loss=0.4975933221251779
Epoch 9/10 - loss=0.6263, val_acc=0.8323, val_loss=0.48305914480987333
Epoch 10/10 - loss=0.6133, val_acc=0.8305, val_loss=0.4762505294739002
test accuracy  0.8262


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▂▁▁▁▁
validation_accuracy,▁▅▆▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8262
training_loss,0.61325
validation loss,0.47625
validation_accuracy,0.8305


[34m[1mwandb[0m: Agent Starting Run: 42m02fru with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.1276, val_acc=0.4033, val_loss=1.8188310433576163
Epoch 2/10 - loss=1.3104, val_acc=0.6553, val_loss=0.9830370929656074
Epoch 3/10 - loss=0.8650, val_acc=0.6908, val_loss=0.8079335250032849
Epoch 4/10 - loss=0.7455, val_acc=0.7452, val_loss=0.7174377454811656
Epoch 5/10 - loss=0.6732, val_acc=0.7660, val_loss=0.6619538658791259
Epoch 6/10 - loss=0.6192, val_acc=0.7895, val_loss=0.6082241475734063
Epoch 7/10 - loss=0.5791, val_acc=0.7982, val_loss=0.5717956437984428
Epoch 8/10 - loss=0.5479, val_acc=0.8005, val_loss=0.5547954016057876
Epoch 9/10 - loss=0.5250, val_acc=0.8133, val_loss=0.532647655423864
Epoch 10/10 - loss=0.5068, val_acc=0.8188, val_loss=0.5151775258020787
test accuracy  0.8114


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▄▃▂▂▁▁▁▁▁
validation_accuracy,▁▅▆▇▇█████

0,1
epoch,10.0
test_accuracy,0.8114
training_loss,0.50678
validation loss,0.51518
validation_accuracy,0.81883


[34m[1mwandb[0m: Agent Starting Run: iziir7yk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5548, val_acc=0.8453, val_loss=0.4194081959209287
Epoch 2/10 - loss=0.3889, val_acc=0.8647, val_loss=0.3780701550814465
Epoch 3/10 - loss=0.3538, val_acc=0.8575, val_loss=0.38089540590785115
Epoch 4/10 - loss=0.3305, val_acc=0.8730, val_loss=0.35222763031743926
Epoch 5/10 - loss=0.3127, val_acc=0.8767, val_loss=0.33224679366490717
Epoch 6/10 - loss=0.2972, val_acc=0.8783, val_loss=0.3318268988480358
Epoch 7/10 - loss=0.2850, val_acc=0.8805, val_loss=0.32358663497350504
Epoch 8/10 - loss=0.2735, val_acc=0.8823, val_loss=0.32237098344760884
Epoch 9/10 - loss=0.2648, val_acc=0.8825, val_loss=0.31436079532519723
Epoch 10/10 - loss=0.2538, val_acc=0.8842, val_loss=0.3231169497104704
test accuracy  0.876


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▄▂▂▂▂▁▂
validation_accuracy,▁▄▃▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.876
training_loss,0.25382
validation loss,0.32312
validation_accuracy,0.88417


[34m[1mwandb[0m: Agent Starting Run: fl0kri1m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.3237, val_acc=0.7130, val_loss=0.8954104418621994
Epoch 2/10 - loss=0.8946, val_acc=0.7565, val_loss=0.7283967218450743
Epoch 3/10 - loss=0.7811, val_acc=0.7757, val_loss=0.6524893456211704
Epoch 4/10 - loss=0.7206, val_acc=0.7900, val_loss=0.6037802052356253
Epoch 5/10 - loss=0.6812, val_acc=0.8002, val_loss=0.570805544006411
Epoch 6/10 - loss=0.6533, val_acc=0.8083, val_loss=0.547718027437147
Epoch 7/10 - loss=0.6322, val_acc=0.8135, val_loss=0.5289343887892091
Epoch 8/10 - loss=0.6151, val_acc=0.8183, val_loss=0.515373113415713
Epoch 9/10 - loss=0.6014, val_acc=0.8200, val_loss=0.5011490545710252
Epoch 10/10 - loss=0.5900, val_acc=0.8252, val_loss=0.4917332802693977
test accuracy  0.8209


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8209
training_loss,0.59004
validation loss,0.49173
validation_accuracy,0.82517


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6pp22kd3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6665, val_acc=0.8473, val_loss=0.41417683184437876
Epoch 2/10 - loss=0.5159, val_acc=0.8588, val_loss=0.3969606288143836
Epoch 3/10 - loss=0.4821, val_acc=0.8585, val_loss=0.38764647632458776
Epoch 4/10 - loss=0.4669, val_acc=0.8582, val_loss=0.391143500246997
Epoch 5/10 - loss=0.4566, val_acc=0.8633, val_loss=0.3727388832901861
Epoch 6/10 - loss=0.4485, val_acc=0.8637, val_loss=0.3835888115022828
Epoch 7/10 - loss=0.4457, val_acc=0.8615, val_loss=0.3822746680056724
Epoch 8/10 - loss=0.4412, val_acc=0.8575, val_loss=0.3704033135904653
Epoch 9/10 - loss=0.4373, val_acc=0.8698, val_loss=0.3411480981359136
Epoch 10/10 - loss=0.4365, val_acc=0.8717, val_loss=0.3439395541592189
test accuracy  0.8658


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▆▅▆▄▅▅▄▁▁
validation_accuracy,▁▄▄▄▆▆▅▄▇█

0,1
epoch,10.0
test_accuracy,0.8658
training_loss,0.43651
validation loss,0.34394
validation_accuracy,0.87167


[34m[1mwandb[0m: Agent Starting Run: knigqm3r with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6521, val_acc=0.8373, val_loss=0.44065812036649904
Epoch 2/10 - loss=0.5039, val_acc=0.8515, val_loss=0.39819258078497416
Epoch 3/10 - loss=0.4676, val_acc=0.8628, val_loss=0.3624573377998644
Epoch 4/10 - loss=0.4469, val_acc=0.8650, val_loss=0.3662027999469988
Epoch 5/10 - loss=0.4281, val_acc=0.8625, val_loss=0.3602144561795144
Epoch 6/10 - loss=0.4187, val_acc=0.8593, val_loss=0.3647337416492024
Epoch 7/10 - loss=0.4095, val_acc=0.8533, val_loss=0.39725572272295684
Epoch 8/10 - loss=0.4014, val_acc=0.8748, val_loss=0.3352346645340002
Epoch 9/10 - loss=0.3950, val_acc=0.8642, val_loss=0.3563430628467275
Epoch 10/10 - loss=0.3903, val_acc=0.8767, val_loss=0.3320024211434913
test accuracy  0.874


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▃▃▃▃▅▁▃▁
validation_accuracy,▁▄▆▆▅▅▄█▆█

0,1
epoch,10.0
test_accuracy,0.874
training_loss,0.39028
validation loss,0.332
validation_accuracy,0.87667


[34m[1mwandb[0m: Agent Starting Run: y9xitscc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7551, val_acc=0.8255, val_loss=0.47672989789171677
Epoch 2/10 - loss=0.5300, val_acc=0.8517, val_loss=0.40880270868574053
Epoch 3/10 - loss=0.4919, val_acc=0.8510, val_loss=0.4145312046348821
Epoch 4/10 - loss=0.4705, val_acc=0.8660, val_loss=0.3734965671285398
Epoch 5/10 - loss=0.4539, val_acc=0.8642, val_loss=0.3709481925641312
Epoch 6/10 - loss=0.4427, val_acc=0.8677, val_loss=0.3601828136426502
Epoch 7/10 - loss=0.4310, val_acc=0.8705, val_loss=0.3488026420651943
Epoch 8/10 - loss=0.4218, val_acc=0.8730, val_loss=0.3381843602211754
Epoch 9/10 - loss=0.4129, val_acc=0.8745, val_loss=0.3395534913113545
Epoch 10/10 - loss=0.4052, val_acc=0.8763, val_loss=0.3359064347333098
test accuracy  0.8679


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▅▅▇▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8679
training_loss,0.40519
validation loss,0.33591
validation_accuracy,0.87633


[34m[1mwandb[0m: Agent Starting Run: isju9qrs with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3092, val_acc=0.0977, val_loss=2.3052656278976276
Epoch 2/10 - loss=2.3048, val_acc=0.0985, val_loss=2.3034536806452883
Epoch 3/10 - loss=2.3038, val_acc=0.0977, val_loss=2.3028196133118377
Epoch 4/10 - loss=2.3028, val_acc=0.0985, val_loss=2.299086407611523
Epoch 5/10 - loss=2.3009, val_acc=0.1023, val_loss=2.298503463021415
Epoch 6/10 - loss=2.2982, val_acc=0.0985, val_loss=2.2946153449668474
Epoch 7/10 - loss=2.2936, val_acc=0.1000, val_loss=2.290176967505703
Epoch 8/10 - loss=2.2837, val_acc=0.1693, val_loss=2.2774916005620387
Epoch 9/10 - loss=2.2510, val_acc=0.2073, val_loss=2.207731613659371
Epoch 10/10 - loss=2.0532, val_acc=0.2850, val_loss=1.835449401631109
test accuracy  0.2888


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,███████▇▆▁
validation loss,████████▇▁
validation_accuracy,▁▁▁▁▁▁▁▄▅█

0,1
epoch,10.0
test_accuracy,0.2888
training_loss,2.05322
validation loss,1.83545
validation_accuracy,0.285


[34m[1mwandb[0m: Agent Starting Run: finwfr2y with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6464, val_acc=0.8440, val_loss=0.42760787806701894
Epoch 2/10 - loss=0.3935, val_acc=0.8667, val_loss=0.3632784631273935
Epoch 3/10 - loss=0.3540, val_acc=0.8618, val_loss=0.3721167821329822
Epoch 4/10 - loss=0.3298, val_acc=0.8710, val_loss=0.3436420202280783
Epoch 5/10 - loss=0.3115, val_acc=0.8777, val_loss=0.3331553842981655
Epoch 6/10 - loss=0.2991, val_acc=0.8832, val_loss=0.31689799783551054
Epoch 7/10 - loss=0.2843, val_acc=0.8823, val_loss=0.3136265402736383
Epoch 8/10 - loss=0.2741, val_acc=0.8868, val_loss=0.3015015743051702
Epoch 9/10 - loss=0.2628, val_acc=0.8817, val_loss=0.31460962083852756
Epoch 10/10 - loss=0.2539, val_acc=0.8837, val_loss=0.30959520978627136
test accuracy  0.8823


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁
validation loss,█▄▅▃▃▂▂▁▂▁
validation_accuracy,▁▅▄▅▇▇▇█▇▇

0,1
epoch,10.0
test_accuracy,0.8823
training_loss,0.2539
validation loss,0.3096
validation_accuracy,0.88367


[34m[1mwandb[0m: Agent Starting Run: e57u65kp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.6892, val_acc=0.6608, val_loss=1.0847750771351428
Epoch 2/10 - loss=1.0846, val_acc=0.7203, val_loss=0.8209152207116789
Epoch 3/10 - loss=0.9087, val_acc=0.7503, val_loss=0.7013350929221411
Epoch 4/10 - loss=0.8140, val_acc=0.7800, val_loss=0.626491785996969
Epoch 5/10 - loss=0.7525, val_acc=0.7972, val_loss=0.5769074983600697
Epoch 6/10 - loss=0.7115, val_acc=0.8093, val_loss=0.5440485159904451
Epoch 7/10 - loss=0.6836, val_acc=0.8173, val_loss=0.5201908292520977
Epoch 8/10 - loss=0.6638, val_acc=0.8195, val_loss=0.5059459487173098
Epoch 9/10 - loss=0.6490, val_acc=0.8228, val_loss=0.49197605895824487
Epoch 10/10 - loss=0.6371, val_acc=0.8255, val_loss=0.48041776217680643
test accuracy  0.8225


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▂▁▁▁▁
validation_accuracy,▁▄▅▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8225
training_loss,0.6371
validation loss,0.48042
validation_accuracy,0.8255


[34m[1mwandb[0m: Agent Starting Run: cldyv9q4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7003, val_acc=0.8220, val_loss=0.4943318380748373
Epoch 2/10 - loss=0.4455, val_acc=0.8453, val_loss=0.4272136531671892
Epoch 3/10 - loss=0.4050, val_acc=0.8513, val_loss=0.4170393582911953
Epoch 4/10 - loss=0.3805, val_acc=0.8595, val_loss=0.3825536575852357
Epoch 5/10 - loss=0.3603, val_acc=0.8628, val_loss=0.37099440208518414
Epoch 6/10 - loss=0.3468, val_acc=0.8693, val_loss=0.3573359847989474
Epoch 7/10 - loss=0.3328, val_acc=0.8708, val_loss=0.3482750134588486
Epoch 8/10 - loss=0.3219, val_acc=0.8737, val_loss=0.33455455284190694
Epoch 9/10 - loss=0.3102, val_acc=0.8758, val_loss=0.3333025842760965
Epoch 10/10 - loss=0.3008, val_acc=0.8777, val_loss=0.32892516834399954
test accuracy  0.8731


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8731
training_loss,0.30077
validation loss,0.32893
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: 0kb7lvp2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7199, val_acc=0.8170, val_loss=0.5128903662099363
Epoch 2/10 - loss=0.4730, val_acc=0.8392, val_loss=0.4538128228786432
Epoch 3/10 - loss=0.4248, val_acc=0.8355, val_loss=0.44736311666876477
Epoch 4/10 - loss=0.3977, val_acc=0.8302, val_loss=0.4442250180687813
Epoch 5/10 - loss=0.3783, val_acc=0.8610, val_loss=0.3842733586401374
Epoch 6/10 - loss=0.3616, val_acc=0.8647, val_loss=0.36905950641710683
Epoch 7/10 - loss=0.3489, val_acc=0.8633, val_loss=0.3650292868111833
Epoch 8/10 - loss=0.3367, val_acc=0.8585, val_loss=0.3853419386577023
Epoch 9/10 - loss=0.3276, val_acc=0.8697, val_loss=0.345113075744468
Epoch 10/10 - loss=0.3186, val_acc=0.8683, val_loss=0.3624201005778085
test accuracy  0.8607


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▆▅▅▃▂▂▃▁▂
validation_accuracy,▁▄▃▃▇▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8607
training_loss,0.3186
validation loss,0.36242
validation_accuracy,0.86833


[34m[1mwandb[0m: Agent Starting Run: 38m46yc1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8185, val_acc=0.8267, val_loss=0.4747185221942402
Epoch 2/10 - loss=0.6008, val_acc=0.8395, val_loss=0.4333648467520245
Epoch 3/10 - loss=0.5590, val_acc=0.8573, val_loss=0.39086682603332096
Epoch 4/10 - loss=0.5339, val_acc=0.8580, val_loss=0.3843805576452475
Epoch 5/10 - loss=0.5163, val_acc=0.8630, val_loss=0.36642308566661236
Epoch 6/10 - loss=0.5020, val_acc=0.8647, val_loss=0.3592140153546443
Epoch 7/10 - loss=0.4894, val_acc=0.8668, val_loss=0.3595176354774418
Epoch 8/10 - loss=0.4791, val_acc=0.8758, val_loss=0.3397706236734661
Epoch 9/10 - loss=0.4691, val_acc=0.8727, val_loss=0.35173497933937126
Epoch 10/10 - loss=0.4613, val_acc=0.8698, val_loss=0.3569104978669728
test accuracy  0.8638


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▆▄▃▂▂▂▁▂▂
validation_accuracy,▁▃▅▅▆▆▇██▇

0,1
epoch,10.0
test_accuracy,0.8638
training_loss,0.46127
validation loss,0.35691
validation_accuracy,0.86983


[34m[1mwandb[0m: Agent Starting Run: amal80tu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.3714, val_acc=0.6633, val_loss=0.8435831076835084
Epoch 2/10 - loss=0.9579, val_acc=0.6982, val_loss=0.7828577488327914
Epoch 3/10 - loss=0.8909, val_acc=0.7322, val_loss=0.7029925216641043
Epoch 4/10 - loss=0.8286, val_acc=0.7755, val_loss=0.6285914572820426
Epoch 5/10 - loss=0.7663, val_acc=0.8168, val_loss=0.548338086476002
Epoch 6/10 - loss=0.7271, val_acc=0.8390, val_loss=0.4892845525471838
Epoch 7/10 - loss=0.6921, val_acc=0.8320, val_loss=0.4860357990407158
Epoch 8/10 - loss=0.6715, val_acc=0.8112, val_loss=0.5223625216423984
Epoch 9/10 - loss=0.6565, val_acc=0.8380, val_loss=0.4794633079271275
Epoch 10/10 - loss=0.6451, val_acc=0.8353, val_loss=0.47687067772232344
test accuracy  0.8297


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▁▁▁▁
validation loss,█▇▅▄▂▁▁▂▁▁
validation_accuracy,▁▂▄▅▇██▇██

0,1
epoch,10.0
test_accuracy,0.8297
training_loss,0.64514
validation loss,0.47687
validation_accuracy,0.83533


[34m[1mwandb[0m: Agent Starting Run: vvt1s8ho with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6495, val_acc=0.8287, val_loss=0.4716594105169914
Epoch 2/10 - loss=0.4239, val_acc=0.8508, val_loss=0.4094628426307846
Epoch 3/10 - loss=0.3844, val_acc=0.8553, val_loss=0.4017706956811508
Epoch 4/10 - loss=0.3606, val_acc=0.8672, val_loss=0.36979783960650636
Epoch 5/10 - loss=0.3414, val_acc=0.8645, val_loss=0.3633434222988479
Epoch 6/10 - loss=0.3276, val_acc=0.8720, val_loss=0.3461213650769512
Epoch 7/10 - loss=0.3138, val_acc=0.8745, val_loss=0.339501928620412
Epoch 8/10 - loss=0.3028, val_acc=0.8777, val_loss=0.32613300740102114
Epoch 9/10 - loss=0.2913, val_acc=0.8803, val_loss=0.32298694665596944
Epoch 10/10 - loss=0.2815, val_acc=0.8817, val_loss=0.31899116400933514
test accuracy  0.8767


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8767
training_loss,0.28153
validation loss,0.31899
validation_accuracy,0.88167


[34m[1mwandb[0m: Agent Starting Run: 99yhfeko with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.6146, val_acc=0.5805, val_loss=1.0941827307565963
Epoch 2/10 - loss=0.9099, val_acc=0.7280, val_loss=0.794222893856428
Epoch 3/10 - loss=0.6987, val_acc=0.7647, val_loss=0.6553949047708305
Epoch 4/10 - loss=0.6043, val_acc=0.7820, val_loss=0.5938219521646441
Epoch 5/10 - loss=0.5539, val_acc=0.7912, val_loss=0.5514526221307969
Epoch 6/10 - loss=0.5190, val_acc=0.8093, val_loss=0.5189406094148027
Epoch 7/10 - loss=0.4893, val_acc=0.8210, val_loss=0.4933283969571248
Epoch 8/10 - loss=0.4646, val_acc=0.8292, val_loss=0.47173530711730616
Epoch 9/10 - loss=0.4451, val_acc=0.8373, val_loss=0.4527234083301381
Epoch 10/10 - loss=0.4291, val_acc=0.8402, val_loss=0.4391439940109482
test accuracy  0.84


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▅▆▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.84
training_loss,0.4291
validation loss,0.43914
validation_accuracy,0.84017


[34m[1mwandb[0m: Agent Starting Run: 5lnsw8ku with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.8761, val_acc=0.6513, val_loss=1.316037991346341
Epoch 2/10 - loss=1.2056, val_acc=0.7128, val_loss=0.9040826516923129
Epoch 3/10 - loss=0.9383, val_acc=0.7622, val_loss=0.7225658606613665
Epoch 4/10 - loss=0.8121, val_acc=0.7795, val_loss=0.6235027085348667
Epoch 5/10 - loss=0.7422, val_acc=0.8090, val_loss=0.5672073469666594
Epoch 6/10 - loss=0.7001, val_acc=0.8177, val_loss=0.5329385233129775
Epoch 7/10 - loss=0.6723, val_acc=0.8230, val_loss=0.5114268878926068
Epoch 8/10 - loss=0.6519, val_acc=0.8297, val_loss=0.493962479744864
Epoch 9/10 - loss=0.6368, val_acc=0.8370, val_loss=0.47897376754557264
Epoch 10/10 - loss=0.6250, val_acc=0.8388, val_loss=0.4703022834206467
test accuracy  0.8299


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8299
training_loss,0.62499
validation loss,0.4703
validation_accuracy,0.83883


[34m[1mwandb[0m: Agent Starting Run: yz9jgsc6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.4824, val_acc=0.8497, val_loss=0.4094896702082607
Epoch 2/10 - loss=0.3684, val_acc=0.8652, val_loss=0.36314704902243694
Epoch 3/10 - loss=0.3355, val_acc=0.8697, val_loss=0.3510010769668911
Epoch 4/10 - loss=0.3185, val_acc=0.8790, val_loss=0.3328199859907706
Epoch 5/10 - loss=0.3005, val_acc=0.8758, val_loss=0.33776022930666433
Epoch 6/10 - loss=0.2904, val_acc=0.8903, val_loss=0.3168903930233975
Epoch 7/10 - loss=0.2785, val_acc=0.8843, val_loss=0.3179844143254806
Epoch 8/10 - loss=0.2698, val_acc=0.8890, val_loss=0.31000926630535247
Epoch 9/10 - loss=0.2568, val_acc=0.8783, val_loss=0.3277326714393295
Epoch 10/10 - loss=0.2509, val_acc=0.8835, val_loss=0.3181441056007354
test accuracy  0.8793


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▁▁
validation loss,█▅▄▃▃▁▂▁▂▂
validation_accuracy,▁▄▄▆▆█▇█▆▇

0,1
epoch,10.0
test_accuracy,0.8793
training_loss,0.25091
validation loss,0.31814
validation_accuracy,0.8835


[34m[1mwandb[0m: Agent Starting Run: 348e8r0l with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.9132, val_acc=0.8232, val_loss=0.5031127522894252
Epoch 2/10 - loss=0.6244, val_acc=0.8352, val_loss=0.45720350495009
Epoch 3/10 - loss=0.5872, val_acc=0.8430, val_loss=0.4448720229648976
Epoch 4/10 - loss=0.5723, val_acc=0.8492, val_loss=0.4222432090165793
Epoch 5/10 - loss=0.5591, val_acc=0.8428, val_loss=0.4386283950805854
Epoch 6/10 - loss=0.5527, val_acc=0.8562, val_loss=0.3978777771286153
Epoch 7/10 - loss=0.5476, val_acc=0.8535, val_loss=0.40436496997255433
Epoch 8/10 - loss=0.5443, val_acc=0.8552, val_loss=0.39789317849189926
Epoch 9/10 - loss=0.5410, val_acc=0.8520, val_loss=0.40021954369075496
Epoch 10/10 - loss=0.5370, val_acc=0.8620, val_loss=0.3867194531683326
test accuracy  0.8521


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▁▁▁▁▁▁
validation loss,█▅▄▃▄▂▂▂▂▁
validation_accuracy,▁▃▅▆▅▇▆▇▆█

0,1
epoch,10.0
test_accuracy,0.8521
training_loss,0.53698
validation loss,0.38672
validation_accuracy,0.862


[34m[1mwandb[0m: Agent Starting Run: u29lsdys with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.9194, val_acc=0.7722, val_loss=0.6247052076783431
Epoch 2/10 - loss=0.5350, val_acc=0.8217, val_loss=0.4773651422677773
Epoch 3/10 - loss=0.4532, val_acc=0.8393, val_loss=0.4443693803346329
Epoch 4/10 - loss=0.3921, val_acc=0.8557, val_loss=0.3979751079476605
Epoch 5/10 - loss=0.3629, val_acc=0.8633, val_loss=0.37492988942255123
Epoch 6/10 - loss=0.3411, val_acc=0.8708, val_loss=0.36411498253627544
Epoch 7/10 - loss=0.3258, val_acc=0.8710, val_loss=0.3549427680735744
Epoch 8/10 - loss=0.3097, val_acc=0.8772, val_loss=0.33885527424818956
Epoch 9/10 - loss=0.2977, val_acc=0.8750, val_loss=0.33990892696151387
Epoch 10/10 - loss=0.2873, val_acc=0.8663, val_loss=0.3689977660912504
test accuracy  0.861


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▄▂▂▂▁▁▁▂
validation_accuracy,▁▄▅▇▇████▇

0,1
epoch,10.0
test_accuracy,0.861
training_loss,0.28729
validation loss,0.369
validation_accuracy,0.86633


[34m[1mwandb[0m: Agent Starting Run: 7zkr703n with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5947, val_acc=0.8550, val_loss=0.40004481059941327
Epoch 2/10 - loss=0.4679, val_acc=0.8622, val_loss=0.37814290566354974
Epoch 3/10 - loss=0.4399, val_acc=0.8645, val_loss=0.36416750451199126
Epoch 4/10 - loss=0.4206, val_acc=0.8682, val_loss=0.3552099781524033
Epoch 5/10 - loss=0.4065, val_acc=0.8658, val_loss=0.3512040931536227
Epoch 6/10 - loss=0.3992, val_acc=0.8727, val_loss=0.34745390163508033
Epoch 7/10 - loss=0.3919, val_acc=0.8737, val_loss=0.3455926828922082
Epoch 8/10 - loss=0.3844, val_acc=0.8797, val_loss=0.32220983431652583
Epoch 9/10 - loss=0.3765, val_acc=0.8645, val_loss=0.35710395703557146
Epoch 10/10 - loss=0.3705, val_acc=0.8757, val_loss=0.33232230117787737
test accuracy  0.8715


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▆▅▄▄▃▃▁▄▂
validation_accuracy,▁▃▄▅▄▆▆█▄▇

0,1
epoch,10.0
test_accuracy,0.8715
training_loss,0.37045
validation loss,0.33232
validation_accuracy,0.87567


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: epiesjs6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.6318, val_acc=0.0977, val_loss=2.3039547946335484
Epoch 2/10 - loss=2.6234, val_acc=0.1117, val_loss=2.303224197269155
Epoch 3/10 - loss=2.6203, val_acc=0.1048, val_loss=2.3033991266945484
Epoch 4/10 - loss=2.6174, val_acc=0.0985, val_loss=2.3015347061677742
Epoch 5/10 - loss=2.6143, val_acc=0.1023, val_loss=2.301502390590313
Epoch 6/10 - loss=2.6113, val_acc=0.0985, val_loss=2.3012897906382253
Epoch 7/10 - loss=2.6082, val_acc=0.1000, val_loss=2.301344041277508
Epoch 8/10 - loss=2.6053, val_acc=0.0977, val_loss=2.302602603113493
Epoch 9/10 - loss=2.6022, val_acc=0.1000, val_loss=2.299648232084021
Epoch 10/10 - loss=2.5991, val_acc=0.0995, val_loss=2.299741918303517
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▆▆▅▄▄▃▂▂▁
validation loss,█▇▇▄▄▄▄▆▁▁
validation_accuracy,▁█▅▁▃▁▂▁▂▂

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.59911
validation loss,2.29974
validation_accuracy,0.0995


[34m[1mwandb[0m: Agent Starting Run: 0vtghmjr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.3321, val_acc=0.7588, val_loss=0.6802282418946582
Epoch 2/10 - loss=0.7525, val_acc=0.8067, val_loss=0.5387756888152906
Epoch 3/10 - loss=0.6645, val_acc=0.8197, val_loss=0.5019136963674357
Epoch 4/10 - loss=0.6256, val_acc=0.8162, val_loss=0.500401789202134
Epoch 5/10 - loss=0.6004, val_acc=0.8477, val_loss=0.4345177639974425
Epoch 6/10 - loss=0.5808, val_acc=0.8547, val_loss=0.4162778798354893
Epoch 7/10 - loss=0.5649, val_acc=0.8545, val_loss=0.4085545129184875
Epoch 8/10 - loss=0.5507, val_acc=0.8500, val_loss=0.4258146055599618
Epoch 9/10 - loss=0.5390, val_acc=0.8623, val_loss=0.38615704652438143
Epoch 10/10 - loss=0.5279, val_acc=0.8622, val_loss=0.3904449720484519
test accuracy  0.8524


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▂▂▂▁▁
validation_accuracy,▁▄▅▅▇▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8524
training_loss,0.52794
validation loss,0.39044
validation_accuracy,0.86217


[34m[1mwandb[0m: Agent Starting Run: kbwwfa28 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7108, val_acc=0.8317, val_loss=0.45197346581245823
Epoch 2/10 - loss=0.4031, val_acc=0.8658, val_loss=0.3743080259768522
Epoch 3/10 - loss=0.3582, val_acc=0.8658, val_loss=0.36881995469291406
Epoch 4/10 - loss=0.3327, val_acc=0.8697, val_loss=0.3426239806321781
Epoch 5/10 - loss=0.3135, val_acc=0.8783, val_loss=0.3290646395049539
Epoch 6/10 - loss=0.2995, val_acc=0.8822, val_loss=0.31723166937197494
Epoch 7/10 - loss=0.2847, val_acc=0.8822, val_loss=0.3127479356116543
Epoch 8/10 - loss=0.2753, val_acc=0.8862, val_loss=0.30485095546180524
Epoch 9/10 - loss=0.2629, val_acc=0.8793, val_loss=0.32081862532507466
Epoch 10/10 - loss=0.2539, val_acc=0.8848, val_loss=0.3045769869529447
test accuracy  0.8813


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▄▄▃▂▂▁▁▂▁
validation_accuracy,▁▅▅▆▇▇▇█▇█

0,1
epoch,10.0
test_accuracy,0.8813
training_loss,0.25389
validation loss,0.30458
validation_accuracy,0.88483


[34m[1mwandb[0m: Agent Starting Run: zvapuyup with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.9184, val_acc=0.8188, val_loss=0.5049314272442289
Epoch 2/10 - loss=0.6360, val_acc=0.8420, val_loss=0.45111183866966226
Epoch 3/10 - loss=0.6022, val_acc=0.8462, val_loss=0.4347713996801243
Epoch 4/10 - loss=0.5847, val_acc=0.8370, val_loss=0.44628267903080815
Epoch 5/10 - loss=0.5758, val_acc=0.8398, val_loss=0.43692074769561545
Epoch 6/10 - loss=0.5685, val_acc=0.8557, val_loss=0.41329930450844565
Epoch 7/10 - loss=0.5628, val_acc=0.8537, val_loss=0.4067230086957541
Epoch 8/10 - loss=0.5598, val_acc=0.8587, val_loss=0.38998499099317013
Epoch 9/10 - loss=0.5558, val_acc=0.8555, val_loss=0.4031372866402529
Epoch 10/10 - loss=0.5534, val_acc=0.8515, val_loss=0.41467719405437536
test accuracy  0.8472


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▁▁▁▁▁▁
validation loss,█▅▄▄▄▂▂▁▂▃
validation_accuracy,▁▅▆▄▅▇▇█▇▇

0,1
epoch,10.0
test_accuracy,0.8472
training_loss,0.55337
validation loss,0.41468
validation_accuracy,0.8515


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ttjkaq9h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.0827, val_acc=0.5873, val_loss=1.424627241733186
Epoch 2/10 - loss=1.2568, val_acc=0.6862, val_loss=0.8967264427107717
Epoch 3/10 - loss=0.9633, val_acc=0.7338, val_loss=0.7467600970285996
Epoch 4/10 - loss=0.8619, val_acc=0.7667, val_loss=0.6767671041323889
Epoch 5/10 - loss=0.8065, val_acc=0.7795, val_loss=0.6362308650161196
Epoch 6/10 - loss=0.7675, val_acc=0.7930, val_loss=0.595903531217156
Epoch 7/10 - loss=0.7382, val_acc=0.8017, val_loss=0.5726420579560508
Epoch 8/10 - loss=0.7163, val_acc=0.8097, val_loss=0.5526433582263343
Epoch 9/10 - loss=0.6983, val_acc=0.8140, val_loss=0.5348964902976391
Epoch 10/10 - loss=0.6831, val_acc=0.8122, val_loss=0.5296817006565858
test accuracy  0.8078


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▂▂▂▁▁▁▁▁
validation loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▄▆▇▇▇████

0,1
epoch,10.0
test_accuracy,0.8078
training_loss,0.68313
validation loss,0.52968
validation_accuracy,0.81217


[34m[1mwandb[0m: Agent Starting Run: fi6lxvg8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.0623, val_acc=0.7610, val_loss=0.7400236792484648
Epoch 2/10 - loss=0.6469, val_acc=0.7917, val_loss=0.6029446841613165
Epoch 3/10 - loss=0.5565, val_acc=0.8072, val_loss=0.5435162589829463
Epoch 4/10 - loss=0.5117, val_acc=0.8183, val_loss=0.5093614947523107
Epoch 5/10 - loss=0.4840, val_acc=0.8252, val_loss=0.4864539917201097
Epoch 6/10 - loss=0.4648, val_acc=0.8305, val_loss=0.4708648106578368
Epoch 7/10 - loss=0.4499, val_acc=0.8333, val_loss=0.4571372142208378
Epoch 8/10 - loss=0.4374, val_acc=0.8337, val_loss=0.45035169210189135
Epoch 9/10 - loss=0.4270, val_acc=0.8407, val_loss=0.43721658243386446
Epoch 10/10 - loss=0.4178, val_acc=0.8403, val_loss=0.43076259552788754
test accuracy  0.8365


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8365
training_loss,0.41775
validation loss,0.43076
validation_accuracy,0.84033


[34m[1mwandb[0m: Agent Starting Run: mp99gi93 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.4945, val_acc=0.8487, val_loss=0.40497708993450826
Epoch 2/10 - loss=0.3877, val_acc=0.8550, val_loss=0.38405627870921616
Epoch 3/10 - loss=0.3580, val_acc=0.8540, val_loss=0.40123109671740753
Epoch 4/10 - loss=0.3339, val_acc=0.8590, val_loss=0.37882812722810116
Epoch 5/10 - loss=0.3211, val_acc=0.8612, val_loss=0.36083196520087846
Epoch 6/10 - loss=0.3068, val_acc=0.8538, val_loss=0.3854545344762349
Epoch 7/10 - loss=0.3007, val_acc=0.8740, val_loss=0.3522976826772949
Epoch 8/10 - loss=0.2919, val_acc=0.8707, val_loss=0.35143332638322383
Epoch 9/10 - loss=0.2856, val_acc=0.8752, val_loss=0.34423169043173046
Epoch 10/10 - loss=0.2757, val_acc=0.8655, val_loss=0.3705624845693075
test accuracy  0.8584


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▄▃▂▂▂▂▁▁
validation loss,█▆█▅▃▆▂▂▁▄
validation_accuracy,▁▃▂▄▄▂█▇█▅

0,1
epoch,10.0
test_accuracy,0.8584
training_loss,0.27566
validation loss,0.37056
validation_accuracy,0.8655


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aqzw5nme with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6675, val_acc=0.8287, val_loss=0.4785893829628484
Epoch 2/10 - loss=0.4340, val_acc=0.8463, val_loss=0.4244753287050816
Epoch 3/10 - loss=0.3918, val_acc=0.8577, val_loss=0.3952291603367697
Epoch 4/10 - loss=0.3674, val_acc=0.8635, val_loss=0.3705940622874374
Epoch 5/10 - loss=0.3506, val_acc=0.8678, val_loss=0.3581261020686349
Epoch 6/10 - loss=0.3369, val_acc=0.8710, val_loss=0.3479400625540101
Epoch 7/10 - loss=0.3256, val_acc=0.8755, val_loss=0.35027662931175013
Epoch 8/10 - loss=0.3162, val_acc=0.8758, val_loss=0.33583144676237014
Epoch 9/10 - loss=0.3078, val_acc=0.8760, val_loss=0.3326601061061895
Epoch 10/10 - loss=0.2998, val_acc=0.8785, val_loss=0.3330744377302076
test accuracy  0.8732


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▃▅▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8732
training_loss,0.29985
validation loss,0.33307
validation_accuracy,0.8785


[34m[1mwandb[0m: Agent Starting Run: jij4t362 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5947, val_acc=0.8550, val_loss=0.40004481059941327
Epoch 2/10 - loss=0.4679, val_acc=0.8622, val_loss=0.37814290566354974
Epoch 3/10 - loss=0.4399, val_acc=0.8645, val_loss=0.36416750451199126
Epoch 4/10 - loss=0.4206, val_acc=0.8682, val_loss=0.3552099781524033
Epoch 5/10 - loss=0.4065, val_acc=0.8658, val_loss=0.3512040931536227
Epoch 6/10 - loss=0.3992, val_acc=0.8727, val_loss=0.34745390163508033
Epoch 7/10 - loss=0.3919, val_acc=0.8737, val_loss=0.3455926828922082
Epoch 8/10 - loss=0.3844, val_acc=0.8797, val_loss=0.32220983431652583
Epoch 9/10 - loss=0.3765, val_acc=0.8645, val_loss=0.35710395703557146
Epoch 10/10 - loss=0.3705, val_acc=0.8757, val_loss=0.33232230117787737
test accuracy  0.8715


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▆▅▄▄▃▃▁▄▂
validation_accuracy,▁▃▄▅▄▆▆█▄▇

0,1
epoch,10.0
test_accuracy,0.8715
training_loss,0.37045
validation loss,0.33232
validation_accuracy,0.87567


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0b5utx5z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6737, val_acc=0.8223, val_loss=0.48559231665898367
Epoch 2/10 - loss=0.4411, val_acc=0.8502, val_loss=0.41925963044517783
Epoch 3/10 - loss=0.3988, val_acc=0.8465, val_loss=0.4226792832611386
Epoch 4/10 - loss=0.3681, val_acc=0.8520, val_loss=0.3963816133732048
Epoch 5/10 - loss=0.3486, val_acc=0.8673, val_loss=0.3659821143344434
Epoch 6/10 - loss=0.3301, val_acc=0.8715, val_loss=0.3490507531717481
Epoch 7/10 - loss=0.3183, val_acc=0.8710, val_loss=0.3460891244762915
Epoch 8/10 - loss=0.3054, val_acc=0.8773, val_loss=0.3310111943590008
Epoch 9/10 - loss=0.2955, val_acc=0.8773, val_loss=0.32589956186916946
Epoch 10/10 - loss=0.2856, val_acc=0.8743, val_loss=0.34665342560690854
test accuracy  0.8704


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▄▃▂▂▁▁▂
validation_accuracy,▁▅▄▅▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8704
training_loss,0.28557
validation loss,0.34665
validation_accuracy,0.87433


[34m[1mwandb[0m: Agent Starting Run: w3cxdidf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6399, val_acc=0.8487, val_loss=0.4123830119927123
Epoch 2/10 - loss=0.4926, val_acc=0.8583, val_loss=0.37024365133418013
Epoch 3/10 - loss=0.4544, val_acc=0.8647, val_loss=0.36080359987802363
Epoch 4/10 - loss=0.4301, val_acc=0.8638, val_loss=0.3715538145935752
Epoch 5/10 - loss=0.4133, val_acc=0.8657, val_loss=0.3561157283839861
Epoch 6/10 - loss=0.4045, val_acc=0.8767, val_loss=0.33827443957330044
Epoch 7/10 - loss=0.3943, val_acc=0.8743, val_loss=0.3476176120355813
Epoch 8/10 - loss=0.3882, val_acc=0.8825, val_loss=0.31375107992157025
Epoch 9/10 - loss=0.3788, val_acc=0.8613, val_loss=0.3705572316041471
Epoch 10/10 - loss=0.3735, val_acc=0.8663, val_loss=0.3489561331135637
test accuracy  0.864


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▄▅▄▃▃▁▅▃
validation_accuracy,▁▃▄▄▅▇▆█▄▅

0,1
epoch,10.0
test_accuracy,0.864
training_loss,0.3735
validation loss,0.34896
validation_accuracy,0.86633


[34m[1mwandb[0m: Agent Starting Run: 1sn7ghc1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.1015, val_acc=0.2007, val_loss=1.7670488353515708
Epoch 2/10 - loss=1.6984, val_acc=0.3028, val_loss=1.6368958653216423
Epoch 3/10 - loss=1.5422, val_acc=0.2977, val_loss=1.4882700951189138
Epoch 4/10 - loss=1.3674, val_acc=0.4512, val_loss=1.263775485810506
Epoch 5/10 - loss=1.1914, val_acc=0.4780, val_loss=1.15795061820963
Epoch 6/10 - loss=1.1089, val_acc=0.5228, val_loss=1.0965540512069953
Epoch 7/10 - loss=1.0535, val_acc=0.5512, val_loss=1.0415659236055557
Epoch 8/10 - loss=0.9840, val_acc=0.6092, val_loss=0.9623903162702936
Epoch 9/10 - loss=0.9050, val_acc=0.6307, val_loss=0.8860030819699919
Epoch 10/10 - loss=0.8355, val_acc=0.7022, val_loss=0.8246311213461012
test accuracy  0.7055


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▆▅▄▃▃▂▂▁▁
validation loss,█▇▆▄▃▃▃▂▁▁
validation_accuracy,▁▂▂▄▅▅▆▇▇█

0,1
epoch,10.0
test_accuracy,0.7055
training_loss,0.83548
validation loss,0.82463
validation_accuracy,0.70217


[34m[1mwandb[0m: Agent Starting Run: 4cges4ly with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.4921, val_acc=0.8570, val_loss=0.39998971849132553
Epoch 2/10 - loss=0.3764, val_acc=0.8662, val_loss=0.3698643931361812
Epoch 3/10 - loss=0.3472, val_acc=0.8733, val_loss=0.34554305311377587
Epoch 4/10 - loss=0.3275, val_acc=0.8762, val_loss=0.34778072708607755
Epoch 5/10 - loss=0.3114, val_acc=0.8757, val_loss=0.33327091570391
Epoch 6/10 - loss=0.2981, val_acc=0.8713, val_loss=0.33901001314123025
Epoch 7/10 - loss=0.2887, val_acc=0.8752, val_loss=0.3511542339633185
Epoch 8/10 - loss=0.2774, val_acc=0.8823, val_loss=0.32339280918785507
Epoch 9/10 - loss=0.2692, val_acc=0.8765, val_loss=0.34770759088400005
Epoch 10/10 - loss=0.2622, val_acc=0.8837, val_loss=0.3203979338730821
test accuracy  0.8786


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▄▃▂▂▂▁▁▁
validation loss,█▅▃▃▂▃▄▁▃▁
validation_accuracy,▁▃▅▆▆▅▆█▆█

0,1
epoch,10.0
test_accuracy,0.8786
training_loss,0.2622
validation loss,0.3204
validation_accuracy,0.88367


[34m[1mwandb[0m: Agent Starting Run: e4l98zx9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6533, val_acc=0.8432, val_loss=0.4100978579424642
Epoch 2/10 - loss=0.5049, val_acc=0.8552, val_loss=0.3837048360459039
Epoch 3/10 - loss=0.4669, val_acc=0.8588, val_loss=0.3887303408728992
Epoch 4/10 - loss=0.4505, val_acc=0.8698, val_loss=0.37679797746368376
Epoch 5/10 - loss=0.4375, val_acc=0.8665, val_loss=0.3646840452380502
Epoch 6/10 - loss=0.4287, val_acc=0.8707, val_loss=0.35680121775162515
Epoch 7/10 - loss=0.4246, val_acc=0.8640, val_loss=0.37250448353658855
Epoch 8/10 - loss=0.4206, val_acc=0.8688, val_loss=0.3631785599106644
Epoch 9/10 - loss=0.4165, val_acc=0.8647, val_loss=0.35783766501098124
Epoch 10/10 - loss=0.4143, val_acc=0.8647, val_loss=0.3553409552048066
test accuracy  0.8648


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▅▄▂▁▃▂▁▁
validation_accuracy,▁▄▅█▇█▆█▆▆

0,1
epoch,10.0
test_accuracy,0.8648
training_loss,0.41432
validation loss,0.35534
validation_accuracy,0.86467


[34m[1mwandb[0m: Agent Starting Run: j2kot6ea with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7703, val_acc=0.8205, val_loss=0.4969668847799174
Epoch 2/10 - loss=0.5661, val_acc=0.8425, val_loss=0.43868046047263853
Epoch 3/10 - loss=0.5241, val_acc=0.8473, val_loss=0.4184906329044926
Epoch 4/10 - loss=0.4998, val_acc=0.8592, val_loss=0.39182365113940903
Epoch 5/10 - loss=0.4816, val_acc=0.8618, val_loss=0.37845373042665625
Epoch 6/10 - loss=0.4684, val_acc=0.8620, val_loss=0.3745570488905528
Epoch 7/10 - loss=0.4566, val_acc=0.8640, val_loss=0.3723803044395995
Epoch 8/10 - loss=0.4462, val_acc=0.8687, val_loss=0.35822441100415564
Epoch 9/10 - loss=0.4372, val_acc=0.8703, val_loss=0.3542105182150955
Epoch 10/10 - loss=0.4303, val_acc=0.8753, val_loss=0.3434319171901751
test accuracy  0.8671


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▄▃▃▂▂▂▁▁
validation_accuracy,▁▄▄▆▆▆▇▇▇█

0,1
epoch,10.0
test_accuracy,0.8671
training_loss,0.43034
validation loss,0.34343
validation_accuracy,0.87533


[34m[1mwandb[0m: Agent Starting Run: zh6tvgbn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8304, val_acc=0.8158, val_loss=0.4973247831710082
Epoch 2/10 - loss=0.6411, val_acc=0.8432, val_loss=0.42764569657373736
Epoch 3/10 - loss=0.6018, val_acc=0.8477, val_loss=0.4256114462243451
Epoch 4/10 - loss=0.5784, val_acc=0.8595, val_loss=0.3872299444191971
Epoch 5/10 - loss=0.5605, val_acc=0.8577, val_loss=0.3804058116218331
Epoch 6/10 - loss=0.5477, val_acc=0.8638, val_loss=0.3716560459161567
Epoch 7/10 - loss=0.5357, val_acc=0.8683, val_loss=0.3599044966697169
Epoch 8/10 - loss=0.5250, val_acc=0.8715, val_loss=0.3467886101811283
Epoch 9/10 - loss=0.5149, val_acc=0.8713, val_loss=0.34398438970468687
Epoch 10/10 - loss=0.5060, val_acc=0.8722, val_loss=0.3422544492370441
test accuracy  0.8668


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇████

0,1
epoch,10.0
test_accuracy,0.8668
training_loss,0.50598
validation loss,0.34225
validation_accuracy,0.87217


[34m[1mwandb[0m: Agent Starting Run: ubw8raed with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6682, val_acc=0.8212, val_loss=0.4960866594166533
Epoch 2/10 - loss=0.4613, val_acc=0.8437, val_loss=0.43831543837941583
Epoch 3/10 - loss=0.4184, val_acc=0.8458, val_loss=0.41931714840191103
Epoch 4/10 - loss=0.3938, val_acc=0.8570, val_loss=0.39253523318361644
Epoch 5/10 - loss=0.3747, val_acc=0.8622, val_loss=0.37704187077881884
Epoch 6/10 - loss=0.3616, val_acc=0.8610, val_loss=0.37511828601601077
Epoch 7/10 - loss=0.3498, val_acc=0.8590, val_loss=0.3763203924808063
Epoch 8/10 - loss=0.3389, val_acc=0.8700, val_loss=0.35616856559083326
Epoch 9/10 - loss=0.3295, val_acc=0.8705, val_loss=0.35277462155179856
Epoch 10/10 - loss=0.3230, val_acc=0.8763, val_loss=0.33946002539413217
test accuracy  0.8677


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▃▃▂▂▁
validation_accuracy,▁▄▄▆▆▆▆▇▇█

0,1
epoch,10.0
test_accuracy,0.8677
training_loss,0.323
validation loss,0.33946
validation_accuracy,0.87633


[34m[1mwandb[0m: Agent Starting Run: lk28mi7p with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.2927, val_acc=0.1847, val_loss=2.2697803101214715
Epoch 2/10 - loss=2.2388, val_acc=0.3400, val_loss=2.19671733497147
Epoch 3/10 - loss=2.1099, val_acc=0.3598, val_loss=1.991062382668213
Epoch 4/10 - loss=1.8336, val_acc=0.4712, val_loss=1.6917843179375007
Epoch 5/10 - loss=1.5839, val_acc=0.5375, val_loss=1.49029122804662
Epoch 6/10 - loss=1.4069, val_acc=0.5632, val_loss=1.3354155537182975
Epoch 7/10 - loss=1.2682, val_acc=0.5762, val_loss=1.2170031152095766
Epoch 8/10 - loss=1.1632, val_acc=0.6142, val_loss=1.1290919318393853
Epoch 9/10 - loss=1.0842, val_acc=0.6235, val_loss=1.06244360454499
Epoch 10/10 - loss=1.0222, val_acc=0.6447, val_loss=1.0066061878824133
test accuracy  0.6467


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,██▇▅▄▃▂▂▁▁
validation loss,██▆▅▄▃▂▂▁▁
validation_accuracy,▁▃▄▅▆▇▇███

0,1
epoch,10.0
test_accuracy,0.6467
training_loss,1.02222
validation loss,1.00661
validation_accuracy,0.64467


[34m[1mwandb[0m: Agent Starting Run: hwu1pzhm with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.9302, val_acc=0.8215, val_loss=0.5098234135723082
Epoch 2/10 - loss=0.6400, val_acc=0.8347, val_loss=0.46508586410603797
Epoch 3/10 - loss=0.6033, val_acc=0.8372, val_loss=0.4712971269031375
Epoch 4/10 - loss=0.5867, val_acc=0.8437, val_loss=0.43089466061191
Epoch 5/10 - loss=0.5753, val_acc=0.8468, val_loss=0.4321568001683751
Epoch 6/10 - loss=0.5692, val_acc=0.8517, val_loss=0.40872833177722245
Epoch 7/10 - loss=0.5635, val_acc=0.8557, val_loss=0.39754124174319205
Epoch 8/10 - loss=0.5603, val_acc=0.8443, val_loss=0.4229084204826042
Epoch 9/10 - loss=0.5568, val_acc=0.8445, val_loss=0.41501195994078005
Epoch 10/10 - loss=0.5505, val_acc=0.8510, val_loss=0.4094798612783348
test accuracy  0.845


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▁▁▁▁▁▁
validation loss,█▅▆▃▃▂▁▃▂▂
validation_accuracy,▁▄▄▆▆▇█▆▆▇

0,1
epoch,10.0
test_accuracy,0.845
training_loss,0.55053
validation loss,0.40948
validation_accuracy,0.851


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rwkticvy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.9528, val_acc=0.6222, val_loss=1.488868675506611
Epoch 2/10 - loss=1.1666, val_acc=0.6722, val_loss=0.9830985267945842
Epoch 3/10 - loss=0.8777, val_acc=0.7210, val_loss=0.8206761982659
Epoch 4/10 - loss=0.7572, val_acc=0.7480, val_loss=0.7337079165640624
Epoch 5/10 - loss=0.6847, val_acc=0.7715, val_loss=0.6750219800466328
Epoch 6/10 - loss=0.6344, val_acc=0.7873, val_loss=0.6317206085926708
Epoch 7/10 - loss=0.5955, val_acc=0.7930, val_loss=0.6009636843303932
Epoch 8/10 - loss=0.5665, val_acc=0.8027, val_loss=0.5715748720118036
Epoch 9/10 - loss=0.5437, val_acc=0.8087, val_loss=0.5534889757086299
Epoch 10/10 - loss=0.5257, val_acc=0.8100, val_loss=0.5404203970714132
test accuracy  0.8052


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8052
training_loss,0.52572
validation loss,0.54042
validation_accuracy,0.81


[34m[1mwandb[0m: Agent Starting Run: mw154kvl with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.3977, val_acc=0.2528, val_loss=2.1603318190713976
Epoch 2/10 - loss=2.1725, val_acc=0.4117, val_loss=1.8070782739268392
Epoch 3/10 - loss=1.6766, val_acc=0.6045, val_loss=1.2486997124259562
Epoch 4/10 - loss=1.2458, val_acc=0.6595, val_loss=0.977791189649625
Epoch 5/10 - loss=1.0678, val_acc=0.6853, val_loss=0.8665776461426346
Epoch 6/10 - loss=0.9836, val_acc=0.7085, val_loss=0.802746884763704
Epoch 7/10 - loss=0.9293, val_acc=0.7335, val_loss=0.7558545131122538
Epoch 8/10 - loss=0.8870, val_acc=0.7432, val_loss=0.7213722631332666
Epoch 9/10 - loss=0.8511, val_acc=0.7563, val_loss=0.6870627503224388
Epoch 10/10 - loss=0.8200, val_acc=0.7692, val_loss=0.6577034362382999
test accuracy  0.7628


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▇▅▃▂▂▁▁▁▁
validation loss,█▆▄▂▂▂▁▁▁▁
validation_accuracy,▁▃▆▇▇▇████

0,1
epoch,10.0
test_accuracy,0.7628
training_loss,0.81997
validation loss,0.6577
validation_accuracy,0.76917


[34m[1mwandb[0m: Agent Starting Run: 0hpmvch2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.4132, val_acc=0.1757, val_loss=2.292149410728407
Epoch 2/10 - loss=2.3905, val_acc=0.2850, val_loss=2.286120843243217
Epoch 3/10 - loss=2.3842, val_acc=0.2870, val_loss=2.2802201192228773
Epoch 4/10 - loss=2.3777, val_acc=0.4023, val_loss=2.2732283146744727
Epoch 5/10 - loss=2.3706, val_acc=0.4557, val_loss=2.265754305995266
Epoch 6/10 - loss=2.3627, val_acc=0.4703, val_loss=2.2576561472868706
Epoch 7/10 - loss=2.3540, val_acc=0.4970, val_loss=2.248397168271816
Epoch 8/10 - loss=2.3440, val_acc=0.4175, val_loss=2.2378499398579175
Epoch 9/10 - loss=2.3327, val_acc=0.4443, val_loss=2.225407147370007
Epoch 10/10 - loss=2.3195, val_acc=0.4218, val_loss=2.2110929580550467
test accuracy  0.4219


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▆▆▅▅▄▄▃▂▁
validation loss,█▇▇▆▆▅▄▃▂▁
validation_accuracy,▁▃▃▆▇▇█▆▇▆

0,1
epoch,10.0
test_accuracy,0.4219
training_loss,2.31947
validation loss,2.21109
validation_accuracy,0.42183


[34m[1mwandb[0m: Agent Starting Run: 0tnrspx5 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.4579, val_acc=0.6627, val_loss=0.9369514286009158
Epoch 2/10 - loss=0.9496, val_acc=0.7585, val_loss=0.67948511449532
Epoch 3/10 - loss=0.7873, val_acc=0.7873, val_loss=0.5997603391570766
Epoch 4/10 - loss=0.7238, val_acc=0.8253, val_loss=0.5241486185181857
Epoch 5/10 - loss=0.6889, val_acc=0.8373, val_loss=0.4897224216541715
Epoch 6/10 - loss=0.6691, val_acc=0.8350, val_loss=0.4872276023892784
Epoch 7/10 - loss=0.6556, val_acc=0.8517, val_loss=0.4575857715757526
Epoch 8/10 - loss=0.6472, val_acc=0.8503, val_loss=0.4540401677982447
Epoch 9/10 - loss=0.6405, val_acc=0.8375, val_loss=0.47461275210622883
Epoch 10/10 - loss=0.6339, val_acc=0.8355, val_loss=0.4819571799530768
test accuracy  0.8302


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▂▂▁▁▁▁▁▁
validation loss,█▄▃▂▂▁▁▁▁▁
validation_accuracy,▁▅▆▇▇▇██▇▇

0,1
epoch,10.0
test_accuracy,0.8302
training_loss,0.63386
validation loss,0.48196
validation_accuracy,0.8355


[34m[1mwandb[0m: Agent Starting Run: 4ez3ar7f with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6059, val_acc=0.8202, val_loss=0.46856034482426945
Epoch 2/10 - loss=0.4904, val_acc=0.8490, val_loss=0.39712311831084585
Epoch 3/10 - loss=0.4570, val_acc=0.8538, val_loss=0.3772954055090674
Epoch 4/10 - loss=0.4381, val_acc=0.8657, val_loss=0.37066047957976517
Epoch 5/10 - loss=0.4283, val_acc=0.8633, val_loss=0.3557609029300658
Epoch 6/10 - loss=0.4180, val_acc=0.8682, val_loss=0.36248216991948873
Epoch 7/10 - loss=0.4105, val_acc=0.8705, val_loss=0.35220066138708195
Epoch 8/10 - loss=0.4067, val_acc=0.8787, val_loss=0.32690916101117146
Epoch 9/10 - loss=0.4007, val_acc=0.8737, val_loss=0.3457578409822827
Epoch 10/10 - loss=0.3983, val_acc=0.8662, val_loss=0.3574558788491632
test accuracy  0.8586


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▃▃▂▃▂▁▂▃
validation_accuracy,▁▄▅▆▆▇▇█▇▇

0,1
epoch,10.0
test_accuracy,0.8586
training_loss,0.39834
validation loss,0.35746
validation_accuracy,0.86617


[34m[1mwandb[0m: Agent Starting Run: 422v120e with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5113, val_acc=0.8412, val_loss=0.41982872871786214
Epoch 2/10 - loss=0.3832, val_acc=0.8603, val_loss=0.36874738621796044
Epoch 3/10 - loss=0.3498, val_acc=0.8723, val_loss=0.34249231391040164
Epoch 4/10 - loss=0.3285, val_acc=0.8763, val_loss=0.3385119755557759
Epoch 5/10 - loss=0.3110, val_acc=0.8725, val_loss=0.33579001090145655
Epoch 6/10 - loss=0.2960, val_acc=0.8805, val_loss=0.3363594574545956
Epoch 7/10 - loss=0.2855, val_acc=0.8693, val_loss=0.34704784502669955
Epoch 8/10 - loss=0.2766, val_acc=0.8802, val_loss=0.33028511010534717
Epoch 9/10 - loss=0.2664, val_acc=0.8827, val_loss=0.32014132678765084
Epoch 10/10 - loss=0.2577, val_acc=0.8857, val_loss=0.3229742656491021
test accuracy  0.8772


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▄▃▂▂▂▂▁▁
validation loss,█▄▃▂▂▂▃▂▁▁
validation_accuracy,▁▄▆▇▆▇▅▇██

0,1
epoch,10.0
test_accuracy,0.8772
training_loss,0.25773
validation loss,0.32297
validation_accuracy,0.88567


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s5t2yftj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.8178, val_acc=0.8122, val_loss=0.5338731306545785
Epoch 2/10 - loss=0.4824, val_acc=0.8323, val_loss=0.4831700890906673
Epoch 3/10 - loss=0.4328, val_acc=0.8495, val_loss=0.4299099605913498
Epoch 4/10 - loss=0.4060, val_acc=0.8517, val_loss=0.42827443874788534
Epoch 5/10 - loss=0.3884, val_acc=0.8535, val_loss=0.4091237333559002
Epoch 6/10 - loss=0.3744, val_acc=0.8630, val_loss=0.3874460564221149
Epoch 7/10 - loss=0.3613, val_acc=0.8638, val_loss=0.38347928528032266
Epoch 8/10 - loss=0.3497, val_acc=0.8708, val_loss=0.3695867222647378
Epoch 9/10 - loss=0.3401, val_acc=0.8695, val_loss=0.3695824459511611
Epoch 10/10 - loss=0.3318, val_acc=0.8605, val_loss=0.38947761407042736
test accuracy  0.8551


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▃▂▂▁▁▂
validation_accuracy,▁▃▅▆▆▇▇██▇

0,1
epoch,10.0
test_accuracy,0.8551
training_loss,0.33176
validation loss,0.38948
validation_accuracy,0.8605


[34m[1mwandb[0m: Agent Starting Run: d3f1g0pu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6039, val_acc=0.8398, val_loss=0.44165719359859423
Epoch 2/10 - loss=0.4103, val_acc=0.8540, val_loss=0.40119687676580285
Epoch 3/10 - loss=0.3773, val_acc=0.8488, val_loss=0.40332472783758705
Epoch 4/10 - loss=0.3552, val_acc=0.8608, val_loss=0.3771265040326976
Epoch 5/10 - loss=0.3395, val_acc=0.8688, val_loss=0.3559319649234633
Epoch 6/10 - loss=0.3259, val_acc=0.8703, val_loss=0.35329648509813627
Epoch 7/10 - loss=0.3158, val_acc=0.8748, val_loss=0.33828014442065574
Epoch 8/10 - loss=0.3048, val_acc=0.8767, val_loss=0.3381156299595528
Epoch 9/10 - loss=0.2976, val_acc=0.8767, val_loss=0.32811203045945986
Epoch 10/10 - loss=0.2882, val_acc=0.8775, val_loss=0.3341833735132362
test accuracy  0.8725


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▆▆▄▃▃▂▂▁▁
validation_accuracy,▁▄▃▅▆▇████

0,1
epoch,10.0
test_accuracy,0.8725
training_loss,0.28818
validation loss,0.33418
validation_accuracy,0.8775


[34m[1mwandb[0m: Agent Starting Run: zl0r21t6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.5418, val_acc=0.7082, val_loss=0.9083673194699204
Epoch 2/10 - loss=0.9731, val_acc=0.7632, val_loss=0.7150302960299796
Epoch 3/10 - loss=0.8462, val_acc=0.7912, val_loss=0.630167919302444
Epoch 4/10 - loss=0.7828, val_acc=0.7982, val_loss=0.5822023480795748
Epoch 5/10 - loss=0.7438, val_acc=0.8093, val_loss=0.5499991572213754
Epoch 6/10 - loss=0.7184, val_acc=0.8165, val_loss=0.5278828967397781
Epoch 7/10 - loss=0.6984, val_acc=0.8183, val_loss=0.5136060307705561
Epoch 8/10 - loss=0.6836, val_acc=0.8242, val_loss=0.49521414527778596
Epoch 9/10 - loss=0.6710, val_acc=0.8273, val_loss=0.48499843043737173
Epoch 10/10 - loss=0.6603, val_acc=0.8282, val_loss=0.47960992024213117
test accuracy  0.8227


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▆▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8227
training_loss,0.66029
validation loss,0.47961
validation_accuracy,0.82817


[34m[1mwandb[0m: Agent Starting Run: y65o26dg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.3864, val_acc=0.7610, val_loss=0.7405439047619236
Epoch 2/10 - loss=0.9716, val_acc=0.7917, val_loss=0.6036345659801708
Epoch 3/10 - loss=0.8813, val_acc=0.8070, val_loss=0.5442857589636527
Epoch 4/10 - loss=0.8364, val_acc=0.8185, val_loss=0.5101903295101275
Epoch 5/10 - loss=0.8086, val_acc=0.8243, val_loss=0.4873449338939016
Epoch 6/10 - loss=0.7893, val_acc=0.8303, val_loss=0.47183523228664276
Epoch 7/10 - loss=0.7742, val_acc=0.8335, val_loss=0.4581863205370861
Epoch 8/10 - loss=0.7615, val_acc=0.8330, val_loss=0.45144673575954175
Epoch 9/10 - loss=0.7509, val_acc=0.8400, val_loss=0.43847154307107805
Epoch 10/10 - loss=0.7415, val_acc=0.8400, val_loss=0.4320215667337944
test accuracy  0.836


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇██

0,1
epoch,10.0
test_accuracy,0.836
training_loss,0.74147
validation loss,0.43202
validation_accuracy,0.84


[34m[1mwandb[0m: Agent Starting Run: 7d7mzncm with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8912, val_acc=0.8155, val_loss=0.5370060167019673
Epoch 2/10 - loss=0.6261, val_acc=0.8408, val_loss=0.4423304961944872
Epoch 3/10 - loss=0.5780, val_acc=0.8527, val_loss=0.4013329948465592
Epoch 4/10 - loss=0.5470, val_acc=0.8543, val_loss=0.39596201321794033
Epoch 5/10 - loss=0.5273, val_acc=0.8548, val_loss=0.3934490957632786
Epoch 6/10 - loss=0.5121, val_acc=0.8645, val_loss=0.36579270098783007
Epoch 7/10 - loss=0.4988, val_acc=0.8712, val_loss=0.354828508242239
Epoch 8/10 - loss=0.4863, val_acc=0.8695, val_loss=0.3484076130929045
Epoch 9/10 - loss=0.4762, val_acc=0.8653, val_loss=0.36292224165262604
Epoch 10/10 - loss=0.4675, val_acc=0.8637, val_loss=0.3719448048511276
test accuracy  0.8574


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▄▃▃▃▂▁▁▂▂
validation_accuracy,▁▄▆▆▆▇██▇▇

0,1
epoch,10.0
test_accuracy,0.8574
training_loss,0.46745
validation loss,0.37194
validation_accuracy,0.86367


[34m[1mwandb[0m: Agent Starting Run: h4rpwv7w with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7712, val_acc=0.8198, val_loss=0.5171308345358533
Epoch 2/10 - loss=0.4646, val_acc=0.8410, val_loss=0.44009706315958724
Epoch 3/10 - loss=0.4146, val_acc=0.8460, val_loss=0.4202219052602324
Epoch 4/10 - loss=0.3888, val_acc=0.8538, val_loss=0.3892305150530232
Epoch 5/10 - loss=0.3705, val_acc=0.8620, val_loss=0.3758533494086125
Epoch 6/10 - loss=0.3571, val_acc=0.8620, val_loss=0.37228696936694033
Epoch 7/10 - loss=0.3455, val_acc=0.8573, val_loss=0.37791668092532077
Epoch 8/10 - loss=0.3353, val_acc=0.8677, val_loss=0.3507127796166509
Epoch 9/10 - loss=0.3266, val_acc=0.8693, val_loss=0.34680266676434834
Epoch 10/10 - loss=0.3200, val_acc=0.8737, val_loss=0.3397596092706626
test accuracy  0.8641


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▃▁▁▁
validation_accuracy,▁▄▄▅▆▆▆▇▇█

0,1
epoch,10.0
test_accuracy,0.8641
training_loss,0.32
validation loss,0.33976
validation_accuracy,0.87367


[34m[1mwandb[0m: Agent Starting Run: 392masqk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5765, val_acc=0.8562, val_loss=0.40175693714284505
Epoch 2/10 - loss=0.4751, val_acc=0.8635, val_loss=0.3650754929062156
Epoch 3/10 - loss=0.4473, val_acc=0.8607, val_loss=0.37838958244247844
Epoch 4/10 - loss=0.4329, val_acc=0.8615, val_loss=0.36785748725000544
Epoch 5/10 - loss=0.4201, val_acc=0.8695, val_loss=0.34472972395099244
Epoch 6/10 - loss=0.4134, val_acc=0.8755, val_loss=0.34262180058984426
Epoch 7/10 - loss=0.4057, val_acc=0.8735, val_loss=0.34716862183748753
Epoch 8/10 - loss=0.4011, val_acc=0.8743, val_loss=0.33083629780418333
Epoch 9/10 - loss=0.3948, val_acc=0.8667, val_loss=0.3442749568684081
Epoch 10/10 - loss=0.3888, val_acc=0.8750, val_loss=0.3253588600057805
test accuracy  0.8677


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▆▅▃▃▃▂▃▁
validation_accuracy,▁▄▃▃▆█▇█▅█

0,1
epoch,10.0
test_accuracy,0.8677
training_loss,0.38879
validation loss,0.32536
validation_accuracy,0.875


[34m[1mwandb[0m: Agent Starting Run: dfvor87c with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.4457, val_acc=0.6115, val_loss=0.9680858043342784
Epoch 2/10 - loss=1.0305, val_acc=0.6910, val_loss=0.7836149356792321
Epoch 3/10 - loss=0.9371, val_acc=0.7320, val_loss=0.7064388743272961
Epoch 4/10 - loss=0.8550, val_acc=0.7847, val_loss=0.6165698469537587
Epoch 5/10 - loss=0.8063, val_acc=0.8115, val_loss=0.5630100467141814
Epoch 6/10 - loss=0.7562, val_acc=0.8205, val_loss=0.5271188858585153
Epoch 7/10 - loss=0.7264, val_acc=0.7738, val_loss=0.6274032151446501
Epoch 8/10 - loss=0.7070, val_acc=0.8163, val_loss=0.5300744665227476
Epoch 9/10 - loss=0.6966, val_acc=0.8232, val_loss=0.5119432400169666
Epoch 10/10 - loss=0.6855, val_acc=0.8380, val_loss=0.4923950961128453
test accuracy  0.832


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▃▂▁▁
validation_accuracy,▁▃▅▆▇▇▆▇██

0,1
epoch,10.0
test_accuracy,0.832
training_loss,0.68548
validation loss,0.4924
validation_accuracy,0.838


[34m[1mwandb[0m: Agent Starting Run: q47hxul1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7502, val_acc=0.8257, val_loss=0.47267723600383843
Epoch 2/10 - loss=0.5280, val_acc=0.8523, val_loss=0.4077927779953439
Epoch 3/10 - loss=0.4904, val_acc=0.8535, val_loss=0.4104962535308235
Epoch 4/10 - loss=0.4689, val_acc=0.8652, val_loss=0.3726118898597833
Epoch 5/10 - loss=0.4520, val_acc=0.8662, val_loss=0.3696481652330731
Epoch 6/10 - loss=0.4407, val_acc=0.8688, val_loss=0.35829447093680816
Epoch 7/10 - loss=0.4289, val_acc=0.8718, val_loss=0.3471979877578244
Epoch 8/10 - loss=0.4198, val_acc=0.8717, val_loss=0.33783327844957817
Epoch 9/10 - loss=0.4109, val_acc=0.8740, val_loss=0.3388514714496703
Epoch 10/10 - loss=0.4031, val_acc=0.8770, val_loss=0.3337571130856006
test accuracy  0.869


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▅▅▆▇▇▇▇██

0,1
epoch,10.0
test_accuracy,0.869
training_loss,0.40306
validation loss,0.33376
validation_accuracy,0.877


[34m[1mwandb[0m: Agent Starting Run: 4rqf3945 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7232, val_acc=0.8163, val_loss=0.539244644087698
Epoch 2/10 - loss=0.4571, val_acc=0.8417, val_loss=0.4441377625811625
Epoch 3/10 - loss=0.4111, val_acc=0.8563, val_loss=0.39541793171031303
Epoch 4/10 - loss=0.3801, val_acc=0.8598, val_loss=0.39310118459805216
Epoch 5/10 - loss=0.3613, val_acc=0.8572, val_loss=0.38958998426149205
Epoch 6/10 - loss=0.3483, val_acc=0.8605, val_loss=0.36870509780872884
Epoch 7/10 - loss=0.3365, val_acc=0.8680, val_loss=0.35817251828116925
Epoch 8/10 - loss=0.3260, val_acc=0.8695, val_loss=0.350512843616856
Epoch 9/10 - loss=0.3165, val_acc=0.8652, val_loss=0.3628224929748065
Epoch 10/10 - loss=0.3096, val_acc=0.8633, val_loss=0.3736361891312806
test accuracy  0.8581


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▄▃▃▂▂▁▁▁▂
validation_accuracy,▁▄▆▇▆▇██▇▇

0,1
epoch,10.0
test_accuracy,0.8581
training_loss,0.30956
validation loss,0.37364
validation_accuracy,0.86333


[34m[1mwandb[0m: Agent Starting Run: wjc1m78s with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3289, val_acc=0.1718, val_loss=2.298317407134947
Epoch 2/10 - loss=2.2945, val_acc=0.2562, val_loss=2.2920485397826034
Epoch 3/10 - loss=2.2907, val_acc=0.2582, val_loss=2.289120648304031
Epoch 4/10 - loss=2.2878, val_acc=0.3545, val_loss=2.2860192358710876
Epoch 5/10 - loss=2.2847, val_acc=0.3892, val_loss=2.2828491278939707
Epoch 6/10 - loss=2.2814, val_acc=0.4128, val_loss=2.279637115172056
Epoch 7/10 - loss=2.2781, val_acc=0.4590, val_loss=2.2762568813699176
Epoch 8/10 - loss=2.2747, val_acc=0.4220, val_loss=2.272730350909881
Epoch 9/10 - loss=2.2710, val_acc=0.4487, val_loss=2.2689598629545573
Epoch 10/10 - loss=2.2672, val_acc=0.4407, val_loss=2.2650471852557716
test accuracy  0.4452


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▄▃▃▃▂▂▁▁
validation loss,█▇▆▅▅▄▃▃▂▁
validation_accuracy,▁▃▃▅▆▇█▇██

0,1
epoch,10.0
test_accuracy,0.4452
training_loss,2.26723
validation loss,2.26505
validation_accuracy,0.44067


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: oybjnqjv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.9692, val_acc=0.5195, val_loss=1.4988916948701845
Epoch 2/10 - loss=1.3603, val_acc=0.5777, val_loss=1.1222516229596131
Epoch 3/10 - loss=1.1025, val_acc=0.6978, val_loss=0.9266656614545628
Epoch 4/10 - loss=0.9542, val_acc=0.7333, val_loss=0.7975915833070955
Epoch 5/10 - loss=0.8533, val_acc=0.7653, val_loss=0.699939271084455
Epoch 6/10 - loss=0.7803, val_acc=0.7805, val_loss=0.634146781446896
Epoch 7/10 - loss=0.7314, val_acc=0.7977, val_loss=0.5878866689984001
Epoch 8/10 - loss=0.6951, val_acc=0.8090, val_loss=0.5551336668978317
Epoch 9/10 - loss=0.6690, val_acc=0.8212, val_loss=0.5303576719751294
Epoch 10/10 - loss=0.6492, val_acc=0.8228, val_loss=0.5133687750799193
test accuracy  0.8158


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▃▃▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▂▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8158
training_loss,0.64923
validation loss,0.51337
validation_accuracy,0.82283


[34m[1mwandb[0m: Agent Starting Run: mkuc2uq4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6362, val_acc=0.8267, val_loss=0.4832897933028316
Epoch 2/10 - loss=0.4353, val_acc=0.8523, val_loss=0.4072348866515004
Epoch 3/10 - loss=0.3929, val_acc=0.8415, val_loss=0.43033649572904004
Epoch 4/10 - loss=0.3630, val_acc=0.8515, val_loss=0.3856215460520983
Epoch 5/10 - loss=0.3455, val_acc=0.8593, val_loss=0.37303018192918397
Epoch 6/10 - loss=0.3289, val_acc=0.8635, val_loss=0.35469925665149504
Epoch 7/10 - loss=0.3177, val_acc=0.8657, val_loss=0.35384932290347304
Epoch 8/10 - loss=0.3052, val_acc=0.8722, val_loss=0.3475566084574847
Epoch 9/10 - loss=0.2963, val_acc=0.8760, val_loss=0.3289910889774865
Epoch 10/10 - loss=0.2876, val_acc=0.8725, val_loss=0.34490473534412397
test accuracy  0.8701


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▆▄▃▂▂▂▁▂
validation_accuracy,▁▅▃▅▆▆▇▇██

0,1
epoch,10.0
test_accuracy,0.8701
training_loss,0.28757
validation loss,0.3449
validation_accuracy,0.8725


[34m[1mwandb[0m: Agent Starting Run: vachuxkn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8236, val_acc=0.8305, val_loss=0.4687227664188648
Epoch 2/10 - loss=0.5874, val_acc=0.8548, val_loss=0.4071550455764708
Epoch 3/10 - loss=0.5404, val_acc=0.8567, val_loss=0.4028818327447139
Epoch 4/10 - loss=0.5123, val_acc=0.8628, val_loss=0.37414192205179014
Epoch 5/10 - loss=0.4909, val_acc=0.8663, val_loss=0.3699844552227271
Epoch 6/10 - loss=0.4758, val_acc=0.8737, val_loss=0.347483146893112
Epoch 7/10 - loss=0.4614, val_acc=0.8775, val_loss=0.3383522034008145
Epoch 8/10 - loss=0.4504, val_acc=0.8800, val_loss=0.3320337075992654
Epoch 9/10 - loss=0.4376, val_acc=0.8790, val_loss=0.3294538043213297
Epoch 10/10 - loss=0.4284, val_acc=0.8812, val_loss=0.3258144191776602
test accuracy  0.8761


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▅▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8761
training_loss,0.42838
validation loss,0.32581
validation_accuracy,0.88117


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8a3j55kt with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7202, val_acc=0.8227, val_loss=0.4983337418992164
Epoch 2/10 - loss=0.4619, val_acc=0.8470, val_loss=0.44035877111110394
Epoch 3/10 - loss=0.4182, val_acc=0.8503, val_loss=0.4183421802665193
Epoch 4/10 - loss=0.3931, val_acc=0.8575, val_loss=0.392330762003526
Epoch 5/10 - loss=0.3739, val_acc=0.8605, val_loss=0.3828004638819316
Epoch 6/10 - loss=0.3597, val_acc=0.8590, val_loss=0.3782196796350389
Epoch 7/10 - loss=0.3484, val_acc=0.8575, val_loss=0.37546387293909445
Epoch 8/10 - loss=0.3379, val_acc=0.8713, val_loss=0.35188004300015396
Epoch 9/10 - loss=0.3286, val_acc=0.8688, val_loss=0.3525152262329579
Epoch 10/10 - loss=0.3215, val_acc=0.8733, val_loss=0.34338025058625443
test accuracy  0.8658


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▃▂▁▁▁
validation_accuracy,▁▄▅▆▆▆▆█▇█

0,1
epoch,10.0
test_accuracy,0.8658
training_loss,0.32151
validation loss,0.34338
validation_accuracy,0.87333


[34m[1mwandb[0m: Agent Starting Run: 7h33j41y with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.4989, val_acc=0.8235, val_loss=0.4666189888399364
Epoch 2/10 - loss=0.3783, val_acc=0.8547, val_loss=0.3858070033744651
Epoch 3/10 - loss=0.3437, val_acc=0.8645, val_loss=0.36058280467901316
Epoch 4/10 - loss=0.3205, val_acc=0.8752, val_loss=0.3391587044761378
Epoch 5/10 - loss=0.3062, val_acc=0.8735, val_loss=0.3326116435903424
Epoch 6/10 - loss=0.2934, val_acc=0.8818, val_loss=0.33217213096850834
Epoch 7/10 - loss=0.2816, val_acc=0.8890, val_loss=0.31867233637943576
Epoch 8/10 - loss=0.2734, val_acc=0.8878, val_loss=0.3133414423733099
Epoch 9/10 - loss=0.2647, val_acc=0.8818, val_loss=0.3290578242665005
Epoch 10/10 - loss=0.2594, val_acc=0.8772, val_loss=0.3362046428762552
test accuracy  0.8745


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▄▃▂▂▂▁▁▂▂
validation_accuracy,▁▄▅▇▆▇██▇▇

0,1
epoch,10.0
test_accuracy,0.8745
training_loss,0.25939
validation loss,0.3362
validation_accuracy,0.87717


[34m[1mwandb[0m: Agent Starting Run: 4daj2wlz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7380, val_acc=0.7975, val_loss=0.5543797988853191
Epoch 2/10 - loss=0.4923, val_acc=0.8248, val_loss=0.47586306567635706
Epoch 3/10 - loss=0.4463, val_acc=0.8347, val_loss=0.453768462999362
Epoch 4/10 - loss=0.4199, val_acc=0.8467, val_loss=0.42097091822649296
Epoch 5/10 - loss=0.4010, val_acc=0.8505, val_loss=0.41072161665523416
Epoch 6/10 - loss=0.3881, val_acc=0.8552, val_loss=0.3964578222240913
Epoch 7/10 - loss=0.3764, val_acc=0.8598, val_loss=0.3869317785953528
Epoch 8/10 - loss=0.3674, val_acc=0.8613, val_loss=0.3745446925279826
Epoch 9/10 - loss=0.3587, val_acc=0.8665, val_loss=0.3680646827420149
Epoch 10/10 - loss=0.3510, val_acc=0.8638, val_loss=0.36516780582101266
test accuracy  0.8601


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8601
training_loss,0.35104
validation loss,0.36517
validation_accuracy,0.86383


[34m[1mwandb[0m: Agent Starting Run: tvtzd1b2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6772, val_acc=0.8460, val_loss=0.41391309967051515
Epoch 2/10 - loss=0.5392, val_acc=0.8565, val_loss=0.3821828360241716
Epoch 3/10 - loss=0.4958, val_acc=0.8502, val_loss=0.392778619209788
Epoch 4/10 - loss=0.4715, val_acc=0.8622, val_loss=0.37625334821180173
Epoch 5/10 - loss=0.4536, val_acc=0.8610, val_loss=0.3772025629031982
Epoch 6/10 - loss=0.4416, val_acc=0.8663, val_loss=0.3570653892029561
Epoch 7/10 - loss=0.4323, val_acc=0.8692, val_loss=0.3471946436450412
Epoch 8/10 - loss=0.4277, val_acc=0.8773, val_loss=0.3363165077210429
Epoch 9/10 - loss=0.4222, val_acc=0.8677, val_loss=0.35871421027977324
Epoch 10/10 - loss=0.4158, val_acc=0.8718, val_loss=0.34568295041871827
test accuracy  0.864


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▆▅▅▃▂▁▃▂
validation_accuracy,▁▃▂▅▄▆▆█▆▇

0,1
epoch,10.0
test_accuracy,0.864
training_loss,0.41578
validation loss,0.34568
validation_accuracy,0.87183


[34m[1mwandb[0m: Agent Starting Run: tprn0qns with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.0143, val_acc=0.8167, val_loss=0.5261405020844369
Epoch 2/10 - loss=0.6309, val_acc=0.8443, val_loss=0.44433348151390073
Epoch 3/10 - loss=0.5891, val_acc=0.8528, val_loss=0.42175326149021164
Epoch 4/10 - loss=0.5685, val_acc=0.8567, val_loss=0.4103688259323083
Epoch 5/10 - loss=0.5582, val_acc=0.8535, val_loss=0.4054058677755715
Epoch 6/10 - loss=0.5505, val_acc=0.8600, val_loss=0.3923221106507514
Epoch 7/10 - loss=0.5433, val_acc=0.8645, val_loss=0.3804074079436166
Epoch 8/10 - loss=0.5397, val_acc=0.8560, val_loss=0.39989964761347746
Epoch 9/10 - loss=0.5354, val_acc=0.8685, val_loss=0.37718652164948024
Epoch 10/10 - loss=0.5326, val_acc=0.8528, val_loss=0.3996795428451606
test accuracy  0.8455


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▂▂▁▁▁▁▁▁
validation loss,█▄▃▃▂▂▁▂▁▂
validation_accuracy,▁▅▆▆▆▇▇▆█▆

0,1
epoch,10.0
test_accuracy,0.8455
training_loss,0.53258
validation loss,0.39968
validation_accuracy,0.85283


[34m[1mwandb[0m: Agent Starting Run: ghhvipq7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8098, val_acc=0.8260, val_loss=0.4968602169951069
Epoch 2/10 - loss=0.5481, val_acc=0.8470, val_loss=0.4396081421488968
Epoch 3/10 - loss=0.5019, val_acc=0.8512, val_loss=0.41473304980065934
Epoch 4/10 - loss=0.4758, val_acc=0.8602, val_loss=0.38851150206561164
Epoch 5/10 - loss=0.4569, val_acc=0.8617, val_loss=0.3802137849219665
Epoch 6/10 - loss=0.4425, val_acc=0.8618, val_loss=0.375123667841302
Epoch 7/10 - loss=0.4313, val_acc=0.8658, val_loss=0.36303900022471247
Epoch 8/10 - loss=0.4215, val_acc=0.8703, val_loss=0.35327634261742547
Epoch 9/10 - loss=0.4122, val_acc=0.8713, val_loss=0.35030488280678856
Epoch 10/10 - loss=0.4051, val_acc=0.8743, val_loss=0.34438363012746176
test accuracy  0.8645


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇██

0,1
epoch,10.0
test_accuracy,0.8645
training_loss,0.40507
validation loss,0.34438
validation_accuracy,0.87433


[34m[1mwandb[0m: Agent Starting Run: 9aq9sdue with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.4808, val_acc=0.8420, val_loss=0.4188979493923465
Epoch 2/10 - loss=0.3815, val_acc=0.8487, val_loss=0.3924972229374133
Epoch 3/10 - loss=0.3502, val_acc=0.8548, val_loss=0.4145968415205008
Epoch 4/10 - loss=0.3308, val_acc=0.8655, val_loss=0.36458072481448517
Epoch 5/10 - loss=0.3152, val_acc=0.8675, val_loss=0.34359845851878157
Epoch 6/10 - loss=0.3013, val_acc=0.8747, val_loss=0.33363804901341004
Epoch 7/10 - loss=0.2939, val_acc=0.8670, val_loss=0.35251965522073614
Epoch 8/10 - loss=0.2853, val_acc=0.8623, val_loss=0.3640933227833591
Epoch 9/10 - loss=0.2742, val_acc=0.8773, val_loss=0.3300449198716142
Epoch 10/10 - loss=0.2709, val_acc=0.8687, val_loss=0.3595177771238248
test accuracy  0.8661


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▄▃▂▂▂▁▁▁
validation loss,█▆█▄▂▁▃▄▁▃
validation_accuracy,▁▂▄▆▆▇▆▅█▆

0,1
epoch,10.0
test_accuracy,0.8661
training_loss,0.27094
validation loss,0.35952
validation_accuracy,0.86867


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p4mtk6ay with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.9397, val_acc=0.6462, val_loss=1.4767198766748673
Epoch 2/10 - loss=1.3712, val_acc=0.6810, val_loss=1.1284812198098009
Epoch 3/10 - loss=1.1247, val_acc=0.7068, val_loss=0.9578239255864118
Epoch 4/10 - loss=0.9912, val_acc=0.7237, val_loss=0.8561067771429551
Epoch 5/10 - loss=0.9067, val_acc=0.7388, val_loss=0.7872714923703444
Epoch 6/10 - loss=0.8480, val_acc=0.7532, val_loss=0.737204121868954
Epoch 7/10 - loss=0.8039, val_acc=0.7627, val_loss=0.698727533422673
Epoch 8/10 - loss=0.7693, val_acc=0.7718, val_loss=0.6676452248436787
Epoch 9/10 - loss=0.7413, val_acc=0.7775, val_loss=0.6427498075039485
Epoch 10/10 - loss=0.7179, val_acc=0.7852, val_loss=0.6213910350367057
test accuracy  0.7822


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▃▃▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▃▄▅▆▆▇▇██

0,1
epoch,10.0
test_accuracy,0.7822
training_loss,0.71789
validation loss,0.62139
validation_accuracy,0.78517


[34m[1mwandb[0m: Agent Starting Run: cgmo4g6r with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6772, val_acc=0.8208, val_loss=0.4933800286983313
Epoch 2/10 - loss=0.4513, val_acc=0.8450, val_loss=0.4317039885895401
Epoch 3/10 - loss=0.4082, val_acc=0.8515, val_loss=0.4063338806193088
Epoch 4/10 - loss=0.3831, val_acc=0.8583, val_loss=0.3861635146100228
Epoch 5/10 - loss=0.3666, val_acc=0.8587, val_loss=0.38127062165210024
Epoch 6/10 - loss=0.3532, val_acc=0.8638, val_loss=0.3661753960587231
Epoch 7/10 - loss=0.3421, val_acc=0.8687, val_loss=0.3516104792155165
Epoch 8/10 - loss=0.3327, val_acc=0.8695, val_loss=0.34951726180438664
Epoch 9/10 - loss=0.3247, val_acc=0.8727, val_loss=0.3451006211199286
Epoch 10/10 - loss=0.3175, val_acc=0.8718, val_loss=0.3467695422992322
test accuracy  0.8614


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8614
training_loss,0.3175
validation loss,0.34677
validation_accuracy,0.87183


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6uabswt4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7573, val_acc=0.8175, val_loss=0.5181460220537604
Epoch 2/10 - loss=0.4671, val_acc=0.8385, val_loss=0.4454357611428704
Epoch 3/10 - loss=0.4199, val_acc=0.8490, val_loss=0.41815175880354816
Epoch 4/10 - loss=0.3945, val_acc=0.8557, val_loss=0.39659919842190866
Epoch 5/10 - loss=0.3781, val_acc=0.8560, val_loss=0.3890265923437881
Epoch 6/10 - loss=0.3651, val_acc=0.8592, val_loss=0.37612977996154706
Epoch 7/10 - loss=0.3537, val_acc=0.8660, val_loss=0.36261574848701805
Epoch 8/10 - loss=0.3450, val_acc=0.8645, val_loss=0.35860735092846424
Epoch 9/10 - loss=0.3370, val_acc=0.8702, val_loss=0.3532888749338757
Epoch 10/10 - loss=0.3298, val_acc=0.8685, val_loss=0.3570771003061808
test accuracy  0.8548


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8548
training_loss,0.32979
validation loss,0.35708
validation_accuracy,0.8685


[34m[1mwandb[0m: Agent Starting Run: fl4ks1z9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8546, val_acc=0.8222, val_loss=0.5068006370324034
Epoch 2/10 - loss=0.5462, val_acc=0.8375, val_loss=0.4472362969446999
Epoch 3/10 - loss=0.5037, val_acc=0.8520, val_loss=0.41495434869037495
Epoch 4/10 - loss=0.4808, val_acc=0.8543, val_loss=0.39552439965493724
Epoch 5/10 - loss=0.4646, val_acc=0.8592, val_loss=0.3844921838891823
Epoch 6/10 - loss=0.4528, val_acc=0.8648, val_loss=0.3731911363001564
Epoch 7/10 - loss=0.4422, val_acc=0.8623, val_loss=0.3767614091353427
Epoch 8/10 - loss=0.4331, val_acc=0.8665, val_loss=0.36767442897105457
Epoch 9/10 - loss=0.4259, val_acc=0.8665, val_loss=0.3635068462056101
Epoch 10/10 - loss=0.4186, val_acc=0.8678, val_loss=0.36460602954894006
test accuracy  0.866


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▁▂▁▁▁
validation_accuracy,▁▃▆▆▇█▇███

0,1
epoch,10.0
test_accuracy,0.866
training_loss,0.41862
validation loss,0.36461
validation_accuracy,0.86783


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6c81xa7r with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5465, val_acc=0.8418, val_loss=0.43030478910201875
Epoch 2/10 - loss=0.3941, val_acc=0.8618, val_loss=0.38269785065583906
Epoch 3/10 - loss=0.3598, val_acc=0.8582, val_loss=0.38100606452884833
Epoch 4/10 - loss=0.3381, val_acc=0.8733, val_loss=0.34223729179692025
Epoch 5/10 - loss=0.3214, val_acc=0.8755, val_loss=0.3321786377973449
Epoch 6/10 - loss=0.3064, val_acc=0.8795, val_loss=0.32797739156365013
Epoch 7/10 - loss=0.2951, val_acc=0.8768, val_loss=0.32777318415648277
Epoch 8/10 - loss=0.2835, val_acc=0.8780, val_loss=0.3319324867634421
Epoch 9/10 - loss=0.2743, val_acc=0.8822, val_loss=0.3250371844979421
Epoch 10/10 - loss=0.2654, val_acc=0.8793, val_loss=0.32688229234654936
test accuracy  0.8741


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▂▁▁▁▁▁▁
validation_accuracy,▁▄▄▆▇█▇▇██

0,1
epoch,10.0
test_accuracy,0.8741
training_loss,0.26539
validation loss,0.32688
validation_accuracy,0.87933


[34m[1mwandb[0m: Agent Starting Run: 791yfrxp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5317, val_acc=0.8158, val_loss=0.49694791171606784
Epoch 2/10 - loss=0.4052, val_acc=0.8497, val_loss=0.3977529091022819
Epoch 3/10 - loss=0.3661, val_acc=0.8635, val_loss=0.37506122825664806
Epoch 4/10 - loss=0.3426, val_acc=0.8715, val_loss=0.36603607485622525
Epoch 5/10 - loss=0.3253, val_acc=0.8715, val_loss=0.34907785490088844
Epoch 6/10 - loss=0.3102, val_acc=0.8652, val_loss=0.364309009693323
Epoch 7/10 - loss=0.2993, val_acc=0.8712, val_loss=0.3422714000915465
Epoch 8/10 - loss=0.2881, val_acc=0.8740, val_loss=0.3457337770170664
Epoch 9/10 - loss=0.2791, val_acc=0.8743, val_loss=0.3469555764381196
Epoch 10/10 - loss=0.2726, val_acc=0.8763, val_loss=0.3447834505334876
test accuracy  0.8665


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▄▃▂▂▂▁▁▁
validation loss,█▄▂▂▁▂▁▁▁▁
validation_accuracy,▁▅▇▇▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8665
training_loss,0.27263
validation loss,0.34478
validation_accuracy,0.87633


[34m[1mwandb[0m: Agent Starting Run: vytax9vx with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6199, val_acc=0.8393, val_loss=0.42300481015559177
Epoch 2/10 - loss=0.4851, val_acc=0.8573, val_loss=0.3768277359510584
Epoch 3/10 - loss=0.4477, val_acc=0.8530, val_loss=0.39464397422656416
Epoch 4/10 - loss=0.4250, val_acc=0.8640, val_loss=0.3699185426607387
Epoch 5/10 - loss=0.4073, val_acc=0.8725, val_loss=0.3306110991570574
Epoch 6/10 - loss=0.3986, val_acc=0.8772, val_loss=0.3275174029418979
Epoch 7/10 - loss=0.3876, val_acc=0.8788, val_loss=0.3265874114383754
Epoch 8/10 - loss=0.3816, val_acc=0.8833, val_loss=0.31994185312209295
Epoch 9/10 - loss=0.3725, val_acc=0.8732, val_loss=0.3380295418029533
Epoch 10/10 - loss=0.3666, val_acc=0.8787, val_loss=0.3191389836936499
test accuracy  0.8732


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▆▄▂▂▂▁▂▁
validation_accuracy,▁▄▃▅▆▇▇█▆▇

0,1
epoch,10.0
test_accuracy,0.8732
training_loss,0.36663
validation loss,0.31914
validation_accuracy,0.87867


[34m[1mwandb[0m: Agent Starting Run: mog594yr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.4185, val_acc=0.7175, val_loss=0.903268396775767
Epoch 2/10 - loss=0.9493, val_acc=0.7670, val_loss=0.7112956152764559
Epoch 3/10 - loss=0.8182, val_acc=0.7890, val_loss=0.6235152465222219
Epoch 4/10 - loss=0.7500, val_acc=0.8022, val_loss=0.5704385096973644
Epoch 5/10 - loss=0.7076, val_acc=0.8145, val_loss=0.5373902335502819
Epoch 6/10 - loss=0.6783, val_acc=0.8222, val_loss=0.5109826591433704
Epoch 7/10 - loss=0.6567, val_acc=0.8293, val_loss=0.49414232137867914
Epoch 8/10 - loss=0.6399, val_acc=0.8323, val_loss=0.4769013480420701
Epoch 9/10 - loss=0.6260, val_acc=0.8375, val_loss=0.4648432700650765
Epoch 10/10 - loss=0.6143, val_acc=0.8338, val_loss=0.4587649205499406
test accuracy  0.8308


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8308
training_loss,0.61432
validation loss,0.45876
validation_accuracy,0.83383


[34m[1mwandb[0m: Agent Starting Run: wek879r3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8246, val_acc=0.8193, val_loss=0.5184543154334708
Epoch 2/10 - loss=0.5253, val_acc=0.8398, val_loss=0.4422794807542723
Epoch 3/10 - loss=0.4775, val_acc=0.8458, val_loss=0.4211858153646728
Epoch 4/10 - loss=0.4530, val_acc=0.8538, val_loss=0.39154571038031705
Epoch 5/10 - loss=0.4360, val_acc=0.8605, val_loss=0.3796268371963095
Epoch 6/10 - loss=0.4236, val_acc=0.8623, val_loss=0.3747063297402766
Epoch 7/10 - loss=0.4126, val_acc=0.8625, val_loss=0.37590656830321645
Epoch 8/10 - loss=0.4035, val_acc=0.8663, val_loss=0.35552395999911374
Epoch 9/10 - loss=0.3956, val_acc=0.8680, val_loss=0.35077635963902687
Epoch 10/10 - loss=0.3895, val_acc=0.8725, val_loss=0.347266487916806
test accuracy  0.8603


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▄▆▆▇▇▇▇█

0,1
epoch,10.0
test_accuracy,0.8603
training_loss,0.38947
validation loss,0.34727
validation_accuracy,0.8725


[34m[1mwandb[0m: Agent Starting Run: ffunv8ph with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6048, val_acc=0.8407, val_loss=0.4474349883682922
Epoch 2/10 - loss=0.4096, val_acc=0.8578, val_loss=0.388441182017817
Epoch 3/10 - loss=0.3711, val_acc=0.8507, val_loss=0.40338644224335385
Epoch 4/10 - loss=0.3433, val_acc=0.8625, val_loss=0.3749629881162083
Epoch 5/10 - loss=0.3259, val_acc=0.8653, val_loss=0.36066029789327436
Epoch 6/10 - loss=0.3084, val_acc=0.8730, val_loss=0.3396344091059448
Epoch 7/10 - loss=0.2967, val_acc=0.8768, val_loss=0.33092025278994885
Epoch 8/10 - loss=0.2834, val_acc=0.8828, val_loss=0.3299278291995032
Epoch 9/10 - loss=0.2733, val_acc=0.8825, val_loss=0.31383759919617416
Epoch 10/10 - loss=0.2635, val_acc=0.8807, val_loss=0.3234983395230651
test accuracy  0.8771


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▆▄▃▂▂▂▁▂
validation_accuracy,▁▄▃▅▅▆▇███

0,1
epoch,10.0
test_accuracy,0.8771
training_loss,0.26354
validation loss,0.3235
validation_accuracy,0.88067


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i14bg109 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7712, val_acc=0.8198, val_loss=0.5171308345358533
Epoch 2/10 - loss=0.4646, val_acc=0.8410, val_loss=0.44009706315958724
Epoch 3/10 - loss=0.4146, val_acc=0.8460, val_loss=0.4202219052602324
Epoch 4/10 - loss=0.3888, val_acc=0.8538, val_loss=0.3892305150530232
Epoch 5/10 - loss=0.3705, val_acc=0.8620, val_loss=0.3758533494086125
Epoch 6/10 - loss=0.3571, val_acc=0.8620, val_loss=0.37228696936694033
Epoch 7/10 - loss=0.3455, val_acc=0.8573, val_loss=0.37791668092532077
Epoch 8/10 - loss=0.3353, val_acc=0.8677, val_loss=0.3507127796166509
Epoch 9/10 - loss=0.3266, val_acc=0.8693, val_loss=0.34680266676434834
Epoch 10/10 - loss=0.3200, val_acc=0.8737, val_loss=0.3397596092706626
test accuracy  0.8641


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▃▁▁▁
validation_accuracy,▁▄▄▅▆▆▆▇▇█

0,1
epoch,10.0
test_accuracy,0.8641
training_loss,0.32
validation loss,0.33976
validation_accuracy,0.87367


[34m[1mwandb[0m: Agent Starting Run: y5dpn4do with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.3679, val_acc=0.1508, val_loss=2.3026370632889455
Epoch 2/10 - loss=2.3517, val_acc=0.1307, val_loss=2.3002509671200033
Epoch 3/10 - loss=2.3506, val_acc=0.1018, val_loss=2.299815105754251
Epoch 4/10 - loss=2.3501, val_acc=0.1877, val_loss=2.2992933624137804
Epoch 5/10 - loss=2.3496, val_acc=0.2317, val_loss=2.2987856217904756
Epoch 6/10 - loss=2.3491, val_acc=0.3047, val_loss=2.298330856314106
Epoch 7/10 - loss=2.3486, val_acc=0.2873, val_loss=2.2978443112337037
Epoch 8/10 - loss=2.3480, val_acc=0.2702, val_loss=2.297354458330047
Epoch 9/10 - loss=2.3475, val_acc=0.3607, val_loss=2.296798040495372
Epoch 10/10 - loss=2.3470, val_acc=0.2830, val_loss=2.29628060396844
test accuracy  0.2803


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁
validation loss,█▅▅▄▄▃▃▂▂▁
validation_accuracy,▂▂▁▃▅▆▆▆█▆

0,1
epoch,10.0
test_accuracy,0.2803
training_loss,2.34696
validation loss,2.29628
validation_accuracy,0.283


[34m[1mwandb[0m: Agent Starting Run: ub6fqorw with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.7429, val_acc=0.5855, val_loss=1.135961077095992
Epoch 2/10 - loss=1.0922, val_acc=0.7183, val_loss=0.8462478819410982
Epoch 3/10 - loss=0.8895, val_acc=0.7545, val_loss=0.6931239358700519
Epoch 4/10 - loss=0.7901, val_acc=0.7780, val_loss=0.6243779754232299
Epoch 5/10 - loss=0.7366, val_acc=0.7943, val_loss=0.576918691296528
Epoch 6/10 - loss=0.6969, val_acc=0.8107, val_loss=0.541402687360744
Epoch 7/10 - loss=0.6654, val_acc=0.8192, val_loss=0.5175331544146687
Epoch 8/10 - loss=0.6435, val_acc=0.8263, val_loss=0.49790349149931157
Epoch 9/10 - loss=0.6267, val_acc=0.8328, val_loss=0.4834471345587802
Epoch 10/10 - loss=0.6137, val_acc=0.8297, val_loss=0.4777725837984732
test accuracy  0.8261


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▂▁▁▁▁
validation_accuracy,▁▅▆▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8261
training_loss,0.61374
validation loss,0.47777
validation_accuracy,0.82967


[34m[1mwandb[0m: Agent Starting Run: a7flkxyu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7727, val_acc=0.8202, val_loss=0.49763246838124725
Epoch 2/10 - loss=0.5678, val_acc=0.8433, val_loss=0.4402029010802987
Epoch 3/10 - loss=0.5257, val_acc=0.8460, val_loss=0.42174480763261485
Epoch 4/10 - loss=0.5017, val_acc=0.8555, val_loss=0.39522497131652334
Epoch 5/10 - loss=0.4830, val_acc=0.8620, val_loss=0.3802989667113615
Epoch 6/10 - loss=0.4702, val_acc=0.8592, val_loss=0.37781049067552613
Epoch 7/10 - loss=0.4586, val_acc=0.8577, val_loss=0.3792605306469989
Epoch 8/10 - loss=0.4477, val_acc=0.8680, val_loss=0.3589427531853951
Epoch 9/10 - loss=0.4386, val_acc=0.8693, val_loss=0.3558238485438948
Epoch 10/10 - loss=0.4323, val_acc=0.8753, val_loss=0.3439572325300616
test accuracy  0.8665


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▃▃▂▂▁
validation_accuracy,▁▄▄▅▆▆▆▇▇█

0,1
epoch,10.0
test_accuracy,0.8665
training_loss,0.4323
validation loss,0.34396
validation_accuracy,0.87533


[34m[1mwandb[0m: Agent Starting Run: hyoqr58t with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.4731, val_acc=0.6952, val_loss=0.948272531529829
Epoch 2/10 - loss=0.7653, val_acc=0.7763, val_loss=0.6635661039338194
Epoch 3/10 - loss=0.5842, val_acc=0.8110, val_loss=0.5488028813872511
