In [None]:
import numpy as np
import wandb
from keras.datasets import fashion_mnist


# Neural Network Class: feed_forward_NN_4

class feed_forward_NN_4:
    """Fully connected feed-forward classifier trained on mini-batches.

    Every hidden layer uses the same activation (sigmoid, tanh or ReLU) and
    the output layer is a softmax.  The training objective is the mean
    cross-entropy plus ``weight_decay / 2`` times the sum of squared weights
    (biases are not penalised).  Batches are row-major: one sample per row of
    ``x`` and one target distribution (e.g. one-hot) per row of ``y``.

    Args:
        layers: Layer widths, input size first and number of classes last.
        optimizer: One of ``sgd``, ``momentum``, ``nesterov``, ``rmsprop``,
            ``adam`` or ``nadam`` (case-insensitive).
        learning_rate: Step size used by every optimizer.
        momentum: Velocity decay for ``momentum`` and ``nesterov``.
        beta1: First-moment decay for ``adam`` / ``nadam``.
        beta2: Second-moment decay for ``adam`` / ``nadam``.
        beta_rms: Squared-gradient decay for ``rmsprop``.
        epsilon: Constant added to the denominator of the adaptive updates.
        weight_decay: L2 coefficient applied to the weights.
        init_type: ``"xavier"`` scales standard-normal weights by
            ``sqrt(1 / fan_in)``; any other value scales them by
            ``sqrt(2 / fan_in)``.
        activation: ``sigmoid``, ``tanh`` or ``relu`` (case-insensitive);
            any other value falls back to sigmoid.

    Raises:
        ValueError: If fewer than two layer sizes are given or the optimizer
            name is not supported.
    """

    # Optimizer names that `_update_params` / `_train_step` know how to apply.
    _SUPPORTED_OPTIMIZERS = ("sgd", "momentum", "nesterov", "rmsprop", "adam", "nadam")

    def __init__(self,
                 layers,
                 optimizer,
                 learning_rate,
                 momentum,
                 beta1,
                 beta2,
                 beta_rms,
                 epsilon,
                 weight_decay,
                 init_type,
                 activation
                 ):
        if len(layers) < 2:
            raise ValueError("layers must contain at least an input and an output size")

        self.layers = layers
        self.layer_n = len(layers)
        self.optimizer = optimizer.lower()
        # Fail loudly: an unknown name used to be accepted and then no
        # parameter was ever updated during training.
        if self.optimizer not in self._SUPPORTED_OPTIMIZERS:
            raise ValueError(
                f"Unsupported optimizer {optimizer!r}; "
                f"expected one of {self._SUPPORTED_OPTIMIZERS}"
            )
        self.lr = learning_rate
        self.momentum = momentum
        self.beta1 = beta1
        self.beta2 = beta2
        self.beta_rms = beta_rms
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.init_type = init_type.lower()
        self.activation = activation.lower()

        # Weights are (fan_in, fan_out) so a batch is propagated as x @ W + b.
        self.weights = []
        self.biases = []
        for fan_in, fan_out in zip(layers[:-1], layers[1:]):
            if self.init_type == "xavier":
                scale = np.sqrt(1.0 / fan_in)
            else:
                # Every other init_type (including "random") uses the
                # sqrt(2 / fan_in) scaling.
                scale = np.sqrt(2.0 / fan_in)
            self.weights.append(np.random.randn(fan_in, fan_out) * scale)
            self.biases.append(np.zeros((1, fan_out)))

        # Optimizer state.  v_* holds the velocity (momentum / nesterov) or
        # the running squared gradient (rmsprop / adam / nadam).
        if self.optimizer in ["momentum", "nesterov", "rmsprop", "adam", "nadam"]:
            self.v_w = [np.zeros_like(w) for w in self.weights]
            self.v_b = [np.zeros_like(b) for b in self.biases]
        if self.optimizer in ["adam", "nadam"]:
            # m_* is the running mean of the gradient, t the update counter
            # used for bias correction.
            self.m_w = [np.zeros_like(w) for w in self.weights]
            self.m_b = [np.zeros_like(b) for b in self.biases]
            self.t = 0

    # Activations
    def sigmoid(self, x):
        """Return the logistic function of ``x``; inputs are clipped to +-500."""
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def tanh(self, x):
        """Return the element-wise hyperbolic tangent of ``x``."""
        return np.tanh(x)

    def relu(self, x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    def activate(self, x):
        """Apply the configured hidden activation (sigmoid if unrecognised)."""
        if self.activation == "tanh":
            return self.tanh(x)
        if self.activation == "relu":
            return self.relu(x)
        return self.sigmoid(x)

    # Derivatives
    def derivative(self, a):
        """Return the activation derivative expressed via the activation output ``a``."""
        if self.activation == "tanh":
            return 1 - a**2
        if self.activation == "relu":
            return (a > 0).astype(float)
        return a * (1 - a)

    def softmax(self, x):
        """Return the row-wise softmax of ``x``."""
        # Subtracting the row maximum keeps exp() from overflowing.
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    # Forward pass
    def forward_pass(self, x):
        """Propagate batch ``x`` and cache every layer's output.

        Returns:
            ``self.h``: the list ``[x, hidden_1, ..., softmax_output]``.
        """
        self.h = [x]
        for i in range(self.layer_n - 2):
            z = np.dot(self.h[i], self.weights[i]) + self.biases[i]
            self.h.append(self.activate(z))
        z_out = np.dot(self.h[-1], self.weights[-1]) + self.biases[-1]
        self.h.append(self.softmax(z_out))
        return self.h

    # Backward pass
    def backward_prop(self, y_true):
        """Return ``(dw, db)`` for the batch cached by the last ``forward_pass``.

        The gradients are those of the mean cross-entropy only; the weight
        decay term is added by the caller.
        """
        m = y_true.shape[0]
        dw = [None] * (self.layer_n - 1)
        db = [None] * (self.layer_n - 1)

        # Softmax combined with cross-entropy gives (probabilities - targets).
        delta = self.h[-1] - y_true

        for i in reversed(range(self.layer_n - 1)):
            dw[i] = np.dot(self.h[i].T, delta) / m
            db[i] = np.sum(delta, axis=0, keepdims=True) / m
            if i > 0:
                # Push the error through W_i and the activation that produced h[i].
                delta = np.dot(delta, self.weights[i].T) * self.derivative(self.h[i])
        return dw, db

    def _penalised_loss(self, out, y_batch):
        """Return mean cross-entropy of ``out`` plus the L2 weight penalty."""
        l2_norm_weights = sum(np.sum(w ** 2) for w in self.weights)
        # 1e-10 keeps log() finite when a probability is exactly zero.
        cross_entropy = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis=1))
        return cross_entropy + (self.weight_decay / 2) * l2_norm_weights

    def _with_weight_decay(self, dw):
        """Return new weight gradients with the L2 term added (inputs untouched)."""
        return [g + self.weight_decay * w for g, w in zip(dw, self.weights)]

    # Parameter updates for every optimizer except "nesterov"
    def _update_params(self, dw, db):
        """Apply one optimizer step from the data gradients ``dw`` / ``db``.

        Weight decay is added to the weight gradients here.  The lists passed
        in are not modified.  ``nesterov`` is handled in ``_train_step`` and
        is a no-op here.
        """
        dw = self._with_weight_decay(dw)
        n_layers = self.layer_n - 1

        if self.optimizer == "sgd":
            for i in range(n_layers):
                self.weights[i] -= self.lr * dw[i]
                self.biases[i] -= self.lr * db[i]

        elif self.optimizer == "momentum":
            for i in range(n_layers):
                self.v_w[i] = self.momentum * self.v_w[i] + dw[i]
                self.v_b[i] = self.momentum * self.v_b[i] + db[i]
                self.weights[i] -= self.lr * self.v_w[i]
                self.biases[i] -= self.lr * self.v_b[i]

        elif self.optimizer == "rmsprop":
            for i in range(n_layers):
                self.v_w[i] = self.beta_rms * self.v_w[i] + (1 - self.beta_rms) * (dw[i] ** 2)
                self.v_b[i] = self.beta_rms * self.v_b[i] + (1 - self.beta_rms) * (db[i] ** 2)
                self.weights[i] -= self.lr * dw[i] / (np.sqrt(self.v_w[i]) + self.epsilon)
                self.biases[i] -= self.lr * db[i] / (np.sqrt(self.v_b[i]) + self.epsilon)

        elif self.optimizer == "adam":
            self.t += 1
            m_corr = 1 - self.beta1 ** self.t
            v_corr = 1 - self.beta2 ** self.t
            for i in range(n_layers):
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                # Bias-corrected moments.
                m_w_hat = self.m_w[i] / m_corr
                m_b_hat = self.m_b[i] / m_corr
                v_w_hat = self.v_w[i] / v_corr
                v_b_hat = self.v_b[i] / v_corr

                self.weights[i] -= self.lr * m_w_hat / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i] -= self.lr * m_b_hat / (np.sqrt(v_b_hat) + self.epsilon)

        elif self.optimizer == "nadam":
            self.t += 1
            # NAdam looks one step ahead only for the first moment: m is
            # corrected with exponent t+1, while the current gradient and the
            # second moment use exponent t (the same correction as Adam).
            m_corr_next = 1 - self.beta1 ** (self.t + 1)
            g_corr = 1 - self.beta1 ** self.t
            v_corr = 1 - self.beta2 ** self.t
            for i in range(n_layers):
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                v_w_hat = self.v_w[i] / v_corr
                v_b_hat = self.v_b[i] / v_corr

                grad_term_w = (self.beta1 * self.m_w[i] / m_corr_next
                               + (1 - self.beta1) * dw[i] / g_corr)
                grad_term_b = (self.beta1 * self.m_b[i] / m_corr_next
                               + (1 - self.beta1) * db[i] / g_corr)

                self.weights[i] -= self.lr * grad_term_w / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i] -= self.lr * grad_term_b / (np.sqrt(v_b_hat) + self.epsilon)

    # One training step (handles the Nesterov look-ahead itself)
    def _train_step(self, x_batch, y_batch):
        """Run forward, backward and one parameter update; return the batch loss.

        For ``nesterov`` the loss and gradients are evaluated at the
        look-ahead point ``w - lr * momentum * v``; every other optimizer is
        evaluated at the current parameters and delegated to
        ``_update_params``.
        """
        if self.optimizer != "nesterov":
            self.forward_pass(x_batch)
            loss = self._penalised_loss(self.h[-1], y_batch)
            dW, dB = self.backward_prop(y_batch)
            self._update_params(dW, dB)
            return loss

        n_layers = self.layer_n - 1
        lookahead = self.lr * self.momentum

        # Move to the look-ahead position.
        for i in range(n_layers):
            self.weights[i] -= lookahead * self.v_w[i]
            self.biases[i] -= lookahead * self.v_b[i]

        # Loss and gradients (including weight decay) at the look-ahead point.
        self.forward_pass(x_batch)
        loss = self._penalised_loss(self.h[-1], y_batch)
        dW, dB = self.backward_prop(y_batch)
        dW = self._with_weight_decay(dW)

        for i in range(n_layers):
            # Undo the look-ahead shift so the step starts from w_t.
            self.weights[i] += lookahead * self.v_w[i]
            self.biases[i] += lookahead * self.v_b[i]

            # u_t = momentum * u_{t-1} + gradient, then w = w - lr * u_t.
            self.v_w[i] = self.momentum * self.v_w[i] + dW[i]
            self.v_b[i] = self.momentum * self.v_b[i] + dB[i]
            self.weights[i] -= self.lr * self.v_w[i]
            self.biases[i] -= self.lr * self.v_b[i]

        return loss

    def _run_epoch(self, x_train, y_train, batch_size):
        """Train on one shuffled pass over the data; return the mean sample loss.

        Every sample is used: a trailing batch smaller than ``batch_size`` is
        trained on as well, and batch losses are weighted by batch length.
        """
        n_samples = x_train.shape[0]
        if n_samples == 0:
            raise ValueError("x_train must contain at least one sample")
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        order = np.random.permutation(n_samples)
        x_shuffled = x_train[order]
        y_shuffled = y_train[order]

        weighted_loss = 0.0
        for start in range(0, n_samples, batch_size):
            x_batch = x_shuffled[start:start + batch_size]
            y_batch = y_shuffled[start:start + batch_size]
            weighted_loss += self._train_step(x_batch, y_batch) * x_batch.shape[0]
        return weighted_loss / n_samples

    # Outer training loop
    def training(self, x_train, y_train, x_val, y_val, epochs, batch_size):
        """Train for ``epochs`` passes and log metrics to wandb after each one.

        The logged training loss includes the L2 penalty; the validation loss
        is the plain cross-entropy.

        Raises:
            ValueError: If ``x_train`` is empty or ``batch_size`` is not positive.
        """
        for ep in range(epochs):
            avg_loss = self._run_epoch(x_train, y_train, batch_size)

            # A single forward pass yields both validation metrics.
            val_outputs = self.forward_pass(x_val)[-1]
            preds = np.argmax(val_outputs, axis=1)
            val_labels = np.argmax(y_val, axis=1)
            val_acc = np.mean(preds == val_labels)
            val_loss = -np.mean(np.sum(y_val * np.log(val_outputs + 1e-10), axis=1))

            wandb.log({"epoch": ep+1, "training_loss": avg_loss, "validation_accuracy": val_acc, "validation loss": val_loss})
            print(f"Epoch {ep+1}/{epochs} - loss={avg_loss:.4f}, val_acc={val_acc:.4f}, val_loss={val_loss}" )

    # Prediction
    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        self.forward_pass(X)
        return np.argmax(self.h[-1], axis=1)




# (x_train_full, y_train_full), (x_test, y_test) = fashion_mnist.load_data()
# x_train_full = x_train_full.reshape(x_train_full.shape[0], -1) / 255.0
# x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

# np.random.seed(42)
# idx = np.arange(x_train_full.shape[0])
# np.random.shuffle(idx)
# x_train_full = x_train_full[idx]
# y_train_full = y_train_full[idx]

# # 90% training, 10% validation 
# train_size=int(.9*len(x_train_full))

# x_train, y_train=x_train_full[:train_size],y_train_full[:train_size]
# x_val, y_val=x_train_full[train_size:], y_train_full[train_size:]

# num_classes = 10
# y_train_1h = np.eye(num_classes)[y_train]
# y_val_1h = np.eye(num_classes)[y_val]
# y_test_1h = np.eye(num_classes)[y_test]

# # model
# model = feed_forward_NN_4(
#     layers=[784] + [32] *3 + [10],
# optimizer="nesterov",
# learning_rate=0.01,
# momentum=0.9,
# beta1=0.9,
# beta2=0.999,
# beta_rms=0.9,
# epsilon=1e-4,
# weight_decay=0.0005,
# init_type="xavier",
# activation="relu")

#     # Train the model
# model.training(
#         x_train=x_train,
#         y_train=y_train_1h,
#         x_val=x_val,
#         y_val=y_val_1h,
#         epochs=10,
#         batch_size=32
#     )

#     #Evaluation on test set
# test_preds = model.predict(x_test)
# test_labels = np.argmax(y_test_1h, axis=1)
# test_acc = np.mean(test_preds == test_labels)
# print("test accuracy ",test_acc)
# #wandb.log({"test_accuracy": test_acc})




# train_sweep() function

def train_sweep():
    """Run a single sweep trial.

    Starts a wandb run, reads the hyperparameters from ``wandb.config``,
    trains ``feed_forward_NN_4`` on a fixed 90/10 train/validation split of
    Fashion-MNIST and logs the accuracy on the test split.
    """
    wandb.init()
    cfg = wandb.config

    # Name the run after the hyperparameters that distinguish it.
    wandb.run.name = f"hl_{cfg.num_hidden_layers}_bs_{cfg.batch_size}_ac_{cfg.activation}_opt_{cfg.optimizer}"

    # Fashion-MNIST: flatten each image to one row and scale pixels by 1/255.
    (train_images, train_targets), (test_images, test_targets) = fashion_mnist.load_data()
    train_images = train_images.reshape(train_images.shape[0], -1) / 255.0
    test_images = test_images.reshape(test_images.shape[0], -1) / 255.0

    # Fixed seed so every trial shuffles (and therefore splits) identically.
    np.random.seed(42)
    order = np.arange(train_images.shape[0])
    np.random.shuffle(order)
    train_images = train_images[order]
    train_targets = train_targets[order]

    # First 90% of the shuffled data for training, the rest for validation.
    cut = int(.9 * len(train_images))
    x_fit, x_hold = train_images[:cut], train_images[cut:]
    y_fit, y_hold = train_targets[:cut], train_targets[cut:]

    # One-hot encode the integer labels by indexing rows of an identity matrix.
    n_classes = 10
    y_fit_onehot = np.eye(n_classes)[y_fit]
    y_hold_onehot = np.eye(n_classes)[y_hold]
    y_test_onehot = np.eye(n_classes)[test_targets]

    hidden_widths = [cfg.hidden_size] * cfg.num_hidden_layers
    net = feed_forward_NN_4(
        layers=[784] + hidden_widths + [10],
        optimizer=cfg.optimizer,
        learning_rate=cfg.learning_rate,
        momentum=cfg.momentum,
        beta1=cfg.beta1,
        beta2=cfg.beta2,
        beta_rms=cfg.beta_rms,
        epsilon=cfg.epsilon,
        weight_decay=cfg.weight_decay,
        init_type=cfg.init_type,
        activation=cfg.activation,
    )

    net.training(
        x_train=x_fit,
        y_train=y_fit_onehot,
        x_val=x_hold,
        y_val=y_hold_onehot,
        epochs=cfg.epochs,
        batch_size=cfg.batch_size,
    )

    # Accuracy on the test split.
    expected = np.argmax(y_test_onehot, axis=1)
    test_acc = np.mean(net.predict(test_images) == expected)

    wandb.log({"test_accuracy": test_acc})
    print("test accuracy ",test_acc)


# sweep configuration
# Random-search sweep that maximises the logged "validation_accuracy".
# Each hyperparameter maps to the list of values the sweep may choose from;
# the comprehension wraps every list in the {"values": [...]} form.
sweep_config = dict(
    method="random",
    metric=dict(name="validation_accuracy", goal="maximize"),
    parameters={
        name: {"values": choices}
        for name, choices in {
            "epochs": [10],
            "num_hidden_layers": [3, 5],
            "hidden_size": [64, 128],
            "weight_decay": [0.0, 0.0005],
            "learning_rate": [1e-3, 1e-4],
            "optimizer": ["momentum", "nesterov", "rmsprop", "adam", "nadam"],
            "batch_size": [32],
            "init_type": ["random", "xavier"],
            "activation": ["sigmoid", "tanh", "relu"],
            "momentum": [0.8, 0.9],
            "beta1": [0.9],
            "beta2": [0.999],
            "beta_rms": [0.9],
            "epsilon": [1e-8],
        }.items()
    },
)

# Entry point: register the sweep, then let an agent execute its trials.

if __name__ == "__main__":
    # Register the sweep under the project and keep its id.
    sweep_id = wandb.sweep(sweep_config, project="q4_sweep_project")
    # The agent calls train_sweep once per sampled configuration.
    wandb.agent(sweep_id, function=train_sweep)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: p60nv46x
Sweep URL: https://wandb.ai/ed24s401-indian-institute-of-technology-madras/q4_sweep_project/sweeps/p60nv46x


[34m[1mwandb[0m: Agent Starting Run: 983zh0zp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: Currently logged in as: [33med24s401[0m ([33med24s401-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/10 - loss=0.5553, val_acc=0.8282, val_loss=0.4502716316380152
Epoch 2/10 - loss=0.4163, val_acc=0.8480, val_loss=0.42432164110251913
Epoch 3/10 - loss=0.3919, val_acc=0.8623, val_loss=0.3812924125999896
Epoch 4/10 - loss=0.3840, val_acc=0.8690, val_loss=0.4012169075061982
Epoch 5/10 - loss=0.3837, val_acc=0.8687, val_loss=0.39931507821164314
Epoch 6/10 - loss=0.3857, val_acc=0.8615, val_loss=0.40627183501358344
Epoch 7/10 - loss=0.3884, val_acc=0.8632, val_loss=0.4016074682796713
Epoch 8/10 - loss=0.3928, val_acc=0.8398, val_loss=0.5069759598151149
Epoch 9/10 - loss=0.3916, val_acc=0.8578, val_loss=0.4444773724894061
Epoch 10/10 - loss=0.3890, val_acc=0.8718, val_loss=0.42694463804726085
test accuracy  0.8656


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▁▁▁▁▁▁▁▁
validation loss,▅▃▁▂▂▂▂█▅▄
validation_accuracy,▁▄▆█▇▆▇▃▆█

0,1
epoch,10.0
test_accuracy,0.8656
training_loss,0.38904
validation loss,0.42694
validation_accuracy,0.87183


[34m[1mwandb[0m: Agent Starting Run: uovgr4vw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8834, val_acc=0.8267, val_loss=0.47538230296875705
Epoch 2/10 - loss=0.6570, val_acc=0.8558, val_loss=0.39825404014473625
Epoch 3/10 - loss=0.6084, val_acc=0.8575, val_loss=0.39168677438194544
Epoch 4/10 - loss=0.5783, val_acc=0.8637, val_loss=0.36085865300128966
Epoch 5/10 - loss=0.5563, val_acc=0.8707, val_loss=0.3545870458846383
Epoch 6/10 - loss=0.5377, val_acc=0.8703, val_loss=0.34473827226874093
Epoch 7/10 - loss=0.5217, val_acc=0.8723, val_loss=0.3401226195617471
Epoch 8/10 - loss=0.5085, val_acc=0.8760, val_loss=0.33950222180277234
Epoch 9/10 - loss=0.4963, val_acc=0.8757, val_loss=0.3220967424473837
Epoch 10/10 - loss=0.4843, val_acc=0.8688, val_loss=0.343127294585632
test accuracy  0.8675


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▄▄▃▂▂▂▂▁▂
validation_accuracy,▁▅▅▆▇▇▇██▇

0,1
epoch,10.0
test_accuracy,0.8675
training_loss,0.48426
validation loss,0.34313
validation_accuracy,0.86883


[34m[1mwandb[0m: Agent Starting Run: q3c3z15h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.3440, val_acc=0.7082, val_loss=0.9073662886684932
Epoch 2/10 - loss=0.7750, val_acc=0.7633, val_loss=0.7138991853294984
Epoch 3/10 - loss=0.6476, val_acc=0.7920, val_loss=0.6288609369578053
Epoch 4/10 - loss=0.5838, val_acc=0.7983, val_loss=0.5806358211738426
Epoch 5/10 - loss=0.5445, val_acc=0.8093, val_loss=0.5485756062703193
Epoch 6/10 - loss=0.5190, val_acc=0.8168, val_loss=0.5264923467970231
Epoch 7/10 - loss=0.4991, val_acc=0.8190, val_loss=0.5119154577780011
Epoch 8/10 - loss=0.4842, val_acc=0.8245, val_loss=0.4939587734489917
Epoch 9/10 - loss=0.4717, val_acc=0.8273, val_loss=0.48385537928223477
Epoch 10/10 - loss=0.4610, val_acc=0.8283, val_loss=0.477346511452069
test accuracy  0.8235


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▆▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8235
training_loss,0.46102
validation loss,0.47735
validation_accuracy,0.82833


[34m[1mwandb[0m: Agent Starting Run: 9he8lidr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.6901, val_acc=0.6608, val_loss=1.0849220550864034
Epoch 2/10 - loss=1.0849, val_acc=0.7203, val_loss=0.8209892621538978
Epoch 3/10 - loss=0.9088, val_acc=0.7500, val_loss=0.7014003415657878
Epoch 4/10 - loss=0.8141, val_acc=0.7800, val_loss=0.6265385215766032
Epoch 5/10 - loss=0.7525, val_acc=0.7970, val_loss=0.5769467214846653
Epoch 6/10 - loss=0.7115, val_acc=0.8092, val_loss=0.5440950830985022
Epoch 7/10 - loss=0.6837, val_acc=0.8172, val_loss=0.520259758321714
Epoch 8/10 - loss=0.6639, val_acc=0.8195, val_loss=0.5060898067384088
Epoch 9/10 - loss=0.6491, val_acc=0.8228, val_loss=0.4920164784575944
Epoch 10/10 - loss=0.6372, val_acc=0.8255, val_loss=0.4804385771582166
test accuracy  0.8225


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▂▁▁▁▁
validation_accuracy,▁▄▅▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8225
training_loss,0.63718
validation loss,0.48044
validation_accuracy,0.8255


[34m[1mwandb[0m: Agent Starting Run: 06fllqqi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6100, val_acc=0.8407, val_loss=0.44123077796022897
Epoch 2/10 - loss=0.4132, val_acc=0.8550, val_loss=0.4002993521066746
Epoch 3/10 - loss=0.3807, val_acc=0.8495, val_loss=0.40426051903768817
Epoch 4/10 - loss=0.3577, val_acc=0.8613, val_loss=0.3784133103676986
Epoch 5/10 - loss=0.3424, val_acc=0.8652, val_loss=0.35747088755349204
Epoch 6/10 - loss=0.3284, val_acc=0.8642, val_loss=0.3588343176087859
Epoch 7/10 - loss=0.3184, val_acc=0.8747, val_loss=0.33800688875213775
Epoch 8/10 - loss=0.3071, val_acc=0.8792, val_loss=0.33343892668284936
Epoch 9/10 - loss=0.3005, val_acc=0.8750, val_loss=0.33113935830790064
Epoch 10/10 - loss=0.2907, val_acc=0.8742, val_loss=0.34139463457914676
test accuracy  0.8719


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▆▄▃▃▁▁▁▂
validation_accuracy,▁▄▃▅▅▅▇█▇▇

0,1
epoch,10.0
test_accuracy,0.8719
training_loss,0.29069
validation loss,0.34139
validation_accuracy,0.87417


[34m[1mwandb[0m: Agent Starting Run: c283qtl2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5316, val_acc=0.8408, val_loss=0.4262506810276712
Epoch 2/10 - loss=0.3910, val_acc=0.8612, val_loss=0.38029229947575316
Epoch 3/10 - loss=0.3584, val_acc=0.8673, val_loss=0.35333921797827467
Epoch 4/10 - loss=0.3346, val_acc=0.8703, val_loss=0.35265591099771987
Epoch 5/10 - loss=0.3207, val_acc=0.8672, val_loss=0.3440293440400351
Epoch 6/10 - loss=0.3072, val_acc=0.8673, val_loss=0.3479996395814301
Epoch 7/10 - loss=0.2944, val_acc=0.8773, val_loss=0.3374566103018656
Epoch 8/10 - loss=0.2859, val_acc=0.8805, val_loss=0.3280997443162233
Epoch 9/10 - loss=0.2771, val_acc=0.8798, val_loss=0.3222661801201401
Epoch 10/10 - loss=0.2679, val_acc=0.8777, val_loss=0.33938835640725173
test accuracy  0.8716


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▃▃▂▃▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▇██▇

0,1
epoch,10.0
test_accuracy,0.8716
training_loss,0.26789
validation loss,0.33939
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: 87e7mxie with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.6882, val_acc=0.6713, val_loss=0.9829690042692873
Epoch 2/10 - loss=1.0144, val_acc=0.7610, val_loss=0.7127878521082422
Epoch 3/10 - loss=0.8510, val_acc=0.7903, val_loss=0.6126208839811959
Epoch 4/10 - loss=0.7720, val_acc=0.8055, val_loss=0.556748979053276
Epoch 5/10 - loss=0.7221, val_acc=0.8183, val_loss=0.5202245598817438
Epoch 6/10 - loss=0.6901, val_acc=0.8305, val_loss=0.4956543569604034
Epoch 7/10 - loss=0.6667, val_acc=0.8338, val_loss=0.4803127923235173
Epoch 8/10 - loss=0.6491, val_acc=0.8340, val_loss=0.46663277474079934
Epoch 9/10 - loss=0.6347, val_acc=0.8390, val_loss=0.45545805654147564
Epoch 10/10 - loss=0.6227, val_acc=0.8425, val_loss=0.44658549176739265
test accuracy  0.836


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▂▂▂▁▁▁▁▁
validation loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▅▆▆▇█████

0,1
epoch,10.0
test_accuracy,0.836
training_loss,0.62273
validation loss,0.44659
validation_accuracy,0.8425


[34m[1mwandb[0m: Agent Starting Run: pxrx0b96 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.4830, val_acc=0.8525, val_loss=0.3915567499634833
Epoch 2/10 - loss=0.3673, val_acc=0.8642, val_loss=0.3709101547896698
Epoch 3/10 - loss=0.3353, val_acc=0.8743, val_loss=0.3340325507608215
Epoch 4/10 - loss=0.3147, val_acc=0.8802, val_loss=0.32716974497891876
Epoch 5/10 - loss=0.2985, val_acc=0.8735, val_loss=0.3279797500329235
Epoch 6/10 - loss=0.2853, val_acc=0.8817, val_loss=0.3249539008688805
Epoch 7/10 - loss=0.2745, val_acc=0.8837, val_loss=0.32202964045447513
Epoch 8/10 - loss=0.2651, val_acc=0.8832, val_loss=0.30925817110188275
Epoch 9/10 - loss=0.2554, val_acc=0.8857, val_loss=0.3108576969021752
Epoch 10/10 - loss=0.2466, val_acc=0.8797, val_loss=0.3196521121128617
test accuracy  0.8726


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▁▁
validation loss,█▆▃▃▃▂▂▁▁▂
validation_accuracy,▁▃▆▇▅▇█▇█▇

0,1
epoch,10.0
test_accuracy,0.8726
training_loss,0.24655
validation loss,0.31965
validation_accuracy,0.87967


[34m[1mwandb[0m: Agent Starting Run: gag8ms48 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.4132, val_acc=0.1768, val_loss=2.292150293765403
Epoch 2/10 - loss=2.3905, val_acc=0.2853, val_loss=2.2861204554435686
Epoch 3/10 - loss=2.3842, val_acc=0.2873, val_loss=2.2802184838898456
Epoch 4/10 - loss=2.3777, val_acc=0.4022, val_loss=2.2732270674223356
Epoch 5/10 - loss=2.3706, val_acc=0.4557, val_loss=2.2657527965506956
Epoch 6/10 - loss=2.3627, val_acc=0.4705, val_loss=2.2576536299632615
Epoch 7/10 - loss=2.3540, val_acc=0.4965, val_loss=2.2483936568820377
Epoch 8/10 - loss=2.3440, val_acc=0.4177, val_loss=2.2378449647951
Epoch 9/10 - loss=2.3327, val_acc=0.4445, val_loss=2.2254009518067543
Epoch 10/10 - loss=2.3195, val_acc=0.4220, val_loss=2.2110843586417293
test accuracy  0.4219


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▆▆▅▅▄▄▃▂▁
validation loss,█▇▇▆▆▅▄▃▂▁
validation_accuracy,▁▃▃▆▇▇█▆▇▆

0,1
epoch,10.0
test_accuracy,0.4219
training_loss,2.31946
validation loss,2.21108
validation_accuracy,0.422


[34m[1mwandb[0m: Agent Starting Run: 8np5n7m6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5553, val_acc=0.8282, val_loss=0.4502716316380152
Epoch 2/10 - loss=0.4163, val_acc=0.8480, val_loss=0.42432164110251913
Epoch 3/10 - loss=0.3919, val_acc=0.8623, val_loss=0.3812924125999896
Epoch 4/10 - loss=0.3840, val_acc=0.8690, val_loss=0.4012169075061982
Epoch 5/10 - loss=0.3837, val_acc=0.8687, val_loss=0.39931507821164314
Epoch 6/10 - loss=0.3857, val_acc=0.8615, val_loss=0.40627183501358344
Epoch 7/10 - loss=0.3884, val_acc=0.8632, val_loss=0.4016074682796713
Epoch 8/10 - loss=0.3928, val_acc=0.8398, val_loss=0.5069759598151149
Epoch 9/10 - loss=0.3916, val_acc=0.8578, val_loss=0.4444773724894061
Epoch 10/10 - loss=0.3890, val_acc=0.8718, val_loss=0.42694463804726085
test accuracy  0.8656


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▁▁▁▁▁▁▁▁
validation loss,▅▃▁▂▂▂▂█▅▄
validation_accuracy,▁▄▆█▇▆▇▃▆█

0,1
epoch,10.0
test_accuracy,0.8656
training_loss,0.38904
validation loss,0.42694
validation_accuracy,0.87183


[34m[1mwandb[0m: Agent Starting Run: d7kc72pl with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.8201, val_acc=0.6285, val_loss=1.4182794127681342
Epoch 2/10 - loss=1.2176, val_acc=0.6615, val_loss=1.0795495502435692
Epoch 3/10 - loss=0.9810, val_acc=0.6913, val_loss=0.916333560681336
Epoch 4/10 - loss=0.8531, val_acc=0.7242, val_loss=0.8176308782402324
Epoch 5/10 - loss=0.7706, val_acc=0.7383, val_loss=0.7493140426217068
Epoch 6/10 - loss=0.7109, val_acc=0.7557, val_loss=0.6978554089214497
Epoch 7/10 - loss=0.6642, val_acc=0.7702, val_loss=0.6565216775726654
Epoch 8/10 - loss=0.6268, val_acc=0.7792, val_loss=0.6239664273975669
Epoch 9/10 - loss=0.5962, val_acc=0.7862, val_loss=0.5977701306216459
Epoch 10/10 - loss=0.5711, val_acc=0.7950, val_loss=0.5747982165197526
test accuracy  0.7966


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▃▃▂▂▂▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▂▄▅▆▆▇▇██

0,1
epoch,10.0
test_accuracy,0.7966
training_loss,0.57108
validation loss,0.5748
validation_accuracy,0.795


[34m[1mwandb[0m: Agent Starting Run: b2u3g9u7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.4049, val_acc=0.6580, val_loss=0.8802240088826195
Epoch 2/10 - loss=0.9893, val_acc=0.6897, val_loss=0.7898220231309623
Epoch 3/10 - loss=0.9177, val_acc=0.7102, val_loss=0.7576589613292878
Epoch 4/10 - loss=0.8454, val_acc=0.7783, val_loss=0.6096843799087401
Epoch 5/10 - loss=0.7688, val_acc=0.8012, val_loss=0.5732428096530953
Epoch 6/10 - loss=0.7143, val_acc=0.8263, val_loss=0.51107325815624
Epoch 7/10 - loss=0.6963, val_acc=0.8280, val_loss=0.48082160792455714
Epoch 8/10 - loss=0.6796, val_acc=0.8390, val_loss=0.4740562301790807
Epoch 9/10 - loss=0.6668, val_acc=0.8388, val_loss=0.4716351772123521
Epoch 10/10 - loss=0.6625, val_acc=0.8365, val_loss=0.4660809467256131
test accuracy  0.8336


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▁▁▁▁▁
validation loss,█▆▆▃▃▂▁▁▁▁
validation_accuracy,▁▂▃▆▇█████

0,1
epoch,10.0
test_accuracy,0.8336
training_loss,0.66249
validation loss,0.46608
validation_accuracy,0.8365


[34m[1mwandb[0m: Agent Starting Run: rzj40vla with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8189, val_acc=0.8330, val_loss=0.47288786305722436
Epoch 2/10 - loss=0.5876, val_acc=0.8490, val_loss=0.42048123166841916
Epoch 3/10 - loss=0.5446, val_acc=0.8553, val_loss=0.40074556277352696
Epoch 4/10 - loss=0.5177, val_acc=0.8617, val_loss=0.37286030160037376
Epoch 5/10 - loss=0.4973, val_acc=0.8620, val_loss=0.3725171362276269
Epoch 6/10 - loss=0.4816, val_acc=0.8667, val_loss=0.3578653649249147
Epoch 7/10 - loss=0.4685, val_acc=0.8707, val_loss=0.34814295280925867
Epoch 8/10 - loss=0.4573, val_acc=0.8785, val_loss=0.3339394323350977
Epoch 9/10 - loss=0.4464, val_acc=0.8735, val_loss=0.3362757898576429
Epoch 10/10 - loss=0.4362, val_acc=0.8832, val_loss=0.3229743905907827
test accuracy  0.876


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▆▅▃▃▃▂▂▂▁
validation_accuracy,▁▃▄▅▅▆▆▇▇█

0,1
epoch,10.0
test_accuracy,0.876
training_loss,0.43622
validation loss,0.32297
validation_accuracy,0.88317


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1ybr11n1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6495, val_acc=0.8287, val_loss=0.4716594105169914
Epoch 2/10 - loss=0.4239, val_acc=0.8508, val_loss=0.4094628426307846
Epoch 3/10 - loss=0.3844, val_acc=0.8553, val_loss=0.4017706956811508
Epoch 4/10 - loss=0.3606, val_acc=0.8672, val_loss=0.36979783960650636
Epoch 5/10 - loss=0.3414, val_acc=0.8645, val_loss=0.3633434222988479
Epoch 6/10 - loss=0.3276, val_acc=0.8720, val_loss=0.3461213650769512
Epoch 7/10 - loss=0.3138, val_acc=0.8745, val_loss=0.339501928620412
Epoch 8/10 - loss=0.3028, val_acc=0.8777, val_loss=0.32613300740102114
Epoch 9/10 - loss=0.2913, val_acc=0.8803, val_loss=0.32298694665596944
Epoch 10/10 - loss=0.2815, val_acc=0.8817, val_loss=0.31899116400933514
test accuracy  0.8767


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8767
training_loss,0.28153
validation loss,0.31899
validation_accuracy,0.88167


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nglt2wjb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.8667, val_acc=0.8015, val_loss=0.5542136418987664
Epoch 2/10 - loss=0.4980, val_acc=0.8322, val_loss=0.46923135082256345
Epoch 3/10 - loss=0.4471, val_acc=0.8422, val_loss=0.4511293157104777
Epoch 4/10 - loss=0.4176, val_acc=0.8525, val_loss=0.4181017991411145
Epoch 5/10 - loss=0.3947, val_acc=0.8477, val_loss=0.4115344210252111
Epoch 6/10 - loss=0.3807, val_acc=0.8617, val_loss=0.3815295543177438
Epoch 7/10 - loss=0.3656, val_acc=0.8640, val_loss=0.3686995210007295
Epoch 8/10 - loss=0.3543, val_acc=0.8678, val_loss=0.35666497829869853
Epoch 9/10 - loss=0.3424, val_acc=0.8685, val_loss=0.357329174166745
Epoch 10/10 - loss=0.3330, val_acc=0.8713, val_loss=0.34871448679526496
test accuracy  0.8641


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8641
training_loss,0.33302
validation loss,0.34871
validation_accuracy,0.87133


[34m[1mwandb[0m: Agent Starting Run: 4nee0p0z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.6906, val_acc=0.6823, val_loss=0.9048527284554221
Epoch 2/10 - loss=0.9201, val_acc=0.7538, val_loss=0.6892007252587247
Epoch 3/10 - loss=0.7928, val_acc=0.7907, val_loss=0.6004321967752192
Epoch 4/10 - loss=0.7314, val_acc=0.8097, val_loss=0.5544426119523772
Epoch 5/10 - loss=0.6948, val_acc=0.8142, val_loss=0.5367966445109027
Epoch 6/10 - loss=0.6701, val_acc=0.8202, val_loss=0.5019612522504148
Epoch 7/10 - loss=0.6502, val_acc=0.8268, val_loss=0.4903386301804005
Epoch 8/10 - loss=0.6349, val_acc=0.8333, val_loss=0.47504550103156346
Epoch 9/10 - loss=0.6222, val_acc=0.8348, val_loss=0.46388152454563153
Epoch 10/10 - loss=0.6111, val_acc=0.8340, val_loss=0.4662693691200872
test accuracy  0.8249


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▄▆▇▇▇████

0,1
epoch,10.0
test_accuracy,0.8249
training_loss,0.6111
validation loss,0.46627
validation_accuracy,0.834


[34m[1mwandb[0m: Agent Starting Run: sfanokm3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.6146, val_acc=0.5805, val_loss=1.0941827307565963
Epoch 2/10 - loss=0.9099, val_acc=0.7280, val_loss=0.794222893856428
Epoch 3/10 - loss=0.6987, val_acc=0.7647, val_loss=0.6553949047708305
Epoch 4/10 - loss=0.6043, val_acc=0.7820, val_loss=0.5938219521646441
Epoch 5/10 - loss=0.5539, val_acc=0.7912, val_loss=0.5514526221307969
Epoch 6/10 - loss=0.5190, val_acc=0.8093, val_loss=0.5189406094148027
Epoch 7/10 - loss=0.4893, val_acc=0.8210, val_loss=0.4933283969571248
Epoch 8/10 - loss=0.4646, val_acc=0.8292, val_loss=0.47173530711730616
Epoch 9/10 - loss=0.4451, val_acc=0.8373, val_loss=0.4527234083301381
Epoch 10/10 - loss=0.4291, val_acc=0.8402, val_loss=0.4391439940109482
test accuracy  0.84


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▅▆▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.84
training_loss,0.4291
validation loss,0.43914
validation_accuracy,0.84017


[34m[1mwandb[0m: Agent Starting Run: 4i0s7br5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.6652, val_acc=0.8530, val_loss=0.40190003921103834
Epoch 2/10 - loss=0.5329, val_acc=0.8558, val_loss=0.38664011381065483
Epoch 3/10 - loss=0.4909, val_acc=0.8675, val_loss=0.36779681542488407
Epoch 4/10 - loss=0.4683, val_acc=0.8720, val_loss=0.34719066558647804
Epoch 5/10 - loss=0.4491, val_acc=0.8630, val_loss=0.36899379085688844
Epoch 6/10 - loss=0.4389, val_acc=0.8733, val_loss=0.35014310054714476
Epoch 7/10 - loss=0.4329, val_acc=0.8735, val_loss=0.349062959071303
Epoch 8/10 - loss=0.4259, val_acc=0.8763, val_loss=0.3396814481277801
Epoch 9/10 - loss=0.4161, val_acc=0.8660, val_loss=0.3612485302096555
Epoch 10/10 - loss=0.4126, val_acc=0.8693, val_loss=0.34604696393793954
test accuracy  0.8623


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▆▄▂▄▂▂▁▃▂
validation_accuracy,▁▂▅▇▄▇▇█▅▆

0,1
epoch,10.0
test_accuracy,0.8623
training_loss,0.41263
validation loss,0.34605
validation_accuracy,0.86933


[34m[1mwandb[0m: Agent Starting Run: l1cwwmqk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5235, val_acc=0.8253, val_loss=0.4633600436704045
Epoch 2/10 - loss=0.3967, val_acc=0.8488, val_loss=0.40709307504086384
Epoch 3/10 - loss=0.3610, val_acc=0.8640, val_loss=0.3750663852004295
Epoch 4/10 - loss=0.3397, val_acc=0.8732, val_loss=0.3487636518233484
Epoch 5/10 - loss=0.3226, val_acc=0.8778, val_loss=0.33550856993749373
Epoch 6/10 - loss=0.3093, val_acc=0.8700, val_loss=0.3507626464132797
Epoch 7/10 - loss=0.2964, val_acc=0.8770, val_loss=0.3380899762526547
Epoch 8/10 - loss=0.2882, val_acc=0.8720, val_loss=0.35602823451083776
Epoch 9/10 - loss=0.2785, val_acc=0.8805, val_loss=0.3330456849676104
Epoch 10/10 - loss=0.2719, val_acc=0.8753, val_loss=0.34319352018095023
test accuracy  0.8643


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▃▂▁▂▁▂▁▂
validation_accuracy,▁▄▆▇█▇█▇█▇

0,1
epoch,10.0
test_accuracy,0.8643
training_loss,0.27193
validation loss,0.34319
validation_accuracy,0.87533


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qxmrbax7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5989, val_acc=0.8478, val_loss=0.41970906043412487
Epoch 2/10 - loss=0.4915, val_acc=0.8545, val_loss=0.392838231025159
Epoch 3/10 - loss=0.4646, val_acc=0.8608, val_loss=0.37799624343090227
Epoch 4/10 - loss=0.4496, val_acc=0.8642, val_loss=0.36305117779936236
Epoch 5/10 - loss=0.4352, val_acc=0.8605, val_loss=0.3680567154066666
Epoch 6/10 - loss=0.4299, val_acc=0.8705, val_loss=0.35336949691059216
Epoch 7/10 - loss=0.4217, val_acc=0.8715, val_loss=0.35497743564564316
Epoch 8/10 - loss=0.4179, val_acc=0.8727, val_loss=0.3404028179054465
Epoch 9/10 - loss=0.4078, val_acc=0.8598, val_loss=0.3717643313340888
Epoch 10/10 - loss=0.4033, val_acc=0.8638, val_loss=0.357059538077422
test accuracy  0.8601


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▁▁
validation loss,█▆▄▃▃▂▂▁▄▂
validation_accuracy,▁▃▅▆▅▇██▄▆

0,1
epoch,10.0
test_accuracy,0.8601
training_loss,0.40332
validation loss,0.35706
validation_accuracy,0.86383


[34m[1mwandb[0m: Agent Starting Run: bq0xhfvs with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7384, val_acc=0.8442, val_loss=0.44929709339667245
Epoch 2/10 - loss=0.4116, val_acc=0.8608, val_loss=0.39029753459721195
Epoch 3/10 - loss=0.3683, val_acc=0.8682, val_loss=0.3662082821915141
Epoch 4/10 - loss=0.3435, val_acc=0.8710, val_loss=0.3527548945868662
Epoch 5/10 - loss=0.3247, val_acc=0.8728, val_loss=0.3428160543798559
Epoch 6/10 - loss=0.3120, val_acc=0.8805, val_loss=0.33990519086044596
Epoch 7/10 - loss=0.2998, val_acc=0.8745, val_loss=0.3311118536134983
Epoch 8/10 - loss=0.2903, val_acc=0.8793, val_loss=0.3258548674831543
Epoch 9/10 - loss=0.2794, val_acc=0.8792, val_loss=0.3244729602770525
Epoch 10/10 - loss=0.2721, val_acc=0.8835, val_loss=0.3205575965837035
test accuracy  0.8758


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▆▇▇█

0,1
epoch,10.0
test_accuracy,0.8758
training_loss,0.2721
validation loss,0.32056
validation_accuracy,0.8835


[34m[1mwandb[0m: Agent Starting Run: 85yffuw1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.0524, val_acc=0.6217, val_loss=1.4913677175989664
Epoch 2/10 - loss=1.2682, val_acc=0.6715, val_loss=0.9849910493824618
Epoch 3/10 - loss=0.9801, val_acc=0.7198, val_loss=0.8226347576227293
Epoch 4/10 - loss=0.8603, val_acc=0.7480, val_loss=0.7358059700852158
Epoch 5/10 - loss=0.7883, val_acc=0.7708, val_loss=0.6771725064066416
Epoch 6/10 - loss=0.7386, val_acc=0.7870, val_loss=0.6339038920989798
Epoch 7/10 - loss=0.7001, val_acc=0.7927, val_loss=0.6034520612606704
Epoch 8/10 - loss=0.6714, val_acc=0.8025, val_loss=0.573628052082561
Epoch 9/10 - loss=0.6487, val_acc=0.8082, val_loss=0.5555827518895411
Epoch 10/10 - loss=0.6310, val_acc=0.8082, val_loss=0.5431267292891337
test accuracy  0.8037


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8037
training_loss,0.63097
validation loss,0.54313
validation_accuracy,0.80817


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ukqf25xa with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.0587, val_acc=0.3055, val_loss=1.7334853526327958
Epoch 2/10 - loss=1.5013, val_acc=0.4195, val_loss=1.2980689043578804
Epoch 3/10 - loss=1.1921, val_acc=0.5462, val_loss=1.1103010869695438
Epoch 4/10 - loss=1.0272, val_acc=0.6357, val_loss=0.969085243009809
Epoch 5/10 - loss=0.8923, val_acc=0.7052, val_loss=0.8468564462964443
Epoch 6/10 - loss=0.7658, val_acc=0.7503, val_loss=0.7294345944194273
Epoch 7/10 - loss=0.6720, val_acc=0.7675, val_loss=0.6637225598858318
Epoch 8/10 - loss=0.6176, val_acc=0.7807, val_loss=0.6171374223996476
Epoch 9/10 - loss=0.5803, val_acc=0.7902, val_loss=0.5846230379331021
Epoch 10/10 - loss=0.5521, val_acc=0.7958, val_loss=0.5613758420505413
test accuracy  0.793


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▃▄▆▇▇████

0,1
epoch,10.0
test_accuracy,0.793
training_loss,0.55205
validation loss,0.56138
validation_accuracy,0.79583


[34m[1mwandb[0m: Agent Starting Run: yfr81jrj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7868, val_acc=0.8167, val_loss=0.5217478342201655
Epoch 2/10 - loss=0.4816, val_acc=0.8377, val_loss=0.4585569569009179
Epoch 3/10 - loss=0.4367, val_acc=0.8437, val_loss=0.43797609699630824
Epoch 4/10 - loss=0.4112, val_acc=0.8547, val_loss=0.40707383224604626
Epoch 5/10 - loss=0.3929, val_acc=0.8593, val_loss=0.39868272907460245
Epoch 6/10 - loss=0.3794, val_acc=0.8622, val_loss=0.38730276633180966
Epoch 7/10 - loss=0.3672, val_acc=0.8595, val_loss=0.383965068278298
Epoch 8/10 - loss=0.3567, val_acc=0.8680, val_loss=0.36583353707208216
Epoch 9/10 - loss=0.3465, val_acc=0.8702, val_loss=0.35970920867590317
Epoch 10/10 - loss=0.3392, val_acc=0.8698, val_loss=0.3568636316582727
test accuracy  0.8618


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8618
training_loss,0.33918
validation loss,0.35686
validation_accuracy,0.86983


[34m[1mwandb[0m: Agent Starting Run: tewg7q73 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.8951, val_acc=0.7687, val_loss=0.6317323622913114
Epoch 2/10 - loss=0.5409, val_acc=0.8318, val_loss=0.4904202835232726
Epoch 3/10 - loss=0.4743, val_acc=0.8278, val_loss=0.49711713173726596
Epoch 4/10 - loss=0.4353, val_acc=0.8400, val_loss=0.4547748386550395
Epoch 5/10 - loss=0.4084, val_acc=0.8448, val_loss=0.42994919418965144
Epoch 6/10 - loss=0.3877, val_acc=0.8645, val_loss=0.38962028736764415
Epoch 7/10 - loss=0.3719, val_acc=0.8695, val_loss=0.3749814437184869
Epoch 8/10 - loss=0.3560, val_acc=0.8727, val_loss=0.36309485865762303
Epoch 9/10 - loss=0.3429, val_acc=0.8627, val_loss=0.38277883014484254
Epoch 10/10 - loss=0.3326, val_acc=0.8598, val_loss=0.3912337574388703
test accuracy  0.8539


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▄▃▃▂▁▁▂▂
validation_accuracy,▁▅▅▆▆▇██▇▇

0,1
epoch,10.0
test_accuracy,0.8539
training_loss,0.33263
validation loss,0.39123
validation_accuracy,0.85983


[34m[1mwandb[0m: Agent Starting Run: lr95q2c4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5553, val_acc=0.8282, val_loss=0.4502716316380152
Epoch 2/10 - loss=0.4163, val_acc=0.8480, val_loss=0.42432164110251913
Epoch 3/10 - loss=0.3919, val_acc=0.8623, val_loss=0.3812924125999896
Epoch 4/10 - loss=0.3840, val_acc=0.8690, val_loss=0.4012169075061982
Epoch 5/10 - loss=0.3837, val_acc=0.8687, val_loss=0.39931507821164314
Epoch 6/10 - loss=0.3857, val_acc=0.8615, val_loss=0.40627183501358344
Epoch 7/10 - loss=0.3884, val_acc=0.8632, val_loss=0.4016074682796713
Epoch 8/10 - loss=0.3928, val_acc=0.8398, val_loss=0.5069759598151149
Epoch 9/10 - loss=0.3916, val_acc=0.8578, val_loss=0.4444773724894061
Epoch 10/10 - loss=0.3890, val_acc=0.8718, val_loss=0.42694463804726085
test accuracy  0.8656


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▁▁▁▁▁▁▁▁
validation loss,▅▃▁▂▂▂▂█▅▄
validation_accuracy,▁▄▆█▇▆▇▃▆█

0,1
epoch,10.0
test_accuracy,0.8656
training_loss,0.38904
validation loss,0.42694
validation_accuracy,0.87183


[34m[1mwandb[0m: Agent Starting Run: psoodmf7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.9458, val_acc=0.8307, val_loss=0.4717492848556397
Epoch 2/10 - loss=0.7505, val_acc=0.8492, val_loss=0.4149236866866515
Epoch 3/10 - loss=0.7053, val_acc=0.8497, val_loss=0.4047650881879877
Epoch 4/10 - loss=0.6761, val_acc=0.8537, val_loss=0.3858373777010057
Epoch 5/10 - loss=0.6542, val_acc=0.8605, val_loss=0.3692215402204152
Epoch 6/10 - loss=0.6355, val_acc=0.8732, val_loss=0.34359989486151715
Epoch 7/10 - loss=0.6203, val_acc=0.8643, val_loss=0.3541446593248155
Epoch 8/10 - loss=0.6051, val_acc=0.8683, val_loss=0.3538595572741048
Epoch 9/10 - loss=0.5931, val_acc=0.8763, val_loss=0.32662779599426184
Epoch 10/10 - loss=0.5808, val_acc=0.8738, val_loss=0.3445214310005897
test accuracy  0.8681


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▄▃▂▂▂▁▂
validation_accuracy,▁▄▄▅▆█▆▇██

0,1
epoch,10.0
test_accuracy,0.8681
training_loss,0.58082
validation loss,0.34452
validation_accuracy,0.87383


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x3e7fk3x with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.5374, val_acc=0.0985, val_loss=2.3121771854705244
Epoch 2/10 - loss=2.4683, val_acc=0.0565, val_loss=2.3028506409185963
Epoch 3/10 - loss=2.4657, val_acc=0.0985, val_loss=2.3025417670037673
Epoch 4/10 - loss=2.4655, val_acc=0.0985, val_loss=2.302450844586812
Epoch 5/10 - loss=2.4653, val_acc=0.1008, val_loss=2.3024113084779354
Epoch 6/10 - loss=2.4652, val_acc=0.1000, val_loss=2.3023695339661865
Epoch 7/10 - loss=2.4650, val_acc=0.1340, val_loss=2.302327853384874
Epoch 8/10 - loss=2.4649, val_acc=0.1265, val_loss=2.3022962478471762
Epoch 9/10 - loss=2.4647, val_acc=0.1000, val_loss=2.3022770195152655
Epoch 10/10 - loss=2.4646, val_acc=0.1513, val_loss=2.3022897082031553
test accuracy  0.1551


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▁▁▁▁▁▁▁▁▁
validation loss,█▁▁▁▁▁▁▁▁▁
validation_accuracy,▄▁▄▄▄▄▇▆▄█

0,1
epoch,10.0
test_accuracy,0.1551
training_loss,2.46456
validation loss,2.30229
validation_accuracy,0.15133


[34m[1mwandb[0m: Agent Starting Run: bm327x0t with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.3081, val_acc=0.2553, val_loss=2.1058923593780623
Epoch 2/10 - loss=1.9266, val_acc=0.4692, val_loss=1.5164345320150387
Epoch 3/10 - loss=1.3078, val_acc=0.6187, val_loss=1.0185960674297476
Epoch 4/10 - loss=0.9976, val_acc=0.6745, val_loss=0.855118582710457
Epoch 5/10 - loss=0.8832, val_acc=0.7007, val_loss=0.7821122982786574
Epoch 6/10 - loss=0.8224, val_acc=0.7300, val_loss=0.7270636160485401
Epoch 7/10 - loss=0.7783, val_acc=0.7520, val_loss=0.687869736322436
Epoch 8/10 - loss=0.7408, val_acc=0.7727, val_loss=0.6551448341213724
Epoch 9/10 - loss=0.7082, val_acc=0.7858, val_loss=0.6188450660197422
Epoch 10/10 - loss=0.6775, val_acc=0.7843, val_loss=0.6039336734814228
test accuracy  0.7819


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▆▄▂▂▂▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▄▆▇▇▇████

0,1
epoch,10.0
test_accuracy,0.7819
training_loss,0.67755
validation loss,0.60393
validation_accuracy,0.78433


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kc11tucu with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.9537, val_acc=0.6215, val_loss=1.4895556826599456
Epoch 2/10 - loss=1.1672, val_acc=0.6715, val_loss=0.9832608663296118
Epoch 3/10 - loss=0.8780, val_acc=0.7203, val_loss=0.8208723950380293
Epoch 4/10 - loss=0.7574, val_acc=0.7483, val_loss=0.7339664405128069
Epoch 5/10 - loss=0.6849, val_acc=0.7712, val_loss=0.6752168550212373
Epoch 6/10 - loss=0.6346, val_acc=0.7877, val_loss=0.6319089450692479
Epoch 7/10 - loss=0.5957, val_acc=0.7935, val_loss=0.6014234778546999
Epoch 8/10 - loss=0.5667, val_acc=0.8033, val_loss=0.5716297730551
Epoch 9/10 - loss=0.5439, val_acc=0.8087, val_loss=0.5536474427258639
Epoch 10/10 - loss=0.5260, val_acc=0.8082, val_loss=0.541292236940221
test accuracy  0.8042


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8042
training_loss,0.526
validation loss,0.54129
validation_accuracy,0.80817


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h087iat4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3062, val_acc=0.0977, val_loss=2.303974915229481
Epoch 2/10 - loss=2.3041, val_acc=0.1013, val_loss=2.303352820200559
Epoch 3/10 - loss=2.3038, val_acc=0.0985, val_loss=2.3053607235277527
Epoch 4/10 - loss=2.3041, val_acc=0.0977, val_loss=2.3034478026941754
Epoch 5/10 - loss=2.3038, val_acc=0.1013, val_loss=2.304703166190963
Epoch 6/10 - loss=2.3038, val_acc=0.1000, val_loss=2.303048407506977
Epoch 7/10 - loss=2.3039, val_acc=0.1013, val_loss=2.3030503863014564
Epoch 8/10 - loss=2.3037, val_acc=0.1000, val_loss=2.302993919066203
Epoch 9/10 - loss=2.3039, val_acc=0.1018, val_loss=2.303747821363612
Epoch 10/10 - loss=2.3036, val_acc=0.1000, val_loss=2.3042547144259826
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▂▁
validation loss,▄▂█▂▆▁▁▁▃▅
validation_accuracy,▁▇▂▁▇▅▇▅█▅

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30355
validation loss,2.30425
validation_accuracy,0.1


[34m[1mwandb[0m: Agent Starting Run: b280jmqr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3069, val_acc=0.0977, val_loss=2.3039294933728303
Epoch 2/10 - loss=2.3041, val_acc=0.1145, val_loss=2.3036475947900055
Epoch 3/10 - loss=2.3041, val_acc=0.0977, val_loss=2.3040859922048162
Epoch 4/10 - loss=2.3041, val_acc=0.0985, val_loss=2.302757747155341
Epoch 5/10 - loss=2.3041, val_acc=0.1023, val_loss=2.3030441769201704
Epoch 6/10 - loss=2.3040, val_acc=0.0985, val_loss=2.3032501931688043
Epoch 7/10 - loss=2.3039, val_acc=0.1000, val_loss=2.3035614843626755
Epoch 8/10 - loss=2.3039, val_acc=0.0977, val_loss=2.3051427433363156
Epoch 9/10 - loss=2.3038, val_acc=0.1000, val_loss=2.303022491236831
Epoch 10/10 - loss=2.3038, val_acc=0.0995, val_loss=2.3036126237574437
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▂▂▂▂▁▁▁▁
validation loss,▄▄▅▁▂▂▃█▂▄
validation_accuracy,▁█▁▁▃▁▂▁▂▂

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30375
validation loss,2.30361
validation_accuracy,0.0995


[34m[1mwandb[0m: Agent Starting Run: d1v9slx6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.4495, val_acc=0.6555, val_loss=1.1057567094704246
Epoch 2/10 - loss=0.9782, val_acc=0.7152, val_loss=0.8925417709375482
Epoch 3/10 - loss=0.8300, val_acc=0.7400, val_loss=0.7911863218934891
Epoch 4/10 - loss=0.7487, val_acc=0.7555, val_loss=0.7268907522606611
Epoch 5/10 - loss=0.6951, val_acc=0.7690, val_loss=0.6820783325512757
Epoch 6/10 - loss=0.6563, val_acc=0.7782, val_loss=0.6492531203908745
Epoch 7/10 - loss=0.6266, val_acc=0.7862, val_loss=0.623056697578619
Epoch 8/10 - loss=0.6028, val_acc=0.7905, val_loss=0.6022458178492165
Epoch 9/10 - loss=0.5833, val_acc=0.7955, val_loss=0.5840648765722177
Epoch 10/10 - loss=0.5671, val_acc=0.7995, val_loss=0.5694665725577399
test accuracy  0.7986


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.7986
training_loss,0.56709
validation loss,0.56947
validation_accuracy,0.7995


[34m[1mwandb[0m: Agent Starting Run: 0t1d1fpq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6975, val_acc=0.8257, val_loss=0.4909688508627646
Epoch 2/10 - loss=0.4701, val_acc=0.8468, val_loss=0.4404039728104688
Epoch 3/10 - loss=0.4268, val_acc=0.8507, val_loss=0.4185324033728051
Epoch 4/10 - loss=0.4017, val_acc=0.8615, val_loss=0.3897991543178661
Epoch 5/10 - loss=0.3832, val_acc=0.8570, val_loss=0.39360127803452094
Epoch 6/10 - loss=0.3675, val_acc=0.8635, val_loss=0.376634273848217
Epoch 7/10 - loss=0.3563, val_acc=0.8657, val_loss=0.3630551852859084
Epoch 8/10 - loss=0.3461, val_acc=0.8677, val_loss=0.3570034831621371
Epoch 9/10 - loss=0.3355, val_acc=0.8712, val_loss=0.35884386898329995
Epoch 10/10 - loss=0.3280, val_acc=0.8713, val_loss=0.34810187685248656
test accuracy  0.8635


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▆▄▃▃▂▂▁▂▁
validation_accuracy,▁▄▅▆▆▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8635
training_loss,0.32804
validation loss,0.3481
validation_accuracy,0.87133


[34m[1mwandb[0m: Agent Starting Run: 38kjwemk with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8068, val_acc=0.8245, val_loss=0.48460313163559043
Epoch 2/10 - loss=0.5657, val_acc=0.8507, val_loss=0.42441486305631687
Epoch 3/10 - loss=0.5233, val_acc=0.8438, val_loss=0.4250030600844261
Epoch 4/10 - loss=0.4951, val_acc=0.8500, val_loss=0.39805016474225263
Epoch 5/10 - loss=0.4755, val_acc=0.8645, val_loss=0.37296798847230667
Epoch 6/10 - loss=0.4583, val_acc=0.8717, val_loss=0.3495343054376769
Epoch 7/10 - loss=0.4457, val_acc=0.8670, val_loss=0.35775600224671744
Epoch 8/10 - loss=0.4335, val_acc=0.8725, val_loss=0.34672425176180693
Epoch 9/10 - loss=0.4234, val_acc=0.8738, val_loss=0.33049233409171475
Epoch 10/10 - loss=0.4134, val_acc=0.8697, val_loss=0.3574495601553445
test accuracy  0.8669


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▄▃▂▂▂▁▂
validation_accuracy,▁▅▄▅▇█▇██▇

0,1
epoch,10.0
test_accuracy,0.8669
training_loss,0.41336
validation loss,0.35745
validation_accuracy,0.86967


[34m[1mwandb[0m: Agent Starting Run: o0q3vah8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6319, val_acc=0.8482, val_loss=0.41032161635779246
Epoch 2/10 - loss=0.3863, val_acc=0.8690, val_loss=0.3621803123398466
Epoch 3/10 - loss=0.3482, val_acc=0.8668, val_loss=0.36053820623089133
Epoch 4/10 - loss=0.3248, val_acc=0.8713, val_loss=0.3367102290093226
Epoch 5/10 - loss=0.3062, val_acc=0.8798, val_loss=0.33028242139786046
Epoch 6/10 - loss=0.2934, val_acc=0.8838, val_loss=0.3103834000682259
Epoch 7/10 - loss=0.2790, val_acc=0.8847, val_loss=0.3073644912756381
Epoch 8/10 - loss=0.2690, val_acc=0.8900, val_loss=0.29842738446706546
Epoch 9/10 - loss=0.2588, val_acc=0.8828, val_loss=0.3122128336770696
Epoch 10/10 - loss=0.2490, val_acc=0.8815, val_loss=0.3131750293829135
test accuracy  0.8809


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▂▂
validation_accuracy,▁▄▄▅▆▇▇█▇▇

0,1
epoch,10.0
test_accuracy,0.8809
training_loss,0.24904
validation loss,0.31318
validation_accuracy,0.8815


[34m[1mwandb[0m: Agent Starting Run: uichwsbf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.8016, val_acc=0.6790, val_loss=0.9804535881290728
Epoch 2/10 - loss=1.1582, val_acc=0.7352, val_loss=0.7594119776884702
Epoch 3/10 - loss=1.0158, val_acc=0.7695, val_loss=0.6610090867275565
Epoch 4/10 - loss=0.9390, val_acc=0.7933, val_loss=0.6011567021805156
Epoch 5/10 - loss=0.8901, val_acc=0.8023, val_loss=0.5623255178529079
Epoch 6/10 - loss=0.8583, val_acc=0.8102, val_loss=0.5341619190918158
Epoch 7/10 - loss=0.8363, val_acc=0.8175, val_loss=0.5149791021784477
Epoch 8/10 - loss=0.8186, val_acc=0.8195, val_loss=0.510186877484347
Epoch 9/10 - loss=0.8054, val_acc=0.8258, val_loss=0.4900451325941546
Epoch 10/10 - loss=0.7940, val_acc=0.8318, val_loss=0.48176495180232237
test accuracy  0.8226


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▂▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8226
training_loss,0.79401
validation loss,0.48176
validation_accuracy,0.83183


[34m[1mwandb[0m: Agent Starting Run: 9d1nyypy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5110, val_acc=0.8445, val_loss=0.41731685878230323
Epoch 2/10 - loss=0.3877, val_acc=0.8582, val_loss=0.4026808516989839
Epoch 3/10 - loss=0.3543, val_acc=0.8688, val_loss=0.3576988630079578
Epoch 4/10 - loss=0.3370, val_acc=0.8590, val_loss=0.3982048896122742
Epoch 5/10 - loss=0.3188, val_acc=0.8745, val_loss=0.3381999577785887
Epoch 6/10 - loss=0.3059, val_acc=0.8737, val_loss=0.3341030155522551
Epoch 7/10 - loss=0.2964, val_acc=0.8718, val_loss=0.34447520389734365
Epoch 8/10 - loss=0.2848, val_acc=0.8815, val_loss=0.32981068640999844
Epoch 9/10 - loss=0.2774, val_acc=0.8793, val_loss=0.33755768932031066
Epoch 10/10 - loss=0.2727, val_acc=0.8813, val_loss=0.34274082961877667
test accuracy  0.8747


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▇▃▆▂▁▂▁▂▂
validation_accuracy,▁▄▆▄▇▇▆███

0,1
epoch,10.0
test_accuracy,0.8747
training_loss,0.27268
validation loss,0.34274
validation_accuracy,0.88133


[34m[1mwandb[0m: Agent Starting Run: 6yoa9a6l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7681, val_acc=0.8290, val_loss=0.46376235509071556
Epoch 2/10 - loss=0.5850, val_acc=0.8490, val_loss=0.4136026527884151
Epoch 3/10 - loss=0.5490, val_acc=0.8470, val_loss=0.409510880212974
Epoch 4/10 - loss=0.5252, val_acc=0.8608, val_loss=0.3777728595698547
Epoch 5/10 - loss=0.5077, val_acc=0.8640, val_loss=0.36355409927132026
Epoch 6/10 - loss=0.4927, val_acc=0.8675, val_loss=0.357906690732336
Epoch 7/10 - loss=0.4810, val_acc=0.8693, val_loss=0.35458540147957424
Epoch 8/10 - loss=0.4691, val_acc=0.8705, val_loss=0.3494467253523409
Epoch 9/10 - loss=0.4596, val_acc=0.8643, val_loss=0.35960365520510756
Epoch 10/10 - loss=0.4504, val_acc=0.8675, val_loss=0.35112915327788824
test accuracy  0.8652


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▃▂▂▁▁▂▁
validation_accuracy,▁▄▄▆▇▇██▇▇

0,1
epoch,10.0
test_accuracy,0.8652
training_loss,0.45037
validation loss,0.35113
validation_accuracy,0.8675


[34m[1mwandb[0m: Agent Starting Run: wy4t3qli with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.9438, val_acc=0.8075, val_loss=0.5450329698841433
Epoch 2/10 - loss=0.6564, val_acc=0.8317, val_loss=0.4744028034904157
Epoch 3/10 - loss=0.6028, val_acc=0.8443, val_loss=0.4328132859490495
Epoch 4/10 - loss=0.5741, val_acc=0.8453, val_loss=0.4180453489393723
Epoch 5/10 - loss=0.5547, val_acc=0.8497, val_loss=0.3998252193053268
Epoch 6/10 - loss=0.5398, val_acc=0.8603, val_loss=0.38186287685823267
Epoch 7/10 - loss=0.5271, val_acc=0.8600, val_loss=0.38820815938232256
Epoch 8/10 - loss=0.5167, val_acc=0.8648, val_loss=0.36643093084671075
Epoch 9/10 - loss=0.5070, val_acc=0.8662, val_loss=0.3686045723369192
Epoch 10/10 - loss=0.4991, val_acc=0.8640, val_loss=0.3709058891053292
test accuracy  0.8578


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8578
training_loss,0.49913
validation loss,0.37091
validation_accuracy,0.864


[34m[1mwandb[0m: Agent Starting Run: 33ifnyay with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8068, val_acc=0.8245, val_loss=0.48460313163559043
Epoch 2/10 - loss=0.5657, val_acc=0.8507, val_loss=0.42441486305631687
Epoch 3/10 - loss=0.5233, val_acc=0.8438, val_loss=0.4250030600844261
Epoch 4/10 - loss=0.4951, val_acc=0.8500, val_loss=0.39805016474225263
Epoch 5/10 - loss=0.4755, val_acc=0.8645, val_loss=0.37296798847230667
Epoch 6/10 - loss=0.4583, val_acc=0.8717, val_loss=0.3495343054376769
Epoch 7/10 - loss=0.4457, val_acc=0.8670, val_loss=0.35775600224671744
Epoch 8/10 - loss=0.4335, val_acc=0.8725, val_loss=0.34672425176180693
Epoch 9/10 - loss=0.4234, val_acc=0.8738, val_loss=0.33049233409171475
Epoch 10/10 - loss=0.4134, val_acc=0.8697, val_loss=0.3574495601553445
test accuracy  0.8669


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▄▃▂▂▂▁▂
validation_accuracy,▁▅▄▅▇█▇██▇

0,1
epoch,10.0
test_accuracy,0.8669
training_loss,0.41336
validation loss,0.35745
validation_accuracy,0.86967


[34m[1mwandb[0m: Agent Starting Run: gwt7n34g with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8769, val_acc=0.8278, val_loss=0.49833848913031953
Epoch 2/10 - loss=0.6218, val_acc=0.8465, val_loss=0.43311498432685586
Epoch 3/10 - loss=0.5752, val_acc=0.8533, val_loss=0.40044057168346026
Epoch 4/10 - loss=0.5463, val_acc=0.8623, val_loss=0.3835845182125025
Epoch 5/10 - loss=0.5253, val_acc=0.8540, val_loss=0.3945468502034772
Epoch 6/10 - loss=0.5109, val_acc=0.8655, val_loss=0.36056368987632437
Epoch 7/10 - loss=0.4971, val_acc=0.8623, val_loss=0.3672698172302763
Epoch 8/10 - loss=0.4844, val_acc=0.8680, val_loss=0.353410330872395
Epoch 9/10 - loss=0.4744, val_acc=0.8667, val_loss=0.35411812229755574
Epoch 10/10 - loss=0.4652, val_acc=0.8643, val_loss=0.35689214784333245
test accuracy  0.8639


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▃▂▃▁▂▁▁▁
validation_accuracy,▁▄▅▇▆█▇██▇

0,1
epoch,10.0
test_accuracy,0.8639
training_loss,0.46523
validation loss,0.35689
validation_accuracy,0.86433


[34m[1mwandb[0m: Agent Starting Run: xlhea1wh with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7868, val_acc=0.8167, val_loss=0.5217478342201655
Epoch 2/10 - loss=0.4816, val_acc=0.8377, val_loss=0.4585569569009179
Epoch 3/10 - loss=0.4367, val_acc=0.8437, val_loss=0.43797609699630824
Epoch 4/10 - loss=0.4112, val_acc=0.8547, val_loss=0.40707383224604626
Epoch 5/10 - loss=0.3929, val_acc=0.8593, val_loss=0.39868272907460245
Epoch 6/10 - loss=0.3794, val_acc=0.8622, val_loss=0.38730276633180966
Epoch 7/10 - loss=0.3672, val_acc=0.8595, val_loss=0.383965068278298
Epoch 8/10 - loss=0.3567, val_acc=0.8680, val_loss=0.36583353707208216
Epoch 9/10 - loss=0.3465, val_acc=0.8702, val_loss=0.35970920867590317
Epoch 10/10 - loss=0.3392, val_acc=0.8698, val_loss=0.3568636316582727
test accuracy  0.8618


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▁▁▁▁
validation loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.8618
training_loss,0.33918
validation loss,0.35686
validation_accuracy,0.86983


[34m[1mwandb[0m: Agent Starting Run: kzgeivlg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8317, val_acc=0.8258, val_loss=0.4757579152108262
Epoch 2/10 - loss=0.5906, val_acc=0.8502, val_loss=0.41328346409049
Epoch 3/10 - loss=0.5434, val_acc=0.8562, val_loss=0.40813697109285646
Epoch 4/10 - loss=0.5149, val_acc=0.8628, val_loss=0.3739574488702956
Epoch 5/10 - loss=0.4938, val_acc=0.8658, val_loss=0.3692313030829563
Epoch 6/10 - loss=0.4785, val_acc=0.8693, val_loss=0.3513944451690005
Epoch 7/10 - loss=0.4640, val_acc=0.8740, val_loss=0.3431881958227336
Epoch 8/10 - loss=0.4528, val_acc=0.8783, val_loss=0.330285510888617
Epoch 9/10 - loss=0.4400, val_acc=0.8770, val_loss=0.33109005835320926
Epoch 10/10 - loss=0.4306, val_acc=0.8795, val_loss=0.32264725908270125
test accuracy  0.8748


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8748
training_loss,0.43059
validation loss,0.32265
validation_accuracy,0.8795


[34m[1mwandb[0m: Agent Starting Run: cz40h8yp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5077, val_acc=0.8492, val_loss=0.40737801492036063
Epoch 2/10 - loss=0.3722, val_acc=0.8715, val_loss=0.3396064193370988
Epoch 3/10 - loss=0.3384, val_acc=0.8583, val_loss=0.366624837734801
Epoch 4/10 - loss=0.3145, val_acc=0.8763, val_loss=0.3379841200786454
Epoch 5/10 - loss=0.2955, val_acc=0.8787, val_loss=0.3256897137472036
Epoch 6/10 - loss=0.2820, val_acc=0.8867, val_loss=0.31204421024490914
Epoch 7/10 - loss=0.2691, val_acc=0.8903, val_loss=0.31161403607265575
Epoch 8/10 - loss=0.2601, val_acc=0.8832, val_loss=0.31869695173533424
Epoch 9/10 - loss=0.2483, val_acc=0.8870, val_loss=0.32339436454480425
Epoch 10/10 - loss=0.2376, val_acc=0.8873, val_loss=0.32405208778690026
test accuracy  0.8852


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▁▁
validation loss,█▃▅▃▂▁▁▂▂▂
validation_accuracy,▁▅▃▆▆▇█▇▇▇

0,1
epoch,10.0
test_accuracy,0.8852
training_loss,0.23757
validation loss,0.32405
validation_accuracy,0.88733


[34m[1mwandb[0m: Agent Starting Run: uavnpf8p with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3505, val_acc=0.1033, val_loss=2.303114840127441
Epoch 2/10 - loss=2.3030, val_acc=0.0977, val_loss=2.302991830490144
Epoch 3/10 - loss=2.3029, val_acc=0.1017, val_loss=2.302877073553554
Epoch 4/10 - loss=2.3029, val_acc=0.1390, val_loss=2.3027453588016695
Epoch 5/10 - loss=2.3029, val_acc=0.0588, val_loss=2.3026822158058344
Epoch 6/10 - loss=2.3028, val_acc=0.0977, val_loss=2.3028041736631644
Epoch 7/10 - loss=2.3028, val_acc=0.1023, val_loss=2.302529165400271
Epoch 8/10 - loss=2.3028, val_acc=0.0950, val_loss=2.3026551629448933
Epoch 9/10 - loss=2.3027, val_acc=0.0995, val_loss=2.3026615993658277
Epoch 10/10 - loss=2.3027, val_acc=0.0995, val_loss=2.3024827947628483
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▁▁▁▁▁▁▁▁▁
validation loss,█▇▅▄▃▅▂▃▃▁
validation_accuracy,▅▄▅█▁▄▅▄▅▅

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30265
validation loss,2.30248
validation_accuracy,0.0995


[34m[1mwandb[0m: Agent Starting Run: cg0mzruy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5329, val_acc=0.8527, val_loss=0.4092622362524991
Epoch 2/10 - loss=0.3801, val_acc=0.8627, val_loss=0.37678530092138246
Epoch 3/10 - loss=0.3484, val_acc=0.8715, val_loss=0.3536841316474912
Epoch 4/10 - loss=0.3250, val_acc=0.8775, val_loss=0.33737200769110104
Epoch 5/10 - loss=0.3100, val_acc=0.8733, val_loss=0.3366078429537517
Epoch 6/10 - loss=0.2979, val_acc=0.8770, val_loss=0.3347923308453668
Epoch 7/10 - loss=0.2882, val_acc=0.8792, val_loss=0.3343706348091062
Epoch 8/10 - loss=0.2760, val_acc=0.8788, val_loss=0.3289205366126509
Epoch 9/10 - loss=0.2691, val_acc=0.8818, val_loss=0.3333668671057646
Epoch 10/10 - loss=0.2616, val_acc=0.8867, val_loss=0.3194607393409948
test accuracy  0.8772


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▄▂▂▂▂▂▂▁
validation_accuracy,▁▃▅▆▅▆▆▆▇█

0,1
epoch,10.0
test_accuracy,0.8772
training_loss,0.26155
validation loss,0.31946
validation_accuracy,0.88667


[34m[1mwandb[0m: Agent Starting Run: hp6rfeb7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6682, val_acc=0.8212, val_loss=0.4960866594166533
Epoch 2/10 - loss=0.4613, val_acc=0.8437, val_loss=0.43831543837941583
Epoch 3/10 - loss=0.4184, val_acc=0.8458, val_loss=0.41931714840191103
Epoch 4/10 - loss=0.3938, val_acc=0.8570, val_loss=0.39253523318361644
Epoch 5/10 - loss=0.3747, val_acc=0.8622, val_loss=0.37704187077881884
Epoch 6/10 - loss=0.3616, val_acc=0.8610, val_loss=0.37511828601601077
Epoch 7/10 - loss=0.3498, val_acc=0.8590, val_loss=0.3763203924808063
Epoch 8/10 - loss=0.3389, val_acc=0.8700, val_loss=0.35616856559083326
Epoch 9/10 - loss=0.3295, val_acc=0.8705, val_loss=0.35277462155179856
Epoch 10/10 - loss=0.3230, val_acc=0.8763, val_loss=0.33946002539413217
test accuracy  0.8677


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▅▃▃▃▃▂▂▁
validation_accuracy,▁▄▄▆▆▆▆▇▇█

0,1
epoch,10.0
test_accuracy,0.8677
training_loss,0.323
validation loss,0.33946
validation_accuracy,0.87633


[34m[1mwandb[0m: Agent Starting Run: vpvdawoy with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7300, val_acc=0.8450, val_loss=0.4435416368954084
Epoch 2/10 - loss=0.4073, val_acc=0.8620, val_loss=0.3860047900777413
Epoch 3/10 - loss=0.3647, val_acc=0.8710, val_loss=0.36089079914533434
Epoch 4/10 - loss=0.3395, val_acc=0.8740, val_loss=0.3447158601452188
Epoch 5/10 - loss=0.3219, val_acc=0.8697, val_loss=0.3428662401267571
Epoch 6/10 - loss=0.3082, val_acc=0.8785, val_loss=0.3388292589004029
Epoch 7/10 - loss=0.2960, val_acc=0.8803, val_loss=0.3249173169138768
Epoch 8/10 - loss=0.2861, val_acc=0.8807, val_loss=0.32296377960348327
Epoch 9/10 - loss=0.2752, val_acc=0.8800, val_loss=0.32424029818772904
Epoch 10/10 - loss=0.2681, val_acc=0.8812, val_loss=0.32541383176483074
test accuracy  0.8759


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▄▆▇▆▇████

0,1
epoch,10.0
test_accuracy,0.8759
training_loss,0.26814
validation loss,0.32541
validation_accuracy,0.88117


[34m[1mwandb[0m: Agent Starting Run: 3sdyw555 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8478, val_acc=0.8262, val_loss=0.4783358270358508
Epoch 2/10 - loss=0.5955, val_acc=0.8448, val_loss=0.4267881054358972
Epoch 3/10 - loss=0.5490, val_acc=0.8573, val_loss=0.38842400722619513
Epoch 4/10 - loss=0.5226, val_acc=0.8627, val_loss=0.3748567832569485
Epoch 5/10 - loss=0.5039, val_acc=0.8683, val_loss=0.35971185546307394
Epoch 6/10 - loss=0.4890, val_acc=0.8708, val_loss=0.3480860320712212
Epoch 7/10 - loss=0.4764, val_acc=0.8732, val_loss=0.34536249464667107
Epoch 8/10 - loss=0.4656, val_acc=0.8803, val_loss=0.332780731479129
Epoch 9/10 - loss=0.4552, val_acc=0.8767, val_loss=0.3356068055435803
Epoch 10/10 - loss=0.4470, val_acc=0.8777, val_loss=0.33938896037424465
test accuracy  0.8696


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▆▄▃▂▂▂▁▁▁
validation_accuracy,▁▃▅▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8696
training_loss,0.44701
validation loss,0.33939
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: 5lhugntd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5947, val_acc=0.8550, val_loss=0.40004481059941327
