#### ANSWER 7


In [2]:
from keras.datasets import mnist

#### ANSWER 8



In [None]:
import numpy as np
import wandb
from keras.datasets import fashion_mnist


# Neural Network Class: feed_forward_NN_4

class feed_forward_NN_8:
    def __init__(self,
                 layers,
                 optimizer,
                 learning_rate,
                 momentum,
                 beta1,
                 beta2,
                 beta,
                 epsilon,
                 weight_decay,
                 init_type,
                 activation,
                 loss_func
                 ):
    
        
        self.layers = layers
        self.layer_n = len(layers)
        self.optimizer = optimizer.lower()
        self.lr = learning_rate
        self.momentum = momentum
        self.beta1 = beta1
        self.beta2 = beta2
        self.beta = beta
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.init_type = init_type.lower()
        self.activation = activation.lower()
        self.loss_func=loss_func.lower()

        # Initialize Weights & BiaseS
        self.weights = []
        self.biases = []
        for i in range(self.layer_n - 1):
            if self.init_type == "xavier":
                # "Xavier" initialization
                w = np.random.randn(layers[i], layers[i+1]) * np.sqrt(1.0 / layers[i])
            else:
                # "random" initialization
                w = np.random.randn(layers[i], layers[i+1]) * np.sqrt(2.0 / layers[i])
            b = np.zeros((1, layers[i+1]))
            self.weights.append(w)
            self.biases.append(b)

        # initialize extra Params 
        if self.optimizer in ["momentum", "nesterov", "rmsprop", "adam", "nadam"]:
            self.v_w = [np.zeros_like(w) for w in self.weights]
            self.v_b = [np.zeros_like(b) for b in self.biases]
        if self.optimizer in ["adam", "nadam"]:
            self.m_w = [np.zeros_like(w) for w in self.weights]
            self.m_b = [np.zeros_like(b) for b in self.biases]
            self.t = 0

    # activations 
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
    
    def tanh(self, x):
        return np.tanh(x)
    
    def relu(self, x):
        return np.maximum(0, x)

    def activate(self, x):
        if self.activation == "sigmoid":
            return self.sigmoid(x)
        elif self.activation == "tanh":
            return self.tanh(x)
        elif self.activation == "relu":
            return self.relu(x)
        else:
            return self.sigmoid(x) 
        
    # derivatives
    def derivative(self, a):

        if self.activation == "sigmoid":
            return a * (1 - a)
        elif self.activation == "tanh":
            return 1 - a**2
        elif self.activation == "relu":
            return (a > 0).astype(float)
        else:
            return a * (1 - a) 

    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    # Forward Pass
    def forward_pass(self, x):
        self.h = [x]  
        # Hidden layers
        for i in range(self.layer_n - 2):
            z = np.dot(self.h[i], self.weights[i]) + self.biases[i]
            act = self.activate(z)
            self.h.append(act)
        # Output layer- softmax
        z_out = np.dot(self.h[-1], self.weights[-1]) + self.biases[-1]
        out = self.softmax(z_out)
        self.h.append(out)
        return self.h

    # Backward Pass
    def backward_prop(self, y_true):
        m = y_true.shape[0]
        dw = [None] * (self.layer_n - 1)
        db = [None] * (self.layer_n - 1)

        # Cross-entropy derivative for output layer
        if self.loss_func=="cross_entropy":
            delta = self.h[-1] - y_true  # shape: (batch_size, output_dim)
        elif self.loss_func=="sq_error":
            batch_size_sq=len(self.h[-1])
            classes_sq=len(self.h[-1][0])
            delta=np.zeros((batch_size_sq,classes_sq))

            for i in range(batch_size_sq):
                jacobian_softmax= np.diag(self.h[-1][i]) - np.outer(self.h[-1][i], self.h[-1][i])
                # print(jacobian_softmax.shape)
                # print(self.h[-1][i])    
                delta[i]= 2*np.dot(self.h[-1][i]-y_true[i], jacobian_softmax)
                
        else:
            delta = self.h[-1] - y_true 


        # Propagation
        for i in reversed(range(self.layer_n - 1)):
            dw[i] = np.dot(self.h[i].T, delta) / m
            db[i] = np.sum(delta, axis=0, keepdims=True) / m
            if i > 0:
                # For hidden layers, multiply by derivative of activation
                delta = np.dot(delta, self.weights[i].T) * self.derivative(self.h[i])
        return dw, db

    # Param Updates for "Non-Nesterov" 
    def _update_params(self, dw, db):
        # Add weight decay to each gradient
        for i in range(self.layer_n - 1):
            dw[i] += self.weight_decay * self.weights[i]

        if self.optimizer == "sgd":
            for i in range(self.layer_n - 1):
                self.weights[i] -= self.lr * dw[i]
                self.biases[i] -= self.lr * db[i]

        elif self.optimizer == "momentum":
            for i in range(self.layer_n - 1):
                self.v_w[i] = self.momentum * self.v_w[i] + dw[i]
                self.v_b[i] = self.momentum * self.v_b[i] + db[i]
                self.weights[i] -= self.lr * self.v_w[i]
                self.biases[i] -= self.lr * self.v_b[i]

        elif self.optimizer == "rmsprop":
            for i in range(self.layer_n - 1):
                self.v_w[i] = self.beta* self.v_w[i] + (1 - self.beta) * (dw[i] ** 2)
                self.v_b[i] = self.beta * self.v_b[i] + (1 - self.beta) * (db[i] ** 2)
                self.weights[i] -= self.lr * dw[i] / (np.sqrt(self.v_w[i]) + self.epsilon)
                self.biases[i]  -= self.lr * db[i] / (np.sqrt(self.v_b[i]) + self.epsilon)

        elif self.optimizer == "adam":
            self.t += 1
            for i in range(self.layer_n - 1):
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                # bias correction
                m_w_hat = self.m_w[i] / (1 - self.beta1 ** self.t)
                m_b_hat = self.m_b[i] / (1 - self.beta1 ** self.t)
                v_w_hat = self.v_w[i] / (1 - self.beta2 ** self.t)
                v_b_hat = self.v_b[i] / (1 - self.beta2 ** self.t)

                self.weights[i] -= self.lr * m_w_hat / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i]  -= self.lr * m_b_hat / (np.sqrt(v_b_hat) + self.epsilon)

        elif self.optimizer == "nadam":
            self.t += 1
            for i in range(self.layer_n - 1):
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                # bias correction
                m_w_hat = self.m_w[i] / (1 - self.beta1 ** (self.t + 1))
                m_b_hat = self.m_b[i] / (1 - self.beta1 ** (self.t + 1))
                v_w_hat = self.v_w[i] / (1 - self.beta2 ** (self.t + 1))
                v_b_hat = self.v_b[i] / (1 - self.beta2 ** (self.t + 1))

                grad_term_w = self.beta1 * m_w_hat + (1 - self.beta1) * dw[i] / (1 - self.beta1 ** (self.t + 1))
                grad_term_b = self.beta1 * m_b_hat + (1 - self.beta1) * db[i] / (1 - self.beta1 ** (self.t + 1))

                self.weights[i] -= self.lr * grad_term_w / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i]  -= self.lr * grad_term_b / (np.sqrt(v_b_hat) + self.epsilon)

    # Training Step  with "Nesterov"
    def _train_step(self, x_batch, y_batch):
        if self.optimizer == "nesterov":
            # to look-ahead: w_look = w - momentum * v
            for i in range(self.layer_n - 1):
                self.weights[i] -= self.lr*self.momentum * self.v_w[i]
                self.biases[i]  -= self.lr*self.momentum * self.v_b[i]

            # Forward at the look-ahead position
            self.forward_pass(x_batch)
            out = self.h[-1]
            l2_norm_weights = 0
            for i in range(len(self.weights)):
                l2_norm_weights += np.sum(self.weights[i] ** 2)
            # for i in range(len(self.biases)):
            #     l2_norm_bias += np.sum(self.biases[i] ** 2)
                    
            l2_norm_params = l2_norm_weights #+ l2_norm_bias

            if self.loss_func=="cross_entropy":
                loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params # (1e-10) to prevent underflow
            elif self.loss_func=="sq_error" :
                loss= 0.5 * np.mean(np.sum((out - y_batch)**2, axis=1))
            else:
                loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params
            
            
            #loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params # (1e-10) to prevent underflow
            #loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis=1))
            dW, dB = self.backward_prop(y_batch)

            # add weight decay here
            for i in range(self.layer_n - 1):
                dW[i] += self.weight_decay * self.weights[i]

            # backward at the look-ahead position (go back to w_t)
            for i in range(self.layer_n - 1):
                self.weights[i] += self.lr*self.momentum * self.v_w[i]
                self.biases[i]  += self.lr*self.momentum * self.v_b[i]

            # update velocity: u_t = momentum*u_{t-1} + dW
            for i in range(self.layer_n - 1):
                self.v_w[i] = self.momentum * self.v_w[i] + dW[i]
                self.v_b[i] = self.momentum * self.v_b[i] + dB[i]

            # final param update: w = w - lr*u_t
            for i in range(self.layer_n - 1):
                self.weights[i] -= self.lr * self.v_w[i]
                self.biases[i]  -= self.lr * self.v_b[i]

            return loss
        else:
            # Normal forward/back
            self.forward_pass(x_batch)
            out = self.h[-1]

            l2_norm_weights=0
            
            for i in range(len(self.weights)):
                l2_norm_weights += np.sum(self.weights[i] ** 2)
            # for i in range(len(self.biases)):
            #     l2_norm_bias += np.sum(self.biases[i] ** 2)
                    
            l2_norm_params = l2_norm_weights #+ l2_norm_bias

            if self.loss_func=="cross_entropy":
                loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params # (1e-10) to prevent underflow
            elif self.loss_func=="sq_error" :
                loss= 0.5 * np.mean(np.sum((out - y_batch)**2, axis=1))
            else:
                loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params    
            
            
             

            #loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis=1))
            dW, dB = self.backward_prop(y_batch)
            self._update_params(dW, dB)
            return loss

    # Outer Training Loop 
    def training(self, x_train, y_train, x_val, y_val, epochs, batch_size):
       
        for ep in range(epochs):
            idx = np.random.permutation(x_train.shape[0])
            x_train_shuff = x_train[idx]
            y_train_shuff = y_train[idx]
            n_batches = len(x_train) // batch_size
            epoch_loss = 0.0
            for b in range(n_batches):
                start = b * batch_size
                end = start + batch_size
                x_batch = x_train_shuff[start:end]
                y_batch = y_train_shuff[start:end]
                loss = self._train_step(x_batch, y_batch)
                epoch_loss += loss
            avg_loss = epoch_loss / n_batches

            # Validation

            preds = self.predict(x_val)
            val_labels = np.argmax(y_val, axis=1)
            val_acc = np.mean(preds == val_labels)

            val_outputs = self.forward_pass(x_val)[-1]
        
            # Cross-entropy loss for validation
            #val_loss = -np.mean(np.sum(y_val * np.log(val_outputs + 1e-10), axis=1))
            # training
            preds_train = self.predict(x_train)
            train_labels = np.argmax(y_train, axis=1)
            train_acc = np.mean(preds_train == train_labels)

            l2_norm_weights=0
            
            for i in range(len(self.weights)):
                l2_norm_weights += np.sum(self.weights[i] ** 2)
            # for i in range(len(self.biases)):
            #     l2_norm_bias += np.sum(self.biases[i] ** 2)
                    
            l2_norm_params = l2_norm_weights

            if self.loss_func=="cross_entropy":
                val_loss = -np.mean(np.sum(y_val * np.log(val_outputs + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params # (1e-10) to prevent underflow
            elif self.loss_func=="sq_error" :
                val_loss= 0.5 * np.mean(np.sum((val_outputs - y_val)**2, axis=1))
            else:
                val_loss = -np.mean(np.sum(y_val * np.log(val_outputs + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params

            # Log metrics to wandb
            wandb.log({"epoch": ep+1, "training_loss": avg_loss, "validation_accuracy": val_acc, "training accuracy": train_acc "validation loss": val_loss})
            print(f"Epoch {ep+1}/{epochs} - loss={avg_loss:.4f}, val_acc={val_acc:.4f}, val_loss={val_loss}" )

    #Prediction 
    def predict(self, X):
        self.forward_pass(X)
        return np.argmax(self.h[-1], axis=1)




# train_sweep() function

def train_sweep():
    # Initialize wandb
    wandb.init()
    config = wandb.config

    #custom run name from hyperparameters

    run_name = f"hl_{config.num_layers}_hs_{config.hidden_size}_bs_{config.batch_size}_ac_{config.activation}_opt_{config.optimizer}"
    wandb.run.name = run_name

    if config["dataset"] == "fashion_mnist":
        (x_train_full, y_train_full), (x_test, y_test) = fashion_mnist.load_data()
    else:  # "mnist"
        (x_train_full, y_train_full), (x_test, y_test) = mnist.load_data()

    x_train_full = x_train_full.reshape(x_train_full.shape[0], -1) / 255.0
    x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

    np.random.seed(42)
    idx = np.arange(x_train_full.shape[0])
    np.random.shuffle(idx)
    x_train_full = x_train_full[idx]
    y_train_full = y_train_full[idx]

    # 90% train, 10% validation
    train_size = int(0.9 * len(x_train_full))
    x_train, y_train = x_train_full[:train_size], y_train_full[:train_size]
    x_val, y_val = x_train_full[train_size:], y_train_full[train_size:]

    num_classes = 10
    y_train_1h = np.eye(num_classes)[y_train]
    y_val_1h = np.eye(num_classes)[y_val]
    y_test_1h = np.eye(num_classes)[y_test]

    # Build model
    # layers = [784] + [hidden_size] * num_layers + [10]
    model = feed_forward_NN_8(
        layers=[784] + [config["hidden_size"]] * config["num_layers"] + [10],
        optimizer=config["optimizer"],
        learning_rate=config["learning_rate"],
        momentum=config["momentum"],
        beta1=config["beta1"],
        beta2=config["beta2"],
        beta=config["beta"],
        epsilon=config["epsilon"],
        weight_decay=config["weight_decay"],
        weight_init=config["weight_init"],
        activation=config["activation"],
        loss=config["loss"],
    )

    # Train
    model.training(
        x_train=x_train,
        y_train=y_train_1h,
        x_val=x_val,
        y_val=y_val_1h,
        epochs=config["epochs"],
        batch_size=config["batch_size"],
    )

    #Evaluation on test set
    test_preds = model.predict(x_test)
    test_labels = np.argmax(y_test_1h, axis=1)
    test_acc = np.mean(test_preds == test_labels)
    
    wandb.log({"test_accuracy": test_acc})
    print("test accuracy ",test_acc)


# sweep configuration
sweep_config = {
    "method": "random", 
    "metric": {
        "name": "validation_accuracy",
        "goal": "maximize"
    },
    "parameters": {
        "epochs": {"values": [5,10]},
        "num_layers": {"values": [3,4,5]},
        "hidden_size": {"values": [32,16,64]},
        "weight_decay": {"values": [0.0, 0.0005, 0.5]},
        "learning_rate": {"values": [1e-3, 1e-4]},
        "optimizer": {"values": ["sgd", "momentum", "nag", "rmsprop", "adam", "nadam"]},
        "batch_size": {"values": [32,64,128]},
        "init_type": {"values": ["random","xavier"]},
        "activation": {"values": [ "tanh", "relu"]},
        "momentum": {"values": [0.9]},
        "beta1": {"values": [0.9]},
        "beta2": {"values": [0.999]},
        "beta_rms": {"values": [0.9]},
        "epsilon": {"values": [1e-8]},
        "loss_func":{"values":["cross_entropy"]}
    }
}


# Running the sweep



if __name__ == "__main__":
    # Creating sweep
    sweep_id = wandb.sweep(sweep_config, project="q4_sweep_project")
    # Launching sweep agent
    wandb.agent(sweep_id, function=train_sweep)





[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: g2aoqjdt
Sweep URL: https://wandb.ai/ed24s401-indian-institute-of-technology-madras/q4_sweep_project/sweeps/g2aoqjdt


[34m[1mwandb[0m: Agent Starting Run: phf8zdp6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: Currently logged in as: [33med24s401[0m ([33med24s401-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/15 - loss=0.2674, val_acc=0.7638, val_loss=0.1743751192722
Epoch 2/15 - loss=0.1500, val_acc=0.8127, val_loss=0.14090028356791603
Epoch 3/15 - loss=0.1293, val_acc=0.8278, val_loss=0.12823391681726123
Epoch 4/15 - loss=0.1195, val_acc=0.8408, val_loss=0.11902486080913699
Epoch 5/15 - loss=0.1134, val_acc=0.8455, val_loss=0.11426869310717432
Epoch 6/15 - loss=0.1090, val_acc=0.8462, val_loss=0.11188272051937002
Epoch 7/15 - loss=0.1058, val_acc=0.8507, val_loss=0.11078548537210581
Epoch 8/15 - loss=0.1033, val_acc=0.8555, val_loss=0.10661031260267315
Epoch 9/15 - loss=0.1010, val_acc=0.8540, val_loss=0.10798276465733196
Epoch 10/15 - loss=0.0991, val_acc=0.8595, val_loss=0.10309541399735062
Epoch 11/15 - loss=0.0971, val_acc=0.8573, val_loss=0.10286096169843634
Epoch 12/15 - loss=0.0955, val_acc=0.8622, val_loss=0.10150745408797428
Epoch 13/15 - loss=0.0941, val_acc=0.8645, val_loss=0.09836587726005193
Epoch 14/15 - loss=0.0928, val_acc=0.8658, val_loss=0.09854203803645092
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇███▇

0,1
epoch,15.0
test_accuracy,0.8513
training_loss,0.09155
validation loss,0.10188
validation_accuracy,0.85783


[34m[1mwandb[0m: Agent Starting Run: u237z7u1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5588, val_acc=0.8448, val_loss=0.4271538644512599
Epoch 2/15 - loss=0.3943, val_acc=0.8480, val_loss=0.40445947172242025
Epoch 3/15 - loss=0.3609, val_acc=0.8647, val_loss=0.3622889719636303
Epoch 4/15 - loss=0.3390, val_acc=0.8708, val_loss=0.3485043947176629
Epoch 5/15 - loss=0.3253, val_acc=0.8713, val_loss=0.35399132639249614
Epoch 6/15 - loss=0.3102, val_acc=0.8805, val_loss=0.32743767254658884
Epoch 7/15 - loss=0.3012, val_acc=0.8778, val_loss=0.33563633969756895
Epoch 8/15 - loss=0.2913, val_acc=0.8673, val_loss=0.3643031518449673
Epoch 9/15 - loss=0.2855, val_acc=0.8812, val_loss=0.32097279264277906
Epoch 10/15 - loss=0.2787, val_acc=0.8770, val_loss=0.3393258879564772
Epoch 11/15 - loss=0.2732, val_acc=0.8807, val_loss=0.3367238459348289
Epoch 12/15 - loss=0.2634, val_acc=0.8760, val_loss=0.33319998529664646
Epoch 13/15 - loss=0.2609, val_acc=0.8798, val_loss=0.33265992856267446
Epoch 14/15 - loss=0.2546, val_acc=0.8857, val_loss=0.3187087963350519
Epoch 15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▇▄▃▃▂▂▄▁▂▂▂▂▁▂
validation_accuracy,▁▂▄▅▆▇▇▅▇▇▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8713
training_loss,0.24972
validation loss,0.32963
validation_accuracy,0.88


[34m[1mwandb[0m: Agent Starting Run: hkk486l4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1280, val_acc=0.7710, val_loss=0.6874403469354357
Epoch 2/15 - loss=0.5903, val_acc=0.8093, val_loss=0.5478209353920453
Epoch 3/15 - loss=0.5036, val_acc=0.8313, val_loss=0.48710003291639853
Epoch 4/15 - loss=0.4640, val_acc=0.8400, val_loss=0.459879015729208
Epoch 5/15 - loss=0.4416, val_acc=0.8440, val_loss=0.4455406488407984
Epoch 6/15 - loss=0.4243, val_acc=0.8527, val_loss=0.4266782544020839
Epoch 7/15 - loss=0.4113, val_acc=0.8532, val_loss=0.4175954601943847
Epoch 8/15 - loss=0.4017, val_acc=0.8563, val_loss=0.40938636345511836
Epoch 9/15 - loss=0.3921, val_acc=0.8563, val_loss=0.40278812722555274
Epoch 10/15 - loss=0.3844, val_acc=0.8590, val_loss=0.395092607387703
Epoch 11/15 - loss=0.3772, val_acc=0.8623, val_loss=0.38809914083643315
Epoch 12/15 - loss=0.3697, val_acc=0.8593, val_loss=0.3885536930151969
Epoch 13/15 - loss=0.3639, val_acc=0.8633, val_loss=0.3797105619284139
Epoch 14/15 - loss=0.3589, val_acc=0.8652, val_loss=0.37356018365124766
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8597
training_loss,0.35299
validation loss,0.37165
validation_accuracy,0.86567


[34m[1mwandb[0m: Agent Starting Run: 7eo6koov with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5558, val_acc=0.8328, val_loss=0.43777155646277605
Epoch 2/15 - loss=0.4027, val_acc=0.8590, val_loss=0.3860200103252417
Epoch 3/15 - loss=0.3684, val_acc=0.8703, val_loss=0.36221018302277114
Epoch 4/15 - loss=0.3451, val_acc=0.8683, val_loss=0.36754871587464294
Epoch 5/15 - loss=0.3293, val_acc=0.8677, val_loss=0.35029306676145977
Epoch 6/15 - loss=0.3156, val_acc=0.8682, val_loss=0.35125727292328474
Epoch 7/15 - loss=0.3063, val_acc=0.8582, val_loss=0.38536835636902395
Epoch 8/15 - loss=0.2972, val_acc=0.8767, val_loss=0.3388813449871443
Epoch 9/15 - loss=0.2906, val_acc=0.8737, val_loss=0.3436672801769622
Epoch 10/15 - loss=0.2836, val_acc=0.8790, val_loss=0.32317606716566977
Epoch 11/15 - loss=0.2767, val_acc=0.8710, val_loss=0.33245739693126186
Epoch 12/15 - loss=0.2706, val_acc=0.8812, val_loss=0.33168192896185533
Epoch 13/15 - loss=0.2654, val_acc=0.8838, val_loss=0.31609606544582625
Epoch 14/15 - loss=0.2602, val_acc=0.8830, val_loss=0.3224808191262113
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▃▃▅▂▃▁▂▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▄▇▇▇▆███▇

0,1
epoch,15.0
test_accuracy,0.8687
training_loss,0.25581
validation loss,0.32548
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: 7bims42w with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gwswzsix with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: 9uczbktc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2262, val_acc=0.7972, val_loss=0.15340633505537074
Epoch 2/15 - loss=0.1351, val_acc=0.8230, val_loss=0.12849873562126413
Epoch 3/15 - loss=0.1186, val_acc=0.8365, val_loss=0.11654167326939735
Epoch 4/15 - loss=0.1104, val_acc=0.8438, val_loss=0.1116252541829645
Epoch 5/15 - loss=0.1056, val_acc=0.8525, val_loss=0.10642589802703982
Epoch 6/15 - loss=0.1016, val_acc=0.8510, val_loss=0.10472057925021
Epoch 7/15 - loss=0.0987, val_acc=0.8575, val_loss=0.10131341121112283
Epoch 8/15 - loss=0.0963, val_acc=0.8600, val_loss=0.09995529280625355
Epoch 9/15 - loss=0.0943, val_acc=0.8628, val_loss=0.09797341864225681
Epoch 10/15 - loss=0.0926, val_acc=0.8635, val_loss=0.09862472186706352
Epoch 11/15 - loss=0.0912, val_acc=0.8643, val_loss=0.09677629532947156
Epoch 12/15 - loss=0.0895, val_acc=0.8667, val_loss=0.09631917740106112
Epoch 13/15 - loss=0.0884, val_acc=0.8690, val_loss=0.09333225153615081
Epoch 14/15 - loss=0.0872, val_acc=0.8700, val_loss=0.09374016410840073
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8624
training_loss,0.086
validation loss,0.0937
validation_accuracy,0.86867


[34m[1mwandb[0m: Agent Starting Run: 0cz8wsny with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2262, val_acc=0.7972, val_loss=0.15340633505537074
Epoch 2/15 - loss=0.1351, val_acc=0.8230, val_loss=0.12849873562126413
Epoch 3/15 - loss=0.1186, val_acc=0.8365, val_loss=0.11654167326939735
Epoch 4/15 - loss=0.1104, val_acc=0.8438, val_loss=0.1116252541829645
Epoch 5/15 - loss=0.1056, val_acc=0.8525, val_loss=0.10642589802703982
Epoch 6/15 - loss=0.1016, val_acc=0.8510, val_loss=0.10472057925021
Epoch 7/15 - loss=0.0987, val_acc=0.8575, val_loss=0.10131341121112283
Epoch 8/15 - loss=0.0963, val_acc=0.8600, val_loss=0.09995529280625355
Epoch 9/15 - loss=0.0943, val_acc=0.8628, val_loss=0.09797341864225681
Epoch 10/15 - loss=0.0926, val_acc=0.8635, val_loss=0.09862472186706352
Epoch 11/15 - loss=0.0912, val_acc=0.8643, val_loss=0.09677629532947156
Epoch 12/15 - loss=0.0895, val_acc=0.8667, val_loss=0.09631917740106112
Epoch 13/15 - loss=0.0884, val_acc=0.8690, val_loss=0.09333225153615081
Epoch 14/15 - loss=0.0872, val_acc=0.8700, val_loss=0.09374016410840073
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8624
training_loss,0.086
validation loss,0.0937
validation_accuracy,0.86867


[34m[1mwandb[0m: Agent Starting Run: b90msnl2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1524, val_acc=0.8313, val_loss=0.1245255895745075
Epoch 2/15 - loss=0.1118, val_acc=0.8493, val_loss=0.10794346530513453
Epoch 3/15 - loss=0.1019, val_acc=0.8608, val_loss=0.10114784327619517
Epoch 4/15 - loss=0.0959, val_acc=0.8623, val_loss=0.0987054971601063
Epoch 5/15 - loss=0.0916, val_acc=0.8738, val_loss=0.09158856352115755
Epoch 6/15 - loss=0.0883, val_acc=0.8640, val_loss=0.09430304095777431
Epoch 7/15 - loss=0.0862, val_acc=0.8655, val_loss=0.10152089438475206
Epoch 8/15 - loss=0.0835, val_acc=0.8730, val_loss=0.09048963897219622
Epoch 9/15 - loss=0.0821, val_acc=0.8745, val_loss=0.08900585688577257
Epoch 10/15 - loss=0.0799, val_acc=0.8692, val_loss=0.09298489125222162
Epoch 11/15 - loss=0.0777, val_acc=0.8758, val_loss=0.0892168823899104
Epoch 12/15 - loss=0.0771, val_acc=0.8790, val_loss=0.08838762237306609
Epoch 13/15 - loss=0.0758, val_acc=0.8842, val_loss=0.08579004300847413
Epoch 14/15 - loss=0.0741, val_acc=0.8825, val_loss=0.08606499442054288
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▃▄▂▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▅▇▅▆▇▇▆▇▇███

0,1
epoch,15.0
test_accuracy,0.8724
training_loss,0.07339
validation loss,0.08512
validation_accuracy,0.8825


[34m[1mwandb[0m: Agent Starting Run: 6wz5blpo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2557, val_acc=0.7805, val_loss=0.1707722580727658
Epoch 2/15 - loss=0.1444, val_acc=0.8188, val_loss=0.1329447690918598
Epoch 3/15 - loss=0.1219, val_acc=0.8345, val_loss=0.1195483380835641
Epoch 4/15 - loss=0.1125, val_acc=0.8422, val_loss=0.11316655820370497
Epoch 5/15 - loss=0.1070, val_acc=0.8470, val_loss=0.10848648750904173
Epoch 6/15 - loss=0.1027, val_acc=0.8533, val_loss=0.10580834427994996
Epoch 7/15 - loss=0.0995, val_acc=0.8598, val_loss=0.10260613500171341
Epoch 8/15 - loss=0.0971, val_acc=0.8607, val_loss=0.10063556806982776
Epoch 9/15 - loss=0.0948, val_acc=0.8617, val_loss=0.09884146377346889
Epoch 10/15 - loss=0.0930, val_acc=0.8623, val_loss=0.09955211952367332
Epoch 11/15 - loss=0.0915, val_acc=0.8637, val_loss=0.09715185716357451
Epoch 12/15 - loss=0.0897, val_acc=0.8650, val_loss=0.09731779821621647
Epoch 13/15 - loss=0.0885, val_acc=0.8693, val_loss=0.09411591335441452
Epoch 14/15 - loss=0.0873, val_acc=0.8658, val_loss=0.09497111199991268
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8636
training_loss,0.08612
validation loss,0.09472
validation_accuracy,0.868


[34m[1mwandb[0m: Agent Starting Run: iqx5eoul with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2701, val_acc=0.7712, val_loss=0.17233303914369377
Epoch 2/15 - loss=0.1489, val_acc=0.8120, val_loss=0.14023549319911544
Epoch 3/15 - loss=0.1283, val_acc=0.8308, val_loss=0.12563040785218538
Epoch 4/15 - loss=0.1190, val_acc=0.8403, val_loss=0.11926689887964766
Epoch 5/15 - loss=0.1133, val_acc=0.8457, val_loss=0.11464305398695039
Epoch 6/15 - loss=0.1091, val_acc=0.8485, val_loss=0.11144482326195737
Epoch 7/15 - loss=0.1059, val_acc=0.8540, val_loss=0.10865193871514116
Epoch 8/15 - loss=0.1035, val_acc=0.8557, val_loss=0.1063017702072951
Epoch 9/15 - loss=0.1012, val_acc=0.8563, val_loss=0.10551612705504892
Epoch 10/15 - loss=0.0994, val_acc=0.8577, val_loss=0.10394982501700006
Epoch 11/15 - loss=0.0977, val_acc=0.8590, val_loss=0.10234280924618876
Epoch 12/15 - loss=0.0959, val_acc=0.8615, val_loss=0.10259491782410007
Epoch 13/15 - loss=0.0947, val_acc=0.8613, val_loss=0.10039957074187579
Epoch 14/15 - loss=0.0934, val_acc=0.8668, val_loss=0.0988652795390645
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.09208
validation loss,0.09916
validation_accuracy,0.86083


[34m[1mwandb[0m: Agent Starting Run: hdgqxvx8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1524, val_acc=0.8313, val_loss=0.1245255895745075
Epoch 2/15 - loss=0.1118, val_acc=0.8493, val_loss=0.10794346530513453
Epoch 3/15 - loss=0.1019, val_acc=0.8608, val_loss=0.10114784327619517
Epoch 4/15 - loss=0.0959, val_acc=0.8623, val_loss=0.0987054971601063
Epoch 5/15 - loss=0.0916, val_acc=0.8738, val_loss=0.09158856352115755
Epoch 6/15 - loss=0.0883, val_acc=0.8640, val_loss=0.09430304095777431
Epoch 7/15 - loss=0.0862, val_acc=0.8655, val_loss=0.10152089438475206
Epoch 8/15 - loss=0.0835, val_acc=0.8730, val_loss=0.09048963897219622
Epoch 9/15 - loss=0.0821, val_acc=0.8745, val_loss=0.08900585688577257
Epoch 10/15 - loss=0.0799, val_acc=0.8692, val_loss=0.09298489125222162
Epoch 11/15 - loss=0.0777, val_acc=0.8758, val_loss=0.0892168823899104
Epoch 12/15 - loss=0.0771, val_acc=0.8790, val_loss=0.08838762237306609
Epoch 13/15 - loss=0.0758, val_acc=0.8842, val_loss=0.08579004300847413
Epoch 14/15 - loss=0.0741, val_acc=0.8825, val_loss=0.08606499442054288
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▃▄▂▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▅▇▅▆▇▇▆▇▇███

0,1
epoch,15.0
test_accuracy,0.8724
training_loss,0.07339
validation loss,0.08512
validation_accuracy,0.8825


[34m[1mwandb[0m: Agent Starting Run: grrae8qy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1413, val_acc=0.8418, val_loss=0.11187608837434508
Epoch 2/15 - loss=0.1032, val_acc=0.8572, val_loss=0.10292216007550754
Epoch 3/15 - loss=0.0944, val_acc=0.8645, val_loss=0.09652480990400185
Epoch 4/15 - loss=0.0897, val_acc=0.8685, val_loss=0.09476959669844394
Epoch 5/15 - loss=0.0862, val_acc=0.8678, val_loss=0.0945033290148884
Epoch 6/15 - loss=0.0822, val_acc=0.8603, val_loss=0.09800798271184999
Epoch 7/15 - loss=0.0804, val_acc=0.8767, val_loss=0.08819866380313499
Epoch 8/15 - loss=0.0776, val_acc=0.8752, val_loss=0.09065071683291501
Epoch 9/15 - loss=0.0767, val_acc=0.8832, val_loss=0.0857508196502852
Epoch 10/15 - loss=0.0749, val_acc=0.8777, val_loss=0.08825522418715129
Epoch 11/15 - loss=0.0735, val_acc=0.8748, val_loss=0.08935921443706722
Epoch 12/15 - loss=0.0709, val_acc=0.8795, val_loss=0.08773304187319131
Epoch 13/15 - loss=0.0707, val_acc=0.8830, val_loss=0.08585769123801612
Epoch 14/15 - loss=0.0688, val_acc=0.8865, val_loss=0.08314714065452436
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▅▂▃▂▂▃▂▂▁▃
validation_accuracy,▁▃▅▅▅▄▆▆▇▇▆▇▇█▆

0,1
epoch,15.0
test_accuracy,0.8671
training_loss,0.06787
validation loss,0.0915
validation_accuracy,0.87333


[34m[1mwandb[0m: Agent Starting Run: 7nkr7dbw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1432, val_acc=0.8278, val_loss=0.12019735603298004
Epoch 2/15 - loss=0.1063, val_acc=0.8537, val_loss=0.10458825745834655
Epoch 3/15 - loss=0.0980, val_acc=0.8640, val_loss=0.09894489446275748
Epoch 4/15 - loss=0.0928, val_acc=0.8628, val_loss=0.09815624616556345
Epoch 5/15 - loss=0.0885, val_acc=0.8652, val_loss=0.09491658417853527
Epoch 6/15 - loss=0.0853, val_acc=0.8692, val_loss=0.09312992971938058
Epoch 7/15 - loss=0.0827, val_acc=0.8712, val_loss=0.09282399194692598
Epoch 8/15 - loss=0.0809, val_acc=0.8782, val_loss=0.08817061121725349
Epoch 9/15 - loss=0.0787, val_acc=0.8765, val_loss=0.09128837832752773
Epoch 10/15 - loss=0.0770, val_acc=0.8765, val_loss=0.08810301816926316
Epoch 11/15 - loss=0.0753, val_acc=0.8757, val_loss=0.09139334683740018
Epoch 12/15 - loss=0.0741, val_acc=0.8788, val_loss=0.08844351168724683
Epoch 13/15 - loss=0.0731, val_acc=0.8822, val_loss=0.08722778262327051
Epoch 14/15 - loss=0.0716, val_acc=0.8777, val_loss=0.08822212025571004
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▃▂▂▁▂▁▂▁▁▁▁
validation_accuracy,▁▄▆▆▆▆▇▇▇▇▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8703
training_loss,0.07025
validation loss,0.0887
validation_accuracy,0.87783


[34m[1mwandb[0m: Agent Starting Run: 81jcm5vp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5558, val_acc=0.8328, val_loss=0.43777155646277605
Epoch 2/15 - loss=0.4027, val_acc=0.8590, val_loss=0.3860200103252417
Epoch 3/15 - loss=0.3684, val_acc=0.8703, val_loss=0.36221018302277114
Epoch 4/15 - loss=0.3451, val_acc=0.8683, val_loss=0.36754871587464294
Epoch 5/15 - loss=0.3293, val_acc=0.8677, val_loss=0.35029306676145977
Epoch 6/15 - loss=0.3156, val_acc=0.8682, val_loss=0.35125727292328474
Epoch 7/15 - loss=0.3063, val_acc=0.8582, val_loss=0.38536835636902395
Epoch 8/15 - loss=0.2972, val_acc=0.8767, val_loss=0.3388813449871443
Epoch 9/15 - loss=0.2906, val_acc=0.8737, val_loss=0.3436672801769622
Epoch 10/15 - loss=0.2836, val_acc=0.8790, val_loss=0.32317606716566977
Epoch 11/15 - loss=0.2767, val_acc=0.8710, val_loss=0.33245739693126186
Epoch 12/15 - loss=0.2706, val_acc=0.8812, val_loss=0.33168192896185533
Epoch 13/15 - loss=0.2654, val_acc=0.8838, val_loss=0.31609606544582625
Epoch 14/15 - loss=0.2602, val_acc=0.8830, val_loss=0.3224808191262113
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▃▃▅▂▃▁▂▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▄▇▇▇▆███▇

0,1
epoch,15.0
test_accuracy,0.8687
training_loss,0.25581
validation loss,0.32548
validation_accuracy,0.87767


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: q4k131ww with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mdn59d9z with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5588, val_acc=0.8448, val_loss=0.4271538644512599
Epoch 2/15 - loss=0.3943, val_acc=0.8480, val_loss=0.40445947172242025
Epoch 3/15 - loss=0.3609, val_acc=0.8647, val_loss=0.3622889719636303
Epoch 4/15 - loss=0.3390, val_acc=0.8708, val_loss=0.3485043947176629
Epoch 5/15 - loss=0.3253, val_acc=0.8713, val_loss=0.35399132639249614
Epoch 6/15 - loss=0.3102, val_acc=0.8805, val_loss=0.32743767254658884
Epoch 7/15 - loss=0.3012, val_acc=0.8778, val_loss=0.33563633969756895
Epoch 8/15 - loss=0.2913, val_acc=0.8673, val_loss=0.3643031518449673
Epoch 9/15 - loss=0.2855, val_acc=0.8812, val_loss=0.32097279264277906
Epoch 10/15 - loss=0.2787, val_acc=0.8770, val_loss=0.3393258879564772
Epoch 11/15 - loss=0.2732, val_acc=0.8807, val_loss=0.3367238459348289
Epoch 12/15 - loss=0.2634, val_acc=0.8760, val_loss=0.33319998529664646
Epoch 13/15 - loss=0.2609, val_acc=0.8798, val_loss=0.33265992856267446
Epoch 14/15 - loss=0.2546, val_acc=0.8857, val_loss=0.3187087963350519
Epoch 15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▇▄▃▃▂▂▄▁▂▂▂▂▁▂
validation_accuracy,▁▂▄▅▆▇▇▅▇▇▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8713
training_loss,0.24972
validation loss,0.32963
validation_accuracy,0.88


[34m[1mwandb[0m: Agent Starting Run: sk0cwaei with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.3047, val_acc=0.7235, val_loss=0.788669674683696
Epoch 2/15 - loss=0.6742, val_acc=0.7862, val_loss=0.6152835071864606
Epoch 3/15 - loss=0.5601, val_acc=0.8157, val_loss=0.5400664878764678
Epoch 4/15 - loss=0.5057, val_acc=0.8278, val_loss=0.5008668393211678
Epoch 5/15 - loss=0.4751, val_acc=0.8355, val_loss=0.4762351567778349
Epoch 6/15 - loss=0.4542, val_acc=0.8347, val_loss=0.46176735466885765
Epoch 7/15 - loss=0.4392, val_acc=0.8402, val_loss=0.44963650196665744
Epoch 8/15 - loss=0.4278, val_acc=0.8470, val_loss=0.4370708800210514
Epoch 9/15 - loss=0.4177, val_acc=0.8482, val_loss=0.43394314526168554
Epoch 10/15 - loss=0.4094, val_acc=0.8532, val_loss=0.4233323727065861
Epoch 11/15 - loss=0.4020, val_acc=0.8540, val_loss=0.4174645779064537
Epoch 12/15 - loss=0.3950, val_acc=0.8535, val_loss=0.41624983585856673
Epoch 13/15 - loss=0.3889, val_acc=0.8562, val_loss=0.40333310236625447
Epoch 14/15 - loss=0.3830, val_acc=0.8577, val_loss=0.3979582144325897
Epoch 15/15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▆▆▇▇▇▇███████

0,1
epoch,15.0
test_accuracy,0.8501
training_loss,0.3777
validation loss,0.40125
validation_accuracy,0.85767


[34m[1mwandb[0m: Agent Starting Run: ohulx0v8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5779, val_acc=0.8348, val_loss=0.43985750491571146
Epoch 2/15 - loss=0.4065, val_acc=0.8563, val_loss=0.3876037488820295
Epoch 3/15 - loss=0.3719, val_acc=0.8633, val_loss=0.38170573697800014
Epoch 4/15 - loss=0.3492, val_acc=0.8563, val_loss=0.3856889007710927
Epoch 5/15 - loss=0.3318, val_acc=0.8653, val_loss=0.3559947591976067
Epoch 6/15 - loss=0.3187, val_acc=0.8695, val_loss=0.346945216445743
Epoch 7/15 - loss=0.3089, val_acc=0.8655, val_loss=0.376927189905875
Epoch 8/15 - loss=0.2990, val_acc=0.8783, val_loss=0.3309598861360001
Epoch 9/15 - loss=0.2918, val_acc=0.8758, val_loss=0.34302784655103435
Epoch 10/15 - loss=0.2843, val_acc=0.8718, val_loss=0.34237081897012583
Epoch 11/15 - loss=0.2792, val_acc=0.8742, val_loss=0.3415574364712802
Epoch 12/15 - loss=0.2730, val_acc=0.8835, val_loss=0.3271817440888692
Epoch 13/15 - loss=0.2677, val_acc=0.8810, val_loss=0.32395863597477026
Epoch 14/15 - loss=0.2629, val_acc=0.8765, val_loss=0.3302192657515811
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▅▃▂▄▁▂▂▂▁▁▁▂
validation_accuracy,▁▄▅▄▅▆▅▇▇▆▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8715
training_loss,0.25899
validation loss,0.33395
validation_accuracy,0.87817


[34m[1mwandb[0m: Agent Starting Run: khov993v with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: pa0pl7s9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2557, val_acc=0.7805, val_loss=0.1707722580727658
Epoch 2/15 - loss=0.1444, val_acc=0.8188, val_loss=0.1329447690918598
Epoch 3/15 - loss=0.1219, val_acc=0.8345, val_loss=0.1195483380835641
Epoch 4/15 - loss=0.1125, val_acc=0.8422, val_loss=0.11316655820370497
Epoch 5/15 - loss=0.1070, val_acc=0.8470, val_loss=0.10848648750904173
Epoch 6/15 - loss=0.1027, val_acc=0.8533, val_loss=0.10580834427994996
Epoch 7/15 - loss=0.0995, val_acc=0.8598, val_loss=0.10260613500171341
Epoch 8/15 - loss=0.0971, val_acc=0.8607, val_loss=0.10063556806982776
Epoch 9/15 - loss=0.0948, val_acc=0.8617, val_loss=0.09884146377346889
Epoch 10/15 - loss=0.0930, val_acc=0.8623, val_loss=0.09955211952367332
Epoch 11/15 - loss=0.0915, val_acc=0.8637, val_loss=0.09715185716357451
Epoch 12/15 - loss=0.0897, val_acc=0.8650, val_loss=0.09731779821621647
Epoch 13/15 - loss=0.0885, val_acc=0.8693, val_loss=0.09411591335441452
Epoch 14/15 - loss=0.0873, val_acc=0.8658, val_loss=0.09497111199991268
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8636
training_loss,0.08612
validation loss,0.09472
validation_accuracy,0.868


[34m[1mwandb[0m: Agent Starting Run: 5bqhg14d with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1432, val_acc=0.8278, val_loss=0.12019735603298004
Epoch 2/15 - loss=0.1063, val_acc=0.8537, val_loss=0.10458825745834655
Epoch 3/15 - loss=0.0980, val_acc=0.8640, val_loss=0.09894489446275748
Epoch 4/15 - loss=0.0928, val_acc=0.8628, val_loss=0.09815624616556345
Epoch 5/15 - loss=0.0885, val_acc=0.8652, val_loss=0.09491658417853527
Epoch 6/15 - loss=0.0853, val_acc=0.8692, val_loss=0.09312992971938058
Epoch 7/15 - loss=0.0827, val_acc=0.8712, val_loss=0.09282399194692598
Epoch 8/15 - loss=0.0809, val_acc=0.8782, val_loss=0.08817061121725349
Epoch 9/15 - loss=0.0787, val_acc=0.8765, val_loss=0.09128837832752773
Epoch 10/15 - loss=0.0770, val_acc=0.8765, val_loss=0.08810301816926316
Epoch 11/15 - loss=0.0753, val_acc=0.8757, val_loss=0.09139334683740018
Epoch 12/15 - loss=0.0741, val_acc=0.8788, val_loss=0.08844351168724683
Epoch 13/15 - loss=0.0731, val_acc=0.8822, val_loss=0.08722778262327051
Epoch 14/15 - loss=0.0716, val_acc=0.8777, val_loss=0.08822212025571004
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▃▂▂▁▂▁▂▁▁▁▁
validation_accuracy,▁▄▆▆▆▆▇▇▇▇▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8703
training_loss,0.07025
validation loss,0.0887
validation_accuracy,0.87783


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xt4u3lc3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.9780, val_acc=0.7860, val_loss=0.6512629818942361
Epoch 2/15 - loss=0.5636, val_acc=0.8148, val_loss=0.5228944074259625
Epoch 3/15 - loss=0.4808, val_acc=0.8333, val_loss=0.4719444224507964
Epoch 4/15 - loss=0.4430, val_acc=0.8395, val_loss=0.443731536751288
Epoch 5/15 - loss=0.4199, val_acc=0.8490, val_loss=0.4218333284333397
Epoch 6/15 - loss=0.4029, val_acc=0.8515, val_loss=0.409878987433738
Epoch 7/15 - loss=0.3908, val_acc=0.8545, val_loss=0.4020654994868725
Epoch 8/15 - loss=0.3806, val_acc=0.8597, val_loss=0.3903855355267599
Epoch 9/15 - loss=0.3718, val_acc=0.8575, val_loss=0.3875869973248034
Epoch 10/15 - loss=0.3644, val_acc=0.8625, val_loss=0.37707866790659794
Epoch 11/15 - loss=0.3577, val_acc=0.8635, val_loss=0.3731787426115567
Epoch 12/15 - loss=0.3517, val_acc=0.8625, val_loss=0.3777987545739664
Epoch 13/15 - loss=0.3468, val_acc=0.8688, val_loss=0.35954957352361405
Epoch 14/15 - loss=0.3416, val_acc=0.8677, val_loss=0.3586472484648075
Epoch 15/15 - l

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▇▇▇▇█▇███

0,1
epoch,15.0
test_accuracy,0.8609
training_loss,0.33727
validation loss,0.35906
validation_accuracy,0.86617


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pg7aol37 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2701, val_acc=0.7712, val_loss=0.17233303914369377
Epoch 2/15 - loss=0.1489, val_acc=0.8120, val_loss=0.14023549319911544
Epoch 3/15 - loss=0.1283, val_acc=0.8308, val_loss=0.12563040785218538
Epoch 4/15 - loss=0.1190, val_acc=0.8403, val_loss=0.11926689887964766
Epoch 5/15 - loss=0.1133, val_acc=0.8457, val_loss=0.11464305398695039
Epoch 6/15 - loss=0.1091, val_acc=0.8485, val_loss=0.11144482326195737
Epoch 7/15 - loss=0.1059, val_acc=0.8540, val_loss=0.10865193871514116
Epoch 8/15 - loss=0.1035, val_acc=0.8557, val_loss=0.1063017702072951
Epoch 9/15 - loss=0.1012, val_acc=0.8563, val_loss=0.10551612705504892
Epoch 10/15 - loss=0.0994, val_acc=0.8577, val_loss=0.10394982501700006
Epoch 11/15 - loss=0.0977, val_acc=0.8590, val_loss=0.10234280924618876
Epoch 12/15 - loss=0.0959, val_acc=0.8615, val_loss=0.10259491782410007
Epoch 13/15 - loss=0.0947, val_acc=0.8613, val_loss=0.10039957074187579
Epoch 14/15 - loss=0.0934, val_acc=0.8668, val_loss=0.0988652795390645
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.09208
validation loss,0.09916
validation_accuracy,0.86083


[34m[1mwandb[0m: Agent Starting Run: pw5fcor9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2251, val_acc=0.7937, val_loss=0.15555477288051223
Epoch 2/15 - loss=0.1363, val_acc=0.8188, val_loss=0.12978856318332885
Epoch 3/15 - loss=0.1197, val_acc=0.8340, val_loss=0.11886597894815847
Epoch 4/15 - loss=0.1118, val_acc=0.8415, val_loss=0.11237039833777483
Epoch 5/15 - loss=0.1068, val_acc=0.8487, val_loss=0.10782254377191218
Epoch 6/15 - loss=0.1029, val_acc=0.8508, val_loss=0.10580812756301354
Epoch 7/15 - loss=0.1001, val_acc=0.8547, val_loss=0.10346746048261021
Epoch 8/15 - loss=0.0978, val_acc=0.8592, val_loss=0.10160616912260707
Epoch 9/15 - loss=0.0958, val_acc=0.8580, val_loss=0.10194214366832723
Epoch 10/15 - loss=0.0940, val_acc=0.8613, val_loss=0.09877766964297852
Epoch 11/15 - loss=0.0923, val_acc=0.8617, val_loss=0.09740027399928149
Epoch 12/15 - loss=0.0911, val_acc=0.8622, val_loss=0.09871866585730099
Epoch 13/15 - loss=0.0898, val_acc=0.8682, val_loss=0.09457333113366889
Epoch 14/15 - loss=0.0886, val_acc=0.8680, val_loss=0.09465305052316601
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇▇███

0,1
epoch,15.0
test_accuracy,0.8613
training_loss,0.08752
validation loss,0.09534
validation_accuracy,0.86317


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xgebycnm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1413, val_acc=0.8418, val_loss=0.11187608837434508
Epoch 2/15 - loss=0.1032, val_acc=0.8572, val_loss=0.10292216007550754
Epoch 3/15 - loss=0.0944, val_acc=0.8645, val_loss=0.09652480990400185
Epoch 4/15 - loss=0.0897, val_acc=0.8685, val_loss=0.09476959669844394
Epoch 5/15 - loss=0.0862, val_acc=0.8678, val_loss=0.0945033290148884
Epoch 6/15 - loss=0.0822, val_acc=0.8603, val_loss=0.09800798271184999
Epoch 7/15 - loss=0.0804, val_acc=0.8767, val_loss=0.08819866380313499
Epoch 8/15 - loss=0.0776, val_acc=0.8752, val_loss=0.09065071683291501
Epoch 9/15 - loss=0.0767, val_acc=0.8832, val_loss=0.0857508196502852
Epoch 10/15 - loss=0.0749, val_acc=0.8777, val_loss=0.08825522418715129
Epoch 11/15 - loss=0.0735, val_acc=0.8748, val_loss=0.08935921443706722
Epoch 12/15 - loss=0.0709, val_acc=0.8795, val_loss=0.08773304187319131
Epoch 13/15 - loss=0.0707, val_acc=0.8830, val_loss=0.08585769123801612
Epoch 14/15 - loss=0.0688, val_acc=0.8865, val_loss=0.08314714065452436
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▅▂▃▂▂▃▂▂▁▃
validation_accuracy,▁▃▅▅▅▄▆▆▇▇▆▇▇█▆

0,1
epoch,15.0
test_accuracy,0.8671
training_loss,0.06787
validation loss,0.0915
validation_accuracy,0.87333


[34m[1mwandb[0m: Agent Starting Run: v2fxhroo with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6270, val_acc=0.8425, val_loss=0.44806579568108995
Epoch 2/15 - loss=0.4148, val_acc=0.8480, val_loss=0.40956287006615216
Epoch 3/15 - loss=0.3790, val_acc=0.8595, val_loss=0.3849564114459321
Epoch 4/15 - loss=0.3550, val_acc=0.8663, val_loss=0.3724371378558453
Epoch 5/15 - loss=0.3403, val_acc=0.8695, val_loss=0.35556014969300476
Epoch 6/15 - loss=0.3229, val_acc=0.8707, val_loss=0.3478934113529482
Epoch 7/15 - loss=0.3146, val_acc=0.8768, val_loss=0.3295220304940766
Epoch 8/15 - loss=0.3054, val_acc=0.8653, val_loss=0.3652100624233602
Epoch 9/15 - loss=0.2982, val_acc=0.8775, val_loss=0.3312161494387455
Epoch 10/15 - loss=0.2902, val_acc=0.8688, val_loss=0.36596795707779933
Epoch 11/15 - loss=0.2842, val_acc=0.8718, val_loss=0.3462242708421703
Epoch 12/15 - loss=0.2771, val_acc=0.8800, val_loss=0.33932088558946594
Epoch 13/15 - loss=0.2720, val_acc=0.8750, val_loss=0.3352174624255721
Epoch 14/15 - loss=0.2690, val_acc=0.8800, val_loss=0.32494663082018665
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▆▄▄▃▂▁▃▁▃▂▂▂▁▄
validation_accuracy,▁▂▄▅▆▆▇▅█▆▆█▇█▆

0,1
epoch,15.0
test_accuracy,0.863
training_loss,0.26503
validation loss,0.37061
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: iiy4t6so with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1413, val_acc=0.8418, val_loss=0.11187608837434508
Epoch 2/15 - loss=0.1032, val_acc=0.8572, val_loss=0.10292216007550754
Epoch 3/15 - loss=0.0944, val_acc=0.8645, val_loss=0.09652480990400185
Epoch 4/15 - loss=0.0897, val_acc=0.8685, val_loss=0.09476959669844394
Epoch 5/15 - loss=0.0862, val_acc=0.8678, val_loss=0.0945033290148884
Epoch 6/15 - loss=0.0822, val_acc=0.8603, val_loss=0.09800798271184999
Epoch 7/15 - loss=0.0804, val_acc=0.8767, val_loss=0.08819866380313499
Epoch 8/15 - loss=0.0776, val_acc=0.8752, val_loss=0.09065071683291501
Epoch 9/15 - loss=0.0767, val_acc=0.8832, val_loss=0.0857508196502852
Epoch 10/15 - loss=0.0749, val_acc=0.8777, val_loss=0.08825522418715129
Epoch 11/15 - loss=0.0735, val_acc=0.8748, val_loss=0.08935921443706722
Epoch 12/15 - loss=0.0709, val_acc=0.8795, val_loss=0.08773304187319131
Epoch 13/15 - loss=0.0707, val_acc=0.8830, val_loss=0.08585769123801612
Epoch 14/15 - loss=0.0688, val_acc=0.8865, val_loss=0.08314714065452436
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▅▂▃▂▂▃▂▂▁▃
validation_accuracy,▁▃▅▅▅▄▆▆▇▇▆▇▇█▆

0,1
epoch,15.0
test_accuracy,0.8671
training_loss,0.06787
validation loss,0.0915
validation_accuracy,0.87333


[34m[1mwandb[0m: Agent Starting Run: ml3l4z96 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2262, val_acc=0.7972, val_loss=0.15340633505537074
Epoch 2/15 - loss=0.1351, val_acc=0.8230, val_loss=0.12849873562126413
Epoch 3/15 - loss=0.1186, val_acc=0.8365, val_loss=0.11654167326939735
Epoch 4/15 - loss=0.1104, val_acc=0.8438, val_loss=0.1116252541829645
Epoch 5/15 - loss=0.1056, val_acc=0.8525, val_loss=0.10642589802703982
Epoch 6/15 - loss=0.1016, val_acc=0.8510, val_loss=0.10472057925021
Epoch 7/15 - loss=0.0987, val_acc=0.8575, val_loss=0.10131341121112283
Epoch 8/15 - loss=0.0963, val_acc=0.8600, val_loss=0.09995529280625355
Epoch 9/15 - loss=0.0943, val_acc=0.8628, val_loss=0.09797341864225681
Epoch 10/15 - loss=0.0926, val_acc=0.8635, val_loss=0.09862472186706352
Epoch 11/15 - loss=0.0912, val_acc=0.8643, val_loss=0.09677629532947156
Epoch 12/15 - loss=0.0895, val_acc=0.8667, val_loss=0.09631917740106112
Epoch 13/15 - loss=0.0884, val_acc=0.8690, val_loss=0.09333225153615081
Epoch 14/15 - loss=0.0872, val_acc=0.8700, val_loss=0.09374016410840073
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8624
training_loss,0.086
validation loss,0.0937
validation_accuracy,0.86867


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 971ts816 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tn5wprcw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1556, val_acc=0.8275, val_loss=0.12376927939821812
Epoch 2/15 - loss=0.1123, val_acc=0.8487, val_loss=0.10914155139324669
Epoch 3/15 - loss=0.1032, val_acc=0.8567, val_loss=0.10336085367071043
Epoch 4/15 - loss=0.0974, val_acc=0.8550, val_loss=0.10369154206696556
Epoch 5/15 - loss=0.0928, val_acc=0.8692, val_loss=0.09388641542241928
Epoch 6/15 - loss=0.0891, val_acc=0.8633, val_loss=0.09691810265368508
Epoch 7/15 - loss=0.0867, val_acc=0.8635, val_loss=0.10091406397319008
Epoch 8/15 - loss=0.0849, val_acc=0.8763, val_loss=0.09052125489096238
Epoch 9/15 - loss=0.0826, val_acc=0.8658, val_loss=0.09602094514999028
Epoch 10/15 - loss=0.0812, val_acc=0.8783, val_loss=0.08653326105908628
Epoch 11/15 - loss=0.0792, val_acc=0.8668, val_loss=0.09581575435407996
Epoch 12/15 - loss=0.0783, val_acc=0.8660, val_loss=0.09726290569530321
Epoch 13/15 - loss=0.0770, val_acc=0.8797, val_loss=0.08673747693474843
Epoch 14/15 - loss=0.0756, val_acc=0.8777, val_loss=0.08932270018879274
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▄▂▃▁▃▃▁▂▂
validation_accuracy,▁▄▅▅▇▆▆█▆█▆▆██▇

0,1
epoch,15.0
test_accuracy,0.8618
training_loss,0.07445
validation loss,0.09193
validation_accuracy,0.87083


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mrt3y9zj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.9780, val_acc=0.7860, val_loss=0.6512629818942361
Epoch 2/15 - loss=0.5636, val_acc=0.8148, val_loss=0.5228944074259625
Epoch 3/15 - loss=0.4808, val_acc=0.8333, val_loss=0.4719444224507964
Epoch 4/15 - loss=0.4430, val_acc=0.8395, val_loss=0.443731536751288
Epoch 5/15 - loss=0.4199, val_acc=0.8490, val_loss=0.4218333284333397
Epoch 6/15 - loss=0.4029, val_acc=0.8515, val_loss=0.409878987433738
Epoch 7/15 - loss=0.3908, val_acc=0.8545, val_loss=0.4020654994868725
Epoch 8/15 - loss=0.3806, val_acc=0.8597, val_loss=0.3903855355267599
Epoch 9/15 - loss=0.3718, val_acc=0.8575, val_loss=0.3875869973248034
Epoch 10/15 - loss=0.3644, val_acc=0.8625, val_loss=0.37707866790659794
Epoch 11/15 - loss=0.3577, val_acc=0.8635, val_loss=0.3731787426115567
Epoch 12/15 - loss=0.3517, val_acc=0.8625, val_loss=0.3777987545739664
Epoch 13/15 - loss=0.3468, val_acc=0.8688, val_loss=0.35954957352361405
Epoch 14/15 - loss=0.3416, val_acc=0.8677, val_loss=0.3586472484648075
Epoch 15/15 - l

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▇▇▇▇█▇███

0,1
epoch,15.0
test_accuracy,0.8609
training_loss,0.33727
validation loss,0.35906
validation_accuracy,0.86617


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fmdt1xlz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5588, val_acc=0.8448, val_loss=0.4271538644512599
Epoch 2/15 - loss=0.3943, val_acc=0.8480, val_loss=0.40445947172242025
Epoch 3/15 - loss=0.3609, val_acc=0.8647, val_loss=0.3622889719636303
Epoch 4/15 - loss=0.3390, val_acc=0.8708, val_loss=0.3485043947176629
Epoch 5/15 - loss=0.3253, val_acc=0.8713, val_loss=0.35399132639249614
Epoch 6/15 - loss=0.3102, val_acc=0.8805, val_loss=0.32743767254658884
Epoch 7/15 - loss=0.3012, val_acc=0.8778, val_loss=0.33563633969756895
Epoch 8/15 - loss=0.2913, val_acc=0.8673, val_loss=0.3643031518449673
Epoch 9/15 - loss=0.2855, val_acc=0.8812, val_loss=0.32097279264277906
Epoch 10/15 - loss=0.2787, val_acc=0.8770, val_loss=0.3393258879564772
Epoch 11/15 - loss=0.2732, val_acc=0.8807, val_loss=0.3367238459348289
Epoch 12/15 - loss=0.2634, val_acc=0.8760, val_loss=0.33319998529664646
Epoch 13/15 - loss=0.2609, val_acc=0.8798, val_loss=0.33265992856267446
Epoch 14/15 - loss=0.2546, val_acc=0.8857, val_loss=0.3187087963350519
Epoch 15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▇▄▃▃▂▂▄▁▂▂▂▂▁▂
validation_accuracy,▁▂▄▅▆▇▇▅▇▇▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8713
training_loss,0.24972
validation loss,0.32963
validation_accuracy,0.88


[34m[1mwandb[0m: Agent Starting Run: oe3c9kmm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1432, val_acc=0.8278, val_loss=0.12019735603298004
Epoch 2/15 - loss=0.1063, val_acc=0.8537, val_loss=0.10458825745834655
Epoch 3/15 - loss=0.0980, val_acc=0.8640, val_loss=0.09894489446275748
Epoch 4/15 - loss=0.0928, val_acc=0.8628, val_loss=0.09815624616556345
Epoch 5/15 - loss=0.0885, val_acc=0.8652, val_loss=0.09491658417853527
Epoch 6/15 - loss=0.0853, val_acc=0.8692, val_loss=0.09312992971938058
Epoch 7/15 - loss=0.0827, val_acc=0.8712, val_loss=0.09282399194692598
Epoch 8/15 - loss=0.0809, val_acc=0.8782, val_loss=0.08817061121725349
Epoch 9/15 - loss=0.0787, val_acc=0.8765, val_loss=0.09128837832752773
Epoch 10/15 - loss=0.0770, val_acc=0.8765, val_loss=0.08810301816926316
Epoch 11/15 - loss=0.0753, val_acc=0.8757, val_loss=0.09139334683740018
Epoch 12/15 - loss=0.0741, val_acc=0.8788, val_loss=0.08844351168724683
Epoch 13/15 - loss=0.0731, val_acc=0.8822, val_loss=0.08722778262327051
Epoch 14/15 - loss=0.0716, val_acc=0.8777, val_loss=0.08822212025571004
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▃▂▂▁▂▁▂▁▁▁▁
validation_accuracy,▁▄▆▆▆▆▇▇▇▇▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8703
training_loss,0.07025
validation loss,0.0887
validation_accuracy,0.87783


[34m[1mwandb[0m: Agent Starting Run: 64syb81l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1684, val_acc=0.7595, val_loss=0.7656161216685117
Epoch 2/15 - loss=0.6343, val_acc=0.8075, val_loss=0.5643192722810766
Epoch 3/15 - loss=0.5117, val_acc=0.8272, val_loss=0.49474799202383346
Epoch 4/15 - loss=0.4622, val_acc=0.8352, val_loss=0.46027655629627634
Epoch 5/15 - loss=0.4350, val_acc=0.8437, val_loss=0.43663804996165595
Epoch 6/15 - loss=0.4160, val_acc=0.8482, val_loss=0.42320942443700355
Epoch 7/15 - loss=0.4023, val_acc=0.8508, val_loss=0.4110628621184563
Epoch 8/15 - loss=0.3908, val_acc=0.8537, val_loss=0.40271150484231427
Epoch 9/15 - loss=0.3813, val_acc=0.8533, val_loss=0.39744558979354605
Epoch 10/15 - loss=0.3733, val_acc=0.8605, val_loss=0.38701558632697586
Epoch 11/15 - loss=0.3661, val_acc=0.8618, val_loss=0.38161410936788304
Epoch 12/15 - loss=0.3597, val_acc=0.8585, val_loss=0.38717715258045654
Epoch 13/15 - loss=0.3542, val_acc=0.8657, val_loss=0.36754165830593416
Epoch 14/15 - loss=0.3486, val_acc=0.8627, val_loss=0.36683406405463936
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8599
training_loss,0.34423
validation loss,0.36479
validation_accuracy,0.866


[34m[1mwandb[0m: Agent Starting Run: p67ferw0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1524, val_acc=0.8313, val_loss=0.1245255895745075
Epoch 2/15 - loss=0.1118, val_acc=0.8493, val_loss=0.10794346530513453
Epoch 3/15 - loss=0.1019, val_acc=0.8608, val_loss=0.10114784327619517
Epoch 4/15 - loss=0.0959, val_acc=0.8623, val_loss=0.0987054971601063
Epoch 5/15 - loss=0.0916, val_acc=0.8738, val_loss=0.09158856352115755
Epoch 6/15 - loss=0.0883, val_acc=0.8640, val_loss=0.09430304095777431
Epoch 7/15 - loss=0.0862, val_acc=0.8655, val_loss=0.10152089438475206
Epoch 8/15 - loss=0.0835, val_acc=0.8730, val_loss=0.09048963897219622
Epoch 9/15 - loss=0.0821, val_acc=0.8745, val_loss=0.08900585688577257
Epoch 10/15 - loss=0.0799, val_acc=0.8692, val_loss=0.09298489125222162
Epoch 11/15 - loss=0.0777, val_acc=0.8758, val_loss=0.0892168823899104
Epoch 12/15 - loss=0.0771, val_acc=0.8790, val_loss=0.08838762237306609
Epoch 13/15 - loss=0.0758, val_acc=0.8842, val_loss=0.08579004300847413
Epoch 14/15 - loss=0.0741, val_acc=0.8825, val_loss=0.08606499442054288
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▃▄▂▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▅▇▅▆▇▇▆▇▇███

0,1
epoch,15.0
test_accuracy,0.8724
training_loss,0.07339
validation loss,0.08512
validation_accuracy,0.8825


[34m[1mwandb[0m: Agent Starting Run: shzcq4hc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.9780, val_acc=0.7860, val_loss=0.6512629818942361
Epoch 2/15 - loss=0.5636, val_acc=0.8148, val_loss=0.5228944074259625
Epoch 3/15 - loss=0.4808, val_acc=0.8333, val_loss=0.4719444224507964
Epoch 4/15 - loss=0.4430, val_acc=0.8395, val_loss=0.443731536751288
Epoch 5/15 - loss=0.4199, val_acc=0.8490, val_loss=0.4218333284333397
Epoch 6/15 - loss=0.4029, val_acc=0.8515, val_loss=0.409878987433738
Epoch 7/15 - loss=0.3908, val_acc=0.8545, val_loss=0.4020654994868725
Epoch 8/15 - loss=0.3806, val_acc=0.8597, val_loss=0.3903855355267599
Epoch 9/15 - loss=0.3718, val_acc=0.8575, val_loss=0.3875869973248034
Epoch 10/15 - loss=0.3644, val_acc=0.8625, val_loss=0.37707866790659794
Epoch 11/15 - loss=0.3577, val_acc=0.8635, val_loss=0.3731787426115567
Epoch 12/15 - loss=0.3517, val_acc=0.8625, val_loss=0.3777987545739664
Epoch 13/15 - loss=0.3468, val_acc=0.8688, val_loss=0.35954957352361405
Epoch 14/15 - loss=0.3416, val_acc=0.8677, val_loss=0.3586472484648075
Epoch 15/15 - l

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▇▇▇▇█▇███

0,1
epoch,15.0
test_accuracy,0.8609
training_loss,0.33727
validation loss,0.35906
validation_accuracy,0.86617


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cpr56p4w with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1397, val_acc=0.8355, val_loss=0.11350597836975147
Epoch 2/15 - loss=0.1057, val_acc=0.8588, val_loss=0.10112544991042209
Epoch 3/15 - loss=0.0971, val_acc=0.8688, val_loss=0.09539135940287359
Epoch 4/15 - loss=0.0914, val_acc=0.8623, val_loss=0.09890848436977485
Epoch 5/15 - loss=0.0877, val_acc=0.8713, val_loss=0.09307759306069037
Epoch 6/15 - loss=0.0842, val_acc=0.8645, val_loss=0.09620183190547767
Epoch 7/15 - loss=0.0824, val_acc=0.8698, val_loss=0.09265516953815112
Epoch 8/15 - loss=0.0803, val_acc=0.8785, val_loss=0.08747185659491241
Epoch 9/15 - loss=0.0784, val_acc=0.8743, val_loss=0.09149474544387273
Epoch 10/15 - loss=0.0767, val_acc=0.8785, val_loss=0.08797854065202694
Epoch 11/15 - loss=0.0754, val_acc=0.8757, val_loss=0.08909678934648581
Epoch 12/15 - loss=0.0739, val_acc=0.8755, val_loss=0.08976308206757432
Epoch 13/15 - loss=0.0721, val_acc=0.8727, val_loss=0.09255991391862062
Epoch 14/15 - loss=0.0712, val_acc=0.8808, val_loss=0.08545360661998516
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▅▃▄▃▄▃▂▃▂▂▂▃▁▁
validation_accuracy,▁▅▆▅▇▅▆█▇█▇▇▇██

0,1
epoch,15.0
test_accuracy,0.8726
training_loss,0.07011
validation loss,0.08633
validation_accuracy,0.87917


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j14lnxr2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1280, val_acc=0.7710, val_loss=0.6874403469354357
Epoch 2/15 - loss=0.5903, val_acc=0.8093, val_loss=0.5478209353920453
Epoch 3/15 - loss=0.5036, val_acc=0.8313, val_loss=0.48710003291639853
Epoch 4/15 - loss=0.4640, val_acc=0.8400, val_loss=0.459879015729208
Epoch 5/15 - loss=0.4416, val_acc=0.8440, val_loss=0.4455406488407984
Epoch 6/15 - loss=0.4243, val_acc=0.8527, val_loss=0.4266782544020839
Epoch 7/15 - loss=0.4113, val_acc=0.8532, val_loss=0.4175954601943847
Epoch 8/15 - loss=0.4017, val_acc=0.8563, val_loss=0.40938636345511836
Epoch 9/15 - loss=0.3921, val_acc=0.8563, val_loss=0.40278812722555274
Epoch 10/15 - loss=0.3844, val_acc=0.8590, val_loss=0.395092607387703
Epoch 11/15 - loss=0.3772, val_acc=0.8623, val_loss=0.38809914083643315
Epoch 12/15 - loss=0.3697, val_acc=0.8593, val_loss=0.3885536930151969
Epoch 13/15 - loss=0.3639, val_acc=0.8633, val_loss=0.3797105619284139
Epoch 14/15 - loss=0.3589, val_acc=0.8652, val_loss=0.37356018365124766
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8597
training_loss,0.35299
validation loss,0.37165
validation_accuracy,0.86567


[34m[1mwandb[0m: Agent Starting Run: yz0pf9ln with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2701, val_acc=0.7712, val_loss=0.17233303914369377
Epoch 2/15 - loss=0.1489, val_acc=0.8120, val_loss=0.14023549319911544
Epoch 3/15 - loss=0.1283, val_acc=0.8308, val_loss=0.12563040785218538
Epoch 4/15 - loss=0.1190, val_acc=0.8403, val_loss=0.11926689887964766
Epoch 5/15 - loss=0.1133, val_acc=0.8457, val_loss=0.11464305398695039
Epoch 6/15 - loss=0.1091, val_acc=0.8485, val_loss=0.11144482326195737
Epoch 7/15 - loss=0.1059, val_acc=0.8540, val_loss=0.10865193871514116
Epoch 8/15 - loss=0.1035, val_acc=0.8557, val_loss=0.1063017702072951
Epoch 9/15 - loss=0.1012, val_acc=0.8563, val_loss=0.10551612705504892
Epoch 10/15 - loss=0.0994, val_acc=0.8577, val_loss=0.10394982501700006
Epoch 11/15 - loss=0.0977, val_acc=0.8590, val_loss=0.10234280924618876
Epoch 12/15 - loss=0.0959, val_acc=0.8615, val_loss=0.10259491782410007
Epoch 13/15 - loss=0.0947, val_acc=0.8613, val_loss=0.10039957074187579
Epoch 14/15 - loss=0.0934, val_acc=0.8668, val_loss=0.0988652795390645
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.09208
validation loss,0.09916
validation_accuracy,0.86083


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6vf7qgo2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Agent Starting Run: atudosf3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Agent Starting Run: ouzau4ii with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5779, val_acc=0.8348, val_loss=0.43985750491571146
Epoch 2/15 - loss=0.4065, val_acc=0.8563, val_loss=0.3876037488820295
Epoch 3/15 - loss=0.3719, val_acc=0.8633, val_loss=0.38170573697800014
Epoch 4/15 - loss=0.3492, val_acc=0.8563, val_loss=0.3856889007710927
Epoch 5/15 - loss=0.3318, val_acc=0.8653, val_loss=0.3559947591976067
Epoch 6/15 - loss=0.3187, val_acc=0.8695, val_loss=0.346945216445743
Epoch 7/15 - loss=0.3089, val_acc=0.8655, val_loss=0.376927189905875
Epoch 8/15 - loss=0.2990, val_acc=0.8783, val_loss=0.3309598861360001
Epoch 9/15 - loss=0.2918, val_acc=0.8758, val_loss=0.34302784655103435
Epoch 10/15 - loss=0.2843, val_acc=0.8718, val_loss=0.34237081897012583
Epoch 11/15 - loss=0.2792, val_acc=0.8742, val_loss=0.3415574364712802
Epoch 12/15 - loss=0.2730, val_acc=0.8835, val_loss=0.3271817440888692
Epoch 13/15 - loss=0.2677, val_acc=0.8810, val_loss=0.32395863597477026
Epoch 14/15 - loss=0.2629, val_acc=0.8765, val_loss=0.3302192657515811
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▅▃▂▄▁▂▂▂▁▁▁▂
validation_accuracy,▁▄▅▄▅▆▅▇▇▆▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8715
training_loss,0.25899
validation loss,0.33395
validation_accuracy,0.87817


[34m[1mwandb[0m: Agent Starting Run: ufyxhj91 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Agent Starting Run: k4tb20bx with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1280, val_acc=0.7710, val_loss=0.6874403469354357
Epoch 2/15 - loss=0.5903, val_acc=0.8093, val_loss=0.5478209353920453
Epoch 3/15 - loss=0.5036, val_acc=0.8313, val_loss=0.48710003291639853
Epoch 4/15 - loss=0.4640, val_acc=0.8400, val_loss=0.459879015729208
Epoch 5/15 - loss=0.4416, val_acc=0.8440, val_loss=0.4455406488407984
Epoch 6/15 - loss=0.4243, val_acc=0.8527, val_loss=0.4266782544020839
Epoch 7/15 - loss=0.4113, val_acc=0.8532, val_loss=0.4175954601943847
Epoch 8/15 - loss=0.4017, val_acc=0.8563, val_loss=0.40938636345511836
Epoch 9/15 - loss=0.3921, val_acc=0.8563, val_loss=0.40278812722555274
Epoch 10/15 - loss=0.3844, val_acc=0.8590, val_loss=0.395092607387703
Epoch 11/15 - loss=0.3772, val_acc=0.8623, val_loss=0.38809914083643315
Epoch 12/15 - loss=0.3697, val_acc=0.8593, val_loss=0.3885536930151969
Epoch 13/15 - loss=0.3639, val_acc=0.8633, val_loss=0.3797105619284139
Epoch 14/15 - loss=0.3589, val_acc=0.8652, val_loss=0.37356018365124766
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8597
training_loss,0.35299
validation loss,0.37165
validation_accuracy,0.86567


[34m[1mwandb[0m: Agent Starting Run: 6na9cfgd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 35x6qerg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1499, val_acc=0.8377, val_loss=0.11754444951492507
Epoch 2/15 - loss=0.1077, val_acc=0.8550, val_loss=0.1039970672223214
Epoch 3/15 - loss=0.0997, val_acc=0.8633, val_loss=0.0973678987654608
Epoch 4/15 - loss=0.0943, val_acc=0.8713, val_loss=0.09508071831895568
Epoch 5/15 - loss=0.0906, val_acc=0.8728, val_loss=0.09288174359546006
Epoch 6/15 - loss=0.0872, val_acc=0.8648, val_loss=0.097597768798854
Epoch 7/15 - loss=0.0844, val_acc=0.8702, val_loss=0.09190042013525783
Epoch 8/15 - loss=0.0833, val_acc=0.8580, val_loss=0.09973719189928204
Epoch 9/15 - loss=0.0814, val_acc=0.8738, val_loss=0.09209028580316203
Epoch 10/15 - loss=0.0789, val_acc=0.8605, val_loss=0.09909293062333975
Epoch 11/15 - loss=0.0785, val_acc=0.8647, val_loss=0.09600598337536208
Epoch 12/15 - loss=0.0760, val_acc=0.8723, val_loss=0.09453986752509165
Epoch 13/15 - loss=0.0755, val_acc=0.8727, val_loss=0.09029244110907954
Epoch 14/15 - loss=0.0740, val_acc=0.8780, val_loss=0.08767841927800032
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▃▂▄▂▄▃▃▂▁▁
validation_accuracy,▁▄▅▇▇▆▇▅▇▅▆▇▇██

0,1
epoch,15.0
test_accuracy,0.8692
training_loss,0.07261
validation loss,0.08955
validation_accuracy,0.87633


[34m[1mwandb[0m: Agent Starting Run: 2zjh7y1k with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6270, val_acc=0.8425, val_loss=0.44806579568108995
Epoch 2/15 - loss=0.4148, val_acc=0.8480, val_loss=0.40956287006615216
Epoch 3/15 - loss=0.3790, val_acc=0.8595, val_loss=0.3849564114459321
Epoch 4/15 - loss=0.3550, val_acc=0.8663, val_loss=0.3724371378558453
Epoch 5/15 - loss=0.3403, val_acc=0.8695, val_loss=0.35556014969300476
Epoch 6/15 - loss=0.3229, val_acc=0.8707, val_loss=0.3478934113529482
Epoch 7/15 - loss=0.3146, val_acc=0.8768, val_loss=0.3295220304940766
Epoch 8/15 - loss=0.3054, val_acc=0.8653, val_loss=0.3652100624233602
Epoch 9/15 - loss=0.2982, val_acc=0.8775, val_loss=0.3312161494387455
Epoch 10/15 - loss=0.2902, val_acc=0.8688, val_loss=0.36596795707779933
Epoch 11/15 - loss=0.2842, val_acc=0.8718, val_loss=0.3462242708421703
Epoch 12/15 - loss=0.2771, val_acc=0.8800, val_loss=0.33932088558946594
Epoch 13/15 - loss=0.2720, val_acc=0.8750, val_loss=0.3352174624255721
Epoch 14/15 - loss=0.2690, val_acc=0.8800, val_loss=0.32494663082018665
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▆▄▄▃▂▁▃▁▃▂▂▂▁▄
validation_accuracy,▁▂▄▅▆▆▇▅█▆▆█▇█▆

0,1
epoch,15.0
test_accuracy,0.863
training_loss,0.26503
validation loss,0.37061
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: hidkswq8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1499, val_acc=0.8377, val_loss=0.11754444951492507
Epoch 2/15 - loss=0.1077, val_acc=0.8550, val_loss=0.1039970672223214
Epoch 3/15 - loss=0.0997, val_acc=0.8633, val_loss=0.0973678987654608
Epoch 4/15 - loss=0.0943, val_acc=0.8713, val_loss=0.09508071831895568
Epoch 5/15 - loss=0.0906, val_acc=0.8728, val_loss=0.09288174359546006
Epoch 6/15 - loss=0.0872, val_acc=0.8648, val_loss=0.097597768798854
Epoch 7/15 - loss=0.0844, val_acc=0.8702, val_loss=0.09190042013525783
Epoch 8/15 - loss=0.0833, val_acc=0.8580, val_loss=0.09973719189928204
Epoch 9/15 - loss=0.0814, val_acc=0.8738, val_loss=0.09209028580316203
Epoch 10/15 - loss=0.0789, val_acc=0.8605, val_loss=0.09909293062333975
Epoch 11/15 - loss=0.0785, val_acc=0.8647, val_loss=0.09600598337536208
Epoch 12/15 - loss=0.0760, val_acc=0.8723, val_loss=0.09453986752509165
Epoch 13/15 - loss=0.0755, val_acc=0.8727, val_loss=0.09029244110907954
Epoch 14/15 - loss=0.0740, val_acc=0.8780, val_loss=0.08767841927800032
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▅▃▃▂▃▂▄▂▄▃▃▂▁▁
validation_accuracy,▁▄▅▇▇▆▇▅▇▅▆▇▇██

0,1
epoch,15.0
test_accuracy,0.8692
training_loss,0.07261
validation loss,0.08955
validation_accuracy,0.87633


[34m[1mwandb[0m: Agent Starting Run: 6v5bdmkq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5558, val_acc=0.8328, val_loss=0.43777155646277605
Epoch 2/15 - loss=0.4027, val_acc=0.8590, val_loss=0.3860200103252417
Epoch 3/15 - loss=0.3684, val_acc=0.8703, val_loss=0.36221018302277114
Epoch 4/15 - loss=0.3451, val_acc=0.8683, val_loss=0.36754871587464294
Epoch 5/15 - loss=0.3293, val_acc=0.8677, val_loss=0.35029306676145977
Epoch 6/15 - loss=0.3156, val_acc=0.8682, val_loss=0.35125727292328474
Epoch 7/15 - loss=0.3063, val_acc=0.8582, val_loss=0.38536835636902395
Epoch 8/15 - loss=0.2972, val_acc=0.8767, val_loss=0.3388813449871443
Epoch 9/15 - loss=0.2906, val_acc=0.8737, val_loss=0.3436672801769622
Epoch 10/15 - loss=0.2836, val_acc=0.8790, val_loss=0.32317606716566977
Epoch 11/15 - loss=0.2767, val_acc=0.8710, val_loss=0.33245739693126186
Epoch 12/15 - loss=0.2706, val_acc=0.8812, val_loss=0.33168192896185533
Epoch 13/15 - loss=0.2654, val_acc=0.8838, val_loss=0.31609606544582625
Epoch 14/15 - loss=0.2602, val_acc=0.8830, val_loss=0.3224808191262113
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▃▃▅▂▃▁▂▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▄▇▇▇▆███▇

0,1
epoch,15.0
test_accuracy,0.8687
training_loss,0.25581
validation loss,0.32548
validation_accuracy,0.87767


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w6a0v1vg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5558, val_acc=0.8328, val_loss=0.43777155646277605
Epoch 2/15 - loss=0.4027, val_acc=0.8590, val_loss=0.3860200103252417
Epoch 3/15 - loss=0.3684, val_acc=0.8703, val_loss=0.36221018302277114
Epoch 4/15 - loss=0.3451, val_acc=0.8683, val_loss=0.36754871587464294
Epoch 5/15 - loss=0.3293, val_acc=0.8677, val_loss=0.35029306676145977
Epoch 6/15 - loss=0.3156, val_acc=0.8682, val_loss=0.35125727292328474
Epoch 7/15 - loss=0.3063, val_acc=0.8582, val_loss=0.38536835636902395
Epoch 8/15 - loss=0.2972, val_acc=0.8767, val_loss=0.3388813449871443
Epoch 9/15 - loss=0.2906, val_acc=0.8737, val_loss=0.3436672801769622
Epoch 10/15 - loss=0.2836, val_acc=0.8790, val_loss=0.32317606716566977
Epoch 11/15 - loss=0.2767, val_acc=0.8710, val_loss=0.33245739693126186
Epoch 12/15 - loss=0.2706, val_acc=0.8812, val_loss=0.33168192896185533
Epoch 13/15 - loss=0.2654, val_acc=0.8838, val_loss=0.31609606544582625
Epoch 14/15 - loss=0.2602, val_acc=0.8830, val_loss=0.3224808191262113
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▃▃▅▂▃▁▂▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▄▇▇▇▆███▇

0,1
epoch,15.0
test_accuracy,0.8687
training_loss,0.25581
validation loss,0.32548
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: y1cnyfot with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2557, val_acc=0.7805, val_loss=0.1707722580727658
Epoch 2/15 - loss=0.1444, val_acc=0.8188, val_loss=0.1329447690918598
Epoch 3/15 - loss=0.1219, val_acc=0.8345, val_loss=0.1195483380835641
Epoch 4/15 - loss=0.1125, val_acc=0.8422, val_loss=0.11316655820370497
Epoch 5/15 - loss=0.1070, val_acc=0.8470, val_loss=0.10848648750904173
Epoch 6/15 - loss=0.1027, val_acc=0.8533, val_loss=0.10580834427994996
Epoch 7/15 - loss=0.0995, val_acc=0.8598, val_loss=0.10260613500171341
Epoch 8/15 - loss=0.0971, val_acc=0.8607, val_loss=0.10063556806982776
Epoch 9/15 - loss=0.0948, val_acc=0.8617, val_loss=0.09884146377346889
Epoch 10/15 - loss=0.0930, val_acc=0.8623, val_loss=0.09955211952367332
Epoch 11/15 - loss=0.0915, val_acc=0.8637, val_loss=0.09715185716357451
Epoch 12/15 - loss=0.0897, val_acc=0.8650, val_loss=0.09731779821621647
Epoch 13/15 - loss=0.0885, val_acc=0.8693, val_loss=0.09411591335441452
Epoch 14/15 - loss=0.0873, val_acc=0.8658, val_loss=0.09497111199991268
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8636
training_loss,0.08612
validation loss,0.09472
validation_accuracy,0.868


[34m[1mwandb[0m: Agent Starting Run: 6ebinc8o with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6270, val_acc=0.8425, val_loss=0.44806579568108995
Epoch 2/15 - loss=0.4148, val_acc=0.8480, val_loss=0.40956287006615216
Epoch 3/15 - loss=0.3790, val_acc=0.8595, val_loss=0.3849564114459321
Epoch 4/15 - loss=0.3550, val_acc=0.8663, val_loss=0.3724371378558453
Epoch 5/15 - loss=0.3403, val_acc=0.8695, val_loss=0.35556014969300476
Epoch 6/15 - loss=0.3229, val_acc=0.8707, val_loss=0.3478934113529482
Epoch 7/15 - loss=0.3146, val_acc=0.8768, val_loss=0.3295220304940766
Epoch 8/15 - loss=0.3054, val_acc=0.8653, val_loss=0.3652100624233602
Epoch 9/15 - loss=0.2982, val_acc=0.8775, val_loss=0.3312161494387455
Epoch 10/15 - loss=0.2902, val_acc=0.8688, val_loss=0.36596795707779933
Epoch 11/15 - loss=0.2842, val_acc=0.8718, val_loss=0.3462242708421703
Epoch 12/15 - loss=0.2771, val_acc=0.8800, val_loss=0.33932088558946594
Epoch 13/15 - loss=0.2720, val_acc=0.8750, val_loss=0.3352174624255721
Epoch 14/15 - loss=0.2690, val_acc=0.8800, val_loss=0.32494663082018665
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▆▄▄▃▂▁▃▁▃▂▂▂▁▄
validation_accuracy,▁▂▄▅▆▆▇▅█▆▆█▇█▆

0,1
epoch,15.0
test_accuracy,0.863
training_loss,0.26503
validation loss,0.37061
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: 5c7v99vr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1397, val_acc=0.8355, val_loss=0.11350597836975147
Epoch 2/15 - loss=0.1057, val_acc=0.8588, val_loss=0.10112544991042209
Epoch 3/15 - loss=0.0971, val_acc=0.8688, val_loss=0.09539135940287359
Epoch 4/15 - loss=0.0914, val_acc=0.8623, val_loss=0.09890848436977485
Epoch 5/15 - loss=0.0877, val_acc=0.8713, val_loss=0.09307759306069037
Epoch 6/15 - loss=0.0842, val_acc=0.8645, val_loss=0.09620183190547767
Epoch 7/15 - loss=0.0824, val_acc=0.8698, val_loss=0.09265516953815112
Epoch 8/15 - loss=0.0803, val_acc=0.8785, val_loss=0.08747185659491241
Epoch 9/15 - loss=0.0784, val_acc=0.8743, val_loss=0.09149474544387273
Epoch 10/15 - loss=0.0767, val_acc=0.8785, val_loss=0.08797854065202694
Epoch 11/15 - loss=0.0754, val_acc=0.8757, val_loss=0.08909678934648581
Epoch 12/15 - loss=0.0739, val_acc=0.8755, val_loss=0.08976308206757432
Epoch 13/15 - loss=0.0721, val_acc=0.8727, val_loss=0.09255991391862062
Epoch 14/15 - loss=0.0712, val_acc=0.8808, val_loss=0.08545360661998516
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▅▃▄▃▄▃▂▃▂▂▂▃▁▁
validation_accuracy,▁▅▆▅▇▅▆█▇█▇▇▇██

0,1
epoch,15.0
test_accuracy,0.8726
training_loss,0.07011
validation loss,0.08633
validation_accuracy,0.87917


[34m[1mwandb[0m: Agent Starting Run: whd3ezoq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2674, val_acc=0.7638, val_loss=0.1743751192722
Epoch 2/15 - loss=0.1500, val_acc=0.8127, val_loss=0.14090028356791603
Epoch 3/15 - loss=0.1293, val_acc=0.8278, val_loss=0.12823391681726123
Epoch 4/15 - loss=0.1195, val_acc=0.8408, val_loss=0.11902486080913699
Epoch 5/15 - loss=0.1134, val_acc=0.8455, val_loss=0.11426869310717432
Epoch 6/15 - loss=0.1090, val_acc=0.8462, val_loss=0.11188272051937002
Epoch 7/15 - loss=0.1058, val_acc=0.8507, val_loss=0.11078548537210581
Epoch 8/15 - loss=0.1033, val_acc=0.8555, val_loss=0.10661031260267315
Epoch 9/15 - loss=0.1010, val_acc=0.8540, val_loss=0.10798276465733196
Epoch 10/15 - loss=0.0991, val_acc=0.8595, val_loss=0.10309541399735062
Epoch 11/15 - loss=0.0971, val_acc=0.8573, val_loss=0.10286096169843634
Epoch 12/15 - loss=0.0955, val_acc=0.8622, val_loss=0.10150745408797428
Epoch 13/15 - loss=0.0941, val_acc=0.8645, val_loss=0.09836587726005193
Epoch 14/15 - loss=0.0928, val_acc=0.8658, val_loss=0.09854203803645092
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇███▇

0,1
epoch,15.0
test_accuracy,0.8513
training_loss,0.09155
validation loss,0.10188
validation_accuracy,0.85783


[34m[1mwandb[0m: Agent Starting Run: dk3ndtdi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2614, val_acc=0.7657, val_loss=0.17869219264990102
Epoch 2/15 - loss=0.1497, val_acc=0.8140, val_loss=0.13685060676687874
Epoch 3/15 - loss=0.1248, val_acc=0.8290, val_loss=0.12301112224892029
Epoch 4/15 - loss=0.1150, val_acc=0.8393, val_loss=0.1153765425620314
Epoch 5/15 - loss=0.1092, val_acc=0.8470, val_loss=0.11045117646180808
Epoch 6/15 - loss=0.1049, val_acc=0.8488, val_loss=0.1082665319748341
Epoch 7/15 - loss=0.1018, val_acc=0.8545, val_loss=0.10485673066199987
Epoch 8/15 - loss=0.0992, val_acc=0.8575, val_loss=0.10324114404053948
Epoch 9/15 - loss=0.0970, val_acc=0.8565, val_loss=0.10281740151268254
Epoch 10/15 - loss=0.0951, val_acc=0.8617, val_loss=0.10006913099133749
Epoch 11/15 - loss=0.0933, val_acc=0.8622, val_loss=0.09911340969458003
Epoch 12/15 - loss=0.0919, val_acc=0.8595, val_loss=0.10030356166940334
Epoch 13/15 - loss=0.0906, val_acc=0.8662, val_loss=0.09551479782723016
Epoch 14/15 - loss=0.0893, val_acc=0.8657, val_loss=0.09630514068844791
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8592
training_loss,0.08831
validation loss,0.09697
validation_accuracy,0.86183


[34m[1mwandb[0m: Agent Starting Run: w91vquse with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1280, val_acc=0.7710, val_loss=0.6874403469354357
Epoch 2/15 - loss=0.5903, val_acc=0.8093, val_loss=0.5478209353920453
Epoch 3/15 - loss=0.5036, val_acc=0.8313, val_loss=0.48710003291639853
Epoch 4/15 - loss=0.4640, val_acc=0.8400, val_loss=0.459879015729208
Epoch 5/15 - loss=0.4416, val_acc=0.8440, val_loss=0.4455406488407984
Epoch 6/15 - loss=0.4243, val_acc=0.8527, val_loss=0.4266782544020839
Epoch 7/15 - loss=0.4113, val_acc=0.8532, val_loss=0.4175954601943847
Epoch 8/15 - loss=0.4017, val_acc=0.8563, val_loss=0.40938636345511836
Epoch 9/15 - loss=0.3921, val_acc=0.8563, val_loss=0.40278812722555274
Epoch 10/15 - loss=0.3844, val_acc=0.8590, val_loss=0.395092607387703
Epoch 11/15 - loss=0.3772, val_acc=0.8623, val_loss=0.38809914083643315
Epoch 12/15 - loss=0.3697, val_acc=0.8593, val_loss=0.3885536930151969
Epoch 13/15 - loss=0.3639, val_acc=0.8633, val_loss=0.3797105619284139
Epoch 14/15 - loss=0.3589, val_acc=0.8652, val_loss=0.37356018365124766
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8597
training_loss,0.35299
validation loss,0.37165
validation_accuracy,0.86567


[34m[1mwandb[0m: Agent Starting Run: i9gpcw4a with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: 04f8s3xv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1556, val_acc=0.8275, val_loss=0.12376927939821812
Epoch 2/15 - loss=0.1123, val_acc=0.8487, val_loss=0.10914155139324669
Epoch 3/15 - loss=0.1032, val_acc=0.8567, val_loss=0.10336085367071043
Epoch 4/15 - loss=0.0974, val_acc=0.8550, val_loss=0.10369154206696556
Epoch 5/15 - loss=0.0928, val_acc=0.8692, val_loss=0.09388641542241928
Epoch 6/15 - loss=0.0891, val_acc=0.8633, val_loss=0.09691810265368508
Epoch 7/15 - loss=0.0867, val_acc=0.8635, val_loss=0.10091406397319008
Epoch 8/15 - loss=0.0849, val_acc=0.8763, val_loss=0.09052125489096238
Epoch 9/15 - loss=0.0826, val_acc=0.8658, val_loss=0.09602094514999028
Epoch 10/15 - loss=0.0812, val_acc=0.8783, val_loss=0.08653326105908628
Epoch 11/15 - loss=0.0792, val_acc=0.8668, val_loss=0.09581575435407996
Epoch 12/15 - loss=0.0783, val_acc=0.8660, val_loss=0.09726290569530321
Epoch 13/15 - loss=0.0770, val_acc=0.8797, val_loss=0.08673747693474843
Epoch 14/15 - loss=0.0756, val_acc=0.8777, val_loss=0.08932270018879274
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▄▂▃▁▃▃▁▂▂
validation_accuracy,▁▄▅▅▇▆▆█▆█▆▆██▇

0,1
epoch,15.0
test_accuracy,0.8618
training_loss,0.07445
validation loss,0.09193
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: pzd9gtl9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.9780, val_acc=0.7860, val_loss=0.6512629818942361
Epoch 2/15 - loss=0.5636, val_acc=0.8148, val_loss=0.5228944074259625
Epoch 3/15 - loss=0.4808, val_acc=0.8333, val_loss=0.4719444224507964
Epoch 4/15 - loss=0.4430, val_acc=0.8395, val_loss=0.443731536751288
Epoch 5/15 - loss=0.4199, val_acc=0.8490, val_loss=0.4218333284333397
Epoch 6/15 - loss=0.4029, val_acc=0.8515, val_loss=0.409878987433738
Epoch 7/15 - loss=0.3908, val_acc=0.8545, val_loss=0.4020654994868725
Epoch 8/15 - loss=0.3806, val_acc=0.8597, val_loss=0.3903855355267599
Epoch 9/15 - loss=0.3718, val_acc=0.8575, val_loss=0.3875869973248034
Epoch 10/15 - loss=0.3644, val_acc=0.8625, val_loss=0.37707866790659794
Epoch 11/15 - loss=0.3577, val_acc=0.8635, val_loss=0.3731787426115567
Epoch 12/15 - loss=0.3517, val_acc=0.8625, val_loss=0.3777987545739664
Epoch 13/15 - loss=0.3468, val_acc=0.8688, val_loss=0.35954957352361405
Epoch 14/15 - loss=0.3416, val_acc=0.8677, val_loss=0.3586472484648075
Epoch 15/15 - l

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▇▇▇▇█▇███

0,1
epoch,15.0
test_accuracy,0.8609
training_loss,0.33727
validation loss,0.35906
validation_accuracy,0.86617


[34m[1mwandb[0m: Agent Starting Run: bywz9zx6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6259, val_acc=0.8292, val_loss=0.46986892883264325
Epoch 2/15 - loss=0.4350, val_acc=0.8482, val_loss=0.40931900197957294
Epoch 3/15 - loss=0.3933, val_acc=0.8610, val_loss=0.3929496971886448
Epoch 4/15 - loss=0.3677, val_acc=0.8625, val_loss=0.38100447920381475
Epoch 5/15 - loss=0.3505, val_acc=0.8697, val_loss=0.34861474358761296
Epoch 6/15 - loss=0.3366, val_acc=0.8655, val_loss=0.3651900360971135
Epoch 7/15 - loss=0.3265, val_acc=0.8460, val_loss=0.44542384190344814
Epoch 8/15 - loss=0.3173, val_acc=0.8730, val_loss=0.34927960227979377
Epoch 9/15 - loss=0.3099, val_acc=0.8582, val_loss=0.38767435174390713
Epoch 10/15 - loss=0.3022, val_acc=0.8687, val_loss=0.3559346085114018
Epoch 11/15 - loss=0.2968, val_acc=0.8705, val_loss=0.3537828276951274
Epoch 12/15 - loss=0.2914, val_acc=0.8787, val_loss=0.33658602607240146
Epoch 13/15 - loss=0.2865, val_acc=0.8823, val_loss=0.32385177927827774
Epoch 14/15 - loss=0.2823, val_acc=0.8757, val_loss=0.3449483972933062
Epoch 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▇▂▄▃▂▂▁▂▃
validation_accuracy,▁▄▅▅▆▆▃▇▅▆▆██▇▆

0,1
epoch,15.0
test_accuracy,0.8634
training_loss,0.27677
validation loss,0.35515
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: u4k251nv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2262, val_acc=0.7972, val_loss=0.15340633505537074
Epoch 2/15 - loss=0.1351, val_acc=0.8230, val_loss=0.12849873562126413
Epoch 3/15 - loss=0.1186, val_acc=0.8365, val_loss=0.11654167326939735
Epoch 4/15 - loss=0.1104, val_acc=0.8438, val_loss=0.1116252541829645
Epoch 5/15 - loss=0.1056, val_acc=0.8525, val_loss=0.10642589802703982
Epoch 6/15 - loss=0.1016, val_acc=0.8510, val_loss=0.10472057925021
Epoch 7/15 - loss=0.0987, val_acc=0.8575, val_loss=0.10131341121112283
Epoch 8/15 - loss=0.0963, val_acc=0.8600, val_loss=0.09995529280625355
Epoch 9/15 - loss=0.0943, val_acc=0.8628, val_loss=0.09797341864225681
Epoch 10/15 - loss=0.0926, val_acc=0.8635, val_loss=0.09862472186706352
Epoch 11/15 - loss=0.0912, val_acc=0.8643, val_loss=0.09677629532947156
Epoch 12/15 - loss=0.0895, val_acc=0.8667, val_loss=0.09631917740106112
Epoch 13/15 - loss=0.0884, val_acc=0.8690, val_loss=0.09333225153615081
Epoch 14/15 - loss=0.0872, val_acc=0.8700, val_loss=0.09374016410840073
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8624
training_loss,0.086
validation loss,0.0937
validation_accuracy,0.86867


[34m[1mwandb[0m: Agent Starting Run: 5a5qqflc with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6259, val_acc=0.8292, val_loss=0.46986892883264325
Epoch 2/15 - loss=0.4350, val_acc=0.8482, val_loss=0.40931900197957294
Epoch 3/15 - loss=0.3933, val_acc=0.8610, val_loss=0.3929496971886448
Epoch 4/15 - loss=0.3677, val_acc=0.8625, val_loss=0.38100447920381475
Epoch 5/15 - loss=0.3505, val_acc=0.8697, val_loss=0.34861474358761296
Epoch 6/15 - loss=0.3366, val_acc=0.8655, val_loss=0.3651900360971135
Epoch 7/15 - loss=0.3265, val_acc=0.8460, val_loss=0.44542384190344814
Epoch 8/15 - loss=0.3173, val_acc=0.8730, val_loss=0.34927960227979377
Epoch 9/15 - loss=0.3099, val_acc=0.8582, val_loss=0.38767435174390713
Epoch 10/15 - loss=0.3022, val_acc=0.8687, val_loss=0.3559346085114018
Epoch 11/15 - loss=0.2968, val_acc=0.8705, val_loss=0.3537828276951274
Epoch 12/15 - loss=0.2914, val_acc=0.8787, val_loss=0.33658602607240146
Epoch 13/15 - loss=0.2865, val_acc=0.8823, val_loss=0.32385177927827774
Epoch 14/15 - loss=0.2823, val_acc=0.8757, val_loss=0.3449483972933062
Epoch 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▇▂▄▃▂▂▁▂▃
validation_accuracy,▁▄▅▅▆▆▃▇▅▆▆██▇▆

0,1
epoch,15.0
test_accuracy,0.8634
training_loss,0.27677
validation loss,0.35515
validation_accuracy,0.87


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vnv1cfp3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1524, val_acc=0.8313, val_loss=0.1245255895745075
Epoch 2/15 - loss=0.1118, val_acc=0.8493, val_loss=0.10794346530513453
Epoch 3/15 - loss=0.1019, val_acc=0.8608, val_loss=0.10114784327619517
Epoch 4/15 - loss=0.0959, val_acc=0.8623, val_loss=0.0987054971601063
Epoch 5/15 - loss=0.0916, val_acc=0.8738, val_loss=0.09158856352115755
Epoch 6/15 - loss=0.0883, val_acc=0.8640, val_loss=0.09430304095777431
Epoch 7/15 - loss=0.0862, val_acc=0.8655, val_loss=0.10152089438475206
Epoch 8/15 - loss=0.0835, val_acc=0.8730, val_loss=0.09048963897219622
Epoch 9/15 - loss=0.0821, val_acc=0.8745, val_loss=0.08900585688577257
Epoch 10/15 - loss=0.0799, val_acc=0.8692, val_loss=0.09298489125222162
Epoch 11/15 - loss=0.0777, val_acc=0.8758, val_loss=0.0892168823899104
Epoch 12/15 - loss=0.0771, val_acc=0.8790, val_loss=0.08838762237306609
Epoch 13/15 - loss=0.0758, val_acc=0.8842, val_loss=0.08579004300847413
Epoch 14/15 - loss=0.0741, val_acc=0.8825, val_loss=0.08606499442054288
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▃▄▂▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▅▇▅▆▇▇▆▇▇███

0,1
epoch,15.0
test_accuracy,0.8724
training_loss,0.07339
validation loss,0.08512
validation_accuracy,0.8825


[34m[1mwandb[0m: Agent Starting Run: 5b3t6i9u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5558, val_acc=0.8328, val_loss=0.43777155646277605
Epoch 2/15 - loss=0.4027, val_acc=0.8590, val_loss=0.3860200103252417
Epoch 3/15 - loss=0.3684, val_acc=0.8703, val_loss=0.36221018302277114
Epoch 4/15 - loss=0.3451, val_acc=0.8683, val_loss=0.36754871587464294
Epoch 5/15 - loss=0.3293, val_acc=0.8677, val_loss=0.35029306676145977
Epoch 6/15 - loss=0.3156, val_acc=0.8682, val_loss=0.35125727292328474
Epoch 7/15 - loss=0.3063, val_acc=0.8582, val_loss=0.38536835636902395
Epoch 8/15 - loss=0.2972, val_acc=0.8767, val_loss=0.3388813449871443
Epoch 9/15 - loss=0.2906, val_acc=0.8737, val_loss=0.3436672801769622
Epoch 10/15 - loss=0.2836, val_acc=0.8790, val_loss=0.32317606716566977
Epoch 11/15 - loss=0.2767, val_acc=0.8710, val_loss=0.33245739693126186
Epoch 12/15 - loss=0.2706, val_acc=0.8812, val_loss=0.33168192896185533
Epoch 13/15 - loss=0.2654, val_acc=0.8838, val_loss=0.31609606544582625
Epoch 14/15 - loss=0.2602, val_acc=0.8830, val_loss=0.3224808191262113
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▃▃▅▂▃▁▂▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▄▇▇▇▆███▇

0,1
epoch,15.0
test_accuracy,0.8687
training_loss,0.25581
validation loss,0.32548
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: lp8f580i with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6270, val_acc=0.8425, val_loss=0.44806579568108995
Epoch 2/15 - loss=0.4148, val_acc=0.8480, val_loss=0.40956287006615216
Epoch 3/15 - loss=0.3790, val_acc=0.8595, val_loss=0.3849564114459321
Epoch 4/15 - loss=0.3550, val_acc=0.8663, val_loss=0.3724371378558453
Epoch 5/15 - loss=0.3403, val_acc=0.8695, val_loss=0.35556014969300476
Epoch 6/15 - loss=0.3229, val_acc=0.8707, val_loss=0.3478934113529482
Epoch 7/15 - loss=0.3146, val_acc=0.8768, val_loss=0.3295220304940766
Epoch 8/15 - loss=0.3054, val_acc=0.8653, val_loss=0.3652100624233602
Epoch 9/15 - loss=0.2982, val_acc=0.8775, val_loss=0.3312161494387455
Epoch 10/15 - loss=0.2902, val_acc=0.8688, val_loss=0.36596795707779933
Epoch 11/15 - loss=0.2842, val_acc=0.8718, val_loss=0.3462242708421703
Epoch 12/15 - loss=0.2771, val_acc=0.8800, val_loss=0.33932088558946594
Epoch 13/15 - loss=0.2720, val_acc=0.8750, val_loss=0.3352174624255721
Epoch 14/15 - loss=0.2690, val_acc=0.8800, val_loss=0.32494663082018665
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▆▄▄▃▂▁▃▁▃▂▂▂▁▄
validation_accuracy,▁▂▄▅▆▆▇▅█▆▆█▇█▆

0,1
epoch,15.0
test_accuracy,0.863
training_loss,0.26503
validation loss,0.37061
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: 0kjc24ri with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2967, val_acc=0.7340, val_loss=0.19353167162186222
Epoch 2/15 - loss=0.1641, val_acc=0.7993, val_loss=0.15037936977739025
Epoch 3/15 - loss=0.1371, val_acc=0.8235, val_loss=0.1334672447531351
Epoch 4/15 - loss=0.1251, val_acc=0.8307, val_loss=0.12476232000834377
Epoch 5/15 - loss=0.1182, val_acc=0.8368, val_loss=0.11905171376192653
Epoch 6/15 - loss=0.1131, val_acc=0.8422, val_loss=0.11590167759760822
Epoch 7/15 - loss=0.1097, val_acc=0.8460, val_loss=0.1134366287014274
Epoch 8/15 - loss=0.1068, val_acc=0.8518, val_loss=0.10946153657566889
Epoch 9/15 - loss=0.1045, val_acc=0.8513, val_loss=0.10955881514100405
Epoch 10/15 - loss=0.1025, val_acc=0.8555, val_loss=0.10582188828075677
Epoch 11/15 - loss=0.1004, val_acc=0.8553, val_loss=0.10445724863847552
Epoch 12/15 - loss=0.0988, val_acc=0.8565, val_loss=0.1054353527369703
Epoch 13/15 - loss=0.0974, val_acc=0.8602, val_loss=0.10143812962956059
Epoch 14/15 - loss=0.0960, val_acc=0.8630, val_loss=0.10104848043476686
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▅▆▆▇▇▇▇▇█████▇

0,1
epoch,15.0
test_accuracy,0.8488
training_loss,0.09477
validation loss,0.10395
validation_accuracy,0.85283


[34m[1mwandb[0m: Agent Starting Run: vd86x7rf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.9780, val_acc=0.7860, val_loss=0.6512629818942361
Epoch 2/15 - loss=0.5636, val_acc=0.8148, val_loss=0.5228944074259625
Epoch 3/15 - loss=0.4808, val_acc=0.8333, val_loss=0.4719444224507964
Epoch 4/15 - loss=0.4430, val_acc=0.8395, val_loss=0.443731536751288
Epoch 5/15 - loss=0.4199, val_acc=0.8490, val_loss=0.4218333284333397
Epoch 6/15 - loss=0.4029, val_acc=0.8515, val_loss=0.409878987433738
Epoch 7/15 - loss=0.3908, val_acc=0.8545, val_loss=0.4020654994868725
Epoch 8/15 - loss=0.3806, val_acc=0.8597, val_loss=0.3903855355267599
Epoch 9/15 - loss=0.3718, val_acc=0.8575, val_loss=0.3875869973248034
Epoch 10/15 - loss=0.3644, val_acc=0.8625, val_loss=0.37707866790659794
Epoch 11/15 - loss=0.3577, val_acc=0.8635, val_loss=0.3731787426115567
Epoch 12/15 - loss=0.3517, val_acc=0.8625, val_loss=0.3777987545739664
Epoch 13/15 - loss=0.3468, val_acc=0.8688, val_loss=0.35954957352361405
Epoch 14/15 - loss=0.3416, val_acc=0.8677, val_loss=0.3586472484648075
Epoch 15/15 - l

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▇▇▇▇█▇███

0,1
epoch,15.0
test_accuracy,0.8609
training_loss,0.33727
validation loss,0.35906
validation_accuracy,0.86617


[34m[1mwandb[0m: Agent Starting Run: r0zr4bgn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1397, val_acc=0.8355, val_loss=0.11350597836975147
Epoch 2/15 - loss=0.1057, val_acc=0.8588, val_loss=0.10112544991042209
Epoch 3/15 - loss=0.0971, val_acc=0.8688, val_loss=0.09539135940287359
Epoch 4/15 - loss=0.0914, val_acc=0.8623, val_loss=0.09890848436977485
Epoch 5/15 - loss=0.0877, val_acc=0.8713, val_loss=0.09307759306069037
Epoch 6/15 - loss=0.0842, val_acc=0.8645, val_loss=0.09620183190547767
Epoch 7/15 - loss=0.0824, val_acc=0.8698, val_loss=0.09265516953815112
Epoch 8/15 - loss=0.0803, val_acc=0.8785, val_loss=0.08747185659491241
Epoch 9/15 - loss=0.0784, val_acc=0.8743, val_loss=0.09149474544387273
Epoch 10/15 - loss=0.0767, val_acc=0.8785, val_loss=0.08797854065202694
Epoch 11/15 - loss=0.0754, val_acc=0.8757, val_loss=0.08909678934648581
Epoch 12/15 - loss=0.0739, val_acc=0.8755, val_loss=0.08976308206757432
Epoch 13/15 - loss=0.0721, val_acc=0.8727, val_loss=0.09255991391862062
Epoch 14/15 - loss=0.0712, val_acc=0.8808, val_loss=0.08545360661998516
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▅▃▄▃▄▃▂▃▂▂▂▃▁▁
validation_accuracy,▁▅▆▅▇▅▆█▇█▇▇▇██

0,1
epoch,15.0
test_accuracy,0.8726
training_loss,0.07011
validation loss,0.08633
validation_accuracy,0.87917


[34m[1mwandb[0m: Agent Starting Run: nefdckof with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1413, val_acc=0.8418, val_loss=0.11187608837434508
Epoch 2/15 - loss=0.1032, val_acc=0.8572, val_loss=0.10292216007550754
Epoch 3/15 - loss=0.0944, val_acc=0.8645, val_loss=0.09652480990400185
Epoch 4/15 - loss=0.0897, val_acc=0.8685, val_loss=0.09476959669844394
Epoch 5/15 - loss=0.0862, val_acc=0.8678, val_loss=0.0945033290148884
Epoch 6/15 - loss=0.0822, val_acc=0.8603, val_loss=0.09800798271184999
Epoch 7/15 - loss=0.0804, val_acc=0.8767, val_loss=0.08819866380313499
Epoch 8/15 - loss=0.0776, val_acc=0.8752, val_loss=0.09065071683291501
Epoch 9/15 - loss=0.0767, val_acc=0.8832, val_loss=0.0857508196502852
Epoch 10/15 - loss=0.0749, val_acc=0.8777, val_loss=0.08825522418715129
Epoch 11/15 - loss=0.0735, val_acc=0.8748, val_loss=0.08935921443706722
Epoch 12/15 - loss=0.0709, val_acc=0.8795, val_loss=0.08773304187319131
Epoch 13/15 - loss=0.0707, val_acc=0.8830, val_loss=0.08585769123801612
Epoch 14/15 - loss=0.0688, val_acc=0.8865, val_loss=0.08314714065452436
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▅▂▃▂▂▃▂▂▁▃
validation_accuracy,▁▃▅▅▅▄▆▆▇▇▆▇▇█▆

0,1
epoch,15.0
test_accuracy,0.8671
training_loss,0.06787
validation loss,0.0915
validation_accuracy,0.87333


[34m[1mwandb[0m: Agent Starting Run: c5yvoc3p with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Agent Starting Run: zo2kxn37 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hg7sswlu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2557, val_acc=0.7805, val_loss=0.1707722580727658
Epoch 2/15 - loss=0.1444, val_acc=0.8188, val_loss=0.1329447690918598
Epoch 3/15 - loss=0.1219, val_acc=0.8345, val_loss=0.1195483380835641
Epoch 4/15 - loss=0.1125, val_acc=0.8422, val_loss=0.11316655820370497
Epoch 5/15 - loss=0.1070, val_acc=0.8470, val_loss=0.10848648750904173
Epoch 6/15 - loss=0.1027, val_acc=0.8533, val_loss=0.10580834427994996
Epoch 7/15 - loss=0.0995, val_acc=0.8598, val_loss=0.10260613500171341
Epoch 8/15 - loss=0.0971, val_acc=0.8607, val_loss=0.10063556806982776
Epoch 9/15 - loss=0.0948, val_acc=0.8617, val_loss=0.09884146377346889
Epoch 10/15 - loss=0.0930, val_acc=0.8623, val_loss=0.09955211952367332
Epoch 11/15 - loss=0.0915, val_acc=0.8637, val_loss=0.09715185716357451
Epoch 12/15 - loss=0.0897, val_acc=0.8650, val_loss=0.09731779821621647
Epoch 13/15 - loss=0.0885, val_acc=0.8693, val_loss=0.09411591335441452
Epoch 14/15 - loss=0.0873, val_acc=0.8658, val_loss=0.09497111199991268
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8636
training_loss,0.08612
validation loss,0.09472
validation_accuracy,0.868


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o7bp0rtv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1432, val_acc=0.8278, val_loss=0.12019735603298004
Epoch 2/15 - loss=0.1063, val_acc=0.8537, val_loss=0.10458825745834655
Epoch 3/15 - loss=0.0980, val_acc=0.8640, val_loss=0.09894489446275748
Epoch 4/15 - loss=0.0928, val_acc=0.8628, val_loss=0.09815624616556345
Epoch 5/15 - loss=0.0885, val_acc=0.8652, val_loss=0.09491658417853527
Epoch 6/15 - loss=0.0853, val_acc=0.8692, val_loss=0.09312992971938058
Epoch 7/15 - loss=0.0827, val_acc=0.8712, val_loss=0.09282399194692598
Epoch 8/15 - loss=0.0809, val_acc=0.8782, val_loss=0.08817061121725349
Epoch 9/15 - loss=0.0787, val_acc=0.8765, val_loss=0.09128837832752773
Epoch 10/15 - loss=0.0770, val_acc=0.8765, val_loss=0.08810301816926316
Epoch 11/15 - loss=0.0753, val_acc=0.8757, val_loss=0.09139334683740018
Epoch 12/15 - loss=0.0741, val_acc=0.8788, val_loss=0.08844351168724683
Epoch 13/15 - loss=0.0731, val_acc=0.8822, val_loss=0.08722778262327051
Epoch 14/15 - loss=0.0716, val_acc=0.8777, val_loss=0.08822212025571004
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▃▂▂▁▂▁▂▁▁▁▁
validation_accuracy,▁▄▆▆▆▆▇▇▇▇▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8703
training_loss,0.07025
validation loss,0.0887
validation_accuracy,0.87783


[34m[1mwandb[0m: Agent Starting Run: vu22wpzm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5558, val_acc=0.8328, val_loss=0.43777155646277605
Epoch 2/15 - loss=0.4027, val_acc=0.8590, val_loss=0.3860200103252417
Epoch 3/15 - loss=0.3684, val_acc=0.8703, val_loss=0.36221018302277114
Epoch 4/15 - loss=0.3451, val_acc=0.8683, val_loss=0.36754871587464294
Epoch 5/15 - loss=0.3293, val_acc=0.8677, val_loss=0.35029306676145977
Epoch 6/15 - loss=0.3156, val_acc=0.8682, val_loss=0.35125727292328474
Epoch 7/15 - loss=0.3063, val_acc=0.8582, val_loss=0.38536835636902395
Epoch 8/15 - loss=0.2972, val_acc=0.8767, val_loss=0.3388813449871443
Epoch 9/15 - loss=0.2906, val_acc=0.8737, val_loss=0.3436672801769622
Epoch 10/15 - loss=0.2836, val_acc=0.8790, val_loss=0.32317606716566977
Epoch 11/15 - loss=0.2767, val_acc=0.8710, val_loss=0.33245739693126186
Epoch 12/15 - loss=0.2706, val_acc=0.8812, val_loss=0.33168192896185533
Epoch 13/15 - loss=0.2654, val_acc=0.8838, val_loss=0.31609606544582625
Epoch 14/15 - loss=0.2602, val_acc=0.8830, val_loss=0.3224808191262113
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▃▃▅▂▃▁▂▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▄▇▇▇▆███▇

0,1
epoch,15.0
test_accuracy,0.8687
training_loss,0.25581
validation loss,0.32548
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: iab6ppzk with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2967, val_acc=0.7340, val_loss=0.19353167162186222
Epoch 2/15 - loss=0.1641, val_acc=0.7993, val_loss=0.15037936977739025
Epoch 3/15 - loss=0.1371, val_acc=0.8235, val_loss=0.1334672447531351
Epoch 4/15 - loss=0.1251, val_acc=0.8307, val_loss=0.12476232000834377
Epoch 5/15 - loss=0.1182, val_acc=0.8368, val_loss=0.11905171376192653
Epoch 6/15 - loss=0.1131, val_acc=0.8422, val_loss=0.11590167759760822
Epoch 7/15 - loss=0.1097, val_acc=0.8460, val_loss=0.1134366287014274
Epoch 8/15 - loss=0.1068, val_acc=0.8518, val_loss=0.10946153657566889
Epoch 9/15 - loss=0.1045, val_acc=0.8513, val_loss=0.10955881514100405
Epoch 10/15 - loss=0.1025, val_acc=0.8555, val_loss=0.10582188828075677
Epoch 11/15 - loss=0.1004, val_acc=0.8553, val_loss=0.10445724863847552
Epoch 12/15 - loss=0.0988, val_acc=0.8565, val_loss=0.1054353527369703
Epoch 13/15 - loss=0.0974, val_acc=0.8602, val_loss=0.10143812962956059
Epoch 14/15 - loss=0.0960, val_acc=0.8630, val_loss=0.10104848043476686
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▅▆▆▇▇▇▇▇█████▇

0,1
epoch,15.0
test_accuracy,0.8488
training_loss,0.09477
validation loss,0.10395
validation_accuracy,0.85283


[34m[1mwandb[0m: Agent Starting Run: guw4zvjw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.0304, val_acc=0.7783, val_loss=0.6854700131235105
Epoch 2/15 - loss=0.5907, val_acc=0.8133, val_loss=0.5398483609602577
Epoch 3/15 - loss=0.4939, val_acc=0.8337, val_loss=0.4772536365495495
Epoch 4/15 - loss=0.4496, val_acc=0.8383, val_loss=0.44708576067735356
Epoch 5/15 - loss=0.4234, val_acc=0.8473, val_loss=0.4243967248349546
Epoch 6/15 - loss=0.4040, val_acc=0.8550, val_loss=0.4078576478340448
Epoch 7/15 - loss=0.3900, val_acc=0.8562, val_loss=0.3979806607141436
Epoch 8/15 - loss=0.3794, val_acc=0.8595, val_loss=0.3876338099364094
Epoch 9/15 - loss=0.3700, val_acc=0.8613, val_loss=0.37961898499548835
Epoch 10/15 - loss=0.3619, val_acc=0.8623, val_loss=0.3775909875707191
Epoch 11/15 - loss=0.3552, val_acc=0.8632, val_loss=0.3714708377316151
Epoch 12/15 - loss=0.3479, val_acc=0.8660, val_loss=0.3702109490551611
Epoch 13/15 - loss=0.3434, val_acc=0.8712, val_loss=0.3552438853868546
Epoch 14/15 - loss=0.3379, val_acc=0.8705, val_loss=0.35517057870899993
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8635
training_loss,0.33301
validation loss,0.35395
validation_accuracy,0.86867


[34m[1mwandb[0m: Agent Starting Run: 6vrl0n86 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5588, val_acc=0.8448, val_loss=0.4271538644512599
Epoch 2/15 - loss=0.3943, val_acc=0.8480, val_loss=0.40445947172242025
Epoch 3/15 - loss=0.3609, val_acc=0.8647, val_loss=0.3622889719636303
Epoch 4/15 - loss=0.3390, val_acc=0.8708, val_loss=0.3485043947176629
Epoch 5/15 - loss=0.3253, val_acc=0.8713, val_loss=0.35399132639249614
Epoch 6/15 - loss=0.3102, val_acc=0.8805, val_loss=0.32743767254658884
Epoch 7/15 - loss=0.3012, val_acc=0.8778, val_loss=0.33563633969756895
Epoch 8/15 - loss=0.2913, val_acc=0.8673, val_loss=0.3643031518449673
Epoch 9/15 - loss=0.2855, val_acc=0.8812, val_loss=0.32097279264277906
Epoch 10/15 - loss=0.2787, val_acc=0.8770, val_loss=0.3393258879564772
Epoch 11/15 - loss=0.2732, val_acc=0.8807, val_loss=0.3367238459348289
Epoch 12/15 - loss=0.2634, val_acc=0.8760, val_loss=0.33319998529664646
Epoch 13/15 - loss=0.2609, val_acc=0.8798, val_loss=0.33265992856267446
Epoch 14/15 - loss=0.2546, val_acc=0.8857, val_loss=0.3187087963350519
Epoch 15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▇▄▃▃▂▂▄▁▂▂▂▂▁▂
validation_accuracy,▁▂▄▅▆▇▇▅▇▇▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8713
training_loss,0.24972
validation loss,0.32963
validation_accuracy,0.88


[34m[1mwandb[0m: Agent Starting Run: d4e3pz0x with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1546, val_acc=0.8385, val_loss=0.11807534667181187
Epoch 2/15 - loss=0.1089, val_acc=0.8507, val_loss=0.1079245415272775
Epoch 3/15 - loss=0.1008, val_acc=0.8613, val_loss=0.10000543939049349
Epoch 4/15 - loss=0.0952, val_acc=0.8652, val_loss=0.09625666071572679
Epoch 5/15 - loss=0.0923, val_acc=0.8683, val_loss=0.09380915621205271
Epoch 6/15 - loss=0.0880, val_acc=0.8592, val_loss=0.10111848930572352
Epoch 7/15 - loss=0.0857, val_acc=0.8758, val_loss=0.08908036578476146
Epoch 8/15 - loss=0.0847, val_acc=0.8598, val_loss=0.10085679632055909
Epoch 9/15 - loss=0.0831, val_acc=0.8683, val_loss=0.09525840257122066
Epoch 10/15 - loss=0.0808, val_acc=0.8637, val_loss=0.0966752390122895
Epoch 11/15 - loss=0.0801, val_acc=0.8692, val_loss=0.09290405982900182
Epoch 12/15 - loss=0.0777, val_acc=0.8675, val_loss=0.09691714929371673
Epoch 13/15 - loss=0.0772, val_acc=0.8732, val_loss=0.0920233822369103
Epoch 14/15 - loss=0.0760, val_acc=0.8767, val_loss=0.08734610662139132
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▃▂▄▁▄▃▃▂▃▂▁▃
validation_accuracy,▁▃▅▆▆▅█▅▆▆▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8652
training_loss,0.07423
validation loss,0.09461
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: 75ay7v14 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1524, val_acc=0.8313, val_loss=0.1245255895745075
Epoch 2/15 - loss=0.1118, val_acc=0.8493, val_loss=0.10794346530513453
Epoch 3/15 - loss=0.1019, val_acc=0.8608, val_loss=0.10114784327619517
Epoch 4/15 - loss=0.0959, val_acc=0.8623, val_loss=0.0987054971601063
Epoch 5/15 - loss=0.0916, val_acc=0.8738, val_loss=0.09158856352115755
Epoch 6/15 - loss=0.0883, val_acc=0.8640, val_loss=0.09430304095777431
Epoch 7/15 - loss=0.0862, val_acc=0.8655, val_loss=0.10152089438475206
Epoch 8/15 - loss=0.0835, val_acc=0.8730, val_loss=0.09048963897219622
Epoch 9/15 - loss=0.0821, val_acc=0.8745, val_loss=0.08900585688577257
Epoch 10/15 - loss=0.0799, val_acc=0.8692, val_loss=0.09298489125222162
Epoch 11/15 - loss=0.0777, val_acc=0.8758, val_loss=0.0892168823899104
Epoch 12/15 - loss=0.0771, val_acc=0.8790, val_loss=0.08838762237306609
Epoch 13/15 - loss=0.0758, val_acc=0.8842, val_loss=0.08579004300847413
Epoch 14/15 - loss=0.0741, val_acc=0.8825, val_loss=0.08606499442054288
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▃▄▂▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▅▇▅▆▇▇▆▇▇███

0,1
epoch,15.0
test_accuracy,0.8724
training_loss,0.07339
validation loss,0.08512
validation_accuracy,0.8825


[34m[1mwandb[0m: Agent Starting Run: c44ot10c with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6270, val_acc=0.8425, val_loss=0.44806579568108995
Epoch 2/15 - loss=0.4148, val_acc=0.8480, val_loss=0.40956287006615216
Epoch 3/15 - loss=0.3790, val_acc=0.8595, val_loss=0.3849564114459321
Epoch 4/15 - loss=0.3550, val_acc=0.8663, val_loss=0.3724371378558453
Epoch 5/15 - loss=0.3403, val_acc=0.8695, val_loss=0.35556014969300476
Epoch 6/15 - loss=0.3229, val_acc=0.8707, val_loss=0.3478934113529482
Epoch 7/15 - loss=0.3146, val_acc=0.8768, val_loss=0.3295220304940766
Epoch 8/15 - loss=0.3054, val_acc=0.8653, val_loss=0.3652100624233602
Epoch 9/15 - loss=0.2982, val_acc=0.8775, val_loss=0.3312161494387455
Epoch 10/15 - loss=0.2902, val_acc=0.8688, val_loss=0.36596795707779933
Epoch 11/15 - loss=0.2842, val_acc=0.8718, val_loss=0.3462242708421703
Epoch 12/15 - loss=0.2771, val_acc=0.8800, val_loss=0.33932088558946594
Epoch 13/15 - loss=0.2720, val_acc=0.8750, val_loss=0.3352174624255721
Epoch 14/15 - loss=0.2690, val_acc=0.8800, val_loss=0.32494663082018665
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▆▄▄▃▂▁▃▁▃▂▂▂▁▄
validation_accuracy,▁▂▄▅▆▆▇▅█▆▆█▇█▆

0,1
epoch,15.0
test_accuracy,0.863
training_loss,0.26503
validation loss,0.37061
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: 067imo0q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2614, val_acc=0.7657, val_loss=0.17869219264990102
Epoch 2/15 - loss=0.1497, val_acc=0.8140, val_loss=0.13685060676687874
Epoch 3/15 - loss=0.1248, val_acc=0.8290, val_loss=0.12301112224892029
Epoch 4/15 - loss=0.1150, val_acc=0.8393, val_loss=0.1153765425620314
Epoch 5/15 - loss=0.1092, val_acc=0.8470, val_loss=0.11045117646180808
Epoch 6/15 - loss=0.1049, val_acc=0.8488, val_loss=0.1082665319748341
Epoch 7/15 - loss=0.1018, val_acc=0.8545, val_loss=0.10485673066199987
Epoch 8/15 - loss=0.0992, val_acc=0.8575, val_loss=0.10324114404053948
Epoch 9/15 - loss=0.0970, val_acc=0.8565, val_loss=0.10281740151268254
Epoch 10/15 - loss=0.0951, val_acc=0.8617, val_loss=0.10006913099133749
Epoch 11/15 - loss=0.0933, val_acc=0.8622, val_loss=0.09911340969458003
Epoch 12/15 - loss=0.0919, val_acc=0.8595, val_loss=0.10030356166940334
Epoch 13/15 - loss=0.0906, val_acc=0.8662, val_loss=0.09551479782723016
Epoch 14/15 - loss=0.0893, val_acc=0.8657, val_loss=0.09630514068844791
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8592
training_loss,0.08831
validation loss,0.09697
validation_accuracy,0.86183


[34m[1mwandb[0m: Agent Starting Run: ailozv1o with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6259, val_acc=0.8292, val_loss=0.46986892883264325
Epoch 2/15 - loss=0.4350, val_acc=0.8482, val_loss=0.40931900197957294
Epoch 3/15 - loss=0.3933, val_acc=0.8610, val_loss=0.3929496971886448
Epoch 4/15 - loss=0.3677, val_acc=0.8625, val_loss=0.38100447920381475
Epoch 5/15 - loss=0.3505, val_acc=0.8697, val_loss=0.34861474358761296
Epoch 6/15 - loss=0.3366, val_acc=0.8655, val_loss=0.3651900360971135
Epoch 7/15 - loss=0.3265, val_acc=0.8460, val_loss=0.44542384190344814
Epoch 8/15 - loss=0.3173, val_acc=0.8730, val_loss=0.34927960227979377
Epoch 9/15 - loss=0.3099, val_acc=0.8582, val_loss=0.38767435174390713
Epoch 10/15 - loss=0.3022, val_acc=0.8687, val_loss=0.3559346085114018
Epoch 11/15 - loss=0.2968, val_acc=0.8705, val_loss=0.3537828276951274
Epoch 12/15 - loss=0.2914, val_acc=0.8787, val_loss=0.33658602607240146
Epoch 13/15 - loss=0.2865, val_acc=0.8823, val_loss=0.32385177927827774
Epoch 14/15 - loss=0.2823, val_acc=0.8757, val_loss=0.3449483972933062
Epoch 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▇▂▄▃▂▂▁▂▃
validation_accuracy,▁▄▅▅▆▆▃▇▅▆▆██▇▆

0,1
epoch,15.0
test_accuracy,0.8634
training_loss,0.27677
validation loss,0.35515
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: ryp1wozn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: ua1cf13u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5779, val_acc=0.8348, val_loss=0.43985750491571146
Epoch 2/15 - loss=0.4065, val_acc=0.8563, val_loss=0.3876037488820295
Epoch 3/15 - loss=0.3719, val_acc=0.8633, val_loss=0.38170573697800014
Epoch 4/15 - loss=0.3492, val_acc=0.8563, val_loss=0.3856889007710927
Epoch 5/15 - loss=0.3318, val_acc=0.8653, val_loss=0.3559947591976067
Epoch 6/15 - loss=0.3187, val_acc=0.8695, val_loss=0.346945216445743
Epoch 7/15 - loss=0.3089, val_acc=0.8655, val_loss=0.376927189905875
Epoch 8/15 - loss=0.2990, val_acc=0.8783, val_loss=0.3309598861360001
Epoch 9/15 - loss=0.2918, val_acc=0.8758, val_loss=0.34302784655103435
Epoch 10/15 - loss=0.2843, val_acc=0.8718, val_loss=0.34237081897012583
Epoch 11/15 - loss=0.2792, val_acc=0.8742, val_loss=0.3415574364712802
Epoch 12/15 - loss=0.2730, val_acc=0.8835, val_loss=0.3271817440888692
Epoch 13/15 - loss=0.2677, val_acc=0.8810, val_loss=0.32395863597477026
Epoch 14/15 - loss=0.2629, val_acc=0.8765, val_loss=0.3302192657515811
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▅▃▂▄▁▂▂▂▁▁▁▂
validation_accuracy,▁▄▅▄▅▆▅▇▇▆▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8715
training_loss,0.25899
validation loss,0.33395
validation_accuracy,0.87817


[34m[1mwandb[0m: Agent Starting Run: wotzlj3k with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1487, val_acc=0.7618, val_loss=0.7060827972598178
Epoch 2/15 - loss=0.6067, val_acc=0.8093, val_loss=0.5593056891504574
Epoch 3/15 - loss=0.5165, val_acc=0.8237, val_loss=0.5027129393573212
Epoch 4/15 - loss=0.4757, val_acc=0.8373, val_loss=0.47078306930415437
Epoch 5/15 - loss=0.4518, val_acc=0.8427, val_loss=0.44901364739761607
Epoch 6/15 - loss=0.4346, val_acc=0.8430, val_loss=0.4402083389557976
Epoch 7/15 - loss=0.4224, val_acc=0.8493, val_loss=0.43018341115921754
Epoch 8/15 - loss=0.4125, val_acc=0.8508, val_loss=0.419089080804916
Epoch 9/15 - loss=0.4034, val_acc=0.8497, val_loss=0.42172908662786857
Epoch 10/15 - loss=0.3961, val_acc=0.8592, val_loss=0.40600326465141573
Epoch 11/15 - loss=0.3892, val_acc=0.8543, val_loss=0.4025864385834172
Epoch 12/15 - loss=0.3826, val_acc=0.8568, val_loss=0.4000387158840789
Epoch 13/15 - loss=0.3770, val_acc=0.8575, val_loss=0.39027683618345355
Epoch 14/15 - loss=0.3717, val_acc=0.8620, val_loss=0.38640348967624033
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇████

0,1
epoch,15.0
test_accuracy,0.8522
training_loss,0.36679
validation loss,0.38584
validation_accuracy,0.86033


[34m[1mwandb[0m: Agent Starting Run: ol62aezo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5588, val_acc=0.8448, val_loss=0.4271538644512599
Epoch 2/15 - loss=0.3943, val_acc=0.8480, val_loss=0.40445947172242025
Epoch 3/15 - loss=0.3609, val_acc=0.8647, val_loss=0.3622889719636303
Epoch 4/15 - loss=0.3390, val_acc=0.8708, val_loss=0.3485043947176629
Epoch 5/15 - loss=0.3253, val_acc=0.8713, val_loss=0.35399132639249614
Epoch 6/15 - loss=0.3102, val_acc=0.8805, val_loss=0.32743767254658884
Epoch 7/15 - loss=0.3012, val_acc=0.8778, val_loss=0.33563633969756895
Epoch 8/15 - loss=0.2913, val_acc=0.8673, val_loss=0.3643031518449673
Epoch 9/15 - loss=0.2855, val_acc=0.8812, val_loss=0.32097279264277906
Epoch 10/15 - loss=0.2787, val_acc=0.8770, val_loss=0.3393258879564772
Epoch 11/15 - loss=0.2732, val_acc=0.8807, val_loss=0.3367238459348289
Epoch 12/15 - loss=0.2634, val_acc=0.8760, val_loss=0.33319998529664646
Epoch 13/15 - loss=0.2609, val_acc=0.8798, val_loss=0.33265992856267446
Epoch 14/15 - loss=0.2546, val_acc=0.8857, val_loss=0.3187087963350519
Epoch 15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▇▄▃▃▂▂▄▁▂▂▂▂▁▂
validation_accuracy,▁▂▄▅▆▇▇▅▇▇▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8713
training_loss,0.24972
validation loss,0.32963
validation_accuracy,0.88


[34m[1mwandb[0m: Agent Starting Run: cixjfwf6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6094, val_acc=0.8387, val_loss=0.45171820913596056
Epoch 2/15 - loss=0.4182, val_acc=0.8477, val_loss=0.414238206275963
Epoch 3/15 - loss=0.3859, val_acc=0.8590, val_loss=0.3921560463062768
Epoch 4/15 - loss=0.3664, val_acc=0.8628, val_loss=0.367580587928539
Epoch 5/15 - loss=0.3493, val_acc=0.8627, val_loss=0.380974212908759
Epoch 6/15 - loss=0.3326, val_acc=0.8778, val_loss=0.33713473099885616
Epoch 7/15 - loss=0.3213, val_acc=0.8740, val_loss=0.3478365374809786
Epoch 8/15 - loss=0.3106, val_acc=0.8653, val_loss=0.36720863611454246
Epoch 9/15 - loss=0.3047, val_acc=0.8788, val_loss=0.3365835997772626
Epoch 10/15 - loss=0.2945, val_acc=0.8682, val_loss=0.36050397008235285
Epoch 11/15 - loss=0.2902, val_acc=0.8723, val_loss=0.3483397652325901
Epoch 12/15 - loss=0.2804, val_acc=0.8722, val_loss=0.35855933576778337
Epoch 13/15 - loss=0.2785, val_acc=0.8775, val_loss=0.33436116963169304
Epoch 14/15 - loss=0.2735, val_acc=0.8823, val_loss=0.3344395518285832
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▆▄▃▄▁▂▃▁▃▂▂▁▁▂
validation_accuracy,▁▂▄▅▅▇▇▅▇▆▆▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8656
training_loss,0.26661
validation loss,0.35018
validation_accuracy,0.87367


[34m[1mwandb[0m: Agent Starting Run: p8kpc2qv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2967, val_acc=0.7340, val_loss=0.19353167162186222
Epoch 2/15 - loss=0.1641, val_acc=0.7993, val_loss=0.15037936977739025
Epoch 3/15 - loss=0.1371, val_acc=0.8235, val_loss=0.1334672447531351
Epoch 4/15 - loss=0.1251, val_acc=0.8307, val_loss=0.12476232000834377
Epoch 5/15 - loss=0.1182, val_acc=0.8368, val_loss=0.11905171376192653
Epoch 6/15 - loss=0.1131, val_acc=0.8422, val_loss=0.11590167759760822
Epoch 7/15 - loss=0.1097, val_acc=0.8460, val_loss=0.1134366287014274
Epoch 8/15 - loss=0.1068, val_acc=0.8518, val_loss=0.10946153657566889
Epoch 9/15 - loss=0.1045, val_acc=0.8513, val_loss=0.10955881514100405
Epoch 10/15 - loss=0.1025, val_acc=0.8555, val_loss=0.10582188828075677
Epoch 11/15 - loss=0.1004, val_acc=0.8553, val_loss=0.10445724863847552
Epoch 12/15 - loss=0.0988, val_acc=0.8565, val_loss=0.1054353527369703
Epoch 13/15 - loss=0.0974, val_acc=0.8602, val_loss=0.10143812962956059
Epoch 14/15 - loss=0.0960, val_acc=0.8630, val_loss=0.10104848043476686
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▅▆▆▇▇▇▇▇█████▇

0,1
epoch,15.0
test_accuracy,0.8488
training_loss,0.09477
validation loss,0.10395
validation_accuracy,0.85283


[34m[1mwandb[0m: Agent Starting Run: 8iepycok with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5779, val_acc=0.8348, val_loss=0.43985750491571146
Epoch 2/15 - loss=0.4065, val_acc=0.8563, val_loss=0.3876037488820295
Epoch 3/15 - loss=0.3719, val_acc=0.8633, val_loss=0.38170573697800014
Epoch 4/15 - loss=0.3492, val_acc=0.8563, val_loss=0.3856889007710927
Epoch 5/15 - loss=0.3318, val_acc=0.8653, val_loss=0.3559947591976067
Epoch 6/15 - loss=0.3187, val_acc=0.8695, val_loss=0.346945216445743
Epoch 7/15 - loss=0.3089, val_acc=0.8655, val_loss=0.376927189905875
Epoch 8/15 - loss=0.2990, val_acc=0.8783, val_loss=0.3309598861360001
Epoch 9/15 - loss=0.2918, val_acc=0.8758, val_loss=0.34302784655103435
Epoch 10/15 - loss=0.2843, val_acc=0.8718, val_loss=0.34237081897012583
Epoch 11/15 - loss=0.2792, val_acc=0.8742, val_loss=0.3415574364712802
Epoch 12/15 - loss=0.2730, val_acc=0.8835, val_loss=0.3271817440888692
Epoch 13/15 - loss=0.2677, val_acc=0.8810, val_loss=0.32395863597477026
Epoch 14/15 - loss=0.2629, val_acc=0.8765, val_loss=0.3302192657515811
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▅▃▂▄▁▂▂▂▁▁▁▂
validation_accuracy,▁▄▅▄▅▆▅▇▇▆▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8715
training_loss,0.25899
validation loss,0.33395
validation_accuracy,0.87817


[34m[1mwandb[0m: Agent Starting Run: htgpmiwr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.3047, val_acc=0.7235, val_loss=0.788669674683696
Epoch 2/15 - loss=0.6742, val_acc=0.7862, val_loss=0.6152835071864606
Epoch 3/15 - loss=0.5601, val_acc=0.8157, val_loss=0.5400664878764678
Epoch 4/15 - loss=0.5057, val_acc=0.8278, val_loss=0.5008668393211678
Epoch 5/15 - loss=0.4751, val_acc=0.8355, val_loss=0.4762351567778349
Epoch 6/15 - loss=0.4542, val_acc=0.8347, val_loss=0.46176735466885765
Epoch 7/15 - loss=0.4392, val_acc=0.8402, val_loss=0.44963650196665744
Epoch 8/15 - loss=0.4278, val_acc=0.8470, val_loss=0.4370708800210514
Epoch 9/15 - loss=0.4177, val_acc=0.8482, val_loss=0.43394314526168554
Epoch 10/15 - loss=0.4094, val_acc=0.8532, val_loss=0.4233323727065861
Epoch 11/15 - loss=0.4020, val_acc=0.8540, val_loss=0.4174645779064537
Epoch 12/15 - loss=0.3950, val_acc=0.8535, val_loss=0.41624983585856673
Epoch 13/15 - loss=0.3889, val_acc=0.8562, val_loss=0.40333310236625447
Epoch 14/15 - loss=0.3830, val_acc=0.8577, val_loss=0.3979582144325897
Epoch 15/15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▆▆▇▇▇▇███████

0,1
epoch,15.0
test_accuracy,0.8501
training_loss,0.3777
validation loss,0.40125
validation_accuracy,0.85767


[34m[1mwandb[0m: Agent Starting Run: cza3u0d9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1546, val_acc=0.8385, val_loss=0.11807534667181187
Epoch 2/15 - loss=0.1089, val_acc=0.8507, val_loss=0.1079245415272775
Epoch 3/15 - loss=0.1008, val_acc=0.8613, val_loss=0.10000543939049349
Epoch 4/15 - loss=0.0952, val_acc=0.8652, val_loss=0.09625666071572679
Epoch 5/15 - loss=0.0923, val_acc=0.8683, val_loss=0.09380915621205271
Epoch 6/15 - loss=0.0880, val_acc=0.8592, val_loss=0.10111848930572352
Epoch 7/15 - loss=0.0857, val_acc=0.8758, val_loss=0.08908036578476146
Epoch 8/15 - loss=0.0847, val_acc=0.8598, val_loss=0.10085679632055909
Epoch 9/15 - loss=0.0831, val_acc=0.8683, val_loss=0.09525840257122066
Epoch 10/15 - loss=0.0808, val_acc=0.8637, val_loss=0.0966752390122895
Epoch 11/15 - loss=0.0801, val_acc=0.8692, val_loss=0.09290405982900182
Epoch 12/15 - loss=0.0777, val_acc=0.8675, val_loss=0.09691714929371673
Epoch 13/15 - loss=0.0772, val_acc=0.8732, val_loss=0.0920233822369103
Epoch 14/15 - loss=0.0760, val_acc=0.8767, val_loss=0.08734610662139132
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▃▂▄▁▄▃▃▂▃▂▁▃
validation_accuracy,▁▃▅▆▆▅█▅▆▆▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8652
training_loss,0.07423
validation loss,0.09461
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: ywsbwlar with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: 0f5jgbw0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5558, val_acc=0.8328, val_loss=0.43777155646277605
Epoch 2/15 - loss=0.4027, val_acc=0.8590, val_loss=0.3860200103252417
Epoch 3/15 - loss=0.3684, val_acc=0.8703, val_loss=0.36221018302277114
Epoch 4/15 - loss=0.3451, val_acc=0.8683, val_loss=0.36754871587464294
Epoch 5/15 - loss=0.3293, val_acc=0.8677, val_loss=0.35029306676145977
Epoch 6/15 - loss=0.3156, val_acc=0.8682, val_loss=0.35125727292328474
Epoch 7/15 - loss=0.3063, val_acc=0.8582, val_loss=0.38536835636902395
Epoch 8/15 - loss=0.2972, val_acc=0.8767, val_loss=0.3388813449871443
Epoch 9/15 - loss=0.2906, val_acc=0.8737, val_loss=0.3436672801769622
Epoch 10/15 - loss=0.2836, val_acc=0.8790, val_loss=0.32317606716566977
Epoch 11/15 - loss=0.2767, val_acc=0.8710, val_loss=0.33245739693126186
Epoch 12/15 - loss=0.2706, val_acc=0.8812, val_loss=0.33168192896185533
Epoch 13/15 - loss=0.2654, val_acc=0.8838, val_loss=0.31609606544582625
Epoch 14/15 - loss=0.2602, val_acc=0.8830, val_loss=0.3224808191262113
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▃▃▅▂▃▁▂▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▄▇▇▇▆███▇

0,1
epoch,15.0
test_accuracy,0.8687
training_loss,0.25581
validation loss,0.32548
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: thx8ifrj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1487, val_acc=0.7618, val_loss=0.7060827972598178
Epoch 2/15 - loss=0.6067, val_acc=0.8093, val_loss=0.5593056891504574
Epoch 3/15 - loss=0.5165, val_acc=0.8237, val_loss=0.5027129393573212
Epoch 4/15 - loss=0.4757, val_acc=0.8373, val_loss=0.47078306930415437
Epoch 5/15 - loss=0.4518, val_acc=0.8427, val_loss=0.44901364739761607
Epoch 6/15 - loss=0.4346, val_acc=0.8430, val_loss=0.4402083389557976
Epoch 7/15 - loss=0.4224, val_acc=0.8493, val_loss=0.43018341115921754
Epoch 8/15 - loss=0.4125, val_acc=0.8508, val_loss=0.419089080804916
Epoch 9/15 - loss=0.4034, val_acc=0.8497, val_loss=0.42172908662786857
Epoch 10/15 - loss=0.3961, val_acc=0.8592, val_loss=0.40600326465141573
Epoch 11/15 - loss=0.3892, val_acc=0.8543, val_loss=0.4025864385834172
Epoch 12/15 - loss=0.3826, val_acc=0.8568, val_loss=0.4000387158840789
Epoch 13/15 - loss=0.3770, val_acc=0.8575, val_loss=0.39027683618345355
Epoch 14/15 - loss=0.3717, val_acc=0.8620, val_loss=0.38640348967624033
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇████

0,1
epoch,15.0
test_accuracy,0.8522
training_loss,0.36679
validation loss,0.38584
validation_accuracy,0.86033


[34m[1mwandb[0m: Agent Starting Run: jlyb20ce with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2967, val_acc=0.7340, val_loss=0.19353167162186222
Epoch 2/15 - loss=0.1641, val_acc=0.7993, val_loss=0.15037936977739025
Epoch 3/15 - loss=0.1371, val_acc=0.8235, val_loss=0.1334672447531351
Epoch 4/15 - loss=0.1251, val_acc=0.8307, val_loss=0.12476232000834377
Epoch 5/15 - loss=0.1182, val_acc=0.8368, val_loss=0.11905171376192653
Epoch 6/15 - loss=0.1131, val_acc=0.8422, val_loss=0.11590167759760822
Epoch 7/15 - loss=0.1097, val_acc=0.8460, val_loss=0.1134366287014274
Epoch 8/15 - loss=0.1068, val_acc=0.8518, val_loss=0.10946153657566889
Epoch 9/15 - loss=0.1045, val_acc=0.8513, val_loss=0.10955881514100405
Epoch 10/15 - loss=0.1025, val_acc=0.8555, val_loss=0.10582188828075677
Epoch 11/15 - loss=0.1004, val_acc=0.8553, val_loss=0.10445724863847552
Epoch 12/15 - loss=0.0988, val_acc=0.8565, val_loss=0.1054353527369703
Epoch 13/15 - loss=0.0974, val_acc=0.8602, val_loss=0.10143812962956059
Epoch 14/15 - loss=0.0960, val_acc=0.8630, val_loss=0.10104848043476686
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▅▆▆▇▇▇▇▇█████▇

0,1
epoch,15.0
test_accuracy,0.8488
training_loss,0.09477
validation loss,0.10395
validation_accuracy,0.85283


[34m[1mwandb[0m: Agent Starting Run: a0a4j2m6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2251, val_acc=0.7937, val_loss=0.15555477288051223
Epoch 2/15 - loss=0.1363, val_acc=0.8188, val_loss=0.12978856318332885
Epoch 3/15 - loss=0.1197, val_acc=0.8340, val_loss=0.11886597894815847
Epoch 4/15 - loss=0.1118, val_acc=0.8415, val_loss=0.11237039833777483
Epoch 5/15 - loss=0.1068, val_acc=0.8487, val_loss=0.10782254377191218
Epoch 6/15 - loss=0.1029, val_acc=0.8508, val_loss=0.10580812756301354
Epoch 7/15 - loss=0.1001, val_acc=0.8547, val_loss=0.10346746048261021
Epoch 8/15 - loss=0.0978, val_acc=0.8592, val_loss=0.10160616912260707
Epoch 9/15 - loss=0.0958, val_acc=0.8580, val_loss=0.10194214366832723
Epoch 10/15 - loss=0.0940, val_acc=0.8613, val_loss=0.09877766964297852
Epoch 11/15 - loss=0.0923, val_acc=0.8617, val_loss=0.09740027399928149
Epoch 12/15 - loss=0.0911, val_acc=0.8622, val_loss=0.09871866585730099
Epoch 13/15 - loss=0.0898, val_acc=0.8682, val_loss=0.09457333113366889
Epoch 14/15 - loss=0.0886, val_acc=0.8680, val_loss=0.09465305052316601
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇▇███

0,1
epoch,15.0
test_accuracy,0.8613
training_loss,0.08752
validation loss,0.09534
validation_accuracy,0.86317


[34m[1mwandb[0m: Agent Starting Run: y2694r6n with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1280, val_acc=0.7710, val_loss=0.6874403469354357
Epoch 2/15 - loss=0.5903, val_acc=0.8093, val_loss=0.5478209353920453
Epoch 3/15 - loss=0.5036, val_acc=0.8313, val_loss=0.48710003291639853
Epoch 4/15 - loss=0.4640, val_acc=0.8400, val_loss=0.459879015729208
Epoch 5/15 - loss=0.4416, val_acc=0.8440, val_loss=0.4455406488407984
Epoch 6/15 - loss=0.4243, val_acc=0.8527, val_loss=0.4266782544020839
Epoch 7/15 - loss=0.4113, val_acc=0.8532, val_loss=0.4175954601943847
Epoch 8/15 - loss=0.4017, val_acc=0.8563, val_loss=0.40938636345511836
Epoch 9/15 - loss=0.3921, val_acc=0.8563, val_loss=0.40278812722555274
Epoch 10/15 - loss=0.3844, val_acc=0.8590, val_loss=0.395092607387703
Epoch 11/15 - loss=0.3772, val_acc=0.8623, val_loss=0.38809914083643315
Epoch 12/15 - loss=0.3697, val_acc=0.8593, val_loss=0.3885536930151969
Epoch 13/15 - loss=0.3639, val_acc=0.8633, val_loss=0.3797105619284139
Epoch 14/15 - loss=0.3589, val_acc=0.8652, val_loss=0.37356018365124766
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8597
training_loss,0.35299
validation loss,0.37165
validation_accuracy,0.86567


[34m[1mwandb[0m: Agent Starting Run: h80oj7cz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6259, val_acc=0.8292, val_loss=0.46986892883264325
Epoch 2/15 - loss=0.4350, val_acc=0.8482, val_loss=0.40931900197957294
Epoch 3/15 - loss=0.3933, val_acc=0.8610, val_loss=0.3929496971886448
Epoch 4/15 - loss=0.3677, val_acc=0.8625, val_loss=0.38100447920381475
Epoch 5/15 - loss=0.3505, val_acc=0.8697, val_loss=0.34861474358761296
Epoch 6/15 - loss=0.3366, val_acc=0.8655, val_loss=0.3651900360971135
Epoch 7/15 - loss=0.3265, val_acc=0.8460, val_loss=0.44542384190344814
Epoch 8/15 - loss=0.3173, val_acc=0.8730, val_loss=0.34927960227979377
Epoch 9/15 - loss=0.3099, val_acc=0.8582, val_loss=0.38767435174390713
Epoch 10/15 - loss=0.3022, val_acc=0.8687, val_loss=0.3559346085114018
Epoch 11/15 - loss=0.2968, val_acc=0.8705, val_loss=0.3537828276951274
Epoch 12/15 - loss=0.2914, val_acc=0.8787, val_loss=0.33658602607240146
Epoch 13/15 - loss=0.2865, val_acc=0.8823, val_loss=0.32385177927827774
Epoch 14/15 - loss=0.2823, val_acc=0.8757, val_loss=0.3449483972933062
Epoch 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▇▂▄▃▂▂▁▂▃
validation_accuracy,▁▄▅▅▆▆▃▇▅▆▆██▇▆

0,1
epoch,15.0
test_accuracy,0.8634
training_loss,0.27677
validation loss,0.35515
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: pk32lag2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1413, val_acc=0.8418, val_loss=0.11187608837434508
Epoch 2/15 - loss=0.1032, val_acc=0.8572, val_loss=0.10292216007550754
Epoch 3/15 - loss=0.0944, val_acc=0.8645, val_loss=0.09652480990400185
Epoch 4/15 - loss=0.0897, val_acc=0.8685, val_loss=0.09476959669844394
Epoch 5/15 - loss=0.0862, val_acc=0.8678, val_loss=0.0945033290148884
Epoch 6/15 - loss=0.0822, val_acc=0.8603, val_loss=0.09800798271184999
Epoch 7/15 - loss=0.0804, val_acc=0.8767, val_loss=0.08819866380313499
Epoch 8/15 - loss=0.0776, val_acc=0.8752, val_loss=0.09065071683291501
Epoch 9/15 - loss=0.0767, val_acc=0.8832, val_loss=0.0857508196502852
Epoch 10/15 - loss=0.0749, val_acc=0.8777, val_loss=0.08825522418715129
Epoch 11/15 - loss=0.0735, val_acc=0.8748, val_loss=0.08935921443706722
Epoch 12/15 - loss=0.0709, val_acc=0.8795, val_loss=0.08773304187319131
Epoch 13/15 - loss=0.0707, val_acc=0.8830, val_loss=0.08585769123801612
Epoch 14/15 - loss=0.0688, val_acc=0.8865, val_loss=0.08314714065452436
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▅▂▃▂▂▃▂▂▁▃
validation_accuracy,▁▃▅▅▅▄▆▆▇▇▆▇▇█▆

0,1
epoch,15.0
test_accuracy,0.8671
training_loss,0.06787
validation loss,0.0915
validation_accuracy,0.87333


[34m[1mwandb[0m: Agent Starting Run: bo835g0y with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1556, val_acc=0.8275, val_loss=0.12376927939821812
Epoch 2/15 - loss=0.1123, val_acc=0.8487, val_loss=0.10914155139324669
Epoch 3/15 - loss=0.1032, val_acc=0.8567, val_loss=0.10336085367071043
Epoch 4/15 - loss=0.0974, val_acc=0.8550, val_loss=0.10369154206696556
Epoch 5/15 - loss=0.0928, val_acc=0.8692, val_loss=0.09388641542241928
Epoch 6/15 - loss=0.0891, val_acc=0.8633, val_loss=0.09691810265368508
Epoch 7/15 - loss=0.0867, val_acc=0.8635, val_loss=0.10091406397319008
Epoch 8/15 - loss=0.0849, val_acc=0.8763, val_loss=0.09052125489096238
Epoch 9/15 - loss=0.0826, val_acc=0.8658, val_loss=0.09602094514999028
Epoch 10/15 - loss=0.0812, val_acc=0.8783, val_loss=0.08653326105908628
Epoch 11/15 - loss=0.0792, val_acc=0.8668, val_loss=0.09581575435407996
Epoch 12/15 - loss=0.0783, val_acc=0.8660, val_loss=0.09726290569530321
Epoch 13/15 - loss=0.0770, val_acc=0.8797, val_loss=0.08673747693474843
Epoch 14/15 - loss=0.0756, val_acc=0.8777, val_loss=0.08932270018879274
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▄▂▃▁▃▃▁▂▂
validation_accuracy,▁▄▅▅▇▆▆█▆█▆▆██▇

0,1
epoch,15.0
test_accuracy,0.8618
training_loss,0.07445
validation loss,0.09193
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: h52l7j9u with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1524, val_acc=0.8313, val_loss=0.1245255895745075
Epoch 2/15 - loss=0.1118, val_acc=0.8493, val_loss=0.10794346530513453
Epoch 3/15 - loss=0.1019, val_acc=0.8608, val_loss=0.10114784327619517
Epoch 4/15 - loss=0.0959, val_acc=0.8623, val_loss=0.0987054971601063
Epoch 5/15 - loss=0.0916, val_acc=0.8738, val_loss=0.09158856352115755
Epoch 6/15 - loss=0.0883, val_acc=0.8640, val_loss=0.09430304095777431
Epoch 7/15 - loss=0.0862, val_acc=0.8655, val_loss=0.10152089438475206
Epoch 8/15 - loss=0.0835, val_acc=0.8730, val_loss=0.09048963897219622
Epoch 9/15 - loss=0.0821, val_acc=0.8745, val_loss=0.08900585688577257
Epoch 10/15 - loss=0.0799, val_acc=0.8692, val_loss=0.09298489125222162
Epoch 11/15 - loss=0.0777, val_acc=0.8758, val_loss=0.0892168823899104
Epoch 12/15 - loss=0.0771, val_acc=0.8790, val_loss=0.08838762237306609
Epoch 13/15 - loss=0.0758, val_acc=0.8842, val_loss=0.08579004300847413
Epoch 14/15 - loss=0.0741, val_acc=0.8825, val_loss=0.08606499442054288
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▃▄▂▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▅▇▅▆▇▇▆▇▇███

0,1
epoch,15.0
test_accuracy,0.8724
training_loss,0.07339
validation loss,0.08512
validation_accuracy,0.8825


[34m[1mwandb[0m: Agent Starting Run: gopmygfa with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1432, val_acc=0.8278, val_loss=0.12019735603298004
Epoch 2/15 - loss=0.1063, val_acc=0.8537, val_loss=0.10458825745834655
Epoch 3/15 - loss=0.0980, val_acc=0.8640, val_loss=0.09894489446275748
Epoch 4/15 - loss=0.0928, val_acc=0.8628, val_loss=0.09815624616556345
Epoch 5/15 - loss=0.0885, val_acc=0.8652, val_loss=0.09491658417853527
Epoch 6/15 - loss=0.0853, val_acc=0.8692, val_loss=0.09312992971938058
Epoch 7/15 - loss=0.0827, val_acc=0.8712, val_loss=0.09282399194692598
Epoch 8/15 - loss=0.0809, val_acc=0.8782, val_loss=0.08817061121725349
Epoch 9/15 - loss=0.0787, val_acc=0.8765, val_loss=0.09128837832752773
Epoch 10/15 - loss=0.0770, val_acc=0.8765, val_loss=0.08810301816926316
Epoch 11/15 - loss=0.0753, val_acc=0.8757, val_loss=0.09139334683740018
Epoch 12/15 - loss=0.0741, val_acc=0.8788, val_loss=0.08844351168724683
Epoch 13/15 - loss=0.0731, val_acc=0.8822, val_loss=0.08722778262327051
Epoch 14/15 - loss=0.0716, val_acc=0.8777, val_loss=0.08822212025571004
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▃▂▂▁▂▁▂▁▁▁▁
validation_accuracy,▁▄▆▆▆▆▇▇▇▇▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8703
training_loss,0.07025
validation loss,0.0887
validation_accuracy,0.87783


[34m[1mwandb[0m: Agent Starting Run: x71mxd9l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2614, val_acc=0.7657, val_loss=0.17869219264990102
Epoch 2/15 - loss=0.1497, val_acc=0.8140, val_loss=0.13685060676687874
Epoch 3/15 - loss=0.1248, val_acc=0.8290, val_loss=0.12301112224892029
Epoch 4/15 - loss=0.1150, val_acc=0.8393, val_loss=0.1153765425620314
Epoch 5/15 - loss=0.1092, val_acc=0.8470, val_loss=0.11045117646180808
Epoch 6/15 - loss=0.1049, val_acc=0.8488, val_loss=0.1082665319748341
Epoch 7/15 - loss=0.1018, val_acc=0.8545, val_loss=0.10485673066199987
Epoch 8/15 - loss=0.0992, val_acc=0.8575, val_loss=0.10324114404053948
Epoch 9/15 - loss=0.0970, val_acc=0.8565, val_loss=0.10281740151268254
Epoch 10/15 - loss=0.0951, val_acc=0.8617, val_loss=0.10006913099133749
Epoch 11/15 - loss=0.0933, val_acc=0.8622, val_loss=0.09911340969458003
Epoch 12/15 - loss=0.0919, val_acc=0.8595, val_loss=0.10030356166940334
Epoch 13/15 - loss=0.0906, val_acc=0.8662, val_loss=0.09551479782723016
Epoch 14/15 - loss=0.0893, val_acc=0.8657, val_loss=0.09630514068844791
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8592
training_loss,0.08831
validation loss,0.09697
validation_accuracy,0.86183


[34m[1mwandb[0m: Agent Starting Run: jvq2o31v with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2262, val_acc=0.7972, val_loss=0.15340633505537074
Epoch 2/15 - loss=0.1351, val_acc=0.8230, val_loss=0.12849873562126413
Epoch 3/15 - loss=0.1186, val_acc=0.8365, val_loss=0.11654167326939735
Epoch 4/15 - loss=0.1104, val_acc=0.8438, val_loss=0.1116252541829645
Epoch 5/15 - loss=0.1056, val_acc=0.8525, val_loss=0.10642589802703982
Epoch 6/15 - loss=0.1016, val_acc=0.8510, val_loss=0.10472057925021
Epoch 7/15 - loss=0.0987, val_acc=0.8575, val_loss=0.10131341121112283
Epoch 8/15 - loss=0.0963, val_acc=0.8600, val_loss=0.09995529280625355
Epoch 9/15 - loss=0.0943, val_acc=0.8628, val_loss=0.09797341864225681
Epoch 10/15 - loss=0.0926, val_acc=0.8635, val_loss=0.09862472186706352
Epoch 11/15 - loss=0.0912, val_acc=0.8643, val_loss=0.09677629532947156
Epoch 12/15 - loss=0.0895, val_acc=0.8667, val_loss=0.09631917740106112
Epoch 13/15 - loss=0.0884, val_acc=0.8690, val_loss=0.09333225153615081
Epoch 14/15 - loss=0.0872, val_acc=0.8700, val_loss=0.09374016410840073
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8624
training_loss,0.086
validation loss,0.0937
validation_accuracy,0.86867


[34m[1mwandb[0m: Agent Starting Run: sscu552f with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1432, val_acc=0.8278, val_loss=0.12019735603298004
Epoch 2/15 - loss=0.1063, val_acc=0.8537, val_loss=0.10458825745834655
Epoch 3/15 - loss=0.0980, val_acc=0.8640, val_loss=0.09894489446275748
Epoch 4/15 - loss=0.0928, val_acc=0.8628, val_loss=0.09815624616556345
Epoch 5/15 - loss=0.0885, val_acc=0.8652, val_loss=0.09491658417853527
Epoch 6/15 - loss=0.0853, val_acc=0.8692, val_loss=0.09312992971938058
Epoch 7/15 - loss=0.0827, val_acc=0.8712, val_loss=0.09282399194692598
Epoch 8/15 - loss=0.0809, val_acc=0.8782, val_loss=0.08817061121725349
Epoch 9/15 - loss=0.0787, val_acc=0.8765, val_loss=0.09128837832752773
Epoch 10/15 - loss=0.0770, val_acc=0.8765, val_loss=0.08810301816926316
Epoch 11/15 - loss=0.0753, val_acc=0.8757, val_loss=0.09139334683740018
Epoch 12/15 - loss=0.0741, val_acc=0.8788, val_loss=0.08844351168724683
Epoch 13/15 - loss=0.0731, val_acc=0.8822, val_loss=0.08722778262327051
Epoch 14/15 - loss=0.0716, val_acc=0.8777, val_loss=0.08822212025571004
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▃▂▂▁▂▁▂▁▁▁▁
validation_accuracy,▁▄▆▆▆▆▇▇▇▇▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8703
training_loss,0.07025
validation loss,0.0887
validation_accuracy,0.87783


[34m[1mwandb[0m: Agent Starting Run: t29p3k12 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1953, val_acc=0.7537, val_loss=0.7829581861813705
Epoch 2/15 - loss=0.6502, val_acc=0.8077, val_loss=0.5747135449574344
Epoch 3/15 - loss=0.5188, val_acc=0.8282, val_loss=0.49650118575317015
Epoch 4/15 - loss=0.4641, val_acc=0.8343, val_loss=0.4604563297512127
Epoch 5/15 - loss=0.4342, val_acc=0.8423, val_loss=0.43731749213697185
Epoch 6/15 - loss=0.4128, val_acc=0.8512, val_loss=0.4172212941320218
Epoch 7/15 - loss=0.3976, val_acc=0.8540, val_loss=0.40700705793293057
Epoch 8/15 - loss=0.3860, val_acc=0.8550, val_loss=0.3959101901038994
Epoch 9/15 - loss=0.3759, val_acc=0.8573, val_loss=0.3859872244664137
Epoch 10/15 - loss=0.3671, val_acc=0.8605, val_loss=0.38504770047177456
Epoch 11/15 - loss=0.3602, val_acc=0.8647, val_loss=0.3760536411013232
Epoch 12/15 - loss=0.3527, val_acc=0.8628, val_loss=0.3764751127040352
Epoch 13/15 - loss=0.3479, val_acc=0.8660, val_loss=0.3620467727888231
Epoch 14/15 - loss=0.3424, val_acc=0.8667, val_loss=0.3621035389928984
Epoch 15/15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▆▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8615
training_loss,0.33711
validation loss,0.36055
validation_accuracy,0.8695


[34m[1mwandb[0m: Agent Starting Run: 4fybi6ec with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1524, val_acc=0.8313, val_loss=0.1245255895745075
Epoch 2/15 - loss=0.1118, val_acc=0.8493, val_loss=0.10794346530513453
Epoch 3/15 - loss=0.1019, val_acc=0.8608, val_loss=0.10114784327619517
Epoch 4/15 - loss=0.0959, val_acc=0.8623, val_loss=0.0987054971601063
Epoch 5/15 - loss=0.0916, val_acc=0.8738, val_loss=0.09158856352115755
Epoch 6/15 - loss=0.0883, val_acc=0.8640, val_loss=0.09430304095777431
Epoch 7/15 - loss=0.0862, val_acc=0.8655, val_loss=0.10152089438475206
Epoch 8/15 - loss=0.0835, val_acc=0.8730, val_loss=0.09048963897219622
Epoch 9/15 - loss=0.0821, val_acc=0.8745, val_loss=0.08900585688577257
Epoch 10/15 - loss=0.0799, val_acc=0.8692, val_loss=0.09298489125222162
Epoch 11/15 - loss=0.0777, val_acc=0.8758, val_loss=0.0892168823899104
Epoch 12/15 - loss=0.0771, val_acc=0.8790, val_loss=0.08838762237306609
Epoch 13/15 - loss=0.0758, val_acc=0.8842, val_loss=0.08579004300847413
Epoch 14/15 - loss=0.0741, val_acc=0.8825, val_loss=0.08606499442054288
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▃▂▃▄▂▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▅▇▅▆▇▇▆▇▇███

0,1
epoch,15.0
test_accuracy,0.8724
training_loss,0.07339
validation loss,0.08512
validation_accuracy,0.8825


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lgjtm9w8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1280, val_acc=0.7710, val_loss=0.6874403469354357
Epoch 2/15 - loss=0.5903, val_acc=0.8093, val_loss=0.5478209353920453
Epoch 3/15 - loss=0.5036, val_acc=0.8313, val_loss=0.48710003291639853
Epoch 4/15 - loss=0.4640, val_acc=0.8400, val_loss=0.459879015729208
Epoch 5/15 - loss=0.4416, val_acc=0.8440, val_loss=0.4455406488407984
Epoch 6/15 - loss=0.4243, val_acc=0.8527, val_loss=0.4266782544020839
Epoch 7/15 - loss=0.4113, val_acc=0.8532, val_loss=0.4175954601943847
Epoch 8/15 - loss=0.4017, val_acc=0.8563, val_loss=0.40938636345511836
Epoch 9/15 - loss=0.3921, val_acc=0.8563, val_loss=0.40278812722555274
Epoch 10/15 - loss=0.3844, val_acc=0.8590, val_loss=0.395092607387703
Epoch 11/15 - loss=0.3772, val_acc=0.8623, val_loss=0.38809914083643315
Epoch 12/15 - loss=0.3697, val_acc=0.8593, val_loss=0.3885536930151969
Epoch 13/15 - loss=0.3639, val_acc=0.8633, val_loss=0.3797105619284139
Epoch 14/15 - loss=0.3589, val_acc=0.8652, val_loss=0.37356018365124766
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8597
training_loss,0.35299
validation loss,0.37165
validation_accuracy,0.86567


[34m[1mwandb[0m: Agent Starting Run: li1fveej with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2674, val_acc=0.7638, val_loss=0.1743751192722
Epoch 2/15 - loss=0.1500, val_acc=0.8127, val_loss=0.14090028356791603
Epoch 3/15 - loss=0.1293, val_acc=0.8278, val_loss=0.12823391681726123
Epoch 4/15 - loss=0.1195, val_acc=0.8408, val_loss=0.11902486080913699
Epoch 5/15 - loss=0.1134, val_acc=0.8455, val_loss=0.11426869310717432
Epoch 6/15 - loss=0.1090, val_acc=0.8462, val_loss=0.11188272051937002
Epoch 7/15 - loss=0.1058, val_acc=0.8507, val_loss=0.11078548537210581
Epoch 8/15 - loss=0.1033, val_acc=0.8555, val_loss=0.10661031260267315
Epoch 9/15 - loss=0.1010, val_acc=0.8540, val_loss=0.10798276465733196
Epoch 10/15 - loss=0.0991, val_acc=0.8595, val_loss=0.10309541399735062
Epoch 11/15 - loss=0.0971, val_acc=0.8573, val_loss=0.10286096169843634
Epoch 12/15 - loss=0.0955, val_acc=0.8622, val_loss=0.10150745408797428
Epoch 13/15 - loss=0.0941, val_acc=0.8645, val_loss=0.09836587726005193
Epoch 14/15 - loss=0.0928, val_acc=0.8658, val_loss=0.09854203803645092
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇███▇

0,1
epoch,15.0
test_accuracy,0.8513
training_loss,0.09155
validation loss,0.10188
validation_accuracy,0.85783


[34m[1mwandb[0m: Agent Starting Run: 1nzyceq9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6094, val_acc=0.8387, val_loss=0.45171820913596056
Epoch 2/15 - loss=0.4182, val_acc=0.8477, val_loss=0.414238206275963
Epoch 3/15 - loss=0.3859, val_acc=0.8590, val_loss=0.3921560463062768
Epoch 4/15 - loss=0.3664, val_acc=0.8628, val_loss=0.367580587928539
Epoch 5/15 - loss=0.3493, val_acc=0.8627, val_loss=0.380974212908759
Epoch 6/15 - loss=0.3326, val_acc=0.8778, val_loss=0.33713473099885616
Epoch 7/15 - loss=0.3213, val_acc=0.8740, val_loss=0.3478365374809786
Epoch 8/15 - loss=0.3106, val_acc=0.8653, val_loss=0.36720863611454246
Epoch 9/15 - loss=0.3047, val_acc=0.8788, val_loss=0.3365835997772626
Epoch 10/15 - loss=0.2945, val_acc=0.8682, val_loss=0.36050397008235285
Epoch 11/15 - loss=0.2902, val_acc=0.8723, val_loss=0.3483397652325901
Epoch 12/15 - loss=0.2804, val_acc=0.8722, val_loss=0.35855933576778337
Epoch 13/15 - loss=0.2785, val_acc=0.8775, val_loss=0.33436116963169304
Epoch 14/15 - loss=0.2735, val_acc=0.8823, val_loss=0.3344395518285832
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▆▄▃▄▁▂▃▁▃▂▂▁▁▂
validation_accuracy,▁▂▄▅▅▇▇▅▇▆▆▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8656
training_loss,0.26661
validation loss,0.35018
validation_accuracy,0.87367


[34m[1mwandb[0m: Agent Starting Run: ymv76lff with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Agent Starting Run: qf4u0o6w with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1546, val_acc=0.8385, val_loss=0.11807534667181187
Epoch 2/15 - loss=0.1089, val_acc=0.8507, val_loss=0.1079245415272775
Epoch 3/15 - loss=0.1008, val_acc=0.8613, val_loss=0.10000543939049349
Epoch 4/15 - loss=0.0952, val_acc=0.8652, val_loss=0.09625666071572679
Epoch 5/15 - loss=0.0923, val_acc=0.8683, val_loss=0.09380915621205271
Epoch 6/15 - loss=0.0880, val_acc=0.8592, val_loss=0.10111848930572352
Epoch 7/15 - loss=0.0857, val_acc=0.8758, val_loss=0.08908036578476146
Epoch 8/15 - loss=0.0847, val_acc=0.8598, val_loss=0.10085679632055909
Epoch 9/15 - loss=0.0831, val_acc=0.8683, val_loss=0.09525840257122066
Epoch 10/15 - loss=0.0808, val_acc=0.8637, val_loss=0.0966752390122895
Epoch 11/15 - loss=0.0801, val_acc=0.8692, val_loss=0.09290405982900182
Epoch 12/15 - loss=0.0777, val_acc=0.8675, val_loss=0.09691714929371673
Epoch 13/15 - loss=0.0772, val_acc=0.8732, val_loss=0.0920233822369103
Epoch 14/15 - loss=0.0760, val_acc=0.8767, val_loss=0.08734610662139132
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▃▂▄▁▄▃▃▂▃▂▁▃
validation_accuracy,▁▃▅▆▆▅█▅▆▆▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8652
training_loss,0.07423
validation loss,0.09461
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: op9xf0ri with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2701, val_acc=0.7712, val_loss=0.17233303914369377
Epoch 2/15 - loss=0.1489, val_acc=0.8120, val_loss=0.14023549319911544
Epoch 3/15 - loss=0.1283, val_acc=0.8308, val_loss=0.12563040785218538
Epoch 4/15 - loss=0.1190, val_acc=0.8403, val_loss=0.11926689887964766
Epoch 5/15 - loss=0.1133, val_acc=0.8457, val_loss=0.11464305398695039
Epoch 6/15 - loss=0.1091, val_acc=0.8485, val_loss=0.11144482326195737
Epoch 7/15 - loss=0.1059, val_acc=0.8540, val_loss=0.10865193871514116
Epoch 8/15 - loss=0.1035, val_acc=0.8557, val_loss=0.1063017702072951
Epoch 9/15 - loss=0.1012, val_acc=0.8563, val_loss=0.10551612705504892
Epoch 10/15 - loss=0.0994, val_acc=0.8577, val_loss=0.10394982501700006
Epoch 11/15 - loss=0.0977, val_acc=0.8590, val_loss=0.10234280924618876
Epoch 12/15 - loss=0.0959, val_acc=0.8615, val_loss=0.10259491782410007
Epoch 13/15 - loss=0.0947, val_acc=0.8613, val_loss=0.10039957074187579
Epoch 14/15 - loss=0.0934, val_acc=0.8668, val_loss=0.0988652795390645
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.09208
validation loss,0.09916
validation_accuracy,0.86083


[34m[1mwandb[0m: Agent Starting Run: 89iejyb5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.3047, val_acc=0.7235, val_loss=0.788669674683696
Epoch 2/15 - loss=0.6742, val_acc=0.7862, val_loss=0.6152835071864606
Epoch 3/15 - loss=0.5601, val_acc=0.8157, val_loss=0.5400664878764678
Epoch 4/15 - loss=0.5057, val_acc=0.8278, val_loss=0.5008668393211678
Epoch 5/15 - loss=0.4751, val_acc=0.8355, val_loss=0.4762351567778349
Epoch 6/15 - loss=0.4542, val_acc=0.8347, val_loss=0.46176735466885765
Epoch 7/15 - loss=0.4392, val_acc=0.8402, val_loss=0.44963650196665744
Epoch 8/15 - loss=0.4278, val_acc=0.8470, val_loss=0.4370708800210514
Epoch 9/15 - loss=0.4177, val_acc=0.8482, val_loss=0.43394314526168554
Epoch 10/15 - loss=0.4094, val_acc=0.8532, val_loss=0.4233323727065861
Epoch 11/15 - loss=0.4020, val_acc=0.8540, val_loss=0.4174645779064537
Epoch 12/15 - loss=0.3950, val_acc=0.8535, val_loss=0.41624983585856673
Epoch 13/15 - loss=0.3889, val_acc=0.8562, val_loss=0.40333310236625447
Epoch 14/15 - loss=0.3830, val_acc=0.8577, val_loss=0.3979582144325897
Epoch 15/15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▆▆▇▇▇▇███████

0,1
epoch,15.0
test_accuracy,0.8501
training_loss,0.3777
validation loss,0.40125
validation_accuracy,0.85767


[34m[1mwandb[0m: Agent Starting Run: emapqbtk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2251, val_acc=0.7937, val_loss=0.15555477288051223
Epoch 2/15 - loss=0.1363, val_acc=0.8188, val_loss=0.12978856318332885
Epoch 3/15 - loss=0.1197, val_acc=0.8340, val_loss=0.11886597894815847
Epoch 4/15 - loss=0.1118, val_acc=0.8415, val_loss=0.11237039833777483
Epoch 5/15 - loss=0.1068, val_acc=0.8487, val_loss=0.10782254377191218
Epoch 6/15 - loss=0.1029, val_acc=0.8508, val_loss=0.10580812756301354
Epoch 7/15 - loss=0.1001, val_acc=0.8547, val_loss=0.10346746048261021
Epoch 8/15 - loss=0.0978, val_acc=0.8592, val_loss=0.10160616912260707
Epoch 9/15 - loss=0.0958, val_acc=0.8580, val_loss=0.10194214366832723
Epoch 10/15 - loss=0.0940, val_acc=0.8613, val_loss=0.09877766964297852
Epoch 11/15 - loss=0.0923, val_acc=0.8617, val_loss=0.09740027399928149
Epoch 12/15 - loss=0.0911, val_acc=0.8622, val_loss=0.09871866585730099
Epoch 13/15 - loss=0.0898, val_acc=0.8682, val_loss=0.09457333113366889
Epoch 14/15 - loss=0.0886, val_acc=0.8680, val_loss=0.09465305052316601
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇▇███

0,1
epoch,15.0
test_accuracy,0.8613
training_loss,0.08752
validation loss,0.09534
validation_accuracy,0.86317


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 219xt02z with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ns50orbm with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6270, val_acc=0.8425, val_loss=0.44806579568108995
Epoch 2/15 - loss=0.4148, val_acc=0.8480, val_loss=0.40956287006615216
Epoch 3/15 - loss=0.3790, val_acc=0.8595, val_loss=0.3849564114459321
Epoch 4/15 - loss=0.3550, val_acc=0.8663, val_loss=0.3724371378558453
Epoch 5/15 - loss=0.3403, val_acc=0.8695, val_loss=0.35556014969300476
Epoch 6/15 - loss=0.3229, val_acc=0.8707, val_loss=0.3478934113529482
Epoch 7/15 - loss=0.3146, val_acc=0.8768, val_loss=0.3295220304940766
Epoch 8/15 - loss=0.3054, val_acc=0.8653, val_loss=0.3652100624233602
Epoch 9/15 - loss=0.2982, val_acc=0.8775, val_loss=0.3312161494387455
Epoch 10/15 - loss=0.2902, val_acc=0.8688, val_loss=0.36596795707779933
Epoch 11/15 - loss=0.2842, val_acc=0.8718, val_loss=0.3462242708421703
Epoch 12/15 - loss=0.2771, val_acc=0.8800, val_loss=0.33932088558946594
Epoch 13/15 - loss=0.2720, val_acc=0.8750, val_loss=0.3352174624255721
Epoch 14/15 - loss=0.2690, val_acc=0.8800, val_loss=0.32494663082018665
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▆▄▄▃▂▁▃▁▃▂▂▂▁▄
validation_accuracy,▁▂▄▅▆▆▇▅█▆▆█▇█▆

0,1
epoch,15.0
test_accuracy,0.863
training_loss,0.26503
validation loss,0.37061
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: 7owjv44r with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: 6kru21gx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5779, val_acc=0.8348, val_loss=0.43985750491571146
Epoch 2/15 - loss=0.4065, val_acc=0.8563, val_loss=0.3876037488820295
Epoch 3/15 - loss=0.3719, val_acc=0.8633, val_loss=0.38170573697800014
Epoch 4/15 - loss=0.3492, val_acc=0.8563, val_loss=0.3856889007710927
Epoch 5/15 - loss=0.3318, val_acc=0.8653, val_loss=0.3559947591976067
Epoch 6/15 - loss=0.3187, val_acc=0.8695, val_loss=0.346945216445743
Epoch 7/15 - loss=0.3089, val_acc=0.8655, val_loss=0.376927189905875
Epoch 8/15 - loss=0.2990, val_acc=0.8783, val_loss=0.3309598861360001
Epoch 9/15 - loss=0.2918, val_acc=0.8758, val_loss=0.34302784655103435
Epoch 10/15 - loss=0.2843, val_acc=0.8718, val_loss=0.34237081897012583
Epoch 11/15 - loss=0.2792, val_acc=0.8742, val_loss=0.3415574364712802
Epoch 12/15 - loss=0.2730, val_acc=0.8835, val_loss=0.3271817440888692
Epoch 13/15 - loss=0.2677, val_acc=0.8810, val_loss=0.32395863597477026
Epoch 14/15 - loss=0.2629, val_acc=0.8765, val_loss=0.3302192657515811
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▅▃▂▄▁▂▂▂▁▁▁▂
validation_accuracy,▁▄▅▄▅▆▅▇▇▆▇██▇▇

0,1
epoch,15.0
test_accuracy,0.8715
training_loss,0.25899
validation loss,0.33395
validation_accuracy,0.87817


[34m[1mwandb[0m: Agent Starting Run: d5qrnu3r with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5989, val_acc=0.8410, val_loss=0.44994710875608734
Epoch 2/15 - loss=0.4256, val_acc=0.8517, val_loss=0.4013520539824901
Epoch 3/15 - loss=0.3864, val_acc=0.8647, val_loss=0.3748061871000329
Epoch 4/15 - loss=0.3594, val_acc=0.8658, val_loss=0.3695753243721706
Epoch 5/15 - loss=0.3424, val_acc=0.8728, val_loss=0.3456849637029308
Epoch 6/15 - loss=0.3283, val_acc=0.8603, val_loss=0.3710932057749727
Epoch 7/15 - loss=0.3205, val_acc=0.8775, val_loss=0.34597953077527593
Epoch 8/15 - loss=0.3099, val_acc=0.8770, val_loss=0.33969475408024874
Epoch 9/15 - loss=0.3030, val_acc=0.8668, val_loss=0.36464865130682816
Epoch 10/15 - loss=0.2953, val_acc=0.8728, val_loss=0.35835655412067724
Epoch 11/15 - loss=0.2894, val_acc=0.8750, val_loss=0.3539932443350205
Epoch 12/15 - loss=0.2848, val_acc=0.8788, val_loss=0.33219148297520196
Epoch 13/15 - loss=0.2810, val_acc=0.8843, val_loss=0.32731006684640773
Epoch 14/15 - loss=0.2765, val_acc=0.8788, val_loss=0.344105748849108
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▅▄▃▂▃▂▂▃▃▃▁▁▂▃
validation_accuracy,▁▃▅▅▆▄▇▇▅▆▆▇█▇▆

0,1
epoch,15.0
test_accuracy,0.8643
training_loss,0.27383
validation loss,0.35796
validation_accuracy,0.8745


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rshijcz9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1413, val_acc=0.8418, val_loss=0.11187608837434508
Epoch 2/15 - loss=0.1032, val_acc=0.8572, val_loss=0.10292216007550754
Epoch 3/15 - loss=0.0944, val_acc=0.8645, val_loss=0.09652480990400185
Epoch 4/15 - loss=0.0897, val_acc=0.8685, val_loss=0.09476959669844394
Epoch 5/15 - loss=0.0862, val_acc=0.8678, val_loss=0.0945033290148884
Epoch 6/15 - loss=0.0822, val_acc=0.8603, val_loss=0.09800798271184999
Epoch 7/15 - loss=0.0804, val_acc=0.8767, val_loss=0.08819866380313499
Epoch 8/15 - loss=0.0776, val_acc=0.8752, val_loss=0.09065071683291501
Epoch 9/15 - loss=0.0767, val_acc=0.8832, val_loss=0.0857508196502852
Epoch 10/15 - loss=0.0749, val_acc=0.8777, val_loss=0.08825522418715129
Epoch 11/15 - loss=0.0735, val_acc=0.8748, val_loss=0.08935921443706722
Epoch 12/15 - loss=0.0709, val_acc=0.8795, val_loss=0.08773304187319131
Epoch 13/15 - loss=0.0707, val_acc=0.8830, val_loss=0.08585769123801612
Epoch 14/15 - loss=0.0688, val_acc=0.8865, val_loss=0.08314714065452436
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▅▂▃▂▂▃▂▂▁▃
validation_accuracy,▁▃▅▅▅▄▆▆▇▇▆▇▇█▆

0,1
epoch,15.0
test_accuracy,0.8671
training_loss,0.06787
validation loss,0.0915
validation_accuracy,0.87333


[34m[1mwandb[0m: Agent Starting Run: ge2egdh4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1556, val_acc=0.8275, val_loss=0.12376927939821812
Epoch 2/15 - loss=0.1123, val_acc=0.8487, val_loss=0.10914155139324669
Epoch 3/15 - loss=0.1032, val_acc=0.8567, val_loss=0.10336085367071043
Epoch 4/15 - loss=0.0974, val_acc=0.8550, val_loss=0.10369154206696556
Epoch 5/15 - loss=0.0928, val_acc=0.8692, val_loss=0.09388641542241928
Epoch 6/15 - loss=0.0891, val_acc=0.8633, val_loss=0.09691810265368508
Epoch 7/15 - loss=0.0867, val_acc=0.8635, val_loss=0.10091406397319008
Epoch 8/15 - loss=0.0849, val_acc=0.8763, val_loss=0.09052125489096238
Epoch 9/15 - loss=0.0826, val_acc=0.8658, val_loss=0.09602094514999028
Epoch 10/15 - loss=0.0812, val_acc=0.8783, val_loss=0.08653326105908628
Epoch 11/15 - loss=0.0792, val_acc=0.8668, val_loss=0.09581575435407996
Epoch 12/15 - loss=0.0783, val_acc=0.8660, val_loss=0.09726290569530321
Epoch 13/15 - loss=0.0770, val_acc=0.8797, val_loss=0.08673747693474843
Epoch 14/15 - loss=0.0756, val_acc=0.8777, val_loss=0.08932270018879274
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▄▂▃▁▃▃▁▂▂
validation_accuracy,▁▄▅▅▇▆▆█▆█▆▆██▇

0,1
epoch,15.0
test_accuracy,0.8618
training_loss,0.07445
validation loss,0.09193
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: by9qcqnw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.9780, val_acc=0.7860, val_loss=0.6512629818942361
Epoch 2/15 - loss=0.5636, val_acc=0.8148, val_loss=0.5228944074259625
Epoch 3/15 - loss=0.4808, val_acc=0.8333, val_loss=0.4719444224507964
Epoch 4/15 - loss=0.4430, val_acc=0.8395, val_loss=0.443731536751288
Epoch 5/15 - loss=0.4199, val_acc=0.8490, val_loss=0.4218333284333397
Epoch 6/15 - loss=0.4029, val_acc=0.8515, val_loss=0.409878987433738
Epoch 7/15 - loss=0.3908, val_acc=0.8545, val_loss=0.4020654994868725
Epoch 8/15 - loss=0.3806, val_acc=0.8597, val_loss=0.3903855355267599
Epoch 9/15 - loss=0.3718, val_acc=0.8575, val_loss=0.3875869973248034
Epoch 10/15 - loss=0.3644, val_acc=0.8625, val_loss=0.37707866790659794
Epoch 11/15 - loss=0.3577, val_acc=0.8635, val_loss=0.3731787426115567
Epoch 12/15 - loss=0.3517, val_acc=0.8625, val_loss=0.3777987545739664
Epoch 13/15 - loss=0.3468, val_acc=0.8688, val_loss=0.35954957352361405
Epoch 14/15 - loss=0.3416, val_acc=0.8677, val_loss=0.3586472484648075
Epoch 15/15 - l

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▇▇▇▇█▇███

0,1
epoch,15.0
test_accuracy,0.8609
training_loss,0.33727
validation loss,0.35906
validation_accuracy,0.86617


[34m[1mwandb[0m: Agent Starting Run: d1wt4tu0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Agent Starting Run: jlvocnn4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1487, val_acc=0.7618, val_loss=0.7060827972598178
Epoch 2/15 - loss=0.6067, val_acc=0.8093, val_loss=0.5593056891504574
Epoch 3/15 - loss=0.5165, val_acc=0.8237, val_loss=0.5027129393573212
Epoch 4/15 - loss=0.4757, val_acc=0.8373, val_loss=0.47078306930415437
Epoch 5/15 - loss=0.4518, val_acc=0.8427, val_loss=0.44901364739761607
Epoch 6/15 - loss=0.4346, val_acc=0.8430, val_loss=0.4402083389557976
Epoch 7/15 - loss=0.4224, val_acc=0.8493, val_loss=0.43018341115921754
Epoch 8/15 - loss=0.4125, val_acc=0.8508, val_loss=0.419089080804916
Epoch 9/15 - loss=0.4034, val_acc=0.8497, val_loss=0.42172908662786857
Epoch 10/15 - loss=0.3961, val_acc=0.8592, val_loss=0.40600326465141573
Epoch 11/15 - loss=0.3892, val_acc=0.8543, val_loss=0.4025864385834172
Epoch 12/15 - loss=0.3826, val_acc=0.8568, val_loss=0.4000387158840789
Epoch 13/15 - loss=0.3770, val_acc=0.8575, val_loss=0.39027683618345355
Epoch 14/15 - loss=0.3717, val_acc=0.8620, val_loss=0.38640348967624033
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇████

0,1
epoch,15.0
test_accuracy,0.8522
training_loss,0.36679
validation loss,0.38584
validation_accuracy,0.86033


[34m[1mwandb[0m: Agent Starting Run: p78ej5r6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: bgmlsjri with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1397, val_acc=0.8355, val_loss=0.11350597836975147
Epoch 2/15 - loss=0.1057, val_acc=0.8588, val_loss=0.10112544991042209
Epoch 3/15 - loss=0.0971, val_acc=0.8688, val_loss=0.09539135940287359
Epoch 4/15 - loss=0.0914, val_acc=0.8623, val_loss=0.09890848436977485
Epoch 5/15 - loss=0.0877, val_acc=0.8713, val_loss=0.09307759306069037
Epoch 6/15 - loss=0.0842, val_acc=0.8645, val_loss=0.09620183190547767
Epoch 7/15 - loss=0.0824, val_acc=0.8698, val_loss=0.09265516953815112
Epoch 8/15 - loss=0.0803, val_acc=0.8785, val_loss=0.08747185659491241
Epoch 9/15 - loss=0.0784, val_acc=0.8743, val_loss=0.09149474544387273
Epoch 10/15 - loss=0.0767, val_acc=0.8785, val_loss=0.08797854065202694
Epoch 11/15 - loss=0.0754, val_acc=0.8757, val_loss=0.08909678934648581
Epoch 12/15 - loss=0.0739, val_acc=0.8755, val_loss=0.08976308206757432
Epoch 13/15 - loss=0.0721, val_acc=0.8727, val_loss=0.09255991391862062
Epoch 14/15 - loss=0.0712, val_acc=0.8808, val_loss=0.08545360661998516
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▅▃▄▃▄▃▂▃▂▂▂▃▁▁
validation_accuracy,▁▅▆▅▇▅▆█▇█▇▇▇██

0,1
epoch,15.0
test_accuracy,0.8726
training_loss,0.07011
validation loss,0.08633
validation_accuracy,0.87917


[34m[1mwandb[0m: Agent Starting Run: h2si5yio with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.2334, val_acc=0.7492, val_loss=0.7368111650417875
Epoch 2/15 - loss=0.6287, val_acc=0.7963, val_loss=0.5803781453953825
Epoch 3/15 - loss=0.5271, val_acc=0.8235, val_loss=0.5092606480098787
Epoch 4/15 - loss=0.4817, val_acc=0.8337, val_loss=0.47846744425305465
Epoch 5/15 - loss=0.4564, val_acc=0.8372, val_loss=0.46122426343334855
Epoch 6/15 - loss=0.4385, val_acc=0.8445, val_loss=0.4409039608222512
Epoch 7/15 - loss=0.4247, val_acc=0.8482, val_loss=0.4321696479216189
Epoch 8/15 - loss=0.4141, val_acc=0.8517, val_loss=0.42028523153254854
Epoch 9/15 - loss=0.4036, val_acc=0.8565, val_loss=0.41579040835760556
Epoch 10/15 - loss=0.3956, val_acc=0.8588, val_loss=0.4069758499707156
Epoch 11/15 - loss=0.3878, val_acc=0.8587, val_loss=0.4006438894957089
Epoch 12/15 - loss=0.3804, val_acc=0.8578, val_loss=0.3998514244373557
Epoch 13/15 - loss=0.3753, val_acc=0.8627, val_loss=0.3883854847782135
Epoch 14/15 - loss=0.3702, val_acc=0.8628, val_loss=0.3837091570522375
Epoch 15/15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇██▇███

0,1
epoch,15.0
test_accuracy,0.8565
training_loss,0.36474
validation loss,0.38332
validation_accuracy,0.8665


[34m[1mwandb[0m: Agent Starting Run: lrvavj36 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1953, val_acc=0.7537, val_loss=0.7829581861813705
Epoch 2/15 - loss=0.6502, val_acc=0.8077, val_loss=0.5747135449574344
Epoch 3/15 - loss=0.5188, val_acc=0.8282, val_loss=0.49650118575317015
Epoch 4/15 - loss=0.4641, val_acc=0.8343, val_loss=0.4604563297512127
Epoch 5/15 - loss=0.4342, val_acc=0.8423, val_loss=0.43731749213697185
Epoch 6/15 - loss=0.4128, val_acc=0.8512, val_loss=0.4172212941320218
Epoch 7/15 - loss=0.3976, val_acc=0.8540, val_loss=0.40700705793293057
Epoch 8/15 - loss=0.3860, val_acc=0.8550, val_loss=0.3959101901038994
Epoch 9/15 - loss=0.3759, val_acc=0.8573, val_loss=0.3859872244664137
Epoch 10/15 - loss=0.3671, val_acc=0.8605, val_loss=0.38504770047177456
Epoch 11/15 - loss=0.3602, val_acc=0.8647, val_loss=0.3760536411013232
Epoch 12/15 - loss=0.3527, val_acc=0.8628, val_loss=0.3764751127040352
Epoch 13/15 - loss=0.3479, val_acc=0.8660, val_loss=0.3620467727888231
Epoch 14/15 - loss=0.3424, val_acc=0.8667, val_loss=0.3621035389928984
Epoch 15/15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▆▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8615
training_loss,0.33711
validation loss,0.36055
validation_accuracy,0.8695


[34m[1mwandb[0m: Agent Starting Run: 4o2wj912 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.9780, val_acc=0.7860, val_loss=0.6512629818942361
Epoch 2/15 - loss=0.5636, val_acc=0.8148, val_loss=0.5228944074259625
Epoch 3/15 - loss=0.4808, val_acc=0.8333, val_loss=0.4719444224507964
Epoch 4/15 - loss=0.4430, val_acc=0.8395, val_loss=0.443731536751288
Epoch 5/15 - loss=0.4199, val_acc=0.8490, val_loss=0.4218333284333397
Epoch 6/15 - loss=0.4029, val_acc=0.8515, val_loss=0.409878987433738
Epoch 7/15 - loss=0.3908, val_acc=0.8545, val_loss=0.4020654994868725
Epoch 8/15 - loss=0.3806, val_acc=0.8597, val_loss=0.3903855355267599
Epoch 9/15 - loss=0.3718, val_acc=0.8575, val_loss=0.3875869973248034
Epoch 10/15 - loss=0.3644, val_acc=0.8625, val_loss=0.37707866790659794
Epoch 11/15 - loss=0.3577, val_acc=0.8635, val_loss=0.3731787426115567
Epoch 12/15 - loss=0.3517, val_acc=0.8625, val_loss=0.3777987545739664
Epoch 13/15 - loss=0.3468, val_acc=0.8688, val_loss=0.35954957352361405
Epoch 14/15 - loss=0.3416, val_acc=0.8677, val_loss=0.3586472484648075
Epoch 15/15 - l

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▇▇▇▇█▇███

0,1
epoch,15.0
test_accuracy,0.8609
training_loss,0.33727
validation loss,0.35906
validation_accuracy,0.86617


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lm7lscu2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.2334, val_acc=0.7492, val_loss=0.7368111650417875
Epoch 2/15 - loss=0.6287, val_acc=0.7963, val_loss=0.5803781453953825
Epoch 3/15 - loss=0.5271, val_acc=0.8235, val_loss=0.5092606480098787
Epoch 4/15 - loss=0.4817, val_acc=0.8337, val_loss=0.47846744425305465
Epoch 5/15 - loss=0.4564, val_acc=0.8372, val_loss=0.46122426343334855
Epoch 6/15 - loss=0.4385, val_acc=0.8445, val_loss=0.4409039608222512
Epoch 7/15 - loss=0.4247, val_acc=0.8482, val_loss=0.4321696479216189
Epoch 8/15 - loss=0.4141, val_acc=0.8517, val_loss=0.42028523153254854
Epoch 9/15 - loss=0.4036, val_acc=0.8565, val_loss=0.41579040835760556
Epoch 10/15 - loss=0.3956, val_acc=0.8588, val_loss=0.4069758499707156
Epoch 11/15 - loss=0.3878, val_acc=0.8587, val_loss=0.4006438894957089
Epoch 12/15 - loss=0.3804, val_acc=0.8578, val_loss=0.3998514244373557
Epoch 13/15 - loss=0.3753, val_acc=0.8627, val_loss=0.3883854847782135
Epoch 14/15 - loss=0.3702, val_acc=0.8628, val_loss=0.3837091570522375
Epoch 15/15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇██▇███

0,1
epoch,15.0
test_accuracy,0.8565
training_loss,0.36474
validation loss,0.38332
validation_accuracy,0.8665


[34m[1mwandb[0m: Agent Starting Run: m0lcd92f with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6270, val_acc=0.8425, val_loss=0.44806579568108995
Epoch 2/15 - loss=0.4148, val_acc=0.8480, val_loss=0.40956287006615216
Epoch 3/15 - loss=0.3790, val_acc=0.8595, val_loss=0.3849564114459321
Epoch 4/15 - loss=0.3550, val_acc=0.8663, val_loss=0.3724371378558453
Epoch 5/15 - loss=0.3403, val_acc=0.8695, val_loss=0.35556014969300476
Epoch 6/15 - loss=0.3229, val_acc=0.8707, val_loss=0.3478934113529482
Epoch 7/15 - loss=0.3146, val_acc=0.8768, val_loss=0.3295220304940766
Epoch 8/15 - loss=0.3054, val_acc=0.8653, val_loss=0.3652100624233602
Epoch 9/15 - loss=0.2982, val_acc=0.8775, val_loss=0.3312161494387455
Epoch 10/15 - loss=0.2902, val_acc=0.8688, val_loss=0.36596795707779933
Epoch 11/15 - loss=0.2842, val_acc=0.8718, val_loss=0.3462242708421703
Epoch 12/15 - loss=0.2771, val_acc=0.8800, val_loss=0.33932088558946594
Epoch 13/15 - loss=0.2720, val_acc=0.8750, val_loss=0.3352174624255721
Epoch 14/15 - loss=0.2690, val_acc=0.8800, val_loss=0.32494663082018665
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▆▄▄▃▂▁▃▁▃▂▂▂▁▄
validation_accuracy,▁▂▄▅▆▆▇▅█▆▆█▇█▆

0,1
epoch,15.0
test_accuracy,0.863
training_loss,0.26503
validation loss,0.37061
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: 9jxjog02 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2674, val_acc=0.7638, val_loss=0.1743751192722
Epoch 2/15 - loss=0.1500, val_acc=0.8127, val_loss=0.14090028356791603
Epoch 3/15 - loss=0.1293, val_acc=0.8278, val_loss=0.12823391681726123
Epoch 4/15 - loss=0.1195, val_acc=0.8408, val_loss=0.11902486080913699
Epoch 5/15 - loss=0.1134, val_acc=0.8455, val_loss=0.11426869310717432
Epoch 6/15 - loss=0.1090, val_acc=0.8462, val_loss=0.11188272051937002
Epoch 7/15 - loss=0.1058, val_acc=0.8507, val_loss=0.11078548537210581
Epoch 8/15 - loss=0.1033, val_acc=0.8555, val_loss=0.10661031260267315
Epoch 9/15 - loss=0.1010, val_acc=0.8540, val_loss=0.10798276465733196
Epoch 10/15 - loss=0.0991, val_acc=0.8595, val_loss=0.10309541399735062
Epoch 11/15 - loss=0.0971, val_acc=0.8573, val_loss=0.10286096169843634
Epoch 12/15 - loss=0.0955, val_acc=0.8622, val_loss=0.10150745408797428
Epoch 13/15 - loss=0.0941, val_acc=0.8645, val_loss=0.09836587726005193
Epoch 14/15 - loss=0.0928, val_acc=0.8658, val_loss=0.09854203803645092
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇███▇

0,1
epoch,15.0
test_accuracy,0.8513
training_loss,0.09155
validation loss,0.10188
validation_accuracy,0.85783


[34m[1mwandb[0m: Agent Starting Run: ufqyl3di with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1487, val_acc=0.7618, val_loss=0.7060827972598178
Epoch 2/15 - loss=0.6067, val_acc=0.8093, val_loss=0.5593056891504574
Epoch 3/15 - loss=0.5165, val_acc=0.8237, val_loss=0.5027129393573212
Epoch 4/15 - loss=0.4757, val_acc=0.8373, val_loss=0.47078306930415437
Epoch 5/15 - loss=0.4518, val_acc=0.8427, val_loss=0.44901364739761607
Epoch 6/15 - loss=0.4346, val_acc=0.8430, val_loss=0.4402083389557976
Epoch 7/15 - loss=0.4224, val_acc=0.8493, val_loss=0.43018341115921754
Epoch 8/15 - loss=0.4125, val_acc=0.8508, val_loss=0.419089080804916
Epoch 9/15 - loss=0.4034, val_acc=0.8497, val_loss=0.42172908662786857
Epoch 10/15 - loss=0.3961, val_acc=0.8592, val_loss=0.40600326465141573
Epoch 11/15 - loss=0.3892, val_acc=0.8543, val_loss=0.4025864385834172
Epoch 12/15 - loss=0.3826, val_acc=0.8568, val_loss=0.4000387158840789
Epoch 13/15 - loss=0.3770, val_acc=0.8575, val_loss=0.39027683618345355
Epoch 14/15 - loss=0.3717, val_acc=0.8620, val_loss=0.38640348967624033
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇████

0,1
epoch,15.0
test_accuracy,0.8522
training_loss,0.36679
validation loss,0.38584
validation_accuracy,0.86033


[34m[1mwandb[0m: Agent Starting Run: 2nikeqqk with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1556, val_acc=0.8275, val_loss=0.12376927939821812
Epoch 2/15 - loss=0.1123, val_acc=0.8487, val_loss=0.10914155139324669
Epoch 3/15 - loss=0.1032, val_acc=0.8567, val_loss=0.10336085367071043
Epoch 4/15 - loss=0.0974, val_acc=0.8550, val_loss=0.10369154206696556
Epoch 5/15 - loss=0.0928, val_acc=0.8692, val_loss=0.09388641542241928
Epoch 6/15 - loss=0.0891, val_acc=0.8633, val_loss=0.09691810265368508
Epoch 7/15 - loss=0.0867, val_acc=0.8635, val_loss=0.10091406397319008
Epoch 8/15 - loss=0.0849, val_acc=0.8763, val_loss=0.09052125489096238
Epoch 9/15 - loss=0.0826, val_acc=0.8658, val_loss=0.09602094514999028
Epoch 10/15 - loss=0.0812, val_acc=0.8783, val_loss=0.08653326105908628
Epoch 11/15 - loss=0.0792, val_acc=0.8668, val_loss=0.09581575435407996
Epoch 12/15 - loss=0.0783, val_acc=0.8660, val_loss=0.09726290569530321
Epoch 13/15 - loss=0.0770, val_acc=0.8797, val_loss=0.08673747693474843
Epoch 14/15 - loss=0.0756, val_acc=0.8777, val_loss=0.08932270018879274
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▄▂▃▁▃▃▁▂▂
validation_accuracy,▁▄▅▅▇▆▆█▆█▆▆██▇

0,1
epoch,15.0
test_accuracy,0.8618
training_loss,0.07445
validation loss,0.09193
validation_accuracy,0.87083


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9bsrd3z6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6259, val_acc=0.8292, val_loss=0.46986892883264325
Epoch 2/15 - loss=0.4350, val_acc=0.8482, val_loss=0.40931900197957294
Epoch 3/15 - loss=0.3933, val_acc=0.8610, val_loss=0.3929496971886448
Epoch 4/15 - loss=0.3677, val_acc=0.8625, val_loss=0.38100447920381475
Epoch 5/15 - loss=0.3505, val_acc=0.8697, val_loss=0.34861474358761296
Epoch 6/15 - loss=0.3366, val_acc=0.8655, val_loss=0.3651900360971135
Epoch 7/15 - loss=0.3265, val_acc=0.8460, val_loss=0.44542384190344814
Epoch 8/15 - loss=0.3173, val_acc=0.8730, val_loss=0.34927960227979377
Epoch 9/15 - loss=0.3099, val_acc=0.8582, val_loss=0.38767435174390713
Epoch 10/15 - loss=0.3022, val_acc=0.8687, val_loss=0.3559346085114018
Epoch 11/15 - loss=0.2968, val_acc=0.8705, val_loss=0.3537828276951274
Epoch 12/15 - loss=0.2914, val_acc=0.8787, val_loss=0.33658602607240146
Epoch 13/15 - loss=0.2865, val_acc=0.8823, val_loss=0.32385177927827774
Epoch 14/15 - loss=0.2823, val_acc=0.8757, val_loss=0.3449483972933062
Epoch 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▇▂▄▃▂▂▁▂▃
validation_accuracy,▁▄▅▅▆▆▃▇▅▆▆██▇▆

0,1
epoch,15.0
test_accuracy,0.8634
training_loss,0.27677
validation loss,0.35515
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: l6ps2guo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2557, val_acc=0.7805, val_loss=0.1707722580727658
Epoch 2/15 - loss=0.1444, val_acc=0.8188, val_loss=0.1329447690918598
Epoch 3/15 - loss=0.1219, val_acc=0.8345, val_loss=0.1195483380835641
Epoch 4/15 - loss=0.1125, val_acc=0.8422, val_loss=0.11316655820370497
Epoch 5/15 - loss=0.1070, val_acc=0.8470, val_loss=0.10848648750904173
Epoch 6/15 - loss=0.1027, val_acc=0.8533, val_loss=0.10580834427994996
Epoch 7/15 - loss=0.0995, val_acc=0.8598, val_loss=0.10260613500171341
Epoch 8/15 - loss=0.0971, val_acc=0.8607, val_loss=0.10063556806982776
Epoch 9/15 - loss=0.0948, val_acc=0.8617, val_loss=0.09884146377346889
Epoch 10/15 - loss=0.0930, val_acc=0.8623, val_loss=0.09955211952367332
Epoch 11/15 - loss=0.0915, val_acc=0.8637, val_loss=0.09715185716357451
Epoch 12/15 - loss=0.0897, val_acc=0.8650, val_loss=0.09731779821621647
Epoch 13/15 - loss=0.0885, val_acc=0.8693, val_loss=0.09411591335441452
Epoch 14/15 - loss=0.0873, val_acc=0.8658, val_loss=0.09497111199991268
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8636
training_loss,0.08612
validation loss,0.09472
validation_accuracy,0.868


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wb7ikdj3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5558, val_acc=0.8328, val_loss=0.43777155646277605
Epoch 2/15 - loss=0.4027, val_acc=0.8590, val_loss=0.3860200103252417
Epoch 3/15 - loss=0.3684, val_acc=0.8703, val_loss=0.36221018302277114
Epoch 4/15 - loss=0.3451, val_acc=0.8683, val_loss=0.36754871587464294
Epoch 5/15 - loss=0.3293, val_acc=0.8677, val_loss=0.35029306676145977
Epoch 6/15 - loss=0.3156, val_acc=0.8682, val_loss=0.35125727292328474
Epoch 7/15 - loss=0.3063, val_acc=0.8582, val_loss=0.38536835636902395
Epoch 8/15 - loss=0.2972, val_acc=0.8767, val_loss=0.3388813449871443
Epoch 9/15 - loss=0.2906, val_acc=0.8737, val_loss=0.3436672801769622
Epoch 10/15 - loss=0.2836, val_acc=0.8790, val_loss=0.32317606716566977
Epoch 11/15 - loss=0.2767, val_acc=0.8710, val_loss=0.33245739693126186
Epoch 12/15 - loss=0.2706, val_acc=0.8812, val_loss=0.33168192896185533
Epoch 13/15 - loss=0.2654, val_acc=0.8838, val_loss=0.31609606544582625
Epoch 14/15 - loss=0.2602, val_acc=0.8830, val_loss=0.3224808191262113
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▃▃▅▂▃▁▂▂▁▁▂
validation_accuracy,▁▅▆▆▆▆▄▇▇▇▆███▇

0,1
epoch,15.0
test_accuracy,0.8687
training_loss,0.25581
validation loss,0.32548
validation_accuracy,0.87767


[34m[1mwandb[0m: Agent Starting Run: 7p46k2bi with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6259, val_acc=0.8292, val_loss=0.46986892883264325
Epoch 2/15 - loss=0.4350, val_acc=0.8482, val_loss=0.40931900197957294
Epoch 3/15 - loss=0.3933, val_acc=0.8610, val_loss=0.3929496971886448
Epoch 4/15 - loss=0.3677, val_acc=0.8625, val_loss=0.38100447920381475
Epoch 5/15 - loss=0.3505, val_acc=0.8697, val_loss=0.34861474358761296
Epoch 6/15 - loss=0.3366, val_acc=0.8655, val_loss=0.3651900360971135
Epoch 7/15 - loss=0.3265, val_acc=0.8460, val_loss=0.44542384190344814
Epoch 8/15 - loss=0.3173, val_acc=0.8730, val_loss=0.34927960227979377
Epoch 9/15 - loss=0.3099, val_acc=0.8582, val_loss=0.38767435174390713
Epoch 10/15 - loss=0.3022, val_acc=0.8687, val_loss=0.3559346085114018
Epoch 11/15 - loss=0.2968, val_acc=0.8705, val_loss=0.3537828276951274
Epoch 12/15 - loss=0.2914, val_acc=0.8787, val_loss=0.33658602607240146
Epoch 13/15 - loss=0.2865, val_acc=0.8823, val_loss=0.32385177927827774
Epoch 14/15 - loss=0.2823, val_acc=0.8757, val_loss=0.3449483972933062
Epoch 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▇▂▄▃▂▂▁▂▃
validation_accuracy,▁▄▅▅▆▆▃▇▅▆▆██▇▆

0,1
epoch,15.0
test_accuracy,0.8634
training_loss,0.27677
validation loss,0.35515
validation_accuracy,0.87


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vy2clszf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1487, val_acc=0.7618, val_loss=0.7060827972598178
Epoch 2/15 - loss=0.6067, val_acc=0.8093, val_loss=0.5593056891504574
Epoch 3/15 - loss=0.5165, val_acc=0.8237, val_loss=0.5027129393573212
Epoch 4/15 - loss=0.4757, val_acc=0.8373, val_loss=0.47078306930415437
Epoch 5/15 - loss=0.4518, val_acc=0.8427, val_loss=0.44901364739761607
Epoch 6/15 - loss=0.4346, val_acc=0.8430, val_loss=0.4402083389557976
Epoch 7/15 - loss=0.4224, val_acc=0.8493, val_loss=0.43018341115921754
Epoch 8/15 - loss=0.4125, val_acc=0.8508, val_loss=0.419089080804916
Epoch 9/15 - loss=0.4034, val_acc=0.8497, val_loss=0.42172908662786857
Epoch 10/15 - loss=0.3961, val_acc=0.8592, val_loss=0.40600326465141573
Epoch 11/15 - loss=0.3892, val_acc=0.8543, val_loss=0.4025864385834172
Epoch 12/15 - loss=0.3826, val_acc=0.8568, val_loss=0.4000387158840789
Epoch 13/15 - loss=0.3770, val_acc=0.8575, val_loss=0.39027683618345355
Epoch 14/15 - loss=0.3717, val_acc=0.8620, val_loss=0.38640348967624033
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇████

0,1
epoch,15.0
test_accuracy,0.8522
training_loss,0.36679
validation loss,0.38584
validation_accuracy,0.86033


[34m[1mwandb[0m: Agent Starting Run: ywpqem2v with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1376, val_acc=0.8423, val_loss=0.11112433564752147
Epoch 2/15 - loss=0.1025, val_acc=0.8477, val_loss=0.10777411884002344
Epoch 3/15 - loss=0.0945, val_acc=0.8620, val_loss=0.09892814536193184
Epoch 4/15 - loss=0.0897, val_acc=0.8653, val_loss=0.09418991352338693
Epoch 5/15 - loss=0.0857, val_acc=0.8710, val_loss=0.09264558895498605
Epoch 6/15 - loss=0.0821, val_acc=0.8667, val_loss=0.09416271172475506
Epoch 7/15 - loss=0.0797, val_acc=0.8743, val_loss=0.08965335280493393
Epoch 8/15 - loss=0.0772, val_acc=0.8610, val_loss=0.09858342616893281
Epoch 9/15 - loss=0.0767, val_acc=0.8818, val_loss=0.08504300356954755
Epoch 10/15 - loss=0.0739, val_acc=0.8742, val_loss=0.08980353038586741
Epoch 11/15 - loss=0.0732, val_acc=0.8777, val_loss=0.08927301549656505
Epoch 12/15 - loss=0.0709, val_acc=0.8768, val_loss=0.08852451372980254
Epoch 13/15 - loss=0.0700, val_acc=0.8782, val_loss=0.08840949088910666
Epoch 14/15 - loss=0.0688, val_acc=0.8818, val_loss=0.08668242368013991
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▇▅▃▃▃▂▅▁▂▂▂▂▁▃
validation_accuracy,▁▂▄▅▆▅▇▄█▇▇▇▇█▆

0,1
epoch,15.0
test_accuracy,0.868
training_loss,0.06824
validation loss,0.09068
validation_accuracy,0.87167


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jegikfk7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.9780, val_acc=0.7860, val_loss=0.6512629818942361
Epoch 2/15 - loss=0.5636, val_acc=0.8148, val_loss=0.5228944074259625
Epoch 3/15 - loss=0.4808, val_acc=0.8333, val_loss=0.4719444224507964
Epoch 4/15 - loss=0.4430, val_acc=0.8395, val_loss=0.443731536751288
Epoch 5/15 - loss=0.4199, val_acc=0.8490, val_loss=0.4218333284333397
Epoch 6/15 - loss=0.4029, val_acc=0.8515, val_loss=0.409878987433738
Epoch 7/15 - loss=0.3908, val_acc=0.8545, val_loss=0.4020654994868725
Epoch 8/15 - loss=0.3806, val_acc=0.8597, val_loss=0.3903855355267599
Epoch 9/15 - loss=0.3718, val_acc=0.8575, val_loss=0.3875869973248034
Epoch 10/15 - loss=0.3644, val_acc=0.8625, val_loss=0.37707866790659794
Epoch 11/15 - loss=0.3577, val_acc=0.8635, val_loss=0.3731787426115567
Epoch 12/15 - loss=0.3517, val_acc=0.8625, val_loss=0.3777987545739664
Epoch 13/15 - loss=0.3468, val_acc=0.8688, val_loss=0.35954957352361405
Epoch 14/15 - loss=0.3416, val_acc=0.8677, val_loss=0.3586472484648075
Epoch 15/15 - l

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▇▇▇▇█▇███

0,1
epoch,15.0
test_accuracy,0.8609
training_loss,0.33727
validation loss,0.35906
validation_accuracy,0.86617


[34m[1mwandb[0m: Agent Starting Run: rlu2jrd1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1487, val_acc=0.7618, val_loss=0.7060827972598178
Epoch 2/15 - loss=0.6067, val_acc=0.8093, val_loss=0.5593056891504574
Epoch 3/15 - loss=0.5165, val_acc=0.8237, val_loss=0.5027129393573212
Epoch 4/15 - loss=0.4757, val_acc=0.8373, val_loss=0.47078306930415437
Epoch 5/15 - loss=0.4518, val_acc=0.8427, val_loss=0.44901364739761607
Epoch 6/15 - loss=0.4346, val_acc=0.8430, val_loss=0.4402083389557976
Epoch 7/15 - loss=0.4224, val_acc=0.8493, val_loss=0.43018341115921754
Epoch 8/15 - loss=0.4125, val_acc=0.8508, val_loss=0.419089080804916
Epoch 9/15 - loss=0.4034, val_acc=0.8497, val_loss=0.42172908662786857
Epoch 10/15 - loss=0.3961, val_acc=0.8592, val_loss=0.40600326465141573
Epoch 11/15 - loss=0.3892, val_acc=0.8543, val_loss=0.4025864385834172
Epoch 12/15 - loss=0.3826, val_acc=0.8568, val_loss=0.4000387158840789
Epoch 13/15 - loss=0.3770, val_acc=0.8575, val_loss=0.39027683618345355
Epoch 14/15 - loss=0.3717, val_acc=0.8620, val_loss=0.38640348967624033
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇█▇████

0,1
epoch,15.0
test_accuracy,0.8522
training_loss,0.36679
validation loss,0.38584
validation_accuracy,0.86033


[34m[1mwandb[0m: Agent Starting Run: mxsvevjp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2614, val_acc=0.7657, val_loss=0.17869219264990102
Epoch 2/15 - loss=0.1497, val_acc=0.8140, val_loss=0.13685060676687874
Epoch 3/15 - loss=0.1248, val_acc=0.8290, val_loss=0.12301112224892029
Epoch 4/15 - loss=0.1150, val_acc=0.8393, val_loss=0.1153765425620314
Epoch 5/15 - loss=0.1092, val_acc=0.8470, val_loss=0.11045117646180808
Epoch 6/15 - loss=0.1049, val_acc=0.8488, val_loss=0.1082665319748341
Epoch 7/15 - loss=0.1018, val_acc=0.8545, val_loss=0.10485673066199987
Epoch 8/15 - loss=0.0992, val_acc=0.8575, val_loss=0.10324114404053948
Epoch 9/15 - loss=0.0970, val_acc=0.8565, val_loss=0.10281740151268254
Epoch 10/15 - loss=0.0951, val_acc=0.8617, val_loss=0.10006913099133749
Epoch 11/15 - loss=0.0933, val_acc=0.8622, val_loss=0.09911340969458003
Epoch 12/15 - loss=0.0919, val_acc=0.8595, val_loss=0.10030356166940334
Epoch 13/15 - loss=0.0906, val_acc=0.8662, val_loss=0.09551479782723016
Epoch 14/15 - loss=0.0893, val_acc=0.8657, val_loss=0.09630514068844791
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8592
training_loss,0.08831
validation loss,0.09697
validation_accuracy,0.86183


[34m[1mwandb[0m: Agent Starting Run: mdnriwgp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5989, val_acc=0.8410, val_loss=0.44994710875608734
Epoch 2/15 - loss=0.4256, val_acc=0.8517, val_loss=0.4013520539824901
Epoch 3/15 - loss=0.3864, val_acc=0.8647, val_loss=0.3748061871000329
Epoch 4/15 - loss=0.3594, val_acc=0.8658, val_loss=0.3695753243721706
Epoch 5/15 - loss=0.3424, val_acc=0.8728, val_loss=0.3456849637029308
Epoch 6/15 - loss=0.3283, val_acc=0.8603, val_loss=0.3710932057749727
Epoch 7/15 - loss=0.3205, val_acc=0.8775, val_loss=0.34597953077527593
Epoch 8/15 - loss=0.3099, val_acc=0.8770, val_loss=0.33969475408024874
Epoch 9/15 - loss=0.3030, val_acc=0.8668, val_loss=0.36464865130682816
Epoch 10/15 - loss=0.2953, val_acc=0.8728, val_loss=0.35835655412067724
Epoch 11/15 - loss=0.2894, val_acc=0.8750, val_loss=0.3539932443350205
Epoch 12/15 - loss=0.2848, val_acc=0.8788, val_loss=0.33219148297520196
Epoch 13/15 - loss=0.2810, val_acc=0.8843, val_loss=0.32731006684640773
Epoch 14/15 - loss=0.2765, val_acc=0.8788, val_loss=0.344105748849108
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▅▄▃▂▃▂▂▃▃▃▁▁▂▃
validation_accuracy,▁▃▅▅▆▄▇▇▅▆▆▇█▇▆

0,1
epoch,15.0
test_accuracy,0.8643
training_loss,0.27383
validation loss,0.35796
validation_accuracy,0.8745


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2o6m1xdd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1546, val_acc=0.8385, val_loss=0.11807534667181187
Epoch 2/15 - loss=0.1089, val_acc=0.8507, val_loss=0.1079245415272775
Epoch 3/15 - loss=0.1008, val_acc=0.8613, val_loss=0.10000543939049349
Epoch 4/15 - loss=0.0952, val_acc=0.8652, val_loss=0.09625666071572679
Epoch 5/15 - loss=0.0923, val_acc=0.8683, val_loss=0.09380915621205271
Epoch 6/15 - loss=0.0880, val_acc=0.8592, val_loss=0.10111848930572352
Epoch 7/15 - loss=0.0857, val_acc=0.8758, val_loss=0.08908036578476146
Epoch 8/15 - loss=0.0847, val_acc=0.8598, val_loss=0.10085679632055909
Epoch 9/15 - loss=0.0831, val_acc=0.8683, val_loss=0.09525840257122066
Epoch 10/15 - loss=0.0808, val_acc=0.8637, val_loss=0.0966752390122895
Epoch 11/15 - loss=0.0801, val_acc=0.8692, val_loss=0.09290405982900182
Epoch 12/15 - loss=0.0777, val_acc=0.8675, val_loss=0.09691714929371673
Epoch 13/15 - loss=0.0772, val_acc=0.8732, val_loss=0.0920233822369103
Epoch 14/15 - loss=0.0760, val_acc=0.8767, val_loss=0.08734610662139132
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▃▂▄▁▄▃▃▂▃▂▁▃
validation_accuracy,▁▃▅▆▆▅█▅▆▆▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8652
training_loss,0.07423
validation loss,0.09461
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: t7u5g8dl with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6094, val_acc=0.8387, val_loss=0.45171820913596056
Epoch 2/15 - loss=0.4182, val_acc=0.8477, val_loss=0.414238206275963
Epoch 3/15 - loss=0.3859, val_acc=0.8590, val_loss=0.3921560463062768
Epoch 4/15 - loss=0.3664, val_acc=0.8628, val_loss=0.367580587928539
Epoch 5/15 - loss=0.3493, val_acc=0.8627, val_loss=0.380974212908759
Epoch 6/15 - loss=0.3326, val_acc=0.8778, val_loss=0.33713473099885616
Epoch 7/15 - loss=0.3213, val_acc=0.8740, val_loss=0.3478365374809786
Epoch 8/15 - loss=0.3106, val_acc=0.8653, val_loss=0.36720863611454246
Epoch 9/15 - loss=0.3047, val_acc=0.8788, val_loss=0.3365835997772626
Epoch 10/15 - loss=0.2945, val_acc=0.8682, val_loss=0.36050397008235285
Epoch 11/15 - loss=0.2902, val_acc=0.8723, val_loss=0.3483397652325901
Epoch 12/15 - loss=0.2804, val_acc=0.8722, val_loss=0.35855933576778337
Epoch 13/15 - loss=0.2785, val_acc=0.8775, val_loss=0.33436116963169304
Epoch 14/15 - loss=0.2735, val_acc=0.8823, val_loss=0.3344395518285832
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▆▄▃▄▁▂▃▁▃▂▂▁▁▂
validation_accuracy,▁▂▄▅▅▇▇▅▇▆▆▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8656
training_loss,0.26661
validation loss,0.35018
validation_accuracy,0.87367


[34m[1mwandb[0m: Agent Starting Run: 6j11my0l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: e2ulb77h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6259, val_acc=0.8292, val_loss=0.46986892883264325
Epoch 2/15 - loss=0.4350, val_acc=0.8482, val_loss=0.40931900197957294
Epoch 3/15 - loss=0.3933, val_acc=0.8610, val_loss=0.3929496971886448
Epoch 4/15 - loss=0.3677, val_acc=0.8625, val_loss=0.38100447920381475
Epoch 5/15 - loss=0.3505, val_acc=0.8697, val_loss=0.34861474358761296
Epoch 6/15 - loss=0.3366, val_acc=0.8655, val_loss=0.3651900360971135
Epoch 7/15 - loss=0.3265, val_acc=0.8460, val_loss=0.44542384190344814
Epoch 8/15 - loss=0.3173, val_acc=0.8730, val_loss=0.34927960227979377
Epoch 9/15 - loss=0.3099, val_acc=0.8582, val_loss=0.38767435174390713
Epoch 10/15 - loss=0.3022, val_acc=0.8687, val_loss=0.3559346085114018
Epoch 11/15 - loss=0.2968, val_acc=0.8705, val_loss=0.3537828276951274
Epoch 12/15 - loss=0.2914, val_acc=0.8787, val_loss=0.33658602607240146
Epoch 13/15 - loss=0.2865, val_acc=0.8823, val_loss=0.32385177927827774
Epoch 14/15 - loss=0.2823, val_acc=0.8757, val_loss=0.3449483972933062
Epoch 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▇▂▄▃▂▂▁▂▃
validation_accuracy,▁▄▅▅▆▆▃▇▅▆▆██▇▆

0,1
epoch,15.0
test_accuracy,0.8634
training_loss,0.27677
validation loss,0.35515
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: kmio2i2l with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1546, val_acc=0.8385, val_loss=0.11807534667181187
Epoch 2/15 - loss=0.1089, val_acc=0.8507, val_loss=0.1079245415272775
Epoch 3/15 - loss=0.1008, val_acc=0.8613, val_loss=0.10000543939049349
Epoch 4/15 - loss=0.0952, val_acc=0.8652, val_loss=0.09625666071572679
Epoch 5/15 - loss=0.0923, val_acc=0.8683, val_loss=0.09380915621205271
Epoch 6/15 - loss=0.0880, val_acc=0.8592, val_loss=0.10111848930572352
Epoch 7/15 - loss=0.0857, val_acc=0.8758, val_loss=0.08908036578476146
Epoch 8/15 - loss=0.0847, val_acc=0.8598, val_loss=0.10085679632055909
Epoch 9/15 - loss=0.0831, val_acc=0.8683, val_loss=0.09525840257122066
Epoch 10/15 - loss=0.0808, val_acc=0.8637, val_loss=0.0966752390122895
Epoch 11/15 - loss=0.0801, val_acc=0.8692, val_loss=0.09290405982900182
Epoch 12/15 - loss=0.0777, val_acc=0.8675, val_loss=0.09691714929371673
Epoch 13/15 - loss=0.0772, val_acc=0.8732, val_loss=0.0920233822369103
Epoch 14/15 - loss=0.0760, val_acc=0.8767, val_loss=0.08734610662139132
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▃▂▄▁▄▃▃▂▃▂▁▃
validation_accuracy,▁▃▅▆▆▅█▅▆▆▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8652
training_loss,0.07423
validation loss,0.09461
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: o5kofxt4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2701, val_acc=0.7712, val_loss=0.17233303914369377
Epoch 2/15 - loss=0.1489, val_acc=0.8120, val_loss=0.14023549319911544
Epoch 3/15 - loss=0.1283, val_acc=0.8308, val_loss=0.12563040785218538
Epoch 4/15 - loss=0.1190, val_acc=0.8403, val_loss=0.11926689887964766
Epoch 5/15 - loss=0.1133, val_acc=0.8457, val_loss=0.11464305398695039
Epoch 6/15 - loss=0.1091, val_acc=0.8485, val_loss=0.11144482326195737
Epoch 7/15 - loss=0.1059, val_acc=0.8540, val_loss=0.10865193871514116
Epoch 8/15 - loss=0.1035, val_acc=0.8557, val_loss=0.1063017702072951
Epoch 9/15 - loss=0.1012, val_acc=0.8563, val_loss=0.10551612705504892
Epoch 10/15 - loss=0.0994, val_acc=0.8577, val_loss=0.10394982501700006
Epoch 11/15 - loss=0.0977, val_acc=0.8590, val_loss=0.10234280924618876
Epoch 12/15 - loss=0.0959, val_acc=0.8615, val_loss=0.10259491782410007
Epoch 13/15 - loss=0.0947, val_acc=0.8613, val_loss=0.10039957074187579
Epoch 14/15 - loss=0.0934, val_acc=0.8668, val_loss=0.0988652795390645
Epo

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.09208
validation loss,0.09916
validation_accuracy,0.86083


[34m[1mwandb[0m: Agent Starting Run: pemuaruf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1684, val_acc=0.7595, val_loss=0.7656161216685117
Epoch 2/15 - loss=0.6343, val_acc=0.8075, val_loss=0.5643192722810766
Epoch 3/15 - loss=0.5117, val_acc=0.8272, val_loss=0.49474799202383346
Epoch 4/15 - loss=0.4622, val_acc=0.8352, val_loss=0.46027655629627634
Epoch 5/15 - loss=0.4350, val_acc=0.8437, val_loss=0.43663804996165595
Epoch 6/15 - loss=0.4160, val_acc=0.8482, val_loss=0.42320942443700355
Epoch 7/15 - loss=0.4023, val_acc=0.8508, val_loss=0.4110628621184563
Epoch 8/15 - loss=0.3908, val_acc=0.8537, val_loss=0.40271150484231427
Epoch 9/15 - loss=0.3813, val_acc=0.8533, val_loss=0.39744558979354605
Epoch 10/15 - loss=0.3733, val_acc=0.8605, val_loss=0.38701558632697586
Epoch 11/15 - loss=0.3661, val_acc=0.8618, val_loss=0.38161410936788304
Epoch 12/15 - loss=0.3597, val_acc=0.8585, val_loss=0.38717715258045654
Epoch 13/15 - loss=0.3542, val_acc=0.8657, val_loss=0.36754165830593416
Epoch 14/15 - loss=0.3486, val_acc=0.8627, val_loss=0.36683406405463936
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇▇██████

0,1
epoch,15.0
test_accuracy,0.8599
training_loss,0.34423
validation loss,0.36479
validation_accuracy,0.866


[34m[1mwandb[0m: Agent Starting Run: 2cgebc18 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2251, val_acc=0.7937, val_loss=0.15555477288051223
Epoch 2/15 - loss=0.1363, val_acc=0.8188, val_loss=0.12978856318332885
Epoch 3/15 - loss=0.1197, val_acc=0.8340, val_loss=0.11886597894815847
Epoch 4/15 - loss=0.1118, val_acc=0.8415, val_loss=0.11237039833777483
Epoch 5/15 - loss=0.1068, val_acc=0.8487, val_loss=0.10782254377191218
Epoch 6/15 - loss=0.1029, val_acc=0.8508, val_loss=0.10580812756301354
Epoch 7/15 - loss=0.1001, val_acc=0.8547, val_loss=0.10346746048261021
Epoch 8/15 - loss=0.0978, val_acc=0.8592, val_loss=0.10160616912260707
Epoch 9/15 - loss=0.0958, val_acc=0.8580, val_loss=0.10194214366832723
Epoch 10/15 - loss=0.0940, val_acc=0.8613, val_loss=0.09877766964297852
Epoch 11/15 - loss=0.0923, val_acc=0.8617, val_loss=0.09740027399928149
Epoch 12/15 - loss=0.0911, val_acc=0.8622, val_loss=0.09871866585730099
Epoch 13/15 - loss=0.0898, val_acc=0.8682, val_loss=0.09457333113366889
Epoch 14/15 - loss=0.0886, val_acc=0.8680, val_loss=0.09465305052316601
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇▇███

0,1
epoch,15.0
test_accuracy,0.8613
training_loss,0.08752
validation loss,0.09534
validation_accuracy,0.86317


[34m[1mwandb[0m: Agent Starting Run: 37fl7817 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1556, val_acc=0.8275, val_loss=0.12376927939821812
Epoch 2/15 - loss=0.1123, val_acc=0.8487, val_loss=0.10914155139324669
Epoch 3/15 - loss=0.1032, val_acc=0.8567, val_loss=0.10336085367071043
Epoch 4/15 - loss=0.0974, val_acc=0.8550, val_loss=0.10369154206696556
Epoch 5/15 - loss=0.0928, val_acc=0.8692, val_loss=0.09388641542241928
Epoch 6/15 - loss=0.0891, val_acc=0.8633, val_loss=0.09691810265368508
Epoch 7/15 - loss=0.0867, val_acc=0.8635, val_loss=0.10091406397319008
Epoch 8/15 - loss=0.0849, val_acc=0.8763, val_loss=0.09052125489096238
Epoch 9/15 - loss=0.0826, val_acc=0.8658, val_loss=0.09602094514999028
Epoch 10/15 - loss=0.0812, val_acc=0.8783, val_loss=0.08653326105908628
Epoch 11/15 - loss=0.0792, val_acc=0.8668, val_loss=0.09581575435407996
Epoch 12/15 - loss=0.0783, val_acc=0.8660, val_loss=0.09726290569530321
Epoch 13/15 - loss=0.0770, val_acc=0.8797, val_loss=0.08673747693474843
Epoch 14/15 - loss=0.0756, val_acc=0.8777, val_loss=0.08932270018879274
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▄▂▃▁▃▃▁▂▂
validation_accuracy,▁▄▅▅▇▆▆█▆█▆▆██▇

0,1
epoch,15.0
test_accuracy,0.8618
training_loss,0.07445
validation loss,0.09193
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: orntsfn5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6094, val_acc=0.8387, val_loss=0.45171820913596056
Epoch 2/15 - loss=0.4182, val_acc=0.8477, val_loss=0.414238206275963
Epoch 3/15 - loss=0.3859, val_acc=0.8590, val_loss=0.3921560463062768
Epoch 4/15 - loss=0.3664, val_acc=0.8628, val_loss=0.367580587928539
Epoch 5/15 - loss=0.3493, val_acc=0.8627, val_loss=0.380974212908759
Epoch 6/15 - loss=0.3326, val_acc=0.8778, val_loss=0.33713473099885616
Epoch 7/15 - loss=0.3213, val_acc=0.8740, val_loss=0.3478365374809786
Epoch 8/15 - loss=0.3106, val_acc=0.8653, val_loss=0.36720863611454246
Epoch 9/15 - loss=0.3047, val_acc=0.8788, val_loss=0.3365835997772626
Epoch 10/15 - loss=0.2945, val_acc=0.8682, val_loss=0.36050397008235285
Epoch 11/15 - loss=0.2902, val_acc=0.8723, val_loss=0.3483397652325901
Epoch 12/15 - loss=0.2804, val_acc=0.8722, val_loss=0.35855933576778337
Epoch 13/15 - loss=0.2785, val_acc=0.8775, val_loss=0.33436116963169304
Epoch 14/15 - loss=0.2735, val_acc=0.8823, val_loss=0.3344395518285832
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▆▄▃▄▁▂▃▁▃▂▂▁▁▂
validation_accuracy,▁▂▄▅▅▇▇▅▇▆▆▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8656
training_loss,0.26661
validation loss,0.35018
validation_accuracy,0.87367


[34m[1mwandb[0m: Agent Starting Run: pasz737b with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2262, val_acc=0.7972, val_loss=0.15340633505537074
Epoch 2/15 - loss=0.1351, val_acc=0.8230, val_loss=0.12849873562126413
Epoch 3/15 - loss=0.1186, val_acc=0.8365, val_loss=0.11654167326939735
Epoch 4/15 - loss=0.1104, val_acc=0.8438, val_loss=0.1116252541829645
Epoch 5/15 - loss=0.1056, val_acc=0.8525, val_loss=0.10642589802703982
Epoch 6/15 - loss=0.1016, val_acc=0.8510, val_loss=0.10472057925021
Epoch 7/15 - loss=0.0987, val_acc=0.8575, val_loss=0.10131341121112283
Epoch 8/15 - loss=0.0963, val_acc=0.8600, val_loss=0.09995529280625355
Epoch 9/15 - loss=0.0943, val_acc=0.8628, val_loss=0.09797341864225681
Epoch 10/15 - loss=0.0926, val_acc=0.8635, val_loss=0.09862472186706352
Epoch 11/15 - loss=0.0912, val_acc=0.8643, val_loss=0.09677629532947156
Epoch 12/15 - loss=0.0895, val_acc=0.8667, val_loss=0.09631917740106112
Epoch 13/15 - loss=0.0884, val_acc=0.8690, val_loss=0.09333225153615081
Epoch 14/15 - loss=0.0872, val_acc=0.8700, val_loss=0.09374016410840073
Epoch

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇████

0,1
epoch,15.0
test_accuracy,0.8624
training_loss,0.086
validation loss,0.0937
validation_accuracy,0.86867


[34m[1mwandb[0m: Agent Starting Run: eljjjktw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1953, val_acc=0.7537, val_loss=0.7829581861813705
Epoch 2/15 - loss=0.6502, val_acc=0.8077, val_loss=0.5747135449574344
Epoch 3/15 - loss=0.5188, val_acc=0.8282, val_loss=0.49650118575317015
Epoch 4/15 - loss=0.4641, val_acc=0.8343, val_loss=0.4604563297512127
Epoch 5/15 - loss=0.4342, val_acc=0.8423, val_loss=0.43731749213697185
Epoch 6/15 - loss=0.4128, val_acc=0.8512, val_loss=0.4172212941320218
Epoch 7/15 - loss=0.3976, val_acc=0.8540, val_loss=0.40700705793293057
Epoch 8/15 - loss=0.3860, val_acc=0.8550, val_loss=0.3959101901038994
Epoch 9/15 - loss=0.3759, val_acc=0.8573, val_loss=0.3859872244664137
Epoch 10/15 - loss=0.3671, val_acc=0.8605, val_loss=0.38504770047177456
Epoch 11/15 - loss=0.3602, val_acc=0.8647, val_loss=0.3760536411013232
Epoch 12/15 - loss=0.3527, val_acc=0.8628, val_loss=0.3764751127040352
Epoch 13/15 - loss=0.3479, val_acc=0.8660, val_loss=0.3620467727888231
Epoch 14/15 - loss=0.3424, val_acc=0.8667, val_loss=0.3621035389928984
Epoch 15/15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▆▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8615
training_loss,0.33711
validation loss,0.36055
validation_accuracy,0.8695


[34m[1mwandb[0m: Agent Starting Run: 2mo2ovfr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6094, val_acc=0.8387, val_loss=0.45171820913596056
Epoch 2/15 - loss=0.4182, val_acc=0.8477, val_loss=0.414238206275963
Epoch 3/15 - loss=0.3859, val_acc=0.8590, val_loss=0.3921560463062768
Epoch 4/15 - loss=0.3664, val_acc=0.8628, val_loss=0.367580587928539
Epoch 5/15 - loss=0.3493, val_acc=0.8627, val_loss=0.380974212908759
Epoch 6/15 - loss=0.3326, val_acc=0.8778, val_loss=0.33713473099885616
Epoch 7/15 - loss=0.3213, val_acc=0.8740, val_loss=0.3478365374809786
Epoch 8/15 - loss=0.3106, val_acc=0.8653, val_loss=0.36720863611454246
Epoch 9/15 - loss=0.3047, val_acc=0.8788, val_loss=0.3365835997772626
Epoch 10/15 - loss=0.2945, val_acc=0.8682, val_loss=0.36050397008235285
Epoch 11/15 - loss=0.2902, val_acc=0.8723, val_loss=0.3483397652325901
Epoch 12/15 - loss=0.2804, val_acc=0.8722, val_loss=0.35855933576778337
Epoch 13/15 - loss=0.2785, val_acc=0.8775, val_loss=0.33436116963169304
Epoch 14/15 - loss=0.2735, val_acc=0.8823, val_loss=0.3344395518285832
Epoch 15/15 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁▁
validation loss,█▆▄▃▄▁▂▃▁▃▂▂▁▁▂
validation_accuracy,▁▂▄▅▅▇▇▅▇▆▆▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8656
training_loss,0.26661
validation loss,0.35018
validation_accuracy,0.87367


[34m[1mwandb[0m: Agent Starting Run: npjxmbzq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2557, val_acc=0.7805, val_loss=0.1707722580727658
Epoch 2/15 - loss=0.1444, val_acc=0.8188, val_loss=0.1329447690918598
Epoch 3/15 - loss=0.1219, val_acc=0.8345, val_loss=0.1195483380835641
Epoch 4/15 - loss=0.1125, val_acc=0.8422, val_loss=0.11316655820370497
Epoch 5/15 - loss=0.1070, val_acc=0.8470, val_loss=0.10848648750904173
Epoch 6/15 - loss=0.1027, val_acc=0.8533, val_loss=0.10580834427994996
Epoch 7/15 - loss=0.0995, val_acc=0.8598, val_loss=0.10260613500171341
Epoch 8/15 - loss=0.0971, val_acc=0.8607, val_loss=0.10063556806982776
Epoch 9/15 - loss=0.0948, val_acc=0.8617, val_loss=0.09884146377346889
Epoch 10/15 - loss=0.0930, val_acc=0.8623, val_loss=0.09955211952367332
Epoch 11/15 - loss=0.0915, val_acc=0.8637, val_loss=0.09715185716357451
Epoch 12/15 - loss=0.0897, val_acc=0.8650, val_loss=0.09731779821621647
Epoch 13/15 - loss=0.0885, val_acc=0.8693, val_loss=0.09411591335441452
Epoch 14/15 - loss=0.0873, val_acc=0.8658, val_loss=0.09497111199991268
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8636
training_loss,0.08612
validation loss,0.09472
validation_accuracy,0.868


[34m[1mwandb[0m: Agent Starting Run: e5v06zxw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2557, val_acc=0.7805, val_loss=0.1707722580727658
Epoch 2/15 - loss=0.1444, val_acc=0.8188, val_loss=0.1329447690918598
Epoch 3/15 - loss=0.1219, val_acc=0.8345, val_loss=0.1195483380835641
Epoch 4/15 - loss=0.1125, val_acc=0.8422, val_loss=0.11316655820370497
Epoch 5/15 - loss=0.1070, val_acc=0.8470, val_loss=0.10848648750904173
Epoch 6/15 - loss=0.1027, val_acc=0.8533, val_loss=0.10580834427994996
Epoch 7/15 - loss=0.0995, val_acc=0.8598, val_loss=0.10260613500171341
Epoch 8/15 - loss=0.0971, val_acc=0.8607, val_loss=0.10063556806982776
Epoch 9/15 - loss=0.0948, val_acc=0.8617, val_loss=0.09884146377346889
Epoch 10/15 - loss=0.0930, val_acc=0.8623, val_loss=0.09955211952367332
Epoch 11/15 - loss=0.0915, val_acc=0.8637, val_loss=0.09715185716357451
Epoch 12/15 - loss=0.0897, val_acc=0.8650, val_loss=0.09731779821621647
Epoch 13/15 - loss=0.0885, val_acc=0.8693, val_loss=0.09411591335441452
Epoch 14/15 - loss=0.0873, val_acc=0.8658, val_loss=0.09497111199991268
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8636
training_loss,0.08612
validation loss,0.09472
validation_accuracy,0.868


[34m[1mwandb[0m: Agent Starting Run: amxon1g7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2251, val_acc=0.7937, val_loss=0.15555477288051223
Epoch 2/15 - loss=0.1363, val_acc=0.8188, val_loss=0.12978856318332885
Epoch 3/15 - loss=0.1197, val_acc=0.8340, val_loss=0.11886597894815847
Epoch 4/15 - loss=0.1118, val_acc=0.8415, val_loss=0.11237039833777483
Epoch 5/15 - loss=0.1068, val_acc=0.8487, val_loss=0.10782254377191218
Epoch 6/15 - loss=0.1029, val_acc=0.8508, val_loss=0.10580812756301354
Epoch 7/15 - loss=0.1001, val_acc=0.8547, val_loss=0.10346746048261021
Epoch 8/15 - loss=0.0978, val_acc=0.8592, val_loss=0.10160616912260707
Epoch 9/15 - loss=0.0958, val_acc=0.8580, val_loss=0.10194214366832723
Epoch 10/15 - loss=0.0940, val_acc=0.8613, val_loss=0.09877766964297852
Epoch 11/15 - loss=0.0923, val_acc=0.8617, val_loss=0.09740027399928149
Epoch 12/15 - loss=0.0911, val_acc=0.8622, val_loss=0.09871866585730099
Epoch 13/15 - loss=0.0898, val_acc=0.8682, val_loss=0.09457333113366889
Epoch 14/15 - loss=0.0886, val_acc=0.8680, val_loss=0.09465305052316601
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▇▇▇▇▇▇███

0,1
epoch,15.0
test_accuracy,0.8613
training_loss,0.08752
validation loss,0.09534
validation_accuracy,0.86317


[34m[1mwandb[0m: Agent Starting Run: 4os1dq85 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=1.1953, val_acc=0.7537, val_loss=0.7829581861813705
Epoch 2/15 - loss=0.6502, val_acc=0.8077, val_loss=0.5747135449574344
Epoch 3/15 - loss=0.5188, val_acc=0.8282, val_loss=0.49650118575317015
Epoch 4/15 - loss=0.4641, val_acc=0.8343, val_loss=0.4604563297512127
Epoch 5/15 - loss=0.4342, val_acc=0.8423, val_loss=0.43731749213697185
Epoch 6/15 - loss=0.4128, val_acc=0.8512, val_loss=0.4172212941320218
Epoch 7/15 - loss=0.3976, val_acc=0.8540, val_loss=0.40700705793293057
Epoch 8/15 - loss=0.3860, val_acc=0.8550, val_loss=0.3959101901038994
Epoch 9/15 - loss=0.3759, val_acc=0.8573, val_loss=0.3859872244664137
Epoch 10/15 - loss=0.3671, val_acc=0.8605, val_loss=0.38504770047177456
Epoch 11/15 - loss=0.3602, val_acc=0.8647, val_loss=0.3760536411013232
Epoch 12/15 - loss=0.3527, val_acc=0.8628, val_loss=0.3764751127040352
Epoch 13/15 - loss=0.3479, val_acc=0.8660, val_loss=0.3620467727888231
Epoch 14/15 - loss=0.3424, val_acc=0.8667, val_loss=0.3621035389928984
Epoch 15/15

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▆▆▆▇▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8615
training_loss,0.33711
validation loss,0.36055
validation_accuracy,0.8695


[34m[1mwandb[0m: Agent Starting Run: ixq2rlgt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.5847, val_acc=0.8423, val_loss=0.4223828706648879
Epoch 2/15 - loss=0.3954, val_acc=0.8545, val_loss=0.3912485599404234
Epoch 3/15 - loss=0.3605, val_acc=0.8673, val_loss=0.3556274217469379
Epoch 4/15 - loss=0.3403, val_acc=0.8665, val_loss=0.3574887546508234
Epoch 5/15 - loss=0.3259, val_acc=0.8678, val_loss=0.365927484966705
Epoch 6/15 - loss=0.3097, val_acc=0.8770, val_loss=0.32927511615321914
Epoch 7/15 - loss=0.2998, val_acc=0.8787, val_loss=0.32379152470487654
Epoch 8/15 - loss=0.2898, val_acc=0.8635, val_loss=0.3650505127536943
Epoch 9/15 - loss=0.2851, val_acc=0.8810, val_loss=0.31823920240703
Epoch 10/15 - loss=0.2764, val_acc=0.8787, val_loss=0.33518423769134087
Epoch 11/15 - loss=0.2713, val_acc=0.8768, val_loss=0.3297460155598525
Epoch 12/15 - loss=0.2614, val_acc=0.8833, val_loss=0.3264488395338764
Epoch 13/15 - loss=0.2580, val_acc=0.8865, val_loss=0.32119755011907447
Epoch 14/15 - loss=0.2532, val_acc=0.8832, val_loss=0.3204723575858921
Epoch 15/15 - 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▄▄▂▁▄▁▂▂▂▁▁▂
validation_accuracy,▁▃▅▅▅▆▇▄▇▇▆▇█▇▇

0,1
epoch,15.0
test_accuracy,0.8704
training_loss,0.24719
validation loss,0.33044
validation_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: ms10tf33 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6270, val_acc=0.8425, val_loss=0.44806579568108995
Epoch 2/15 - loss=0.4148, val_acc=0.8480, val_loss=0.40956287006615216
Epoch 3/15 - loss=0.3790, val_acc=0.8595, val_loss=0.3849564114459321
Epoch 4/15 - loss=0.3550, val_acc=0.8663, val_loss=0.3724371378558453
Epoch 5/15 - loss=0.3403, val_acc=0.8695, val_loss=0.35556014969300476
Epoch 6/15 - loss=0.3229, val_acc=0.8707, val_loss=0.3478934113529482
Epoch 7/15 - loss=0.3146, val_acc=0.8768, val_loss=0.3295220304940766
Epoch 8/15 - loss=0.3054, val_acc=0.8653, val_loss=0.3652100624233602
Epoch 9/15 - loss=0.2982, val_acc=0.8775, val_loss=0.3312161494387455
Epoch 10/15 - loss=0.2902, val_acc=0.8688, val_loss=0.36596795707779933
Epoch 11/15 - loss=0.2842, val_acc=0.8718, val_loss=0.3462242708421703
Epoch 12/15 - loss=0.2771, val_acc=0.8800, val_loss=0.33932088558946594
Epoch 13/15 - loss=0.2720, val_acc=0.8750, val_loss=0.3352174624255721
Epoch 14/15 - loss=0.2690, val_acc=0.8800, val_loss=0.32494663082018665
Epoch 15/

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
validation loss,█▆▄▄▃▂▁▃▁▃▂▂▂▁▄
validation_accuracy,▁▂▄▅▆▆▇▅█▆▆█▇█▆

0,1
epoch,15.0
test_accuracy,0.863
training_loss,0.26503
validation loss,0.37061
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: 5he7sckz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1397, val_acc=0.8355, val_loss=0.11350597836975147
Epoch 2/15 - loss=0.1057, val_acc=0.8588, val_loss=0.10112544991042209
Epoch 3/15 - loss=0.0971, val_acc=0.8688, val_loss=0.09539135940287359
Epoch 4/15 - loss=0.0914, val_acc=0.8623, val_loss=0.09890848436977485
Epoch 5/15 - loss=0.0877, val_acc=0.8713, val_loss=0.09307759306069037
Epoch 6/15 - loss=0.0842, val_acc=0.8645, val_loss=0.09620183190547767
Epoch 7/15 - loss=0.0824, val_acc=0.8698, val_loss=0.09265516953815112
Epoch 8/15 - loss=0.0803, val_acc=0.8785, val_loss=0.08747185659491241
Epoch 9/15 - loss=0.0784, val_acc=0.8743, val_loss=0.09149474544387273
Epoch 10/15 - loss=0.0767, val_acc=0.8785, val_loss=0.08797854065202694
Epoch 11/15 - loss=0.0754, val_acc=0.8757, val_loss=0.08909678934648581
Epoch 12/15 - loss=0.0739, val_acc=0.8755, val_loss=0.08976308206757432
Epoch 13/15 - loss=0.0721, val_acc=0.8727, val_loss=0.09255991391862062
Epoch 14/15 - loss=0.0712, val_acc=0.8808, val_loss=0.08545360661998516
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▅▃▄▃▄▃▂▃▂▂▂▃▁▁
validation_accuracy,▁▅▆▅▇▅▆█▇█▇▇▇██

0,1
epoch,15.0
test_accuracy,0.8726
training_loss,0.07011
validation loss,0.08633
validation_accuracy,0.87917


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6qsqptnp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1546, val_acc=0.8385, val_loss=0.11807534667181187
Epoch 2/15 - loss=0.1089, val_acc=0.8507, val_loss=0.1079245415272775
Epoch 3/15 - loss=0.1008, val_acc=0.8613, val_loss=0.10000543939049349
Epoch 4/15 - loss=0.0952, val_acc=0.8652, val_loss=0.09625666071572679
Epoch 5/15 - loss=0.0923, val_acc=0.8683, val_loss=0.09380915621205271
Epoch 6/15 - loss=0.0880, val_acc=0.8592, val_loss=0.10111848930572352
Epoch 7/15 - loss=0.0857, val_acc=0.8758, val_loss=0.08908036578476146
Epoch 8/15 - loss=0.0847, val_acc=0.8598, val_loss=0.10085679632055909
Epoch 9/15 - loss=0.0831, val_acc=0.8683, val_loss=0.09525840257122066
Epoch 10/15 - loss=0.0808, val_acc=0.8637, val_loss=0.0966752390122895
Epoch 11/15 - loss=0.0801, val_acc=0.8692, val_loss=0.09290405982900182
Epoch 12/15 - loss=0.0777, val_acc=0.8675, val_loss=0.09691714929371673
Epoch 13/15 - loss=0.0772, val_acc=0.8732, val_loss=0.0920233822369103
Epoch 14/15 - loss=0.0760, val_acc=0.8767, val_loss=0.08734610662139132
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▆▄▃▂▄▁▄▃▃▂▃▂▁▃
validation_accuracy,▁▃▅▆▆▅█▅▆▆▇▆▇█▇

0,1
epoch,15.0
test_accuracy,0.8652
training_loss,0.07423
validation loss,0.09461
validation_accuracy,0.87083


[34m[1mwandb[0m: Agent Starting Run: 6p2owmbz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.2508, val_acc=0.7885, val_loss=0.15991361816997302
Epoch 2/15 - loss=0.1409, val_acc=0.8227, val_loss=0.1339766734499667
Epoch 3/15 - loss=0.1235, val_acc=0.8362, val_loss=0.12235040903217013
Epoch 4/15 - loss=0.1152, val_acc=0.8407, val_loss=0.11602632680767191
Epoch 5/15 - loss=0.1101, val_acc=0.8485, val_loss=0.1111943736075314
Epoch 6/15 - loss=0.1061, val_acc=0.8497, val_loss=0.10889108678250492
Epoch 7/15 - loss=0.1030, val_acc=0.8548, val_loss=0.10599378831103562
Epoch 8/15 - loss=0.1006, val_acc=0.8590, val_loss=0.10350006496157217
Epoch 9/15 - loss=0.0983, val_acc=0.8617, val_loss=0.10252062173030667
Epoch 10/15 - loss=0.0964, val_acc=0.8615, val_loss=0.10153151535661716
Epoch 11/15 - loss=0.0947, val_acc=0.8625, val_loss=0.09942441967314822
Epoch 12/15 - loss=0.0929, val_acc=0.8647, val_loss=0.0992691619963896
Epoch 13/15 - loss=0.0916, val_acc=0.8675, val_loss=0.09794821775890449
Epoch 14/15 - loss=0.0903, val_acc=0.8682, val_loss=0.09633445285695712
Epoc

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇█████

0,1
epoch,15.0
test_accuracy,0.8588
training_loss,0.08889
validation loss,0.09731
validation_accuracy,0.8645


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mqe1hba5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1376, val_acc=0.8423, val_loss=0.11112433564752147
Epoch 2/15 - loss=0.1025, val_acc=0.8477, val_loss=0.10777411884002344
Epoch 3/15 - loss=0.0945, val_acc=0.8620, val_loss=0.09892814536193184
Epoch 4/15 - loss=0.0897, val_acc=0.8653, val_loss=0.09418991352338693
Epoch 5/15 - loss=0.0857, val_acc=0.8710, val_loss=0.09264558895498605
Epoch 6/15 - loss=0.0821, val_acc=0.8667, val_loss=0.09416271172475506
Epoch 7/15 - loss=0.0797, val_acc=0.8743, val_loss=0.08965335280493393
Epoch 8/15 - loss=0.0772, val_acc=0.8610, val_loss=0.09858342616893281
Epoch 9/15 - loss=0.0767, val_acc=0.8818, val_loss=0.08504300356954755
Epoch 10/15 - loss=0.0739, val_acc=0.8742, val_loss=0.08980353038586741
Epoch 11/15 - loss=0.0732, val_acc=0.8777, val_loss=0.08927301549656505
Epoch 12/15 - loss=0.0709, val_acc=0.8768, val_loss=0.08852451372980254
Epoch 13/15 - loss=0.0700, val_acc=0.8782, val_loss=0.08840949088910666
Epoch 14/15 - loss=0.0688, val_acc=0.8818, val_loss=0.08668242368013991
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▇▅▃▃▃▂▅▁▂▂▂▂▁▃
validation_accuracy,▁▂▄▅▆▅▇▄█▇▇▇▇█▆

0,1
epoch,15.0
test_accuracy,0.868
training_loss,0.06824
validation loss,0.09068
validation_accuracy,0.87167


[34m[1mwandb[0m: Agent Starting Run: 7m8n7cnh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1397, val_acc=0.8355, val_loss=0.11350597836975147
Epoch 2/15 - loss=0.1057, val_acc=0.8588, val_loss=0.10112544991042209
Epoch 3/15 - loss=0.0971, val_acc=0.8688, val_loss=0.09539135940287359
Epoch 4/15 - loss=0.0914, val_acc=0.8623, val_loss=0.09890848436977485
Epoch 5/15 - loss=0.0877, val_acc=0.8713, val_loss=0.09307759306069037
Epoch 6/15 - loss=0.0842, val_acc=0.8645, val_loss=0.09620183190547767
Epoch 7/15 - loss=0.0824, val_acc=0.8698, val_loss=0.09265516953815112
Epoch 8/15 - loss=0.0803, val_acc=0.8785, val_loss=0.08747185659491241
Epoch 9/15 - loss=0.0784, val_acc=0.8743, val_loss=0.09149474544387273
Epoch 10/15 - loss=0.0767, val_acc=0.8785, val_loss=0.08797854065202694
Epoch 11/15 - loss=0.0754, val_acc=0.8757, val_loss=0.08909678934648581
Epoch 12/15 - loss=0.0739, val_acc=0.8755, val_loss=0.08976308206757432
Epoch 13/15 - loss=0.0721, val_acc=0.8727, val_loss=0.09255991391862062
Epoch 14/15 - loss=0.0712, val_acc=0.8808, val_loss=0.08545360661998516
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▅▃▄▃▄▃▂▃▂▂▂▃▁▁
validation_accuracy,▁▅▆▅▇▅▆█▇█▇▇▇██

0,1
epoch,15.0
test_accuracy,0.8726
training_loss,0.07011
validation loss,0.08633
validation_accuracy,0.87917


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ndwwl9x7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.6259, val_acc=0.8292, val_loss=0.46986892883264325
Epoch 2/15 - loss=0.4350, val_acc=0.8482, val_loss=0.40931900197957294
Epoch 3/15 - loss=0.3933, val_acc=0.8610, val_loss=0.3929496971886448
Epoch 4/15 - loss=0.3677, val_acc=0.8625, val_loss=0.38100447920381475
Epoch 5/15 - loss=0.3505, val_acc=0.8697, val_loss=0.34861474358761296
Epoch 6/15 - loss=0.3366, val_acc=0.8655, val_loss=0.3651900360971135
Epoch 7/15 - loss=0.3265, val_acc=0.8460, val_loss=0.44542384190344814
Epoch 8/15 - loss=0.3173, val_acc=0.8730, val_loss=0.34927960227979377
Epoch 9/15 - loss=0.3099, val_acc=0.8582, val_loss=0.38767435174390713
Epoch 10/15 - loss=0.3022, val_acc=0.8687, val_loss=0.3559346085114018
Epoch 11/15 - loss=0.2968, val_acc=0.8705, val_loss=0.3537828276951274
Epoch 12/15 - loss=0.2914, val_acc=0.8787, val_loss=0.33658602607240146
Epoch 13/15 - loss=0.2865, val_acc=0.8823, val_loss=0.32385177927827774
Epoch 14/15 - loss=0.2823, val_acc=0.8757, val_loss=0.3449483972933062
Epoch 

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
validation loss,█▅▄▄▂▃▇▂▄▃▂▂▁▂▃
validation_accuracy,▁▄▅▅▆▆▃▇▅▆▆██▇▆

0,1
epoch,15.0
test_accuracy,0.8634
training_loss,0.27677
validation loss,0.35515
validation_accuracy,0.87


[34m[1mwandb[0m: Agent Starting Run: m08trzzk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: sq_error
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/15 - loss=0.1376, val_acc=0.8423, val_loss=0.11112433564752147
Epoch 2/15 - loss=0.1025, val_acc=0.8477, val_loss=0.10777411884002344
Epoch 3/15 - loss=0.0945, val_acc=0.8620, val_loss=0.09892814536193184
Epoch 4/15 - loss=0.0897, val_acc=0.8653, val_loss=0.09418991352338693
Epoch 5/15 - loss=0.0857, val_acc=0.8710, val_loss=0.09264558895498605
Epoch 6/15 - loss=0.0821, val_acc=0.8667, val_loss=0.09416271172475506
Epoch 7/15 - loss=0.0797, val_acc=0.8743, val_loss=0.08965335280493393
Epoch 8/15 - loss=0.0772, val_acc=0.8610, val_loss=0.09858342616893281
Epoch 9/15 - loss=0.0767, val_acc=0.8818, val_loss=0.08504300356954755
Epoch 10/15 - loss=0.0739, val_acc=0.8742, val_loss=0.08980353038586741
Epoch 11/15 - loss=0.0732, val_acc=0.8777, val_loss=0.08927301549656505
Epoch 12/15 - loss=0.0709, val_acc=0.8768, val_loss=0.08852451372980254
Epoch 13/15 - loss=0.0700, val_acc=0.8782, val_loss=0.08840949088910666
Epoch 14/15 - loss=0.0688, val_acc=0.8818, val_loss=0.08668242368013991
E

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
training_loss,█▄▄▃▃▂▂▂▂▂▂▁▁▁▁
validation loss,█▇▅▃▃▃▂▅▁▂▂▂▂▁▃
validation_accuracy,▁▂▄▅▆▅▇▄█▇▇▇▇█▆

0,1
epoch,15.0
test_accuracy,0.868
training_loss,0.06824
validation loss,0.09068
validation_accuracy,0.87167


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [4]:
import numpy as np
import wandb
from keras.datasets import fashion_mnist


# Neural Network Class: feed_forward_NN_4

class feed_forward_NN_8b:
    def __init__(self,
                 layers,
                 optimizer,
                 learning_rate,
                 momentum,
                 beta1,
                 beta2,
                 beta,
                 epsilon,
                 weight_decay,
                 init_type,
                 activation,
                 loss_func
                 ):
    
        
        self.layers = layers
        self.layer_n = len(layers)
        self.optimizer = optimizer.lower()
        self.lr = learning_rate
        self.momentum = momentum
        self.beta1 = beta1
        self.beta2 = beta2
        self.beta= beta
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.init_type = init_type.lower()
        self.activation = activation.lower()
        self.loss_func=loss_func.lower()

        # Initialize Weights & BiaseS
        self.weights = []
        self.biases = []
        for i in range(self.layer_n - 1):
            if self.init_type == "xavier":
                # "Xavier" initialization
                w = np.random.randn(layers[i], layers[i+1]) * np.sqrt(1.0 / layers[i])
            else:
                # "random" initialization
                w = np.random.randn(layers[i], layers[i+1]) * np.sqrt(2.0 / layers[i])
            b = np.zeros((1, layers[i+1]))
            self.weights.append(w)
            self.biases.append(b)

        # initialize extra Params 
        if self.optimizer in ["momentum", "nesterov", "rmsprop", "adam", "nadam"]:
            self.v_w = [np.zeros_like(w) for w in self.weights]
            self.v_b = [np.zeros_like(b) for b in self.biases]
        if self.optimizer in ["adam", "nadam"]:
            self.m_w = [np.zeros_like(w) for w in self.weights]
            self.m_b = [np.zeros_like(b) for b in self.biases]
            self.t = 0

    # activations 
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
    
    def tanh(self, x):
        return np.tanh(x)
    
    def relu(self, x):
        return np.maximum(0, x)

    def activate(self, x):
        if self.activation == "sigmoid":
            return self.sigmoid(x)
        elif self.activation == "tanh":
            return self.tanh(x)
        elif self.activation == "relu":
            return self.relu(x)
        else:
            return self.sigmoid(x) 
        
    # derivatives
    def derivative(self, a):

        if self.activation == "sigmoid":
            return a * (1 - a)
        elif self.activation == "tanh":
            return 1 - a**2
        elif self.activation == "relu":
            return (a > 0).astype(float)
        else:
            return a * (1 - a) 

    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    # Forward Pass
    def forward_pass(self, x):
        self.h = [x]  
        # Hidden layers
        for i in range(self.layer_n - 2):
            z = np.dot(self.h[i], self.weights[i]) + self.biases[i]
            act = self.activate(z)
            self.h.append(act)
        # Output layer- softmax
        z_out = np.dot(self.h[-1], self.weights[-1]) + self.biases[-1]
        out = self.softmax(z_out)
        self.h.append(out)
        return self.h

    # Backward Pass
    def backward_prop(self, y_true):
        m = y_true.shape[0]
        dw = [None] * (self.layer_n - 1)
        db = [None] * (self.layer_n - 1)

        # Cross-entropy derivative for output layer
        if self.loss_func=="cross_entropy":
            delta = self.h[-1] - y_true  # shape: (batch_size, output_dim)
        elif self.loss_func=="sq_error":
            batch_size_sq=len(self.h[-1])
            classes_sq=len(self.h[-1][0])
            delta=np.zeros((batch_size_sq,classes_sq))

            for i in range(batch_size_sq):
                jacobian_softmax= np.diag(self.h[-1][i]) - np.outer(self.h[-1][i], self.h[-1][i])
                # print(jacobian_softmax.shape)
                # print(self.h[-1][i])    
                delta[i]= 2*np.dot(self.h[-1][i]-y_true[i], jacobian_softmax)
                
        else:
            delta = self.h[-1] - y_true 


        # Propagation
        for i in reversed(range(self.layer_n - 1)):
            dw[i] = np.dot(self.h[i].T, delta) / m
            db[i] = np.sum(delta, axis=0, keepdims=True) / m
            if i > 0:
                # For hidden layers, multiply by derivative of activation
                delta = np.dot(delta, self.weights[i].T) * self.derivative(self.h[i])
        return dw, db

    # Param Updates for "Non-Nesterov" 
    def _update_params(self, dw, db):
        # Add weight decay to each gradient
        for i in range(self.layer_n - 1):
            dw[i] += self.weight_decay * self.weights[i]

        if self.optimizer == "sgd":
            for i in range(self.layer_n - 1):
                self.weights[i] -= self.lr * dw[i]
                self.biases[i] -= self.lr * db[i]

        elif self.optimizer == "momentum":
            for i in range(self.layer_n - 1):
                self.v_w[i] = self.momentum * self.v_w[i] + dw[i]
                self.v_b[i] = self.momentum * self.v_b[i] + db[i]
                self.weights[i] -= self.lr * self.v_w[i]
                self.biases[i] -= self.lr * self.v_b[i]

        elif self.optimizer == "rmsprop":
            for i in range(self.layer_n - 1):
                self.v_w[i] = self.beta * self.v_w[i] + (1 - self.beta) * (dw[i] ** 2)
                self.v_b[i] = self.beta * self.v_b[i] + (1 - self.beta) * (db[i] ** 2)
                self.weights[i] -= self.lr * dw[i] / (np.sqrt(self.v_w[i]) + self.epsilon)
                self.biases[i]  -= self.lr * db[i] / (np.sqrt(self.v_b[i]) + self.epsilon)

        elif self.optimizer == "adam":
            self.t += 1
            for i in range(self.layer_n - 1):
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                # bias correction
                m_w_hat = self.m_w[i] / (1 - self.beta1 ** self.t)
                m_b_hat = self.m_b[i] / (1 - self.beta1 ** self.t)
                v_w_hat = self.v_w[i] / (1 - self.beta2 ** self.t)
                v_b_hat = self.v_b[i] / (1 - self.beta2 ** self.t)

                self.weights[i] -= self.lr * m_w_hat / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i]  -= self.lr * m_b_hat / (np.sqrt(v_b_hat) + self.epsilon)

        elif self.optimizer == "nadam":
            self.t += 1
            for i in range(self.layer_n - 1):
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                # bias correction
                m_w_hat = self.m_w[i] / (1 - self.beta1 ** (self.t + 1))
                m_b_hat = self.m_b[i] / (1 - self.beta1 ** (self.t + 1))
                v_w_hat = self.v_w[i] / (1 - self.beta2 ** (self.t + 1))
                v_b_hat = self.v_b[i] / (1 - self.beta2 ** (self.t + 1))

                grad_term_w = self.beta1 * m_w_hat + (1 - self.beta1) * dw[i] / (1 - self.beta1 ** (self.t + 1))
                grad_term_b = self.beta1 * m_b_hat + (1 - self.beta1) * db[i] / (1 - self.beta1 ** (self.t + 1))

                self.weights[i] -= self.lr * grad_term_w / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i]  -= self.lr * grad_term_b / (np.sqrt(v_b_hat) + self.epsilon)

    # Training Step  with "Nesterov"
    def _train_step(self, x_batch, y_batch):
        if self.optimizer == "nesterov":
            # to look-ahead: w_look = w - momentum * v
            for i in range(self.layer_n - 1):
                self.weights[i] -= self.lr*self.momentum * self.v_w[i]
                self.biases[i]  -= self.lr*self.momentum * self.v_b[i]

            # Forward at the look-ahead position
            self.forward_pass(x_batch)
            out = self.h[-1]
            l2_norm_weights = 0
            for i in range(len(self.weights)):
                l2_norm_weights += np.sum(self.weights[i] ** 2)
            # for i in range(len(self.biases)):
            #     l2_norm_bias += np.sum(self.biases[i] ** 2)
                    
            l2_norm_params = l2_norm_weights #+ l2_norm_bias

            if self.loss_func=="cross_entropy":
                loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params # (1e-10) to prevent underflow
            elif self.loss_func=="sq_error" :
                loss= 0.5 * np.mean(np.sum((out - y_batch)**2, axis=1))
            else:
                loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params
            
            
            #loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params # (1e-10) to prevent underflow
            #loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis=1))
            dW, dB = self.backward_prop(y_batch)

            # add weight decay here
            for i in range(self.layer_n - 1):
                dW[i] += self.weight_decay * self.weights[i]

            # backward at the look-ahead position (go back to w_t)
            for i in range(self.layer_n - 1):
                self.weights[i] += self.lr*self.momentum * self.v_w[i]
                self.biases[i]  += self.lr*self.momentum * self.v_b[i]

            # update velocity: u_t = momentum*u_{t-1} + dW
            for i in range(self.layer_n - 1):
                self.v_w[i] = self.momentum * self.v_w[i] + dW[i]
                self.v_b[i] = self.momentum * self.v_b[i] + dB[i]

            # final param update: w = w - lr*u_t
            for i in range(self.layer_n - 1):
                self.weights[i] -= self.lr * self.v_w[i]
                self.biases[i]  -= self.lr * self.v_b[i]

            return loss
        else:
            # Normal forward/back
            self.forward_pass(x_batch)
            out = self.h[-1]

            l2_norm_weights=0
            
            for i in range(len(self.weights)):
                l2_norm_weights += np.sum(self.weights[i] ** 2)
            # for i in range(len(self.biases)):
            #     l2_norm_bias += np.sum(self.biases[i] ** 2)
                    
            l2_norm_params = l2_norm_weights #+ l2_norm_bias

            if self.loss_func=="cross_entropy":
                loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params # (1e-10) to prevent underflow
            elif self.loss_func=="sq_error" :
                loss= 0.5 * np.mean(np.sum((out - y_batch)**2, axis=1))
            else:
                loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params    
            
            
             

            #loss = -np.mean(np.sum(y_batch * np.log(out + 1e-10), axis=1))
            dW, dB = self.backward_prop(y_batch)
            self._update_params(dW, dB)
            return loss

    # Outer Training Loop 
    def training(self, x_train, y_train, x_val, y_val, epochs, batch_size):
       
        for ep in range(epochs):
            idx = np.random.permutation(x_train.shape[0])
            x_train_shuff = x_train[idx]
            y_train_shuff = y_train[idx]
            n_batches = len(x_train) // batch_size
            epoch_loss = 0.0
            for b in range(n_batches):
                start = b * batch_size
                end = start + batch_size
                x_batch = x_train_shuff[start:end]
                y_batch = y_train_shuff[start:end]
                loss = self._train_step(x_batch, y_batch)
                epoch_loss += loss
            avg_loss = epoch_loss / n_batches

            # Validation

            preds = self.predict(x_val)
            val_labels = np.argmax(y_val, axis=1)
            val_acc = np.mean(preds == val_labels)

            val_outputs = self.forward_pass(x_val)[-1]
        
            # Cross-entropy loss for validation
            #val_loss = -np.mean(np.sum(y_val * np.log(val_outputs + 1e-10), axis=1))
            # training
            preds_train = self.predict(x_train)
            train_labels = np.argmax(y_train, axis=1)
            train_acc = np.mean(preds_train == train_labels)

            l2_norm_weights=0
            
            for i in range(len(self.weights)):
                l2_norm_weights += np.sum(self.weights[i] ** 2)
            # for i in range(len(self.biases)):
            #     l2_norm_bias += np.sum(self.biases[i] ** 2)
                    
            l2_norm_params = l2_norm_weights

            if self.loss_func=="cross_entropy":
                val_loss = -np.mean(np.sum(y_val * np.log(val_outputs + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params # (1e-10) to prevent underflow
            elif self.loss_func=="sq_error" :
                val_loss= 0.5 * np.mean(np.sum((val_outputs - y_val)**2, axis=1))
            else:
                val_loss = -np.mean(np.sum(y_val * np.log(val_outputs + 1e-10), axis = 1)) +  (self.weight_decay/2) * l2_norm_params

            # Log metrics to wandb
            wandb.log({"epoch": ep+1, "training_loss": avg_loss, "validation_accuracy": val_acc, "training accuracy": train_acc, "validation loss": val_loss})
            print(f"Epoch {ep+1}/{epochs} - loss={avg_loss:.4f}, val_acc={val_acc:.4f}, val_loss={val_loss}" )

    #Prediction 
    def predict(self, X):
        self.forward_pass(X)
        return np.argmax(self.h[-1], axis=1)




# train_sweep() function

def train_sweep():
    # Initialize wandb
    wandb.init()
    config = wandb.config

    #custom run name from hyperparameters

    run_name = f"hl_{config.num_layers}_hs_{config.hidden_size}_bs_{config.batch_size}_ac_{config.activation}_opt_{config.optimizer}"
    wandb.run.name = run_name

    if config["dataset"] == "fashion_mnist":
        (x_train_full, y_train_full), (x_test, y_test) = fashion_mnist.load_data()
    else:  # "mnist"
        (x_train_full, y_train_full), (x_test, y_test) = mnist.load_data()

    x_train_full = x_train_full.reshape(x_train_full.shape[0], -1) / 255.0
    x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

    np.random.seed(42)
    idx = np.arange(x_train_full.shape[0])
    np.random.shuffle(idx)
    x_train_full = x_train_full[idx]
    y_train_full = y_train_full[idx]

    # 90% train, 10% validation
    train_size = int(0.9 * len(x_train_full))
    x_train, y_train = x_train_full[:train_size], y_train_full[:train_size]
    x_val, y_val = x_train_full[train_size:], y_train_full[train_size:]

    num_classes = 10
    y_train_1h = np.eye(num_classes)[y_train]
    y_val_1h = np.eye(num_classes)[y_val]
    y_test_1h = np.eye(num_classes)[y_test]

    # Build model
    # layers = [784] + [hidden_size] * num_layers + [10]
    model = feed_forward_NN_8b(
        layers=[784] + [config["hidden_size"]] * config["num_layers"] + [10],
        optimizer=config["optimizer"],
        learning_rate=config["learning_rate"],
        momentum=config["momentum"],
        beta1=config["beta1"],
        beta2=config["beta2"],
        beta=config["beta"],
        epsilon=config["epsilon"],
        weight_decay=config["weight_decay"],
        weight_init=config["weight_init"],
        activation=config["activation"],
        loss=config["loss"],
    )

    # Train
    model.training(
        x_train=x_train,
        y_train=y_train_1h,
        x_val=x_val,
        y_val=y_val_1h,
        epochs=config["epochs"],
        batch_size=config["batch_size"],
    )

    #Evaluation on test set
    test_preds = model.predict(x_test)
    test_labels = np.argmax(y_test_1h, axis=1)
    test_acc = np.mean(test_preds == test_labels)
    
    wandb.log({"test_accuracy": test_acc})
    print("test accuracy ",test_acc)


# sweep configuration
sweep_config = {
    "method": "random", 
    "metric": {
        "name": "validation_accuracy",
        "goal": "maximize"
    },
    "parameters": {
        "epochs": {"values": [5,10]},
        "num_layers": {"values": [3,4,5]},
        "hidden_size": {"values": [32,16,64]},
        "weight_decay": {"values": [0.0, 0.0005, 0.5]},
        "learning_rate": {"values": [1e-3, 1e-4]},
        "optimizer": {"values": ["sgd", "momentum", "nag", "rmsprop", "adam", "nadam"]},
        "batch_size": {"values": [32,64,128]},
        "init_type": {"values": ["random","xavier"]},
        "activation": {"values": [ "tanh", "relu","sigmoid"]},
        "momentum": {"values": [0.9]},
        "beta1": {"values": [0.9]},
        "beta2": {"values": [0.999]},
        "beta_rms": {"values": [0.9]},
        "epsilon": {"values": [1e-8]},
        "loss_func":{"values":["cross_entropy"]},
        "dataset":{"values":["fashion_mnist"]}
    }
}


# Running the sweep



if __name__ == "__main__":
    # Creating sweep
    sweep_id = wandb.sweep(sweep_config, project="q4_sweep_project")
    # Launching sweep agent
    wandb.agent(sweep_id, function=train_sweep)





Create sweep with ID: jl0s4l1o
Sweep URL: https://wandb.ai/ed24s401-indian-institute-of-technology-madras/q4_sweep_project/sweeps/jl0s4l1o


[34m[1mwandb[0m: Network error (SSLError), entering retry loop.
[34m[1mwandb[0m: Agent Starting Run: z4mu7ngb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	dataset: fashion_mnist
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


Run z4mu7ngb errored:
Traceback (most recent call last):
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\acer\AppData\Local\Temp\ipykernel_24364\153520030.py", line 393, in train_sweep
    beta=config["beta"],
         ~~~~~~^^^^^^^^
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\sdk\wandb_config.py", line 130, in __getitem__
    return self._items[key]
           ~~~~~~~~~~~^^^^^
KeyError: 'beta'

[34m[1mwandb[0m: [32m[41mERROR[0m Run z4mu7ngb errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\acer\AppData\Local

Run vgq4ktau errored:
Traceback (most recent call last):
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\acer\AppData\Local\Temp\ipykernel_24364\153520030.py", line 393, in train_sweep
    beta=config["beta"],
         ~~~~~~^^^^^^^^
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\sdk\wandb_config.py", line 130, in __getitem__
    return self._items[key]
           ~~~~~~~~~~~^^^^^
KeyError: 'beta'

[34m[1mwandb[0m: [32m[41mERROR[0m Run vgq4ktau errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\acer\AppData\Local

Run uyv586k2 errored:
Traceback (most recent call last):
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\acer\AppData\Local\Temp\ipykernel_24364\153520030.py", line 393, in train_sweep
    beta=config["beta"],
         ~~~~~~^^^^^^^^
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\sdk\wandb_config.py", line 130, in __getitem__
    return self._items[key]
           ~~~~~~~~~~~^^^^^
KeyError: 'beta'

[34m[1mwandb[0m: [32m[41mERROR[0m Run uyv586k2 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\acer\AppData\Local

Run 0pekahxd errored:
Traceback (most recent call last):
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\acer\AppData\Local\Temp\ipykernel_24364\153520030.py", line 393, in train_sweep
    beta=config["beta"],
         ~~~~~~^^^^^^^^
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\sdk\wandb_config.py", line 130, in __getitem__
    return self._items[key]
           ~~~~~~~~~~~^^^^^
KeyError: 'beta'

[34m[1mwandb[0m: [32m[41mERROR[0m Run 0pekahxd errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\acer\AppData\Local

Run eqjwuii7 errored:
Traceback (most recent call last):
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\acer\AppData\Local\Temp\ipykernel_24364\153520030.py", line 393, in train_sweep
    beta=config["beta"],
         ~~~~~~^^^^^^^^
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\sdk\wandb_config.py", line 130, in __getitem__
    return self._items[key]
           ~~~~~~~~~~~^^^^^
KeyError: 'beta'

[34m[1mwandb[0m: [32m[41mERROR[0m Run eqjwuii7 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\acer\AppData\Local

Run wfx4nk6n errored:
Traceback (most recent call last):
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\acer\AppData\Local\Temp\ipykernel_24364\153520030.py", line 393, in train_sweep
    beta=config["beta"],
         ~~~~~~^^^^^^^^
  File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\sdk\wandb_config.py", line 130, in __getitem__
    return self._items[key]
           ~~~~~~~~~~~^^^^^
KeyError: 'beta'

[34m[1mwandb[0m: [32m[41mERROR[0m Run wfx4nk6n errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\acer\AppData\Local\Programs\Python\Python311\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\acer\AppData\Local