In [None]:
import numpy as np
import wandb
from keras.datasets import fashion_mnist


# Neural Network Class: feed_forward_NN_4

class feed_forward_NN_4:
    """Fully connected softmax classifier trained with mini-batch gradient descent.

    The network is a stack of dense layers. Every hidden layer uses the same
    activation (sigmoid, tanh or ReLU); the last layer is a softmax. The
    training objective is cross-entropy plus an L2 penalty on the weights
    (biases are not penalised).

    Supported optimizers: "sgd", "momentum", "nesterov", "rmsprop", "adam"
    and "nadam". ``training`` reports per-epoch metrics through the
    module-level ``wandb`` import.
    """

    # Optimizer names handled by _train_step / _update_params.
    _OPTIMIZERS = ("sgd", "momentum", "nesterov", "rmsprop", "adam", "nadam")

    def __init__(self,
                 layers,
                 optimizer,
                 learning_rate,
                 momentum,
                 beta1,
                 beta2,
                 beta_rms,
                 epsilon,
                 weight_decay,
                 init_type,
                 activation
                 ):
        """Store the hyper-parameters and allocate weights and optimizer state.

        Args:
            layers: Sequence of layer widths, input first and output last;
                one weight matrix of shape (layers[i], layers[i+1]) is created
                per consecutive pair.
            optimizer: Optimizer name (case-insensitive), one of
                ``_OPTIMIZERS``.
            learning_rate: Step size used by every optimizer.
            momentum: Velocity decay for "momentum" and "nesterov".
            beta1: First-moment decay for "adam" / "nadam".
            beta2: Second-moment decay for "adam" / "nadam".
            beta_rms: Squared-gradient decay for "rmsprop".
            epsilon: Constant added to the denominator of the adaptive updates.
            weight_decay: L2 coefficient applied to the weights only.
            init_type: "xavier" draws weights with std sqrt(1 / fan_in); any
                other value (e.g. "random") uses std sqrt(2 / fan_in).
            activation: "sigmoid", "tanh" or "relu"; any other value falls
                back to sigmoid.

        Raises:
            ValueError: If ``optimizer`` is not a supported name. Without this
                check an unknown name would train without ever updating the
                parameters.
        """
        self.layers = layers
        self.layer_n = len(layers)
        self.optimizer = optimizer.lower()
        if self.optimizer not in self._OPTIMIZERS:
            raise ValueError(
                f"Unknown optimizer {optimizer!r}; expected one of {self._OPTIMIZERS}"
            )
        self.lr = learning_rate
        self.momentum = momentum
        self.beta1 = beta1
        self.beta2 = beta2
        self.beta_rms = beta_rms
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.init_type = init_type.lower()
        self.activation = activation.lower()

        # Initialize weights & biases: Gaussian weights scaled by fan-in,
        # zero biases of shape (1, fan_out) so they broadcast over the batch.
        variance_gain = 1.0 if self.init_type == "xavier" else 2.0
        self.weights = []
        self.biases = []
        for fan_in, fan_out in zip(layers[:-1], layers[1:]):
            self.weights.append(
                np.random.randn(fan_in, fan_out) * np.sqrt(variance_gain / fan_in)
            )
            self.biases.append(np.zeros((1, fan_out)))

        # Optimizer state. v_* holds the velocity (momentum/nesterov) or the
        # running squared gradient (rmsprop/adam/nadam); m_* is the first
        # moment and t the step counter for adam/nadam.
        if self.optimizer in ["momentum", "nesterov", "rmsprop", "adam", "nadam"]:
            self.v_w = [np.zeros_like(w) for w in self.weights]
            self.v_b = [np.zeros_like(b) for b in self.biases]
        if self.optimizer in ["adam", "nadam"]:
            self.m_w = [np.zeros_like(w) for w in self.weights]
            self.m_b = [np.zeros_like(b) for b in self.biases]
            self.t = 0

    # activations
    def sigmoid(self, x):
        """Return the logistic sigmoid of ``x``, clipping the input to [-500, 500]."""
        # The clip keeps np.exp from overflowing on large-magnitude inputs.
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def tanh(self, x):
        """Return the element-wise hyperbolic tangent of ``x``."""
        return np.tanh(x)

    def relu(self, x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    def activate(self, x):
        """Apply the configured hidden activation (sigmoid for unknown names)."""
        if self.activation == "sigmoid":
            return self.sigmoid(x)
        elif self.activation == "tanh":
            return self.tanh(x)
        elif self.activation == "relu":
            return self.relu(x)
        else:
            return self.sigmoid(x)

    # derivatives
    def derivative(self, a):
        """Return the activation derivative expressed in terms of its output ``a``.

        ``a`` is the already-activated value (as stored in ``self.h``), not
        the pre-activation. Unknown activation names use the sigmoid form,
        matching ``activate``.
        """
        if self.activation == "sigmoid":
            return a * (1 - a)
        elif self.activation == "tanh":
            return 1 - a**2
        elif self.activation == "relu":
            return (a > 0).astype(float)
        else:
            return a * (1 - a)

    def softmax(self, x):
        """Return the row-wise softmax of ``x`` (rows along axis 0)."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # the exponentials bounded.
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    # Forward Pass
    def forward_pass(self, x):
        """Run the network on ``x`` and cache every layer's activation.

        Args:
            x: Input batch of shape (batch, layers[0]).

        Returns:
            ``self.h``: a list whose first entry is ``x``, followed by each
            hidden activation and finally the softmax output.
        """
        self.h = [x]
        # Hidden layers
        for i in range(self.layer_n - 2):
            z = np.dot(self.h[i], self.weights[i]) + self.biases[i]
            self.h.append(self.activate(z))
        # Output layer - softmax
        z_out = np.dot(self.h[-1], self.weights[-1]) + self.biases[-1]
        self.h.append(self.softmax(z_out))
        return self.h

    # Backward Pass
    def backward_prop(self, y_true):
        """Back-propagate the cross-entropy loss through the cached activations.

        Must be called after ``forward_pass`` on the matching batch, because
        it reads ``self.h``.

        Args:
            y_true: Target rows with the same shape as the softmax output
                (one-hot in the training code of this file).

        Returns:
            ``(dw, db)``: per-layer gradient lists averaged over the batch.
            The L2 term is not included here.
        """
        m = y_true.shape[0]
        dw = [None] * (self.layer_n - 1)
        db = [None] * (self.layer_n - 1)

        # Softmax + cross-entropy: gradient w.r.t. the output pre-activation.
        delta = self.h[-1] - y_true  # shape: (batch_size, output_dim)

        # Propagation
        for i in reversed(range(self.layer_n - 1)):
            dw[i] = np.dot(self.h[i].T, delta) / m
            db[i] = np.sum(delta, axis=0, keepdims=True) / m
            if i > 0:
                # Push the error through layer i's weights and the activation
                # derivative of the layer feeding it.
                delta = np.dot(delta, self.weights[i].T) * self.derivative(self.h[i])
        return dw, db

    def _cross_entropy(self, probs, y_true):
        """Return the mean cross-entropy of ``probs`` against ``y_true``."""
        # 1e-10 keeps log() finite when a predicted probability is exactly 0.
        return -np.mean(np.sum(y_true * np.log(probs + 1e-10), axis=1))

    def _l2_penalty(self):
        """Return (weight_decay / 2) * sum of squared weights (biases excluded)."""
        return (self.weight_decay / 2) * sum(np.sum(w ** 2) for w in self.weights)

    # Param Updates for "Non-Nesterov"
    def _update_params(self, dw, db):
        """Apply one optimizer step for every optimizer except "nesterov".

        Args:
            dw: Per-layer weight gradients (without the L2 term).
            db: Per-layer bias gradients.

        The L2 gradient ``weight_decay * W`` is added to the weight gradients
        here; the input lists and their arrays are not modified.
        """
        layer_ids = range(self.layer_n - 1)
        # Add weight decay to each gradient (new arrays, caller's are untouched).
        dw = [dw[i] + self.weight_decay * self.weights[i] for i in layer_ids]

        if self.optimizer == "sgd":
            for i in layer_ids:
                self.weights[i] -= self.lr * dw[i]
                self.biases[i] -= self.lr * db[i]

        elif self.optimizer == "momentum":
            for i in layer_ids:
                self.v_w[i] = self.momentum * self.v_w[i] + dw[i]
                self.v_b[i] = self.momentum * self.v_b[i] + db[i]
                self.weights[i] -= self.lr * self.v_w[i]
                self.biases[i] -= self.lr * self.v_b[i]

        elif self.optimizer == "rmsprop":
            for i in layer_ids:
                self.v_w[i] = self.beta_rms * self.v_w[i] + (1 - self.beta_rms) * (dw[i] ** 2)
                self.v_b[i] = self.beta_rms * self.v_b[i] + (1 - self.beta_rms) * (db[i] ** 2)
                self.weights[i] -= self.lr * dw[i] / (np.sqrt(self.v_w[i]) + self.epsilon)
                self.biases[i]  -= self.lr * db[i] / (np.sqrt(self.v_b[i]) + self.epsilon)

        elif self.optimizer == "adam":
            self.t += 1
            for i in layer_ids:
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                # bias correction
                m_w_hat = self.m_w[i] / (1 - self.beta1 ** self.t)
                m_b_hat = self.m_b[i] / (1 - self.beta1 ** self.t)
                v_w_hat = self.v_w[i] / (1 - self.beta2 ** self.t)
                v_b_hat = self.v_b[i] / (1 - self.beta2 ** self.t)

                self.weights[i] -= self.lr * m_w_hat / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i]  -= self.lr * m_b_hat / (np.sqrt(v_b_hat) + self.epsilon)

        elif self.optimizer == "nadam":
            self.t += 1
            # self.t is already the 1-based step count here, so the current
            # gradient and the second moment are corrected with exponent t.
            # Only the look-ahead momentum term uses t + 1.
            m_lookahead_corr = 1 - self.beta1 ** (self.t + 1)
            grad_corr = 1 - self.beta1 ** self.t
            v_corr = 1 - self.beta2 ** self.t
            for i in layer_ids:
                self.m_w[i] = self.beta1 * self.m_w[i] + (1 - self.beta1) * dw[i]
                self.m_b[i] = self.beta1 * self.m_b[i] + (1 - self.beta1) * db[i]
                self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (dw[i] ** 2)
                self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (db[i] ** 2)

                # bias correction
                m_w_hat = self.m_w[i] / m_lookahead_corr
                m_b_hat = self.m_b[i] / m_lookahead_corr
                v_w_hat = self.v_w[i] / v_corr
                v_b_hat = self.v_b[i] / v_corr

                # Nesterov blend of corrected momentum and corrected gradient.
                grad_term_w = self.beta1 * m_w_hat + (1 - self.beta1) * dw[i] / grad_corr
                grad_term_b = self.beta1 * m_b_hat + (1 - self.beta1) * db[i] / grad_corr

                self.weights[i] -= self.lr * grad_term_w / (np.sqrt(v_w_hat) + self.epsilon)
                self.biases[i]  -= self.lr * grad_term_b / (np.sqrt(v_b_hat) + self.epsilon)

    # Training Step  with "Nesterov"
    def _train_step(self, x_batch, y_batch):
        """Run forward, backward and one parameter update on a single batch.

        Args:
            x_batch: Inputs of shape (batch, layers[0]).
            y_batch: Targets with the same shape as the network output.

        Returns:
            The regularised loss (cross-entropy + L2 penalty) measured before
            the update. For "nesterov" it is measured at the look-ahead
            parameters.
        """
        if self.optimizer != "nesterov":
            # Normal forward/back; the optimizer-specific step is delegated.
            self.forward_pass(x_batch)
            loss = self._cross_entropy(self.h[-1], y_batch) + self._l2_penalty()
            dW, dB = self.backward_prop(y_batch)
            self._update_params(dW, dB)
            return loss

        layer_ids = range(self.layer_n - 1)
        lookahead = self.lr * self.momentum

        # Move to the look-ahead point: w_look = w - lr * momentum * v
        for i in layer_ids:
            self.weights[i] -= lookahead * self.v_w[i]
            self.biases[i]  -= lookahead * self.v_b[i]

        # Forward/backward at the look-ahead position
        self.forward_pass(x_batch)
        loss = self._cross_entropy(self.h[-1], y_batch) + self._l2_penalty()
        dW, dB = self.backward_prop(y_batch)

        # Weight decay gradient, also evaluated at the look-ahead weights.
        for i in layer_ids:
            dW[i] += self.weight_decay * self.weights[i]

        # Undo the look-ahead shift (go back to w_t)
        for i in layer_ids:
            self.weights[i] += lookahead * self.v_w[i]
            self.biases[i]  += lookahead * self.v_b[i]

        # update velocity: u_t = momentum*u_{t-1} + dW
        for i in layer_ids:
            self.v_w[i] = self.momentum * self.v_w[i] + dW[i]
            self.v_b[i] = self.momentum * self.v_b[i] + dB[i]

        # final param update: w = w - lr*u_t
        for i in layer_ids:
            self.weights[i] -= self.lr * self.v_w[i]
            self.biases[i]  -= self.lr * self.v_b[i]

        return loss

    # Outer Training Loop
    def training(self, x_train, y_train, x_val, y_val, epochs, batch_size):
        """Train for ``epochs`` passes and log metrics to wandb after each one.

        Every epoch reshuffles the training set and visits all samples in
        mini-batches; when ``batch_size`` does not divide the sample count,
        the last batch is smaller. After the epoch, validation accuracy and
        (unregularised) validation cross-entropy are computed, logged with
        ``wandb.log`` and printed.

        Args:
            x_train: Training inputs, shape (n_samples, layers[0]).
            y_train: Training targets, one row per sample.
            x_val: Validation inputs.
            y_val: Validation targets; the arg-max of each row is used as the
                class label.
            epochs: Number of passes over the training set.
            batch_size: Mini-batch size, at least 1.

        Raises:
            ValueError: If ``batch_size`` is less than 1 or the training set
                is empty.
        """
        n_samples = x_train.shape[0]
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        if n_samples == 0:
            raise ValueError("x_train is empty")

        for ep in range(epochs):
            idx = np.random.permutation(n_samples)
            x_train_shuff = x_train[idx]
            y_train_shuff = y_train[idx]

            # Weight each batch loss by its size so avg_loss stays a
            # per-sample mean even when the final batch is short.
            epoch_loss = 0.0
            for start in range(0, n_samples, batch_size):
                x_batch = x_train_shuff[start:start + batch_size]
                y_batch = y_train_shuff[start:start + batch_size]
                loss = self._train_step(x_batch, y_batch)
                epoch_loss += loss * x_batch.shape[0]
            avg_loss = epoch_loss / n_samples

            # Validation: one forward pass provides both accuracy and loss.
            val_outputs = self.forward_pass(x_val)[-1]
            preds = np.argmax(val_outputs, axis=1)
            val_labels = np.argmax(y_val, axis=1)
            val_acc = np.mean(preds == val_labels)

            # Cross-entropy loss for validation
            val_loss = -np.mean(np.sum(y_val * np.log(val_outputs + 1e-10), axis=1))

            # Log metrics to wandb
            wandb.log({"epoch": ep+1, "training_loss": avg_loss, "validation_accuracy": val_acc, "validation loss": val_loss})
            print(f"Epoch {ep+1}/{epochs} - loss={avg_loss:.4f}, val_acc={val_acc:.4f}, val_loss={val_loss}" )

    #Prediction
    def predict(self, X):
        """Return the arg-max class index for each row of ``X``.

        Overwrites the cached activations in ``self.h`` as a side effect of
        the forward pass.
        """
        self.forward_pass(X)
        return np.argmax(self.h[-1], axis=1)




# (x_train_full, y_train_full), (x_test, y_test) = fashion_mnist.load_data()
# x_train_full = x_train_full.reshape(x_train_full.shape[0], -1) / 255.0
# x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

# np.random.seed(42)
# idx = np.arange(x_train_full.shape[0])
# np.random.shuffle(idx)
# x_train_full = x_train_full[idx]
# y_train_full = y_train_full[idx]

# # 90% training, 10% validation 
# train_size=int(.9*len(x_train_full))

# x_train, y_train=x_train_full[:train_size],y_train_full[:train_size]
# x_val, y_val=x_train_full[train_size:], y_train_full[train_size:]

# num_classes = 10
# y_train_1h = np.eye(num_classes)[y_train]
# y_val_1h = np.eye(num_classes)[y_val]
# y_test_1h = np.eye(num_classes)[y_test]

# # model
# model = feed_forward_NN_4(
#     layers=[784] + [32] *3 + [10],
# optimizer="nesterov",
# learning_rate=0.01,
# momentum=0.9,
# beta1=0.9,
# beta2=0.999,
# beta_rms=0.9,
# epsilon=1e-4,
# weight_decay=0.0005,
# init_type="xavier",
# activation="relu")

#     # Train the model
# model.training(
#         x_train=x_train,
#         y_train=y_train_1h,
#         x_val=x_val,
#         y_val=y_val_1h,
#         epochs=10,
#         batch_size=32
#     )

#     #Evaluation on test set
# test_preds = model.predict(x_test)
# test_labels = np.argmax(y_test_1h, axis=1)
# test_acc = np.mean(test_preds == test_labels)
# print("test accuracy ",test_acc)
# #wandb.log({"test_accuracy": test_acc})




# train_sweep() function

def train_sweep():
    """Run one sweep trial: build, train and evaluate a model from wandb.config.

    Reads the hyper-parameters of the current wandb run, trains a
    ``feed_forward_NN_4`` on a fixed 90/10 train/validation split of
    Fashion-MNIST, then logs and prints the test-set accuracy.
    """
    # Start the run and name it after the main hyper-parameters.
    wandb.init()
    cfg = wandb.config
    wandb.run.name = f"hl_{cfg.num_hidden_layers}_hs_{cfg.hidden_size}_bs_{cfg.batch_size}_ac_{cfg.activation}_opt_{cfg.optimizer}"

    # Load Fashion-MNIST, flatten each image to a vector and scale by 1/255.
    (images_full, labels_full), (images_test, labels_test) = fashion_mnist.load_data()
    images_full = images_full.reshape(images_full.shape[0], -1) / 255.0
    images_test = images_test.reshape(images_test.shape[0], -1) / 255.0

    # Fixed seed: the shuffle (and everything drawn from np.random after it)
    # is the same in every trial.
    np.random.seed(42)
    order = np.arange(images_full.shape[0])
    np.random.shuffle(order)
    images_full, labels_full = images_full[order], labels_full[order]

    # 90% training, 10% validation
    cut = int(.9 * len(images_full))
    x_train, x_val = images_full[:cut], images_full[cut:]
    labels_train, labels_val = labels_full[:cut], labels_full[cut:]

    # One-hot encode the integer labels by indexing rows of an identity matrix.
    identity = np.eye(10)
    y_train_1h = identity[labels_train]
    y_val_1h = identity[labels_val]
    y_test_1h = identity[labels_test]

    # model
    hyper_params = {
        "layers": [784] + [cfg.hidden_size] * cfg.num_hidden_layers + [10],
        "optimizer": cfg.optimizer,
        "learning_rate": cfg.learning_rate,
        "momentum": cfg.momentum,
        "beta1": cfg.beta1,
        "beta2": cfg.beta2,
        "beta_rms": cfg.beta_rms,
        "epsilon": cfg.epsilon,
        "weight_decay": cfg.weight_decay,
        "init_type": cfg.init_type,
        "activation": cfg.activation,
    }
    model = feed_forward_NN_4(**hyper_params)

    # Train the model
    model.training(
        x_train=x_train,
        y_train=y_train_1h,
        x_val=x_val,
        y_val=y_val_1h,
        epochs=cfg.epochs,
        batch_size=cfg.batch_size,
    )

    # Evaluation on test set
    predicted = model.predict(images_test)
    expected = np.argmax(y_test_1h, axis=1)
    test_acc = np.mean(predicted == expected)

    wandb.log({"test_accuracy": test_acc})
    print("test accuracy ",test_acc)


# Sweep configuration: random search that maximises the "validation_accuracy"
# metric logged by the training loop. Each parameter lists the discrete values
# the sweep may sample; single-value entries are held fixed.
sweep_config = dict(
    method="random",
    metric=dict(name="validation_accuracy", goal="maximize"),
    parameters={
        param: {"values": choices}
        for param, choices in (
            ("epochs", [5, 10]),
            ("num_hidden_layers", [3, 4, 5]),
            ("hidden_size", [32, 64, 128]),
            ("weight_decay", [0.0, 5e-4, 0.5]),
            ("learning_rate", [1e-3, 1e-4]),
            ("optimizer", ["sgd", "momentum", "nesterov", "rmsprop", "adam", "nadam"]),
            ("batch_size", [16, 32, 64]),
            ("init_type", ["random", "xavier"]),
            ("activation", ["sigmoid", "tanh", "relu"]),
            ("momentum", [0.9]),
            ("beta1", [0.9]),
            ("beta2", [0.999, 0.99]),
            ("beta_rms", [0.9]),
            ("epsilon", [1e-8]),
            ("loss_func", ["cross_entropy"]),
        )
    },
)

# Running the sweep

if __name__ == "__main__":
    # Register the sweep definition under the project and keep the id it returns.
    sweep_id = wandb.sweep(sweep=sweep_config, project="q4_sweep_project")
    # Start an agent for that sweep, using train_sweep as the run function.
    wandb.agent(sweep_id=sweep_id, function=train_sweep)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: hp8fw2uv
Sweep URL: https://wandb.ai/ed24s401-indian-institute-of-technology-madras/q4_sweep_project/sweeps/hp8fw2uv


[34m[1mwandb[0m: Agent Starting Run: mlk210ay with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: Currently logged in as: [33med24s401[0m ([33med24s401-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/5 - loss=40.8990, val_acc=0.1673, val_loss=2.2907739061868595
Epoch 2/5 - loss=6.6646, val_acc=0.1023, val_loss=2.302511084192579
Epoch 3/5 - loss=2.6305, val_acc=0.1023, val_loss=2.302519634067361
Epoch 4/5 - loss=2.3150, val_acc=0.1012, val_loss=2.302534656067891
Epoch 5/5 - loss=2.3029, val_acc=0.1012, val_loss=2.302546886691154
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▂▁▁▁
validation loss,▁████
validation_accuracy,█▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.30294
validation loss,2.30255
validation_accuracy,0.10117


[34m[1mwandb[0m: Agent Starting Run: zo5r9aii with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.6101, val_acc=0.8373, val_loss=0.4390277663963681
Epoch 2/5 - loss=0.5187, val_acc=0.8545, val_loss=0.39593892756785254
Epoch 3/5 - loss=0.4910, val_acc=0.8418, val_loss=0.4371763767167325
Epoch 4/5 - loss=0.4770, val_acc=0.8488, val_loss=0.40828071301098734
Epoch 5/5 - loss=0.4679, val_acc=0.8687, val_loss=0.3627857876854914
test accuracy  0.8607


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄█▅▁
validation_accuracy,▁▅▂▄█

0,1
epoch,5.0
test_accuracy,0.8607
training_loss,0.46789
validation loss,0.36279
validation_accuracy,0.86867


[34m[1mwandb[0m: Agent Starting Run: ija6sh2a with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=0.7588, val_acc=0.8158, val_loss=0.5184272025717886
Epoch 2/5 - loss=0.4650, val_acc=0.8398, val_loss=0.44410415451702656
Epoch 3/5 - loss=0.4151, val_acc=0.8452, val_loss=0.4202374809979013
Epoch 4/5 - loss=0.3870, val_acc=0.8592, val_loss=0.3883624201772921
Epoch 5/5 - loss=0.3686, val_acc=0.8598, val_loss=0.3782924943300987
test accuracy  0.8554


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▃▂▁
validation_accuracy,▁▅▆██

0,1
epoch,5.0
test_accuracy,0.8554
training_loss,0.36864
validation loss,0.37829
validation_accuracy,0.85983


[34m[1mwandb[0m: Agent Starting Run: bb0hvefn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=3.0499, val_acc=0.1000, val_loss=2.3027245423011324
Epoch 2/10 - loss=2.3046, val_acc=0.0995, val_loss=2.302657744278535
Epoch 3/10 - loss=2.3046, val_acc=0.1013, val_loss=2.3026158621076434
Epoch 4/10 - loss=2.3046, val_acc=0.0995, val_loss=2.3029372600833877
Epoch 5/10 - loss=2.3046, val_acc=0.1000, val_loss=2.302757839927576
Epoch 6/10 - loss=2.3047, val_acc=0.0995, val_loss=2.302648088478558
Epoch 7/10 - loss=2.3046, val_acc=0.1012, val_loss=2.30274709989466
Epoch 8/10 - loss=2.3046, val_acc=0.0977, val_loss=2.302744582622311
Epoch 9/10 - loss=2.3046, val_acc=0.0977, val_loss=2.3028527201531817
Epoch 10/10 - loss=2.3046, val_acc=0.1013, val_loss=2.3025718950696463
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▁▁▁▁▁▁▁▁▁
validation loss,▄▃▂█▅▂▄▄▆▁
validation_accuracy,▅▅█▅▅▅█▁▁█

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30463
validation loss,2.30257
validation_accuracy,0.10133


[34m[1mwandb[0m: Agent Starting Run: rctxp2hl with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=2.3115, val_acc=0.0993, val_loss=2.303015199282377
Epoch 2/5 - loss=2.3030, val_acc=0.1018, val_loss=2.3026813279945904
Epoch 3/5 - loss=2.3029, val_acc=0.0995, val_loss=2.302968128227342
Epoch 4/5 - loss=2.3029, val_acc=0.0977, val_loss=2.30262918864855
Epoch 5/5 - loss=2.3027, val_acc=0.0977, val_loss=2.3027720963458433
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▁▁▁▁
validation loss,█▂▇▁▄
validation_accuracy,▄█▄▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.30269
validation loss,2.30277
validation_accuracy,0.09767


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3za4mz8a with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.5167, val_acc=0.8475, val_loss=0.4157992602449881
Epoch 2/10 - loss=0.3826, val_acc=0.8623, val_loss=0.3724421827394258
Epoch 3/10 - loss=0.3459, val_acc=0.8782, val_loss=0.3431994676871723
Epoch 4/10 - loss=0.3220, val_acc=0.8783, val_loss=0.33124194239118143
Epoch 5/10 - loss=0.3054, val_acc=0.8755, val_loss=0.3335823233260507
Epoch 6/10 - loss=0.2903, val_acc=0.8783, val_loss=0.326151366963755
Epoch 7/10 - loss=0.2799, val_acc=0.8837, val_loss=0.3199459992153886
Epoch 8/10 - loss=0.2703, val_acc=0.8803, val_loss=0.3241656205503574
Epoch 9/10 - loss=0.2599, val_acc=0.8880, val_loss=0.30563255950964774
Epoch 10/10 - loss=0.2546, val_acc=0.8827, val_loss=0.32142511341451585
test accuracy  0.8795


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▃▃▃▂▂▂▁▂
validation_accuracy,▁▄▆▆▆▆▇▇█▇

0,1
epoch,10.0
test_accuracy,0.8795
training_loss,0.25455
validation loss,0.32143
validation_accuracy,0.88267


[34m[1mwandb[0m: Agent Starting Run: 1bjpmgm4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.4294, val_acc=0.1012, val_loss=2.3070099067544882
Epoch 2/10 - loss=2.3524, val_acc=0.2153, val_loss=2.2949199972316405
Epoch 3/10 - loss=2.3451, val_acc=0.2393, val_loss=2.289798684137088
Epoch 4/10 - loss=2.3400, val_acc=0.2873, val_loss=2.2848715584484522
Epoch 5/10 - loss=2.3349, val_acc=0.3177, val_loss=2.27964418416063
Epoch 6/10 - loss=2.3296, val_acc=0.3298, val_loss=2.2740921762453583
Epoch 7/10 - loss=2.3238, val_acc=0.3200, val_loss=2.268079310237216
Epoch 8/10 - loss=2.3175, val_acc=0.3377, val_loss=2.2613472085350392
Epoch 9/10 - loss=2.3105, val_acc=0.3270, val_loss=2.253804328705537
Epoch 10/10 - loss=2.3025, val_acc=0.3237, val_loss=2.2450918236727397
test accuracy  0.3184


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▃▂▂▂▁▁
validation loss,█▇▆▅▅▄▄▃▂▁
validation_accuracy,▁▄▅▇▇█▇███

0,1
epoch,10.0
test_accuracy,0.3184
training_loss,2.30249
validation loss,2.24509
validation_accuracy,0.32367


[34m[1mwandb[0m: Agent Starting Run: a244evgq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=38.2100, val_acc=0.2527, val_loss=2.1529700224466186
Epoch 2/10 - loss=17.7579, val_acc=0.3050, val_loss=2.2196834643680896
Epoch 3/10 - loss=9.0057, val_acc=0.3358, val_loss=2.270609800008005
Epoch 4/10 - loss=5.2158, val_acc=0.1360, val_loss=2.293251642564492
Epoch 5/10 - loss=3.5659, val_acc=0.1023, val_loss=2.3005726386969183
Epoch 6/10 - loss=2.8483, val_acc=0.1023, val_loss=2.302328513479357
Epoch 7/10 - loss=2.5375, val_acc=0.1023, val_loss=2.302650588733575
Epoch 8/10 - loss=2.4035, val_acc=0.1023, val_loss=2.3026902196403487
Epoch 9/10 - loss=2.3459, val_acc=0.1023, val_loss=2.3026800799254716
Epoch 10/10 - loss=2.3212, val_acc=0.0977, val_loss=2.3026656438330813
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▂▂▁▁▁▁▁▁
validation loss,▁▄▇███████
validation_accuracy,▆▇█▂▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.32123
validation loss,2.30267
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: gp485gz7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.8307, val_acc=0.8495, val_loss=0.41277935955795053
Epoch 2/5 - loss=0.6824, val_acc=0.8645, val_loss=0.3757961226085417
Epoch 3/5 - loss=0.6375, val_acc=0.8607, val_loss=0.3725148016237982
Epoch 4/5 - loss=0.6055, val_acc=0.8757, val_loss=0.3394944504837161
Epoch 5/5 - loss=0.5796, val_acc=0.8763, val_loss=0.32667168781466793
test accuracy  0.8709


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▃▂▁
validation loss,█▅▅▂▁
validation_accuracy,▁▅▄██

0,1
epoch,5.0
test_accuracy,0.8709
training_loss,0.57962
validation loss,0.32667
validation_accuracy,0.87633


[34m[1mwandb[0m: Agent Starting Run: 4tcaqm1z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=1.3679, val_acc=0.7063, val_loss=0.8329415952754935
Epoch 2/5 - loss=0.7221, val_acc=0.7687, val_loss=0.6608629474838044
Epoch 3/5 - loss=0.6029, val_acc=0.7983, val_loss=0.5786175086804313
Epoch 4/5 - loss=0.5460, val_acc=0.8128, val_loss=0.5379657745643167
Epoch 5/5 - loss=0.5127, val_acc=0.8207, val_loss=0.5109834376221335
test accuracy  0.8185


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▂▂▁
validation_accuracy,▁▅▇██

0,1
epoch,5.0
test_accuracy,0.8185
training_loss,0.51271
validation loss,0.51098
validation_accuracy,0.82067


[34m[1mwandb[0m: Agent Starting Run: yvtb999e with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=251.8533, val_acc=0.1023, val_loss=2.3938381049310484
Epoch 2/5 - loss=231.6148, val_acc=0.1023, val_loss=2.3486768101089224
Epoch 3/5 - loss=213.0452, val_acc=0.0972, val_loss=2.32615257681437
Epoch 4/5 - loss=195.9921, val_acc=0.0953, val_loss=2.3147223430369794
Epoch 5/5 - loss=180.3253, val_acc=0.0977, val_loss=2.308852765237357
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▆▄▃▁
validation loss,█▄▂▁▁
validation_accuracy,██▃▁▃

0,1
epoch,5.0
test_accuracy,0.1
training_loss,180.32527
validation loss,2.30885
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: hlbte7bc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=180.8732, val_acc=0.1023, val_loss=2.304111544043523
Epoch 2/5 - loss=48.8240, val_acc=0.1023, val_loss=2.303070952149344
Epoch 3/5 - loss=12.4699, val_acc=0.1018, val_loss=2.3030267502451522
Epoch 4/5 - loss=3.9583, val_acc=0.0995, val_loss=2.302777034733915
Epoch 5/5 - loss=2.4948, val_acc=0.1023, val_loss=2.3029602736661166
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▁▁▁
validation loss,█▃▂▁▂
validation_accuracy,██▇▁█

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.49483
validation loss,2.30296
validation_accuracy,0.10233


[34m[1mwandb[0m: Agent Starting Run: 3x8nhhq6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.6152, val_acc=0.8390, val_loss=0.4499839346746184
Epoch 2/10 - loss=0.4193, val_acc=0.8580, val_loss=0.39119279730481915
Epoch 3/10 - loss=0.3775, val_acc=0.8652, val_loss=0.3800032651587875
Epoch 4/10 - loss=0.3514, val_acc=0.8635, val_loss=0.376188775243125
Epoch 5/10 - loss=0.3308, val_acc=0.8747, val_loss=0.3425675286821864
Epoch 6/10 - loss=0.3154, val_acc=0.8767, val_loss=0.3326182716398669
Epoch 7/10 - loss=0.3006, val_acc=0.8782, val_loss=0.3219800155367676
Epoch 8/10 - loss=0.2876, val_acc=0.8815, val_loss=0.3194324392290063
Epoch 9/10 - loss=0.2786, val_acc=0.8783, val_loss=0.3263217315349846
Epoch 10/10 - loss=0.2669, val_acc=0.8897, val_loss=0.2989923942204972
test accuracy  0.8831


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▅▃▃▂▂▂▁
validation_accuracy,▁▄▅▄▆▆▆▇▆█

0,1
epoch,10.0
test_accuracy,0.8831
training_loss,0.2669
validation loss,0.29899
validation_accuracy,0.88967


[34m[1mwandb[0m: Agent Starting Run: 8ey81hmb with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=1.9928, val_acc=0.6273, val_loss=1.4259939003865498
Epoch 2/5 - loss=1.3861, val_acc=0.6603, val_loss=1.0840363631127985
Epoch 3/5 - loss=1.1483, val_acc=0.6915, val_loss=0.9198249040450425
Epoch 4/5 - loss=1.0200, val_acc=0.7233, val_loss=0.8207315511756907
Epoch 5/5 - loss=0.9377, val_acc=0.7377, val_loss=0.7522250816356751
test accuracy  0.7352


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▂▂▁
validation loss,█▄▃▂▁
validation_accuracy,▁▃▅▇█

0,1
epoch,5.0
test_accuracy,0.7352
training_loss,0.93768
validation loss,0.75223
validation_accuracy,0.73767


[34m[1mwandb[0m: Agent Starting Run: cpxmamc8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=2.4307, val_acc=0.1705, val_loss=2.298290245268061
Epoch 2/5 - loss=2.3957, val_acc=0.2630, val_loss=2.292100398869779
Epoch 3/5 - loss=2.3919, val_acc=0.2612, val_loss=2.2892188969531557
Epoch 4/5 - loss=2.3889, val_acc=0.3587, val_loss=2.2861675791027025
Epoch 5/5 - loss=2.3858, val_acc=0.3885, val_loss=2.2830662579796006
test accuracy  0.3795


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▅▄▂▁
validation_accuracy,▁▄▄▇█

0,1
epoch,5.0
test_accuracy,0.3795
training_loss,2.3858
validation loss,2.28307
validation_accuracy,0.3885


[34m[1mwandb[0m: Agent Starting Run: em5ifhe9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=182.4454, val_acc=0.0977, val_loss=2.3738836219136847
Epoch 2/5 - loss=154.3880, val_acc=0.0977, val_loss=2.3233749222177487
Epoch 3/5 - loss=130.7537, val_acc=0.0977, val_loss=2.308703639578245
Epoch 4/5 - loss=110.8062, val_acc=0.0977, val_loss=2.304416810720981
Epoch 5/5 - loss=93.9603, val_acc=0.0977, val_loss=2.303096475750441
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▆▄▂▁
validation loss,█▃▂▁▁
validation_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,93.96026
validation loss,2.3031
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: dtqe0tzb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=128.6928, val_acc=0.4420, val_loss=2.2151189375752893
Epoch 2/10 - loss=25.6893, val_acc=0.1012, val_loss=2.3001445861100263
Epoch 3/10 - loss=6.6138, val_acc=0.1012, val_loss=2.3026731434812504
Epoch 4/10 - loss=3.0951, val_acc=0.1012, val_loss=2.3026643854000675
Epoch 5/10 - loss=2.4483, val_acc=0.1012, val_loss=2.3026412936363205
Epoch 6/10 - loss=2.3294, val_acc=0.1012, val_loss=2.302626679796253
Epoch 7/10 - loss=2.3076, val_acc=0.1012, val_loss=2.3026175928454595
Epoch 8/10 - loss=2.3035, val_acc=0.1012, val_loss=2.3026100601872797
Epoch 9/10 - loss=2.3028, val_acc=0.1012, val_loss=2.302606772972771
Epoch 10/10 - loss=2.3026, val_acc=0.1012, val_loss=2.302606618142486
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▁▁▁▁▁▁▁▁
validation loss,▁█████████
validation_accuracy,█▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30265
validation loss,2.30261
validation_accuracy,0.10117


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g7f41slr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=45.3385, val_acc=0.0977, val_loss=2.3065861077214893
Epoch 2/10 - loss=13.2579, val_acc=0.0985, val_loss=2.3030876323980247
Epoch 3/10 - loss=4.6072, val_acc=0.1012, val_loss=2.3027356068787497
Epoch 4/10 - loss=2.6406, val_acc=0.0995, val_loss=2.3028134202696977
Epoch 5/10 - loss=2.3274, val_acc=0.0995, val_loss=2.3027409489126622
Epoch 6/10 - loss=2.3044, val_acc=0.1000, val_loss=2.302717264944213
Epoch 7/10 - loss=2.3033, val_acc=0.1012, val_loss=2.3027971206067117
Epoch 8/10 - loss=2.3032, val_acc=0.1000, val_loss=2.302723224421464
Epoch 9/10 - loss=2.3033, val_acc=0.1012, val_loss=2.3026905624401897
Epoch 10/10 - loss=2.3032, val_acc=0.1012, val_loss=2.302597006468284
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▁▁▁▁▁▁▁▁
validation loss,█▂▁▁▁▁▁▁▁▁
validation_accuracy,▁▃█▅▅▆█▆██

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30317
validation loss,2.3026
validation_accuracy,0.10117


[34m[1mwandb[0m: Agent Starting Run: b87vjdoi with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.9986, val_acc=0.7373, val_loss=0.7070792639155095
Epoch 2/5 - loss=0.6427, val_acc=0.8027, val_loss=0.5699640093095261
Epoch 3/5 - loss=0.5414, val_acc=0.8285, val_loss=0.4965142704079345
Epoch 4/5 - loss=0.4911, val_acc=0.8408, val_loss=0.45182286148104567
Epoch 5/5 - loss=0.4631, val_acc=0.8510, val_loss=0.4297944786402634
test accuracy  0.8426


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▅▃▂▁
validation_accuracy,▁▅▇▇█

0,1
epoch,5.0
test_accuracy,0.8426
training_loss,0.46308
validation loss,0.42979
validation_accuracy,0.851


[34m[1mwandb[0m: Agent Starting Run: slc8rcbu with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=1.7876, val_acc=0.6128, val_loss=1.2224024267055946
Epoch 2/10 - loss=1.0528, val_acc=0.6810, val_loss=0.8561838379181044
Epoch 3/10 - loss=0.8704, val_acc=0.7128, val_loss=0.7643719064714508
Epoch 4/10 - loss=0.7922, val_acc=0.7433, val_loss=0.6951883972663191
Epoch 5/10 - loss=0.7399, val_acc=0.7640, val_loss=0.6575126325764058
Epoch 6/10 - loss=0.7000, val_acc=0.7860, val_loss=0.6151650424617933
Epoch 7/10 - loss=0.6662, val_acc=0.7980, val_loss=0.590694356190673
Epoch 8/10 - loss=0.6380, val_acc=0.8095, val_loss=0.5625736558759356
Epoch 9/10 - loss=0.6156, val_acc=0.8118, val_loss=0.5451017928268878
Epoch 10/10 - loss=0.5980, val_acc=0.8158, val_loss=0.5330872100364479
test accuracy  0.8154


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▁▁▁▁
validation loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▃▄▆▆▇▇███

0,1
epoch,10.0
test_accuracy,0.8154
training_loss,0.59804
validation loss,0.53309
validation_accuracy,0.81583


[34m[1mwandb[0m: Agent Starting Run: 837nkmc5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.6748, val_acc=0.8267, val_loss=0.4648631724119012
Epoch 2/5 - loss=0.5208, val_acc=0.8425, val_loss=0.42602637067653315
Epoch 3/5 - loss=0.4756, val_acc=0.8575, val_loss=0.389321786758338
Epoch 4/5 - loss=0.4515, val_acc=0.8633, val_loss=0.37249648773847743
Epoch 5/5 - loss=0.4344, val_acc=0.8738, val_loss=0.34761762462200835
test accuracy  0.8659


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▂▁▁
validation loss,█▆▃▂▁
validation_accuracy,▁▃▆▆█

0,1
epoch,5.0
test_accuracy,0.8659
training_loss,0.4344
validation loss,0.34762
validation_accuracy,0.87383


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0gufytxs with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.6492, val_acc=0.0977, val_loss=2.432119962393201
Epoch 2/10 - loss=2.5563, val_acc=0.0977, val_loss=2.368756931070126
Epoch 3/10 - loss=2.5121, val_acc=0.0977, val_loss=2.3373209445315153
Epoch 4/10 - loss=2.4898, val_acc=0.0977, val_loss=2.321020518756396
Epoch 5/10 - loss=2.4783, val_acc=0.0977, val_loss=2.312388280806739
Epoch 6/10 - loss=2.4723, val_acc=0.0977, val_loss=2.307799523430362
Epoch 7/10 - loss=2.4691, val_acc=0.0407, val_loss=2.3053554469900672
Epoch 8/10 - loss=2.4675, val_acc=0.0345, val_loss=2.304037163268205
Epoch 9/10 - loss=2.4666, val_acc=0.0688, val_loss=2.303338365016215
Epoch 10/10 - loss=2.4661, val_acc=0.0588, val_loss=2.302954823924299
test accuracy  0.0585


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▁▁▁▁▁▁
validation loss,█▅▃▂▂▁▁▁▁▁
validation_accuracy,██████▂▁▅▄

0,1
epoch,10.0
test_accuracy,0.0585
training_loss,2.46614
validation loss,2.30295
validation_accuracy,0.05883


[34m[1mwandb[0m: Agent Starting Run: o9zsvxei with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=2.5086, val_acc=0.1012, val_loss=2.4251334420616044
Epoch 2/5 - loss=2.3901, val_acc=0.1012, val_loss=2.3592268294229277
Epoch 3/5 - loss=2.3449, val_acc=0.1012, val_loss=2.329927796928456
Epoch 4/5 - loss=2.3232, val_acc=0.1012, val_loss=2.315097219974698
Epoch 5/5 - loss=2.3118, val_acc=0.1012, val_loss=2.307034089897792
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▂▁▁
validation loss,█▄▂▁▁
validation_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.31178
validation loss,2.30703
validation_accuracy,0.10117


[34m[1mwandb[0m: Agent Starting Run: 4zekiuit with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=1.4181, val_acc=0.6915, val_loss=0.8590074537338245
Epoch 2/5 - loss=0.7626, val_acc=0.7570, val_loss=0.6684897753636453
Epoch 3/5 - loss=0.6274, val_acc=0.8045, val_loss=0.5678447011237641
Epoch 4/5 - loss=0.5564, val_acc=0.8242, val_loss=0.5115263481723994
Epoch 5/5 - loss=0.5200, val_acc=0.8345, val_loss=0.4808425787510861
test accuracy  0.8271


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▃▂▁
validation_accuracy,▁▄▇▇█

0,1
epoch,5.0
test_accuracy,0.8271
training_loss,0.52005
validation loss,0.48084
validation_accuracy,0.8345


[34m[1mwandb[0m: Agent Starting Run: pipfql07 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=0.7233, val_acc=0.8168, val_loss=0.5014266118980183
Epoch 2/5 - loss=0.4524, val_acc=0.8400, val_loss=0.4359909145218588
Epoch 3/5 - loss=0.4111, val_acc=0.8502, val_loss=0.41528412174927976
Epoch 4/5 - loss=0.3880, val_acc=0.8598, val_loss=0.38780417820472113
Epoch 5/5 - loss=0.3719, val_acc=0.8617, val_loss=0.37843844288094625
test accuracy  0.853


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▃▂▁
validation_accuracy,▁▅▆██

0,1
epoch,5.0
test_accuracy,0.853
training_loss,0.37193
validation loss,0.37844
validation_accuracy,0.86167


[34m[1mwandb[0m: Agent Starting Run: 2w1lx3k2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=0.8577, val_acc=0.7943, val_loss=0.6037159593694097
Epoch 2/5 - loss=0.5371, val_acc=0.8148, val_loss=0.5146431912790519
Epoch 3/5 - loss=0.4781, val_acc=0.8258, val_loss=0.4730320611388005
Epoch 4/5 - loss=0.4470, val_acc=0.8383, val_loss=0.4490382080383644
Epoch 5/5 - loss=0.4261, val_acc=0.8415, val_loss=0.4306662833050165
test accuracy  0.8365


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▃▂▁
validation_accuracy,▁▄▆██

0,1
epoch,5.0
test_accuracy,0.8365
training_loss,0.42612
validation loss,0.43067
validation_accuracy,0.8415


[34m[1mwandb[0m: Agent Starting Run: g21o5zx4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.6325, val_acc=0.4988, val_loss=1.1732639053204024
Epoch 2/10 - loss=1.0281, val_acc=0.5692, val_loss=0.951172685564414
Epoch 3/10 - loss=0.8020, val_acc=0.7907, val_loss=0.6566691892747056
Epoch 4/10 - loss=0.5576, val_acc=0.8107, val_loss=0.5333284791912336
Epoch 5/10 - loss=0.4880, val_acc=0.8137, val_loss=0.49581319853397166
Epoch 6/10 - loss=0.4569, val_acc=0.8215, val_loss=0.48200255090168237
Epoch 7/10 - loss=0.4371, val_acc=0.8328, val_loss=0.46170992074386724
Epoch 8/10 - loss=0.4174, val_acc=0.8345, val_loss=0.46009808428638616
Epoch 9/10 - loss=0.4036, val_acc=0.8447, val_loss=0.4432953657635385
Epoch 10/10 - loss=0.3891, val_acc=0.8532, val_loss=0.4272639634206245
test accuracy  0.8484


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▃▂▂▁▁▁▁▁
validation loss,█▆▃▂▂▂▁▁▁▁
validation_accuracy,▁▂▇▇▇▇████

0,1
epoch,10.0
test_accuracy,0.8484
training_loss,0.38911
validation loss,0.42726
validation_accuracy,0.85317


[34m[1mwandb[0m: Agent Starting Run: 6pwyz51s with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=31.4995, val_acc=0.2490, val_loss=2.283846149124263
Epoch 2/5 - loss=3.3277, val_acc=0.1013, val_loss=2.3026272062033484
Epoch 3/5 - loss=2.3376, val_acc=0.1013, val_loss=2.3026238529658563
Epoch 4/5 - loss=2.3038, val_acc=0.1013, val_loss=2.30260655681544
Epoch 5/5 - loss=2.3027, val_acc=0.1013, val_loss=2.302601588110152
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▁▁▁▁
validation loss,▁████
validation_accuracy,█▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.30268
validation loss,2.3026
validation_accuracy,0.10133


[34m[1mwandb[0m: Agent Starting Run: 2o40wwav with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=6.5903, val_acc=0.1013, val_loss=2.3026180571561143
Epoch 2/10 - loss=2.3027, val_acc=0.0995, val_loss=2.302600279265666
Epoch 3/10 - loss=2.3027, val_acc=0.1013, val_loss=2.302587833010286
Epoch 4/10 - loss=2.3027, val_acc=0.0995, val_loss=2.302667258322679
Epoch 5/10 - loss=2.3027, val_acc=0.1000, val_loss=2.3026166035653604
Epoch 6/10 - loss=2.3027, val_acc=0.1000, val_loss=2.302613467101041
Epoch 7/10 - loss=2.3027, val_acc=0.1000, val_loss=2.302624770816555
Epoch 8/10 - loss=2.3027, val_acc=0.0977, val_loss=2.302637847559093
Epoch 9/10 - loss=2.3027, val_acc=0.0977, val_loss=2.302664959590406
Epoch 10/10 - loss=2.3027, val_acc=0.0977, val_loss=2.3026100929296076
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▁▁▁▁▁▁▁▁▁
validation loss,▄▂▁█▄▃▄▅█▃
validation_accuracy,█▅█▅▅▅▅▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.3027
validation loss,2.30261
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: gxag9tee with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=2.3718, val_acc=0.2472, val_loss=2.1126082669451454
Epoch 2/5 - loss=2.1306, val_acc=0.5062, val_loss=1.8954953077121335
Epoch 3/5 - loss=1.9260, val_acc=0.5740, val_loss=1.7052431154042722
Epoch 4/5 - loss=1.7509, val_acc=0.6050, val_loss=1.5474980932075226
Epoch 5/5 - loss=1.6097, val_acc=0.6237, val_loss=1.4232544996389636
test accuracy  0.6268


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▆▄▂▁
validation loss,█▆▄▂▁
validation_accuracy,▁▆▇██

0,1
epoch,5.0
test_accuracy,0.6268
training_loss,1.60975
validation loss,1.42325
validation_accuracy,0.62367


[34m[1mwandb[0m: Agent Starting Run: 8hrfwzaw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=1.9770, val_acc=0.4797, val_loss=1.6256454172261383
Epoch 2/5 - loss=1.4731, val_acc=0.6105, val_loss=1.349566867409626
Epoch 3/5 - loss=1.2730, val_acc=0.6693, val_loss=1.201461354869569
Epoch 4/5 - loss=1.1504, val_acc=0.6935, val_loss=1.101152136982692
Epoch 5/5 - loss=1.0633, val_acc=0.7055, val_loss=1.0265535869361146
test accuracy  0.6967


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▃▂▁
validation loss,█▅▃▂▁
validation_accuracy,▁▅▇██

0,1
epoch,5.0
test_accuracy,0.6967
training_loss,1.06327
validation loss,1.02655
validation_accuracy,0.7055


[34m[1mwandb[0m: Agent Starting Run: 0vn6afpf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=47.0910, val_acc=0.0985, val_loss=2.306493175054373
Epoch 2/5 - loss=16.4519, val_acc=0.0985, val_loss=2.3030446403667817
Epoch 3/5 - loss=6.9265, val_acc=0.0985, val_loss=2.3029006743543694
Epoch 4/5 - loss=3.6702, val_acc=0.0985, val_loss=2.3027939048499437
Epoch 5/5 - loss=2.6496, val_acc=0.1000, val_loss=2.3029043515909082
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▁▁▁▁
validation_accuracy,▁▁▁▁█

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.64962
validation loss,2.3029
validation_accuracy,0.1


[34m[1mwandb[0m: Agent Starting Run: j8q164cg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=2.2288, val_acc=0.3778, val_loss=1.9911405872533832
Epoch 2/5 - loss=1.9310, val_acc=0.4217, val_loss=1.7200659380691274
Epoch 3/5 - loss=1.7082, val_acc=0.4922, val_loss=1.5416678876327108
Epoch 4/5 - loss=1.5590, val_acc=0.5377, val_loss=1.41727430328914
Epoch 5/5 - loss=1.4501, val_acc=0.5803, val_loss=1.3227709327220518
test accuracy  0.5703


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▅▃▂▁
validation loss,█▅▃▂▁
validation_accuracy,▁▃▅▇█

0,1
epoch,5.0
test_accuracy,0.5703
training_loss,1.45006
validation loss,1.32277
validation_accuracy,0.58033


[34m[1mwandb[0m: Agent Starting Run: cb9uzzf1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=16.0337, val_acc=0.1012, val_loss=2.3067605159784392
Epoch 2/5 - loss=2.3055, val_acc=0.1013, val_loss=2.3048949068363807
Epoch 3/5 - loss=2.3053, val_acc=0.0977, val_loss=2.30371499859875
Epoch 4/5 - loss=2.3048, val_acc=0.0977, val_loss=2.3032578189633814
Epoch 5/5 - loss=2.3042, val_acc=0.0995, val_loss=2.3028458741299893
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▁▁▁▁
validation loss,█▅▃▂▁
validation_accuracy,██▁▁▅

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.30422
validation loss,2.30285
validation_accuracy,0.0995


[34m[1mwandb[0m: Agent Starting Run: euh6t159 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=59.4314, val_acc=0.2962, val_loss=2.2052981248473884
Epoch 2/5 - loss=27.0084, val_acc=0.2762, val_loss=2.2822717042011864
Epoch 3/5 - loss=12.9827, val_acc=0.1012, val_loss=2.300046952573626
Epoch 4/5 - loss=6.9065, val_acc=0.1012, val_loss=2.302417172137834
Epoch 5/5 - loss=4.2847, val_acc=0.1012, val_loss=2.3026240747756006
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▂▁▁
validation loss,▁▇███
validation_accuracy,█▇▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,4.28471
validation loss,2.30262
validation_accuracy,0.10117


[34m[1mwandb[0m: Agent Starting Run: omn11cyj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=2.2513, val_acc=0.5043, val_loss=1.8954128485342003
Epoch 2/5 - loss=1.8385, val_acc=0.6045, val_loss=1.5472957811097068
Epoch 3/5 - loss=1.5545, val_acc=0.6375, val_loss=1.3264133591426501
Epoch 4/5 - loss=1.3777, val_acc=0.6485, val_loss=1.1869786841418157
Epoch 5/5 - loss=1.2605, val_acc=0.6640, val_loss=1.0897138887539923
test accuracy  0.6593


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▅▃▂▁
validation loss,█▅▃▂▁
validation_accuracy,▁▅▇▇█

0,1
epoch,5.0
test_accuracy,0.6593
training_loss,1.26052
validation loss,1.08971
validation_accuracy,0.664


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tt9zcuwu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=92.1266, val_acc=0.0985, val_loss=2.305672738397101
Epoch 2/5 - loss=40.7933, val_acc=0.1013, val_loss=2.3026842335213322
Epoch 3/5 - loss=18.8026, val_acc=0.0977, val_loss=2.3026787743991677
Epoch 4/5 - loss=9.3759, val_acc=0.0977, val_loss=2.302642357013071
Epoch 5/5 - loss=5.3349, val_acc=0.0977, val_loss=2.302610534519099
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▂▁▁
validation loss,█▁▁▁▁
validation_accuracy,▃█▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,5.33485
validation loss,2.30261
validation_accuracy,0.09767


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: y3hto20u with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=1.3287, val_acc=0.6922, val_loss=0.831890537870595
Epoch 2/5 - loss=0.6812, val_acc=0.7862, val_loss=0.6089471762594449
Epoch 3/5 - loss=0.5252, val_acc=0.8238, val_loss=0.5064736413293646
Epoch 4/5 - loss=0.4577, val_acc=0.8380, val_loss=0.47458284213757407
Epoch 5/5 - loss=0.4167, val_acc=0.8488, val_loss=0.44309113084967444
test accuracy  0.8425


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▂▂▁
validation_accuracy,▁▅▇██

0,1
epoch,5.0
test_accuracy,0.8425
training_loss,0.41668
validation loss,0.44309
validation_accuracy,0.84883


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ewhs7rlw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=0.5343, val_acc=0.8285, val_loss=0.45583644017122843
Epoch 2/5 - loss=0.3950, val_acc=0.8407, val_loss=0.4147441947341265
Epoch 3/5 - loss=0.3634, val_acc=0.8543, val_loss=0.39878081464858844
Epoch 4/5 - loss=0.3440, val_acc=0.8718, val_loss=0.3557867088506788
Epoch 5/5 - loss=0.3265, val_acc=0.8687, val_loss=0.3546714890185924
test accuracy  0.8603


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▂▁
validation loss,█▅▄▁▁
validation_accuracy,▁▃▅█▇

0,1
epoch,5.0
test_accuracy,0.8603
training_loss,0.32651
validation loss,0.35467
validation_accuracy,0.86867


[34m[1mwandb[0m: Agent Starting Run: yz3w93q3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=178.9526, val_acc=0.6365, val_loss=1.5639065418264666
Epoch 2/10 - loss=78.2915, val_acc=0.6220, val_loss=1.8843333711381147
Epoch 3/10 - loss=35.2716, val_acc=0.4968, val_loss=2.1274713828394622
Epoch 4/10 - loss=16.6689, val_acc=0.3105, val_loss=2.252135288349998
Epoch 5/10 - loss=8.5516, val_acc=0.2068, val_loss=2.293247934839609
Epoch 6/10 - loss=5.0033, val_acc=0.1012, val_loss=2.3013004962661965
Epoch 7/10 - loss=3.4639, val_acc=0.1012, val_loss=2.3024416911514485
Epoch 8/10 - loss=2.8009, val_acc=0.1012, val_loss=2.3025788102775513
Epoch 9/10 - loss=2.5163, val_acc=0.1012, val_loss=2.3025932786234384
Epoch 10/10 - loss=2.3942, val_acc=0.1012, val_loss=2.3025937821667575
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▂▂▁▁▁▁▁▁
validation loss,▁▄▆███████
validation_accuracy,██▆▄▂▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.39421
validation loss,2.30259
validation_accuracy,0.10117


[34m[1mwandb[0m: Agent Starting Run: 6v7ecpkh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3768, val_acc=0.1012, val_loss=2.306987796204098
Epoch 2/10 - loss=2.2997, val_acc=0.2145, val_loss=2.2948619226716604
Epoch 3/10 - loss=2.2924, val_acc=0.2405, val_loss=2.2896617942971518
Epoch 4/10 - loss=2.2873, val_acc=0.2885, val_loss=2.2846203544890202
Epoch 5/10 - loss=2.2821, val_acc=0.3178, val_loss=2.27923895793898
Epoch 6/10 - loss=2.2767, val_acc=0.3295, val_loss=2.273486241587802
Epoch 7/10 - loss=2.2707, val_acc=0.3203, val_loss=2.2672161444837955
Epoch 8/10 - loss=2.2641, val_acc=0.3373, val_loss=2.260155269128498
Epoch 9/10 - loss=2.2566, val_acc=0.3280, val_loss=2.252197730417334
Epoch 10/10 - loss=2.2482, val_acc=0.3252, val_loss=2.24296403400351
test accuracy  0.3195


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▃▃▂▂▁▁
validation loss,█▇▆▆▅▄▄▃▂▁
validation_accuracy,▁▄▅▇▇█▇███

0,1
epoch,10.0
test_accuracy,0.3195
training_loss,2.24816
validation loss,2.24296
validation_accuracy,0.32517


[34m[1mwandb[0m: Agent Starting Run: 1ul7hc37 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=1.7665, val_acc=0.6318, val_loss=1.336378417111746
Epoch 2/5 - loss=1.0931, val_acc=0.6977, val_loss=0.9313019508653809
Epoch 3/5 - loss=0.8456, val_acc=0.7288, val_loss=0.7945521545060457
Epoch 4/5 - loss=0.7455, val_acc=0.7500, val_loss=0.7218791109343126
Epoch 5/5 - loss=0.6869, val_acc=0.7645, val_loss=0.6747043648236735
test accuracy  0.7655


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▂▁▁
validation loss,█▄▂▁▁
validation_accuracy,▁▄▆▇█

0,1
epoch,5.0
test_accuracy,0.7655
training_loss,0.68692
validation loss,0.6747
validation_accuracy,0.7645


[34m[1mwandb[0m: Agent Starting Run: omdhsns3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.8077, val_acc=0.8110, val_loss=0.5413536450032422
Epoch 2/5 - loss=0.5378, val_acc=0.8417, val_loss=0.4509334537312352
Epoch 3/5 - loss=0.4889, val_acc=0.8442, val_loss=0.4441127236756074
Epoch 4/5 - loss=0.4636, val_acc=0.8428, val_loss=0.4321760932756502
Epoch 5/5 - loss=0.4467, val_acc=0.8535, val_loss=0.40151655060508284
test accuracy  0.8472


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▃▃▃▁
validation_accuracy,▁▆▆▆█

0,1
epoch,5.0
test_accuracy,0.8472
training_loss,0.44674
validation loss,0.40152
validation_accuracy,0.8535


[34m[1mwandb[0m: Agent Starting Run: htbtycon with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=1.3567, val_acc=0.6945, val_loss=0.8251701735394275
Epoch 2/5 - loss=0.8386, val_acc=0.7732, val_loss=0.630065635048164
Epoch 3/5 - loss=0.7397, val_acc=0.8027, val_loss=0.5494110044115831
Epoch 4/5 - loss=0.6868, val_acc=0.8162, val_loss=0.5199589377858157
Epoch 5/5 - loss=0.6512, val_acc=0.8247, val_loss=0.49032905845994407
test accuracy  0.8246


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▂▂▁
validation_accuracy,▁▅▇██

0,1
epoch,5.0
test_accuracy,0.8246
training_loss,0.6512
validation loss,0.49033
validation_accuracy,0.82467


[34m[1mwandb[0m: Agent Starting Run: 9unmkfj0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.7227, val_acc=0.8290, val_loss=0.45354602732363536
Epoch 2/10 - loss=0.5713, val_acc=0.8507, val_loss=0.40222099882764967
Epoch 3/10 - loss=0.5358, val_acc=0.8538, val_loss=0.3988533429042631
Epoch 4/10 - loss=0.5122, val_acc=0.8645, val_loss=0.3672086236594939
Epoch 5/10 - loss=0.4940, val_acc=0.8628, val_loss=0.36710327249491176
Epoch 6/10 - loss=0.4791, val_acc=0.8693, val_loss=0.35236298323966914
Epoch 7/10 - loss=0.4672, val_acc=0.8715, val_loss=0.34643250654808705
Epoch 8/10 - loss=0.4554, val_acc=0.8740, val_loss=0.344599798560596
Epoch 9/10 - loss=0.4453, val_acc=0.8660, val_loss=0.3512388237806435
Epoch 10/10 - loss=0.4370, val_acc=0.8713, val_loss=0.34149264209441826
test accuracy  0.8671


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▃▂▂▂▁▁▁
validation loss,█▅▅▃▃▂▁▁▂▁
validation_accuracy,▁▄▅▇▆▇██▇█

0,1
epoch,10.0
test_accuracy,0.8671
training_loss,0.43697
validation loss,0.34149
validation_accuracy,0.87133


[34m[1mwandb[0m: Agent Starting Run: r901kcch with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.2937, val_acc=0.1220, val_loss=2.2871056593020054
Epoch 2/10 - loss=2.2805, val_acc=0.1602, val_loss=2.275130705670482
Epoch 3/10 - loss=2.2687, val_acc=0.1767, val_loss=2.2636481932458117
Epoch 4/10 - loss=2.2571, val_acc=0.1847, val_loss=2.2519324059570107
Epoch 5/10 - loss=2.2450, val_acc=0.1878, val_loss=2.2396083425000723
Epoch 6/10 - loss=2.2322, val_acc=0.1983, val_loss=2.226283642696734
Epoch 7/10 - loss=2.2182, val_acc=0.2087, val_loss=2.211726596237313
Epoch 8/10 - loss=2.2030, val_acc=0.2222, val_loss=2.1958133164969804
Epoch 9/10 - loss=2.1863, val_acc=0.2385, val_loss=2.1782475198377216
Epoch 10/10 - loss=2.1677, val_acc=0.2555, val_loss=2.1588063213750472
test accuracy  0.2547


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▇▇▆▅▅▄▃▂▁
validation loss,█▇▇▆▅▅▄▃▂▁
validation_accuracy,▁▃▄▄▄▅▆▆▇█

0,1
epoch,10.0
test_accuracy,0.2547
training_loss,2.16775
validation loss,2.15881
validation_accuracy,0.2555


[34m[1mwandb[0m: Agent Starting Run: jf0iocy6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=59.9885, val_acc=0.3845, val_loss=1.9838898693459137
Epoch 2/5 - loss=13.3509, val_acc=0.0977, val_loss=2.3029177449059652
Epoch 3/5 - loss=4.1166, val_acc=0.0977, val_loss=2.302981163154466
Epoch 4/5 - loss=2.4973, val_acc=0.0977, val_loss=2.302823054819569
Epoch 5/5 - loss=2.3137, val_acc=0.0977, val_loss=2.302697317462445
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▂▁▁▁
validation loss,▁████
validation_accuracy,█▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.31374
validation loss,2.3027
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: k5u42mrp with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=2.4851, val_acc=0.0985, val_loss=2.3083430908750544
Epoch 2/5 - loss=2.4355, val_acc=0.1168, val_loss=2.301479925351961
Epoch 3/5 - loss=2.4329, val_acc=0.1045, val_loss=2.30114091516327
Epoch 4/5 - loss=2.4325, val_acc=0.1815, val_loss=2.300840588444003
Epoch 5/5 - loss=2.4321, val_acc=0.1498, val_loss=2.3005347905362923
test accuracy  0.1529


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▁▁▁▁
validation loss,█▂▂▁▁
validation_accuracy,▁▃▂█▅

0,1
epoch,5.0
test_accuracy,0.1529
training_loss,2.43206
validation loss,2.30053
validation_accuracy,0.14983


[34m[1mwandb[0m: Agent Starting Run: ps028l61 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.7936, val_acc=0.6318, val_loss=1.1100986128912333
Epoch 2/10 - loss=0.8904, val_acc=0.7170, val_loss=0.7838823515600288
Epoch 3/10 - loss=0.7102, val_acc=0.7592, val_loss=0.6839038909499192
Epoch 4/10 - loss=0.6311, val_acc=0.7860, val_loss=0.620277329648298
Epoch 5/10 - loss=0.5804, val_acc=0.7980, val_loss=0.5738915935259986
Epoch 6/10 - loss=0.5452, val_acc=0.8098, val_loss=0.5461503442412167
Epoch 7/10 - loss=0.5175, val_acc=0.8210, val_loss=0.5224329555372274
Epoch 8/10 - loss=0.4954, val_acc=0.8248, val_loss=0.5018308721563447
Epoch 9/10 - loss=0.4781, val_acc=0.8283, val_loss=0.4914017465755068
Epoch 10/10 - loss=0.4654, val_acc=0.8313, val_loss=0.48091041249901095
test accuracy  0.8225


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▄▃▃▂▂▁▁▁▁
validation_accuracy,▁▄▅▆▇▇████

0,1
epoch,10.0
test_accuracy,0.8225
training_loss,0.46542
validation loss,0.48091
validation_accuracy,0.83133


[34m[1mwandb[0m: Agent Starting Run: 5zx8730j with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=0.9020, val_acc=0.7730, val_loss=0.610057281006536
Epoch 2/5 - loss=0.5384, val_acc=0.8218, val_loss=0.4829442707304804
Epoch 3/5 - loss=0.4513, val_acc=0.8302, val_loss=0.45001170644972316
Epoch 4/5 - loss=0.4014, val_acc=0.8575, val_loss=0.4005268300549291
Epoch 5/5 - loss=0.3661, val_acc=0.8590, val_loss=0.3811792114907509
test accuracy  0.8523


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▃▂▁
validation_accuracy,▁▅▆██

0,1
epoch,5.0
test_accuracy,0.8523
training_loss,0.36614
validation loss,0.38118
validation_accuracy,0.859


[34m[1mwandb[0m: Agent Starting Run: 5tknaq69 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=2.3157, val_acc=0.1000, val_loss=2.3032102331168187
Epoch 2/10 - loss=2.3032, val_acc=0.0995, val_loss=2.30239226423234
Epoch 3/10 - loss=2.3028, val_acc=0.1000, val_loss=2.3021379706424585
Epoch 4/10 - loss=2.3022, val_acc=0.0995, val_loss=2.302641309150897
Epoch 5/10 - loss=2.3018, val_acc=0.1123, val_loss=2.3013711621414865
Epoch 6/10 - loss=2.3013, val_acc=0.1950, val_loss=2.3003226623293855
Epoch 7/10 - loss=2.3002, val_acc=0.1750, val_loss=2.299456694578824
Epoch 8/10 - loss=2.2992, val_acc=0.1533, val_loss=2.298512923854352
Epoch 9/10 - loss=2.2973, val_acc=0.0985, val_loss=2.296373831078539
Epoch 10/10 - loss=2.2947, val_acc=0.2242, val_loss=2.2920642177376553
test accuracy  0.2223


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▄▄▃▃▃▃▂▁
validation loss,█▇▇█▇▆▆▅▄▁
validation_accuracy,▁▁▁▁▂▆▅▄▁█

0,1
epoch,10.0
test_accuracy,0.2223
training_loss,2.29466
validation loss,2.29206
validation_accuracy,0.22417


[34m[1mwandb[0m: Agent Starting Run: 6ouc1pf2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.9730, val_acc=0.8133, val_loss=0.5126196424382137
Epoch 2/10 - loss=0.7283, val_acc=0.8350, val_loss=0.4531654759102553
Epoch 3/10 - loss=0.6857, val_acc=0.8487, val_loss=0.41402929667097454
Epoch 4/10 - loss=0.6603, val_acc=0.8463, val_loss=0.42001477536955706
Epoch 5/10 - loss=0.6417, val_acc=0.8602, val_loss=0.3848002115904369
Epoch 6/10 - loss=0.6266, val_acc=0.8643, val_loss=0.3753922778270392
Epoch 7/10 - loss=0.6148, val_acc=0.8628, val_loss=0.371569904996635
Epoch 8/10 - loss=0.6040, val_acc=0.8620, val_loss=0.37019161849417614
Epoch 9/10 - loss=0.5949, val_acc=0.8672, val_loss=0.3548978718161686
Epoch 10/10 - loss=0.5864, val_acc=0.8645, val_loss=0.3588347155237947
test accuracy  0.8554


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▂▂▁▁▁
validation loss,█▅▄▄▂▂▂▂▁▁
validation_accuracy,▁▄▆▅▇█▇▇██

0,1
epoch,10.0
test_accuracy,0.8554
training_loss,0.58636
validation loss,0.35883
validation_accuracy,0.8645


[34m[1mwandb[0m: Agent Starting Run: bfsqxgb8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.8717, val_acc=0.8130, val_loss=0.5076352135600393
Epoch 2/5 - loss=0.6021, val_acc=0.8328, val_loss=0.45565970917769966
Epoch 3/5 - loss=0.5615, val_acc=0.8468, val_loss=0.41803876020373043
Epoch 4/5 - loss=0.5376, val_acc=0.8528, val_loss=0.40127543657657283
Epoch 5/5 - loss=0.5200, val_acc=0.8592, val_loss=0.38911527897068177
test accuracy  0.8492


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▅▃▂▁
validation_accuracy,▁▄▆▇█

0,1
epoch,5.0
test_accuracy,0.8492
training_loss,0.51997
validation loss,0.38912
validation_accuracy,0.85917


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5kcm9eh7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=0.7439, val_acc=0.8167, val_loss=0.5026992546209904
Epoch 2/10 - loss=0.4488, val_acc=0.8422, val_loss=0.4338544619512289
Epoch 3/10 - loss=0.4053, val_acc=0.8510, val_loss=0.4139029506437038
Epoch 4/10 - loss=0.3824, val_acc=0.8602, val_loss=0.3863545883312677
Epoch 5/10 - loss=0.3657, val_acc=0.8635, val_loss=0.3731739293147886
Epoch 6/10 - loss=0.3539, val_acc=0.8687, val_loss=0.36290190204502265
Epoch 7/10 - loss=0.3423, val_acc=0.8677, val_loss=0.3593922443346111
Epoch 8/10 - loss=0.3339, val_acc=0.8703, val_loss=0.3457204614723775
Epoch 9/10 - loss=0.3253, val_acc=0.8752, val_loss=0.34064979016511865
Epoch 10/10 - loss=0.3184, val_acc=0.8722, val_loss=0.34480101499089444
test accuracy  0.8669


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇▇██

0,1
epoch,10.0
test_accuracy,0.8669
training_loss,0.31843
validation loss,0.3448
validation_accuracy,0.87217


[34m[1mwandb[0m: Agent Starting Run: 45klwkjl with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.4359, val_acc=0.0977, val_loss=2.3032058797606467
Epoch 2/10 - loss=2.4299, val_acc=0.0977, val_loss=2.3045741913854116
Epoch 3/10 - loss=2.4234, val_acc=0.0995, val_loss=2.2996684351762497
Epoch 4/10 - loss=2.4160, val_acc=0.0977, val_loss=2.2966353525990466
Epoch 5/10 - loss=2.3999, val_acc=0.1817, val_loss=2.267710741553585
Epoch 6/10 - loss=2.2192, val_acc=0.2793, val_loss=1.7777570248361705
Epoch 7/10 - loss=1.7648, val_acc=0.3515, val_loss=1.5341899277361684
Epoch 8/10 - loss=1.5262, val_acc=0.4890, val_loss=1.3009107777902544
Epoch 9/10 - loss=1.3697, val_acc=0.5350, val_loss=1.1815425336312713
Epoch 10/10 - loss=1.2728, val_acc=0.5637, val_loss=1.0900192569411613
test accuracy  0.5636


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█████▇▄▃▂▁
validation loss,█████▅▄▂▂▁
validation_accuracy,▁▁▁▁▂▄▅▇██

0,1
epoch,10.0
test_accuracy,0.5636
training_loss,1.27284
validation loss,1.09002
validation_accuracy,0.56367


[34m[1mwandb[0m: Agent Starting Run: xutwudn7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=0.4545, val_acc=0.8505, val_loss=0.4007399164304077
Epoch 2/5 - loss=0.3542, val_acc=0.8713, val_loss=0.34238282363498257
Epoch 3/5 - loss=0.3206, val_acc=0.8803, val_loss=0.3323627488947404
Epoch 4/5 - loss=0.2981, val_acc=0.8792, val_loss=0.3265023483876427
Epoch 5/5 - loss=0.2824, val_acc=0.8878, val_loss=0.3079259922592906
test accuracy  0.8764


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▃▂▁
validation loss,█▄▃▂▁
validation_accuracy,▁▅▇▆█

0,1
epoch,5.0
test_accuracy,0.8764
training_loss,0.2824
validation loss,0.30793
validation_accuracy,0.88783


[34m[1mwandb[0m: Agent Starting Run: zub0fhcj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=0.6659, val_acc=0.8213, val_loss=0.49542081447485686
Epoch 2/5 - loss=0.4596, val_acc=0.8438, val_loss=0.43673648953797145
Epoch 3/5 - loss=0.4167, val_acc=0.8475, val_loss=0.4160090315390941
Epoch 4/5 - loss=0.3919, val_acc=0.8602, val_loss=0.389064541135099
Epoch 5/5 - loss=0.3733, val_acc=0.8620, val_loss=0.3754235826002323
test accuracy  0.8555


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▅▃▂▁
validation_accuracy,▁▅▆██

0,1
epoch,5.0
test_accuracy,0.8555
training_loss,0.37331
validation loss,0.37542
validation_accuracy,0.862


[34m[1mwandb[0m: Agent Starting Run: 2fbh1q86 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.1705, val_acc=0.7487, val_loss=0.7704718150462689
Epoch 2/10 - loss=0.6524, val_acc=0.7893, val_loss=0.5932840491154239
Epoch 3/10 - loss=0.5418, val_acc=0.8138, val_loss=0.5289603065393529
Epoch 4/10 - loss=0.4890, val_acc=0.8290, val_loss=0.48377038747833284
Epoch 5/10 - loss=0.4560, val_acc=0.8378, val_loss=0.45820906657758964
Epoch 6/10 - loss=0.4329, val_acc=0.8428, val_loss=0.4411679718618387
Epoch 7/10 - loss=0.4145, val_acc=0.8462, val_loss=0.42588116321424796
Epoch 8/10 - loss=0.3981, val_acc=0.8570, val_loss=0.4080193945663923
Epoch 9/10 - loss=0.3863, val_acc=0.8558, val_loss=0.4054220641407827
Epoch 10/10 - loss=0.3759, val_acc=0.8612, val_loss=0.3883531097207951
test accuracy  0.849


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▂▁▁▁▁
validation loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███

0,1
epoch,10.0
test_accuracy,0.849
training_loss,0.37595
validation loss,0.38835
validation_accuracy,0.86117


[34m[1mwandb[0m: Agent Starting Run: jipddqxh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=28.4937, val_acc=0.2597, val_loss=2.0999051868460783
Epoch 2/10 - loss=13.8261, val_acc=0.1870, val_loss=2.284823103753756
Epoch 3/10 - loss=7.6395, val_acc=0.0977, val_loss=2.3025791573606194
Epoch 4/10 - loss=4.6795, val_acc=0.0977, val_loss=2.3026167548538177
Epoch 5/10 - loss=3.3027, val_acc=0.0977, val_loss=2.302613979934242
Epoch 6/10 - loss=2.6890, val_acc=0.0977, val_loss=2.3026111196054218
Epoch 7/10 - loss=2.4352, val_acc=0.0977, val_loss=2.302608154055921
Epoch 8/10 - loss=2.3413, val_acc=0.0977, val_loss=2.3026108678437502
Epoch 9/10 - loss=2.3116, val_acc=0.0977, val_loss=2.3026100879068916
Epoch 10/10 - loss=2.3042, val_acc=0.0977, val_loss=2.3026094507739434
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▂▂▁▁▁▁▁▁
validation loss,▁▇████████
validation_accuracy,█▅▁▁▁▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30416
validation loss,2.30261
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: kmgeequ2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=2.3906, val_acc=0.1687, val_loss=2.2949352214382217
Epoch 2/5 - loss=2.3426, val_acc=0.3117, val_loss=2.2848278586077764
Epoch 3/5 - loss=2.3323, val_acc=0.2732, val_loss=2.274036094787746
Epoch 4/5 - loss=2.3207, val_acc=0.2902, val_loss=2.26149130453995
Epoch 5/5 - loss=2.3065, val_acc=0.3397, val_loss=2.245067168780772
test accuracy  0.3373


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▃▂▁
validation loss,█▇▅▃▁
validation_accuracy,▁▇▅▆█

0,1
epoch,5.0
test_accuracy,0.3373
training_loss,2.30651
validation loss,2.24507
validation_accuracy,0.33967


[34m[1mwandb[0m: Agent Starting Run: 9iej1nlo with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=152.9682, val_acc=0.0977, val_loss=2.411804891670959
Epoch 2/5 - loss=129.4872, val_acc=0.0977, val_loss=2.350126208463405
Epoch 3/5 - loss=109.7059, val_acc=0.0977, val_loss=2.323962637072413
Epoch 4/5 - loss=93.0165, val_acc=0.0977, val_loss=2.31235407146774
Epoch 5/5 - loss=78.9268, val_acc=0.0977, val_loss=2.3070920918813402
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▆▄▂▁
validation loss,█▄▂▁▁
validation_accuracy,▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,78.92682
validation loss,2.30709
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: 8blxqaut with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=128.7710, val_acc=0.4423, val_loss=2.2151764524743394
Epoch 2/5 - loss=25.6861, val_acc=0.1012, val_loss=2.3001508929715673
Epoch 3/5 - loss=6.6099, val_acc=0.1012, val_loss=2.302673208290972
Epoch 4/5 - loss=3.0937, val_acc=0.1012, val_loss=2.3026643275303784
Epoch 5/5 - loss=2.4479, val_acc=0.1012, val_loss=2.302641262703668
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▂▁▁▁
validation loss,▁████
validation_accuracy,█▁▁▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.44791
validation loss,2.30264
validation_accuracy,0.10117


[34m[1mwandb[0m: Agent Starting Run: nbjljswu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=20.4237, val_acc=0.0977, val_loss=2.3038956253465046
Epoch 2/10 - loss=4.9443, val_acc=0.1018, val_loss=2.3026870048973564
Epoch 3/10 - loss=2.5374, val_acc=0.1013, val_loss=2.302603441527706
Epoch 4/10 - loss=2.3103, val_acc=0.0995, val_loss=2.3027132212748396
Epoch 5/10 - loss=2.3031, val_acc=0.0995, val_loss=2.302761408026399
Epoch 6/10 - loss=2.3030, val_acc=0.1000, val_loss=2.3026505852481853
Epoch 7/10 - loss=2.3030, val_acc=0.1012, val_loss=2.302701723521713
Epoch 8/10 - loss=2.3029, val_acc=0.1000, val_loss=2.302728293275487
Epoch 9/10 - loss=2.3030, val_acc=0.1012, val_loss=2.302691117935975
Epoch 10/10 - loss=2.3029, val_acc=0.1012, val_loss=2.3025742490623555
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▁▁▁▁▁▁▁▁
validation loss,█▂▁▂▂▁▂▂▂▁
validation_accuracy,▁█▇▄▄▅▇▅▇▇

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30293
validation loss,2.30257
validation_accuracy,0.10117


[34m[1mwandb[0m: Agent Starting Run: t6gfz02y with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.5951, val_acc=0.8435, val_loss=0.4328606449814516
Epoch 2/10 - loss=0.4738, val_acc=0.8427, val_loss=0.41951161812574983
Epoch 3/10 - loss=0.4561, val_acc=0.8615, val_loss=0.38876164522239637
Epoch 4/10 - loss=0.4485, val_acc=0.8395, val_loss=0.4274575940483908
Epoch 5/10 - loss=0.4477, val_acc=0.8622, val_loss=0.37001525633334303
Epoch 6/10 - loss=0.4452, val_acc=0.8618, val_loss=0.3760484605030366
Epoch 7/10 - loss=0.4441, val_acc=0.8575, val_loss=0.3889166459707893
Epoch 8/10 - loss=0.4453, val_acc=0.8623, val_loss=0.38795378431235683
Epoch 9/10 - loss=0.4459, val_acc=0.8652, val_loss=0.3769073367634984
Epoch 10/10 - loss=0.4457, val_acc=0.8640, val_loss=0.3669266598754481
test accuracy  0.86


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▂▂▁▁▁▁▁▁▁
validation loss,█▇▃▇▁▂▃▃▂▁
validation_accuracy,▂▂▇▁▇▇▆▇██

0,1
epoch,10.0
test_accuracy,0.86
training_loss,0.44571
validation loss,0.36693
validation_accuracy,0.864


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rxuqtebm with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=2.1852, val_acc=0.3798, val_loss=1.6128695583014694
Epoch 2/5 - loss=1.4527, val_acc=0.5725, val_loss=1.1440136697390753
Epoch 3/5 - loss=1.1696, val_acc=0.6572, val_loss=0.9478935868566216
Epoch 4/5 - loss=1.0210, val_acc=0.7060, val_loss=0.841029011272386
Epoch 5/5 - loss=0.9428, val_acc=0.7282, val_loss=0.7767094219093963
test accuracy  0.7305


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▂▁▁
validation loss,█▄▂▂▁
validation_accuracy,▁▅▇██

0,1
epoch,5.0
test_accuracy,0.7305
training_loss,0.94284
validation loss,0.77671
validation_accuracy,0.72817


[34m[1mwandb[0m: Agent Starting Run: k7jrig7x with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=0.8958, val_acc=0.8155, val_loss=0.5456868079712159
Epoch 2/10 - loss=0.5258, val_acc=0.8395, val_loss=0.4531108985505634
Epoch 3/10 - loss=0.4697, val_acc=0.8508, val_loss=0.4153610838132601
Epoch 4/10 - loss=0.4414, val_acc=0.8595, val_loss=0.3992722627255889
Epoch 5/10 - loss=0.4235, val_acc=0.8633, val_loss=0.37812841189490354
Epoch 6/10 - loss=0.4100, val_acc=0.8615, val_loss=0.3785031574863911
Epoch 7/10 - loss=0.4009, val_acc=0.8687, val_loss=0.3651508078230196
Epoch 8/10 - loss=0.3923, val_acc=0.8683, val_loss=0.3617995887670953
Epoch 9/10 - loss=0.3849, val_acc=0.8672, val_loss=0.36319651669151926
Epoch 10/10 - loss=0.3787, val_acc=0.8712, val_loss=0.34816502378531755
test accuracy  0.8651


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▃▂▂▂▁▁▁▁▁
validation loss,█▅▃▃▂▂▂▁▂▁
validation_accuracy,▁▄▅▇▇▇██▇█

0,1
epoch,10.0
test_accuracy,0.8651
training_loss,0.37865
validation loss,0.34817
validation_accuracy,0.87117


[34m[1mwandb[0m: Agent Starting Run: drrt4jqi with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.3286, val_acc=0.4788, val_loss=1.7247003044037046
Epoch 2/10 - loss=1.5886, val_acc=0.5930, val_loss=1.154867398545692
Epoch 3/10 - loss=1.2422, val_acc=0.6370, val_loss=0.9495209020628984
Epoch 4/10 - loss=1.0791, val_acc=0.7253, val_loss=0.8104059371547335
Epoch 5/10 - loss=0.9743, val_acc=0.7363, val_loss=0.731014968783156
Epoch 6/10 - loss=0.9176, val_acc=0.7537, val_loss=0.6883078458508846
Epoch 7/10 - loss=0.8813, val_acc=0.7623, val_loss=0.65786346303495
Epoch 8/10 - loss=0.8532, val_acc=0.7750, val_loss=0.6312242724192679
Epoch 9/10 - loss=0.8290, val_acc=0.7760, val_loss=0.610993171902238
Epoch 10/10 - loss=0.8070, val_acc=0.7817, val_loss=0.5906480904410929
test accuracy  0.7808


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▅▃▂▂▂▁▁▁▁
validation loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▄▅▇▇▇████

0,1
epoch,10.0
test_accuracy,0.7808
training_loss,0.80699
validation loss,0.59065
validation_accuracy,0.78167


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: y2ro3sd9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.6723, val_acc=0.8445, val_loss=0.42365148436079864
Epoch 2/5 - loss=0.5523, val_acc=0.8487, val_loss=0.44211366317713197
Epoch 3/5 - loss=0.5147, val_acc=0.8578, val_loss=0.38822879046530395
Epoch 4/5 - loss=0.4942, val_acc=0.8650, val_loss=0.3754659202737542
Epoch 5/5 - loss=0.4795, val_acc=0.8632, val_loss=0.3728902203868882
test accuracy  0.8563


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▂▂▁
validation loss,▆█▃▁▁
validation_accuracy,▁▂▆█▇

0,1
epoch,5.0
test_accuracy,0.8563
training_loss,0.47945
validation loss,0.37289
validation_accuracy,0.86317


[34m[1mwandb[0m: Agent Starting Run: f87xxqoj with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=30.4415, val_acc=0.0977, val_loss=2.302858843106043
Epoch 2/5 - loss=3.2504, val_acc=0.1018, val_loss=2.3027532173210803
Epoch 3/5 - loss=2.3348, val_acc=0.1013, val_loss=2.3029450517591887
Epoch 4/5 - loss=2.3042, val_acc=0.0985, val_loss=2.3029858769476363
Epoch 5/5 - loss=2.3032, val_acc=0.0977, val_loss=2.3027969971346516
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▁▁▁▁
validation loss,▄▁▇█▂
validation_accuracy,▁█▇▂▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.30317
validation loss,2.3028
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: cgdfyx0x with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=0.7802, val_acc=0.8187, val_loss=0.5145951388510943
Epoch 2/5 - loss=0.4609, val_acc=0.8388, val_loss=0.44310659371847383
Epoch 3/5 - loss=0.4093, val_acc=0.8517, val_loss=0.40907662378758897
Epoch 4/5 - loss=0.3819, val_acc=0.8553, val_loss=0.39421180178255083
Epoch 5/5 - loss=0.3629, val_acc=0.8623, val_loss=0.37854985409963204
test accuracy  0.8598


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▃▂▁
validation_accuracy,▁▄▆▇█

0,1
epoch,5.0
test_accuracy,0.8598
training_loss,0.36293
validation loss,0.37855
validation_accuracy,0.86233


[34m[1mwandb[0m: Agent Starting Run: 20dlzybb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.3812, val_acc=0.1000, val_loss=2.296204657807809
Epoch 2/10 - loss=2.3543, val_acc=0.4233, val_loss=2.273477796346443
Epoch 3/10 - loss=2.2773, val_acc=0.2433, val_loss=2.0598365155284286
Epoch 4/10 - loss=1.8568, val_acc=0.3340, val_loss=1.5746356948254554
Epoch 5/10 - loss=1.5515, val_acc=0.4552, val_loss=1.3821994024648752
Epoch 6/10 - loss=1.4109, val_acc=0.5237, val_loss=1.2649309414208165
Epoch 7/10 - loss=1.3065, val_acc=0.5890, val_loss=1.1505908709539776
Epoch 8/10 - loss=1.1898, val_acc=0.6148, val_loss=1.0328391070222978
Epoch 9/10 - loss=1.0994, val_acc=0.6368, val_loss=0.9561902325615341
Epoch 10/10 - loss=1.0300, val_acc=0.6687, val_loss=0.8830060679599829
test accuracy  0.6714


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,██▇▅▄▃▂▂▁▁
validation loss,██▇▄▃▃▂▂▁▁
validation_accuracy,▁▅▃▄▅▆▇▇██

0,1
epoch,10.0
test_accuracy,0.6714
training_loss,1.03004
validation loss,0.88301
validation_accuracy,0.66867


[34m[1mwandb[0m: Agent Starting Run: hylo2itp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=95.2166, val_acc=0.4775, val_loss=1.7800650111483203
Epoch 2/5 - loss=80.5366, val_acc=0.5600, val_loss=1.6394115057582985
Epoch 3/5 - loss=68.3172, val_acc=0.5957, val_loss=1.6025541991826528
Epoch 4/5 - loss=58.0677, val_acc=0.6095, val_loss=1.6122791613616452
Epoch 5/5 - loss=49.4468, val_acc=0.6157, val_loss=1.645091694031292
test accuracy  0.6202


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▆▄▂▁
validation loss,█▂▁▁▃
validation_accuracy,▁▅▇██

0,1
epoch,5.0
test_accuracy,0.6202
training_loss,49.44676
validation loss,1.64509
validation_accuracy,0.61567


[34m[1mwandb[0m: Agent Starting Run: 63xbxgwu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.7434, val_acc=0.8345, val_loss=0.45199741271838245
Epoch 2/5 - loss=0.5438, val_acc=0.8587, val_loss=0.3903618507580223
Epoch 3/5 - loss=0.5050, val_acc=0.8573, val_loss=0.3841422179686083
Epoch 4/5 - loss=0.4808, val_acc=0.8645, val_loss=0.3668719768560803
Epoch 5/5 - loss=0.4627, val_acc=0.8707, val_loss=0.34833192408118596
test accuracy  0.8629


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▃▂▁
validation_accuracy,▁▆▅▇█

0,1
epoch,5.0
test_accuracy,0.8629
training_loss,0.46268
validation loss,0.34833
validation_accuracy,0.87067


[34m[1mwandb[0m: Agent Starting Run: lcucnhro with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=47.2151, val_acc=0.0985, val_loss=2.3042188020521883
Epoch 2/10 - loss=21.5474, val_acc=0.1013, val_loss=2.302614923564291
Epoch 3/10 - loss=10.5523, val_acc=0.0977, val_loss=2.302651333806653
Epoch 4/10 - loss=5.8391, val_acc=0.0977, val_loss=2.3026271815029657
Epoch 5/10 - loss=3.8187, val_acc=0.0977, val_loss=2.30259878823933
Epoch 6/10 - loss=2.9526, val_acc=0.1013, val_loss=2.3025762006257895
Epoch 7/10 - loss=2.5813, val_acc=0.0977, val_loss=2.302611931004415
Epoch 8/10 - loss=2.4221, val_acc=0.0977, val_loss=2.3026153595544825
Epoch 9/10 - loss=2.3539, val_acc=0.1000, val_loss=2.302624077130353
Epoch 10/10 - loss=2.3246, val_acc=0.0995, val_loss=2.3026167583474475
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▂▂▁▁▁▁▁▁
validation loss,█▁▁▁▁▁▁▁▁▁
validation_accuracy,▃█▁▁▁█▁▁▅▅

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.32464
validation loss,2.30262
validation_accuracy,0.0995


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aghp1nu5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=14.8764, val_acc=0.1013, val_loss=2.3026365843442704
Epoch 2/10 - loss=2.3047, val_acc=0.1013, val_loss=2.3025933760858552
Epoch 3/10 - loss=2.3027, val_acc=0.0977, val_loss=2.3026224470486745
Epoch 4/10 - loss=2.3027, val_acc=0.0977, val_loss=2.302621573233131
Epoch 5/10 - loss=2.3026, val_acc=0.1013, val_loss=2.3026189138527973
Epoch 6/10 - loss=2.3026, val_acc=0.0977, val_loss=2.3026189430427864
Epoch 7/10 - loss=2.3027, val_acc=0.0977, val_loss=2.302625888025992
Epoch 8/10 - loss=2.3026, val_acc=0.0977, val_loss=2.3026345674290707
Epoch 9/10 - loss=2.3026, val_acc=0.0977, val_loss=2.3026247606846986
Epoch 10/10 - loss=2.3026, val_acc=0.0977, val_loss=2.302621685659217
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▁▁▁▁▁▁▁▁▁
validation loss,█▁▆▆▅▅▆█▆▆
validation_accuracy,██▁▁█▁▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30265
validation loss,2.30262
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: 0xhlpv37 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/10 - loss=1.4033, val_acc=0.5407, val_loss=1.0104150807420715
Epoch 2/10 - loss=0.9135, val_acc=0.7040, val_loss=0.8198196181755584
Epoch 3/10 - loss=0.7019, val_acc=0.7743, val_loss=0.6492770275066525
Epoch 4/10 - loss=0.6010, val_acc=0.7905, val_loss=0.5971239895680953
Epoch 5/10 - loss=0.5605, val_acc=0.7985, val_loss=0.5799633708933367
Epoch 6/10 - loss=0.5402, val_acc=0.7992, val_loss=0.5798825372127182
Epoch 7/10 - loss=0.5211, val_acc=0.8073, val_loss=0.5614215859990448
Epoch 8/10 - loss=0.5107, val_acc=0.8100, val_loss=0.5518388797980307
Epoch 9/10 - loss=0.5012, val_acc=0.8087, val_loss=0.5510397742645318
Epoch 10/10 - loss=0.4918, val_acc=0.8098, val_loss=0.5492044562118099
test accuracy  0.8079


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▄▃▂▂▁▁▁▁▁
validation loss,█▅▃▂▁▁▁▁▁▁
validation_accuracy,▁▅▇▇██████

0,1
epoch,10.0
test_accuracy,0.8079
training_loss,0.49176
validation loss,0.5492
validation_accuracy,0.80983


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p2wa62wx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/10 - loss=7.5630, val_acc=0.1000, val_loss=2.3027340524116036
Epoch 2/10 - loss=2.3029, val_acc=0.1018, val_loss=2.3025894246413694
Epoch 3/10 - loss=2.3030, val_acc=0.0995, val_loss=2.302820816753907
Epoch 4/10 - loss=2.3036, val_acc=0.0977, val_loss=2.3027914770238787
Epoch 5/10 - loss=2.3032, val_acc=0.0977, val_loss=2.302806685213464
Epoch 6/10 - loss=2.3033, val_acc=0.1000, val_loss=2.302598155760871
Epoch 7/10 - loss=2.3032, val_acc=0.1023, val_loss=2.302591879423971
Epoch 8/10 - loss=2.3034, val_acc=0.1012, val_loss=2.3028602107363803
Epoch 9/10 - loss=2.3032, val_acc=0.1013, val_loss=2.302702958334325
Epoch 10/10 - loss=2.3037, val_acc=0.1012, val_loss=2.302725566439423
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▁▁▁▁▁▁▁▁▁
validation loss,▅▁▇▆▇▁▁█▄▅
validation_accuracy,▅▇▄▁▁▅█▆▇▆

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.30369
validation loss,2.30273
validation_accuracy,0.10117


[34m[1mwandb[0m: Agent Starting Run: tegq2llk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


Epoch 1/5 - loss=1.7306, val_acc=0.5438, val_loss=1.1806339629776827
Epoch 2/5 - loss=0.9892, val_acc=0.6722, val_loss=0.8678784964829244
Epoch 3/5 - loss=0.7773, val_acc=0.7338, val_loss=0.7369658382770962
Epoch 4/5 - loss=0.6845, val_acc=0.7555, val_loss=0.6775051110393526
Epoch 5/5 - loss=0.6317, val_acc=0.7712, val_loss=0.6303498820234652
test accuracy  0.7666


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▃▂▁▁
validation loss,█▄▂▂▁
validation_accuracy,▁▅▇██

0,1
epoch,5.0
test_accuracy,0.7666
training_loss,0.63172
validation loss,0.63035
validation_accuracy,0.77117


[34m[1mwandb[0m: Agent Starting Run: li9ekeof with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/10 - loss=2.4352, val_acc=0.1000, val_loss=2.303848310421912
Epoch 2/10 - loss=2.4314, val_acc=0.1018, val_loss=2.30335548281317
Epoch 3/10 - loss=2.4286, val_acc=0.0995, val_loss=2.3028956337973336
Epoch 4/10 - loss=2.4258, val_acc=0.0977, val_loss=2.302443665491736
Epoch 5/10 - loss=2.4223, val_acc=0.1000, val_loss=2.3013480053498014
Epoch 6/10 - loss=2.4197, val_acc=0.1013, val_loss=2.2989749397537285
Epoch 7/10 - loss=2.4158, val_acc=0.1230, val_loss=2.299001314704211
Epoch 8/10 - loss=2.4114, val_acc=0.0977, val_loss=2.2937725712257073
Epoch 9/10 - loss=2.4048, val_acc=0.1013, val_loss=2.2855419965348154
Epoch 10/10 - loss=2.3916, val_acc=0.1000, val_loss=2.2662700845054857
test accuracy  0.1


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_loss,█▇▇▆▆▆▅▄▃▁
validation loss,█████▇▇▆▅▁
validation_accuracy,▂▂▂▁▂▂█▁▂▂

0,1
epoch,10.0
test_accuracy,0.1
training_loss,2.39158
validation loss,2.26627
validation_accuracy,0.1


[34m[1mwandb[0m: Agent Starting Run: nuddm3nm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch 1/5 - loss=5.3494, val_acc=0.0977, val_loss=2.302699415573264
Epoch 2/5 - loss=2.3028, val_acc=0.0977, val_loss=2.3027081691802005
Epoch 3/5 - loss=2.3027, val_acc=0.1013, val_loss=2.3026405472652374
Epoch 4/5 - loss=2.3028, val_acc=0.0977, val_loss=2.3027341962743875
Epoch 5/5 - loss=2.3028, val_acc=0.0977, val_loss=2.3026691067141667
test accuracy  0.1


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▁▁▁▁
validation loss,▅▆▁█▃
validation_accuracy,▁▁█▁▁

0,1
epoch,5.0
test_accuracy,0.1
training_loss,2.30279
validation loss,2.30267
validation_accuracy,0.09767


[34m[1mwandb[0m: Agent Starting Run: vwgf7rdm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.6291, val_acc=0.8145, val_loss=0.4617105904462595
Epoch 2/5 - loss=0.5075, val_acc=0.8460, val_loss=0.40847044732482973
Epoch 3/5 - loss=0.4728, val_acc=0.8548, val_loss=0.387610026505924
Epoch 4/5 - loss=0.4503, val_acc=0.8542, val_loss=0.38147729664676056
Epoch 5/5 - loss=0.4355, val_acc=0.8713, val_loss=0.3373712979499467
test accuracy  0.865


0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_loss,█▄▂▂▁
validation loss,█▅▄▃▁
validation_accuracy,▁▅▆▆█

0,1
epoch,5.0
test_accuracy,0.865
training_loss,0.43546
validation loss,0.33737
validation_accuracy,0.87133


[34m[1mwandb[0m: Agent Starting Run: uaodd0uy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	beta_rms: 0.9
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 1/5 - loss=0.8189, val_acc=0.8330, val_loss=0.47288786305722436
Epoch 2/5 - loss=0.5876, val_acc=0.8490, val_loss=0.42048123166841916
Epoch 3/5 - loss=0.5446, val_acc=0.8553, val_loss=0.40074556277352696
Epoch 4/5 - loss=0.5177, val_acc=0.8617, val_loss=0.37286030160037376
