# Question-4

In [1]:
import numpy as np
import wandb

# Optimizer Class 
class Optimizer:
    def __init__(self, method="sgd", learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.method = method
        self.lr = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.velocity = {}
        self.cache = {}
        self.m = {}
        self.v = {}
        self.t = 0

    def update(self, weights, grads, key):
        if self.method == "sgd":
            return weights - self.lr * grads

        elif self.method == "momentum":
            if key not in self.velocity:
                self.velocity[key] = np.zeros_like(weights)
            self.velocity[key] = self.beta1 * self.velocity[key] - self.lr * grads
            return weights + self.velocity[key]

        elif self.method == "nag":
            if key not in self.velocity:
                self.velocity[key] = np.zeros_like(weights)
            lookahead = weights + self.beta1 * self.velocity[key]
            self.velocity[key] = self.beta1 * self.velocity[key] - self.lr * grads
            return lookahead + self.velocity[key]

        elif self.method == "rmsprop":
            if key not in self.cache:
                self.cache[key] = np.zeros_like(weights)
            self.cache[key] = self.beta1 * self.cache[key] + (1 - self.beta1) * (grads ** 2)
            return weights - self.lr * grads / (np.sqrt(self.cache[key]) + self.epsilon)

        elif self.method == "adam":
            self.t += 1
            if key not in self.m:
                self.m[key] = np.zeros_like(weights)
                self.v[key] = np.zeros_like(weights)
            self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * grads
            self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * (grads ** 2)
            m_hat = self.m[key] / (1 - self.beta1 ** self.t)
            v_hat = self.v[key] / (1 - self.beta2 ** self.t)
            return weights - self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)

def load_data():
    from keras.datasets import fashion_mnist  
    (X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
    X_train, X_test = X_train / 255.0, X_test / 255.0  # Normalize
    X_train = X_train.reshape(X_train.shape[0], -1)
    X_test = X_test.reshape(X_test.shape[0], -1)
    
    # Split 10% of training data for validation
    val_size = int(0.1 * X_train.shape[0])
    X_val, y_val = X_train[:val_size], y_train[:val_size]
    X_train, y_train = X_train[val_size:], y_train[val_size:]
    
    return X_train, y_train, X_val, y_val, X_test, y_test 

def initialize_weights(layers, init_type):
    weights = {}
    for i in range(len(layers) - 1):
        if init_type == "xavier":
            weights[f'W{i+1}'] = np.random.randn(layers[i], layers[i+1]) / np.sqrt(layers[i])
        elif init_type == "he":
            weights[f'W{i+1}'] = np.random.randn(layers[i], layers[i+1]) * np.sqrt(2 / layers[i])
        else:
            weights[f'W{i+1}'] = np.random.randn(layers[i], layers[i+1]) * 0.1  # Increase from 0.01
        weights[f'b{i+1}'] = np.zeros((1, layers[i+1]))
    return weights
    
def softmax(Z):
    expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True)) 
    return expZ / np.sum(expZ, axis=1, keepdims=True)

def forward_pass(X, weights, activation):
    Z, A = {}, {"A0": X}
    for i in range(1, len(weights) // 2):
        Z[f'Z{i}'] = np.dot(A[f'A{i-1}'], weights[f'W{i}']) + weights[f'b{i}']
        A[f'A{i}'] = np.maximum(0, Z[f'Z{i}']) if activation == "relu" else 1 / (1 + np.exp(-Z[f'Z{i}']))

    # Final layer (Softmax for classification)
    last_layer = len(weights) // 2
    Z[f'Z{last_layer}'] = np.dot(A[f'A{last_layer-1}'], weights[f'W{last_layer}']) + weights[f'b{last_layer}']
    A[f'A{last_layer}'] = softmax(Z[f'Z{last_layer}'])
    
    return Z, A

def backward_pass(X, Y, weights, A, Z, activation, weight_decay):
    grads = {}
    m = X.shape[0]
    dA = A[f'A{len(A)-1}'] - Y 

    for i in reversed(range(1, len(weights) // 2 + 1)):
        if activation == "relu":
            dZ = dA * (A[f'A{i}'] > 0)
        elif activation == "sigmoid":
            dZ = dA * A[f'A{i}'] * (1 - A[f'A{i}'])
        elif activation == "tanh":
            dZ = dA * (1 - A[f'A{i}'] ** 2)  # tanh derivative: 1 - tanh^2(x)
        else:
            raise ValueError("Unsupported activation function")

        grads[f'dW{i}'] = np.dot(A[f'A{i-1}'].T, dZ) / m + weight_decay * weights[f'W{i}']
        grads[f'db{i}'] = np.sum(dZ, axis=0, keepdims=True) / m
        dA = np.dot(dZ, weights[f'W{i}'].T)
    
    return grads


def apply_optimizer(optimizer, weights, grads):
    for i in range(1, len(weights) // 2 + 1):
        weights[f'W{i}'] = optimizer.update(weights[f'W{i}'], grads[f'dW{i}'], f'W{i}')
        weights[f'b{i}'] = optimizer.update(weights[f'b{i}'], grads[f'db{i}'], f'b{i}')

def compute_loss(Y, A, loss_function):
    m = Y.shape[0]
    if loss_function == "cross_entropy":
        return -np.sum(Y * np.log(A + 1e-8)) / m
    else:
        return np.mean((Y - A) ** 2) # MSE

def compute_accuracy(Y, A):
    return np.mean(np.argmax(Y, axis=1) == np.argmax(A, axis=1))

def train(config=None):
    with wandb.init(config=config, reinit=True):
        config = wandb.config
        wandb.run.name = (
            "_hl_" + str(config.num_layers) +
            "_hn_" + str(config.hidden_size) +
            "_opt_" + config.optimizer +
            "_act_" + config.activation +
            "_lr_" + str(config.learning_rate) +
            "_bs_" + str(config.batch_size) +
            "_init_" + config.weight_init +
            "_ep_" + str(config.epochs) +
            "_l2_" + str(config.weight_decay)
        )
        
        X_train, y_train, X_val, y_val, _, _ = load_data()
        num_classes = 10
        y_train, y_val = [np.eye(num_classes)[y] for y in [y_train, y_val]]
        
        layers = [784] + [config.hidden_size] * config.num_layers + [num_classes]
        weights = initialize_weights(layers, config.weight_init)
        
        optimizer = Optimizer(method=config.optimizer, learning_rate=config.learning_rate)
        
        for epoch in range(config.epochs):
            for i in range(0, X_train.shape[0], config.batch_size):
                X_batch = X_train[i:i + config.batch_size]
                y_batch = y_train[i:i + config.batch_size]
                
                Z, A = forward_pass(X_batch, weights, config.activation)
                grads = backward_pass(X_batch, y_batch, weights, A, Z, config.activation, config.weight_decay)
                apply_optimizer(optimizer, weights, grads)
                
            Z_train, A_train = forward_pass(X_train, weights, config.activation)
            train_loss = compute_loss(y_train, A_train[f'A{len(A_train)-1}'], config.loss)
            train_acc = compute_accuracy(y_train, A_train[f'A{len(A_train)-1}'])

            Z_val, A_val = forward_pass(X_val, weights, config.activation)
            val_loss = compute_loss(y_val, A_val[f'A{len(A_val)-1}'], config.loss)
            val_acc = compute_accuracy(y_val, A_val[f'A{len(A_val)-1}'])
            
            wandb.log({"epoch": epoch + 1, "train_loss": train_loss, "train_accuracy": train_acc,
                       "val_loss": val_loss, "val_accuracy": val_acc})

sweep_config = {
    "method": "bayes",
    "metric": {"name": "val_loss", "goal": "minimize"},
    "parameters": {
        "activation": {"values": ["relu", "sigmoid", "tanh"]},
        "batch_size": {"values": [32, 64, 128]},
        "epochs": {"values": [5, 10, 20]},
        "hidden_size": {"values": [32, 64, 128]},
        "learning_rate": {"values": [0.01, 0.001, 0.0001]},
        "num_layers": {"values": [2, 3, 4]},
        "optimizer": {"values": ["sgd", "momentum","nag","adam", "rmsprop"]},
        "weight_decay": {"values": [0.0001, 0.0005, 0.001]},
        "weight_init": {"values": ["random", "xavier"]},
        "loss": {"values": ["cross_entropy"]}
    }}

sweep_id = wandb.sweep(sweep_config, project="fashion-mnist-dataset")
wandb.agent(sweep_id, function=train, count=50)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: f4yt29uq
Sweep URL: https://wandb.ai/ma23c014-indian-institute-of-technology-madras/fashion-mnist-dataset/sweeps/f4yt29uq


wandb: Agent Starting Run: 6kw719nc with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: momentum
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: random
wandb: Currently logged in as: ma23c014 (ma23c014-indian-institute-of-technology-madras) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▄▆▆▇▇▇██
train_loss,█▅▅▄▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▅▃▃▂▂▂▁▁

0,1
epoch,10.0
train_accuracy,0.86219
train_loss,0.37697
val_accuracy,0.851
val_loss,0.41279


wandb: Agent Starting Run: g3vfy0k3 with config:
wandb: 	activation: tanh
wandb: 	batch_size: 64
wandb: 	epochs: 20
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: random


  A[f'A{i}'] = np.maximum(0, Z[f'Z{i}']) if activation == "relu" else 1 / (1 + np.exp(-Z[f'Z{i}']))


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▂▄▅▆▆▆▇▇███▇▇▇▇▇███
train_loss,█▇▅▄▃▃▃▃▂▂▂▁▂▂▂▂▂▁▁▁
val_accuracy,▁▃▄▅▆▆▆▇▇███▇▇▇▇▇███
val_loss,█▇▅▃▃▃▃▂▂▂▁▁▁▂▂▂▂▁▁▁

0,1
epoch,20.0
train_accuracy,0.73654
train_loss,0.73798
val_accuracy,0.73833
val_loss,0.75304


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: ranufayz with config:
wandb: 	activation: sigmoid
wandb: 	batch_size: 64
wandb: 	epochs: 20
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.0001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 2
wandb: 	optimizer: sgd
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: random


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,████▇▇▆▅▅▄▃▂▂▁▁▁▁▁▁▁
train_loss,█▇▇▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁
val_accuracy,█████▇▇▆▅▅▅▅▄▃▃▂▁▁▂▂
val_loss,█▇▇▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁

0,1
epoch,20.0
train_accuracy,0.09469
train_loss,2.39926
val_accuracy,0.08783
val_loss,2.40403


wandb: Agent Starting Run: xh7chx3r with config:
wandb: 	activation: tanh
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.0001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▅▅█▇▇▇█
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▅▆█▆▇▇█
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_accuracy,0.54759
train_loss,1.11113
val_accuracy,0.55017
val_loss,1.11022


wandb: Agent Starting Run: 12yq54e1 with config:
wandb: 	activation: sigmoid
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.0001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: sgd
wandb: 	weight_decay: 0.0001
wandb: 	weight_init: random


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▅▄▃▂▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▅▅▄▃▂▂▁

0,1
epoch,10.0
train_accuracy,0.09996
train_loss,2.43683
val_accuracy,0.10033
val_loss,2.4276


wandb: Agent Starting Run: c06iauwh with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: nag
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇▇██████
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇████████
val_loss,█▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.87959
train_loss,0.33821
val_accuracy,0.87033
val_loss,0.3638


wandb: Agent Starting Run: bk56c4t4 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▇▃▃▆█▂▄▄▇▆▂▃▁▄▄▃▃▃
train_loss,▆▅▂▅▆▂▁█▅▄▂▃▆▆▇▄▅▅█▆
val_accuracy,▂▃▇▃▃▆█▂▄▄█▆▂▃▁▅▄▄▃▃
val_loss,▆▅▂▅▆▂▁█▅▄▂▃▇▆▇▄▅▅█▆

0,1
epoch,20.0
train_accuracy,0.77293
train_loss,0.64859
val_accuracy,0.7795
val_loss,0.65397


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: p1k1x3z8 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇███
train_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇▇▇██████
val_loss,█▅▄▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.89346
train_loss,0.29028
val_accuracy,0.87617
val_loss,0.33709


wandb: Agent Starting Run: e4p2idl3 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▆▇▇████
train_loss,█▅▃▃▂▂▁▁▁▁
val_accuracy,▁▆▇▇▇██▇▇█
val_loss,█▄▂▂▂▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.88991
train_loss,0.30085
val_accuracy,0.87583
val_loss,0.33927


wandb: Agent Starting Run: vkl3xyhh with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▆▆▁█▇▇▃▆▅▁
train_loss,▃▃█▁▂▂▅▃▃▇
val_accuracy,▆▇▁█▇█▄▆▄▁
val_loss,▃▃█▁▂▂▅▃▃▇

0,1
epoch,10.0
train_accuracy,0.77015
train_loss,0.60044
val_accuracy,0.77117
val_loss,0.60594


wandb: Agent Starting Run: ib9dbxfe with config:
wandb: 	activation: relu
wandb: 	batch_size: 64
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▆▃▂▄▁▅▅▆▄▄▅█▅▇▆▇▆▇▃
train_loss,▇▄▇▇▅█▅▅▄▆▄▃▁▃▂▃▂▃▁▇
val_accuracy,▂▅▃▂▄▁▄▅▅▄▄▅█▅▇▆▆▆▇▃
val_loss,▆▄▇▇▅█▅▆▅▇▅▃▁▄▃▄▃▄▂█

0,1
epoch,20.0
train_accuracy,0.80304
train_loss,0.55118
val_accuracy,0.8015
val_loss,0.56369


wandb: Agent Starting Run: cwmvdcni with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 5
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: nag
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▃▅▆█
train_accuracy,▁▅▆▇█
train_loss,█▄▃▁▁
val_accuracy,▁▄▆██
val_loss,█▄▃▁▁

0,1
epoch,5.0
train_accuracy,0.85533
train_loss,0.399
val_accuracy,0.851
val_loss,0.418


wandb: Agent Starting Run: 66gkx7fd with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▅▅▇▆▆▆▅▇▅▇█▁▆█▇▇█▆
train_loss,█▆▄▄▂▃▃▃▃▃▄▃▁▅▃▁▂▂▂▂
val_accuracy,▂▄▅▅▇▆▆▅▄▇▄▇█▁▅█▇▇▇▇
val_loss,█▆▄▅▂▃▃▃▄▃▅▃▁▆▄▁▂▂▂▂

0,1
epoch,20.0
train_accuracy,0.83369
train_loss,0.44432
val_accuracy,0.8355
val_loss,0.45091


wandb: Agent Starting Run: 5qb9uai3 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▅▆▆▇████
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▅▆▆▆▇████
val_loss,█▄▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.87513
train_loss,0.33911
val_accuracy,0.86483
val_loss,0.37264


wandb: Agent Starting Run: 8c8ztaes with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: nag
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▅▆▆▇████
train_loss,█▄▄▃▃▂▂▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.87531
train_loss,0.34194
val_accuracy,0.86633
val_loss,0.37329


wandb: Agent Starting Run: isisfoq8 with config:
wandb: 	activation: relu
wandb: 	batch_size: 64
wandb: 	epochs: 5
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 2
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▃▅▆█
train_accuracy,▆▄▁▁█
train_loss,▄▄█▅▁
val_accuracy,█▇▁▃█
val_loss,▃▂█▄▁

0,1
epoch,5.0
train_accuracy,0.79896
train_loss,0.53926
val_accuracy,0.79567
val_loss,0.54997


wandb: Agent Starting Run: kuehhk3w with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 5
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: nag
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▃▅▆█
train_accuracy,▁▆▇██
train_loss,█▃▂▁▁
val_accuracy,▁▆▇██
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_accuracy,0.85957
train_loss,0.38805
val_accuracy,0.8555
val_loss,0.40146


wandb: Agent Starting Run: xsxux03h with config:
wandb: 	activation: tanh
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,███▅▃▂▁▃▂▄▃▂▂▂▂▂▁▁▁▁
train_loss,▂▁▃▅▇██▆▆▆▇▇▇▇▇▇▇███
val_accuracy,███▅▃▂▁▄▂▄▃▂▂▂▂▂▂▂▂▁
val_loss,▂▁▂▅▇██▆▆▆▇▇▇▇▇▇████

0,1
epoch,20.0
train_accuracy,0.37093
train_loss,1.49657
val_accuracy,0.37733
val_loss,1.50466


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: h175hlf4 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.0001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.86089
train_loss,0.39677
val_accuracy,0.85883
val_loss,0.4057


wandb: Agent Starting Run: 01pv3od4 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: nag
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▅▅▆▆▅▇▆▇▇▇▇▇▇▇▇██▇█
train_loss,█▅▄▃▃▄▂▃▂▂▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▅▅▆▇▆▇▇▇███▇▇▇▇██▇█
val_loss,█▄▄▃▃▄▂▂▂▂▁▂▂▂▂▂▁▁▂▁

0,1
epoch,20.0
train_accuracy,0.88717
train_loss,0.30567
val_accuracy,0.87733
val_loss,0.33673


wandb: Agent Starting Run: 3tywmx1i with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: momentum
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇▇███████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▄▆▆▇▇▇▇▇███████████
val_loss,█▅▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.88465
train_loss,0.31226
val_accuracy,0.86767
val_loss,0.35769


wandb: Agent Starting Run: p12gszu2 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▄▅▅▆▇███
train_loss,█▅▅▄▃▃▂▂▁▁
val_accuracy,▁▄▃▅▅▅▇▇██
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.87607
train_loss,0.33779
val_accuracy,0.86767
val_loss,0.36807


wandb: Agent Starting Run: 9ft9yafw with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▅▆▆▆▇▇▇▇█████████
train_loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▅▆▆▇▇▇▇██████████
val_loss,█▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.88781
train_loss,0.30921
val_accuracy,0.87533
val_loss,0.35588


wandb: Agent Starting Run: 1s4s1acg with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.0001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.84339
train_loss,0.44326
val_accuracy,0.845
val_loss,0.44575


wandb: Agent Starting Run: j7901eqk with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▄▁▄▇▆▇▇▅█▇█▅▇▆▇▇▇▆▄▆
train_loss,▇█▆▂▄▂▂▄▁▂▁▅▂▃▂▂▃▃▅▂
val_accuracy,▃▁▄▇▆▇▇▅█▇█▄▇▆▇▇▆▆▄▆
val_loss,▇█▆▂▄▂▂▄▁▂▁▆▂▄▂▂▃▃▆▂

0,1
epoch,20.0
train_accuracy,0.84231
train_loss,0.41734
val_accuracy,0.83917
val_loss,0.43007


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: grt30gs1 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: nag
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.85881
train_loss,0.39383
val_accuracy,0.8585
val_loss,0.39608


wandb: Agent Starting Run: afauxmch with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▃▂▂▂▂▁▁▁
val_accuracy,▁▅▆▇▇█▇█▇█
val_loss,█▄▃▂▂▁▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.88233
train_loss,0.32449
val_accuracy,0.86617
val_loss,0.37156


wandb: Agent Starting Run: qky0cmbl with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.0001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇▇███████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▄▆▆▇▇▇▇▇███████████
val_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.876
train_loss,0.35366
val_accuracy,0.8685
val_loss,0.37548


wandb: Agent Starting Run: wfdo1uio with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▂▄▄▇▇▇██▇▆▆▇▇▇▆▇▇▇█
train_loss,▇█▅▅▂▃▂▁▁▂▂▁▂▂▁▂▁▂▂▁
val_accuracy,▁▂▄▄▇▇▇█▇▆▅▆▆▆▆▆▆▇▇█
val_loss,▇█▅▅▂▃▂▁▁▂▃▁▂▁▂▃▁▁▂▁

0,1
epoch,20.0
train_accuracy,0.84365
train_loss,0.43542
val_accuracy,0.84133
val_loss,0.44913


wandb: Agent Starting Run: b971fs12 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: momentum
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▅▆▇▇▇▇▇▇▇██████████
train_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▅▆▇▇▇▇▇████████████
val_loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.86757
train_loss,0.37801
val_accuracy,0.86083
val_loss,0.40246


wandb: Agent Starting Run: b10p7f7w with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▆▄▅▇▇█▇
train_loss,█▄▃▃▅▃▂▂▁▃
val_accuracy,▁█▇▆▃▄▆▆█▆
val_loss,█▃▃▃▇▄▃▂▁▄

0,1
epoch,10.0
train_accuracy,0.82435
train_loss,0.48712
val_accuracy,0.8195
val_loss,0.49979


wandb: Agent Starting Run: ku0kj0s2 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: momentum
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▇▇████
train_loss,█▆▄▃▂▂▂▁▁▁
val_accuracy,▁▃▅▅▇▇█▇██
val_loss,█▆▄▃▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.88002
train_loss,0.32776
val_accuracy,0.87317
val_loss,0.35824


wandb: Agent Starting Run: 89z4jmrw with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▅▆▆▆▆▇▇▆▇▇▇▇█▇▇██
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▁▂▁▂▁▁▁
val_accuracy,▁▄▆▆▆▇▇▇▇▇▆▇▇█▇█▇███
val_loss,█▄▃▃▂▂▂▂▁▁▂▂▂▁▂▁▂▂▁▁

0,1
epoch,20.0
train_accuracy,0.90094
train_loss,0.26607
val_accuracy,0.87983
val_loss,0.33346


wandb: Agent Starting Run: 1stn5w91 with config:
wandb: 	activation: relu
wandb: 	batch_size: 32
wandb: 	epochs: 5
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: random


0,1
epoch,▁▃▅▆█
train_accuracy,▁▃█▇▃
train_loss,█▆▁▅▅
val_accuracy,▁▂█▄▂
val_loss,█▆▁▆▆

0,1
epoch,5.0
train_accuracy,0.79013
train_loss,0.57766
val_accuracy,0.79117
val_loss,0.58399


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: qjxea2ga with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 5
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.0001
wandb: 	weight_init: xavier


0,1
epoch,▁▃▅▆█
train_accuracy,▁▅▇██
train_loss,█▄▂▁▁
val_accuracy,▁▅███
val_loss,█▄▁▁▁

0,1
epoch,5.0
train_accuracy,0.85796
train_loss,0.39621
val_accuracy,0.85383
val_loss,0.4263


wandb: Agent Starting Run: 15xdm59y with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▄▅▆▆▆▆▇▇▇▇████████
train_loss,█▆▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇▇▇▇██████████
val_loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.89115
train_loss,0.29932
val_accuracy,0.873
val_loss,0.34884


wandb: Agent Starting Run: jm11tiwh with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 5
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: random


0,1
epoch,▁▃▅▆█
train_accuracy,▂▁███
train_loss,▇█▁▃▁
val_accuracy,▃▁███
val_loss,▆█▁▃▁

0,1
epoch,5.0
train_accuracy,0.81996
train_loss,0.49886
val_accuracy,0.81617
val_loss,0.51008


wandb: Agent Starting Run: 18bprwkm with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: nag
wandb: 	weight_decay: 0.0001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▆▇▇████
train_loss,█▅▃▃▂▂▁▁▁▁
val_accuracy,▁▄▆▇██████
val_loss,█▅▃▂▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.8718
train_loss,0.34568
val_accuracy,0.86233
val_loss,0.38165


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: it30wj4f with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▅▅▆▆▆▇▇▇▇▇▇▇▇▇████
train_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▄▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇███
val_loss,█▅▄▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.89252
train_loss,0.29003
val_accuracy,0.87517
val_loss,0.35593


wandb: Agent Starting Run: g3ldiip8 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.0001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▃▄▅▅▆▆▆▇▇▇▇▇▇█████
train_loss,█▆▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▃▃▄▅▆▆▇▇▇▇████████▇
val_loss,█▆▅▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.90596
train_loss,0.25445
val_accuracy,0.88017
val_loss,0.33169


wandb: Agent Starting Run: 1ulmhhcu with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: random


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▅▆▆▁▄▄▆▆▇▁▅▄▇▇██▂▅▇▇
train_loss,▃▃▃▆▆▄▃▃▂█▃█▂▂▁▁▇▅▂▂
val_accuracy,▆▇▇▂▄▅▆▆▇▁▆▄▇▆██▃▅▇▇
val_loss,▃▂▃▆▅▄▃▃▂█▃▇▂▂▁▁▇▅▂▂

0,1
epoch,20.0
train_accuracy,0.84393
train_loss,0.42527
val_accuracy,0.8395
val_loss,0.44364


wandb: Agent Starting Run: 4xtzmt91 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▂▃▄▅▅▅▆▆▆▇▆▇▇█▇████
train_loss,█▆▅▅▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁
val_accuracy,▁▃▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇█▇
val_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▁▂▂▁▁▁

0,1
epoch,20.0
train_accuracy,0.88778
train_loss,0.30628
val_accuracy,0.86933
val_loss,0.36075


wandb: Agent Starting Run: uvxfh4rj with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 2
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.0001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▆▇██▇▄█
train_loss,█▅▃▃▂▁▁▂▄▁
val_accuracy,▁▅▇▆▇██▇▄▇
val_loss,█▅▂▃▂▁▁▂▆▂

0,1
epoch,10.0
train_accuracy,0.86835
train_loss,0.36451
val_accuracy,0.85717
val_loss,0.40953


wandb: Agent Starting Run: a5k5tmg3 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇████████
train_loss,█▆▅▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▆▆▇▇▇▇▇▇███████
val_loss,█▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.88809
train_loss,0.3045
val_accuracy,0.87083
val_loss,0.35109


wandb: Agent Starting Run: vrrr1egm with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.0001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▅▆▇▇▇▇█
train_loss,█▄▃▃▃▁▂▂▂▁
val_accuracy,▁▄▆▅▆▇█▆▇█
val_loss,█▄▂▄▃▁▁▂▂▁

0,1
epoch,10.0
train_accuracy,0.8698
train_loss,0.35517
val_accuracy,0.86167
val_loss,0.39817


wandb: Agent Starting Run: aewu4h1a with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.0001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 4
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.85165
train_loss,0.41681
val_accuracy,0.85517
val_loss,0.4212


wandb: Agent Starting Run: 28kgaj68 with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.0001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: rmsprop
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇▇███████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▄▅▆▆▆▇▇▇▇▇█████████
val_loss,█▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_accuracy,0.87998
train_loss,0.34291
val_accuracy,0.87117
val_loss,0.36477


wandb: Agent Starting Run: bf5456ri with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: adam
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▃▂▁▁▁
val_accuracy,▁▄▅▆▆▆▇███
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.87813
train_loss,0.33808
val_accuracy,0.871
val_loss,0.36188


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: 004wfujl with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 20
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.01
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: nag
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▄▆▆▆▆▆▇▇▇█▇▇██████
train_loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇▇▇▇▇█▇▇█████▇
val_loss,█▅▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▂

0,1
epoch,20.0
train_accuracy,0.88481
train_loss,0.31235
val_accuracy,0.86933
val_loss,0.35348


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: 6yn7u4fh with config:
wandb: 	activation: relu
wandb: 	batch_size: 128
wandb: 	epochs: 10
wandb: 	hidden_size: 32
wandb: 	learning_rate: 0.001
wandb: 	loss: cross_entropy
wandb: 	num_layers: 3
wandb: 	optimizer: nag
wandb: 	weight_decay: 0.001
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.85389
train_loss,0.41771
val_accuracy,0.8535
val_loss,0.4279
