


# CS6910 Assignment-1

by
- Akansh Maurya (CS22Z003)
- Tejoram Vivekanandan (EE22Z001)

In [48]:
from keras.datasets import fashion_mnist
import numpy as np
from  matplotlib import pyplot as plt
import time
import math
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm

### Question 1: Loading and ploting the dataset

In [49]:
dataset= fashion_mnist.load_data()
(X_train_and_validation, y_train_and_validation), (X_test, y_test) = dataset
X_train, X_validation, y_train, y_validation = train_test_split(X_train_and_validation, y_train_and_validation, test_size=0.1, random_state=42)
X_train = (X_train/255.0).astype(np.float32)
X_validation = (X_validation/255.0).astype(np.float32)
X_test = (X_test/255.0).astype(np.float32)

print("Train Dataset Shape: ", X_train.shape)
print("Train Target Vector Shape: ", y_train.shape) 
print("Test Dataset Shape:", X_test.shape)
print("Test Target Vector Shape", y_test.shape)
print("Validation Dataset Shape:", X_validation.shape)
print("Validation Target Vector Shape", y_validation.shape)




Train Dataset Shape:  (54000, 28, 28)
Train Target Vector Shape:  (54000,)
Test Dataset Shape: (10000, 28, 28)
Test Target Vector Shape (10000,)
Validation Dataset Shape: (6000, 28, 28)
Validation Target Vector Shape (6000,)


In [50]:
X_train = np.array(X_train.reshape(X_train.shape[0], 784,1))         
X_test = np.array(X_test.reshape(X_test.shape[0], 784,1))
X_validation = np.array(X_validation.reshape(X_validation.shape[0], 784,1))

**Implement a feedforward and backpropagation**

In [51]:
#Activation function
def activation(activation_function):
  if activation_function == 'sigmoid':
    return sigmoid
  if activation_function == 'tanh':
    return tanh
  if activation_function == 'ReLU':
    return relu

def sigmoid(x, derivative = False):
  if derivative:
    return sigmoid(x)*(1-sigmoid(x))
  return 1/(1 + np.exp(-x))  

def tanh(x, derivative = False):
  if derivative:
    return 1 - tanh(x)**2
  return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))

def relu(x, derivative = False):
  if derivative:
    return (x>0)*1 
  return x*(x>0)

def softmax(x,derivative = False):
  if derivative:
    return softmax(x)*(1- softmax(x))
  return np.exp(x)/np.sum(np.exp(x), axis = 0)

def one_hot(y, num_output_nodes):
  v = np.zeros((num_output_nodes, len(y)))
  for i,j in enumerate(y):
    v[j,i] = 1
  return v

### Weight Initialization
def param_inint(num_inputs_nodes, hidden_layers, num_output_nodes, init_type):
  W = []
  B = []
  if init_type == "random":
    W.append(np.random.randn(hidden_layers[0],num_inputs_nodes)*0.1)
    B.append(np.random.randn(hidden_layers[0], 1)*0.1)
    for i in range(len(hidden_layers)-1):
      W.append(np.random.randn(hidden_layers[i+1],hidden_layers[i])*0.1)
      B.append(np.random.randn(hidden_layers[i+1], 1)*0.1)
    W.append(np.random.randn(num_output_nodes, hidden_layers[-1])*0.1)
    B.append(np.random.randn(num_output_nodes, 1)*0.1)
    return W, B

  if init_type == "xavier":
    W.append(np.random.randn(hidden_layers[0],num_inputs_nodes)*np.sqrt(2/(hidden_layers[0] + num_inputs_nodes)))
    B.append(np.random.randn(hidden_layers[0], 1)*0.1)
    for i in range(len(hidden_layers)-1):
      W.append(np.random.randn(hidden_layers[i+1],hidden_layers[i])*np.sqrt(2/(hidden_layers[i+1] + hidden_layers[i])))
      B.append(np.random.randn(hidden_layers[i+1], 1)*0.1)
    W.append(np.random.randn(num_output_nodes, hidden_layers[-1])*np.sqrt(2/(num_output_nodes + hidden_layers[-1])))
    B.append(np.random.randn(num_output_nodes, 1)*0.1)
    return W, B


def feed_forward(x, W, B, activation_type):
  h = []
  a = []
  sigma = activation(activation_type)  #activation
  h.append(x)   #h0 = x
  a.append(np.dot(W[0], h[0]) + B[0])
  for i in range(len(W)-1):
    h.append(sigma(a[-1]))
    a.append(np.dot(W[i+1], h[-1]) + B[i+1])
  y_hat = softmax(a[-1])

  return y_hat, h, a



def loss_compute(y,y_hat, loss_type, W, reg_lamda):
  if loss_type == "squared_error":
    error = np.sum((one_hot(y, 10)-y_hat)**2)/(2*one_hot(y, 10).shape[1])
  if loss_type == "cross_entropy":
    error = -1*np.sum(np.multiply(one_hot(y, 10),np.log(y_hat)))/one_hot(y, 10).shape[1]         # hardcoded classes = 10

  if W:
    r = 0
    for i in range(len(W)):
      r += np.sum((np.array(W, dtype = object) **2)[i])
    error = error + reg_lamda * r

  return error


def accuracy(y_hat, y_true):
  return np.mean(np.argmax(y_hat, axis = 0) ==y_true )*100


##### Back Propogation

In [52]:
def back_prop(x, y, y_hat, a, h , W, B, batch_size, loss_type, activation_type):
  gh = [0]*len(h)
  ga = [0]*len(a)
  gw = [0]*len(W)
  gb = [0]*len(B)

  sigma = activation(activation_type) 

  if loss_type == "cross_entropy":
    gh[-1] = -1*(y/y_hat)
    ga[-1] = -1*(y-y_hat)
  if loss_type == "squared_error":   ##### edit this
    gh[-1] = y_hat - y
    ga[-1] = (y_hat - y)*softmax(a[-1])*(1-softmax(a[-1]))

  for i in range(len(W)-1, -1, -1):
    gw[i] = np.dot(ga[i], h[i].T)
    gb[i] = np.dot(ga[i], np.ones((batch_size,1)))
    if i > 0:
      gh[i-1] = np.dot(W[i].T, ga[i])
      ga[i-1]  = np.multiply(gh[i-1],sigma(a[i-1], derivative = True))

  return gw, gb, gh, ga

**Optimizing functions**

In [53]:
class SGD:
  ''' Stochastic Gradient Descent '''
  def __init__(self, lr = 0.001, reg = 0):
    self.lr = lr
    self.reg = reg
  
  def update(self, w,b, gW, gB):
    W = np.array(w, dtype = object)
    B = np.array(b, dtype = object)

    W = (1-self.lr*self.reg)*W - self.lr * np.array(gW, dtype = object)
    B = (1-self.lr*self.reg)*B - self.lr * np.array(gB, dtype = object)

    return W.tolist(),B.tolist()


class Momentum:

  def __init__(self, lr = 0.001, gamma = 0.9, reg = 0):
    self.lr = lr
    self.gamma = gamma
    self.Wmoments = None
    self.Bmoments = None
    self.reg = reg


  def update(self, w,b, gW, gB):
    params = {'w':w, 'b':b}

    if self.Wmoments == None:
      self.Wmoments = [0] * len(params['w'])
      for i in range(len(params['w'])):
        self.Wmoments[i] = np.zeros_like(params['w'][i])

    if self.Bmoments == None:
      self.Bmoments = [0] * len(params['b'])
      for i in range(len(params['b'])):
        self.Bmoments[i] = np.zeros_like(params['b'][i])
    
    self.Wmoments = self.gamma * np.array(self.Wmoments, dtype = object) + self.lr * np.array(gW, dtype = object)
    W = (1-self.lr*self.reg)*np.array(params['w'], dtype = object) - self.Wmoments
    self.Wmoments = self.Wmoments.tolist()

    self.Bmoments = self.gamma * np.array(self.Bmoments, dtype = object) + self.lr * np.array(gB, dtype = object)
    B = (1-self.lr*self.reg)*np.array(params['b'], dtype = object) - self.Bmoments
    self.Bmoments = self.Bmoments.tolist()
    
    return W.tolist(), B.tolist()


class RMSprop:
  def __init__(self, lr=0.01, beta = 0.99):
    
    self.lr = lr
    self.vW = None
    self.vB = None
    self.beta = beta

  def update(self, w,b, gW, gB):
    params = {'w':w, 'b':b}
    if self.vW == None:
      self.vW = [0] * len(params['w'])
      for i in range(len(params['w'])):
        self.vW[i] = np.zeros_like(params['w'][i])

    if self.vB == None:
      self.vB = [0] * len(params['b'])
      for i in range(len(params['b'])):
        self.vB[i] = np.zeros_like(params['b'][i])

    self.vW = self.beta*np.array(self.vW, dtype = object) + (1-self.beta)*(np.array(gW, dtype = object) **2) 
    W = (1-self.lr*self.reg)*np.array(params['w'], dtype = object) - (self.lr/((self.vW + 1e-7)**0.5)) * np.array(gW, dtype = object)
    self.vW = self.vW.tolist()

    self.vB = self.beta*np.array(self.vB, dtype = object) + (1-self.beta)*(np.array(gB, dtype = object) **2 )
    B = (1-self.lr*self.reg)*np.array(params['b'], dtype = object) - (self.lr/((self.vB + 1e-7)**0.5)) * np.array(gB, dtype = object)
    self.vB = self.vB.tolist()

    return W.tolist(), B.tolist()

class Nesterov:   
  def __init__(self, lr=0.01, gamma=0.9):
    self.lr = lr
    self.reg = None
    self.gamma = gamma                                                             
    self.Wmoments = None
    self.Bmoments = None
    self.activation_type = None
    self.loss_type = None
        
  def update(self, w,b, gW, gB):
    params = {'w':w, 'b':b}
    if self.Wmoments == None:
      self.Wmoments = [0] * len(params['w'])
      for i in range(len(params['w'])):
        self.Wmoments[i] = np.zeros_like(params['w'][i])

    if self.Bmoments == None:
      self.Bmoments = [0] * len(params['b'])
      for i in range(len(params['b'])):
        self.Bmoments[i] = np.zeros_like(params['b'][i])


    W_look_ahead = np.array(params['w'], dtype = object) - self.gamma*np.array(self.Wmoments, dtype = object)
    B_look_ahead = np.array(params['b'], dtype = object) - self.gamma*np.array(self.Bmoments, dtype = object)
    ##
    out, temp_h, temp_a = feed_forward(x,W_look_ahead.tolist(),B_look_ahead.tolist(), self.activation_type)
    gW_look_ahead, gB_look_ahead, _, _ = back_prop(x, y,out,temp_a,temp_h, W_look_ahead.tolist(),B_look_ahead.tolist(), x.shape[1], self.loss_type, self.activation_type)

    ###
    self.Wmoments = self.gamma*np.array(self.Wmoments, dtype = object) + self.lr * np.array(gW_look_ahead, dtype = object)
    self.Bmoments = self.gamma*np.array(self.Bmoments, dtype = object) + self.lr * np.array(gB_look_ahead, dtype = object)

    W = (1-self.lr*self.reg)*np.array(params['w'], dtype = object) - self.Wmoments
    self.Wmoments = self.Wmoments.tolist()

    B = (1-self.lr*self.reg)*np.array(params['b'], dtype = object) - self.Bmoments
    self.Bmoments = self.Bmoments.tolist()

    return W.tolist(), B.tolist()

class Adam:
  def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, reg = 0):
    self.lr = lr
    self.beta1 = beta1
    self.beta2 = beta2
    self.t = 0
    self.mW = None
    self.vW = None
    self.mB = None
    self.vB = None
    self.reg = None
        
  def update(self, w,b, gW, gB):
    params = {'w':w, 'b':b}

    if self.mW is None:
      self.mW, self.vW = [0] * len(params['w']), [0] * len(params['w'])
      for i in range(len(params['w'])):
        self.mW[i] = np.zeros_like(params['w'][i])
        self.vW[i] = np.zeros_like(params['w'][i])

    if self.mB is None:
      self.mB, self.vB = [0] * len(params['b']), [0] * len(params['b'])
      for i in range(len(params['b'])):
        self.mB[i] = np.zeros_like(params['b'][i])
        self.vB[i] = np.zeros_like(params['b'][i])
    

    self.t += 1
    self.mW = (self.beta1 * np.array(self.mW, dtype = object)) + (1-self.beta1)*(np.array(gW, dtype = object))
    self.vW = (self.beta2 * np.array(self.vW, dtype = object)) + (1-self.beta2)*((np.array(gW, dtype = object)**2))

    self.mB = (self.beta1 * np.array(self.mB, dtype = object)) + (1-self.beta1)*(np.array(gB, dtype = object))
    self.vB = (self.beta2 * np.array(self.vB, dtype = object)) + (1-self.beta2)*((np.array(gB, dtype = object)**2))

    # Bias Correction
    self.mW = (self.mW)*(1.0/(1-(self.beta1**self.t)))
    self.vW = (self.vW)*(1.0/(1-(self.beta2**self.t)))
    self.mB = (self.mB)*(1.0/(1-(self.beta1**self.t)))
    self.vB = (self.vB)*(1.0/(1-(self.beta2**self.t)))

    W = (1-self.lr*self.reg)*np.array(params['w'], dtype = object) - (self.lr/((self.vW + 1e-7)**0.5)) * self.mW
    self.vW = self.vW.tolist()
    self.mW = self.mW.tolist()

    B = (1-self.lr*self.reg)*np.array(params['b'], dtype = object) - (self.lr/((self.vB + 1e-7)**0.5)) * self.mB
    self.vB = self.vB.tolist()
    self.mB = self.mB.tolist()

    return W.tolist(), B.tolist()    

##### Training Function

In [54]:
def train(X_train, y_train,x_val, y_val, num_inputs_nodes, hidden_layers, num_output_nodes, init_type, epochs, batch_size, loss_type,activation_type, optimizer_name, learning_rate, reg_lamda):
  if optimizer_name=='sgd':
    optimizer = SGD()
  elif optimizer_name=='momentum':
    optimizer = Momentum()
  elif optimizer_name=='rmsprop':
    optimizer = RMSprop()
  elif optimizer_name=='nesterov':
    optimizer = Nesterov()
  elif optimizer_name=='adam':
    optimizer = Adam()    
  

  try:   
    optimizer.activation_type = activation_type
    optimizer.loss_type = loss_type
  except:
    pass

  W, B = param_inint(num_inputs_nodes,hidden_layers, num_output_nodes, init_type)
  N = X_train.shape[0]
  n_batches = int(np.floor(N/batch_size))
  optimizer.lr = learning_rate
  optimizer.reg = reg_lamda

  for epoch in range(epochs):

    train_loss = []
    train_accuracy = []
    val_loss = []
    val_accuracy = []
    l = 0
    acc = 0
    temp = 0
    for batch in range(n_batches):
      x = np.squeeze(X_train[batch*batch_size:batch_size+batch*batch_size]).T
      y = one_hot(y_train[batch*batch_size:batch_size+batch*batch_size], 10)
      y_hat, h, a = feed_forward(x, W,B, activation_type)
      gw, gb, gh, ga = back_prop(x, y,y_hat,a,h, W,B, batch_size, loss_type, activation_type)
      W,B = optimizer.update(W,B, gw,gb)
      l += loss_compute(y_train[batch*batch_size:batch_size+batch*batch_size],y_hat, loss_type, W,reg_lamda)
      acc += accuracy(y_hat, y_train[batch*batch_size:batch_size+batch*batch_size])

    if N%batch_size != 0:
        x = np.squeeze(X_train[-1*(N%batch_size):]).T
        y = one_hot(y_train[-1*(N%batch_size):], 10)
        y_hat, h, a = feed_forward(x, W,B, activation_type)
        gw, gb, gh, ga = back_prop(x, y,y_hat,a,h, W,B, N%batch_size, loss_type, activation_type)
        W,B = optimizer.update(W,B, gw,gb)
        l += loss_compute(y_train[-1*(N%batch_size):],y_hat, loss_type, W,reg_lamda)
        acc += accuracy(y_hat, y_train[-1*(N%batch_size):])
        temp = 1

    l = l/(n_batches + (N%batch_size))
    acc = acc/(n_batches + temp)

    train_loss.append(l)
    train_accuracy.append(acc)
    #print(f"Epoch:{epoch+1}")
    #print(f"Train Loss: {l}")
    #print(f"Train Accuracy: {acc}")

    #### Validation
    if x_val.any():
      y_val_hat, _,_ = feed_forward(np.squeeze(x_val).T, W,B, activation_type)
      val_acc = accuracy(y_val_hat,y_val)
      val_l = loss_compute(y_val, y_val_hat, loss_type,W = None, reg_lamda = reg_lamda)
      val_accuracy.append(val_acc)
      val_loss.append(val_l)
      #print(f"Val Loss: {val_l}")
      #print(f"Val Accuracy: {val_acc}")

    wandb.log({"epoch":epoch,"Train_loss":l,"Train_acc":acc,"val_loss":val_l,"val_Accuracy":val_acc})
  return W,B, train_loss, train_accuracy, val_loss, val_accuracy




In [55]:
!pip install wandb
!wandb login
import wandb

[34m[1mwandb[0m: Currently logged in as: [33mtejoram[0m (use `wandb login --relogin` to force relogin)


In [56]:
sweep_configuration = {'method'    : "random",  #Other available--> grid, bayes
                       'metric'    : {'name': 'val_Accuracy','goal':'maximize'},
                       'parameters': {'epochs':{'values':[ 5, 10]},
                                      'hidden_layers':{'values':[[64,32],[64,32,16],[128,64,32],[128,64,32,16],[128,32,32,16]] }, #change needed
                                      'learning_rate':{'values':[1e-3,1e-4]},
                                      'weight_decay':{'values':[0, 0.0005, 0.5]},
                                      'optimizer_name': {'values':['sgd', 'momentum', 'rmsprop', 'nesterov','adam']}, #, 'nesterov','adam', 'nadam'
                                      'batch_size':{'values':[16,32,64]},
                                      'init_type': {'values':['random','xavier']},
                                      'activation_type':{'values':['sigmoid','tanh','ReLU']},
                                      'loss_type': {'values':['cross_entropy']} }}

In [57]:
def sweep_train():

  hyperparameters=dict(epochs = 5,
                      hidden_layers= [64,32],
                      learning_rate=1e-4,
                      weight_decay=0,
                      optimizer_name='sgd',
                      batch_size=16,
                      init_type='random',
                      activation_type='sigmoid',
                      loss_type='cross_entropy',
                      reg_lamda=0)
                           
  wandb.init(project="CS6910-Assignment-1", entity="tejoram",config=hyperparameters)
  config=wandb.config
  epochs=config.epochs
  hidden_layers=config.hidden_layers
  learning_rate=config.learning_rate
  weight_decay=config.weight_decay
  optimizer_name=config.optimizer_name
  batch_size=config.batch_size
  init_type=config.init_type
  activation_type=config.activation_type
  loss_type=config.loss_type
  reg_lamda=config.reg_lamda  
  wandb.run.name = "e_{}_hl_{}_lr_{}_wd_{}_o_{}_bs_{}_winit_{}_ac_{}_los_{}_r_{}".format(epochs,\
                                                                                    hidden_layers,\
                                                                                    learning_rate,\
                                                                                    weight_decay,\
                                                                                    optimizer_name,\
                                                                                    batch_size,\
                                                                                    init_type,\
                                                                                    activation_type,\
                                                                                    loss_type,\
                                                                                    reg_lamda)
  
  _,_,train_loss, train_accuracy, val_loss, val_accuracy = train(X_train, y_train, X_validation, y_validation, 784, hidden_layers, 10, init_type, epochs, batch_size, loss_type, activation_type, optimizer_name, learning_rate, reg_lamda)

In [58]:
sweep_id = wandb.sweep(sweep_configuration,project='CS6910-Assignment-1')
wandb.agent(sweep_id,function=sweep_train,project='CS6910-Assignment-1',count=100)

Create sweep with ID: yttmvpj4
Sweep URL: https://wandb.ai/tejoram/CS6910-Assignment-1/sweeps/yttmvpj4


[34m[1mwandb[0m: Agent Starting Run: fkncwkbv with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▃▄▅▆▆▇▇██
Train_loss,█▆▅▄▃▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▃▄▅▅▇▇▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
Train_acc,75.06296
Train_loss,0.70061
epoch,9.0
val_Accuracy,75.5
val_loss,0.6917


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 41wuzuv2 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5







VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁████
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,2.39003
Train_loss,2.41961
epoch,4.0
val_Accuracy,2.38333
val_loss,2.44423


[34m[1mwandb[0m: Agent Starting Run: 6k71a3oo with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇▇█
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▂▁▁

0,1
Train_acc,87.08346
Train_loss,0.37768
epoch,4.0
val_Accuracy,86.86667
val_loss,0.3818


[34m[1mwandb[0m: Agent Starting Run: mr28zzte with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,4.79488
Train_loss,2.19003
epoch,4.0
val_Accuracy,5.13333
val_loss,2.30819


[34m[1mwandb[0m: Agent Starting Run: pkr0f86o with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,█▂▁▂▂
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▇███
val_loss,█▂▂▁▁

0,1
Train_acc,9.65084
Train_loss,2.18127
epoch,4.0
val_Accuracy,10.33333
val_loss,2.30256


[34m[1mwandb[0m: Agent Starting Run: qkvzhwr1 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇▇█
val_loss,█▄▂▁▁

0,1
Train_acc,87.47963
Train_loss,0.37766
epoch,4.0
val_Accuracy,87.23333
val_loss,0.38389


[34m[1mwandb[0m: Agent Starting Run: 7k26e9zr with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
Train_acc,86.49659
Train_loss,0.35598
epoch,4.0
val_Accuracy,86.6
val_loss,0.37618


[34m[1mwandb[0m: Agent Starting Run: ryk1dngf with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▆▇▇██▇█▇
val_loss,█▃▂▁▁▁▁▃▃▃

0,1
Train_acc,90.54259
Train_loss,0.25477
epoch,9.0
val_Accuracy,88.01667
val_loss,0.35643


[34m[1mwandb[0m: Agent Starting Run: rkhedtpo with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run rkhedtpo errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run rkhedtpo errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: 0s4lhnhd with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,8.40556
Train_loss,2.31483
epoch,9.0
val_Accuracy,8.83333
val_loss,2.31355


[34m[1mwandb[0m: Agent Starting Run: c434gj6s with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▆▇▇█████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
Train_acc,89.79191
Train_loss,0.26391
epoch,9.0
val_Accuracy,87.65
val_loss,0.3398


[34m[1mwandb[0m: Agent Starting Run: 3z9l53bv with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▇██
Train_loss,█▄▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
Train_acc,77.68809
Train_loss,0.588
epoch,4.0
val_Accuracy,78.26667
val_loss,0.59189


[34m[1mwandb[0m: Agent Starting Run: m556fqf2 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁█████████
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,25.49985
Train_loss,2.09894
epoch,9.0
val_Accuracy,25.28333
val_loss,2.22006


[34m[1mwandb[0m: Agent Starting Run: btvw79hr with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇██
val_loss,█▄▂▂▁

0,1
Train_acc,86.35404
Train_loss,0.37721
epoch,4.0
val_Accuracy,85.91667
val_loss,0.38641


[34m[1mwandb[0m: Agent Starting Run: zq217k2m with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
Train_acc,89.2501
Train_loss,0.28267
epoch,9.0
val_Accuracy,87.56667
val_loss,0.34114


[34m[1mwandb[0m: Agent Starting Run: rcjns814 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▃▄▅▅▆▇▇██
Train_loss,█▆▅▄▃▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▃▄▅▆▇▇██
val_loss,█▆▅▄▄▃▂▂▁▁

0,1
Train_acc,78.77851
Train_loss,0.71468
epoch,9.0
val_Accuracy,78.9
val_loss,0.69929


[34m[1mwandb[0m: Agent Starting Run: dw3u3c42 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run dw3u3c42 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run dw3u3c42 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: 4734352a with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 4734352a errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run 4734352a errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: 1r42to8i with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁█████████
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,12.72593
Train_loss,2.63955
epoch,9.0
val_Accuracy,12.91667
val_loss,2.64681


[34m[1mwandb[0m: Agent Starting Run: t1hr12dr with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.01852
Train_loss,2.31296
epoch,4.0
val_Accuracy,9.83333
val_loss,2.31476


[34m[1mwandb[0m: Agent Starting Run: 1inkajoe with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
Train_acc,82.56481
Train_loss,0.49012
epoch,4.0
val_Accuracy,83.35
val_loss,0.47863


[34m[1mwandb[0m: Agent Starting Run: d0zbyhp5 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.02407
Train_loss,2.42332
epoch,9.0
val_Accuracy,9.78333
val_loss,2.43117


[34m[1mwandb[0m: Agent Starting Run: 1vx1ofnl with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 1vx1ofnl errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run 1vx1ofnl errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: ljkumomy with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,9.75
Train_loss,2.38681
epoch,4.0
val_Accuracy,9.43333
val_loss,2.39064


[34m[1mwandb[0m: Agent Starting Run: z1u4vr9x with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁█████████
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,8.11056
Train_loss,2.29477
epoch,9.0
val_Accuracy,7.95
val_loss,2.31847


[34m[1mwandb[0m: Agent Starting Run: ipvju60j with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▇▇█
Train_loss,█▄▂▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▆▇██
val_loss,█▄▂▁▁

0,1
Train_acc,87.67778
Train_loss,0.34063
epoch,4.0
val_Accuracy,87.35
val_loss,0.35522


[34m[1mwandb[0m: Agent Starting Run: gy0shgfy with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run gy0shgfy errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run gy0shgfy errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: pmo74tr5 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▇██
Train_loss,█▄▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇██
val_loss,█▄▃▂▁

0,1
Train_acc,82.55147
Train_loss,0.52276
epoch,4.0
val_Accuracy,82.8
val_loss,0.51241


[34m[1mwandb[0m: Agent Starting Run: l9rozs2e with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁█████████
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,13.88663
Train_loss,2.2824
epoch,9.0
val_Accuracy,13.55
val_loss,2.30306


[34m[1mwandb[0m: Agent Starting Run: ipks4uk7 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.80482
Train_loss,2.1991
epoch,4.0
val_Accuracy,10.31667
val_loss,2.32329


[34m[1mwandb[0m: Agent Starting Run: edrrkur5 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run edrrkur5 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run edrrkur5 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: tx9ph2gw with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇▇█
Train_loss,█▃▂▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▃▅██
val_loss,█▅▄▁▃

0,1
Train_acc,87.04074
Train_loss,0.36469
epoch,4.0
val_Accuracy,86.73333
val_loss,0.38847


[34m[1mwandb[0m: Agent Starting Run: ssd1l536 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▆▇█▇
val_loss,█▃▃▁▃

0,1
Train_acc,87.01481
Train_loss,0.36458
epoch,4.0
val_Accuracy,86.7
val_loss,0.3943


[34m[1mwandb[0m: Agent Starting Run: rw1rh7ex with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.02407
Train_loss,2.54561
epoch,9.0
val_Accuracy,9.78333
val_loss,2.54631


[34m[1mwandb[0m: Agent Starting Run: 8ih9bj7u with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 8ih9bj7u errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run 8ih9bj7u errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: piccok6d with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
Train_acc,86.9
Train_loss,0.36986
epoch,4.0
val_Accuracy,86.58333
val_loss,0.36834


[34m[1mwandb[0m: Agent Starting Run: qmsfmzc8 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▃▅▆▇▇████
Train_loss,█▆▄▃▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▇▇████
val_loss,█▆▄▃▂▂▂▁▁▁

0,1
Train_acc,78.73408
Train_loss,0.5601
epoch,9.0
val_Accuracy,78.21667
val_loss,0.56036


[34m[1mwandb[0m: Agent Starting Run: 0e40a9gs with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▂▄▆█
Train_loss,██▅▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▂▄▇█
val_loss,█▇▄▂▁

0,1
Train_acc,50.20179
Train_loss,1.2779
epoch,4.0
val_Accuracy,54.45
val_loss,1.22602


[34m[1mwandb[0m: Agent Starting Run: 7b490opi with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇▇█
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▇█▇▆
val_loss,█▃▁▂▄

0,1
Train_acc,86.9463
Train_loss,0.36349
epoch,4.0
val_Accuracy,85.76667
val_loss,0.39979


[34m[1mwandb[0m: Agent Starting Run: kk1pqsxr with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▂▅█
Train_loss,█▅▄▃▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁█
val_loss,█▇▆▄▁

0,1
Train_acc,10.33768
Train_loss,2.18145
epoch,4.0
val_Accuracy,18.45
val_loss,2.3014


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b582ag42 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run b582ag42 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run b582ag42 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: agnj7wf6 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,9.98593
Train_loss,2.31781
epoch,9.0
val_Accuracy,10.13333
val_loss,2.33612


[34m[1mwandb[0m: Agent Starting Run: o43kv1t5 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▆▆▆▇▇█▇
val_loss,█▄▃▂▂▁▂▁▁▂

0,1
Train_acc,90.99958
Train_loss,0.2299
epoch,9.0
val_Accuracy,88.28333
val_loss,0.33723


[34m[1mwandb[0m: Agent Starting Run: 3u7zdae4 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,9.98593
Train_loss,2.29434
epoch,4.0
val_Accuracy,10.13333
val_loss,2.315


[34m[1mwandb[0m: Agent Starting Run: qv9okxs5 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▇▇█████
Train_loss,█▃▂▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▇▇▇████
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
Train_acc,83.32408
Train_loss,0.44907
epoch,9.0
val_Accuracy,82.73333
val_loss,0.47776


[34m[1mwandb[0m: Agent Starting Run: zjtv9mmg with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
Train_acc,87.4315
Train_loss,0.32782
epoch,4.0
val_Accuracy,87.3
val_loss,0.35948


[34m[1mwandb[0m: Agent Starting Run: 2fv9a4vd with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▇▇█
Train_loss,█▄▂▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▃▆█▇
val_loss,█▅▂▁▁

0,1
Train_acc,87.38148
Train_loss,0.3447
epoch,4.0
val_Accuracy,86.68333
val_loss,0.37238


[34m[1mwandb[0m: Agent Starting Run: i6cyhtmj with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▇▇█████
Train_loss,█▄▃▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▇▇█████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
Train_acc,88.58486
Train_loss,0.31988
epoch,9.0
val_Accuracy,87.2
val_loss,0.37015


[34m[1mwandb[0m: Agent Starting Run: dsata9bv with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▇▇█
Train_loss,█▃▂▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▄▆██
val_loss,█▄▃▂▁

0,1
Train_acc,88.33864
Train_loss,0.31494
epoch,4.0
val_Accuracy,87.8
val_loss,0.33767


[34m[1mwandb[0m: Agent Starting Run: vm7nmv0c with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▄▆██
val_loss,█▄▃▂▁

0,1
Train_acc,86.31146
Train_loss,0.35829
epoch,4.0
val_Accuracy,86.2
val_loss,0.37902


[34m[1mwandb[0m: Agent Starting Run: yj3bui10 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇▇█
Train_loss,█▃▂▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▆▇██
val_loss,█▃▂▁▁

0,1
Train_acc,88.55185
Train_loss,0.31487
epoch,4.0
val_Accuracy,87.55
val_loss,0.344


[34m[1mwandb[0m: Agent Starting Run: 9k1dwqi0 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇▇█
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
Train_acc,87.67032
Train_loss,0.32477
epoch,4.0
val_Accuracy,86.7
val_loss,0.3617


[34m[1mwandb[0m: Agent Starting Run: 93meu44l with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,88.98166
Train_loss,0.28981
epoch,9.0
val_Accuracy,87.4
val_loss,0.3404


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: eyik2yek with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run eyik2yek errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run eyik2yek errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: qe7q12yn with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run qe7q12yn errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run qe7q12yn errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: a8gj5p50 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run a8gj5p50 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run a8gj5p50 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: 7jx5rjxx with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 7jx5rjxx errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run 7jx5rjxx errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: 5pa2tqah with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▇▇██
Train_loss,█▂▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
Train_acc,86.81995
Train_loss,0.35232
epoch,4.0
val_Accuracy,86.43333
val_loss,0.3807


[34m[1mwandb[0m: Agent Starting Run: l8npkr9l with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run l8npkr9l errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run l8npkr9l errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: h2l20xhl with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇██
val_loss,█▄▂▂▁

0,1
Train_acc,86.99521
Train_loss,0.33721
epoch,4.0
val_Accuracy,86.9
val_loss,0.36383


[34m[1mwandb[0m: Agent Starting Run: twu2h0m2 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▄▆▆▇▇████
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▃▅▆▆▇▇███
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
Train_acc,82.92593
Train_loss,0.47996
epoch,9.0
val_Accuracy,82.63333
val_loss,0.47546


[34m[1mwandb[0m: Agent Starting Run: 3vozlcis with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▃▅▆▇▇████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,89.34883
Train_loss,0.27862
epoch,9.0
val_Accuracy,87.56667
val_loss,0.33942


[34m[1mwandb[0m: Agent Starting Run: m46hzv0l with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▆▇██
val_loss,█▄▂▁▁

0,1
Train_acc,86.98889
Train_loss,0.36647
epoch,4.0
val_Accuracy,87.18333
val_loss,0.36064


[34m[1mwandb[0m: Agent Starting Run: 7ktzwquq with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▇▇▇▇███
Train_loss,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▆▇▇█████
val_loss,█▃▂▁▁▁▁▁▁▂

0,1
Train_acc,90.73238
Train_loss,0.23864
epoch,9.0
val_Accuracy,88.43333
val_loss,0.34046


[34m[1mwandb[0m: Agent Starting Run: c5h4od4w with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▅▆▃▅▇▇█
val_loss,█▅▃▃▃▆▃▂▂▁

0,1
Train_acc,87.59257
Train_loss,0.34337
epoch,9.0
val_Accuracy,86.78333
val_loss,0.37685


[34m[1mwandb[0m: Agent Starting Run: yhk3ysif with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▇▇▇▇███
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▇▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,87.33955
Train_loss,0.34319
epoch,9.0
val_Accuracy,86.23333
val_loss,0.37236


[34m[1mwandb[0m: Agent Starting Run: lh6k1az7 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run lh6k1az7 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run lh6k1az7 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: y9grd17t with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run y9grd17t errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run y9grd17t errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: s4bbtym2 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run s4bbtym2 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run s4bbtym2 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: iosv38m7 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▇▇▅▇█▇▇█
val_loss,█▅▂▂▃▂▂▁▂▂

0,1
Train_acc,88.45527
Train_loss,0.2982
epoch,9.0
val_Accuracy,86.45
val_loss,0.39075


[34m[1mwandb[0m: Agent Starting Run: mhoy30bp with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▃▄▇█
val_loss,█▅▅▂▁

0,1
Train_acc,86.60926
Train_loss,0.36909
epoch,4.0
val_Accuracy,87.26667
val_loss,0.3619


[34m[1mwandb[0m: Agent Starting Run: ed8jg4gw with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇▇▇█████
Train_loss,█▃▂▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
Train_acc,88.91667
Train_loss,0.30473
epoch,9.0
val_Accuracy,87.68333
val_loss,0.33614


[34m[1mwandb[0m: Agent Starting Run: lxft9cqx with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run lxft9cqx errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run lxft9cqx errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: 7attztbq with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.09259
Train_loss,2.31975
epoch,9.0
val_Accuracy,9.16667
val_loss,2.32257


[34m[1mwandb[0m: Agent Starting Run: otibn6ls with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,9.98593
Train_loss,2.31256
epoch,9.0
val_Accuracy,10.13333
val_loss,2.33541


[34m[1mwandb[0m: Agent Starting Run: i6cx9anu with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▄▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅███
val_loss,█▃▂▁▁

0,1
Train_acc,88.7737
Train_loss,0.30742
epoch,4.0
val_Accuracy,87.86667
val_loss,0.34239


[34m[1mwandb[0m: Agent Starting Run: wmy8vgle with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁█████████
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,14.07546
Train_loss,2.3329
epoch,9.0
val_Accuracy,13.15
val_loss,2.46478


[34m[1mwandb[0m: Agent Starting Run: 5oizeo96 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁████
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,14.36488
Train_loss,2.13304
epoch,4.0
val_Accuracy,14.25
val_loss,2.25288


[34m[1mwandb[0m: Agent Starting Run: wf6qy55b with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▇▇▇████
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▇▇████
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
Train_acc,88.76382
Train_loss,0.29563
epoch,9.0
val_Accuracy,87.66667
val_loss,0.34691


[34m[1mwandb[0m: Agent Starting Run: finzfj94 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▇▇▇▇███
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▆▄▅▅▅▇█
val_loss,█▅▃▂▆▃▄▃▁▁

0,1
Train_acc,87.03224
Train_loss,0.33607
epoch,9.0
val_Accuracy,86.33333
val_loss,0.39271


[34m[1mwandb[0m: Agent Starting Run: 0l0zm65w with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,11.98519
Train_loss,2.31462
epoch,4.0
val_Accuracy,11.71667
val_loss,2.31597


[34m[1mwandb[0m: Agent Starting Run: v4vnxg0u with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,11.34108
Train_loss,2.29702
epoch,4.0
val_Accuracy,11.86667
val_loss,2.31325


[34m[1mwandb[0m: Agent Starting Run: jc3cuke9 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,89.05324
Train_loss,0.29604
epoch,9.0
val_Accuracy,87.75
val_loss,0.33848


[34m[1mwandb[0m: Agent Starting Run: aoqyvlp4 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▄▅▆▆▇▇▇██
Train_loss,█▅▄▃▃▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆██▆▇▆█
val_loss,█▅▃▂▂▁▂▂▂▂

0,1
Train_acc,91.66605
Train_loss,0.21683
epoch,9.0
val_Accuracy,87.9
val_loss,0.34126


[34m[1mwandb[0m: Agent Starting Run: fdvkozdw with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁█████████
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,17.20416
Train_loss,2.19386
epoch,9.0
val_Accuracy,16.98333
val_loss,2.31833


[34m[1mwandb[0m: Agent Starting Run: ce3q3wzr with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▄▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
Train_acc,82.96246
Train_loss,0.48935
epoch,4.0
val_Accuracy,82.98333
val_loss,0.50644


[34m[1mwandb[0m: Agent Starting Run: 9ke3kjex with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▇▇████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
Train_acc,89.39815
Train_loss,0.29502
epoch,9.0
val_Accuracy,88.06667
val_loss,0.33256


[34m[1mwandb[0m: Agent Starting Run: non8xir6 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run non8xir6 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run non8xir6 errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: n8a9kpjs with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▇▇▇▇▇██
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▆▄▇▇█▆███
val_loss,█▄▆▃▃▁▃▁▁▂

0,1
Train_acc,86.87037
Train_loss,0.36043
epoch,9.0
val_Accuracy,85.96667
val_loss,0.3942


[34m[1mwandb[0m: Agent Starting Run: fio1gw8m with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.01987
Train_loss,2.2036
epoch,9.0
val_Accuracy,9.83333
val_loss,2.32626


[34m[1mwandb[0m: Agent Starting Run: tg0psi17 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▇▇▇████
Train_loss,█▃▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▆▇▇████
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
Train_acc,86.49659
Train_loss,0.38482
epoch,9.0
val_Accuracy,86.03333
val_loss,0.39606


[34m[1mwandb[0m: Agent Starting Run: maagm05h with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,9.97963
Train_loss,2.31954
epoch,4.0
val_Accuracy,10.18333
val_loss,2.32186


[34m[1mwandb[0m: Agent Starting Run: 62ci4c7g with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,9.98593
Train_loss,2.35629
epoch,9.0
val_Accuracy,10.13333
val_loss,2.36886


[34m[1mwandb[0m: Agent Starting Run: lctnlgfp with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run lctnlgfp errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run lctnlgfp errored: NameError("name 'x' is not defined")
[34m[1mwandb[0m: Agent Starting Run: kwzzwnph with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▄▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▇▆▇████
val_loss,█▅▄▂▃▂▁▁▁▂

0,1
Train_acc,90.37407
Train_loss,0.2632
epoch,9.0
val_Accuracy,87.88333
val_loss,0.34966


[34m[1mwandb[0m: Agent Starting Run: mqs1u2ji with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,9.95816
Train_loss,2.29288
epoch,9.0
val_Accuracy,10.35
val_loss,2.31074


[34m[1mwandb[0m: Agent Starting Run: g6aeyxs3 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▅▆▇▇▇█▇██
Train_loss,█▄▃▂▂▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▃▅▅▄▆▆▇█▇
val_loss,█▇▃▃▇▃▃▃▁▄

0,1
Train_acc,88.40344
Train_loss,0.32271
epoch,9.0
val_Accuracy,86.98333
val_loss,0.40859


[34m[1mwandb[0m: Agent Starting Run: 10yjfpnh with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▆▇██
val_loss,█▄▂▁▁

0,1
Train_acc,84.86374
Train_loss,0.43495
epoch,4.0
val_Accuracy,85.05
val_loss,0.43526


[34m[1mwandb[0m: Agent Starting Run: 2pfy7lrx with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▄▆██
val_loss,█▅▃▂▁

0,1
Train_acc,87.45927
Train_loss,0.34753
epoch,4.0
val_Accuracy,86.96667
val_loss,0.36325


[34m[1mwandb[0m: Agent Starting Run: sbg4d8yq with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▆▇██
val_loss,█▄▂▁▁

0,1
Train_acc,85.48333
Train_loss,0.4141
epoch,4.0
val_Accuracy,85.51667
val_loss,0.40692
