In [1]:
import numpy as np
import wandb
import matplotlib.pyplot as plt
import sys
import math
from keras.datasets import fashion_mnist,mnist
import sklearn
from sklearn.model_selection import train_test_split
from copy import deepcopy

2025-03-16 18:36:29.133676: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-16 18:36:29.144621: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742130389.159024   13387 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742130389.163122   13387 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-16 18:36:29.178136: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [8]:
class Neural_Net:
    def __init__(self,input_shape,number_of_hidden_layers,hidden_neurons_per_layer,activation_function,output_shape,type_of_init,L2reg_const):
        self.input_shape = input_shape
        self.number_of_hidden_layers = number_of_hidden_layers
        self.hidden_neurons_per_layer = hidden_neurons_per_layer
        self.output_shape = output_shape
        self.activation_function = activation_function
        self.type_of_init = type_of_init
        self.L2reg_const = L2reg_const
        self.W, self.B = self.weight_init(type_of_init)

    def weight_init(self,type_of_init):
        W = []
        B = []
        
        if type_of_init == 'Xavier':    
            W.append(np.random.randn(self.input_shape, self.hidden_neurons_per_layer[0]) * np.sqrt(6 / (self.input_shape + self.hidden_neurons_per_layer[0])))
            for i in range(1, self.number_of_hidden_layers):
                W.append(np.random.randn(self.hidden_neurons_per_layer[i-1], self.hidden_neurons_per_layer[i]) *np.sqrt(6 / (self.hidden_neurons_per_layer[i-1] + self.hidden_neurons_per_layer[i])))
        
            W.append(np.random.randn(self.hidden_neurons_per_layer[-1], self.output_shape)*np.sqrt(6 / (self.hidden_neurons_per_layer[-1] + self.output_shape)))
        elif type_of_init == 'random':            
            W.append(np.random.rand(self.input_shape,self.hidden_neurons_per_layer[0])-0.5)
            for i in range(1,self.number_of_hidden_layers):
                W.append(np.random.rand(self.hidden_neurons_per_layer[i-1],self.hidden_neurons_per_layer[i])-0.5)
            W.append(np.random.rand(self.hidden_neurons_per_layer[-1],self.output_shape)-0.5)
            
        B.append(np.random.rand(1,self.hidden_neurons_per_layer[0])-0.5)
        for i in range(1,self.number_of_hidden_layers):
            B.append(np.random.rand(1,self.hidden_neurons_per_layer[i])-0.5)
        
        B.append(np.random.rand(1,self.output_shape)-0.5)
        return W,B

    def feed_forward(self,data,W,B):
        A = []         #pre-activation output list
        H = []         #post-ativation output list

        input_d = data
        for i in range(self.number_of_hidden_layers):
            linear_out = np.dot(input_d,W[i]) + B[i]
            activation_out = self.activation(linear_out)
            A.append(linear_out)
            H.append(activation_out)
            input_d = activation_out

        y_pred = self.softmax(np.dot(H[-1],W[-1]) + B[-1])
        return A,H,y_pred

    def activation(self,x):
        if self.activation_function == 'ReLU':
            return self.ReLU(x)
        elif self.activation_function == 'tanh':
            return self.tanh(x)
        elif self.activation_function == 'sigmoid':
            return self.sigmoid(x)
        elif self.activation_function == 'linear':
            return x

    def activation_derivative(self,x):
        if self.activation_function == 'ReLU':
            return self.ReLU_derivative(x)
        elif self.activation_function == 'tanh':
            return self.tanh_derivative(x)
        elif self.activation_function == 'sigmoid':
            return self.sigmoid_derivative(x)
        elif self.activation_function == 'linear':
            return 1

    def ReLU(self,x):
        return np.maximum(0,x)

    def ReLU_derivative(self,x):
        return np.where(x > 0, 1, 0)
    
    def sigmoid(self,x):
        # x = np.clip(x, -500, 500)
        return 1/(1+np.exp(-x))

    def sigmoid_derivative(self,x):
        return self.sigmoid(x)*(1-self.sigmoid(x))

    def tanh(self,x):
        return np.tanh(x)

    def tanh_derivative(self,x):
        return 1 - self.tanh(x)**2

    def softmax(self,x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        out = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        return out

    def loss(self,label,pred):
        pred = np.clip(pred, 1e-15, 1 - 1e-15)
        loss = -np.sum(label*np.log(pred+1e-18),axis=1)
        return np.mean(loss)

    def accuracy(self,data,label):
        A,H,y_prob = self.feed_forward(data,self.W,self.B)
        acc = 0
        for i in range(len(data)):
            if np.argmax(label[i]) == np.argmax(y_prob[i]):
                acc += 1
        return acc/data.shape[0]
        
    def sgd_train(self,epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label):
        num_batches = train_data.shape[0]//batch_size
        remaining_data = train_data.shape[0]%batch_size
        for ep in range(epochs):
            for b_id in range(num_batches+1):                
                batch_start = b_id * batch_size
                batch_end = min((b_id + 1) * batch_size, train_data.shape[0])
                batch_data = train_data[batch_start:batch_end]
                batch_label = train_label[batch_start:batch_end]

                A,H,y_prob = self.feed_forward(batch_data,self.W,self.B)
                
                dw,db = self.backpropagation(A,H,self.W,self.B,batch_data,y_prob,batch_label)
                for i in range(self.number_of_hidden_layers+1):
                    self.W[i] -= learning_rate * (dw[i])
                    self.B[i] -= learning_rate * db[i]
                
                if(b_id+1)%100 == 0:
                    _,_,test_y_prob = self.feed_forward(test_data,self.W,self.B)
                    test_loss = self.loss(test_label,test_y_prob)
                    train_acc = self.accuracy(batch_data,batch_label)
                    train_loss = self.loss(batch_label,y_prob)
                    test_acc = self.accuracy(test_data,test_label)
                    wandb.log({'train_loss': train_loss, 'train_acc': train_acc, 'val_loss': test_loss, 'val_acc': test_acc})
                    sys.stdout.write(f"\rEpoch {ep + 1}/{epochs} - Batch {b_id + 1}/{num_batches} - train-loss: {train_loss:.6f} train-acc: {train_acc:.6f} test-loss:{test_loss:.6f} test-acc : {test_acc:.6f} ")
                    sys.stdout.flush()
            print()

    def momentum_train(self,epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,beta=0.9):
        momentum_W = [np.zeros_like(w) for w in self.W]
        momentum_B = [np.zeros_like(b) for b in self.B]
        num_batches = train_data.shape[0]//batch_size
        remaining_data = train_data.shape[0]%batch_size
        for ep in range(epochs):
            for b_id in range(num_batches+1):                
                batch_start = b_id * batch_size
                batch_end = min((b_id + 1) * batch_size, train_data.shape[0])
                batch_data = train_data[batch_start:batch_end]
                batch_label = train_label[batch_start:batch_end]

                W_copy = deepcopy(self.W)
                B_copy = deepcopy(self.B)
                A,H,y_prob = self.feed_forward(batch_data,W_copy,B_copy)

                
                dw,db = self.backpropagation(A,H,W_copy,B_copy,batch_data,y_prob,batch_label)
                for i in range(self.number_of_hidden_layers+1):
                    momentum_W[i] = beta*momentum_W[i] + (1-beta)*(dw[i])
                    momentum_B[i] = beta*momentum_B[i] + (1-beta)*db[i]
                    self.W[i] -= learning_rate * momentum_W[i]
                    self.B[i] -= learning_rate * momentum_B[i]
                
                if(b_id+1)%100 == 0:
                    _,_,test_y_prob = self.feed_forward(test_data,self.W,self.B)
                    test_loss = self.loss(test_label,test_y_prob)
                    train_acc = self.accuracy(batch_data,batch_label)
                    train_loss = self.loss(batch_label,y_prob)
                    test_acc = self.accuracy(test_data,test_label)
                    wandb.log({'train_loss': train_loss, 'train_acc': train_acc, 'val_loss': test_loss, 'val_acc': test_acc})
                    sys.stdout.write(f"\rEpoch {ep + 1}/{epochs} - Batch {b_id + 1}/{num_batches} - train-loss: {train_loss:.6f} train-acc: {train_acc:.6f} test-loss:{test_loss:.6f} test-acc : {test_acc:.6f} ")
                    sys.stdout.flush()
            print()

    def nestrov_train(self,epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,beta=0.9):
        momentum_W = [np.zeros_like(w) for w in self.W]
        momentum_B = [np.zeros_like(b) for b in self.B]
        num_batches = train_data.shape[0]//batch_size
        remaining_data = train_data.shape[0]%batch_size
        for ep in range(epochs):
            for b_id in range(num_batches+1):                
                batch_start = b_id * batch_size
                batch_end = min((b_id + 1) * batch_size, train_data.shape[0])
                batch_data = train_data[batch_start:batch_end]
                batch_label = train_label[batch_start:batch_end]

                # we want grad(wt - beta*momentum), so feed forward through this value and get grads
                W_lookahead = [w - beta*v for w,v in zip(self.W,momentum_W)]
                B_lookahead = [b - beta*v for b,v in zip(self.B,momentum_B)]
                
                
                A,H,y_prob = self.feed_forward(batch_data,W_lookahead,B_lookahead)
                dw,db = self.backpropagation(A,H,W_lookahead,B_lookahead,batch_data,y_prob,batch_label)
                for i in range(self.number_of_hidden_layers+1):
                    # dw[i] += self.L2reg_const * self.W[i]
                    momentum_W[i] = beta*momentum_W[i] + (dw[i])
                    momentum_B[i] = beta*momentum_B[i] + db[i]
                    
                    self.W[i] -= learning_rate * momentum_W[i]
                    self.B[i] -= learning_rate * momentum_B[i]
                
                if(b_id+1)%100 == 0:
                    _,_,test_y_prob = self.feed_forward(test_data,self.W,self.B)
                    test_loss = self.loss(test_label,test_y_prob)
                    train_acc = self.accuracy(batch_data,batch_label)
                    train_loss = self.loss(batch_label,y_prob)
                    test_acc = self.accuracy(test_data,test_label)
                    wandb.log({'train_loss': train_loss, 'train_acc': train_acc, 'val_loss': test_loss, 'val_acc': test_acc})
                    sys.stdout.write(f"\rEpoch {ep + 1}/{epochs} - Batch {b_id + 1}/{num_batches} - train-loss: {train_loss:.6f} train-acc: {train_acc:.6f} test-loss:{test_loss:.6f} test-acc : {test_acc:.6f} ")
                    sys.stdout.flush()
            print()

    def RMSprop_train(self,epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,beta,eps):
        v_W = [np.zeros_like(w) for w in self.W]
        v_B = [np.zeros_like(b) for b in self.B]
        num_batches = train_data.shape[0]//batch_size
        remaining_data = train_data.shape[0]%batch_size
        for ep in range(epochs):
            for b_id in range(num_batches+1):                
                batch_start = b_id * batch_size
                batch_end = min((b_id + 1) * batch_size, train_data.shape[0])
                batch_data = train_data[batch_start:batch_end]
                batch_label = train_label[batch_start:batch_end]

                W_copy = deepcopy(self.W)
                B_copy = deepcopy(self.B)
                A,H,y_prob = self.feed_forward(batch_data,W_copy,B_copy)

                
                
                dw,db = self.backpropagation(A,H,W_copy,B_copy,batch_data,y_prob,batch_label)
                for i in range(self.number_of_hidden_layers+1):
                    # dw[i] += self.L2reg_const * self.W[i]
                    
                    v_W[i] = beta*v_W[i] + (1-beta)*(dw[i]**2)
                    v_B[i] = beta*v_B[i] + (1-beta)*(db[i]**2)

                    adaptive_lr_w = learning_rate/(np.sqrt(v_W[i]+eps))
                    adaptive_lr_b = learning_rate/(np.sqrt(v_B[i]+eps))
                    self.W[i] -= adaptive_lr_w * (dw[i])
                    self.B[i] -= adaptive_lr_b * db[i]
                
                if(b_id+1)%100 == 0:
                    _,_,test_y_prob = self.feed_forward(test_data,self.W,self.B)
                    test_loss = self.loss(test_label,test_y_prob)
                    train_acc = self.accuracy(batch_data,batch_label)
                    train_loss = self.loss(batch_label,y_prob)
                    test_acc = self.accuracy(test_data,test_label)
                    wandb.log({'train_loss': train_loss, 'train_acc': train_acc, 'val_loss': test_loss, 'val_acc': test_acc})
                    sys.stdout.write(f"\rEpoch {ep + 1}/{epochs} - Batch {b_id + 1}/{num_batches} - train-loss: {train_loss:.6f} train-acc: {train_acc:.6f} test-loss:{test_loss:.6f} test-acc : {test_acc:.6f} ")
                    sys.stdout.flush()
            print()   

    def adam_train(self,epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,beta1=0.9,beta2=0.999,eps=1e-8):
        momentum1_W = [np.zeros_like(w) for w in self.W]
        momentum1_B = [np.zeros_like(b) for b in self.B]
        momentum2_W = [np.zeros_like(w) for w in self.W]
        momentum2_B = [np.zeros_like(b) for b in self.B]
        t = 0
        num_batches = train_data.shape[0]//batch_size
        remaining_data = train_data.shape[0]%batch_size
        for ep in range(epochs):
            for b_id in range(num_batches+1):
                t += 1
                batch_start = b_id * batch_size
                batch_end = min((b_id + 1) * batch_size, train_data.shape[0])
                batch_data = train_data[batch_start:batch_end]
                batch_label = train_label[batch_start:batch_end]

                W_copy = deepcopy(self.W)
                B_copy = deepcopy(self.B)
                A,H,y_prob = self.feed_forward(batch_data,W_copy,B_copy)
    
                
                dw,db = self.backpropagation(A,H,W_copy,B_copy,batch_data,y_prob,batch_label)
                for i in range(self.number_of_hidden_layers+1):
                    momentum1_W[i] = beta1*momentum1_W[i] + (1-beta1)*(dw[i])
                    momentum1_B[i] = beta1*momentum1_B[i] + (1-beta1)*(db[i])
    
                    momentum2_W[i] = beta2*momentum2_W[i] + (1-beta2)*(dw[i]**2)
                    momentum2_B[i] = beta2*momentum2_B[i] + (1-beta2)*(db[i]**2)
    
                    momentum1_W_hat = momentum1_W[i]/(1-(beta1**t))
                    momentum1_B_hat = momentum1_B[i]/(1-(beta1**t))
    
                    momentum2_W_hat = momentum2_W[i]/(1-(beta2**t))
                    momentum2_B_hat = momentum2_B[i]/(1-(beta2**t))
    
                    adaptive_lr_W = learning_rate/(np.sqrt(momentum2_W_hat) + eps)
                    adaptive_lr_B = learning_rate/(np.sqrt(momentum2_B_hat) + eps)
    
                    self.W[i] -= adaptive_lr_W * (momentum1_W_hat)
                    self.B[i] -= adaptive_lr_B * momentum1_B_hat
                    
                
                if(b_id+1)%100 == 0:
                    _,_,test_y_prob = self.feed_forward(test_data,self.W,self.B)
                    test_loss = self.loss(test_label,test_y_prob)
                    train_acc = self.accuracy(batch_data,batch_label)
                    train_loss = self.loss(batch_label,y_prob)
                    test_acc = self.accuracy(test_data,test_label)
                    # wandb.log({'train_loss': train_loss, 'train_acc': train_acc, 'val_loss': test_loss, 'val_acc': test_acc})
                    sys.stdout.write(f"\rEpoch {ep + 1}/{epochs} - Batch {b_id + 1}/{num_batches} - train-loss: {train_loss:.6f} train-acc: {train_acc:.6f} test-loss:{test_loss:.6f} test-acc : {test_acc:.6f} ")
                    sys.stdout.flush()
            print()

    def nadam_train(self,epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,beta1=0.9,beta2=0.999,eps=1e-8):
        momentum1_W = [np.zeros_like(w) for w in self.W]
        momentum1_B = [np.zeros_like(b) for b in self.B]
        momentum2_W = [np.zeros_like(w) for w in self.W]
        momentum2_B = [np.zeros_like(b) for b in self.B]
        t = 0
        num_batches = train_data.shape[0]//batch_size
        remaining_data = train_data.shape[0]%batch_size
        for ep in range(epochs):
            for b_id in range(num_batches+1):
                t += 1
                batch_start = b_id * batch_size
                batch_end = min((b_id + 1) * batch_size, train_data.shape[0])
                batch_data = train_data[batch_start:batch_end]
                batch_label = train_label[batch_start:batch_end]
                W_copy = deepcopy(self.W)
                B_copy = deepcopy(self.B)
                A,H,y_prob = self.feed_forward(batch_data,W_copy,B_copy)
                
                dw,db = self.backpropagation(A,H,W_copy,B_copy,batch_data,y_prob,batch_label)
                for i in range(self.number_of_hidden_layers+1):
                    # dw[i] += self.L2reg_const * self.W[i]
                    momentum1_W[i] = beta1*momentum1_W[i] + (1-beta1)*(dw[i])
                    momentum1_B[i] = beta1*momentum1_B[i] + (1-beta1)*(db[i])
    
                    momentum2_W[i] = beta2*momentum2_W[i] + (1-beta2)*(dw[i]**2)
                    momentum2_B[i] = beta2*momentum2_B[i] + (1-beta2)*(db[i]**2)
    
                    momentum1_W_hat = momentum1_W[i]/(1-(beta1**t))
                    momentum1_B_hat = momentum1_B[i]/(1-(beta1**t))
    
                    momentum2_W_hat = momentum2_W[i]/(1-(beta2**t))
                    momentum2_B_hat = momentum2_B[i]/(1-(beta2**t))
    
                    m_nestrov_W = beta1 * momentum1_W_hat + ((1 - beta1) * dw[i])/(1-beta1**t)
                    m_nestrov_B = beta1 * momentum1_B_hat + ((1 - beta1) * db[i])/(1-beta1**t)
    
                    adaptive_lr_W = learning_rate / (np.sqrt(momentum2_W_hat) + eps)
                    adaptive_lr_B = learning_rate / (np.sqrt(momentum2_B_hat) + eps)
    
                    self.W[i] -= adaptive_lr_W * m_nestrov_W
                    self.B[i] -= adaptive_lr_B * m_nestrov_B
                    
                
                if(b_id+1)%100 == 0:
                    _,_,test_y_prob = self.feed_forward(test_data,self.W,self.B)
                    test_loss = self.loss(test_label,test_y_prob)
                    train_acc = self.accuracy(batch_data,batch_label)
                    train_loss = self.loss(batch_label,y_prob)
                    test_acc = self.accuracy(test_data,test_label)
                    wandb.log({'train_loss': train_loss, 'train_acc': train_acc, 'val_loss': test_loss, 'val_acc': test_acc})
                    sys.stdout.write(f"\rEpoch {ep + 1}/{epochs} - Batch {b_id + 1}/{num_batches} - train-loss: {train_loss:.6f} train-acc: {train_acc:.6f} test-loss:{test_loss:.6f} test-acc : {test_acc:.6f} ")
                    sys.stdout.flush()
            print()
        
    def train(self,optimizer,epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label):
        if optimizer == 'sgd':
            self.sgd_train(epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label)
        elif optimizer == 'momentum':
            self.momentum_train(epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,0.9)
        elif optimizer == 'nestrov':
            self.nestrov_train(epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,0.9)
        elif optimizer == 'RMSprop':
            self.RMSprop_train(epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,0.8,1e-8)
        elif optimizer == 'adam':
            self.adam_train(epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,0.9,0.999,1e-8)
        elif optimizer == 'nadam':
            self.nadam_train(epochs,learning_rate,batch_size,train_data,train_label,test_data,test_label,0.9,0.999,1e-8)
            

    def backpropagation(self,A,H,W,B,batch_data,y_prob,batch_label):
        dW = []
        db = []
        
        # Calculate initial error (cross-entropy gradient for softmax)
        error = y_prob - batch_label  
        # Backpropagate through output layer (softmax)
        dW.insert(0, np.dot(H[-1].T, error)+self.L2reg_const * W[-1])
        db.insert(0, np.sum(error,axis=0,keepdims=True))
        
        # Backpropagate through hidden layers
        delta = error
        for i in range(self.number_of_hidden_layers - 1, 0, -1):
            delta = np.dot(delta, W[i+1].T) * self.activation_derivative(A[i])
            dW.insert(0, np.dot(H[i-1].T, delta)+ self.L2reg_const * W[i])
            db.insert(0, np.sum(delta, axis=0,keepdims=True))
        
        # Backpropagate through input layer
        delta = np.dot(delta, W[1].T) * self.activation_derivative(A[0])
        dW.insert(0, np.dot(batch_data.T, delta)+ self.L2reg_const * W[0])
        db.insert(0, np.sum(delta, axis=0,keepdims=True))
        # print(dW)
        # input()
        return dW, db



    

In [5]:
def train():
    var1 = wandb.init(project='dl-assignment1-experiment')
    config = var1.config

    input_size = train_data.shape[1]
    num_hidden_layers = config.num_of_hidden_layers

    num_neurons_each_layer = [ config.hidden_layer_size ] * num_hidden_layers 
    activation_function = config.activation_function
    type_of_init = config.weight_initialization
    L2reg_const = config.weight_decay

    run_name = f"hl_{config.num_of_hidden_layers}_bs_{config.batch_size}_ac_{config.activation_function}"
    
    wandb.run.name = run_name
    wandb.run.save()

    print(f"Starting training with run name: {run_name}")
    nn = Neural_Net(
        number_of_hidden_layers=num_hidden_layers,
        hidden_neurons_per_layer=num_neurons_each_layer,
        activation_function=activation_function,
        input_shape=input_size,
        type_of_init=type_of_init,
        L2reg_const=L2reg_const,
        output_shape = 10,
    )
    
    nn.train(
        optimizer=config.optimizer,
        epochs=config.epochs,
        learning_rate=config.learning_rate,
        train_data=train_data,  # Make sure to define train_data and labels
        train_label=train_label,
        test_data=val_data,
        test_label=val_label,
        batch_size=config.batch_size
    )

In [6]:
sweep_config = {
  'name': 'fashion-mnist-exp(random-select)-2.10',
  'method': 'bayes',
  'metric': {'goal': 'maximize', 'name': 'val_acc'},
  'parameters': {
        'num_of_hidden_layers':{'values':[3,4,5]},
        'hidden_layer_size': {'values': [32, 64, 128]},
      'activation_function': {'values': ['sigmoid', 'tanh', 'ReLU']},
      'batch_size': {'values': [16, 32, 64]},
      'epochs': {'values': [5, 10]},
      'learning_rate': {'values': [1e-3,1e-4]},
      'optimizer': {'values': ['sgd', 'momentum', 'nesterov', 'RMSprop', 'adam', 'nadam']},
      'weight_initialization': {'values': ['random', 'Xavier']},
      'weight_decay': {'values': [0, 0.0005,0.5]},
  }
}

In [7]:
sweep_id = wandb.sweep(sweep_config,project='dl-assignment1-experiment')
wandb.agent(sweep_id,train,count=50)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: hfa2u67m
Sweep URL: https://wandb.ai/cs24s031-indian-institute-of-technology-madras/dl-assignment1-experiment/sweeps/hfa2u67m


[34m[1mwandb[0m: Agent Starting Run: 5x2w277x with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier
[34m[1mwandb[0m: Currently logged in as: [33mcs24s031[0m ([33mcs24s031-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Starting training with run name: hl_5_bs_16_ac_sigmoid


[34m[1mwandb[0m: Agent Starting Run: 06kn8ph4 with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_3_bs_64_ac_ReLU
Epoch 1/5 - Batch 800/843 - train-loss: 0.638509 train-acc: 0.734375 test-loss:0.437303 test-acc : 0.840500 
Epoch 2/5 - Batch 800/843 - train-loss: 0.544990 train-acc: 0.765625 test-loss:0.377942 test-acc : 0.861167 
Epoch 3/5 - Batch 800/843 - train-loss: 0.504163 train-acc: 0.812500 test-loss:0.365339 test-acc : 0.867333 
Epoch 4/5 - Batch 800/843 - train-loss: 0.446966 train-acc: 0.796875 test-loss:0.348804 test-acc : 0.871167 
Epoch 5/5 - Batch 800/843 - train-loss: 0.407431 train-acc: 0.812500 test-loss:0.335916 test-acc : 0.875833 


0,1
train_acc,▁▇▃▃▅▆▅▁▆▇▃▅▅▇▇▂▇▇▃▅▅▇█▃▇▇▄▇▇▇█▃▇▇▄▆▇▇▇▃
train_loss,▆▃█▇▅▅▂█▃▃▆▅▄▄▂▇▂▂▆▅▃▃▂▆▂▂▅▄▃▃▁▅▂▂▄▄▃▃▁▅
val_acc,▁▃▄▃▅▆▆▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█▇▇▇▇██▇███▇██████
val_loss,█▅▅▅▄▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▁▁▂▁▁▁▂▁▁▁▂▁▁

0,1
train_acc,0.8125
train_loss,0.40743
val_acc,0.87583
val_loss,0.33592


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ro996ths with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_32_ac_tanh


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i7gy8xmv with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_16_ac_ReLU
Epoch 1/5 - Batch 3300/3375 - train-loss: 0.563206 train-acc: 0.937500 test-loss:0.544193 test-acc : 0.802000 
Epoch 2/5 - Batch 3300/3375 - train-loss: 0.434479 train-acc: 0.937500 test-loss:0.578895 test-acc : 0.836667 
Epoch 3/5 - Batch 3300/3375 - train-loss: 0.313870 train-acc: 0.875000 test-loss:0.698552 test-acc : 0.768667 
Epoch 4/5 - Batch 3300/3375 - train-loss: 0.426276 train-acc: 0.937500 test-loss:0.582190 test-acc : 0.815167 
Epoch 5/5 - Batch 3300/3375 - train-loss: 0.507809 train-acc: 0.812500 test-loss:0.671549 test-acc : 0.812833 


0,1
train_acc,▁▆▅▅▅▅▆▆▆▆▃██▆▁▅▅█▃▁▁▅█▁█▁█▆▃▃▅▁▆█▆▃▁█▆█
train_loss,▂▂▂▂▃▁▁▁▁▂▁▁▁▁▂▂▂▂▂▂▅▁▁▂▄▂▁▅▂▂▂▂▁▁▂▂█▂▃▂
val_acc,▁▂▂▄▄▆▄▅▆▇▆█▇▇▇▇█▇█▇▇█▇▇▆█▆█▅▇▆▆█▆▇█▃█▇█
val_loss,▅▃▂▄▁▁▂▁▁▁▁▂▂▃▂▄▂▃▁▂▂▅▃▂▃▄▄▄▃▂▂▂▅▃▃▅█▂▂▄

0,1
train_acc,0.8125
train_loss,0.50781
val_acc,0.81283
val_loss,0.67155


[34m[1mwandb[0m: Agent Starting Run: 42xp1tbd with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: random


Starting training with run name: hl_4_bs_32_ac_sigmoid
Epoch 1/5 - Batch 1600/1687 - train-loss: 2.315431 train-acc: 0.093750 test-loss:2.302738 test-acc : 0.103167 
Epoch 2/5 - Batch 1600/1687 - train-loss: 2.315431 train-acc: 0.093750 test-loss:2.302738 test-acc : 0.103167 
Epoch 3/5 - Batch 1600/1687 - train-loss: 2.315431 train-acc: 0.093750 test-loss:2.302738 test-acc : 0.103167 
Epoch 4/5 - Batch 1600/1687 - train-loss: 2.315431 train-acc: 0.093750 test-loss:2.302738 test-acc : 0.103167 
Epoch 5/5 - Batch 1600/1687 - train-loss: 2.315431 train-acc: 0.093750 test-loss:2.302738 test-acc : 0.103167 


0,1
train_acc,▃▁▂▆▂▅▂▅█▆▃▅▂▂▅▅▃▂▃▅▇█▅▆▅▅▂▆█▅▅▃▂▆▂▂▆▅▂█
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▇▄▄▁▇▄▄▃▄▁▄▇▄▄▄▄▃▄▃▁█▇▇▄▄▄▃▆▃▁█▄▃▆▄█▇▄▇
val_loss,▁▄▃▅▃▂▄▁▄▅▅█▂▄▁▅▁▄▃▅▅▃█▂▄▁▄▃▂▃▄▄▃▁▃▅▃█▂▁

0,1
train_acc,0.09375
train_loss,2.31543
val_acc,0.10317
val_loss,2.30274


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qverlvfv with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_3_bs_32_ac_ReLU
Epoch 1/5 - Batch 1600/1687 - train-loss: 0.540969 train-acc: 0.781250 test-loss:0.524432 test-acc : 0.818833 
Epoch 2/5 - Batch 1600/1687 - train-loss: 0.461502 train-acc: 0.812500 test-loss:0.469998 test-acc : 0.835500 
Epoch 3/5 - Batch 1600/1687 - train-loss: 0.432141 train-acc: 0.812500 test-loss:0.440778 test-acc : 0.845833 
Epoch 4/5 - Batch 1600/1687 - train-loss: 0.405985 train-acc: 0.843750 test-loss:0.421516 test-acc : 0.850667 
Epoch 5/5 - Batch 1600/1687 - train-loss: 0.393034 train-acc: 0.843750 test-loss:0.408044 test-acc : 0.857167 


0,1
train_acc,▃▁▇▂▅▅▃▆▄▃▅▅▄▅▅▅▆▆▄▆▅▄▆▇▄▇▅▅▄▅▇▅▆▅█▇▄▅▇▅
train_loss,▆█▃▆▆▆▅▃▄▃▁▄▆▅▄▃▄▅▄▅▆▅▄▄▂▃▄▄▄▄▃▂▃▄▁▄▃▅▃▂
val_acc,▁▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██▇█▇▇▇████▇████████
val_loss,█▅▆▅▄▃▃▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁

0,1
train_acc,0.84375
train_loss,0.39303
val_acc,0.85717
val_loss,0.40804


[34m[1mwandb[0m: Agent Starting Run: 08kxd6l7 with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_3_bs_32_ac_ReLU
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.622370 train-acc: 0.750000 test-loss:0.517791 test-acc : 0.815667 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.597428 train-acc: 0.750000 test-loss:0.527341 test-acc : 0.811333 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.567578 train-acc: 0.750000 test-loss:0.521322 test-acc : 0.813333 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.557149 train-acc: 0.750000 test-loss:0.516112 test-acc : 0.816833 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.553761 train-acc: 0.718750 test-loss:0.512186 test-acc : 0.818000 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.553926 train-acc: 0.718750 test-loss:0.508912 test-acc : 0.818833 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.551040 train-acc: 0.718750 test-loss:0.506504 test-acc : 0.821167 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.551410 train-acc: 0.718750 test-loss:0.506302 test-acc : 0.820167 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.549035 

0,1
train_acc,▇▄▄▂▆▇█▄▁▇▇▂▄▁▇▃▅▅▅▂▃▄▃▅▇▆▅█▄▅▃▄▂█▄▄▃█▃▆
train_loss,▁▆█▆▂▂▆▆▆▃▅▂▃▃▅▅▃▆▅▆▁▆▅▄▃▃█▃▅▅▆▅▆▅▄▅▆▆▆▃
val_acc,▁▂▄▃▇▄▆▂▃▃▄▁▃█▅▇▅▄▁▅▅▃▆▂▆▅▅▅▆▆▆▆▅▆▇▇█▆▇▆
val_loss,█▂▃▁▂▄▂▄▄▃▃▃▂▂▄▂▃▄▁▂▅▂▃▂▂▁▂▃▂▂▂▂▃▂▂▁▁▂▂▃

0,1
train_acc,0.75
train_loss,0.53908
val_acc,0.82283
val_loss,0.50403


[34m[1mwandb[0m: Agent Starting Run: 8z9c9wqa with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_3_bs_64_ac_ReLU


[34m[1mwandb[0m: Agent Starting Run: bjfjk0ea with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


Starting training with run name: hl_5_bs_16_ac_sigmoid
Epoch 1/10 - Batch 3300/3375 - train-loss: 2.330401 train-acc: 0.062500 test-loss:2.315073 test-acc : 0.094333 
Epoch 2/10 - Batch 3300/3375 - train-loss: 2.330387 train-acc: 0.062500 test-loss:2.315049 test-acc : 0.094333 
Epoch 3/10 - Batch 3300/3375 - train-loss: 2.330405 train-acc: 0.062500 test-loss:2.315048 test-acc : 0.094333 
Epoch 4/10 - Batch 3300/3375 - train-loss: 2.330417 train-acc: 0.062500 test-loss:2.315047 test-acc : 0.094333 
Epoch 5/10 - Batch 3300/3375 - train-loss: 2.330423 train-acc: 0.062500 test-loss:2.315047 test-acc : 0.094333 
Epoch 6/10 - Batch 3300/3375 - train-loss: 2.330427 train-acc: 0.062500 test-loss:2.315046 test-acc : 0.094333 
Epoch 7/10 - Batch 3300/3375 - train-loss: 2.330431 train-acc: 0.062500 test-loss:2.315045 test-acc : 0.094333 
Epoch 8/10 - Batch 3300/3375 - train-loss: 2.330434 train-acc: 0.062500 test-loss:2.315045 test-acc : 0.094333 
Epoch 9/10 - Batch 3300/3375 - train-loss: 2.3304

0,1
train_acc,▅▃▅▆▁█▆▃▅▁▅▃▅▆▅▆▅▃▃▁▃▅▅▃▅▅▃▅▁▁▃▁▅▁▆▆▁▅▃▅
train_loss,▆▆▅▅▄▆▇▁▆▆▅▃▄▆▆▄▅▄█▆█▅▅▅▄▅▅▆▆▅▇▃▃▅▄▆▆▆▄▅
val_acc,▆▇▃▁▄▄▄▇▄▄▇▁▃▄▇▆▁▆▆▇▄▆▄▁▆▄▄▇▇▁▁█▄▇▄▆▆▄▃▄
val_loss,▇▅▆▅▂▁▃▅▇▁▃▁▃▃▆▃█▃▂▃▄▆▃█▅▂▂▆▂█▃▃▆▂▄▃▃▃▆▁

0,1
train_acc,0.0625
train_loss,2.33044
val_acc,0.09433
val_loss,2.31504


[34m[1mwandb[0m: Agent Starting Run: pv4hxh85 with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_3_bs_32_ac_sigmoid
Epoch 1/10 - Batch 1600/1687 - train-loss: 2.100288 train-acc: 0.343750 test-loss:2.068513 test-acc : 0.299667 
Epoch 2/10 - Batch 1600/1687 - train-loss: 1.807203 train-acc: 0.250000 test-loss:1.779620 test-acc : 0.304500 
Epoch 3/10 - Batch 1600/1687 - train-loss: 1.777738 train-acc: 0.250000 test-loss:1.755450 test-acc : 0.290833 
Epoch 4/10 - Batch 1600/1687 - train-loss: 1.764734 train-acc: 0.281250 test-loss:1.745440 test-acc : 0.297833 
Epoch 5/10 - Batch 1600/1687 - train-loss: 1.752412 train-acc: 0.281250 test-loss:1.736321 test-acc : 0.331167 
Epoch 6/10 - Batch 1600/1687 - train-loss: 1.738156 train-acc: 0.343750 test-loss:1.725756 test-acc : 0.341833 
Epoch 7/10 - Batch 1600/1687 - train-loss: 1.722905 train-acc: 0.406250 test-loss:1.713915 test-acc : 0.345667 
Epoch 8/10 - Batch 1600/1687 - train-loss: 1.709909 train-acc: 0.406250 test-loss:1.703240 test-acc : 0.355667 
Epoch 9/10 - Batch 1600/1687 - train-loss: 1.7006

0,1
train_acc,▁▅▆▄▅▅▅▄▅▃▅█▄▃▅▅▄▄▄▃▇▃▄▄▃▅▅█▇▅▄▄▆▇▅▇▃▅▅▅
train_loss,██▇▅▅▂▂▃▃▂▃▂▂▂▂▁▂▃▂▂▂▂▁▂▂▂▂▂▂▂▁▂▂▂▂▁▂▁▂▁
val_acc,▁▄▁▂▄▅▃▁▆▄▂▃▅▃▃▄▄▄▅▃▅▅▄▇▅▆▅▆▅▆▆▆█▅▆█▆▅▆█
val_loss,██▇▆▅▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.40625
train_loss,1.69444
val_acc,0.36
val_loss,1.68976


[34m[1mwandb[0m: Agent Starting Run: 6qqlw1n5 with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: random


Starting training with run name: hl_3_bs_64_ac_ReLU
Epoch 1/5 - Batch 800/843 - train-loss: 2.326782 train-acc: 0.109375 test-loss:2.317472 test-acc : 0.103167 
Epoch 2/5 - Batch 800/843 - train-loss: 2.311429 train-acc: 0.109375 test-loss:2.306259 test-acc : 0.103167 
Epoch 3/5 - Batch 800/843 - train-loss: 2.306073 train-acc: 0.109375 test-loss:2.303184 test-acc : 0.103167 
Epoch 4/5 - Batch 800/843 - train-loss: 2.304065 train-acc: 0.109375 test-loss:2.302454 test-acc : 0.103167 
Epoch 5/5 - Batch 800/843 - train-loss: 2.303253 train-acc: 0.109375 test-loss:2.302370 test-acc : 0.103167 


0,1
train_acc,▁▇▁▆█▃▂▅▁▇▁▆█▃▂▅▁▇▁▆█▃▂▅▂▇▁▆█▃▂▅▂▇▁▆▆▃▂▅
train_loss,█▄▃▁▃▃▆▃▄▂▂▁▂▂▄▂▃▂▂▁▂▂▃▂▂▂▁▁▂▂▂▂▂▂▁▁▂▂▂▂
val_acc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁█▁█▁▁▁█▁█▁
val_loss,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.10938
train_loss,2.30325
val_acc,0.10317
val_loss,2.30237


[34m[1mwandb[0m: Agent Starting Run: h1r51u0p with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


Starting training with run name: hl_4_bs_16_ac_ReLU
Epoch 1/5 - Batch 3300/3375 - train-loss: 28.061944 train-acc: 0.125000 test-loss:29.643548 test-acc : 0.140833 
Epoch 2/5 - Batch 3300/3375 - train-loss: 12.951666 train-acc: 0.625000 test-loss:13.382713 test-acc : 0.603833 
Epoch 3/5 - Batch 3300/3375 - train-loss: 5.704693 train-acc: 0.875000 test-loss:6.766394 test-acc : 0.767000   
Epoch 4/5 - Batch 3300/3375 - train-loss: 2.774138 train-acc: 0.812500 test-loss:4.840392 test-acc : 0.765833  
Epoch 5/5 - Batch 3300/3375 - train-loss: 1.356067 train-acc: 0.750000 test-loss:1.645040 test-acc : 0.766500 


0,1
train_acc,▂▂▁▁▂▂▂▁▁▁▄▄▂▃▄▆▆▅▇▇▇▇▇▆█▇▆▆▇▆▇▆▇▆▆▇▇█▆▆
train_loss,▇▇▇█▇▇▇█▆▆▆▄▅▄▄▂▃▂▂▃▃▂▃▃▂▂▁▂▁▂▂▃▁▂▂▃▂▂▂▁
val_acc,▁▁▁▁▁▁▁▁▄▄▆▇▇▆▇█▇▇████████████████████▇█
val_loss,████████▇▇▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁

0,1
train_acc,0.75
train_loss,1.35607
val_acc,0.7665
val_loss,1.64504


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h4j87mmn with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_3_bs_64_ac_ReLU


[34m[1mwandb[0m: Agent Starting Run: 4v21capj with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_64_ac_tanh
Epoch 1/10 - Batch 800/843 - train-loss: 0.529459 train-acc: 0.828125 test-loss:0.473603 test-acc : 0.830500 
Epoch 2/10 - Batch 800/843 - train-loss: 0.504223 train-acc: 0.828125 test-loss:0.426920 test-acc : 0.849333 
Epoch 3/10 - Batch 800/843 - train-loss: 0.444650 train-acc: 0.843750 test-loss:0.419208 test-acc : 0.851167 
Epoch 4/10 - Batch 800/843 - train-loss: 0.415047 train-acc: 0.859375 test-loss:0.416495 test-acc : 0.853167 
Epoch 5/10 - Batch 800/843 - train-loss: 0.406344 train-acc: 0.875000 test-loss:0.404024 test-acc : 0.856500 
Epoch 6/10 - Batch 800/843 - train-loss: 0.421740 train-acc: 0.875000 test-loss:0.395326 test-acc : 0.861500 
Epoch 7/10 - Batch 800/843 - train-loss: 0.438942 train-acc: 0.875000 test-loss:0.393912 test-acc : 0.862167 
Epoch 8/10 - Batch 800/843 - train-loss: 0.450181 train-acc: 0.843750 test-loss:0.393826 test-acc : 0.862333 
Epoch 9/10 - Batch 800/843 - train-loss: 0.458909 train-acc: 0.84375

0,1
train_acc,▁▃▅▇▃█▇▃▄▆█▅▆▄▆▅▇▇▅▇▇▇▄██▅▅▇▆▇█▇▅▅▇█▇▇▅▅
train_loss,▅█▆▄▂▆▅▄▃▂▂▁▅▄▂▁▂▅▄▂▂▁▄▃▂▁▁▂▄▁▂▁▁▄▂▄▂▁▂▄
val_acc,▁▂▂▄▄▅▆▆▆▆▆▆▇▆▆▇▇▇▆▆▇▇▇▇▇▇▇█▇▇▇█▇▇▇▇█▇▇▇
val_loss,█▅▅▄▄▃▃▃▂▂▂▂▂▂▂▁▂▂▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.84375
train_loss,0.45286
val_acc,0.86433
val_loss,0.39411


[34m[1mwandb[0m: Agent Starting Run: 4tsfa35a with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: random


Starting training with run name: hl_3_bs_64_ac_sigmoid
Epoch 1/5 - Batch 800/843 - train-loss: 2.361183 train-acc: 0.156250 test-loss:2.335734 test-acc : 0.102333 
Epoch 2/5 - Batch 800/843 - train-loss: 2.361183 train-acc: 0.156250 test-loss:2.335734 test-acc : 0.102333 
Epoch 3/5 - Batch 800/843 - train-loss: 2.361183 train-acc: 0.156250 test-loss:2.335734 test-acc : 0.102333 
Epoch 4/5 - Batch 800/843 - train-loss: 2.361183 train-acc: 0.156250 test-loss:2.335734 test-acc : 0.102333 
Epoch 5/5 - Batch 800/843 - train-loss: 2.361183 train-acc: 0.156250 test-loss:2.335734 test-acc : 0.102333 


0,1
train_acc,▅▁▅█▅▁▅▅▅▁▅█▅▁▅▅▅▁▅█▅▁▅▅▅▁▅█▅▁▅▅▅▁▅█▅▁▅▅
train_loss,▃▇▁█▃▇▄▄▃▇▁█▃▇▄▄▃▇▁█▃▇▄▄▃▇▁█▃▇▄▄▃▇▁█▃▇▄▄
val_acc,▁▁▁█▇▁▂▅▁▁▁█▇▁▂▅▁▁▁█▇▁▂▅▁▁▁█▇▁▂▅▁▁▁█▇▁▂▅
val_loss,▆▁█▄▅▂▄▃▆▁█▄▅▂▄▃▆▁█▄▅▂▄▃▆▁█▄▅▂▄▃▆▁█▄▅▂▄▃

0,1
train_acc,0.15625
train_loss,2.36118
val_acc,0.10233
val_loss,2.33573


[34m[1mwandb[0m: Agent Starting Run: nmhz8d7s with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_32_ac_tanh
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.598239 train-acc: 0.750000 test-loss:0.551170 test-acc : 0.806167 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.497064 train-acc: 0.812500 test-loss:0.480216 test-acc : 0.831833 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.446249 train-acc: 0.812500 test-loss:0.445909 test-acc : 0.845000 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.417349 train-acc: 0.812500 test-loss:0.424092 test-acc : 0.852333 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.399639 train-acc: 0.843750 test-loss:0.408492 test-acc : 0.859500 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.387234 train-acc: 0.843750 test-loss:0.396530 test-acc : 0.862333 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.378026 train-acc: 0.843750 test-loss:0.386963 test-acc : 0.864833 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.370963 train-acc: 0.843750 test-loss:0.379094 test-acc : 0.867333 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.365375 

0,1
train_acc,▃▁▃▆▃▃▂▂▆▆▃▂▄▇▆▃▃▄▃▆▄▃▅▆▆▃▆▇▆▆▇█▆▄▄▆▆▇▃▆
train_loss,▆█▅▆▄▄▂▃▂▂▄▇▄▃▂▂▄▃▄▅▃▁▂▃▂▂▁▃▃▃▃▄▃▃▂▂▁▂▆▁
val_acc,▁▂▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███▇████▇█████
val_loss,█▅▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.8125
train_loss,0.36085
val_acc,0.87
val_loss,0.36683


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: igdutdr9 with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_tanh


[34m[1mwandb[0m: Agent Starting Run: v2uah48o with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_16_ac_tanh
Epoch 1/10 - Batch 3300/3375 - train-loss: 0.727864 train-acc: 0.875000 test-loss:0.537027 test-acc : 0.806833 
Epoch 2/10 - Batch 3300/3375 - train-loss: 0.615931 train-acc: 0.875000 test-loss:0.464910 test-acc : 0.833667 
Epoch 3/10 - Batch 3300/3375 - train-loss: 0.590026 train-acc: 0.875000 test-loss:0.432259 test-acc : 0.848833 
Epoch 4/10 - Batch 3300/3375 - train-loss: 0.577843 train-acc: 0.875000 test-loss:0.412225 test-acc : 0.854167 
Epoch 5/10 - Batch 3300/3375 - train-loss: 0.566853 train-acc: 0.875000 test-loss:0.398073 test-acc : 0.859667 
Epoch 6/10 - Batch 3300/3375 - train-loss: 0.555590 train-acc: 0.875000 test-loss:0.387283 test-acc : 0.860833 
Epoch 7/10 - Batch 3300/3375 - train-loss: 0.543989 train-acc: 0.875000 test-loss:0.378695 test-acc : 0.861833 
Epoch 8/10 - Batch 3300/3375 - train-loss: 0.532266 train-acc: 0.875000 test-loss:0.371678 test-acc : 0.863667 
Epoch 9/10 - Batch 3300/3375 - train-loss: 0.520752 

0,1
train_acc,█▁▅▇▅▄▃▅▇▅▇█▄▃▃▇█▇▅▄▆▇▇▆█▇▆▆▅▇▇▆▇▆▇▄▅▄▇▅
train_loss,▇▄▇█▄▅▃▂▃▄▄▆▂▃▄▅▆▂▃▂▁▅▂▃▂▃▃▃▄▃▃▂▁▃▁▄▁▃▂▄
val_acc,▁▁▂▃▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇█▇█████████
val_loss,█▆▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.875
train_loss,0.50975
val_acc,0.8685
val_loss,0.36093


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: grmtgqrw with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_16_ac_ReLU
Epoch 1/5 - Batch 3300/3375 - train-loss: 0.458330 train-acc: 0.812500 test-loss:0.405488 test-acc : 0.851167 
Epoch 2/5 - Batch 3300/3375 - train-loss: 0.448167 train-acc: 0.875000 test-loss:0.384101 test-acc : 0.860333 
Epoch 3/5 - Batch 3300/3375 - train-loss: 0.466247 train-acc: 0.875000 test-loss:0.376514 test-acc : 0.866167 
Epoch 4/5 - Batch 3300/3375 - train-loss: 0.582539 train-acc: 0.875000 test-loss:0.364501 test-acc : 0.869167 
Epoch 5/5 - Batch 3300/3375 - train-loss: 0.470253 train-acc: 0.875000 test-loss:0.357949 test-acc : 0.868833 


0,1
train_acc,▄▇▅▅▁▂▇▂▄▄▇▄▇█▅██▅▄▇▅▅▇▅▇▇▄▂▄▅▇▇▄▄▅▅▂▅▄█
train_loss,▄▃▅▂▃█▃▃▄▁▆▁▃▅▂▃▃▃▂▂▃▅▂▃▃▃▂▂▁▄▁▂▃▃▂▂▂▃▂▄
val_acc,▁▅▅▆▆▇▆▇▇▇▇▇▆▇▇▇▇▇▇▇█▇▇▇▇██▇████▇████▇██
val_loss,██▅▄▄▄▄▃▃▄▃▂▄▃▃▃▃▂▃▂▃▃▂▂▂▂▂▂▂▁▂▂▂▂▂▁▂▁▁▂

0,1
train_acc,0.875
train_loss,0.47025
val_acc,0.86883
val_loss,0.35795


[34m[1mwandb[0m: Agent Starting Run: m40bc69w with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_64_ac_ReLU
Epoch 1/10 - Batch 800/843 - train-loss: 0.568014 train-acc: 0.859375 test-loss:0.481400 test-acc : 0.825500 
Epoch 2/10 - Batch 800/843 - train-loss: 0.771992 train-acc: 0.875000 test-loss:0.419379 test-acc : 0.859000 
Epoch 3/10 - Batch 800/843 - train-loss: 0.392286 train-acc: 0.890625 test-loss:0.520120 test-acc : 0.837167 
Epoch 4/10 - Batch 800/843 - train-loss: 0.352823 train-acc: 0.859375 test-loss:0.501049 test-acc : 0.846833 
Epoch 5/10 - Batch 800/843 - train-loss: 0.493624 train-acc: 0.875000 test-loss:0.450769 test-acc : 0.857667 
Epoch 6/10 - Batch 800/843 - train-loss: 0.354506 train-acc: 0.890625 test-loss:0.543059 test-acc : 0.837500 
Epoch 7/10 - Batch 800/843 - train-loss: 0.447164 train-acc: 0.890625 test-loss:0.487420 test-acc : 0.850833 
Epoch 8/10 - Batch 800/843 - train-loss: 0.363185 train-acc: 0.875000 test-loss:0.462935 test-acc : 0.857500 
Epoch 9/10 - Batch 800/843 - train-loss: 0.385318 train-acc: 0.90625

0,1
train_acc,▃▁▃▆▂▆▁▅▆▇▅▃▅█▆▅█▇▆▆▇▃▅▃▆▄▇▆▆▆▇▅▃▅▄▆▇▄▆▅
train_loss,█▆▄▃▅▂▄▇▄▃█▄▆▃▃▂▅▁▃▃▅▃▁▃▄▅▂▇▃▁▄▃▁▁▂▁▁▃▃▃
val_acc,▂▁▅▅▄▇▇▅▆▇▆▇▇▅▇▆▇▇█▇██▇▇▆█████▇▇▇███▇▇▇▇
val_loss,█▆▅▃▂▄▂▁▂▂▃▂▁▄▁▄▁▁▁▃▂▂▁▁▂▃▁▁▄▂▃▂▂▂▄▂▄▂▄▂

0,1
train_acc,0.92188
train_loss,0.31834
val_acc,0.867
val_loss,0.46949


[34m[1mwandb[0m: Agent Starting Run: 8zioh84o with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_3_bs_32_ac_ReLU
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.599617 train-acc: 0.750000 test-loss:0.511347 test-acc : 0.819333 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.552947 train-acc: 0.750000 test-loss:0.459068 test-acc : 0.839667 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.531804 train-acc: 0.687500 test-loss:0.433657 test-acc : 0.848000 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.508047 train-acc: 0.656250 test-loss:0.417528 test-acc : 0.855833 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.474980 train-acc: 0.687500 test-loss:0.405972 test-acc : 0.860167 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.452693 train-acc: 0.718750 test-loss:0.396511 test-acc : 0.862167 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.429227 train-acc: 0.718750 test-loss:0.388515 test-acc : 0.864833 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.410437 train-acc: 0.812500 test-loss:0.382158 test-acc : 0.866667 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.388491 

0,1
train_acc,▂▂▇▂▄▅▃▃▁▅▅▂▅▅▆▇▁▄▂▃█▂▇▅▄▆▅▅▆▂▃▆█▅▇▇█▅▅▆
train_loss,▆▆▂▆▅▄▃▃▅▁▂▃█▂▄▅▃▂▃▃▄▄▁▄▅▄▇▃▂▂▇▃▁▁▁▄▃▂▁▃
val_acc,▁▁▁▁▂▄▃▄▄▄▅▅▅▅▆▆▆▆▆▅▆▆▆▆▆▇▇▇▇▇▇▇█▇██▇▇██
val_loss,█▆▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁

0,1
train_acc,0.78125
train_loss,0.37513
val_acc,0.86967
val_loss,0.3711


[34m[1mwandb[0m: Agent Starting Run: w69jaf9s with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_64_ac_ReLU
Epoch 1/10 - Batch 800/843 - train-loss: 0.570544 train-acc: 0.906250 test-loss:0.441171 test-acc : 0.837333 
Epoch 2/10 - Batch 800/843 - train-loss: 0.549253 train-acc: 0.890625 test-loss:0.466562 test-acc : 0.841667 
Epoch 3/10 - Batch 800/843 - train-loss: 0.439348 train-acc: 0.906250 test-loss:0.461961 test-acc : 0.834500 
Epoch 4/10 - Batch 800/843 - train-loss: 0.456388 train-acc: 0.921875 test-loss:0.509625 test-acc : 0.838833 
Epoch 5/10 - Batch 800/843 - train-loss: 0.554013 train-acc: 0.906250 test-loss:0.493730 test-acc : 0.833000 
Epoch 6/10 - Batch 800/843 - train-loss: 0.486797 train-acc: 0.890625 test-loss:0.495980 test-acc : 0.852000 
Epoch 7/10 - Batch 800/843 - train-loss: 0.570401 train-acc: 0.890625 test-loss:0.474562 test-acc : 0.850333 
Epoch 8/10 - Batch 800/843 - train-loss: 0.442650 train-acc: 0.890625 test-loss:0.415561 test-acc : 0.873833 
Epoch 9/10 - Batch 800/843 - train-loss: 0.507900 train-acc: 0.90625

0,1
train_acc,▁▆▆▅▆█▄▆▆▆▅▅▆▄▇▆▇▅▄▇▆▅▆▆▆▄▆█▆▆▆▆▇▇█▆▆▇▇▅
train_loss,▇▅▇█▂▄▁▃▆▃▄▃▄▆▂▃▃▆▃▃▁▅▁▄▂▅▂▁▅▁▆▂▂▄▂▂▁▁▂▅
val_acc,▁▃▅▅▅▆▅▇▇▅▇▇▅▇▇█▇▅▇█▅█▇▅▆█▇▇█▆▇▇▇██▇▇▇▆█
val_loss,█▇▇▄▄▄▃▂▄▂▃▂▄▁▂▂▁▃▃▁▃▂▂▁▂▁▂▂▁▂▂▃▂▁▁▃▄▃▂▁

0,1
train_acc,0.90625
train_loss,0.57974
val_acc,0.875
val_loss,0.44398


[34m[1mwandb[0m: Agent Starting Run: 38oeb79l with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_16_ac_tanh
Epoch 1/10 - Batch 3300/3375 - train-loss: 0.691990 train-acc: 0.750000 test-loss:0.569332 test-acc : 0.802167 
Epoch 2/10 - Batch 3300/3375 - train-loss: 0.555147 train-acc: 0.875000 test-loss:0.488478 test-acc : 0.829500 
Epoch 3/10 - Batch 3300/3375 - train-loss: 0.495020 train-acc: 0.875000 test-loss:0.450976 test-acc : 0.841833 
Epoch 4/10 - Batch 3300/3375 - train-loss: 0.466350 train-acc: 0.875000 test-loss:0.428249 test-acc : 0.848333 
Epoch 5/10 - Batch 3300/3375 - train-loss: 0.449772 train-acc: 0.875000 test-loss:0.412596 test-acc : 0.854833 
Epoch 6/10 - Batch 3300/3375 - train-loss: 0.437613 train-acc: 0.875000 test-loss:0.400971 test-acc : 0.858333 
Epoch 7/10 - Batch 3300/3375 - train-loss: 0.427212 train-acc: 0.875000 test-loss:0.391881 test-acc : 0.861333 
Epoch 8/10 - Batch 3300/3375 - train-loss: 0.417724 train-acc: 0.875000 test-loss:0.384482 test-acc : 0.865667 
Epoch 9/10 - Batch 3300/3375 - train-loss: 0.408883 

0,1
train_acc,▃▅▇▇▆▄▄▇▇▁▆█▇▇▆▇▅▄▇▅▇▇▄▇▆▅▆▇▇▆▇▆▇▆▄▇▇▇▄▆
train_loss,█▅▆▄▂▃▃▃▄▃▂▃▇▄▆▃▆▁▃▃▄▄▂▄▃▄▃▁▂▄▁▃▄▄▃▅▂▂▁▃
val_acc,▁▂▂▂▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████
val_loss,█▆▅▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.875
train_loss,0.40059
val_acc,0.869
val_loss,0.37292


[34m[1mwandb[0m: Agent Starting Run: ktgf8pqr with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_64_ac_ReLU
Epoch 1/5 - Batch 800/843 - train-loss: 0.577482 train-acc: 0.828125 test-loss:0.483902 test-acc : 0.820500 
Epoch 2/5 - Batch 800/843 - train-loss: 0.437280 train-acc: 0.859375 test-loss:0.420018 test-acc : 0.846833 
Epoch 3/5 - Batch 800/843 - train-loss: 0.424196 train-acc: 0.906250 test-loss:0.397224 test-acc : 0.857167 
Epoch 4/5 - Batch 800/843 - train-loss: 0.406849 train-acc: 0.906250 test-loss:0.389667 test-acc : 0.859333 
Epoch 5/5 - Batch 800/843 - train-loss: 0.393877 train-acc: 0.875000 test-loss:0.394184 test-acc : 0.859833 


0,1
train_acc,▁▂▁▂▄▅▇▂▅▆▄▄▇▅▇▄▅▇▃▅▇▆█▅▆▇▄▄▇▆█▅▆▇▃▅█▆█▄
train_loss,▇▆█▇▅▅▃▆▄▃▅▅▂▄▂▄▃▂▅▅▂▃▂▄▃▂▅▅▂▃▁▄▂▂▄▄▂▂▁▄
val_acc,▁▂▁▄▄▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇███▇▇████████████
val_loss,██▇▅▅▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▂

0,1
train_acc,0.875
train_loss,0.39388
val_acc,0.85983
val_loss,0.39418


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fyjz5l82 with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_3_bs_16_ac_ReLU


[34m[1mwandb[0m: Agent Starting Run: q0si9seh with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_64_ac_ReLU


[34m[1mwandb[0m: Agent Starting Run: mnbqxjjr with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_ReLU
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.590592 train-acc: 0.750000 test-loss:0.481869 test-acc : 0.828833 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.602208 train-acc: 0.687500 test-loss:0.497876 test-acc : 0.823500 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.607154 train-acc: 0.718750 test-loss:0.496297 test-acc : 0.826167 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.575102 train-acc: 0.750000 test-loss:0.490668 test-acc : 0.826667 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.571074 train-acc: 0.750000 test-loss:0.481705 test-acc : 0.830833 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.556258 train-acc: 0.750000 test-loss:0.475093 test-acc : 0.832333 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.551282 train-acc: 0.750000 test-loss:0.473111 test-acc : 0.832500 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.542672 train-acc: 0.750000 test-loss:0.467933 test-acc : 0.836500 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.545436 

0,1
train_acc,▆█▅▄▃▂▁▄▂▅▄▂▆▆▅▄▂▆▆▅▆▃▆▄▄▆▃▆▅▃▅▆▃▃▆▆▆▄▃▃
train_loss,█▄▂▅▅▄▃▂▅▅█▅▃▅▁▁▅█▃▂▅▂▄▅▄▂▁▄▅▃▄▁▄▅▆▄▁█▄▂
val_acc,▂▅▆▅▂▃▇▅▅▁▄▂▆▄▄▅▅▆▄▄▅▇▆▇▂▆▆▁██▂██▇▃▂█▇▇█
val_loss,█▂▁▂▃▃▃▅▃▅▆▅▄▃▃▃▄▅▃▂▄▃▃▃▂▄▂▂▃▂▂▁▃▂▅▂▂▂▁▁

0,1
train_acc,0.78125
train_loss,0.54258
val_acc,0.836
val_loss,0.46879


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ht2j5vhf with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_16_ac_tanh
Epoch 1/10 - Batch 3300/3375 - train-loss: 0.944716 train-acc: 0.687500 test-loss:0.537797 test-acc : 0.807000 
Epoch 2/10 - Batch 3300/3375 - train-loss: 0.869299 train-acc: 0.687500 test-loss:0.476674 test-acc : 0.828333 
Epoch 3/10 - Batch 3300/3375 - train-loss: 0.829362 train-acc: 0.687500 test-loss:0.446055 test-acc : 0.837500 
Epoch 4/10 - Batch 3300/3375 - train-loss: 0.799706 train-acc: 0.687500 test-loss:0.426326 test-acc : 0.847667 
Epoch 5/10 - Batch 3300/3375 - train-loss: 0.775552 train-acc: 0.687500 test-loss:0.412092 test-acc : 0.850333 
Epoch 6/10 - Batch 3300/3375 - train-loss: 0.754681 train-acc: 0.687500 test-loss:0.401125 test-acc : 0.852500 
Epoch 7/10 - Batch 3300/3375 - train-loss: 0.735662 train-acc: 0.687500 test-loss:0.392316 test-acc : 0.854833 
Epoch 8/10 - Batch 3300/3375 - train-loss: 0.717808 train-acc: 0.687500 test-loss:0.385042 test-acc : 0.856333 
Epoch 9/10 - Batch 3300/3375 - train-loss: 0.701046 

0,1
train_acc,▁█▇▂▂▇▄▄▇▇▅▄▄▁▅▇▅▇▁█▇▄▂▄▂▄▇▇▇▄▂▂▇▄▇▇▇▅▅▇
train_loss,▇▄▅▁▃▃▃▂▃▃▆█▁▅█▆▅▂▁▃▂▁▁▁▄▅▂▂▄▂▂▄▂▂▇▃▅▁▂▂
val_acc,▁▃▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██▇█████████████
val_loss,█▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.6875
train_loss,0.68554
val_acc,0.861
val_loss,0.37365


[34m[1mwandb[0m: Agent Starting Run: 9w2uyrsr with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_3_bs_32_ac_ReLU
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.701892 train-acc: 0.718750 test-loss:0.545902 test-acc : 0.805000 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.647614 train-acc: 0.750000 test-loss:0.486744 test-acc : 0.824000 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.612026 train-acc: 0.750000 test-loss:0.455940 test-acc : 0.836333 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.595177 train-acc: 0.750000 test-loss:0.436457 test-acc : 0.843500 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.576747 train-acc: 0.750000 test-loss:0.421114 test-acc : 0.848500 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.567770 train-acc: 0.750000 test-loss:0.409393 test-acc : 0.852167 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.555474 train-acc: 0.781250 test-loss:0.399533 test-acc : 0.856333 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.542609 train-acc: 0.812500 test-loss:0.391231 test-acc : 0.859500 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.528129 

0,1
train_acc,▅▁▁▄▅▅▂▅▆▇▅▄▄▄▅▂▇▆▅▅▄▄▆▆▆▇▂▇▆▅▇▆▆▅▇▅▆▅▅█
train_loss,█▃▅▂▁▇▄▄▂▁▅▃▅▂▃▅▄▂▄▃▅▃▄▁▂▂▃▂▂▂▂▅▄▃▂▂▃▄▂▄
val_acc,▁▃▄▃▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████▇█████████
val_loss,█▆▄▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.8125
train_loss,0.52091
val_acc,0.864
val_loss,0.37731


[34m[1mwandb[0m: Agent Starting Run: uqv5oxqr with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_64_ac_ReLU
Epoch 1/10 - Batch 800/843 - train-loss: 0.606106 train-acc: 0.765625 test-loss:0.518613 test-acc : 0.816667 
Epoch 2/10 - Batch 800/843 - train-loss: 0.524344 train-acc: 0.781250 test-loss:0.460785 test-acc : 0.837500 
Epoch 3/10 - Batch 800/843 - train-loss: 0.482745 train-acc: 0.796875 test-loss:0.433823 test-acc : 0.847667 
Epoch 4/10 - Batch 800/843 - train-loss: 0.467768 train-acc: 0.812500 test-loss:0.419739 test-acc : 0.851667 
Epoch 5/10 - Batch 800/843 - train-loss: 0.457675 train-acc: 0.843750 test-loss:0.410052 test-acc : 0.856667 
Epoch 6/10 - Batch 800/843 - train-loss: 0.451070 train-acc: 0.843750 test-loss:0.403816 test-acc : 0.859667 
Epoch 7/10 - Batch 800/843 - train-loss: 0.444790 train-acc: 0.843750 test-loss:0.400030 test-acc : 0.861167 
Epoch 8/10 - Batch 800/843 - train-loss: 0.441253 train-acc: 0.843750 test-loss:0.396927 test-acc : 0.860167 
Epoch 9/10 - Batch 800/843 - train-loss: 0.433746 train-acc: 0.84375

0,1
train_acc,▂▁▄▇▆▄▃▃▃▅▇▃▅█▄▇▃▅▅█▇▅█▇▇▆▅▇▅▅▅▇▆▄▅▇▅▄▆▅
train_loss,▇█▅▂▃▅▆▄▅▂▃▂▅▄▂▃▁▂▂▄▁▄▂▂▅▄▄▄▃▂▄▄▄▂▄▁▄▂▄▁
val_acc,▁▃▄▅▆▆▅▆▆▆▇▆▇▇▇▆▇▇▇▇▇██▇▇▇▇█████████████
val_loss,█▆▄▄▃▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.85938
train_loss,0.4274
val_acc,0.85967
val_loss,0.39305


[34m[1mwandb[0m: Agent Starting Run: 2984qfcz with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_64_ac_ReLU
Epoch 1/10 - Batch 800/843 - train-loss: 0.410087 train-acc: 0.828125 test-loss:0.413135 test-acc : 0.850000 
Epoch 2/10 - Batch 800/843 - train-loss: 0.458581 train-acc: 0.843750 test-loss:0.402105 test-acc : 0.856833 
Epoch 3/10 - Batch 800/843 - train-loss: 0.427991 train-acc: 0.828125 test-loss:0.359271 test-acc : 0.871500 
Epoch 4/10 - Batch 800/843 - train-loss: 0.425713 train-acc: 0.812500 test-loss:0.346384 test-acc : 0.871833 
Epoch 5/10 - Batch 800/843 - train-loss: 0.431678 train-acc: 0.812500 test-loss:0.343925 test-acc : 0.874833 
Epoch 6/10 - Batch 800/843 - train-loss: 0.319672 train-acc: 0.812500 test-loss:0.353282 test-acc : 0.872500 
Epoch 7/10 - Batch 800/843 - train-loss: 0.316882 train-acc: 0.859375 test-loss:0.351729 test-acc : 0.872500 
Epoch 8/10 - Batch 800/843 - train-loss: 0.325432 train-acc: 0.843750 test-loss:0.349922 test-acc : 0.877167 
Epoch 9/10 - Batch 800/843 - train-loss: 0.374026 train-acc: 0.87500

0,1
train_acc,▁▅▂▁▃▃▅▇▃▂▇▄▆▅▃▇▅▅▃▇▇▆█▃▆▅▇▄▇▇▅▇▇▇▅▇▅▇▆▅
train_loss,▆▄█▆▄▃▆▃▃▁▂▄▂▁▄▄▃▁▂▃▂▁▄▂▂▂▃▃▃▁▃▁▁▃▂▁▄▁▂▂
val_acc,▁▁▄▄▅▆▆▆▆▇▇▇▇▇▇▆▇▇▇▇▇█▇▇█▇▇▇▇▇▇█▇▇███▇▇█
val_loss,▆█▅▄▃▃▅▂▃▃▂▂▂▂▂▂▂▂▂▁▂▂▁▃▂▁▂▁▂▃▁▂▂▂▁▂▂▂▁▂

0,1
train_acc,0.875
train_loss,0.27293
val_acc,0.87617
val_loss,0.36101


[34m[1mwandb[0m: Agent Starting Run: 1s5srwy8 with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_32_ac_ReLU


[34m[1mwandb[0m: Agent Starting Run: m6atpw9w with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_64_ac_ReLU
Epoch 1/10 - Batch 800/843 - train-loss: 0.597292 train-acc: 0.750000 test-loss:0.511917 test-acc : 0.816000 
Epoch 2/10 - Batch 800/843 - train-loss: 0.515881 train-acc: 0.796875 test-loss:0.463609 test-acc : 0.836833 
Epoch 3/10 - Batch 800/843 - train-loss: 0.477593 train-acc: 0.796875 test-loss:0.438108 test-acc : 0.846667 
Epoch 4/10 - Batch 800/843 - train-loss: 0.456247 train-acc: 0.828125 test-loss:0.422525 test-acc : 0.851500 
Epoch 5/10 - Batch 800/843 - train-loss: 0.451331 train-acc: 0.828125 test-loss:0.411990 test-acc : 0.852667 
Epoch 6/10 - Batch 800/843 - train-loss: 0.444223 train-acc: 0.828125 test-loss:0.404774 test-acc : 0.855167 
Epoch 7/10 - Batch 800/843 - train-loss: 0.439188 train-acc: 0.828125 test-loss:0.400408 test-acc : 0.855833 
Epoch 8/10 - Batch 800/843 - train-loss: 0.430451 train-acc: 0.828125 test-loss:0.397243 test-acc : 0.857833 
Epoch 9/10 - Batch 800/843 - train-loss: 0.425074 train-acc: 0.82812

0,1
train_acc,▁▁▂▅▃▅▅▅▇▃▃▅▅█▅▃▆█▃▅▆▅▆█▇▃█▄▇▇▄▅▇▄▇▅▅▅▇▄
train_loss,▅█▇▅▆▆▆▄▄▅▄▃▂▃▁▂▂▃▄▂▅▄▃▃▁▃▁▄▂▄▄▂▁▄▂▄▄▂▃▄
val_acc,▁▃▆▆▆▆▇▇▆▇▇▇▇█▇▇███▇████████████████████
val_loss,█▅▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.82812
train_loss,0.42181
val_acc,0.8605
val_loss,0.3928


[34m[1mwandb[0m: Agent Starting Run: 72fowl5n with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_16_ac_ReLU
Epoch 1/10 - Batch 3300/3375 - train-loss: 0.722127 train-acc: 0.750000 test-loss:0.538294 test-acc : 0.809833 
Epoch 2/10 - Batch 3300/3375 - train-loss: 0.657559 train-acc: 0.812500 test-loss:0.513550 test-acc : 0.828333 
Epoch 3/10 - Batch 3300/3375 - train-loss: 0.667776 train-acc: 0.812500 test-loss:0.517470 test-acc : 0.828833 
Epoch 4/10 - Batch 3300/3375 - train-loss: 0.680665 train-acc: 0.750000 test-loss:0.529436 test-acc : 0.826333 
Epoch 5/10 - Batch 3300/3375 - train-loss: 0.681721 train-acc: 0.750000 test-loss:0.541601 test-acc : 0.823500 
Epoch 6/10 - Batch 3300/3375 - train-loss: 0.685159 train-acc: 0.750000 test-loss:0.551123 test-acc : 0.823667 
Epoch 7/10 - Batch 3300/3375 - train-loss: 0.689145 train-acc: 0.750000 test-loss:0.558492 test-acc : 0.821167 
Epoch 8/10 - Batch 3300/3375 - train-loss: 0.689798 train-acc: 0.750000 test-loss:0.564099 test-acc : 0.820333 
Epoch 9/10 - Batch 3300/3375 - train-loss: 0.691309 

0,1
train_acc,▅▅▄▁██▁▇█▅▄▄▅█▂██▇▅▄█▅▇▁▅▅▇█▇▇▅▄▁▄▇▅▄█▇▅
train_loss,▄▁▂▄▄▄▂▂▅▃▂▄▂▂▂▄▄█▄▂▂▄▄▂▃▅▂▄▂▃█▃▃▂▂▁▃█▃▂
val_acc,▁▂▂▂▅▆▆▇▇▇▇▇▆█▇▆▇█▆▇▇▇▇▇▇▇▆▆▇▇▇▆▆▇▇▆▆▆▇▇
val_loss,█▆▃▂▂▁▁▁▂▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▂▃▃▂▃▃▃▃▃▃▃▃▃▃▄▃

0,1
train_acc,0.75
train_loss,0.69329
val_acc,0.819
val_loss,0.56953


[34m[1mwandb[0m: Agent Starting Run: xrr5hlxu with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_tanh
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.645303 train-acc: 0.750000 test-loss:0.558653 test-acc : 0.801500 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.546843 train-acc: 0.781250 test-loss:0.481208 test-acc : 0.827500 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.492347 train-acc: 0.781250 test-loss:0.445256 test-acc : 0.843333 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.454608 train-acc: 0.812500 test-loss:0.422932 test-acc : 0.852000 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.431422 train-acc: 0.812500 test-loss:0.407011 test-acc : 0.855500 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.416809 train-acc: 0.843750 test-loss:0.394782 test-acc : 0.860667 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.406466 train-acc: 0.843750 test-loss:0.384995 test-acc : 0.863000 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.398096 train-acc: 0.843750 test-loss:0.376953 test-acc : 0.865667 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.390604 

0,1
train_acc,▁▂▃▃▄▃▆▆▆▇▆▇▅▅▇▆▇▆▆▆▅▆▇▄▆▅▇▆▇▆▆▅█▆▆▆▇▇▆▇
train_loss,▄▅▆█▅▂▇▅▄▃▄▇▃▂▃▃▁▅▆▃▁▆▂▂▃▄▆▃▂▅▂▃▁▄▃▂▃▂▂▂
val_acc,▁▂▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇██▇█████████████
val_loss,█▆▅▅▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.84375
train_loss,0.38354
val_acc,0.86883
val_loss,0.36446


[34m[1mwandb[0m: Agent Starting Run: 19vy1w0d with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_16_ac_ReLU
Epoch 1/10 - Batch 3300/3375 - train-loss: 0.647014 train-acc: 0.812500 test-loss:0.401158 test-acc : 0.857167 
Epoch 2/10 - Batch 3300/3375 - train-loss: 0.425907 train-acc: 0.812500 test-loss:0.376913 test-acc : 0.860667 
Epoch 3/10 - Batch 3300/3375 - train-loss: 0.417188 train-acc: 0.875000 test-loss:0.353416 test-acc : 0.870167 
Epoch 4/10 - Batch 3300/3375 - train-loss: 0.354973 train-acc: 0.875000 test-loss:0.367022 test-acc : 0.866833 
Epoch 5/10 - Batch 3300/3375 - train-loss: 0.372272 train-acc: 0.875000 test-loss:0.349307 test-acc : 0.876167 
Epoch 6/10 - Batch 3300/3375 - train-loss: 0.366347 train-acc: 0.937500 test-loss:0.384079 test-acc : 0.863833 
Epoch 7/10 - Batch 3300/3375 - train-loss: 0.341399 train-acc: 0.875000 test-loss:0.367544 test-acc : 0.869667 
Epoch 8/10 - Batch 3300/3375 - train-loss: 0.410472 train-acc: 0.875000 test-loss:0.351849 test-acc : 0.878833 
Epoch 9/10 - Batch 3300/3375 - train-loss: 0.364767 

0,1
train_acc,▁▅▄▇▄▇▅▇▇█▄███▅▅▅▅▄█▅▅▇▇█▅▄▅▇█▇▇█▄▄▅███▇
train_loss,█▅▂▄▅▄▂▇▃▃▅▃▄▁▄▄▂▂▄▃▃▅▄▃▄▅▄▆▂▄▄▁▃▁▅▂▄▄▂▃
val_acc,▁▄▅▆▇▆▆▆▇▇▇▆▆▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇██▇▇████▇▇
val_loss,▇█▅▅▅▃▂▃▃▂▄▂▃▄▃▂▂▂▃▂▂▂▃▃▁▂▁▂▂▂▂▂▁▂▁▁▂▁▂▂

0,1
train_acc,0.9375
train_loss,0.39038
val_acc,0.87517
val_loss,0.3582


[34m[1mwandb[0m: Agent Starting Run: y1tx2lla with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_ReLU
Epoch 1/5 - Batch 1600/1687 - train-loss: 0.583312 train-acc: 0.718750 test-loss:0.474326 test-acc : 0.836667 
Epoch 2/5 - Batch 1600/1687 - train-loss: 0.567242 train-acc: 0.781250 test-loss:0.496766 test-acc : 0.828333 
Epoch 3/5 - Batch 1600/1687 - train-loss: 0.560170 train-acc: 0.750000 test-loss:0.499395 test-acc : 0.826333 
Epoch 4/5 - Batch 1600/1687 - train-loss: 0.542503 train-acc: 0.750000 test-loss:0.493438 test-acc : 0.828667 
Epoch 5/5 - Batch 1600/1687 - train-loss: 0.530123 train-acc: 0.750000 test-loss:0.485691 test-acc : 0.830833 


0,1
train_acc,▃▄▇▆▅▅▆▆▇▂▅▂▅▅▇█▅▅▅▄▅▅▆▆▅▅▅▅▃▁▅▆▆█▅▃▃▆▆▃
train_loss,▅▁▄▅▅▄▃▅▃▂▄▄█▅▃▄▂▄▁▅█▅▃▂▂▄▁▆▅▆▄▂▅▄▁▅▅▃▂▄
val_acc,▁▃▄▇█▇▅█▆▇▇▆▆▇▇▇▇▇▆▇▇▇▇▇▆▆▆▇▇▅▇▅▇▇▆▇█▆▇▇
val_loss,▄▃▂▁▂▅▄▂▄▄▃▄▄▆▅▃▅▇▅▆▄▅▅▄█▅█▅▅▅▄▄▅▃▄▅▃▅▇▄

0,1
train_acc,0.75
train_loss,0.53012
val_acc,0.83083
val_loss,0.48569


[34m[1mwandb[0m: Agent Starting Run: 7f928l0y with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_tanh
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.644315 train-acc: 0.718750 test-loss:0.542176 test-acc : 0.808000 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.554608 train-acc: 0.781250 test-loss:0.471653 test-acc : 0.832000 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.517230 train-acc: 0.843750 test-loss:0.437506 test-acc : 0.844833 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.491840 train-acc: 0.812500 test-loss:0.416152 test-acc : 0.853833 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.471524 train-acc: 0.781250 test-loss:0.401113 test-acc : 0.857000 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.454511 train-acc: 0.750000 test-loss:0.389662 test-acc : 0.860667 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.439681 train-acc: 0.781250 test-loss:0.380488 test-acc : 0.863167 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.426339 train-acc: 0.781250 test-loss:0.372892 test-acc : 0.867500 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.414181 

0,1
train_acc,▁▅▄▅▆▇▃▅▅█▄▃▇▇█▅▇▅▇▅▆▃▅▆▇▅▆▄▇▄▇▅▅▇▄▇▄▅▅▇
train_loss,█▇▆▃▂▅▆▅▂▆▅▃▆▂▄▃▄▄▅▂▆▄█▅▅▂▅▄▄▂▆▄▅▂▃▄▃▁▃▄
val_acc,▁▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇███████████████
val_loss,██▇▆▆▅▅▅▄▄▃▃▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.78125
train_loss,0.40309
val_acc,0.87283
val_loss,0.36094


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e3diryi0 with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_64_ac_ReLU
Epoch 1/10 - Batch 800/843 - train-loss: 0.514422 train-acc: 0.828125 test-loss:0.416195 test-acc : 0.848667 
Epoch 2/10 - Batch 800/843 - train-loss: 0.528499 train-acc: 0.843750 test-loss:0.403032 test-acc : 0.859333 
Epoch 3/10 - Batch 800/843 - train-loss: 0.451599 train-acc: 0.875000 test-loss:0.364494 test-acc : 0.869667 
Epoch 4/10 - Batch 800/843 - train-loss: 0.366415 train-acc: 0.906250 test-loss:0.340145 test-acc : 0.875833 
Epoch 5/10 - Batch 800/843 - train-loss: 0.336966 train-acc: 0.890625 test-loss:0.330622 test-acc : 0.882000 
Epoch 6/10 - Batch 800/843 - train-loss: 0.317552 train-acc: 0.921875 test-loss:0.324490 test-acc : 0.883833 
Epoch 7/10 - Batch 800/843 - train-loss: 0.261819 train-acc: 0.921875 test-loss:0.323711 test-acc : 0.885333 
Epoch 8/10 - Batch 800/843 - train-loss: 0.268593 train-acc: 0.937500 test-loss:0.334211 test-acc : 0.887167 
Epoch 9/10 - Batch 800/843 - train-loss: 0.256090 train-acc: 0.93750

0,1
train_acc,▂▃▁▇▃▅▄▂▅▅▆▆▃▅▇▅▆▄▅▇▅█▆▄▅▆▇▅▅▅▆▇▆▆▆▆▅▆▇▆
train_loss,█▆▂▅▃▅▆▃▄▅▃▄▃▄▂▅▃▁▂▂▃▁▃▂▃▃▁▃▃▄▁▂▄▃▂▂▃▄▂▁
val_acc,▁▂▄▄▄▅▆▅▅▅▇▆▆▇▆▇▆▆▆▇▆▆▇▇▇▇▇▇▇▆▇▇▇██▇▆▇▇█
val_loss,█▆▆▅▄▃▃▃▂▃▂▂▂▂▂▂▁▂▂▂▁▂▂▂▂▁▂▂▁▂▁▁▂▂▁▂▂▂▂▂

0,1
train_acc,0.9375
train_loss,0.28092
val_acc,0.88567
val_loss,0.34039


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jzucf1ap with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_ReLU
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.650465 train-acc: 0.750000 test-loss:0.476205 test-acc : 0.824333 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.516672 train-acc: 0.750000 test-loss:0.422773 test-acc : 0.846833 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.482037 train-acc: 0.781250 test-loss:0.417100 test-acc : 0.852000 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.486252 train-acc: 0.750000 test-loss:0.421349 test-acc : 0.855500 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.490124 train-acc: 0.781250 test-loss:0.429998 test-acc : 0.854000 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.496383 train-acc: 0.750000 test-loss:0.436400 test-acc : 0.854000 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.491000 train-acc: 0.750000 test-loss:0.440393 test-acc : 0.852333 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.493270 train-acc: 0.781250 test-loss:0.444037 test-acc : 0.850500 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.494433 

0,1
train_acc,▃▂▇▄▁▂▆▆▄▄▅▄▅▄▃▆▃▅▄█▇▄▃▇▇▅▃▄▄▇▇▃▇▆▃▇▃▄▃▃
train_loss,▃▇▅▆▄▄█▅▄▅▁▅▂▃▁▄▅▃▂▁▅▅▂▆█▃▄▁▄▅▃▃▅▅▃▂▅▁█▃
val_acc,▁▂▃▄▅▇▇▇▇█████▇███▇████▇▇▇▆▇▇▇█▇█▇▇▇█▇██
val_loss,█▇▃▃▂▂▂▂▁▁▂▃▂▁▁▁▂▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂

0,1
train_acc,0.78125
train_loss,0.48743
val_acc,0.84883
val_loss,0.44665


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3wwc6f7f with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_64_ac_ReLU
Epoch 1/10 - Batch 800/843 - train-loss: 0.533271 train-acc: 0.765625 test-loss:0.519532 test-acc : 0.816833 
Epoch 2/10 - Batch 800/843 - train-loss: 0.455887 train-acc: 0.765625 test-loss:0.466336 test-acc : 0.834667 
Epoch 3/10 - Batch 800/843 - train-loss: 0.419563 train-acc: 0.812500 test-loss:0.438813 test-acc : 0.843167 
Epoch 4/10 - Batch 800/843 - train-loss: 0.395129 train-acc: 0.843750 test-loss:0.419752 test-acc : 0.850167 
Epoch 5/10 - Batch 800/843 - train-loss: 0.380178 train-acc: 0.828125 test-loss:0.407023 test-acc : 0.856000 
Epoch 6/10 - Batch 800/843 - train-loss: 0.369929 train-acc: 0.859375 test-loss:0.398708 test-acc : 0.859333 
Epoch 7/10 - Batch 800/843 - train-loss: 0.360396 train-acc: 0.875000 test-loss:0.390960 test-acc : 0.859500 
Epoch 8/10 - Batch 800/843 - train-loss: 0.352637 train-acc: 0.875000 test-loss:0.385575 test-acc : 0.863000 
Epoch 9/10 - Batch 800/843 - train-loss: 0.345506 train-acc: 0.87500

0,1
train_acc,▃▄▃▅▂▁▅▄▇▂▂▅▃▇▅█▄▇▅▄▇▆▅▅▇▄▇▅▆▄▅▅▇▅▇▅▅▇▅▅
train_loss,█▆▆▅▃▄▃▅▄▂▂▄▄▄▁▂▄▄▃▃▂▄▃▃▃▃▃▁▃▄▁▂▃▄▃▁▃▄▃▃
val_acc,▁▅▆▅▆▆▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇█▇▇▇▇██▇▇███████▇██
val_loss,█▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▂▁▁▂▁▁▁▁▁

0,1
train_acc,0.875
train_loss,0.33862
val_acc,0.867
val_loss,0.37586


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: attst9h6 with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_16_ac_ReLU
Epoch 1/10 - Batch 3300/3375 - train-loss: 0.592223 train-acc: 0.937500 test-loss:0.498950 test-acc : 0.829500 
Epoch 2/10 - Batch 3300/3375 - train-loss: 0.647355 train-acc: 0.875000 test-loss:0.438386 test-acc : 0.844667 
Epoch 3/10 - Batch 3300/3375 - train-loss: 0.562370 train-acc: 0.875000 test-loss:0.418009 test-acc : 0.859333 
Epoch 4/10 - Batch 3300/3375 - train-loss: 0.521839 train-acc: 0.875000 test-loss:0.417532 test-acc : 0.860667 
Epoch 5/10 - Batch 3300/3375 - train-loss: 0.492961 train-acc: 0.812500 test-loss:0.416052 test-acc : 0.866000 
Epoch 6/10 - Batch 3300/3375 - train-loss: 0.469559 train-acc: 0.812500 test-loss:0.419047 test-acc : 0.863167 
Epoch 7/10 - Batch 3300/3375 - train-loss: 0.479510 train-acc: 0.937500 test-loss:0.442405 test-acc : 0.860500 
Epoch 8/10 - Batch 3300/3375 - train-loss: 0.464191 train-acc: 0.937500 test-loss:0.451273 test-acc : 0.861667 
Epoch 9/10 - Batch 3300/3375 - train-loss: 0.334293 

0,1
train_acc,▁▆█▆▆▅▅▆▆██▆█▆██▆██▃█▆██▆▆█▆▅▆██▆███▃█▃▅
train_loss,▂▇▆▄▄▂▂▂▄█▄█▁▂▁▃▃▃▄▃▁▃▃▁▃▇▁▄▂▄▁▆▅▂▁▃▃▁▄▃
val_acc,▁▂▅▅▅▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇██▇▇█▇██▇█▇▇█████
val_loss,█▇▃▃▂▂▃▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▂▂▁▁▂▂▁▁▁▁▁▁▁▂▂▂

0,1
train_acc,0.9375
train_loss,0.36721
val_acc,0.85633
val_loss,0.49904


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 72aqeckn with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_16_ac_ReLU
Epoch 1/10 - Batch 3300/3375 - train-loss: 0.751342 train-acc: 0.812500 test-loss:0.664718 test-acc : 0.777833 
Epoch 2/10 - Batch 3300/3375 - train-loss: 0.703027 train-acc: 0.750000 test-loss:0.639768 test-acc : 0.783000 
Epoch 3/10 - Batch 3300/3375 - train-loss: 0.677467 train-acc: 0.750000 test-loss:0.621056 test-acc : 0.787667 
Epoch 4/10 - Batch 3300/3375 - train-loss: 0.680186 train-acc: 0.750000 test-loss:0.611985 test-acc : 0.792500 
Epoch 5/10 - Batch 3300/3375 - train-loss: 0.676035 train-acc: 0.750000 test-loss:0.607471 test-acc : 0.793167 
Epoch 6/10 - Batch 3300/3375 - train-loss: 0.670626 train-acc: 0.750000 test-loss:0.606745 test-acc : 0.793500 
Epoch 7/10 - Batch 3300/3375 - train-loss: 0.657097 train-acc: 0.750000 test-loss:0.606370 test-acc : 0.793000 
Epoch 8/10 - Batch 3300/3375 - train-loss: 0.649018 train-acc: 0.750000 test-loss:0.604106 test-acc : 0.794000 
Epoch 9/10 - Batch 3300/3375 - train-loss: 0.648955 

0,1
train_acc,▆▇▁▆▇▄▅▅█▅▃▅▄▁▅▇▇█▇▆▄▂▅▅█▅▅▆▅▄▅▅▇▅▄▇▂▇▅▅
train_loss,▃▃▅▄▇▄▇▄▂▃▄▇▃▅█▅▁▆▃▅▆▃▁▂▅▅▇▅▁▂█▂▅▁▁▇▃▁▃▃
val_acc,█▆▁▅▇▃▅▇▇▇▆▆▆▆▇█▇▆▆▆▇█▇▇▆▄▆█▆▇▆█▇█▇▇▇██▇
val_loss,▅▃▄█▃▄▂▂▃▂▃▂▃▂▂▂▁▁▂▁▁▃▁▂▂▂▃▁▂▄▂▁▃▂▂▂▁▁▁▂

0,1
train_acc,0.75
train_loss,0.64714
val_acc,0.79417
val_loss,0.60401


[34m[1mwandb[0m: Agent Starting Run: lyx5cekf with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_64_ac_ReLU
Epoch 1/10 - Batch 800/843 - train-loss: 0.479883 train-acc: 0.812500 test-loss:0.508152 test-acc : 0.817833 
Epoch 2/10 - Batch 800/843 - train-loss: 0.383099 train-acc: 0.875000 test-loss:0.450214 test-acc : 0.838500 
Epoch 3/10 - Batch 800/843 - train-loss: 0.367412 train-acc: 0.859375 test-loss:0.423411 test-acc : 0.848500 
Epoch 4/10 - Batch 800/843 - train-loss: 0.344948 train-acc: 0.875000 test-loss:0.409240 test-acc : 0.853500 
Epoch 5/10 - Batch 800/843 - train-loss: 0.335872 train-acc: 0.859375 test-loss:0.397517 test-acc : 0.855000 
Epoch 6/10 - Batch 800/843 - train-loss: 0.323108 train-acc: 0.875000 test-loss:0.389901 test-acc : 0.857000 
Epoch 7/10 - Batch 800/843 - train-loss: 0.316115 train-acc: 0.843750 test-loss:0.384856 test-acc : 0.860000 
Epoch 8/10 - Batch 800/843 - train-loss: 0.316894 train-acc: 0.859375 test-loss:0.380778 test-acc : 0.862667 
Epoch 9/10 - Batch 800/843 - train-loss: 0.312371 train-acc: 0.85937

0,1
train_acc,▃▁▆▅▆▅▆▆▆▆▄▇█▆▇▅▇▅█▆▄▅▇█▆▄▅▅▇▄▅█▆▇▄▆▅▆▅▅
train_loss,▇▇██▄▅▅▄▂▃▂▃▆▅▂▅▄▃▂▅▃▁▃▂▃▅▄▃▁▂▂▃▂▂▄▁▃▂▅▁
val_acc,▁▃▃▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████▇████▇██████
val_loss,█▇▆▅▅▄▃▃▃▃▃▃▂▃▂▃▂▂▂▂▂▂▂▁▂▂▂▁▂▁▂▂▁▁▂▁▁▁▂▂

0,1
train_acc,0.84375
train_loss,0.30877
val_acc,0.865
val_loss,0.37162


[34m[1mwandb[0m: Agent Starting Run: p6t475he with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_ReLU
Epoch 1/5 - Batch 1600/1687 - train-loss: 0.528863 train-acc: 0.781250 test-loss:0.413693 test-acc : 0.849667 
Epoch 2/5 - Batch 1600/1687 - train-loss: 0.492435 train-acc: 0.781250 test-loss:0.362282 test-acc : 0.865833 
Epoch 3/5 - Batch 1600/1687 - train-loss: 0.337495 train-acc: 0.906250 test-loss:0.352361 test-acc : 0.876500 
Epoch 4/5 - Batch 1600/1687 - train-loss: 0.333489 train-acc: 0.875000 test-loss:0.349628 test-acc : 0.878833 
Epoch 5/5 - Batch 1600/1687 - train-loss: 0.353691 train-acc: 0.906250 test-loss:0.342601 test-acc : 0.878667 


0,1
train_acc,▇▁▅▃▄▅▆▂▇▆▆▅▃▆▃▅█▇▆▇▇▄▂▆▇▆█▅▅▅▅▂▅▅▆▅▅▆▅█
train_loss,▄█▅▆█▃▅▁▅▅█▄▅▃▅▄▃▅▅▂▄▃▃▄▁▅▅▄▄▂▄▃▁▅▅▃▄▆▃▄
val_acc,▁▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▆▇▇██▇▇▆▇▇▇███▇█▇██
val_loss,█▇▅▅▄▃▄▄▃▂▃▃▂▂▂▂▂▂▂▁▂▂▁▂▃▁▁▂▂▁▂▁▁▁▁▁▂▁▂▁

0,1
train_acc,0.90625
train_loss,0.35369
val_acc,0.87867
val_loss,0.3426


[34m[1mwandb[0m: Agent Starting Run: 5zct9wbl with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_ReLU
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.733439 train-acc: 0.812500 test-loss:0.446368 test-acc : 0.846333 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.762523 train-acc: 0.812500 test-loss:0.554798 test-acc : 0.837000 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.792825 train-acc: 0.875000 test-loss:0.602717 test-acc : 0.831000 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.522835 train-acc: 0.843750 test-loss:0.629505 test-acc : 0.831667 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.788210 train-acc: 0.843750 test-loss:0.657997 test-acc : 0.822667 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.811988 train-acc: 0.781250 test-loss:0.533914 test-acc : 0.836167 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.584218 train-acc: 0.843750 test-loss:0.537721 test-acc : 0.866833 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.509553 train-acc: 0.843750 test-loss:0.657025 test-acc : 0.838000 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.664448 

0,1
train_acc,▆▃▄▅▅▆▇▇▇▅▂▆▅▅▅▅▅█▅▆▄▅▄▅▇▅▁▅▅▇▆▅█▂▅▅▅█▂▅
train_loss,▄▂▃▃▃▂▂▁▂▃▃▂▂▂▃▃▂▂▃▃▂▄█▂▁▁▃▆▄▂▂▁▇▁▄▄▁▃▂▃
val_acc,▁▂▅▆▇▅▇▇▆▅▄▆▆▇█▆▇█▅▆▆▄▅▇█▆▄█▆█▂█▇▃▂▃▃▃▆█
val_loss,▃▃▃▂▁▂▂▂▂▃▃▂▂▂▄▃▂▂▂▂▃▄▄▄▃██▃▄▄▂▃▂▂▄▄█▄▃▄

0,1
train_acc,0.78125
train_loss,0.64289
val_acc,0.82667
val_loss,0.91597


[34m[1mwandb[0m: Agent Starting Run: g7hz8mux with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_ReLU
Epoch 1/10 - Batch 1600/1687 - train-loss: 0.826099 train-acc: 0.750000 test-loss:0.428974 test-acc : 0.847167 
Epoch 2/10 - Batch 1600/1687 - train-loss: 0.564152 train-acc: 0.750000 test-loss:0.391027 test-acc : 0.859500 
Epoch 3/10 - Batch 1600/1687 - train-loss: 0.487730 train-acc: 0.875000 test-loss:0.369982 test-acc : 0.870667 
Epoch 4/10 - Batch 1600/1687 - train-loss: 0.440671 train-acc: 0.781250 test-loss:0.395212 test-acc : 0.863000 
Epoch 5/10 - Batch 1600/1687 - train-loss: 0.444301 train-acc: 0.812500 test-loss:0.346352 test-acc : 0.877167 
Epoch 6/10 - Batch 1600/1687 - train-loss: 0.369337 train-acc: 0.843750 test-loss:0.363049 test-acc : 0.876667 
Epoch 7/10 - Batch 1600/1687 - train-loss: 0.304605 train-acc: 0.906250 test-loss:0.369183 test-acc : 0.875000 
Epoch 8/10 - Batch 1600/1687 - train-loss: 0.325551 train-acc: 0.875000 test-loss:0.345057 test-acc : 0.880833 
Epoch 9/10 - Batch 1600/1687 - train-loss: 0.292723 

0,1
train_acc,▅▅▅▁▄▃▅▄▅▅▄▇▆▇▅▄▂▆▄▅▆█▆▇▃██▅▆▇▇▆▇▃▆▇▅▅▅█
train_loss,▅▇█▆▃▁▅▅▆▅▂▄▅▅▄▅▄▃▆▅▄▃▅▃▁▁▄▃▄▃▂▃▃▃▁▄▃▂▄▁
val_acc,▁▃▃▅▄▅▃▄▅▆▅▆▆▅▆▆▆▅▅▇▇▆▇▅▇▆▆▆▆▆▇█▅▇█▆▇▇▇▇
val_loss,█▇▆▅▄▄▄▃▃▃▃▂▄▂▃▂▃▂▄▂▁▃▃▂▃▂▃▁▂▁▂▂▂▂▁▃▁▂▂▁

0,1
train_acc,0.90625
train_loss,0.31615
val_acc,0.88217
val_loss,0.38383


[34m[1mwandb[0m: Agent Starting Run: 9d6dgtw4 with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_32_ac_ReLU
Epoch 1/5 - Batch 1600/1687 - train-loss: 0.551712 train-acc: 0.781250 test-loss:0.417451 test-acc : 0.848500 
Epoch 2/5 - Batch 1600/1687 - train-loss: 0.462384 train-acc: 0.843750 test-loss:0.401391 test-acc : 0.858333 
Epoch 3/5 - Batch 1600/1687 - train-loss: 0.413731 train-acc: 0.875000 test-loss:0.366942 test-acc : 0.868667 
Epoch 4/5 - Batch 1600/1687 - train-loss: 0.392140 train-acc: 0.843750 test-loss:0.378612 test-acc : 0.870833 
Epoch 5/5 - Batch 1600/1687 - train-loss: 0.316104 train-acc: 0.875000 test-loss:0.353109 test-acc : 0.880000 


0,1
train_acc,▆▂▃▁▂▄▄▅▂▆▆▃▂▄▄▄▇█▇▅▄▃▆▄▄▇▅▇█▇▂▄▅█▇▅▇▄▅▄
train_loss,▂█▅▅▄▃▃▆▅▅▅▄▆▃▅▁▄█▄▂▄▃▄▄▁▅▄▂▃▃▂▄▃▁▃█▄▆▂▂
val_acc,▁▁▅▄▅▆▆▆▇▅▆▇▆▅▇▇▇▇▇▇▇█▇▇▇▇█▇▆▇███▇▇█▇▇██
val_loss,█▅▄▄▅▃▄▃▃▃▂▃▂▃▂▂▂▂▂▃▁▂▂▂▂▁▁▂▁▁▁▂▁▁▂▁▃▂▁▁

0,1
train_acc,0.875
train_loss,0.3161
val_acc,0.88
val_loss,0.35311


[34m[1mwandb[0m: Agent Starting Run: ztubuo40 with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_of_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_4_bs_64_ac_ReLU
Epoch 1/10 - Batch 800/843 - train-loss: 0.443855 train-acc: 0.859375 test-loss:0.429825 test-acc : 0.844500 
Epoch 2/10 - Batch 800/843 - train-loss: 0.461144 train-acc: 0.828125 test-loss:0.428406 test-acc : 0.847333 
Epoch 3/10 - Batch 800/843 - train-loss: 0.463035 train-acc: 0.843750 test-loss:0.427606 test-acc : 0.845500 
Epoch 4/10 - Batch 800/843 - train-loss: 0.460699 train-acc: 0.843750 test-loss:0.422786 test-acc : 0.847667 
Epoch 5/10 - Batch 800/843 - train-loss: 0.458521 train-acc: 0.828125 test-loss:0.419889 test-acc : 0.850500 
Epoch 6/10 - Batch 800/843 - train-loss: 0.460978 train-acc: 0.828125 test-loss:0.416605 test-acc : 0.850500 
Epoch 7/10 - Batch 800/843 - train-loss: 0.444373 train-acc: 0.843750 test-loss:0.413389 test-acc : 0.853333 
Epoch 8/10 - Batch 800/843 - train-loss: 0.447468 train-acc: 0.859375 test-loss:0.409889 test-acc : 0.853333 
Epoch 9/10 - Batch 800/843 - train-loss: 0.438167 train-acc: 0.82812

0,1
train_acc,▁▅▃▃▆▃▆▃▅▂▂▄▃█▃▇▅▆▃▃▃▃▆▂▅▂▇▃▅▆▃▄▇▂▃▂▃▅▃▂
train_loss,█▆▅▆█▁▅▃▂▆▅▅▁▅▃▆█▅▅▁▅▇▅▅▅▂▆▂▃▆▅▃▂▆▃▇▁▂▃▁
val_acc,▁▅▆▇▇█▇▇▇█▇▇▇██▇▇▆▇▇▆▆▇▇▇▇█▇▇█▇▇██▇███▇█
val_loss,█▄▄▃▂▁▂▂▁▃▂▁▃▂▃▂▁▃▁▂▂▂▁▂▁▂▁▁▂▂▂▁▁▂▂▁▁▂▂▁

0,1
train_acc,0.82812
train_loss,0.43765
val_acc,0.8555
val_loss,0.40635


[34m[1mwandb[0m: Agent Starting Run: no10hyno with config:
[34m[1mwandb[0m: 	activation_function: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_of_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialization: Xavier


Starting training with run name: hl_5_bs_16_ac_ReLU
Epoch 1/10 - Batch 3300/3375 - train-loss: 0.733194 train-acc: 0.812500 test-loss:0.533488 test-acc : 0.810667 
Epoch 2/10 - Batch 3300/3375 - train-loss: 0.720832 train-acc: 0.750000 test-loss:0.465561 test-acc : 0.832667 
Epoch 3/10 - Batch 3300/3375 - train-loss: 0.614617 train-acc: 0.812500 test-loss:0.435430 test-acc : 0.844667 
Epoch 4/10 - Batch 3300/3375 - train-loss: 0.583512 train-acc: 0.812500 test-loss:0.420452 test-acc : 0.850333 
Epoch 5/10 - Batch 3300/3375 - train-loss: 0.566065 train-acc: 0.812500 test-loss:0.406387 test-acc : 0.856167 
Epoch 6/10 - Batch 3300/3375 - train-loss: 0.541091 train-acc: 0.812500 test-loss:0.398145 test-acc : 0.855000 
Epoch 7/10 - Batch 3300/3375 - train-loss: 0.535813 train-acc: 0.812500 test-loss:0.389518 test-acc : 0.857000 
Epoch 8/10 - Batch 3300/3375 - train-loss: 0.549111 train-acc: 0.812500 test-loss:0.383347 test-acc : 0.859833 
Epoch 9/10 - Batch 3300/3375 - train-loss: 0.537301 

0,1
train_acc,▆▅▃▆█▅▆▆▆▃▆▃▁▆█▆▆█▆██▅▅▆▅▆███▃▆▆▃█▁▆▅▅▆█
train_loss,▆▆▄▃▆▄█▂▅▂▃▄▃▃▃▂▃▄▂▂▇▂▂▂▁▇▂▃▃▃▁▂▃▄▂▁▂▂▂▃
val_acc,▁▆▆▆▆▇▇▇▇▇▇█▇▇▇████▇█▇██████████████████
val_loss,██▅▄▄▃▄▃▄▃▃▄▃▂▃▂▂▂▃▂▂▃▃▂▂▃▂▂▂▂▁▁▁▁▂▁▂▁▂▁

0,1
train_acc,0.8125
train_loss,0.52439
val_acc,0.86317
val_loss,0.37663


In [11]:
wandb.finish()

In [12]:
nn = Neural_Net(784,4,[32,32,32,32],'ReLU',10,'Xavier',0.005)
nn.train('adam',10,0.001,32,train_data,train_label,val_data,val_label)

Epoch 1/10 - Batch 1600/1687 - train-loss: 0.489906 train-acc: 0.781250 test-loss:0.484841 test-acc : 0.820000 
Epoch 2/10 - Batch 400/1687 - train-loss: 0.436420 train-acc: 0.843750 test-loss:0.430519 test-acc : 0.839500 

KeyboardInterrupt: 

In [10]:
nn

<__main__.Neural_Net at 0x7bedde5efb90>