In [None]:
!pip install wandb



In [None]:
import wandb
wandb.login()

True

In [None]:
wandb.init(project="FeedForwardNeuralNetworks", entity="codefreaksubham")

sweep_config = {
    "method":"random"
}
metric = {
    "name" : "val_accuracy",
    "goal" : "maximize"
}

sweep_config['metric']=metric

parameter_dict = {
      "epochs" : {
      "values" : [5, 10]
      },
      "learning_rate" : {
      "values" : [1e-3, 1e-4]
      },
      "h_layers" : {
      "values" : [3, 4, 5]
      },
      "neurons" : {
      "values" : [32, 64, 128]
      },
      "optimizer" : {
      "values" : ['sgd', 'mgd', 'nesterov', 'rmsprop', 'adam', 'nadam']  
      },
      "batch_size" : {
      "values" : [16, 32, 64]
      },
      "activation" : {
      "values" : ['sigmoid', 'tanh', 'relu']
      }
  } 

sweep_config['parameters']=parameter_dict

import pprint

pprint.pprint(sweep_config)
sweep_id = wandb.sweep(sweep_config)



{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'val_accuracy'},
 'parameters': {'activation': {'values': ['sigmoid', 'tanh', 'relu']},
                'batch_size': {'values': [16, 32, 64]},
                'epochs': {'values': [5, 10]},
                'h_layers': {'values': [3, 4, 5]},
                'learning_rate': {'values': [0.001, 0.0001]},
                'neurons': {'values': [32, 64, 128]},
                'optimizer': {'values': ['sgd',
                                         'mgd',
                                         'nesterov',
                                         'rmsprop',
                                         'adam',
                                         'nadam']}}}
Create sweep with ID: luoyygwa
Sweep URL: https://wandb.ai/codefreaksubham/uncategorized/sweeps/luoyygwa


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split

###Activation Functions

In [None]:
def sigmoid(x):
  return 1/(1 + np.exp(-x))

def d_sigmoid(x):
  return (1 - sigmoid(x)) * sigmoid(x)

def tanh(x):
  return np.tanh(x)

def d_tanh(x):
    return 1 - np.square(np.tanh(x))

def relu(x):
  return np.where(np.asarray(x) > 0, x, 0)

def d_relu(x):
    return np.where(x <= 0, 0, 1)

def softmax(x):
    e_x = np.exp(x)
    return e_x/e_x.sum()

###Loss Function:

In [None]:
#cross-entropy
def cross_entropy_loss(y, y_hat, i):
  return -np.log(y_hat[y[i]][0])

#squared-error
def squared_error(y, y_hat, i):
  y_hat[y[i]][0] = (1-y_hat[y[i]])**2
  return np.sum(np.square(y_hat))

In [None]:
def cost(y, y_hat, i):
  
  m = y.shape[0]
  c = (1/m) * np.sum(cross_entropy_loss(y, y_hat))
  c = np.squeeze(c) 

  return c

###Layer Class : parameters initialization for each layer

In [None]:
class Layer:

    activationFunc = {
        'tanh': (tanh, d_tanh),
        'sigmoid': (sigmoid, d_sigmoid),
        'relu' : (relu, d_relu),
        'softmax' : (softmax, None)
    }

    def __init__(self, inputs, neurons, activation):
        np.random.seed(33)
        sd = np.sqrt(2 / float(inputs + neurons))
        self.W = np.random.normal(0, sd, size=(neurons, inputs))  #np.random.randn(neurons, inputs)
        self.b = np.zeros((neurons, 1))
        self.act, self.d_act = self.activationFunc.get(activation)
        self.dW = 0
        self.db = 0

### Forward propagation

In [None]:
def forward_propagation(h, layers):
  m = len(layers)
  
  layers[0].a = np.dot(layers[0].W, h)
  layers[0].h = layers[0].act(layers[0].a)
  #print(layers[0].h.shape)
  
  for j in range(1, m-1):
    layers[j].a = np.dot(layers[j].W, layers[j-1].h)
    layers[j].h = layers[j].act(layers[j].a)
    #print(layers[j].h.shape)

  j+=1
  layers[j].a = np.dot(layers[j].W, layers[j-1].h)
  layers[j].h = softmax(layers[j].a)
  #print(layers[j].h.shape)

  return layers[m-1].h

###Backward_propagation

In [None]:
def backward_propagation(l, y_hat, layers, inp):
  
  #one-hot vector
  e_l = np.zeros((y_hat.shape[0], 1))
  e_l[l] = 1
  
  layers[len(layers)-1].da = -(e_l - y_hat)                 #gradient w.r.t activation of last layer (a_L)
  
  for j in range(len(layers)-1, 0, -1):
                        
    layers[j].dW += np.dot(layers[j].da, (layers[j-1].h).T)
    layers[j].db += layers[j].da

    layers[j-1].dh = np.dot((layers[j].W).T, layers[j].da)
    layers[j-1].da = np.multiply(layers[j-1].dh, layers[j-1].d_act(layers[j-1].a))

  layers[0].dW += np.dot(layers[0].da, inp.T)
  layers[0].db += layers[0].da

  return layers


###Gradient Descent

In [None]:
def update_params(learning_rate, layers, batch_size):
  for layer in layers:
    layer.W = layer.W - learning_rate * layer.dW/batch_size
    layer.b = layer.b - learning_rate * layer.db/batch_size

    layer.dW = 0
    layer.db = 0

###SGD / Batch Gradient Descent

In [None]:
def sgd(epochs, layers, learning_rate, x_train, y_train, batch_size):
    
    m = x_train.shape[0]
    costs = []

    for epoch in range(epochs):

      cost = 0

      for i in range(m):

        inp = x_train[i].reshape(784, 1)
        
        # Feedforward
        h = inp
        h = forward_propagation(h, layers)

        # Calulate cost to plot graph
        cost += cross_entropy_loss(y_train, h, i)
        #cost += cross_entropy_loss(y_train, h, i)

        # Backpropagation
        backward_propagation(y_train[i], h, layers, x_train[i].reshape(784, 1))

        #stocastic gradient decent
        if (i+1) % batch_size == 0:
          update_params(learning_rate, layers, batch_size)

      costs.append(cost/m)
      wandb.log({"epoch": epoch, "cost": cost/m})

      print("Cost after epoch " + str(epoch) + " :", cost/m)

    return costs, layers

###Momentum Gradient descent

In [None]:
def mgd(epochs, layers, learning_rate, x_train, y_train, batch_size):

    gamma = 0.9
    m = x_train.shape[0]
    costs = []

    for epoch in range(epochs):

      for layer in layers:
        layer.update_W = 0
        layer.update_b = 0

      cost = 0

      for i in range(m):

        inp = x_train[i].reshape(784, 1)
        
        # Feedforward
        h = inp
        h = forward_propagation(h, layers)

        # Calulate cost to plot graph
        cost += cross_entropy_loss(y_train, h, i)

        # Backpropagation
        backward_propagation(y_train[i], h, layers, x_train[i].reshape(784, 1))

        #momentum gradient decent
        if (i+1) % batch_size == 0:
          for layer in layers:

            layer.update_W = gamma*layer.update_W + learning_rate*layer.dW/batch_size
            layer.update_b = gamma*layer.update_b + learning_rate*layer.dW/batch_size

            layer.W = layer.W - layer.update_W
            layer.b = layer.b - layer.update_b

            layer.dW = 0
            layer.db = 0

            layer.update_W = 0
            layer.update_b = 0


      costs.append(cost/m)
      wandb.log({"epoch": epoch, "cost": cost/m})

      print("Cost after epoch " + str(epoch) + " :", cost/m)

    return costs, layers

###Nesterov Gradient Descent

In [None]:
def nesterov(epochs, layers, learning_rate, x_train, y_train, batch_size):

    gamma = 0.9
    m = x_train.shape[0]
    costs = []

    for epoch in range(epochs):

      for layer in layers:
        layer.update_W = 0
        layer.update_b = 0

      cost = 0

      for i in range(m):

        inp = x_train[i].reshape(784, 1)
        
        # Feedforward
        h = inp
        h = forward_propagation(h, layers)

        # Calulate cost to plot graph
        cost += cross_entropy_loss(y_train, h, i)

        #calculate W_lookaheads
        if (i+1) % batch_size == 0:
          for layer in layers:
            layer.W = layer.W - gamma * layer.update_W
            layer.b = layer.b - gamma * layer.update_b

        # Backpropagation
        backward_propagation(y_train[i], h, layers, x_train[i].reshape(784, 1))

        #nesterov gradient decent
        if (i+1) % batch_size == 0:
          for layer in layers:

            layer.update_W = gamma*layer.update_W + learning_rate*layer.dW/batch_size
            layer.update_b = gamma*layer.update_b + learning_rate*layer.dW/batch_size

            layer.W = layer.W - layer.update_W
            layer.b = layer.b - layer.update_b

            layer.dW = 0
            layer.db = 0

            layer.update_W = 0
            layer.update_b = 0

      costs.append(cost/m)
      wandb.log({"epoch": epoch, "cost": cost/m})
      print("Cost after epoch " + str(epoch) + " :", cost/m)
      
    return costs, layers  

###RMSProp

In [None]:
def rmsprop(epochs, layers, learning_rate, x_train, y_train, batch_size):

    epsilon, beta = 1e-8, 0.9
    m = x_train.shape[0]
    costs = []

    for epoch in range(epochs):

      for layer in layers:
        layer.update_W = 0
        layer.update_b = 0

      cost = 0

      for i in range(m):

        inp = x_train[i].reshape(784, 1)
        
        # Feedforward
        h = inp
        h = forward_propagation(h, layers)

        # Calulate cost to plot graph
        cost += cross_entropy_loss(y_train, h, i)

        # Backpropagation
        backward_propagation(y_train[i], h, layers, x_train[i].reshape(784, 1))

        #rmsprop gradient decent
        if (i+1) % batch_size == 0:
          for layer in layers:

            layer.update_W = beta*layer.update_W + (1-beta)*(layer.dW/batch_size)**2
            layer.update_b = beta*layer.update_b + (1-beta)*(layer.db/batch_size)**2

            layer.W = layer.W - (learning_rate / np.sqrt(layer.update_W + epsilon)) * (layer.dW/batch_size)
            layer.b = layer.b - (learning_rate / np.sqrt(layer.update_b + epsilon)) * (layer.db/batch_size)

            layer.dW = 0
            layer.db = 0

            layer.update_W = 0
            layer.update_b = 0


      costs.append(cost/m)
      wandb.log({"epoch": epoch, "cost": cost/m})

      print("Cost after epoch " + str(epoch) + " :", cost/m)

    return costs, layers

###Adam

In [None]:
def adam(epochs, layers, learning_rate, x_train, y_train, batch_size):

    epsilon, beta1, beta2 = 1e-8, 0.9, 0.99
    t = 0
    
    m = x_train.shape[0]
    costs = []

    for epoch in range(epochs):

      for layer in layers:
        layer.m_W, layer.m_b, layer.v_W, layer.v_b, layer.m_W_hat, layer.m_b_hat, layer.v_W_hat, layer.v_b_hat = 0, 0, 0, 0, 0, 0, 0, 0

      cost = 0

      for i in range(m):

        inp = x_train[i].reshape(784, 1)
        
        # Feedforward
        h = inp
        h = forward_propagation(h, layers)

        # Calulate cost to plot graph
        cost += cross_entropy_loss(y_train, h, i)

        # Backpropagation
        backward_propagation(y_train[i], h, layers, x_train[i].reshape(784, 1))

        #adam gradient decent
        if (i+1) % batch_size == 0:
          t+=1

          for layer in layers:

            layer.m_W = beta1 * layer.m_W + (1-beta1)*layer.dW/batch_size
            layer.m_b = beta1 * layer.m_b + (1-beta1)*layer.db/batch_size

            layer.v_W = beta2 * layer.v_W + (1-beta2)*((layer.dW/batch_size))**2
            layer.v_b = beta2 * layer.v_b + (1-beta2)*((layer.db/batch_size))**2

            layer.m_W_hat = layer.m_W/(1-math.pow(beta1, t))
            layer.m_b_hat = layer.m_b/(1-math.pow(beta1, t))

            layer.v_W_hat = layer.v_W/(1-math.pow(beta2, t))
            layer.v_b_hat = layer.v_b/(1-math.pow(beta2, t))

            layer.W = layer.W - (learning_rate/np.sqrt(layer.v_W_hat + epsilon))*layer.m_W_hat
            layer.b = layer.b - (learning_rate/np.sqrt(layer.v_b_hat + epsilon))*layer.m_b_hat

            layer.dW = 0
            layer.db = 0

            layer.m_W, layer.m_b, layer.v_W, layer.v_b, layer.m_W_hat, layer.m_b_hat, layer.v_W_hat, layer.v_b_hat = 0, 0, 0, 0, 0, 0, 0, 0


      costs.append(cost/m)
      wandb.log({"epoch": epoch, "cost": cost/m})

      print("Cost after epoch " + str(epoch) + " :", cost/m)

    return costs, layers

###NAdam

In [None]:
def nadam(epochs, layers, learning_rate, x_train, y_train, batch_size):

    epsilon, beta1, beta2 = 1e-8, 0.9, 0.99
    gamma = 0.9
    t = 0
    
    m = x_train.shape[0]
    costs = []

    for epoch in range(epochs):

      for layer in layers:
        layer.m_W, layer.m_b, layer.v_W, layer.v_b, layer.m_W_hat, layer.m_b_hat, layer.v_W_hat, layer.v_b_hat = 0, 0, 0, 0, 0, 0, 0, 0
        layer.update_W = 0
        layer.update_b = 0

      cost = 0

      for i in range(m):

        inp = x_train[i].reshape(784, 1)
        
        # Feedforward
        h = inp
        h = forward_propagation(h, layers)

        # Calulate cost to plot graph
        cost += cross_entropy_loss(y_train, h, i)
        #cost += squared_error(y_train, h, i)

        #calculate W_lookaheads
        if (i+1) % batch_size == 0:
          for layer in layers:
            layer.W = layer.W - gamma * layer.m_W
            layer.b = layer.b - gamma * layer.m_b
        
        # Backpropagation
        backward_propagation(y_train[i], h, layers, x_train[i].reshape(784, 1))

        #adam gradient decent
        if (i+1) % batch_size == 0:
          t+=1

          for layer in layers:

            layer.m_W = beta1 * layer.m_W + (1-beta1)*layer.dW/batch_size
            layer.m_b = beta1 * layer.m_b + (1-beta1)*layer.db/batch_size

            layer.v_W = beta2 * layer.v_W + (1-beta2)*((layer.dW/batch_size))**2
            layer.v_b = beta2 * layer.v_b + (1-beta2)*((layer.db/batch_size))**2

            layer.m_W_hat = layer.m_W/(1-math.pow(beta1, t))
            layer.m_b_hat = layer.m_b/(1-math.pow(beta1, t))

            layer.v_W_hat = layer.v_W/(1-math.pow(beta2, t))
            layer.v_b_hat = layer.v_b/(1-math.pow(beta2, t))

            layer.m_dash_W = beta1 * layer.m_W_hat + (1-beta1)*layer.dW/batch_size
            layer.m_dash_b = beta1 * layer.m_b_hat + (1-beta1)*layer.db/batch_size

            layer.W = layer.W - (learning_rate/np.sqrt(layer.v_W_hat + epsilon))*layer.m_dash_W
            layer.b = layer.b - (learning_rate/np.sqrt(layer.v_b_hat + epsilon))*layer.m_dash_b

            layer.dW = 0
            layer.db = 0

            layer.m_W, layer.m_b, layer.v_W, layer.v_b, layer.m_W_hat, layer.m_b_hat, layer.v_W_hat, layer.v_b_hat = 0, 0, 0, 0, 0, 0, 0, 0


      costs.append(cost/m)
      wandb.log({"epoch": epoch, "cost": cost/m})
      print("Cost after epoch " + str(epoch) + " :", cost/m)

    return costs, layers

###Putting all togather:

###Optimizer

In [None]:
def optimizer(layers, optimizer, epochs, learning_rate, x_train, y_train, batch_size):
  
  if optimizer == "sgd":
    return sgd(epochs, layers, learning_rate, x_train, y_train, batch_size)
  elif optimizer == "mgd":
    return mgd(epochs, layers, learning_rate, x_train, y_train, batch_size)
  elif optimizer == "nesterov":
    return nesterov(epochs, layers, learning_rate, x_train, y_train, batch_size)
  elif optimizer == "rmsprop":
    return rmsprop(epochs, layers, learning_rate, x_train, y_train, batch_size)
  elif optimizer == "adam":
    return adam(epochs, layers, learning_rate, x_train, y_train, batch_size)
  elif optimizer == "nadam":
    return nadam(epochs, layers, learning_rate, x_train, y_train, batch_size)
  else:
    print("No optimization algorithm named "+optimizer+" found")
    return "Error", "Error"

###Function to Predict

In [None]:
def predict(input, y, layers):
 
  prediction = forward_propagation(input, layers)

  loss = 0
  for i in range(len(y)):
    loss += squared_error(y, prediction[:, i].reshape(10,1), i) 

  prediction = prediction.argmax(axis=0)
  accuracy =  np.sum(prediction == y)/y.shape[0]

  return prediction, accuracy, loss/len(y)

###Import dataset and putting in appropriate format

In [None]:
from keras.datasets import fashion_mnist
(x_train_org, y_train_org), (x_test_org, y_test_org) = fashion_mnist.load_data()

In [None]:
print("x_train shape: ", x_train_org.shape)
print("y_train shape: ", y_train_org.shape)

x_train shape:  (60000, 28, 28)
y_train shape:  (60000,)


In [None]:
'''first_image = x_train_org[0]
#first_image = np.array(first_image, dtype='float')
#pixels = first_image.reshape((28, 28))
i = True
while(i):
  if
  plt.imshow(x_train_org[i], cmap='gray')
  plt.title("class: "+ str(y_train_org[i]))
  plt.show()'''

'first_image = x_train_org[0]\n#first_image = np.array(first_image, dtype=\'float\')\n#pixels = first_image.reshape((28, 28))\ni = True\nwhile(i):\n  if\n  plt.imshow(x_train_org[i], cmap=\'gray\')\n  plt.title("class: "+ str(y_train_org[i]))\n  plt.show()'

####Flattening the data

In [None]:
x_train_temp = x_train_org.reshape(x_train_org.shape[0], -1)
y_train_temp = y_train_org
x_test = x_test_org.reshape(x_test_org.shape[0], -1)
y_test = y_test_org

####Splliting dataset into training and validation

In [None]:
x_train, x_val, y_train, y_val = train_test_split(x_train_temp, y_train_temp, test_size=0.1, random_state=33)

In [None]:
print("x_train shape: ", x_train.shape)
print("y_train shape: ", y_train.shape)
print("x_val shape: ", x_val.shape)
print("y_val shape: ", y_val.shape)
print("x_test shape: ", x_test.shape)
print("y_test shape: ", y_test.shape)

x_train shape:  (54000, 784)
y_train shape:  (54000,)
x_val shape:  (6000, 784)
y_val shape:  (6000,)
x_test shape:  (10000, 784)
y_test shape:  (10000,)


###Train Model

In [None]:
def model_train(config=None):

  with wandb.init(config=config):
    config=wandb.config

    layers= [Layer(x_train.shape[1], config.neurons, config.activation)]
    for _ in range(0, config.h_layers-1):
      layers.append(Layer(config.neurons, config.neurons, config.activation))
    layers.append(Layer(config.neurons, 10, 'softmax'))

    costs, layers = optimizer(layers, config.optimizer, config.epochs, config.learning_rate, x_train, y_train, config.batch_size)

    output_val, accuracy_val, loss_val = predict(x_val.T, y_val, layers)
    output_test, accuracy_test, loss_test = predict(x_test.T, y_test, layers)

    wandb.log({"val_accuracy": accuracy_val, "accuracy": accuracy_test, "val_loss": loss_val})

    print("Validation accuracy: ", accuracy_val)
    print("Validation loss: ", loss_val)
    print("Test accuracy: ", accuracy_test)
    print("Test loss: ", loss_test)

In [None]:
wandb.agent(sweep_id, model_train, count=10)

[34m[1mwandb[0m: Agent Starting Run: xdylv5g8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	h_layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam


Cost after epoch 0 : 0.9373404389572725
Cost after epoch 1 : 0.7591783485145288
Cost after epoch 2 : 0.7052616899867044
Cost after epoch 3 : 0.6907172335205498
Cost after epoch 4 : 0.6731189614060648
Cost after epoch 5 : 0.6606826817401243
Cost after epoch 6 : 0.6588365795415856
Cost after epoch 7 : 0.6517837551711405
Cost after epoch 8 : 0.6360489622597326
Cost after epoch 9 : 0.6408383207524596
Validation accuracy:  1.0
Validation loss:  0.9994367423386248
Test accuracy:  1.0
Test loss:  0.9996587623597004


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_accuracy,▁
val_loss,▁

0,1
accuracy,1.0
cost,0.64084
epoch,9.0
val_accuracy,1.0
val_loss,0.99944


[34m[1mwandb[0m: Agent Starting Run: 1nrvnp2f with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	h_layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nesterov


Cost after epoch 0 : 3.909976180274886
Cost after epoch 1 : 1.5100300050803934
Cost after epoch 2 : 1.1668798513351024
Cost after epoch 3 : 1.0115909029464885
Cost after epoch 4 : 0.9164232047074226
Cost after epoch 5 : 0.8527682430527619
Cost after epoch 6 : 0.8072981446068034
Cost after epoch 7 : 0.7728963208024872
Cost after epoch 8 : 0.7455977608512045
Cost after epoch 9 : 0.723723489912425
Validation accuracy:  1.0
Validation loss:  0.9997282237285496
Test accuracy:  1.0
Test loss:  0.9997231351509841


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,█▃▂▂▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_accuracy,▁
val_loss,▁

0,1
accuracy,1.0
cost,0.72372
epoch,9.0
val_accuracy,1.0
val_loss,0.99973


[34m[1mwandb[0m: Agent Starting Run: dsx4kvbs with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	h_layers: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd


  


Cost after epoch 0 : 2.406870007552771
Cost after epoch 1 : 2.3122529473067197
Cost after epoch 2 : 2.3031250418013447
Cost after epoch 3 : 2.302416159847361
Cost after epoch 4 : 2.302335238241831
Cost after epoch 5 : 2.302294502478119
Cost after epoch 6 : 2.3022569406116333
Cost after epoch 7 : 2.3022189097168684
Cost after epoch 8 : 2.302181231188797
Cost after epoch 9 : 2.302143102116708
Validation accuracy:  1.0
Validation loss:  0.9999333033596162
Test accuracy:  1.0
Test loss:  0.9999599790957194


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,█▂▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_accuracy,▁
val_loss,▁

0,1
accuracy,1.0
cost,2.30214
epoch,9.0
val_accuracy,1.0
val_loss,0.99993


[34m[1mwandb[0m: Agent Starting Run: prasywq5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	h_layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: nesterov


Cost after epoch 0 : 1.6298172333992014
Cost after epoch 1 : 0.7593359490073542
Cost after epoch 2 : 0.6603336566631515
Cost after epoch 3 : 0.6058637843945853
Cost after epoch 4 : 0.5696789829206338
Validation accuracy:  1.0
Validation loss:  0.9998277716640902
Test accuracy:  1.0
Test loss:  0.9998062286802238


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,█▂▂▁▁
epoch,▁▃▅▆█
val_accuracy,▁
val_loss,▁

0,1
accuracy,1.0
cost,0.56968
epoch,4.0
val_accuracy,1.0
val_loss,0.99983


[34m[1mwandb[0m: Agent Starting Run: fj7yhdb0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	h_layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop


Cost after epoch 0 : 2.9068294403719968
Cost after epoch 1 : 0.9245851632769071
Cost after epoch 2 : 0.8791246215730693
Cost after epoch 3 : 0.8399862954506294
Cost after epoch 4 : 0.8258072965922194
Validation accuracy:  1.0
Validation loss:  0.9997523618095739
Test accuracy:  1.0
Test loss:  0.9998456911756257


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,█▁▁▁▁
epoch,▁▃▅▆█
val_accuracy,▁
val_loss,▁

0,1
accuracy,1.0
cost,0.82581
epoch,4.0
val_accuracy,1.0
val_loss,0.99975


[34m[1mwandb[0m: Agent Starting Run: vs590wt5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	h_layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimizer: rmsprop


Cost after epoch 0 : 0.7207013197423098
Cost after epoch 1 : 0.5465664024391468
Cost after epoch 2 : 0.5174065900434192
Cost after epoch 3 : 0.4960085271537366
Cost after epoch 4 : 0.4776973780107413
Cost after epoch 5 : 0.4726786186785695
Cost after epoch 6 : 0.4704219122087476
Cost after epoch 7 : 0.4664267497944262
Cost after epoch 8 : 0.4576046387155233
Cost after epoch 9 : 0.4582053972051198
Validation accuracy:  1.0
Validation loss:  0.9993686721373086
Test accuracy:  1.0
Test loss:  0.9996182043952072


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,█▃▃▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_accuracy,▁
val_loss,▁

0,1
accuracy,1.0
cost,0.45821
epoch,9.0
val_accuracy,1.0
val_loss,0.99937


[34m[1mwandb[0m: Agent Starting Run: jbvyxf48 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	h_layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimizer: rmsprop


Cost after epoch 0 : 3.257238694272944




Cost after epoch 1 : nan
Cost after epoch 2 : nan
Cost after epoch 3 : nan
Cost after epoch 4 : nan
Cost after epoch 5 : nan
Cost after epoch 6 : nan
Cost after epoch 7 : nan
Cost after epoch 8 : nan
Cost after epoch 9 : nan
Validation accuracy:  0.103
Validation loss:  nan
Test accuracy:  0.1
Test loss:  nan


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,▁
epoch,▁▂▃▃▄▅▆▆▇█
val_accuracy,▁
val_loss,

0,1
accuracy,0.1
cost,
epoch,9.0
val_accuracy,0.103
val_loss,


[34m[1mwandb[0m: Agent Starting Run: uus1q556 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	h_layers: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: adam


Cost after epoch 0 : 1.411970952909726
Cost after epoch 1 : 0.8830146627463911
Cost after epoch 2 : 0.7785773557754683
Cost after epoch 3 : 0.7231803901201507
Cost after epoch 4 : 0.6920703337820974
Validation accuracy:  1.0
Validation loss:  0.9994218691747633
Test accuracy:  1.0
Test loss:  0.9996521439501943


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,█▃▂▁▁
epoch,▁▃▅▆█
val_accuracy,▁
val_loss,▁

0,1
accuracy,1.0
cost,0.69207
epoch,4.0
val_accuracy,1.0
val_loss,0.99942


[34m[1mwandb[0m: Agent Starting Run: ntt6t3co with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	h_layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam


Cost after epoch 0 : 1.1200337958456004
Cost after epoch 1 : 0.753013575663645
Cost after epoch 2 : 0.7060478619298043
Cost after epoch 3 : 0.6759003878309224
Cost after epoch 4 : 0.6514761517826908
Cost after epoch 5 : 0.6311861134346957
Cost after epoch 6 : 0.6259159405199693
Cost after epoch 7 : 0.6139302647678063
Cost after epoch 8 : 0.6063066704545371
Cost after epoch 9 : 0.5972166231196262
Validation accuracy:  1.0
Validation loss:  0.9993976124654405
Test accuracy:  1.0
Test loss:  0.9996378621339287


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,█▃▂▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_accuracy,▁
val_loss,▁

0,1
accuracy,1.0
cost,0.59722
epoch,9.0
val_accuracy,1.0
val_loss,0.9994


[34m[1mwandb[0m: Agent Starting Run: gxah2n8h with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	h_layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimizer: nesterov


Cost after epoch 0 : 2.4629102363053357
Cost after epoch 1 : 2.352051631775484
Cost after epoch 2 : 2.3170729267527634
Cost after epoch 3 : 2.30559028084735
Cost after epoch 4 : 2.3019844526553923
Cost after epoch 5 : 2.300783490818018
Cost after epoch 6 : 2.300240100013249
Cost after epoch 7 : 2.29987215068376
Cost after epoch 8 : 2.299551123376818
Cost after epoch 9 : 2.299254329726428
Validation accuracy:  1.0
Validation loss:  0.999933109205317
Test accuracy:  1.0
Test loss:  0.99995985979564


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁
cost,█▃▂▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_accuracy,▁
val_loss,▁

0,1
accuracy,1.0
cost,2.29925
epoch,9.0
val_accuracy,1.0
val_loss,0.99993


In [None]:
wandb.finish()

###Predictions and accuracy using validation data and test data

In [None]:
'''output_val, accuracy_val, loss_val = predict(x_val.T, y_val, layers)
output_test, accuracy_test, loss_test = predict(x_test.T, y_test, layers)
print("Validation accuracy: ", accuracy_val)
print("Validation loss: ", loss_val)
print("Test accuracy: ", accuracy_test)
print("Test loss: ", loss_test)'''

'output_val, accuracy_val, loss_val = predict(x_val.T, y_val, layers)\noutput_test, accuracy_test, loss_test = predict(x_test.T, y_test, layers)\nprint("Validation accuracy: ", accuracy_val)\nprint("Validation loss: ", loss_val)\nprint("Test accuracy: ", accuracy_test)\nprint("Test loss: ", loss_test)'