In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import wandb

from utils.neural_network import NeuralNetwork
from utils.wandb_classes import WandbTrainer
from utils.helper_functions import get_optimizer, load_data

In [2]:
# Authenticate this kernel session with Weights & Biases before any runs are started.
# Kept as the cell's last expression so the notebook displays the value login() returns.
wandb.login()

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: bullseye2608 (bullseye2608-indian-institute-of-technology-madras) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


True

In [3]:
# load_data('fashion_mnist') is unpacked into six objects: features and labels
# for the train, validation and test splits, in that order.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = load_data('fashion_mnist')

### Trial run: train a single network by hand

In [4]:
# Trial network with layer sizes 784 -> 128 -> 128 -> 128 -> 10:
# ReLU on the first three layers, softmax on the last.
nn = NeuralNetwork(
    layer_sizes=[784] + [128] * 3 + [10],
    activation_functions=['relu'] * 3 + ['softmax'],
    weight_init='xavier',
    weight_decay=0.0,
    LOG_EACH=True,
)

# One forward pass over the training inputs. The loss computed from the last
# entry of H is stored in `loss`; this cell does not display it.
H, A = nn.forward_propagation(X_train)
loss = nn.compute_loss(H[-1], y_train)

# Validation accuracy right after construction (nn.train has not been called
# in this cell), printed with six decimal places.
# NOTE(review): no random seed is set in the visible cells, so this number may
# differ between runs -- confirm whether NeuralNetwork seeds its own RNG.
print(format(nn.compute_accuracy(X_val, y_val), '>.6f'))

# Attach the optimizer configuration that later training on `nn` will use.
nn.set_optimizer(dict(name='nadam', learning_rate=0.001, epsilon=1e-7, beta1=0.95))

0.125000


In [5]:
# Number of leading training rows fed to this trial run.
num_trial_datapoints = 54000

# NOTE(review): LOG_EACH is a notebook-level name; nothing in the visible cells
# reads it (the network received LOG_EACH=True at construction). Confirm whether
# any other cell depends on it before removing these two assignments.
LOG_EACH = True

# Train on the first `num_trial_datapoints` rows, validating on the validation split.
nn.train(
    X_train[:num_trial_datapoints],
    y_train[:num_trial_datapoints],
    X_val,
    y_val,
    batch_size=64,
    num_epochs=5,
    loss_type='cross_entropy',
    log_every=5000,
)

LOG_EACH = False

# Banner line: 40 dashes, " DONE ", 40 dashes.
print(f"{'-' * 40} DONE {'-' * 40}")

# Accuracy of the trained network on the test split.
print(nn.compute_accuracy(X_test, y_test))

Running NadamOptimizer self.learning_rate = 0.001 self.beta1 = 0.95 self.beta2 = 0.999 self.epsilon = 1e-07
Epoch 1/5, Iteration   0/844 --> Train Loss: 2.26874, Val Loss: 2.17015
---------------------------------------- DONE ----------------------------------------
0.8787


### W&B hyperparameter sweep

In [6]:
# This whole cell is commented out, so running it does nothing. It holds the
# code for registering a brand-new sweep from sweep_config.yaml and running it.
# NOTE(review): `wandb_sweep_helper_function` is not defined or imported in the
# visible notebook. If this cell is re-enabled, it presumably needs the training
# callable used elsewhere in this notebook (`WandbTrainer().train`) -- confirm.

# # Create New sweep
# import yaml

# with open("sweep_config.yaml", "r") as file:
#     sweep_config = yaml.safe_load(file)

# sweep_id = wandb.sweep(sweep_config,
#                        entity="bullseye2608-indian-institute-of-technology-madras",
#                        project="fashion_mnist_hp_search")

# # Run the sweep
# wandb.agent(sweep_id, wandb_sweep_helper_function, count=60)

In [None]:
# Continue an already-registered sweep rather than creating a new one.
# The identifier is written as "<entity>/<project>/<sweep id>".
sweep_id_cont = "bullseye2608-indian-institute-of-technology-madras/fashion_mnist_hp_search/vhbqpquu"

# The trainer's `train` method is the callable the agent invokes for each run.
trainer = WandbTrainer()

# Launch the agent for this sweep with a run count of 10.
wandb.agent(sweep_id=sweep_id_cont, function=trainer.train, count=10)

wandb: Agent Starting Run: yvvmjcpv with config:
wandb: 	activation: relu
wandb: 	batch_size: 32
wandb: 	epochs: 10
wandb: 	hidden_layers: 3
wandb: 	hidden_size: 64
wandb: 	learning_rate: 0.001
wandb: 	optimizer: nadam
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▄▅▆▆▆▆▇▇█
train_loss,▇▅▆▅▆▃▃▁▁█
val_accuracy,▁▄▆▆▅▅▆█▇█
val_loss,█▅▃▃▃▄▃▁▂▁

0,1
epoch,9.0
test_accuracy,0.8725
train_accuracy,0.90143
train_loss,0.50946
val_accuracy,0.881
val_loss,0.31738


wandb: Agent Starting Run: pe2wvocf with config:
wandb: 	activation: tanh
wandb: 	batch_size: 16
wandb: 	epochs: 10
wandb: 	hidden_layers: 5
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.0001
wandb: 	optimizer: nadam
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▄▄▅▆▇▇█▇█
train_loss,▂▂▁▅█▅▁▄▂▂
val_accuracy,▁▄▄▅▆▆▆█▇▇
val_loss,█▅▅▄▃▂▂▁▂▁

0,1
epoch,9.0
test_accuracy,0.8716
train_accuracy,0.8977
train_loss,0.23107
val_accuracy,0.8795
val_loss,0.3237


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: c4dk9nfi with config:
wandb: 	activation: tanh
wandb: 	batch_size: 32
wandb: 	epochs: 10
wandb: 	hidden_layers: 3
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.001
wandb: 	optimizer: nadam
wandb: 	weight_decay: 0
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▄▄▅▆▆▇▇▇█
train_loss,▃█▅▃▃▃▆▄▁▂
val_accuracy,▁▅▅▆█▇▇█▇▇
val_loss,█▅▄▃▁▂▂▂▂▂

0,1
epoch,9.0
test_accuracy,0.8804
train_accuracy,0.91787
train_loss,0.25623
val_accuracy,0.88333
val_loss,0.32092


wandb: Agent Starting Run: o6xddtkw with config:
wandb: 	activation: tanh
wandb: 	batch_size: 16
wandb: 	epochs: 5
wandb: 	hidden_layers: 4
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.0001
wandb: 	optimizer: nadam
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: xavier


0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▅▆██
train_loss,▆▆▁▃█
val_accuracy,▁▇▄█▇
val_loss,█▃▃▁▁

0,1
epoch,4.0
test_accuracy,0.8575
train_accuracy,0.88056
train_loss,0.58591
val_accuracy,0.86467
val_loss,0.3581


wandb: Agent Starting Run: w5mgl6m6 with config:
wandb: 	activation: relu
wandb: 	batch_size: 16
wandb: 	epochs: 10
wandb: 	hidden_layers: 3
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.0001
wandb: 	optimizer: nadam
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▄▄▅▅▆▇▇▇█
train_loss,▂▃▅▁▂▇▁█▃▆
val_accuracy,▁▅▆▆▅▇▇▇▆█
val_loss,█▅▄▃▄▂▂▁▂▁

0,1
epoch,9.0
test_accuracy,0.8745
train_accuracy,0.90202
train_loss,0.48381
val_accuracy,0.88117
val_loss,0.31659


wandb: Agent Starting Run: ekdu1t5z with config:
wandb: 	activation: relu
wandb: 	batch_size: 16
wandb: 	epochs: 10
wandb: 	hidden_layers: 4
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.0001
wandb: 	optimizer: nadam
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▁▃▅▅▇██▇█
train_loss,█▁▁▂▁▃▂▂▂▁
val_accuracy,▂▁▄▆▆▇██▇█
val_loss,██▅▄▄▂▁▁▂▁

0,1
epoch,9.0
test_accuracy,0.8734
train_accuracy,0.89881
train_loss,0.15837
val_accuracy,0.877
val_loss,0.32287


wandb: Agent Starting Run: abu9x97b with config:
wandb: 	activation: relu
wandb: 	batch_size: 32
wandb: 	epochs: 10
wandb: 	hidden_layers: 5
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.001
wandb: 	optimizer: nadam
wandb: 	weight_decay: 0
wandb: 	weight_init: xavier


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_accuracy,▁▂▄▅▅▆▆▆██
train_loss,▄▇▂▂▁█▁▁▁▂
val_accuracy,▁▂▅▆▅▆▆▅██
val_loss,█▇▅▃▄▂▃▅▂▁

0,1
epoch,9.0
test_accuracy,0.8836
train_accuracy,0.9165
train_loss,0.23232
val_accuracy,0.88767
val_loss,0.29687


wandb: Agent Starting Run: rq2xguaq with config:
wandb: 	activation: tanh
wandb: 	batch_size: 32
wandb: 	epochs: 10
wandb: 	hidden_layers: 3
wandb: 	hidden_size: 128
wandb: 	learning_rate: 0.0001
wandb: 	optimizer: nadam
wandb: 	weight_decay: 0.0005
wandb: 	weight_init: xavier
