In [59]:
import sys
import os
import numpy as np
import pickle
# Make the project's source folder importable. The membership check keeps the
# cell idempotent: re-running it no longer adds a duplicate sys.path entry.
source_loc = "/project/wyin/jlee/ml-project/source"
if source_loc not in sys.path:
    sys.path.append(source_loc)
from utilities import ProblemStatement, UnscaledData, ScaledData, order_validation

# prob_vars is unpacked into the three arguments that ScaledData is built from.
data_loc, X_name, y_name = ProblemStatement().prob_vars
data = ScaledData(data_loc, X_name, y_name, check_data = True)    #Set check_data to False to suppress output.

Double checking dataset sizes:

Training set x size: (25534, 354). Training set y size: (25534, 3)

validation set x size: (3103, 354). Validation set y size: (3103, 3)

Testing set x size: (3238, 354). Testing set y size: (3238, 3)



Create the hyperparameter folder if it doesn't exist yet

In [60]:
# exist_ok=True makes this a no-op when the folder is already there, and avoids
# the gap between a separate os.path.exists() check and the creation call.
os.makedirs("hyperparameters", exist_ok=True)

Name this grid search run and choose whether to save the resulting models.
Set the hyperparameters for the grid search here:

1. layer_sizes: Architectures of the networks, set as a list of lists; each inner list gives the number of neurons in each successive layer of one network
2. learning_rate: Set as list of starting learning rates
3. batch_size: Set as list of batch sizes
4. schedule_factor: Set as list of factors. The factor determines how much the learning rate is reduced when the loss plateaus
5. max_time: Set as "DD:HH:MM:SS" string

In [61]:
# Label for this grid-search run, and whether the resulting models are saved.
run_name = "test-4"
save_models = True

# Search space: the values to try for each hyperparameter.
layer_sizes = [
    [354, 256, 128, 64, 32, 3],
    [354, 256, 140, 50, 3],
]
learning_rate = [0.001, 0.005, 0.01]
batch_size = [128, 256]
schedule_factor = [0.2, 0.5]

# max_time is written as "DD:HH:MM:SS"; this value is 30 seconds.
max_time = "00:00:00:30"

Save the settings for the grid search to a pickle file.

In [62]:
# Gather every grid-search setting into one dict. It is built before the file
# is opened, so a missing variable cannot leave a truncated or open file behind.
settings = {'layer_sizes': layer_sizes, 'learning_rate': learning_rate,
            'batch_size': batch_size, "schedule_factor": schedule_factor,
            'max_time': max_time, 'run_name': run_name, 'save_models': save_models}

# The with-block closes the file even if pickle.dump raises.
with open("hyperparameters/grid-search", "wb") as f:
    pickle.dump(settings, f)

To run the grid search over the neural network models, run the following from the command line in the source folder:\
sbatch nn-runner.sbatch {total models to run} {location of problem definition file} {hyperparameter folder} {name of run}
<br /><br />
To see the models training in tensorboard, run:\
tensorboard --logdir {hyperparameter folder}/logs/{name of run}
<br /><br />
You can copy the output of the code snippet below and run it on the command line in the source folder.

In [63]:
# All locations are resolved against the notebook's current working directory.
working_dir = os.getcwd()
prob_file = os.path.join(working_dir, "problem-definition.txt")
hparam_loc = os.path.join(working_dir, "hyperparameters")
log_loc = os.path.join(hparam_loc, "logs", run_name)
val_loc = os.path.join(hparam_loc, "val-ends", run_name)

# One model per combination of the hyperparameter values in the grid.
num_models = 1
for options in (layer_sizes, learning_rate, batch_size, schedule_factor):
    num_models *= len(options)

# Print ready-to-paste commands for launching the training and for tensorboard.
print(f"Run in command line to train models: \nsbatch nn-runner.sbatch {num_models} {prob_file} {hparam_loc} {run_name}\n")

print(f"Run in command line to see tensorboard: \ntensorboard --logdir {log_loc}")

Run in command line to train models: 
sbatch nn-runner.sbatch 24 /project/wyin/jlee/ml-project/user-guide/problem-definition.txt /project/wyin/jlee/ml-project/user-guide/hyperparameters test-4

Run in command line to see tensorboard: 
tensorboard --logdir /project/wyin/jlee/ml-project/user-guide/hyperparameters/logs/test-4


Once the models have finished training, you can list them sorted by best validation loss using order_validation({validation results folder}), where the folder is {hyperparameter folder}/val-ends/{name of run}. You can also get the absolute file location of the best-performing model - useful for model evaluation (see the other Jupyter notebook), assuming you saved the model.

In [68]:
# Each entry unpacks as (error, model); the first entry is treated as the best.
errors = order_validation(val_loc)

for error, model in errors:
    print(f"model: {model}, error: {error}")

# Guard the empty case (e.g. training has not finished yet) so that indexing
# errors[0] cannot raise an IndexError.
if len(errors) > 0:
    print(f"\nBest model location: {os.path.join(log_loc, errors[0][1])}")
else:
    print(f"\nNo validation results found in {val_loc}; have the models finished training?")

model: [354, 256, 128, 64, 32, 3], 0.01, 256, 0.2, error: 8.90369756234577e-06
model: [354, 256, 128, 64, 32, 3], 0.001, 256, 0.2, error: 9.854635209194385e-06
model: [354, 256, 128, 64, 32, 3], 0.001, 128, 0.2, error: 2.1584812202490866e-05
model: [354, 256, 128, 64, 32, 3], 0.005, 128, 0.5, error: 2.194699482060969e-05
model: [354, 256, 128, 64, 32, 3], 0.001, 128, 0.5, error: 2.619006590975914e-05
model: [354, 256, 128, 64, 32, 3], 0.005, 256, 0.2, error: 3.1832056265557185e-05
model: [354, 256, 128, 64, 32, 3], 0.001, 256, 0.5, error: 3.234771793358959e-05
model: [354, 256, 128, 64, 32, 3], 0.01, 256, 0.5, error: 3.2730389648349956e-05
model: [354, 256, 128, 64, 32, 3], 0.005, 256, 0.5, error: 3.341983392601833e-05
model: [354, 256, 128, 64, 32, 3], 0.005, 128, 0.2, error: 4.23714991484303e-05
model: [354, 256, 128, 64, 32, 3], 0.01, 128, 0.2, error: 7.63205680414103e-05
model: [354, 256, 128, 64, 32, 3], 0.01, 128, 0.5, error: 0.00021948710491415113

Best model location: /project/