# Running Jupyter notebooks in batch

Using the nbparameterise API to substitute variables in 'batch mode'.

1. https://github.com/takluyver/nbparameterise
1. https://pypi.org/project/nbparameterise/

Import libraries necessary for nbparameterise

In [1]:
from nbclient import execute
import nbformat
from nbparameterise import extract_parameters, parameter_values, \
    replace_definitions

Import other libraries

In [2]:
import random
import time

Function to create string for output folder name (different for each batch run)

In [3]:
def create_date_time_string():
    """Return the current local date and time as a ``yymmdd_hhmmss`` string.

    The string is used to build an output folder name that differs for each
    batch run.

    The clock is read once, with a single format string, so the date and
    time parts always describe the same instant (two separate reads could
    disagree if the call straddles midnight).

    Returns:
        str: two-digit year, month, day, an underscore, then hour (24h),
        minute and second, e.g. ``"240131_235959"``.
    """
    return time.strftime("%y%m%d_%H%M%S")

In [4]:
# Timestamp taken once per batch, so every run of this batch shares a folder
date_time_string = create_date_time_string()
output_folder_str = date_time_string + "_output"

For each parameter to change (those in the first cell of the ipynb to be run in batch), create a list of the available values.

In [5]:
# Candidate values for each parameter of the notebook to be run in batch.
# A parameter with a single candidate is effectively held constant.

# network structure
number_hidden_layers_list = list(range(1, 4))
number_nodes_list = list(range(1, 11))
dropout_list = [0.25, 0.5]
wide_and_deep_list = [0, 1]
single_final_hidden_layer_list = [0]

# training settings
learning_rate_list = [0.001, 0.003, 0.01]
calculation_batch_size_list = [32, 64, 512]
kfold_list = [5]
epoch_list = [5]

# where the results are stored
output_folder_list = [output_folder_str]

Define the run parameters

In [6]:
# number of parameterised copies of the notebook to produce
number_runs = 5

# name of the notebook to run in batch (the ".ipynb" extension is added later)
input_filename = "tensorflow_api"

Initialise lists to hold the parameter values for each of the runs

In [7]:
# One (initially empty) list per notebook parameter; entry i of each list
# holds the value used for run i.
#   hl: hidden layers, nn: nodes, do: dropout, wd: wide and deep,
#   sl: single final hidden layer, lr: learning rate, bs: batch size,
#   kf: kfold, of: output folder, ep: epochs
hl, nn, do, wd, sl = [], [], [], [], []
lr, bs, kf, of, ep = [], [], [], [], []

Populate the lists with random choices for each parameter to use for each run

In [8]:
# Generate a random choice for every parameter, once per run.
# random.choice picks from whatever the option list currently contains, so
# adding or removing candidate values needs no matching change to hard-coded
# index bounds here (a stale bound would either silently ignore new values
# or raise IndexError).
for _ in range(number_runs):
    hl.append(random.choice(number_hidden_layers_list))
    nn.append(random.choice(number_nodes_list))
    do.append(random.choice(dropout_list))
    wd.append(random.choice(wide_and_deep_list))
    sl.append(random.choice(single_final_hidden_layer_list))
    lr.append(random.choice(learning_rate_list))
    bs.append(random.choice(calculation_batch_size_list))
    kf.append(random.choice(kfold_list))
    of.append(random.choice(output_folder_list))
    ep.append(random.choice(epoch_list))

Read in the ipynb and get the parameters (those in the first cell)

In [9]:
# Load the notebook that will be parameterised (as an nbformat v4 notebook)
nb = nbformat.read(input_filename + ".ipynb", as_version=4)

# The parameter definitions found in the notebook; these are the values
# that get overridden for each run
orig_parameters = extract_parameters(nb)

For each of the runs, change the parameter values, run the notebook and save as a new notebook with a new name

In [10]:
import os

# For every run: substitute that run's parameter values into a copy of the
# notebook, execute the copy, and save it under a name that encodes the
# parameter values.

# initialise run counter
count_run = 0

# for each value in all of the parameter lists
for layer, node, dropo, w_d, single, learn, batch_size, kfold, folder, epoch \
    in zip(hl, nn, do, wd, sl, lr, bs, kf, of, ep):

    # print a message to show where in the run the code is
    count_run += 1
    print("")
    print("        *****       ")
    print(f"{count_run} of {number_runs}. Running for hidden layer: {layer}, "
          f"node: {node}, dropout: {dropo}, wide and deep: {w_d},"
          f" single final layer: {single}, learning rate: {learn},"
          f" batch_size: {batch_size}, kfold: {kfold}, epoch: {epoch} ")
    print("        *****       ")
    print("")

    # Update the parameters
    params = parameter_values(orig_parameters,
                              number_hidden_layers=layer,
                              number_nodes=node,
                              dropout=dropo,
                              wide_and_deep=w_d,
                              single_final_hidden_layer=single,
                              learning_rate=learn,
                              calculation_batch_size=batch_size,
                              number_kfold=kfold,
                              output_folder=folder,
                              max_epoch=epoch)

    # Replace the parameter values in a copy of the notebook. Execution is
    # switched off here and done explicitly below, so the notebook runs
    # exactly once regardless of the default of `execute` in the installed
    # nbparameterise version.
    new_nb = replace_definitions(nb, params, execute=False)

    # Run the parameterised notebook; nbclient stores the cell outputs in
    # new_nb itself.
    # NOTE(review): runs in the current working directory - pass cwd=... if
    # the notebook expects a different one.
    execute(new_nb)

    # name the new notebook ("0." is replaced so the values are filename
    # friendly, e.g. 0.001 -> 0-001)
    learning_rate_str = str(learn).replace("0.", "0-")
    dropout_str = str(dropo).replace("0.", "0-")
    scen_filename = (f"_{layer}_{node}_{dropout_str}_{w_d}_{single}_"
                     f"{learning_rate_str}_{batch_size}_{kfold}_{epoch}")

    # Make sure the output folder exists before writing into it; the folder
    # name is new for every batch, so it cannot be assumed to exist.
    os.makedirs(folder, exist_ok=True)
    output_path = os.path.join(folder,
                               f"{input_filename}{scen_filename}.ipynb")

    # Notebooks are JSON and may contain non-ASCII text, so write as UTF-8
    # rather than relying on the platform default encoding.
    with open(output_path, 'w', encoding='utf-8') as f:
        nbformat.write(new_nb, f)


        *****       
1 of 5. Running for hidden layer: 1, node: 6, dropout: 0.5, wide and deep: 1, single final layer: 0, learning rate: 0.001, batch_size: 64, kfold: 5, epoch: 5 
        *****       






        *****       
2 of 5. Running for hidden layer: 2, node: 1, dropout: 0.5, wide and deep: 0, single final layer: 0, learning rate: 0.001, batch_size: 32, kfold: 5, epoch: 5 
        *****       


        *****       
3 of 5. Running for hidden layer: 3, node: 8, dropout: 0.5, wide and deep: 1, single final layer: 0, learning rate: 0.003, batch_size: 32, kfold: 5, epoch: 5 
        *****       


        *****       
4 of 5. Running for hidden layer: 1, node: 6, dropout: 0.5, wide and deep: 0, single final layer: 0, learning rate: 0.001, batch_size: 64, kfold: 5, epoch: 5 
        *****       


        *****       
5 of 5. Running for hidden layer: 2, node: 5, dropout: 0.5, wide and deep: 0, single final layer: 0, learning rate: 0.001, batch_size: 32, kfold: 5, epoch: 5 
        *****       

