# Batch run models

In [None]:
%load_ext lab_black
import os, multiprocessing, time, papermill, sys
from IPython.display import clear_output
from tqdm import tqdm


sys.path.append("/home/jupyter/tf/src")
from meta import ModelConfig, make_batch_cfg, parse_batch_results

# nest_asyncio.apply()

## Make configs

In [None]:
# Identity of this batch; every artefact is written below batch_output_dir.
batch_name = "chang_ssr_replicate"
batch_output_dir = f"models/batch_run/{batch_name}/"

# How the original random seeds were generated:
#   [int(random.random() * 1e5) for x in range(10)]

code_name = "chang19_ssr_1k_test"

# Hyper-parameters that vary between runs. Both lists have six entries;
# how they are combined (full grid vs. pairwise) is decided inside
# make_batch_cfg -- TODO confirm against src/meta.py.
param_grid = dict(
    oral_vocab_size=[1000, 2000, 3000, 4000, 5000, 6000],
    n_mil_sample=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
)

# Hyper-parameters shared by every run in the batch. Key order is kept
# identical to the original literal in case downstream code relies on it.
static_hpar = dict(
    tf_root="/home/jupyter/tf",
    # Unit counts (presumably layer sizes -- confirm against ModelConfig).
    ort_units=119,
    pho_units=250,
    sem_units=2446,
    hidden_os_units=500,
    hidden_op_units=100,
    hidden_ps_units=500,
    hidden_sp_units=500,
    pho_cleanup_units=50,
    sem_cleanup_units=50,
    # Noise is switched off for both pho and sem.
    pho_noise_level=0.0,
    sem_noise_level=0.0,
    activation="sigmoid",
    tau=1 / 3,
    max_unit_time=4.0,
    output_ticks=4,
    rng_seed=53797,
    learning_rate=0.01,
    batch_size=100,
    save_freq=10,
    batch_name=batch_name,
)

# Build one config per run; "hs04_master.ipynb" is the notebook handed to
# make_batch_cfg as the template to parameterize.
batch_cfgs = make_batch_cfg(
    batch_name,
    batch_output_dir,
    static_hpar,
    param_grid,
    "hs04_master.ipynb",
)

## Parallel run
- Can run in JupyterLab
- Cannot run in VS Code

In [None]:
# Run
def run_batch(cfg):
    """
    Using papermill to run parameterized notebook
    To prevent overwriting, set default overwrite to False if needed
    """

    print("Running model {}".format(cfg["sn"]))
    os.makedirs(cfg["model_folder"], exist_ok=True)
    papermill.execute_notebook(
        cfg["in_notebook"],
        cfg["out_notebook"],
        parameters=cfg["params"],
        prepare_only=True
    )
    
    in_notebook = cfg["out_notebook"]
    out_dir = cfg["model_folder"]
    !jupyter nbconvert --ExecutePreprocessor.timeout=300000 --output-dir $out_dir --to notebook --execute $in_notebook
    !jupyter nbconvert --output-dir $out_dir --to html_toc $in_notebook
    clear_output()


# Fan the configs out over two worker processes. map() blocks until every
# config has been processed; leaving the `with` block tears the pool down.
with multiprocessing.Pool(processes=2) as pool:
    pool.map(run_batch, batch_cfgs)

# Sequential alternative (with a progress bar), kept for debugging:
# for cfg in tqdm(batch_cfgs):
#     run_batch(cfg)


print("Done")

Compile and save results

In [None]:
# Cannot be executed together with the parallel run for some reason (maybe due to nest_asyncio); the fix has been forgotten.
# cfgs, df = parse_batch_results(batch_cfgs)
# df.to_csv(batch_output_dir + "bcdf.csv")
# cfgs.to_csv(batch_output_dir + "cfgs.csv")

Shut down compute engine

In [None]:
time.sleep(30)
print('shuting down')
!sudo poweroff