# Batch run models

In [None]:
import os, multiprocessing, time, papermill, sys, sqlite3
from IPython.display import clear_output
from tqdm import tqdm
from meta import make_batch_cfg, csv_to_bigquery
import pandas as pd

# nest_asyncio.apply()
# The OG random seeds were generated with: [int(random.random() * 1e5) for x in range(10)]

## Make configs

Latest workflow:

1. Use this notebook to generate the batch configs.
2. Run them with `quick_run_papermill.py`, setting which GPU to run on.
3. Run up to 4 instances on the UConn server.
4. A GPU may be split between runs, since there is a lot of headroom.

In [None]:
from meta import make_batch_cfg

batch_name = "non_stationary"
batch_output_dir = f"models/{batch_name}/"

param_grid = {
    "batch_size": [1, 4, 64],
    "learning_rate": [.0001, .001, .005, .01],
}

static_hpar = {
    "tf_root": "/home/jal21012/triangle_model",
    "ort_units": 119,
    "pho_units": 250,
    "sem_units": 2446,
    "hidden_os_units": 500,
    "hidden_op_units": 100,
    "hidden_ps_units": 500,
    "hidden_sp_units": 500,
    "pho_cleanup_units": 50,
    "sem_cleanup_units": 50,
    "pho_noise_level": 0.0,
    "sem_noise_level": 0.0,
    "activation": "sigmoid",
    "tau": 1 / 3,
    "max_unit_time": 4.0,
    "inject_error_ticks": 11,
    "output_ticks": 13,
    # "learning_rate": 0.005,
    "zero_error_radius": 0.1,
    "save_freq": 20,
    "wf_compression": "log",
    "wf_clip_low": 0,
    "wf_clip_high": 999_999_999,
    "task_names": ("pho_sem", "sem_pho", "pho_pho", "sem_sem", "triangle"),
    "tasks_ps_oral": (0.4, 0.4, 0.1, 0.1, 0.0),
    "tasks_ps_reading": (0.2, 0.2, 0.05, 0.05, 0.5),
    "batch_name": batch_name,
    "total_sample": 10_000_000,
    # "batch_size": 1,
    "rng_seed": 2021
}

batch_cfgs = make_batch_cfg(
    batch_name, batch_output_dir, static_hpar, param_grid, "master.ipynb"
)

# Snapshot master template
!cp master.ipynb $batch_output_dir/master_snapshot.ipynb

## Push config to database

In [None]:
# Collect the hyper-parameters of every generated config into one table:
# one row per run, indexed by the run's position in `batch_cfgs`.
# The single-row frames are built first and concatenated once. Growing
# `cfg_df` with pd.concat inside the loop copies the whole table on every
# iteration and starts from an empty frame, which recent pandas warns about.
cfg_rows = [
    pd.DataFrame(cfg["params"], index=[i]) for i, cfg in enumerate(batch_cfgs)
]
# pd.concat([]) raises, so fall back to an empty frame for an empty batch.
cfg_df = pd.concat(cfg_rows) if cfg_rows else pd.DataFrame()

# Write the table to an SQLite file next to the batch outputs, replacing any
# existing "batch_config" table. `con` and `cur` are left open on purpose:
# the maintenance snippet in a later cell refers to them.
sqlite_file = os.path.join(batch_output_dir, "batch_results.sqlite")
con = sqlite3.connect(sqlite_file)
cur = con.cursor()
cfg_df.to_sql("batch_config", con, if_exists="replace")

### Re-evaluation snippet

In [None]:
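# Ad-hoc database maintenance -- uncomment the statements you need.
# List the tables in the batch results database:
# cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
# print(cur.fetchall())
# Drop the `strain` table:
# con.execute('DROP TABLE strain')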

In [None]:
import tensorflow as tf
import meta, data_wrangling, modeling, evaluate

# Visit every run registered in `cfg_df` and upload its training-item results.
# The loop is driven by the table itself, not by a fixed `range(10)`:
# `param_grid` lists 3 x 4 = 12 settings, so (assuming one run per grid
# combination -- TODO confirm) a fixed count of 10 would skip the last runs,
# and would raise a KeyError for a batch with fewer than 10 rows.
for code_name in tqdm(cfg_df.code_name):

    # Load the run's saved config. Nothing below reads `cfg` at the moment;
    # it is kept so this cell still fails early when a run has no
    # model_config.json, and so re-evaluation code can be slotted in here.
    cfg = meta.Config.from_json(
        os.path.join(batch_output_dir, code_name, "model_config.json")
    )

    # Push csv to BQ: each run's train_item_df.csv is sent to the "train"
    # table of the dataset named after the batch.
    csv_file = os.path.join(
        batch_output_dir, code_name, "eval", "train_item_df.csv"
    )
    csv_to_bigquery(
        csv_file, dataset_name=batch_name, table_name="train"
    )

Shut down the compute engine. **Warning:** running the next cell powers off the machine after a 30-second delay.

In [None]:
time.sleep(30)
print('shuting down')
!sudo poweroff