# Initialization

In [None]:
import os
from datetime import datetime
from pathlib import Path

import yaml, pandas as pd, numpy as np, tqdm.auto as tqdm, shutil, subprocess

import config_adapter
import helper
from helper import RenderJSON


In [None]:
import itables
# Route DataFrame rendering in this notebook through itables.
itables.init_notebook_mode(all_interactive=True)

# Display defaults applied to every table rendered afterwards.
# NOTE(review): meanings below follow the itables/DataTables option names — confirm against the installed version.
itables.options.maxBytes = "1MB"  # size budget for the data embedded in a rendered table
itables.options.lengthMenu = [25, 10, 50, 100, 200]  # page-length choices offered to the reader
itables.options.buttons = ["copyHtml5", "csvHtml5", "excelHtml5"]  # export buttons
itables.options.layout = {
    "topEnd": "pageLength",
    "top1": "searchBuilder",
}

In [None]:
# Autofilled by calling script
# The "_" values are placeholders only; the following cell converts both names to Path objects.
# NOTE(review): keep these two assignments textually stable — presumably the calling script
# locates/overrides them by name; confirm before reformatting.
param_path = "_" # Path to param config file
scripts_folder = "_" # Path to folder containing all scripts

In [None]:
# Convert the injected string parameters to Path objects for the rest of the notebook.
param_path, scripts_folder = Path(param_path), Path(scripts_folder)

# Load the parameter file through config_adapter and render it for inspection.
params = config_adapter.load(param_path)
RenderJSON(params)

In [None]:
# Normalize the optional "variables" section of the parameters; without it there is nothing to register.
variables = (
    config_adapter.normalize_yaml_paramlist(
        params["variables"], format=config_adapter.variable_param_format
    )
    if "variables" in params
    else {}
)
# handled_params = helper.replace_vals(params, variables)

# Build the evaluation context; these two methods are registered before any variable is added to it.
ctx = config_adapter.Context()
ctx.methods["raw"] = config_adapter.raw
ctx.methods["expand_envvars"] = config_adapter.expand_envvars

# Register the variables one by one, in the order they are listed.
for variable_spec in variables:
    config_adapter.add_variable_context(ctx, variable_spec)

RenderJSON(ctx.variables)


In [None]:
# Normalize the optional "tables" section of the parameters; without it no table is registered.
if "tables" in params:
    tables = config_adapter.normalize_yaml_paramlist(params["tables"], format=config_adapter.table_param_format)
else:
    tables = []

# Table-building methods are added to the context before any table is registered.
ctx.methods["find_files"] = config_adapter.find_files
ctx.methods["regex_filter"] = config_adapter.regex_filter
ctx.methods["from_rows"] = config_adapter.from_rows
ctx.methods["longest_prefix_join"] = config_adapter.longest_prefix_join

# Each table entry may expand into several concrete tables (via get_duplicate_table /
# handle_duplicate_over); every expanded entry is registered in the context.
for table in tables:
    it_table = config_adapter.handle_duplicate_over(config_adapter.get_duplicate_table(ctx, table), table)
    for t in it_table:
        config_adapter.add_table_context(ctx, t)

# Show every registered table with its name as caption.
# The previous `t.style.set_caption(n)` built a Styler and discarded it, so the caption never
# appeared; itables.show renders the DataFrame itself and attaches the caption.
for n, t in ctx.tables.items():
    itables.show(t, caption=str(n))

In [None]:
# Every entry of params["runs"] is expanded through get_duplicate_table / handle_duplicate_over,
# and each expanded entry is evaluated against the context.
runs = [
    ctx.evaluate(expanded_run)
    for run_spec in params["runs"]
    for expanded_run in config_adapter.handle_duplicate_over(
        config_adapter.get_duplicate_table(ctx, run_spec), run_spec
    )
]
display(RenderJSON(runs))

# One row per evaluated run; run_num is the position of the run in the expanded list.
run_numbers = np.arange(len(runs))
run_df = pd.DataFrame(runs).assign(run_num=run_numbers)
run_df

In [None]:
# run_df = pd.DataFrame(handled_params["runs"]).assign(run_num=np.arange(len(handled_params["runs"])))
# run_df.insert(0, "run_num", run_df.pop("run_num"))
# if run_df["run_folder"].duplicated().any():
#     raise Exception("run folder must be different")
# run_df["imports"] = run_df.pop("imports")
# run_df["script_params"] = run_df.pop("script_params")
# run_df
    

In [None]:
def _as_import_patterns(value):
    """Normalize a run's ``imports`` entry into a list of glob patterns.

    A list/tuple is returned as a list, a single pattern is wrapped in a list, and a missing
    entry (``None`` or NaN, which pandas inserts when only some runs define ``imports``)
    yields an empty list.
    """
    if isinstance(value, (list, tuple)):
        return list(value)
    if value is None or pd.isna(value):
        return []
    return [value]


def _resolve_environment(row):
    """Return the conda environment name for a run, or ``None`` when none is known.

    Uses the run's ``environment`` entry when it is present and not NaN, otherwise falls back
    to the ``CONDA_DEFAULT_ENV`` environment variable of the current process.
    """
    env = row.get("environment")
    if env is None or pd.isna(env):
        # Read the variable directly: capturing `echo $CONDA_DEFAULT_ENV` yields bytes with a
        # trailing newline, which ended up in the command line as "b'name\n'".
        env = os.environ.get("CONDA_DEFAULT_ENV")
    return str(env) if env else None


# Execute every run: stage its files in "<run_folder>.tmp", execute the notebook with
# papermill, convert the executed notebook to HTML, and move the staging folder to
# "<run_folder>" only when both steps succeeded.
# The "sucess"/"html_sucess" keys keep their original spelling because later cells read them.
run_records = []
for _, row in tqdm.tqdm(run_df.iterrows(), total=len(run_df.index)):
    current_info = {
        "run_num": row["run_num"],
        "script": row["script"],
        "start_time": datetime.now(),
    }
    # Reset per iteration so an early failure can never reuse the previous run's folders.
    run_folder = dest_folder = None
    try:
        dest_folder = Path(row["run_folder"])
        # Sibling folder next to the destination; with_name keeps this correct even when
        # run_folder is written with a trailing slash.
        run_folder = dest_folder.with_name(dest_folder.name + ".tmp")
        if run_folder.exists():
            shutil.rmtree(run_folder)
        run_folder.mkdir(exist_ok=True, parents=True)

        # Copy every file matched by the import patterns (relative to scripts_folder).
        for pattern in _as_import_patterns(row.get("imports")):
            for src in scripts_folder.glob(pattern):
                shutil.copyfile(src, run_folder / src.name)

        with (run_folder / "params.yaml").open("w") as f:
            yaml.dump(row["script_params"], f)

        # Argument lists avoid shell quoting problems with spaces or quotes in paths.
        command = [
            "papermill",
            "--cwd", str(run_folder),
            str(scripts_folder / row["script"]),
            str(run_folder / row["script"]),
        ]
        env = _resolve_environment(row)
        if env is not None:
            command = ["conda", "run", "-n", env, *command]
        # Without any known environment, papermill runs in the current one.
        subprocess.run(command, check=True)
    except Exception as e:
        current_info["sucess"] = False
        print(f"Error during run number {row['run_num']}")
        display(e)
    else:
        current_info["sucess"] = True

    # The HTML conversion is attempted even after a failed run.
    try:
        if run_folder is None:
            raise RuntimeError("run folder could not be determined, nothing to convert")
        subprocess.run(
            ["jupyter", "nbconvert", "--to", "html", str(run_folder / row["script"])],
            check=True,
        )
    except Exception as e:
        print("Error while attempting to convert to notebook")
        display(e)
        current_info["html_sucess"] = False
    else:
        current_info["html_sucess"] = True

    if run_folder is not None and (run_folder / "__pycache__").exists():
        shutil.rmtree(run_folder / "__pycache__")

    # Publish the results only when both steps succeeded; otherwise the staging folder is
    # left in place and any previous results in dest_folder stay untouched.
    if current_info["sucess"] and current_info["html_sucess"]:
        if dest_folder.exists():
            shutil.rmtree(dest_folder)
        shutil.move(str(run_folder), str(dest_folder))

    current_info["end_time"] = datetime.now()
    run_records.append(current_info)

# One row per run with its outcome flags and wall-clock duration in seconds.
infos = pd.DataFrame(run_records)
infos["duration"] = (infos["end_time"] - infos["start_time"]).dt.total_seconds()
infos = infos.drop(columns=["end_time"])
infos

In [None]:
def _notebook_html_path(row):
    """Return the absolute path (as a string) of the HTML notebook produced for a run.

    Results are moved to ``run_folder`` only when the run itself succeeded; after a failed
    run the HTML (if it was produced) is still in the ``<run_folder>.tmp`` staging folder.
    """
    folder = Path(row["run_folder"])
    if not row["sucess"]:
        folder = folder.with_name(folder.name + ".tmp")
    return str((folder / row["script"]).with_suffix(".html").resolve())


# Combine the execution outcome with the run definitions.
# NOTE: the name `all` shadows the builtin; it is kept because the following cell reads it.
all = infos.merge(run_df, how="left", on=["run_num", "script"])
all["notebook_url"] = '<a href="' + all.apply(_notebook_html_path, axis=1) + '">notebook</a>'
# Only keep a link where the HTML conversion succeeded, and show it as the second column.
all.insert(1, "notebook_url", all.pop("notebook_url").where(all["html_sucess"]))
all
# all.drop(columns="script_params")

In [None]:
# Optionally write the combined summary table as a tab-separated file.
if "run_summary_folder" in params:
    summary_folder = Path(ctx.evaluate(params["run_summary_folder"]))
    # Create the folder first (as is done for run folders) so a fresh location does not make
    # to_csv fail after all runs have already been executed.
    summary_folder.mkdir(parents=True, exist_ok=True)
    all.to_csv(summary_folder / "run_summary.tsv", sep="\t", index=False)
    