In [None]:
import os
import shutil
import subprocess
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import tqdm.auto as tqdm
import yaml

import helper


In [None]:
# Placeholder inputs for the notebook; both are wrapped in `Path` objects by the next cell.
# NOTE(review): the "_" values look like defaults meant to be overridden when this notebook is
# executed with injected parameters (e.g. through papermill) — confirm with the caller.
param_path = "_"  # YAML file describing the runs to execute
scripts_folder = "_"  # folder holding the scripts/notebooks (and their imports) to run

In [None]:
# Turn the raw string parameters into `Path` objects for the rest of the notebook.
param_path = Path(param_path)
scripts_folder = Path(scripts_folder)

# Load the run description. The context manager guarantees the file handle is closed
# even if the YAML is malformed and parsing raises.
with param_path.open("r") as param_file:
    params = yaml.safe_load(param_file)
print(params)

In [None]:

# "variables" is optional: fall back to an empty mapping when the parameter file has none.
variables = params.get("variables", {})
# Substitute the variables throughout the parameter tree (see `helper.replace_vals`).
handled_params = helper.replace_vals(params, variables)
# Drop the variable definitions from the raw parameters. `pop` with a default keeps this a
# no-op when the key is absent, instead of raising KeyError as a plain `del` would.
params.pop("variables", None)
handled_params


In [None]:
# One row per configured run, numbered in declaration order through `run_num`.
run_df = pd.DataFrame(handled_params["runs"])
run_df["run_num"] = np.arange(len(handled_params["runs"]))

# Each run writes into its own folder, so two runs may never share one.
folder_is_repeated = run_df["run_folder"].duplicated()
if folder_is_repeated.any():
    raise Exception("run folder must be different")
run_df
    

In [None]:
def _is_missing(value):
    """Return True for the None/NaN placeholder found where a run omits an optional key.

    Building `run_df` from runs with different keys fills the gaps with NaN, so a plain
    `"key" in row` check is not enough to know whether this particular run set the key.
    """
    return value is None or (isinstance(value, float) and pd.isna(value))


# Execute every run in its own temporary folder and record how it went.
# For each run:
#   1. create a clean "<run_folder>.tmp" working folder,
#   2. copy the declared imports and dump the script parameters to params.yaml,
#   3. execute the script notebook with papermill inside the requested conda environment,
#   4. convert the executed notebook to HTML,
#   5. move the working folder to its final location only if steps 3 and 4 both succeeded.
# A failing run is reported and recorded but does not stop the remaining runs.
run_records = []
for _, row in tqdm.tqdm(run_df.iterrows(), total=len(run_df.index)):
    current_info = {
        "run_num": row["run_num"],
        "script": row["script"],
        "start_time": datetime.now(),
    }

    # Resolve the paths before the try block: the conversion/cleanup/publish steps below use
    # them after a failure, and must never see an undefined name or the previous run's value.
    dest_folder = Path(row["run_folder"])
    run_folder = Path(row["run_folder"] + ".tmp")
    output_notebook = run_folder / row["script"]

    try:
        # Always start from an empty working folder.
        if run_folder.exists():
            shutil.rmtree(run_folder)
        run_folder.mkdir(exist_ok=True, parents=True)

        # "imports" is optional and may be a single file name or a list of them.
        imports = row["imports"] if "imports" in row else None
        if _is_missing(imports):
            imports = []
        elif not isinstance(imports, list):
            imports = [imports]
        for im in imports:
            import_target = run_folder / im
            # Imports may live in sub-folders of the scripts folder.
            import_target.parent.mkdir(exist_ok=True, parents=True)
            shutil.copy(scripts_folder / im, import_target)

        with (run_folder / "params.yaml").open("w") as f:
            yaml.dump(row["script_params"], f)

        # Use the run's own environment when given, otherwise the currently active one.
        env = row["environment"] if "environment" in row else None
        if _is_missing(env):
            env = os.environ.get("CONDA_DEFAULT_ENV")
        if not env:
            raise RuntimeError(
                "no conda environment given for the run and CONDA_DEFAULT_ENV is not set"
            )

        output_notebook.parent.mkdir(exist_ok=True, parents=True)
        # Argument list instead of a shell string: paths containing spaces or quotes are
        # passed through verbatim and nothing is interpreted by a shell.
        subprocess.run(
            [
                "conda", "run", "-n", str(env),
                "papermill", "--cwd", str(run_folder),
                str(scripts_folder / row["script"]),
                str(output_notebook),
            ],
            check=True,
        )
    except Exception as e:
        # NOTE: the "sucess" spelling is kept on purpose, it is a column name of the summary.
        current_info["sucess"] = False
        print(f"Error during run number {row['run_num']}")
        display(e)
    else:
        current_info["sucess"] = True

    # The conversion is attempted even after a failed run (best effort).
    try:
        subprocess.run(
            ["jupyter", "nbconvert", "--to", "html", str(output_notebook)],
            check=True,
        )
    except Exception as e:
        print("Error while attempting to convert the notebook to html")
        display(e)
        current_info["html_sucess"] = False
    else:
        current_info["html_sucess"] = True

    # Bytecode caches are not part of the results.
    pycache_folder = run_folder / "__pycache__"
    if pycache_folder.exists():
        shutil.rmtree(pycache_folder)

    # Publish the results only for a fully successful run; otherwise the ".tmp" folder is
    # left in place and any previous content of the destination folder is preserved.
    if current_info["sucess"] and current_info["html_sucess"]:
        if dest_folder.exists():
            shutil.rmtree(dest_folder)
        shutil.move(str(run_folder), str(dest_folder))

    current_info["end_time"] = datetime.now()
    run_records.append(current_info)

# Summary table: one row per run with its status flags, start time and duration in seconds.
infos = pd.DataFrame(run_records)
infos["duration"] = (infos["end_time"] - infos["start_time"]).dt.total_seconds()
infos = infos.drop(columns=["end_time"])
infos

In [None]:
# Attach the execution results to the run configuration, matching rows on run number and
# script; the outer join keeps rows present on either side.
# NOTE: the name `all` shadows the Python builtin; it is kept because the next cell reads it.
all = run_df.merge(infos, how="outer", on=["run_num", "script"])
all

In [None]:
# Optionally persist the merged summary as a tab-separated file.
if "run_summary_folder" in handled_params:
    summary_folder = Path(handled_params["run_summary_folder"])
    # Create the folder first: `to_csv` does not create missing parent directories.
    summary_folder.mkdir(parents=True, exist_ok=True)
    all.to_csv(summary_folder / "run_summary.tsv", sep="\t", index=False)
    