In [1]:
# default_exp run_flow

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# export


import asyncio
import multiprocessing
import os
import subprocess
import sys
from itertools import product
from pathlib import Path, PosixPath
from typing import Any, Dict, Iterable

import pandas as pd
from fastcore.script import Param, call_parse
from nbdev.export import find_default_export, get_config, nbglob, read_nb

from sciflow.data_handler import extract_param_meta
from sciflow.params import params_as_dict
from sciflow.parse_module import FuncDetails, extract_steps
from sciflow.utils import prepare_env, get_flow_path

# Verify and Run Sciflow Flows

In [4]:
# Shared fixtures for the test cells in this notebook.
nb_path = Path(os.path.join("test", "test_export.ipynb"))
flow_path = get_flow_path(nb_path, flow_provider="sagemaker")
nb = read_nb(nb_path)
# Default export name of the notebook with "." replaced by "/", i.e. a relative module path.
module_name = find_default_export(nb["cells"]).replace(".", "/")
# NOTE(review): `test_module` is resolved against the default config while `flows_dir`
# reads test/settings.ini — confirm the two configs are meant to differ.
test_module = os.path.join(get_config().path("lib_path"), f"{module_name}.py")
flows_dir = get_config(cfg_name="test/settings.ini").path("flows_path")
flow_name = os.path.basename(test_module)

In [5]:
# # export


# def prep_env():
#     if "USER" not in os.environ:
#         try:
#             os.environ["USER"] = os.environ["GIT_COMMITTER_NAME"]
#         except KeyError:
#             raise EnvironmentError(
#                 "Sciflow requires a known user for tracked execution. Add USER or GIT_COMMITTER_NAME to Jupyter environment variables"
#             )

In [6]:
# export


def run_shell_cmd(script: str):
    """Run `script` through the shell and wait for it to finish.

    stderr is merged into stdout. Returns a tuple of the finished
    `subprocess.Popen` object (so callers can read `returncode`) and the
    captured output decoded as UTF-8 with surrounding whitespace stripped.
    """
    proc = subprocess.Popen(
        str(script),
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    raw_output, _ = proc.communicate()
    return proc, raw_output.decode("utf-8").strip()

In [7]:
# export

def check_call_flow(flow_nb_path, flow_provider="metaflow", flow_command="show", params=None):
    """Invoke a flow file with `flow_command` in a shell and report the outcome.

    Args:
        flow_nb_path: path to either a notebook (`.ipynb`), in which case the flow
            file is looked up with `get_flow_path` for `flow_provider`, or to the
            flow file itself, which is then used as given.
        flow_provider: provider passed to `get_flow_path`; only used for notebooks.
        flow_command: command string placed after the flow file, e.g. "show" or
            "--no-pylint run". It is inserted verbatim, so it may hold several words.
        params: optional iterable of `(name, value)` pairs appended as
            `--name value`. Values are inserted verbatim, so callers must quote
            values that contain shell-special characters.

    Returns:
        `(return_code, output)` of the shell command.
    """
    prepare_env()
    # Accept plain string paths as well as Path objects.
    flow_nb_path = Path(flow_nb_path)
    if flow_nb_path.suffix == ".ipynb":
        # Resolve the flow from the argument, not from a notebook-level global.
        flow_path = get_flow_path(flow_nb_path, flow_provider=flow_provider)
    else:
        flow_path = flow_nb_path
    if params:
        args = " ".join(f"--{name} {value}" for name, value in params)
        flow_command = f"{flow_command} {args}"

    script = f"python '{flow_path}' {flow_command}"
    pipe, output = run_shell_cmd(script)
    return pipe.returncode, output

In [8]:
# `show` on the metaflow flow for `nb_path` must exit with code 0.
ret_code, output = check_call_flow(nb_path, flow_provider = 'metaflow', flow_command = 'show')
# Attach the command output so a failure explains itself.
assert ret_code == 0, output

In [9]:
# slow

ret_code, output = check_call_flow(nb_path, flow_provider = 'metaflow', flow_command = 'run')
# Print before asserting so the flow log is still visible when the run fails.
print(output)
assert ret_code == 0

Metaflow 2.5.2 executing TestExportFlow for user:'Donal Simmie'
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint is happy!
2022-05-10 06:18:50.611 Workflow starting (run-id 1652163530336451):
2022-05-10 06:18:50.675 [1652163530336451/start/1 (pid 32463)] Task is starting.
2022-05-10 06:18:55.396 [1652163530336451/start/1 (pid 32463)] 3
2022-05-10 06:18:55.756 [1652163530336451/start/1 (pid 32463)] Task finished successfully.
2022-05-10 06:18:55.840 [1652163530336451/preprocess/2 (pid 32466)] Task is starting.
2022-05-10 06:19:00.550 [1652163530336451/preprocess/2 (pid 32466)] Preprocessing input data from /home/sagemaker-user/git/sciflow/nbs...
2022-05-10 06:19:01.919 [1652163530336451/preprocess/2 (pid 32466)] Task finished successfully.
2022-05-10 06:19:02.010 [1652163530336451/train/3 (pid 32469)] Task is starting.
2022-05-10 06:19:06.731 [1652163530336451/train/3 (pid 32469)] Training /home/sagemaker-user/git/sciflow on /home/sagemaker-user/git/sciflow

In [14]:
# `show` on the sagemaker flow for `nb_path` must exit with code 0.
ret_code, output = check_call_flow(nb_path, flow_provider = 'sagemaker', flow_command = 'show')
# Attach the command output so a failure explains itself.
assert ret_code == 0, output

In [21]:
# slow 

ret_code, output = check_call_flow(nb_path, flow_provider = 'sagemaker', flow_command = 'run')
# Print before asserting so the pipeline response is still visible when the run fails.
print(output)
assert ret_code == 0

Starting Sciflow generated pipeline: pipeline-2022-05-10-06-54-03-180
{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:368653567616:pipeline/test-export', 'PipelineExecutionArn': 'arn:aws:sagemaker:eu-west-1:368653567616:pipeline/test-export/execution/g7umlpneq9uv', 'PipelineExecutionDisplayName': 'execution-1652165647036', 'PipelineExecutionStatus': 'Executing', 'CreationTime': datetime.datetime(2022, 5, 10, 6, 54, 6, 678000, tzinfo=tzlocal()), 'LastModifiedTime': datetime.datetime(2022, 5, 10, 6, 54, 6, 678000, tzinfo=tzlocal()), 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:368653567616:user-profile/d-likrmmebxomz/donal', 'UserProfileName': 'donal', 'DomainId': 'd-likrmmebxomz'}, 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:368653567616:user-profile/d-likrmmebxomz/donal', 'UserProfileName': 'donal', 'DomainId': 'd-likrmmebxomz'}, 'ResponseMetadata': {'RequestId': '3b0db9e1-5215-41df-83db-760831e32c8b', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-re

# TODO

Pass params into a sagemaker workflow

In [30]:
# export


def check_call_flows(
    config, flow_provider = 'metaflow', flow_command="show", ignore_suffix=None, exit_on_error=True
):
    """Run `flow_command` against every flow file of one provider and summarise.

    The flow files are the `.py` entries of `<flows_path>/<flow_provider>`, where
    `flows_path` comes from `config.path("flows_path")`. Entries whose name starts
    with `_sciflow`, or ends with `ignore_suffix` when one is given, are skipped.
    A one-line verdict is printed per flow, followed by the command output when
    the flow fails.

    Returns 0 when every flow succeeded and 1 otherwise. On failure, when
    `get_ipython` is not defined (i.e. outside IPython) and `exit_on_error` is
    true, the process exits with status 1 instead of returning.
    """
    flows_dir = Path(config.path("flows_path"), flow_provider)

    candidates = os.listdir(flows_dir)
    if ignore_suffix:
        candidates = [name for name in candidates if not name.endswith(ignore_suffix)]

    any_failed = False
    for file_name in candidates:
        if file_name.startswith('_sciflow') or not file_name.endswith(".py"):
            continue
        flow_name = os.path.basename(file_name)
        ret_code, output = check_call_flow(Path(flows_dir, file_name), flow_command=flow_command)
        if ret_code != 0:
            any_failed = True
            print(
                f"Flow: {flow_name} {flow_command} verification failed\nDetails:\n{output}"
            )
        else:
            print(f"Flow: {flow_name} {flow_command} verified")

    if not any_failed:
        return 0

    exit_code = 1
    try:
        # Only defined inside IPython/Jupyter; its absence means a plain Python process.
        get_ipython().__class__.__name__
    except NameError:
        if exit_on_error:
            return sys.exit(exit_code)
    return exit_code

In [31]:
# Default provider ("metaflow") and command ("show"); the returned exit code is the cell output.
check_call_flows(get_config(cfg_name="test/settings.ini"))

Flow: test_export.py show verified
Flow: test_data_handling.py show verified
Flow: test_module.py show verified
Flow: test_multistep_no_params.py show verified
Flow: test_multistep.py show verified


0

In [35]:
# slow

# Run every metaflow flow except files ending in "_no_params.py"; the returned exit code is the cell output.
check_call_flows(get_config(cfg_name="test/settings.ini"), flow_command="--no-pylint run", ignore_suffix="_no_params.py")

Flow: test_export.py --no-pylint run verified
Flow: test_data_handling.py --no-pylint run verified
Flow: test_module.py --no-pylint run verified
Flow: test_multistep.py --no-pylint run verified


0

In [33]:
# Same `show` check, against the flow files in the "sagemaker" provider directory.
check_call_flows(get_config(cfg_name="test/settings.ini"), flow_provider="sagemaker")

Flow: test_export.py show verified
Flow: test_data_handling.py show verified
Flow: test_module.py show verified
Flow: test_multistep_no_params.py show verified
Flow: test_multistep.py show verified


0

In [34]:
# slow

# NOTE: this rebinds the notebook-level `nb_path`; cells executed after this one
# see the multistep notebook instead of test_export.ipynb.
nb_path = Path(Path(".").resolve(), "test", "test_multistep.ipynb")
# `params` is a list of (name, value) pairs; each is appended to the command as `--name value`.
ret_code, output = check_call_flow(
    nb_path,
    flow_command = 'run',
    params=[("traffic_percent", 1), ("model_level", "dispatcher")],
)
# Print first so the flow log is visible even when the assertion fails.
print(output)
assert ret_code == 0

Metaflow 2.5.2 executing TestMultistepFlow for user:'Donal Simmie'
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint is happy!
2022-05-10 07:15:15.127 Workflow starting (run-id 1652166914856693):
2022-05-10 07:15:15.191 [1652166914856693/start/1 (pid 1879)] Task is starting.
2022-05-10 07:15:16.581 [1652166914856693/start/1 (pid 1879)] The first step
2022-05-10 07:15:17.000 [1652166914856693/start/1 (pid 1879)] Task finished successfully.
2022-05-10 07:15:17.089 [1652166914856693/preprocess/2 (pid 1886)] Task is starting.
2022-05-10 07:15:18.488 [1652166914856693/preprocess/2 (pid 1886)] I captialised the message: THE FIRST STEP
2022-05-10 07:15:18.943 [1652166914856693/preprocess/2 (pid 1886)] Task finished successfully.
2022-05-10 07:15:19.028 [1652166914856693/fit/3 (pid 1893)] Task is starting.
2022-05-10 07:15:20.896 [1652166914856693/fit/3 (pid 1893)] Task finished successfully.
2022-05-10 07:15:20.977 [1652166914856693/evaluate/4 (pid 1900)] Task is 

# [WIP] Async Flow Running

> Run the flow you are working on directly from the notebook you are working in. This maximises the number of experiments you can run because there is no downtime: while long-running tasks execute, you can keep exploring! :-)

In [None]:
# export


async def run_flow_task(flow_path, param_grid=None):
    """Run a flow file with `--no-pylint run` in a subprocess and return its exit code.

    Args:
        flow_path: `pathlib.Path` of the flow file to execute.
        param_grid: optional mapping of parameter name to a single value; each
            entry is appended to the command as `--name value`. Values are
            inserted verbatim into the shell command.

    The executed command, its exit code and any captured stdout/stderr are
    printed once the subprocess finishes.
    """
    flows_dir = flow_path.parent
    flow_module = os.path.basename(flow_path)
    flow_command = "--no-pylint run"
    # Same environment preparation that `check_call_flow` performs before running a flow.
    prepare_env()
    cmd = f"python '{os.path.join(flows_dir, flow_module)}' {flow_command}"
    if param_grid:
        args = " ".join(f"--{k} {v}" for k, v in param_grid.items())
        cmd = f"{cmd} {args}"
    proc = await asyncio.create_subprocess_shell(
        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )

    stdout, stderr = await proc.communicate()

    print(f"[{cmd!r} exited with {proc.returncode}]")
    if stdout:
        output = stdout.decode("utf-8").strip()
        print(f"[stdout]\n{output}")
    if stderr:
        print(f'[stderr]\n{stderr.decode("utf-8").strip()}')

    return proc.returncode

In [None]:
# export


def run_flow_async(nb_path, params=None):
    """Schedule the flow belonging to `nb_path` on the current event loop.

    The flow file is looked up with `get_flow_path(nb_path)` and handed, together
    with `params`, to `run_flow_task`. The resulting `asyncio` task is returned
    without being awaited, so the caller decides when to collect the result.
    """
    target = get_flow_path(nb_path)
    return asyncio.get_event_loop().create_task(run_flow_task(target, params))

In [None]:
# slow

# Schedules the run and returns immediately; the task object is the cell output.
# NOTE(review): the notebook path is a plain string here while other cells pass a
# Path — confirm get_flow_path accepts both.
task = run_flow_async(
    os.path.join("test", "test_multistep.ipynb"),
    params={"traffic_percent": 10, "workers": 12},
)
task

In [None]:
# slow
# Top-level await (IPython cell): waits for the flow to finish; the value is the subprocess return code.
await task

In [None]:
# Example grid: each key is a parameter name and each value the list of candidates to try.
# NOTE(review): no later cell in this notebook references this variable.
param_grid = {
    "traffic_percent": [1, 5, 10, 20, 50, 100],
    "model_level": ["router", "dispatcher"],
    "workers": [1],
}

In [None]:
# export


def iter_param_grid(param_grid):
    """Yield one `{name: value}` dict per combination of the grid's candidate values.

    `param_grid` maps each parameter name to an iterable of candidates. Names are
    visited in sorted order so the sequence of combinations is reproducible. An
    empty grid yields exactly one empty dict.

    Adapted from scikit-learn's `ParameterGrid`:
    https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py
    """
    ordered = sorted(param_grid.items())
    if len(ordered) == 0:
        yield {}
        return
    names = [name for name, _ in ordered]
    candidate_lists = [candidates for _, candidates in ordered]
    for combo in product(*candidate_lists):
        yield dict(zip(names, combo))

In [None]:
# Every combination of the candidate values is produced, with keys in sorted order.
assert [{"a": 1, "b": 1, "c": "hello"}, {"a": 2, "b": 1, "c": "hello"}] == list(
    iter_param_grid({"a": [1, 2], "b": [1], "c": ["hello"]})
)
# An empty grid yields a single empty parameter set.
assert list(iter_param_grid({})) == [{}]

In [None]:
# export


def sample_grid_space(
    param_grid: Dict[str, Iterable[Any]], num_samples: int, random_state: Any = None
):
    """Return at most `num_samples` parameter combinations from `param_grid`.

    Args:
        param_grid: mapping of parameter name to an iterable of candidate values.
        num_samples: maximum number of combinations to return.
        random_state: optional seed (or any value accepted by
            `pandas.Series.sample`) that makes the random selection reproducible.
            The default `None` keeps the selection unseeded.

    Returns:
        A list of `{name: value}` dicts. When the grid holds `num_samples`
        combinations or fewer, all of them are returned in grid order; otherwise
        `num_samples` of them are drawn at random without replacement.
    """
    samples = list(iter_param_grid(param_grid))
    if num_samples < len(samples):
        samples = pd.Series(samples).sample(num_samples, random_state=random_state).tolist()
    return samples

In [None]:
sample_space = sample_grid_space({"a": [1, 2], "b": [1], "c": ["hello"]}, 1)
# Exactly the requested number of combinations comes back.
assert len(sample_space) == 1
assert sample_space[0]["b"] == 1
assert sample_space[0]["c"] == "hello"
# Which of the two combinations is picked is random.
assert sample_space[0]["a"] in (1, 2)

In [None]:
# export


def search_flow_grid(nb_path, param_grid, num_procs=None):
    """Schedule one asynchronous flow run per sampled combination of `param_grid`.

    Args:
        nb_path: notebook whose flow should be run; forwarded to `run_flow_async`.
        param_grid: mapping of parameter name to an iterable of candidate values.
        num_procs: maximum number of combinations to run. Defaults to half the
            CPU count minus one, but never less than one.

    Returns:
        The list of tasks returned by `run_flow_async`, one per combination.
    """
    if num_procs is None:
        # Without the lower bound, machines with three or fewer CPUs would get 0 runs.
        num_procs = max(1, int((multiprocessing.cpu_count() / 2) - 1))

    param_sample_space = sample_grid_space(param_grid, num_procs)
    return [run_flow_async(nb_path, params=param_sample) for param_sample in param_sample_space]

In [None]:
# slow
# NOTE: rebinds the notebook-level `nb_path` to the multistep notebook.
nb_path = Path(
    Path(".").resolve(),
    "test",
    "test_multistep.ipynb",
)
# The last argument (`num_procs`) caps the search at 2 of the 14 possible combinations.
tasks = search_flow_grid(
    nb_path,
    {
        "traffic_percent": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50],
        "model_level": ["dispatcher"],
        "workers": [1],
    },
    2,
)

In [None]:
# slow

# Non-blocking poll: True for each task that has already finished.
[t.done() for t in tasks]

In [None]:
# slow

# NOTE(review): `Task.result()` raises `asyncio.InvalidStateError` for a task that has not
# finished, so this cell only works once every entry of the previous cell's output is True.
[t.result() for t in tasks]

# Folding @ Home Type Exploration

> Explore a wider search space in the background, so that spare resources are always being put to use. Needs persistent search-space tracking.

In [None]:
# export


@call_parse
def sciflow_check_flows():
    "Run the default `show` check on every flow of the project's default config"
    # `check_flows` is not defined anywhere in this module; `check_call_flows` is.
    check_call_flows(get_config())

In [None]:
# export


@call_parse
def sciflow_run_flows():
    "Run every flow of the project's default config with `--no-pylint run`"
    # Pass the command by keyword: the second positional parameter of
    # `check_call_flows` is `flow_provider`, not `flow_command`.
    check_call_flows(get_config(), flow_command="--no-pylint run")