In [1]:
# default_exp run_flow

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# export


import asyncio
import multiprocessing
import os
import shlex
import subprocess
import sys
from itertools import product
from pathlib import Path
from typing import Any, Dict, Iterable

import pandas as pd
from fastcore.script import call_parse
from nbdev.export import find_default_export, get_config, read_nb

from sciflow.utils import get_flow_path, prepare_env

# Verify and Run Sciflow Flows

In [91]:
# Notebook-level fixtures: locate the `test_export` notebook under ./test and
# derive the module / flow paths associated with it.
nb_path = Path(Path(".").resolve(), "test", "test_export.ipynb")
flow_path = get_flow_path(nb_path, flow_provider="sagemaker")
nb = read_nb(nb_path)
# The default export name is dotted; swap "." for "/" to obtain a relative file path.
module_name = find_default_export(nb["cells"]).replace(".", "/")
test_module = os.path.join(get_config().path("lib_path"), f"{module_name}.py")
# Flows directory is read from the test settings file rather than the default config.
flows_dir = get_config(cfg_name="test/settings.ini").path("flows_path")
flow_name = os.path.basename(test_module)

In [92]:
# export


def run_shell_cmd(script: str):
    """Run `script` through the system shell and capture its combined output.

    stderr is redirected into stdout, so the returned text contains both streams.

    Args:
        script: Command line handed to the shell (`shell=True`).

    Returns:
        A `(pipe, output)` tuple: the finished `subprocess.Popen` object (its
        `returncode` is populated) and the captured output decoded as UTF-8 with
        leading/trailing whitespace stripped.
    """
    pipe = subprocess.Popen(
        "%s" % script, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
    )
    raw_output, _ = pipe.communicate()
    # A command may emit bytes that are not valid UTF-8. Substitute them instead of
    # raising UnicodeDecodeError, which would also discard the return code.
    return pipe, raw_output.decode("utf-8", errors="replace").strip()

In [142]:
# export


def make_shell_cmd(flow_nb_path, flow_provider="metaflow", flow_command="show", params=None):
    """Build the shell command line that runs a flow file with `python`.

    Calls `prepare_env()` (from `sciflow.utils`) before building the command.

    Args:
        flow_nb_path: `pathlib.Path` of a notebook (`.ipynb`) or of a flow file.
            A notebook path is mapped to its flow file with `get_flow_path`;
            any other path is used as given.
        flow_provider: Provider passed to `get_flow_path`; only used for notebook paths.
        flow_command: Text placed verbatim after the flow path (sub-command and flags).
        params: Optional mapping; each item is appended as `--key value`, with the
            value shell-quoted.

    Returns:
        The command line as a single string.
    """
    prepare_env()
    if flow_nb_path.suffix == ".ipynb":
        flow_path = get_flow_path(flow_nb_path, flow_provider=flow_provider)
    else:
        flow_path = flow_nb_path
    if params:
        # Quote every value so spaces or shell metacharacters stay a single argument.
        args = " ".join(
            f"--{key} {shlex.quote(str(value))}" for key, value in params.items()
        )
        flow_command = f"{flow_command} {args}"

    path_text = str(flow_path)
    if "'" in path_text:
        # Plain single-quoting would be terminated early by an embedded quote.
        quoted_path = shlex.quote(path_text)
    else:
        # Keep the historical always-single-quoted form for ordinary paths.
        quoted_path = f"'{path_text}'"
    return f"python {quoted_path} {flow_command}"

In [143]:
# export


def check_call_flow(
    flow_nb_path, flow_provider="metaflow", flow_command="show", params=None
):
    """Run one flow command synchronously and report how it ended.

    The command line is built by `make_shell_cmd` and executed by `run_shell_cmd`.

    Returns:
        `(returncode, output)`: the process exit status and its captured output text.
    """
    process, captured = run_shell_cmd(
        make_shell_cmd(flow_nb_path, flow_provider, flow_command, params)
    )
    return process.returncode, captured

In [144]:
# Smoke test: the `show` command for the metaflow provider must exit with status 0.
ret_code, output = check_call_flow(
    nb_path, flow_provider="metaflow", flow_command="show"
)
assert ret_code == 0

In [146]:
# slow

# Execute the flow end to end with the metaflow provider, require a zero exit
# status, then show the captured log.
ret_code, output = check_call_flow(
    nb_path, flow_provider="metaflow", flow_command="run"
)
assert ret_code == 0
print(output)

Metaflow 2.5.2 executing TestExportFlow for user:'Donal Simmie'
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint is happy!
2022-05-10 13:54:32.853 Workflow starting (run-id 1652190872572059):
2022-05-10 13:54:32.919 [1652190872572059/start/1 (pid 3857)] Task is starting.
2022-05-10 13:54:37.666 [1652190872572059/start/1 (pid 3857)] 3
2022-05-10 13:54:37.999 [1652190872572059/start/1 (pid 3857)] Task finished successfully.
2022-05-10 13:54:38.078 [1652190872572059/preprocess/2 (pid 3860)] Task is starting.
2022-05-10 13:54:42.823 [1652190872572059/preprocess/2 (pid 3860)] Preprocessing input data from /home/sagemaker-user/git/sciflow/nbs...
2022-05-10 13:54:44.159 [1652190872572059/preprocess/2 (pid 3860)] Task finished successfully.
2022-05-10 13:54:44.245 [1652190872572059/train/3 (pid 3863)] Task is starting.
2022-05-10 13:54:48.993 [1652190872572059/train/3 (pid 3863)] Training /home/sagemaker-user/git/sciflow on /home/sagemaker-user/git/sciflow/nbs...


In [147]:
# Same smoke test as for metaflow, but against the sagemaker provider's flow file.
ret_code, output = check_call_flow(
    nb_path, flow_provider="sagemaker", flow_command="show"
)
assert ret_code == 0

In [None]:
# slow

# Execute the flow with the sagemaker provider, require a zero exit status,
# then show the captured output.
ret_code, output = check_call_flow(
    nb_path, flow_provider="sagemaker", flow_command="run"
)
assert ret_code == 0
print(output)

# TODO

Pass params into a sagemaker workflow

In [148]:
# export


def check_call_flows(
    config,
    flow_provider="metaflow",
    flow_command="show",
    ignore_suffix=None,
    exit_on_error=True,
):
    """Run `flow_command` on every flow file of one provider and report each result.

    Flow files are the `.py` entries of `<flows_path>/<flow_provider>` whose name
    does not start with `_sciflow`. A line is printed per flow; for failures the
    captured output is printed as well.

    Args:
        config: Object whose `path("flows_path")` returns the flows root directory.
        flow_provider: Sub-directory of the flows root to scan; also forwarded to
            `check_call_flow`.
        flow_command: Command passed to each flow (e.g. `"show"`).
        ignore_suffix: If given, file names ending with this suffix are skipped.
        exit_on_error: When a flow failed and the code is not running under
            IPython, call `sys.exit(1)` instead of returning.

    Returns:
        `0` when every checked flow exited with status 0, otherwise `1`.

    Raises:
        SystemExit: On failure outside IPython when `exit_on_error` is true.
    """
    flows_dir = Path(config.path("flows_path"), flow_provider)

    ret_codes = []
    for flow_file_name in os.listdir(flows_dir):
        if ignore_suffix and flow_file_name.endswith(ignore_suffix):
            continue
        if flow_file_name.startswith("_sciflow") or not flow_file_name.endswith(".py"):
            continue
        ret_code, output = check_call_flow(
            Path(flows_dir, flow_file_name),
            flow_provider=flow_provider,
            flow_command=flow_command,
        )
        if ret_code == 0:
            print(f"Flow: {flow_file_name} {flow_command} verified")
        else:
            print(
                f"Flow: {flow_file_name} {flow_command} verification failed\nDetails:\n{output}"
            )
        ret_codes.append(ret_code)

    if all(ret_code == 0 for ret_code in ret_codes):
        return 0

    try:
        # `get_ipython` only exists as a global inside IPython/Jupyter; the NameError
        # raised elsewhere marks a non-interactive run, where exiting is appropriate.
        get_ipython().__class__.__name__
    except NameError:
        if exit_on_error:
            sys.exit(1)
    return 1

In [149]:
# Verify every flow under the test settings with the defaults (metaflow provider, `show`).
check_call_flows(get_config(cfg_name="test/settings.ini"))

Flow: test_export.py show verified
Flow: test_data_handling.py show verified
Flow: test_module.py show verified
Flow: test_multistep_no_params.py show verified
Flow: test_multistep.py show verified


0

In [150]:
# slow

# Run every metaflow flow with pylint disabled, skipping files whose name ends
# in `_no_params.py`.
check_call_flows(
    get_config(cfg_name="test/settings.ini"),
    flow_command="--no-pylint run",
    ignore_suffix="_no_params.py",
)

Flow: test_export.py --no-pylint run verified
Flow: test_data_handling.py --no-pylint run verified
Flow: test_module.py --no-pylint run verified
Flow: test_multistep.py --no-pylint run verified


0

In [151]:
# Same verification, scanning the sagemaker provider's flow directory instead.
check_call_flows(get_config(cfg_name="test/settings.ini"), flow_provider="sagemaker")

Flow: test_export.py show verified
Flow: test_data_handling.py show verified
Flow: test_module.py show verified
Flow: test_multistep_no_params.py show verified
Flow: test_multistep.py show verified


0

In [154]:
# slow

# Run the multistep flow with explicit parameters. Note that this rebinds the
# notebook-level `nb_path` to the multistep notebook for any cell executed afterwards.
nb_path = Path(Path(".").resolve(), "test", "test_multistep.ipynb")
ret_code, output = check_call_flow(
    nb_path,
    flow_command="run",
    params={"traffic_percent": 1, "model_level": "dispatcher"},
)
# Print before asserting so the log is visible even when the run failed.
print(output)
assert ret_code == 0

Metaflow 2.5.2 executing TestMultistepFlow for user:'Donal Simmie'
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint is happy!
2022-05-10 13:57:48.871 Workflow starting (run-id 1652191068595160):
2022-05-10 13:57:48.932 [1652191068595160/start/1 (pid 4318)] Task is starting.
2022-05-10 13:57:50.355 [1652191068595160/start/1 (pid 4318)] The first step
2022-05-10 13:57:50.755 [1652191068595160/start/1 (pid 4318)] Task finished successfully.
2022-05-10 13:57:50.836 [1652191068595160/preprocess/2 (pid 4325)] Task is starting.
2022-05-10 13:57:52.286 [1652191068595160/preprocess/2 (pid 4325)] I captialised the message: THE FIRST STEP
2022-05-10 13:57:52.720 [1652191068595160/preprocess/2 (pid 4325)] Task finished successfully.
2022-05-10 13:57:52.802 [1652191068595160/fit/3 (pid 4332)] Task is starting.
2022-05-10 13:57:54.635 [1652191068595160/fit/3 (pid 4332)] Task finished successfully.
2022-05-10 13:57:54.712 [1652191068595160/evaluate/4 (pid 4339)] Task is 

# [WIP] Async Flow Running

> Run the flow you are working on directly from the notebook you are working in. This maximises the number of experiments you can run because there is no downtime: while long-running tasks execute, you can keep exploring! :-)

In [161]:
# export

async def flow_task(flow_nb_path, flow_provider="metaflow", flow_command="run", params=None):
    """Run one flow command in a subprocess without blocking the event loop.

    The command line comes from `make_shell_cmd`. After the process ends, the
    exit status and any non-empty stderr / stdout are printed.

    Returns:
        `(returncode, output)` where `output` is the stripped stdout text, or
        `None` when the process wrote nothing to stdout. stderr is printed but
        not returned.
    """
    cmd = make_shell_cmd(flow_nb_path, flow_provider, flow_command, params)

    proc = await asyncio.create_subprocess_shell(
        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    stdout, stderr = await proc.communicate()

    print(f"[{cmd!r} exited with {proc.returncode}]")
    # Decode with errors="replace": a flow may emit bytes that are not valid UTF-8,
    # and a UnicodeDecodeError here would hide the exit status from the caller.
    if stderr:
        error_text = stderr.decode("utf-8", errors="replace").strip()
        print(f"[stderr]\n{error_text}")
    output = None
    if stdout:
        output = stdout.decode("utf-8", errors="replace").strip()
        print(f"[stdout]\n{output}")

    return proc.returncode, output

In [162]:
# export


def run_flow_async(flow_nb_path, flow_provider="metaflow", flow_command = "run", params=None):
    """Schedule `flow_task` on the current event loop and return immediately.

    Returns:
        The `asyncio.Task` wrapping the flow run; await it (or poll `done()`)
        and read `result()` for the `(returncode, output)` pair.
    """
    return asyncio.get_event_loop().create_task(
        flow_task(flow_nb_path, flow_provider, flow_command, params)
    )

In [163]:
# slow

# Start the multistep flow in the background (default provider and command);
# the cell returns at once with the pending task as its displayed value.
task = run_flow_async(
    Path(Path(".").resolve(), "test", "test_multistep.ipynb"),
    params={"traffic_percent": 10, "workers": 12},
)
task

<Task pending name='Task-77' coro=<flow_task() running at /tmp/ipykernel_32433/3296514773.py:3>>

In [164]:
# slow
# Wait for the background run started above; element 0 of the result is the exit status.
await task
assert(0 == task.result()[0])

["python '/home/sagemaker-user/git/sciflow/nbs/test/flows/metaflow/test_multistep.py' run --traffic_percent 10 --workers 12" exited with 0]
[stderr]
Metaflow 2.5.2 executing TestMultistepFlow for user:'Donal Simmie'
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint is happy!
[stdout]
2022-05-10 13:59:43.786 Workflow starting (run-id 1652191183528732):
2022-05-10 13:59:43.847 [1652191183528732/start/1 (pid 4454)] Task is starting.
2022-05-10 13:59:45.249 [1652191183528732/start/1 (pid 4454)] The first step
2022-05-10 13:59:45.643 [1652191183528732/start/1 (pid 4454)] Task finished successfully.
2022-05-10 13:59:45.724 [1652191183528732/preprocess/2 (pid 4461)] Task is starting.
2022-05-10 13:59:47.141 [1652191183528732/preprocess/2 (pid 4461)] I captialised the message: THE FIRST STEP
2022-05-10 13:59:47.563 [1652191183528732/preprocess/2 (pid 4461)] Task finished successfully.
2022-05-10 13:59:47.653 [1652191183528732/fit/3 (pid 4468)] Task is starting.
202

In [171]:
# slow

# Start the test_export flow in the background using the sagemaker provider.
task = run_flow_async(
    Path(Path(".").resolve(), "test", "test_export.ipynb"),
    flow_provider = "sagemaker",
    params={"some_param": "async"},
)
task

<Task pending name='Task-97' coro=<flow_task() running at /tmp/ipykernel_32433/3296514773.py:3>>

In [172]:
# slow
# Wait for the sagemaker test_export run; element 0 of the result is the exit status.
await task
assert(0 == task.result()[0])

["python '/home/sagemaker-user/git/sciflow/nbs/test/flows/sagemaker/test_export.py' run --some_param async" exited with 0]
[stdout]
Starting Sciflow generated pipeline: pipeline-2022-05-10-15-07-48-150
{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:368653567616:pipeline/test-export', 'PipelineExecutionArn': 'arn:aws:sagemaker:eu-west-1:368653567616:pipeline/test-export/execution/2k5ig2eooqkg', 'PipelineExecutionDisplayName': 'execution-1652195271478', 'PipelineExecutionStatus': 'Executing', 'CreationTime': datetime.datetime(2022, 5, 10, 15, 7, 51, 368000, tzinfo=tzlocal()), 'LastModifiedTime': datetime.datetime(2022, 5, 10, 15, 7, 51, 368000, tzinfo=tzlocal()), 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:368653567616:user-profile/d-likrmmebxomz/donal', 'UserProfileName': 'donal', 'DomainId': 'd-likrmmebxomz'}, 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:368653567616:user-profile/d-likrmmebxomz/donal', 'UserProfileName': 'donal', 'DomainId': 'd-likrm

In [173]:
# slow

# Start the multistep flow in the background using the sagemaker provider.
task = run_flow_async(
    Path(Path(".").resolve(), "test", "test_multistep.ipynb"),
    flow_provider = "sagemaker",
    params={"traffic_percent": 10, "workers": 12},
)
task

<Task pending name='Task-103' coro=<flow_task() running at /tmp/ipykernel_32433/3296514773.py:3>>

In [174]:
# slow
# Wait for the sagemaker multistep run; element 0 of the result is the exit status.
await task
assert(0 == task.result()[0])

["python '/home/sagemaker-user/git/sciflow/nbs/test/flows/sagemaker/test_multistep.py' run --traffic_percent 10 --workers 12" exited with 0]
[stdout]
['run', '--traffic_percent', '10', '--workers', '12']
Starting Sciflow generated pipeline: pipeline-2022-05-10-16-06-20-727
{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:368653567616:pipeline/test-multistep', 'PipelineExecutionArn': 'arn:aws:sagemaker:eu-west-1:368653567616:pipeline/test-multistep/execution/kftuf7iuwryx', 'PipelineExecutionDisplayName': 'execution-1652198784152', 'PipelineExecutionStatus': 'Executing', 'CreationTime': datetime.datetime(2022, 5, 10, 16, 6, 24, 66000, tzinfo=tzlocal()), 'LastModifiedTime': datetime.datetime(2022, 5, 10, 16, 6, 24, 66000, tzinfo=tzlocal()), 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:368653567616:user-profile/d-likrmmebxomz/donal', 'UserProfileName': 'donal', 'DomainId': 'd-likrmmebxomz'}, 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:368653567616:user-pro

In [126]:
# Example search space: each key is a flow parameter, each list its candidate values.
# NOTE(review): no later cell in this notebook appears to read `param_grid` — confirm it is still needed.
param_grid = {
    "traffic_percent": [1, 5, 10, 20, 50, 100],
    "model_level": ["router", "dispatcher"],
    "workers": [1],
}

In [None]:
# export


def iter_param_grid(param_grid):
    """Yield one dict per combination of the candidate values in `param_grid`.

    Keys are processed in sorted order so the sequence of combinations is
    reproducible; an empty grid yields a single empty dict.

    Adapted from scikit-learn's `ParameterGrid`:
    https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py
    """
    ordered_items = sorted(param_grid.items())
    if not ordered_items:
        yield {}
        return

    names = [name for name, _ in ordered_items]
    candidate_lists = [candidates for _, candidates in ordered_items]
    for combination in product(*candidate_lists):
        yield dict(zip(names, combination))

In [None]:
# Two values for "a" and one each for "b" and "c" must expand to exactly two combinations.
assert [{"a": 1, "b": 1, "c": "hello"}, {"a": 2, "b": 1, "c": "hello"}] == list(
    iter_param_grid({"a": [1, 2], "b": [1], "c": ["hello"]})
)

In [None]:
# export


def sample_grid_space(
    param_grid: Dict[str, Iterable[Any]], num_samples: int, random_state=None
):
    """Return up to `num_samples` parameter combinations drawn from `param_grid`.

    Args:
        param_grid: Mapping of parameter name to its candidate values; expanded
            with `iter_param_grid`.
        num_samples: Maximum number of combinations to return.
        random_state: Optional seed forwarded to `pandas.Series.sample` so a
            draw can be reproduced; `None` keeps the unseeded behaviour.

    Returns:
        Every combination (in grid order) when the grid has `num_samples` or fewer
        of them, otherwise a random sample of `num_samples` combinations drawn
        without replacement.
    """
    samples = list(iter_param_grid(param_grid))
    if num_samples < len(samples):
        samples = (
            pd.Series(samples).sample(num_samples, random_state=random_state).tolist()
        )
    return samples

In [None]:
# Draw one of the two possible combinations: "b" and "c" are fixed, while "a" may
# be either candidate because the draw is random.
sample_space = sample_grid_space({"a": [1, 2], "b": [1], "c": ["hello"]}, 1)
assert sample_space[0]["b"] == 1
assert sample_space[0]["c"] == "hello"
assert sample_space[0]["a"] == 1 or sample_space[0]["a"] == 2

In [None]:
# export


def search_flow_grid(nb_path, param_grid, num_procs=None):
    """Launch one background flow run per sampled parameter combination.

    Args:
        nb_path: Notebook (or flow file) path passed to `run_flow_async`.
        param_grid: Mapping of parameter name to candidate values.
        num_procs: Number of combinations to sample and run. Defaults to half
            the CPU count minus one, but never fewer than 1.

    Returns:
        The list of tasks returned by `run_flow_async`, one per combination.
    """
    if num_procs is None:
        # The bare formula evaluates to 0 on machines with three or fewer CPUs,
        # which would silently launch nothing; always run at least one flow.
        num_procs = max(1, multiprocessing.cpu_count() // 2 - 1)

    return [
        run_flow_async(nb_path, params=param_sample)
        for param_sample in sample_grid_space(param_grid, num_procs)
    ]

In [None]:
# slow
# Rebinds the notebook-level `nb_path` to the multistep notebook.
nb_path = Path(
    Path(".").resolve(),
    "test",
    "test_multistep.ipynb",
)
# Sample 2 of the 14 possible combinations and start one background run for each.
tasks = search_flow_grid(
    nb_path,
    {
        "traffic_percent": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50],
        "model_level": ["dispatcher"],
        "workers": [1],
    },
    2,
)

In [None]:
# slow

# Poll without blocking: one boolean per background run, True once it has finished.
[t.done() for t in tasks]

In [None]:
# slow

# Collect (returncode, output) for each run. `Task.result()` raises
# `asyncio.InvalidStateError` for a task that is not done yet, so only run this
# once the cell above reports True for every task.
[t.result() for t in tasks]

# Folding @ Home Type Exploration

> Explore a wider search space in the background. Try to always make some use of the available resources. Needs persistent search-space tracking.

In [None]:
# export


@call_parse
def sciflow_check_flows():
    """Verify the project's flows by calling `check_call_flows` with its defaults."""
    # `check_flows` is not defined anywhere in this module; the verification
    # function is `check_call_flows`.
    check_call_flows(get_config())

In [None]:
# export


@call_parse
def sciflow_run_flows():
    """Run the project's flows via `check_call_flows` with pylint disabled."""
    # `check_flows` is not defined anywhere in this module. The command must also be
    # passed by keyword: the second positional parameter of `check_call_flows` is
    # `flow_provider`, not `flow_command`.
    check_call_flows(get_config(), flow_command="--no-pylint run")