In [22]:
# | default_exp run_flow

In [23]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
# | export


import asyncio
import multiprocessing
import os
import shlex
import sys
from itertools import product
from pathlib import Path
from typing import Any, Dict, Iterable

import pandas as pd
from execnb.nbio import read_nb
from fastcore.script import call_parse
from nbdev.config import get_config

from sciflow.utils import (
    chunks,
    find_default_export,
    get_flow_path,
    prepare_env,
    run_shell_cmd,
)

In [25]:
# Shared fixtures for the example cells below, all derived from one test notebook.
nb_path = Path(".").resolve() / "test" / "test_export.ipynb"
# Flow file path for the notebook, as resolved for the sagemaker provider.
flow_path = get_flow_path(nb_path, flow_provider="sagemaker")
nb = read_nb(nb_path)
# Turn the dotted default-export name into a relative file path.
default_export = find_default_export(nb["cells"])
module_name = default_export.replace(".", "/")
lib_path = get_config().path("lib_path")
test_module = os.path.join(lib_path, f"{module_name}.py")
test_config = get_config(cfg_name="test/settings.ini")
flows_dir = test_config.path("flows_path")
flow_name = os.path.basename(test_module)

# Verify or Run an Individual Flow

> `subprocess` is used to run flows because most flow providers bundle a CLI, which makes for a consistent execution experience with minimal adaptation.

In [26]:
# | export


def check_is_init():
    """Verify that the configured project root is importable.

    Reads ``root_path`` from the nbdev config and checks that it is an entry
    of ``sys.path``.

    Raises:
        ValueError: if the root path is missing from ``sys.path``; the current
            ``sys.path`` is printed first to help diagnose the problem.
    """
    project_root = str(get_config().path("root_path"))

    if project_root in sys.path:
        return

    print(f"PYTHONPATH={sys.path}")
    raise ValueError("Project is not in path; have you run sciflow_init?")

In [27]:
# | export


def make_shell_cmd(
    flow_nb_path, flow_provider="metaflow", flow_command="show", params=None
):
    """Build the shell command line that runs a flow file with ``python``.

    Args:
        flow_nb_path: Path (``pathlib.Path`` or ``str``) of either a notebook
            (``.ipynb``) or a flow file. A notebook is mapped to its flow file
            with ``get_flow_path``; any other path is used as is.
        flow_provider: Provider name forwarded to ``get_flow_path``; only
            relevant when ``flow_nb_path`` is a notebook.
        flow_command: Sub-command (and any flags) passed to the flow script.
            It is inserted verbatim so that multi-token values such as
            ``"--no-pylint run"`` keep working.
        params: Optional mapping of parameter name to value, rendered as
            ``--name value`` pairs after ``flow_command``.

    Returns:
        The command as a single string suitable for a POSIX shell.
    """
    prepare_env()
    flow_nb_path = Path(flow_nb_path)
    if flow_nb_path.suffix == ".ipynb":
        flow_path = get_flow_path(flow_nb_path, flow_provider=flow_provider)
    else:
        flow_path = flow_nb_path
    if params:
        # Quote every value: spaces or shell metacharacters in a parameter
        # would otherwise split the argument or be interpreted by the shell.
        args = " ".join(
            f"--{key} {shlex.quote(str(value))}" for key, value in params.items()
        )
        flow_command = f"{flow_command} {args}"

    # shlex.quote also copes with paths that themselves contain a single quote,
    # which naive '...' wrapping does not.
    return f"python {shlex.quote(str(flow_path))} {flow_command}"

In [28]:
# | export


def check_call_flow(
    flow_nb_path, flow_provider="metaflow", flow_command="show", params=None
):
    """Run one flow command synchronously and report how it went.

    The project must be initialised (see ``check_is_init``), otherwise a
    ``ValueError`` propagates before anything is executed.

    Returns:
        A ``(return_code, output)`` tuple, where ``return_code`` comes from the
        first value returned by ``run_shell_cmd`` and ``output`` is its second
        value, passed through unchanged.
    """
    check_is_init()

    shell_cmd = make_shell_cmd(flow_nb_path, flow_provider, flow_command, params)
    finished_proc, captured = run_shell_cmd(shell_cmd)
    return finished_proc.returncode, captured

In [29]:
sys_path = sys.path
sys.path = []

In [30]:
# With an empty sys.path the project root cannot be found, so check_is_init
# (called first by check_call_flow) must raise ValueError.
raised = False
try:
    check_call_flow(nb_path, flow_provider="metaflow", flow_command="show")
except ValueError:
    # Catch only the expected error; a bare except would also pass on typos
    # or unrelated failures.
    raised = True
finally:
    # Always restore the import path so a failure here cannot break later cells.
    sys.path = sys_path
assert raised

PYTHONPATH=[]


In [31]:
sys.path = sys_path

In [32]:
prepare_env()

In [33]:
ret_code, output = check_call_flow(
    nb_path, flow_provider="metaflow", flow_command="show"
)
assert ret_code == 0

In [34]:
# | notest

ret_code, output = check_call_flow(
    nb_path, flow_provider="metaflow", flow_command="run"
)
assert ret_code == 0
print(output)

Metaflow 2.10.0 executing TestExportFlow for user:Donal Simmie
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint not found, so extra checks are disabled.
2023-10-17 10:13:29.125 Workflow starting (run-id 1697537608785479):
2023-10-17 10:13:29.255 [1697537608785479/start/1 (pid 17423)] Task is starting.
2023-10-17 10:13:33.842 [1697537608785479/start/1 (pid 17423)] 3
2023-10-17 10:13:34.170 [1697537608785479/start/1 (pid 17423)] Task finished successfully.
2023-10-17 10:13:34.308 [1697537608785479/preprocess/2 (pid 17427)] Task is starting.
2023-10-17 10:13:38.877 [1697537608785479/preprocess/2 (pid 17427)] Preprocessing input data from /home/sagemaker-user/git/sciflow/nbs...
2023-10-17 10:13:40.217 [1697537608785479/preprocess/2 (pid 17427)] Task finished successfully.
2023-10-17 10:13:40.356 [1697537608785479/train/3 (pid 17431)] Task is starting.
2023-10-17 10:13:44.924 [1697537608785479/train/3 (pid 17431)] Training /home/sagemaker-user/git/sciflow on /h

In [35]:
# | notest

ret_code, output = check_call_flow(nb_path, flow_command="run")
assert ret_code == 0
print(output)

Metaflow 2.10.0 executing TestExportFlow for user:Donal Simmie
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint not found, so extra checks are disabled.
2023-10-17 10:13:56.489 Workflow starting (run-id 1697537636159181):
2023-10-17 10:13:56.612 [1697537636159181/start/1 (pid 17444)] Task is starting.
2023-10-17 10:14:01.187 [1697537636159181/start/1 (pid 17444)] 3
2023-10-17 10:14:01.547 [1697537636159181/start/1 (pid 17444)] Task finished successfully.
2023-10-17 10:14:01.696 [1697537636159181/preprocess/2 (pid 17448)] Task is starting.
2023-10-17 10:14:06.267 [1697537636159181/preprocess/2 (pid 17448)] Preprocessing input data from /home/sagemaker-user/git/sciflow/nbs...
2023-10-17 10:14:07.599 [1697537636159181/preprocess/2 (pid 17448)] Task finished successfully.
2023-10-17 10:14:07.735 [1697537636159181/train/3 (pid 17452)] Task is starting.
2023-10-17 10:14:12.317 [1697537636159181/train/3 (pid 17452)] Training /home/sagemaker-user/git/sciflow on /h

In [36]:
ret_code, output = check_call_flow(
    nb_path, flow_provider="sagemaker", flow_command="show"
)
assert ret_code == 0

In [67]:
# | notest

ret_code, output = check_call_flow(
    nb_path, flow_provider="sagemaker", flow_command="run"
)
print(output)
assert ret_code == 0

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Popping out 'ProcessingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.
Popping out 'ProcessingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. Ple

# Verify/Run all Flows

In [68]:
# | export


def check_call_flows(
    config,
    flow_provider="metaflow",
    flow_command="show",
    ignore_suffix=None,
    exit_on_error=True,
):
    """Run ``flow_command`` on every flow file of one provider, one at a time.

    Flow files are the ``.py`` files in ``<flows_path>/<flow_provider>`` whose
    names do not start with ``_sciflow``.

    Args:
        config: Object exposing ``path("flows_path")`` (e.g. an nbdev config).
        flow_provider: Sub-directory of ``flows_path`` to scan; also forwarded
            to ``check_call_flow``.
        flow_command: Command passed to each flow script.
        ignore_suffix: Optional suffix (or tuple of suffixes); files whose name
            ends with it are skipped.
        exit_on_error: When a flow fails outside IPython, terminate the process
            with exit code 1 instead of returning.

    Returns:
        ``0`` if every flow returned 0, otherwise ``1``.
    """
    flows_dir = Path(config.path("flows_path"), flow_provider)

    ret_codes = []
    for flow_file_name in os.listdir(flows_dir):
        if ignore_suffix and flow_file_name.endswith(ignore_suffix):
            continue
        if flow_file_name.startswith("_sciflow"):
            continue
        if not flow_file_name.endswith(".py"):
            continue

        ret_code, output = check_call_flow(
            Path(flows_dir, flow_file_name),
            flow_provider=flow_provider,
            flow_command=flow_command,
        )
        if ret_code == 0:
            print(f"Flow: {flow_file_name} {flow_command} verified")
        else:
            print(
                f"Flow: {flow_file_name} {flow_command} verification failed\nDetails:\n{output}"
            )
        ret_codes.append(ret_code)

    if all(rc == 0 for rc in ret_codes):
        return 0

    exit_code = 1
    try:
        # get_ipython only exists inside IPython/Jupyter; a NameError therefore
        # means a non-interactive Python environment, where we exit with an error code.
        get_ipython().__class__.__name__
    except NameError:
        if exit_on_error:
            sys.exit(exit_code)
    return exit_code

In [69]:
check_call_flows(get_config(cfg_name="test/settings.ini"))

Flow: test_export.py show verified
Flow: test_data_handling.py show verified
Flow: test_module.py show verified
Flow: test_multistep_no_params.py show verified
Flow: test_multistep.py show verified


0

In [71]:
# | notest

check_call_flows(
    get_config(cfg_name="test/settings.ini"),
    flow_command="--no-pylint run",
    ignore_suffix="_no_params.py",
)

Flow: test_export.py --no-pylint run verified
Flow: test_data_handling.py --no-pylint run verified
Flow: test_module.py --no-pylint run verified
Flow: test_multistep.py --no-pylint run verified


0

In [58]:
check_call_flows(get_config(cfg_name="test/settings.ini"), flow_provider="sagemaker")

Flow: test_export.py show verified
Flow: test_data_handling.py show verified
Flow: test_module.py show verified
Flow: test_multistep_no_params.py show verified
Flow: test_multistep.py show verified


0

In [70]:
# | notest

nb_path = Path(Path(".").resolve(), "test", "test_multistep.ipynb")
ret_code, output = check_call_flow(
    nb_path,
    flow_command="run",
    params={"traffic_percent": 1, "model_level": "dispatcher"},
)
print(output)
assert ret_code == 0

Metaflow 2.10.0 executing TestMultistepFlow for user:Donal Simmie
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint not found, so extra checks are disabled.
2023-10-17 12:42:02.148 Workflow starting (run-id 1697546521724161):
2023-10-17 12:42:02.267 [1697546521724161/start/1 (pid 18307)] Task is starting.
2023-10-17 12:42:03.140 [1697546521724161/start/1 (pid 18307)] The first step
2023-10-17 12:42:03.576 [1697546521724161/start/1 (pid 18307)] Task finished successfully.
2023-10-17 12:42:03.720 [1697546521724161/preprocess/2 (pid 18312)] Task is starting.
2023-10-17 12:42:04.604 [1697546521724161/preprocess/2 (pid 18312)] I captialised the message: THE FIRST STEP
2023-10-17 12:42:05.048 [1697546521724161/preprocess/2 (pid 18312)] Task finished successfully.
2023-10-17 12:42:05.190 [1697546521724161/fit/3 (pid 18317)] Task is starting.
2023-10-17 12:42:06.479 [1697546521724161/fit/3 (pid 18317)] Task finished successfully.
2023-10-17 12:42:06.618 [1697546521

# Async Flow Running

> Run the flow you are working on from the notebook you are working on. This maximises the number of experiments you can run because there is no downtime: while long-running tasks are running, you can keep exploring! :-)

In [59]:
# | export


async def flow_task(
    flow_nb_path, flow_provider="metaflow", flow_command="run", params=None
):
    """Run one flow command in a subprocess without blocking the event loop.

    The command line is built with ``make_shell_cmd`` and executed through the
    shell; both output streams are captured.

    Returns:
        A ``(return_code, text)`` tuple. ``text`` holds a ``[stderr]`` section
        followed by a ``[stdout]`` section, each on its own lines and each
        present only when the corresponding stream produced output.
    """
    cmd = make_shell_cmd(flow_nb_path, flow_provider, flow_command, params)

    proc = await asyncio.create_subprocess_shell(
        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    stdout, stderr = await proc.communicate()

    sections = []
    for label, stream in (("stderr", stderr), ("stdout", stdout)):
        if stream:
            # errors="replace": undecodable bytes in tool output must not turn
            # a finished run into a UnicodeDecodeError.
            text = stream.decode("utf-8", errors="replace").strip()
            sections.append(f"[{label}]\n{text}")

    # Join with a newline so the last stderr line does not run into "[stdout]".
    return proc.returncode, "\n".join(sections)

In [60]:
# | export


def run_flow_async(
    flow_nb_path, flow_provider="metaflow", flow_command="run", params=None
):
    """Schedule ``flow_task`` on the current event loop and return immediately.

    Returns:
        The ``asyncio.Task`` wrapping the flow run; await it (or inspect
        ``task.result()`` once done) to obtain ``(return_code, output)``.
    """
    event_loop = asyncio.get_event_loop()
    return event_loop.create_task(
        flow_task(flow_nb_path, flow_provider, flow_command, params)
    )

In [61]:
# | notest


task = run_flow_async(
    Path(Path(".").resolve(), "test", "test_multistep.ipynb"),
    params={"traffic_percent": 10, "workers": 12},
)
task

<Task pending name='Task-20' coro=<flow_task() running at /tmp/ipykernel_14677/514015914.py:4>>

In [62]:
# | notest

await task
assert 0 == task.result()[0]

In [72]:
# | notest

task = run_flow_async(
    Path(Path(".").resolve(), "test", "test_export.ipynb"),
    flow_provider="sagemaker",
    params={"some_param": "async"},
)
task

<Task pending name='Task-26' coro=<flow_task() running at /tmp/ipykernel_14677/514015914.py:4>>

In [73]:
# | notest

await task
assert 0 == task.result()[0]

In [74]:
# | notest

task = run_flow_async(
    Path(Path(".").resolve(), "test", "test_multistep.ipynb"),
    flow_provider="sagemaker",
    params={"traffic_percent": 10, "workers": 12},
)
task

<Task pending name='Task-32' coro=<flow_task() running at /tmp/ipykernel_14677/514015914.py:4>>

In [75]:
# | notest

await task
assert 0 == task.result()[0]

AssertionError: 

In [None]:
# | export


async def run_flows_async(
    config,
    flow_provider="metaflow",
    flow_command="run",
    params=None,
    ignore_suffix=None,
    exit_on_error=True,
):
    """Run ``flow_command`` on every flow file of one provider concurrently.

    Flow files are the ``.py`` files in ``<flows_path>/<flow_provider>`` whose
    names do not start with ``_sciflow``. All flows are scheduled first and
    then awaited in turn.

    Args:
        config: Object exposing ``path("flows_path")`` (e.g. an nbdev config).
        flow_provider: Sub-directory of ``flows_path`` to scan; forwarded to
            ``flow_task``.
        flow_command: Command passed to each flow script.
        params: Optional parameter mapping passed to every flow.
        ignore_suffix: Optional suffix (or tuple of suffixes); files whose name
            ends with it are skipped.
        exit_on_error: When a flow fails outside IPython, terminate the process
            with exit code 1 instead of returning.

    Returns:
        ``0`` if every flow returned 0, otherwise ``1``.
    """
    flows_dir = Path(config.path("flows_path"), flow_provider)
    loop = asyncio.get_event_loop()

    # Schedule everything up front so the flows run side by side.
    flow_tasks = {}
    for flow_file_name in os.listdir(flows_dir):
        if ignore_suffix and flow_file_name.endswith(ignore_suffix):
            continue
        if flow_file_name.startswith("_sciflow"):
            continue
        if not flow_file_name.endswith(".py"):
            continue
        flow_tasks[flow_file_name] = loop.create_task(
            flow_task(
                Path(flows_dir, flow_file_name), flow_provider, flow_command, params
            )
        )

    ret_codes = []
    for flow_name, task in flow_tasks.items():
        # Unpack the captured output as well: the failure message below needs
        # it, and it was previously an undefined name.
        ret_code, output = await task
        if ret_code == 0:
            print(f"Flow: {flow_name} {flow_command} verified")
        else:
            print(
                f"Flow: {flow_name} {flow_command} verification failed\nDetails:\n{output}"
            )
        ret_codes.append(ret_code)

    if all(rc == 0 for rc in ret_codes):
        return 0

    exit_code = 1
    try:
        # get_ipython only exists inside IPython/Jupyter; a NameError therefore
        # means a non-interactive Python environment, where we exit with an error code.
        get_ipython().__class__.__name__
    except NameError:
        if exit_on_error:
            sys.exit(exit_code)
    return exit_code

In [None]:
# | notest

task = run_flows_async(
    get_config(cfg_name="test/settings.ini"),
    flow_command="--no-pylint run",
    ignore_suffix="_no_params.py",
)

In [None]:
# | notest

await task

In [None]:
# | notest

task = run_flows_async(
    get_config(cfg_name="test/settings.ini"),
    flow_provider="sagemaker",
    flow_command="run",
    ignore_suffix="_no_params.py",
)

In [None]:
# | notest

await task

# Utilities to Search Parameter Space

In [None]:
param_grid = {
    "traffic_percent": [1, 5, 10, 20, 50, 100],
    "model_level": ["router", "dispatcher"],
    "workers": [1],
}

In [None]:
# | export


def iter_param_grid(param_grid):
    """Lazily yield every combination of values in ``param_grid``.

    ``param_grid`` maps a parameter name to an iterable of candidate values.
    Each yielded item is a dict with one value chosen per parameter. Keys are
    processed in sorted order, so the sequence of combinations is
    reproducible; an empty grid yields a single empty dict.
    """
    # Adapted from
    # https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py
    sorted_items = sorted(param_grid.items())
    if not sorted_items:
        yield {}
        return

    names = [name for name, _ in sorted_items]
    candidates = [choices for _, choices in sorted_items]
    for combination in product(*candidates):
        yield dict(zip(names, combination))

In [None]:
assert [{"a": 1, "b": 1, "c": "hello"}, {"a": 2, "b": 1, "c": "hello"}] == list(
    iter_param_grid({"a": [1, 2], "b": [1], "c": ["hello"]})
)

In [None]:
# | export


def sample_grid_space(
    param_grid: Dict[str, Iterable[Any]],
    num_samples: int,
    random_state: Any = None,
):
    """Return up to ``num_samples`` parameter combinations from ``param_grid``.

    Args:
        param_grid: Mapping of parameter name to candidate values, expanded
            with ``iter_param_grid``.
        num_samples: Maximum number of combinations to return.
        random_state: Optional seed / random state forwarded to
            ``pandas.Series.sample`` so a sub-sample can be reproduced.
            ``None`` (the default) keeps the sampling unseeded.

    Returns:
        A list of parameter dicts. The full grid, in grid order, when it has
        at most ``num_samples`` entries; otherwise a random sample without
        replacement of exactly ``num_samples`` entries.
    """
    samples = list(iter_param_grid(param_grid))
    if num_samples < len(samples):
        samples = (
            pd.Series(samples).sample(num_samples, random_state=random_state).tolist()
        )
    return samples

In [None]:
sample_space = sample_grid_space({"a": [1, 2], "b": [1], "c": ["hello"]}, 1)
assert sample_space[0]["b"] == 1
assert sample_space[0]["c"] == "hello"
assert sample_space[0]["a"] == 1 or sample_space[0]["a"] == 2

In [None]:
# | export


async def search_batches(flow_nb_path, flow_provider, task_batches):
    """Run batches of flow executions, one batch after another.

    Every parameter spec in a batch is started as its own ``flow_task`` (with
    the ``"run"`` command) and the whole batch is awaited with
    ``asyncio.wait`` before the next batch starts.

    Returns:
        A list with one ``asyncio.wait`` result per batch, i.e. a
        ``(done, pending)`` pair of task sets.
    """
    event_loop = asyncio.get_event_loop()
    batch_outcomes = []
    for batch in task_batches:
        scheduled = []
        for param_spec in batch:
            run = flow_task(
                flow_nb_path,
                flow_provider,
                flow_command="run",
                params=param_spec,
            )
            scheduled.append(event_loop.create_task(run))
        # Block until the whole batch is finished before launching the next one.
        batch_outcomes.append(await asyncio.wait(scheduled))
    return batch_outcomes

In [None]:
# | export


def search_flow_grid(
    flow_nb_path,
    param_grid,
    flow_provider="metaflow",
    total_tasks=None,
    n_conc_tasks=None,
    local_mode=True,
):
    """Prepare a batched parameter search over ``param_grid`` for one flow.

    Args:
        flow_nb_path: Notebook or flow path handed to ``search_batches``.
        param_grid: Mapping of parameter name to candidate values.
        flow_provider: Provider used to run the flow.
        total_tasks: Number of parameter combinations to run; defaults to the
            size of the full grid.
        n_conc_tasks: Batch size, i.e. how many runs execute at the same time.
            In ``local_mode`` it defaults to half the CPU count minus one,
            but never less than 1.
        local_mode: Enables the CPU-based default for ``n_conc_tasks``.

    Returns:
        The un-awaited coroutine created by ``search_batches``; the caller
        must ``await`` it to actually run the search.
    """
    if total_tasks is None:
        total_tasks = len(list(iter_param_grid(param_grid)))

    if local_mode and n_conc_tasks is None:
        # Clamp to 1: on machines with three or fewer CPUs the formula gives 0
        # (or less), which is not a usable batch size.
        n_conc_tasks = max(1, multiprocessing.cpu_count() // 2 - 1)
    # NOTE(review): with local_mode=False and no n_conc_tasks the batch size
    # stays None and is passed to `chunks` as is -- confirm that is intended.

    sample_space = sample_grid_space(param_grid, total_tasks)
    task_batches = list(chunks(sample_space, n_conc_tasks))
    return search_batches(flow_nb_path, flow_provider, task_batches)

In [None]:
# | export


def extract_results(future_tasks):
    """Flatten the per-batch results of a grid search into one list.

    ``future_tasks`` is a sequence of pairs whose first element is an iterable
    of completed tasks (the shape returned by ``search_batches``). The
    ``result()`` of each completed task is returned, batch by batch.
    """
    finished = []
    for batch in future_tasks:
        # Only the first element of each pair (the completed tasks) is used.
        finished.extend(batch[0])
    return [done_task.result() for done_task in finished]

In [None]:
nb_path = Path(
    Path(".").resolve(),
    "test",
    "test_export.ipynb",
)
param_grid = {
    "traffic_percent": [1, 2, 3, 4, 5, 7, 8, 10, 20, 30, 40, 50],
    "model_level": ["dispatcher"],
    "workers": [1],
}

In [None]:
# | notest

future_tasks = await ensure_future(
    search_flow_grid(
        nb_path,
        param_grid,
        flow_provider="sagemaker",
        total_tasks=None,
        n_conc_tasks=4,
    )
)

In [None]:
# | notest

results = extract_results(future_tasks)
results

In [None]:
# | export


@call_parse
def sciflow_check_metaflows():
    """Call `check_call_flows` on the project config with its defaults (provider "metaflow", command "show")."""
    check_call_flows(get_config())

In [None]:
# | export


@call_parse
def sciflow_check_sagemaker_flows():
    """Call `check_call_flows` on the project config for the "sagemaker" provider with the default "show" command."""
    check_call_flows(get_config(), flow_provider="sagemaker")

In [None]:
# | export


@call_parse
def sciflow_run_metaflows():
    """Call `check_call_flows` on the project config with the "run" command and the default "metaflow" provider."""
    check_call_flows(get_config(), flow_command="run")

In [None]:
# | export


@call_parse
def sciflow_run_sagemaker_flows():
    """Call `check_call_flows` on the project config with the "run" command for the "sagemaker" provider."""
    check_call_flows(get_config(), flow_command="run", flow_provider="sagemaker")