# `run_sequential`

> Run generated flows sequentially

In [None]:
# | default_exp run_sequential

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Imports

In [None]:
# | export


import asyncio
import multiprocessing
import os
import sys
from itertools import product
from pathlib import Path
from typing import Any, Dict, Iterable

import pandas as pd
from execnb.nbio import read_nb
from fastcore.script import call_parse
from nbdev.config import get_config

from sciflow.utils import (
    chunks,
    find_default_export,
    get_flow_path,
    prepare_env,
    run_shell_cmd,
)

# Test Setup

In [None]:
# Notebook used by the test cells below, resolved against the current working directory.
nb_path = Path(Path(".").resolve(), "test", "test_export.ipynb")
flow_path = get_flow_path(nb_path, flow_provider="sagemaker")
nb = read_nb(nb_path)
# Default export of the notebook with "." swapped for "/" so it can be used as a
# relative file path (presumably a dotted module name — confirm in sciflow.utils).
module_name = find_default_export(nb["cells"]).replace(".", "/")
test_module = os.path.join(get_config().path("lib_path"), f"{module_name}.py")
# Flows directory as configured in the test project's settings file.
flows_dir = get_config(cfg_name="test/settings.ini").path("flows_path")
flow_name = os.path.basename(test_module)

# Helpers

## `check_is_init`

Check whether `sciflow_init` has been run by confirming that the project root is on `sys.path`.

In [None]:
# | export


def check_is_init():
    """Ensure the configured project root is present on ``sys.path``.

    Returns:
        None when the root path from the project config is on ``sys.path``.

    Raises:
        ValueError: If the root path is missing; the current ``sys.path`` is
            printed first to help with diagnosis.
    """
    project_root = str(get_config().path("root_path"))
    if project_root in sys.path:
        return

    print(f"PYTHONPATH={sys.path}")
    raise ValueError("Project is not in path; have you run sciflow_init?")

## `make_shell_cmd`

In [None]:
# | export


def make_shell_cmd(
    flow_nb_path, flow_provider="metaflow", flow_command="show", params=None
):
    """Build the shell command line that invokes a flow's CLI.

    Args:
        flow_nb_path: Path (``str`` or ``Path``) to a flow notebook or to a flow
            script. Paths ending in ``.ipynb`` are mapped to their flow file via
            ``get_flow_path``; any other path is used as-is.
        flow_provider: Provider name forwarded to ``get_flow_path`` when a
            notebook path is resolved.
        flow_command: CLI command (plus any options) placed after the script
            path. It is inserted verbatim, so it may contain several words.
        params: Optional mapping of option name to value. Each entry is rendered
            as ``--name value``; values are shell-quoted, so callers must pass
            them raw rather than pre-quoted.

    Returns:
        str: ``python <flow_path> <flow_command> [--name value ...]``.
    """
    # Local import keeps this block self-contained; shlex is standard library.
    import shlex

    prepare_env()
    flow_nb_path = Path(flow_nb_path)
    if flow_nb_path.suffix == ".ipynb":
        flow_path = get_flow_path(flow_nb_path, flow_provider=flow_provider)
    else:
        flow_path = flow_nb_path

    if params:
        # Quote every value so whitespace or shell metacharacters cannot split
        # or alter the command.
        args = " ".join(
            f"--{name} {shlex.quote(str(value))}" for name, value in params.items()
        )
        flow_command = f"{flow_command} {args}"

    # shlex.quote also copes with paths that contain a single quote, which the
    # previous hand-written '...' wrapping did not.
    return f"python {shlex.quote(str(flow_path))} {flow_command}"

# Verify or Run an Individual Flow

> `subprocess` is used to run flows because most flow providers bundle a CLI, which gives a consistent execution experience with minimal adaptation.

## `check_call_flow`

In [None]:
# | export


def check_call_flow(
    flow_nb_path, flow_provider="metaflow", flow_command="show", params=None
):
    """Run one flow command in a shell and report how it went.

    Args:
        flow_nb_path: Notebook or flow script path handed to ``make_shell_cmd``.
        flow_provider: Flow provider name handed to ``make_shell_cmd``.
        flow_command: Flow CLI command handed to ``make_shell_cmd``.
        params: Optional mapping of CLI options handed to ``make_shell_cmd``.

    Returns:
        tuple: ``(returncode, output)`` taken from the result of ``run_shell_cmd``.

    Raises:
        ValueError: Propagated from ``check_is_init`` when the project root is
            not on ``sys.path``.
    """
    # Fail fast before building or launching anything.
    check_is_init()

    shell_cmd = make_shell_cmd(
        flow_nb_path,
        flow_provider=flow_provider,
        flow_command=flow_command,
        params=params,
    )
    process, captured = run_shell_cmd(shell_cmd)
    return process.returncode, captured

In [None]:
# Keep a reference to the real path list, then empty `sys.path` so the
# init check in the next cell has nothing to find.
sys_path = sys.path
sys.path = []

In [None]:
# With an empty `sys.path` the init check must reject the call with a
# ValueError before any flow is launched.
raised = False
try:
    check_call_flow(nb_path, flow_provider="metaflow", flow_command="show")
except ValueError:
    # Only the expected failure counts; anything else should surface as an error.
    raised = True
finally:
    # Put the saved path list back even if an unexpected exception propagates,
    # so later cells are not left running with an empty `sys.path`.
    sys.path = sys_path
assert raised

PYTHONPATH=[]


In [None]:
# Reinstate the path list that was stashed in `sys_path`.
sys.path = sys_path

In [None]:
# NOTE(review): presumably sets up the environment that `check_is_init`
# expects before the flow checks below — confirm in sciflow.utils.
prepare_env()

In [None]:
# | notest

# `show` on the metaflow flow generated from the test notebook.
ret_code, output = check_call_flow(
    nb_path, flow_provider="metaflow", flow_command="show"
)
# Attach the captured CLI output so a failure explains itself.
assert ret_code == 0, output

CPU times: user 2.23 ms, sys: 3.69 ms, total: 5.92 ms
Wall time: 4.55 s


In [None]:
# | notest

# `run` the metaflow flow generated from the test notebook.
ret_code, output = check_call_flow(
    nb_path, flow_provider="metaflow", flow_command="run"
)
# Print before asserting so the log is visible when the run fails too.
print(output)
assert ret_code == 0

Metaflow 2.10.0 executing TestExportFlow for user:Donal Simmie
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint not found, so extra checks are disabled.
2023-11-01 20:56:07.799 Workflow starting (run-id 1698872167405217):
2023-11-01 20:56:07.936 [1698872167405217/start/1 (pid 20952)] Task is starting.
2023-11-01 20:56:12.518 [1698872167405217/start/1 (pid 20952)] 3
2023-11-01 20:56:12.886 [1698872167405217/start/1 (pid 20952)] Task finished successfully.
2023-11-01 20:56:13.050 [1698872167405217/preprocess/2 (pid 20956)] Task is starting.
2023-11-01 20:56:17.645 [1698872167405217/preprocess/2 (pid 20956)] Preprocessing input data from /home/sagemaker-user/git/sciflow/nbs...
2023-11-01 20:56:19.029 [1698872167405217/preprocess/2 (pid 20956)] Task finished successfully.
2023-11-01 20:56:19.184 [1698872167405217/train/3 (pid 20960)] Task is starting.
2023-11-01 20:56:23.782 [1698872167405217/train/3 (pid 20960)] Training /home/sagemaker-user/git/sciflow on /h

In [None]:
# | notest

# Same run, relying on the default `flow_provider` ("metaflow").
ret_code, output = check_call_flow(nb_path, flow_command="run")
# Print before asserting so the log is visible when the run fails too.
print(output)
assert ret_code == 0

Metaflow 2.10.0 executing TestExportFlow for user:Donal Simmie
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint not found, so extra checks are disabled.
2023-11-01 20:50:27.060 Workflow starting (run-id 1698871826657193):
2023-11-01 20:50:27.196 [1698871826657193/start/1 (pid 20771)] Task is starting.
2023-11-01 20:50:31.792 [1698871826657193/start/1 (pid 20771)] 3
2023-11-01 20:50:32.176 [1698871826657193/start/1 (pid 20771)] Task finished successfully.
2023-11-01 20:50:32.338 [1698871826657193/preprocess/2 (pid 20775)] Task is starting.
2023-11-01 20:50:36.934 [1698871826657193/preprocess/2 (pid 20775)] Preprocessing input data from /home/sagemaker-user/git/sciflow/nbs...
2023-11-01 20:50:38.318 [1698871826657193/preprocess/2 (pid 20775)] Task finished successfully.
2023-11-01 20:50:38.468 [1698871826657193/train/3 (pid 20779)] Task is starting.
2023-11-01 20:50:43.051 [1698871826657193/train/3 (pid 20779)] Training /home/sagemaker-user/git/sciflow on /h

In [None]:
# | notest

# `show` on the sagemaker flow generated from the test notebook.
ret_code, output = check_call_flow(
    nb_path, flow_provider="sagemaker", flow_command="show"
)
# Attach the captured CLI output so a failure explains itself.
assert ret_code == 0, output

CPU times: user 5.45 ms, sys: 0 ns, total: 5.45 ms
Wall time: 9.29 s


In [None]:
# | notest

# `run` the sagemaker flow generated from the test notebook.
ret_code, output = check_call_flow(
    nb_path, flow_provider="sagemaker", flow_command="run"
)
# Output is printed before the assertion so it is shown even when the run fails.
print(output)
assert ret_code == 0

KeyboardInterrupt: 

# Verify/Run all Flows

## `check_call_flows`

In [None]:
# | export


def check_call_flows(
    config,
    flow_provider="metaflow",
    flow_command="show",
    ignore_suffix=None,
    exit_on_error=True,
):
    """Run a flow command against every flow script of one provider.

    Every ``*.py`` file in ``<flows_path>/<flow_provider>`` whose name does not
    start with ``_sciflow`` is passed to ``check_call_flow`` and a one-line
    verdict is printed for it (plus the captured output on failure).

    Args:
        config: Project config object; ``config.path("flows_path")`` must give
            the directory that contains the per-provider flow folders.
        flow_provider: Name of the provider sub-directory to scan.
        flow_command: Flow CLI command to run for each flow.
        ignore_suffix: Optional suffix (or tuple of suffixes); file names
            ending with it are skipped.
        exit_on_error: When True and at least one flow fails, terminate the
            process with status 1, unless running inside IPython.

    Returns:
        int: 0 if every flow returned 0, otherwise 1.

    Raises:
        SystemExit: With code 1 when a flow failed, ``exit_on_error`` is True
            and ``get_ipython`` is not defined (i.e. outside IPython).
    """
    flows_dir = Path(config.path("flows_path"), flow_provider)

    any_failed = False
    # os.listdir returns entries in arbitrary order; sort so flows run and
    # report in a reproducible sequence.
    for flow_file_name in sorted(os.listdir(flows_dir)):
        if ignore_suffix and flow_file_name.endswith(ignore_suffix):
            continue
        if flow_file_name.startswith("_sciflow"):
            continue
        if not flow_file_name.endswith(".py"):
            continue

        ret_code, output = check_call_flow(
            Path(flows_dir, flow_file_name),
            flow_provider=flow_provider,
            flow_command=flow_command,
        )
        if ret_code == 0:
            print(f"Flow: {flow_file_name} {flow_command} verified")
        else:
            any_failed = True
            print(
                f"Flow: {flow_file_name} {flow_command} verification failed\nDetails:\n{output}"
            )

    if not any_failed:
        return 0

    try:
        # `get_ipython` only exists inside IPython/Jupyter; there we return the
        # code instead of exiting so the kernel stays alive.
        get_ipython()
    except NameError:
        if exit_on_error:
            sys.exit(1)
    return 1

In [None]:
# Run the default command ("show") over every metaflow flow of the test project;
# per-flow results and failure details are printed by `check_call_flows` itself.
ret_code = check_call_flows(get_config(cfg_name="test/settings.ini"))
assert ret_code == 0

Flow: test_export.py show verified
Flow: test_data_handling.py show verified
Flow: test_module.py show verified
Flow: test_multistep_no_params.py show verified
Flow: test_multistep.py show verification failed
Details:
Traceback (most recent call last):
  File "/home/sagemaker-user/git/sciflow/nbs/test/flows/metaflow/test_multistep.py", line 5, in <module>
    from sciflow.test.test_multistep import something, preprocess, fit, evaluate
ImportError: cannot import name 'something' from 'sciflow.test.test_multistep' (/home/sagemaker-user/git/sciflow/sciflow/test/test_multistep.py)
Flow: test_export_step.py show verified


AssertionError: 

In [None]:
# | notest

# Run (rather than just show) the test project's metaflow flows, skipping files
# whose names end in "_no_params.py". The returned exit code is not asserted here.
check_call_flows(
    get_config(cfg_name="test/settings.ini"),
    flow_command="--no-pylint run",
    ignore_suffix="_no_params.py",
)

In [None]:
# Same check as for metaflow, but scanning the "sagemaker" provider directory.
ret_code = check_call_flows(
    get_config(cfg_name="test/settings.ini"), flow_provider="sagemaker"
)
assert ret_code == 0

Flow: test_export.py show verified
Flow: test_data_handling.py show verified
Flow: test_module.py show verified
Flow: test_multistep_no_params.py show verified
Flow: test_multistep.py show verified
Flow: test_export_step.py show verified
CPU times: user 21.7 ms, sys: 19.6 ms, total: 41.3 ms
Wall time: 28.3 s


0

In [None]:
# | notest

# Use a dedicated name instead of rebinding the shared `nb_path`, so re-running
# earlier cells after this one still targets test_export.ipynb.
multistep_nb_path = Path(Path(".").resolve(), "test", "test_multistep.ipynb")
ret_code, output = check_call_flow(
    multistep_nb_path,
    flow_command="run",
    params={"traffic_percent": 1, "model_level": "dispatcher"},
)
print(output)
assert ret_code == 0

# CLI Commands

## `sciflow_check_metaflows`

In [None]:
# | export


@call_parse
def sciflow_check_metaflows():
    """Verify all metaflow flows of the current project with the `show` command."""
    project_config = get_config()
    check_call_flows(project_config)

## `sciflow_check_sagemaker_flows`

In [None]:
# | export


@call_parse
def sciflow_check_sagemaker_flows():
    """Verify all sagemaker flows of the current project with the `show` command."""
    project_config = get_config()
    check_call_flows(project_config, flow_provider="sagemaker")

## `sciflow_run_metaflows`

In [None]:
# | export


@call_parse
def sciflow_run_metaflows():
    """Run all metaflow flows of the current project with the `run` command."""
    project_config = get_config()
    check_call_flows(project_config, flow_command="run")

## `sciflow_run_sagemaker_flows`

In [None]:
# | export


@call_parse
def sciflow_run_sagemaker_flows():
    """Run all sagemaker flows of the current project with the `run` command."""
    project_config = get_config()
    check_call_flows(project_config, flow_provider="sagemaker", flow_command="run")