# `run_sequential`

> Run generated flows sequentially

In [2]:
# | default_exp run_sequential

In [3]:
%load_ext autoreload
%autoreload 2

# Imports

In [4]:
# | export


import asyncio
import multiprocessing
import os
import sys
from itertools import product
from pathlib import Path
from typing import Any, Dict, Iterable

import pandas as pd
from execnb.nbio import read_nb
from fastcore.script import call_parse
from nbdev.config import get_config

from sciflow.utils import (
    chunks,
    find_default_export,
    get_flow_path,
    prepare_env,
    run_shell_cmd,
)

# Test Setup

In [5]:
# Fixture notebook used by the example cells below.
nb_path = Path(Path(".").resolve(), "test", "test_export.ipynb")
# Flow location for the sagemaker provider, as resolved by get_flow_path.
flow_path = get_flow_path(nb_path, flow_provider="sagemaker")
nb = read_nb(nb_path)
# Default-export name of the notebook with "." replaced by "/" so it can be used as a relative path.
module_name = find_default_export(nb["cells"]).replace(".", "/")
# Python module path built from the project's lib_path setting.
test_module = os.path.join(get_config().path("lib_path"), f"{module_name}.py")
# flows_path as configured in the test settings file.
flows_dir = get_config(cfg_name="test/settings.ini").path("flows_path")
flow_name = os.path.basename(test_module)

# Helpers

## `check_is_init`

Check whether `sciflow_init` has been called by verifying that the project root is on `sys.path`.

In [6]:
# | export


def check_is_init():
    """Ensure the configured project root is present on ``sys.path``.

    Returns:
        None when the root path is found.

    Raises:
        ValueError: if the ``root_path`` from the project config is not an
            entry of ``sys.path``. The current ``sys.path`` is printed first
            to help diagnose the problem.
    """
    project_root = str(get_config().path("root_path"))

    # Guard clause: nothing to do when the project is already importable.
    if project_root in sys.path:
        return

    print(f"PYTHONPATH={sys.path}")
    raise ValueError("Project is not in path; have you run sciflow_init?")

## `make_shell_cmd`

In [7]:
# | export


def make_shell_cmd(
    flow_nb_path, flow_provider="metaflow", flow_command="show", params=None
):
    """Build the command line string used to invoke a flow file with Python.

    Args:
        flow_nb_path: ``Path``-like object with a ``suffix`` attribute. When
            the suffix is ``.ipynb`` the flow file is looked up through
            ``get_flow_path``; any other path is used as the flow file itself.
        flow_provider: Provider name forwarded to ``get_flow_path`` (only
            used for ``.ipynb`` inputs).
        flow_command: Command (and any flags) appended after the flow path.
        params: Optional mapping of parameter name to value. Each entry is
            rendered as ``--name value`` and appended to ``flow_command``.

    Returns:
        A string of the form ``python '<flow_path>' <flow_command> [--k v ...]``.
    """
    # Function-scope import so the block does not depend on the notebook's import cell.
    import shlex

    prepare_env()
    if flow_nb_path.suffix == ".ipynb":
        flow_path = get_flow_path(flow_nb_path, flow_provider=flow_provider)
    else:
        flow_path = flow_nb_path

    if params:
        # Quote each value so whitespace or shell metacharacters inside a
        # parameter cannot split into extra arguments or inject commands.
        # shlex.quote leaves simple tokens (e.g. 1, dispatcher) untouched, so
        # the output for existing callers is unchanged.
        args = " ".join(
            f"--{key} {shlex.quote(str(value))}" for key, value in params.items()
        )
        flow_command = f"{flow_command} {args}"

    return f"python '{flow_path}' {flow_command}"

# Verify or Run an Individual Flow

> `subprocess` is used to run flows because most flow providers bundle a CLI, which makes for a consistent execution experience with minimal adaptation.

## `check_call_flow`

In [8]:
# | export


def check_call_flow(
    flow_nb_path, flow_provider="metaflow", flow_command="show", params=None
):
    """Run one flow command and report how it finished.

    Args:
        flow_nb_path: Notebook or flow file path, passed to ``make_shell_cmd``.
        flow_provider: Provider name passed to ``make_shell_cmd``.
        flow_command: Command to run against the flow.
        params: Optional mapping of flow parameters.

    Returns:
        Tuple ``(returncode, output)`` taken from ``run_shell_cmd``.

    Raises:
        ValueError: propagated from ``check_is_init`` when the project root
            is not on ``sys.path``.
    """
    check_is_init()

    process, captured = run_shell_cmd(
        make_shell_cmd(flow_nb_path, flow_provider, flow_command, params)
    )
    return process.returncode, captured

In [9]:
# Keep a reference to the real sys.path, then empty it so check_is_init cannot find the project root.
sys_path = sys.path
sys.path = []

In [10]:
# With the project root missing from sys.path, check_call_flow must refuse to run.
raised = False
try:
    check_call_flow(nb_path, flow_provider="metaflow", flow_command="show")
except ValueError:
    # check_is_init signals a missing project root with ValueError; any other
    # exception is an unrelated failure and should surface instead of passing.
    raised = True
assert raised

PYTHONPATH=[]


In [11]:
# Put back the sys.path saved before the negative test.
sys.path = sys_path

In [12]:
# NOTE(review): presumably sets up the environment the flow commands below rely on - confirm against sciflow.utils.prepare_env.
prepare_env()

In [13]:
# | notest

# The metaflow "show" command on the fixture notebook's flow should exit with code 0.
ret_code, output = check_call_flow(
    nb_path, flow_provider="metaflow", flow_command="show"
)
assert ret_code == 0

In [14]:
# | notest

# The metaflow "run" command should exit with code 0; the captured output is printed for inspection.
ret_code, output = check_call_flow(
    nb_path, flow_provider="metaflow", flow_command="run"
)
assert ret_code == 0
print(output)

Metaflow 2.10.0 executing TestExportFlow for user:Donal Simmie
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint not found, so extra checks are disabled.
2023-11-20 15:24:18.180 Workflow starting (run-id 1700493857814240):
2023-11-20 15:24:18.297 [1700493857814240/start/1 (pid 12378)] Task is starting.
2023-11-20 15:24:22.912 [1700493857814240/start/1 (pid 12378)] 3
2023-11-20 15:24:23.284 [1700493857814240/start/1 (pid 12378)] Task finished successfully.
2023-11-20 15:24:23.433 [1700493857814240/preprocess/2 (pid 12382)] Task is starting.
2023-11-20 15:24:28.046 [1700493857814240/preprocess/2 (pid 12382)] Preprocessing input data from /home/sagemaker-user/git/sciflow/nbs...
2023-11-20 15:24:29.383 [1700493857814240/preprocess/2 (pid 12382)] Task finished successfully.
2023-11-20 15:24:29.518 [1700493857814240/train/3 (pid 12386)] Task is starting.
2023-11-20 15:24:34.140 [1700493857814240/train/3 (pid 12386)] Training /home/sagemaker-user/git/sciflow on /h

In [15]:
# | notest

# Same run as above, relying on the default flow_provider ("metaflow").
ret_code, output = check_call_flow(nb_path, flow_command="run")
assert ret_code == 0
print(output)

Metaflow 2.10.0 executing TestExportFlow for user:Donal Simmie
Validating your flow...
    The graph looks good!
Running pylint...
    Pylint not found, so extra checks are disabled.
2023-11-20 15:24:45.694 Workflow starting (run-id 1700493885369816):
2023-11-20 15:24:45.809 [1700493885369816/start/1 (pid 12399)] Task is starting.
2023-11-20 15:24:50.397 [1700493885369816/start/1 (pid 12399)] 3
2023-11-20 15:24:50.726 [1700493885369816/start/1 (pid 12399)] Task finished successfully.
2023-11-20 15:24:50.856 [1700493885369816/preprocess/2 (pid 12403)] Task is starting.
2023-11-20 15:24:55.436 [1700493885369816/preprocess/2 (pid 12403)] Preprocessing input data from /home/sagemaker-user/git/sciflow/nbs...
2023-11-20 15:24:56.765 [1700493885369816/preprocess/2 (pid 12403)] Task finished successfully.
2023-11-20 15:24:56.902 [1700493885369816/train/3 (pid 12407)] Task is starting.
2023-11-20 15:25:01.500 [1700493885369816/train/3 (pid 12407)] Training /home/sagemaker-user/git/sciflow on /h

In [16]:
# | notest

# The sagemaker "show" command on the fixture notebook's flow should exit with code 0.
ret_code, output = check_call_flow(
    nb_path, flow_provider="sagemaker", flow_command="show"
)
assert ret_code == 0

In [17]:
# | notest

# The output is printed before the assertion so it stays visible when the run fails.
# NOTE(review): the saved output of this cell ends in an AssertionError - confirm whether the sagemaker run is expected to pass in this environment.
ret_code, output = check_call_flow(
    nb_path, flow_provider="sagemaker", flow_command="run"
)
print(output)
assert ret_code == 0

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Popping out 'ProcessingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.
Popping out 'ProcessingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. Ple

AssertionError: 

# Verify/Run all Flows

## `check_call_flows`

In [None]:
# | export


def check_call_flows(
    config,
    flow_provider="metaflow",
    flow_command="show",
    ignore_suffix=None,
    exit_on_error=True,
):
    """Run ``flow_command`` against every flow file of one provider.

    Flow files are the ``.py`` files directly inside
    ``<flows_path>/<flow_provider>``. Files whose name starts with
    ``_sciflow`` are skipped, as are files ending with ``ignore_suffix``
    when that argument is given.

    Args:
        config: Object exposing ``path(name)``; ``path("flows_path")`` must
            return the directory that holds the per-provider flow folders.
        flow_provider: Name of the provider sub-directory to scan.
        flow_command: Command passed to each flow (for example ``"show"``).
        ignore_suffix: Optional file-name suffix; matching files are skipped.
        exit_on_error: When true and at least one flow failed, terminate the
            process with exit code 1, unless ``get_ipython`` is defined
            (i.e. the code runs inside IPython/Jupyter).

    Returns:
        ``0`` when every flow command returned 0 (or no flow was found),
        ``1`` otherwise.
    """
    flows_dir = Path(config.path("flows_path"), flow_provider)

    # os.listdir order is arbitrary; sort so flows run and log in a stable order.
    flow_file_names = sorted(os.listdir(flows_dir))
    if ignore_suffix:
        flow_file_names = [
            name for name in flow_file_names if not name.endswith(ignore_suffix)
        ]

    any_failed = False
    for flow_file_name in flow_file_names:
        # Only real flow modules are checked; "_sciflow*" files are skipped.
        if flow_file_name.startswith("_sciflow") or not flow_file_name.endswith(
            ".py"
        ):
            continue

        ret_code, output = check_call_flow(
            Path(flows_dir, flow_file_name),
            # Forward the provider so the call agrees with the directory scanned.
            flow_provider=flow_provider,
            flow_command=flow_command,
        )
        if ret_code == 0:
            print(f"Flow: {flow_file_name} {flow_command} verified")
        else:
            any_failed = True
            print(
                f"Flow: {flow_file_name} {flow_command} verification failed\nDetails:\n{output}"
            )

    if not any_failed:
        return 0

    exit_code = 1
    if exit_on_error:
        try:
            # get_ipython only exists in interactive IPython sessions; there
            # the exit code is returned instead of killing the kernel.
            get_ipython()
        except NameError:
            sys.exit(exit_code)
    return exit_code

In [None]:
# Defaults apply: the "show" command is run on every metaflow flow listed under the test settings.
ret_code = check_call_flows(get_config(cfg_name="test/settings.ini"))
assert ret_code == 0

In [None]:
# | notest

# Run the metaflow flows with "--no-pylint run", skipping files whose name ends in "_no_params.py".
check_call_flows(
    get_config(cfg_name="test/settings.ini"),
    flow_command="--no-pylint run",
    ignore_suffix="_no_params.py",
)

In [None]:
# Same check as for metaflow, scanning the "sagemaker" provider directory instead.
ret_code = check_call_flows(
    get_config(cfg_name="test/settings.ini"), flow_provider="sagemaker"
)
assert ret_code == 0

In [None]:
# | notest

# Rebinds nb_path to the multistep fixture notebook; any cell run after this one sees the new value.
nb_path = Path(Path(".").resolve(), "test", "test_multistep.ipynb")
# params are rendered by make_shell_cmd as "--traffic_percent 1 --model_level dispatcher".
ret_code, output = check_call_flow(
    nb_path,
    flow_command="run",
    params={"traffic_percent": 1, "model_level": "dispatcher"},
)
print(output)
assert ret_code == 0

# CLI Commands

## `sciflow_check_metaflows`

In [None]:
# | export


@call_parse
def sciflow_check_metaflows():
    """Run the `show` command on every metaflow flow of the current project."""
    check_call_flows(get_config())

## `sciflow_check_sagemaker_flows`

In [None]:
# | export


@call_parse
def sciflow_check_sagemaker_flows():
    """Run the `show` command on every sagemaker flow of the current project."""
    check_call_flows(get_config(), flow_provider="sagemaker")

## `sciflow_run_metaflows`

In [None]:
# | export


@call_parse
def sciflow_run_metaflows():
    """Run the `run` command on every metaflow flow of the current project."""
    check_call_flows(get_config(), flow_command="run")

## `sciflow_run_sagemaker_flows`

In [None]:
# | export


@call_parse
def sciflow_run_sagemaker_flows():
    """Run the `run` command on every sagemaker flow of the current project."""
    check_call_flows(get_config(), flow_command="run", flow_provider="sagemaker")