In [33]:
# default_exp metaflow

In [34]:
# export
import os
from pathlib import Path
from typing import Iterable
import subprocess

from fastcore.script import call_parse
from nbdev.export import Config, find_default_export, nbglob, read_nb
from sciflow.params import params_as_dict
from sciflow.parse_module import FuncDetails, extract_steps

# Sciflow Notebook to MetaFlow Flow

> Converts from a `sciflow` format notebook to a `metaflow` flow. 

Supported features:

* Linear/sequential DAGs
* Simple `Parameters`

In [35]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [41]:
nb_path = Path(os.path.join("test", "test_export.ipynb"))
nb = read_nb(nb_path)
module_name = find_default_export(nb["cells"]).replace('.', '/')
test_module = os.path.join(Config().path("lib_path"), f"{module_name}.py")

In [42]:
# export


def titleize(name):
    return name.title().replace("_", "")

In [43]:
assert titleize("snake_case") == "SnakeCase"

In [44]:
# export


def rename_steps_for_metaflow(steps):
    for i, step in enumerate(steps):
        if i == 0:
            step.name = "start"
        if i == len(steps) - 1:
            step.name = "end"

In [45]:
steps = extract_steps(test_module)

In [46]:
no_steps = extract_steps(os.path.join(Config().path("lib_path"), f"_nbdev.py"))
assert len(no_steps) == 0

In [47]:
assert ["first", "preprocess", "train", "last"] == [step.name for step in steps]
rename_steps_for_metaflow(steps)
assert ["start", "preprocess", "train", "end"] == [step.name for step in steps]

In [48]:
# export


def indent_multiline(multiline_text, indent=1):
    lines = multiline_text.strip().split("\n")
    spaces = "".join(["    " for _ in range(indent)])
    for i in range(len(lines)):
        prefix = spaces if i > 0 else spaces + '"""'
        lines[i] = prefix + lines[i]
    return "\n".join(lines) + '"""'

In [49]:
text = """
Some text
:param param: text
"""
assert '    """Some text\n    :param param: text"""' == indent_multiline(text)

In [68]:
# export


def nb_to_metaflow(nb_path: Path, flow_path: Path, silent=True):
    print(nb_path)
    nb = read_nb(nb_path)
    lib_name = Config().lib_name
    module_name = find_default_export(nb["cells"])
    if not module_name:
        return
    module_name = module_name.replace('.', '/')
    nb_name = os.path.basename(nb_path)
    exported_module = os.path.join(Config().path("lib_path"), f"{module_name}.py")
    steps = extract_steps(exported_module)
    if len(steps) == 0:
        return
    orig_step_names = [step.name for step in steps]
    if len(steps) == 1:
        steps.append(FuncDetails("end", None, None, False, "pass"))
    params = params_as_dict(nb_path)
    if len(params) == 0:
        print(f"No params cell found for: {os.path.basename(nb_path)}")
    flow_class_name = f"{titleize(module_name)}Flow"
    rename_steps_for_metaflow(steps)
    write_module_to_file(
        flow_path,
        flow_class_name,
        lib_name,
        module_name,
        orig_step_names,
        steps,
        params,
    )
    if not silent:
        print(f"Converted {nb_name} to {flow_class_name} in: {os.path.basename(flow_path)}")

In [69]:
#  export


def write_module_to_file(
    flow_path: Path,
    flow_class_name: str,
    lib_name: str,
    module_name: str,
    orig_step_names: Iterable[str],
    steps: Iterable[FuncDetails],
    params: dict,
):
    if not os.path.exists(flow_path.parent):
        os.mkdir(flow_path.parent)
    with open(flow_path, "w") as flow_file:
        flow_file.write("# SCIFLOW GENERATED FILE - DO NOT EDIT\n")
        flow_file.write("from metaflow import FlowSpec, step, Parameter\n")
        flow_file.write(
            f"from {lib_name}.{module_name} import {', '.join(orig_step_names)}\n"
        )
        flow_file.write(
            f"from {lib_name}.{module_name} import {', '.join(params.keys())}\n"
        )
        flow_file.write(f"\n\nclass {flow_class_name}(FlowSpec):\n")
        single_indent = "    "
        for param in params.keys():
            flow_file.write(
                f"{single_indent}{param} = Parameter('{param}', default={param})\n"
            )

        flow_file.write("\n")
        for i, step in enumerate(steps):
            flow_file.write(f"{single_indent}@step\n")
            flow_file.write(f"{single_indent}def {step.name}(self):\n")
            if step.docstring:
                flow_file.write(f"{indent_multiline(step.docstring, 2)}\n")
            # Check for padded step
            if i < len(orig_step_names):
                flow_step_args = ",".join(["self." + a for a in step.args.split(",")])
                flow_file.write(
                    f"{single_indent}{single_indent}{orig_step_names[i]}({flow_step_args})\n"
                )
            else:
                flow_file.write(f"{single_indent}{single_indent}pass\n")
            if i < len(steps) - 1:
                flow_file.write(
                    f"{single_indent}{single_indent}self.next(self.{steps[i+1].name})\n"
                )
            flow_file.write("\n")

        flow_file.write('if __name__ == "__main__":\n')
        flow_file.write(f"{single_indent}{flow_class_name}()")

In [70]:
# export


def get_module_name(nb_path):
    nb = read_nb(nb_path)
    module_name = find_default_export(nb["cells"])
    return module_name

In [71]:
flow_path = Path(
    os.path.join(Path(".").resolve(), "test", "flows", f"{get_module_name(nb_path)}.py")
)

In [72]:
flow_path

Path('/home/jovyan/git/sciflow/nbs/test/flows/test.test_export.py')

In [73]:
nb_to_metaflow(nb_path, flow_path, silent=False)

test/test_export.ipynb
Converted test_export.ipynb to Test/TestExportFlow in: test.test_export.py


# Ignore notebooks without Sciflow steps

In [74]:
nb_to_metaflow("packaging.ipynb", flow_path, silent=False)

packaging.ipynb


In [75]:
# export


def generate_flows(config: Config):
    flows_dir = config.path("flows_path")
    nb_paths = nbglob(recursive=True)
    for nb_path in nb_paths:
        flow_module_name = os.path.basename(nb_path).replace("ipynb", "py")
        nb_to_metaflow(
            nb_path, Path(os.path.join(flows_dir, flow_module_name)), silent=False
        )

In [78]:
generate_flows(Config(cfg_name="test/settings.ini"))

/home/jovyan/git/sciflow/nbs/metaflow.ipynb
/home/jovyan/git/sciflow/nbs/build.ipynb
/home/jovyan/git/sciflow/nbs/utils.ipynb
/home/jovyan/git/sciflow/nbs/parse_module.ipynb
/home/jovyan/git/sciflow/nbs/scilint.ipynb
/home/jovyan/git/sciflow/nbs/export_named.ipynb
/home/jovyan/git/sciflow/nbs/packaging.ipynb
/home/jovyan/git/sciflow/nbs/params.ipynb
/home/jovyan/git/sciflow/nbs/index.ipynb
/home/jovyan/git/sciflow/nbs/data_handlers.ipynb
/home/jovyan/git/sciflow/nbs/test/test_export.ipynb
Converted test_export.ipynb to Test/TestExportFlow in: test_export.py
/home/jovyan/git/sciflow/nbs/test/test_pandas_numpy.ipynb
Converted test_pandas_numpy.ipynb to Test/TestPandasNumpyFlow in: test_pandas_numpy.py
/home/jovyan/git/sciflow/nbs/test/test_top2vec.ipynb
Converted test_top2vec.ipynb to Test/TestTop2VecFlow in: test_top2vec.py
/home/jovyan/git/sciflow/nbs/test/test_module.ipynb
Converted test_module.ipynb to Test/TestModuleFlow in: test_module.py


In [None]:
# export


@call_parse
def sciflow_generate():
    generate_flows(Config())

In [115]:
def check_flows(config):
    flow_results = {}
    flows_dir = config.path('flows_path')
    for flow_path in os.listdir(flows_dir):
        flow_name = os.path.basename(flow_path)
        if flow_path.endswith('.py'):
            ret_code, output = check_flow(flows_dir, flow_path)
            flow_results[flow_name] = ret_code, output
            if ret_code == 0:
                print(f'Flow: {flow_name} verified')
            else:
                print(f'Flow: {flow_name} verification failed\nDetails:\n{output}')

In [116]:
def check_flow(flows_dir, flow_module):
    script = f"python '{os.path.join(flows_dir, flow_module)}' show"
    pipe = subprocess.Popen("%s" % script, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    output = pipe.communicate()[0]
    return pipe.returncode, output.decode('utf-8').strip()

In [117]:
check_flows(Config(cfg_name="test/settings.ini"))

Flow: test_module.py verification failed
Details:
File "/home/jovyan/git/sciflow/nbs/test/flows/test_module.py", line 3
    from sciflow.test/test_module import first
                     ^
SyntaxError: invalid syntax
Flow: test_top2vec.py verification failed
Details:
File "/home/jovyan/git/sciflow/nbs/test/flows/test_top2vec.py", line 3
    from sciflow.test/test_top2vec import something, preprocess, fit, evaluate
                     ^
SyntaxError: invalid syntax
Flow: test_pandas_numpy.py verification failed
Details:
File "/home/jovyan/git/sciflow/nbs/test/flows/test_pandas_numpy.py", line 3
    from sciflow.test/test_pandas_numpy import first, preprocess, train, last
                     ^
SyntaxError: invalid syntax
Flow: test_export.py verification failed
Details:
File "/home/jovyan/git/sciflow/nbs/test/flows/test_export.py", line 3
    from sciflow.test/test_export import first, preprocess, train, last
                     ^
SyntaxError: invalid syntax
