In [1]:
# default_exp metaflow

In [174]:
# export 
import os
from pathlib import Path
from nbdev.export import Config
from sciflow.parse_module import extract_dag
from nbdev.export import find_default_export, read_nb
from sciflow.params import params_as_dict
from sciflow.parse_module import extract_steps, FuncDetails
from typing import Iterable

# Sciflow Notebook to MetaFlow Flow

> Converts a `sciflow`-format notebook into a `metaflow` flow.

Supported features:

* Linear/sequential DAGs
* Simple `Parameters`

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [176]:
# Fixture notebook used by the checks in this notebook, plus the path of the
# module its default export maps to under the configured lib_path.
nb_path = Path("test", "test_export.ipynb")
nb = read_nb(nb_path)
module_name = find_default_export(nb["cells"])
lib_dir = Config().path("lib_path")
test_module = os.path.join(lib_dir, f"{module_name}.py")

In [7]:
def titleize(name):
    """Turn a snake_case name into a CamelCase-style identifier.

    Every word is title-cased with `str.title` and the underscores are then
    dropped, so ``'snake_case'`` becomes ``'SnakeCase'``. Because `str.title`
    lower-cases the remainder of each word, existing inner capitals are lost
    (``'myModule'`` becomes ``'Mymodule'``).
    """
    title_cased = name.title()
    return "".join(title_cased.split("_"))

In [8]:
# Underscores are removed and each word is capitalised.
assert titleize("snake_case") == "SnakeCase"

In [9]:
def rename_steps_for_metaflow(steps):
    """Rename the boundary steps, in place, to ``start`` and ``end``.

    The first element's ``name`` attribute becomes ``"start"`` and the last
    element's becomes ``"end"``; elements in between are left alone. An empty
    sequence is a no-op. Returns ``None``.

    Note: with exactly one step both assignments hit the same object, so it
    finishes named ``"end"`` and no step is called ``"start"``.
    """
    if len(steps) == 0:
        return
    steps[0].name = "start"
    # Assigned second so a lone step ends up as "end".
    steps[-1].name = "end"

In [12]:
# Extract the steps from the exported test module; the cells below assert on
# their names and then rename them in place.
steps = extract_steps(test_module)

In [124]:
# A module that defines no Sciflow steps should yield an empty result.
nbdev_module = os.path.join(Config().path("lib_path"), "_nbdev.py")
no_steps = extract_steps(nbdev_module)
assert len(no_steps) == 0

In [13]:
# Names as extracted from the test module, before any renaming.
# NOTE: this cell is not idempotent - the rename below mutates `steps`, so
# re-running it without re-extracting fails this first assertion.
assert [step.name for step in steps] == ['first', 'preprocess', 'train', 'last']

rename_steps_for_metaflow(steps)

# Only the first and last steps are renamed.
assert [step.name for step in steps] == ['start', 'preprocess', 'train', 'end']

In [68]:
def indent_multiline(multiline_text, indent=1):
    """Render text as an indented, triple-quoted docstring block.

    Leading and trailing whitespace is stripped from the text as a whole, the
    opening quotes are placed before the first line, every line is prefixed
    with ``indent`` levels of four spaces, and the closing quotes are appended
    directly after the last line. Lines after the first keep whatever leading
    whitespace they already had, in addition to the added prefix.

    :param multiline_text: the docstring text; ``None`` is treated as empty
        text so a function without a docstring yields an empty docstring
        instead of raising ``AttributeError``
    :param indent: number of four-space indentation levels to prepend
    :return: the formatted docstring block as a single string
    """
    # Presumably a step with no docstring arrives as None (verify against
    # extract_steps); fall back to empty text rather than crashing on .strip().
    text = multiline_text or ""
    lines = text.strip().split('\n')
    spaces = "    " * indent
    # Only the first line carries the opening quotes.
    lines[0] = '"""' + lines[0]
    return '\n'.join(spaces + line for line in lines) + '"""'

In [73]:
# Surrounding blank lines are stripped, each line gets one indent level, and
# the triple quotes hug the first and last lines.
text = "\nSome text\n:param param: text\n"
expected_docstring = '    """Some text\n    :param param: text"""'
assert indent_multiline(text) == expected_docstring

In [172]:
def nb_to_metaflow(nb_path: Path, flow_path: Path, silent=True):
    """Generate a Metaflow flow file from a Sciflow notebook.

    Reads the notebook, finds its default-export module under the configured
    ``lib_path`` and extracts that module's steps. If there are none, nothing
    is written. Otherwise the first and last steps are renamed to ``start``
    and ``end`` and the flow is written to ``flow_path`` as a class named
    ``<TitleizedModuleName>Flow``.

    :param nb_path: path of the notebook to convert
    :param flow_path: path of the flow file to write
    :param silent: when False, print a line saying whether the notebook was
        skipped or which flow was written
    :return: None
    """
    verbose = not silent

    notebook = read_nb(nb_path)
    lib_name = Config().lib_name
    module_name = find_default_export(notebook["cells"])
    module_file = f"{module_name}.py"
    exported_module = os.path.join(Config().path("lib_path"), module_file)

    steps = extract_steps(exported_module)
    if len(steps) == 0:
        if verbose:
            print(f"Skipping {nb_path} - no steps found")
        return

    # Capture the function names before renaming: the generated flow still
    # has to import and call the original functions.
    orig_step_names = [found.name for found in steps]
    params = params_as_dict(nb_path)
    flow_class_name = f"{titleize(module_name)}Flow"
    rename_steps_for_metaflow(steps)

    write_module_to_file(
        flow_path,
        flow_class_name,
        lib_name,
        module_name,
        orig_step_names,
        steps,
        params,
    )
    if verbose:
        print(f"Wrote {flow_class_name} to file: {flow_path}")

In [177]:
def write_module_to_file(flow_path: Path, flow_class_name: str, lib_name: str, module_name: str, 
                         orig_step_names: Iterable[str], steps: Iterable[FuncDetails], params: dict):
    """Write the source of a Metaflow ``FlowSpec`` subclass to ``flow_path``.

    The generated file contains, in order: a "do not edit" header, the
    Metaflow import, an import of the original step functions from
    ``<lib_name>.<module_name>``, an import of the parameter names from the
    same module (only when there are parameters), the flow class, and a
    ``__main__`` guard that instantiates the class.

    Inside the class each parameter becomes a ``_<name> = Parameter(...)``
    attribute whose default is the imported module-level value. Each step
    becomes an ``@step`` method that carries the step's docstring, calls the
    original function with ``step.args`` interpolated verbatim, and chains to
    the following step with ``self.next`` (the last step has no ``next``).

    :param flow_path: file to create or overwrite
    :param flow_class_name: name of the generated flow class
    :param lib_name: package the exported module lives in
    :param module_name: module that defines the step functions and parameters
    :param orig_step_names: original function names, aligned by position with
        ``steps``
    :param steps: step details; ``name``, ``docstring`` and ``args`` are read
    :param params: mapping whose keys are the parameter names
    :raises IndexError: if ``orig_step_names`` has fewer entries than ``steps``
    """
    # Materialise up front, before the file is truncated: both are indexed
    # and measured below, which a plain iterable does not support.
    orig_step_names = list(orig_step_names)
    steps = list(steps)

    single_indent = "    "
    double_indent = single_indent * 2
    module_import = f"from {lib_name}.{module_name} import"

    with open(flow_path, 'w') as flow_file:
        flow_file.write("# SCIFLOW GENERATED FILE - DO NOT EDIT\n")
        flow_file.write("from metaflow import FlowSpec, step, Parameter\n")
        flow_file.write(f"{module_import} {', '.join(orig_step_names)}\n")
        # "from x import" with no names is a SyntaxError, so skip this line
        # entirely for notebooks that declare no parameters.
        if params:
            flow_file.write(f"{module_import} {', '.join(params.keys())}\n")
        flow_file.write(f"\n\nclass {flow_class_name}(FlowSpec):\n")
        for param in params.keys():
            flow_file.write(f"{single_indent}_{param} = Parameter('{param}', default={param})\n")

        flow_file.write("\n")
        last_index = len(steps) - 1
        for i, step in enumerate(steps):
            flow_file.write(f"{single_indent}@step\n")
            flow_file.write(f"{single_indent}def {step.name}(self):\n")
            flow_file.write(f"{indent_multiline(step.docstring, 2)}\n")
            # Call the function under its original name; step.name may have
            # been changed to start/end for Metaflow.
            flow_file.write(f"{double_indent}{orig_step_names[i]}({step.args})\n")
            if i < last_index:
                flow_file.write(f"{double_indent}self.next(self.{steps[i+1].name})\n")
            flow_file.write('\n')

        flow_file.write('if __name__ == "__main__":\n')
        flow_file.write(f"{single_indent}{flow_class_name}()")

In [178]:
def get_module_name(nb_path):
    """Return the default-export module name found in the notebook at ``nb_path``.

    The notebook is read with ``read_nb`` and its cells are passed to
    ``find_default_export``; whatever that returns is handed back unchanged.
    """
    return find_default_export(read_nb(nb_path)["cells"])

In [179]:
# Write the generated flow to test/flows/ under the current directory, named
# after the notebook's default-export module.
flow_file_name = f"{get_module_name(nb_path)}.py"
flow_path = os.path.join(Path('.').resolve(), "test", "flows", flow_file_name)

In [180]:
# Convert the fixture notebook; silent=False prints which flow class was
# written and where.
nb_to_metaflow(nb_path, flow_path, silent=False)

Wrote TestExportFlow to file: /home/jovyan/git/sciflow/nbs/test/flows/test_export.py


# Ignore notebooks without Sciflow steps

In [170]:
# A notebook with no steps is skipped: nb_to_metaflow returns before writing,
# so reusing flow_path here does not overwrite the flow generated above.
nb_to_metaflow('packaging.ipynb', flow_path, silent=False)

Skipping packaging.ipynb - no steps found


# TODO

* Add a recursive option to `nb_to_metaflow`