# Create Kale Workflows from Fireworks Workflows

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import itertools as it
import yaml

In [3]:
from fireworks import Firework, Workflow, LaunchPad, ScriptTask
from fireworks.core.rocket_launcher import rapidfire

In [4]:
from kale import workflow_objects as wo
from kale import workflow_widgets as ww

In [5]:
# Build the LaunchPad from the connection settings stored in a YAML file.
# (The reset itself is performed in the following cell.)
fwconfig_file = '/opt/conda/lib/python3.6/site-packages/my_launchpad.yaml'
with open(fwconfig_file) as param_file:
    # safe_load instead of load: yaml.load() without an explicit Loader can
    # construct arbitrary Python objects from the file and is rejected by
    # PyYAML >= 6. The parsed result is expanded as keyword arguments below,
    # so it has to be a plain mapping anyway.
    params = yaml.safe_load(param_file)
launchpad = LaunchPad(**params)
#launchpad = LaunchPad()

In [6]:
launchpad.reset('', require_password=False)

2018-01-24 19:01:48,573 INFO Performing db tune-up
2018-01-24 19:01:48,596 INFO LaunchPad was RESET.


In [7]:
def sort_fw_links(links):
    """Use NetworkX to topologically sort tasks
    so that they can be created and added to the workflow.

    NOTE: not implemented yet -- the body consists of this docstring only,
    so calling the function ignores ``links`` and returns ``None``.
    """

In [8]:
def kale_from_fw(fw_wf):
    """Create a Kale Workflow from a Fireworks Workflow.

    Every FireTask of every Firework in ``fw_wf`` becomes one Kale
    ``CommandLineTask`` named ``"<firework name>_<task position>"``.
    Dependencies are then copied from ``fw_wf.links``: each task created
    from a child Firework depends on every task created from its parent
    Firework.

    Currently only implemented for ScriptTasks.

    Args:
        fw_wf: Fireworks workflow to convert. Its ``name``, ``fws`` and
            ``links`` attributes are read.

    Returns:
        The newly built ``wo.Workflow``.

    Raises:
        NotImplementedError: if a FireTask is not a ``ScriptTask``.
    """
    kale_wf = wo.Workflow(fw_wf.name)

    # Kale tasks created for each Firework, indexed by fw_id.
    kale_tasks = dict()

    # Create tasks
    for fw in fw_wf.fws:
        # Fireworks are composed of FireTasks.
        # We assume that FireTasks are all run simultaneously,
        # so they don't depend on one another.
        # All FireTasks within a Firework will have the same
        # dependencies and children.
        fw_tasks = []
        for ft_num, ft in enumerate(fw.to_dict()['spec']['_tasks']):
            if ft['_fw_name'] != 'ScriptTask':
                raise NotImplementedError("Only ScriptTasks are supported now.")

            # NOTE(review): ''.join concatenates the entries of ft['script']
            # without any separator; fine for single-script tasks, confirm
            # the intended behavior for multi-script ScriptTasks.
            kale_task = wo.CommandLineTask(
                command=''.join(ft['script']),
                name="{}_{}".format(fw.name, ft_num)
            )

            # Add to the workflow without dependencies; links come later.
            kale_wf.add_task(kale_task)
            fw_tasks.append(kale_task)

        # Save this set of tasks by Firework ID.
        kale_tasks[fw.fw_id] = fw_tasks

    # Link tasks. Read the links from the workflow passed in (fw_wf), not
    # from a global that merely happens to exist in the calling session.
    for parent_id, child_ids in fw_wf.links.items():
        parent_task_list = kale_tasks[parent_id]
        for child_id in child_ids:
            child_task_list = kale_tasks[child_id]
            # Every child task depends on every parent task.
            for child_task, parent_task in it.product(child_task_list, parent_task_list):
                kale_wf.add_dependencies(child_task, [parent_task])

    return kale_wf

In [9]:
# Create a worker-pool widget and register one pool on it.
# NOTE(review): the add_pool arguments look like (pool name, number of
# workers, pool type) -- confirm against kale.workflow_widgets.
wpw = ww.WorkerPoolWidget()
wpw.add_pool('fw_pool', 4, 'fireworks')
# Bare expression as the last line so the notebook displays the widget.
wpw

In [10]:
# Build three Fireworks and chain them: hello -> goodbye -> blank.
# The first Firework carries two ScriptTasks, the others one each.
hello_tasks = [
    ScriptTask.from_str('echo "hello $(date)" >> ~/tmpmsg'),
    ScriptTask.from_str('echo "hi $(date)" >> ~/tmpmsg'),
]
fw1 = Firework(hello_tasks, name="hello")
fw2 = Firework(
    ScriptTask.from_str('echo "goodbye $(date)" >> ~/tmpmsg'),
    name="goodbye",
)
fw3 = Firework(
    ScriptTask.from_str('echo "" >> ~/tmpmsg'),
    name="blank",
)

# Second argument maps each parent Firework to its child.
wf = Workflow([fw1, fw2, fw3], {fw1: fw2, fw2: fw3}, name="test workflow")

In [11]:
kale_wf = kale_from_fw(wf)

Adding deps: <kale.workflow_objects.CommandLineTask object at 0x7f8a652dc978> <- [<kale.workflow_objects.CommandLineTask object at 0x7f8a652dc8d0>]
Adding deps: <kale.workflow_objects.CommandLineTask object at 0x7f8a652dc978> <- [<kale.workflow_objects.CommandLineTask object at 0x7f8a652dc7f0>]
Adding deps: <kale.workflow_objects.CommandLineTask object at 0x7f8a652dc940> <- [<kale.workflow_objects.CommandLineTask object at 0x7f8a652dc978>]


In [12]:
w = ww.WorkflowWidget(kale_wf, wpw)
w

In [20]:
kale_wf.dag.nodes()

NodeView((<kale.workflow_objects.CommandLineTask object at 0x7f8a652dc8d0>, <kale.workflow_objects.CommandLineTask object at 0x7f8a652dc7f0>, <kale.workflow_objects.CommandLineTask object at 0x7f8a652dc978>, <kale.workflow_objects.CommandLineTask object at 0x7f8a652dc940>))

In [21]:
t1 = kale_wf.index_dict[1]
t0 = kale_wf.index_dict[0]

In [25]:
[t.index[kale_wf] for t in w._get_children([t0])]

[2]

# I should put this in the actual get_children code

In [36]:
def get_names(task_list):
    """Return the ``name`` attribute of every task, in input order."""
    names = []
    for task in task_list:
        names.append(task.name)
    return names
def get_ids(task_list):
    """Return each task's index in the notebook-global ``kale_wf``, in input order."""
    indices = []
    for task in task_list:
        indices.append(task.index[kale_wf])
    return indices
def get_tasks(task_id_list):
    """Look up each index in ``kale_wf.index_dict`` and return the tasks, in input order."""
    tasks = []
    for index in task_id_list:
        tasks.append(kale_wf.index_dict[index])
    return tasks

def get_children(task_id_list):
    """Return ``(ids, names)`` of the children of the given task indices.

    The indices are resolved to tasks, passed to the workflow widget's
    ``_get_children``, and the resulting tasks are reported both as
    workflow indices and as names.
    """
    child_tasks = w._get_children(get_tasks(task_id_list))
    return get_ids(child_tasks), get_names(child_tasks)

In [54]:
def get_all_children(task_id_list):
    """Print the given tasks, then each successive generation of children.

    Accepts either a single task index or a list/tuple of indices. One
    line of ``ids names`` is printed per generation until ``get_children``
    returns no more ids. Nothing is returned.
    """
    if isinstance(task_id_list, (tuple, list)):
        ids = task_id_list[:]
    else:
        ids = [task_id_list]
    names = get_names(get_tasks(ids))
    while ids:
        print(ids, names)
        ids, names = get_children(ids)

In [57]:
get_all_children(0)

[0] ['hello_0']
[2] ['goodbye_0']
[3] ['blank_0']


In [39]:
ids, names = get_children(ids)
ids, names

([3], ['blank_0'])

In [26]:
w._get_children([t1])

[<kale.workflow_objects.CommandLineTask at 0x7f8a652dc978>]

In [18]:
t1.__class__

kale.workflow_objects.CommandLineTask

In [None]:
wf.links

In [None]:
# Store the Fireworks workflow in the LaunchPad, then launch it locally
# with rapidfire.
launchpad.add_wf(wf)
rapidfire(launchpad)

In [None]:
cat ~/tmpmsg