In [1]:
import bqplot as bq
import networkx
import numpy as np
import ipywidgets as ipw
from copy import copy, deepcopy
from IPython.display import display, HTML

In [2]:
bq.Graph?

In [3]:
# With indices as dag nodes

class Workflow(object):
    """
    A named collection of Tasks and the dependencies between them.

    Tasks are stored as nodes of a directed graph (self.dag); an edge
    runs from each dependency to the Task which depends on it. Every
    Task receives an integer index (its insertion order), which is used
    as its label and link endpoint when the graph is drawn with bqplot.
    """

    def __init__(self, name):
        # Directed graph, so that every edge keeps its
        # dependency -> task orientation.
        self.dag = networkx.DiGraph()
        self.name = name
        self.index_dict = {}
        self.fig_layout = ipw.Layout(width='1000px', height='800px')
        # Names of the Tasks added so far, used to enforce unique names.
        self._task_names = []
        # bqplot graph from the latest draw_dag call (None before that).
        self._bqgraph = None

    def add_task(self, task, dependencies=None):
        """
        Add instantiated Task object to the Workflow.
        If dependencies=None, then this task will be executed
        as soon as possible upon starting the Workflow.
        A Task may appear only once per Workflow.

        Raises ValueError if the Task (or another Task with the same
        name) is already present, or if a dependency has not been
        added to this Workflow yet.
        """

        # Ensure that tasks are not repeated.
        if self.dag.has_node(task):
            raise ValueError("Task already present in Workflow. Please pass a deepcopy if you wish to repeat the Task.")
        if task.name in self._task_names:
            raise ValueError("Task name '{}' already present in Workflow. Please use a unique name.".format(task.name))

        # Validate dependencies before touching the graph, so that a
        # failed call leaves the Workflow unchanged. (add_edge would
        # otherwise silently create index-less nodes.)
        dependencies = list(dependencies) if dependencies is not None else []
        for dependency in dependencies:
            if not self.dag.has_node(dependency):
                raise ValueError(
                    "Dependency {!r} of task '{}' is not part of this Workflow. "
                    "Please add it with add_task first.".format(dependency, task.name)
                )

        # Determine index for this Task in this Workflow
        index = self.dag.number_of_nodes()
        # Inform workflow and task of this assignment
        self.dag.add_node(task, index=index)
        task.index[self] = index
        self._task_names.append(task.name)

        for dependency in dependencies:
            self.dag.add_edge(dependency, task)

    def get_task_by_name(self, name):
        "Return the Task with the given name in this Workflow, or None if there is none."
        for task in self.dag.nodes():
            try:
                if task.name == name:
                    return task
            except AttributeError:
                print("{} has no name.".format(task))
        return None

    def _gen_bqgraph(self):
        "Generate bqplot graph."

        # Node positions as computed by the graphviz "dot" program.
        pos = networkx.nx_pydot.graphviz_layout(self.dag, prog='dot')

        # Fix the node order once so that coordinates and node_data line up.
        nodes = list(self.dag.nodes())
        x = [pos[node][0] for node in nodes]
        y = [pos[node][1] for node in nodes]

        node_data = [
            {
                'label': str(node.index[self]),
                'shape': 'rect',
                **node.get_user_dict()
            }
            for node in nodes
        ]
        link_data = [
            {
                'source': source.index[self],
                'target': target.index[self]
            }
            for source, target in self.dag.edges()
        ]

        scales = {'x': bq.LinearScale(), 'y': bq.LinearScale()}

        graph = bq.Graph(
            node_data=node_data,
            link_data=link_data,
            scales=scales,
            link_type='line',
            highlight_links=False,
            x=x, y=y,
            selected_style={'stroke':'red'}
        )

        return graph

    def get_bqgraph(self):
        """
        Retrieve, but do not regenerate bqplot graph.
        Returns None if draw_dag has not been called yet.
        """
        return self._bqgraph

    def draw_dag(self, layout=None):
        """
        Return bqplot figure representing DAG, regenerating graph.

        The return value is an ipw.VBox containing the bqplot Figure
        followed by its Toolbar. If layout is None, self.fig_layout
        is used for the Figure.
        """

        self._bqgraph = self._gen_bqgraph()

        if layout is None:
            layout = self.fig_layout

        fig = bq.Figure(marks=[self._bqgraph], layout=layout)
        toolbar = bq.Toolbar(figure=fig)

        return ipw.VBox([fig, toolbar])
        

In [53]:
class Task(object):
    """
    One step in a Workflow. Must have a unique name.

    Parameters
    ----------
    name : str
        Name of the task; may contain {param} placeholders.
    input_files, output_files : list of str, optional
        File names (may contain {param} placeholders).
    params : dict, optional
        Values substituted (via str.format) into the fields named in
        substitute_strings and substitute_lists.
    num_cores : int
        Number of CPU cores to run the task on.
    task_type : str
        Type of task (Notebook, CommandLine, etc.).
    substitute_strings, substitute_lists : list of str, optional
        Names of additional string / list-of-string attributes to
        substitute params into. These attributes must already be set
        on the instance when Task.__init__ is called.
    user_fields : list of str, optional
        Names of additional attributes reported by get_user_dict.

    The list and dict arguments are copied, so objects passed in by the
    caller are never modified and are never shared between Tasks.
    """
    def __init__(self, name, input_files=None, output_files=None,
                 params=None, num_cores=1, task_type='',
                 substitute_strings=None, substitute_lists=None,
                 user_fields=None):

        # Name of task (must be unique)
        self.name = name

        # Type of task (Notebook, CommandLine, etc.)
        self.task_type = task_type

        # List of other Tasks which must complete
        # before this Task can be run.
        self.dependencies = []

        # List of Tasks which depend on this Task.
        self.children = []

        # Files which this Task takes as input
        # and must be present before run.
        self.input_files = list(input_files) if input_files is not None else []

        # Files which are generated or modified by this Task.
        self.output_files = list(output_files) if output_files is not None else []

        # Number of CPU cores to run the task on
        self.num_cores = num_cores

        # Map workflow to the node index which
        # represents this task in that workflow.
        # Tasks may be in multiple workflows.
        self.index = {}

        # Parameters to replace in other arguments
        self.params = dict(params) if params is not None else {}

        # List of names of fields to substitute params.
        # If a child class calls Task.__init__ with
        # substitute_strings or substitute_lists as
        # nonempty lists, they will be included here.
        self._substitute_strings = [
            'name',
            'task_type'
        ] + list(substitute_strings or [])
        self._substitute_lists = [
            'input_files',
            'output_files'
        ] + list(substitute_lists or [])

        self._substitute_fields()

        # Fields which are of interest to the user
        self.user_fields = [
            'name',
            'task_type',
            'input_files',
            'output_files',
            'num_cores'
        ] + list(user_fields or [])

    def get_user_dict(self):
        "Generate dictionary of user field names and values"
        return {
            field: getattr(self, field)
            for field in self.user_fields
        }

    def _substitute_fields(self):
        "Replace fields according to params dict."
        for field in self._substitute_strings:
            template = getattr(self, field)
            setattr(self, field, template.format(**self.params))

        for list_name in self._substitute_lists:
            # Build a new list instead of editing in place, so a list
            # shared with the caller is left untouched.
            substituted = [
                template.format(**self.params)
                for template in getattr(self, list_name)
            ]
            setattr(self, list_name, substituted)

    def _run(self):
        """
        Run this Task. Should be executed by a Workflow.
        This function should be overloaded by child classes.
        """
        print("Task run.")
        

class NotebookTask(Task):
    """
    Jupyter Notebook which should appear as a node in the Workflow DAG.
    If interactive == True, a kernel will be started and the
    notebook will be opened for user to interact with.
    Workflow will be blocked in the meantime.
    If false, notebook will be executed without opening,
    and Workflow will continue upon successful execution.
    """
    def __init__(self, name, interactive=True, **kwargs):
        self.interactive = interactive

        # Task.__init__ assigns task_type from its keyword argument, so
        # a value set on self beforehand would be overwritten by the
        # default ''. Pass it through instead, still letting a caller
        # override it explicitly.
        kwargs.setdefault('task_type', 'NotebookTask')

        super().__init__(
            name=name,
            user_fields=['interactive'],
            **kwargs)

    def _run(self):
        print("Notebook run.")

    def _unblock(self):
        """
        Return control to Workflow after interactive notebook
        execution is complete.
        """
        pass

    
class CommandLineTask(Task):
    """
    Workflow step which consists of a single command line.

    The command may contain {param} placeholders; they are filled in
    from params by Task.__init__, and the command is reported as one
    of the user fields.
    """
    def __init__(self, name, command, **kwargs):
        # Has to exist before Task.__init__ runs, because the
        # substitution of 'command' happens in there.
        self.command = command

        super().__init__(
            name=name,
            task_type='CommandLineTask',
            substitute_strings=['command'],
            user_fields=['command'],
            **kwargs
        )

    def _run(self):
        print("Command Line run.")

        
class PythonFunctionTask(Task):
    """
    Python function call to be executed as a Workflow step.

    fun is the callable; fun_args (sequence) and fun_kwargs (dict) are
    the positional and keyword arguments it is called with by _run.
    """
    def __init__(self, name, fun, fun_args, fun_kwargs, **kwargs):
        # Actual callable function to be executed.
        self.fun = fun

        # Arguments for the call. They are listed in user_fields, so
        # they must exist as attributes for get_user_dict to work.
        self.fun_args = fun_args
        self.fun_kwargs = fun_kwargs

        user_fields = ['fun_args', 'fun_kwargs']

        super().__init__(
            name=name,
            task_type='PythonFunctionTask',
            user_fields=user_fields,
            **kwargs
        )

    def _run(self):
        "Call the stored function and return its result."
        print("Python function run.")
        # Tolerate None for "no arguments".
        return self.fun(*(self.fun_args or ()), **(self.fun_kwargs or {}))
    
class BatchTask(Task):
    """
    Task which will be submitted to a batch queue to execute.

    batch_script may contain {param} placeholders; like the command of
    a CommandLineTask, they are filled in from params by Task.__init__.
    """
    def __init__(self, name, batch_script, **kwargs):
        # Has to exist before Task.__init__ runs, because the
        # substitution of 'batch_script' happens in there.
        self.batch_script = batch_script

        # Merge with any substitute_strings supplied by the caller
        # (which used to be forwarded untouched), without listing
        # 'batch_script' twice and thereby formatting it twice.
        extra_strings = kwargs.pop('substitute_strings', None) or []
        substitute_strings = ['batch_script'] + [
            field for field in extra_strings
            if field != 'batch_script'
        ]

        super().__init__(
            name=name,
            task_type='BatchTask',
            substitute_strings=substitute_strings,
            user_fields=['batch_script'],
            **kwargs
        )

    def _run(self):
        print("Batch run.")

# Droplet Workflow

In [54]:
# --- Simulation parameters ---

# Radius of droplets (Angstroms)
droplet_radii = [20, 30, 40, 50, 60, 100]
# Shape of droplets
shape = 'sphere'
# Base directory for computations
base_dir = '$SCRATCH/droplet'
# Number of substrate images in each dimension
nx = 10
ny = 10
# Number of parts (dump files) per simulation
parts_per_sim = 8

# --- Workflow ---

droplet_wf = Workflow('Droplet Workflow')

# Generate substrate. The output path is formatted up front so that it
# can be used as a parameter value for both the command and output_files.
gen_mica_task = CommandLineTask(
    name='gen_mica_{nx}x{ny}',
    command='{base_dir}/gen_droplet/scripts/gen_mica.sh {nx} {ny} {out_file}',
    output_files=["{out_file}"],
    params={
        'base_dir': base_dir,
        'nx': nx,
        'ny': ny,
        'out_file': "{base_dir}/gen_droplet/lammps_data/mica_{nx}x{ny}.data".format(
            base_dir=base_dir, nx=nx, ny=ny
        ),
    },
)
droplet_wf.add_task(gen_mica_task)

# Loop over droplet sizes
for radius in droplet_radii:
    # Create droplet
    gen_droplet_task = CommandLineTask(
        name="gen_droplet-{radius}A",
        command="{base_dir}/gen_droplet/bin/waterdroplet_tip4p_new.out {radius} {shape}",
        output_files = [
            "{out_file}"
        ],
        params=dict(
            base_dir=base_dir,
            radius=radius,
            shape=shape,
            out_file="{base_dir}/gen_droplet/dump/droplet_{radius}A.lammpstrj".format(
                base_dir=base_dir,
                radius=radius
            )
        )
    )
    droplet_wf.add_task(gen_droplet_task)

    # Combine with substrate
    combine_task = CommandLineTask(
        name="combine-{radius}A",
        command="{base_dir}/gen_droplet/scripts/combine_sub_strip.pl {substrate} {film} {gap}",
        input_files = [
            "{substrate}",
            "{film}"
        ],
        output_files = [
            "{base_dir}/gen_droplet/lammps_data/droplet_on_mica-{radius}A.data"
        ],
        params=dict(
            base_dir=base_dir,
            radius=radius,
            substrate=gen_mica_task.output_files[0],
            film=gen_droplet_task.output_files[0],
            gap=radius,
        )
    )
    droplet_wf.add_task(
        combine_task,
        dependencies=[
            gen_mica_task,
            gen_droplet_task
        ]
    )

    # Run the simulation, which writes one dump file per part.
    # NOTE(review): batch_script points into sub_scripts/ while the
    # input file of the same name is in lammps_scripts/ - confirm
    # that both paths are intended.
    simulate_task = BatchTask(
        name="simulate-{radius}A",
        batch_script="{base_dir}/sub_scripts/simulate_{radius}A.batch",
        input_files = [
            combine_task.output_files[0],
            "{base_dir}/lammps_scripts/simulate_{radius}A.batch"
        ],
        output_files = [
            "{base_dir}/data/{radius}A/atom"+str(part)
            for part in range(1,parts_per_sim+1)
        ],
        num_cores=parts_per_sim,
        params=dict(
            base_dir=base_dir,
            radius=radius,
        )
    )
    droplet_wf.add_task(
        simulate_task,
        dependencies=[combine_task]
    )

    # Analyze each part independently. Each part writes into its own
    # atom{part} directory: a shared file would be overwritten by
    # every part, and combine_parts below reads per-part files.
    analyze_tasks = []
    for part in range(1,parts_per_sim+1):
        parse_task = CommandLineTask(
            name='parse-{radius}A_atom{part}',
            command='{base_dir}/exec/parse.sh {infile} {outfile}',
            input_files = ["{infile}"],
            output_files = ["{outfile}"],
            params=dict(
                base_dir=base_dir,
                radius=radius,
                part=part,
                infile=simulate_task.output_files[part-1],
                outfile="{base_dir}/results/{radius}A/atom{part}/waters.txt".format(
                    base_dir=base_dir,
                    radius=radius,
                    part=part
                )
            )
        )
        droplet_wf.add_task(
            parse_task,
            dependencies=[simulate_task]
        )

        analyze_task = CommandLineTask(
            name='analyze-{radius}A_atom{part}',
            command='{base_dir}/exec/analyze.sh {infile} {outfile}',
            input_files = ["{infile}"],
            output_files = ["{outfile}"],
            params=dict(
                base_dir=base_dir,
                radius=radius,
                part=part,
                infile=parse_task.output_files[0],
                outfile="{base_dir}/results/{radius}A/atom{part}/calculated.txt".format(
                    base_dir=base_dir,
                    radius=radius,
                    part=part
                )
            )
        )
        droplet_wf.add_task(
            analyze_task,
            dependencies=[parse_task]
        )
        analyze_tasks.append(analyze_task)

    # Merge the per-part results of this radius. The analyze tasks are
    # passed directly rather than looked up by name, since a failed
    # lookup would silently produce None.
    combine_parts_task = CommandLineTask(
        name='combine_parts-{radius}A',
        command='{base_dir}/results/combineParts.sh {radius}A',
        input_files = [
            "{base_dir}/results/{radius}A/atom"+str(part)+"/calculated.txt"
            for part in range(1,parts_per_sim+1)
        ],
        output_files=["{base_dir}/results/{radius}A/combined.txt"],
        params=dict(
            base_dir=base_dir,
            radius=radius,
        )
    )
    droplet_wf.add_task(
        combine_parts_task,
        dependencies=analyze_tasks
    )
    
# Merge the combined results of all droplet sizes into a single file.
combine_sims_task = CommandLineTask(
    name='combine_sims',
    command='{base_dir}/results/combineSims.sh',
    input_files=[
        "{base_dir}/results/" + str(size) + "A/combined.txt"
        for size in droplet_radii
    ],
    output_files=["{base_dir}/results/allResults.txt"],
    params={'base_dir': base_dir},
)
# It depends on the combine_parts task of every droplet size,
# which are retrieved from the workflow by name.
droplet_wf.add_task(
    combine_sims_task,
    dependencies=[
        droplet_wf.get_task_by_name('combine_parts-{radius}A'.format(radius=size))
        for size in droplet_radii
    ],
)

# Final step: interactive notebook, run once all results are merged.
analysis_notebook_task = NotebookTask(name='analysis_notebook', interactive=True)
droplet_wf.add_task(analysis_notebook_task, dependencies=[combine_sims_task])

In [55]:
# Draw the workflow graph. Note that draw_dag() returns an ipw.VBox
# holding the bqplot Figure and its Toolbar, so `fig` is that container
# (the Figure itself is fig.children[0]).
fig = droplet_wf.draw_dag()
#tb = bq.Toolbar(figure=fig)
#q = display(fig,tb)
fig

A Jupyter Widget

In [56]:
fig.layout.width=u'400px'

In [57]:
fig.children[0].marks[0].selected

[]

# Workflow GUI Widget

In [58]:
class EditHTML(ipw.VBox):
    """
    Piece of HTML which can be switched between a rendered view and an
    editable text area by means of a button.

    value is the initial HTML source; text_height is the height of the
    text area in pixels.
    """
    def __init__(self, value='', text_height=400):
        super().__init__()

        # The two views of the content and the button which swaps them.
        self.HTML = ipw.HTMLMath(value=value)
        self.Text = ipw.Textarea(value=value)
        self.ToggleButton = ipw.Button(description='Toggle')

        # Entry i of each list belongs to view state i
        # (0: rendered HTML, 1: text area).
        self.elements = [self.HTML, self.Text]
        self.descriptions = ['Edit', 'Render']

        # Keep the content of both views in sync on the front end.
        ipw.jslink((self.HTML, 'value'), (self.Text, 'value'))

        # Size of the text area.
        text_layout = self.Text.layout
        text_layout.height = u'{}px'.format(text_height)
        text_layout.width = u'95%'

        # Start out with the rendered view.
        self.set_view(0)

        self.ToggleButton.on_click(self.toggle)

    def set_view(self, state):
        "Show the view belonging to state and relabel the button to match."
        self.state = state
        visible = self.elements[state]
        self.children = [visible, self.ToggleButton]
        self.ToggleButton.description = self.descriptions[state]

    def toggle(self, caller):
        "Button callback: switch over to the other view."
        other_state = (self.state + 1) % 2
        self.set_view(other_state)

In [59]:
class WorkflowWidget(ipw.HBox):
    """
    Widget to draw DAG via bqplot and provide node-level info/interaction.

    The graph is shown on the left; on the right is a tab widget with a
    readme, the user fields of the selected node ("Info") and the
    content of a log file ("Log"). Debug output of the callbacks is
    collected in self.output_area.
    """

    @property
    def bqgraph(self):
        "bqplot graph mark of the displayed workflow."
        return self.workflow.get_bqgraph()

    def __init__(self, workflow):
        super(WorkflowWidget, self).__init__()

        # Define variables
        self.workflow = workflow
        self._fig_layout = ipw.Layout(width='400px', height='600px')
        self._xs = bq.LinearScale()
        self._ys = bq.LinearScale()
        self._scales = {'x': self._xs, 'y': self._ys}
        margin = 10

        # Define elements
        self._metadata_template = """
        Node name: {name}
        <br>
        Last modified: {date}
        <br>
        Description: {word}
        """
        self._metadata_html = ipw.HTML()

        readme_html = EditHTML(r"""
            <h1>Radiative Transfer</h1>

            The Radiative Transfer Equation is given by

            <p>
            $$\nabla I \cdot \omega = -c\, I(x, \omega) + \int_\Omega \beta(|\omega-\omega'|)\, I(x, \omega')$$
            </p>

            It is useful for
            <ul>
            <li>
            Stellar astrophysics
            </li>
            <li>
            Kelp
            </li>
            <li>
            Nice conversations
            </li>
            </ul>

            And is explained well by the following diagram.
            <br />
            <br />
            <img width=300px src="http://soap.siteturbine.com/uploaded_files/www.oceanopticsbook.info/images/WebBook/0dd27b964e95146d0af2052b67c7b5df.png" />
        """)
        self._notebook_button = ipw.Button(
            description='Open Notebook',
            button_style='success'
        )
        self._log_path_input = ipw.Text(
            description='Log path',
            value='/etc/login.defs'
        )
        self._log_html = ipw.HTML()

        self._readme_area = ipw.VBox([
            readme_html
        ])
        self._info_area = ipw.VBox([
            self._notebook_button,
            self._metadata_html
        ])
        self._log_area = ipw.VBox([
            self._log_path_input,
            self._log_html
        ])

        # draw_dag returns a VBox of [figure, toolbar] and (re)generates
        # the graph mark which the bqgraph property hands out.
        self._graph_container = workflow.draw_dag(layout=self._fig_layout)
        self._graph_figure = self._graph_container.children[0]

        self._tab = ipw.Tab([
            self._readme_area,
            self._info_area,
            self._log_area
        ])

        self.output_area = ipw.Output()

        # Define layout
        self.children = [
            self._graph_container,
            self._tab,
        ]

        # Set attributes
        self._tab.set_title(0, 'Readme')
        self._tab.set_title(1, 'Info')
        self._tab.set_title(2, 'Log')
        self._tab.layout.height = self._fig_layout.height
        self._tab.layout.width = self._fig_layout.width

        #self._graph_figure.layout.border = '3px red solid'
        self._graph_figure.fig_margin = dict(
            left=margin,
            right=margin,
            bottom=margin,
            top=margin
        )
        self._graph_figure.min_aspect_ratio = 0

        # Graph style
        self.bqgraph.selected_style = dict(
            stroke='red'
        )

        # Default selections
        self._tab.selected_index = 0
        self.bqgraph.selected = [0]

        # Logic
        self.bqgraph.observe(self._call_update_metadata_html, names='selected')
        self._log_path_input.on_submit(self._call_read_log)

        # Run updates. The default selection above was made before the
        # observer was registered, so fill in the Info tab explicitly.
        self._call_update_metadata_html({'new': self.bqgraph.selected})
        self._call_read_log()

    def _update_metadata_html(self, metadata):
        "Render the metadata dict as '<b>key:</b> value' lines in the Info tab."
        from html import escape

        # Escape keys and values, so that text such as "<object at 0x..>"
        # is displayed instead of being interpreted as markup.
        rendered = "<br>".join([
            """
            <b>{key}:</b> {value}
            """.format(
                key=escape(str(key)),
                value=escape(str(value))
                )
            for key,value in metadata.items()
        ])

        with self.output_area:
            print(rendered)

        self._metadata_html.value = rendered

    def _call_update_metadata_html(self, change):
        "Observer for the 'selected' trait of the graph: show the selected node's fields."
        # Newly selected node (workflow step)
        # (Only take first if several are selected)
        selection = change['new']

        # Position in this list corresponds to position in node_data.
        # Materialized, since nodes() is not positionally indexable in
        # every networkx version.
        nodes = list(self.workflow.dag.nodes())

        # len() rather than truthiness, because the selection may be
        # delivered as a list or as an array depending on bqplot version.
        if selection is None or len(selection) == 0:
            metadata = {}

        elif selection[0] >= len(nodes):
            # Nothing to show, e.g. default selection on an empty workflow.
            metadata = {}

        else:
            node_num = selection[0]
            node = nodes[node_num]

            with self.output_area:
                print("Selected node {}".format(node_num))

            metadata = {
                attr: getattr(node, attr)
                for attr in node.user_fields
            }

        self._update_metadata_html(metadata)

    def _read_log(self, log_path):
        "Show the content of the file at log_path (or an error message) in the Log tab."
        from html import escape

        try:
            with open(log_path) as log_file:
                log_text = log_file.read()
        except (IOError, UnicodeDecodeError):
            # Missing/unreadable file, or content which is not text.
            log_text = 'Error opening {}'.format(log_path)

        # The file content is arbitrary text, not trusted markup.
        self._log_html.value = escape(log_text)

    def _call_read_log(self, caller=None):
        "Callback of the log path input: (re)load the log file named there."
        log_path = self._log_path_input.value
        self._read_log(log_path)

In [60]:
w = WorkflowWidget(droplet_wf)
w

A Jupyter Widget

In [12]:
w.bqgraph.selected

[0]

In [33]:
droplet_wf.dag.nodes()

[]

In [13]:
w.output_area

A Jupyter Widget

In [14]:
w.bqgraph.node_data[0]

{'input_files': [],
 'label': '0',
 'name': 'gen_mica_10x10',
 'num_cores': 1,
 'output_files': ['$SCRATCH/droplet/gen_droplet/lammps_data/mica_10x10.data'],
 'shape': 'rect',
 'task_type': 'CommandLineTask'}

In [15]:
w.workflow.dag.nodes()[0].__dict__

{'_substitute_lists': ['input_files', 'output_files'],
 '_substitute_strings': ['name', 'task_type', 'command'],
 'children': [],
 'command': '$SCRATCH/droplet/gen_droplet/scripts/gen_mica.sh 10 10 $SCRATCH/droplet/gen_droplet/lammps_data/mica_10x10.data',
 'dependencies': [],
 'index': {<__main__.Workflow at 0x2aab22f8e278>: 0},
 'input_files': [],
 'name': 'gen_mica_10x10',
 'num_cores': 1,
 'output_files': ['$SCRATCH/droplet/gen_droplet/lammps_data/mica_10x10.data'],
 'params': {'base_dir': '$SCRATCH/droplet',
  'nx': 10,
  'ny': 10,
  'out_file': '$SCRATCH/droplet/gen_droplet/lammps_data/mica_10x10.data'},
 'task_type': 'CommandLineTask',
 'user_fields': ['name',
  'task_type',
  'input_files',
  'output_files',
  'num_cores']}

In [16]:
f = w._graph_figure

In [17]:
w.workflow.dag.nodes()[2].name

'combine-20A'

In [18]:
w.bqgraph.node_data[2]

{'input_files': ['$SCRATCH/droplet/gen_droplet/lammps_data/mica_10x10.data',
  '$SCRATCH/droplet/gen_droplet/dump/droplet_20A.lammpstrj'],
 'label': '2',
 'name': 'combine-20A',
 'num_cores': 1,
 'output_files': ['$SCRATCH/droplet/gen_droplet/lammps_data/droplet_on_mica-20A.data'],
 'shape': 'rect',
 'task_type': 'CommandLineTask'}

In [19]:
n.__dict__

NameError: name 'n' is not defined

In [None]:
f.scale_y

In [None]:
droplet_wf.get_bqgraph()