Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes to the DAG path visualizations #518

Merged
merged 3 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,11 @@ You should see the following output:
4 40 200 33.333333 0.200
5 50 400 43.333333 0.125

You should see the following image if you ran `dr.visualize_execution(output_columns, './my-dag.dot', {})`:
You should see the following image if you ran `dr.visualize_execution(output_columns, './my-dag.dot', {"format": "png"}, orient="TB")`:

![hello_world_image](hello_world_image.png)
Note: for readability, our visualizations treat `Inputs` in a special manner, so you'll likely see input
nodes repeated.

Congratulations - you just created your Hamilton dataflow that created a dataframe!

Expand Down
Binary file modified examples/hello_world/a_path.dot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified examples/hello_world/my_dag.dot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
44 changes: 22 additions & 22 deletions examples/hello_world/my_notebook.ipynb

Large diffs are not rendered by default.

141 changes: 90 additions & 51 deletions examples/lineage/lineage_snippets.ipynb

Large diffs are not rendered by default.

Binary file modified examples/lineage/lineage_v1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
116 changes: 111 additions & 5 deletions hamilton/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,10 @@ def _visualize_execution_helper(
inputs: Dict[str, Any] = None,
graphviz_kwargs: dict = None,
overrides: Dict[str, Any] = None,
show_legend: bool = True,
orient: str = "LR",
hide_inputs: bool = False,
deduplicate_inputs: bool = False,
):
"""Helper function to visualize execution, using a passed-in function graph.

Expand All @@ -559,8 +563,13 @@ def _visualize_execution_helper(
:param render_kwargs:
:param inputs:
:param graphviz_kwargs:
:param show_legend:
:param orient:
:param hide_inputs:
:param deduplicate_inputs:
:return:
"""
# TODO should determine if the visualization logic should live here or in the graph.py module
nodes, user_nodes = fn_graph.get_upstream_nodes(final_vars, inputs, overrides)
Driver.validate_inputs(fn_graph, adapter, user_nodes, inputs, nodes)
node_modifiers = {fv: {graph.VisualizationNodeModifiers.IS_OUTPUT} for fv in final_vars}
Expand All @@ -585,6 +594,10 @@ def _visualize_execution_helper(
graphviz_kwargs=graphviz_kwargs,
node_modifiers=node_modifiers,
strictly_display_only_passed_in_nodes=True,
show_legend=show_legend,
orient=orient,
hide_inputs=hide_inputs,
deduplicate_inputs=deduplicate_inputs,
)
except ImportError as e:
logger.warning(f"Unable to import {e}", exc_info=True)
Expand All @@ -598,6 +611,10 @@ def visualize_execution(
inputs: Dict[str, Any] = None,
graphviz_kwargs: dict = None,
overrides: Dict[str, Any] = None,
show_legend: bool = True,
orient: str = "LR",
hide_inputs: bool = False,
deduplicate_inputs: bool = False,
) -> Optional["graphviz.Digraph"]: # noqa F821
"""Visualizes Execution.

Expand All @@ -620,6 +637,13 @@ def visualize_execution(
E.g. dict(graph_attr={'ratio': '1'}) will set the aspect ratio to be equal of the produced image.
See https://graphviz.org/doc/info/attrs.html for options.
:param overrides: Optional. Overrides to the DAG.
:param show_legend: If True, add a legend to the visualization based on the DAG's nodes.
:param orient: `LR` stands for "left to right". Accepted values are TB, LR, BT, RL.
`orient` will be overridden by the value of `graphviz_kwargs['graph_attr']['rankdir']`
see (https://graphviz.org/docs/attr-types/rankdir/)
:param hide_inputs: If True, no input nodes are displayed.
:param deduplicate_inputs: If True, remove duplicate input nodes.
Can improve readability depending on the specifics of the DAG.
:return: the graphviz object if you want to do more with it.
If returned as the result in a Jupyter Notebook cell, it will render.
"""
Expand All @@ -633,6 +657,10 @@ def visualize_execution(
inputs,
graphviz_kwargs,
overrides,
show_legend=show_legend,
orient=orient,
hide_inputs=hide_inputs,
deduplicate_inputs=deduplicate_inputs,
)

@capture_function_usage
Expand Down Expand Up @@ -665,6 +693,10 @@ def display_downstream_of(
output_file_path: str = None,
render_kwargs: dict = None,
graphviz_kwargs: dict = None,
show_legend: bool = True,
orient: str = "LR",
hide_inputs: bool = False,
deduplicate_inputs: bool = False,
) -> Optional["graphviz.Digraph"]: # noqa F821
"""Creates a visualization of the DAG starting from the passed in function name(s).

Expand All @@ -678,17 +710,37 @@ def display_downstream_of(
If you do not want to view the file, pass in `{'view':False}`.
:param graphviz_kwargs: Kwargs to be passed to the graphviz graph object to configure it.
E.g. dict(graph_attr={'ratio': '1'}) will set the aspect ratio to be equal of the produced image.
:param show_legend: If True, add a legend to the visualization based on the DAG's nodes.
:param orient: `LR` stands for "left to right". Accepted values are TB, LR, BT, RL.
`orient` will be overridden by the value of `graphviz_kwargs['graph_attr']['rankdir']`
see (https://graphviz.org/docs/attr-types/rankdir/)
:param hide_inputs: If True, no input nodes are displayed.
:param deduplicate_inputs: If True, remove duplicate input nodes.
Can improve readability depending on the specifics of the DAG.
:return: the graphviz object if you want to do more with it.
If returned as the result in a Jupyter Notebook cell, it will render.
"""
downstream_nodes = self.graph.get_downstream_nodes(list(node_names))

nodes_to_display = set()
for n in downstream_nodes:
nodes_to_display.add(n)

for d in n.dependencies:
if d not in downstream_nodes:
nodes_to_display.add(d)

try:
return self.graph.display(
downstream_nodes,
nodes_to_display,
output_file_path,
render_kwargs=render_kwargs,
graphviz_kwargs=graphviz_kwargs,
strictly_display_only_passed_in_nodes=False,
strictly_display_only_passed_in_nodes=True,
show_legend=show_legend,
orient=orient,
hide_inputs=hide_inputs,
deduplicate_inputs=deduplicate_inputs,
)
except ImportError as e:
logger.warning(f"Unable to import {e}", exc_info=True)
Expand All @@ -700,6 +752,10 @@ def display_upstream_of(
output_file_path: str = None,
render_kwargs: dict = None,
graphviz_kwargs: dict = None,
show_legend: bool = True,
orient: str = "LR",
hide_inputs: bool = False,
deduplicate_inputs: bool = False,
) -> Optional["graphviz.Digraph"]: # noqa F821
"""Creates a visualization of the DAG going backwards from the passed in function name(s).

Expand All @@ -713,6 +769,13 @@ def display_upstream_of(
If you do not want to view the file, pass in `{'view':False}`. Optional.
:param graphviz_kwargs: Kwargs to be passed to the graphviz graph object to configure it.
E.g. dict(graph_attr={'ratio': '1'}) will set the aspect ratio to be equal of the produced image. Optional.
:param show_legend: If True, add a legend to the visualization based on the DAG's nodes.
:param orient: `LR` stands for "left to right". Accepted values are TB, LR, BT, RL.
`orient` will be overridden by the value of `graphviz_kwargs['graph_attr']['rankdir']`
see (https://graphviz.org/docs/attr-types/rankdir/)
:param hide_inputs: If True, no input nodes are displayed.
:param deduplicate_inputs: If True, remove duplicate input nodes.
Can improve readability depending on the specifics of the DAG.
:return: the graphviz object if you want to do more with it.
If returned as the result in a Jupyter Notebook cell, it will render.
"""
Expand All @@ -726,8 +789,12 @@ def display_upstream_of(
output_file_path,
render_kwargs=render_kwargs,
graphviz_kwargs=graphviz_kwargs,
strictly_display_only_passed_in_nodes=False,
strictly_display_only_passed_in_nodes=True,
skrawcz marked this conversation as resolved.
Show resolved Hide resolved
node_modifiers=node_modifiers,
show_legend=show_legend,
orient=orient,
hide_inputs=hide_inputs,
deduplicate_inputs=deduplicate_inputs,
)
except ImportError as e:
logger.warning(f"Unable to import {e}", exc_info=True)
Expand Down Expand Up @@ -792,6 +859,10 @@ def visualize_path_between(
render_kwargs: dict = None,
graphviz_kwargs: dict = None,
strict_path_visualization: bool = False,
show_legend: bool = True,
orient: str = "LR",
hide_inputs: bool = False,
deduplicate_inputs: bool = False,
) -> Optional["graphviz.Digraph"]: # noqa F821
"""Visualizes the path between two nodes.

Expand All @@ -807,6 +878,13 @@ def visualize_path_between(
E.g. dict(graph_attr={'ratio': '1'}) will set the aspect ratio to be equal of the produced image.
:param strict_path_visualization: If True, only the nodes in the path will be visualized. If False, the
nodes in the path and their dependencies, i.e. parents, will be visualized.
:param show_legend: If True, add a legend to the visualization based on the DAG's nodes.
:param orient: `LR` stands for "left to right". Accepted values are TB, LR, BT, RL.
`orient` will be overridden by the value of `graphviz_kwargs['graph_attr']['rankdir']`
see (https://graphviz.org/docs/attr-types/rankdir/)
:param hide_inputs: If True, no input nodes are displayed.
:param deduplicate_inputs: If True, remove duplicate input nodes.
Can improve readability depending on the specifics of the DAG.
:return: graphviz object.
:raise ValueError: if the upstream or downstream node names are not found in the graph,
or there is no path between them.
Expand Down Expand Up @@ -839,14 +917,27 @@ def visualize_path_between(
if n.name not in node_modifiers:
node_modifiers[n.name] = set()
node_modifiers[n.name].add(graph.VisualizationNodeModifiers.IS_PATH)

nodes_to_display = set()
for n in nodes_for_path:
nodes_to_display.add(n)

if strict_path_visualization is False:
for d in n.dependencies:
nodes_to_display.add(d)

try:
return self.graph.display(
nodes_for_path,
nodes_to_display,
output_file_path,
render_kwargs=render_kwargs,
graphviz_kwargs=graphviz_kwargs,
node_modifiers=node_modifiers,
strictly_display_only_passed_in_nodes=strict_path_visualization,
strictly_display_only_passed_in_nodes=True,
skrawcz marked this conversation as resolved.
Show resolved Hide resolved
show_legend=show_legend,
orient=orient,
hide_inputs=hide_inputs,
deduplicate_inputs=deduplicate_inputs,
)
except ImportError as e:
logger.warning(f"Unable to import {e}", exc_info=True)
Expand Down Expand Up @@ -1094,6 +1185,10 @@ def visualize_materialization(
inputs: Dict[str, Any] = None,
graphviz_kwargs: dict = None,
overrides: Dict[str, Any] = None,
show_legend: bool = True,
orient: str = "LR",
hide_inputs: bool = False,
deduplicate_inputs: bool = False,
) -> Optional["graphviz.Digraph"]: # noqa F821
"""Visualizes materialization. This helps give you a sense of how materialization
will impact the DAG.
Expand All @@ -1105,6 +1200,13 @@ def visualize_materialization(
:param inputs: Inputs to pass to execution. Optional.
:param graphviz_kwargs: Arguments to pass to graphviz. Optional.
:param overrides: Overrides to pass to execution. Optional.
:param show_legend: If True, add a legend to the visualization based on the DAG's nodes.
:param orient: `LR` stands for "left to right". Accepted values are TB, LR, BT, RL.
`orient` will be overridden by the value of `graphviz_kwargs['graph_attr']['rankdir']`
see (https://graphviz.org/docs/attr-types/rankdir/)
:param hide_inputs: If True, no input nodes are displayed.
:param deduplicate_inputs: If True, remove duplicate input nodes.
Can improve readability depending on the specifics of the DAG.
:return: The graphviz graph, if you want to do something with it
"""
if additional_vars is None:
Expand All @@ -1125,6 +1227,10 @@ def visualize_materialization(
inputs,
graphviz_kwargs,
overrides,
show_legend=show_legend,
orient=orient,
hide_inputs=hide_inputs,
deduplicate_inputs=deduplicate_inputs,
)

def validate_execution(
Expand Down
43 changes: 26 additions & 17 deletions hamilton/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def create_graphviz_graph(
Can improve readability depending on the specifics of the DAG.
:return: a graphviz.Digraph; use this to render/save a graph representation.
"""
PATH_COLOR = "#7A3B69"
PATH_COLOR = "red"

import graphviz

Expand Down Expand Up @@ -283,7 +283,6 @@ def _get_edge_style(from_type: str, to_type: str) -> Dict:
edge_style = dict()

if from_type == "expand":
print(from_type, to_type)
edge_style.update(
dir="both",
arrowhead="crow",
Expand Down Expand Up @@ -354,21 +353,6 @@ def _get_legend(node_types: Set[str]):
# prefer having the conditions explicit for now since they rely on
# heterogeneous VisualizationNodeModifiers and node.Node.node_role.
# Otherwise, it's difficult to manage seen nodes and the legend.
if node_modifiers.get(n.name):
modifiers = node_modifiers[n.name]
if VisualizationNodeModifiers.IS_OUTPUT in modifiers:
modifier_style = _get_function_modifier_style("output")
node_style.update(**modifier_style)
seen_node_types.add("output")

if VisualizationNodeModifiers.IS_OVERRIDE in modifiers:
modifier_style = _get_function_modifier_style("override")
node_style.update(**modifier_style)
seen_node_types.add("override")

if VisualizationNodeModifiers.IS_PATH in modifiers:
node_style["color"] = PATH_COLOR

if n.node_role == node.NodeType.EXPAND:
modifier_style = _get_function_modifier_style("expand")
node_style.update(**modifier_style)
Expand All @@ -386,12 +370,30 @@ def _get_legend(node_types: Set[str]):
node_style.update(**modifier_style)
seen_node_types.add("materializer")

if node_modifiers.get(n.name):
modifiers = node_modifiers[n.name]
if VisualizationNodeModifiers.IS_OUTPUT in modifiers:
modifier_style = _get_function_modifier_style("output")
node_style.update(**modifier_style)
seen_node_types.add("output")

if VisualizationNodeModifiers.IS_OVERRIDE in modifiers:
modifier_style = _get_function_modifier_style("override")
node_style.update(**modifier_style)
seen_node_types.add("override")

if VisualizationNodeModifiers.IS_PATH in modifiers:
# use PATH_COLOR only if no color applied, edges provide enough clarity
# currently, only EXPAND and COLLECT use the `color` attribute
node_style["color"] = node_style.get("color", PATH_COLOR)

digraph.node(n.name, label=label, **node_style)

# create edges
input_sets = dict()
for n in nodes:
to_type = "collect" if n.node_role == node.NodeType.COLLECT else ""
to_modifiers = node_modifiers.get(n.name, set())

input_nodes = set()
for d in n.dependencies:
Expand All @@ -406,7 +408,14 @@ def _get_legend(node_types: Set[str]):
continue

from_type = "expand" if d.node_role == node.NodeType.EXPAND else ""
dependency_modifiers = node_modifiers.get(d.name, set())
edge_style = _get_edge_style(from_type, to_type)
if (
VisualizationNodeModifiers.IS_PATH in dependency_modifiers
and VisualizationNodeModifiers.IS_PATH in to_modifiers
):
edge_style["color"] = PATH_COLOR

digraph.edge(d.name, n.name, **edge_style)

# skip input node creation
Expand Down
Binary file modified hello_world_image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.