From c85790b3d8bef6e60508629bbc60637c33a86365 Mon Sep 17 00:00:00 2001
From: Lorenzo Cavazzi <43481553+lorenzo-cavazzi@users.noreply.github.com>
Date: Mon, 25 Jul 2022 19:11:14 +0200
Subject: [PATCH] feat(workflow): add dot output on workflow visualize (#3032)

- Fixes also a bug when providing the wrong columns

fix #2376

Co-authored-by: Ralf Grubenmann <ralf.grubenmann@sdsc.ethz.ch>
---
 docs/spelling_wordlist.txt                 |  1 +
 renku/command/format/workflow.py           |  5 ++
 renku/command/view_model/activity_graph.py | 86 +++++++++++++++++++-
 renku/ui/cli/workflow.py                   | 95 ++++++++++++++--------
 tests/cli/test_workflow.py                 | 18 ++++
 5 files changed, 172 insertions(+), 33 deletions(-)

diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index e62b7e40b2..5dc1224226 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -79,6 +79,7 @@ Fortran
 GitLab
 GitPython
 GraphQL
+graphviz
 gapped
 git-lfs
 gitattributes
diff --git a/renku/command/format/workflow.py b/renku/command/format/workflow.py
index 619f303266..2e7d3f8ed8 100644
--- a/renku/command/format/workflow.py
+++ b/renku/command/format/workflow.py
@@ -91,3 +91,8 @@ def json(workflows, **kwargs):
     "description": ("short_description", "description"),
     "command": ("full_command", "command"),
 }
+
+WORKFLOW_VISUALIZE_FORMATS = {
+    "console": "console",
+    "dot": "dot",
+}
diff --git a/renku/command/view_model/activity_graph.py b/renku/command/view_model/activity_graph.py
index ccfea0eec5..c7d98e7f08 100644
--- a/renku/command/view_model/activity_graph.py
+++ b/renku/command/view_model/activity_graph.py
@@ -18,9 +18,12 @@
 """Activity graph view model."""
 
 from datetime import datetime
+from itertools import repeat
 from textwrap import shorten
 from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple
 
+from renku.core import errors
+
 if TYPE_CHECKING:
     from grandalf.graphs import Edge
 
@@ -79,6 +82,45 @@ def _subgraph_order_key(self, subgraph) -> datetime:
 
         return max(activity_times)
 
+    def _format_vertex_raw(self, node, columns: List[Callable]) -> str:
+        """Return the quoted and escaped text of a node, usable as a Dot ID.
+
+        Args:
+            node: The node to format; activities are rendered through ``columns``.
+            columns (List[Callable]): The fields to include in the node text.
+
+        Returns:
+            double-quoted string representation of node
+        """
+        import json
+
+        from renku.domain_model.provenance.activity import Activity
+
+        if isinstance(node, Activity):
+            text = "\n".join(c(node) for c in columns)
+        else:
+            text = node
+
+        # NOTE: JSON quoting escapes double quotes; keep non-ASCII as is, Dot has no "\uXXXX" escape
+        return json.dumps(text, ensure_ascii=False)
+
+    def _get_lambda_columns(self, columns):
+        """Return the callables rendering the given columns or fail on unknown names.
+
+        Args:
+            columns (str): comma-separated column names.
+
+        Returns:
+            List[Callable] lambda columns
+        """
+        names = columns.split(",")
+        # NOTE: collect every unknown column so that all of them are reported at once
+        wrong_values = ", ".join(name for name in names if name not in ACTIVITY_GRAPH_COLUMNS)
+        if wrong_values:
+            suggestion = ",".join(ACTIVITY_GRAPH_COLUMNS.keys())
+            raise errors.ParameterError(f"you can use any of '{suggestion}'", f"columns '{wrong_values}'")
+        return [ACTIVITY_GRAPH_COLUMNS[name] for name in names]
+
     def layout_graph(self, columns):
         """Create a Sugiyama layout of the graph.
 
@@ -92,7 +134,7 @@ def layout_graph(self, columns):
 
         from renku.domain_model.provenance.activity import Activity
 
-        columns = [ACTIVITY_GRAPH_COLUMNS[c] for c in columns.split(",")]
+        columns = self._get_lambda_columns(columns)
 
         self.layouts: List[SugiyamaLayout] = []
 
@@ -184,6 +226,48 @@ def _add_edges_to_canvas(
         existing_edges.extend(new_edges)
         return max_y, edge_color
 
+    def dot_representation(self, columns: str) -> str:
+        """Return the graph as a Graphviz Dot string.
+
+        Every edge is written as ``source -> target;`` and every node that is
+        part of no edge is written on its own as ``node;``. Node texts come from
+        ``_format_vertex_raw`` and are therefore already quoted and escaped.
+
+        Args:
+            columns(str): Comma-separated columns to include in node text.
+
+        Returns:
+            string representing the Graphviz Dot graph
+
+        Raises:
+            errors.ParameterError: If ``columns`` contains an unknown column name.
+        """
+        import io
+
+        # resolve the columns first so that a wrong name fails before anything is written
+        columns_callable = self._get_lambda_columns(columns)
+
+        output = io.StringIO()
+        output.write("digraph {\n")
+
+        # add edges and track visited nodes
+        visited_nodes = set()
+        for edge in self.graph.edges:
+            vertexes = tuple(map(self._format_vertex_raw, edge, repeat(columns_callable, 2)))
+            # NOTE: statements are not separated by newlines, ";" is the separator
+            output.write(f"{vertexes[0]} -> {vertexes[1]};")
+            visited_nodes.update(vertexes)
+
+        # add missing nodes
+        for node in self.graph.nodes:
+            lonely_node = self._format_vertex_raw(node, columns_callable)
+            if lonely_node not in visited_nodes:
+                # NOTE: the text is already a quoted string, quoting it again would break the Dot syntax
+                output.write(f"{lonely_node};")
+
+        output.write("\n}")
+        return output.getvalue()
+
     def text_representation(
         self, columns: str, color: bool = True, ascii=False
     ) -> Tuple[Optional[str], Optional[List[List[Tuple["Point", "Point", Any]]]]]:
diff --git a/renku/ui/cli/workflow.py b/renku/ui/cli/workflow.py
index 97c0cff3d2..a01c16fe62 100644
--- a/renku/ui/cli/workflow.py
+++ b/renku/ui/cli/workflow.py
@@ -214,7 +214,7 @@
             --map output=output_{iter_index}.txt my-run
 
 This would execute ``my-run`` three times, where ``parameter-1`` values would be
-``10``, `20`` and ``30`` and the producing output files ``output_0.txt``,
+``10``, ``20`` and ``30`` and the producing output files ``output_0.txt``,
 ``output_1.txt`` and ``output_2.txt`` files in this order.
 
 In some cases it may be desirable to avoid updating the renku metadata
@@ -300,7 +300,7 @@
 
 .. code-block:: console
 
-   $ renku run --name step1-- cp input intermediate
+   $ renku run --name step1 -- cp input intermediate
    $ renku run --name step2 -- cp intermediate output
    $ renku workflow compose my-composed-workflow step1 step2
 
@@ -622,6 +622,18 @@
 This will allow you to navigate between workflow execution and see details
 by pressing the <Enter> key.
 
+If you prefer to elaborate the output graph further, or if you wish to export
+it for any reason, you can use the ``--format`` option to specify an output
+format.
+
+The following example generates the graph using the ``dot`` format. It can
+be stored in a file or piped directly to any compatible tool. Here we
+use the ``dot`` command line tool from graphviz to generate an SVG file.
+
+.. code-block:: console
+
+   $ renku workflow visualize --format dot <path> | dot -Tsvg > graph.svg
+
 Use ``renku workflow visualize -h`` to see all available options.
 
 .. cheatsheet::
@@ -702,7 +714,7 @@
 
 import renku.ui.cli.utils.color as color
 from renku.command.echo import ERROR
-from renku.command.format.workflow import WORKFLOW_COLUMNS, WORKFLOW_FORMATS
+from renku.command.format.workflow import WORKFLOW_COLUMNS, WORKFLOW_FORMATS, WORKFLOW_VISUALIZE_FORMATS
 from renku.command.view_model.activity_graph import ACTIVITY_GRAPH_COLUMNS
 from renku.core import errors
 from renku.ui.cli.utils.callback import ClickCallback
@@ -1148,24 +1160,32 @@ def execute(
 )
 @click.option("-x", "--exclude-files", is_flag=True, help="Hide file nodes, only show Runs.")
 @click.option("-a", "--ascii", is_flag=True, help="Only use Ascii characters for formatting.")
-@click.option("-i", "--interactive", is_flag=True, help="Interactively explore run graph.")
-@click.option("--no-color", is_flag=True, help="Don't colorize output.")
-@click.option("--pager", is_flag=True, help="Force use pager (less) for output.")
-@click.option("--no-pager", is_flag=True, help="Don't use pager (less) for output.")
 @click.option(
     "--revision",
     type=click.STRING,
     help="Git revision to generate the graph for.",
 )
+@click.option(
+    "--format",
+    type=click.Choice(list(WORKFLOW_VISUALIZE_FORMATS.keys())),
+    default="console",
+    help="Choose an output format.",
+)
+@click.option(
+    "-i", "--interactive", is_flag=True, help="Interactively explore run graph. Only available for console output"
+)
+@click.option("--no-color", is_flag=True, help="Don't colorize console output.")
+@click.option("--pager", is_flag=True, help="Force use pager (less) for console output.")
+@click.option("--no-pager", is_flag=True, help="Don't use pager (less) for console output.")
 @click.argument("paths", type=click.Path(exists=False, dir_okay=True), nargs=-1)
-def visualize(sources, columns, exclude_files, ascii, interactive, no_color, pager, no_pager, revision, paths):
+def visualize(sources, columns, exclude_files, ascii, revision, format, interactive, no_color, pager, no_pager, paths):
     """Visualization of workflows that produced outputs at the specified paths.
 
     Either PATHS or --from need to be set.
     """
     from renku.command.workflow import visualize_graph_command
 
-    if pager and no_pager:
+    if format == WORKFLOW_VISUALIZE_FORMATS["console"] and pager and no_pager:
         raise errors.ParameterError("Can't use both --pager and --no-pager.")
     if revision and not paths:
         raise errors.ParameterError("Can't use --revision without specifying PATHS.")
@@ -1175,36 +1195,47 @@ def visualize(sources, columns, exclude_files, ascii, interactive, no_color, pag
         .build()
         .execute(sources=sources, targets=paths, show_files=not exclude_files, revision=revision)
     )
-    text_output, navigation_data = result.output.text_representation(columns=columns, color=not no_color, ascii=ascii)
+    if format == WORKFLOW_VISUALIZE_FORMATS["dot"]:
+        output = result.output.dot_representation(columns=columns)
 
-    if not text_output:
+        if not output:
+            return
+
+        click.echo(output)
         return
+    else:
+        text_output, navigation_data = result.output.text_representation(
+            columns=columns, color=not no_color, ascii=ascii
+        )
 
-    if not interactive:
-        max_width = max(node[1].x for layer in navigation_data for node in layer)
-        tty_size = shutil.get_terminal_size(fallback=(120, 120))
+        if not text_output:
+            return
 
-        if no_pager or not sys.stdout.isatty() or os.system(f"less 2>{os.devnull}") != 0:
-            use_pager = False
-        elif pager:
-            use_pager = True
-        elif max_width < tty_size.columns:
-            use_pager = False
-        else:
-            use_pager = True
+        if not interactive:
+            max_width = max(node[1].x for layer in navigation_data for node in layer)
+            tty_size = shutil.get_terminal_size(fallback=(120, 120))
 
-        if use_pager:
-            show_text_with_pager(text_output)
-        else:
-            click.echo(text_output)
-        return
+            if no_pager or not sys.stdout.isatty() or os.system(f"less 2>{os.devnull}") != 0:
+                use_pager = False
+            elif pager:
+                use_pager = True
+            elif max_width < tty_size.columns:
+                use_pager = False
+            else:
+                use_pager = True
 
-    from renku.ui.cli.utils.curses import CursesActivityGraphViewer
+            if use_pager:
+                show_text_with_pager(text_output)
+            else:
+                click.echo(text_output)
+            return
 
-    viewer = CursesActivityGraphViewer(
-        text_output, navigation_data, result.output.vertical_space, use_color=not no_color
-    )
-    viewer.run()
+        from renku.ui.cli.utils.curses import CursesActivityGraphViewer
+
+        viewer = CursesActivityGraphViewer(
+            text_output, navigation_data, result.output.vertical_space, use_color=not no_color
+        )
+        viewer.run()
 
 
 @workflow.command()
diff --git a/tests/cli/test_workflow.py b/tests/cli/test_workflow.py
index 013765f9ee..926eb0c61f 100644
--- a/tests/cli/test_workflow.py
+++ b/tests/cli/test_workflow.py
@@ -917,6 +917,24 @@ def test_workflow_visualize_non_interactive(runner, project, client, workflow_gr
     assert "H" in result.output
 
 
+def test_workflow_visualize_dot(runner, project, client, workflow_graph):
+    """Test that ``renku workflow visualize --format dot`` prints the expected Dot edges."""
+    result = runner.invoke(cli, ["workflow", "visualize", "--format", "dot", "--revision", "HEAD^", "H", "S"])
+
+    assert 0 == result.exit_code, format_result_exception(result)
+    assert '"Y" -> "bash -c \\"cat X Y | tee R S\\"";' in result.output
+    assert '"X" -> "bash -c \\"cat X Y | tee R S\\"";' in result.output
+    assert '"bash -c \\"cat X Y | tee R S\\"" -> "R";' in result.output
+    assert '"bash -c \\"cat X Y | tee R S\\"" -> "S";' in result.output
+    assert 4 == result.output.count('"bash -c \\"cat X Y | tee R S\\"')
+
+    # NOTE: "H" must show up exactly once, as the target of a single edge and never as a source
+    assert 1 == result.output.count('"echo other > H" -> "H"')
+    assert 1 == result.output.count('-> "H"')
+    assert 0 == result.output.count('"H" ->')
+    assert 1 == result.output.count('"H"')
+
+
 @pytest.mark.skip(
     "Doesn't actually work, not really a tty available in github actions, "
     "see https://github.com/actions/runner/issues/241"