We create a [Sankey diagram](https://plotly.com/python/sankey-diagram/) here to depict the data flows for hypothesis validation.

In [1]:
import plotly.graph_objects as go

In [5]:
# Build a Sankey diagram of the DLFA pipeline: three data sources feed three
# processing steps, which in turn feed three final outputs.

# One (label, hover description) record per node. A node's position in this
# list is the index that the link records below use to refer to it.
nodes = [
    # sources [0, 1, 2]
    ("BRENDA", "BRENDA"),
    ("EC class file", "EC class file"),
    ("SAdLSA", "SAdLSA"),
    # processing [3, 4, 5]
    ("Mystery Process A", "Long name B2"),
    ("Mystery Process B", "Long name C1"),
    ("Mystery Process C", "Long name C2"),
    # final output [6, 7, 8]
    ("Top 10 Consensus",
     "Find consensus between EC predicted across top 10 and from TMalign/AF for a protein"),
    ("Summary Statistics",
     "Get summary statistics across the full dataset"),
    ("ENIGMA + SAdLSA/AF",
     "Map ENIGMA fitness data to hypotheses from SAdLSA/AF"),
]

# Shown in the hover text of links that have no data annotation yet.
# TODO: replace with real annotations; only the first six links have one.
MISSING_LINK_DATA = "n/a"

# One (source index, target index, value, data) record per link. Keeping the
# four fields together guarantees the per-link arrays handed to plotly all
# have the same length, so every link has a value for %{customdata}.
links = [
    # sources -> processing
    (0, 3, 8, "q"),
    (1, 3, 4, "r"),
    (2, 3, 2, "s"),
    (0, 4, 8, "t"),
    (1, 4, 4, "u"),
    (2, 5, 2, "v"),
    # processing -> final output
    (3, 6, 1, MISSING_LINK_DATA),
    (4, 7, 2, MISSING_LINK_DATA),
    (5, 8, 3, MISSING_LINK_DATA),
    (3, 7, 4, MISSING_LINK_DATA),
    (3, 8, 5, MISSING_LINK_DATA),
    (4, 8, 6, MISSING_LINK_DATA),
]

# Transpose the records into the parallel arrays that go.Sankey expects.
node_labels, node_descriptions = (list(column) for column in zip(*nodes))
link_sources, link_targets, link_values, link_data = (
    list(column) for column in zip(*links)
)

fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=node_labels,
        customdata=node_descriptions,
        # <extra></extra> suppresses the secondary hover box.
        hovertemplate='Node %{customdata} has total value %{value}<extra></extra>',
        color="blue",
    ),
    link=dict(
        source=link_sources,
        target=link_targets,
        value=link_values,
        customdata=link_data,
        hovertemplate=(
            'Link from node %{source.customdata}<br />'
            'to node %{target.customdata}<br />has value %{value}'
            '<br />and data %{customdata}<extra></extra>'
        ),
    ),
)])

fig.update_layout(
    title_text="Deep Learning-based Functional Annotation (DLFA) Pipeline",
    font_size=10,
)
fig.show()