In [1]:
import plotly.graph_objects as go

# Node labels. Index 0 is the shared pool; after it comes one
# (image, video, text) triple per pipeline stage. The link table below
# refers to nodes by their position in this list.
labels = ["Data Pool"]
labels += ["Image Data", "Video Data", "Text Data"]
labels += ["I Preprocess", "V Preprocess", "T Preprocess"]
labels += ["I Filter", "V Filter", "T Filter"]
labels += ["I Training", "V Training", "T Training"]
labels += ["I Eval", "V Eval", "T Eval"]

# Every link is written once as (source index, target index, value, colour),
# so the four parallel lists handed to Plotly always have matching lengths.
flows = [
    # Pool -> modalities
    (0, 1, 100, "rgba(255,0,0,0.4)"),
    (0, 2, 120, "rgba(255,165,0,0.4)"),
    (0, 3, 80, "rgba(255,215,0,0.4)"),
    # modalities -> preprocess
    (1, 4, 90, "rgba(0,128,0,0.4)"),
    (2, 5, 110, "rgba(0,128,128,0.4)"),
    (3, 6, 60, "rgba(0,255,255,0.4)"),
    # preprocess -> filter
    (4, 7, 75, "rgba(0,0,255,0.4)"),
    (5, 8, 95, "rgba(139,0,255,0.4)"),
    (6, 9, 50, "rgba(255,0,255,0.4)"),
    # filter -> training
    (7, 10, 60, "rgba(220,20,60,0.4)"),
    (8, 11, 70, "rgba(34,139,34,0.4)"),
    (9, 12, 40, "rgba(0,191,255,0.4)"),
    # training -> eval
    (10, 13, 55, "rgba(255,140,0,0.4)"),
    (11, 14, 65, "rgba(148,0,211,0.4)"),
    (12, 15, 35, "rgba(199,21,133,0.4)"),
]

# Unpack the table column by column into the lists the Sankey trace expects.
source = [src for src, _, _, _ in flows]
target = [dst for _, dst, _, _ in flows]
values = [amount for _, _, amount, _ in flows]
link_colors = [shade for _, _, _, shade in flows]

# Node styling: one shared fill colour and a thin black outline.
node_style = {
    "pad": 20,
    "thickness": 25,
    "line": {"color": "black", "width": 0.5},
    "label": labels,
    "color": "rgba(66, 135, 245, 0.7)",
}

# Link styling: each link gets its own semi-transparent colour.
link_style = {
    "source": source,
    "target": target,
    "value": values,
    "color": link_colors,
}

sankey = go.Sankey(node=node_style, link=link_style)

fig = go.Figure(data=[sankey])
fig.update_layout(
    title_text="Complex Multi-Stage Data Pipeline - Sankey Diagram",
    font_size=14,
)
fig.show()


In [2]:
import plotly.graph_objects as go

# Node labels. Index 0 is the pool; after it comes one
# (image, video, text) triple per pipeline stage. Links below refer to
# nodes by their position in this list.
labels = ["Pool"]
labels += ["Image Data", "Video Data", "Text Data"]
labels += ["Image Preprocess", "Video Preprocess", "Text Preprocess"]
labels += ["Image Filter", "Video Filter", "Text Filter"]
labels += ["Image Training", "Video Training", "Text Training"]
labels += ["Image Eval", "Video Eval", "Text Eval"]

# Stage-to-stage links, each written as (source index, target index, value).
stage_links = [
    # Pool -> modalities
    (0, 1, 150), (0, 2, 150), (0, 3, 100),
    # Modalities -> preprocess
    (1, 4, 140), (2, 5, 130), (3, 6, 90),
    # Preprocess -> filter
    (4, 7, 120), (5, 8, 110), (6, 9, 80),
    # Filter -> training
    (7, 10, 100), (8, 11, 95), (9, 12, 70),
    # Training -> eval
    (10, 13, 90), (11, 14, 85), (12, 15, 60),
]

# Crosslinks: links that jump between modalities and/or skip stages.
cross_links = [
    (1, 5, 30),    # Image Data -> Video Preprocess
    (2, 7, 20),    # Video Data -> Image Filter
    (8, 12, 15),   # Video Filter -> Text Training
    (10, 14, 10),  # Image Training -> Video Eval
    (5, 15, 25),   # Video Preprocess -> Text Eval
    (7, 15, 20),   # Image Filter -> Text Eval
    (3, 10, 18),   # Text Data -> Image Training
]

# Stage links come first, then crosslinks; the three parallel lists are
# derived from the combined table so they cannot fall out of step.
all_links = stage_links + cross_links
source = [src for src, _, _ in all_links]
target = [dst for _, dst, _ in all_links]
values = [amount for _, _, amount in all_links]

node_style = {
    "label": labels,
    "pad": 18,
    "thickness": 22,
    "line": {"color": "black", "width": 0.6},
}

link_style = {
    "source": source,
    "target": target,
    "value": values,
    # A single neutral colour for every link keeps crossing edges visible.
    "color": "rgba(0,0,0,0.25)",
}

sankey = go.Sankey(arrangement="freeform", node=node_style, link=link_style)

fig = go.Figure(data=[sankey])
fig.update_layout(
    title_text="Complex Data Processing Pipeline with Cross-Stage Links",
    font_size=13,
)

fig.show()
