In [23]:
# ================================
# DAMA-Aligned GBP Reporting DFD
# WITH ORCHESTRATION LAYER ADDED
# (No changes to your existing structure)
# Colab-Executable Production Code
# ================================

# Install dependencies (Colab safe)
!apt-get -qq install graphviz
!pip install -q graphviz

from graphviz import Digraph

# Directed graph that holds the whole data-flow diagram; rendered as PDF.
dfd = Digraph(name="GBP_DFD_DAMA_Compliant", format="pdf")

# Graph-level attributes, applied as three separate statements in order:
#   1. left-to-right ranks, orthogonal edge routing, node/rank spacing
#   2. default font
#   3. `size` bounds the drawing to A3 dimensions (11.7 x 16.5 inches)
#      for better legibility
for graph_attrs in (
    {"rankdir": "LR", "splines": "ortho", "nodesep": "1.6", "ranksep": "2.0"},
    {"fontname": "Helvetica"},
    {"size": "11.7,16.5"},
):
    dfd.attr(**graph_attrs)

# ==========================================================
# SYSTEM BOUNDARY
# ==========================================================
with dfd.subgraph(name="cluster_system") as system:
    # Dashed outer frame that encloses every layer declared below.
    system.attr(
        label="GBP Reporting System",
        style="dashed",
        fontsize="16",
        fontname="Helvetica",
    )

    # ------------------------------------------------------
    # Source systems layer: one filled box per upstream system.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_sources") as sources:
        sources.attr(
            label="Source Systems\nOwner: IT Operations Team",
            style="filled",
            color="lightgrey",
        )
        # Node defaults shared by every source system in this cluster.
        sources.attr("node", shape="box", style="filled", fillcolor="#ff4d6d")

        for node_id, node_label in (
            ("CRM", "CRM\n(Customers)\nOwner: CRM Admin"),
            ("ERP", "ERP\n(Orders)\nOwner: ERP Admin"),
            ("Inventory", "Inventory\n(Products)\nOwner: Inventory Manager"),
            ("Shipping", "Shipping\n(Logistics)\nOwner: Logistics Manager"),
            ("Returns", "Returns\n(RMA)\nOwner: Returns Manager"),
            ("HR", "HR\n(People)\nOwner: HR Manager"),
        ):
            sources.node(node_id, node_label)

    # ------------------------------------------------------
    # Orchestration layer: a single hexagon for the scheduler.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_orchestration") as orchestration:
        orchestration.attr(
            label="Workflow Orchestration\nOwner: Data Engineering Team",
            style="filled",
            color="mistyrose",
        )
        orchestration.attr(
            "node", shape="hexagon", style="filled", fillcolor="#c27ba0"
        )

        orchestration.node(
            "Orchestrator",
            "Workflow Orchestrator\n"
            "Schedule: Daily 12:00 AM\n"
            "Duration: ~2 Hours",
        )

    # ------------------------------------------------------
    # Data integration layer: rounded boxes, colour set per node.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_integration") as integration:
        integration.attr(
            label="Data Integration\nOwner: Data Engineering Team",
            style="filled",
            color="lightyellow",
        )
        integration.attr("node", shape="box", style="rounded,filled")

        for node_id, node_label, fill in (
            (
                "ETL",
                "Extract → Transform → Load (ETL)\nOwner: Data Engineers",
                "#7fbf7f",
            ),
            (
                "DQ",
                "Data Quality Validation\nQuality Checkpoint\nOwner: QA Analysts",
                "#f6b26b",
            ),
            (
                "Integrate",
                "Integrate All Sources\nOwner: Data Engineers",
                "#7fbf7f",
            ),
        ):
            integration.node(node_id, node_label, fillcolor=fill)

    # ------------------------------------------------------
    # Master data layer: cylinders for datasets and tables.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_master") as master:
        master.attr(
            label="Master Data\nOwner: Data Governance Team",
            style="filled",
            color="lightcyan",
        )
        master.attr("node", shape="cylinder", style="filled")

        for node_id, node_label, fill in (
            (
                "MasterDataset",
                "GBP_DataSource\nMaster Dataset\n(10,020 rows)",
                "#f6b26b",
            ),
            ("OrdersTable", "Orders Table", "#dddddd"),
            ("ReturnsTable", "Returns Table", "#dddddd"),
            ("PeopleTable", "People Table", "#dddddd"),
            (
                "CleanDataset",
                "Clean Dataset\n(9,997 rows)\nDeduplicated & Aligned 3NF\nOwner: Data Steward",
                "#93c47d",
            ),
        ):
            master.node(node_id, node_label, fillcolor=fill)

    # ------------------------------------------------------
    # Analytics layer: warehouse plus the three reporting outputs.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_analytics") as analytics:
        analytics.attr(
            label="Analytics & Reporting\nOwner: Business Intelligence Team",
            style="filled",
            color="white",
        )
        analytics.attr(
            "node", shape="box", style="rounded,filled", fillcolor="#6fa8dc"
        )

        for node_id, node_label in (
            ("Warehouse", "Data Warehouse\nOwner: BI Architect"),
            ("Operational", "Operational Reports\nOwner: BI Developer"),
            ("Dashboards", "Executive Dashboards\nOwner: BI Developer"),
            ("Adhoc", "Ad-hoc Analysis\nOwner: Data Analyst"),
        ):
            analytics.node(node_id, node_label)

# ==========================================================
# DATA FLOWS (Using xlabel to fix orthogonal warning)
# ==========================================================

# Solid edges are data flows. Edge text is attached with `xlabel` rather than
# `label` to avoid the Graphviz warning raised for labels on orthogonal edges.

# Every source system feeds the ETL process.
dfd.edge("CRM", "ETL", xlabel="Customer Data")
dfd.edge("ERP", "ETL", xlabel="Order Transactions")
dfd.edge("Inventory", "ETL", xlabel="Product Catalog")
dfd.edge("Shipping", "ETL", xlabel="Shipping Data")
dfd.edge("Returns", "ETL", xlabel="Return Records")
dfd.edge("HR", "ETL", xlabel="Employee Data")

# Integration pipeline: ETL -> quality validation -> integration -> master.
dfd.edge("ETL", "DQ", xlabel="Raw Integrated Data")
dfd.edge("DQ", "Integrate", xlabel="Validated Data")
dfd.edge("Integrate", "MasterDataset", xlabel="Integrated Master Data")

# The master dataset is split into its three entity tables.
dfd.edge("MasterDataset", "OrdersTable", xlabel="Order Entities")
dfd.edge("MasterDataset", "ReturnsTable", xlabel="Return Entities")
# Edge endpoints are node IDs, not display labels. The head must be the
# "PeopleTable" node fed from "MasterDataset"; using the label text
# "People Table" as an endpoint would create a stray, undeclared node and
# leave PeopleTable without an inbound flow.
dfd.edge("MasterDataset", "PeopleTable", xlabel="People Entities")

# The entity tables are combined into the clean dataset.
dfd.edge("OrdersTable", "CleanDataset")
dfd.edge("ReturnsTable", "CleanDataset")
dfd.edge("PeopleTable", "CleanDataset")

# The clean dataset feeds the warehouse, which serves the reporting outputs.
dfd.edge("CleanDataset", "Warehouse", xlabel="Final Enriched Reporting Output")
dfd.edge("Warehouse", "Operational")
dfd.edge("Warehouse", "Dashboards")
dfd.edge("Warehouse", "Adhoc")

# ==========================================================
# ORCHESTRATION CONTROL FLOWS (DASHED)
# ==========================================================

# Dashed edges are control flow (who triggers what), drawn in addition to
# the solid data-flow edges. Each tuple is (tail node, head node, caption).
for tail, head, caption in (
    ("Orchestrator", "ETL", "Trigger ETL Job"),
    ("ETL", "DQ", "On Success"),
    ("DQ", "Integrate", "If Quality Passed"),
    ("Integrate", "MasterDataset", "Load Master"),
    ("MasterDataset", "CleanDataset", "Dedup & Align 3NF"),
    ("CleanDataset", "Warehouse", "Publish to BI"),
):
    dfd.edge(tail, head, style="dashed", xlabel=caption)

# Failure path: the quality checkpoint reports back to the orchestrator,
# highlighted in red.
dfd.edge(
    "DQ",
    "Orchestrator",
    style="dashed",
    color="red",
    xlabel="On Failure → Alert & Stop",
)

# ==========================================================
# RENDER OUTPUT
# ==========================================================

# Save the DOT source under this base name and render it to PDF without
# opening a viewer. Left as a bare expression so the notebook echoes the
# returned output path.
dfd.render(filename="THE_FINALE_GBP_DFD_DAMA_Compliant", view=False)

'THE_FINALE_GBP_DFD_DAMA_Compliant.pdf'

In [24]:
# ================================
# DAMA-Aligned GBP Reporting DFD
# WITH CLEAN ORCHESTRATION + FIXED LAYOUT
# Colab-Executable Production Code
# ================================

!apt-get -qq install graphviz
!pip install -q graphviz

from graphviz import Digraph

# Fresh directed graph for this cell; rendered as PDF.
dfd = Digraph(name="GBP_DFD_DAMA_Compliant", format="pdf")

# ---- Global layout ----
# Ranks run left-to-right (rankdir="LR"), so `nodesep` (gap between nodes in
# the same rank) controls vertical spacing and `ranksep` (gap between ranks)
# controls horizontal spacing.
layout_attrs = {
    "rankdir": "LR",
    "splines": "ortho",
    "nodesep": "1.2",
    "ranksep": "1.5",
    "fontsize": "12",
    "fontname": "Helvetica",
    "pad": "0.5",
    # Large 24 x 14 inch canvas to avoid a compressed drawing; the trailing
    # "!" allows Graphviz to scale the drawing up towards that size.
    "size": "24,14!",
}
dfd.attr(**layout_attrs)

# Diagram title, placed at the top of the canvas.
title_attrs = {
    "label": "GBP Reporting System – DAMA Compliant Data Flow with Orchestration",
    "labelloc": "t",
    "fontsize": "20",
}
dfd.attr(**title_attrs)

# ==========================================================
# SYSTEM BOUNDARY
# ==========================================================
with dfd.subgraph(name="cluster_system") as system:
    # Dashed outer frame that encloses every layer declared below.
    system.attr(label="GBP Reporting System", style="dashed", fontsize="18")

    # ------------------------------------------------------
    # Source systems: one filled box per upstream system.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_sources") as sources:
        sources.attr(
            label="Source Systems\nOwner: IT Operations Team",
            style="filled",
            color="lightgrey",
        )
        # Node defaults shared by every source system in this cluster.
        sources.attr(
            "node",
            shape="box",
            style="filled",
            fillcolor="#ff4d6d",
            fontsize="11",
        )

        for node_id, node_label in (
            ("CRM", "CRM\n(Customers)\nOwner: CRM Admin"),
            ("ERP", "ERP\n(Orders)\nOwner: ERP Admin"),
            ("Inventory", "Inventory\n(Products)\nOwner: Inventory Manager"),
            ("Shipping", "Shipping\n(Logistics)\nOwner: Logistics Manager"),
            ("Returns", "Returns\n(RMA)\nOwner: Returns Manager"),
            ("HR", "HR\n(People)\nOwner: HR Manager"),
        ):
            sources.node(node_id, node_label)

    # ------------------------------------------------------
    # Orchestration: a single hexagon for the scheduler.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_orchestration") as orchestration:
        orchestration.attr(
            label="Workflow Orchestration\nOwner: Data Engineering Team",
            style="filled",
            color="mistyrose",
        )
        orchestration.attr(
            "node",
            shape="hexagon",
            style="filled",
            fillcolor="#c27ba0",
            fontsize="12",
        )

        orchestration.node(
            "Orchestrator",
            "Workflow Orchestrator\nSchedule: Daily 12:00 AM\nDuration: ~2 Hours",
        )

    # ------------------------------------------------------
    # Data integration: rounded boxes, colour set per node.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_integration") as integration:
        integration.attr(
            label="Data Integration\nOwner: Data Engineering Team",
            style="filled",
            color="lightyellow",
        )
        integration.attr(
            "node", shape="box", style="rounded,filled", fontsize="11"
        )

        for node_id, node_label, fill in (
            (
                "ETL",
                "Extract → Transform → Load (ETL)\nOwner: Data Engineers",
                "#7fbf7f",
            ),
            (
                "DQ",
                "Data Quality Validation\nQuality Checkpoint\nOwner: QA Analysts",
                "#f6b26b",
            ),
            (
                "Integrate",
                "Integrate All Sources\nOwner: Data Engineers",
                "#7fbf7f",
            ),
        ):
            integration.node(node_id, node_label, fillcolor=fill)

    # ------------------------------------------------------
    # Master data: cylinders for datasets and tables.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_master") as master:
        master.attr(
            label="Master Data\nOwner: Data Governance Team",
            style="filled",
            color="lightcyan",
        )
        master.attr("node", shape="cylinder", style="filled", fontsize="11")

        for node_id, node_label, fill in (
            (
                "MasterDataset",
                "GBP_DataSource\nMaster Dataset\n(10,020 rows)",
                "#f6b26b",
            ),
            ("OrdersTable", "Orders Table", "#dddddd"),
            ("ReturnsTable", "Returns Table", "#dddddd"),
            ("PeopleTable", "People Table", "#dddddd"),
            (
                "CleanDataset",
                "Clean Dataset\n(9,997 rows)\nDeduplicated & Aligned 3NF\nOwner: Data Steward",
                "#93c47d",
            ),
        ):
            master.node(node_id, node_label, fillcolor=fill)

    # ------------------------------------------------------
    # Analytics: warehouse plus the three reporting outputs.
    # ------------------------------------------------------
    with system.subgraph(name="cluster_analytics") as analytics:
        analytics.attr(
            label="Analytics & Reporting\nOwner: Business Intelligence Team",
            style="filled",
            color="white",
        )
        analytics.attr(
            "node",
            shape="box",
            style="rounded,filled",
            fillcolor="#6fa8dc",
            fontsize="11",
        )

        for node_id, node_label in (
            ("Warehouse", "Data Warehouse\nOwner: BI Architect"),
            ("Operational", "Operational Reports\nOwner: BI Developer"),
            ("Dashboards", "Executive Dashboards\nOwner: BI Developer"),
            ("Adhoc", "Ad-hoc Analysis\nOwner: Data Analyst"),
        ):
            analytics.node(node_id, node_label)

# ==========================================================
# DATA FLOWS (using xlabel)
# ==========================================================

# Solid edges are data flows; edge text is attached via `xlabel`.

# Every source system feeds the ETL process: (source node, payload caption).
for source, payload in (
    ("CRM", "Customer Data"),
    ("ERP", "Order Transactions"),
    ("Inventory", "Product Catalog"),
    ("Shipping", "Shipping Data"),
    ("Returns", "Return Records"),
    ("HR", "Employee Data"),
):
    dfd.edge(source, "ETL", xlabel=payload, fontsize="10")

# Integration pipeline: ETL -> quality validation -> integration -> master.
for tail, head, payload in (
    ("ETL", "DQ", "Raw Integrated Data"),
    ("DQ", "Integrate", "Validated Data"),
    ("Integrate", "MasterDataset", "Integrated Master Data"),
):
    dfd.edge(tail, head, xlabel=payload, fontsize="10")

# The master dataset is split into its three entity tables.
for table, payload in (
    ("OrdersTable", "Order Entities"),
    ("ReturnsTable", "Return Entities"),
    ("PeopleTable", "People Entities"),
):
    dfd.edge("MasterDataset", table, xlabel=payload, fontsize="9")

# The entity tables are combined into the clean dataset (unlabelled edges).
for table in ("OrdersTable", "ReturnsTable", "PeopleTable"):
    dfd.edge(table, "CleanDataset")

# The clean dataset feeds the warehouse, which serves the reporting outputs.
dfd.edge(
    "CleanDataset",
    "Warehouse",
    xlabel="Final Enriched Reporting Output",
    fontsize="10",
)
for report in ("Operational", "Dashboards", "Adhoc"):
    dfd.edge("Warehouse", report)

# ==========================================================
# ORCHESTRATION (DASHED CONTROL FLOW)
# ==========================================================

# Dashed edges are control flow, drawn in addition to the solid data-flow
# edges. Each tuple is (tail node, head node, caption, caption font size).
for tail, head, caption, point_size in (
    ("Orchestrator", "ETL", "Trigger ETL Job", "10"),
    ("ETL", "DQ", "On Success", "9"),
    ("DQ", "Integrate", "If Quality Passed", "9"),
    ("Integrate", "MasterDataset", "Load Master", "9"),
    ("MasterDataset", "CleanDataset", "Dedup & Align 3NF", "9"),
    ("CleanDataset", "Warehouse", "Publish to BI", "9"),
):
    dfd.edge(tail, head, style="dashed", xlabel=caption, fontsize=point_size)

# Failure path: the quality checkpoint reports back to the orchestrator,
# highlighted in red.
dfd.edge(
    "DQ",
    "Orchestrator",
    style="dashed",
    color="red",
    xlabel="On Failure → Alert & Stop",
    fontsize="9",
)

# ==========================================================
# RENDER
# ==========================================================

# Save the DOT source under this base name and render it to PDF without
# opening a viewer. Left as a bare expression so the notebook echoes the
# returned output path.
dfd.render(filename="THE_GBP_DFD_DAMA_Compliant_project", view=False)

'THE_GBP_DFD_DAMA_Compliant_project.pdf'