In [1]:
!pip install graphviz




In [4]:
from graphviz import Digraph

# Create the directed graph that holds the flowchart; output format is PNG.
flowchart = Digraph("AI_Data_Processing_Flowchart", format="png")

# Shared attributes for the rectangular (box) step nodes: filled background,
# large font, and an enlarged minimum size. Each node adds its own fillcolor.
node_attrs = {
    "shape": "box",
    "style": "filled",
    "fontsize": "20",
    "width": "2",
    "height": "1",
}

# NOTE: Graphviz only honours `fillcolor` when the node's style includes
# "filled". The diamond nodes below do not use node_attrs, so they must set
# style="filled" themselves or their fill colour is silently ignored.

# --- Nodes ---
## User Upload
flowchart.node("U", "User Uploads Cleaned Dataset", **node_attrs, fillcolor="lightblue")

## Error Handling for Missing Values
flowchart.node(
    "CHECK_MISSING",
    "Check for Missing Values",
    shape="diamond",
    style="filled",  # required for fillcolor to take effect
    fontsize="20",
    width="2.5",
    height="1.2",
    fillcolor="lightgray",
)
flowchart.node("ERROR", "Return Error if Missing Values Found", **node_attrs, fillcolor="red")

## Column Formatting
flowchart.node("COL_FORMAT", "Convert Columns into a Single String", **node_attrs, fillcolor="lightgray")

## GPT Context Generation
flowchart.node("GPT_CONTEXT", "Generate Context for Dataset\n(using OpenAI GPT-3.5 Turbo)", **node_attrs, fillcolor="lightcoral")

## Database Initialization
flowchart.node("INIT_DB", "Initialize Database\n(Store Dataset in SQL DB)", **node_attrs, fillcolor="lightgray")

## User Query
flowchart.node("USER_QUERY", "User Inputs Query", **node_attrs, fillcolor="lightblue")

## Output Type Decision
flowchart.node(
    "OUTPUT_TYPE",
    "Select Output Type:\nText or Visual",
    shape="diamond",
    style="filled",  # required for fillcolor to take effect
    fontsize="22",
    width="3",
    height="1.5",
    fillcolor="yellow",
)

### Text-based Output Path
flowchart.node("TEXT_DB", "Read Database for Info", **node_attrs, fillcolor="lightgray")
flowchart.node("TEXT_LLM", "LLM Processes Query", **node_attrs, fillcolor="lightgreen")
flowchart.node("TEXT_ANS", "Generate Text Answer", **node_attrs, fillcolor="orange")
flowchart.node("TEXT_LOOP", "Return to User Query", **node_attrs, fillcolor="lightblue")

### Visual-based Output Path
flowchart.node("VIS_CONTEXT", "Refer GPT Context for Dataset", **node_attrs, fillcolor="lightcoral")
flowchart.node("VIS_LLM", "LLM Processes Query", **node_attrs, fillcolor="lightgreen")
flowchart.node("VIS_CODE", "Generate Python Code for Visualization", **node_attrs, fillcolor="orange")
flowchart.node("VIS_EXEC", "Execute Code to Generate Visual", **node_attrs, fillcolor="yellow")
flowchart.node("VIS_LOOP", "Return to User Query", **node_attrs, fillcolor="lightblue")

## Future Scope
# Drawn with a dashed, unfilled outline to set it apart from the implemented steps.
flowchart.node(
    "FUTURE",
    "Future Enhancements\n(Memory, RAG, etc.)",
    shape="diamond",
    fontsize="22",
    width="3",
    height="1.5",
    style="dashed",
)

# --- Edges ---
# Each entry is (tail node, head node, edge label). Edges are added to the
# graph in exactly the order listed here.
_edge_table = (
    ## Upload Process
    ("U", "CHECK_MISSING", "Dataset Uploaded"),
    ("CHECK_MISSING", "ERROR", "If Missing Values Found"),
    ("CHECK_MISSING", "COL_FORMAT", "If No Missing Values"),
    ## Data Processing
    ("COL_FORMAT", "GPT_CONTEXT", "Combine Column Names"),
    ("GPT_CONTEXT", "INIT_DB", "Store Generated Context"),
    ("INIT_DB", "USER_QUERY", "Database Initialized"),
    ## Query & Output Flow
    ("USER_QUERY", "OUTPUT_TYPE", "User Enters Question"),
    ## Text Output Path
    ("OUTPUT_TYPE", "TEXT_DB", "If Output Type = Text"),
    ("TEXT_DB", "TEXT_LLM", "Fetch Info"),
    ("TEXT_LLM", "TEXT_ANS", "Process Query"),
    ("TEXT_ANS", "TEXT_LOOP", "Generate Answer"),
    ("TEXT_LOOP", "USER_QUERY", "Loop Back"),
    ## Visual Output Path
    ("OUTPUT_TYPE", "VIS_CONTEXT", "If Output Type = Visual"),
    ("VIS_CONTEXT", "VIS_LLM", "Refer Dataset Context"),
    ("VIS_LLM", "VIS_CODE", "Generate Visualization Code"),
    ("VIS_CODE", "VIS_EXEC", "Execute Code"),
    ("VIS_EXEC", "VIS_LOOP", "Display Graph"),
    ("VIS_LOOP", "USER_QUERY", "Loop Back"),
    ## Future Enhancements
    ("USER_QUERY", "FUTURE", "Expand Capabilities"),
)

# Every edge label uses the same font size.
for _tail, _head, _label in _edge_table:
    flowchart.edge(_tail, _head, label=_label, fontsize="18")

# Render the flowchart to disk under the given base filename and open the
# result in the system viewer (view=True). Left as a bare expression so a
# notebook cell still echoes the returned output path.
flowchart.render(filename="ai_data_processing_flowchart", view=True)


'ai_data_processing_flowchart.png'