In [None]:
# General Python dependencies for the agent pipeline (LangChain, LangGraph, Gemini bindings).
# NOTE(review): no versions are pinned here, so a fresh runtime may resolve different releases on each run.
!pip install langchain langgraph langchain_google_genai langgraph_supervisor

In [None]:
# Upgrade the graph/core/Gemini packages to their latest available releases (also unpinned).
!pip install --upgrade langgraph langchain-core langchain-google-genai

In [None]:
# Install Node.js v20 and the bpmnlint CLI, which the validate_bpmn_file tool shells out to.

# 1. Forcefully remove the specific conflicting package from the old installation
!sudo dpkg --remove --force-remove-reinstreq libnode-dev

# 2. Purge any other old Node.js/npm packages to be safe
!sudo apt-get purge -y nodejs npm

# 3. Automatically remove all unused leftover dependencies
!sudo apt autoremove -y

# 4. Now, set up the source for Node.js v20
!curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -

# 5. Install Node.js v20 and its compatible npm
!sudo apt-get install -y nodejs

# 6. Finally, install bpmnlint
!npm install -g bpmnlint

# NOTE(review): this message is printed unconditionally; it does not check that the steps above succeeded.
print("bpmnlint installation complete.")

In [None]:
# Create a local .bpmnlintrc configuration file in the current working directory.
!bpmnlint --init

In [None]:
import os
import pandas as pd
from google.colab import userdata

# Set up the API key: read the Colab secret named 'Google_API_Key' and expose it
# through the GOOGLE_API_KEY environment variable.
os.environ['GOOGLE_API_KEY']=userdata.get('Google_API_Key')
print('Success!')

In [None]:
import base64
import requests
import time
import json
import subprocess
from typing import List, Dict, TypedDict, Annotated
from langgraph.graph import StateGraph, START, MessagesState, END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_core.prompts import PromptTemplate
from langchain.agents.agent_types import AgentType
from langchain_core.output_parsers import StrOutputParser
from langchain.load import dumps, loads
from langgraph.types import Command
from langgraph.checkpoint.memory import MemorySaver
from langchain.tools import StructuredTool
from langchain_core.runnables import Runnable, RunnableConfig
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.prebuilt import create_react_agent
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain.tools import tool
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [None]:
# main script
# To run this on your own use cases, upload your input texts to Colab as "uc1.txt" through "uc5.txt".
# If you prefer different input file names or paths, change them in the main execution section at the bottom of this cell.

@tool
def validate_bpmn_file(file_path: str):
    """
    Validates a .bpmn file against BPMN 2.0 standards using the bpmnlint linter.
    It checks for common modeling errors and best practices.
    The parameter `file_path` is the local path to the .bpmn file to be validated.
    """
    # NOTE(review): the docstring is kept verbatim because, for a @tool function, it is
    # presumably what the model sees as the tool description — confirm before rewording.

    # Guard: nothing to lint when the file is missing.
    if not os.path.exists(file_path):
        return f"Error: File not found at '{file_path}'"

    # Run the linter; only the subprocess call itself can raise here.
    try:
        completed = subprocess.run(
            ["bpmnlint", file_path],
            capture_output=True,
            text=True,
            check=False,  # a non-zero exit is reported below instead of raised
        )
    except FileNotFoundError:
        return "Error: 'bpmnlint' command not found. Please ensure it is installed and in your PATH."
    except Exception as e:
        return f"An unexpected error occurred: {e}"

    # Exit code 0 means the linter accepted the file.
    if completed.returncode == 0:
        return "BPMN file is VALID."

    # Otherwise return everything the linter printed on stdout and stderr.
    linter_output = completed.stdout + completed.stderr
    return f"BPMN file is INVALID. \n--- Linter Errors ---\n{linter_output}"

@tool
def read_file(file_path: str) -> str:
    """Reads the entire content of a file and returns it as a string."""
    # Failures are reported as a string result rather than raised to the caller.
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            text = source.read()
    except Exception as e:
        return f"Error reading file: {e}"
    return text

@tool
def write_file(file_path: str, content: str) -> str:
    """
    Writes the given content to a file, overwriting it if it exists.
    Includes a short pause to prevent race conditions.
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as sink:
            sink.write(content)
    except Exception as e:
        # Failures are reported as a string result rather than raised to the caller.
        return f"Error writing file: {e}"
    else:
        # Pause for one second after a successful write before reporting success.
        time.sleep(1)
        return f"Successfully wrote content to {file_path}"

# --- AGENT DEFINITIONS ---
# Single chat model instance reused by every worker agent and by the supervisor chain.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro", temperature=0.0)

# BASE AGENT
# System prompt for the modeler: read the source text, generate BPMN 2.0 XML (including the
# diagram section) and save it with a single write_file call.
# NOTE(review): the prompt text contains a typo ("conditions form the text"); it is left
# untouched here because the string is sent to the model at runtime.
base_system_message = """
You are a BPMN 2.0 Modeler Agent. Your input will be a string with two file paths separated by a comma: the source text path, and the target BPMN file path for your output.

### Your Instructions:
1.  Parse the two paths from the input string.
2.  Your first action MUST be to use the `read_file` tool to get the content of the source text path.
3.  Generate a complete BPMN 2.0 XML structure based on the text.
    - include all involved entities from the text as pools (do not use (swim)lanes but put each participant in their own pool)
    - pay attention to gateways: these should represent conditions form the text; if not all incoming paths are required, you should choose a XOR join, not an AND join; a gateway is either a split or join not both
    - there should only be one incoming and one outgoing flow from events and tasks (exceptions for e.g. end events that should not have any outgoing flows of course)
    - **CRITICAL:** You MUST include the complete `<bpmndi:BPMNDiagram>` section. Every single element (actors, tasks, events, flows, etc.) MUST have a corresponding visual element (`<bpmndi:BPMNShape>` or `<bpmndi:BPMNEdge>`). An incomplete diagram is a failure.
4.  Your final action MUST be a single call to the `write_file` tool to save your generated XML to the target BPMN file path.
5.  After calling the tool, respond with a simple confirmation like "BPMN XML generated and saved."
"""

# Prompt layout: system instructions, the task string as the human turn, then the
# placeholder that receives the agent's intermediate tool-call steps.
bpmn_modeler_prompt = ChatPromptTemplate.from_messages([
    ("system", base_system_message),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])
# The modeler only needs file I/O; it is not given the linter tool.
bpmn_modeler_tools = [write_file, read_file]
bpmn_modeler_agent = create_tool_calling_agent(llm, bpmn_modeler_tools, bpmn_modeler_prompt)
bpmn_modeler_executor = AgentExecutor(agent=bpmn_modeler_agent, tools=bpmn_modeler_tools, verbose=True)


# SYNTAX AGENT
# System prompt for the syntax corrector: validate with bpmnlint, and on failure
# read, fix, rewrite and re-validate the same file.
# NOTE(review): the prompt text contains typos ("to not eng in nowhere"); it is left
# untouched here because the string is sent to the model at runtime.
syntax_system_message = """
You are a meticulous BPMN Syntax Corrector Agent. Your input will be a single file path to a BPMN file.

### Your Workflow (Follow this strictly):
1.  Your first action is to call the `validate_bpmn_file` tool on the file path from your input.
2.  **Analyze the Validation Result:**
    -   **If the result is "BPMN file is VALID.":** Your job is done. Respond with "BPMN file is valid."
    -   **If the result contains "Error: File not found":** Report this critical error to the supervisor.
    -   **If the result is "BPMN file is INVALID.":** You must now enter a correction loop.
        a. Use `read_file` to get the current broken XML from the input file path.
        b. Analyze the "Linter Errors" from the validation result and identify the necessary corrections.
        If you add e.g. events between tasks, make sure these are intermediate events. Usually an XOR split gateway should be eventually followed by an XOR join. Make sure you do not create deadlocks by using AND joins when in reality it should be a XOR join.
        Make sure the elements you create are correctly connected, e.g. flows are connected to element and to not eng in nowhere.
        c. **Generate the corrected XML in your thought process.** You must take the entire original XML content and apply the necessary fixes.
        d. **Your final action MUST be a single tool call to `write_file`. The `content` parameter for this tool call must be the *entire, complete, and corrected* BPMN XML string, and the `file_path` must be the same one from your input.**
        e. Go back to step 1 and re-validate your own work.
"""
# Same three-part prompt layout as the other workers: system, human task, scratchpad.
syntax_corrector_prompt = ChatPromptTemplate.from_messages([
    ("system", syntax_system_message),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])
# The only worker that receives the bpmnlint-backed validation tool.
syntax_corrector_tools = [read_file, write_file, validate_bpmn_file]
syntax_corrector_agent = create_tool_calling_agent(llm, syntax_corrector_tools, syntax_corrector_prompt)
syntax_corrector_executor = AgentExecutor(agent=syntax_corrector_agent, tools=syntax_corrector_tools, verbose=True)


# LAYOUT AGENT
# System prompt for the layout specialist: one pass over the diagram coordinates
# (containment, overlaps, orthogonal flows, crossings), rewriting the file only if issues are found.
layout_system_message = """
You are an expert BPMN Layout Specialist, obsessed with creating clean, professional, and easy-to-read diagrams. Your input will be a single file path to a BPMN file. Your sole purpose is to perform a single, effective pass of layout correction.

### Your Workflow:
1.  **Read and Analyze:** Use the `read_file` tool to get the current BPMN XML content. Then, in your thought process, perform a comprehensive analysis based on the following **Layout Heuristics**:

    **Layout Heuristics (Apply all):**
    1.  **Strict Containment:** Absolutely NO element's bounds (`<dc:Bounds>`) may extend beyond the boundaries of its parent Pool or Lane. This is a critical error to fix.
    2.  **Eliminate Overlaps:**
        -   No two shapes (`<bpmndi:BPMNShape>`) can overlap.
        -   No label (`<bpmndi:BPMNLabel>`) can overlap with any part of another shape or a sequence flow line. Pay special attention to gateway labels; place them in a clear space adjacent to the gateway, not obstructing any flows.
    3.  **Enforce Orthogonal Flows:** ALL sequence flows (`<bpmndi:BPMNEdge>`) MUST be composed of horizontal and vertical line segments. For any two consecutive waypoints, either their x-coordinates or their y-coordinates MUST be identical. Correct any diagonal lines.
    4.  **Minimize Flow Crossings:** Reroute flows to go around other elements rather than through them. While some crossings may be unavoidable, they should be minimized and clean (perfect 90-degree intersections).

2.  **Decision and Execution:**
    -   **If you find NO layout issues** according to the heuristics above: Your job for this turn is done. Respond with the exact message: "Layout analysis complete. No issues found."
    -   **If you DO find layout issues:** You must fix them all at once.
        a. **Create a comprehensive plan** to fix ALL identified issues based on the heuristics.
        b. **Generate the corrected XML.** You must take the entire original XML content and apply ALL the necessary coordinate changes to create a single, corrected version of the file.
        c. **Your final action MUST be a single tool call to `write_file`. The `content` parameter must be the *entire, complete, and corrected* BPMN XML string, and the `file_path` must be the same one from your input.** After the tool call, respond with a summary of the changes you made.
"""
# Same three-part prompt layout as the other workers: system, human task, scratchpad.
layout_corrector_prompt = ChatPromptTemplate.from_messages([
    ("system", layout_system_message),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])
# Layout work needs file I/O only; the linter tool is not provided to this agent.
layout_corrector_tools = [read_file, write_file]
layout_corrector_agent = create_tool_calling_agent(llm, layout_corrector_tools, layout_corrector_prompt)
layout_corrector_executor = AgentExecutor(agent=layout_corrector_agent, tools=layout_corrector_tools, verbose=True)


# SEMANTIC LOGIC AGENT
# System prompt for the semantic corrector: compare the source text with the BPMN model
# in a single pass and rewrite the BPMN file only if discrepancies are found.
semantic_system_message = """
You are an expert BPMN Semantic Corrector Agent. Your input will be a string with two file paths separated by a comma: the source text path and the BPMN file path. Your sole purpose is to perform a single, effective pass of semantic correction.

### Your Workflow:
1.  **Parse and Read Files:** Parse the two paths from the input string. Use the `read_file` tool twice: once for the source text and once for the BPMN XML file.

2.  **Analyze and Compare:** In your own thought process, perform a detailed analysis to find ALL discrepancies between the source text and the BPMN model.

    **Analysis Task:**
    1.  **Completeness Check:** First, ensure all major actors, activities, and events described in the text are present in the BPMN model.
    2.  **Regulatory & Logic Check:** Pay special attention to the following types of rules and evaluate their implementation in the model:
        -   **Conditional Clauses:** Are `if/then` statements correctly modeled with gateways?
        -   **Temporal Constraints:** Are time-based rules (e.g., "within 3 days") modeled with timer events?
        -   **Obligatory Actions:** Are words like `must`, `shall`, `has to` modeled as part of a non-optional execution path?
        -   **Prohibited Actions:** Are phrases like `must not` correctly modeled as impossible paths or explicit exception flows?
        -   **Permitted Actions:** Are words like `can` or `may` correctly modeled as optional paths, for example, following a choice gateway?

3.  **Decision and Execution:**
    -   **If you find NO discrepancies:** Your job for this turn is done. Respond with the exact message: "Semantic analysis complete. The model accurately represents the source text."
    -   **If you DO find discrepancies:** You must fix them all at once.
        a. **Plan your changes** for all identified issues.
        b. **Generate the corrected XML.** You must take the entire original BPMN XML and modify the logical elements to fix ALL the reported errors. Do not create subprocess or add very uncommon BPMN elements.
        c. **Your final action MUST be a single tool call to `write_file`. The `content` parameter must be the *entire, complete, and corrected* BPMN XML string, and the `file_path` must be the BPMN path from your input.** After the tool call, respond with a summary of the changes you made.
"""
# Same three-part prompt layout as the other workers: system, human task, scratchpad.
semantic_corrector_prompt = ChatPromptTemplate.from_messages([
    ("system", semantic_system_message),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])
# Semantic checks need to read both files and write the BPMN file; no linter tool here.
semantic_logic_tools = [read_file, write_file]
semantic_logic_agent = create_tool_calling_agent(llm, semantic_logic_tools, semantic_corrector_prompt)
semantic_logic_executor = AgentExecutor(agent=semantic_logic_agent, tools=semantic_logic_tools, verbose=True)


# --- SUPERVISOR AND GRAPH SETUP ---

# System prompt for the supervisor: it describes the four workers, the order in which they
# should be called, when the run may end, and the JSON shape of each routing decision.
# The exact pass messages quoted under "Decision Logic" match the strings the worker
# prompts tell their agents to respond with, so the two must be kept in sync.
supervisor_system_message = """
You are a master supervisor of a team of BPMN specialist agents. Your job is to manage a workflow from text-to-perfect BPMN by giving clear, concise tasks to your workers.

Your team consists of:
1. **bpmn_modeler_agent**: Creates the initial draft.
2. **syntax_corrector_agent**: Checks for syntax errors.
3. **semantic_logic_agent**: Checks for semantic errors against the source text.
4. **layout_corrector_agent**: Checks for visual layout errors.

### Your Workflow Rules:

1. **Modeling (CRITICAL RULE)**:
   Your first task is to call `bpmn_modeler_agent`. Under NO circumstances should you call this agent again after it has run once.

2. **Correction Cycle (Syntax & Semantics)**:
   - Your goal is to get a "pass" from the agents in this order: `syntax_corrector_agent` → `semantic_logic_agent`.
   - These two agents are part of the **main correction loop**.
   - If either agent makes a correction and is still within its attempt limit, you must restart the loop by calling `syntax_corrector_agent` again.
   - If `semantic_logic_agent` passes, **or** has reached its 2-attempt limit, the main correction loop is considered complete.

3. **Final Layout Step**:
   - After the syntax and semantic steps are complete, you must call `layout_corrector_agent` **once**.
   - If `layout_corrector_agent` passes (finds no issues), the entire process is complete. Respond with `"END"`.
   - If it makes a correction, perform a final safety check by calling `syntax_corrector_agent`.
   - If this final syntax check passes, the process is complete. Respond with `"END"`.

### Decision Logic (VERY IMPORTANT):
Your decision MUST be based on the most recent message in the history.

- If the last message is `"BPMN file is valid."`, call `semantic_logic_agent`.
- If the last message is `"Semantic analysis complete. The model accurately represents the source text."`, call `layout_corrector_agent`.
- If the last message is `"Layout analysis complete. No issues found."`, the job is done. Call `"END"`.
- If any agent reports it made a correction, call `syntax_corrector_agent`, **except**:
    - If `layout_corrector_agent` made a correction, and the **subsequent** `syntax_corrector_agent` call passed, the job is done. Call `"END"`.

- **IMPORTANT**: You must not call `"END"` unless:
    - `layout_corrector_agent` has run **at least once**, and
    - Either it passed, or it made a correction followed by a passing `syntax_corrector_agent`.

### Output Format:
Your response MUST be a JSON object with two keys:
- `"next_agent"`: The string name of the next agent to call (e.g., `"syntax_corrector_agent"`) or `"END"`.
- `"task_description"`: A string containing ONLY the required file path(s) for the agent.
    - For `bpmn_modeler_agent`: Provide a string like `"source_path,target_path"`.
    - For `syntax_corrector_agent` and `layout_corrector_agent`: Provide the single path to the BPMN file.
    - For `semantic_logic_agent`: Provide a string like `"source_path,bpmn_path"`.
"""

# The supervisor sees its system prompt followed by the message history supplied under "messages".
supervisor_prompt = ChatPromptTemplate.from_messages([
    ("system", supervisor_system_message),
    ("placeholder", "{messages}"),
])

class AgentState(TypedDict):
    """Shared state passed between the supervisor node and the worker nodes of the graph."""
    # Message history; the reducer appends each node's returned list to the existing one.
    messages: Annotated[List[BaseMessage], lambda x, y: x + y]
    # Input string handed to the selected worker's executor (set by the supervisor).
    task: str
    # Routing key read by `router`: a worker node name or "END".
    next: str
    # Path of the primary BPMN file being produced and corrected.
    file_path: str
    # Path of the source text the model is derived from.
    source_path: str
    # How many times the supervisor has dispatched each agent, keyed by agent name.
    attempt_counts: Dict[str, int]
    # How many times each worker has actually run; used to number intermediate snapshots.
    agent_step_counts: Dict[str, int]
    # True once the modeler worker has run.
    modeler_has_run: bool

# Shape of the supervisor's structured reply: the agent to run next (or "END") and the
# input string to give that agent.
# NOTE(review): documented with comments rather than a docstring on purpose — this class is
# passed to `with_structured_output`, which may surface a docstring to the model; confirm
# before adding one.
class SupervisorDecision(TypedDict):
    next_agent: str
    task_description: str

# Supervisor prompt piped into the model constrained to return a SupervisorDecision.
supervisor_chain = supervisor_prompt | llm.with_structured_output(SupervisorDecision)

# Dispatch budget per agent. Agents not listed here (e.g. the modeler) get the default.
_MAX_ATTEMPTS_BY_AGENT = {
    "syntax_corrector_agent": 6,
    "semantic_logic_agent": 2,
    "layout_corrector_agent": 1,
}
_DEFAULT_MAX_ATTEMPTS = 1  # Default for modeler


def supervisor_node(state: AgentState):
    """
    Calls the supervisor to decide the next step and enforces workflow rules.

    The supervisor LLM proposes the next agent and its task string. That proposal is then
    passed through three guard rails:

    1. If the semantic agent was the last worker to run and has used up its dispatch
       budget, the run is forced on to the layout agent.
    2. The modeler is never dispatched a second time; such a request is redirected to
       the syntax corrector.
    3. If the chosen agent has already used its whole dispatch budget, the run is
       aborted and the last available BPMN file content is returned as the final message.

    Args:
        state: Current graph state. Reads `messages`, `file_path`, `source_path`,
            `modeler_has_run` and `attempt_counts`.

    Returns:
        A state update with `next`, `task`, `attempt_counts` and an empty `messages`
        list, or, on abort, `next == "END"` plus a single explanatory message.
    """
    # Work on a copy: nodes should report changes through their return value instead of
    # mutating the state object they were handed.
    attempt_counts = dict(state.get("attempt_counts") or {})

    time.sleep(0.5)

    history = state['messages']

    context_message = HumanMessage(
        content=f"SYSTEM CONTEXT: The primary BPMN file is '{state['file_path']}'. The source text is at '{state['source_path']}'. Modeler has run: {state.get('modeler_has_run', False)}. Current attempt counts: {attempt_counts}",
        name="System"
    )

    decision = supervisor_chain.invoke({"messages": [context_message] + history})
    next_agent = decision['next_agent']
    task_description = decision['task_description']

    # Name attached to the most recent worker reply ("" when no worker has replied yet).
    last_agent_name = next(
        (msg.name for msg in reversed(history) if isinstance(msg, AIMessage)),
        "",
    )

    # Guard 1: use the same limit as the budget table. The previous hard-coded threshold
    # (3) was higher than the semantic budget (2), so this branch could never fire and
    # the run ended in the abort path instead of moving on to layout.
    semantic_budget = _MAX_ATTEMPTS_BY_AGENT["semantic_logic_agent"]
    semantic_attempts = attempt_counts.get("semantic_logic_agent", 0)
    if last_agent_name == "semantic_logic_agent" and semantic_attempts >= semantic_budget:
        print("--- SEMANTIC AGENT MAX ATTEMPTS REACHED. Forcing progression to layout agent. ---")
        next_agent = "layout_corrector_agent"
        task_description = state['file_path']

    # Guard 2: the modeler runs exactly once.
    if state.get("modeler_has_run") and next_agent == "bpmn_modeler_agent":
        print("--- SUPERVISOR ERROR OVERRIDE: Attempted to call modeler again. Redirecting to syntax_corrector_agent. ---")
        next_agent = "syntax_corrector_agent"
        task_description = state['file_path']

    # Guard 3: enforce the per-agent dispatch budget.
    if next_agent != "END":
        prior_attempts = attempt_counts.get(next_agent, 0)
        max_attempts = _MAX_ATTEMPTS_BY_AGENT.get(next_agent, _DEFAULT_MAX_ATTEMPTS)

        if prior_attempts >= max_attempts:
            print(f"--- MAX ATTEMPTS REACHED FOR AGENT: {next_agent} ({max_attempts} attempts). Aborting. ---")
            last_bpmn_content = read_file.invoke({"file_path": state['file_path']})
            final_message = (
                f"Process aborted after {max_attempts} failed attempts by {next_agent}.\n\n"
                f"Here is the last available version of the BPMN file:\n\n"
                f"```xml\n{last_bpmn_content}\n```"
            )
            return {"next": "END", "messages": [HumanMessage(content=final_message)]}

        attempt_counts[next_agent] = prior_attempts + 1

    return {
        "next": next_agent,
        "task": task_description,
        "attempt_counts": attempt_counts,
        "messages": [],
    }

def worker_node(state: AgentState, agent_executor: AgentExecutor, agent_name: str):
    """
    Runs a worker agent on its assigned task and saves an intermediate snapshot of the BPMN file.

    Args:
        state: Current graph state. Reads `task`, `file_path`, `agent_step_counts`
            and `modeler_has_run`.
        agent_executor: Executor of the worker to run; invoked with `{"input": state["task"]}`.
        agent_name: Node name of the worker. It is attached to the reply message and used
            in the snapshot file name.

    Returns:
        A state update containing the worker's reply as an `AIMessage`, the updated
        per-agent step counts and the `modeler_has_run` flag.
    """
    result = agent_executor.invoke({"input": state["task"]})

    # Build updated values locally instead of mutating the incoming state. This also
    # tolerates a state in which the step counter is missing or None.
    modeler_has_run = bool(state.get("modeler_has_run", False)) or agent_name == "bpmn_modeler_agent"

    step_counts = dict(state.get("agent_step_counts") or {})
    current_step_count = step_counts.get(agent_name, 0) + 1
    step_counts[agent_name] = current_step_count

    # Snapshot the BPMN file as it stands after this worker,
    # e.g. "<base>_syntax_corrector_2<ext>".
    bpmn_path = state['file_path']
    if os.path.exists(bpmn_path):
        content = read_file.invoke({"file_path": bpmn_path})
        base, ext = os.path.splitext(bpmn_path)
        safe_agent_name = agent_name.replace('_agent', '')
        intermediate_filename = f"{base}_{safe_agent_name}_{current_step_count}{ext}"
        write_file.invoke({"file_path": intermediate_filename, "content": content})
        print(f"--- Saved intermediate state to {intermediate_filename} ---")

    return {
        "messages": [AIMessage(content=result["output"], name=agent_name)],
        "agent_step_counts": step_counts,
        "modeler_has_run": modeler_has_run,
    }


# Building graph
def _make_worker_node(executor, agent_name):
    """Bind an executor and its agent name into a single-argument graph node."""
    def _node(state):
        return worker_node(state, executor, agent_name)
    return _node


def router(state: AgentState):
    """Return the routing key chosen by the supervisor (a worker name or "END")."""
    return state['next']


# Worker node name -> executor, in the order the nodes are registered.
_WORKER_EXECUTORS = {
    "bpmn_modeler_agent": bpmn_modeler_executor,
    "syntax_corrector_agent": syntax_corrector_executor,
    "layout_corrector_agent": layout_corrector_executor,
    "semantic_logic_agent": semantic_logic_executor,
}

workflow = StateGraph(AgentState)
workflow.add_node("supervisor", supervisor_node)
for _worker_name, _worker_executor in _WORKER_EXECUTORS.items():
    workflow.add_node(_worker_name, _make_worker_node(_worker_executor, _worker_name))

# The supervisor is the entry point and routes to one worker or to the end of the graph.
_routes = {_worker_name: _worker_name for _worker_name in _WORKER_EXECUTORS}
_routes["END"] = END

workflow.set_entry_point("supervisor")
workflow.add_conditional_edges("supervisor", router, _routes)

# Every worker hands control back to the supervisor.
for _worker_name in _WORKER_EXECUTORS:
    workflow.add_edge(_worker_name, "supervisor")


supervisor_app = workflow.compile()


def run_workflow(source_file: str, bpmn_file: str, run_number: int):
    """
    Sets up and runs a single workflow instance for a given use case.

    Args:
        source_file: Path of the source text describing the process.
        bpmn_file: Path the generated BPMN file is written to. Intermediate snapshots,
            the summary file and the error log are placed next to it.
        run_number: Run index, used in log output only.

    Returns:
        None. On success the last message of the final graph state is printed and saved
        to "<bpmn_file without extension>_summary.txt". On failure the error is printed
        and appended to "error_log.txt" in the BPMN file's directory.
    """
    print(f"\n--- STARTING RUN {run_number} FOR {os.path.basename(source_file)} ---")
    print(f"--- Output will be saved to files starting with: {os.path.splitext(bpmn_file)[0]} ---\n")

    initial_task_description = f"{source_file},{bpmn_file}"
    initial_state = {
        "messages": [HumanMessage(content=f"Start the process. The initial task for the modeler is: {initial_task_description}")],
        "file_path": bpmn_file,
        "source_path": source_file,
        "task": "",
        "attempt_counts": {},
        "agent_step_counts": {},
        "modeler_has_run": False,
    }

    final_state = None
    try:
        # stream_mode="values" yields the full state after every step. The default mode
        # yields per-node updates keyed by node name, which have no top-level "messages"
        # key, so the summary below would never be produced.
        for snapshot in supervisor_app.stream(
            initial_state,
            {"recursion_limit": 100},
            stream_mode="values",
        ):
            final_state = snapshot
    except Exception as e:
        print(f"\n!!!!!! An error occurred during the execution for {source_file}, run {run_number} !!!!!!")
        print(f"Error: {e}")
        # Keep the log next to the outputs and make sure its directory exists, so that
        # logging the failure cannot itself fail on a missing folder.
        log_directory = os.path.dirname(bpmn_file) or "."
        os.makedirs(log_directory, exist_ok=True)
        with open(os.path.join(log_directory, "error_log.txt"), "a", encoding="utf-8") as f:
            f.write(f"Timestamp: {time.ctime()}\nUseCase: {source_file}, Run: {run_number}\nError: {e}\n---\n")
        return

    print(f"\n--- SUPERVISOR WORKFLOW COMPLETE FOR RUN {run_number} OF {os.path.basename(source_file)} ---")
    if final_state and final_state.get("messages"):
        final_message = final_state["messages"][-1].content
        print("Final response from the graph:", final_message, sep='\n')

        summary_filename = f"{os.path.splitext(bpmn_file)[0]}_summary.txt"
        # Explicit UTF-8, matching how the file tools in this notebook read and write.
        with open(summary_filename, 'w', encoding='utf-8') as f:
            f.write(final_message)
        print(f"Final summary saved to {summary_filename}")

    print(f"\n{'='*60}\n")
    # Pause between runs before the caller starts the next one.
    time.sleep(5)

# --- MAIN EXECUTION BLOCK ---
if __name__ == "__main__":
    # Batch configuration: where results go, how often each case runs, and the inputs.
    output_directory = "/content/outputs"
    runs_per_case = 3
    use_case_files = [f"/content/uc{case_no}.txt" for case_no in range(1, 6)]

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    for uc_path in use_case_files:
        if os.path.exists(uc_path):
            # File name without directory or extension, e.g. "uc1".
            base_name = os.path.splitext(os.path.basename(uc_path))[0]

            # Each use case is run several times, each run with its own output file.
            for run_index in range(1, runs_per_case + 1):
                output_bpmn_file = os.path.join(
                    output_directory,
                    f"output_{base_name}_run{run_index}.bpmn",
                )
                run_workflow(
                    source_file=uc_path,
                    bpmn_file=output_bpmn_file,
                    run_number=run_index,
                )
        else:
            print(f"WARNING: Source file {uc_path} not found. Skipping.")

    print("All use cases and runs have been processed.")