In [2]:
import xml.etree.ElementTree as ET
import json
from haystack import Pipeline
from haystack.components.builders import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.ollama import OllamaChatGenerator

# ==========================================
# 0. SETUP: DATA & CONFIGURATION
# ==========================================

# Example BPMN XML (carry-on baggage process). This string is used both as the
# generator input and as the ground truth for the evaluation step.
# NOTE(review): the startEvent is declared outside <bpmn:process>; the
# evaluation searches from the document root so it is still found, but
# confirm whether the element was meant to live inside the process.
bpmn_content = """<?xml version="1.0" encoding="UTF-8"?>
<bpmn:definitions xmlns:bpmn="http://www.omg.org/spec/BPMN/20100524/MODEL" id="Definitions_1">
    <bpmn:startEvent name="Passenger arrives at Airport" />
  <bpmn:process id="Process_CarryOn" isExecutable="false">
    <bpmn:task name="Check amount, weight and dimensions" />
    <bpmn:exclusiveGateway name="Baggage compliant?" />
    <bpmn:task name="Pay fee (Credit Card only)" />
    <bpmn:exclusiveGateway name="High flight occupancy?" />
    <bpmn:task name="Offer free check-in" />
    <bpmn:task name="Remove valuables &amp; batteries" />
    <bpmn:task name="Transport in aircraft hold" />
    <bpmn:task name="Proceed to Boarding / Enter Cabin" />
  </bpmn:process>
</bpmn:definitions>"""

# LLM configuration. The generator and corrector unpack this dict directly
# into OllamaChatGenerator(**llm_config); the extractor reads individual keys.
llm_config = {
    "model": "llama3.1:8b",
    "url": "http://localhost:11434",
    "generation_kwargs": {"temperature": 0.0},
    # presumably seconds -- TODO confirm against the OllamaChatGenerator docs
    "timeout": 3000
}

# ==========================================
# A. GENERATOR PIPELINE
# ==========================================

# One-shot prompt: a single worked example followed by the real input.
# `bpmn_xml` is the only template variable.
generator_template = """
You are a BPMN expert. Generate a descriptive text for the given BPMN model.

### Example
Input: <bpmn:task name="Login"/><bpmn:endEvent name="End"/>
Output: The user performs the Login task, and then the process ends.

### Task
Input: {{ bpmn_xml }}
Output:
"""

# FIX: set the template AND required_variables right here, at construction
# time, so that callers only have to supply the variable values at run time.
gen_prompt = ChatPromptBuilder(
    template=[ChatMessage.from_user(generator_template)],
    required_variables=["bpmn_xml"]
)
gen_llm = OllamaChatGenerator(**llm_config)

# Two-component pipeline: the rendered chat prompt feeds the LLM's messages input.
generator_pipeline = Pipeline()
generator_pipeline.add_component("prompt_builder", gen_prompt)
generator_pipeline.add_component("llm", gen_llm)
generator_pipeline.connect("prompt_builder.prompt", "llm.messages")

def run_generator(bpmn_data):
    """Describe a BPMN model in natural language.

    Args:
        bpmn_data: BPMN XML as a string; rendered into the ``bpmn_xml``
            variable of the generator prompt.

    Returns:
        The text of the first reply produced by the generator pipeline's LLM.
    """
    # FIX: do not pass "template" here any more -- it is already set on the
    # prompt builder; only the template variable is supplied per run.
    builder_inputs = {"bpmn_xml": bpmn_data}
    pipeline_output = generator_pipeline.run({"prompt_builder": builder_inputs})
    first_reply = pipeline_output["llm"]["replies"][0]
    return first_reply.text

# ==========================================
# B. EXTRACTOR PIPELINE
# ==========================================

# Prompt asking the model to list every element mentioned in a description.
# `text` is the only template variable.
extractor_template = """
Extract all tasks, events, and gateways from the text below.
Return ONLY a valid JSON list of strings. No markdown, no explanations.

Text: "{{ text }}"

Output format: ["Element 1", "Element 2"]
"""

# FIX: the template is set statically at construction time; only `text` is
# supplied per run.
ext_prompt = ChatPromptBuilder(
    template=[ChatMessage.from_user(extractor_template)],
    required_variables=["text"]
)
# Reuse the shared model, url, generation settings and timeout so the
# extractor behaves like the generator and corrector. Previously the timeout
# was omitted here, so this generator alone fell back to the library default
# instead of the value configured in llm_config.
# NOTE(review): "format" is passed inside generation_kwargs as in the original
# code; confirm that the installed Ollama integration honours it there (newer
# releases expose a dedicated `response_format` argument instead).
ext_llm = OllamaChatGenerator(
    model=llm_config["model"],
    url=llm_config["url"],
    generation_kwargs={**llm_config["generation_kwargs"], "format": "json"},
    timeout=llm_config["timeout"],
)

# Two-component pipeline: the rendered chat prompt feeds the LLM's messages input.
extractor_pipeline = Pipeline()
extractor_pipeline.add_component("prompt_builder", ext_prompt)
extractor_pipeline.add_component("llm", ext_llm)
extractor_pipeline.connect("prompt_builder.prompt", "llm.messages")

def run_extractor(text):
    """Extract the element names mentioned in a process description.

    Args:
        text: Natural-language process description to analyse.

    Returns:
        A list of non-empty, whitespace-stripped strings. An empty list is
        returned (after printing a message) when the reply is missing, is not
        valid JSON, or does not contain a list.
    """
    result = extractor_pipeline.run({"prompt_builder": {"text": text}})
    try:
        json_str = result["llm"]["replies"][0].text
        parsed = json.loads(json_str)
    except (json.JSONDecodeError, TypeError, IndexError):
        # JSONDecodeError: reply is not valid JSON.
        # TypeError: reply text is None. IndexError: no replies at all.
        print("Fehler beim Parsen des JSONs.")
        return []

    # The model may wrap the list in an object, e.g. {"elements": [...]}.
    # Unwrap by concatenating every list-valued entry.
    if isinstance(parsed, dict):
        parsed = [
            item
            for value in parsed.values()
            if isinstance(value, list)
            for item in value
        ]

    if not isinstance(parsed, list):
        # Valid JSON, but a scalar rather than a list of names.
        print("Fehler beim Parsen des JSONs.")
        return []

    # Keep only real names: callers build a set from the result, so
    # non-string (possibly unhashable) items must not leak through.
    return [
        item.strip()
        for item in parsed
        if isinstance(item, str) and item.strip()
    ]

# ==========================================
# C. EVALUATION FUNCTION
# ==========================================

def get_mistakes(extracted_list, bpmn_xml_string):
    """Compare extracted element names against the names in a BPMN document.

    The ground truth is the set of ``name`` attributes of all ``task``,
    ``exclusiveGateway``, ``startEvent`` and ``endEvent`` elements found
    anywhere below the document root. Comparison is exact (case-sensitive)
    after stripping surrounding whitespace on both sides.

    Args:
        extracted_list: Iterable of element names produced by the extractor.
            ``None`` is treated as empty, a bare string as a single name, and
            non-string or blank items are ignored.
        bpmn_xml_string: BPMN XML document as a string.

    Returns:
        A dict with two sorted lists of names:
        ``"missing"`` (in the BPMN but not extracted) and
        ``"hallucinated"`` (extracted but not in the BPMN).

    Raises:
        xml.etree.ElementTree.ParseError: if ``bpmn_xml_string`` is not
            well-formed XML.
    """
    root = ET.fromstring(bpmn_xml_string)
    namespaces = {'bpmn': 'http://www.omg.org/spec/BPMN/20100524/MODEL'}

    ground_truth = set()
    for tag in ('task', 'exclusiveGateway', 'startEvent', 'endEvent'):
        for node in root.findall(f".//bpmn:{tag}", namespaces):
            name = (node.get('name') or '').strip()
            if name:  # skip unnamed elements and whitespace-only names
                ground_truth.add(name)

    if extracted_list is None:
        candidates = []
    elif isinstance(extracted_list, str):
        # A bare string would otherwise be iterated character by character.
        candidates = [extracted_list]
    else:
        candidates = extracted_list

    # Normalise the same way as the ground truth, and drop anything that is
    # not a string so unhashable items (e.g. dicts) cannot raise TypeError.
    extracted_set = {
        item.strip()
        for item in candidates
        if isinstance(item, str) and item.strip()
    }

    # Sort for deterministic output; set iteration order is arbitrary.
    return {
        "missing": sorted(ground_truth - extracted_set),
        "hallucinated": sorted(extracted_set - ground_truth)
    }

# ==========================================
# D. CORRECTOR PIPELINE
# ==========================================

# Prompt asking the model to rewrite a description so that the listed missing
# elements are included and the listed invented elements are removed.
# Template variables: `original_text`, `missing`, `hallucinated`.
corrector_template = """
The following process description is inaccurate. 
Rewrite the text to fix the errors listed below.

Original Text: "{{ original_text }}"

Errors to fix:
- You missed these elements (include them): {{ missing }}
- You invented these elements (remove them): {{ hallucinated }}

Corrected Text:
"""

# FIX: the template is set statically at construction time; only the three
# variables are supplied per run.
corr_prompt = ChatPromptBuilder(
    template=[ChatMessage.from_user(corrector_template)],
    required_variables=["original_text", "missing", "hallucinated"]
)
corr_llm = OllamaChatGenerator(**llm_config)

# Two-component pipeline: the rendered chat prompt feeds the LLM's messages input.
corrector_pipeline = Pipeline()
corrector_pipeline.add_component("prompt_builder", corr_prompt)
corrector_pipeline.add_component("llm", corr_llm)
corrector_pipeline.connect("prompt_builder.prompt", "llm.messages")

def run_corrector(text, mistakes):
    """Rewrite a description so that it no longer contains the given mistakes.

    Args:
        text: The original process description.
        mistakes: Mapping with the keys ``"missing"`` and ``"hallucinated"``,
            as returned by ``get_mistakes``.

    Returns:
        ``text`` unchanged when both mistake lists are empty; otherwise the
        text of the first reply produced by the corrector pipeline's LLM.
    """
    # Nothing to fix: skip the LLM call entirely.
    if not (mistakes["missing"] or mistakes["hallucinated"]):
        return text

    # The lists are rendered into the prompt in their str() form.
    prompt_variables = {
        "original_text": text,
        "missing": str(mistakes["missing"]),
        "hallucinated": str(mistakes["hallucinated"]),
    }
    pipeline_output = corrector_pipeline.run({"prompt_builder": prompt_variables})
    first_reply = pipeline_output["llm"]["replies"][0]
    return first_reply.text

# ==========================================
# EXECUTION
# ==========================================

# End-to-end run: generate a description, extract the elements it mentions,
# compare them with the BPMN ground truth, then ask the LLM to repair the text.
if __name__ == "__main__":
    # Step 1: describe the BPMN model in natural language.
    print("--- 1. Generating Description ---")
    generated_text = run_generator(bpmn_content)
    print(f"Generated: {generated_text}\n")

    # Step 2: list the elements that the generated description mentions.
    print("--- 2. Extracting Elements ---")
    extracted_elements = run_extractor(generated_text)
    print(f"Extracted: {extracted_elements}\n")

    # Step 3: diff the extracted names against the names in the BPMN XML.
    print("--- 3. Evaluating Mistakes ---")
    mistakes = get_mistakes(extracted_elements, bpmn_content)
    print(f"Missing: {mistakes['missing']}")
    print(f"Hallucinated: {mistakes['hallucinated']}\n")

    # Step 4: rewrite the description; returns it unchanged if nothing is wrong.
    print("--- 4. Correcting Text ---")
    final_text = run_corrector(generated_text, mistakes)
    print(f"Final Text: {final_text}")

--- 1. Generating Description ---
Generated: Here is a descriptive text for the given BPMN model:

**Process Description**

The process "CarryOn" represents the steps involved in checking in baggage at an airport. The process starts with the event "Passenger arrives at Airport", indicating that the passenger has arrived at the airport.

**Step 1: Check amount, weight and dimensions**

The first task is to check the amount, weight, and dimensions of the baggage. This involves verifying whether the baggage meets the airline's requirements for carry-on luggage.

**Decision Point: Baggage compliant?**

After checking the baggage, an exclusive gateway "Baggage compliant?" determines whether the baggage is compliant with the airline's regulations. If it is not compliant, the process may need to take alternative steps (not shown in this BPMN model).

**Step 2: Pay fee (Credit Card only)**

If the baggage is compliant, the next task is to pay a fee using a credit card.

**Decision Point: High 