# Praktikum Wirtschaftsinformatik: Exercise Sheet 5 - Task 2

Group Nr.3

Student Names: Felix Findeisen & Lucas Janssen

## First Step: All Imports

In [1]:
from haystack import Pipeline, Document, component
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.builders import PromptBuilder
from haystack_integrations.components.generators.ollama import OllamaGenerator
from typing import List

## Second Step: Retriever

Custom Component: Rule based Retriever - Task: "A custom retriever... retrieves the correct feedback... using rules"

In [2]:
@component
class ErrorRuleRetriever:
    """
    This retriever does not perform a similarity search 
    but filters documents based on fixed rules (status and element type).
    """
    def __init__(self, document_store: InMemoryDocumentStore):
        self.document_store = document_store

    # Input: Error Message - Output: Doc/ Rule/ Instruction
    @component.output_types(documents=List[Document])
    def run(self, error_status: str, element_type: str):
        # Load all Docs
        all_docs = self.document_store.filter_documents()
        
        selected_docs = []
        for doc in all_docs:
            # Rule based Logic to determine what constellation to use
            if (doc.meta.get("status") == error_status and 
                doc.meta.get("type") == element_type):
                selected_docs.append(doc)
        
        # Generic fallback if undefined
        if not selected_docs:
            print(f"No rule/ instruction found for: {element_type}/{error_status}")
        
        return {"documents": selected_docs}


## Third Step: Documents

The InMemoryDocumentStore is a very simple document store with no extra services. It is great for a simple RAG system.

Setup: Fill Document Store with Rules - Task: "Document Store containing rules to update a text"

In [3]:
document_store = InMemoryDocumentStore()

# Rules as meta data
rules = [
    Document(
        content="Task '{element_name}' is missing in the description. Please add a sentence describing this task at the correct logical position in the text.",
        meta={"status": "missing", "type": "task"}
    ),
    Document(
        content="Task '{element_name}' is hallucinated (it exists in text but not in model). Please remove the sentence describing this task from the text completely.",
        meta={"status": "hallucinated", "type": "task"}
    ),
    Document(
        content="The gateway '{element_name}' is described with incorrect logic. Please adjust the text to clearly distinguish whether paths are executed in parallel (AND) or exclusively (XOR), matching the BPMN model.",
        meta={"status": "missing", "type": "gateway"}
    ),
    # Expand with other rules/ instructions e.g. "hallucinated" & "gateway"
]
document_store.write_documents(rules)

3

## Fourth step: Prompt Builder

Task: "Instructs the LLM to change the textual description". We use Jinja2 Syntax, to dynamically integrate the rules

In [4]:
template = """
You are a BPMN assistant. Your job is to correct a textual process description based on an error analysis.

Context - BPMN XML (Source of Truth):
{{ bpmn_model }}

Current Textual Description (contains errors):
{{ current_text }}

Instruction for Correction:
{% for doc in documents %}
    {{ doc.content }}
{% endfor %}

Please provide the CORRECTED textual description only:
"""

prompt_builder = PromptBuilder(template=template)

PromptBuilder has 3 prompt variables, but `required_variables` is not set. By default, all prompt variables are treated as optional, which may lead to unintended behavior in multi-branch pipelines. To avoid unexpected execution, ensure that variables intended to be required are explicitly set in `required_variables`.


## Fifth Step: Generator

Select the model to use. We use ollama 3.2

In [5]:
generator = OllamaGenerator(
    model="llama3.2:3b",
    url="http://localhost:11434",
    generation_kwargs={
        "num_ctx": 4096,
        "temperature": 0.0, # 0.0 because of strict rules to not be creative
    }
)

## Sixth Step: Assembling the Pipeline

Connect the Elements of the RAG.

In [6]:
correction_pipeline = Pipeline()

# Retriever init
rule_retriever = ErrorRuleRetriever(document_store=document_store)

# Componenten add
correction_pipeline.add_component("retriever", rule_retriever)
correction_pipeline.add_component("prompt_builder", prompt_builder)
correction_pipeline.add_component("llm", generator)

# Connect
# Retriever gives  rules ("documents") to PromptBuilder
correction_pipeline.connect("retriever.documents", "prompt_builder.documents")
# Final Prompt coming from PromptBuilder to the LLM
correction_pipeline.connect("prompt_builder.prompt", "llm.prompt")

print("Pipeline Init Success")

Pipeline Init Success


## Seventh Step: Execution

#### First Execution: Missing Task

In [7]:
bpmn_model_snippet = """<bpmn:process id="Process_Airport" isExecutable="false">
    <bpmn:startEvent id="StartEvent_1" name="Passenger arrives">
      <bpmn:outgoing>Flow_1</bpmn:outgoing>
    </bpmn:startEvent>
    <bpmn:task id="Task_1" name="Check-in at Counter">
      <bpmn:incoming>Flow_1</bpmn:incoming>
      <bpmn:outgoing>Flow_2</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_1" sourceRef="StartEvent_1" targetRef="Task_1" />
    <bpmn:task id="Task_2" name="Pass Security Check">
      <bpmn:incoming>Flow_2</bpmn:incoming>
      <bpmn:outgoing>Flow_3</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_2" sourceRef="Task_1" targetRef="Task_2" />
    <bpmn:endEvent id="EndEvent_1" name="Passenger ready">
      <bpmn:incoming>Flow_3</bpmn:incoming>
    </bpmn:endEvent>
    <bpmn:sequenceFlow id="Flow_3" sourceRef="Task_2" targetRef="EndEvent_1" />
  </bpmn:process>"""
current_wrong_text = "The passenger arrives at the terminal. First, they proceed to Check-in at Counter to drop their bags. Finally, the Passenger is ready to board."
error_found_status = "missing"
error_found_type = "task"

response = correction_pipeline.run(
    {
        "retriever": {
            "error_status": error_found_status, 
            "element_type": error_found_type
        },
        "prompt_builder": {
            "bpmn_model": bpmn_model_snippet,
            "current_text": current_wrong_text,
            # Assuming correct replacement of the LLM with the Variables
        }
    }
)

print("\n--- CORRECTED TEXT ---")
print(response["llm"]["replies"][0])


--- CORRECTED TEXT ---
The passenger arrives at the terminal. First, they proceed to Check-in at Counter to drop their bags and then undergo security checks. Finally, the Passenger is ready to board.


#### Second Execution: 

In [8]:
bpmn_model_snippet = """<bpmn:process id="Process_Trip" isExecutable="false">
    <bpmn:startEvent id="StartEvent_1" name="Trip Planned">
      <bpmn:outgoing>Flow_1</bpmn:outgoing>
    </bpmn:startEvent>
    <bpmn:parallelGateway id="GW_Parallel" name="Booking Split">
      <bpmn:incoming>Flow_1</bpmn:incoming>
      <bpmn:outgoing>Flow_2</bpmn:outgoing>
      <bpmn:outgoing>Flow_3</bpmn:outgoing>
    </bpmn:parallelGateway>
    <bpmn:sequenceFlow id="Flow_1" sourceRef="StartEvent_1" targetRef="GW_Parallel" />
    <bpmn:task id="Activity_Flight" name="Book Flight">
      <bpmn:incoming>Flow_2</bpmn:incoming>
      <bpmn:outgoing>Flow_4</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_2" sourceRef="GW_Parallel" targetRef="Activity_Flight" />
    <bpmn:task id="Activity_Hotel" name="Book Hotel">
      <bpmn:incoming>Flow_3</bpmn:incoming>
      <bpmn:outgoing>Flow_5</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_3" sourceRef="GW_Parallel" targetRef="Activity_Hotel" />
    <bpmn:endEvent id="EndEvent_1" name="Trip Ready">
      <bpmn:incoming>Flow_4</bpmn:incoming>
      <bpmn:incoming>Flow_5</bpmn:incoming>
    </bpmn:endEvent>
    <bpmn:sequenceFlow id="Flow_4" sourceRef="Activity_Flight" targetRef="EndEvent_1" />
    <bpmn:sequenceFlow id="Flow_5" sourceRef="Activity_Hotel" targetRef="EndEvent_1" />
  </bpmn:process>"""
current_wrong_text = "When a trip is planned, we first Book Flight. After that is done, we proceed to Book Hotel. Finally, the trip is ready."
error_found_status = "missing"
error_found_type = "gateway"

response = correction_pipeline.run(
    {
        "retriever": {
            "error_status": error_found_status, 
            "element_type": error_found_type
        },
        "prompt_builder": {
            "bpmn_model": bpmn_model_snippet,
            "current_text": current_wrong_text,
            # Assuming correct replacement of the LLM with the Variables
        }
    }
)

print("\n--- CORRECTED TEXT ---")
print(response["llm"]["replies"][0])


--- CORRECTED TEXT ---
When a trip is planned, we first Book Flight and simultaneously Book Hotel. After both tasks are completed, the trip is ready.


#### Third Execution

In [11]:
bpmn_model_snippet = """<bpmn:process id="Process_Baggage_Handling" isExecutable="false">
    <bpmn:startEvent id="StartEvent_1" name="Bag received">
      <bpmn:outgoing>Flow_1</bpmn:outgoing>
    </bpmn:startEvent>
    <bpmn:task id="Task_Scan" name="Scan Tag">
      <bpmn:incoming>Flow_1</bpmn:incoming>
      <bpmn:outgoing>Flow_2</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_1" sourceRef="StartEvent_1" targetRef="Task_Scan" />
    <bpmn:task id="Task_Load" name="Load onto Belt">
      <bpmn:incoming>Flow_2</bpmn:incoming>
      <bpmn:outgoing>Flow_3</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_2" sourceRef="Task_Scan" targetRef="Task_Load" />
    <bpmn:endEvent id="EndEvent_1" name="Bag loaded">
      <bpmn:incoming>Flow_3</bpmn:incoming>
    </bpmn:endEvent>
    <bpmn:sequenceFlow id="Flow_3" sourceRef="Task_Load" targetRef="EndEvent_1" />
  </bpmn:process>"""
current_wrong_text = "The baggage handling process starts when a bag is received. First, the worker scans the tag. Then, they wrap the bag in protective plastic. Then, they load it onto the belt. Finally, a bag is loaded "
error_found_status = "hallucinated"
error_found_type = "task"

response = correction_pipeline.run(
    {
        "retriever": {
            "error_status": error_found_status, 
            "element_type": error_found_type
        },
        "prompt_builder": {
            "bpmn_model": bpmn_model_snippet,
            "current_text": current_wrong_text,
            # Assuming correct replacement of the LLM with the Variables
        }
    }
)

print("\n--- CORRECTED TEXT ---")
print(response["llm"]["replies"][0])


--- CORRECTED TEXT ---
The baggage handling process starts when a bag is received. First, they scan the tag. Then, they load it onto the belt. Finally, a bag is loaded.


#### Fourth Execution

In [10]:
bpmn_model_snippet = """<<bpmn:process id="Process_Turnaround" isExecutable="false">
    <bpmn:startEvent id="StartEvent_1" name="Aircraft at Gate">
      <bpmn:outgoing>Flow_1</bpmn:outgoing>
    </bpmn:startEvent>
    <bpmn:task id="Task_Deboard" name="Deboard Passengers">
      <bpmn:incoming>Flow_1</bpmn:incoming>
      <bpmn:outgoing>Flow_2</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_1" sourceRef="StartEvent_1" targetRef="Task_Deboard" />
    <bpmn:parallelGateway id="Gateway_Split" name="Start Ground Ops">
      <bpmn:incoming>Flow_2</bpmn:incoming>
      <bpmn:outgoing>Flow_To_Refuel</bpmn:outgoing>
      <bpmn:outgoing>Flow_To_Baggage</bpmn:outgoing>
      <bpmn:outgoing>Flow_To_Cabin</bpmn:outgoing>
    </bpmn:parallelGateway>
    <bpmn:sequenceFlow id="Flow_2" sourceRef="Task_Deboard" targetRef="Gateway_Split" />
    
    <bpmn:task id="Task_Refuel" name="Refuel Aircraft">
      <bpmn:incoming>Flow_To_Refuel</bpmn:incoming>
      <bpmn:outgoing>Flow_From_Refuel</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_To_Refuel" sourceRef="Gateway_Split" targetRef="Task_Refuel" />
    
    <bpmn:task id="Task_Unload" name="Unload Baggage">
      <bpmn:incoming>Flow_To_Baggage</bpmn:incoming>
      <bpmn:outgoing>Flow_Bag_1</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_To_Baggage" sourceRef="Gateway_Split" targetRef="Task_Unload" />
    <bpmn:task id="Task_Load" name="Load Baggage">
      <bpmn:incoming>Flow_Bag_1</bpmn:incoming>
      <bpmn:outgoing>Flow_From_Baggage</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_Bag_1" sourceRef="Task_Unload" targetRef="Task_Load" />
    
    <bpmn:task id="Task_Clean" name="Clean Cabin">
      <bpmn:incoming>Flow_To_Cabin</bpmn:incoming>
      <bpmn:outgoing>Flow_Cab_1</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_To_Cabin" sourceRef="Gateway_Split" targetRef="Task_Clean" />
    <bpmn:task id="Task_Cater" name="Load Catering">
      <bpmn:incoming>Flow_Cab_1</bpmn:incoming>
      <bpmn:outgoing>Flow_From_Cabin</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_Cab_1" sourceRef="Task_Clean" targetRef="Task_Cater" />
    
    <bpmn:parallelGateway id="Gateway_Join" name="Ground Ops Done">
      <bpmn:incoming>Flow_From_Refuel</bpmn:incoming>
      <bpmn:incoming>Flow_From_Baggage</bpmn:incoming>
      <bpmn:incoming>Flow_From_Cabin</bpmn:incoming>
      <bpmn:outgoing>Flow_Merge</bpmn:outgoing>
    </bpmn:parallelGateway>
    <bpmn:sequenceFlow id="Flow_From_Refuel" sourceRef="Task_Refuel" targetRef="Gateway_Join" />
    <bpmn:sequenceFlow id="Flow_From_Baggage" sourceRef="Task_Load" targetRef="Gateway_Join" />
    <bpmn:sequenceFlow id="Flow_From_Cabin" sourceRef="Task_Cater" targetRef="Gateway_Join" />
    
    <bpmn:task id="Task_Board" name="Board Passengers">
      <bpmn:incoming>Flow_Merge</bpmn:incoming>
      <bpmn:outgoing>Flow_End</bpmn:outgoing>
    </bpmn:task>
    <bpmn:sequenceFlow id="Flow_Merge" sourceRef="Gateway_Join" targetRef="Task_Board" />
    <bpmn:endEvent id="EndEvent_1" name="Ready for Departure">
      <bpmn:incoming>Flow_End</bpmn:incoming>
    </bpmn:endEvent>
    <bpmn:sequenceFlow id="Flow_End" sourceRef="Task_Board" targetRef="EndEvent_1" />
  </bpmn:process>"""
current_wrong_text = "As soon as the aircraft arrives at the gate, the passengers de-board immediately. While the ground crew refuels the aircraft, the flight attendants prepare for the next flight. Once refueling is complete, the new passengers board the plane, and the aircraft is ready for departure."
error_found_status = "missing"
error_found_type = "task"

response = correction_pipeline.run(
    {
        "retriever": {
            "error_status": error_found_status, 
            "element_type": error_found_type
        },
        "prompt_builder": {
            "bpmn_model": bpmn_model_snippet,
            "current_text": current_wrong_text,
            # Assuming correct replacement of the LLM with the Variables
        }
    }
)

print("\n--- CORRECTED TEXT ---")
print(response["llm"]["replies"][0])


--- CORRECTED TEXT ---
As soon as the aircraft arrives at the gate, the passengers de-board immediately. While the ground crew refuels the aircraft, the flight attendants prepare for the next flight and clean the cabin. Once refueling is complete, the new passengers board the plane, and the aircraft is ready for departure.
