In [3]:
import json
from haystack import Pipeline
from haystack.components.builders import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.ollama import OllamaChatGenerator

# ==========================================
# 1. READABILITY SCORING COMPONENT
# ==========================================
class ReadabilityScorer:
    def __init__(self):
        # A. Define the Prompt Template
        # We ask the LLM to score clarity/flow on a scale of 0-10
        template_text = """
        You are an expert linguist and technical evaluator.
        Analyze the following text describing a Business Process (BPMN).

        Criteria for scoring:
        - **10**: Natural, flowing, easy to read, human-like structure.
        - **5**: Understandable but robotic, repetitive, or fragmented.
        - **0**: Unintelligible or unstructured.

        Text to Evaluate:
        "{{ passage }}"

        Return your evaluation strictly as a JSON object with a single key "score" (integer 0-10).
        Example: {"score": 8}
        """

        # B. Initialize Components
        prompt_builder = ChatPromptBuilder(
            template=[ChatMessage.from_user(template_text)],
            required_variables=["passage"]
        )

        # Using Llama 3.1:8b (ensure it is pulled via 'ollama pull llama3.1:8b')
        llm = OllamaChatGenerator(
            model="llama3.1:8b", 
            url="http://localhost:11434",
            generation_kwargs={
                "format": "json", # Enforces valid JSON output
                "temperature": 0.0 # Deterministic output for scoring
            }
        )

        # C. Build the Pipeline
        self.pipeline = Pipeline()
        self.pipeline.add_component("prompt_builder", prompt_builder)
        self.pipeline.add_component("llm", llm)
        self.pipeline.connect("prompt_builder.prompt", "llm.messages")

    def get_score(self, text):
        try:
            result = self.pipeline.run({"prompt_builder": {"passage": text}})
            response_text = result["llm"]["replies"][0].text
            data = json.loads(response_text)
            return int(data.get("score", 0))
        except Exception as e:
            print(f"Error extracting score: {e}")
            return 0

# ==========================================
# 2. DATA SETUP (3 Models)
# ==========================================
# We compare the texts you provided (Human) against simulated "Generated" output.

experiments = [
    {
        "model_name": "1. Delayed Baggage",
        # YOUR TEXT (Ground Truth)
        "human_text": (
            "The process regarding delayed baggage begins when a passenger notices their item is missing upon arrival at the destination. "
            "The passenger immediately reports the delayed baggage, either online or at the airport's baggage tracing counter. "
            "Parallel to the main process, the passenger has the option to check the status of their report online at any time. "
            "Once the report is filed, the airline or baggage service initiates the tracing procedure. As soon as the baggage is located, "
            "it is forwarded to the destination airport, where it undergoes customs clearance and internal processing."
        ),
        # SIMULATED GENERATED TEXT (Robotic/Bad)
        "generated_text": (
            "Start event passenger notices missing item. Task passenger reports delayed baggage. "
            "Parallel gateway split. Sequence flow 1 check status online. Sequence flow 2 initiate tracing. "
            "Task locate baggage. Task forward to destination. Task customs clearance. End event."
        )
    },
    {
        "model_name": "2. Security Check",
        # YOUR TEXT (Ground Truth)
        "human_text": (
            "The process is initiated when the passenger shows their boarding pass. Afterwards, the passenger proceeds to the security check area to undergo a security check. "
            "Following the screening, a decision is made regarding whether the passenger appears suspicious. "
            "If the passenger is deemed suspicious, they must undergo a manual control; if not, the manual control is skipped. "
            "Subsequently, the process evaluates the passenger's destination via a gateway checking if the target destination is within the Schengen Area."
        ),
        # SIMULATED GENERATED TEXT (Robotic/Bad)
        "generated_text": (
            "Start event show boarding pass. Task execute security check. Exclusive gateway suspicious? "
            "If yes, execute manual control. If no, proceed. Exclusive gateway Schengen area? "
            "If yes, skip passport control. If no, show passport. End process."
        )
    },
    {
        "model_name": "3. Arrivals & Customs",
        # YOUR TEXT (Ground Truth)
        "human_text": (
            "The process begins when the passenger arrives at the terminal. The flow immediately reaches an exclusive gateway that checks if the arrival is from the Schengen area. "
            "If the arrival is from the Schengen area, the passenger proceeds to passport control and shows their passport. "
            "If the arrival is not from the Schengen area, these control steps are skipped. "
            "Subsequently, the passenger goes to the baggage claim area."
        ),
        # SIMULATED GENERATED TEXT (Robotic/Bad)
        "generated_text": (
            "Passenger arrives start event. Exclusive Gateway Schengen check. Sequence flow yes passport control. "
            "Sequence flow no skip. Task baggage claim. Gateway has baggage? Yes take baggage. "
            "No proceed. Gateway declare goods? Task declare goods. End event."
        )
    }
]

# ==========================================
# 3. RUN EXPERIMENT
# ==========================================
scorer = ReadabilityScorer()

print(f"{'BPMN MODEL':<25} | {'TYPE':<12} | {'SCORE':<5} | {'SAMPLE TEXT (Truncated)'}")
print("-" * 90)

for exp in experiments:
    # 1. Evaluate Human Text
    human_score = scorer.get_score(exp["human_text"])
    
    # 2. Evaluate Generated Text
    gen_score = scorer.get_score(exp["generated_text"])
    
    # 3. Print Results
    print(f"{exp['model_name']:<25} | {'Human':<12} | {human_score:<5} | {exp['human_text'][:40]}...")
    print(f"{'':<25} | {'Generated':<12} | {gen_score:<5} | {exp['generated_text'][:40]}...")
    print("-" * 90)

BPMN MODEL                | TYPE         | SCORE | SAMPLE TEXT (Truncated)
------------------------------------------------------------------------------------------
Error extracting score: Expecting value: line 1 column 1 (char 0)
1. Delayed Baggage        | Human        | 9     | The process regarding delayed baggage be...
                          | Generated    | 0     | Start event passenger notices missing it...
------------------------------------------------------------------------------------------
Error extracting score: Expecting value: line 1 column 1 (char 0)
Error extracting score: Expecting value: line 1 column 1 (char 0)
2. Security Check         | Human        | 0     | The process is initiated when the passen...
                          | Generated    | 0     | Start event show boarding pass. Task exe...
------------------------------------------------------------------------------------------
Error extracting score: Expecting value: line 1 column 1 (char 0)
Error ex