In [1]:
import os
import json
from typing import List, Literal, Optional, Text, Union, Dict, Any
from IPython.display import Image, display
from langchain_core.runnables.graph_mermaid import MermaidDrawMethod
from agents.agents_modules.workflow import build_agent_workflow, workers_dict
from agents.dataloader import load_dataset_by_name, extract_example

  from .autonotebook import tqdm as notebook_tqdm
PyTorch version 2.8.0.dev20250515+cu128 available.


In [2]:
def save_result_to_json(state: dict, filename: str = "result.json", directory: str = "results") -> None:
    """
    Saves the given agent workflow state to a JSON file in a specified directory.

    Args:
        state (dict): The final state returned by the agent graph execution.
        filename (str): The name of the JSON file to save.
        directory (str): The directory where the file will be saved.

    Raises:
        IsADirectoryError: If the target file path is a directory.
    """

    file_path = os.path.join(directory, filename)
    os.makedirs(directory, exist_ok=True)

    if os.path.isdir(file_path):
        raise IsADirectoryError(f"Cannot write to '{file_path}' because it is a directory.")

    # Recursively convert objects to serializable types
    def make_serializable(obj):
        if isinstance(obj, list):
            return [make_serializable(x) for x in obj]
        elif hasattr(obj, "model_dump"):  # Pydantic BaseModel
            return obj.model_dump()
        elif isinstance(obj, dict):
            return {k: make_serializable(v) for k, v in obj.items()}
        else:
            return obj

    serializable_state = make_serializable(dict(state))

    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(serializable_state, f, indent=4)

    print(f"Results saved to {file_path}")


In [3]:
name = "totto"  # Change this to the dataset you want to load
num = 345 # Sample id

data = load_dataset_by_name(name)
sample = extract_example(name, data["validation"][num])
data = sample['input']
ground_truth = sample['references']
target = sample['target']

query = f"""You are an agent designed to generate text from data for a data-to-text natural language generation. You can be provided data in the form of xml, table, meaning representations, graphs etc. 
Your task is to generate the appropriate text given the data information without omitting any field or adding extra information in essence called hallucination.
Here is the data generate text using table data:
{data}"""

initial_state = {
    "input": query,
    "raw_input": data,
    "plan": [],
    "result_steps": [],
    "response": "",
    "chat_history": [],
    "next": "",
    "next_input": "",
    "agent_outcome": "",
    "current_step": 0,
    "team_iterations": 0,
    "recursion_limit": 60,
}

print(f"\nInput: {data} \n")
print(f"Target: {target} \n")
print(f"Ground Truth: {ground_truth}")

Loading dataset: totto

Input: <page_title> List of Spanish regional legislatures </page_title> <section_title> Autonomous Communities </section_title> <table> <cell> Parliament of La Rioja Sp. Parlamento de La Rioja <col_header> Legislature name Local names </col_header> </cell> </table> 

Target: The Parliament of La Rioja (Spanish: Parlamento de La Rioja) is one of the autonomous communities of Spain. 

Ground Truth: ['The Parliament of La Rioja (Spanish: Parlamento de La Rioja) is one of the autonomous communities of Spain.', 'The Parliament of La Rioja (Spanish: Parlamento de La Rioja) is one of the autonomous communities of Spain.', 'The Parliament of La Rioja (Spanish: Parlamento de La Rioja) is one of the autonomous communities of Spain.']


In [4]:
provider = "openai" #ollama, openai, hf, aixplain
add_plan = False # True or False
process_flow = build_agent_workflow(add_plan=add_plan, workers_dict=workers_dict, provider=provider)
# display(Image(process_flow.get_graph(xray=True).draw_mermaid_png(draw_method=MermaidDrawMethod.PYPPETEER)))
# display(Image(process_flow.get_graph(xray=True).draw_mermaid_png()))

In [5]:
state = process_flow.invoke(initial_state, config={"recursion_limit": initial_state["recursion_limit"]})
prediction = state['response']

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


ORCHESTRATOR OUTPUT:
Thought: The user has provided a request to generate text based on the provided data related to the 'List of Spanish regional legislatures'. The data is shown in a table format, and the relevant content that needs to be sequenced correctly includes the 'Legislature name' and 'Local names'. We need to assign the content ordering worker to properly sequence these pieces of information before moving to the next steps in the pipeline. 

Worker: content ordering
Worker Input: The data provided is as follows:
<page_title> List of Spanish regional legislatures </page_title> 
<section_title> Autonomous Communities </section_title> 
<table> 
<cell> Parliament of La Rioja Sp. Parlamento de La Rioja </cell> 
<col_header> Legislature name Local names </col_header> 
</table>

Your task is to sequence the relevant data fields as they should appear in the final text, focusing on the 'Legislature name' followed by 'Local names'.



[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "<page_title> List of Spanish regional legislatures </page_title> <section_title> Autonomous Communities </section_title> <table> <col_header> Legislature name Local names </col_header> <cell> Parliament of La Rioja Sp. Parlamento de La Rioja </cell> </table>"
}
```[0m

[1m> Finished chain.[0m
WORKER: {'input': "The data provided is as follows:\n<page_title> List of Spanish regional legislatures </page_title> \n<section_title> Autonomous Communities </section_title> \n<table> \n<cell> Parliament of La Rioja Sp. Parlamento de La Rioja </cell> \n<col_header> Legislature name Local names </col_header> \n</table>\n\nYour task is to sequence the relevant data fields as they should appear in the final text, focusing on the 'Legislature name' followed by 'Local names'.", 'output': '<page_title> List of Spanish regional legislatures </page_title> <section_title> Autonomous Communities </section_title> <table> <col_header> Legi

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output does not correctly reflect the required content ordering. The 'Legislature name' should precede the 'Local names' in the cell, but it appears combined in a single cell without proper separation.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


ORCHESTRATOR OUTPUT:
Thought: The content ordering worker has mistakenly placed the column header and the cell in the wrong order and included both in the output instead of just ordering the fields. The feedback indicates that the worker should only sequence the relevant data field components, specifically, the 'Legislature name' followed by 'Local names', without any additional HTML tags or structures. I will reassign the content ordering task to correct this mistake.

Worker: content ordering
Worker Input: 
'The data provided is as follows:
<page_title> List of Spanish regional legislatures </page_title> 
<section_title> Autonomous Communities </section_title> 
<table> 
<cell> Parliament of La Rioja Sp. Parlamento de La Rioja </cell> 
<col_header> Legislature name Local names </col_header> 
</table>

Your task is to sequence the relevant data fields correctly for the final text generation. The correct sequence should only include the 'Legislature name' followed by 'Local names', pres

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Parliament of La Rioja Sp. Parlamento de La Rioja"
}
```[0m

[1m> Finished chain.[0m
WORKER: {'input': "'The data provided is as follows:\n<page_title> List of Spanish regional legislatures </page_title> \n<section_title> Autonomous Communities </section_title> \n<table> \n<cell> Parliament of La Rioja Sp. Parlamento de La Rioja </cell> \n<col_header> Legislature name Local names </col_header> \n</table>\n\nYour task is to sequence the relevant data fields correctly for the final text generation. The correct sequence should only include the 'Legislature name' followed by 'Local names', presented in a clear format without extra HTML tags. Please provide the ordered content.'", 'output': 'Parliament of La Rioja Sp. Parlamento de La Rioja'}



HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The worker's output is correct as it successfully sequenced the relevant data fields 'Legislature name' followed by 'Local names' without any additional HTML tags or structures.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


ORCHESTRATOR OUTPUT:
Thought: The content ordering step has been completed successfully as the worker provided the relevant data in the correct sequence. The next step in the process is 'text structuring', which will group the ordered content into coherent sentence-level units based on the structured output provided. 

Worker: text structuring
Worker Input: 
'The data provided is as follows:
<page_title> List of Spanish regional legislatures </page_title> 
<section_title> Autonomous Communities </section_title> 
Ordered Content: Parliament of La Rioja Sp. Parlamento de La Rioja

Your task is to group the ordered content into coherent sentence-level units, creating a clear output that reflects the information provided.'



[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3mAction:
```
{
  "action": "Final Answer",
  "action_input": "<snt>The data provided is as follows:</snt><snt>List of Spanish regional legislatures</snt><snt>Section Title: Autonomous Communities</snt><snt>Parliament of La Rioja: Parlamento de La Rioja</snt>"
}
```[0m

[1m> Finished chain.[0m
WORKER: {'input': "'The data provided is as follows:\n<page_title> List of Spanish regional legislatures </page_title> \n<section_title> Autonomous Communities </section_title> \nOrdered Content: Parliament of La Rioja Sp. Parlamento de La Rioja\n\nYour task is to group the ordered content into coherent sentence-level units, creating a clear output that reflects the information provided.'", 'output': '<snt>The data provided is as follows:</snt><snt>List of Spanish regional legislatures</snt><snt>Section Title: Autonomous Communities</snt><snt>Parliament of La Rioja: Parlamento de La Rioja</snt>'}



HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: CORRECT


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


ORCHESTRATOR OUTPUT:
Thought: The text structuring worker has produced a response, but the inspector feedback indicates it should be reviewed. The content needs to be more coherent and should have fewer sentence fragments. The primary goal is to clearly delineate the information without added complexity. I will reassign the task to the 'text structuring' worker to better organize the output into fluent sentences.

Worker: text structuring
Worker Input: 
''The data provided is as follows:
<page_title> List of Spanish regional legislatures </page_title> 
<section_title> Autonomous Communities </section_title> 
Ordered Content: Parliament of La Rioja Sp. Parlamento de La Rioja

Your task is to group the ordered content into coherent sentence-level units. Ensure that the output reflects the information provided in a clear, concise manner, avoiding fragments. Please provide the structured text in a way that is easy to understand.''



[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "<page_title> List of Spanish regional legislatures </page_title> <section_title> Autonomous Communities </section_title> <snt> Parliament of La Rioja </snt> <snt> Sp. Parlamento de La Rioja </snt>"
}
```[0m

[1m> Finished chain.[0m
WORKER: {'input': "''The data provided is as follows:\n<page_title> List of Spanish regional legislatures </page_title> \n<section_title> Autonomous Communities </section_title> \nOrdered Content: Parliament of La Rioja Sp. Parlamento de La Rioja\n\nYour task is to group the ordered content into coherent sentence-level units. Ensure that the output reflects the information provided in a clear, concise manner, avoiding fragments. Please provide the structured text in a way that is easy to understand.''", 'output': '<page_title> List of Spanish regional legislatures </page_title> <section_title> Autonomous Communities </section_title> <snt> Parliament of La Rioja </snt> <snt> Sp. Parlamento d

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output contains sentence fragments and does not provide coherent sentence-level units as instructed. It should combine the content into complete sentences for better clarity and understanding.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


ORCHESTRATOR OUTPUT:
Thought: The output from the text structuring worker didn't effectively group the ordered content into coherent sentence-level units. Instead, the worker kept the original element structure and split the content into fragments, which doesn't adhere to the task requirements. I need to reassign the task to the same worker to create a clear and complete structured text that includes the ordered content in a continuous narrative format.

Worker: text structuring
Worker Input: 
''The data provided is as follows:
<page_title> List of Spanish regional legislatures </page_title> 
<section_title> Autonomous Communities </section_title> 
Ordered Content: Parliament of La Rioja Sp. Parlamento de La Rioja

Your task is to group the ordered content into coherent sentence-level units. The output should be formatted as a single, well-structured sentence or closely related sentences that articulate the information clearly, preserving the context of the data without introducing any

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3mAction:
```
{
  "action": "Final Answer",
  "action_input": "<snt>The data provided is as follows: <page_title>List of Spanish regional legislatures</page_title> <section_title>Autonomous Communities</section_title> <snt>Parliament of La Rioja is known in Spanish as Parlamento de La Rioja.</snt></snt>"
}
```[0m

[1m> Finished chain.[0m
WORKER: {'input': "''The data provided is as follows:\n<page_title> List of Spanish regional legislatures </page_title> \n<section_title> Autonomous Communities </section_title> \nOrdered Content: Parliament of La Rioja Sp. Parlamento de La Rioja\n\nYour task is to group the ordered content into coherent sentence-level units. The output should be formatted as a single, well-structured sentence or closely related sentences that articulate the information clearly, preserving the context of the data without introducing any fragments or unnecessary structure.''", 'output': '<snt>The data provided is as follows: <page_title>List of Spanish reg

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output includes unnecessary repeated tags, which disrupts the coherence of the text. While it attempts to structure the information into a sentence, the repetition of the "<snt>" tag indicates a formatting issue rather than a clear and complete narrative.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


ORCHESTRATOR OUTPUT:
Thought: The text structuring worker's last output incorporates too much extraneous structure and fails to present the information in a clear and concise manner. It should focus on a straightforward presentation of the primary data without added tags that complicate the sentence. I will reassign the task to the text structuring worker with a revised prompt to ensure clarity and coherence in the output.

Worker: text structuring
Worker Input: '''The data provided is as follows:
<page_title> List of Spanish regional legislatures </page_title> 
<section_title> Autonomous Communities </section_title> 
Ordered Content: Parliament of La Rioja Sp. Parlamento de La Rioja

Your task is to group the ordered content into a coherent, single sentence that clearly expresses the information without extra structure or fragments. Focus on providing a straightforward statement that maintains the integrity of the data presented.''



[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "<snt>The Parliament of La Rioja, also known as Parlamento de La Rioja, is part of the Spanish regional legislatures.</snt>"
}
```[0m

[1m> Finished chain.[0m
WORKER: {'input': "'''The data provided is as follows:\n<page_title> List of Spanish regional legislatures </page_title> \n<section_title> Autonomous Communities </section_title> \nOrdered Content: Parliament of La Rioja Sp. Parlamento de La Rioja\n\nYour task is to group the ordered content into a coherent, single sentence that clearly expresses the information without extra structure or fragments. Focus on providing a straightforward statement that maintains the integrity of the data presented.''", 'output': '<snt>The Parliament of La Rioja, also known as Parlamento de La Rioja, is part of the Spanish regional legislatures.</snt>'}



HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: CORRECT


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


ORCHESTRATOR OUTPUT:
Thought: The last worker successfully structured the content into a coherent sentence that expresses the information clearly and accurately. Since the output meets the expectations of the task, I will move on to the next step in the pipeline, which is surface realization. 

Worker: surface realization
Worker Input: '''The data provided is as follows:
<page_title> List of Spanish regional legislatures </page_title> 
<section_title> Autonomous Communities </section_title> 
Ordered Content: Parliament of La Rioja Sp. Parlamento de La Rioja
Structured Text: The Parliament of La Rioja, also known as Parlamento de La Rioja, is part of the Spanish regional legislatures.

Your task is to transform the structured content into fluent, factually accurate natural language, ensuring the final output is polished and adheres to standard language conventions, with no omitted or added information. Please produce the final text output.' '''



[1m> Entering new AgentExecutor chain.

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "The Parliament of La Rioja, also known as Parlamento de La Rioja, is part of the Spanish regional legislatures."
}
```[0m

[1m> Finished chain.[0m
WORKER: {'input': "'''The data provided is as follows:\n<page_title> List of Spanish regional legislatures </page_title> \n<section_title> Autonomous Communities </section_title> \nOrdered Content: Parliament of La Rioja Sp. Parlamento de La Rioja\nStructured Text: The Parliament of La Rioja, also known as Parlamento de La Rioja, is part of the Spanish regional legislatures.\n\nYour task is to transform the structured content into fluent, factually accurate natural language, ensuring the final output is polished and adheres to standard language conventions, with no omitted or added information. Please produce the final text output.' '''", 'output': 'The Parliament of La Rioja, also known as Parlamento de La Rioja, is part of the Spanish regional legislatures.'}


Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 38060.83it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.2 to v2.5.1.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../.cache/huggingface/hub/models--Unbabel--wmt22-cometkiwi-da/snapshots/1ad785194e391eebc6c53e2d0776cada8f83179a/checkpoints/model.ckpt`
Encoder model frozen.
/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/train

Metric Evaluation Result: 0.358


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The worker's output is correct in terms of content and accurate reflection of the input. However, the metric evaluation result of 0.358 is below the average threshold of 0.6, indicating that the output may not meet the quality expectations required for the task.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AGGREGATOR: The Parliament of La Rioja, also known as Parlamento de La Rioja, is part of the Spanish regional legislatures.


In [6]:
save_result_to_json(state, filename=f"{name}_{num}_{add_plan}.json")

Results saved to results/totto_345_False.json


In [7]:
prediction

'The Parliament of La Rioja, also known as Parlamento de La Rioja, is part of the Spanish regional legislatures.'

In [8]:
ground_truth

['The Parliament of La Rioja (Spanish: Parlamento de La Rioja) is one of the autonomous communities of Spain.',
 'The Parliament of La Rioja (Spanish: Parlamento de La Rioja) is one of the autonomous communities of Spain.',
 'The Parliament of La Rioja (Spanish: Parlamento de La Rioja) is one of the autonomous communities of Spain.']

In [9]:
target

'The Parliament of La Rioja (Spanish: Parlamento de La Rioja) is one of the autonomous communities of Spain.'

In [10]:
llm.id

NameError: name 'llm' is not defined

In [None]:
# from datasets import load_dataset

# rotowire = load_dataset("mrm8488/rotowire-sbnation")
# turku_hockey = datasets.load_dataset('GEM/turku_hockey_data2text')
# totto = datasets.load_dataset('GEM/totto')
# basketball = datasets.load_dataset('GEM/sportsett_basketball')
# webnlg = datasets.load_dataset('GEM/web_nlg', 'en')
# weather = datasets.load_dataset('GEM/conversational_weather')
# dart = datasets.load_dataset('GEM/dart')
# mlb = datasets.load_dataset('GEM/mlb_data_to_text')