In [1]:
import os
import json
from typing import List, Literal, Optional, Text, Union, Dict, Any
from IPython.display import Image, display
from langchain_core.runnables.graph_mermaid import MermaidDrawMethod
from agents.agents_modules.workflow import build_agent_workflow
from agents.dataloader import load_dataset_by_name, extract_example

  from .autonotebook import tqdm as notebook_tqdm
PyTorch version 2.8.0.dev20250515+cu128 available.


In [2]:
def save_result_to_json(state: dict, filename: str = "result.json", directory: str = "results") -> None:
    """
    Saves the given agent workflow state to a JSON file in a specified directory.

    The state is converted before encoding:
      * objects exposing a callable ``model_dump`` (e.g. Pydantic ``BaseModel``
        instances) are replaced by the recursively converted result of that call;
      * dicts are converted value by value (keys are left untouched);
      * lists, tuples, sets and frozensets become lists of converted items;
      * any remaining value the ``json`` module cannot encode is written as
        ``str(value)``.

    The JSON text is fully built in memory before the file is opened, so a
    failure while encoding does not leave a truncated file on disk.

    Args:
        state (dict): The final state returned by the agent graph execution.
        filename (str): The name of the JSON file to save.
        directory (str): The directory where the file will be saved.

    Raises:
        IsADirectoryError: If the target file path is a directory.
    """

    file_path = os.path.join(directory, filename)
    # Create the parent of the final path (not just `directory`) so that a filename
    # containing a sub-folder works; fall back to "." when there is no directory part,
    # because os.makedirs("") raises.
    os.makedirs(os.path.dirname(file_path) or ".", exist_ok=True)

    if os.path.isdir(file_path):
        raise IsADirectoryError(f"Cannot write to '{file_path}' because it is a directory.")

    # Recursively convert objects to serializable types
    def make_serializable(obj):
        dump = getattr(obj, "model_dump", None)
        # Skip classes: calling model_dump on the class itself (unbound) would fail.
        if callable(dump) and not isinstance(obj, type):
            # Recurse into the dump: it may itself hold nested models or tuples/sets.
            return make_serializable(dump())
        if isinstance(obj, dict):
            return {k: make_serializable(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple, set, frozenset)):
            return [make_serializable(x) for x in obj]
        return obj

    serializable_state = make_serializable(dict(state))

    # Encode first, write second; `default=str` is the last-resort fallback for
    # leaf values that json cannot encode natively.
    payload = json.dumps(serializable_state, indent=4, default=str)

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(payload)

    print(f"Results saved to {file_path}")


In [3]:
# Direct Hugging Face `datasets` loading calls, kept for reference only;
# the notebook loads its data through `load_dataset_by_name` instead.
# import datasets

# rotowire = datasets.load_dataset("mrm8488/rotowire-sbnation")
# turku_hockey = datasets.load_dataset('GEM/turku_hockey_data2text')
# totto = datasets.load_dataset('GEM/totto')
# sportsett_basketball = datasets.load_dataset('GEM/sportsett_basketball')
# webnlg = datasets.load_dataset('GEM/web_nlg', 'en')
# conversational_weather = datasets.load_dataset('GEM/conversational_weather')
# dart = datasets.load_dataset('GEM/dart')
# mlb = datasets.load_dataset('GEM/mlb_data_to_text')

In [4]:
name = "dart"  # Change this to the dataset you want to load
num = -10  # Sample id (negative here; presumably counted from the end of the split — confirm)

# Keep the loaded dataset under its own name so that `data` only ever refers to the
# selected sample's input (the value later passed to the workflow), instead of being
# bound first to the whole dataset and then rebound to a different kind of object.
dataset = load_dataset_by_name(name)
sample = extract_example(name, dataset["validation"][num])

# Fields of the extracted example; each falls back to an empty string when the key is absent.
data = sample.get('input', '')
ground_truth = sample.get('references', "")
target = sample.get('target', "")

# Prompt given to the agent workflow; the sample input is embedded verbatim at the end.
query = f"""You are an agent designed to generate text from data for a data-to-text natural language generation. You can be provided data in the form of xml, table, meaning representations, graphs etc. 
Your task is to generate the appropriate text given the data information without omitting any field or adding extra information in essence called hallucination.
Here is the data generate text using table data:
{data}"""

print(f"Input: {data}")
print(f"Target: {target}")
print(f"Ground Truth: {ground_truth}")

Loading dataset: dart
Input: [['Wildwood', 'eatType', 'coffee shop'], ['Wildwood', 'food', 'English'], ['Wildwood', 'priceRange', 'less than £20'], ['Wildwood', 'customer rating', 'low'], ['Wildwood', 'near', 'Ranch']]
Target: Wildwood is a coffee shop prices are less than £20 serving English food near the Ranch with a low customer rating
Ground Truth: ['Wildwood is a coffee shop prices are less than £20 serving English food near the Ranch with a low customer rating', 'located near Ranch Wildwood coffee shop serves English food with a low customer rating prices are less than £20']


In [5]:
# Provider name handed to build_agent_workflow; the trailing comment lists the
# values the author considered.
provider = "openai" #ollama, openai, hf, aixplain
# NOTE(review): add_plan is not referenced anywhere else in the visible notebook —
# confirm whether it should be passed to build_agent_workflow or removed.
add_plan = False # True or False
# Build the workflow object; a later cell calls .invoke() on it.
process_flow = build_agent_workflow(provider=provider)
# Optional (disabled): render the workflow graph as a Mermaid PNG inline.
# display(Image(process_flow.get_graph(xray=True).draw_mermaid_png(draw_method=MermaidDrawMethod.PYPPETEER)))
# display(Image(process_flow.get_graph(xray=True).draw_mermaid_png()))

In [None]:
# Starting state for the workflow run: the prompt and raw sample input, empty
# bookkeeping fields, zeroed counters, and the iteration cap.
initial_state = dict(
    user_prompt=query,
    raw_data=data,
    history_of_steps=[],
    final_response="",
    next_agent="",
    next_agent_payload="",
    current_step=0,
    iteration_count=0,
    max_iteration=60,
)

# The recursion limit passed in the run config reuses the state's own max_iteration value.
state = process_flow.invoke(
    initial_state,
    config=dict(recursion_limit=initial_state["max_iteration"]),
)

# Text stored under "final_response" in the state returned by the run.
prediction = state["final_response"]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood is a coffee shop located near Ranch, offering English food. The price range is less than £20, although it has a low customer rating."
}
```[0m

[1m> Finished chain.[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: CORRECT


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3mAction:
```
{
  "action": "Final Answer",
  "action_input": "<snt>The coffee shop is called Wildwood.</snt><snt>It serves English food.</snt><snt>The price range is less than £20.</snt><snt>It has a low customer rating.</snt><snt>Wildwood is located near Ranch.</snt>"
}
```[0m

[1m> Finished chain.[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: CORRECT


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood is a coffee shop that serves English food and is located near Ranch. The price range at Wildwood is less than £20, but it has a low customer rating."
}
```[0m

[1m> Finished chain.[0m


Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 34379.54it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.2 to v2.5.1.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../.cache/huggingface/hub/models--Unbabel--wmt22-cometkiwi-da/snapshots/1ad785194e391eebc6c53e2d0776cada8f83179a/checkpoints/model.ckpt`
Encoder model frozen.
/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/train

Metric Evaluation Result: 0.303


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output omits the fact that "Wildwood" is the name of the coffee shop in the introductory sentence, leading to a loss of clarity. Additionally, the phrase "but it has a low customer rating" could imply a contrast that isn't supported by the structured content, which does not specify that the low rating affects the offering or pricing. This could mislead the reader.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "The coffee shop is called Wildwood. It serves English food and has a price range of less than £20. Wildwood is located near Ranch, although it has a low customer rating."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.82it/s]


Metric Evaluation Result: 0.261


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output omits the requirement to explicitly state the name "Wildwood" in the introductory sentence, which detracts from clarity. Additionally, the phrasing "although it has a low customer rating" suggests a misleading contrast. The response should be adjusted to eliminate these issues.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood is a coffee shop that serves English food and is located near Ranch. The price range at Wildwood is less than £20, and it has a low customer rating."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.18it/s]


Metric Evaluation Result: 0.28


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The worker's output omits the explicit introductory statement that clearly identifies the coffee shop as "Wildwood" and inadvertently suggests a contrast by presenting the low customer rating in the same sentence. The output should have placed more emphasis on "Wildwood" in a separate introductory sentence and refrained from any implication of a contrast regarding the rating.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood is a coffee shop that serves English food. The price range is less than £20. It is located near Ranch. However, it has a low customer rating."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  2.17it/s]


Metric Evaluation Result: 0.273


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output does not clearly state "Wildwood" as the coffee shop in the introductory sentence and includes the low customer rating in a contrasting manner by using "However." This does not adhere to the guidelines of clear and straightforward expression as requested.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood is a coffee shop that serves English food, with a price range of less than £20. It is located near Ranch."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


Metric Evaluation Result: 0.269


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output omits the mention of the low customer rating, which is a key factual element that was required to be included. Additionally, the introductory sentence could be more explicit in presenting the coffee shop as the main subject.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood is a coffee shop that serves English food and is located near Ranch. The price range is less than £20, but it has a low customer rating."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:01<00:00,  1.15s/it]


Metric Evaluation Result: 0.284


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output omits the mention of the low customer rating in the introductory sentence, which is crucial information. It should explicitly state that Wildwood has a low customer rating while maintaining clarity.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood is a coffee shop that serves English food, located near Ranch. It offers meals at a price range of less than £20. However, it is noteworthy that Wildwood has a low customer rating."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.39it/s]


Metric Evaluation Result: 0.309


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The low customer rating should have been emphasized more prominently in the introductory sentence as per the orchestrator's feedback.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "The coffee shop called Wildwood serves English food and is located near Ranch. It offers meals at a price range of less than £20. However, it is important to note that Wildwood has a low customer rating."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.91it/s]


Metric Evaluation Result: 0.272


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The worker's output omits the explicit mention of the coffee shop's low customer rating in the introductory sentence as instructed. Instead, it mentions the low rating later in the text, failing to emphasize it prominently in line with the orchestrator's feedback. Additionally, the output should include all points in a more cohesive manner, integrating the details more effectively. Therefore, the output does not fully meet the requirements of the task.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood is a coffee shop that has a low customer rating. It serves English food and is located near Ranch. The price range for meals is less than £20."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.61it/s]


Metric Evaluation Result: 0.275


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The worker's output fails to emphasize the low customer rating prominently in the introductory sentence, as specified in the instructions. Additionally, the output could improve in integrating all details more clearly. The arrangement does not fully align with the requirements set by the orchestrator.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood, a coffee shop located near Ranch, has a notable low customer rating. It specializes in English food and offers a price range of less than £20."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.16it/s]


Metric Evaluation Result: 0.266


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output omits the required emphasis on the low customer rating in the introductory sentence. Additionally, it does not present the content in a way that integrates all details clearly for coherence. The phrase "notable low customer rating" may also create a misleading implication rather than emphasizing the low rating as requested.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood has a low customer rating. This coffee shop serves English food and is located near Ranch. The price range for meals at Wildwood is less than £20."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  3.50s/it]


Metric Evaluation Result: 0.278


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output does not adequately emphasize the low customer rating in the introductory sentence as required. While it includes the details, the structure does not align with the task instructions that specified to prominently feature the low customer rating in the introduction. Additionally, the integration of details lacks coherence and clarity, especially in how the key facts are presented.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "Wildwood coffee shop has a low customer rating, which is a crucial point to note. This coffee shop serves English food and has a price range of less than £20. Additionally, Wildwood is conveniently located near Ranch."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.49it/s]


Metric Evaluation Result: 0.282


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: The output does not correctly reflect the specific instruction to prominently state the low customer rating in the introductory sentence. Instead, it begins with the name of the coffee shop and mentions the low rating afterwards. This misalignment with the task requirements affects the clarity and emphasis needed for the response.


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AGGREGATOR: Wildwood coffee shop has a low customer rating, which is a crucial point to note. This coffee shop serves English food and has a price range of less than £20. Additionally, Wildwood is conveniently located near Ranch.


In [7]:
# Write the final workflow state to "<name>_<num>.json" inside the function's
# default "results" directory.
save_result_to_json(state, filename=f"{name}_{num}.json")

Results saved to results/dart_-10.json


In [8]:
# Display the workflow's final response (state['final_response']).
prediction

'Wildwood coffee shop has a low customer rating, which is a crucial point to note. This coffee shop serves English food and has a price range of less than £20. Additionally, Wildwood is conveniently located near Ranch.'

In [9]:
# Display the sample's 'references' entry for comparison with the prediction.
ground_truth

['Wildwood is a coffee shop prices are less than £20 serving English food near the Ranch with a low customer rating',
 'located near Ranch Wildwood coffee shop serves English food with a low customer rating prices are less than £20']

In [10]:
# Display the sample's 'target' entry.
target

'Wildwood is a coffee shop prices are less than £20 serving English food near the Ranch with a low customer rating'