In [1]:
import os
import json
from typing import List, Literal, Optional, Text, Union, Dict, Any
from IPython.display import Image, display
from langchain_core.runnables.graph_mermaid import MermaidDrawMethod
from agents.agents_modules.workflow import build_agent_workflow
from agents.dataloader import load_dataset_by_name, extract_example

  from .autonotebook import tqdm as notebook_tqdm
PyTorch version 2.8.0.dev20250515+cu128 available.


In [2]:
def save_result_to_json(state: dict, filename: str = "result.json", directory: str = "results") -> None:
    """
    Saves the given agent workflow state to a JSON file in a specified directory.

    The state is converted to plain JSON-compatible containers and serialized
    completely in memory before the target file is opened. If serialization
    fails, no file is created and an existing file at the same path is left
    untouched (writing straight into an open file would truncate it first).

    Args:
        state (dict): The final state returned by the agent graph execution.
        filename (str): The name of the JSON file to save.
        directory (str): The directory where the file will be saved.

    Raises:
        IsADirectoryError: If the target file path is a directory.
        TypeError: If the state contains a value that cannot be represented as JSON.
    """

    file_path = os.path.join(directory, filename)
    os.makedirs(directory, exist_ok=True)

    if os.path.isdir(file_path):
        raise IsADirectoryError(f"Cannot write to '{file_path}' because it is a directory.")

    # Recursively convert objects to serializable types
    def make_serializable(obj):
        if isinstance(obj, (list, tuple, set, frozenset)):
            # json cannot encode sets, and tuples may hold nested models;
            # both become lists. Set element order is whatever iteration yields.
            return [make_serializable(x) for x in obj]
        if hasattr(obj, "model_dump"):  # Pydantic BaseModel (or anything exposing model_dump)
            # Recurse into the dump: it can still contain sets, tuples or
            # further objects that expose model_dump.
            return make_serializable(obj.model_dump())
        if isinstance(obj, dict):
            return {k: make_serializable(v) for k, v in obj.items()}
        return obj

    serializable_state = make_serializable(dict(state))

    # Serialize first so a TypeError is raised before the file is opened in "w" mode.
    payload = json.dumps(serializable_state, indent=4)

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(payload)

    print(f"Results saved to {file_path}")


In [3]:
# Reference only (kept commented out): Hugging Face datasets that can be loaded directly.
# from datasets import load_dataset

# rotowire = load_dataset("mrm8488/rotowire-sbnation")
# turku_hockey = load_dataset('GEM/turku_hockey_data2text')
# totto = load_dataset('GEM/totto')
# sportsett_basketball = load_dataset('GEM/sportsett_basketball')
# webnlg = load_dataset('GEM/web_nlg', 'en')
# conversational_weather = load_dataset('GEM/conversational_weather')
# dart = load_dataset('GEM/dart')
# mlb = load_dataset('GEM/mlb_data_to_text')

In [4]:
name = "dart"  # Change this to the dataset you want to load
num = -10  # Sample id (presumably a negative value counts from the end of the split — confirm)

# Keep the loaded dataset under its own name; `data` is reserved for the single
# sample's input, which the prompt below and the workflow cell consume.
dataset = load_dataset_by_name(name)
sample = extract_example(name, dataset["validation"][num])
data = sample.get('input', '')
ground_truth = sample.get('references', "")
target = sample.get('target', "")

# Prompt handed to the agent workflow as the user request; the sample input is appended verbatim.
query = f"""You are an agent designed to generate text from data for a data-to-text natural language generation. You can be provided data in the form of xml, table, meaning representations, graphs etc. 
Your task is to generate the appropriate text given the data information without omitting any field or adding extra information in essence called hallucination.
Here is the data generate text using table data:
{data}"""

print(f"Input: {data}")
print(f"Target: {target}")
print(f"Ground Truth: {ground_truth}")

Loading dataset: dart
Input: [['Wildwood', 'eatType', 'coffee shop'], ['Wildwood', 'food', 'English'], ['Wildwood', 'priceRange', 'less than £20'], ['Wildwood', 'customer rating', 'low'], ['Wildwood', 'near', 'Ranch']]
Target: Wildwood is a coffee shop prices are less than £20 serving English food near the Ranch with a low customer rating
Ground Truth: ['Wildwood is a coffee shop prices are less than £20 serving English food near the Ranch with a low customer rating', 'located near Ranch Wildwood coffee shop serves English food with a low customer rating prices are less than £20']


In [18]:
# Provider name handed to build_agent_workflow; the accepted alternatives are listed inline.
provider = "openai" #ollama, openai, hf, aixplain
# NOTE(review): add_plan is not referenced by any cell visible in this notebook — confirm it is still needed.
add_plan = False # True or False
# Build the agent workflow that the next cell invokes.
process_flow = build_agent_workflow(provider=provider)
# Optional: render the workflow graph inline (two alternative Mermaid draw methods).
# display(Image(process_flow.get_graph(xray=True).draw_mermaid_png(draw_method=MermaidDrawMethod.PYPPETEER)))
# display(Image(process_flow.get_graph(xray=True).draw_mermaid_png()))

In [19]:
# Starting state for the workflow: the prompt and raw sample input, with every
# bookkeeping field empty or zeroed.
initial_state = dict(
    user_prompt=query,
    raw_data=data,
    history_of_steps=[],
    final_response="",
    next_agent="",
    next_agent_payload="",
    current_step=0,
    iteration_count=0,
    max_iteration=60,
)

# The recursion limit passed to invoke is taken from the state's own max_iteration.
run_config = {"recursion_limit": initial_state["max_iteration"]}
state = process_flow.invoke(initial_state, config=run_config)

# Text read back from the final state; later cells display and save it.
prediction = state['final_response']

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m{
  "action": "Final Answer",
  "action_input": "<coffee_shop>\n  <name>Wildwood</name>\n  <eat_type>coffee shop</eat_type>\n  <food>English</food>\n  <price_range>less than £20</price_range>\n  <customer_rating>low</customer_rating>\n  <near>Ranch</near>\n</coffee_shop>"
}[0m

[1m> Finished chain.[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: CORRECT


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m```json
{
  "action": "Final Answer",
  "action_input": "<snt>The coffee shop named Wildwood serves English food.</snt><snt>It has a price range of less than £20.</snt><snt>The customer rating is low.</snt><snt>It is located near Ranch.</snt>"
}
```[0m

[1m> Finished chain.[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: CORRECT


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




[1m> Entering new AgentExecutor chain...[0m


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[32;1m[1;3m```
{
  "action": "Final Answer",
  "action_input": "The coffee shop named Wildwood serves English food, has a price range of less than £20, is located near Ranch, and has a low customer rating."
}
```[0m

[1m> Finished chain.[0m


/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.79it/s]


Metric Evaluation Result: 0.248


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INSPECTOR: CORRECT


HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AGGREGATOR: The coffee shop named Wildwood serves English food, has a price range of less than £20, is located near Ranch, and has a low customer rating.


In [7]:
# Persist the final workflow state under the default "results" directory, named "<dataset>_<sample id>.json".
save_result_to_json(state, filename=f"{name}_{num}.json")

Results saved to results/dart_-10.json


In [8]:
# Final response taken from the workflow state.
# NOTE(review): the saved output of this cell differs from the AGGREGATOR line logged by the
# workflow run, and the execution counts are out of order — re-run after the workflow cell.
prediction

'Wildwood is a coffee shop that serves English food, with a price range of less than £20. However, it has a low customer rating and is located near Ranch.'

In [9]:
# The sample's 'references' entry, shown for comparison with the prediction.
ground_truth

['Wildwood is a coffee shop prices are less than £20 serving English food near the Ranch with a low customer rating',
 'located near Ranch Wildwood coffee shop serves English food with a low customer rating prices are less than £20']

In [10]:
# The sample's 'target' entry.
target

'Wildwood is a coffee shop prices are less than £20 serving English food near the Ranch with a low customer rating'