In [1]:
import os
import json
from typing import List, Literal, Optional, Text, Union, Dict, Any
from IPython.display import Image, display
from langchain_core.runnables.graph_mermaid import MermaidDrawMethod
from agents.agents_modules.workflow import build_agent_workflow, workers_dict
from agents.dataloader import load_dataset_by_name, extract_example

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
name = "totto"  # Change this to the dataset you want to load
n = 249 # Sample id

data = load_dataset_by_name(name)
sample = extract_example(name, data["train"][n])
data = sample['input']
ground_truth = sample['references']
target = sample['target']

query = f"""You are an agent designed to generate text from data for a data-to-text natural language generation. You can be provided data in the form of xml, table, meaning representations, graphs etc. 
Your task is to generate the appropriate text given the data information without omitting any field or adding extra information in essence called hallucination.
Here is the data generate text using table data:
{data}"""

initial_state = {
    "input": query,
    "plan": [],
    "result_steps": [],
    "response": "",
    "chat_history": [],
    "next": "",
    "next_input": "",
    "agent_outcome": "",
    "current_step": 0,
    "team_iterations": 0,
    "recursion_limit": 30,
}

print(f"Input: {data}")
print(f"Target: {target}")
print(f"Ground Truth: {ground_truth}")

Loading dataset: totto
Input: <page_title> Liza Lapira </page_title> <section_title> Filmography </section_title> <table> <cell> 2009 <col_header> Year </col_header> </cell> <cell> Fast & Furious <col_header> Title </col_header> </cell> <cell> Sophie Trinh <col_header> Role </col_header> </cell> <cell> 2010 <col_header> Year </col_header> </cell> <cell> Repo Men <col_header> Title </col_header> </cell> <cell> Alva <col_header> Role </col_header> </cell> </table>
Target: In 2009, Liza Lapira played Sophie Trinh in the film Fast & Furious and Alva in the 2010 film Repo Men.
Ground Truth: ['In 2009, Liza Lapira played Sophie Trinh in the film Fast & Furious and Alva in the 2010 film Repo Men.']


In [None]:
provider = "openai" #ollama, openai, hf, aixplain
process_flow = build_agent_workflow(add_plan=True, workers_dict=workers_dict, provider=provider)
# display(Image(process_flow.get_graph(xray=True).draw_mermaid_png(draw_method=MermaidDrawMethod.PYPPETEER)))
# display(Image(process_flow.get_graph(xray=True).draw_mermaid_png()))

In [4]:
state = process_flow.invoke(initial_state, config={"recursion_limit": initial_state["recursion_limit"]})
prediction = state['response']

ORCHESTRATOR OUTPUT:
Thought: The user has provided structured data about Liza Lapira's filmography in a tabular format. This includes the titles of films she appeared in, the year of release, and the roles she played. I will start with the 'content ordering' worker to gather and arrange the relevant fields in the correct order for our final output.

Worker: content ordering
Worker Input: 
- Original input data: 
  `<page_title> Liza Lapira </page_title> <section_title> Filmography </section_title> <table> <cell> 2009 <col_header> Year </col_header> </cell> <cell> Fast & Furious <col_header> Title </col_header> </cell> <cell> Sophie Trinh <col_header> Role </col_header> </cell> <cell> 2010 <col_header> Year </col_header> </cell> <cell> Repo Men <col_header> Title </col_header> </cell> <cell> Alva <col_header> Role </col_header> </cell> </table>`
- Current context: Focus on arranging the filmography data extracted from the table, noting the years, titles, and roles. 

Ordered Fields:
1.

In [5]:
prediction

'Liza Lapira\'s filmography includes the following notable roles: In 2009, she portrayed Sophie Trinh in the film "Fast & Furious." The following year, in 2010, she played the role of Alva in the movie "Repo Men."'

In [6]:
ground_truth

['In 2009, Liza Lapira played Sophie Trinh in the film Fast & Furious and Alva in the 2010 film Repo Men.']

In [7]:
llm.id

NameError: name 'llm' is not defined

In [None]:
from agents.evaluator import evaluate_single

scores = evaluate_single(ground_truth, prediction, data)
for metric, score in scores.items():
    print(f"{metric}: {score:.4f}")

Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 91980.35it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.1.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/2760a223ac957f30acfb18c8aa649b01cf1d75f2/checkpoints/model.ckpt`
Encoder model frozen.
/home/chinonso/anaconda3/envs/lang2/lib/python3.10/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BleurtSPTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Xet Storage is enabled for this repo, but the 'hf_xet' package is not 

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
import os
import json

dir_path = "results"
file_path = os.path.join(dir_path, "result.json")

os.makedirs(dir_path, exist_ok=True)

if os.path.isdir(file_path):
    raise IsADirectoryError(f"Cannot write to '{file_path}' because it is a directory.")

# Recursively convert objects to serializable types
def make_serializable(obj):
    if isinstance(obj, list):
        return [make_serializable(x) for x in obj]    

    elif hasattr(obj, "model_dump"):  # Pydantic BaseModel
        return obj.model_dump()
    elif isinstance(obj, dict):
        return {k: make_serializable(v) for k, v in obj.items()}
    else:
        return obj

serializable_state = make_serializable(dict(state))

with open(file_path, "w") as f:
    json.dump(serializable_state, f, indent=4)


In [None]:
import os
print("/home/chinonso/Data-to-text-Agent", '\n',os.listdir("/home/chinonso/Data-to-text-Agent"))
print("/home/chinonso/Data-to-text-Agent/agents", '\n', os.listdir("/home/chinonso/Data-to-text-Agent/agents"))

/home/chinonso/Data-to-text-Agent 
 ['lang2.txt', 'image.png', 'workflow.ipynb', 'agents_modules_', 'flow.ipynb', '.git', 'README.md', 'requirements_lang_mac.txt', 'somto.txt', 'results', 'agents', 'requirements_lang2_mac.txt', 'smt.txt', 'requirements.txt', 'Rotowire.ipynb', 'LICENSE']
/home/chinonso/Data-to-text-Agent/agents 
 ['dataloader.py', 'llm_model.py', 'evaluator.py', 'agents_modules', 'agent_prompts.py', '__pycache__', '__init__.py', 'utilities']


In [None]:
workers_list = [key for key, value in workers.items()]

# ——— Build your StateGraph ———
agent_workflow = StateGraph(StageExecute)

# 1) planner node
# agent_workflow.add_node("planner", Planner.run_model(Planner.create_model()))

# 2) orchestrator node
agent_workflow.add_node(
    "orchestrator",
    Orchestrator.run_model(Orchestrator.create_model(), workers=workers_list)
)

# 3) worker nodes
#    here tools can be [] or any list of LangChain tools you have;
#    initial_query is whatever you're passing in as the user input.
tools = []
initial_query = StageExecute["input"]  # or whatever your entry payload is
worker_names = add_workers(workers, agent_workflow, tools, initial_query)
agent_workflow.add_node("inspector", Inspector.run_model(Inspector.create_model()))
agent_workflow.add_node("evaluator", Inspector.run_model(Inspector.create_model()))
agent_workflow.add_node("aggregator", ResponseAggregator.run_model(ResponseAggregator.create_model()))


# 4) wire up the edges
#  start → planner → orchestrator
# agent_workflow.add_edge(START, "planner")
# agent_workflow.add_edge("planner", "orchestrator")
agent_workflow.add_edge(START, "orchestrator")

#  orchestrator uses the "next" field in state to pick which worker to run
#  we map each worker name to itself, and FINISH to END
conditional_map = {name: name for name in worker_names}
conditional_map["inspector"] = "inspector"  # Add inspector explicitly

agent_workflow.add_conditional_edges(
    "orchestrator",
    lambda state: state["next"],
    conditional_map
)

#  after a worker runs, end the graph
for name in worker_names:
    agent_workflow.add_edge(name, "inspector")
    
agent_workflow.add_conditional_edges("inspector", transition_after_inspection)
agent_workflow.add_edge("evaluator", "inspector")
agent_workflow.add_edge("aggregator", END)

#  set the entry point
# agent_workflow.set_entry_point("planner")
agent_workflow.set_entry_point("orchestrator")

#  compile into a runnable app
process_flow = agent_workflow.compile()

display(Image(process_flow.get_graph(xray=True).draw_mermaid_png()))

NameError: name 'workers' is not defined

In [None]:
# pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128

In [None]:
# from datasets import load_dataset

# rotowire = load_dataset("mrm8488/rotowire-sbnation")
# turku_hockey = datasets.load_dataset('GEM/turku_hockey_data2text')
# totto = datasets.load_dataset('GEM/totto')
# basketball = datasets.load_dataset('GEM/sportsett_basketball')
# webnlg = datasets.load_dataset('GEM/web_nlg', 'en')
# weather = datasets.load_dataset('GEM/conversational_weather')
# dart = datasets.load_dataset('GEM/dart')
# mlb = datasets.load_dataset('GEM/mlb_data_to_text')

In [None]:
# from langgraph.graph import START, END, StateGraph
# from utils import StageExecute, Agent, AgentExecuteInput
# from IPython.display import Image, SVG, display
# from typing import List, Literal, Optional, Text, Union, Dict, Any

# class Planner:
#     def create_model(self, task):
#         # Logic to generate plans
#         pass
#     @classmethod
#     def run_model(cls,):

#         pass

# class Orchestrator:
#     def create_model(self, task):
#         # Logic to generate plans
#         pass
#     @classmethod
#     def run_model(cls,):

#         pass

# class Worker:
#     def create_model(self, task):
#         # Logic to generate plans
#         pass
#     @classmethod
#     def run_model(cls,):

#         pass

# class Inspector:
#     def create_model(self, task):
#         # Logic to generate plans
#         pass
#     @classmethod
#     def run_model(cls,):

#         pass

# class ResponseAggregator:
#     def create_model(self, task):
#         # Logic to generate plans
#         pass
#     @classmethod
#     def run_model(cls,):

#         pass
    
# def transition_after_inspection(state: StageExecute) -> Literal["orchestrator", "aggregator"]:
#     """Decides the next stage after inspection based on task correctness."""
#     if "aggregator" in state and state["aggregator"]:
#         return "aggregator"
#     else:
#         return "orchestrator"

In [None]:
# workers = ["worker1", "worker2"]  # Example workers
# conditional_map = {worker: "inspector" for worker in workers}

# agent_workflow = StateGraph(StageExecute)
# agent_workflow.add_node("planner", Planner.run_model)
# agent_workflow.add_node("orchestrator", Orchestrator.run_model)
# agent_workflow.add_node("worker", Worker.run_model)
# agent_workflow.add_node("inspector", Inspector.run_model)
# agent_workflow.add_node("aggregator", ResponseAggregator.run_model)

# agent_workflow.add_edge(START, "planner")
# agent_workflow.add_edge("planner", "orchestrator")
# agent_workflow.add_edge("orchestrator", "worker")
# agent_workflow.add_edge("worker", "inspector")
# agent_workflow.add_conditional_edges("inspector", transition_after_inspection)
# agent_workflow.add_edge("aggregator", END)

# process_flow = agent_workflow.compile()

# display(Image(process_flow.get_graph(xray=True).draw_mermaid_png()))
# # display(SVG(process_flow.get_graph(xray=True).draw_mermaid_png()))

In [None]:
# # workers = ["worker1", "worker2"]  # Example workers
# conditional_map = {worker: "inspector" for worker in workers.keys()}

# def add_workers(tools= , query= , task_name):
# for task_name, task_description in workers.items():
    

# agent_workflow = StateGraph(StageExecute)
# agent_workflow.add_node("planner", Planner.run_model(Planner.create_model()))
# agent_workflow.add_node("orchestrator", Orchestrator.run_model(Orchestrator.create_model(), workers=workers_list))
# agent_workflow.add_node("worker", Worker.run_model(Worker.create_model(tools=workers_list, query=StageExecute["input"]), task_name=))
# # agent_workflow.add_node("inspector", Inspector.run_model)
# # agent_workflow.add_node("aggregator", ResponseAggregator.run_model)

# agent_workflow.add_edge(START, "planner")
# agent_workflow.add_edge("planner", "orchestrator")
# agent_workflow.add_edge("orchestrator", "worker")
# agent_workflow.add_edge("worker", END)
# # agent_workflow.add_edge("worker", "inspector")
# # # agent_workflow.add_edge("inspector", "orchestrator")
# # agent_workflow.add_conditional_edges("inspector", transition_after_inspection)
# # agent_workflow.add_edge("aggregator", END)

# process_flow = agent_workflow.compile()

# # display(Image(process_flow.get_graph(xray=True).draw_mermaid_png()))

In [None]:
# workers = {
#     "content selection": (
#         "The Content Selection agent is responsible for analyzing the raw input data, identifying the key "
#         "elements and relevant features that will form the backbone of the final text output. This agent "
#         "meticulously scans through tables, XML, or any structured data provided, and it distinguishes between "
#         "important information and peripheral details. Its tasks include filtering out noise, determining "
#         "salient content based on context, and ensuring that no essential data points are missed. The output "
#         "from this agent provides a curated list of information that will serve as the foundation for generating "
#         "a coherent and accurate text narrative."
#     ),
#     "text structuring": (
#         "The Text Structuring agent is tasked with taking the curated content from the Content Selection agent "
#         "and organizing it into a fluid and engaging narrative. This agent focuses on the arrangement, flow, "
#         "and logical segmentation of the text to ensure that the final output is not only accurate but also easily "
#         "comprehensible and well-structured. It works by mapping out the overall framework of the narrative, "
#         "allocating content into sections, and applying linguistic transformations that enhance readability. "
#         "Its work ensures that the text is not a mere aggregation of data points, but a well-crafted piece "
#         "that reflects both the detailed insights of the data and the clarity required for a high-quality narrative."
#     ),
#     "surface realization": (
#         "The Surface Realization agent is responsible for generating fluent and grammatically correct natural language "
#         "text from the structured and organized content provided by the previous stages. It combines the selected content "
#         "and structured layout to form complete sentences, using appropriate lexical choices, syntactic constructions, "
#         "and discourse markers. This agent ensures that the final text adheres to linguistic norms, maintains coherence, "
#         "and reads naturally as if written by a human."
#     )
# }