# Agent v2

Define two tools for the new agent: one retrieves a brief biographical summary of a person from Wikipedia, and the other fetches the current president of a given country using a Wikidata SPARQL query.


In [5]:
from langchain_core.tools import tool
import requests
from utils import *

# Define the tools the agent can use. Each function must be decorated with @tool to be exposed as a tool.
@tool
def get_summary_of(person: str) -> str:
    """Fetches a short biographical summary of the specified person from Wikipedia.

    Args:
        person: The name of the person to search for (e.g., 'Ada Lovelace', 'Albert Einstein').

    Returns:
        str: A brief summary of the person based on Wikipedia content, or a
        message starting with 'Error:' if the lookup failed.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import quote

    try:
        # Wikipedia titles use underscores instead of spaces. The rest of the
        # name is percent-encoded so characters such as '/', '?' or '#' cannot
        # change the structure of the request URL.
        title = quote(person.strip().replace(" ", "_"), safe="")
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}"

        # TLS certificate verification is left enabled (the requests default).
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()

        return data.get("extract", "No summary available.")

    except Exception as e:
        # Tool boundary: report the failure as text so the agent can react to
        # it instead of the whole run aborting.
        return f"Error: {e}"


@tool
def get_current_president_of(country: str) -> str:
    """Fetches the name of the current president of the specified country using Wikidata.

    Args:
        country: The name of the country to query (e.g., 'France', 'Argentina').

    Returns:
        str: The name of the current president of the country,
        'President not found.' if Wikidata has no matching statement, or a
        message starting with 'Error:' if the lookup failed.
    """

    try:
        # Step 1: Get Wikidata ID for the country
        search_url = "https://www.wikidata.org/w/api.php"
        search_params = {
            "action": "wbsearchentities",
            "search": country,
            "language": "en",
            "format": "json",
            "type": "item",
        }
        # TLS certificate verification is left enabled (the requests default).
        search_resp = requests.get(search_url, params=search_params, timeout=10)
        search_resp.raise_for_status()

        # An unknown country yields an empty (or missing) "search" list;
        # report that explicitly instead of failing with an IndexError/KeyError.
        matches = search_resp.json().get("search", [])
        if not matches:
            return f"Error: no Wikidata entity found for '{country}'."
        # The first search hit is taken as the country entity.
        entity_id = matches[0]["id"]

        # Step 2: Query president (P35) of the country entity
        sparql_url = "https://query.wikidata.org/sparql"
        query = f"""
        SELECT ?presidentLabel WHERE {{
          wd:{entity_id} wdt:P35 ?president.
          SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
        }}
        """
        headers = {"Accept": "application/sparql-results+json"}
        sparql_resp = requests.get(
            sparql_url,
            params={"query": query},
            headers=headers,
            timeout=10,
        )
        sparql_resp.raise_for_status()
        bindings = sparql_resp.json()["results"]["bindings"]

        if not bindings:
            return "President not found."
        # Only the first binding is returned even if several are present.
        return bindings[0]["presidentLabel"]["value"]

    except Exception as e:
        # Tool boundary: report the failure as text so the agent can react to
        # it instead of the whole run aborting.
        return f"Error: {e}"


Initialise a local LLM and create a ReAct agent configured to answer questions by calling tools that fetch the current president of a country or a short summary of a person from external sources.

In [6]:
from langchain_ollama.chat_models import ChatOllama
from langgraph.prebuilt import create_react_agent

# Instantiate the local LLM through Ollama (Llama 3.2)
llm = ChatOllama(
    model="llama3.2",   # Use the local Llama 3.2 model
    temperature=0
)

# Bind our tools to the LLM
tools = [get_current_president_of,get_summary_of]
llm_with_tools = llm.bind_tools(tools)

# Build the ReAct agent. The tool list is the same `tools` object that was
# bound to the LLM above, so the two can never drift apart. The prompt spells
# out, step by step, which tool to call for each kind of question.
agent = create_react_agent(
    model=llm_with_tools,
    tools=tools,
    prompt="""
You are a ReAct agent. You must use tools to answer questions — do not assume you know any answer beforehand.


If the question is like "Who is the president of COUNTRY?":
1) Call the tool `get_current_president_of` with the country's name.
2) Use the tool's output as your answer.

If the question is like "Give me a summary of PERSON":
1) Call the tool `get_summary_of` with the person's name.
2) Use the tool output to generate a natural, fluent summary.
3) Your answer must be factually faithful to the tool output — do not invent or include information from outside sources.

If the question is like "Give me a summary of the president of COUNTRY":
1) First, call the tool `get_current_president_of` using the country name.
2) Then, take the returned name (the president's name) as a string.
3) Call the tool `get_summary_of` with that name.
4) Finally, use the result to write a fluent, factual summary based strictly on the tool output.

IMPORTANT:
- Do not use the name of the tool as input.
- Only use the actual content returned by the tool (the person's name).

Example:
- Question: "Give me a summary of Barack Obama?"
- Tool output: 'Barack Hussein Obama II is an American politician who was the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African American president in American history. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator from 1997 to 2004.'
- Your response: "Barack Obama was the 44th president of the United States, serving from 2009 to 2017. He was the first African American to hold this office and a member of the Democratic Party."
 
Example 2:
- Question: "Give me a summary of the president of Germany."
- First tool call: `get_current_president_of("Germany")`
- First tool output: "Frank-Walter Steinmeier"
- Second tool call: `get_summary_of("Frank-Walter Steinmeier")`
- Second tool output: "Frank-Walter Steinmeier is a German politician serving as President of Germany since 2017. He previously served twice as Minister for Foreign Affairs and as Chief of the Federal Chancellery. He is a member of the Social Democratic Party of Germany."
- Your response: "Frank-Walter Steinmeier is a German politician who has been President of Germany since 2017. He has also served as Minister for Foreign Affairs and is a member of the Social Democratic Party."

Don't call two tools at the same time — first call one, then the next.
IMPORTANT: Your final answer must only include facts that are *explicitly present* in the tool output. Do not add anything, even if you know it is true. Do not infer, expand, or include details from memory.

Make sure your response is accurate, relevant, and based only on the tool result.
Always make sure to wait for the result of each tool call before using its output in the next step. Do not hardcode or reuse tool names as strings. Use only the content returned by the tool as input for further reasoning or tool calls.

"""

)


Define the agent's state and build a simple graph with a single assistant node. The graph starts by invoking the agent and ends after the assistant provides its response.

In [7]:
from langchain_core.messages import AnyMessage
from langgraph.graph.message import add_messages
from typing import Annotated
from typing_extensions import TypedDict
from langchain.schema.messages import AIMessage,ToolMessage,HumanMessage
from langgraph.graph import START, END, StateGraph

# 1) State
class GraphState(TypedDict):
    """State passed through the graph: the conversation's message history."""

    # `add_messages` is attached via Annotated as the reducer for this key.
    messages: Annotated[list[AnyMessage], add_messages]

def assistant(state: GraphState):
    """Run the ReAct agent on the current history and return its messages.

    Args:
        state: Graph state whose "messages" entry holds the conversation so far.

    Returns:
        dict: A state update with the messages produced by the agent run.
    """
    result = agent.invoke({"messages": state["messages"]})
    # The agent's result already contains the input history followed by the
    # new messages, so it is returned as-is. Prepending state["messages"]
    # again would only send every prior message twice and rely on the
    # `add_messages` reducer to de-duplicate them by ID.
    return {"messages": result["messages"]}

# Build the graph for the agent: a single "assistant" node
builder = StateGraph(GraphState)
builder.add_node("assistant", assistant)

# Linear flow: START -> assistant -> END
builder.add_edge(START, "assistant")
builder.add_edge("assistant", END)

# Compile the graph definition into the runnable used by the cells below
react_graph = builder.compile()


Load a set of questions from a JSON file, compute ToolCallAccuracy for each turn, and save the results to a JSON file.


In [8]:
import json
with open("qa_dataset.json", "r") as f:
    qa_dataset = json.load(f)
    
results=await tool_evaluation(react_graph,qa_dataset,"Results/tools_v2.json")



Saved evaluation results to Results/tools_v2.json


Run the agent on each question, convert the resulting message history into SingleTurnSample format for RAGAS, and store both the conversations and the formatted samples.


In [9]:
ragas_samples = []  # one dict per question: the dumped RAGAS sample
conv = []           # one entry per question: the converted message trace

for q in qa_dataset:
    # Start each run from a fresh history containing only the question
    initial_state = {"messages": [HumanMessage(content=q["question"])]}
    result = react_graph.invoke(initial_state)

    # Normalise the trace with the project helper before storing it
    messages = fix_tool_calls_for_openai_format(result["messages"])
    conv.append(messages)

    # Build the RAGAS sample from the trace and keep its dict representation
    sample = lc_to_ragas_sample(messages)
    ragas_samples.append(sample.model_dump())




Extract key information from each conversation, including the question, tool call, tool response, and final model reply. Save both the full RAGAS samples and a minimal version of the data for later evaluation or inspection.


In [10]:
import json
from langchain_core.messages import HumanMessage, AIMessage

minimal_data = []

for conversation in conv:
    # Key elements of one conversation; later matches overwrite earlier ones,
    # except for the question, which is taken from the first user message.
    question = None
    last_response = None
    tool_calls = None
    tool_message = None

    for msg in conversation:
        if isinstance(msg, HumanMessage) and question is None:
            # Take the first user message as the question
            question = msg.content
        elif isinstance(msg, AIMessage):
            # An AI message is a tool request only if it carries a non-empty
            # "tool_calls" list. Checking with .get() avoids an IndexError
            # (or TypeError) when the key is present but empty or None.
            calls = msg.additional_kwargs.get("tool_calls")
            if calls:
                # Keep the function part of the first tool call
                tool_calls = calls[0]["function"]
            else:
                last_response = msg.content
        elif isinstance(msg, ToolMessage):
            # Save the tool's output message
            tool_message = msg.content

    # Append entry only if question and final response are available
    if question and last_response:
        minimal_data.append({
            "question": question,
            "tool_calls": tool_calls,
            "tool message": tool_message,
            "response": last_response,
        })

# Persist both artefacts: the RAGAS samples (tool evaluation) and the
# minimal conversation records (string evaluation)
outputs = (
    ("Results/ragas_sample_v2.json", ragas_samples),
    ("Results/conversation_v2.json", minimal_data),
)
for out_path, payload in outputs:
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)


---

## EVALUATION

---


Load previously saved RAGAS-formatted data from a JSON file and reconstruct it as a list of SingleTurnSample objects for evaluation.


In [11]:
from ragas.dataset_schema import SingleTurnSample

# The file is read as UTF-8 explicitly; on platforms whose default encoding is
# not UTF-8, non-ASCII characters would otherwise be garbled or fail to decode.
with open("Results/ragas_sample_v2.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Rebuild the list of SingleTurnSample objects
samples = [SingleTurnSample(**d) for d in data]


Set up and run the evaluation of all single-turn samples using RAGAS metrics: context precision, faithfulness, and answer relevancy. Store the results in a DataFrame and export them to a CSV file.


In [12]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics import (LLMContextPrecisionWithoutReference,Faithfulness,ResponseRelevancy,)
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
import asyncio
import pandas as pd


# Judge LLM for the RAGAS metrics: a local Mistral model served by Ollama.
# NOTE(review): confirm the unit of `timeout` expected by this wrapper
# (60000 is very large if it is interpreted as seconds).
local_llm = Ollama(model="mistral", temperature=0, timeout=60000)
wrapped_llm = LangchainLLMWrapper(local_llm)

# Embedding model, wrapped for RAGAS; passed to ResponseRelevancy below
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
ragas_embeddings = LangchainEmbeddingsWrapper(hf_embeddings)

# Metrics, keyed by the name used for each one in the results
metrics = {
    "context_precision_no_ref": LLMContextPrecisionWithoutReference(llm=wrapped_llm),
    "faithfulness": Faithfulness(llm=wrapped_llm),
    "answer_relevancy": ResponseRelevancy(llm=wrapped_llm, embeddings=ragas_embeddings),
}

# NOTE(review): asyncio.run() raises RuntimeError when an event loop is already
# running (as in Jupyter) unless something like nest_asyncio has been applied;
# an earlier cell uses top-level `await` instead — confirm which one is intended.
# This also rebinds `results`, which an earlier cell used for the tool evaluation.
results = asyncio.run(evaluate_all_safe(samples,metrics))

# Tabulate the evaluation results
df_results = pd.DataFrame(results)

# Export without the DataFrame index column
df_results.to_csv("Results/results_v2.csv",index=False)

  local_llm = Ollama(model="mistral", temperature=0, timeout=60000)
  hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
