# Agent 1

Define a dictionary that maps metal names to their corresponding Yahoo Finance ticker symbols for price retrieval using the yfinance library.


In [None]:
# Lookup table: lower-case metal name -> Yahoo Finance ticker symbol.
# A value of None marks a metal that is listed but has no ticker configured;
# get_metal_price_yfinance treats a falsy symbol as "not supported".
METAL_SYMBOLS = {
    # Metals with a configured futures ticker
    "gold": "GC=F",
    "silver": "SI=F",
    "platinum": "PL=F",
    "palladium": "PA=F",
    "copper": "HG=F",
    # Metals without a configured ticker
    "aluminum": None,
    "nickel": None,
    "zinc": None,
    "lead": None,
}

Define three tools for the new agent: one extracts metal information from a PDF, another retrieves recent metal prices from Yahoo Finance, and the third fetches a general description of the metal from Wikipedia.


In [None]:
from langchain_core.tools import tool
import requests
import fitz  # PyMuPDF
import re
import yfinance as yf
from utils import *

# Define the tools for the agent to use. Each function must be decorated with @tool so that it is registered as a tool.
@tool
def get_metal_info(metal_name: str, path: str = "Metals Description 2023.pdf") -> str:
    """
    Returns the description, uses, and price of a specified metal from the PDF.

    Args:
        metal_name (str): Name of the metal to search for (case-insensitive).
        path (str): Path to the PDF file.

    Returns:
        str: Information about the metal or an error message.
    """
    try:
        # An empty/None path (e.g. supplied by the model) falls back to the default PDF.
        if not path:
            path = "Metals Description 2023.pdf"

        # The context manager closes the document even if text extraction fails.
        with fitz.open(path) as doc:
            full_text = "".join(page.get_text() for page in doc)

        # Each entry is expected to look like:
        #   <Name> Description: ... Industrial Uses: ... 2023 Price: <number> USD per gram
        pattern = re.compile(
            r"(?P<name>[A-Za-z]+)\s*Description:\s*(?P<desc>.*?)\s*Industrial Uses:\s*(?P<uses>.*?)\s*2023 Price:\s*(?P<price>[\d\.]+)\s*USD per gram",
            re.DOTALL
        )

        # Index every entry by lower-cased name; a repeated name keeps its last entry.
        metals = {
            match.group("name").strip().lower(): {
                "description": match.group("desc").strip(),
                "uses": match.group("uses").strip(),
                "price": match.group("price").strip(),
            }
            for match in pattern.finditer(full_text)
        }

        key = metal_name.strip().lower()
        if key not in metals:
            return f"No information found for metal '{metal_name}'."

        info = metals[key]
        # Use the normalised key for the heading so surrounding whitespace in
        # the input does not leak into the answer.
        return (
            f"{key.capitalize()}\n"
            f"- Description: {info['description']}\n"
            f"- Industrial Uses: {info['uses']}\n"
            f"- 2023 Price: {info['price']} USD/gram"
        )

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error processing PDF: {str(e)}"


@tool
def get_metal_price_yfinance(metal_name: str) -> str:
    """
    Gets the latest closing price of a metal from Yahoo Finance.

    Args:
        metal_name (str): One of 'gold', 'silver', 'platinum', 'palladium', 'copper'.

    Returns:
        str: Price with quotation unit and date, or error message.
    """
    try:
        # Normalise the same way get_metal_info does (trim + lower-case).
        symbol = METAL_SYMBOLS.get(metal_name.strip().lower())
        if not symbol:
            # Covers both unknown names and names mapped to None.
            return f"Metal '{metal_name}' not supported."

        hist = yf.Ticker(symbol).history(period="5d")
        if hist.empty:
            return f"No price data available for {metal_name}."

        # Skip rows without a close so a trailing NaN is not reported as the price.
        closes = hist["Close"].dropna()
        if closes.empty:
            return f"No price data available for {metal_name}."

        last_price = closes.iloc[-1]
        last_date = closes.index[-1].date()

        # Copper futures (HG=F) are quoted per pound; the other configured
        # tickers are quoted per ounce.
        unit = "pound" if symbol == "HG=F" else "ounce"

        return f"The latest {metal_name} price was {last_price:.2f} USD per {unit} on {last_date}."

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error retrieving {metal_name} price: {e}"
    
@tool
def describe_metal(metal_name: str) -> str:
    """
    Tries to fetch a description of the metal from Wikipedia.
    Falls back from 'metal_name (metal)' to just 'metal_name' if necessary.
    """
    from urllib.parse import quote

    def fetch_summary(title: str) -> str:
        """Return the 'extract' field of the Wikipedia page summary for *title*."""
        # Percent-encode the title so characters such as '/', '?' or '#'
        # cannot alter the request path.
        slug = quote(title.replace(' ', '_'), safe="")
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}"
        # TLS certificate verification is left enabled (the requests default);
        # behind an intercepting proxy, configure a CA bundle instead of
        # disabling verification.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
        return data.get("extract", "No summary found.")

    name = metal_name.strip()
    try:
        return fetch_summary(f"{name} (metal)")
    except requests.HTTPError:
        # The disambiguated title failed with an HTTP error: retry with the bare name.
        try:
            return fetch_summary(name)
        except Exception as e2:
            return f"Error fetching metal description: {e2}"
    except Exception as e:
        # Non-HTTP failures (network, JSON decoding, ...) are reported without a retry.
        return f"Error fetching metal description: {e}"


Initialise a local LLM and create a ReAct agent specialised in metal-related queries. The agent uses three tools to retrieve technical information from a PDF, current market prices from Yahoo Finance, or general descriptions from Wikipedia, following a structured reasoning prompt.


In [None]:
from langchain_ollama.chat_models import ChatOllama
from langgraph.prebuilt import create_react_agent

# Create the model
# Chat model accessed through ChatOllama, using the "llama3.2" model with temperature 0.
llm = ChatOllama(
    model="llama3.2",   
    temperature=0
)

# Link the tools to the LLM
tools = [get_metal_info,describe_metal,get_metal_price_yfinance]
llm_with_tools = llm.bind_tools(tools)

# Build the ReAct agent from the tool-bound model, the same tool list, and the
# system prompt below.
# NOTE(review): the tools are bound to the model above and also passed via
# `tools=` here — presumably one of the two is redundant; confirm against the
# installed langgraph version.
# NOTE(review): the prompt describes every price as USD/ounce, including
# copper — confirm the quotation unit of each ticker.
agent = create_react_agent(
    model=llm_with_tools,
    tools=tools,
    prompt= """

You are a ReAct agent specialized in answering questions about metals.
You have access to the following tools:

1. `get_metal_info(metal_name)`: Retrieves a technical description, industrial uses, and the 2023 reference price for a specific metal from a local PDF document.
2. `get_metal_price_yfinance(metal_name)`: Retrieves the most recent market price (USD/ounce) of common metals like gold, silver, platinum, palladium, and copper.
3. `describe_metal(metal_name)`: Provides a general encyclopedic description of a metal from Wikipedia.

You must follow this step-by-step reasoning:

1. First, identify exactly which tool matches the user's request.
    - Use `get_metal_info` if the question refers to "the document", "technical data", "description", "uses", or "2023 price".
    - Use `get_metal_price_yfinance` if the question is about current or market price.
    - Use `describe_metal` only if the user is asking for general knowledge.

2. Call the tool.

3. Then summarize or quote the tool output explicitly in your final answer. Do not invent information. Do not skip this step.

Your answers must be clear and informative. Do not write anything until the tool has responded. Always base your answers on the tool output.
"""

)



Set up the agent's graph structure: a single assistant node runs the ReAct agent and the graph then goes straight to END. No conditional transition is declared at this level; the loop between the model and its tools (continuing while the last AI message contains a tool call) is handled inside the prebuilt ReAct agent.


In [None]:
from langgraph.prebuilt import ToolNode
from langgraph.graph import START, END, StateGraph
from langchain_core.messages import AnyMessage
from langgraph.graph.message import add_messages
from typing import Annotated
from typing_extensions import TypedDict
from langchain.schema.messages import AIMessage,ToolMessage,HumanMessage

# 1) State
class GraphState(TypedDict):
    """State carried through the graph: the conversation's message list."""
    # The add_messages annotation is the reducer used to combine a node's
    # returned messages with the messages already in the state.
    messages: Annotated[list[AnyMessage], add_messages]

def assistant(state: GraphState):
    """
    Graph node: run the ReAct agent on the conversation held in the state.

    Args:
        state (GraphState): Current graph state; only "messages" is read.

    Returns:
        dict: {"messages": ...} with the message list produced by the agent,
        to be merged into the state by the add_messages reducer.
    """
    result = agent.invoke({"messages": state["messages"]})
    # The agent's result already contains the input messages followed by the
    # new ones, and add_messages merges by message id, so the result is
    # returned as-is rather than concatenated onto state["messages"] again.
    return {"messages": result["messages"]}

# Building the graph for the agent
# The graph holds a GraphState and has a single node, "assistant".
builder = StateGraph(GraphState)
builder.add_node("assistant", assistant)

# Linear flow: START -> assistant -> END (no conditional edges are added here).
builder.add_edge(START, "assistant")                         
builder.add_edge("assistant", END)            

# Compile the builder into the runnable graph that is invoked for each question.
react_graph = builder.compile()

Execute a predefined list of test questions through the agent, then format each conversation into RAGAS-compatible samples and store the raw message histories for later inspection or evaluation.


In [None]:
# Test prompts covering PDF look-ups, live prices and general descriptions
test_questions = [
    "Describe gold",
    "What does the document say about copper?",
    "What does the PDF say about the industrial uses of copper?",
    "Can you give me the description and price of silver from the document?",
    "What does the document mention about palladium's applications?",
    "What’s the current market price of gold?",
    "How much is silver trading at today?",
    "Can you check the latest price of copper and describe it?",
    "What is platinum and what are its main characteristics?",
    "What's the technical description of silver",
]

# One entry per question, in the same order as test_questions
ragas_samples = []  # sample.model_dump() dictionaries
conv = []           # message lists after tool-call normalisation

for q in test_questions:
    # Run the graph on a fresh single-message conversation
    result = react_graph.invoke({"messages": [HumanMessage(content=q)]})

    # Normalise the tool calls, then keep the trace before converting it
    messages = fix_tool_calls_for_openai_format(result["messages"])
    conv.append(messages)

    sample = lc_to_ragas_sample(messages)
    ragas_samples.append(sample.model_dump())




Extract key elements from each conversation, including the question, tool call, tool output, and final response, and save both the full RAGAS samples and a simplified version of the data for evaluation or manual inspection.


In [None]:
import json
from langchain_core.messages import HumanMessage, AIMessage

# One simplified record per conversation: question, tool call, tool output, final response.
minimal_data = []

for conversation in conv:
    # Initialise fields to store key elements; each stays None until seen
    question = None
    last_response = None
    tool_calls = None
    tool_message = None

    for msg in conversation:
        if isinstance(msg, HumanMessage) and question is None:
            # Take the first user message as the question
            question = msg.content
        elif isinstance(msg, AIMessage):
            # A missing "tool_calls" key and an empty list both mean "no tool
            # call"; indexing [0] on an empty list would raise IndexError.
            calls = msg.additional_kwargs.get("tool_calls")
            if calls:
                # Only the first call of the message is kept; a later
                # tool-calling message overwrites it.
                tool_calls = calls[0]["function"]
            else:
                last_response = msg.content
        elif isinstance(msg, ToolMessage):
            # Save the tool's output message (the last one wins)
            tool_message = msg.content

    # Append entry only if question and final response are available
    if question and last_response:
        minimal_data.append({
            "question": question,
            "tool_calls": tool_calls,
            "tool message": tool_message,
            "response": last_response
        })

# Save full RAGAS samples and minimal conversation data
import os

# Create the output directory first so a missing "Results/" folder does not
# discard the results collected above.
os.makedirs("Results", exist_ok=True)

with open("Results/ragas_sample_v1.json", "w", encoding="utf-8") as f:
    json.dump(ragas_samples, f, indent=2, ensure_ascii=False)

with open("Results/conversation_v1.json", "w", encoding="utf-8") as f:
    json.dump(minimal_data, f, indent=2, ensure_ascii=False)


---

# EVALUATION

---

Load the previously saved RAGAS-formatted samples from a JSON file and reconstruct them as SingleTurnSample objects for evaluation.


In [None]:
from ragas.dataset_schema import SingleTurnSample
import json

# Load saved RAGAS-formatted samples
# The file is written as UTF-8 with ensure_ascii=False, so it may contain raw
# non-ASCII characters; read it back as UTF-8 instead of the platform default.
with open("Results/ragas_sample_v1.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Rebuild list of SingleTurnSample objects
samples = [SingleTurnSample(**d) for d in data]


Evaluate all single-turn samples using three RAGAS metrics (context precision, faithfulness, and answer relevancy), then store the results in a DataFrame and export them as a CSV file for analysis.


In [None]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics import (LLMContextPrecisionWithoutReference,Faithfulness,ResponseRelevancy,)
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
import asyncio
import pandas as pd


# LLM passed to the RAGAS metrics: an Ollama "mistral" model wrapped for RAGAS.
local_llm = Ollama(model="mistral", temperature=0, timeout=60000)
wrapped_llm = LangchainLLMWrapper(local_llm)

# Embedding model, wrapped for RAGAS; it is only passed to the answer-relevancy metric below.
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
ragas_embeddings = LangchainEmbeddingsWrapper(hf_embeddings)

# Metrics
# Keyed by the label under which each metric is handed to evaluate_all_safe.
metrics = {
    "context_precision_no_ref": LLMContextPrecisionWithoutReference(llm=wrapped_llm),
    "faithfulness": Faithfulness(llm=wrapped_llm),
    "answer_relevancy": ResponseRelevancy(llm=wrapped_llm, embeddings=ragas_embeddings),
}

# evaluate_all_safe is not defined in this file — presumably it comes from
# `from utils import *`; verify.
# NOTE(review): asyncio.run() raises RuntimeError when an event loop is already
# running — confirm how this cell is executed.
results = asyncio.run(evaluate_all_safe(samples,metrics))

# Tabulate the evaluation output.
df_results = pd.DataFrame(results)

# Export without the DataFrame index column.
df_results.to_csv("Results/results_v1.csv", index=False)

  local_llm = Ollama(model="mistral", temperature=0, timeout=60000)
  hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
