### **Load the metadata.jsonl**

In [1]:
import json
import random
import pathlib

# Parse the JSON Lines file: one JSON object per non-blank line.
# The explicit encoding keeps the read independent of the platform's locale default.
qa = [
    json.loads(line)
    for line in pathlib.Path("metadata.jsonl").read_text(encoding="utf-8").splitlines()
    if line.strip()  # skip blank/trailing lines instead of failing in json.loads
]

# Draw the few-shot examples from a dedicated, seeded generator so a fresh
# "Restart & Run All" reproduces the same prompt without touching the global
# `random` state. `min(...)` avoids a ValueError when fewer than 3 records exist.
few_shots = random.Random(42).sample(qa, min(3, len(qa)))

few_shots

[{'task_id': '8f80e01c-1296-4371-9486-bb3d68651a60',
  'Question': 'Using bass clef notes, what is the age of someone who has experienced the word spelled out in the sheet music by the note letters the total number of lines and notes minus the number of notes on lines in the image?',
  'Level': 2,
  'Final answer': '90',
  'file_name': '8f80e01c-1296-4371-9486-bb3d68651a60.png',
  'Annotator Metadata': {'Steps': '1. Open the file.\n2. Translate the letters to bass notes ("D E C A D E").\n3. Count the lines (5).\n4. Count the notes (6).\n5. Count the notes on lines (2).\n6. Add the lines and notes (11).\n7. Subtract the notes on lines (11 - 2).\n8. Multiply 10 by 9 (90).\n9. Note the age given.',
   'Number of steps': '9',
   'How long did this take?': '5 minutes',
   'Tools': '1. Image recognition\n2. Bass note data\n3. Calculator',
   'Number of tools': '3'}},
 {'task_id': '6b078778-0b90-464d-83f6-59511c811b01',
  'Question': "The Metropolitan Museum of Art has a portrait in its colle

### **Build the vector store and retriever**

In [2]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

# Sentence-embedding model used to vectorise the solved question/answer pairs.
embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# One document per record in `qa`: the text holds the question and its final
# answer, the metadata keeps the task id so a hit can be traced back.
docs = [
    Document(
        page_content=f"Q: {q['Question']}\nA: {q['Final answer']}",
        metadata={"id": q["task_id"]},
    )
    for q in qa
]

vstore = FAISS.from_documents(docs, embed)
# The number of hits is a search setting and has to be passed through
# `search_kwargs`; a bare `k=3` keyword is not the documented way to set it.
retriever = vstore.as_retriever(search_kwargs={"k": 3})

  embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
  from .autonotebook import tqdm as notebook_tqdm


### **Define the tools**

In [3]:
from langchain.tools.retriever import create_retriever_tool

# Settings that tell the model how the retriever tool is called and what it returns.
# NOTE(review): the description says "level-1", but the index is built from every
# record in `qa` without filtering on 'Level' — confirm which is intended.
tool_settings = {
    "name": "similar_questions",
    "description": "Return similar solved GAIA level-1 Q&A pairs",
}

# Wrap the retriever so the agent can call it as a tool.
similar_q_tool = create_retriever_tool(retriever, **tool_settings)

### **Craft the system prompt**

In [4]:
from constants import SYSTEM_PROMPT

# Render every sampled example as a "Q: ... / A: ..." pair and append the
# whole batch to the base prompt in one step.
few_shot_block = "".join(
    f"\nQ: {ex['Question']}\nA: {ex['Final answer']}" for ex in few_shots
)
SYSTEM_PROMPT = SYSTEM_PROMPT + few_shot_block

print(f"System Prompt: {SYSTEM_PROMPT}")

System Prompt: 
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.

Q: Using bass clef notes, what is the age of someone who has experienced the word spelled out in the sheet music by the note letters the total number of lines and notes minus the number of notes on lines in the image?
A: 90
Q: The Metropolitan Museum of A

### **Choose an LLM Backend**

- Quick & free: gemini-1.5-flash (as in notebook)

- Familiar: gpt-4o via OpenAI

- Local: mistral-7b-instruct with ollama for zero cost


In [5]:
from langchain.chat_models import ChatOpenAI
import os

# Fail fast, with an actionable message, when the API key is missing or empty.
# The check only tests for presence and never displays the key, so the secret
# cannot leak into the saved notebook output.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before starting the kernel."
    )

# temperature=0 keeps the answers as repeatable as the backend allows.
llm = ChatOpenAI(model="gpt-4o", temperature=0)

  llm = ChatOpenAI(model="gpt-4o", temperature=0)


### **Wire it all together with LangGraph**

In [6]:
from langgraph.graph import MessagesState, START, StateGraph
from langgraph.prebuilt import tools_condition, ToolNode
from langchain_core.messages import SystemMessage


# Every tool the agent may call. The same list feeds both the model binding and
# the ToolNode so the two cannot drift apart when more tools are added.
agent_tools = [similar_q_tool]  # add web tools later

# The model has to be told which tools exist before it can request them;
# without this binding it never emits tool calls and the "tools" node is unused.
llm_with_tools = llm.bind_tools(agent_tools)


def assistant(state: MessagesState):
    """Run one model turn over the conversation so far.

    Args:
        state: Graph state whose "messages" entry holds the conversation history.

    Returns:
        A state update containing the model's reply as the single new message.
    """
    # The system prompt is prepended on every turn rather than stored in the state.
    reply = llm_with_tools.invoke(
        [SystemMessage(content=SYSTEM_PROMPT)] + state["messages"]
    )
    return {"messages": [reply]}


builder = StateGraph(MessagesState)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(agent_tools))
builder.add_edge(START, "assistant")
# `tools_condition` decides after each assistant turn whether to visit "tools".
builder.add_conditional_edges("assistant", tools_condition)
# Tool results go back to the assistant for another turn.
builder.add_edge("tools", "assistant")

agent_graph = builder.compile()

In [None]:
def _extract_final_answer(text: str) -> str:
    """Return the text after the last "FINAL ANSWER:" marker, stripped.

    The marker is matched case-insensitively. When it is absent, the whole
    text is returned stripped, so nothing is lost.
    """
    import re

    return re.split(r"final answer:", text, flags=re.IGNORECASE)[-1].strip()


def solve(question: str) -> str:
    """Run the agent graph on one question and return its final answer.

    Args:
        question: The question text to send to the agent.

    Returns:
        The part of the last message that follows the "FINAL ANSWER:" template
        requested by the system prompt, or the whole stripped reply when the
        template is missing.
    """
    # Imported here because no earlier cell brings HumanMessage into scope.
    from langchain_core.messages import HumanMessage

    out = agent_graph.invoke({"messages": [HumanMessage(content=question)]})
    reply = out["messages"][-1].content
    if not isinstance(reply, str):
        # Guard against non-string content so the text handling below cannot fail.
        reply = str(reply)
    return _extract_final_answer(reply)

### **Hook up the GAIA evaluation API**

In [None]:
import requests, os

API = "https://GAIA_API_URL"  # replace


def evaluate(timeout: float = 60.0):
    """Fetch the evaluation questions, solve each one, and submit the answers.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled server cannot hang the notebook indefinitely.

    Raises:
        requests.HTTPError: If the questions endpoint returns an error status.
    """
    resp = requests.get(f"{API}/questions", timeout=timeout)
    # Surface HTTP errors here rather than as a confusing JSON decode failure.
    resp.raise_for_status()
    qs = resp.json()

    # NOTE(review): the submit payload and metadata.jsonl both use "task_id";
    # accept that key as well as the original "id" — confirm against the API.
    answers = [
        {
            "task_id": q["id"] if "id" in q else q["task_id"],
            "submitted_answer": solve(q["question"]),
        }
        for q in qs
    ]

    payload = {
        "username": "enricozan",
        "agent_code": "https://huggingface.co/spaces/enricozan/gaia-ai-agent/tree/main",
        "answers": answers,
    }
    r = requests.post(f"{API}/submit", json=payload, timeout=timeout)
    # The body is printed whatever the status, so server-side messages stay visible.
    print("Leaderboard response:", r.text)


### **Wrap it in Gradio for your Space**

In [None]:
import gradio as gr

# Minimal UI: a question box, an answer box, and a button that runs the agent.
with gr.Blocks() as demo:
    q = gr.Textbox(label="GAIA question")
    a = gr.Textbox(label="Answer")
    btn = gr.Button("Solve")
    # On click, send the question text through `solve` and show the result.
    btn.click(
        fn=lambda x: solve(x),
        inputs=[q],
        outputs=[a],
    )

demo.launch()