# LangGraph-Based Agent

In [None]:
# Imports and Constants

from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import ArxivLoader
from langchain_community.document_loaders import YoutubeLoader
from langgraph.graph import MessagesState, START, StateGraph
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt import tools_condition
from dotenv import load_dotenv
import requests
import random
import re
import time

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API keys read by the Gemini and
# Tavily clients created further down — confirm against the deployment setup.
load_dotenv()


In [None]:
# Tools

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    # The docstring is kept verbatim: the @tool decorator applied to this
    # function uses it as the description the model sees for the tool.
    product = a * b
    return product

@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    # The docstring is kept verbatim: the @tool decorator applied to this
    # function uses it as the description the model sees for the tool.
    total = a + b
    return total

@tool
def subtract(a: int, b: int) -> int:
    """Subtract two integers."""
    # The docstring is kept verbatim: the @tool decorator applied to this
    # function uses it as the description the model sees for the tool.
    # Order matters: the second argument is taken away from the first.
    difference = a - b
    return difference

@tool
def divide(a: int, b: int) -> float:
    """Divide two integers."""
    # The docstring is kept verbatim: the @tool decorator applied to this
    # function uses it as the description the model sees for the tool.
    #
    # Parameters: ``a`` is the dividend, ``b`` the divisor.
    # Returns: the true quotient ``a / b``. Python's ``/`` always produces a
    #   float (even for 6 / 3), so the return annotation is ``float`` rather
    #   than ``int``.
    # Raises: ValueError when ``b`` is zero, giving the caller a clearer
    #   message than the built-in ZeroDivisionError.
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b

@tool
def youtube_transcript(video_url: str) -> str:
    """
    Retrieve the transcript (captions) of a YouTube video, if available.
    This tool extracts and returns the full transcript text from the given YouTube video URL. It is helpful for answering questions based on what is said in a video, such as summarizing content or pulling out spoken facts. It does not interpret visual elements, only spoken audio with captions.
    """
    # The docstring is kept verbatim: the @tool decorator applied to this
    # function uses it as the description the model sees for the tool.
    transcript_docs = YoutubeLoader.from_youtube_url(video_url).load()
    # Each loaded document contributes its text; pieces are separated by a newline.
    segments = (segment.page_content for segment in transcript_docs)
    return "\n".join(segments)

@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    # The docstring is kept verbatim: the @tool decorator applied to this
    # function uses it as the description the model sees for the tool.
    #
    # Returns: one string containing every loaded article wrapped in a
    #   <Document ...> envelope, with articles separated by "---" rules.
    #   Previously a dict ({"wiki_results": ...}) was returned, which
    #   contradicted the ``-> str`` annotation and differed from web_search.
    print(f"🔍 Tool 'wiki_search' invoked with query: {query}")
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()

    formatted_docs = []
    for doc in search_docs:
        # .get() keeps a document with incomplete metadata from aborting the
        # whole search.
        source = doc.metadata.get("source", "")
        page = doc.metadata.get("page", "")
        formatted_docs.append(
            f'<Document source="{source}" page="{page}"/>\n{doc.page_content}\n</Document>'
        )
    return "\n\n---\n\n".join(formatted_docs)

@tool
def web_search(query: str) -> str:
    """Search Tavily Web for a query and return maximum 3 results from the web."""
    # The docstring is kept verbatim: the @tool decorator applied to this
    # function uses it as the description the model sees for the tool.
    #
    # Returns: one string containing every hit wrapped in a <Document ...>
    #   envelope, with hits separated by "---" rules. If the search tool
    #   hands back something other than a list, that value is returned as
    #   text so the model can see what went wrong.
    print(f"🔍 Tool 'web_search' invoked with query: {query}")
    search_tool = TavilySearchResults(max_results=3)
    search_results = search_tool.invoke(input=query)

    # TavilySearchResults reports failures as a plain string rather than a
    # list of hits; iterating that string would crash on ``.get``.
    if not isinstance(search_results, list):
        return str(search_results)

    formatted_hits = []
    for hit in search_results:
        if not isinstance(hit, dict):
            formatted_hits.append(str(hit))
            continue
        # Tavily hits carry their origin under "url"; "source" is kept as a
        # fallback for compatibility with the previous lookup.
        origin = hit.get("url") or hit.get("source", "")
        body = hit.get("content", "")
        formatted_hits.append(f'<Document source="{origin}"/>\n{body}\n</Document>')
    return "\n\n---\n\n".join(formatted_hits)


In [None]:
# System Prompt

# Load metadata.jsonl: one JSON object per line, each describing a solved
# question. Three of them are sampled as few-shot examples for the prompt.
import json

# Read as UTF-8 explicitly so the result does not depend on the platform's
# default encoding, and drop blank lines (e.g. a trailing newline), which
# json.loads would reject.
with open('metadata.jsonl', 'r', encoding='utf-8') as jsonl_file:
    json_list = [line for line in jsonl_file if line.strip()]

json_QA = [json.loads(json_str) for json_str in json_list]

# random.sample raises ValueError when asked for more items than exist, so
# cap the sample size for small metadata files.
random_samples = random.sample(json_QA, min(3, len(json_QA)))

# Base system prompt for the agent. It fixes the output contract: the reply
# must end with a line of the form "FINAL ANSWER: <answer>", which
# extract_final_answer() later picks out with a regular expression.
# The few-shot examples sampled from metadata.jsonl are appended after this
# literal, so the text deliberately ends with the EXAMPLES header.
system_prompt = """
You are a helpful AI agent. After using tools or reasoning through a question, always return the answer on the last line in the format:
FINAL ANSWER: <answer>

You must reason step-by-step, use available tools when helpful, and produce a precise final answer.

==========================
🔁 ANSWERING STRATEGY
==========================

- Prioritize grounded evidence from tool outputs or the conversation context.
- Do NOT speculate if evidence is missing.
- Use web search if unable to answer using specific tools.

==========================
🧠 REASONING FORMAT
==========================

You must show your thought process, then conclude with this template:

FINAL ANSWER: [A number OR short string OR comma-separated list of values]

==========================
🚨 FINAL ANSWER RULES
==========================

❌ NEVER include explanation after FINAL ANSWER.
❌ NEVER include units (e.g., $, %, km) unless specifically requested.
❌ NEVER use commas in numbers (write 1000 instead of 1,000).
❌ NEVER use abbreviations or articles unless explicitly required.
❌ NEVER include icons or emojis before, within, or after the FINAL ANSWER

Answer types:
- Number → FINAL ANSWER: 42
- String → FINAL ANSWER: Paris
- Year → FINAL ANSWER: 2009
- List → FINAL ANSWER: blue, green, red

⚠️ BEFORE you give your FINAL ANSWER:
- Reread the question carefully.
- Identify **exactly** what entity type is being asked (e.g., name, number, city, year).
- Ensure the answer is **directly tied to the question**, not just something mentioned during reasoning.
- Be careful to NOT give intermediate entities (e.g., actor name when the question asks for the character).

==========================
✅ FINAL CHECKLIST
==========================
Before giving FINAL ANSWER:
- [ ] Did I use all relevant tools and their results or grounded information?
- [ ] Is my answer the specific format and type requested?
- [ ] Did I avoid guessing or using unrelated intermediate facts?
- [ ] Did I double check that my FINAL ANSWER matches the exact thing asked in the question?

==========================
📘 EXAMPLES
==========================
Below are some examples showing how to approach questions step by step.
"""

# Append each sampled metadata entry to the prompt as a worked example:
# the question, the annotator's steps and tools, and the final answer.
for i, samples in enumerate(random_samples):
    system_prompt += f"\nQuestion {i+1}: {samples['Question']}\nSteps:\n{samples['Annotator Metadata']['Steps']}\nTools:\n{samples['Annotator Metadata']['Tools']}\nFinal Answer: {samples['Final answer']}\n"
system_prompt += "\n==========================\n"
system_prompt += "Now, please answer the following question step by step.\n"

# Save the assembled system prompt to a file.
# The prompt contains emoji and arrow characters, so the encoding is set
# explicitly: with a non-UTF-8 platform default (e.g. cp1252) the write would
# fail with UnicodeEncodeError.
with open('system_prompt.txt', 'w', encoding='utf-8') as f:
    f.write(system_prompt)

In [None]:
# Build Agent
# System message wrapping the prompt text assembled above; it is put in
# front of the conversation on every model call.
sys_msg = SystemMessage(content=system_prompt)

# Every tool the model may call: the arithmetic helpers first, then the
# retrieval tools (web, Wikipedia, YouTube transcript).
tools = [multiply, add, subtract, divide, web_search, wiki_search, youtube_transcript]

# Gemini chat model, plus a variant bound to the tool list above.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
llm_with_tools = llm.bind_tools(tools)

# Assistant Node
def assistant_node(state: MessagesState):
    """Run one model turn over the conversation held in the graph state.

    The system message is placed in front of the accumulated messages, the
    tool-enabled model is invoked, and the model's reply is returned as a
    one-element list under the "messages" key.
    """
    print("---Assistant Node---")
    conversation = [sys_msg, *state["messages"]]
    result = llm_with_tools.invoke(conversation)
    print(f"🎯 LLM Output: {result.content}")
    return {"messages": [result]}

# Build Graph
# Graph layout: START -> assistant_node <-> tools.
builder = StateGraph(MessagesState)

# Nodes: the model turn and the executor for any tool calls it requests.
builder.add_node("assistant_node", assistant_node)
builder.add_node("tools", ToolNode(tools))

# Edges. Execution always begins with the assistant.
builder.add_edge(START, "assistant_node")
# tools_condition inspects the assistant's latest message:
#   - if it is a tool call, control is routed to the "tools" node;
#   - if it is not a tool call, control is routed to END.
builder.add_conditional_edges("assistant_node", tools_condition)
# After the tools have run, hand their output back to the assistant.
builder.add_edge("tools", "assistant_node")

agent = builder.compile()

In [None]:
# Smoke test: send one trivial question through the compiled graph.
# `messages` ends up holding the value returned by agent.invoke().
question = "What is 2 + 2?"
messages = agent.invoke({"messages": [HumanMessage(content=question)]})

In [None]:
# Fetch the list of evaluation questions from the scoring service.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

api_url = DEFAULT_API_URL
questions_url = f"{api_url}/questions"

# Start from an empty list so that `questions_data` is always defined, even
# when the request fails; the main execution loop reads it unconditionally.
questions_data = []

print(f"Fetching questions from: {questions_url}")
try:
    response = requests.get(questions_url, timeout=15)
    response.raise_for_status()
    questions_data = response.json()
    if not questions_data:
        print("Fetched questions list is empty.")
    print(f"Fetched {len(questions_data)} questions.")
# JSONDecodeError must be handled before RequestException: in requests it is
# a subclass of RequestException, so the broader handler listed first would
# make this one unreachable.
except requests.exceptions.JSONDecodeError as e:
    print(f"Error decoding JSON response from questions endpoint: {e}")
    print(f"Response text: {response.text[:500]}")
except requests.exceptions.RequestException as e:
    print(f"Error fetching questions: {e}")
except Exception as e:
    print(f"An unexpected error occurred fetching questions: {e}")

In [None]:
# Retry policy used by run_agent_with_retries() for rate-limit errors.
# The wait before retry k is BASE_BACKOFF * 2**(k-1) seconds plus up to one
# second of random jitter.
BASE_BACKOFF = 30  # seconds
# Upper bound on attempts per question (first try included).
MAX_RETRIES = 5

def extract_final_answer(submitted_text: str) -> str:
    """Pull the answer out of a model reply that ends with "FINAL ANSWER: ...".

    Args:
        submitted_text: The model's reply. Normally a string; a list of
            content blocks (plain strings or dicts carrying a "text" field)
            is also accepted and flattened to text first.

    Returns:
        The text following the last "FINAL ANSWER:" marker (matched
        case-insensitively) up to the end of that line, stripped of
        surrounding whitespace, or "N/A" when no marker is present.
    """
    if isinstance(submitted_text, (list, tuple)):
        # Chat models may return content as a list of blocks instead of one
        # string; keep only the textual parts.
        text_parts = []
        for part in submitted_text:
            if isinstance(part, str):
                text_parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                text_parts.append(part["text"])
        submitted_text = "".join(text_parts)
    elif not isinstance(submitted_text, str):
        submitted_text = "" if submitted_text is None else str(submitted_text)

    answers = re.findall(r"FINAL ANSWER:\s*(.*)", submitted_text, re.IGNORECASE)
    if answers:
        # The prompt asks for the answer on the last line, so when the marker
        # appears more than once (e.g. a draft during reasoning) the final
        # occurrence is the one that counts.
        return answers[-1].strip()
    return "N/A"

def run_agent_with_retries(task_id, question_text):
    """Run the agent on one question, retrying only on rate-limit errors.

    Args:
        task_id: Identifier of the question, used in log output only.
        question_text: The question sent to the agent as a human message.

    Returns:
        The extracted final answer on success, or a string starting with
        "AGENT ERROR:" describing why no answer was produced.
    """
    for attempt_no in range(1, MAX_RETRIES + 1):
        try:
            print(f"\nTask ID: {task_id} - Attempt {attempt_no} - Question: {question_text}")
            final_state = agent.invoke(
                {"messages": [HumanMessage(content=question_text)]},
                debug=False,
            )
            # The last message in the returned state is the agent's reply.
            full_response = final_state["messages"][-1].content
            submitted_answer = extract_final_answer(full_response)
            print(f"✅ Submitted Answer: {submitted_answer}")
            return submitted_answer

        except Exception as e:
            error_msg = str(e)
            print(f"⚠️ Error on task {task_id}, attempt {attempt_no}: {error_msg}")

            # Rate limiting is recognised purely from the error text.
            rate_limited = "429" in error_msg or "rate limit" in error_msg.lower()
            if not rate_limited:
                print("❌ Non-retryable error.")
                return f"AGENT ERROR: {error_msg}"

            if attempt_no >= MAX_RETRIES:
                print("❌ Max retries reached.")
                return f"AGENT ERROR: Rate limit (after {MAX_RETRIES} attempts)"

            # Exponential backoff with up to one second of jitter.
            wait_time = BASE_BACKOFF * (2 ** (attempt_no - 1)) + random.uniform(0, 1)
            print(f"⏳ Rate limit hit. Retrying in {wait_time:.2f} seconds...")
            time.sleep(wait_time)

    # Only reached when the loop body never runs (MAX_RETRIES <= 0).
    return "AGENT ERROR: Unknown"


In [None]:
# Main execution loop
# Two parallel records are built per question: `answers_payload` holds
# task_id / submitted_answer pairs, `results_log` holds human-readable rows
# that the later cells print and score.
answers_payload = []
results_log = []
print(f"Running agent on {len(questions_data)} questions...")

# Iterate over a shallow copy of the fetched question list.
for item in questions_data[:]:
    task_id = item.get("task_id")
    question_text = item.get("question")

    # A usable item needs a truthy task_id and a question that is not None.
    if not (task_id and question_text is not None):
        print(f"Skipping item with missing task_id or question: {item}")
        continue

    submitted_answer = run_agent_with_retries(task_id, question_text)

    answers_payload.append(
        {"task_id": task_id, "submitted_answer": submitted_answer}
    )
    results_log.append(
        {
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer,
        }
    )

if len(answers_payload) == 0:
    print("❌ Agent did not produce any answers to submit.")


In [None]:
# Dump every logged row (task id, question, submitted answer), one per line.
for logged_row in results_log:
    print(logged_row)

In [None]:
# Test

# Score the logged answers against the reference answers in metadata.jsonl.
# Read as UTF-8 explicitly and skip blank lines, which json.loads rejects.
with open('metadata.jsonl', 'r', encoding='utf-8') as f:
    metadata_entries = [json.loads(line) for line in f if line.strip()]
metadata_lookup = {entry['task_id']: entry for entry in metadata_entries}

# Backtesting results
print("\n=== 🔍 Backtest Results ===\n")
correct_count = 0
total = 0
unscored = 0  # results whose task has no reference answer in the metadata

for result in results_log:
    task_id = result["Task ID"]
    submitted = str(result["Submitted Answer"]).strip().replace("FINAL ANSWER: ", "").strip()
    reference = metadata_lookup.get(task_id, {}).get("Final answer")

    if reference is None:
        # Without a reference there is nothing to compare against. Defaulting
        # to "N/A" here would wrongly count a failed extraction (which is
        # also reported as "N/A") as a correct answer.
        expected = "N/A"
        is_match = False
        match_status = "⚠️ NO REFERENCE"
        unscored += 1
    else:
        expected = str(reference)
        # Comparison ignores case and surrounding whitespace.
        is_match = submitted.lower() == expected.strip().lower()
        match_status = "✅ MATCH" if is_match else "❌ MISMATCH"
        if is_match:
            correct_count += 1
        total += 1

    print(f"Task ID: {task_id}")
    print(f"Your Answer: {submitted}")
    print(f"Expected Answer: {expected}")
    print(f"Result: {match_status}")
    print("-" * 50)

# Summary (accuracy is computed over scored results only)
accuracy = (correct_count / total * 100) if total else 0
print(f"\nSummary: {correct_count}/{total} correct ({accuracy:.2f}% accuracy)")
if unscored:
    print(f"Skipped {unscored} result(s) with no reference answer.")
