# Search Tools

In [None]:
# Install packages
!pip install -qU langchain langchain-google-genai google-generativeai==0.8.5


In [None]:
!pip install -q ddgs

In [None]:
import os
from google.colab import userdata

# The key is read from Colab's Secrets store under the name GEMINI_API_KEY
# (key icon in the left sidebar) -- it is never pasted into the notebook.
# ChatGoogleGenerativeAI is constructed later without an explicit key, so it
# relies on the GOOGLE_API_KEY environment variable set here.
gemini_api_key = userdata.get("GEMINI_API_KEY")
if not gemini_api_key:
    # Fail here with an actionable message instead of an opaque error from
    # os.environ (which only accepts str values) or a later auth failure.
    raise RuntimeError(
        "Colab secret 'GEMINI_API_KEY' is empty; add it in the Secrets panel "
        "and grant this notebook access."
    )
os.environ["GOOGLE_API_KEY"] = gemini_api_key
print(" API key set.")

 API key set.


# Define tools (DuckDuckGo + Wikipedia)

In [None]:
from typing import List
from ddgs import DDGS
import wikipedia

from langchain_core.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import AgentExecutor, create_tool_calling_agent

# Point the wikipedia client at the English edition for every lookup below.
wikipedia.set_lang("en")

def _format_hits(hits: List[dict], k: int = 5) -> str:
    lines = []
    for i, h in enumerate(hits[:k], 1):
        title = h.get("title") or h.get("heading") or ""
        url = h.get("href") or h.get("url") or ""
        snippet = h.get("body") or h.get("description") or h.get("snippet") or ""
        lines.append(f"{i}. {title}\n   {url}\n   {snippet}")
    return "\n".join(lines) if lines else "No results."

@tool
def web_search(query: str, max_results: int = 5, time_range: str = None) -> str:
    """DuckDuckGo web search. Returns up to `max_results` compact results with URLs and snippets.
    time_range can be 'd','w','m','y' (day, week, month, year) for recency, or None."""
    # The docstring above is left untouched on purpose: @tool exposes it to the
    # LLM as the tool description, so engineering notes live in comments instead.
    #
    #   query       -- search terms; blank input is rejected without a network call.
    #   max_results -- floored at 1 (a non-positive value would otherwise make
    #                  _format_hits slice from the wrong end or return nothing).
    #   time_range  -- 'd'/'w'/'m'/'y'; the spelled-out words are accepted too
    #                  because the model often sends them. Anything else means
    #                  "no recency filter".
    #   returns     -- numbered text from _format_hits, or a "Search error: ..."
    #                  string. Errors are returned, not raised, so the agent loop
    #                  can read them and carry on.
    recency_codes = {
        "d": "d", "day": "d",
        "w": "w", "week": "w",
        "m": "m", "month": "m",
        "y": "y", "year": "y",
    }
    try:
        if not query or not query.strip():
            return "Search error: empty query."

        limit = max(1, int(max_results))
        timelimit = recency_codes.get((time_range or "").strip().lower())

        with DDGS() as ddg:
            hits = list(ddg.text(query, max_results=limit, timelimit=timelimit) or [])
        return _format_hits(hits, k=limit)
    except Exception as e:
        return f"Search error: {e}"

@tool
def wiki_summary(topic: str, sentences: int = 3) -> str:
    """Return a short Wikipedia summary with the canonical page URL."""
    # The docstring is what @tool hands to the LLM as the tool description, so
    # it stays short; engineering notes are kept in comments.
    #
    #   topic     -- page title or search phrase; blank input yields
    #                "No matching page found." without a network call.
    #   sentences -- number of summary sentences, capped at 10 (the upper limit
    #                the wikipedia library documents for this argument).
    #   returns   -- "Title/URL/Summary" text, or a short diagnostic string
    #                (disambiguation options, not found, other error). Errors
    #                are returned rather than raised so the agent can react.
    try:
        title = (topic or "").strip()
        if not title:
            return "No matching page found."
        sentence_cap = min(int(sentences), 10)

        try:
            # Prefer the exact title (following redirects): auto-suggest can
            # silently swap a valid title for an unrelated "did you mean" page.
            page = wikipedia.page(title, auto_suggest=False, redirect=True)
        except wikipedia.PageError:
            # No such title -- fall back to Wikipedia's suggestion.
            page = wikipedia.page(title, auto_suggest=True, redirect=True)

        # page.title is already canonical; auto_suggest must be off here, or the
        # summary could be fetched for a different page than the URL we return.
        summary = wikipedia.summary(page.title, sentences=sentence_cap, auto_suggest=False)
        return f"Title: {page.title}\nURL: {page.url}\nSummary: {summary}"
    except wikipedia.DisambiguationError as e:
        opts = "; ".join(e.options[:5])
        return f"Disambiguation: be specific. Options include: {opts}"
    except wikipedia.PageError:
        return "No matching page found."
    except Exception as e:
        return f"Wikipedia error: {e}"

# Build the search agent

In [None]:
def build_search_agent(
    model: str = "gemini-2.0-flash",
    temperature: float = 0.2,
    verbose: bool = False,
):
    """Assemble a tool-calling research agent backed by a Gemini chat model.

    Args:
        model: Gemini model name passed to ChatGoogleGenerativeAI. Exposed as a
            parameter so the notebook can switch models without editing this
            function; the default matches the previously hard-coded value.
        temperature: Sampling temperature for the chat model.
        verbose: When True, AgentExecutor prints its intermediate steps; the
            default False keeps cell output clean.

    Returns:
        An AgentExecutor wired to the `web_search` and `wiki_summary` tools.
        Call it with `.invoke({"input": question})` and read `["output"]`.
    """
    llm = ChatGoogleGenerativeAI(model=model, temperature=temperature)
    tools = [web_search, wiki_summary]

    prompt = ChatPromptTemplate.from_messages([
        ("system",
         "You are a research assistant.\n"
         "Use tools for ANY factual claims, numbers, dates, or 'latest' info.\n"
         "Choose:\n"
         "- web_search: for fresh/news or when URLs are required.\n"
         "- wiki_summary: for background/definitions.\n"
         "When you cite, include page titles and URLs from tool outputs.\n"
         "Keep answers concise. Prefer top 3 results. Do not hallucinate."),
        ("human", "{input}"),
        # Slot the agent fills with its intermediate tool calls and results.
        ("placeholder", "{agent_scratchpad}"),
    ])

    agent = create_tool_calling_agent(llm, tools, prompt)
    return AgentExecutor(agent=agent, tools=tools, verbose=verbose)

# Build the agent once and keep it in a notebook-level variable; the `ask`
# helper in the test cell below reads this global.
search_agent = build_search_agent()
print(" Search agent ready.")

 Search agent ready.


# Quick tests

In [None]:
def ask(q: str):
    """Send one question to the notebook-level `search_agent` and print the answer.

    The agent's final text is taken from the "output" key of the invoke
    result and printed, followed by a blank line. Nothing is returned.
    """
    answer = search_agent.invoke({"input": q})["output"]
    print(answer, "\n")

# 1) Simple fact + citation: a one-sentence answer that must carry a source URL.
ask("Who won the FIFA World Cup 2022? Give one sentence and include a source URL.")

# 2) Background from Wikipedia: the system prompt steers definitions to wiki_summary.
ask("In 3 bullets, explain Retrieval-Augmented Generation. Cite the Wikipedia page with URL.")

# 3) Fresh headlines (limit and summarize): the system prompt steers news/recency to web_search.
ask("Find 3 recent headlines about generative AI this week. One line per item with URL.")

Argentina won the FIFA World Cup 2022. [https://en.wikipedia.org/wiki/2022_FIFA_World_Cup_Final] 

Here's a 3-bullet explanation of Retrieval-Augmented Generation (RAG), based on the Wikipedia summary:

*   RAG is an end-to-end trainable approach that combines the strengths of pre-trained language models with information retrieval.
*   It allows language models to access and incorporate information from external sources, enhancing their ability to generate accurate and contextually relevant responses.
*   RAG can be used for various natural language processing tasks, improving the quality and reliability of generated text.

I used the following Wikipedia page:

*   Title: Retrieval-augmented generation
*   URL: https://en.wikipedia.org/wiki/Retrieval-augmented_generation 

Here are three recent headlines about generative AI from this week:

*   AI News | Latest Headlines and Developments | Reuters ([https://www.reuters.com/technology/artificial-intelligence/](https://www.reuters.com/te