## Search Engine with Tools and Agents

In [1]:
## arxiv reseach paper
## tool creation

from langchain_community.tools import ArxivQueryRun,WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper,ArxivAPIWrapper

In [2]:
## using the inbuilt tool of wikipedia

api_wrapper_wiki=WikipediaAPIWrapper(top_k_results=1,doc_content_chars_max=250)
wiki=WikipediaQueryRun(api_wrapper=api_wrapper_wiki)
wiki.name

'wikipedia'

In [3]:
## creating Arxiv Tool

api_wrapper_arxiv=ArxivAPIWrapper(top_k_results=1,doc_content_chars_max=250)
arxiv=ArxivQueryRun(api_wrapper=api_wrapper_arxiv)
arxiv.name

'arxiv'

In [4]:
## combinning both

tools=[wiki,arxiv]

tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\ProgramData\\anaconda3\\envs\\clean-openai\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=250)),
 ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=250))]

In [5]:
## Custom tools[RAG Tools]


from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings


from dotenv import load_dotenv

load_dotenv()
import os

# Fetch values
hf_token = os.getenv("HF_TOKEN")
groq_api_key = os.getenv("GROQ_API_KEY")


# Load the web page
loader = WebBaseLoader("https://python.langchain.com/docs/integrations/providers/google/")
docs = loader.load()

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=500
)
documents = text_splitter.split_documents(docs)

# Build FAISS vector database
from langchain_huggingface import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectordb = FAISS.from_documents(documents, embeddings)

# Create retriever
retriever = vectordb.as_retriever()

retriever


USER_AGENT environment variable not set, consider setting it to identify your requests.


VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001B84B896010>, search_kwargs={})

In [6]:
## creating our tool from retriever

from langchain.tools.retriever import create_retriever_tool
retriever_tool=create_retriever_tool(retriever,"langchain-search","Search any information about langchain")
retriever_tool.name



'langchain-search'

In [7]:
tools=[wiki,arxiv,retriever_tool]


In [15]:
tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\ProgramData\\anaconda3\\envs\\clean-openai\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=250)),
 ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=250)),
 Tool(name='langchain-search', description='Search any information about langchain', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x000001B85F14F100>, retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000

In [8]:
## Run all this tools with Agent and LLM Models

## Tools, LLM --> AgentExecutor

from dotenv import load_dotenv

load_dotenv()
import os

# Fetch values
hf_token = os.getenv("HF_TOKEN")
groq_api_key = os.getenv("GROQ_API_KEY")

In [9]:
from langchain_groq import ChatGroq


# Instantiate ChatGroq LLM with your model
llm = ChatGroq(
    model="meta-llama/llama-4-maverick-17b-128e-instruct",
    api_key=groq_api_key
)

llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001B85F29EC90>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001B83D2FC810>, model_name='meta-llama/llama-4-maverick-17b-128e-instruct', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [10]:
 
  ## Create prompt template
  
from langchain.agents import initialize_agent, AgentType, load_tools
from langchain.prompts import ChatPromptTemplate
from langchain.schema import SystemMessage, HumanMessage

  
# Create prompt (the INSTRUCTIONS)
agent_prompt = ChatPromptTemplate.from_messages([
    SystemMessage(content="You are a research assistant with Wikipedia and arXiv access."),
    HumanMessage(content="""Question: {input}

Tools: 
- wikipedia: Search Wikipedia for general information
- arxiv: Search academic papers

Response Format:
Thought: your reasoning
Action: tool name  
Action Input: search query
Observation: tool result
Final Answer: comprehensive answer

Begin:""")
])

In [11]:
# Initialize agent (connect BRAIN to HANDS with INSTRUCTIONS)
agent = initialize_agent(
    tools=tools,          # HANDS
    llm=llm,              # BRAIN ← This is the correct place!
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    prompt=agent_prompt,  # INSTRUCTIONS
    verbose=True
)

print("✅ Agent ready! Brain connected to hands with instructions!")

✅ Agent ready! Brain connected to hands with instructions!


  agent = initialize_agent(


In [12]:
# Test with some research queries
# Simple test to confirm it's working
test_query = "What is artificial intelligence?"
result = agent.run(test_query)
print("✅ SUCCESS! Agent is working!")
print(f"Result: {result}")

  result = agent.run(test_query)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To answer this question, I need to provide a general definition of artificial intelligence. I can use Wikipedia to get a reliable and concise definition.

Action:
```
{
 "action": "wikipedia",
 "action_input": {"query": "artificial intelligence"}
}
```

[0m
Observation: [36;1m[1;3mPage: Artificial intelligence
Summary: Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-makin[0m
Thought:[32;1m[1;3mThought: The Wikipedia summary provides a clear and concise definition of artificial intelligence. I can now use this information to formulate a final answer.

Action:
```
{
 "action": "Final Answer",
 "action_input": "Artificial intelligence (AI) refers to the capability of computational systems to perform tasks typically associated with human intelligence, such as 

In [13]:
# This will really test both tools
advanced_query = """
Provide a comprehensive overview of artificial intelligence including:
1. Basic definition and history from Wikipedia
2. Current state of the field from recent arXiv papers
3. Latest advancements and research directions
"""

result = agent.run(advanced_query)
print(result)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mCertainly! I'll provide a comprehensive overview of artificial intelligence, covering its basic definition and history, current state of the field, and latest advancements and research directions.

Thought: First, I'll start by looking up the definition and history of artificial intelligence on Wikipedia.

Action:
```
{
 "action": "wikipedia",
 "action_input": "Artificial Intelligence"
}
```

[0m
Observation: [36;1m[1;3mPage: Artificial intelligence
Summary: Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-makin[0m
Thought:[32;1m[1;3m[0m

[1m> Finished chain.[0m



In [14]:
# Run any research query
query = "Explain attention mechanism in LLMs and find recent papers"
result = agent.run(query)
print(result)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To explain the attention mechanism in Large Language Models (LLMs) and find recent papers on the topic, I will first provide an overview of the attention mechanism. Then, I will search for recent papers on arxiv.org related to this topic.

The attention mechanism is a key component of many LLMs, allowing them to focus on specific parts of the input when generating output. It was introduced to improve the performance of sequence-to-sequence models by enabling them to handle long-range dependencies more effectively.

Action:
```
{
 "action": "wikipedia",
 "action_input": "Attention Mechanism"
}
```

[0m
Observation: [36;1m[1;3mPage: Attention (machine learning)
Summary: In machine learning, attention is a method that determines the importance of each component in a sequence relative to the other components in that sequence. In natural language processing, importance is rep[0m
Thought:[32;1m[1;3mThought: The Wikip