# RAG with multi data source

To access tools like wikipedia, pdf, arxiv we need wrapper, here is the stp by step how to make the wrapper

In [34]:
## Wikipedia Tools ##
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [35]:
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=250)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper)
print(wiki.name)

# WikipediaQueryRun:- Is responsibe for running this api_wrapper
# WikipediaAPIWrapper:- is communicating with the API wikipedia

wikipedia


In [None]:
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
os.environ["GOOGLE_API_KEY"] = "GOOGLE_API_KEY"

loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = loader.load()
# Split the documents into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)
# Initialize the embeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

vector_db = FAISS.from_documents(documents, embeddings)
retriver = vector_db.as_retriever
retriver

<bound method VectorStore.as_retriever of <langchain_community.vectorstores.faiss.FAISS object at 0x0000025F6194A020>>

## Create a retriver tools

In [37]:
from langchain.tools.retriever import create_retriever_tool
retrival_tool = create_retriever_tool(retriver, "langsmith_search", "search for information about langsmith")

In [38]:
retrival_tool.name

'langsmith_search'

In [39]:
## Arxiv Tools ##
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun

arxiv_wrapper=ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv=ArxivQueryRun(api_wrapper=arxiv_wrapper)
arxiv.name

'arxiv'

In [40]:
tools = [wiki, arxiv, retrival_tool]

# When ever user make a query first it goes to wiki then it retive the info. then it goes to arxiv, then it goes to retrivel_tool

In [41]:
tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\kdeepak_new\\anaconda3\\envs\\myr\\lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=250)),
 ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=200)),
 Tool(name='langsmith_search', description='search for information about langsmith', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x0000025F6C12E560>, retriever=<bound method VectorStore.as_retriever of <langchain_community.vectorstores.faiss.FAISS object at 0x0000025F6194A020>>, document_prompt=Pro

In [42]:
from dotenv import load_dotenv
load_dotenv()
import os

api_key = os.getenv("GOOGLE_API_KEY") 
from langchain_google_genai import GoogleGenerativeAI
llm = GoogleGenerativeAI(model='gemini-1.5-pro', temperature=0)
response = llm.invoke("Hi how are you?")
print(response)

I'm doing well, thank you for asking! How are you today?



Since Gemini AI is a Google-based model, there isn’t a direct LangChain Hub prompt like OpenAI’s "hwchase17/openai-functions-agent". Instead, you need to manually create a prompt template.

# Create our own custom prompt template

In [55]:
from langchain_core.prompts import ChatPromptTemplate

# Create an improved prompt template
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful and friendly AI assistant. Provide thoughtful, informative answers to user queries."),
    ("human", "{user_input}")
])

# Format the prompt with a user input
formatted_prompt = prompt.invoke({"user_input": "Hi, how are you?"})

# Get response from LLM
response = llm.invoke(formatted_prompt)

# Print response
print(response)


content="I'm doing well, thank you for asking! As a large language model, I don't experience emotions or feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How can I help you today?" additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []} id='run-fe8e0b8a-03bf-4aab-ade9-1be7ac18d80c-0' usage_metadata={'input_tokens': 24, 'output_tokens': 52, 'total_tokens': 76, 'input_token_details': {'cache_read': 0}}


In [44]:
# More casual/chatty:
# "You are a friendly AI assistant. Keep responses warm, engaging, and conversational."

# More professional:
# "You are a highly knowledgeable AI. Provide precise, well-structured answers in a professional tone."

In [45]:
# This code defines and executes a LangChain agent using Google's Gemini model, allowing it to call external tools when needed.

In [None]:
# Create the agent

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain.tools import Tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


# Define a sample tool
def sample_tool(input_text: str):
    return f"Tool executed successfully with input: {input_text}"

tools = [Tool(name="SampleTool", func=sample_tool, description="A sample tool for testing.")]

# ✅ Updated Prompt (Forcing the AI to use tools)
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use tools when necessary to answer user queries."),
    ("human", "{user_input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")  # Required for agent execution
])

# Create the agent
agent = create_tool_calling_agent(llm, tools, prompt)

# ✅ Wrap agent in `AgentExecutor` to ensure it actually calls tools
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Run the agent with a direct tool-invoking request
response = agent_executor.invoke({"user_input": "Use the SampleTool with input 'Test Input'"})
print(response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `SampleTool` with `Test Input`


[0m[36;1m[1;3mTool executed successfully with input: Test Input[0m[32;1m[1;3mTool executed successfully with input: Test Input[0m

[1m> Finished chain.[0m
{'user_input': "Use the SampleTool with input 'Test Input'", 'output': 'Tool executed successfully with input: Test Input'}


In [47]:
# To run the agent we need agent excecutor
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
agent_executor

AgentExecutor(verbose=True, agent=RunnableMultiActionAgent(runnable=RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: message_formatter(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'user_input'], input_types={'agent_scratchpad': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annota

In [49]:
agent_executor.invoke({"user_input": "Tell me about Langsmith"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLangSmith is a platform built for debugging, testing, evaluating, and monitoring LLM applications.  It helps developers and researchers close the gap between LLM experimentation and production. Key features include:

* **Dataset Management & Analysis:** Create and manage datasets for training and evaluation, and analyze dataset characteristics to understand potential biases and areas for improvement.
* **Tracing & Debugging:** Gain deep visibility into the execution flow of your LLM application, enabling step-by-step debugging and analysis of prompts, completions, and intermediate data.
* **Evaluation & Monitoring:** Define custom metrics and benchmarks to rigorously evaluate LLM performance, and continuously monitor deployed applications to identify regressions and performance drifts.
* **Experiment Tracking:** Track and compare different LLM configurations, prompts, and training parameters, making it easier to identify opti

{'user_input': 'Tell me about Langsmith',
 'output': 'LangSmith is a platform built for debugging, testing, evaluating, and monitoring LLM applications.  It helps developers and researchers close the gap between LLM experimentation and production. Key features include:\n\n* **Dataset Management & Analysis:** Create and manage datasets for training and evaluation, and analyze dataset characteristics to understand potential biases and areas for improvement.\n* **Tracing & Debugging:** Gain deep visibility into the execution flow of your LLM application, enabling step-by-step debugging and analysis of prompts, completions, and intermediate data.\n* **Evaluation & Monitoring:** Define custom metrics and benchmarks to rigorously evaluate LLM performance, and continuously monitor deployed applications to identify regressions and performance drifts.\n* **Experiment Tracking:** Track and compare different LLM configurations, prompts, and training parameters, making it easier to identify optima

In [50]:
response = agent_executor.invoke({"user_input": "Tell me about Langsmith"})
print(response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLangSmith is a platform built by Google AI for debugging, testing, evaluating, and monitoring LLM workflows.  It helps developers and researchers close the gap between LLM experimentation and production.  There are a number of helpful features, such as:

* **Dataset Management**: Easily manage your datasets for training, evaluating, and testing your LLM applications.
* **Evaluation**: Evaluate and compare different LLM configurations on various metrics.
* **Debugging**: Dive deep into individual conversations to understand and fix issues with your LLM's behavior.
* **Monitoring**: Track the performance of your LLM application over time.  This is important for catching regressions.

If you'd like to learn more, you can check out the LangSmith documentation.  I can also answer more specific questions you may have about LangSmith.[0m

[1m> Finished chain.[0m
{'user_input': 'Tell me about Langsmith', 'output': "LangSmith is a 

In [51]:
agent_executor.invoke({"user_input": "What's the paper 1605.08386 about?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI am sorry, I cannot access external websites or specific PDF files, including arXiv papers. Therefore, I cannot provide information about the content of the paper 1605.08386.[0m

[1m> Finished chain.[0m


{'user_input': "What's the paper 1605.08386 about?",
 'output': 'I am sorry, I cannot access external websites or specific PDF files, including arXiv papers. Therefore, I cannot provide information about the content of the paper 1605.08386.'}

In [None]:
from langchain.tools import Tool
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.utilities import ArxivAPIWrapper  # Arxiv search utility

# Define an arXiv research tool
arxiv_tool = Tool(
    name="ArxivSearch",
    func=ArxivAPIWrapper().run,  # This uses LangChain's arXiv API wrapper
    description="Search for research papers on arXiv using paper ID or keywords."
)

# Define the agent prompt
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use tools when necessary to answer user queries."),
    ("human", "{user_input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

# Create an agent with the arXiv tool
agent = create_tool_calling_agent(llm, [arxiv_tool], prompt)

# Wrap the agent in an executor
agent_executor = AgentExecutor(agent=agent, tools=[arxiv_tool], verbose=True)

# Run the agent to fetch a research paper summary
response = agent_executor.invoke({"user_input": "Find details about paper 1605.08386 on arXiv."})
print(response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `ArxivSearch` with `1605.08386`


[0m[36;1m[1;3mPublished: 2016-05-26
Title: Heat-bath random walks with Markov bases
Authors: Caprice Stanley, Tobias Windisch
Summary: Graphs on lattice points are studied whose edges come from a finite set of
allowed moves of arbitrary length. We show that the diameter of these graphs on
fibers of a fixed integer matrix can be bounded from above by a constant. We
then study the mixing behaviour of heat-bath random walks on these graphs. We
also state explicit conditions on the set of moves so that the heat-bath random
walk, a generalization of the Glauber dynamics, is an expander in fixed
dimension.[0m[32;1m[1;3mThe paper "Heat-bath random walks with Markov bases" by Caprice Stanley and Tobias Windisch was published on 2016-05-26.  The summary is as follows: "Graphs on lattice points are studied whose edges come from a finite set of allowed moves of arbitrary length. We show