## RAG with multiple data sources

In [4]:
### Wikipedia tool
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [None]:
# Wikipedia lookup tool. The wrapper is configured for a single result
# (top_k_results=1) capped at 250 characters of content.
api_wrapper = WikipediaAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=250,
)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper)

In [None]:
## LangSmith docs retriever: load the page, split it, index it in FAISS
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import AzureOpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

import os
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Azure OpenAI endpoint/key, since
# none are passed explicitly below -- confirm against the .env file.
load_dotenv()

# 1. Load: fetch the LangSmith documentation page as LangChain documents.
loader = WebBaseLoader('https://docs.smith.langchain.com/')
docs = loader.load()

# 2. Split: chunk size 1000 with an overlap of 200 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunk_docs = text_splitter.split_documents(docs)

# 3. Embed + index: build a FAISS vector store from the chunks using
#    Azure OpenAI embeddings.
db = FAISS.from_documents(
    chunk_docs,
    AzureOpenAIEmbeddings(model="gpt-text-embedding-3-large"),
)

# 4. Expose the store through the retriever interface; the bare expression on
#    the last line makes the notebook display its repr.
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'AzureOpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000257F4B71990>, search_kwargs={})

In [17]:
## Create retriever tool
from langchain.tools.retriever import create_retriever_tool

# Wrap the FAISS-backed retriever as a tool named "langsmith_search" so it can
# be handed to the agent alongside the other tools.
retrieval_tool = create_retriever_tool(
    retriever,
    name="langsmith_search",
    description="Search for information about langsmith",
)
retrieval_tool

Tool(name='langsmith_search', description='Search for information about langsmith', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x00000257E3ADEDE0>, retriever=VectorStoreRetriever(tags=['FAISS', 'AzureOpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000257F4B71990>, search_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_separator='\n\n', response_format='content'), coroutine=functools.partial(<function _aget_relevant_documents at 0x00000257E3E34220>, retriever=VectorStoreRetriever(tags=['FAISS', 'AzureOpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000257F4B71990>, search_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), do

In [20]:
### Arxiv tool
from langchain_community.tools import ArxivQueryRun
from langchain_community.utilities import ArxivAPIWrapper

# arXiv lookup tool, configured like the Wikipedia one: a single result
# (top_k_results=1) capped at 250 characters of content.
arxiv_wrapper = ArxivAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=250,
)
arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)

In [23]:
# Tools the agent may call: Wikipedia lookup, arXiv lookup, and the LangSmith
# docs retriever tool.
# The retriever tool is bound to the name `retrieval_tool` in the cell that
# creates it; the previous spelling `retriever_tool` is never defined in this
# notebook and raises NameError on a fresh kernel (Restart & Run All).
tools = [wiki, arxiv, retrieval_tool]

In [33]:
from langchain_openai import AzureChatOpenAI

# Chat model that drives the agent, created with model 'gpt-4o-mini' and
# temperature 0.2.
llm = AzureChatOpenAI(
    model='gpt-4o-mini',
    temperature=0.2,
)

In [34]:
## Prompt template
from langchain import hub #Get a prebuilt prompt from the hub

# Pull the prebuilt agent prompt from the LangChain hub, then display its
# message templates (system, optional chat_history, human input, scratchpad).
prompt = hub.pull(
    'hwchase17/openai-functions-agent'
)
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [35]:
## Agents
from langchain.agents import create_openai_tools_agent

# Combine the chat model, the tool list and the hub prompt into a
# tool-calling agent; the bare expression below displays the resulting runnable.
agent = create_openai_tools_agent(
    llm,
    tools,
    prompt,
)
agent

RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages

In [39]:
## Agent executor to run the agent
from langchain.agents import AgentExecutor
# The executor runs the agent with the given tools. verbose=True prints the
# chain trace ("Entering new AgentExecutor chain...", each tool invocation).
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
)

# Alternative sample questions (left disabled):
#   agent_executor.invoke({"input":"What is LangChain?"})
#   agent_executor.invoke({"input":"What is the paper 1605.08386 about?"})
agent_executor.invoke(
    {"input": "How to use AzureOpenAIEmbedding?"}
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'Azure OpenAI Embedding'}`


[0m[36;1m[1;3mPage: Mistral AI
Summary: Mistral AI, headquartered in Paris, France specializes in artificial intelligence (AI) products and focuses on open-weight large language models, (LLMs). Founded in April 2023 by former engineers from Google DeepMind and Met[0m[32;1m[1;3m
Invoking: `langsmith_search` with `{'query': 'Azure OpenAI Embedding'}`


[0m[38;5;200m[1;3mPythonTypeScriptimport openaifrom langsmith import wrappers, traceable# Auto-trace LLM calls in-contextclient = wrappers.wrap_openai(openai.Client())@traceable # Auto-trace this functiondef pipeline(user_input: str):    result = client.chat.completions.create(        messages=[{"role": "user", "content": user_input}],        model="gpt-4o-mini"    )    return result.choices[0].message.contentpipeline("Hello, world!")# Out:  Hello there! How can I assist you today?import { OpenAI } from 

{'input': 'How to use AzureOpenAIEmbedding?',
 'output': 'To use Azure OpenAI Embedding, you can follow these steps:\n\n### 1. Install Required Libraries\nYou need to install the necessary libraries. If you\'re using Python, you can do this via pip:\n\n```bash\npip install -U langsmith openai\n```\n\nFor TypeScript, you can use:\n\n```bash\nyarn add langsmith openai\n```\n\n### 2. Create an API Key\n- Go to the Azure portal and navigate to the OpenAI service.\n- Create an API key by heading to the Settings page and clicking on "Create API Key".\n\n### 3. Set Up Your Environment\nYou need to set up your environment variables to include your API keys. In your terminal, you can set them like this:\n\n```bash\nexport LANGCHAIN_TRACING_V2=true\nexport LANGCHAIN_API_KEY=<your-langchain-api-key>\nexport OPENAI_API_KEY=<your-openai-api-key>\n```\n\n### 4. Use the Azure OpenAI Embedding in Your Code\nHere is an example of how to use Azure OpenAI Embedding in Python:\n\n```python\nimport openai\