# Building a research Agent with LangChain

In [25]:
from langchain_openai.chat_models import ChatOpenAI

In [26]:
llm_chat = ChatOpenAI(temperature=0)

In [27]:
from langchain.utilities import SerpAPIWrapper
from langchain.agents import tool

search = SerpAPIWrapper()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    output = search.results(query)
    results = output["organic_results"]
    return [r["link"] for r in results]

web_search("Artificial Intelligence")

['https://en.wikipedia.org/wiki/Artificial_intelligence',
 'https://www.ibm.com/topics/artificial-intelligence',
 'https://www.techtarget.com/searchenterpriseai/definition/AI-Artificial-Intelligence',
 'https://builtin.com/artificial-intelligence',
 'https://cloud.google.com/learn/what-is-artificial-intelligence',
 'https://www.investopedia.com/terms/a/artificial-intelligence-ai.asp',
 'https://www.britannica.com/technology/artificial-intelligence']

In [28]:
from langchain import hub

prompt = hub.pull("hwchase17/openai-functions-agent")

prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlaceholder(variable_name='chat_history', optional=True), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), MessagesPlaceholder(

In [29]:
prompt.input_variables

['agent_scratchpad', 'input']

In [30]:
prompt.input_types

{'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}

In [31]:
prompt.__dict__

{'name': None,
 'input_variables': ['agent_scratchpad', 'input'],
 'input_types': {'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
  'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]},
 'output_parser': None,
 'partial_variables': {},
 'messages': [SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
  MessagesPlaceholder(variable_name='chat_history', optional=True),
  HumanMessagePromptTemplate(prompt=PromptTemplate(input

To build an agent I need to do 2 essential things:

1. Connect a model to tools
2. Set up a loop in which the model continuously tries to solve the problem by either just calling the llm or calling a tool, observing the result and repeating until finished.

In [32]:
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlaceholder(variable_name='chat_history', optional=True), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), MessagesPlaceholder(

In [33]:
from langchain.agents import create_openai_tools_agent

tools = [web_search]

react_agent = create_openai_tools_agent(llm_chat, tools, prompt)

In [34]:
from langchain.agents import AgentExecutor


react_agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True)

In [44]:
react_agent_executor.invoke({"query": "What is the best way to learn a new language?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo learn a new language effectively, there are several strategies and techniques that can be helpful. Here are some of the best ways to learn a new language:

1. Immerse Yourself in the Language: One of the most effective ways to learn a new language is to immerse yourself in it as much as possible. This can be done by living in a country where the language is spoken, or by surrounding yourself with native speakers of the language. Immersion helps you to pick up the language naturally and develop your listening and speaking skills.

2. Take Language Classes: Taking language classes can provide you with a structured learning environment and guidance from a qualified teacher. Language classes can help you learn the grammar, vocabulary, and pronunciation of the language. Look for classes that focus on conversation practice to improve your speaking skills.

3. Practice Regularly: Consistency is key when learning a new language. S

{'query': 'What is the best way to learn a new language?',
 'agent_scratchpad': [],
 'output': "To learn a new language effectively, there are several strategies and techniques that can be helpful. Here are some of the best ways to learn a new language:\n\n1. Immerse Yourself in the Language: One of the most effective ways to learn a new language is to immerse yourself in it as much as possible. This can be done by living in a country where the language is spoken, or by surrounding yourself with native speakers of the language. Immersion helps you to pick up the language naturally and develop your listening and speaking skills.\n\n2. Take Language Classes: Taking language classes can provide you with a structured learning environment and guidance from a qualified teacher. Language classes can help you learn the grammar, vocabulary, and pronunciation of the language. Look for classes that focus on conversation practice to improve your speaking skills.\n\n3. Practice Regularly: Consisten

In [45]:
react_agent_executor.invoke({"query": "What is the best way to learn good programming practices?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo learn good programming practices, there are several approaches you can take. Here are some of the best ways to learn and improve your programming skills:

1. Read Books and Online Resources: There are many books and online resources available that cover programming best practices. Some popular books include "Clean Code" by Robert C. Martin and "The Pragmatic Programmer" by Andrew Hunt and David Thomas. These resources provide valuable insights and guidelines on writing clean, maintainable, and efficient code.

2. Take Online Courses: Online learning platforms like Coursera, Udemy, and edX offer a wide range of programming courses that cover best practices. Look for courses that focus on software engineering principles, code quality, and design patterns. These courses often include practical exercises and assignments to help you apply what you've learned.

3. Practice Regularly: The more you practice programming, the better

{'query': 'What is the best way to learn good programming practices?',
 'agent_scratchpad': [],
 'output': 'To learn good programming practices, there are several approaches you can take. Here are some of the best ways to learn and improve your programming skills:\n\n1. Read Books and Online Resources: There are many books and online resources available that cover programming best practices. Some popular books include "Clean Code" by Robert C. Martin and "The Pragmatic Programmer" by Andrew Hunt and David Thomas. These resources provide valuable insights and guidelines on writing clean, maintainable, and efficient code.\n\n2. Take Online Courses: Online learning platforms like Coursera, Udemy, and edX offer a wide range of programming courses that cover best practices. Look for courses that focus on software engineering principles, code quality, and design patterns. These courses often include practical exercises and assignments to help you apply what you\'ve learned.\n\n3. Practice Re

In [47]:
react_agent_executor.invoke({"query": "Research the current most popular open source frameworks to build llm-based agents in Python and return 5 releavnt urls.",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'current most popular open source frameworks to build llm-based agents in Python'}`


[0m[36;1m[1;3m['https://github.com/Hannibal046/Awesome-LLM', 'https://www.simform.com/blog/open-source-llm/', 'https://odsc.medium.com/9-open-source-llms-and-agents-to-watch-728049d77060', 'https://www.unite.ai/best-open-source-llms/', 'https://neptune.ai/blog/best-open-source-mlops-tools', 'https://www.reddit.com/r/LocalLLaMA/comments/17m2lql/best_framework_for_llm_based_applications_in/', 'https://www.taskade.com/blog/top-autonomous-agents/', 'https://www.marktechpost.com/2024/01/03/10-open-source-tools-for-llm-applications-development/', 'https://www.datacamp.com/blog/top-open-source-llms', 'https://eng-shamsan.medium.com/large-language-model-evaluation-tools-2023-open-source-and-commercial-567525c57f82'][0m[32;1m[1;3mI have found some relevant URLs for you to explore:

1. [Awesome-LLM](https:

{'query': 'Research the current most popular open source frameworks to build llm-based agents in Python and return 5 releavnt urls.',
 'agent_scratchpad': [],
 'output': 'I have found some relevant URLs for you to explore:\n\n1. [Awesome-LLM](https://github.com/Hannibal046/Awesome-LLM)\n2. [Open Source LLM](https://www.simform.com/blog/open-source-llm/)\n3. [9 Open Source LLMs and Agents to Watch](https://odsc.medium.com/9-open-source-llms-and-agents-to-watch-728049d77060)\n4. [Best Open Source LLMs](https://www.unite.ai/best-open-source-llms/)\n5. [Best Open Source MLOps Tools](https://neptune.ai/blog/best-open-source-mlops-tools)\n\nYou can visit these URLs to learn more about the current most popular open source frameworks to build LLM-based agents in Python.'}

Now let's build this same agent but using a more in depth approach going one level down in the abstraction layers.

Now, let's go down the latter of abstraction and build this same agent but leveraging the lower-level capabilities of LangChain's LCEL language. 

In [35]:
from langchain_openai.chat_models import ChatOpenAI

In [36]:
llm_chat = ChatOpenAI(model="gpt-3.5-turbo-16k",temperature=0)

In [37]:
from langchain.agents import tool
from langchain.utilities import SerpAPIWrapper

search = SerpAPIWrapper()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    output = search.results(query)
    results = output["organic_results"]
    return [r["link"] for r in results]


tools = [web_search]

In [38]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [39]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant. Given a user query you will perform research on the topic using the tools available."
        ),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Bind tools to the LLM

In [40]:
from langchain_community.tools.convert_to_openai import format_tool_to_openai_tool

In [41]:
llm_with_tools = llm_chat.bind(tools=[format_tool_to_openai_tool(tool) for tool in tools])

Now, all we have to do is import two last utility functions:
- a component for formatting intermediate steps (agent action, tool output pairs) to input messages that can be sent to the model, 
- and a component for converting the output message into an agent action/agent finish.

In [42]:
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser

In [43]:
agent = (
    {
        "input": lambda x: x["input"], 
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

In [44]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [46]:
list(agent_executor.stream({"input": "What are the latest open source LLMs available?"}))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'latest open source LLMs'}`


[0m[36;1m[1;3m['https://github.com/eugeneyan/open-llms', 'https://www.datacamp.com/blog/top-open-source-llms', 'https://www.unite.ai/best-open-source-llms/', 'https://thenewstack.io/large-language-models-open-source-llms-in-2023/', 'https://github.com/Hannibal046/Awesome-LLM', 'https://medium.datadriveninvestor.com/list-of-open-source-large-language-models-llms-4eac551bda2e', 'https://www.techopedia.com/6-best-open-source-llms-to-watch-out-for-in-2024', 'https://www.upstage.ai/feed/insight/top-open-source-llms-2024', 'https://www.simform.com/blog/open-source-llm/', 'https://blog.n8n.io/open-source-llm/'][0m[32;1m[1;3mHere are some of the latest open source LLMs available:

1. [Open-LLMs](https://github.com/eugeneyan/open-llms): A collection of open source LLMs for various tasks.
2. [Top Open Source LLMs](https://www.datacamp.com/blog/top-open-source-l

[{'actions': [OpenAIToolAgentAction(tool='web_search', tool_input={'query': 'latest open source LLMs'}, log="\nInvoking: `web_search` with `{'query': 'latest open source LLMs'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_vtpKs3zgTOcLOKczRz9poRpt', 'function': {'arguments': '{\n  "query": "latest open source LLMs"\n}', 'name': 'web_search'}, 'type': 'function'}]})], tool_call_id='call_vtpKs3zgTOcLOKczRz9poRpt')],
  'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_vtpKs3zgTOcLOKczRz9poRpt', 'function': {'arguments': '{\n  "query": "latest open source LLMs"\n}', 'name': 'web_search'}, 'type': 'function'}]})]},
 {'steps': [AgentStep(action=OpenAIToolAgentAction(tool='web_search', tool_input={'query': 'latest open source LLMs'}, log="\nInvoking: `web_search` with `{'query': 'latest open source LLMs'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls

Now, what if we want the agent to actually read the links and return a research report rather than just the urls?

In [47]:
import requests
from bs4 import BeautifulSoup


@tool
def scrape_text(url: str):
    """Scrapes the text from a given URL and returns it."""
    # Send a GET request to the webpage
    try:
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Parse the content of the request with BeautifulSoup
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract all text from the webpage
            page_text = soup.get_text(separator=" ", strip=True)

            # Print the extracted text
            return page_text[:10000]
        else:
            return f"Failed to retrieve the webpage: Status code {response.status_code}"
    except Exception as e:
        print(e)
        return f"Failed to retrieve the webpage: {e}"

In [48]:
tools = [web_search, scrape_text]

In [49]:
llm_with_tools = llm_chat.bind(tools=[format_tool_to_openai_tool(tool) for tool in tools])

In [50]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant. Given a user query you will perform research on the topic using the tools available."
        ),
        ("user", "Scrape and summarize the contents from these urls: {urls}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [51]:
agent = (
    {
        "urls": lambda x: x["input"], 
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

In [52]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [53]:
agent_executor.invoke({"input": "What are the current best practices to deploy LLM agents to production?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `scrape_text` with `{'url': 'https://towardsdatascience.com/deploying-llm-agents-to-production-best-practices-7e4e8e8e8e8e'}`


[0m[33;1m[1;3mFailed to retrieve the webpage: Status code 403[0m[32;1m[1;3m
Invoking: `scrape_text` with `{'url': 'https://towardsdatascience.com/deploying-llm-agents-to-production-best-practices-7e4e8e8e8e8e'}`


[0m[33;1m[1;3mFailed to retrieve the webpage: Status code 403[0m[32;1m[1;3m
Invoking: `scrape_text` with `{'url': 'https://towardsdatascience.com/deploying-llm-agents-to-production-best-practices-7e4e8e8e8e8e'}`


[0m[33;1m[1;3mFailed to retrieve the webpage: Status code 403[0m[32;1m[1;3m
Invoking: `web_search` with `{'query': 'best practices to deploy LLM agents to production'}`


[0m[36;1m[1;3m['https://medium.com/@aigeek_/best-practices-for-deploying-large-language-models-llms-in-production-fdc5bf240d6a', 'https://www.qwak.com/post/best-practices-when-depl

{'input': 'What are the current best practices to deploy LLM agents to production?',
 'output': 'Here are some resources that provide best practices for deploying LLM agents to production:\n\n1. [Best Practices for Deploying Large Language Models (LLMs) in Production](https://medium.com/@aigeek_/best-practices-for-deploying-large-language-models-llms-in-production-fdc5bf240d6a) - This Medium article discusses the challenges and considerations for deploying LLMs in production and provides best practices for prompt engineering, cost management, and monitoring.\n\n2. [Best Practices When Deploying LLMs in Production](https://www.qwak.com/post/best-practices-when-deploying-llms-in-production) - This blog post from Qwak provides insights into deploying LLMs in production, including considerations for privacy, security, and legal issues.\n\n3. [Deploying LLMs in Production: The Anatomy of LLM Applications](https://www.linkedin.com/pulse/deploying-llms-production-anatomy-llm-applications-xeno

We encounter a few issues when we try to just add the scrape function to the agent without any other consideration. 
How about we make this better by improving the inner chain of the agent?