# Building a research Agent with LangChain

In [15]:
from langchain_openai.chat_models import ChatOpenAI

In [16]:
llm_chat = ChatOpenAI(temperature=0)

In [17]:
from langchain.utilities import SerpAPIWrapper
from langchain.agents import tool

search = SerpAPIWrapper()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    # The docstring above is intentionally left untouched: LangChain's @tool
    # decorator uses it as the tool description shown to the model, so editing
    # it would change how the agent behaves.
    output = search.results(query)
    # A SerpAPI response may not contain "organic_results" (e.g. when the query
    # has no hits). Fall back to an empty list so the agent receives "no links"
    # instead of a KeyError being raised from inside the tool.
    results = output.get("organic_results") or []
    # Only keep entries that actually carry a link.
    return [r["link"] for r in results if r.get("link")]

web_search("Artificial Intelligence")

['https://en.wikipedia.org/wiki/Artificial_intelligence',
 'https://www.ibm.com/topics/artificial-intelligence',
 'https://www.techtarget.com/searchenterpriseai/definition/AI-Artificial-Intelligence',
 'https://builtin.com/artificial-intelligence',
 'https://cloud.google.com/learn/what-is-artificial-intelligence',
 'https://www.investopedia.com/terms/a/artificial-intelligence-ai.asp']

In [18]:
from langchain import hub

prompt = hub.pull("hwchase17/openai-functions-agent")

prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlaceholder(variable_name='chat_history', optional=True), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), MessagesPlaceholder(

In [19]:
prompt.input_variables

['agent_scratchpad', 'input']

In [20]:
prompt.input_types

{'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}

In [21]:
prompt.__dict__

{'name': None,
 'input_variables': ['agent_scratchpad', 'input'],
 'input_types': {'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
  'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]},
 'output_parser': None,
 'partial_variables': {},
 'messages': [SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
  MessagesPlaceholder(variable_name='chat_history', optional=True),
  HumanMessagePromptTemplate(prompt=PromptTemplate(input

To build an agent I need to do 2 essential things:

1. Connect a model to tools
2. Set up a loop in which the model repeatedly tries to solve the problem, either by answering directly with the LLM or by calling a tool and observing the result, and repeats this until it is finished.

In [22]:
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlaceholder(variable_name='chat_history', optional=True), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), MessagesPlaceholder(

In [23]:
from langchain.agents import create_openai_tools_agent

# Tools the agent is allowed to call; at this point only `web_search`.
tools = [web_search]

# Build the agent from the chat model, the tool list and the prompt pulled
# from the hub.
# NOTE(review): the variable is named `react_agent`, but it is created with
# `create_openai_tools_agent` — confirm the name matches the intended agent type.
react_agent = create_openai_tools_agent(llm_chat, tools, prompt)

In [24]:
from langchain.agents import AgentExecutor


react_agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True)

In [25]:
# Run the agent on a single question. The recorded result is a dict that
# echoes the inputs and adds an "output" key holding the final answer.
# NOTE(review): "agent_scratchpad" is passed explicitly as an empty list,
# presumably because the prompt lists it as an input variable — confirm
# whether the executor needs the caller to supply it.
react_agent_executor.invoke({"input": "What is the best way to learn a new language?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe best way to learn a new language may vary depending on your learning style and preferences. However, here are some effective strategies that can help:

1. Immerse yourself in the language: Surround yourself with the language as much as possible. Watch movies, TV shows, and videos in the target language, listen to podcasts or music, and try to have conversations with native speakers.

2. Take a structured course: Enroll in a language course, either online or in-person, that provides a structured curriculum and guidance from a qualified instructor. This can help you build a strong foundation and ensure you cover all the necessary aspects of the language.

3. Practice regularly: Consistency is key when learning a new language. Set aside dedicated time each day or week to practice listening, speaking, reading, and writing in the target language. Use language learning apps, flashcards, or language exchange platforms to practic

{'input': 'What is the best way to learn a new language?',
 'agent_scratchpad': [],
 'output': "The best way to learn a new language may vary depending on your learning style and preferences. However, here are some effective strategies that can help:\n\n1. Immerse yourself in the language: Surround yourself with the language as much as possible. Watch movies, TV shows, and videos in the target language, listen to podcasts or music, and try to have conversations with native speakers.\n\n2. Take a structured course: Enroll in a language course, either online or in-person, that provides a structured curriculum and guidance from a qualified instructor. This can help you build a strong foundation and ensure you cover all the necessary aspects of the language.\n\n3. Practice regularly: Consistency is key when learning a new language. Set aside dedicated time each day or week to practice listening, speaking, reading, and writing in the target language. Use language learning apps, flashcards, 

In [26]:
# Same call shape as the previous cell with a different question. The recorded
# trace starts directly with the answer text and shows no `Invoking:` line for
# `web_search`.
react_agent_executor.invoke({"input": "What is the best way to learn good programming practices?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe best way to learn good programming practices is through a combination of studying and hands-on practice. Here are some steps you can take:

1. Read books and online resources: There are many books and online resources available that cover programming best practices. Some popular books include "Clean Code" by Robert C. Martin and "The Pragmatic Programmer" by Andrew Hunt and David Thomas. These resources can provide you with a solid foundation in programming principles and best practices.

2. Take online courses or tutorials: Online courses and tutorials can provide structured learning experiences and help you understand programming best practices in a practical way. Platforms like Coursera, Udemy, and Codecademy offer a wide range of programming courses that cover best practices.

3. Join coding communities: Participating in coding communities, such as forums, online communities, and coding meetups, can expose you to diff

{'input': 'What is the best way to learn good programming practices?',
 'agent_scratchpad': [],
 'output': 'The best way to learn good programming practices is through a combination of studying and hands-on practice. Here are some steps you can take:\n\n1. Read books and online resources: There are many books and online resources available that cover programming best practices. Some popular books include "Clean Code" by Robert C. Martin and "The Pragmatic Programmer" by Andrew Hunt and David Thomas. These resources can provide you with a solid foundation in programming principles and best practices.\n\n2. Take online courses or tutorials: Online courses and tutorials can provide structured learning experiences and help you understand programming best practices in a practical way. Platforms like Coursera, Udemy, and Codecademy offer a wide range of programming courses that cover best practices.\n\n3. Join coding communities: Participating in coding communities, such as forums, online co

In [27]:
# A research-style request; the recorded trace shows the agent invoking
# `web_search` before producing its answer.
# NOTE(review): "releavnt" is a typo for "relevant". It is left as-is here
# because the string is sent to the model and echoed in the recorded output;
# fix it together with a re-run of the cell.
react_agent_executor.invoke({"input": "Research the current most popular open source frameworks to build llm-based agents in Python and return 5 releavnt urls.",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'most popular open source frameworks to build llm-based agents in Python'}`


[0m[36;1m[1;3m['https://github.com/kaushikb11/awesome-llm-agents', 'https://www.taskade.com/blog/top-autonomous-agents/', 'https://www.unite.ai/best-open-source-llms/', 'https://www.reddit.com/r/LocalLLaMA/comments/17m2lql/best_framework_for_llm_based_applications_in/', 'https://github.com/tensorchord/Awesome-LLMOps', 'https://pub.towardsai.net/inside-agents-the-new-open-source-framework-for-building-semi-autonomous-llm-agents-a78ffa172902', 'https://www.marktechpost.com/2024/01/03/10-open-source-tools-for-llm-applications-development/', 'https://sourceforge.net/directory/large-language-models-llm/python/', 'https://medium.com/@bijit211987/top-llm-dev-tool-and-when-to-use-them-in-your-ai-stack-622a651ece06'][0m[32;1m[1;3mHere are 5 relevant URLs for the most popular open source frameworks to build LLM-ba

{'input': 'Research the current most popular open source frameworks to build llm-based agents in Python and return 5 releavnt urls.',
 'agent_scratchpad': [],
 'output': 'Here are 5 relevant URLs for the most popular open source frameworks to build LLM-based agents in Python:\n\n1. [Awesome LLM Agents](https://github.com/kaushikb11/awesome-llm-agents)\n2. [Top Autonomous Agents](https://www.taskade.com/blog/top-autonomous-agents/)\n3. [Best Open Source LLMs](https://www.unite.ai/best-open-source-llms/)\n4. [Best Framework for LLM-based Applications](https://www.reddit.com/r/LocalLLaMA/comments/17m2lql/best_framework_for_llm_based_applications_in/)\n5. [Awesome LLMOps](https://github.com/tensorchord/Awesome-LLMOps)\n\nYou can visit these URLs to learn more about the frameworks and choose the one that suits your needs.'}

Now let's build this same agent with a more in-depth approach, going one level down in the abstraction layers.

To do that, we'll go down the ladder of abstraction and rebuild the agent using the lower-level building blocks of the LangChain Expression Language (LCEL).

In [28]:
from langchain_openai.chat_models import ChatOpenAI

In [29]:
llm_chat = ChatOpenAI(model="gpt-3.5-turbo-16k",temperature=0)

In [30]:
from langchain.agents import tool
from langchain.utilities import SerpAPIWrapper

search = SerpAPIWrapper()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    # The docstring above is intentionally left untouched: LangChain's @tool
    # decorator uses it as the tool description shown to the model, so editing
    # it would change how the agent behaves.
    output = search.results(query)
    # A SerpAPI response may not contain "organic_results" (e.g. when the query
    # has no hits). Fall back to an empty list so the agent receives "no links"
    # instead of a KeyError being raised from inside the tool.
    results = output.get("organic_results") or []
    # Only keep entries that actually carry a link.
    return [r["link"] for r in results if r.get("link")]


tools = [web_search]

In [31]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [32]:
# Prompt for the hand-built agent: a fixed system instruction, the user's
# question, and a slot for the agent's intermediate tool messages.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant. Given a user query you will perform research on the topic using the tools available."
        ),
        # `{input}` is filled from the "input" key supplied by the agent chain.
        ("user", "{input}"),
        # Filled by the agent chain with the messages built from the
        # intermediate steps of the current run.
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Bind tools to the LLM

In [33]:
from langchain_community.tools.convert_to_openai import format_tool_to_openai_tool

In [34]:
# Convert every tool in `tools` with `format_tool_to_openai_tool` and bind the
# resulting list to the chat model. `map` is used so no loop variable shadows
# the imported `tool` decorator.
llm_with_tools = llm_chat.bind(
    tools=list(map(format_tool_to_openai_tool, tools))
)

Now, all we have to do is import two last utility functions:
- a component for formatting intermediate steps (agent action, tool output pairs) to input messages that can be sent to the model, 
- and a component for converting the output message into an agent action/agent finish.

In [35]:
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser

In [36]:
# LCEL pipeline that makes up the agent:
#   input mapping -> prompt -> tool-bound model -> output parser.
agent = (
    {
        # Pass the user's question through unchanged.
        "input": lambda x: x["input"], 
        # Turn the (agent action, tool output) pairs collected so far into
        # messages that can be sent back to the model.
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    # Converts the model's output message into an agent action / agent finish.
    | OpenAIToolsAgentOutputParser()
)

In [37]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [38]:
# `stream` yields chunks as the run progresses; wrapping it in `list` consumes
# the whole stream so every chunk shows up in the cell output (in the recorded
# run these are dicts with keys such as "actions", "messages" and "steps").
list(agent_executor.stream({"input": "What are the latest open source LLMs available?"}))



[1m> Entering new AgentExecutor chain...[0m


[32;1m[1;3m
Invoking: `web_search` with `{'query': 'latest open source LLMs'}`


[0m[36;1m[1;3m['https://github.com/eugeneyan/open-llms', 'https://www.datacamp.com/blog/top-open-source-llms', 'https://www.unite.ai/best-open-source-llms/', 'https://thenewstack.io/large-language-models-open-source-llms-in-2023/', 'https://medium.datadriveninvestor.com/list-of-open-source-large-language-models-llms-4eac551bda2e', 'https://github.com/Hannibal046/Awesome-LLM', 'https://www.techopedia.com/6-best-open-source-llms-to-watch-out-for-in-2024', 'https://www.upstage.ai/feed/insight/top-open-source-llms-2024', 'https://www.simform.com/blog/open-source-llm/', 'https://blog.n8n.io/open-source-llm/'][0m[32;1m[1;3mHere are some of the latest open source LLMs available:

1. [Open-LLMs](https://github.com/eugeneyan/open-llms): A collection of open source LLMs on GitHub.

2. [Top Open Source LLMs](https://www.datacamp.com/blog/top-open-source-llms): A blog post that lists some of the top open sourc

[{'actions': [OpenAIToolAgentAction(tool='web_search', tool_input={'query': 'latest open source LLMs'}, log="\nInvoking: `web_search` with `{'query': 'latest open source LLMs'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_Hj4El1AUkwDYVWnwuFZ3LqVh', 'function': {'arguments': '{\n  "query": "latest open source LLMs"\n}', 'name': 'web_search'}, 'type': 'function'}]})], tool_call_id='call_Hj4El1AUkwDYVWnwuFZ3LqVh')],
  'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_Hj4El1AUkwDYVWnwuFZ3LqVh', 'function': {'arguments': '{\n  "query": "latest open source LLMs"\n}', 'name': 'web_search'}, 'type': 'function'}]})]},
 {'steps': [AgentStep(action=OpenAIToolAgentAction(tool='web_search', tool_input={'query': 'latest open source LLMs'}, log="\nInvoking: `web_search` with `{'query': 'latest open source LLMs'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls

Now, what if we want the agent to actually read the links and return a research report rather than just the urls?

In [39]:
import requests
from bs4 import BeautifulSoup


@tool
def scrape_text(url: str):
    """Scrapes the text from a given URL and returns it."""
    # The docstring above is intentionally left untouched: LangChain's @tool
    # decorator uses it as the tool description shown to the model.
    #
    # Tunables are local constants rather than parameters so that the tool's
    # argument schema (only `url`) stays exactly as before.
    timeout_seconds = 10  # upper bound on how long one fetch may block the agent
    max_chars = 10000     # cap on returned text to keep the model context small

    try:
        # Without a timeout `requests.get` can wait indefinitely on an
        # unresponsive host, which would stall the whole agent loop.
        response = requests.get(url, timeout=timeout_seconds)

        # Anything other than a plain 200 is reported back to the agent as text.
        if response.status_code != 200:
            return f"Failed to retrieve the webpage: Status code {response.status_code}"

        # Parse the HTML and collapse it to whitespace-separated visible text.
        soup = BeautifulSoup(response.text, "html.parser")
        page_text = soup.get_text(separator=" ", strip=True)

        # Return (not print) the extracted text, truncated to `max_chars`.
        return page_text[:max_chars]
    except Exception as e:
        # Best-effort tool: any failure (network error, timeout, parsing
        # problem) is returned as the tool result so the agent can react to it
        # instead of the run being aborted by an exception.
        return f"Failed to retrieve the webpage: {e}"

In [40]:
tools = [web_search, scrape_text]

In [41]:
# Re-bind the chat model now that `tools` holds both `web_search` and
# `scrape_text`: convert each tool with `format_tool_to_openai_tool` and attach
# the list to the model. `map` avoids a loop variable that shadows the imported
# `tool` decorator.
llm_with_tools = llm_chat.bind(
    tools=list(map(format_tool_to_openai_tool, tools))
)

In [42]:
# Prompt variant for the search + scrape agent: same system instruction as
# before, but the user message now asks for scraping and summarising.
# NOTE(review): the user message announces a list of URLs via `{urls}`, while
# the agent chain in the following cell fills `urls` from the caller's "input"
# key, which is invoked with a natural-language question — confirm this
# mismatch is intended.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant. Given a user query you will perform research on the topic using the tools available."
        ),
        ("user", "Scrape and summarize the contents from these urls: {urls}"),
        # Filled by the agent chain with the messages built from the
        # intermediate steps of the current run.
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [43]:
# Same LCEL pipeline shape as the earlier agent, wired to the new prompt and
# the model bound to both tools.
agent = (
    {
        # NOTE(review): the prompt variable is called `urls`, but it receives
        # whatever the caller passes as "input" (a plain question in the
        # invocation below) — confirm this is what the prompt should get.
        "urls": lambda x: x["input"], 
        # Turn the (agent action, tool output) pairs collected so far into
        # messages that can be sent back to the model.
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

In [44]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [45]:
agent_executor.invoke({"input": "What are the current best practices to deploy LLM agents to production?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `scrape_text` with `{'url': 'https://towardsdatascience.com/deploying-llm-agents-to-production-best-practices-7e4e8e8e8e8e'}`


[0m[33;1m[1;3mMedium Open in app Sign up Sign in Write Sign up Sign in PAGE NOT FOUND 404 Out of nothing, something. You can find (just about) anything on Medium — apparently even a page that doesn’t exist. Maybe these stories about finding what you didn’t know you were looking for will take you somewhere new? Home How To Get Lost Paul Greenberg Feb 22, 2022 · 11 min read Member-only How To Get Lost Paul Greenberg Feb 22, 2022 · 11 min read Member-only Lost in Conversation Omar Merza in The Coffeelicious Sep 19, 2017 · 5 min read Lost in Conversation Omar Merza in The Coffeelicious Sep 19, 2017 · 5 min read The Lost Photographs of Penang Ted Anthony in Stuff Dot Life Sep 19, 2017 · 10 min read Member-only The Lost Photographs of Penang Ted Anthony in Stuff Dot Life Sep 19, 2017 · 10 min

{'input': 'What are the current best practices to deploy LLM agents to production?',
 'output': "Here are some best practices for deploying LLM agents to production:\n\n1. Addressing the ambiguity of natural languages: LLMs can sometimes produce inconsistent and unexpected outputs, so prompt evaluation becomes crucial to ensure that the model understands the given examples and doesn't overfit to them. Prompt versioning and optimization also play vital roles in maintaining performance and cost-effectiveness.\n\n2. Cost and latency considerations: Longer prompts increase the cost of inference, while the length of the output directly impacts latency. It's important to analyze and optimize cost and latency for LLM applications, keeping in mind that the analysis can quickly become outdated due to the rapid evolution of the field.\n\n3. Different approaches: Prompting, finetuning, and prompt tuning are different approaches that can be employed while working with LLMs. Prompting requires only

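We run into a few issues when we simply add the scrape function to the agent without any other changes. In the run above, for example, the agent called `scrape_text` straight away on a URL that did not come from `web_search`, and the page it scraped was a "PAGE NOT FOUND 404" page.
How about we make this better by improving the inner chain of the agent?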

In [46]:
from langchain.prompts import MessagesPlaceholder

# Name of the prompt variable that carries the previous conversation turns.
MEMORY_KEY = "chat_history"

# Prompt with conversation memory: system message, prior turns, the new user
# message, then the agent's intermediate tool messages.
# NOTE(review): the system message talks about calculating word lengths, but
# the tools bound to `llm_with_tools` in the earlier cells are `web_search` and
# `scrape_text` — confirm this is the intended instruction for this agent.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are very powerful assistant, but bad at calculating lengths of words.",
        ),
        # Prior turns are inserted before the new user message.
        MessagesPlaceholder(variable_name=MEMORY_KEY),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

from langchain_core.messages import AIMessage, HumanMessage

# Conversation memory kept by the caller; starts empty and is extended by hand
# after each exchange.
chat_history = []

agent = (
    {
        "input": lambda x: x["input"],
        # Turn the (agent action, tool output) pairs collected so far into
        # messages that can be sent back to the model.
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
        # Forward the caller-supplied history into the prompt's memory slot.
        "chat_history": lambda x: x["chat_history"],
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# First turn: ask a question with an empty history.
input1 = "how many letters in the word educa?"
result = agent_executor.invoke({"input": input1, "chat_history": chat_history})
# Record the exchange so the follow-up question can refer back to it.
chat_history.extend(
    [
        HumanMessage(content=input1),
        AIMessage(content=result["output"]),
    ]
)
# Second turn: "that" only makes sense given the history passed along here.
agent_executor.invoke({"input": "is that a real word?", "chat_history": chat_history})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe word "educa" has 5 letters.[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'define educa'}`


[0m[36;1m[1;3m['https://www.merriam-webster.com/dictionary/education', 'https://en.wikipedia.org/wiki/Education', 'https://www.dictionary.com/browse/education', 'https://www.britannica.com/topic/education', 'https://dictionary.cambridge.org/us/dictionary/english/education', 'https://www.oxfordlearnersdictionaries.com/us/definition/english/education', 'https://learn.org/articles/What_is_Education.html', 'https://en.wikipedia.org/wiki/Definitions_of_education', 'https://link.springer.com/chapter/10.1007/978-3-030-83925-3_3', 'https://www.collinsdictionary.com/us/dictionary/english/education'][0m[32;1m[1;3mYes, "educa" is not a recognized word in the English language. It seems to be a misspelling or a truncated form of the word "education".[0

{'input': 'is that a real word?',
 'chat_history': [HumanMessage(content='how many letters in the word educa?'),
  AIMessage(content='The word "educa" has 5 letters.')],
 'output': 'Yes, "educa" is not a recognized word in the English language. It seems to be a misspelling or a truncated form of the word "education".'}