# Building a research Agent with LangChain

In [54]:
!pip install langchain
!pip install langchain-community
!pip install langchain-openai
!pip install langchainhub
!pip install beautifulsoup4

In [90]:
# uncomment this if running locally
# from dotenv import load_dotenv

# load_dotenv()

# FILEPATH: /Users/greatmaster/Desktop/projects/oreilly-live-trainings/oreilly_live_training_agents/notebooks/1.3-intro-agents-with-langchain.ipynb
# SET YOUR API KEYS FOR LANGSMITH AND TAVILY TO RUN THIS NOTEBOOK!
# langsmith: https://docs.smith.langchain.com/#:~:text=install%20-U%20langsmith-,Create%20an%20API%20key,click%20Create%20API%20Key.,-Setup%20your%20environment
# tavily: https://app.tavily.com/sign-in
# openai: https://platform.openai.com/docs/quickstart
import os
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGCHAIN_API_KEY"] = "your-api-key"
# os.environ["TAVILY_API_KEY"] = "your-api-key"
# os.environ["OPENAI_API_KEY"] = 'your-api-key'

# EXERCISE: Create a simple agent that performs a productive task, whatever that means for you.

In [1]:
from langchain_openai import ChatOpenAI

In [2]:
llm_chat = ChatOpenAI(temperature=0)

In [3]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import tool

search = TavilySearchResults()

results = search.invoke("biggest moutains in the world")
results

[{'url': 'https://en.wikipedia.org/wiki/List_of_highest_mountains_on_Earth',
  'content': "88 | 4\n87 |\n86 | 1\n85 | 8 1\n84 | 8\n83 |\n82 |\n81 | 8 6 6 2\n80 | 9 8 5 3 2\n79 | 5 4 3 3\n78 | 9 8 7 6 2 2 1 0\n77 | 9 9 8 8 5 5 4 1 0 0\n76 |\n9 7 6 6 4 1 1\n75 | 7 7 5 5 4 4 4 3 3 1 1\n74 | 9 9 9 9 7 6 6 6 5 5 3 3 2 2 2 1 1 1 0 0 0\n73 | 8 8 8 8 8 6 6 5 5 5 4 2 1 1 1 0\n72 | 9 9 9 8 8 8 8 7 7 6 6 5 4 4 4 3 2 2 1 1 0 0 0 0 0 0 0\nGallery[edit]\n1. All peaks 7,000\xa0m (23,000\xa0ft) or higher are located in East, Central or South Asia in a rectangle edged by Noshaq (7,492\xa0m or 24,580\xa0ft) on the Afghanistan–Pakistan border in the west, Jengish Chokusu (Tuōmù'ěr Fēng, 7,439\xa0m or 24,406\xa0ft) on the Kyrgyzstan–Xinjiang border to the north, Gongga Shan (Minya Konka, 7,556\xa0m or 24,790\xa0ft) in Sichuan to the east, and Kabru (7,412\xa0m or 24,318\xa0ft) on the Sikkim–Nepal border to the south.\n Mauna Loa (4,169\xa0m or 13,678\xa0ft) is the largest mountain on Earth in terms of bas

In [5]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import tool

search = TavilySearchResults()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    output = search.invoke(query)
    return [r['url'] for r in output if type(r)==dict]

web_search.invoke("Current resources on why pancakes are the best breakfast")

['https://www.kingarthurbaking.com/blog/2021/10/26/best-pancake-tips',
 'https://www.bhg.com/scrambled-pancakes-8674832',
 'https://www.medicinenet.com/are_pancakes_healthy_or_unhealthy_breakfast_food/article.htm',
 'https://www.chefsresource.com/faq/is-pancakes-good-for-breakfast/',
 'https://www.tastingtable.com/940660/why-pancakes-taste-better-at-a-diner-than-at-home/']

In [6]:
from langchain import hub

prompt = hub.pull("hwchase17/openai-tools-agent")

prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-tools-agent', 'lc_hub_commit_hash': 'c18672812789a3b9697656dd539edf0120285dcae36396d0b548ae42a4ed66f5'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlacehold

In [7]:
prompt.input_variables

['agent_scratchpad', 'input']

In [8]:
prompt.input_types

{'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}

In [9]:
prompt.__dict__

{'name': None,
 'input_variables': ['agent_scratchpad', 'input'],
 'input_types': {'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
  'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]},
 'output_parser': None,
 'partial_variables': {},
 'metadata': {'lc_hub_owner': 'hwchase17',
  'lc_hub_repo': 'openai-tools-agent',
  'lc_hub_commit_hash': 'c18672812789a3b9697656dd539edf0120285dcae36396d0b548ae42a4ed66f5'},
 'tags': None,
 'messages': [SystemMessagePromptTemplate(prompt=PromptTempl

To build an agent I need to do 2 essential things:

1. Connect a model to tools
2. Set up a loop in which the model continuously tries to solve the problem by either just calling the llm or calling a tool, observing the result and repeating until finished.

In [10]:
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-tools-agent', 'lc_hub_commit_hash': 'c18672812789a3b9697656dd539edf0120285dcae36396d0b548ae42a4ed66f5'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlacehold

In [11]:
from langchain.agents import create_tool_calling_agent

tools = [web_search]

agent = create_tool_calling_agent(llm_chat, tools, prompt)

In [12]:
from langchain.agents import AgentExecutor


agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [13]:
agent_executor.invoke({"input": "What are good resources to learn about learning new languages?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'resources to learn new languages'}`


[0m[36;1m[1;3m['https://nymag.com/strategist/article/how-to-learn-languages-at-home.html', 'https://www.pcmag.com/picks/the-best-language-learning-software', 'https://www.fluentu.com/blog/learn/free-language-learning-websites/', 'https://www.businessinsider.com/guides/learning/best-online-apps-courses-to-learn-new-language?op=1', 'https://www.lifewire.com/best-free-language-learning-websites-1357061'][0m[32;1m[1;3mI found some resources that can help you learn new languages:

1. [How to Learn Languages at Home - NYMag](https://nymag.com/strategist/article/how-to-learn-languages-at-home.html)
2. [The Best Language Learning Software - PCMag](https://www.pcmag.com/picks/the-best-language-learning-software)
3. [Free Language Learning Websites - FluentU](https://www.fluentu.com/blog/learn/free-language-learning-websites/)
4. [Best Online Apps and 

{'input': 'What are good resources to learn about learning new languages?',
 'agent_scratchpad': [],
 'output': 'I found some resources that can help you learn new languages:\n\n1. [How to Learn Languages at Home - NYMag](https://nymag.com/strategist/article/how-to-learn-languages-at-home.html)\n2. [The Best Language Learning Software - PCMag](https://www.pcmag.com/picks/the-best-language-learning-software)\n3. [Free Language Learning Websites - FluentU](https://www.fluentu.com/blog/learn/free-language-learning-websites/)\n4. [Best Online Apps and Courses to Learn New Language - Business Insider](https://www.businessinsider.com/guides/learning/best-online-apps-courses-to-learn-new-language?op=1)\n5. [Best Free Language Learning Websites - Lifewire](https://www.lifewire.com/best-free-language-learning-websites-1357061)\n\nYou can explore these resources to find the best fit for your language learning journey.'}

In [14]:
from langchain.agents import AgentExecutor, create_react_agent

prompt = hub.pull("hwchase17/react")

agent = create_react_agent(llm_chat, tools, prompt)

react_agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

react_agent_executor.invoke({"input": "What is the best way to learn good programming practices?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLearning good programming practices is essential for becoming a successful programmer.
Action: web_search
Action Input: "best way to learn good programming practices"[0m[36;1m[1;3m['https://www.datacamp.com/tutorial/coding-best-practices-and-guidelines', 'https://www.educative.io/blog/coding-best-practices', 'https://www.freecodecamp.org/news/learn-coding-for-everyone-handbook/', 'https://www.freecodecamp.org/news/how-to-learn-programming/', 'https://careerfoundry.com/en/blog/web-development/ways-to-learn-to-code/'][0m[32;1m[1;3mThere are several resources available online that can help with learning good programming practices.
Action: web_search
Action Input: "coding best practices tutorial"[0m[36;1m[1;3m['https://www.datacamp.com/tutorial/coding-best-practices-and-guidelines', 'https://www.freecodecamp.org/news/how-to-write-clean-code/', 'https://www.freecodecamp.org/news/clean-coding-for-beginners/', 'https://www.

{'input': 'What is the best way to learn good programming practices?',
 'agent_scratchpad': [],
 'output': 'The best way to learn good programming practices is to explore tutorials and guides available online.'}

Now let's build this same agent but using a more in depth approach going one level down in the abstraction layers.

Now, let's go down the latter of abstraction and build this same agent but leveraging the lower-level capabilities of LangChain's LCEL language. 

In [15]:
from langchain_openai import ChatOpenAI

In [16]:
llm_chat = ChatOpenAI(model="gpt-4o",temperature=0)

In [17]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import tool

search = TavilySearchResults()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    output = search.invoke(query)
    return [r['url'] for r in output if type(r)==dict]


tools = [web_search]

In [18]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [19]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant.\
            Given a user query you will perform research on the \
                topic using the tools available."
        ),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Bind tools to the LLM

In [20]:
llm_with_tools = llm_chat.bind_tools(tools)

Now, all we have to do is import two last utility functions:
- a component for formatting intermediate steps (agent action, tool output pairs) to input messages that can be sent to the model, 
- and a component for converting the output message into an agent action/agent finish.

In [22]:
from langchain.agents.format_scratchpad.tools import format_to_tool_messages
from langchain.agents.output_parsers import ToolsAgentOutputParser

In [23]:
agent = (
    {
        "input": lambda x: x["input"], 
        "agent_scratchpad": lambda x: format_to_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | ToolsAgentOutputParser()
)

In [24]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [25]:
list(agent_executor.stream({"input": "What are some good resources about open source large language models?"}))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'open source large language models resources'}`


[0m[36;1m[1;3m['https://llama.meta.com/llama2/', 'https://llama.meta.com/', 'https://ai.meta.com/blog/meta-llama-3/', 'https://www.datacamp.com/blog/top-open-source-llms', 'https://github.com/Hannibal046/Awesome-LLM'][0m[32;1m[1;3m
Invoking: `web_search` with `{'query': 'best open source large language models 2023'}`


[0m[36;1m[1;3m['https://huggingface.co/blog/2023-in-llms', 'https://cheatsheet.md/llm-leaderboard/best-open-source-llm.en', 'https://www.shakudo.io/blog/comparing-opensource-large-language-models', 'https://www.lakera.ai/blog/open-source-llms', 'https://github.com/eugeneyan/open-llms'][0m[32;1m[1;3mHere are some valuable resources about open-source large language models (LLMs):

1. **Meta's LLaMA (Large Language Model Meta AI)**
   - [LLaMA 2](https://llama.meta.com/llama2/)
   - [Meta AI Blog on LLaMA](https:/

[{'actions': [ToolAgentAction(tool='web_search', tool_input={'query': 'open source large language models resources'}, log="\nInvoking: `web_search` with `{'query': 'open source large language models resources'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_CI0E1IbpFOQW4Im33wjwiGDq', 'function': {'arguments': '{"query": "open source large language models resources"}', 'name': 'web_search'}, 'type': 'function'}, {'index': 1, 'id': 'call_ltgbCa2tbtah5WScrGD0LxVc', 'function': {'arguments': '{"query": "best open source large language models 2023"}', 'name': 'web_search'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls'}, id='run-2f237d5b-b692-4adc-a232-b68d03c15edd', tool_calls=[{'name': 'web_search', 'args': {'query': 'open source large language models resources'}, 'id': 'call_CI0E1IbpFOQW4Im33wjwiGDq'}, {'name': 'web_search', 'args': {'query': 'best open source large language models 2023'}, 'id': 'call_

Now, what if we want the agent to actually read the links and return a research report rather than just the urls?

In [26]:
import requests
from bs4 import BeautifulSoup


@tool
def scrape_text(url: str):
    """Scrapes the text from a given URL and returns it."""
    # Send a GET request to the webpage
    try:
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Parse the content of the request with BeautifulSoup
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract all text from the webpage
            page_text = soup.get_text(separator=" ", strip=True)

            # Print the extracted text
            return page_text[:10000]
        else:
            return f"Failed to retrieve the webpage: Status code {response.status_code}"
    except Exception as e:
        print(e)
        return f"Failed to retrieve the webpage: {e}"

In [27]:
tools = [web_search, scrape_text]

In [28]:
llm_with_tools = llm_chat.bind_tools(tools)

In [29]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

In [30]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant. Given a user query you will perform research on the topic using the tools available."
        ),
        ("user", "Scrape and summarize the contents from these urls: {input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [31]:
agent = (
    {
        "input": lambda x: x["input"], 
        "agent_scratchpad": lambda x: format_to_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | ToolsAgentOutputParser()
)

In [32]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [34]:
agent_executor.invoke({"input": "How to build a simple research agent using langchain in Python?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'How to build a simple research agent using langchain in Python?'}`


[0m[36;1m[1;3m['https://python.langchain.com/v0.2/docs/tutorials/agents/', 'https://build5nines.com/introduction-to-building-ai-agents-with-langchain-and-python/', 'https://towardsdatascience.com/building-a-simple-agent-with-tools-and-toolkits-in-langchain-77e0f9bd1fa5', 'https://dev.to/timesurgelabs/how-to-make-an-ai-agent-in-10-minutes-with-langchain-3i2n', 'https://analyzingalpha.com/langchain-python-tutorial'][0m[32;1m[1;3m
Invoking: `scrape_text` with `{'url': 'https://python.langchain.com/v0.2/docs/tutorials/agents/'}`


[0m[33;1m[1;3mBuild an Agent | 🦜️🔗 LangChain Skip to main content LangChain 0.2 is out! Leave feedback on the v0.2 docs here . You can view the v0.1 docs here . Integrations API Reference More People Contributing Templates Cookbooks 3rd party tutorials YouTube arXiv v0.2 v0.2 v0.1 🦜️🔗 L

{'input': 'How to build a simple research agent using langchain in Python?',
 'output': "Here are the summaries of the contents from the URLs on how to build a simple research agent using LangChain in Python:\n\n1. **LangChain Documentation: Build an Agent**\n   - **Overview**: This guide provides a comprehensive tutorial on building an agent using LangChain. It covers the prerequisites, including familiarity with chat models, tools, and agents.\n   - **Key Components**:\n     - **Chat Models**: Used as reasoning engines to determine actions.\n     - **Tools**: Specialized functions or APIs for specific tasks.\n     - **Agents**: Systems that use LLMs to decide which tools to use and how to apply them.\n   - **Example**: The tutorial includes a code snippet for creating an agent that interacts with a search engine, demonstrating how to set up the agent, integrate tools, and use conversational memory.\n   - **Steps**:\n     1. Import necessary functionalities.\n     2. Create the agent 

We encounter a few issues when we try to just add the scrape function to the agent without any other consideration. 
How about we make this better by improving the inner chain of the agent?

In [35]:
from langchain.prompts import MessagesPlaceholder

MEMORY_KEY = "chat_history"
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are very powerful assistant, but bad at calculating lengths of words.",
        ),
        MessagesPlaceholder(variable_name=MEMORY_KEY),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

from langchain_core.messages import AIMessage, HumanMessage

chat_history = []

agent = (
    {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_to_tool_messages(
            x["intermediate_steps"]
        ),
        "chat_history": lambda x: x["chat_history"],
    }
    | prompt
    | llm_with_tools
    | ToolsAgentOutputParser()
)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

input1 = "What are good resources on learning about the langchain framework?"
result = agent_executor.invoke({"input": input1, "chat_history": chat_history})
chat_history.extend(
    [
        HumanMessage(content=input1),
        AIMessage(content=result["output"]),
    ]
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'LangChain framework learning resources'}`


[0m[36;1m[1;3m['https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702', 'https://python.langchain.com/v0.2/docs/introduction/', 'https://medium.com/@ellen.hoeven/introducing-langchain-a-beginners-guide-️-5e751622edb7', 'https://medium.com/@zilliz_learn/ultimate-guide-to-getting-started-with-langchain-b9a87cb340f8', 'https://www.freecodecamp.org/news/beginners-guide-to-langchain/'][0m[32;1m[1;3m
Invoking: `web_search` with `{'query': 'LangChain framework tutorials'}`


[0m[36;1m[1;3m['https://python.langchain.com/v0.2/docs/introduction/', 'https://python.langchain.com/v0.2/docs/tutorials/', 'https://www.freecodecamp.org/news/beginners-guide-to-langchain/', 'https://www.datacamp.com/tutorial/how-to-build-llm-applications-with-langchain', 'https://www.baeldung.com/java-langchain-basics'][0m[32;1m[1;3mHere are s

In [36]:
result

{'input': 'What are good resources on learning about the langchain framework?',
 'chat_history': [HumanMessage(content='What are good resources on learning about the langchain framework?'),
  AIMessage(content="Here are some good resources for learning about the LangChain framework:\n\n### Articles and Guides\n1. [Top 5 Resources to Learn LangChain](https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702)\n2. [Introducing LangChain: A Beginner's Guide](https://medium.com/@ellen.hoeven/introducing-langchain-a-beginners-guide-\\ufe0f-5e751622edb7)\n3. [Ultimate Guide to Getting Started with LangChain](https://medium.com/@zilliz_learn/ultimate-guide-to-getting-started-with-langchain-b9a87cb340f8)\n4. [Beginner's Guide to LangChain](https://www.freecodecamp.org/news/beginners-guide-to-langchain/)\n\n### Official Documentation\n1. [LangChain Introduction](https://python.langchain.com/v0.2/docs/introduction/)\n2. [LangChain Tutorials](https://python.langchain.com/v0.2/do