# Building a research Agent with LangChain

In [1]:
!pip install langchain
!pip install langchain-community
!pip install langchain-openai
!pip install langchainhub
!pip install beautifulsoup4

In [2]:
# uncomment this if running locally
# from dotenv import load_dotenv

# load_dotenv()

# Or if you are in Colab, uncoment below and add your api key
import os
os.environ["OPENAI_API_KEY"] = "your-api-key"

# EXERCISE: Create a simple agent that performs a productive task, whatever that means for you.

In [3]:
from langchain_openai.chat_models import ChatOpenAI

In [4]:
llm_chat = ChatOpenAI(temperature=0)

In [5]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import tool

search = TavilySearchResults()

results = search.invoke("biggest moutains in the world")
results

[{'url': 'https://geology.com/records/highest-mountain-in-the-world.shtml',
  'content': 'That shape makes it possible for the top of Chimborazo, which is located slightly south of the equator (and shorter than Mount Everest) to be farther from the center of the Earth than the top of Mount Everest (which is a considerable distance north of the equator).\n However, the top of Chimborazo is farther from the center of the Earth than the top of Mount Everest and the top of Mauna Kea.\n However, Mauna Kea is an island, and if the distance from the bottom of the nearby Pacific Ocean floor to the peak of the island is measured, then Mauna Kea is "taller" than Mount Everest.\n Highest Mountain\nHighest Mountain in the World\nMount Everest: Highest Altitude\nHighest Altitude: An altitude of 8,848.86 meters (29,031.69 feet) above sea level makes Mount Everest the mountain on Earth with the highest altitude.\n In the diagram above, the gray dashed line is a perfect circle, and the solid blue line

In [16]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import tool

search = TavilySearchResults()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    output = search.invoke(query)
    return [r['url'] for r in output if type(r)==dict]

web_search("Current resources on why pancakes are the best breakfast")

['https://www.runnersworld.com/nutrition-weight-loss/a20788459/the-ultimate-guide-to-pancakes/',
 'https://betterme.world/articles/are-pancakes-healthy/',
 'https://www.epicurious.com/recipes-menus/fluffy-pancake-recipes-breakfast-gallery',
 'https://www.eatingwell.com/article/17772/which-is-a-healthier-breakfast-pancakes-or-eggs/',
 'https://www.weekand.com/healthy-living/article/benefits-pancakes-18003922.php']

In [7]:
from langchain import hub

prompt = hub.pull("hwchase17/openai-functions-agent")

prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-functions-agent', 'lc_hub_commit_hash': 'a1655024b06afbd95d17449f21316291e0726f13dcfaf990cc0d18087ad689a5'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlace

In [8]:
prompt.input_variables

['agent_scratchpad', 'input']

In [9]:
prompt.input_types

{'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}

In [10]:
prompt.__dict__

{'name': None,
 'input_variables': ['agent_scratchpad', 'input'],
 'input_types': {'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
  'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]},
 'output_parser': None,
 'partial_variables': {},
 'metadata': {'lc_hub_owner': 'hwchase17',
  'lc_hub_repo': 'openai-functions-agent',
  'lc_hub_commit_hash': 'a1655024b06afbd95d17449f21316291e0726f13dcfaf990cc0d18087ad689a5'},
 'tags': None,
 'messages': [SystemMessagePromptTemplate(prompt=PromptT

To build an agent I need to do 2 essential things:

1. Connect a model to tools
2. Set up a loop in which the model continuously tries to solve the problem by either just calling the llm or calling a tool, observing the result and repeating until finished.

In [11]:
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-functions-agent', 'lc_hub_commit_hash': 'a1655024b06afbd95d17449f21316291e0726f13dcfaf990cc0d18087ad689a5'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlace

In [17]:
from langchain.agents import create_openai_tools_agent

tools = [web_search]

agent = create_openai_tools_agent(llm_chat, tools, prompt)

In [18]:
from langchain.agents import AgentExecutor


agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [19]:
agent_executor.invoke({"input": "What are good resources to learn about learning new languages?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'resources to learn new languages'}`


[0m[36;1m[1;3m['https://www.duolingo.com/', 'https://www.fluentu.com/blog/free-language-learning-websites/', 'https://www.theintrepidguide.com/top-language-learning-resources/', 'https://www.businessinsider.com/guides/learning/best-online-apps-courses-to-learn-new-language?op=1', 'https://www.lifewire.com/best-free-language-learning-websites-1357061'][0m[32;1m[1;3mHere are some good resources to learn new languages:
1. [Duolingo](https://www.duolingo.com/)
2. [FluentU](https://www.fluentu.com/blog/free-language-learning-websites/)
3. [The Intrepid Guide - Top Language Learning Resources](https://www.theintrepidguide.com/top-language-learning-resources/)
4. [Business Insider - Best Online Apps and Courses to Learn New Language](https://www.businessinsider.com/guides/learning/best-online-apps-courses-to-learn-new-language?op=1)
5. [Lifewire - B

{'input': 'What are good resources to learn about learning new languages?',
 'agent_scratchpad': [],
 'output': 'Here are some good resources to learn new languages:\n1. [Duolingo](https://www.duolingo.com/)\n2. [FluentU](https://www.fluentu.com/blog/free-language-learning-websites/)\n3. [The Intrepid Guide - Top Language Learning Resources](https://www.theintrepidguide.com/top-language-learning-resources/)\n4. [Business Insider - Best Online Apps and Courses to Learn New Language](https://www.businessinsider.com/guides/learning/best-online-apps-courses-to-learn-new-language?op=1)\n5. [Lifewire - Best Free Language Learning Websites](https://www.lifewire.com/best-free-language-learning-websites-1357061)\n\nThese resources offer a variety of tools and methods to help you learn new languages effectively.'}

In [20]:
from langchain.agents import AgentExecutor, create_react_agent

prompt = hub.pull("hwchase17/react")

agent = create_react_agent(llm_chat, tools, prompt)

react_agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


react_agent_executor.invoke({"input": "What is the best way to learn good programming practices?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mIt's important to gather information on different methods for learning good programming practices.
Action: web_search
Action Input: "best way to learn good programming practices"[0m[36;1m[1;3m['https://www.educative.io/blog/coding-best-practices', 'https://www.freecodecamp.org/news/how-to-learn-programming/', 'https://www.datacamp.com/tutorial/coding-best-practices-and-guidelines', 'https://careerfoundry.com/en/blog/web-development/ways-to-learn-to-code/', 'https://www.coursera.org/articles/learning-how-to-code'][0m[32;1m[1;3mThere are various resources available online that can help in learning good programming practices.
Action: web_search
Action Input: "online resources for learning good programming practices"[0m[36;1m[1;3m[][0m[32;1m[1;3mIt seems like the search didn't return relevant results. Let's try a different approach.
Action: web_search
Action Input: "best practices for learning programming"[0m[36;1m

{'input': 'What is the best way to learn good programming practices?',
 'agent_scratchpad': [],
 'output': 'There are various online resources and courses available to learn good programming practices, such as Coursera, FreeCodeCamp, and DataCamp.'}

Now let's build this same agent but using a more in depth approach going one level down in the abstraction layers.

Now, let's go down the latter of abstraction and build this same agent but leveraging the lower-level capabilities of LangChain's LCEL language. 

In [21]:
from langchain_openai.chat_models import ChatOpenAI

In [22]:
llm_chat = ChatOpenAI(model="gpt-3.5-turbo-16k",temperature=0)

In [23]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import tool

search = TavilySearchResults()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    output = search.invoke(query)
    return [r['url'] for r in output if type(r)==dict]


tools = [web_search]

In [24]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [25]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant.\
            Given a user query you will perform research on the \
                topic using the tools available."
        ),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Bind tools to the LLM

In [26]:
from langchain_community.tools.convert_to_openai import format_tool_to_openai_tool

In [27]:
llm_with_tools = llm_chat.bind_tools(tools)

Now, all we have to do is import two last utility functions:
- a component for formatting intermediate steps (agent action, tool output pairs) to input messages that can be sent to the model, 
- and a component for converting the output message into an agent action/agent finish.

In [28]:
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser

In [29]:
agent = (
    {
        "input": lambda x: x["input"], 
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

In [33]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [34]:
list(agent_executor.stream({"input": "What are some good resources about open source large language models?"}))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'resources about open source large language models'}`


[0m[36;1m[1;3m['https://ai.meta.com/blog/meta-llama-3/', 'https://llama.meta.com/llama2/', 'https://hypestudio.org/blog/guide-to-open-source-large-language-models/', 'https://llmmodels.org/open-source/', 'https://llama.meta.com/'][0m[32;1m[1;3mHere are some good resources about open source large language models:

1. [Meta LLAMA 3](https://ai.meta.com/blog/meta-llama-3/): This blog post provides information about Meta LLAMA 3, an open source large language model developed by Meta AI.

2. [LLAMA 2](https://llama.meta.com/llama2/): This website provides information about LLAMA 2, an open source large language model developed by Meta AI.

3. [Guide to Open Source Large Language Models](https://hypestudio.org/blog/guide-to-open-source-large-language-models/): This blog post provides a comprehensive guide to open source large langu

[{'actions': [ToolAgentAction(tool='web_search', tool_input={'query': 'resources about open source large language models'}, log="\nInvoking: `web_search` with `{'query': 'resources about open source large language models'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_IQL2mPPKst7VW31TL5uWNqKY', 'function': {'arguments': '{\n  "query": "resources about open source large language models"\n}', 'name': 'web_search'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls'}, id='run-e77cdd4f-0ee1-4c12-8316-ceb5f386ac74', tool_calls=[{'name': 'web_search', 'args': {'query': 'resources about open source large language models'}, 'id': 'call_IQL2mPPKst7VW31TL5uWNqKY'}], tool_call_chunks=[{'name': 'web_search', 'args': '{\n  "query": "resources about open source large language models"\n}', 'id': 'call_IQL2mPPKst7VW31TL5uWNqKY', 'index': 0}])], tool_call_id='call_IQL2mPPKst7VW31TL5uWNqKY')],
  'messages': [AIMessageChun

Now, what if we want the agent to actually read the links and return a research report rather than just the urls?

In [35]:
import requests
from bs4 import BeautifulSoup


@tool
def scrape_text(url: str):
    """Scrapes the text from a given URL and returns it."""
    # Send a GET request to the webpage
    try:
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Parse the content of the request with BeautifulSoup
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract all text from the webpage
            page_text = soup.get_text(separator=" ", strip=True)

            # Print the extracted text
            return page_text[:10000]
        else:
            return f"Failed to retrieve the webpage: Status code {response.status_code}"
    except Exception as e:
        print(e)
        return f"Failed to retrieve the webpage: {e}"

In [36]:
tools = [web_search, scrape_text]

In [37]:
llm_with_tools = llm_chat.bind_tools(tools)

In [38]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

In [39]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant. Given a user query you will perform research on the topic using the tools available."
        ),
        ("user", "Scrape and summarize the contents from these urls: {input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [40]:
agent = (
    {
        "input": lambda x: x["input"], 
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

In [41]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [46]:
agent_executor.invoke({"input": "What are good resources on learning about langchain agents?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `scrape_text` with `{'url': 'https://www.ibm.com/blogs/blockchain/2018/03/what-is-a-blockchain-consortium/'}`


[0m[33;1m[1;3mIBM Blog Exclusive Artificial intelligence April 26, 2024 Bigger isn’t always better: How hybrid AI pattern enables smaller language models Bigger is not always better, as specialized models outperform general-purpose models with lower infrastructure requirements. Read more Latest Articles Sustainability May 1, 2024 How generative AI will revolutionize supply chain 2 min read - From addressing queries to resolving alerts, learn how the combination of data and AI will transform businesses from reactive to proactive. Financial services April 29, 2024 How fintech innovation is driving digital transformation for communities across the globe 3 min read - From democratizing finance to establishing digital currencies, fintechs have been transformational for the financial services industry. Analy

{'input': 'What are good resources on learning about langchain agents?',
 'output': 'Here are some resources for learning about langchain agents:\n\n1. [Top 5 Resources to Learn Langchain](https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702): This Medium article provides a list of top resources for learning about langchain, including agents.\n\n2. [Building a Simple Agent with Tools and Toolkits in Langchain](https://towardsdatascience.com/building-a-simple-agent-with-tools-and-toolkits-in-langchain-77e0f9bd1fa5): This article on Towards Data Science explains how to build a simple agent using tools and toolkits in langchain.\n\n3. [Langchain Agents Documentation](https://python.langchain.com/docs/modules/agents/): The official documentation of langchain provides information on agents and their usage.\n\n4. [How to Make an AI Agent in 10 Minutes with Langchain](https://dev.to/timesurgelabs/how-to-make-an-ai-agent-in-10-minutes-with-langchain-3i2n): This tutorial

We encounter a few issues when we try to just add the scrape function to the agent without any other consideration. 
How about we make this better by improving the inner chain of the agent?

In [52]:
from langchain.prompts import MessagesPlaceholder

MEMORY_KEY = "chat_history"
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are very powerful assistant, but bad at calculating lengths of words.",
        ),
        MessagesPlaceholder(variable_name=MEMORY_KEY),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

from langchain_core.messages import AIMessage, HumanMessage

chat_history = []

agent = (
    {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
        "chat_history": lambda x: x["chat_history"],
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

input1 = "What are good resources on learning about the langchain framework?"
result = agent_executor.invoke({"input": input1, "chat_history": chat_history})
chat_history.extend(
    [
        HumanMessage(content=input1),
        AIMessage(content=result["output"]),
    ]
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'resources for learning about the langchain framework'}`


[0m[36;1m[1;3m['https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702', 'https://python.langchain.com/docs/get_started/introduction/', 'https://www.pluralsight.com/resources/blog/data/getting-started-langchain', 'https://medium.com/@ellen.hoeven/introducing-langchain-a-beginners-guide-️-5e751622edb7', 'https://www.coursera.org/learn/langchain-application-development-essentials'][0m[32;1m[1;3mHere are some good resources for learning about the Langchain framework:

1. [Medium article: Top 5 Resources to Learn Langchain](https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702)
2. [Langchain Documentation: Getting Started](https://python.langchain.com/docs/get_started/introduction/)
3. [Pluralsight Blog: Getting Started with Langchain](https://www.pluralsight.com/resources/blog/data

In [53]:
result

{'input': 'What are good resources on learning about the langchain framework?',
 'chat_history': [HumanMessage(content='What are good resources on learning about the langchain framework?'),
  AIMessage(content="Here are some good resources for learning about the Langchain framework:\n\n1. [Medium article: Top 5 Resources to Learn Langchain](https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702)\n2. [Langchain Documentation: Getting Started](https://python.langchain.com/docs/get_started/introduction/)\n3. [Pluralsight Blog: Getting Started with Langchain](https://www.pluralsight.com/resources/blog/data/getting-started-langchain)\n4. [Medium article: Introducing Langchain - A Beginner's Guide](https://medium.com/@ellen.hoeven/introducing-langchain-a-beginners-guide-️-5e751622edb7)\n5. [Coursera: Langchain Application Development Essentials](https://www.coursera.org/learn/langchain-application-development-essentials)\n\nThese resources should provide you with a good