# Building a research Agent with LangChain

In [54]:
!pip install langchain
!pip install langchain-community
!pip install langchain-openai
!pip install langchainhub
!pip install beautifulsoup4

In [90]:
# uncomment this if running locally
# from dotenv import load_dotenv

# load_dotenv()

# FILEPATH: /Users/greatmaster/Desktop/projects/oreilly-live-trainings/oreilly_live_training_agents/notebooks/1.3-intro-agents-with-langchain.ipynb
# SET YOUR API KEYS FOR LANGSMITH AND TAVILY TO RUN THIS NOTEBOOK!
# langsmith: https://docs.smith.langchain.com/#:~:text=install%20-U%20langsmith-,Create%20an%20API%20key,click%20Create%20API%20Key.,-Setup%20your%20environment
# tavily: https://app.tavily.com/sign-in
# openai: https://platform.openai.com/docs/quickstart
import os
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGCHAIN_API_KEY"] = "your-api-key"
# os.environ["TAVILY_API_KEY"] = "your-api-key"
# os.environ["OPENAI_API_KEY"] = 'your-api-key'

# EXERCISE: Create a simple agent that performs a productive task, whatever that means for you.

In [91]:
from langchain_openai import ChatOpenAI

In [92]:
llm_chat = ChatOpenAI(temperature=0)

In [93]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import tool

search = TavilySearchResults()

results = search.invoke("biggest moutains in the world")
results

[{'url': 'https://largest.org/nature/mountains/',
  'content': 'We’ll also learn some interesting facts about the wildlife that thrives near each one, as well as what each mountain is known for!\nAnnapurna\nSource: wikimedia.org\nThis glorious mountain is technically named Annapurna I and its counterpart, Annapurna II, is also notable as the 16th-highest mountain in the world! Manaslu\nSource: wikimedia.org\nAlso known as Kutang, this mountain is located in the Mansiri Himal subrange of the Himalayas and is the highest mountain peak in the Gorkha District of Nepal. Leave a Comment Cancel reply\nComment\nRECENT POSTS\nPOPULAR POSTS\n10 Largest National Parks\n5 Largest Owls in the World\n10 Largest Cat Breeds in the World\nCATEGORIES\nSister Sites: Oldest.org | Rarest.org The mountain sits on the border of Pakistan and China and, since it is based on higher ground than Mount Everest, is actually the most prominent mountain in the world!\n The Himalayan blue sheep, or bharal, isn’t actua

In [94]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import tool

search = TavilySearchResults()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    output = search.invoke(query)
    return [r['url'] for r in output if type(r)==dict]

web_search("Current resources on why pancakes are the best breakfast")

['https://greatist.com/eat/pancakes-and-waffles-which-is-better',
 'https://www.weekand.com/healthy-living/article/benefits-pancakes-18003922.php',
 'https://www.eatingwell.com/article/17772/which-is-a-healthier-breakfast-pancakes-or-eggs/',
 'https://www.popsugar.com/food/why-pancakes-better-than-waffles-46335811',
 'https://www.kingarthurbaking.com/blog/2021/10/26/best-pancake-tips']

In [95]:
from langchain import hub

prompt = hub.pull("hwchase17/openai-functions-agent")

prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-functions-agent', 'lc_hub_commit_hash': 'a1655024b06afbd95d17449f21316291e0726f13dcfaf990cc0d18087ad689a5'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlace

In [96]:
prompt.input_variables

['agent_scratchpad', 'input']

In [97]:
prompt.input_types

{'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}

In [98]:
prompt.__dict__

{'name': None,
 'input_variables': ['agent_scratchpad', 'input'],
 'input_types': {'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]],
  'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]},
 'output_parser': None,
 'partial_variables': {},
 'metadata': {'lc_hub_owner': 'hwchase17',
  'lc_hub_repo': 'openai-functions-agent',
  'lc_hub_commit_hash': 'a1655024b06afbd95d17449f21316291e0726f13dcfaf990cc0d18087ad689a5'},
 'tags': None,
 'messages': [SystemMessagePromptTemplate(prompt=PromptT

To build an agent I need to do 2 essential things:

1. Connect a model to tools
2. Set up a loop in which the model continuously tries to solve the problem by either just calling the llm or calling a tool, observing the result and repeating until finished.

In [64]:
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-functions-agent', 'lc_hub_commit_hash': 'a1655024b06afbd95d17449f21316291e0726f13dcfaf990cc0d18087ad689a5'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlace

In [99]:
from langchain.agents import create_openai_tools_agent

tools = [web_search]

agent = create_openai_tools_agent(llm_chat, tools, prompt)

In [100]:
from langchain.agents import AgentExecutor


agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [101]:
agent_executor.invoke({"input": "What are good resources to learn about learning new languages?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'resources to learn new languages'}`


[0m[36;1m[1;3m['https://www.businessinsider.com/guides/learning/best-online-apps-courses-to-learn-new-language?op=1', 'https://nymag.com/strategist/article/how-to-learn-languages-at-home.html', 'https://www.theintrepidguide.com/top-language-learning-resources/', 'https://www.fluentu.com/blog/free-language-learning-websites/', 'https://www.lifewire.com/best-free-language-learning-websites-1357061'][0m[32;1m[1;3mI found some resources that can help you learn new languages:

1. [Business Insider - Best Online Apps and Courses to Learn New Language](https://www.businessinsider.com/guides/learning/best-online-apps-courses-to-learn-new-language?op=1)
2. [NY Mag - How to Learn Languages at Home](https://nymag.com/strategist/article/how-to-learn-languages-at-home.html)
3. [The Intrepid Guide - Top Language Learning Resources](https://www.theintrepidg

{'input': 'What are good resources to learn about learning new languages?',
 'agent_scratchpad': [],
 'output': 'I found some resources that can help you learn new languages:\n\n1. [Business Insider - Best Online Apps and Courses to Learn New Language](https://www.businessinsider.com/guides/learning/best-online-apps-courses-to-learn-new-language?op=1)\n2. [NY Mag - How to Learn Languages at Home](https://nymag.com/strategist/article/how-to-learn-languages-at-home.html)\n3. [The Intrepid Guide - Top Language Learning Resources](https://www.theintrepidguide.com/top-language-learning-resources/)\n4. [FluentU - Free Language Learning Websites](https://www.fluentu.com/blog/free-language-learning-websites/)\n5. [Lifewire - Best Free Language Learning Websites](https://www.lifewire.com/best-free-language-learning-websites-1357061)\n\nYou can explore these resources to find the best fit for your language learning journey.'}

In [102]:
from langchain.agents import AgentExecutor, create_react_agent

prompt = hub.pull("hwchase17/react")

agent = create_react_agent(llm_chat, tools, prompt)

react_agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

react_agent_executor.invoke({"input": "What is the best way to learn good programming practices?",
                             "agent_scratchpad": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLearning good programming practices is essential for becoming a successful programmer.
Action: web_search
Action Input: "best way to learn good programming practices"[0m[36;1m[1;3m['https://www.educative.io/blog/coding-best-practices', 'https://www.coursera.org/articles/learning-how-to-code', 'https://www.freecodecamp.org/news/how-to-learn-programming/', 'https://www.datacamp.com/tutorial/coding-best-practices-and-guidelines', 'https://careerfoundry.com/en/blog/web-development/ways-to-learn-to-code/'][0m[32;1m[1;3mThere are various resources available online to learn good programming practices.
Action: web_search
Action Input: "best online resources for learning programming practices"[0m[36;1m[1;3m['https://www.freecodecamp.org/', 'https://www.codecademy.com/learn/learn-how-to-code', 'https://blog.bit.ai/resources-to-learn-programming/', 'https://www.codecademy.com/', 'https://leetcode.com/'][0m[32;1m[1;3mI now kn

{'input': 'What is the best way to learn good programming practices?',
 'agent_scratchpad': [],
 'output': 'The best way to learn good programming practices is to use online resources like FreeCodeCamp, Codecademy, and LeetCode.'}

Now let's build this same agent but using a more in depth approach going one level down in the abstraction layers.

Now, let's go down the latter of abstraction and build this same agent but leveraging the lower-level capabilities of LangChain's LCEL language. 

In [103]:
from langchain_openai import ChatOpenAI

In [104]:
llm_chat = ChatOpenAI(model="gpt-3.5-turbo-16k",temperature=0)

In [105]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import tool

search = TavilySearchResults()

@tool
def web_search(query: str):
    """Searches the web given a user query and returns the most relevant links"""
    output = search.invoke(query)
    return [r['url'] for r in output if type(r)==dict]


tools = [web_search]

In [106]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [107]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant.\
            Given a user query you will perform research on the \
                topic using the tools available."
        ),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Bind tools to the LLM

In [108]:
from langchain_community.tools.convert_to_openai import format_tool_to_openai_tool

In [109]:
llm_with_tools = llm_chat.bind_tools(tools)

Now, all we have to do is import two last utility functions:
- a component for formatting intermediate steps (agent action, tool output pairs) to input messages that can be sent to the model, 
- and a component for converting the output message into an agent action/agent finish.

In [110]:
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser

In [111]:
agent = (
    {
        "input": lambda x: x["input"], 
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

In [112]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [79]:
list(agent_executor.stream({"input": "What are some good resources about open source large language models?"}))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'resources about open source large language models'}`


[0m[36;1m[1;3m['https://hypestudio.org/blog/guide-to-open-source-large-language-models/', 'https://www.datacamp.com/blog/top-open-source-llms', 'https://llmmodels.org/open-source/', 'https://llama.meta.com/llama2/', 'https://llmmodels.org/'][0m[32;1m[1;3mHere are some good resources about open source large language models:

1. [Guide to Open Source Large Language Models](https://hypestudio.org/blog/guide-to-open-source-large-language-models/): This blog post provides a comprehensive guide to open source large language models, including an overview of different models and their applications.

2. [Top Open Source LLMS](https://www.datacamp.com/blog/top-open-source-llms): This blog post from DataCamp lists some of the top open source large language models available, along with their features and use cases.

3. [Open Source LLMS]

[{'actions': [ToolAgentAction(tool='web_search', tool_input={'query': 'resources about open source large language models'}, log="\nInvoking: `web_search` with `{'query': 'resources about open source large language models'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_iHtMm2vDhXVGaaZjpI1nK27l', 'function': {'arguments': '{\n  "query": "resources about open source large language models"\n}', 'name': 'web_search'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls'}, id='run-c6cf92a0-748b-449a-92b0-f7232485c2c8', tool_calls=[{'name': 'web_search', 'args': {'query': 'resources about open source large language models'}, 'id': 'call_iHtMm2vDhXVGaaZjpI1nK27l'}], tool_call_chunks=[{'name': 'web_search', 'args': '{\n  "query": "resources about open source large language models"\n}', 'id': 'call_iHtMm2vDhXVGaaZjpI1nK27l', 'index': 0}])], tool_call_id='call_iHtMm2vDhXVGaaZjpI1nK27l')],
  'messages': [AIMessageChun

Now, what if we want the agent to actually read the links and return a research report rather than just the urls?

In [113]:
import requests
from bs4 import BeautifulSoup


@tool
def scrape_text(url: str):
    """Scrapes the text from a given URL and returns it."""
    # Send a GET request to the webpage
    try:
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Parse the content of the request with BeautifulSoup
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract all text from the webpage
            page_text = soup.get_text(separator=" ", strip=True)

            # Print the extracted text
            return page_text[:10000]
        else:
            return f"Failed to retrieve the webpage: Status code {response.status_code}"
    except Exception as e:
        print(e)
        return f"Failed to retrieve the webpage: {e}"

In [114]:
tools = [web_search, scrape_text]

In [115]:
llm_with_tools = llm_chat.bind_tools(tools)

In [116]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

In [117]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", "You are a powerful research assistant. Given a user query you will perform research on the topic using the tools available."
        ),
        ("user", "Scrape and summarize the contents from these urls: {input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [118]:
agent = (
    {
        "input": lambda x: x["input"], 
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

In [119]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [87]:
agent_executor.invoke({"input": "What are good resources on learning about langchain agents?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `scrape_text` with `{'url': 'https://www.ibm.com/blogs/blockchain/2018/03/what-is-a-blockchain-consortium/'}`


[0m[33;1m[1;3mIBM Blog Exclusive Artificial intelligence April 26, 2024 Bigger isn’t always better: How hybrid AI pattern enables smaller language models Bigger is not always better, as specialized models outperform general-purpose models with lower infrastructure requirements. Read more Latest Articles Sustainability May 1, 2024 How generative AI will revolutionize supply chain 2 min read - From addressing queries to resolving alerts, learn how the combination of data and AI will transform businesses from reactive to proactive. Financial services April 29, 2024 How fintech innovation is driving digital transformation for communities across the globe 3 min read - From democratizing finance to establishing digital currencies, fintechs have been transformational for the financial services industry. Analy

{'input': 'What are good resources on learning about langchain agents?',
 'output': 'Here are some resources on learning about langchain agents:\n\n1. [Top 5 Resources to Learn Langchain](https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702): This Medium article provides a list of top resources to learn about langchain, including agents.\n\n2. [Python Langchain Documentation - Agents](https://python.langchain.com/docs/modules/agents/): The official documentation of Python Langchain provides information on agents and their usage.\n\n3. [Building a Simple Agent with Tools and Toolkits in Langchain](https://towardsdatascience.com/building-a-simple-agent-with-tools-and-toolkits-in-langchain-77e0f9bd1fa5): This article on Towards Data Science explains how to build a simple agent using tools and toolkits in langchain.\n\n4. [Langchain Agents Cheat Sheet](https://cheatsheet.md/langchain-tutorials/langchain-agents.en): This cheat sheet provides a quick reference guide f

We encounter a few issues when we try to just add the scrape function to the agent without any other consideration. 
How about we make this better by improving the inner chain of the agent?

In [88]:
from langchain.prompts import MessagesPlaceholder

MEMORY_KEY = "chat_history"
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are very powerful assistant, but bad at calculating lengths of words.",
        ),
        MessagesPlaceholder(variable_name=MEMORY_KEY),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

from langchain_core.messages import AIMessage, HumanMessage

chat_history = []

agent = (
    {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
        "chat_history": lambda x: x["chat_history"],
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

input1 = "What are good resources on learning about the langchain framework?"
result = agent_executor.invoke({"input": input1, "chat_history": chat_history})
chat_history.extend(
    [
        HumanMessage(content=input1),
        AIMessage(content=result["output"]),
    ]
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `web_search` with `{'query': 'resources for learning about the langchain framework'}`


[0m[36;1m[1;3m['https://www.ibm.com/topics/langchain', 'https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702', 'https://www.pluralsight.com/resources/blog/data/getting-started-langchain', 'https://python.langchain.com/docs/get_started/introduction/', 'https://medium.com/@ellen.hoeven/introducing-langchain-a-beginners-guide-️-5e751622edb7'][0m[32;1m[1;3mHere are some good resources for learning about the Langchain framework:

1. [IBM - Langchain](https://www.ibm.com/topics/langchain): IBM provides an overview of the Langchain framework and its features.

2. [Medium - Top 5 Resources to Learn Langchain](https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702): This Medium article lists the top 5 resources for learning Langchain, including tutorials, documentation, and online cours

In [89]:
result

{'input': 'What are good resources on learning about the langchain framework?',
 'chat_history': [HumanMessage(content='What are good resources on learning about the langchain framework?'),
  AIMessage(content="Here are some good resources for learning about the Langchain framework:\n\n1. [IBM - Langchain](https://www.ibm.com/topics/langchain): IBM provides an overview of the Langchain framework and its features.\n\n2. [Medium - Top 5 Resources to Learn Langchain](https://medium.com/@ankity09/top-5-resources-to-learn-langchain-e2bdbbd11702): This Medium article lists the top 5 resources for learning Langchain, including tutorials, documentation, and online courses.\n\n3. [Pluralsight - Getting Started with Langchain](https://www.pluralsight.com/resources/blog/data/getting-started-langchain): Pluralsight offers a guide on getting started with Langchain, covering the basics and providing resources for further learning.\n\n4. [Langchain Documentation](https://python.langchain.com/docs/get