In [1]:
from dotenv import load_dotenv
import os

# Pull variables from a .env file (if one is found) into the process environment.
_ = load_dotenv()

# Fail fast with an actionable message. Re-assigning the variable to itself
# (os.environ[k] = os.getenv(k)) changes nothing when the key is present and
# raises an opaque TypeError when it is absent, because environ values must be str.
if not os.getenv('NVIDIA_API_KEY'):
    raise RuntimeError(
        "NVIDIA_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [2]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
# from langchain_openai import ChatOpenAI
# from langchain_google_genai import GoogleGenerativeAIEmbeddings, GoogleGenerativeAI
# from langchain_google_vertexai import ChatVertexAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_community.tools.tavily_search import TavilySearchResults

In [3]:
# Search tool handed to the agent; max_results raised to 4.
tool = TavilySearchResults(max_results=4)
# Show the tool's class and its name (the agent dispatches tool calls by this name).
print(type(tool), tool.name, sep="\n")

<class 'langchain_community.tools.tavily_search.tool.TavilySearchResults'>
tavily_search_results_json


In [4]:
class AgentState(TypedDict):
    """State passed between graph nodes: the list of chat messages so far."""
    # operator.add is attached as Annotated metadata; presumably LangGraph uses it
    # as the reducer, so messages returned by a node are appended to the list
    # rather than replacing it — confirm against the LangGraph docs.
    messages: Annotated[list[AnyMessage], operator.add]

In [29]:
class Agent:
    """Tool-calling agent implemented as a two-node graph loop.

    The "llm" node asks the model for the next message. If that message
    requests tool calls, the "action" node executes them and control goes
    back to "llm"; otherwise the graph ends.

    Attributes:
        system: Optional system prompt prepended to every model call.
        tools: Mapping of tool name -> tool object, used to dispatch calls.
        model: The model returned by ``model.bind_tools(tools)``.
        graph: The compiled graph; run with ``graph.invoke({"messages": [...]})``.
    """

    def __init__(self, model, tools, system=""):
        """Build and compile the graph.

        Args:
            model: Chat model exposing ``bind_tools``.
            tools: List of tool objects, each with ``name`` and ``invoke``.
            system: System prompt text; an empty string disables it.
        """
        self.system = system
        # Assign collaborators before compiling, so the instance is fully
        # initialised by the time the graph holds its bound methods.
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_openai)
        graph.add_node("action", self.take_action)
        # After each model turn: run tools if any were requested, else stop.
        graph.add_conditional_edges(
            "llm",
            self.exists_action,
            {True: "action", False: END}
        )
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile()

    def exists_action(self, state: AgentState):
        """Return True if the latest message requests at least one tool call."""
        last_message = state['messages'][-1]
        # getattr tolerates message objects without a tool_calls attribute;
        # bool() keeps the result a strict True/False for the edge mapping.
        return bool(getattr(last_message, 'tool_calls', None))

    def call_openai(self, state: AgentState):
        """Invoke the bound model on the conversation and return its reply.

        Despite the name, this calls whichever model was passed to the
        constructor. The system prompt (if any) is prepended only for this
        call; it is not written back into the state.
        """
        messages = state['messages']
        if self.system:
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    def take_action(self, state: AgentState):
        """Run every tool call in the latest message; return one ToolMessage each.

        An unknown tool name is not an error: a "bad tool name, retry" result
        is sent back so the model can correct itself.
        """
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for t in tool_calls:
            print(f"Calling: {t}")
            if t['name'] not in self.tools:      # check for bad tool name from LLM
                print("\n ....bad tool name....")
                result = "bad tool name, retry"  # instruct LLM to retry if bad
            else:
                result = self.tools[t['name']].invoke(t['args'])
            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))
        print("Back to the model!")
        return {'messages': results}

In [30]:
prompt = """You are a smart research assistant. Use the search engine to look up information. \
You are allowed to make multiple calls (either together or in sequence). \
Only look up information when you are sure of what you want. \
If you need to look up some information before asking a follow up question, you are allowed to do that!
"""

# The API key is read from the environment (populated from .env in the first cell).
# Never paste credentials into the notebook: any key that was committed in source
# must be treated as compromised and rotated.
model = ChatNVIDIA(model="meta/llama-3.1-8b-instruct", api_key=os.environ['NVIDIA_API_KEY'])  #reduce inference cost
abot = Agent(model, [tool], system=prompt)

In [33]:
# Single-city question: start the graph from one human message.
messages = [HumanMessage(content="What is the weather in Mumbai?")]
result = abot.graph.invoke(dict(messages=messages))

Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'Mumbai weather'}, 'id': 'chatcmpl-tool-960eb884b7d34377bcc4feda391c09cf', 'type': 'tool_call'}
Back to the model!


In [34]:
# Display the whole returned state to inspect the messages accumulated during the run.
result

{'messages': [HumanMessage(content='What is the weather in Mumbai?'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-960eb884b7d34377bcc4feda391c09cf', 'type': 'function', 'function': {'name': 'tavily_search_results_json', 'arguments': '{"query": "Mumbai weather"}'}}]}, response_metadata={'role': 'assistant', 'content': None, 'tool_calls': [{'id': 'chatcmpl-tool-960eb884b7d34377bcc4feda391c09cf', 'type': 'function', 'function': {'name': 'tavily_search_results_json', 'arguments': '{"query": "Mumbai weather"}'}}], 'token_usage': {'prompt_tokens': 385, 'total_tokens': 414, 'completion_tokens': 29}, 'finish_reason': 'tool_calls', 'model_name': 'meta/llama-3.1-8b-instruct'}, id='run-aacb883a-10f7-4aff-ba46-c948fde2e1c6-0', tool_calls=[{'name': 'tavily_search_results_json', 'args': {'query': 'Mumbai weather'}, 'id': 'chatcmpl-tool-960eb884b7d34377bcc4feda391c09cf', 'type': 'tool_call'}], role='assistant'),
  ToolMessage(content='[{\'url\': \'https://www.weath

In [35]:
# The last message in the state is the model's final answer.
result['messages'][-1].content

'The current weather in Mumbai is 29.1 degrees Celsius, with a misty condition and a wind speed of 9 km/h from the west. The temperature is expected to be around 27.75 degrees Celsius during the day, with a minimum temperature of 26.98 degrees Celsius and a maximum temperature of 28.7 degrees Celsius. It is also expected to be relatively humid, with a humidity level of 70%.'

In [40]:
# Two-city question: the model needs a search for each city.
messages = [HumanMessage(content="What is the weather in SF and Mumbai?")]
result = abot.graph.invoke(dict(messages=messages))

Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'San Francisco weather'}, 'id': 'chatcmpl-tool-bc0386be391a47b98292875bd6dce6a8', 'type': 'tool_call'}
Back to the model!
Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'Mumbai weather'}, 'id': 'chatcmpl-tool-152fce9d2c6d481980e8c8f0e17b9224', 'type': 'tool_call'}
Back to the model!


In [41]:
# Final answer for the two-city question (last message in the state).
result['messages'][-1].content

'The current weather in San Francisco is partly cloudy with a temperature of 57.9 degrees Fahrenheit and a humidity of 97%. The wind is blowing at 6.8 kilometers per hour from the north.\n\nThe current weather in Mumbai is misty with a temperature of 84.4 degrees Fahrenheit and a humidity of 70%. The wind is blowing at 9 kilometers per hour from the west.'

In [38]:
# Note, the query was modified to produce more consistent results.
# Results may vary per run and over time as search information and models change.

query = "Who won the super bowl in 2024? In what state is the winning team headquarters located? \
What is the GDP of that state? Answer each question."
messages = [HumanMessage(content=query)]

# Fresh model and agent for the multi-step question. The API key is read from the
# environment; never paste credentials into the notebook, and rotate any key that
# was committed in source.
model = ChatNVIDIA(model="meta/llama-3.1-8b-instruct", api_key=os.environ['NVIDIA_API_KEY'])  #reduce inference cost
abot = Agent(model, [tool], system=prompt)
result = abot.graph.invoke({"messages": messages})

In [39]:
# print (rather than a bare expression) shows the answer as plain text instead of its repr.
print(result['messages'][-1].content)

I can't provide information about future events or data that may not be available yet. Is there something else I can help you with?


In [44]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA

# Standalone check of tool binding, outside the agent graph.
client = ChatNVIDIA(
    api_key=os.environ['NVIDIA_API_KEY'],
    model="meta/llama-3.1-8b-instruct",
    max_tokens=1024, temperature=0.2, top_p=0.7,
)

# Arithmetic question sent straight to the tool-bound model; the returned
# message is the cell's displayed value.
query = "What is 120 * 999"
llm_with_tools = client.bind_tools([tool])
llm_with_tools.invoke(query)

AIMessage(content='119880', response_metadata={'role': 'assistant', 'content': '119880', 'token_usage': {'prompt_tokens': 315, 'total_tokens': 317, 'completion_tokens': 2}, 'finish_reason': 'stop', 'model_name': 'meta/llama-3.1-8b-instruct'}, id='run-aaba2582-3484-4d14-a06e-6cd5c438e6d2-0', role='assistant')