# Tool Calling

In [None]:
!pip install --upgrade langchain-cohere langchain langchain_community transformers


In [2]:
import getpass
import os

from langchain_cohere import ChatCohere

# Ask for the key only when it is not already configured, so re-running this
# cell (or running with the variable exported) neither prompts again nor
# overwrites an existing key.
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass("Enter your Cohere API key: ")

# Chat model used by the examples in the rest of the notebook.
llm = ChatCohere(model="command-r-plus")

 ········································


* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


In [3]:
# The function name, type hints, and docstring are all part of the tool
# schema that's passed to the model. Defining good, descriptive schemas
# is an extension of prompt engineering and is an important part of
# getting models to perform well.
def add(a: int, b: int) -> int:
    """Add two integers.

    Args:
        a: First integer
        b: Second integer
    """
    # The docstring is left verbatim because it is part of the tool schema
    # handed to the model.
    total = a + b
    return total


def multiply(a: int, b: int) -> int:
    """Multiply two integers.

    Args:
        a: First integer
        b: Second integer
    """
    # Trace line: makes it visible when the function body is actually executed.
    print("Multiply called")
    product = a * b
    return product
# Plain Python callables that will be offered to the model as tools.
tools = [add, multiply]

In [4]:
# Question that can be answered with a single `multiply` call.
query = "What is 3 * 12?"

# Attach the tools to the chat model so its replies can contain tool calls.
llm_with_tools = llm.bind_tools(tools)

llm_with_tools.invoke(query)

AIMessage(content='I will use the multiply tool to calculate the answer to this question.', additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '4c82b846-9a50-438d-8db8-c35d41fc12e8', 'tool_calls': [{'id': 'fb8c3a07c8fd44cab16ff28df08527da', 'function': {'name': 'multiply', 'arguments': '{"a": 3, "b": 12}'}, 'type': 'function'}], 'token_count': {'input_tokens': 952.0, 'output_tokens': 61.0}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '4c82b846-9a50-438d-8db8-c35d41fc12e8', 'tool_calls': [{'id': 'fb8c3a07c8fd44cab16ff28df08527da', 'function': {'name': 'multiply', 'arguments': '{"a": 3, "b": 12}'}, 'type': 'function'}], 'token_count': {'input_tokens': 952.0, 'output_tokens': 61.0}}, id='run-e37cacb1-2f13-4956-a7ce-d3b136943d8d-0', tool_calls=[{'name': 'multiply', 'args': {'a': 3, 'b': 

In [5]:
# Two independent questions in one message, one per bound tool.
query = "What is 3 * 12? Also, what is 11 + 49?"

# Show only the parsed tool calls instead of the whole AIMessage.
llm_with_tools.invoke(query).tool_calls

[{'name': 'multiply',
  'args': {'a': 3, 'b': 12},
  'id': 'f8af879317f9437b8bb5dccd07848ad0',
  'type': 'tool_call'},
 {'name': 'add',
  'args': {'a': 11, 'b': 49},
  'id': 'cd40c25fe48b4429a3bed8953d469c97',
  'type': 'tool_call'}]

# Structuring chat model output

**With pydantic model**

In [6]:
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field


# Pydantic
class Joke(BaseModel):
    """Joke to tell user."""

    # Required text fields (no default value is supplied).
    punchline: str = Field(..., description="The punchline to the joke")
    setup: str = Field(..., description="The setup of the joke")
    # Optional score; stays None when it is not provided.
    rating: Optional[int] = Field(
        None, description="How funny the joke is, from 1 to 10"
    )


# Ask the model for output structured according to the `Joke` model.
structured_llm = llm.with_structured_output(Joke)

structured_llm.invoke("Tell me a joke about cats")

Joke(punchline='Because she wanted to be a first-aid kit!', setup='Why did the cat join the Red Cross?', rating=7)

**With a JSON schema**

In [7]:
# Output description written as a JSON-schema dict instead of a pydantic
# class: `setup` and `punchline` are required, `rating` is optional.
json_schema = {
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {"type": "string", "description": "The setup of the joke"},
        "punchline": {"type": "string", "description": "The punchline to the joke"},
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
            "default": None,
        },
    },
    "required": ["setup", "punchline"],
}
# NOTE(review): the output recorded for this cell ({'topic': 'cats'}) does not
# contain any of the schema's fields - confirm dict schemas behave as expected
# with the installed langchain-cohere version.
structured_llm = llm.with_structured_output(json_schema)

structured_llm.invoke("Tell me a joke about cats")

{'topic': 'cats'}

**Structured with streaming**

In [8]:
# NOTE(review): the TypedDict variant below is disabled, so the loop at the end
# of this cell streams from whichever `structured_llm` an earlier cell defined.
# Confirm that this is intended before relying on the output.
# from typing_extensions import Annotated, TypedDict


# # TypedDict
# class Joke(TypedDict):
#     """Joke to tell user."""

#     setup: Annotated[str, ..., "The setup of the joke"]
#     punchline: Annotated[str, ..., "The punchline of the joke"]
#     rating: Annotated[Optional[int], None, "How funny the joke is, from 1 to 10"]


# structured_llm = llm.with_structured_output(Joke)

# Print every chunk as soon as the structured model yields it.
for chunk in structured_llm.stream("Tell me a joke about dogs"):
    print(chunk)

{'topic': 'dogs'}


**Prompting and parsing model outputs directly**

In [9]:
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    # Both fields are required: neither declares a default value.
    name: str = Field(description="The name of the person")
    height_in_meters: float = Field(
        description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    # Container field: a list of `Person` entries.
    people: List[Person]


# Parser that turns the model's reply into a `People` instance.
parser = PydanticOutputParser(pydantic_object=People)

# Two-message template: system instructions followed by the user's query.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer the user query. Wrap the output in `json` tags\n{format_instructions}"),
        ("human", "{query}"),
    ]
)
# Pre-fill the parser's format instructions so only `query` remains to be supplied.
prompt = prompt.partial(format_instructions=parser.get_format_instructions())

In [10]:
query = "Anna is 23 years old and she is 6 feet tall"

# Render the filled-in prompt as text to inspect what the model will receive.
# `query` is the only remaining input variable, so it is passed by name.
print(prompt.invoke({"query": query}).to_string())

System: Answer the user query. Wrap the output in `json` tags
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"description": "Identifying information about all people in a text.", "properties": {"people": {"title": "People", "type": "array", "items": {"$ref": "#/definitions/Person"}}}, "required": ["people"], "definitions": {"Person": {"title": "Person", "description": "Information about a person.", "type": "object", "properties": {"name": {"title": "Name", "description": "The name of the person", "type": "string"}, "height_in_meters": {"title": "Height In Meters", "description": "The heig

In [11]:
# Pipeline: fill the prompt, call the model, parse the reply with `parser`.
chain = prompt | llm | parser

chain.invoke({"query": query})

People(people=[Person(name='Anna', height_in_meters=1.8288)])

**Few shot prompt with tool calling**

In [12]:
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Worked example of the desired behaviour: one tool call per assistant turn,
# each one issued only after the previous tool result is available.
# The tool names and argument keys match the tools bound to the model
# (`multiply(a, b)` and `add(a, b)`), and the tool results are the true values:
# 317253 * 128472 = 40758127416, and 40758127416 + 4 = 40758127420.
examples = [
    HumanMessage(
        "What's the product of 317253 and 128472 plus four", name="example_user"
    ),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[
            {"name": "multiply", "args": {"a": 317253, "b": 128472}, "id": "1"}
        ],
    ),
    ToolMessage("40758127416", tool_call_id="1"),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[{"name": "add", "args": {"a": 40758127416, "b": 4}, "id": "2"}],
    ),
    ToolMessage("40758127420", tool_call_id="2"),
    AIMessage(
        "The product of 317253 and 128472 plus four is 40758127420",
        name="example_assistant",
    ),
]

system = """You are bad at math but are an expert at using a calculator. 

Use past tool usage as an example of how to correctly use the tools."""

# Baseline prompt: system message and query only, without the examples, to
# show how the model behaves before any few-shot guidance is added.
few_shot_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{query}"),
    ]
)

# The raw input string is mapped onto the `query` variable of the prompt.
chain = {"query": RunnablePassthrough()} | few_shot_prompt | llm_with_tools
chain.invoke("Whats 119 times 8 minus 20").tool_calls

[{'name': 'multiply',
  'args': {'a': 119, 'b': 8},
  'id': '4ef66e07420045339d0f97fcdd122173',
  'type': 'tool_call'},
 {'name': 'add',
  'args': {'a': 0, 'b': 0},
  'id': 'bf0447c135e448abb05334efa04d30df',
  'type': 'tool_call'}]

In [13]:
# Rebuild the chain from the current `few_shot_prompt` and the tool-bound model
# (same composition as in the previous cell).
chain = {"query": RunnablePassthrough()} | few_shot_prompt | llm_with_tools

# Execute the chain with the query
result = chain.invoke("Whats 119 times 8 minus 20")

# Keep the tool calls requested by the model and display them
tool_calls = result.tool_calls
tool_calls

[{'name': 'multiply',
  'args': {'a': 119, 'b': 8},
  'id': 'ddc9af2aa5d04485bd3c49a02672640c',
  'type': 'tool_call'},
 {'name': 'add',
  'args': {'a': 0, 'b': 0},
  'id': '17eea473e64e4254abf1b9c8ba69cdb3',
  'type': 'tool_call'}]

**The model shouldn't be trying to add anything yet, since it can't know the result of 119 * 8 until the `multiply` tool has run. Adding the examples above to the prompt corrects this behavior:**

In [14]:
# Same system and human messages as before, now with the worked examples
# placed between them.
few_shot_prompt = ChatPromptTemplate.from_messages(
    [("system", system)] + examples + [("human", "{query}")]
)

In [15]:
# Rebuild the chain so it picks up the current `few_shot_prompt`.
chain = {"query": RunnablePassthrough()} | few_shot_prompt | llm_with_tools

# Execute the chain with the query
result = chain.invoke("Whats 119 times 8 minus 20")

# Keep the tool calls requested by the model and display them
tool_calls = result.tool_calls
tool_calls

[{'name': 'multiply',
  'args': {'a': 119, 'b': 8},
  'id': '89c39038e89d47708335d309cb27b8a5',
  'type': 'tool_call'}]

# Caching messages

In [16]:
# <!-- ruff: noqa: F821 -->
from langchain_core.globals import set_llm_cache

In [17]:
%%time
from langchain_core.caches import InMemoryCache

# Register an in-memory cache as the global LLM cache.
set_llm_cache(InMemoryCache())

# The first time, it is not yet in cache, so it should take longer
llm.invoke("Tell me a joke")

CPU times: user 8.01 ms, sys: 132 µs, total: 8.14 ms
Wall time: 560 ms


AIMessage(content='Why did the chicken cross the road? \nTo get to the other side!', additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '45380f5d-874a-4cac-821d-7826b19dda4f', 'token_count': {'input_tokens': 202.0, 'output_tokens': 17.0}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '45380f5d-874a-4cac-821d-7826b19dda4f', 'token_count': {'input_tokens': 202.0, 'output_tokens': 17.0}}, id='run-1f56636c-1a6e-4f80-93c6-f0caa7014c87-0', usage_metadata={'input_tokens': 202, 'output_tokens': 17, 'total_tokens': 219})

**The response above is now cached, so repeating the same call should take much less time:**

In [18]:
%%time
# Identical prompt to the previous timed cell, so the timings can be compared.
llm.invoke("Tell me a joke")

CPU times: user 2.48 ms, sys: 65 µs, total: 2.55 ms
Wall time: 2.1 ms


AIMessage(content='Why did the chicken cross the road? \nTo get to the other side!', additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '45380f5d-874a-4cac-821d-7826b19dda4f', 'token_count': {'input_tokens': 202.0, 'output_tokens': 17.0}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '45380f5d-874a-4cac-821d-7826b19dda4f', 'token_count': {'input_tokens': 202.0, 'output_tokens': 17.0}}, id='run-1f56636c-1a6e-4f80-93c6-f0caa7014c87-0', usage_metadata={'input_tokens': 202, 'output_tokens': 17, 'total_tokens': 219})

**Streaming events with `astream_events`**

In [19]:
# Number of events received so far; used to cut the output short.
idx = 0

# Iterate over the events emitted while the model produces its answer.
# The counter is incremented before the check, so at most four events are
# printed before the loop stops.
async for event in llm.astream_events(
    "Write me a 1 verse song about goldfish on the moon", version="v1"
):
    idx += 1
    if idx >= 5:  # Truncate the output
        print("...Truncated")
        break
    print(event)

{'event': 'on_chat_model_start', 'run_id': '8bcd8841-a769-4f98-9213-0ebfa998d2ea', 'name': 'ChatCohere', 'tags': [], 'metadata': {}, 'data': {'input': 'Write me a 1 verse song about goldfish on the moon'}, 'parent_ids': []}


  async for event in llm.astream_events(


TooManyRequestsError: status_code: 429, body: data=None message="You are using a Trial key, which is limited to 10 API calls / minute. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"

In [21]:
# Conversation given as role/content dicts. Note the two consecutive system
# messages followed by two consecutive user messages.
messages = [
    {"role": "system", "content": "you're a good assistant."},
    {"role": "system", "content": "you always respond with a joke."},
    {"role": "user", "content": "i wonder why it's called langchain"},
    {"role": "user", "content": "and who is harrison chasing anyways"},
]
# The list of dicts is passed to the chat model as-is.
llm.invoke(messages)

TooManyRequestsError: status_code: 429, body: data=None message="You are using a Trial key, which is limited to 10 API calls / minute. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"