### [How to track token usage in ChatModels](https://python.langchain.com/docs/how_to/chat_token_usage_tracking/)

In [1]:
import getpass
import os

# Ask for the LangChain API key only when it is missing or empty, using the
# same truthiness check as the OPENAI_API_KEY cell. Tracing is switched on only
# together with a freshly entered key, as before.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    # Explicit prompt so the user knows which secret is being requested.
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("Enter LANGCHAIN_API_KEY: ")

In [2]:
# Ask for the OpenAI API key only when it is missing or empty.
if not os.environ.get("OPENAI_API_KEY"):
    # Explicit prompt so the user knows which secret is being requested.
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter OPENAI_API_KEY: ")

#### A. Using AIMessage.usage_metadata

In [1]:
from langchain_openai import ChatOpenAI

# Instantiate the chat model and send a single prompt.
llm = ChatOpenAI(
    model="gpt-4o-mini",
)
openai_response = llm.invoke("hello")

# Bare last expression so the notebook displays the usage metadata.
openai_response.usage_metadata

{'input_tokens': 8,
 'output_tokens': 10,
 'total_tokens': 18,
 'input_token_details': {'audio': 0, 'cache_read': 0},
 'output_token_details': {'audio': 0, 'reasoning': 0}}

In [2]:
# Token counts as stored under the "token_usage" key of response_metadata.
token_usage = openai_response.response_metadata["token_usage"]
print(f"OpenAI: {token_usage}\n")

OpenAI: {'completion_tokens': 10, 'prompt_tokens': 8, 'total_tokens': 18, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}



#### B. Streaming

In [3]:
llm = ChatOpenAI(
    model="gpt-4o-mini",
)

# Print every streamed chunk and fold the chunks into one message with `+`.
aggregate = None
for chunk in llm.stream("hello", stream_usage=True):
    print(chunk)
    if aggregate is None:
        # First chunk seeds the accumulator.
        aggregate = chunk
    else:
        aggregate = aggregate + chunk

content='' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-41a3-8345-06d62defe16c'
content='Hello' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-41a3-8345-06d62defe16c'
content='!' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-41a3-8345-06d62defe16c'
content=' How' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-41a3-8345-06d62defe16c'
content=' can' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-41a3-8345-06d62defe16c'
content=' I' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-41a3-8345-06d62defe16c'
content=' assist' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-41a3-8345-06d62defe16c'
content=' you' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-41a3-8345-06d62defe16c'
content=' today' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-41a3-8345-06d62defe16c'
content='?' additional_kwargs={} response_metadata={} id='run-7cca7fd2-ad74-

In [4]:
# Show the text and the usage metadata of the message aggregated from the stream,
# one per line.
print(aggregate.content, aggregate.usage_metadata, sep="\n")

Hello! How can I assist you today?
{'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [5]:
# Same prompt, streamed without passing stream_usage.
aggregate = None  # reset only; this cell does not accumulate the chunks
chunk_stream = llm.stream("hello")
for chunk in chunk_stream:
    print(chunk)

content='' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-4854-a48c-0a126d81aefd'
content='Hello' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-4854-a48c-0a126d81aefd'
content='!' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-4854-a48c-0a126d81aefd'
content=' How' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-4854-a48c-0a126d81aefd'
content=' can' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-4854-a48c-0a126d81aefd'
content=' I' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-4854-a48c-0a126d81aefd'
content=' assist' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-4854-a48c-0a126d81aefd'
content=' you' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-4854-a48c-0a126d81aefd'
content=' today' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-4854-a48c-0a126d81aefd'
content='?' additional_kwargs={} response_metadata={} id='run-1af0267f-4776-

In [6]:
from pydantic import BaseModel, Field


# Pydantic schema handed to `llm.with_structured_output(...)` later in this cell.
# NOTE(review): the docstring and the Field descriptions are presumably forwarded
# to the model as part of the tool schema — confirm before rewording them.
class Joke(BaseModel):
    """Joke to tell user."""

    # Opening part of the joke.
    setup: str = Field(description="question to set up a joke")
    # Closing part of the joke.
    punchline: str = Field(description="answer to resolve the joke")


llm = ChatOpenAI(
    model="gpt-4o-mini",
    stream_usage=True,
)
# Under the hood, .with_structured_output binds tools to the
# chat model and appends a parser.
structured_llm = llm.with_structured_output(Joke)

async for event in structured_llm.astream_events("Tell me a joke", version="v2"):
    if event["event"] == "on_chat_model_end":
        print(f'Token usage: {event["data"]["output"].usage_metadata}\n')
    elif event["event"] == "on_chain_end":
        print(event["data"]["output"])
    else:
        pass

Token usage: {'input_tokens': 74, 'output_tokens': 26, 'total_tokens': 100, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}

setup='Why did the scarecrow win an award?' punchline='Because he was outstanding in his field!'


#### C. Using Callbacks

In [7]:
from langchain_community.callbacks.manager import get_openai_callback

# Model used by the callback examples in this section.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, stream_usage=True)

# Make one call inside the callback context, then print the handler's summary.
with get_openai_callback() as cb:
    result = llm.invoke("Tell me a joke")
    print(cb)

Tokens Used: 29
	Prompt Tokens: 11
		Prompt Tokens Cached: 0
	Completion Tokens: 18
		Reasoning Tokens: 0
Successful Requests: 1
Total Cost (USD): $1.2449999999999998e-05


In [8]:
# Two calls inside one callback context; print the handler's token total.
joke_prompt = "Tell me a joke"
with get_openai_callback() as cb:
    result = llm.invoke(joke_prompt)
    result2 = llm.invoke(joke_prompt)
    print(cb.total_tokens)

57


In [9]:
with get_openai_callback() as cb:
    # Drain the stream without using the chunks; only the callback's
    # counters are printed afterwards.
    for chunk in llm.stream("Tell me a joke"):
        continue
    print(cb)

Tokens Used: 29
	Prompt Tokens: 11
		Prompt Tokens Cached: 0
	Completion Tokens: 18
		Reasoning Tokens: 0
Successful Requests: 1
Total Cost (USD): $1.2449999999999998e-05


In [11]:
from langchain.agents import AgentExecutor, create_tool_calling_agent, load_tools
from langchain_core.prompts import ChatPromptTemplate

# Prompt layout: a system instruction, the user's input, and a placeholder
# slot filled from `agent_scratchpad`.
prompt_messages = [
    ("system", "You're a helpful assistant"),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

# Build a tool-calling agent over the "wikipedia" tool and wrap it in a
# verbose executor.
tools = load_tools(["wikipedia"])
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
)

In [12]:
question = "What's a hummingbird's scientific name and what's the fastest bird species?"

# Run the agent inside the callback context and report the counters it collected.
with get_openai_callback() as cb:
    response = agent_executor.invoke({"input": question})
    print(f"Total Tokens: {cb.total_tokens}")
    print(f"Prompt Tokens: {cb.prompt_tokens}")
    print(f"Completion Tokens: {cb.completion_tokens}")
    print(f"Total Cost (USD): ${cb.total_cost}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'Hummingbird'}`


[0m[36;1m[1;3mPage: Hummingbird
Summary: Hummingbirds are birds native to the Americas and comprise the biological family Trochilidae. With approximately 366 species and 113 genera, they occur from Alaska to Tierra del Fuego, but most species are found in Central and South America. As of 2024, 21 hummingbird species are listed as endangered or critically endangered, with numerous species declining in population.
Hummingbirds have varied specialized characteristics to enable rapid, maneuverable flight: exceptional metabolic capacity, adaptations to high altitude, sensitive visual and communication abilities, and long-distance migration in some species. Among all birds, male hummingbirds have the widest diversity of plumage color, particularly in blues, greens, and purples. Hummingbirds are the smallest mature birds, measuring 7.5–13 cm (3–5 in) in length. The smallest