# Tracking Token Usage and Costs

In [1]:
# Version log: list the installed packages whose name contains "langchain",
# so the outputs below can be tied to a known set of library versions.
! pip list | grep langchain

langchain                 0.2.5
langchain-community       0.2.5
langchain-core            0.2.7
langchain-openai          0.1.8
langchain-text-splitters  0.2.1
langchainhub              0.1.20


## Models setup

Here we initialize three chat models, powered by `gpt-35-turbo`, `gpt-4`, and `gpt-4o` respectively.

In [2]:
from os import environ
from dotenv import dotenv_values
from langchain_openai import AzureChatOpenAI

# Azure OpenAI REST API version shared by every chat model defined below.
AZURE_OPENAI_API_VERSION = "2024-02-01"

# Settings that must be present (and non-empty) in the local `.env` file.
REQUIRED_SETTINGS = (
    'AZURE_OPENAI_ENDPOINT',
    'AZURE_OPENAI_API_KEY',
    'GPT_3_5_DEPLOYMENT_NAME',
    'GPT_4_DEPLOYMENT_NAME',
    'GPT_4o_DEPLOYMENT_NAME',
)

# Read key/value pairs from `.env` without touching the process environment.
config = dotenv_values(".env")

# Fail early with one readable message instead of an anonymous KeyError (or a
# TypeError from assigning None to os.environ when a key has no value).
missing_settings = [name for name in REQUIRED_SETTINGS if not config.get(name)]
if missing_settings:
    raise KeyError(
        "Missing or empty settings in .env: " + ", ".join(missing_settings)
    )

# Export the endpoint and API key as environment variables for the
# AzureChatOpenAI clients created below.
environ['AZURE_OPENAI_ENDPOINT'] = config['AZURE_OPENAI_ENDPOINT']
environ['AZURE_OPENAI_API_KEY'] = config['AZURE_OPENAI_API_KEY']

# One chat model per Azure deployment; only the deployment name differs.
gpt3_5 = AzureChatOpenAI(
    openai_api_version=AZURE_OPENAI_API_VERSION,
    azure_deployment=config['GPT_3_5_DEPLOYMENT_NAME'],
)

gpt4 = AzureChatOpenAI(
    openai_api_version=AZURE_OPENAI_API_VERSION,
    azure_deployment=config['GPT_4_DEPLOYMENT_NAME'],
)

gpt4_o = AzureChatOpenAI(
    openai_api_version=AZURE_OPENAI_API_VERSION,
    azure_deployment=config['GPT_4o_DEPLOYMENT_NAME'],
)

## Using `AIMessage.usage_metadata`

In [3]:
# Send a single prompt to the gpt-35-turbo deployment and keep the returned
# message so the next cell can inspect it.
poem_prompt = "Finish this poem: I met a traveller from an antique land who said"
response = gpt3_5.invoke(poem_prompt)

In [4]:
# Show the full message as a plain dict; the token counts appear under
# response_metadata -> token_usage in the output below.
response.dict()

{'content': '"Two vast and trunkless legs of stone  \nStand in the desert. Near them, on the sand,  \nHalf sunk, a shattered visage lies, whose frown,  \nAnd wrinkled lip, and sneer of cold command,  \nTell that its sculptor well those passions read  \nWhich yet survive, stamped on these lifeless things,  \nThe hand that mocked them and the heart that fed:  \nAnd on the pedestal these words appear:  \n\'My name is Ozymandias, king of kings:  \nLook on my works, ye Mighty, and despair!\'  \nNothing beside remains. Round the decay  \nOf that colossal wreck, boundless and bare  \nThe lone and level sands stretch far away."',
 'additional_kwargs': {},
 'response_metadata': {'token_usage': {'completion_tokens': 146,
   'prompt_tokens': 22,
   'total_tokens': 168},
  'model_name': 'gpt-35-turbo',
  'system_fingerprint': None,
  'prompt_filter_results': [{'prompt_index': 0,
    'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'},
     'self_harm': {'filtered': False, 's

In [5]:
# Ask every model the same question and print, one line per model, the model
# name reported by the service followed by the message's usage_metadata.
question = "What's the answer to life, the universe, and everything?"
for chat_model in (gpt3_5, gpt4, gpt4_o):
    response = chat_model.invoke(question)
    model_name = response.response_metadata['model_name']
    print(f"{model_name}: {response.usage_metadata}")

gpt-35-turbo: {'input_tokens': 21, 'output_tokens': 54, 'total_tokens': 75}
gpt-4: {'input_tokens': 20, 'output_tokens': 19, 'total_tokens': 39}
gpt-4o-2024-05-13: {'input_tokens': 19, 'output_tokens': 86, 'total_tokens': 105}


## Using `get_openai_callback`

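`get_openai_callback` is an OpenAI-specific callback context manager. You create it like this: `with get_openai_callback() as cb:`. Every model call made inside the `with` block is tracked, and `cb` reports the tokens used (prompt and completion), the number of successful requests, and the total cost in USD.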

In [6]:
from langchain_community.callbacks.manager import get_openai_callback

# Give each model its own callback context so the figures printed are for that
# single request rather than a running total across models.
echo_prompt = "Print the exact same word: hello"
for chat_model in (gpt3_5, gpt4, gpt4_o):
    with get_openai_callback() as cb:
        response = chat_model.invoke(echo_prompt)
    # `cb` is still bound after the `with` block, so the report is printed here.
    print('MODEL NAME: ', response.response_metadata['model_name'])
    print('RESPONSE: ', response.content)
    print(cb, end='\n\n')

MODEL NAME:  gpt-35-turbo
RESPONSE:  hello
Tokens Used: 16
	Prompt Tokens: 15
	Completion Tokens: 1
Successful Requests: 1
Total Cost (USD): $2.45e-05

MODEL NAME:  gpt-4
RESPONSE:  hello
Tokens Used: 15
	Prompt Tokens: 14
	Completion Tokens: 1
Successful Requests: 1
Total Cost (USD): $0.00048

MODEL NAME:  gpt-4o-2024-05-13
RESPONSE:  hello
Tokens Used: 15
	Prompt Tokens: 14
	Completion Tokens: 1
Successful Requests: 1
Total Cost (USD): $8.5e-05

