A self-contained example of retrieving token usage from Ollama

In [1]:
from langchain_community.llms import Ollama

# Instantiate the Ollama LLM wrapper for the model tag used throughout this notebook.
llm = Ollama(
    model="llama2:7b-chat-q4_0",
)

In [2]:
# define callbacks for detecting token usage

from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.outputs.llm_result import LLMResult
from collections import deque

class TokenUsageCallbackHandler(BaseCallbackHandler):
    """Callback handler that records token usage and generation time per LLM call.

    Each time an LLM run ends, a dict with the keys ``'token_usage'`` and
    ``'time_costed'`` is appended to ``self.deque`` so the caller can pop it
    after the invocation returns.
    """

    def __init__(self, deque: deque = None):
        """Create the handler.

        Args:
            deque: Queue that receives one usage record per finished LLM call.
                When omitted, a new empty deque is created so that
                ``on_llm_end`` always has somewhere to append to.
        """
        super().__init__()
        if deque is None:
            # The parameter name shadows ``collections.deque`` inside this
            # method, so import the class under a different local name.
            from collections import deque as _deque_factory
            deque = _deque_factory()
        self.deque = deque

    def on_llm_end(self, response: LLMResult, **kwargs) -> None:
        """Print the raw response and append a usage record to ``self.deque``.

        Args:
            response: Result object passed in when the LLM run finishes.
            **kwargs: Extra callback arguments; unused here.
        """
        print('Response in callback')
        print(response)
        print()

        # Only the first generation of the first prompt is inspected. If there
        # is none, or it carries no metadata, fall back to an empty dict so the
        # defaults below apply instead of raising.
        generations = response.generations
        if generations and generations[0]:
            gen_info = generations[0][0].generation_info or {}
        else:
            gen_info = {}

        # Total tokens = prompt tokens + generated tokens.
        # ref: https://github.com/orgs/langfuse/discussions/1179
        token_usage = gen_info.get('prompt_eval_count', 0) + gen_info.get('eval_count', 0)

        # Time spent on the local machine, in ns. prompt_eval_duration and
        # eval_duration are summed instead of using the total duration so that
        # load time (e.g. loading the model onto the GPU) is excluded.
        # A tiny non-zero fallback is used when a field is missing
        # (presumably to keep a later tokens/time division away from zero -- confirm).
        time_costed = gen_info.get('prompt_eval_duration', 1e-10) + gen_info.get('eval_duration', 1e-10)

        # Bundle both measurements into one record and queue it for the caller.
        token_usage_obj = {
            'token_usage': token_usage,
            'time_costed': time_costed
        }
        self.deque.append(token_usage_obj)



# Queue shared between the callback handler (producer) and the notebook cells (consumer).
common_deque = deque()

# Runnable config that attaches the token-usage handler to every invocation it is passed to.
chain_config = dict(callbacks=[TokenUsageCallbackHandler(common_deque)])

In [5]:
# Send a single prompt straight to the model, passing the callback config along.
response = llm.invoke(
    "Hello, how are you?",
    config=chain_config,
)

# Take the oldest usage record that the callback queued.
token_usage_obj = common_deque.popleft()

# Show the model's answer followed by the usage record, one per line.
print(response, token_usage_obj, sep="\n")

Response in callback
generations=[[GenerationChunk(text='I\'m just an AI assistant and do not have feelings or emotions, so I cannot answer the question "How are you?" as I do not have a physical body or personal experiences. However, I\'m here to help you with any questions or tasks you may have! Is there anything specific you would like to know or discuss?', generation_info={'model': 'llama2:7b-chat-q4_0', 'created_at': '2024-03-25T09:49:19.476477Z', 'response': '', 'done': True, 'context': [518, 25580, 29962, 3532, 14816, 29903, 29958, 5299, 829, 14816, 29903, 6778, 13, 13, 10994, 29892, 920, 526, 366, 29973, 518, 29914, 25580, 29962, 13, 29902, 29915, 29885, 925, 385, 319, 29902, 20255, 322, 437, 451, 505, 21737, 470, 23023, 1080, 29892, 577, 306, 2609, 1234, 278, 1139, 376, 5328, 526, 366, 3026, 408, 306, 437, 451, 505, 263, 9128, 3573, 470, 7333, 27482, 29889, 2398, 29892, 306, 29915, 29885, 1244, 304, 1371, 366, 411, 738, 5155, 470, 9595, 366, 1122, 505, 29991, 1317, 727, 3099, 

In [6]:
# create a chain and invoke
from langchain_core.prompts import ChatPromptTemplate

# from_messages expects a sequence of messages. The single (role, template)
# pair must be wrapped in a list; a bare tuple would be iterated as two
# separate strings and produce two human messages ("human" and the template).
prompt = ChatPromptTemplate.from_messages(
    [("human", "Explain {concept} to me.")]
)

# Pipe the formatted prompt into the model and run it with the callback config.
chain = prompt | llm
response = chain.invoke({"concept": "Large Language Models"}, config=chain_config)

# get the token usage object from the deque
token_usage_obj = common_deque.popleft()

print(response)
print(token_usage_obj)

Response in callback
generations=[[GenerationChunk(text='\nOf course! Large language models are a class of artificial intelligence (AI) models that are trained on vast amounts of text data to generate language outputs that are coherent and natural-sounding. These models have become increasingly popular in recent years due to their ability to generate text that is often indistinguishable from human-generated text.\n\nThe basic idea behind large language models is to use a type of neural network called a transformer to learn the patterns and structures of language. The transformer is trained on a massive dataset of text, which can come from any source, such as books, articles, or even social media posts. The model learns to predict the next word in a sequence of text given the previous words, based on the patterns it has observed in the training data.\n\nLarge language models have many applications, such as:\n\n1. Language Translation: Large language models can be trained to translate te