diff --git a/README.md b/README.md
index 40ccdf8..6408b06 100644
--- a/README.md
+++ b/README.md
@@ -259,7 +259,7 @@ The result of 47 + 23 is 70.
- model: `claude-sonnet-4-20250514`
- finish_reason: `stop`
- usage:
- `Usage(completion_tokens=17, prompt_tokens=533, total_tokens=550, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`
+ `Usage(completion_tokens=18, prompt_tokens=573, total_tokens=591, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`
@@ -312,7 +312,10 @@ Now I’ll add the result (70) to the third number (59):
'name': 'add_numbers',
'content': '129'}
-The answer is 129. So 47 + 23 + 59 = 129.
+The answer is **129**.
+
+I calculated this by first adding 47 + 23 = 70, then adding 70 + 59 =
+129.
@@ -320,7 +323,7 @@ The answer is 129. So 47 + 23 + 59 = 129.
- model: `claude-sonnet-4-20250514`
- finish_reason: `stop`
- usage:
- `Usage(completion_tokens=25, prompt_tokens=662, total_tokens=687, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`
+ `Usage(completion_tokens=41, prompt_tokens=702, total_tokens=743, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`
diff --git a/lisette/_modidx.py b/lisette/_modidx.py
index 08c2344..1a79da8 100644
--- a/lisette/_modidx.py
+++ b/lisette/_modidx.py
@@ -50,4 +50,16 @@
'lisette.core.patch_litellm': ('core.html#patch_litellm', 'lisette/core.py'),
'lisette.core.random_tool_id': ('core.html#random_tool_id', 'lisette/core.py'),
'lisette.core.remove_cache_ckpts': ('core.html#remove_cache_ckpts', 'lisette/core.py'),
- 'lisette.core.stream_with_complete': ('core.html#stream_with_complete', 'lisette/core.py')}}}
+ 'lisette.core.stream_with_complete': ('core.html#stream_with_complete', 'lisette/core.py')},
+ 'lisette.usage': { 'lisette.usage.LisetteUsageLogger': ('usage.html#lisetteusagelogger', 'lisette/usage.py'),
+ 'lisette.usage.LisetteUsageLogger.__init__': ('usage.html#lisetteusagelogger.__init__', 'lisette/usage.py'),
+ 'lisette.usage.LisetteUsageLogger._log_usage': ( 'usage.html#lisetteusagelogger._log_usage',
+ 'lisette/usage.py'),
+ 'lisette.usage.LisetteUsageLogger.async_log_success_event': ( 'usage.html#lisetteusagelogger.async_log_success_event',
+ 'lisette/usage.py'),
+ 'lisette.usage.LisetteUsageLogger.log_success_event': ( 'usage.html#lisetteusagelogger.log_success_event',
+ 'lisette/usage.py'),
+ 'lisette.usage.LisetteUsageLogger.user_id_fn': ( 'usage.html#lisetteusagelogger.user_id_fn',
+ 'lisette/usage.py'),
+ 'lisette.usage.Usage': ('usage.html#usage', 'lisette/usage.py'),
+ 'lisette.usage.Usage.total_cost': ('usage.html#usage.total_cost', 'lisette/usage.py')}}}
diff --git a/lisette/usage.py b/lisette/usage.py
new file mode 100644
index 0000000..f5307b7
--- /dev/null
+++ b/lisette/usage.py
@@ -0,0 +1,39 @@
+"""Lisette usage and cost monitoring"""
+
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_usage.ipynb.
+
+# %% auto 0
+__all__ = ['Usage', 'LisetteUsageLogger']
+
+# %% ../nbs/01_usage.ipynb
+from litellm.integrations.custom_logger import CustomLogger
+import time
+from fastcore.utils import *  # provides patch, ifnone, nested_idx used below
+try:
+ from fastlite import *
+ from fastlite.core import dataclass
+except ImportError: raise ImportError("Please install `fastlite` to use SQLite-based lisette usage logging.")
+
+# %% ../nbs/01_usage.ipynb
+class Usage: id:int; timestamp:float; model:str; user_id:str; prompt_tokens:int; completion_tokens:int; total_tokens:int; cached_tokens:int; cache_creation_tokens:int; cache_read_tokens:int; web_search_requests:int; response_cost:float
+
+# %% ../nbs/01_usage.ipynb
+class LisetteUsageLogger(CustomLogger):
+ def __init__(self, db_path):
+ self.db = Database(db_path)
+ self.usage = self.db.create(Usage)
+
+ async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): self._log_usage(response_obj, kwargs['response_cost'], start_time, end_time)
+ def log_success_event(self, kwargs, response_obj, start_time, end_time): self._log_usage(response_obj, kwargs['response_cost'], start_time, end_time)
+ def _log_usage(self, response_obj, response_cost, start_time, end_time):
+ usage = response_obj.usage
+ ptd = usage.prompt_tokens_details
+ self.usage.insert(Usage(timestamp=time.time(), model=response_obj.model, user_id=self.user_id_fn(), prompt_tokens=usage.prompt_tokens, completion_tokens=usage.completion_tokens,
+ total_tokens=usage.total_tokens, cached_tokens=ptd.cached_tokens if ptd else 0, cache_creation_tokens=usage.cache_creation_input_tokens,
+ cache_read_tokens=usage.cache_read_input_tokens, web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'), response_cost=response_cost))
+
+    def user_id_fn(self): raise NotImplementedError('Please implement `LisetteUsageLogger.user_id_fn` before initializing, e.g. using `fastcore.patch`.')
+
+# %% ../nbs/01_usage.ipynb
+@patch
+def total_cost(self:Usage, sc=0.01): return self.response_cost + sc * ifnone(self.web_search_requests, 0)
diff --git a/nbs/00_core.ipynb b/nbs/00_core.ipynb
index 686ccb0..2ed76b6 100644
--- a/nbs/00_core.ipynb
+++ b/nbs/00_core.ipynb
@@ -773,12 +773,12 @@
"- id: `chatcmpl-xxx`\n",
"- model: `claude-sonnet-4-5-20250929`\n",
"- finish_reason: `stop`\n",
- "- usage: `Usage(completion_tokens=5, prompt_tokens=2073, total_tokens=2078, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None, cache_creation_tokens=2070, cache_creation_token_details=CacheCreationTokenDetails(ephemeral_5m_input_tokens=2070, ephemeral_1h_input_tokens=0)), cache_creation_input_tokens=2070, cache_read_input_tokens=0)`\n",
+ "- usage: `Usage(completion_tokens=5, prompt_tokens=2073, total_tokens=2078, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=2070, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=2070)`\n",
"\n",
""
],
"text/plain": [
- "ModelResponse(id='chatcmpl-xxx', created=1000000000, model='claude-sonnet-4-5-20250929', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='1', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'citations': None, 'thinking_blocks': None}))], usage=Usage(completion_tokens=5, prompt_tokens=2073, total_tokens=2078, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None, cache_creation_tokens=2070, cache_creation_token_details=CacheCreationTokenDetails(ephemeral_5m_input_tokens=2070, ephemeral_1h_input_tokens=0)), cache_creation_input_tokens=2070, cache_read_input_tokens=0))"
+ "ModelResponse(id='chatcmpl-xxx', created=1000000000, model='claude-sonnet-4-5-20250929', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='1', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'citations': None, 'thinking_blocks': None}))], usage=Usage(completion_tokens=5, prompt_tokens=2073, total_tokens=2078, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=2070, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=2070))"
]
},
"execution_count": null,
@@ -816,12 +816,12 @@
"- id: `chatcmpl-xxx`\n",
"- model: `claude-sonnet-4-5-20250929`\n",
"- finish_reason: `stop`\n",
- "- usage: `Usage(completion_tokens=5, prompt_tokens=4147, total_tokens=4152, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=2070, text_tokens=None, image_tokens=None, cache_creation_tokens=2074, cache_creation_token_details=CacheCreationTokenDetails(ephemeral_5m_input_tokens=2074, ephemeral_1h_input_tokens=0)), cache_creation_input_tokens=2074, cache_read_input_tokens=2070)`\n",
+ "- usage: `Usage(completion_tokens=5, prompt_tokens=4147, total_tokens=4152, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=4144, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=4144)`\n",
"\n",
""
],
"text/plain": [
- "ModelResponse(id='chatcmpl-xxx', created=1000000000, model='claude-sonnet-4-5-20250929', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='2', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'citations': None, 'thinking_blocks': None}))], usage=Usage(completion_tokens=5, prompt_tokens=4147, total_tokens=4152, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=2070, text_tokens=None, image_tokens=None, cache_creation_tokens=2074, cache_creation_token_details=CacheCreationTokenDetails(ephemeral_5m_input_tokens=2074, ephemeral_1h_input_tokens=0)), cache_creation_input_tokens=2074, cache_read_input_tokens=2070))"
+ "ModelResponse(id='chatcmpl-xxx', created=1000000000, model='claude-sonnet-4-5-20250929', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='2', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'citations': None, 'thinking_blocks': None}))], usage=Usage(completion_tokens=5, prompt_tokens=4147, total_tokens=4152, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=4144, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=4144))"
]
},
"execution_count": null,
@@ -851,7 +851,7 @@
{
"data": {
"text/plain": [
- "PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=2070, text_tokens=None, image_tokens=None, cache_creation_tokens=2074, cache_creation_token_details=CacheCreationTokenDetails(ephemeral_5m_input_tokens=2074, ephemeral_1h_input_tokens=0))"
+ "PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=4144, text_tokens=None, image_tokens=None)"
]
},
"execution_count": null,
@@ -3799,7 +3799,14 @@
"text": [
"Otters are charismatic members of the weasel family found on every continent except Australia and Antarctica. There are 13 species in total, including sea otters and river otters.\n",
"\n",
- "These aquatic mammals have elongated bodies, long tails, and soft, dense fur. In fact, otters have the densest fur of any animal—as many as a million hairs per square inch. Webbed feet and powerful tails make otters strong swimmers.\n",
+ "These aquatic mammals have elongated bodies, long tails, and soft, dense fur"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ". In fact, otters have the densest fur of any animal—as many as a million hairs per square inch. Webbed feet and powerful tails make otters strong swimmers.\n",
"\n",
"All otters are expert hunters that eat fish, crustaceans, and other critters. Sea otters float on their backs, place a rock on their chest, then smash mollusks down on it until it breaks open. They're also known for being playful animals, engaging in activities like sliding into water on natural slides."
]
@@ -5554,7 +5561,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#| hides\n",
+ "#| hide\n",
"import nbdev; nbdev.nbdev_export()"
]
},
diff --git a/nbs/01_usage.ipynb b/nbs/01_usage.ipynb
new file mode 100644
index 0000000..2581076
--- /dev/null
+++ b/nbs/01_usage.ipynb
@@ -0,0 +1,1020 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3bd3dbfa",
+ "metadata": {},
+ "source": [
+ "# Usage\n",
+ "\n",
+ "> Lisette usage and cost monitoring "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c2c9427c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp usage"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5b6856c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "from litellm.integrations.custom_logger import CustomLogger\n",
+ "import time\n",
+ "try: \n",
+ " from fastlite import *\n",
+ " from fastlite.core import dataclass\n",
+ "except ImportError: raise ImportError(\"Please install `fastlite` to use sqlite based lisette usage logging.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "743eedcf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import litellm, importlib, httpx\n",
+ "from lisette.core import Chat, AsyncChat, patch_litellm\n",
+ "from fastcore.all import *\n",
+ "from cachy import enable_cachy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "69144ce3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "enable_cachy()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2cff8990",
+ "metadata": {},
+ "source": [
+ "## Lisette Usage Logger"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c9acabfc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "importlib.reload(litellm); # to re-run the notebook without kernel restart"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7beb5064",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "patch_litellm()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aed71558",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "class Usage: id:int; timestamp:float; model:str; user_id:str; prompt_tokens:int; completion_tokens:int; total_tokens:int; cached_tokens:int; cache_creation_tokens:int; cache_read_tokens:int; web_search_requests:int; response_cost:int"
+ ]
+ },
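+  {
+   "cell_type": "markdown",
+   "id": "b41c7a2e",
+   "metadata": {},
+   "source": [
+    "fastlite builds a SQLite table directly from this bare annotated class: `db.create(Usage)` below maps each annotation to a column, using `id` as the primary key (fastlite's default)."
+   ]
+  },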
+ {
+ "cell_type": "markdown",
+ "id": "c9bf5fc1",
+ "metadata": {},
+ "source": [
+ "The precomputed response cost provided is available in `kwargs['response_cost']` according to the [litellm docs](https://docs.litellm.ai/docs/observability/custom_callback#whats-available-in-kwargs):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0ad2e088",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "class LisetteUsageLogger(CustomLogger):\n",
+ " def __init__(self, db_path): \n",
+ " self.db = Database(db_path)\n",
+ " self.usage = self.db.create(Usage)\n",
+ " \n",
+ " async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): self._log_usage(response_obj, kwargs['response_cost'], start_time, end_time)\n",
+ " def log_success_event(self, kwargs, response_obj, start_time, end_time): self._log_usage(response_obj, kwargs['response_cost'], start_time, end_time)\n",
+ " def _log_usage(self, response_obj, response_cost, start_time, end_time):\n",
+ " usage = response_obj.usage\n",
+ " ptd = usage.prompt_tokens_details\n",
+ " self.usage.insert(Usage(timestamp=time.time(), model=response_obj.model, user_id=self.user_id_fn(), prompt_tokens=usage.prompt_tokens, completion_tokens=usage.completion_tokens,\n",
+ " total_tokens=usage.total_tokens, cached_tokens=ptd.cached_tokens if ptd else 0, cache_creation_tokens=usage.cache_creation_input_tokens, \n",
+ " cache_read_tokens=usage.cache_read_input_tokens, web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'), response_cost=response_cost))\n",
+ " \n",
+ " def user_id_fn(self): raise NotImplementedError('Please implement `LisetteUsageLogger.user_id_fn` before initializing, e.g using fastcore.patch.')"
+ ]
+ },
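+  {
+   "cell_type": "markdown",
+   "id": "e3f1a8c4",
+   "metadata": {},
+   "source": [
+    "Both the sync and async success hooks delegate to `_log_usage`, so sync and async calls, streaming or not, all land in the same table; the examples below verify each case."
+   ]
+  },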
+ {
+ "cell_type": "markdown",
+ "id": "3bdfd5ca",
+ "metadata": {},
+ "source": [
+ "## Cost Utils"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5ce652ed",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PrefixDict(dict):\n",
+ " def __getitem__(self, key):\n",
+ " if key in self.keys(): return super().__getitem__(key)\n",
+ " for k in self.keys(): \n",
+ " if key.startswith(k): return super().__getitem__(k)\n",
+ " raise KeyError(key)"
+ ]
+ },
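+  {
+   "cell_type": "markdown",
+   "id": "a7d92b10",
+   "metadata": {},
+   "source": [
+    "`PrefixDict` lets a fully versioned model name resolve to its pricing entry by prefix match. A minimal sketch (the value here is just a placeholder):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c58e4f21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prices = PrefixDict({'claude-sonnet-4-5': 'sonnet pricing'})\n",
+    "test_eq(prices['claude-sonnet-4-5-20250929'], 'sonnet pricing')  # resolved via the prefix key"
+   ]
+  },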
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "847758d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model_prices = PrefixDict({\n",
+ " 'claude-sonnet-4-5': dict(input_prc = 3/1e6, cache_write_prc = 3.75/1e6, cache_read_prc = 0.3/1e6, output_prc = 15/1e6, web_search_prc = 10/1e3)\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "42be909f",
+ "metadata": {},
+ "source": [
+ "Simplified cost utils to demonstrate total cost calculation (use `Usage.response_cost` in prod):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6624d659",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@patch(as_prop=True)\n",
+ "def inp_cost(self:Usage): return model_prices[self.model]['input_prc'] * (self.prompt_tokens - self.cache_read_tokens)\n",
+ "@patch(as_prop=True)\n",
+ "def cache_write_cost(self:Usage): return model_prices[self.model]['cache_write_prc'] * self.cache_creation_tokens\n",
+ "@patch(as_prop=True)\n",
+ "def cache_read_cost(self:Usage): return model_prices[self.model]['cache_read_prc'] * self.cache_read_tokens\n",
+ "@patch(as_prop=True)\n",
+ "def out_cost(self:Usage): return model_prices[self.model]['output_prc'] * self.completion_tokens\n",
+ "@patch(as_prop=True)\n",
+ "def web_cost(self:Usage): return model_prices[self.model]['web_search_prc'] * ifnone(self.web_search_requests, 0)\n",
+ "@patch(as_prop=True)\n",
+ "def cost(self:Usage): return self.inp_cost + self.cache_write_cost + self.cache_read_cost + self.out_cost + self.web_cost\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "432ef6d0",
+ "metadata": {},
+ "source": [
+ "A mapping of model pricing is also available in litellm, which is used to calculate the `response_cost`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b90af6ed",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model_pricing = dict2obj(httpx.get(litellm.model_cost_map_url).json())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "35cc0ba6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "```python\n",
+ "{ 'cache_creation_input_token_cost': 3.75e-06,\n",
+ " 'cache_creation_input_token_cost_above_200k_tokens': 7.5e-06,\n",
+ " 'cache_read_input_token_cost': 3e-07,\n",
+ " 'cache_read_input_token_cost_above_200k_tokens': 6e-07,\n",
+ " 'input_cost_per_token': 3e-06,\n",
+ " 'input_cost_per_token_above_200k_tokens': 6e-06,\n",
+ " 'litellm_provider': 'anthropic',\n",
+ " 'max_input_tokens': 200000,\n",
+ " 'max_output_tokens': 64000,\n",
+ " 'max_tokens': 64000,\n",
+ " 'mode': 'chat',\n",
+ " 'output_cost_per_token': 1.5e-05,\n",
+ " 'output_cost_per_token_above_200k_tokens': 2.25e-05,\n",
+ " 'search_context_cost_per_query': { 'search_context_size_high': 0.01,\n",
+ " 'search_context_size_low': 0.01,\n",
+ " 'search_context_size_medium': 0.01},\n",
+ " 'supports_assistant_prefill': True,\n",
+ " 'supports_computer_use': True,\n",
+ " 'supports_function_calling': True,\n",
+ " 'supports_pdf_input': True,\n",
+ " 'supports_prompt_caching': True,\n",
+ " 'supports_reasoning': True,\n",
+ " 'supports_response_schema': True,\n",
+ " 'supports_tool_choice': True,\n",
+ " 'supports_vision': True,\n",
+ " 'tool_use_system_prompt_tokens': 346}\n",
+ "```"
+ ],
+ "text/plain": [
+ "{'cache_creation_input_token_cost': 3.75e-06,\n",
+ " 'cache_read_input_token_cost': 3e-07,\n",
+ " 'input_cost_per_token': 3e-06,\n",
+ " 'input_cost_per_token_above_200k_tokens': 6e-06,\n",
+ " 'output_cost_per_token_above_200k_tokens': 2.25e-05,\n",
+ " 'cache_creation_input_token_cost_above_200k_tokens': 7.5e-06,\n",
+ " 'cache_read_input_token_cost_above_200k_tokens': 6e-07,\n",
+ " 'litellm_provider': 'anthropic',\n",
+ " 'max_input_tokens': 200000,\n",
+ " 'max_output_tokens': 64000,\n",
+ " 'max_tokens': 64000,\n",
+ " 'mode': 'chat',\n",
+ " 'output_cost_per_token': 1.5e-05,\n",
+ " 'search_context_cost_per_query': {'search_context_size_high': 0.01,\n",
+ " 'search_context_size_low': 0.01,\n",
+ " 'search_context_size_medium': 0.01},\n",
+ " 'supports_assistant_prefill': True,\n",
+ " 'supports_computer_use': True,\n",
+ " 'supports_function_calling': True,\n",
+ " 'supports_pdf_input': True,\n",
+ " 'supports_prompt_caching': True,\n",
+ " 'supports_reasoning': True,\n",
+ " 'supports_response_schema': True,\n",
+ " 'supports_tool_choice': True,\n",
+ " 'supports_vision': True,\n",
+ " 'tool_use_system_prompt_tokens': 346}"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model_pricing['claude-sonnet-4-5']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "19ff68bd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "```python\n",
+ "{ 'cache_creation_input_token_cost_above_200k_tokens': 2.5e-07,\n",
+ " 'cache_read_input_token_cost': 2e-07,\n",
+ " 'cache_read_input_token_cost_above_200k_tokens': 4e-07,\n",
+ " 'input_cost_per_token': 2e-06,\n",
+ " 'input_cost_per_token_above_200k_tokens': 4e-06,\n",
+ " 'input_cost_per_token_batches': 1e-06,\n",
+ " 'litellm_provider': 'vertex_ai-language-models',\n",
+ " 'max_audio_length_hours': 8.4,\n",
+ " 'max_audio_per_prompt': 1,\n",
+ " 'max_images_per_prompt': 3000,\n",
+ " 'max_input_tokens': 1048576,\n",
+ " 'max_output_tokens': 65535,\n",
+ " 'max_pdf_size_mb': 30,\n",
+ " 'max_tokens': 65535,\n",
+ " 'max_video_length': 1,\n",
+ " 'max_videos_per_prompt': 10,\n",
+ " 'mode': 'chat',\n",
+ " 'output_cost_per_token': 1.2e-05,\n",
+ " 'output_cost_per_token_above_200k_tokens': 1.8e-05,\n",
+ " 'output_cost_per_token_batches': 6e-06,\n",
+ " 'source': 'https://cloud.google.com/vertex-ai/generative-ai/pricing',\n",
+ " 'supported_endpoints': ['/v1/chat/completions', '/v1/completions', '/v1/batch'],\n",
+ " 'supported_modalities': ['text', 'image', 'audio', 'video'],\n",
+ " 'supported_output_modalities': ['text'],\n",
+ " 'supports_audio_input': True,\n",
+ " 'supports_function_calling': True,\n",
+ " 'supports_pdf_input': True,\n",
+ " 'supports_prompt_caching': True,\n",
+ " 'supports_reasoning': True,\n",
+ " 'supports_response_schema': True,\n",
+ " 'supports_system_messages': True,\n",
+ " 'supports_tool_choice': True,\n",
+ " 'supports_video_input': True,\n",
+ " 'supports_vision': True,\n",
+ " 'supports_web_search': True}\n",
+ "```"
+ ],
+ "text/plain": [
+ "{'cache_read_input_token_cost': 2e-07,\n",
+ " 'cache_read_input_token_cost_above_200k_tokens': 4e-07,\n",
+ " 'cache_creation_input_token_cost_above_200k_tokens': 2.5e-07,\n",
+ " 'input_cost_per_token': 2e-06,\n",
+ " 'input_cost_per_token_above_200k_tokens': 4e-06,\n",
+ " 'input_cost_per_token_batches': 1e-06,\n",
+ " 'litellm_provider': 'vertex_ai-language-models',\n",
+ " 'max_audio_length_hours': 8.4,\n",
+ " 'max_audio_per_prompt': 1,\n",
+ " 'max_images_per_prompt': 3000,\n",
+ " 'max_input_tokens': 1048576,\n",
+ " 'max_output_tokens': 65535,\n",
+ " 'max_pdf_size_mb': 30,\n",
+ " 'max_tokens': 65535,\n",
+ " 'max_video_length': 1,\n",
+ " 'max_videos_per_prompt': 10,\n",
+ " 'mode': 'chat',\n",
+ " 'output_cost_per_token': 1.2e-05,\n",
+ " 'output_cost_per_token_above_200k_tokens': 1.8e-05,\n",
+ " 'output_cost_per_token_batches': 6e-06,\n",
+ " 'source': 'https://cloud.google.com/vertex-ai/generative-ai/pricing',\n",
+ " 'supported_endpoints': (#3) ['/v1/chat/completions','/v1/completions','/v1/batch'],\n",
+ " 'supported_modalities': (#4) ['text','image','audio','video'],\n",
+ " 'supported_output_modalities': (#1) ['text'],\n",
+ " 'supports_audio_input': True,\n",
+ " 'supports_function_calling': True,\n",
+ " 'supports_pdf_input': True,\n",
+ " 'supports_prompt_caching': True,\n",
+ " 'supports_reasoning': True,\n",
+ " 'supports_response_schema': True,\n",
+ " 'supports_system_messages': True,\n",
+ " 'supports_tool_choice': True,\n",
+ " 'supports_video_input': True,\n",
+ " 'supports_vision': True,\n",
+ " 'supports_web_search': True}"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model_pricing['gemini-3-pro-preview']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0fd2987b",
+ "metadata": {},
+ "source": [
+ "## Examples"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "47cf6ad5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dbfp = Path('.lisette/litellm-usage.db')\n",
+ "dbfp.parent.mkdir(exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1e4a50ae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@patch\n",
+ "def user_id_fn(self:LisetteUsageLogger): return 'user-123'\n",
+ "logger = LisetteUsageLogger(dbfp)\n",
+ "litellm.callbacks = [logger]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5842bb0f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "slc = ','.join('id model user_id prompt_tokens completion_tokens total_tokens cached_tokens cache_creation_tokens cache_read_tokens web_search_requests response_cost'.split())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ac32ac47",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# litellm.set_verbose = True"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1d0af81a",
+ "metadata": {},
+ "source": [
+ "A simple example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9215558",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "2+2 = 4\n",
+ "\n",
+ "\n",
+ "\n",
+ "- id: `chatcmpl-xxx`\n",
+ "- model: `claude-sonnet-4-5-20250929`\n",
+ "- finish_reason: `stop`\n",
+ "- usage: `Usage(completion_tokens=11, prompt_tokens=14, total_tokens=25, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`\n",
+ "\n",
+ " "
+ ],
+ "text/plain": [
+ "ModelResponse(id='chatcmpl-xxx', created=1000000000, model='claude-sonnet-4-5-20250929', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='2+2 = 4', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'citations': None, 'thinking_blocks': None}))], usage=Usage(completion_tokens=11, prompt_tokens=14, total_tokens=25, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0))"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chat = Chat('claude-sonnet-4-5-20250929')\n",
+ "chat(\"What is 2+2?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c4b82ed4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Usage(id=1, timestamp=UNSET, model='claude-sonnet-4-5-20250929', user_id='user-123', prompt_tokens=14, completion_tokens=11, total_tokens=25, cached_tokens=0, cache_creation_tokens=0, cache_read_tokens=0, web_search_requests=None, response_cost=0.000207)"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "time.sleep(0.3) # wait for callback db write\n",
+ "u = logger.usage(select=slc)[-1]; u"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "39087125",
+ "metadata": {},
+ "source": [
+ "Our calculated cost matches litellm's `response_cost`. In some cases it might be better to use the custom calculation as we'll see in the remaining of this notebook:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "367cb32f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_eq(u.cost, u.response_cost)"
+ ]
+ },
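+  {
+   "cell_type": "markdown",
+   "id": "f0b3d9a6",
+   "metadata": {},
+   "source": [
+    "As a quick sanity check of the arithmetic for this call: 14 input tokens × \\$3/M plus 11 output tokens × \\$15/M gives \\$0.000042 + \\$0.000165 = \\$0.000207, exactly the logged `response_cost`."
+   ]
+  },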
+ {
+ "cell_type": "markdown",
+ "id": "55735017",
+ "metadata": {},
+ "source": [
+ "Now, let's test with streaming:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "58b8cde6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chat = Chat('claude-sonnet-4-5')\n",
+ "res = chat(\"Count from 1 to 5\", stream=True)\n",
+ "for o in res: pass"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "72e8eb30",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Usage(id=2, timestamp=UNSET, model='claude-sonnet-4-5', user_id='user-123', prompt_tokens=15, completion_tokens=17, total_tokens=32, cached_tokens=0, cache_creation_tokens=0, cache_read_tokens=0, web_search_requests=None, response_cost=0.00030000000000000003)"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "time.sleep(0.3)\n",
+ "u = logger.usage(select=slc)[-1]; u\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "13fadb74",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_eq(u.cost, u.response_cost)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eab9bb4b",
+ "metadata": {},
+ "source": [
+ "Streaming logged successfully. Let's also verify async chat calls are logged properly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5270a8f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "3 + 3 = 6\n",
+ "\n",
+ "\n",
+ "\n",
+ "- id: `chatcmpl-xxx`\n",
+ "- model: `claude-sonnet-4-5-20250929`\n",
+ "- finish_reason: `stop`\n",
+ "- usage: `Usage(completion_tokens=13, prompt_tokens=14, total_tokens=27, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`\n",
+ "\n",
+ " "
+ ],
+ "text/plain": [
+ "ModelResponse(id='chatcmpl-xxx', created=1000000000, model='claude-sonnet-4-5-20250929', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='3 + 3 = 6', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'citations': None, 'thinking_blocks': None}))], usage=Usage(completion_tokens=13, prompt_tokens=14, total_tokens=27, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0))"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chat_async = AsyncChat('claude-sonnet-4-5-20250929')\n",
+ "await chat_async(\"What is 3+3?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e7a75d42",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Usage(id=3, timestamp=UNSET, model='claude-sonnet-4-5-20250929', user_id='user-123', prompt_tokens=14, completion_tokens=13, total_tokens=27, cached_tokens=0, cache_creation_tokens=0, cache_read_tokens=0, web_search_requests=None, response_cost=0.00023700000000000001)"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "time.sleep(0.3)\n",
+ "u = logger.usage(select=slc)[-1]; u"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1916085a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_eq(u.cost, u.response_cost)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3f82d440",
+ "metadata": {},
+ "source": [
+ "Finally, let's test async streaming to ensure all API patterns are covered."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d7791bff",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ModelResponse(id='chatcmpl-xxx', created=1000000000, model='claude-sonnet-4-5-20250929', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='10, 11, 12, 13, 14, 15', role='assistant', tool_calls=None, function_call=None, provider_specific_fields=None))], usage=Usage(completion_tokens=20, prompt_tokens=38, total_tokens=58, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=0, rejected_prediction_tokens=None, text_tokens=None), prompt_tokens_details=None))\n"
+ ]
+ }
+ ],
+ "source": [
+ "res = await chat_async(\"Count from 10 to 15\", stream=True)\n",
+ "async for o in res: pass\n",
+ "print(o)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "abd6b744",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Usage(id=4, timestamp=UNSET, model='claude-sonnet-4-5-20250929', user_id='user-123', prompt_tokens=38, completion_tokens=20, total_tokens=58, cached_tokens=0, cache_creation_tokens=0, cache_read_tokens=0, web_search_requests=None, response_cost=0.00041400000000000003)"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "time.sleep(0.3)\n",
+ "u = logger.usage(select=slc)[-1]; u"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "85ea9299",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_eq(u.cost, u.response_cost)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc60ec86",
+ "metadata": {},
+ "source": [
+ "Now let's run a prompt with web search:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d76d3c46",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "Based on the latest weather information for New York City:\n",
+ "\n",
+ "**Today (Friday, November 15):**\n",
+ "Sunshine early followed by cloudy skies this afternoon, with a high near 52°F and winds from the southwest at 5 to 10 mph.\n",
+ "\n",
+ "**Tonight:**\n",
+ "Light rain early, then remaining cloudy with a low around 50°F, winds from the southwest at 10 to 15 mph, and a 70% chance of rain.\n",
+ "\n",
+ "The city recently experienced its first snow of the season on Tuesday, bringing a taste of winter to November. Air quality has also been a concern, with pollution reaching unhealthy levels for sensitive groups.\n",
+ "\n",
+ "\n",
+ "\n",
+ "- id: `chatcmpl-xxx`\n",
+ "- model: `claude-sonnet-4-5-20250929`\n",
+ "- finish_reason: `stop`\n",
+ "- usage: `Usage(completion_tokens=286, prompt_tokens=9395, total_tokens=9681, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), server_tool_use=ServerToolUse(web_search_requests=1), cache_creation_input_tokens=0, cache_read_input_tokens=0)`\n",
+ "\n",
+ " "
+ ],
+ "text/plain": [
+ "ModelResponse(id='chatcmpl-xxx', created=1000000000, model='claude-sonnet-4-5-20250929', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='Based on the latest weather information for New York City:\\n\\n**Today (Friday, November 15):**\\nSunshine early followed by cloudy skies this afternoon, with a high near 52°F and winds from the southwest at 5 to 10 mph.\\n\\n**Tonight:**\\nLight rain early, then remaining cloudy with a low around 50°F, winds from the southwest at 10 to 15 mph, and a 70% chance of rain.\\n\\nThe city recently experienced its first snow of the season on Tuesday, bringing a taste of winter to November. Air quality has also been a concern, with pollution reaching unhealthy levels for sensitive groups.', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'citations': [[{'type': 'web_search_result_location', 'cited_text': 'zoom out · Showing Stations · Hourly Forecast for Today, Saturday 11/15Hourly for Today, Sat 11/15 · Today 11/15 · 4% / 0 in · Sunshine early followed...', 'url': 'https://www.wunderground.com/hourly/us/ny/new-york-city', 'title': 'New York City, NY Hourly Weather Forecast | Weather Underground', 'encrypted_index': 'EpEBCioICRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDHpSHBPOt9imlciNuBoMUT6RMMjfmqjUZ8UKIjD4etUhyewZFpGyyhF4dxcw67W5UZXDdBvjUaknHDPKRDkfv8euqbEmrMbwmPhWgQAqFSEOuv0dZAdfJp1/FaZs6YJwh29DWxgE'}], [{'type': 'web_search_result_location', 'cited_text': 'Tonight 11/15 · 68 % / 0.09 in · Light rain early. ', 'url': 'https://www.wunderground.com/hourly/us/ny/new-york-city', 'title': 'New York City, NY Hourly Weather Forecast | Weather Underground', 'encrypted_index': 'Eo8BCioICRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDIOBH1Q99Syc109nFBoMp/SiHQIcWDVzkGlnIjBBrl0Q/0uGredDx3q3IaYsvCb2Wd3yBAp6QP6fsIshlZBxm0RvK2LBiNthVL7FsWsqE/FjLWVS9/3zYpubW/H2EagTEVYYBA=='}, {'type': 'web_search_result_location', 'cited_text': 'Low around 50F. Winds SW at 10 to 15 mph. Chance of rain 70%. ', 'url': 'https://www.wunderground.com/hourly/us/ny/new-york-city', 'title': 'New York City, NY Hourly Weather Forecast | Weather Underground', 'encrypted_index': 'EpIBCioICRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDFdov2C9DPa6WjL/RxoMK5/tDBMkR5kigEzWIjCMcSgmDV7Tk2jgoIJG+3ktjqC8uEREipobaAOiLBJAMwovj66H+dnjh+Dw7Oz9z3cqFvEMomtDbMXpXF/CTQHhl4613xt5vzcYBA=='}], [{'type': 'web_search_result_location', 'cited_text': 'NYC woke up to its first snow of the season on Tuesday, as a burst of wintry showers moved through during the morning commute, bringing slick spots an...', 'url': 'https://www.fox5ny.com/weather/nyc-sees-first-snow-season-cold-november-air-moves', 'title': 'Is it snowing in NYC? Big Apple sees first flurries of the season | FOX 5 New York', 'encrypted_index': 'Eo8BCioICRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDJVlwE07RVLcjTrlohoMx4dAxCScm9Oq5HJ0IjBldlZQKn0ORI++QGeyxWYHR4MLIdNUQ63al9ILyeoKL+H+Z4SNvfoNAFhSNio3nuAqE3LICOeepRMyfdYpTEgG2imjahcYBA=='}], [{'type': 'web_search_result_location', 'cited_text': 'The air has reached a high level of pollution and is unhealthy for sensitive groups. 
', 'url': 'https://www.accuweather.com/en/us/new-york/10021/weather-forecast/14-349727_1_al', 'title': 'New York City, NY Weather Forecast | AccuWeather', 'encrypted_index': 'Eo8BCioICRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDHU9txFYaPTut9vOIRoMmwxQ88FmlAQnbUBAIjBc9OwHBdDZb4vWThl3Z/v/7zqHbtNd/d42rlsnxc9Xrhu53Njt7DJmaf4AqgwFCuEqE/Ehz3pIPcs8oQWAfgvqSHLCSMoYBA=='}]], 'thinking_blocks': None}))], usage=Usage(completion_tokens=286, prompt_tokens=9395, total_tokens=9681, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), server_tool_use=ServerToolUse(web_search_requests=1), cache_creation_input_tokens=0, cache_read_input_tokens=0))"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chat = Chat('claude-sonnet-4-5-20250929')\n",
+ "chat(\"What is the weather like in NYC? Search web.\", search=\"m\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fb86f247",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Usage(id=5, timestamp=UNSET, model='claude-sonnet-4-5-20250929', user_id='user-123', prompt_tokens=9395, completion_tokens=286, total_tokens=9681, cached_tokens=0, cache_creation_tokens=0, cache_read_tokens=0, web_search_requests=1, response_cost=0.032475000000000004)"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "time.sleep(0.3)\n",
+ "u = logger.usage(select=slc)[-1]; u"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3f7d58e3",
+ "metadata": {},
+ "source": [
+ "::: {.callout-important}\n",
+ "Litellm's `response_cost` doesn't take search request cost into account!\n",
+ ":::"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c9a35480",
+ "metadata": {},
+ "source": [
+ "Now, this is a case where using the custom calculations is better as it will also include the web search request cost:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dc09ac23",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_eq(u.cost, u.response_cost + model_prices[u.model]['web_search_prc'])"
+ ]
+ },
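+  {
+   "cell_type": "markdown",
+   "id": "d19c6e72",
+   "metadata": {},
+   "source": [
+    "With `web_search_prc` at \\$10 per 1,000 requests, the single search adds \\$0.01, so the true total is \\$0.032475 + \\$0.01 = \\$0.042475."
+   ]
+  },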
+ {
+ "cell_type": "markdown",
+ "id": "f8929bae",
+ "metadata": {},
+ "source": [
+ "Web search with streaming:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ff66e5d8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chat = Chat('claude-sonnet-4-5')\n",
+ "res = chat(\"What is the weather like in NYC? Search web.\", search=\"m\", stream=True)\n",
+ "for o in res: pass\n",
+ "# print(o)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4ee0c219",
+ "metadata": {},
+ "source": [
+ "::: {.callout-important}\n",
+ "Web search requests are not included in usage when `stream=True`. Here is an open [Issue](https://github.com/BerriAI/litellm/issues/16631)\n",
+ ":::\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "85cf0c2c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Usage(id=6, timestamp=UNSET, model='claude-sonnet-4-5', user_id='user-123', prompt_tokens=9395, completion_tokens=280, total_tokens=9675, cached_tokens=0, cache_creation_tokens=0, cache_read_tokens=0, web_search_requests=None, response_cost=0.032385000000000004)"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "time.sleep(0.3)\n",
+ "u = logger.usage(select=slc)[-1]; u"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "83ce4d9b",
+ "metadata": {},
+ "source": [
+ "Once this [PR](https://github.com/BerriAI/litellm/pull/16826) is merged `web_search_requests` will be included with `stream=True`, and the following test should pass:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "23f51ee5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test_eq(u.cost, u.response_cost + model_prices[u.model]['web_search_prc'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "13257134",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# u.cost"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cc36e5e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_eq(len(logger.usage()), 6)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "126e3064",
+ "metadata": {},
+ "source": [
+ "Let's implement a utility to get the total cost including web search:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9ef8ac82",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "@patch\n",
+ "def total_cost(self:Usage, sc=0.01): return self.response_cost + sc * ifnone(self.web_search_requests, 0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "661b19b5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test_close((L(logger.usage()).map(lambda o:o.total_cost(sc=0.01)).sum()), 0.086, 1e-3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b3577f0e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dbfp.parent.delete()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "41e335da",
+ "metadata": {},
+ "source": [
+ "# Export -"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2b59b8b0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "import nbdev; nbdev.nbdev_export()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d620e45d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/nbs/sidebar.yml b/nbs/sidebar.yml
index 45222d2..a443ffe 100644
--- a/nbs/sidebar.yml
+++ b/nbs/sidebar.yml
@@ -3,3 +3,4 @@ website:
contents:
- index.ipynb
- 00_core.ipynb
+ - 01_usage.ipynb