Skip to content

Commit ac4f32f

Browse files
Cost tracking for gemini-2.5-pro (#9837)
* build(model_prices_and_context_window.json): add google/gemini-2.0-flash-lite-001 versioned pricing Closes #9829 * build(model_prices_and_context_window.json): add initial support for 'supported_output_modalities' param * build(model_prices_and_context_window.json): add initial support for 'supported_output_modalities' param * build(model_prices_and_context_window.json): add supported endpoints to gemini-2.5-pro * build(model_prices_and_context_window.json): add gemini 200k+ pricing * feat(utils.py): support cost calculation for gemini-2.5-pro above 200k tokens Fixes #9807 * build: test dockerfile change * build: revert apk change * ci(config.yml): pip install wheel * ci: test problematic package first * ci(config.yml): pip install only binary * ci: try more things * ci: test different ml_dtypes version * ci(config.yml): check ml_dtypes==0.4.0 * ci: test * ci: cleanup config.yml * ci: specify ml dtypes in requirements.txt * ci: remove redisvl dependency (temporary) * fix: fix linting errors * test: update test * test: fix test
1 parent 4c1bb74 commit ac4f32f

File tree

10 files changed

+254
-94
lines changed

10 files changed

+254
-94
lines changed

.circleci/config.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,7 @@ jobs:
11271127
name: Install Dependencies
11281128
command: |
11291129
python -m pip install --upgrade pip
1130+
python -m pip install wheel setuptools
11301131
python -m pip install -r requirements.txt
11311132
pip install "pytest==7.3.1"
11321133
pip install "pytest-retry==1.6.3"

litellm/litellm_core_utils/llm_cost_calc/utils.py

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -90,35 +90,45 @@ def _generic_cost_per_character(
9090
return prompt_cost, completion_cost
9191

9292

93-
def _get_prompt_token_base_cost(model_info: ModelInfo, usage: Usage) -> float:
93+
def _get_token_base_cost(model_info: ModelInfo, usage: Usage) -> Tuple[float, float]:
9494
"""
9595
Return prompt cost for a given model and usage.
9696
97-
If input_tokens > 128k and `input_cost_per_token_above_128k_tokens` is set, then we use the `input_cost_per_token_above_128k_tokens` field.
97+
If input_tokens > threshold and `input_cost_per_token_above_[x]k_tokens` or `input_cost_per_token_above_[x]_tokens` is set,
98+
then we use the corresponding threshold cost.
9899
"""
99-
input_cost_per_token_above_128k_tokens = model_info.get(
100-
"input_cost_per_token_above_128k_tokens"
101-
)
102-
if _is_above_128k(usage.prompt_tokens) and input_cost_per_token_above_128k_tokens:
103-
return input_cost_per_token_above_128k_tokens
104-
return model_info["input_cost_per_token"]
105-
106-
107-
def _get_completion_token_base_cost(model_info: ModelInfo, usage: Usage) -> float:
108-
"""
109-
Return prompt cost for a given model and usage.
110-
111-
If input_tokens > 128k and `input_cost_per_token_above_128k_tokens` is set, then we use the `input_cost_per_token_above_128k_tokens` field.
112-
"""
113-
output_cost_per_token_above_128k_tokens = model_info.get(
114-
"output_cost_per_token_above_128k_tokens"
115-
)
116-
if (
117-
_is_above_128k(usage.completion_tokens)
118-
and output_cost_per_token_above_128k_tokens
119-
):
120-
return output_cost_per_token_above_128k_tokens
121-
return model_info["output_cost_per_token"]
100+
prompt_base_cost = model_info["input_cost_per_token"]
101+
completion_base_cost = model_info["output_cost_per_token"]
102+
103+
## CHECK IF ABOVE THRESHOLD
104+
threshold: Optional[float] = None
105+
for key, value in sorted(model_info.items(), reverse=True):
106+
if key.startswith("input_cost_per_token_above_") and value is not None:
107+
try:
108+
# Handle both formats: _above_128k_tokens and _above_128_tokens
109+
threshold_str = key.split("_above_")[1].split("_tokens")[0]
110+
threshold = float(threshold_str.replace("k", "")) * (
111+
1000 if "k" in threshold_str else 1
112+
)
113+
if usage.prompt_tokens > threshold:
114+
prompt_base_cost = cast(
115+
float,
116+
model_info.get(key, prompt_base_cost),
117+
)
118+
completion_base_cost = cast(
119+
float,
120+
model_info.get(
121+
f"output_cost_per_token_above_{threshold_str}_tokens",
122+
completion_base_cost,
123+
),
124+
)
125+
break
126+
except (IndexError, ValueError):
127+
continue
128+
except Exception:
129+
continue
130+
131+
return prompt_base_cost, completion_base_cost
122132

123133

124134
def calculate_cost_component(
@@ -215,7 +225,9 @@ def generic_cost_per_token(
215225
if text_tokens == 0:
216226
text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens
217227

218-
prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)
228+
prompt_base_cost, completion_base_cost = _get_token_base_cost(
229+
model_info=model_info, usage=usage
230+
)
219231

220232
prompt_cost = float(text_tokens) * prompt_base_cost
221233

@@ -253,9 +265,6 @@ def generic_cost_per_token(
253265
)
254266

255267
## CALCULATE OUTPUT COST
256-
completion_base_cost = _get_completion_token_base_cost(
257-
model_info=model_info, usage=usage
258-
)
259268
text_tokens = usage.completion_tokens
260269
audio_tokens = 0
261270
if usage.completion_tokens_details is not None:

litellm/model_prices_and_context_window_backup.json

Lines changed: 68 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@
380380
"supports_tool_choice": true,
381381
"supports_native_streaming": false,
382382
"supported_modalities": ["text", "image"],
383+
"supported_output_modalities": ["text"],
383384
"supported_endpoints": ["/v1/responses", "/v1/batch"]
384385
},
385386
"o1-pro-2025-03-19": {
@@ -401,6 +402,7 @@
401402
"supports_tool_choice": true,
402403
"supports_native_streaming": false,
403404
"supported_modalities": ["text", "image"],
405+
"supported_output_modalities": ["text"],
404406
"supported_endpoints": ["/v1/responses", "/v1/batch"]
405407
},
406408
"o1": {
@@ -4512,20 +4514,10 @@
45124514
"max_audio_length_hours": 8.4,
45134515
"max_audio_per_prompt": 1,
45144516
"max_pdf_size_mb": 30,
4515-
"input_cost_per_image": 0,
4516-
"input_cost_per_video_per_second": 0,
4517-
"input_cost_per_audio_per_second": 0,
4518-
"input_cost_per_token": 0,
4519-
"input_cost_per_character": 0,
4520-
"input_cost_per_token_above_128k_tokens": 0,
4521-
"input_cost_per_character_above_128k_tokens": 0,
4522-
"input_cost_per_image_above_128k_tokens": 0,
4523-
"input_cost_per_video_per_second_above_128k_tokens": 0,
4524-
"input_cost_per_audio_per_second_above_128k_tokens": 0,
4525-
"output_cost_per_token": 0,
4526-
"output_cost_per_character": 0,
4527-
"output_cost_per_token_above_128k_tokens": 0,
4528-
"output_cost_per_character_above_128k_tokens": 0,
4517+
"input_cost_per_token": 0.00000125,
4518+
"input_cost_per_token_above_200k_tokens": 0.0000025,
4519+
"output_cost_per_token": 0.00001,
4520+
"output_cost_per_token_above_200k_tokens": 0.000015,
45294521
"litellm_provider": "vertex_ai-language-models",
45304522
"mode": "chat",
45314523
"supports_system_messages": true,
@@ -4536,6 +4528,9 @@
45364528
"supports_pdf_input": true,
45374529
"supports_response_schema": true,
45384530
"supports_tool_choice": true,
4531+
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
4532+
"supported_modalities": ["text", "image", "audio", "video"],
4533+
"supported_output_modalities": ["text"],
45394534
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
45404535
},
45414536
"gemini-2.0-pro-exp-02-05": {
@@ -4548,20 +4543,10 @@
45484543
"max_audio_length_hours": 8.4,
45494544
"max_audio_per_prompt": 1,
45504545
"max_pdf_size_mb": 30,
4551-
"input_cost_per_image": 0,
4552-
"input_cost_per_video_per_second": 0,
4553-
"input_cost_per_audio_per_second": 0,
4554-
"input_cost_per_token": 0,
4555-
"input_cost_per_character": 0,
4556-
"input_cost_per_token_above_128k_tokens": 0,
4557-
"input_cost_per_character_above_128k_tokens": 0,
4558-
"input_cost_per_image_above_128k_tokens": 0,
4559-
"input_cost_per_video_per_second_above_128k_tokens": 0,
4560-
"input_cost_per_audio_per_second_above_128k_tokens": 0,
4561-
"output_cost_per_token": 0,
4562-
"output_cost_per_character": 0,
4563-
"output_cost_per_token_above_128k_tokens": 0,
4564-
"output_cost_per_character_above_128k_tokens": 0,
4546+
"input_cost_per_token": 0.00000125,
4547+
"input_cost_per_token_above_200k_tokens": 0.0000025,
4548+
"output_cost_per_token": 0.00001,
4549+
"output_cost_per_token_above_200k_tokens": 0.000015,
45654550
"litellm_provider": "vertex_ai-language-models",
45664551
"mode": "chat",
45674552
"supports_system_messages": true,
@@ -4572,6 +4557,9 @@
45724557
"supports_pdf_input": true,
45734558
"supports_response_schema": true,
45744559
"supports_tool_choice": true,
4560+
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
4561+
"supported_modalities": ["text", "image", "audio", "video"],
4562+
"supported_output_modalities": ["text"],
45754563
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
45764564
},
45774565
"gemini-2.0-flash-exp": {
@@ -4605,6 +4593,8 @@
46054593
"supports_vision": true,
46064594
"supports_response_schema": true,
46074595
"supports_audio_output": true,
4596+
"supported_modalities": ["text", "image", "audio", "video"],
4597+
"supported_output_modalities": ["text", "image"],
46084598
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
46094599
"supports_tool_choice": true
46104600
},
@@ -4629,6 +4619,8 @@
46294619
"supports_response_schema": true,
46304620
"supports_audio_output": true,
46314621
"supports_tool_choice": true,
4622+
"supported_modalities": ["text", "image", "audio", "video"],
4623+
"supported_output_modalities": ["text", "image"],
46324624
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
46334625
},
46344626
"gemini-2.0-flash-thinking-exp": {
@@ -4662,6 +4654,8 @@
46624654
"supports_vision": true,
46634655
"supports_response_schema": true,
46644656
"supports_audio_output": true,
4657+
"supported_modalities": ["text", "image", "audio", "video"],
4658+
"supported_output_modalities": ["text", "image"],
46654659
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
46664660
"supports_tool_choice": true
46674661
},
@@ -4696,6 +4690,8 @@
46964690
"supports_vision": true,
46974691
"supports_response_schema": false,
46984692
"supports_audio_output": false,
4693+
"supported_modalities": ["text", "image", "audio", "video"],
4694+
"supported_output_modalities": ["text", "image"],
46994695
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
47004696
"supports_tool_choice": true
47014697
},
@@ -4721,6 +4717,7 @@
47214717
"supports_audio_output": true,
47224718
"supports_audio_input": true,
47234719
"supported_modalities": ["text", "image", "audio", "video"],
4720+
"supported_output_modalities": ["text", "image"],
47244721
"supports_tool_choice": true,
47254722
"source": "https://ai.google.dev/pricing#2_0flash"
47264723
},
@@ -4743,6 +4740,32 @@
47434740
"supports_vision": true,
47444741
"supports_response_schema": true,
47454742
"supports_audio_output": true,
4743+
"supported_modalities": ["text", "image", "audio", "video"],
4744+
"supported_output_modalities": ["text"],
4745+
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
4746+
"supports_tool_choice": true
4747+
},
4748+
"gemini-2.0-flash-lite-001": {
4749+
"max_input_tokens": 1048576,
4750+
"max_output_tokens": 8192,
4751+
"max_images_per_prompt": 3000,
4752+
"max_videos_per_prompt": 10,
4753+
"max_video_length": 1,
4754+
"max_audio_length_hours": 8.4,
4755+
"max_audio_per_prompt": 1,
4756+
"max_pdf_size_mb": 50,
4757+
"input_cost_per_audio_token": 0.000000075,
4758+
"input_cost_per_token": 0.000000075,
4759+
"output_cost_per_token": 0.0000003,
4760+
"litellm_provider": "vertex_ai-language-models",
4761+
"mode": "chat",
4762+
"supports_system_messages": true,
4763+
"supports_function_calling": true,
4764+
"supports_vision": true,
4765+
"supports_response_schema": true,
4766+
"supports_audio_output": true,
4767+
"supported_modalities": ["text", "image", "audio", "video"],
4768+
"supported_output_modalities": ["text"],
47464769
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
47474770
"supports_tool_choice": true
47484771
},
@@ -4808,6 +4831,7 @@
48084831
"supports_audio_output": true,
48094832
"supports_audio_input": true,
48104833
"supported_modalities": ["text", "image", "audio", "video"],
4834+
"supported_output_modalities": ["text", "image"],
48114835
"supports_tool_choice": true,
48124836
"source": "https://ai.google.dev/pricing#2_0flash"
48134837
},
@@ -4833,6 +4857,8 @@
48334857
"supports_response_schema": true,
48344858
"supports_audio_output": true,
48354859
"supports_tool_choice": true,
4860+
"supported_modalities": ["text", "image", "audio", "video"],
4861+
"supported_output_modalities": ["text"],
48364862
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite"
48374863
},
48384864
"gemini/gemini-2.0-flash-001": {
@@ -4858,6 +4884,8 @@
48584884
"supports_response_schema": true,
48594885
"supports_audio_output": false,
48604886
"supports_tool_choice": true,
4887+
"supported_modalities": ["text", "image", "audio", "video"],
4888+
"supported_output_modalities": ["text", "image"],
48614889
"source": "https://ai.google.dev/pricing#2_0flash"
48624890
},
48634891
"gemini/gemini-2.5-pro-preview-03-25": {
@@ -4872,9 +4900,9 @@
48724900
"max_pdf_size_mb": 30,
48734901
"input_cost_per_audio_token": 0.0000007,
48744902
"input_cost_per_token": 0.00000125,
4875-
"input_cost_per_token_above_128k_tokens": 0.0000025,
4903+
"input_cost_per_token_above_200k_tokens": 0.0000025,
48764904
"output_cost_per_token": 0.0000010,
4877-
"output_cost_per_token_above_128k_tokens": 0.000015,
4905+
"output_cost_per_token_above_200k_tokens": 0.000015,
48784906
"litellm_provider": "gemini",
48794907
"mode": "chat",
48804908
"rpm": 10000,
@@ -4885,6 +4913,8 @@
48854913
"supports_response_schema": true,
48864914
"supports_audio_output": false,
48874915
"supports_tool_choice": true,
4916+
"supported_modalities": ["text", "image", "audio", "video"],
4917+
"supported_output_modalities": ["text"],
48884918
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview"
48894919
},
48904920
"gemini/gemini-2.0-flash-exp": {
@@ -4920,6 +4950,8 @@
49204950
"supports_audio_output": true,
49214951
"tpm": 4000000,
49224952
"rpm": 10,
4953+
"supported_modalities": ["text", "image", "audio", "video"],
4954+
"supported_output_modalities": ["text", "image"],
49234955
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
49244956
"supports_tool_choice": true
49254957
},
@@ -4946,6 +4978,8 @@
49464978
"supports_response_schema": true,
49474979
"supports_audio_output": false,
49484980
"supports_tool_choice": true,
4981+
"supported_modalities": ["text", "image", "audio", "video"],
4982+
"supported_output_modalities": ["text"],
49494983
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
49504984
},
49514985
"gemini/gemini-2.0-flash-thinking-exp": {
@@ -4981,6 +5015,8 @@
49815015
"supports_audio_output": true,
49825016
"tpm": 4000000,
49835017
"rpm": 10,
5018+
"supported_modalities": ["text", "image", "audio", "video"],
5019+
"supported_output_modalities": ["text", "image"],
49845020
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
49855021
"supports_tool_choice": true
49865022
},
@@ -5017,6 +5053,8 @@
50175053
"supports_audio_output": true,
50185054
"tpm": 4000000,
50195055
"rpm": 10,
5056+
"supported_modalities": ["text", "image", "audio", "video"],
5057+
"supported_output_modalities": ["text", "image"],
50205058
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
50215059
"supports_tool_choice": true
50225060
},

litellm/proxy/_new_secret_config.yaml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,14 @@ model_list:
2929
model: databricks/databricks-claude-3-7-sonnet
3030
api_key: os.environ/DATABRICKS_API_KEY
3131
api_base: os.environ/DATABRICKS_API_BASE
32-
- model_name: "gemini/gemini-2.0-flash"
33-
litellm_params:
34-
model: gemini/gemini-2.0-flash
35-
api_key: os.environ/GEMINI_API_KEY
32+
- model_name: "llmaas-meta/llama-3.1-8b-instruct"
33+
litellm_params:
34+
model: nvidia_nim/meta/llama-3.3-70b-instruct
35+
api_key: "invalid"
36+
api_base: "http://0.0.0.0:8090"
37+
model_info:
38+
input_cost_per_token: "100"
39+
output_cost_per_token: "100"
3640

3741
litellm_settings:
3842
num_retries: 0

litellm/types/utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
120120
input_cost_per_character: Optional[float] # only for vertex ai models
121121
input_cost_per_audio_token: Optional[float]
122122
input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models
123+
input_cost_per_token_above_200k_tokens: Optional[
124+
float
125+
] # only for vertex ai gemini-2.5-pro models
123126
input_cost_per_character_above_128k_tokens: Optional[
124127
float
125128
] # only for vertex ai models
@@ -136,6 +139,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
136139
output_cost_per_token_above_128k_tokens: Optional[
137140
float
138141
] # only for vertex ai models
142+
output_cost_per_token_above_200k_tokens: Optional[
143+
float
144+
] # only for vertex ai gemini-2.5-pro models
139145
output_cost_per_character_above_128k_tokens: Optional[
140146
float
141147
] # only for vertex ai models

0 commit comments

Comments
 (0)