Commit 0908618

Litellm stable release 06 14 2025 (#11737)
* docs: initial commit with stable release changelog notes
* docs: style updates
* docs(index.md): updated changelog
* docs(index.md): cleanup
* docs(index.md): add general proxy improvements
* docs: index.md cleanup
1 parent 327868f commit 0908618

File tree

3 files changed: +304 −6 lines changed
Lines changed: 242 additions & 0 deletions

---
title: "[PRE-RELEASE] v1.72.6-stable"
slug: "v1-72-6-stable"
date: 2025-06-14T10:00:00
authors:
  - name: Krrish Dholakia
    title: CEO, LiteLLM
    url: https://www.linkedin.com/in/krish-d/
    image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
  - name: Ishaan Jaffer
    title: CTO, LiteLLM
    url: https://www.linkedin.com/in/reffajnaahsi/
    image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
hide_table_of_contents: false
---

import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
## Deploy this version

:::info

This version is not out yet.

:::
## TLDR

* **Why Upgrade**
* **Who Should Read**
* **Risk of Upgrade**
---

## Key Highlights

---
## New / Updated Models

### Pricing / Context Window Updates

| Provider | Model | Context Window | Input ($/1M tokens) | Output ($/1M tokens) | Type |
| -------- | ----- | -------------- | ------------------- | -------------------- | ---- |
| VertexAI | `vertex_ai/claude-opus-4` | 200K | $15.00 | $75.00 | New |
| OpenAI | `gpt-4o-audio-preview-2025-06-03` | 128K | $2.50 (text), $40.00 (audio) | $10.00 (text), $80.00 (audio) | New |
| OpenAI | `o3-pro` | 200K | $20.00 | $80.00 | New |
| OpenAI | `o3-pro-2025-06-10` | 200K | $20.00 | $80.00 | New |
| OpenAI | `o3` | 200K | $2.00 | $8.00 | Updated |
| OpenAI | `o3-2025-04-16` | 200K | $2.00 | $8.00 | Updated |
| Azure | `azure/gpt-4o-mini-transcribe` | 16K | $1.25 (text), $3.00 (audio) | $5.00 (text) | New |
| Mistral | `mistral/magistral-medium-latest` | 40K | $2.00 | $5.00 | New |
| Mistral | `mistral/magistral-small-latest` | 40K | $0.50 | $1.50 | New |

- Deepgram: `nova-3` cost-per-second pricing is [now supported](https://github.com/BerriAI/litellm/pull/11634).
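As an illustration only (not LiteLLM's internal cost-tracking code), the per-request cost implied by the table above is just the token counts weighted by the $/1M-token rates:

```python
# Illustrative sketch: compute a request's dollar cost from $/1M-token rates.
def request_cost(input_rate_per_1m: float, output_rate_per_1m: float,
                 input_tokens: int, output_tokens: int) -> float:
    """Return the dollar cost of one request given per-1M-token rates."""
    return (input_tokens * input_rate_per_1m
            + output_tokens * output_rate_per_1m) / 1_000_000

# e.g. `o3` (Updated): $2/1M input, $8/1M output
cost = request_cost(2.0, 8.0, input_tokens=10_000, output_tokens=2_000)
print(round(cost, 4))  # 0.036
```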

### Updated Models

#### Bugs
- **Watsonx**
    - Ignore space id on Watsonx deployments (throws JSON errors) - [PR](https://github.com/BerriAI/litellm/pull/11527)
- **Ollama**
    - Set tool call id for streaming calls - [PR](https://github.com/BerriAI/litellm/pull/11528)
- **Gemini (VertexAI + Google AI Studio)**
    - Fix tool call indexes - [PR](https://github.com/BerriAI/litellm/pull/11558)
    - Handle empty string for arguments in function calls - [PR](https://github.com/BerriAI/litellm/pull/11601)
    - Add audio/ogg mime type support when inferring from file URLs - [PR](https://github.com/BerriAI/litellm/pull/11635)
- **Custom LLM**
    - Fix passing api_base, api_key, litellm_params_dict to custom_llm embedding methods - [PR](https://github.com/BerriAI/litellm/pull/11450) s/o [ElefHead](https://github.com/ElefHead)
- **Huggingface**
    - Add /chat/completions to endpoint URL when missing - [PR](https://github.com/BerriAI/litellm/pull/11630)
- **Deepgram**
    - Support async httpx calls - [PR](https://github.com/BerriAI/litellm/pull/11641)
- **Anthropic**
    - Append prefix (if set) to assistant content start - [PR](https://github.com/BerriAI/litellm/pull/11719)

#### Features
- **VertexAI**
    - Support vertex credentials set via env var on passthrough - [PR](https://github.com/BerriAI/litellm/pull/11527)
    - Support for choosing 'global' region when model is only available there - [PR](https://github.com/BerriAI/litellm/pull/11566)
    - Anthropic passthrough cost calculation + token tracking - [PR](https://github.com/BerriAI/litellm/pull/11611)
    - Support 'global' vertex region on passthrough - [PR](https://github.com/BerriAI/litellm/pull/11661)
- **Anthropic**
    - 'none' tool choice param support - [PR](https://github.com/BerriAI/litellm/pull/11695)
- **Perplexity**
    - Add 'reasoning_effort' support - [PR](https://github.com/BerriAI/litellm/pull/11562)
- **Mistral**
    - Add mistral reasoning support - [PR](https://github.com/BerriAI/litellm/pull/11642)
- **SGLang**
    - Map context window exceeded error for proper handling - [PR](https://github.com/BerriAI/litellm/pull/11575/)
- **Deepgram**
    - Provider-specific params support - [PR](https://github.com/BerriAI/litellm/pull/11638)
- **Azure**
    - Return content safety filter results - [PR](https://github.com/BerriAI/litellm/pull/11655)
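Params like `reasoning_effort` are passed like any other OpenAI-compatible field. A minimal sketch (the model name and values are placeholders; this only builds the request payload rather than sending it):

```python
# Sketch of an OpenAI-compatible payload using `reasoning_effort`.
# Model name and message content are illustrative placeholders.
payload = {
    "model": "perplexity/sonar-reasoning",
    "messages": [{"role": "user", "content": "Why is the sky blue?"}],
    "reasoning_effort": "high",  # typically "low" | "medium" | "high"
}
print(payload["reasoning_effort"])  # high
```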
---

## LLM API Endpoints

#### Bugs
- **Chat Completion**
    - Streaming - Ensure consistent 'created' across chunks - [PR](https://github.com/BerriAI/litellm/pull/11528)

#### Features
- **MCP**
    - Add controls for MCP Permission Management - [PR](https://github.com/BerriAI/litellm/pull/11598)
    - Add permission management for MCP List + Call Tool operations - [PR](https://github.com/BerriAI/litellm/pull/11682)
    - Streamable HTTP server support - [PR](https://github.com/BerriAI/litellm/pull/11628), [PR](https://github.com/BerriAI/litellm/pull/11645)
    - Use experimental dedicated REST endpoints for listing and calling MCP tools - [PR](https://github.com/BerriAI/litellm/pull/11684)
- **Responses API**
    - NEW API Endpoint - List input items - [PR](https://github.com/BerriAI/litellm/pull/11602)
    - Background mode for OpenAI + Azure OpenAI - [PR](https://github.com/BerriAI/litellm/pull/11640)
    - Langfuse/other logging support on Responses API requests - [PR](https://github.com/BerriAI/litellm/pull/11685)
- **Chat Completions**
    - Bridge for Responses API - allows calling codex-mini via `/chat/completions` and `/v1/messages` - [PR](https://github.com/BerriAI/litellm/pull/11632), [PR](https://github.com/BerriAI/litellm/pull/11685)
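A sketch of what a "list input items" call might look like against a proxy (the base URL, key, and response id below are placeholders, and the path mirrors OpenAI's Responses API convention; this only constructs the request, it does not send it):

```python
# Sketch: build a "list input items" request against a LiteLLM proxy.
# BASE_URL, the key, and response_id are illustrative placeholders.
BASE_URL = "http://localhost:4000"   # assumed local proxy address
response_id = "resp_abc123"          # hypothetical Responses API id

url = f"{BASE_URL}/v1/responses/{response_id}/input_items"
headers = {"Authorization": "Bearer sk-1234"}  # demo key from this page

print(url)  # http://localhost:4000/v1/responses/resp_abc123/input_items
```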
---

## Spend Tracking

#### Bugs
- **End Users**
    - Update end-user spend and budget reset date based on budget duration - [PR](https://github.com/BerriAI/litellm/pull/8460) (s/o [laurien16](https://github.com/laurien16))
- **Custom Pricing**
    - Convert scientific notation str to int - [PR](https://github.com/BerriAI/litellm/pull/11655)
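The budget-reset fix amounts to deriving the next reset time from the last reset plus the budget duration. A minimal sketch (the `"30d"`-style duration format here is an assumption for illustration, not LiteLLM's exact parser):

```python
from datetime import datetime, timedelta

# Sketch: compute the next budget reset from a duration string like
# "30d" or "12h". Format is illustrative only.
def next_reset(last_reset: datetime, duration: str) -> datetime:
    units = {"s": "seconds", "m": "minutes", "h": "hours", "d": "days"}
    value, unit = int(duration[:-1]), duration[-1]
    return last_reset + timedelta(**{units[unit]: value})

print(next_reset(datetime(2025, 6, 14), "30d"))  # 2025-07-14 00:00:00
```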
---

## Management Endpoints / UI

#### Bugs
- **Users**
    - `/user/info` - fix passing user with `+` in user id
    - Add admin-initiated password reset flow - [PR](https://github.com/BerriAI/litellm/pull/11618)
    - Fix default user settings UI rendering error - [PR](https://github.com/BerriAI/litellm/pull/11674)
- **Budgets**
    - Correct success message when new user budget is created - [PR](https://github.com/BerriAI/litellm/pull/11608)

#### Features
- **Leftnav**
    - Show remaining Enterprise users on UI
- **MCP**
    - New server add form - [PR](https://github.com/BerriAI/litellm/pull/11604)
    - Allow editing MCP servers - [PR](https://github.com/BerriAI/litellm/pull/11693)
- **Models**
    - Add Deepgram models on UI
    - Model Access Group support on UI - [PR](https://github.com/BerriAI/litellm/pull/11719)
- **Keys**
    - Trim long user IDs - [PR](https://github.com/BerriAI/litellm/pull/11488)
- **Logs**
    - Add live tail feature to logs view, allowing users to disable auto-refresh in high traffic - [PR](https://github.com/BerriAI/litellm/pull/11712)
    - Audit Logs - preview screenshot - [PR](https://github.com/BerriAI/litellm/pull/11715)
---

## Logging / Guardrails Integrations

#### Bugs
- **Arize**
    - Change space_key header to space_id - [PR](https://github.com/BerriAI/litellm/pull/11595) (s/o [vanities](https://github.com/vanities))
- **Prometheus**
    - Fix total requests increment - [PR](https://github.com/BerriAI/litellm/pull/11718)

#### Features
- **Lasso Guardrails**
    - [NEW] Lasso Guardrails support - [PR](https://github.com/BerriAI/litellm/pull/11565)
- **Users**
    - New `organizations` param on `/user/new` - allows adding users to orgs on creation - [PR](https://github.com/BerriAI/litellm/pull/11572/files)
- **Prevent double logging when using bridge logic** - [PR](https://github.com/BerriAI/litellm/pull/11687)
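The Arize fix above is just a header rename; a minimal sketch of the shape of the change (values and the helper function are illustrative, not LiteLLM's actual integration code):

```python
# Sketch of the header change described above: send "space_id" where
# older clients sent "space_key". Values are placeholders.
def arize_headers(space_id: str, api_key: str) -> dict:
    return {
        "space_id": space_id,  # previously sent as "space_key"
        "api_key": api_key,
    }

print(arize_headers("my-space", "my-key"))
```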
---

## Performance / Reliability Improvements

#### Bugs
- **Tag-based routing**
    - Do not consider 'default' models when request specifies a tag - [PR](https://github.com/BerriAI/litellm/pull/11454) (s/o [thiagosalvatore](https://github.com/thiagosalvatore))

#### Features
- **Caching**
    - New optional `litellm[caching]` pip install for adding disk cache dependencies - [PR](https://github.com/BerriAI/litellm/pull/11600)
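The tag-routing fix can be sketched as a selection rule (data shapes here are assumptions, not LiteLLM's internals): `default`-tagged deployments are only eligible when the request carries no tags.

```python
# Sketch: pick deployments matching the request's tags; "default"-tagged
# deployments are used only when the request specifies no tags.
def eligible(deployments: list[dict], request_tags: list[str]) -> list[dict]:
    if not request_tags:
        return [d for d in deployments if "default" in d.get("tags", [])]
    return [d for d in deployments
            if set(request_tags) & set(d.get("tags", []))]

pool = [
    {"model": "gpt-4o", "tags": ["default"]},
    {"model": "gpt-4o-eu", "tags": ["eu"]},
]
print(eligible(pool, ["eu"]))  # only the "eu"-tagged deployment
```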
---

## General Proxy Improvements

#### Bugs
- **aiohttp**
    - Fixes for transfer encoding error on aiohttp transport - [PR](https://github.com/BerriAI/litellm/pull/11561)

#### Features
- **aiohttp**
    - Enable System Proxy Support for aiohttp transport - [PR](https://github.com/BerriAI/litellm/pull/11616) (s/o [idootop](https://github.com/idootop))
- **CLI**
    - Make all commands show server URL - [PR](https://github.com/BerriAI/litellm/pull/10801)
- **Uvicorn**
    - Allow setting keep-alive timeout - [PR](https://github.com/BerriAI/litellm/pull/11594)
- **Experimental Rate Limiting v2**
    - Support specifying rate limit by output_tokens only - [PR](https://github.com/BerriAI/litellm/pull/11646)
    - Decrement parallel requests on call failure - [PR](https://github.com/BerriAI/litellm/pull/11646)
    - In-memory-only rate limiting support - [PR](https://github.com/BerriAI/litellm/pull/11646)
    - Return remaining rate limits by key/user/team - [PR](https://github.com/BerriAI/litellm/pull/11646)
- **Helm**
    - Support extraContainers in migrations-job.yaml - [PR](https://github.com/BerriAI/litellm/pull/11649)
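The "decrement parallel requests on call failure" behavior can be sketched as an in-memory counter whose slot is released in a `finally` block, so failed upstream calls do not leak capacity (illustration only, not the proxy's implementation):

```python
# Sketch: in-memory parallel-request limiter that frees its slot on
# failure as well as success.
class ParallelLimiter:
    def __init__(self, max_parallel: int):
        self.max_parallel = max_parallel
        self.active = 0

    def acquire(self) -> bool:
        if self.active >= self.max_parallel:
            return False  # over limit; caller would reject with 429
        self.active += 1
        return True

    def release(self) -> None:
        # called in `finally`, so it runs on success AND failure
        self.active = max(0, self.active - 1)

limiter = ParallelLimiter(max_parallel=1)
assert limiter.acquire()
try:
    raise RuntimeError("upstream call failed")
except RuntimeError:
    pass
finally:
    limiter.release()
print(limiter.active)  # 0 -- the slot was freed despite the failure
```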
---

## New Contributors
* @laurien16 made their first contribution in https://github.com/BerriAI/litellm/pull/8460
* @fengbohello made their first contribution in https://github.com/BerriAI/litellm/pull/11547
* @lapinek made their first contribution in https://github.com/BerriAI/litellm/pull/11570
* @yanwork made their first contribution in https://github.com/BerriAI/litellm/pull/11586
* @dhs-shine made their first contribution in https://github.com/BerriAI/litellm/pull/11575
* @ElefHead made their first contribution in https://github.com/BerriAI/litellm/pull/11450
* @idootop made their first contribution in https://github.com/BerriAI/litellm/pull/11616
* @stevenaldinger made their first contribution in https://github.com/BerriAI/litellm/pull/11649
* @thiagosalvatore made their first contribution in https://github.com/BerriAI/litellm/pull/11454
* @vanities made their first contribution in https://github.com/BerriAI/litellm/pull/11595
* @alvarosevilla95 made their first contribution in https://github.com/BerriAI/litellm/pull/11661
---

## Demo Instance

Here's a Demo Instance to test changes:

- Instance: https://demo.litellm.ai/
- Login Credentials:
    - Username: admin
    - Password: sk-1234

## [Git Diff](https://github.com/BerriAI/litellm/compare/v1.72.2-stable...1.72.6.rc)

litellm/model_prices_and_context_window_backup.json

Lines changed: 31 additions & 3 deletions

```diff
@@ -4263,6 +4263,20 @@
     "supports_assistant_prefill": true,
     "supports_tool_choice": true
   },
+  "mistral/magistral-medium-latest": {
+    "max_tokens": 40000,
+    "max_input_tokens": 40000,
+    "max_output_tokens": 40000,
+    "input_cost_per_token": 2e-06,
+    "output_cost_per_token": 5e-06,
+    "litellm_provider": "mistral",
+    "mode": "chat",
+    "source": "https://mistral.ai/news/magistral",
+    "supports_function_calling": true,
+    "supports_assistant_prefill": true,
+    "supports_tool_choice": true,
+    "supports_reasoning": true
+  },
   "mistral/magistral-medium-2506": {
     "max_tokens": 40000,
     "max_input_tokens": 40000,
@@ -4277,15 +4291,29 @@
     "supports_tool_choice": true,
     "supports_reasoning": true
   },
+  "mistral/magistral-small-latest": {
+    "max_tokens": 40000,
+    "max_input_tokens": 40000,
+    "max_output_tokens": 40000,
+    "input_cost_per_token": 0.5e-6,
+    "output_cost_per_token": 1.5e-6,
+    "litellm_provider": "mistral",
+    "mode": "chat",
+    "source": "https://mistral.ai/pricing#api-pricing",
+    "supports_function_calling": true,
+    "supports_assistant_prefill": true,
+    "supports_tool_choice": true,
+    "supports_reasoning": true
+  },
   "mistral/magistral-small-2506": {
     "max_tokens": 40000,
     "max_input_tokens": 40000,
     "max_output_tokens": 40000,
-    "input_cost_per_token": 0.0,
-    "output_cost_per_token": 0.0,
+    "input_cost_per_token": 0.5e-06,
+    "output_cost_per_token": 1.5e-06,
     "litellm_provider": "mistral",
     "mode": "chat",
-    "source": "https://mistral.ai/news/magistral",
+    "source": "https://mistral.ai/pricing#api-pricing",
     "supports_function_calling": true,
     "supports_assistant_prefill": true,
     "supports_tool_choice": true,
```
model_prices_and_context_window.json

Lines changed: 31 additions & 3 deletions

```diff
@@ -4263,6 +4263,20 @@
     "supports_assistant_prefill": true,
     "supports_tool_choice": true
   },
+  "mistral/magistral-medium-latest": {
+    "max_tokens": 40000,
+    "max_input_tokens": 40000,
+    "max_output_tokens": 40000,
+    "input_cost_per_token": 2e-06,
+    "output_cost_per_token": 5e-06,
+    "litellm_provider": "mistral",
+    "mode": "chat",
+    "source": "https://mistral.ai/news/magistral",
+    "supports_function_calling": true,
+    "supports_assistant_prefill": true,
+    "supports_tool_choice": true,
+    "supports_reasoning": true
+  },
   "mistral/magistral-medium-2506": {
     "max_tokens": 40000,
     "max_input_tokens": 40000,
@@ -4277,15 +4291,29 @@
     "supports_tool_choice": true,
     "supports_reasoning": true
   },
+  "mistral/magistral-small-latest": {
+    "max_tokens": 40000,
+    "max_input_tokens": 40000,
+    "max_output_tokens": 40000,
+    "input_cost_per_token": 0.5e-6,
+    "output_cost_per_token": 1.5e-6,
+    "litellm_provider": "mistral",
+    "mode": "chat",
+    "source": "https://mistral.ai/pricing#api-pricing",
+    "supports_function_calling": true,
+    "supports_assistant_prefill": true,
+    "supports_tool_choice": true,
+    "supports_reasoning": true
+  },
   "mistral/magistral-small-2506": {
     "max_tokens": 40000,
     "max_input_tokens": 40000,
     "max_output_tokens": 40000,
-    "input_cost_per_token": 0.0,
-    "output_cost_per_token": 0.0,
+    "input_cost_per_token": 0.5e-06,
+    "output_cost_per_token": 1.5e-06,
     "litellm_provider": "mistral",
     "mode": "chat",
-    "source": "https://mistral.ai/news/magistral",
+    "source": "https://mistral.ai/pricing#api-pricing",
     "supports_function_calling": true,
     "supports_assistant_prefill": true,
     "supports_tool_choice": true,
```
