1
+ import json
1
2
from typing import TYPE_CHECKING , Any , AsyncIterator , Dict , List , Optional , Tuple , Union
2
3
3
4
import httpx
13
14
AmazonInvokeConfig ,
14
15
)
15
16
from litellm .types .router import GenericLiteLLMParams
17
+ from litellm .types .utils import GenericStreamingChunk
16
18
from litellm .types .utils import GenericStreamingChunk as GChunk
17
19
from litellm .types .utils import ModelResponseStream
18
20
@@ -113,9 +115,9 @@ def transform_anthropic_messages_request(
113
115
114
116
# 1. anthropic_version is required for all claude models
115
117
if "anthropic_version" not in anthropic_messages_request :
116
- anthropic_messages_request [
117
- "anthropic_version"
118
- ] = self . DEFAULT_BEDROCK_ANTHROPIC_API_VERSION
118
+ anthropic_messages_request ["anthropic_version" ] = (
119
+ self . DEFAULT_BEDROCK_ANTHROPIC_API_VERSION
120
+ )
119
121
120
122
# 2. `stream` is not allowed in request body for bedrock invoke
121
123
if "stream" in anthropic_messages_request :
@@ -139,7 +141,26 @@ def get_async_streaming_response_iterator(
139
141
completion_stream = aws_decoder .aiter_bytes (
140
142
httpx_response .aiter_bytes (chunk_size = aws_decoder .DEFAULT_CHUNK_SIZE )
141
143
)
142
- return completion_stream
144
+ # Convert decoded Bedrock events to Server-Sent Events expected by Anthropic clients.
145
+ return self .bedrock_sse_wrapper (completion_stream )
146
+
147
async def bedrock_sse_wrapper(
    self,
    completion_stream: "AsyncIterator[Union[bytes, GenericStreamingChunk, ModelResponseStream, dict]]",
) -> "AsyncIterator[Union[bytes, GenericStreamingChunk, ModelResponseStream]]":
    """
    Re-frame decoded Bedrock invoke chunks as Server-Sent Events.

    Bedrock invoke does not return SSE formatted data, so each ``dict``
    chunk is serialized into one SSE frame::

        event: <chunk["type"]>\\n
        data: <json.dumps(chunk)>\\n
        \\n

    and yielded as encoded ``bytes``.

    Args:
        completion_stream: Async iterator of decoded chunks. ``dict`` chunks
            are converted to SSE frames; any other chunk type is passed
            through untouched.

    Yields:
        ``bytes`` holding a single SSE frame for every ``dict`` chunk, or the
        original object for every non-dict chunk.
    """
    async for chunk in completion_stream:
        if not isinstance(chunk, dict):
            # Forward non-dict chunks unchanged so callers can still use the
            # richer Python objects if they wish.
            yield chunk
            continue

        # A missing, None or empty "type" falls back to "message" (the SSE
        # default event name) instead of emitting a literal "None" event.
        event_type = str(chunk.get("type") or "message")

        # SSE framing is whitespace sensitive: the field name must start the
        # line and the frame must end with an empty line.
        payload = f"event: {event_type}\ndata: {json.dumps(chunk)}\n\n"
        yield payload.encode()
143
164
144
165
145
166
class AmazonAnthropicClaudeMessagesStreamDecoder (AWSEventStreamDecoder ):
@@ -159,8 +180,22 @@ def _chunk_parser(
159
180
"""
160
181
Parse the chunk data into anthropic /messages format
161
182
162
- No transformation is needed for anthropic /messages format
163
-
164
- since bedrock invoke returns the response in the correct format
183
+ Bedrock returns usage metrics using camelCase keys. Convert these to
184
+ the Anthropic `/v1/messages` specification so callers receive a
185
+ consistent response shape when streaming.
165
186
"""
187
+ amazon_bedrock_invocation_metrics = chunk_data .pop (
188
+ "amazon-bedrock-invocationMetrics" , {}
189
+ )
190
+ if amazon_bedrock_invocation_metrics :
191
+ anthropic_usage = {}
192
+ if "inputTokenCount" in amazon_bedrock_invocation_metrics :
193
+ anthropic_usage ["input_tokens" ] = amazon_bedrock_invocation_metrics [
194
+ "inputTokenCount"
195
+ ]
196
+ if "outputTokenCount" in amazon_bedrock_invocation_metrics :
197
+ anthropic_usage ["output_tokens" ] = amazon_bedrock_invocation_metrics [
198
+ "outputTokenCount"
199
+ ]
200
+ chunk_data ["usage" ] = anthropic_usage
166
201
return chunk_data
0 commit comments