diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py
index 89d1bf16f494..4aa27b3c9d88 100644
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@@ -126,6 +126,8 @@ def map_openai_params(self, non_default_params: dict, optional_params: dict):
                 optional_params["max_tokens"] = value
             if param == "tools":
                 optional_params["tools"] = value
+            if param == "stream":
+                optional_params["stream"] = value
         return optional_params
 
 
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 2896b4a7118d..6f8d73c31789 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -727,6 +727,32 @@ def test_completion_claude_stream_bad_key():
     #     pytest.fail(f"Error occurred: {e}")
 
 
+def test_bedrock_claude_3_streaming():
+    """Stream a short Bedrock Claude 3 Sonnet completion and fail if no text arrives."""
+    try:
+        litellm.set_verbose = True
+        response: ModelResponse = completion(
+            model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+            messages=messages,
+            max_tokens=10,
+            stream=True,
+        )
+        complete_response = ""
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except RateLimitError:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.skip(reason="Replicate changed exceptions")
 def test_completion_replicate_stream_bad_key():
     try:
diff --git a/litellm/utils.py b/litellm/utils.py
index 7466bd5c6947..6d42ec2d3597 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8778,13 +8778,20 @@ def handle_bedrock_stream(self, chunk):
                 text = chunk_data.get("completions")[0].get("data").get("text")
                 is_finished = True
                 finish_reason = "stop"
-            # anthropic mapping
-            elif "completion" in chunk_data:
+            ######## bedrock.anthropic mappings ###############
+            elif "completion" in chunk_data:  # not claude-3
                 text = chunk_data["completion"]  # bedrock.anthropic
                 stop_reason = chunk_data.get("stop_reason", None)
                 if stop_reason != None:
                     is_finished = True
                     finish_reason = stop_reason
+            elif "delta" in chunk_data:  # presumably claude-3 (branch above is marked "not claude-3") - confirm
+                if chunk_data["delta"].get("text", None) is not None:
+                    text = chunk_data["delta"]["text"]
+                stop_reason = chunk_data["delta"].get("stop_reason", None)
+                if stop_reason is not None:
+                    is_finished = True
+                    finish_reason = stop_reason
             ######## bedrock.cohere mappings ###############
             # meta mapping
             elif "generation" in chunk_data: