From 1f46733f6e71d13f9a48f5e8ffcd7bc7a1360e5f Mon Sep 17 00:00:00 2001
From: Ali Waleed <134522290+alizenhom@users.noreply.github.com>
Date: Mon, 5 Aug 2024 17:38:46 +0300
Subject: [PATCH 1/2] handle streaming generate edge case for ollama

---
 .../instrumentation/ollama/patch.py | 10 ++++++----
 src/langtrace_python_sdk/version.py |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/langtrace_python_sdk/instrumentation/ollama/patch.py b/src/langtrace_python_sdk/instrumentation/ollama/patch.py
index 9c13073a..9add0f54 100644
--- a/src/langtrace_python_sdk/instrumentation/ollama/patch.py
+++ b/src/langtrace_python_sdk/instrumentation/ollama/patch.py
@@ -165,22 +165,24 @@ def _set_input_attributes(span, kwargs, attributes):
 
 def _handle_streaming_response(span, response, api):
     accumulated_tokens = None
+    print("APIIII", api)
     if api == "chat":
         accumulated_tokens = {"message": {"content": "", "role": ""}}
-    if api == "completion":
+    if api == "completion" or api == "generate":
         accumulated_tokens = {"response": ""}
 
     span.add_event(Event.STREAM_START.value)
     try:
         for chunk in response:
+            content = None
             if api == "chat":
+                content = chunk["message"]["content"]
                 accumulated_tokens["message"]["content"] += chunk["message"]["content"]
                 accumulated_tokens["message"]["role"] = chunk["message"]["role"]
             if api == "generate":
+                content = chunk["response"]
                 accumulated_tokens["response"] += chunk["response"]
-            set_event_completion_chunk(
-                span, chunk.get("response") or chunk.get("message").get("content")
-            )
+            set_event_completion_chunk(span, content)
 
             _set_response_attributes(span, chunk | accumulated_tokens)
     finally:
diff --git a/src/langtrace_python_sdk/version.py b/src/langtrace_python_sdk/version.py
index 71fa01ea..0212ffee 100644
--- a/src/langtrace_python_sdk/version.py
+++ b/src/langtrace_python_sdk/version.py
@@ -1 +1 @@
-__version__ = "2.2.16"
+__version__ = "2.2.17"

From 21d0487ffe3051b1c6e543f29df62d1f852dc46d Mon Sep 17 00:00:00 2001
From: Ali Waleed <134522290+alizenhom@users.noreply.github.com>
Date: Mon, 5 Aug 2024 17:40:56 +0300
Subject: [PATCH 2/2] async generate

---
 src/langtrace_python_sdk/instrumentation/ollama/patch.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/langtrace_python_sdk/instrumentation/ollama/patch.py b/src/langtrace_python_sdk/instrumentation/ollama/patch.py
index 9add0f54..fd2ce922 100644
--- a/src/langtrace_python_sdk/instrumentation/ollama/patch.py
+++ b/src/langtrace_python_sdk/instrumentation/ollama/patch.py
@@ -165,7 +165,6 @@ def _set_input_attributes(span, kwargs, attributes):
 def _handle_streaming_response(span, response, api):
     accumulated_tokens = None
-    print("APIIII", api)
     if api == "chat":
         accumulated_tokens = {"message": {"content": "", "role": ""}}
     if api == "completion" or api == "generate":
         accumulated_tokens = {"response": ""}
@@ -198,19 +197,22 @@ async def _ahandle_streaming_response(span, response, api):
     accumulated_tokens = None
     if api == "chat":
         accumulated_tokens = {"message": {"content": "", "role": ""}}
-    if api == "completion":
+    if api == "completion" or api == "generate":
         accumulated_tokens = {"response": ""}
 
     span.add_event(Event.STREAM_START.value)
     try:
         async for chunk in response:
+            content = None
             if api == "chat":
+                content = chunk["message"]["content"]
                 accumulated_tokens["message"]["content"] += chunk["message"]["content"]
                 accumulated_tokens["message"]["role"] = chunk["message"]["role"]
             if api == "generate":
+                content = chunk["response"]
                 accumulated_tokens["response"] += chunk["response"]
-            set_event_completion_chunk(span, chunk)
+            set_event_completion_chunk(span, content)
 
             _set_response_attributes(span, chunk | accumulated_tokens)
     finally:
         # Finalize span after processing all chunks
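Note on the edge case being fixed: before PATCH 1/2, only api == "completion" initialized accumulated_tokens, so a streamed "generate" call reached accumulated_tokens["response"] while accumulated_tokens was still None. The old per-chunk expression chunk.get("response") or chunk.get("message").get("content") was also fragile: a "generate" chunk whose "response" is an empty string (as in Ollama's final "done" chunk) falls through to chunk.get("message"), which is None for that API and raises AttributeError. Below is a minimal, self-contained sketch of the post-patch accumulation logic for the "generate" path; it assumes dict chunks shaped like Ollama's streamed /api/generate output, and emit_chunk / record_attributes are hypothetical stand-ins for the SDK's span-bound set_event_completion_chunk and _set_response_attributes helpers:

def emit_chunk(content):
    # Hypothetical stand-in for set_event_completion_chunk(span, content).
    print("chunk event:", repr(content))

def record_attributes(attrs):
    # Hypothetical stand-in for _set_response_attributes(span, attrs).
    print("response attrs:", attrs)

def handle_streaming_generate(response):
    # Initialized for "generate" as well as "completion", per the fix.
    accumulated = {"response": ""}
    for chunk in response:
        # Resolve the content per API branch instead of probing with chained
        # .get() calls, so an empty "response" string stays safe.
        content = chunk["response"]
        accumulated["response"] += content
        emit_chunk(content)
        record_attributes(chunk | accumulated)  # dict merge, Python 3.9+
    return accumulated

# Two content chunks followed by an empty final "done" chunk.
final = handle_streaming_generate(
    [{"response": "Hel"}, {"response": "lo"}, {"response": "", "done": True}]
)
assert final["response"] == "Hello"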