Skip to content

Commit

Permalink
chore(llmobs): token metrics name changes (#9657)
Browse files Browse the repository at this point in the history
Makes the following updates to metric key names submitted to LLM
Observability for openai & bedrock integrations

`prompt_tokens` -> `input_tokens`
`completion_tokens` -> `output_tokens`

The backend already has the changes in place to accept these updated key
names so a hard cutover is OK.

A release note is not needed since the metric key names used by our
integrations (openai, langchain, bedrock) when submitting data to the LLM
Obs backend are an internal contract between the integration and the backend.

When users set metric key names for manually created spans, our
documentation already instructs them to use input/output terminology.

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: lievan <evan.li@datadoghq.com>
Co-authored-by: kyle <kyle@verhoog.ca>
  • Loading branch information
3 people committed Jul 12, 2024
1 parent c14159e commit 07d2c16
Show file tree
Hide file tree
Showing 11 changed files with 67 additions and 53 deletions.
4 changes: 4 additions & 0 deletions ddtrace/llmobs/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@
)

LANGCHAIN_APM_SPAN_NAME = "langchain.request"

INPUT_TOKENS_METRIC_KEY = "input_tokens"
OUTPUT_TOKENS_METRIC_KEY = "output_tokens"
TOTAL_TOKENS_METRIC_KEY = "total_tokens"
9 changes: 6 additions & 3 deletions ddtrace/llmobs/_integrations/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@
from ddtrace._trace.span import Span
from ddtrace.internal.logger import get_logger
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import PARENT_ID_KEY
from ddtrace.llmobs._constants import PROPAGATED_PARENT_ID_KEY
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._integrations import BaseLLMIntegration
from ddtrace.llmobs._utils import _get_llmobs_parent_id

Expand Down Expand Up @@ -61,9 +64,9 @@ def _llmobs_metrics(span: Span, formatted_response: Optional[Dict[str, Any]]) ->
if formatted_response and formatted_response.get("text"):
prompt_tokens = int(span.get_tag("bedrock.usage.prompt_tokens") or 0)
completion_tokens = int(span.get_tag("bedrock.usage.completion_tokens") or 0)
metrics["prompt_tokens"] = prompt_tokens
metrics["completion_tokens"] = completion_tokens
metrics["total_tokens"] = prompt_tokens + completion_tokens
metrics[INPUT_TOKENS_METRIC_KEY] = prompt_tokens
metrics[OUTPUT_TOKENS_METRIC_KEY] = completion_tokens
metrics[TOTAL_TOKENS_METRIC_KEY] = prompt_tokens + completion_tokens
return metrics

@staticmethod
Expand Down
15 changes: 9 additions & 6 deletions ddtrace/llmobs/_integrations/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@
from ddtrace.internal.constants import COMPONENT
from ddtrace.internal.utils.version import parse_version
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
from ddtrace.pin import Pin

Expand Down Expand Up @@ -221,17 +224,17 @@ def _set_llmobs_metrics_tags(span: Span, resp: Any, streamed: bool = False) -> D
completion_tokens = span.get_metric("openai.response.usage.completion_tokens") or 0
metrics.update(
{
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": prompt_tokens + completion_tokens,
INPUT_TOKENS_METRIC_KEY: prompt_tokens,
OUTPUT_TOKENS_METRIC_KEY: completion_tokens,
TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens,
}
)
elif resp:
metrics.update(
{
"prompt_tokens": resp.usage.prompt_tokens,
"completion_tokens": resp.usage.completion_tokens,
"total_tokens": resp.usage.prompt_tokens + resp.usage.completion_tokens,
INPUT_TOKENS_METRIC_KEY: resp.usage.prompt_tokens,
OUTPUT_TOKENS_METRIC_KEY: resp.usage.completion_tokens,
TOTAL_TOKENS_METRIC_KEY: resp.usage.prompt_tokens + resp.usage.completion_tokens,
}
)
return metrics
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ interactions:
256}}, "output": {"messages": [{"content": "Ah, a bold and foolish hobbit seeking
to challenge my dominion in Mordor. Very well, little creature, I shall play
along. But know that I am always watching, and your quest will not go unnoticed",
"role": "assistant"}]}}, "metrics": {"prompt_tokens": 64, "completion_tokens":
128, "total_tokens": 192}}]}}'
"role": "assistant"}]}}, "metrics": {"input_tokens": 64, "output_tokens":
128, "total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ interactions:
"12345678901", "trace_id": "98765432101", "parent_id": "", "session_id": "98765432101",
"name": "completion_span", "tags": ["version:", "env:", "service:", "source:integration"],
"start_ns": 1707763310981223236, "duration": 12345678900, "error": 0, "meta":
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input": {"messages":
[{"content": "who broke enigma?"}], "parameters": {"temperature": 0, "max_tokens":
256}}, "output": {"messages": [{"content": "\n\nThe Enigma code was broken by
a team of codebreakers at Bletchley Park, led by mathematician Alan Turing."}]}},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192}}]}}'
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input":
{"messages": [{"content": "who broke enigma?"}], "parameters": {"temperature":
0, "max_tokens": 256}}, "output": {"messages": [{"content": "\n\nThe Enigma
code was broken by a team of codebreakers at Bletchley Park, led by mathematician
Alan Turing."}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ interactions:
"12345678901", "trace_id": "98765432101", "parent_id": "", "session_id": "98765432101",
"name": "completion_span", "tags": ["version:", "env:", "service:", "source:integration"],
"start_ns": 1707763310981223236, "duration": 12345678900, "error": 0, "meta":
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input": {"messages":
[{"content": "who broke enigma?"}], "parameters": {"temperature": 0, "max_tokens":
256}}, "output": {"messages": [{"content": "\n\nThe Enigma code was broken by
a team of codebreakers at Bletchley Park, led by mathematician Alan Turing."}]}},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192}}]}}'
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input":
{"messages": [{"content": "who broke enigma?"}], "parameters": {"temperature":
0, "max_tokens": 256}}, "output": {"messages": [{"content": "\n\nThe Enigma
code was broken by a team of codebreakers at Bletchley Park, led by mathematician
Alan Turing."}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@ interactions:
"12345678901", "trace_id": "98765432101", "parent_id": "", "session_id": "98765432101",
"name": "completion_span", "tags": ["version:", "env:", "service:", "source:integration"],
"start_ns": 1707763310981223236, "duration": 12345678900, "error": 0, "meta":
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input": {"messages":
[{"content": "who broke enigma?"}], "parameters": {"temperature": 0, "max_tokens":
256}}, "output": {"messages": [{"content": "\n\nThe Enigma code was broken by
a team of codebreakers at Bletchley Park, led by mathematician Alan Turing."}]}},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192}},
{"span_id": "12345678902", "trace_id": "98765432102", "parent_id": "",
"session_id": "98765432102", "name": "chat_completion_span", "tags": ["version:", "env:",
"service:", "source:integration"], "start_ns": 1707763310981223936, "duration":
12345678900, "error": 0, "meta": {"span.kind": "llm", "model_name": "gpt-3.5-turbo",
"model_provider": "openai", "input": {"messages": [{"role": "system", "content":
"You are an evil dark lord looking for his one ring to rule them all"}, {"role":
"user", "content": "I am a hobbit looking to go to Mordor"}], "parameters":
{"temperature": 0.9, "max_tokens": 256}}, "output": {"messages": [{"content":
"Ah, a bold and foolish hobbit seeking to challenge my dominion in Mordor. Very
well, little creature, I shall play along. But know that I am always watching,
and your quest will not go unnoticed", "role": "assistant"}]}}, "metrics": {"prompt_tokens":
64, "completion_tokens": 128, "total_tokens": 192}}]}}'
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input":
{"messages": [{"content": "who broke enigma?"}], "parameters": {"temperature":
0, "max_tokens": 256}}, "output": {"messages": [{"content": "\n\nThe Enigma
code was broken by a team of codebreakers at Bletchley Park, led by mathematician
Alan Turing."}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}, {"span_id": "12345678902", "trace_id": "98765432102",
"parent_id": "", "session_id": "98765432102", "name": "chat_completion_span",
"tags": ["version:", "env:", "service:", "source:integration"], "start_ns":
1707763310981223936, "duration": 12345678900, "error": 0, "meta": {"span.kind":
"llm", "model_name": "gpt-3.5-turbo", "model_provider": "openai", "input": {"messages":
[{"role": "system", "content": "You are an evil dark lord looking for his one
ring to rule them all"}, {"role": "user", "content": "I am a hobbit looking
to go to Mordor"}], "parameters": {"temperature": 0.9, "max_tokens": 256}},
"output": {"messages": [{"content": "Ah, a bold and foolish hobbit seeking to
challenge my dominion in Mordor. Very well, little creature, I shall play along.
But know that I am always watching, and your quest will not go unnoticed", "role":
"assistant"}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ interactions:
"12345678901", "trace_id": "98765432101", "parent_id": "", "session_id": "98765432101",
"name": "completion_span", "tags": ["version:", "env:", "service:", "source:integration"],
"start_ns": 1707763310981223236, "duration": 12345678900, "error": 0, "meta":
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input": {"messages":
[{"content": "who broke enigma?"}], "parameters": {"temperature": 0, "max_tokens":
256}}, "output": {"messages": [{"content": "\n\nThe Enigma code was broken by
a team of codebreakers at Bletchley Park, led by mathematician Alan Turing."}]}},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192}}]}}'
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input":
{"messages": [{"content": "who broke enigma?"}], "parameters": {"temperature":
0, "max_tokens": 256}}, "output": {"messages": [{"content": "\n\nThe Enigma
code was broken by a team of codebreakers at Bletchley Park, led by mathematician
Alan Turing."}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down Expand Up @@ -51,8 +52,8 @@ interactions:
256}}, "output": {"messages": [{"content": "Ah, a bold and foolish hobbit seeking
to challenge my dominion in Mordor. Very well, little creature, I shall play
along. But know that I am always watching, and your quest will not go unnoticed",
"role": "assistant"}]}}, "metrics": {"prompt_tokens": 64, "completion_tokens":
128, "total_tokens": 192}}]}}'
"role": "assistant"}]}}, "metrics": {"input_tokens": 64, "output_tokens":
128, "total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
8 changes: 4 additions & 4 deletions tests/llmobs/test_llmobs_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def f():
input_data=[{"content": "test_prompt"}],
output_data=[{"content": "test_response"}],
tags={"custom_tag": "tag_value"},
metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
)

f()
Expand All @@ -299,7 +299,7 @@ def f():
input_messages=[{"content": "test_prompt"}],
output_messages=[{"content": "test_response"}],
parameters={"temperature": 0.9, "max_tokens": 50},
token_metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
token_metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
tags={"custom_tag": "tag_value"},
session_id="test_session_id",
)
Expand All @@ -314,7 +314,7 @@ def f():
input_data="test_prompt",
output_data="test_response",
tags={"custom_tag": "tag_value"},
metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
)

f()
Expand All @@ -328,7 +328,7 @@ def f():
input_messages=[{"content": "test_prompt"}],
output_messages=[{"content": "test_response"}],
parameters={"temperature": 0.9, "max_tokens": 50},
token_metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
token_metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
tags={"custom_tag": "tag_value"},
session_id="test_session_id",
)
Expand Down
4 changes: 2 additions & 2 deletions tests/llmobs/test_llmobs_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,8 +615,8 @@ def test_annotate_output_llm_message_wrong_type(LLMObs, mock_logs):

def test_annotate_metrics(LLMObs):
with LLMObs.llm(model_name="test_model") as span:
LLMObs.annotate(span=span, metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30})
assert json.loads(span.get_tag(METRICS)) == {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
LLMObs.annotate(span=span, metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30})
assert json.loads(span.get_tag(METRICS)) == {"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}


def test_annotate_metrics_wrong_type(LLMObs, mock_logs):
Expand Down
4 changes: 2 additions & 2 deletions tests/llmobs/test_llmobs_span_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def _completion_event():
]
},
},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192},
"metrics": {"input_tokens": 64, "output_tokens": 128, "total_tokens": 192},
}


Expand Down Expand Up @@ -78,7 +78,7 @@ def _chat_completion_event():
]
},
},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192},
"metrics": {"input_tokens": 64, "output_tokens": 128, "total_tokens": 192},
}


Expand Down

0 comments on commit 07d2c16

Please sign in to comment.