Skip to content

Commit

Permalink
chore(llmobs): token metrics name changes (#9657)
Browse files Browse the repository at this point in the history
Makes the following updates to metric key names submitted to LLM
Observability for openai & bedrock integrations

`prompt_tokens` -> `input_tokens`
`completion_tokens` -> `output_tokens`

The backend already has the changes in place to accept these updated key
names so a hard cutover is OK.

A release note is not needed since the metric key names used by our
integrations (openai, langchain, bedrock) when submitting data to the LLM
Obs backend are an internal contract between the integration and the backend.

When users set metric key names for manually created spans, our
documentation already instructs them to use input/output terminology.

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: lievan <evan.li@datadoghq.com>
Co-authored-by: kyle <kyle@verhoog.ca>
  • Loading branch information
3 people committed Jul 12, 2024
1 parent c14159e commit 07d2c16
Show file tree
Hide file tree
Showing 11 changed files with 67 additions and 53 deletions.
4 changes: 4 additions & 0 deletions ddtrace/llmobs/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@
)

LANGCHAIN_APM_SPAN_NAME = "langchain.request"

INPUT_TOKENS_METRIC_KEY = "input_tokens"
OUTPUT_TOKENS_METRIC_KEY = "output_tokens"
TOTAL_TOKENS_METRIC_KEY = "total_tokens"
9 changes: 6 additions & 3 deletions ddtrace/llmobs/_integrations/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@
from ddtrace._trace.span import Span
from ddtrace.internal.logger import get_logger
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import PARENT_ID_KEY
from ddtrace.llmobs._constants import PROPAGATED_PARENT_ID_KEY
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._integrations import BaseLLMIntegration
from ddtrace.llmobs._utils import _get_llmobs_parent_id

Expand Down Expand Up @@ -61,9 +64,9 @@ def _llmobs_metrics(span: Span, formatted_response: Optional[Dict[str, Any]]) ->
if formatted_response and formatted_response.get("text"):
prompt_tokens = int(span.get_tag("bedrock.usage.prompt_tokens") or 0)
completion_tokens = int(span.get_tag("bedrock.usage.completion_tokens") or 0)
metrics["prompt_tokens"] = prompt_tokens
metrics["completion_tokens"] = completion_tokens
metrics["total_tokens"] = prompt_tokens + completion_tokens
metrics[INPUT_TOKENS_METRIC_KEY] = prompt_tokens
metrics[OUTPUT_TOKENS_METRIC_KEY] = completion_tokens
metrics[TOTAL_TOKENS_METRIC_KEY] = prompt_tokens + completion_tokens
return metrics

@staticmethod
Expand Down
15 changes: 9 additions & 6 deletions ddtrace/llmobs/_integrations/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@
from ddtrace.internal.constants import COMPONENT
from ddtrace.internal.utils.version import parse_version
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
from ddtrace.pin import Pin

Expand Down Expand Up @@ -221,17 +224,17 @@ def _set_llmobs_metrics_tags(span: Span, resp: Any, streamed: bool = False) -> D
completion_tokens = span.get_metric("openai.response.usage.completion_tokens") or 0
metrics.update(
{
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": prompt_tokens + completion_tokens,
INPUT_TOKENS_METRIC_KEY: prompt_tokens,
OUTPUT_TOKENS_METRIC_KEY: completion_tokens,
TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens,
}
)
elif resp:
metrics.update(
{
"prompt_tokens": resp.usage.prompt_tokens,
"completion_tokens": resp.usage.completion_tokens,
"total_tokens": resp.usage.prompt_tokens + resp.usage.completion_tokens,
INPUT_TOKENS_METRIC_KEY: resp.usage.prompt_tokens,
OUTPUT_TOKENS_METRIC_KEY: resp.usage.completion_tokens,
TOTAL_TOKENS_METRIC_KEY: resp.usage.prompt_tokens + resp.usage.completion_tokens,
}
)
return metrics
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ interactions:
256}}, "output": {"messages": [{"content": "Ah, a bold and foolish hobbit seeking
to challenge my dominion in Mordor. Very well, little creature, I shall play
along. But know that I am always watching, and your quest will not go unnoticed",
"role": "assistant"}]}}, "metrics": {"prompt_tokens": 64, "completion_tokens":
128, "total_tokens": 192}}]}}'
"role": "assistant"}]}}, "metrics": {"input_tokens": 64, "output_tokens":
128, "total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ interactions:
"12345678901", "trace_id": "98765432101", "parent_id": "", "session_id": "98765432101",
"name": "completion_span", "tags": ["version:", "env:", "service:", "source:integration"],
"start_ns": 1707763310981223236, "duration": 12345678900, "error": 0, "meta":
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input": {"messages":
[{"content": "who broke enigma?"}], "parameters": {"temperature": 0, "max_tokens":
256}}, "output": {"messages": [{"content": "\n\nThe Enigma code was broken by
a team of codebreakers at Bletchley Park, led by mathematician Alan Turing."}]}},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192}}]}}'
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input":
{"messages": [{"content": "who broke enigma?"}], "parameters": {"temperature":
0, "max_tokens": 256}}, "output": {"messages": [{"content": "\n\nThe Enigma
code was broken by a team of codebreakers at Bletchley Park, led by mathematician
Alan Turing."}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ interactions:
"12345678901", "trace_id": "98765432101", "parent_id": "", "session_id": "98765432101",
"name": "completion_span", "tags": ["version:", "env:", "service:", "source:integration"],
"start_ns": 1707763310981223236, "duration": 12345678900, "error": 0, "meta":
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input": {"messages":
[{"content": "who broke enigma?"}], "parameters": {"temperature": 0, "max_tokens":
256}}, "output": {"messages": [{"content": "\n\nThe Enigma code was broken by
a team of codebreakers at Bletchley Park, led by mathematician Alan Turing."}]}},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192}}]}}'
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input":
{"messages": [{"content": "who broke enigma?"}], "parameters": {"temperature":
0, "max_tokens": 256}}, "output": {"messages": [{"content": "\n\nThe Enigma
code was broken by a team of codebreakers at Bletchley Park, led by mathematician
Alan Turing."}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@ interactions:
"12345678901", "trace_id": "98765432101", "parent_id": "", "session_id": "98765432101",
"name": "completion_span", "tags": ["version:", "env:", "service:", "source:integration"],
"start_ns": 1707763310981223236, "duration": 12345678900, "error": 0, "meta":
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input": {"messages":
[{"content": "who broke enigma?"}], "parameters": {"temperature": 0, "max_tokens":
256}}, "output": {"messages": [{"content": "\n\nThe Enigma code was broken by
a team of codebreakers at Bletchley Park, led by mathematician Alan Turing."}]}},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192}},
{"span_id": "12345678902", "trace_id": "98765432102", "parent_id": "",
"session_id": "98765432102", "name": "chat_completion_span", "tags": ["version:", "env:",
"service:", "source:integration"], "start_ns": 1707763310981223936, "duration":
12345678900, "error": 0, "meta": {"span.kind": "llm", "model_name": "gpt-3.5-turbo",
"model_provider": "openai", "input": {"messages": [{"role": "system", "content":
"You are an evil dark lord looking for his one ring to rule them all"}, {"role":
"user", "content": "I am a hobbit looking to go to Mordor"}], "parameters":
{"temperature": 0.9, "max_tokens": 256}}, "output": {"messages": [{"content":
"Ah, a bold and foolish hobbit seeking to challenge my dominion in Mordor. Very
well, little creature, I shall play along. But know that I am always watching,
and your quest will not go unnoticed", "role": "assistant"}]}}, "metrics": {"prompt_tokens":
64, "completion_tokens": 128, "total_tokens": 192}}]}}'
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input":
{"messages": [{"content": "who broke enigma?"}], "parameters": {"temperature":
0, "max_tokens": 256}}, "output": {"messages": [{"content": "\n\nThe Enigma
code was broken by a team of codebreakers at Bletchley Park, led by mathematician
Alan Turing."}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}, {"span_id": "12345678902", "trace_id": "98765432102",
"parent_id": "", "session_id": "98765432102", "name": "chat_completion_span",
"tags": ["version:", "env:", "service:", "source:integration"], "start_ns":
1707763310981223936, "duration": 12345678900, "error": 0, "meta": {"span.kind":
"llm", "model_name": "gpt-3.5-turbo", "model_provider": "openai", "input": {"messages":
[{"role": "system", "content": "You are an evil dark lord looking for his one
ring to rule them all"}, {"role": "user", "content": "I am a hobbit looking
to go to Mordor"}], "parameters": {"temperature": 0.9, "max_tokens": 256}},
"output": {"messages": [{"content": "Ah, a bold and foolish hobbit seeking to
challenge my dominion in Mordor. Very well, little creature, I shall play along.
But know that I am always watching, and your quest will not go unnoticed", "role":
"assistant"}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ interactions:
"12345678901", "trace_id": "98765432101", "parent_id": "", "session_id": "98765432101",
"name": "completion_span", "tags": ["version:", "env:", "service:", "source:integration"],
"start_ns": 1707763310981223236, "duration": 12345678900, "error": 0, "meta":
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input": {"messages":
[{"content": "who broke enigma?"}], "parameters": {"temperature": 0, "max_tokens":
256}}, "output": {"messages": [{"content": "\n\nThe Enigma code was broken by
a team of codebreakers at Bletchley Park, led by mathematician Alan Turing."}]}},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192}}]}}'
{"span.kind": "llm", "model_name": "ada", "model_provider": "openai", "input":
{"messages": [{"content": "who broke enigma?"}], "parameters": {"temperature":
0, "max_tokens": 256}}, "output": {"messages": [{"content": "\n\nThe Enigma
code was broken by a team of codebreakers at Bletchley Park, led by mathematician
Alan Turing."}]}}, "metrics": {"input_tokens": 64, "output_tokens": 128,
"total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down Expand Up @@ -51,8 +52,8 @@ interactions:
256}}, "output": {"messages": [{"content": "Ah, a bold and foolish hobbit seeking
to challenge my dominion in Mordor. Very well, little creature, I shall play
along. But know that I am always watching, and your quest will not go unnoticed",
"role": "assistant"}]}}, "metrics": {"prompt_tokens": 64, "completion_tokens":
128, "total_tokens": 192}}]}}'
"role": "assistant"}]}}, "metrics": {"input_tokens": 64, "output_tokens":
128, "total_tokens": 192}}]}'
headers:
Content-Type:
- application/json
Expand Down
8 changes: 4 additions & 4 deletions tests/llmobs/test_llmobs_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def f():
input_data=[{"content": "test_prompt"}],
output_data=[{"content": "test_response"}],
tags={"custom_tag": "tag_value"},
metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
)

f()
Expand All @@ -299,7 +299,7 @@ def f():
input_messages=[{"content": "test_prompt"}],
output_messages=[{"content": "test_response"}],
parameters={"temperature": 0.9, "max_tokens": 50},
token_metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
token_metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
tags={"custom_tag": "tag_value"},
session_id="test_session_id",
)
Expand All @@ -314,7 +314,7 @@ def f():
input_data="test_prompt",
output_data="test_response",
tags={"custom_tag": "tag_value"},
metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
)

f()
Expand All @@ -328,7 +328,7 @@ def f():
input_messages=[{"content": "test_prompt"}],
output_messages=[{"content": "test_response"}],
parameters={"temperature": 0.9, "max_tokens": 50},
token_metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
token_metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
tags={"custom_tag": "tag_value"},
session_id="test_session_id",
)
Expand Down
4 changes: 2 additions & 2 deletions tests/llmobs/test_llmobs_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,8 +615,8 @@ def test_annotate_output_llm_message_wrong_type(LLMObs, mock_logs):

def test_annotate_metrics(LLMObs):
with LLMObs.llm(model_name="test_model") as span:
LLMObs.annotate(span=span, metrics={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30})
assert json.loads(span.get_tag(METRICS)) == {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
LLMObs.annotate(span=span, metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30})
assert json.loads(span.get_tag(METRICS)) == {"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}


def test_annotate_metrics_wrong_type(LLMObs, mock_logs):
Expand Down
4 changes: 2 additions & 2 deletions tests/llmobs/test_llmobs_span_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def _completion_event():
]
},
},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192},
"metrics": {"input_tokens": 64, "output_tokens": 128, "total_tokens": 192},
}


Expand Down Expand Up @@ -78,7 +78,7 @@ def _chat_completion_event():
]
},
},
"metrics": {"prompt_tokens": 64, "completion_tokens": 128, "total_tokens": 192},
"metrics": {"input_tokens": 64, "output_tokens": 128, "total_tokens": 192},
}


Expand Down

0 comments on commit 07d2c16

Please sign in to comment.