Skip to content

Commit

Permalink
chore(llmobs): token metrics name changes (#9657)
Browse files Browse the repository at this point in the history
Makes the following updates to the metric key names submitted to LLM
Observability for the openai & bedrock integrations.

`prompt_tokens` -> `input_tokens`
`completion_tokens` -> `output_tokens`

The backend already has the changes in place to accept these updated key
names so a hard cutover is OK.

A release note is not needed since the metric key names used by our
integrations (openai, langchain, bedrock) when submitting data to the LLM
Obs backend are an internal contract between the integration and the backend.

When users set metric key names for manually created spans, our
documentation already instructs them to use input/output terminology.

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: lievan <evan.li@datadoghq.com>
Co-authored-by: kyle <kyle@verhoog.ca>
  • Loading branch information
3 people committed Jul 11, 2024
1 parent 2ea8da9 commit e874c50
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 25 deletions.
1 change: 0 additions & 1 deletion ddtrace/llmobs/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
)

LANGCHAIN_APM_SPAN_NAME = "langchain.request"
OPENAI_APM_SPAN_NAME = "openai.request"

INPUT_TOKENS_METRIC_KEY = "input_tokens"
OUTPUT_TOKENS_METRIC_KEY = "output_tokens"
Expand Down
3 changes: 0 additions & 3 deletions ddtrace/llmobs/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from ddtrace.internal.logger import get_logger
from ddtrace.llmobs._constants import LANGCHAIN_APM_SPAN_NAME
from ddtrace.llmobs._constants import ML_APP
from ddtrace.llmobs._constants import OPENAI_APM_SPAN_NAME
from ddtrace.llmobs._constants import PARENT_ID_KEY
from ddtrace.llmobs._constants import PROPAGATED_PARENT_ID_KEY
from ddtrace.llmobs._constants import SESSION_ID
Expand Down Expand Up @@ -41,8 +40,6 @@ def _get_llmobs_parent_id(span: Span) -> Optional[str]:
def _get_span_name(span: Span) -> str:
if span.name == LANGCHAIN_APM_SPAN_NAME and span.resource != "":
return span.resource
elif span.name == OPENAI_APM_SPAN_NAME and span.resource != "":
return "openai.{}".format(span.resource)
return span.name


Expand Down
16 changes: 8 additions & 8 deletions tests/contrib/anthropic/test_anthropic_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def test_tools_sync(self, anthropic, ddtrace_global_config, mock_llmobs_writer,
},
{"content": WEATHER_OUTPUT_MESSAGE_2, "role": "assistant"},
],
metadata={"max_tokens": 200.0},
metadata={"temperature": 1.0, "max_tokens": 200.0},
token_metrics={"input_tokens": 599, "output_tokens": 152, "total_tokens": 751},
tags={"ml_app": "<ml-app-name>"},
)
Expand Down Expand Up @@ -355,7 +355,7 @@ def test_tools_sync(self, anthropic, ddtrace_global_config, mock_llmobs_writer,
"role": "assistant",
}
],
metadata={"max_tokens": 500.0},
metadata={"temperature": 1.0, "max_tokens": 500.0},
token_metrics={"input_tokens": 768, "output_tokens": 29, "total_tokens": 797},
tags={"ml_app": "<ml-app-name>"},
)
Expand Down Expand Up @@ -394,7 +394,7 @@ async def test_tools_async(self, anthropic, ddtrace_global_config, mock_llmobs_w
},
{"content": WEATHER_OUTPUT_MESSAGE_2, "role": "assistant"},
],
metadata={"max_tokens": 200.0},
metadata={"temperature": 1.0, "max_tokens": 200.0},
token_metrics={"input_tokens": 599, "output_tokens": 152, "total_tokens": 751},
tags={"ml_app": "<ml-app-name>"},
)
Expand Down Expand Up @@ -447,7 +447,7 @@ async def test_tools_async(self, anthropic, ddtrace_global_config, mock_llmobs_w
"role": "assistant",
}
],
metadata={"max_tokens": 500.0},
metadata={"temperature": 1.0, "max_tokens": 500.0},
token_metrics={"input_tokens": 768, "output_tokens": 29, "total_tokens": 797},
tags={"ml_app": "<ml-app-name>"},
)
Expand Down Expand Up @@ -496,7 +496,7 @@ def test_tools_sync_stream(self, anthropic, ddtrace_global_config, mock_llmobs_w
{"content": message[0]["text"], "role": "assistant"},
{"content": message[1]["text"], "role": "assistant"},
],
metadata={"max_tokens": 200.0},
metadata={"temperature": 1.0, "max_tokens": 200.0},
token_metrics={"input_tokens": 599, "output_tokens": 135, "total_tokens": 734},
tags={"ml_app": "<ml-app-name>"},
)
Expand Down Expand Up @@ -546,7 +546,7 @@ def test_tools_sync_stream(self, anthropic, ddtrace_global_config, mock_llmobs_w
"role": "assistant",
}
],
metadata={"max_tokens": 500.0},
metadata={"temperature": 1.0, "max_tokens": 500.0},
token_metrics={"input_tokens": 762, "output_tokens": 33, "total_tokens": 795},
tags={"ml_app": "<ml-app-name>"},
)
Expand Down Expand Up @@ -590,7 +590,7 @@ async def test_tools_async_stream_helper(
{"content": message.content[0].text, "role": "assistant"},
{"content": WEATHER_OUTPUT_MESSAGE_2, "role": "assistant"},
],
metadata={"max_tokens": 200.0},
metadata={"temperature": 1.0, "max_tokens": 200.0},
token_metrics={"input_tokens": 599, "output_tokens": 146, "total_tokens": 745},
tags={"ml_app": "<ml-app-name>"},
)
Expand Down Expand Up @@ -642,7 +642,7 @@ async def test_tools_async_stream_helper(
output_messages=[
{"content": "\n\nThe current weather in San Francisco, CA is 73°F.", "role": "assistant"}
],
metadata={"max_tokens": 500.0},
metadata={"temperature": 1.0, "max_tokens": 500.0},
token_metrics={"input_tokens": 762, "output_tokens": 18, "total_tokens": 780},
tags={"ml_app": "<ml-app-name>"},
)
Expand Down
26 changes: 13 additions & 13 deletions tests/contrib/openai/test_openai_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_completion(self, openai, ddtrace_global_config, mock_llmobs_writer, moc
model_provider="openai",
input_messages=[{"content": "Hello world"}],
output_messages=[{"content": ", relax!” I said to my laptop"}, {"content": " (1"}],
metadata={"temperature": 0.8, "max_tokens": 10, "n": 2, "stop": ".", "user": "ddtrace-test"},
metadata={"temperature": 0.8, "max_tokens": 10},
token_metrics={"input_tokens": 2, "output_tokens": 12, "total_tokens": 14},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand All @@ -58,7 +58,7 @@ def test_completion_stream(self, openai, ddtrace_global_config, mock_llmobs_writ
model_provider="openai",
input_messages=[{"content": "Hello world"}],
output_messages=[{"content": expected_completion}],
metadata={"stream": True},
metadata={"temperature": 0},
token_metrics={"input_tokens": 2, "output_tokens": 16, "total_tokens": 18},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -96,7 +96,7 @@ def test_chat_completion(self, openai, ddtrace_global_config, mock_llmobs_writer
model_provider="openai",
input_messages=input_messages,
output_messages=[{"role": "assistant", "content": choice.message.content} for choice in resp.choices],
metadata={"top_p": 0.9, "n": 2, "user": "ddtrace-test"},
metadata={"temperature": 0},
token_metrics={"input_tokens": 57, "output_tokens": 34, "total_tokens": 91},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -134,7 +134,7 @@ async def test_chat_completion_stream(self, openai, ddtrace_global_config, mock_
model_provider="openai",
input_messages=input_messages,
output_messages=[{"content": expected_completion, "role": "assistant"}],
metadata={"stream": True, "user": "ddtrace-test"},
metadata={"temperature": 0},
token_metrics={"input_tokens": 8, "output_tokens": 12, "total_tokens": 20},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -167,7 +167,7 @@ def test_chat_completion_function_call(self, openai, ddtrace_global_config, mock
model_provider="openai",
input_messages=[{"content": chat_completion_input_description, "role": "user"}],
output_messages=[{"content": expected_output, "role": "assistant"}],
metadata={"function_call": "auto", "user": "ddtrace-test"},
metadata={"temperature": 0},
token_metrics={"input_tokens": 157, "output_tokens": 57, "total_tokens": 214},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -204,7 +204,7 @@ def test_chat_completion_function_call_stream(self, openai, ddtrace_global_confi
model_provider="openai",
input_messages=[{"content": chat_completion_input_description, "role": "user"}],
output_messages=[{"content": expected_output, "role": "assistant"}],
metadata={"stream": True, "user": "ddtrace-test", "function_call": "auto"},
metadata={"temperature": 0},
token_metrics={"input_tokens": 63, "output_tokens": 33, "total_tokens": 96},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -232,7 +232,7 @@ def test_chat_completion_tool_call(self, openai, ddtrace_global_config, mock_llm
model_provider="openai",
input_messages=[{"content": chat_completion_input_description, "role": "user"}],
output_messages=[{"content": expected_output, "role": "assistant"}],
metadata={"tool_choice": "auto", "user": "ddtrace-test"},
metadata={"temperature": 0},
token_metrics={"input_tokens": 157, "output_tokens": 57, "total_tokens": 214},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -339,7 +339,7 @@ def test_completion(self, openai, ddtrace_global_config, mock_llmobs_writer, moc
model_provider="openai",
input_messages=[{"content": "Hello world"}],
output_messages=[{"content": ", relax!” I said to my laptop"}, {"content": " (1"}],
metadata={"temperature": 0.8, "max_tokens": 10, "n": 2, "stop": ".", "user": "ddtrace-test"},
metadata={"temperature": 0.8, "max_tokens": 10},
token_metrics={"input_tokens": 2, "output_tokens": 12, "total_tokens": 14},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -367,7 +367,7 @@ def test_completion_stream(self, openai, ddtrace_global_config, mock_llmobs_writ
model_provider="openai",
input_messages=[{"content": "Hello world"}],
output_messages=[{"content": expected_completion}],
metadata={"stream": True},
metadata={"temperature": 0},
token_metrics={"input_tokens": 2, "output_tokens": 2, "total_tokens": 4},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -404,7 +404,7 @@ def test_chat_completion(self, openai, ddtrace_global_config, mock_llmobs_writer
model_provider="openai",
input_messages=input_messages,
output_messages=[{"role": "assistant", "content": choice.message.content} for choice in resp.choices],
metadata={"top_p": 0.9, "n": 2, "user": "ddtrace-test"},
metadata={"temperature": 0},
token_metrics={"input_tokens": 57, "output_tokens": 34, "total_tokens": 91},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -443,7 +443,7 @@ def test_chat_completion_stream(self, openai, ddtrace_global_config, mock_llmobs
model_provider="openai",
input_messages=input_messages,
output_messages=[{"content": expected_completion, "role": "assistant"}],
metadata={"stream": True, "user": "ddtrace-test"},
metadata={"temperature": 0},
token_metrics={"input_tokens": 8, "output_tokens": 8, "total_tokens": 16},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -475,7 +475,7 @@ def test_chat_completion_function_call(self, openai, ddtrace_global_config, mock
model_provider="openai",
input_messages=[{"content": chat_completion_input_description, "role": "user"}],
output_messages=[{"content": expected_output, "role": "assistant"}],
metadata={"function_call": "auto", "user": "ddtrace-test"},
metadata={"temperature": 0},
token_metrics={"input_tokens": 157, "output_tokens": 57, "total_tokens": 214},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down Expand Up @@ -510,7 +510,7 @@ def test_chat_completion_tool_call(self, openai, ddtrace_global_config, mock_llm
"role": "assistant",
}
],
metadata={"user": "ddtrace-test"},
metadata={"temperature": 0},
token_metrics={"input_tokens": 157, "output_tokens": 57, "total_tokens": 214},
tags={"ml_app": "<ml-app-name>"},
integration="openai",
Expand Down

0 comments on commit e874c50

Please sign in to comment.