diff --git a/ddtrace/contrib/openai/patch.py b/ddtrace/contrib/openai/patch.py index fb8236096b7..829e745cc66 100644 --- a/ddtrace/contrib/openai/patch.py +++ b/ddtrace/contrib/openai/patch.py @@ -630,7 +630,7 @@ def handle_request(self, pin, integration, span, args, kwargs): if kw_attr == "input" and integration.is_pc_sampled_span(span): if isinstance(kwargs["input"], list): for idx, inp in enumerate(kwargs["input"]): - span.set_tag_str("openai.request.input.%d" % idx, integration.trunc(inp)) + span.set_tag_str("openai.request.input.%d" % idx, integration.trunc(str(inp))) else: span.set_tag("openai.request.%s" % kw_attr, kwargs[kw_attr]) else: diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index f6b76bbba2a..d1a66d925e0 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -134,6 +134,7 @@ mysqlclient mysqldb namespace obfuscator +openai opensearch opentracer opentracing diff --git a/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml b/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml new file mode 100644 index 00000000000..f04931b1642 --- /dev/null +++ b/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + openai: Resolves an issue where using an array of tokens or an array of token arrays + for the Embeddings endpoint caused an AttributeError. diff --git a/tests/contrib/openai/test_openai.py b/tests/contrib/openai/test_openai.py index fe667acaa35..d4470457871 100644 --- a/tests/contrib/openai/test_openai.py +++ b/tests/contrib/openai/test_openai.py @@ -547,6 +547,32 @@ def test_embedding(api_key_in_env, request_api_key, openai, openai_vcr, snapshot openai.Embedding.create(api_key=request_api_key, input="hello world", model="text-embedding-ada-002") +@pytest.mark.snapshot(ignores=["meta.http.useragent"]) +def test_embedding_string_array(openai, openai_vcr, snapshot_tracer): + if not hasattr(openai, "Embedding"): + pytest.skip("embedding not supported for this version of openai") + with openai_vcr.use_cassette("embedding.yaml"): + openai.Embedding.create(input=["hello world", "hello again"], model="text-embedding-ada-002") + + +@pytest.mark.snapshot(ignores=["meta.http.useragent"]) +def test_embedding_token_array(openai, openai_vcr, snapshot_tracer): + if not hasattr(openai, "Embedding"): + pytest.skip("embedding not supported for this version of openai") + with openai_vcr.use_cassette("embedding.yaml"): + openai.Embedding.create(input=[1111, 2222, 3333], model="text-embedding-ada-002") + + +@pytest.mark.snapshot(ignores=["meta.http.useragent"]) +def test_embedding_array_of_token_arrays(openai, openai_vcr, snapshot_tracer): + if not hasattr(openai, "Embedding"): + pytest.skip("embedding not supported for this version of openai") + with openai_vcr.use_cassette("embedding.yaml"): + openai.Embedding.create( + input=[[1111, 2222, 3333], [4444, 5555, 6666], [7777, 8888, 9999]], model="text-embedding-ada-002" + ) + + @pytest.mark.asyncio @pytest.mark.snapshot(ignores=["meta.http.useragent"]) @pytest.mark.parametrize("api_key_in_env", [True, False]) diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json new file mode 100644 index 00000000000..aa578de3f6a --- /dev/null +++ b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json @@ -0,0 +1,41 @@ +[[ + { + "name": "openai.request", + "service": "", + "resource": "embeddings/text-embedding-ada-002", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "api_base": "https://api.openai.com/v1", + "component": "openai", + "language": "python", + "openai.endpoint": "embeddings", + "openai.model": "text-embedding-ada-002", + "openai.organization.name": "datadog-4", + "openai.organization.ratelimit.requests.remaining": "2999", + "openai.request.input.0": "[1111, 2222, 3333]", + "openai.request.input.1": "[4444, 5555, 6666]", + "openai.request.input.2": "[7777, 8888, 9999]", + "openai.request.model": "text-embedding-ada-002", + "openai.user.api_key": "sk-...key>", + "runtime-id": "b168eb19ef14414ca786ac99826ef9e0" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "openai.response.data.embedding-length": 1536, + "openai.response.data.num-embeddings": 1, + "openai.response.usage.prompt_tokens": 2, + "openai.response.usage.total_tokens": 2, + "process_id": 85925 + }, + "duration": 3655000, + "start": 1684349256345286000 + }]] diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json new file mode 100644 index 00000000000..6df9e36dedf --- /dev/null +++ b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json @@ -0,0 +1,40 @@ +[[ + { + "name": "openai.request", + "service": "", + "resource": "embeddings/text-embedding-ada-002", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "api_base": "https://api.openai.com/v1", + "component": "openai", + "language": "python", + "openai.endpoint": "embeddings", + "openai.model": "text-embedding-ada-002", + "openai.organization.name": "datadog-4", + "openai.organization.ratelimit.requests.remaining": "2999", + "openai.request.input.0": "hello world", + "openai.request.input.1": "hello again", + "openai.request.model": "text-embedding-ada-002", + "openai.user.api_key": "sk-...key>", + "runtime-id": "b168eb19ef14414ca786ac99826ef9e0" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "openai.response.data.embedding-length": 1536, + "openai.response.data.num-embeddings": 1, + "openai.response.usage.prompt_tokens": 2, + "openai.response.usage.total_tokens": 2, + "process_id": 85925 + }, + "duration": 3517000, + "start": 1684349256264173000 + }]] diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json new file mode 100644 index 00000000000..1b5635f2827 --- /dev/null +++ b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json @@ -0,0 +1,41 @@ +[[ + { + "name": "openai.request", + "service": "", + "resource": "embeddings/text-embedding-ada-002", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "api_base": "https://api.openai.com/v1", + "component": "openai", + "language": "python", + "openai.endpoint": "embeddings", + "openai.model": "text-embedding-ada-002", + "openai.organization.name": "datadog-4", + "openai.organization.ratelimit.requests.remaining": "2999", + "openai.request.input.0": "1111", + "openai.request.input.1": "2222", + "openai.request.input.2": "3333", + "openai.request.model": "text-embedding-ada-002", + "openai.user.api_key": "sk-...key>", + "runtime-id": "b168eb19ef14414ca786ac99826ef9e0" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "openai.response.data.embedding-length": 1536, + "openai.response.data.num-embeddings": 1, + "openai.response.usage.prompt_tokens": 2, + "openai.response.usage.total_tokens": 2, + "process_id": 85925 + }, + "duration": 3522000, + "start": 1684349256312922000 + }]]