From f590523f26c2e4b0324b732c8794f1a6c94e399c Mon Sep 17 00:00:00 2001 From: Yun Kim Date: Wed, 17 May 2023 15:00:24 -0400 Subject: [PATCH 1/3] Allow tokens or token arrays as input for Embeddings endpoint --- ddtrace/contrib/openai/patch.py | 2 +- docs/spelling_wordlist.txt | 1 + ...nai-embeddings-input-7feb1198fabac31b.yaml | 5 +++ tests/contrib/openai/test_openai.py | 26 ++++++++++++ ....test_embedding_array_of_token_arrays.json | 41 +++++++++++++++++++ ...st_openai.test_embedding_string_array.json | 40 ++++++++++++++++++ ...est_openai.test_embedding_token_array.json | 41 +++++++++++++++++++ 7 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml create mode 100644 tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json create mode 100644 tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json create mode 100644 tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json diff --git a/ddtrace/contrib/openai/patch.py b/ddtrace/contrib/openai/patch.py index fb8236096b7..829e745cc66 100644 --- a/ddtrace/contrib/openai/patch.py +++ b/ddtrace/contrib/openai/patch.py @@ -630,7 +630,7 @@ def handle_request(self, pin, integration, span, args, kwargs): if kw_attr == "input" and integration.is_pc_sampled_span(span): if isinstance(kwargs["input"], list): for idx, inp in enumerate(kwargs["input"]): - span.set_tag_str("openai.request.input.%d" % idx, integration.trunc(inp)) + span.set_tag_str("openai.request.input.%d" % idx, integration.trunc(str(inp))) else: span.set_tag("openai.request.%s" % kw_attr, kwargs[kw_attr]) else: diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index f6b76bbba2a..d1a66d925e0 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -134,6 +134,7 @@ mysqlclient mysqldb namespace obfuscator +openai opensearch opentracer opentracing diff --git a/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml b/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml new file mode 100644 index 00000000000..226875520fb --- /dev/null +++ b/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + openai: Resolved an issue where using an array of tokens or an array of token arrays as the + input for the Embeddings endpoint caused an AttributeError when attempting to parse request parameters. diff --git a/tests/contrib/openai/test_openai.py b/tests/contrib/openai/test_openai.py index fe667acaa35..d4470457871 100644 --- a/tests/contrib/openai/test_openai.py +++ b/tests/contrib/openai/test_openai.py @@ -547,6 +547,32 @@ def test_embedding(api_key_in_env, request_api_key, openai, openai_vcr, snapshot openai.Embedding.create(api_key=request_api_key, input="hello world", model="text-embedding-ada-002") +@pytest.mark.snapshot(ignores=["meta.http.useragent"]) +def test_embedding_string_array(openai, openai_vcr, snapshot_tracer): + if not hasattr(openai, "Embedding"): + pytest.skip("embedding not supported for this version of openai") + with openai_vcr.use_cassette("embedding.yaml"): + openai.Embedding.create(input=["hello world", "hello again"], model="text-embedding-ada-002") + + +@pytest.mark.snapshot(ignores=["meta.http.useragent"]) +def test_embedding_token_array(openai, openai_vcr, snapshot_tracer): + if not hasattr(openai, "Embedding"): + pytest.skip("embedding not supported for this version of openai") + with openai_vcr.use_cassette("embedding.yaml"): + openai.Embedding.create(input=[1111, 2222, 3333], model="text-embedding-ada-002") + + +@pytest.mark.snapshot(ignores=["meta.http.useragent"]) +def test_embedding_array_of_token_arrays(openai, openai_vcr, snapshot_tracer): + if not hasattr(openai, "Embedding"): + pytest.skip("embedding not supported for this version of openai") + with openai_vcr.use_cassette("embedding.yaml"): + openai.Embedding.create( + input=[[1111, 2222, 3333], [4444, 5555, 6666], [7777, 8888, 9999]], model="text-embedding-ada-002" + ) + + @pytest.mark.asyncio @pytest.mark.snapshot(ignores=["meta.http.useragent"]) @pytest.mark.parametrize("api_key_in_env", [True, False]) diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json new file mode 100644 index 00000000000..ba4e3da0d66 --- /dev/null +++ b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json @@ -0,0 +1,41 @@ +[[ + { + "name": "openai.request", + "service": "", + "resource": "embeddings/text-embedding-ada-002", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "api_base": "https://api.openai.com/v1", + "component": "openai", + "language": "python", + "openai.endpoint": "embeddings", + "openai.model": "text-embedding-ada-002", + "openai.organization.name": "datadog-4", + "openai.organization.ratelimit.requests.remaining": "2999", + "openai.request.input.0": "[1111, 2222, 3333]", + "openai.request.input.1": "[4444, 5555, 6666]", + "openai.request.input.2": "[7777, 8888, 9999]", + "openai.request.model": "text-embedding-ada-002", + "openai.user.api_key": "sk-...key", + "runtime-id": "b168eb19ef14414ca786ac99826ef9e0" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "openai.response.data.embedding-length": 1536, + "openai.response.data.num-embeddings": 1, + "openai.response.usage.prompt_tokens": 2, + "openai.response.usage.total_tokens": 2, + "process_id": 85925 + }, + "duration": 3655000, + "start": 1684349256345286000 + }]] diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json new file mode 100644 index 00000000000..4721bf4c5db --- /dev/null +++ b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json @@ -0,0 +1,40 @@ +[[ + { + "name": "openai.request", + "service": "", + "resource": "embeddings/text-embedding-ada-002", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "api_base": "https://api.openai.com/v1", + "component": "openai", + "language": "python", + "openai.endpoint": "embeddings", + "openai.model": "text-embedding-ada-002", + "openai.organization.name": "datadog-4", + "openai.organization.ratelimit.requests.remaining": "2999", + "openai.request.input.0": "hello world", + "openai.request.input.1": "hello again", + "openai.request.model": "text-embedding-ada-002", + "openai.user.api_key": "sk-...key", + "runtime-id": "b168eb19ef14414ca786ac99826ef9e0" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "openai.response.data.embedding-length": 1536, + "openai.response.data.num-embeddings": 1, + "openai.response.usage.prompt_tokens": 2, + "openai.response.usage.total_tokens": 2, + "process_id": 85925 + }, + "duration": 3517000, + "start": 1684349256264173000 + }]] diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json new file mode 100644 index 00000000000..afc14fc0ea0 --- /dev/null +++ b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json @@ -0,0 +1,41 @@ +[[ + { + "name": "openai.request", + "service": "", + "resource": "embeddings/text-embedding-ada-002", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "api_base": "https://api.openai.com/v1", + "component": "openai", + "language": "python", + "openai.endpoint": "embeddings", + "openai.model": "text-embedding-ada-002", + "openai.organization.name": "datadog-4", + "openai.organization.ratelimit.requests.remaining": "2999", + "openai.request.input.0": "1111", + "openai.request.input.1": "2222", + "openai.request.input.2": "3333", + "openai.request.model": "text-embedding-ada-002", + "openai.user.api_key": "sk-...key", + "runtime-id": "b168eb19ef14414ca786ac99826ef9e0" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "openai.response.data.embedding-length": 1536, + "openai.response.data.num-embeddings": 1, + "openai.response.usage.prompt_tokens": 2, + "openai.response.usage.total_tokens": 2, + "process_id": 85925 + }, + "duration": 3522000, + "start": 1684349256312922000 + }]] From e99259fef512e2ab50d391091acfac051bdb44f1 Mon Sep 17 00:00:00 2001 From: Yun Kim Date: Thu, 18 May 2023 12:09:36 -0400 Subject: [PATCH 2/3] Fix snapshot test api key expected --- ...openai.test_openai.test_embedding_array_of_token_arrays.json | 2 +- ....contrib.openai.test_openai.test_embedding_string_array.json | 2 +- ...s.contrib.openai.test_openai.test_embedding_token_array.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json index ba4e3da0d66..aa578de3f6a 100644 --- a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json +++ b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_array_of_token_arrays.json @@ -21,7 +21,7 @@ "openai.request.input.1": "[4444, 5555, 6666]", "openai.request.input.2": "[7777, 8888, 9999]", "openai.request.model": "text-embedding-ada-002", - "openai.user.api_key": "sk-...key", + "openai.user.api_key": "sk-...key>", "runtime-id": "b168eb19ef14414ca786ac99826ef9e0" }, "metrics": { diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json index 4721bf4c5db..6df9e36dedf 100644 --- a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json +++ b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_string_array.json @@ -20,7 +20,7 @@ "openai.request.input.0": "hello world", "openai.request.input.1": "hello again", "openai.request.model": "text-embedding-ada-002", - "openai.user.api_key": "sk-...key", + "openai.user.api_key": "sk-...key>", "runtime-id": "b168eb19ef14414ca786ac99826ef9e0" }, "metrics": { diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json index afc14fc0ea0..1b5635f2827 100644 --- a/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json +++ b/tests/snapshots/tests.contrib.openai.test_openai.test_embedding_token_array.json @@ -21,7 +21,7 @@ "openai.request.input.1": "2222", "openai.request.input.2": "3333", "openai.request.model": "text-embedding-ada-002", - "openai.user.api_key": "sk-...key", + "openai.user.api_key": "sk-...key>", "runtime-id": "b168eb19ef14414ca786ac99826ef9e0" }, "metrics": { From ac42320fd53f39f3aa2d0e42467a0df8f31833c1 Mon Sep 17 00:00:00 2001 From: Yun Kim Date: Thu, 18 May 2023 15:50:30 -0400 Subject: [PATCH 3/3] Fix releasenote --- .../notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml b/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml index 226875520fb..f04931b1642 100644 --- a/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml +++ b/releasenotes/notes/fix-openai-embeddings-input-7feb1198fabac31b.yaml @@ -1,5 +1,5 @@ --- fixes: - | - openai: Resolved an issue where using an array of tokens or an array of token arrays as the - input for the Embeddings endpoint caused an AttributeError when attempting to parse request parameters. + openai: Resolves an issue where using an array of tokens or an array of token arrays + for the Embeddings endpoint caused an AttributeError.