From d4e4ec1656c157094e6796f70a76ba88facb51e0 Mon Sep 17 00:00:00 2001
From: Yahya Mouman <103438582+yahya-mouman@users.noreply.github.com>
Date: Fri, 21 Nov 2025 17:21:31 +0100
Subject: [PATCH 1/6] add prompt tracking to hallucination
---
.../evaluations/managed_evaluations/_index.md | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/content/en/llm_observability/evaluations/managed_evaluations/_index.md b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
index 87f693a121e..92fc895ee0e 100644
--- a/content/en/llm_observability/evaluations/managed_evaluations/_index.md
+++ b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
@@ -171,16 +171,16 @@ This check identifies instances where the LLM makes a claim that disagrees with
| Evaluated on Output | Evaluated using LLM | Hallucination flags any output that disagrees with the context provided to the LLM. |
##### Instrumentation
-
-In order to take advantage of Hallucination detection, you will need to annotate LLM spans with the user query and context:
+You can use [Prompt Tracking][6] annotations to track your prompts and make them available for hallucination configuration. Annotate your LLM spans with the user query and context so that hallucination detection can evaluate model outputs against the retrieved data.
{{< code-block lang="python" >}}
from ddtrace.llmobs import LLMObs
-from ddtrace.llmobs.utils import Prompt
+from ddtrace.llmobs.types import Prompt
# if your llm call is auto-instrumented...
with LLMObs.annotation_context(
    prompt=Prompt(
+        template="Generate an answer to this question: {user_question}. Only answer based on the information from this article: {article}",
        variables={"user_question": user_question, "article": article},
        rag_query_variables=["user_question"],
        rag_context_variables=["article"]
@@ -195,18 +195,35 @@ def generate_answer():
    ...
    LLMObs.annotate(
        prompt=Prompt(
+            template="Generate an answer to this question: {user_question}. Only answer based on the information from this article: {article}",
            variables={"user_question": user_question, "article": article},
            rag_query_variables=["user_question"],
            rag_context_variables=["article"]
        ),
    )
{{< /code-block >}}
-
-The variables dictionary should contain the key-value pairs your app uses to construct the LLM input prompt (for example, the messages for an OpenAI chat completion request). Set `rag_query_variables` and `rag_context_variables` to indicate which variables constitute the query and the context, respectively. A list of variables is allowed to account for cases where multiple variables make up the context (for example, multiple articles retrieved from a knowledge base).
+The `variables` dictionary should contain the key–value pairs your app uses to construct the LLM input prompt (for example, the messages for an OpenAI chat completion request). Use `rag_query_variables` and `rag_context_variables` to specify which variables represent the user query and which represent the retrieval context. A list of variables is allowed to account for cases where multiple variables make up the context (for example, multiple articles retrieved from a knowledge base).
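+To illustrate the multiple-context case, here is a minimal, hypothetical sketch; the variable names are illustrative, and `user_question`, `first_article`, and `second_article` are assumed to come from your application's retrieval step:
+{{< code-block lang="python" >}}
+from ddtrace.llmobs.types import Prompt
+
+# Hypothetical sketch: two retrieved articles together form the RAG context,
+# while the user's question is the RAG query.
+prompt = Prompt(
+    variables={
+        "user_question": user_question,
+        "first_article": first_article,
+        "second_article": second_article,
+    },
+    rag_query_variables=["user_question"],
+    rag_context_variables=["first_article", "second_article"],
+)
+{{< /code-block >}}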
Hallucination detection does not run if the RAG query, the RAG context, or the span output is empty.
+Prompt Tracking requires a template to be set so that your prompt versions can be monitored and tracked.
-You can find more examples of instrumentation in the [SDK documentation][6].
+You can find more examples of prompt tracking and instrumentation in the [SDK documentation][6].
##### Hallucination configuration
Hallucination detection is only available for OpenAI.
@@ -337,7 +338,7 @@ This check ensures that sensitive information is handled appropriately and secur
[3]: https://app.datadoghq.com/llm/applications
[4]: /security/sensitive_data_scanner/
[5]: https://docs.datadoghq.com/api/latest/ip-ranges/
-[6]: https://docs.datadoghq.com/llm_observability/setup/sdk/
+[6]: https://docs.datadoghq.com/llm_observability/instrumentation/sdk?tab=python#prompt-tracking
[7]: https://app.datadoghq.com/dash/integration/llm_evaluations_token_usage
[9]: https://learnprompting.org/docs/prompt_hacking/offensive_measures/simple-instruction-attack
[10]: https://owasp.org/www-community/attacks/Code_Injection
From 70268a41b7761b09bf5e966ef3fbfe701a209803 Mon Sep 17 00:00:00 2001
From: Yahya Mouman <103438582+yahya-mouman@users.noreply.github.com>
Date: Fri, 21 Nov 2025 17:27:40 +0100
Subject: [PATCH 2/6] add version notice
---
.../evaluations/managed_evaluations/_index.md | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/content/en/llm_observability/evaluations/managed_evaluations/_index.md b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
index 92fc895ee0e..b030fdabb3e 100644
--- a/content/en/llm_observability/evaluations/managed_evaluations/_index.md
+++ b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
@@ -180,7 +180,8 @@ from ddtrace.llmobs.types import Prompt
# if your llm call is auto-instrumented...
with LLMObs.annotation_context(
    prompt=Prompt(
+        id="generate_answer_prompt",
        template="Generate an answer to this question: {user_question}. Only answer based on the information from this article: {article}",
        variables={"user_question": user_question, "article": article},
        rag_query_variables=["user_question"],
        rag_context_variables=["article"]
@@ -195,7 +196,8 @@ def generate_answer():
    ...
    LLMObs.annotate(
        prompt=Prompt(
+            id="generate_answer_prompt",
            template="Generate an answer to this question: {user_question}. Only answer based on the information from this article: {article}",
            variables={"user_question": user_question, "article": article},
            rag_query_variables=["user_question"],
            rag_context_variables=["article"]
@@ -205,8 +207,8 @@ def generate_answer():
The `variables` dictionary should contain the key–value pairs your app uses to construct the LLM input prompt (for example, the messages for an OpenAI chat completion request). Use `rag_query_variables` and `rag_context_variables` to specify which variables represent the user query and which represent the retrieval context. A list of variables is allowed to account for cases where multiple variables make up the context (for example, multiple articles retrieved from a knowledge base).
Hallucination detection does not run if the RAG query, the RAG context, or the span output is empty.
-Prompt Tracking requires a template to be set so that your prompt versions can be monitored and tracked.
+Prompt Tracking is available in Python starting from version 3.15 of the SDK. It also requires an id for the prompt and a template to be set so that your prompt versions can be monitored and tracked.
You can find more examples of prompt tracking and instrumentation in the [SDK documentation][6].
##### Hallucination configuration
From 47fbbc2b8d8892de283f3deeddbc21a856a64195 Mon Sep 17 00:00:00 2001
From: Yahya Mouman <103438582+yahya-mouman@users.noreply.github.com>
Date: Fri, 21 Nov 2025 17:41:30 +0100
Subject: [PATCH 3/6] Update
content/en/llm_observability/evaluations/managed_evaluations/_index.md
Co-authored-by: Heston Hoffman
---
.../llm_observability/evaluations/managed_evaluations/_index.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/content/en/llm_observability/evaluations/managed_evaluations/_index.md b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
index b030fdabb3e..d2feca7c12c 100644
--- a/content/en/llm_observability/evaluations/managed_evaluations/_index.md
+++ b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
@@ -208,7 +208,7 @@ The `variables` dictionary should contain the key–value pairs your app uses to
Hallucination detection does not run if the RAG query, the RAG context, or the span output is empty.
-Prompt Tracking is available in Python starting from version 3.15 of the SDK. It also requires an id for the prompt and a template to be set so that your prompt versions can be monitored and tracked.
+Prompt Tracking is available in Python starting from version 3.15 of the SDK. It also requires an ID for the prompt and a template to be set so that your prompt versions can be monitored and tracked.
You can find more examples of prompt tracking and instrumentation in the [SDK documentation][6].
##### Hallucination configuration
From 9a979e4db3dd79f9b7fb9c48efc3cd6835bc7c37 Mon Sep 17 00:00:00 2001
From: Yahya Mouman <103438582+yahya-mouman@users.noreply.github.com>
Date: Fri, 21 Nov 2025 17:41:46 +0100
Subject: [PATCH 4/6] Update
content/en/llm_observability/evaluations/managed_evaluations/_index.md
Co-authored-by: Heston Hoffman
---
.../llm_observability/evaluations/managed_evaluations/_index.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/content/en/llm_observability/evaluations/managed_evaluations/_index.md b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
index d2feca7c12c..86d0520553a 100644
--- a/content/en/llm_observability/evaluations/managed_evaluations/_index.md
+++ b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
@@ -204,7 +204,7 @@ def generate_answer():
        ),
    )
{{< /code-block >}}
-The `variables` dictionary should contain the key–value pairs your app uses to construct the LLM input prompt (for example, the messages for an OpenAI chat completion request). Use `rag_query_variables` and `rag_context_variables` to specify which variables represent the user query and which represent the retrieval context. A list of variables is allowed to account for cases where multiple variables make up the context (for example, multiple articles retrieved from a knowledge base).
+The `variables` dictionary should contain the key-value pairs your app uses to construct the LLM input prompt (for example, the messages for an OpenAI chat completion request). Use `rag_query_variables` and `rag_context_variables` to specify which variables represent the user query and which represent the retrieval context. A list of variables is allowed to account for cases where multiple variables make up the context (for example, multiple articles retrieved from a knowledge base).
Hallucination detection does not run if the RAG query, the RAG context, or the span output is empty.
From 56e753c293ff5430e0d9bba7167949a4510151a1 Mon Sep 17 00:00:00 2001
From: Yahya Mouman <103438582+yahya-mouman@users.noreply.github.com>
Date: Fri, 21 Nov 2025 17:42:07 +0100
Subject: [PATCH 5/6] Update
content/en/llm_observability/evaluations/managed_evaluations/_index.md
Co-authored-by: Heston Hoffman
---
.../llm_observability/evaluations/managed_evaluations/_index.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/content/en/llm_observability/evaluations/managed_evaluations/_index.md b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
index 86d0520553a..5aed252aace 100644
--- a/content/en/llm_observability/evaluations/managed_evaluations/_index.md
+++ b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
@@ -340,7 +340,7 @@ This check ensures that sensitive information is handled appropriately and secur
[3]: https://app.datadoghq.com/llm/applications
[4]: /security/sensitive_data_scanner/
[5]: https://docs.datadoghq.com/api/latest/ip-ranges/
-[6]: https://docs.datadoghq.com/llm_observability/instrumentation/sdk?tab=python#prompt-tracking
+[6]: /llm_observability/instrumentation/sdk?tab=python#prompt-tracking
[7]: https://app.datadoghq.com/dash/integration/llm_evaluations_token_usage
[9]: https://learnprompting.org/docs/prompt_hacking/offensive_measures/simple-instruction-attack
[10]: https://owasp.org/www-community/attacks/Code_Injection
From cee3b6f89f67f8b1c3e026085e499742f4891eaa Mon Sep 17 00:00:00 2001
From: Yahya Mouman <103438582+yahya-mouman@users.noreply.github.com>
Date: Fri, 21 Nov 2025 17:42:33 +0100
Subject: [PATCH 6/6] Update
content/en/llm_observability/evaluations/managed_evaluations/_index.md
Co-authored-by: Heston Hoffman
---
.../llm_observability/evaluations/managed_evaluations/_index.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/content/en/llm_observability/evaluations/managed_evaluations/_index.md b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
index 5aed252aace..500a9aa32ab 100644
--- a/content/en/llm_observability/evaluations/managed_evaluations/_index.md
+++ b/content/en/llm_observability/evaluations/managed_evaluations/_index.md
@@ -339,7 +339,7 @@ This check ensures that sensitive information is handled appropriately and secur
[2]: https://app.datadoghq.com/llm/evaluations
[3]: https://app.datadoghq.com/llm/applications
[4]: /security/sensitive_data_scanner/
-[5]: https://docs.datadoghq.com/api/latest/ip-ranges/
+[5]: /api/latest/ip-ranges/
[6]: /llm_observability/instrumentation/sdk?tab=python#prompt-tracking
[7]: https://app.datadoghq.com/dash/integration/llm_evaluations_token_usage
[9]: https://learnprompting.org/docs/prompt_hacking/offensive_measures/simple-instruction-attack