DataDog · aniszoubiramar · Nov 20, 2025 · Nov 17, 2025 · Nov 19, 2025
@@ -1713,6 +1713,10 @@ def submit_evaluation(
                 error = "invalid_metric_label"
                 raise ValueError("label must be the specified name of the evaluation metric.")
 
+            if "." in label:
+                error = "invalid_label_value"
+                raise ValueError("label value must not contain a '.'.")
+
             metric_type = metric_type.lower()
             if metric_type not in ("categorical", "score", "boolean"):
                 error = "invalid_metric_type"

@@ -0,0 +1,4 @@
+fixes:
+  - |
+    LLM Observability: This fix resolves an issue where evaluation-metric labels containing dots could be interpreted as nested objects. ``submit_evaluation()`` now validates the label and raises a ``ValueError`` with a clear error message when the label contains a ``.``; use a label name without dots instead.
+
@@ -1630,6 +1630,13 @@ def test_submit_evaluation_empty_label_raises_error(llmobs, mock_llmobs_logs):
         )
 
 
+def test_submit_evaluation_label_value_with_a_period_raises_error(llmobs, mock_llmobs_logs):
+    with pytest.raises(ValueError, match="label value must not contain a '.'."):
+        llmobs.submit_evaluation(
+            span={"span_id": "123", "trace_id": "456"}, label="toxicity.0", metric_type="categorical", value="high"
+        )
+
+
 def test_submit_evaluation_incorrect_metric_type_raises_error(llmobs, mock_llmobs_logs):
     with pytest.raises(ValueError, match="metric_type must be one of 'categorical', 'score', or 'boolean'."):
         llmobs.submit_evaluation(