From f999e50760877d80e777a316c4e3af945c5d05f4 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Mon, 28 Oct 2024 13:47:04 -0400 Subject: [PATCH 01/12] Update evaluate to allow optional params + add tests --- .../ai/evaluation/_evaluate/_evaluate.py | 4 +- .../tests/e2etests/test_evaluate.py | 47 +++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py index 689ce162122a..011a83759b45 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py @@ -287,7 +287,9 @@ def _validate_columns_for_evaluators( # Ignore the missing fields if "conversation" presents in the input data missing_inputs = [] else: - missing_inputs = [col for col in evaluator_params if col not in new_df.columns] + optional_params = evaluator._OPTIONAL_PARAMS if hasattr(evaluator, "_OPTIONAL_PARAMS") else [] + excluded_params = set(new_df.columns).union(optional_params) + missing_inputs = [col for col in evaluator_params if col not in excluded_params] # If "conversation" is the only parameter and it is missing, keep it in the missing inputs # Otherwise, remove it from the missing inputs diff --git a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py index 948660387773..9a4e3116f380 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py @@ -26,6 +26,12 @@ def data_file(): return os.path.join(data_path, "evaluate_test_data.jsonl") +@pytest.fixture +def data_file_no_query(): + data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data") + return os.path.join(data_path, "evaluate_test_data_no_query.jsonl") + + @pytest.fixture def data_convo_file(): data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data") @@ -98,8 +104,7 @@ def _get_run_from_run_history(flow_run_id, ml_client, project_scope): @pytest.mark.usefixtures("recording_injection", "recorded_test") @pytest.mark.localtest class TestEvaluate: - @pytest.mark.skip(reason="Temporary skip to merge 37201, will re-enable in subsequent pr") - def test_evaluate_with_groundedness_evaluator(self, model_config, data_file): + def test_evaluate_with_groundedness_evaluator_with_query(self, model_config, data_file): # data input_data = pd.read_json(data_file, lines=True) @@ -131,7 +136,43 @@ def test_evaluate_with_groundedness_evaluator(self, model_config, data_file): ) assert metrics.get("f1_score.f1_score") == list_mean_nan_safe(row_result_df["outputs.f1_score.f1_score"]) - assert row_result_df["outputs.grounded.groundedness"][2] in [4, 5] + assert row_result_df["outputs.grounded.groundedness"][2] in [3, 4, 5] + assert row_result_df["outputs.f1_score.f1_score"][2] == 1 + assert result["studio_url"] is None + + def test_evaluate_with_groundedness_evaluator_without_query(self, model_config, data_file_no_query): + # data + input_data = pd.read_json(data_file_no_query, lines=True) + + groundedness_eval = GroundednessEvaluator(model_config) + f1_score_eval = F1ScoreEvaluator() + + # run the evaluation + result = evaluate( + data=data_file_no_query, + evaluators={"grounded": groundedness_eval, "f1_score": f1_score_eval}, + ) + + row_result_df = pd.DataFrame(result["rows"]) + metrics = 
result["metrics"] + + # validate the results + assert result is not None + assert result["rows"] is not None + assert row_result_df.shape[0] == len(input_data) + + assert "outputs.grounded.groundedness" in row_result_df.columns.to_list() + assert "outputs.f1_score.f1_score" in row_result_df.columns.to_list() + + assert "grounded.groundedness" in metrics.keys() + assert "f1_score.f1_score" in metrics.keys() + + assert metrics.get("grounded.groundedness") == list_mean_nan_safe( + row_result_df["outputs.grounded.groundedness"] + ) + assert metrics.get("f1_score.f1_score") == list_mean_nan_safe(row_result_df["outputs.f1_score.f1_score"]) + + assert row_result_df["outputs.grounded.groundedness"][2] in [3, 4, 5] assert row_result_df["outputs.f1_score.f1_score"][2] == 1 assert result["studio_url"] is None From aa3118d0e0185c4e2be0aa31e5a8d1dfbb711257 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Mon, 28 Oct 2024 13:49:26 -0400 Subject: [PATCH 02/12] Record tests --- sdk/evaluation/azure-ai-evaluation/assets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/assets.json b/sdk/evaluation/azure-ai-evaluation/assets.json index 7144de427f88..99fe29491313 100644 --- a/sdk/evaluation/azure-ai-evaluation/assets.json +++ b/sdk/evaluation/azure-ai-evaluation/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/evaluation/azure-ai-evaluation", - "Tag": "python/evaluation/azure-ai-evaluation_f0444ef220" + "Tag": "python/evaluation/azure-ai-evaluation_2dcae6ba0e" } From 4aca60175c5223b74612af4c59a9a8d5abf361ae Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 16:15:45 -0400 Subject: [PATCH 03/12] Exclude optional params from 'missing inputs' warning --- .../azure/ai/evaluation/_evaluate/_evaluate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py index 448979c71cee..37d443647c89 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py @@ -287,7 +287,9 @@ def _validate_columns_for_evaluators( # Ignore the missing fields if "conversation" presents in the input data missing_inputs = [] else: - optional_params = evaluator._OPTIONAL_PARAMS if hasattr(evaluator, "_OPTIONAL_PARAMS") else [] + optional_params = ( + evaluator._OPTIONAL_PARAMS if hasattr(evaluator, "_OPTIONAL_PARAMS") else [] # pylint: disable=protected-access + ) excluded_params = set(new_df.columns).union(optional_params) missing_inputs = [col for col in evaluator_params if col not in excluded_params] From 15efb4db5c8d0258549e0a32e574b0c6473d2756 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 16:52:53 -0400 Subject: [PATCH 04/12] Add tests --- .../azure-ai-evaluation/tests/conftest.py | 9 ++ .../tests/e2etests/test_evaluate.py | 133 +++++++++++++----- 2 files changed, 104 insertions(+), 38 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/tests/conftest.py b/sdk/evaluation/azure-ai-evaluation/tests/conftest.py index 4c27d1cf0c90..f4dfe7ee6a38 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/conftest.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/conftest.py @@ -142,11 +142,20 @@ def live_connection_file_values(): add_general_regex_sanitizer(regex=project_scope["project_name"], 
value=SanitizedValues.WORKSPACE_NAME) add_general_regex_sanitizer(regex=model_config["azure_endpoint"], value=mock_model_config["azure_endpoint"]) + def promptflow_root_run_id_sanitizer(): + """Sanitize the promptflow service isolation values.""" + add_general_regex_sanitizer( + value="root_run_id", + regex=r'"root_run_id": "azure_ai_evaluation_evaluators_common_base_eval_asyncevaluatorbase_[^"]+"', + replacement='"root_run_id": "azure_ai_evaluation_evaluators_common_base_eval_asyncevaluatorbase_SANITIZED"' + ) + azure_workspace_triad_sanitizer() azureopenai_connection_sanitizer() openai_stainless_default_headers() azure_ai_generative_sanitizer() live_connection_file_values() + promptflow_root_run_id_sanitizer() @pytest.fixture diff --git a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py index d2160a21d5db..bd3e76ef266b 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py @@ -119,7 +119,8 @@ def _get_run_from_run_history(flow_run_id, ml_client, project_scope): @pytest.mark.usefixtures("recording_injection", "recorded_test") @pytest.mark.localtest class TestEvaluate: - def test_evaluate_with_groundedness_evaluator_with_query(self, model_config, data_file): + @pytest.mark.skip(reason="Temporary skip to merge 37201, will re-enable in subsequent pr") + def test_evaluate_with_groundedness_evaluator(self, model_config, data_file): # data input_data = pd.read_json(data_file, lines=True) @@ -151,43 +152,7 @@ def test_evaluate_with_groundedness_evaluator_with_query(self, model_config, dat ) assert metrics.get("f1_score.f1_score") == list_mean_nan_safe(row_result_df["outputs.f1_score.f1_score"]) - assert row_result_df["outputs.grounded.groundedness"][2] in [3, 4, 5] - assert row_result_df["outputs.f1_score.f1_score"][2] == 1 - assert result["studio_url"] is None - - def test_evaluate_with_groundedness_evaluator_without_query(self, model_config, data_file_no_query): - # data - input_data = pd.read_json(data_file_no_query, lines=True) - - groundedness_eval = GroundednessEvaluator(model_config) - f1_score_eval = F1ScoreEvaluator() - - # run the evaluation - result = evaluate( - data=data_file_no_query, - evaluators={"grounded": groundedness_eval, "f1_score": f1_score_eval}, - ) - - row_result_df = pd.DataFrame(result["rows"]) - metrics = result["metrics"] - - # validate the results - assert result is not None - assert result["rows"] is not None - assert row_result_df.shape[0] == len(input_data) - - assert "outputs.grounded.groundedness" in row_result_df.columns.to_list() - assert "outputs.f1_score.f1_score" in row_result_df.columns.to_list() - - assert "grounded.groundedness" in metrics.keys() - assert "f1_score.f1_score" in metrics.keys() - - assert metrics.get("grounded.groundedness") == list_mean_nan_safe( - row_result_df["outputs.grounded.groundedness"] - ) - assert metrics.get("f1_score.f1_score") == list_mean_nan_safe(row_result_df["outputs.f1_score.f1_score"]) - - assert row_result_df["outputs.grounded.groundedness"][2] in [3, 4, 5] + assert row_result_df["outputs.grounded.groundedness"][2] in [4, 5] assert row_result_df["outputs.f1_score.f1_score"][2] == 1 assert result["studio_url"] is None @@ -766,3 +731,95 @@ def test_evaluate_aggregation(self, data_file, return_json, aggregate_return_jso @pytest.mark.skip(reason="TODO: Add test back") def test_prompty_with_threadpool_implementation(self): pass + + def 
test_evaluate_with_groundedness_evaluator_with_query(self, model_config, data_file): + # data + input_data = pd.read_json(data_file, lines=True) + + groundedness_eval = GroundednessEvaluator(model_config) + + # run the evaluation + result = evaluate( + data=data_file, + evaluators={"grounded": groundedness_eval}, + ) + + row_result_df = pd.DataFrame(result["rows"]) + metrics = result["metrics"] + + print(result["rows"]) + print(metrics) + + + # validate the results + assert result is not None + assert result["rows"] is not None + assert row_result_df.shape[0] == len(input_data) + assert "outputs.grounded.groundedness" in row_result_df.columns.to_list() + assert "grounded.groundedness" in metrics.keys() + assert metrics.get("grounded.groundedness") == list_mean_nan_safe( + row_result_df["outputs.grounded.groundedness"] + ) + assert row_result_df["outputs.grounded.groundedness"][2] in [3, 4, 5] + assert result["studio_url"] is None + + def test_evaluate_with_groundedness_evaluator_without_query(self, model_config, data_file_no_query): + # data + input_data = pd.read_json(data_file_no_query, lines=True) + + groundedness_eval = GroundednessEvaluator(model_config) + + # run the evaluation + result = evaluate( + data=data_file_no_query, + evaluators={"grounded": groundedness_eval}, + ) + + row_result_df = pd.DataFrame(result["rows"]) + metrics = result["metrics"] + + # validate the results + assert result is not None + assert result["rows"] is not None + assert row_result_df.shape[0] == len(input_data) + + assert "outputs.grounded.groundedness" in row_result_df.columns.to_list() + + assert "grounded.groundedness" in metrics.keys() + + assert metrics.get("grounded.groundedness") == list_mean_nan_safe( + row_result_df["outputs.grounded.groundedness"] + ) + + assert row_result_df["outputs.grounded.groundedness"][2] in [3, 4, 5] + assert result["studio_url"] is None + + def test_evaluate_with_groundedness_evaluator_with_convo(self, model_config, data_convo_file): + # data + input_data = pd.read_json(data_convo_file, lines=True) + + groundedness_eval = GroundednessEvaluator(model_config) + + # run the evaluation + result = evaluate( + data=data_convo_file, + evaluators={"grounded": groundedness_eval}, + ) + + row_result_df = pd.DataFrame(result["rows"]) + metrics = result["metrics"] + + # validate the results + assert result is not None + assert result["rows"] is not None + assert row_result_df.shape[0] == len(input_data) + + assert "outputs.grounded.groundedness" in row_result_df.columns.to_list() + + assert "grounded.groundedness" in metrics.keys() + + assert metrics.get("grounded.groundedness") == list_mean_nan_safe( + row_result_df["outputs.grounded.groundedness"] + ) + assert row_result_df["outputs.grounded.groundedness"][1] in [3, 4, 5] + assert result["studio_url"] is None From 3ee2f9c01e30d07f63fb41680aaa76f30561f3f7 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 17:00:13 -0400 Subject: [PATCH 05/12] Record tests --- sdk/evaluation/azure-ai-evaluation/assets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/assets.json b/sdk/evaluation/azure-ai-evaluation/assets.json index 356e246839ab..1ea0a49b66cd 100644 --- a/sdk/evaluation/azure-ai-evaluation/assets.json +++ b/sdk/evaluation/azure-ai-evaluation/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/evaluation/azure-ai-evaluation", - "Tag": "python/evaluation/azure-ai-evaluation_043418c052" + "Tag": 
"python/evaluation/azure-ai-evaluation_5ec60aae8a" } From 3e7f25a7a889138de67ba61cefd4d12f7c7251a4 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 17:34:14 -0400 Subject: [PATCH 06/12] Fix linting errors --- .../azure/ai/evaluation/_evaluate/_evaluate.py | 4 +++- sdk/evaluation/azure-ai-evaluation/tests/conftest.py | 2 +- .../azure-ai-evaluation/tests/e2etests/test_evaluate.py | 4 ---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py index 37d443647c89..f879def1501a 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py @@ -288,7 +288,9 @@ def _validate_columns_for_evaluators( missing_inputs = [] else: optional_params = ( - evaluator._OPTIONAL_PARAMS if hasattr(evaluator, "_OPTIONAL_PARAMS") else [] # pylint: disable=protected-access + evaluator._OPTIONAL_PARAMS # pylint: disable=protected-access + if hasattr(evaluator, "_OPTIONAL_PARAMS") + else [] ) excluded_params = set(new_df.columns).union(optional_params) missing_inputs = [col for col in evaluator_params if col not in excluded_params] diff --git a/sdk/evaluation/azure-ai-evaluation/tests/conftest.py b/sdk/evaluation/azure-ai-evaluation/tests/conftest.py index f4dfe7ee6a38..9302f64c161c 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/conftest.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/conftest.py @@ -147,7 +147,7 @@ def promptflow_root_run_id_sanitizer(): add_general_regex_sanitizer( value="root_run_id", regex=r'"root_run_id": "azure_ai_evaluation_evaluators_common_base_eval_asyncevaluatorbase_[^"]+"', - replacement='"root_run_id": "azure_ai_evaluation_evaluators_common_base_eval_asyncevaluatorbase_SANITIZED"' + replacement='"root_run_id": "azure_ai_evaluation_evaluators_common_base_eval_asyncevaluatorbase_SANITIZED"', ) azure_workspace_triad_sanitizer() diff --git a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py index bd3e76ef266b..b69c043e27c1 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py @@ -747,10 +747,6 @@ def test_evaluate_with_groundedness_evaluator_with_query(self, model_config, dat row_result_df = pd.DataFrame(result["rows"]) metrics = result["metrics"] - print(result["rows"]) - print(metrics) - - # validate the results assert result is not None assert result["rows"] is not None From 9f75ed28da05ffc4a1817197ec46173bb3adc756 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 18:07:12 -0400 Subject: [PATCH 07/12] Merge from main and fix linting error --- .../azure/ai/evaluation/simulator/_direct_attack_simulator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py index 7278ff3be2de..9da9d930acc4 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py @@ -179,9 +179,7 @@ async def __call__( if not randomization_seed: randomization_seed = randint(0, 
1000000) - regular_sim = AdversarialSimulator( - azure_ai_project=self.azure_ai_project, credential=self.credential - ) + regular_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential) regular_sim_results = await regular_sim( scenario=scenario, target=target, From 418c34bca848b0ab4d5f1eedfe96f78cbe5c4ead Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 19:09:09 -0400 Subject: [PATCH 08/12] Add unit test for missing output --- .../tests/unittests/test_evaluate.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py index 9e26bf9a992b..24491fec5bfd 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py @@ -648,3 +648,13 @@ def test_optional_inputs_with_target(self, questions_file, questions_answers_bas ) # type: ignore assert double_override_results["rows"][0]["outputs.echo.echo_query"] == "new query" assert double_override_results["rows"][0]["outputs.echo.echo_response"] == "new response" + + def test_missing_inputs(self, questions_file): + """Test we are raising exception if required input is missing in data.""" + with pytest.raises(EvaluationException) as cm: + evaluate( + data=questions_file, + target=_target_fn, + evaluators={"f1": F1ScoreEvaluator()}, + ) + assert "Some evaluators are missing required inputs:\n- f1: ['ground_truth']\n\n" in cm.value.args[0] \ No newline at end of file From 5cef42d4bb4b24533e8e367ddeec5cb5eb3b1517 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 19:27:18 -0400 Subject: [PATCH 09/12] Update convo test to assert per turn results --- .../azure-ai-evaluation/tests/e2etests/test_evaluate.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py index b69c043e27c1..625cf021e9d3 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py @@ -811,11 +811,12 @@ def test_evaluate_with_groundedness_evaluator_with_convo(self, model_config, dat assert row_result_df.shape[0] == len(input_data) assert "outputs.grounded.groundedness" in row_result_df.columns.to_list() - + assert "outputs.grounded.evaluation_per_turn" in row_result_df.columns.to_list() assert "grounded.groundedness" in metrics.keys() - assert metrics.get("grounded.groundedness") == list_mean_nan_safe( row_result_df["outputs.grounded.groundedness"] ) assert row_result_df["outputs.grounded.groundedness"][1] in [3, 4, 5] + assert row_result_df["outputs.grounded.evaluation_per_turn"][0]["groundedness"][0] in [3.0, 4.0, 5.0] + assert row_result_df["outputs.grounded.evaluation_per_turn"][0]["groundedness_reason"][0] is not None assert result["studio_url"] is None From 59506508395218f4537f76218a0398d93969b871 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 19:28:01 -0400 Subject: [PATCH 10/12] Run linting --- .../azure-ai-evaluation/tests/unittests/test_evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py index 24491fec5bfd..f8e65f92a3d7 100644 --- 
a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py @@ -657,4 +657,4 @@ def test_missing_inputs(self, questions_file): target=_target_fn, evaluators={"f1": F1ScoreEvaluator()}, ) - assert "Some evaluators are missing required inputs:\n- f1: ['ground_truth']\n\n" in cm.value.args[0] \ No newline at end of file + assert "Some evaluators are missing required inputs:\n- f1: ['ground_truth']\n\n" in cm.value.args[0] From 262443e49e5c30073bfee53469ece74edba8512b Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 19:55:38 -0400 Subject: [PATCH 11/12] Add test data --- .../tests/e2etests/data/evaluate_test_data_no_query.jsonl | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 sdk/evaluation/azure-ai-evaluation/tests/e2etests/data/evaluate_test_data_no_query.jsonl diff --git a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/data/evaluate_test_data_no_query.jsonl b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/data/evaluate_test_data_no_query.jsonl new file mode 100644 index 000000000000..e807a941f640 --- /dev/null +++ b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/data/evaluate_test_data_no_query.jsonl @@ -0,0 +1,3 @@ +{"context":"AML API only","response":"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.", "ground_truth":"Paris is the capital of France."} +{"context":"Logging can be done using any OSS Sdk","response":"There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = '.\/outputs\/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. 
Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs\/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.","ground_truth":"Paris is the capital of France."} +{"context":"France is in Europe","response":"Paris is the capital of France.", "ground_truth":"Paris is the capital of France."} \ No newline at end of file From d39139599bfe28b9d923e9f2e204d61b0fd27f42 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 1 Nov 2024 20:03:13 -0400 Subject: [PATCH 12/12] Re-record test --- sdk/evaluation/azure-ai-evaluation/assets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/assets.json b/sdk/evaluation/azure-ai-evaluation/assets.json index 1ea0a49b66cd..1e52dc68cc08 100644 --- a/sdk/evaluation/azure-ai-evaluation/assets.json +++ b/sdk/evaluation/azure-ai-evaluation/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/evaluation/azure-ai-evaluation", - "Tag": "python/evaluation/azure-ai-evaluation_5ec60aae8a" + "Tag": "python/evaluation/azure-ai-evaluation_f6269c684c" }
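
A minimal sketch of the optional-parameter behavior these patches introduce, assuming a hypothetical evaluator class and illustrative column names; it mirrors the excluded_params logic added to _validate_columns_for_evaluators in _evaluate.py rather than calling the SDK's internal validation path directly.

import inspect
from typing import Optional

import pandas as pd


class HypotheticalGroundednessEvaluator:
    # Parameters listed here are treated as optional: they are not reported as
    # "missing inputs" when the corresponding column is absent from the data.
    _OPTIONAL_PARAMS = ["query"]

    def __call__(self, *, response: str, context: str, query: Optional[str] = None) -> dict:
        # A real evaluator would score groundedness with a judge model; return a placeholder.
        return {"groundedness": 5.0}


def missing_inputs_for(evaluator, df: pd.DataFrame) -> list:
    # Mirrors the patched check: a required parameter is "missing" only if it is
    # neither a data column nor listed in the evaluator's _OPTIONAL_PARAMS.
    evaluator_params = [
        name for name in inspect.signature(evaluator).parameters if name != "kwargs"
    ]
    optional_params = getattr(evaluator, "_OPTIONAL_PARAMS", [])
    excluded_params = set(df.columns).union(optional_params)
    return [col for col in evaluator_params if col not in excluded_params]


# Rows shaped like evaluate_test_data_no_query.jsonl: context and response, but no query column.
df = pd.DataFrame(
    [{"context": "France is in Europe", "response": "Paris is the capital of France."}]
)
print(missing_inputs_for(HypotheticalGroundednessEvaluator(), df))  # -> []

With data like evaluate_test_data_no_query.jsonl (no "query" column), the check returns an empty list, so the optional parameter no longer triggers a missing-input warning, while a genuinely required column such as ground_truth for F1ScoreEvaluator still does.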