diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_eval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_eval.py
index 4690a1522e73..155911e73135 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_eval.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_eval.py
@@ -295,7 +295,7 @@ def _convert_kwargs_to_eval_input(self, **kwargs) -> Union[List[Dict], List[Deri
             )
         # Handle Conversation
         if conversation is not None:
-            if self.is_multi_modal_conversation(conversation):
+            if self._is_multi_modal_conversation(conversation):
                 return self._derive_multi_modal_conversation_converter()(conversation)
             return self._derive_conversation_converter()(conversation)
         # Handle Singletons
@@ -311,7 +311,7 @@ def _convert_kwargs_to_eval_input(self, **kwargs) -> Union[List[Dict], List[Deri
                 target=ErrorTarget.CONVERSATION,
             )
 
-    def is_multi_modal_conversation(self, conversation: Dict) -> bool:
+    def _is_multi_modal_conversation(self, conversation: Dict) -> bool:
         if "messages" not in conversation:
             return False
         messages = conversation["messages"]
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate_performance.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate_performance.py
index 678ebe0b6b60..f610b6242559 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate_performance.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate_performance.py
@@ -48,7 +48,7 @@ def test_bulk_evaluate(self, big_f1_data_file, use_pf_client):
-        max_duration = 2  # This test runs unreasonably slow in CI for some reason. Allow 20 seconds extra.
+        max_duration = 2  # This test runs unreasonably slow in CI for some reason. Allow 25 seconds extra.
         if in_ci():
-            max_duration += 20
+            max_duration += 25
         if use_pf_client:
             # PF client doesn't seem to parallelize, and takes about a second or 2 to start
             max_duration += 6.5
         assert diff < max_duration
@@ -76,7 +76,7 @@ def test_evaluate_parallelism(self, ten_queries_file, use_pf_client):
         # 2 batches at most, so it should take between 1 and 1.5 seconds.
         max_duration = 1.5
         if use_pf_client:  # PF client doesn't seem to parallelize, and takes about a second to start.
-            max_duration += 5
+            max_duration += 7.5
         assert diff < max_duration
         row_result_df = pd.DataFrame(result["rows"])
         assert "outputs.slow.result" in row_result_df.columns