
Evaluate Base Model Endpoints giving errors #173

@eitansela

Description

Operating System

macOS

Version Information

Python Version: 3.12.5
azure-ai-evaluation package version: 1.1.0
promptflow-tracing package version: 1.16.2
promptflow-core package version: 1.16.2

Steps to reproduce

  1. Run the Evaluate_Base_Model_Endpoint.ipynb notebook.
  2. Run the cell with the loop over the models that invokes the evaluation API (models, env_var, path, relevance_evaluator, and ModelEndpoints are defined in earlier cells of the notebook):
for model in models:
    randomNum = random.randint(1111, 9999)
    results = evaluate(
        evaluation_name="Eval-Run-" + str(randomNum) + "-" + model.title(),
        data=path,
        target=ModelEndpoints(env_var, model),
        evaluators={
            "relevance": relevance_evaluator,
        },
        evaluator_config={
            "relevance": {
                "column_mapping": {
                    "response": "${target.response}",
                    "context": "${data.context}",
                    "query": "${data.query}",
                },
            },
        },
    )
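
For context, ModelEndpoints is the callable target class defined in an earlier cell of the same notebook, so it lives in the notebook's __main__ module. A simplified sketch of its shape follows (the sample's actual implementation may differ):

class ModelEndpoints:
    def __init__(self, env: dict, model_type: str):
        # env holds the endpoint URLs and keys; model_type selects which endpoint to call
        self.env = env
        self.model_type = model_type

    def __call__(self, query: str) -> dict:
        # Calls the selected base-model endpoint and returns the answer in the
        # shape expected by the "${target.response}" column mapping above.
        response = self._call_selected_endpoint(query)  # hypothetical helper
        return {"query": query, "response": response}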

Expected behavior

Evaluation for each model completes successfully.

Actual behavior

The evaluation fails with the following error:

EvaluationException: (InternalError) Generate meta failed, detail error:
["Failed to collect flow entry '__main__:ModelEndpoints' in module '<module>'."]

Additional information

Full stack trace:

---------------------------------------------------------------------------
GenerateFlowMetaJsonError                 Traceback (most recent call last)
File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/azure/ai/evaluation/_evaluate/_evaluate.py:610, in evaluate(data, evaluators, evaluation_name, target, evaluator_config, azure_ai_project, output_path, **kwargs)
    609 try:
--> 610     return _evaluate(
    611         evaluation_name=evaluation_name,
    612         target=target,
    613         data=data,
    614         evaluators=evaluators,
    615         evaluator_config=evaluator_config,
    616         azure_ai_project=azure_ai_project,
    617         output_path=output_path,
    618         **kwargs,
    619     )
    620 except Exception as e:
    621     # Handle multiprocess bootstrap error

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/azure/ai/evaluation/_evaluate/_evaluate.py:704, in _evaluate(evaluators, evaluation_name, target, data, evaluator_config, azure_ai_project, output_path, **kwargs)
    703 if data is not None and target is not None:
--> 704     input_data_df, target_generated_columns, target_run = _apply_target_to_data(
    705         target, data, pf_client, input_data_df, evaluation_name, **kwargs
    706     )
    708     for evaluator_name, mapping in column_mapping.items():

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/azure/ai/evaluation/_evaluate/_evaluate.py:470, in _apply_target_to_data(target, data, pf_client, initial_data, evaluation_name, **kwargs)
    469 with TargetRunContext():
--> 470     run: Run = pf_client.run(
    471         flow=target,
    472         display_name=evaluation_name,
    473         data=data,
    474         stream=True,
    475         name=_run_name,
    476     )
    478 target_output: pd.DataFrame = pf_client.runs.get_details(run, all_results=True)

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_pf_client.py:301, in PFClient.run(self, flow, data, run, column_mapping, variant, connections, environment_variables, name, display_name, tags, resume_from, code, init, **kwargs)
    246 """Run flow against provided data or run.
    247 
    248 .. note::
   (...)
    299 :rtype: ~promptflow.entities.Run
    300 """
--> 301 return self._run(
    302     flow=flow,
    303     data=data,
    304     run=run,
    305     column_mapping=column_mapping,
    306     variant=variant,
    307     connections=connections,
    308     environment_variables=environment_variables,
    309     name=name,
    310     display_name=display_name,
    311     tags=tags,
    312     resume_from=resume_from,
    313     code=code,
    314     init=init,
    315     **kwargs,
    316 )

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_pf_client.py:226, in PFClient._run(self, flow, data, run, column_mapping, variant, connections, environment_variables, properties, name, display_name, tags, resume_from, code, init, **kwargs)
    210 run = Run(
    211     name=name,
    212     display_name=display_name,
   (...)
    224     dynamic_callable=dynamic_callable,
    225 )
--> 226 return self.runs.create_or_update(run=run, **kwargs)

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_telemetry/activity.py:265, in monitor_operation.<locals>.monitor.<locals>.wrapper(self, *args, **kwargs)
    264     thread.start()
--> 265 return f(self, *args, **kwargs)

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/operations/_run_operations.py:135, in RunOperations.create_or_update(self, run, **kwargs)
    133 from promptflow._sdk._orchestrator import RunSubmitter
--> 135 created_run = RunSubmitter(client=self._client).submit(run=run, **kwargs)
    136 if stream:

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_orchestrator/run_submitter.py:52, in RunSubmitter.submit(self, run, stream, **kwargs)
     51     wait(tasks, return_when=ALL_COMPLETED)
---> 52     task_results = [task.result() for task in tasks]
     54 # upload run to cloud if the trace destination is set to cloud

File ~/.pyenv/versions/3.12.5/lib/python3.12/concurrent/futures/_base.py:449, in Future.result(self, timeout)
    448 elif self._state == FINISHED:
--> 449     return self.__get_result()
    451 self._condition.wait(timeout)

File ~/.pyenv/versions/3.12.5/lib/python3.12/concurrent/futures/_base.py:401, in Future.__get_result(self)
    400 try:
--> 401     raise self._exception
    402 finally:
    403     # Break a reference cycle with the exception in self._exception

File ~/.pyenv/versions/3.12.5/lib/python3.12/concurrent/futures/thread.py:58, in _WorkItem.run(self)
     57 try:
---> 58     result = self.fn(*self.args, **self.kwargs)
     59 except BaseException as exc:

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_orchestrator/run_submitter.py:131, in RunSubmitter._run_bulk(self, run, stream, **kwargs)
    130 flow_obj = load_flow(source=run.flow)
--> 131 with flow_overwrite_context(
    132     flow_obj, tuning_node, variant, connections=run.connections, init_kwargs=run.init
    133 ) as flow:
    134     self._submit_bulk_run(flow=flow, run=run, local_storage=local_storage, **kwargs)

File ~/.pyenv/versions/3.12.5/lib/python3.12/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
    136 try:
--> 137     return next(self.gen)
    138 except StopIteration:

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_orchestrator/utils.py:279, in flow_overwrite_context(flow, tuning_node, variant, connections, overrides, drop_node_variants, init_kwargs)
    278 with tempfile.TemporaryDirectory() as temp_dir:
--> 279     override_flow_yaml(
    280         flow=flow,
    281         flow_dag=flow_dag,
    282         flow_dir_path=flow_dir_path,
    283         tuning_node=tuning_node,
    284         variant=variant,
    285         connections=connections,
    286         overrides=overrides,
    287         drop_node_variants=drop_node_variants,
    288         init_kwargs=init_kwargs,
    289     )
    290     flow_path = dump_flow_dag_according_to_content(flow_dag=flow_dag, flow_path=Path(temp_dir))

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_orchestrator/utils.py:232, in override_flow_yaml(flow, flow_dag, flow_dir_path, tuning_node, variant, connections, overrides, drop_node_variants, init_kwargs)
    229             logger.warning(
    230                 "Eager flow does not support tuning node, variant, connection override. " f"Dropping params {param}"
    231             )
--> 232     update_signatures(code=flow_dir_path, data=flow_dag)
    233 else:
    234     # always overwrite variant since we need to overwrite default variant if not specified.

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_utilities/signature_utils.py:148, in update_signatures(code, data)
    147 entry = data.get("entry")
--> 148 signatures, _, _ = infer_signature_for_flex_flow(
    149     entry=entry,
    150     code=code.as_posix(),
    151     language=data.get(LANGUAGE_KEY, "python"),
    152     validate=False,
    153     include_primitive_output=True,
    154 )
    155 # TODO: allow user only specify partial signatures in the yaml

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_utilities/signature_utils.py:72, in infer_signature_for_flex_flow(entry, language, code, keep_entry, validate, include_primitive_output)
     71     # TODO: extract description?
---> 72     flow_meta = inspector_proxy.get_entry_meta(entry=entry, working_dir=code)
     73 elif code is not None:
     74     # TODO: support specifying code when inferring signature?

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_proxy/_python_inspector_proxy.py:43, in PythonInspectorProxy.get_entry_meta(self, entry, working_dir, **kwargs)
     42 # generate flow.json only for eager flow for now
---> 43 return _generate_flow_meta(
     44     flow_directory=working_dir,
     45     source_path=resolve_python_entry_file(entry=flow_dag.get("entry"), working_dir=working_dir),
     46     data=flow_dag,
     47     timeout=timeout,
     48     load_in_subprocess=load_in_subprocess,
     49 )

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_core/entry_meta_generator.py:80, in _generate_flow_meta(flow_directory, source_path, data, timeout, load_in_subprocess)
     79     error_message = "Generate meta failed, detail error:\n" + str(exception_list)
---> 80     raise GenerateFlowMetaJsonError(error_message)
     81 return dict(meta_dict)

GenerateFlowMetaJsonError: Generate meta failed, detail error:
["Failed to collect flow entry '__main__:ModelEndpoints' in module '<module>'."]

The above exception was the direct cause of the following exception:

EvaluationException                       Traceback (most recent call last)
Cell In[9], line 23
     21 for model in models:
     22     randomNum = random.randint(1111, 9999)
---> 23     results = evaluate(
     24         evaluation_name="Eval-Run-" + str(randomNum) + "-" + model.title(),
     25         data=path,
     26         target=ModelEndpoints(env_var, model),
     27         evaluators={
     28             "relevance": relevance_evaluator,
     29         },
     30         evaluator_config={
     31             "relevance": {
     32                 "column_mapping": {
     33                     "response": "${target.response}",
     34                     "context": "${data.context}",
     35                     "query": "${data.query}",
     36                 },
     37             },
     38         },
     39     )

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/azure/ai/evaluation/_evaluate/_evaluate.py:644, in evaluate(data, evaluators, evaluation_name, target, evaluator_config, azure_ai_project, output_path, **kwargs)
    641 # Ensure a consistent user experience when encountering errors by converting
    642 # all other exceptions to EvaluationException.
    643 if not isinstance(e, EvaluationException):
--> 644     raise EvaluationException(
    645         message=str(e),
    646         target=ErrorTarget.EVALUATE,
    647         category=ErrorCategory.FAILED_EXECUTION,
    648         blame=ErrorBlame.SYSTEM_ERROR,
    649     ) from e
    651 raise e

EvaluationException: (InternalError) Generate meta failed, detail error:
["Failed to collect flow entry '__main__:ModelEndpoints' in module '<module>'."]

Labels

bug (Something isn't working)