Classification: small fixes and storage related changes #365
Changes from all commits: c729531, a6a8146, 98d58a3, d7988a3, 98eca50, 8364401, bd7c676
@@ -22,7 +22,7 @@
     ModelEvaluationPublic,
 )
 from app.core.db import engine
-from app.core.cloud import AmazonCloudStorage
+from app.core.cloud import get_cloud_storage
 from app.core.finetune.evaluation import ModelEvaluator
 from app.utils import get_openai_client, APIResponse
 from app.api.deps import CurrentUserOrgProject, SessionDep
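The import above swaps the concrete AmazonCloudStorage class for a get_cloud_storage factory. The factory itself lives in app.core.cloud and is not part of this diff; the following is only a hypothetical sketch of such a factory, with the signature taken from the call sites later in this diff and everything else assumed:

```python
# Hypothetical sketch only; the real get_cloud_storage in app.core.cloud may differ.


class _DemoAmazonCloudStorage:
    """Stand-in for app.core.cloud.AmazonCloudStorage (interface assumed)."""

    def __init__(self, project_id: int):
        self.project_id = project_id

    def get_signed_url(self, key: str) -> str:
        # Illustrative signed URL; a real client would call S3 here.
        return f"https://example-bucket.s3.amazonaws.com/{key}?X-Amz-Signature=demo"


def get_cloud_storage(session, project_id: int) -> _DemoAmazonCloudStorage:
    # Centralizes storage construction so call sites no longer build
    # AmazonCloudStorage(current_user) themselves; `session` would be used to
    # look up project-level credentials in a real implementation.
    return _DemoAmazonCloudStorage(project_id=project_id)
```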
@@ -33,6 +33,19 @@
 router = APIRouter(prefix="/model_evaluation", tags=["model_evaluation"])
 
 
+def attach_prediction_file_url(model_obj, storage):
+    """
+    Given a model-like object and a storage client,
+    attach a signed prediction data file URL (if available).
+    """
+    s3_key = getattr(model_obj, "prediction_data_s3_object", None)
+    prediction_data_file_url = storage.get_signed_url(s3_key) if s3_key else None
+
+    return model_obj.model_copy(
+        update={"prediction_data_file_url": prediction_data_file_url}
+    )
+
+
 def run_model_evaluation(
     eval_id: int,
     current_user: CurrentUserOrgProject,
@@ -46,7 +59,7 @@ def run_model_evaluation(
     client = get_openai_client(
         db, current_user.organization_id, current_user.project_id
     )
-    storage = AmazonCloudStorage(current_user)
+    storage = get_cloud_storage(session=db, project_id=current_user.project_id)
 
     try:
         model_eval = update_model_eval(
@@ -57,8 +70,8 @@
         )
 
         evaluator = ModelEvaluator(
-            model_name=model_eval.model_name,
-            test_data_s3_url=model_eval.test_data_s3_url,
+            fine_tuned_model=model_eval.fine_tuned_model,
+            test_data_s3_object=model_eval.test_data_s3_object,
             system_prompt=model_eval.system_prompt,
             client=client,
             storage=storage,
@@ -71,7 +84,7 @@ def run_model_evaluation(
             project_id=current_user.project_id,
             update=ModelEvaluationUpdate(
                 score=result["evaluation_score"],
-                prediction_data_s3_url=result["prediction_data_s3_url"],
+                prediction_data_s3_object=result["prediction_data_s3_object"],
                 status=ModelEvaluationStatus.completed,
             ),
         )
@@ -82,6 +95,7 @@
         )
 
     except Exception as e:
+        error_msg = str(e)
         logger.error(
             f"[run_model_evaluation] Failed | eval_id={eval_id}, project_id={current_user.project_id}: {e}"
         )
@@ -92,7 +106,8 @@ def run_model_evaluation(
             project_id=current_user.project_id,
             update=ModelEvaluationUpdate(
                 status=ModelEvaluationStatus.failed,
-                error_message="failed during background job processing",
+                error_message="failed during background job processing:"
+                + error_msg,
             ),
         )
@@ -128,20 +143,20 @@ def evaluate_models(
         )
         raise HTTPException(status_code=400, detail="No fine-tuned job IDs provided")
 
-    evals: list[ModelEvaluationPublic] = []
+    evaluations: list[ModelEvaluationPublic] = []
 
     for job_id in request.fine_tuning_ids:
         fine_tuning_job = fetch_by_id(session, job_id, current_user.project_id)
-        active_evals = fetch_active_model_evals(
+        active_evaluations = fetch_active_model_evals(
             session, job_id, current_user.project_id
         )
 
-        if active_evals:
+        if active_evaluations:
             logger.info(
                 f"[evaluate_model] Skipping creation for {job_id}. Active evaluation exists, project_id:{current_user.project_id}"
             )
-            evals.extend(
-                ModelEvaluationPublic.model_validate(ev) for ev in active_evals
+            evaluations.extend(
+                ModelEvaluationPublic.model_validate(ev) for ev in active_evaluations
             )
             continue
@@ -153,22 +168,34 @@
             status=ModelEvaluationStatus.pending,
         )
 
-        evals.append(ModelEvaluationPublic.model_validate(model_eval))
+        evaluations.append(ModelEvaluationPublic.model_validate(model_eval))
 
         logger.info(
             f"[evaluate_model] Created evaluation for fine_tuning_id {job_id} with eval ID={model_eval.id}, project_id:{current_user.project_id}"
         )
 
         background_tasks.add_task(run_model_evaluation, model_eval.id, current_user)
 
+    response_data = [
+        {
+            "id": ev.id,
+            "fine_tuning_id": ev.fine_tuning_id,
+            "fine_tuned_model": getattr(ev, "fine_tuned_model", None),
+            "document_id": getattr(ev, "document_id", None),
+            "status": ev.status,
+        }
+        for ev in evaluations
+    ]
+
     return APIResponse.success_response(
-        {"message": "Model evaluation(s) started successfully", "data": evals}
+        {"message": "Model evaluation(s) started successfully", "data": response_data}
     )
 
 
 @router.get(
     "/{document_id}/top_model",
     response_model=APIResponse[ModelEvaluationPublic],
+    response_model_exclude_none=True,
 )
 def get_top_model_by_doc_id(
     document_id: UUID,
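The new response_data list above trims each evaluation down to a handful of fields before it is handed to APIResponse.success_response. Roughly, the payload would look like the dict below; the field values are invented for illustration, and the envelope that success_response wraps around it is defined in app.utils and not shown in this diff:

```python
# Illustrative payload only; values are made up and the outer APIResponse
# envelope is not shown.
payload = {
    "message": "Model evaluation(s) started successfully",
    "data": [
        {
            "id": 12,
            "fine_tuning_id": 3,
            "fine_tuned_model": None,  # may still be unset for a pending evaluation
            "document_id": "2b8f6c1e-9f1d-4a7e-8c11-0a5d3d9b7e42",
            "status": "pending",
        }
    ],
}
```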
@@ -183,19 +210,38 @@ def get_top_model_by_doc_id(
         f"[get_top_model_by_doc_id] Fetching top model for document_id={document_id}, "
         f"project_id={current_user.project_id}"
     )
 
     top_model = fetch_top_model_by_doc_id(session, document_id, current_user.project_id)
+    storage = get_cloud_storage(session=session, project_id=current_user.project_id)
Inline review thread on the `storage = get_cloud_storage(...)` line above:

Reviewer: Should we not handle errors here? What if `get_signed_url` fails?

Author: Error handling is already in place in the `get_signed_url` method; I will add it to the `get_cloud_storage` method as well, so that errors from this call are handled too.

Author: Added one more error-handling step to `get_cloud_storage`.
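For context, a rough idea of the kind of guard being discussed; this is an illustrative sketch only, since the actual change to get_cloud_storage is not part of this diff, and the helper, names, and messages below are assumptions:

```python
# Illustrative only: the shape of the extra error handling mentioned above,
# not the code that was actually added to app.core.cloud.
import logging

from fastapi import HTTPException

logger = logging.getLogger(__name__)


def _build_storage_client(session, project_id):
    """Hypothetical stand-in for whatever the factory constructs internally."""
    raise RuntimeError("no storage credentials configured")  # simulate a failure


def get_cloud_storage(session, project_id):
    try:
        return _build_storage_client(session, project_id)
    except Exception as exc:
        logger.error(
            f"[get_cloud_storage] Failed to initialize storage for project_id={project_id}: {exc}"
        )
        raise HTTPException(
            status_code=500,
            detail="Cloud storage could not be initialized for this project",
        )
```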
+
+    top_model = attach_prediction_file_url(top_model, storage)
+
     return APIResponse.success_response(top_model)
 
 
-@router.get("/{document_id}", response_model=APIResponse[list[ModelEvaluationPublic]])
-def get_evals_by_doc_id(
-    document_id: UUID, session: SessionDep, current_user: CurrentUserOrgProject
+@router.get(
+    "/{document_id}",
+    response_model=APIResponse[list[ModelEvaluationPublic]],
+    response_model_exclude_none=True,
+)
+def get_evaluations_by_doc_id(
+    document_id: UUID,
+    session: SessionDep,
+    current_user: CurrentUserOrgProject,
 ):
     """
     Return all model evaluations for the given document_id within the current project.
     """
     logger.info(
-        f"[get_evals_by_doc_id]Fetching evaluations for document_id: {document_id}, project_id: {current_user.project_id}"
+        f"[get_evaluations_by_doc_id] Fetching evaluations for document_id={document_id}, "
+        f"project_id={current_user.project_id}"
     )
 
     evaluations = fetch_eval_by_doc_id(session, document_id, current_user.project_id)
-    return APIResponse.success_response(evaluations)
+    storage = get_cloud_storage(session=session, project_id=current_user.project_id)
+
+    updated_evaluations = [
+        attach_prediction_file_url(ev, storage) for ev in evaluations
+    ]
+
+    return APIResponse.success_response(updated_evaluations)
Inline review thread on the `attach_prediction_file_url` helper:

Reviewer: If the `s3_key` is None, what happens? Should we send an empty/null value in the response JSON? Also, maybe include error handling there, something like this.

Author: If the `s3_key` is None, then there is simply no prediction file to sign, so the function will set `prediction_data_file_url` to None. In the API response, this will show up as null, which is fine since the `ModelEvaluationPublic` schema already specifies a default of None for this field. We don't need to add extra error handling inside `attach_prediction_file_url`, because any errors related to generating the signed URL are already being logged and handled in the `get_signed_url` method itself. This keeps the logic clean: `attach_prediction_file_url` just decides whether or not to attempt URL generation, while `get_signed_url` is responsible for managing AWS-specific failures.
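To make the null case concrete, here is a small self-contained check of the behaviour described above. The Pydantic model and storage stub are invented stand-ins; `attach_prediction_file_url` itself is the helper added in this diff:

```python
# Minimal illustration of the None-key behaviour described above.
from pydantic import BaseModel


class FakeEval(BaseModel):
    """Stand-in for ModelEvaluationPublic (fields reduced for the example)."""
    prediction_data_s3_object: str | None = None
    prediction_data_file_url: str | None = None


class FakeStorage:
    """Stand-in storage client; a real one would sign S3 URLs."""
    def get_signed_url(self, key: str) -> str:
        return f"https://example-bucket.s3.amazonaws.com/{key}?X-Amz-Signature=demo"


def attach_prediction_file_url(model_obj, storage):
    # Same logic as the helper added in this PR.
    s3_key = getattr(model_obj, "prediction_data_s3_object", None)
    prediction_data_file_url = storage.get_signed_url(s3_key) if s3_key else None
    return model_obj.model_copy(
        update={"prediction_data_file_url": prediction_data_file_url}
    )


with_key = attach_prediction_file_url(
    FakeEval(prediction_data_s3_object="evals/123/predictions.csv"), FakeStorage()
)
without_key = attach_prediction_file_url(FakeEval(), FakeStorage())

print(with_key.prediction_data_file_url)     # signed URL string
print(without_key.prediction_data_file_url)  # None -> null in the JSON response,
                                             # or omitted when response_model_exclude_none=True
```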