From c729531a675c9d93cd086503e533c4dc9a26e204 Mon Sep 17 00:00:00 2001 From: nishika26 Date: Mon, 1 Sep 2025 23:29:23 +0530 Subject: [PATCH 1/5] first commit covering all --- ...7_add_fine_tuning_and_model_evaluation_.py | 10 ++-- backend/app/api/routes/fine_tuning.py | 51 ++++++++++------ backend/app/api/routes/model_evaluation.py | 45 +++++++++++--- backend/app/core/finetune/evaluation.py | 28 ++++----- backend/app/core/finetune/preprocessing.py | 14 +++-- backend/app/crud/model_evaluation.py | 4 +- backend/app/models/fine_tuning.py | 17 +++--- backend/app/models/model_evaluation.py | 11 ++-- backend/app/seed_data/seed_data.py | 2 +- .../app/tests/api/routes/test_fine_tuning.py | 48 ++++++++++----- .../tests/api/routes/test_model_evaluation.py | 59 ++++++++++++++----- .../app/tests/crud/test_model_evaluation.py | 4 +- backend/app/tests/utils/test_data.py | 4 +- 13 files changed, 196 insertions(+), 101 deletions(-) diff --git a/backend/app/alembic/versions/6ed6ed401847_add_fine_tuning_and_model_evaluation_.py b/backend/app/alembic/versions/6ed6ed401847_add_fine_tuning_and_model_evaluation_.py index 6abded37..c0e2e639 100644 --- a/backend/app/alembic/versions/6ed6ed401847_add_fine_tuning_and_model_evaluation_.py +++ b/backend/app/alembic/versions/6ed6ed401847_add_fine_tuning_and_model_evaluation_.py @@ -58,10 +58,10 @@ def upgrade(): "fine_tuned_model", sqlmodel.sql.sqltypes.AutoString(), nullable=True ), sa.Column( - "train_data_s3_URI", sqlmodel.sql.sqltypes.AutoString(), nullable=True + "train_data_s3_object", sqlmodel.sql.sqltypes.AutoString(), nullable=True ), sa.Column( - "test_data_s3_URI", sqlmodel.sql.sqltypes.AutoString(), nullable=True + "test_data_s3_object", sqlmodel.sql.sqltypes.AutoString(), nullable=True ), sa.Column("error_message", sqlmodel.sql.sqltypes.AutoString(), nullable=True), sa.Column("project_id", sa.Integer(), nullable=False), @@ -87,14 +87,16 @@ def upgrade(): sa.Column("document_id", sa.Uuid(), nullable=False), sa.Column("model_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column( - "test_data_s3_URI", sqlmodel.sql.sqltypes.AutoString(), nullable=False + "test_data_s3_object", sqlmodel.sql.sqltypes.AutoString(), nullable=False ), sa.Column("base_model", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("split_ratio", sa.Float(), nullable=False), sa.Column("system_prompt", sa.Text(), nullable=False), sa.Column("score", postgresql.JSON(astext_type=sa.Text()), nullable=True), sa.Column( - "prediction_data_s3_URI", sqlmodel.sql.sqltypes.AutoString(), nullable=True + "prediction_data_s3_object", + sqlmodel.sql.sqltypes.AutoString(), + nullable=True, ), sa.Column( "status", diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py index c5e4ac44..0727f75a 100644 --- a/backend/app/api/routes/fine_tuning.py +++ b/backend/app/api/routes/fine_tuning.py @@ -14,7 +14,7 @@ FineTuningUpdate, FineTuningStatus, ) -from app.core.cloud import AmazonCloudStorage +from app.core.cloud import get_cloud_storage, storage from app.crud.document import DocumentCrud from app.utils import get_openai_client, APIResponse, mask_string, load_description from app.crud import ( @@ -69,16 +69,18 @@ def process_fine_tuning_job( client = get_openai_client( session, current_user.organization_id, project_id ) - storage = AmazonCloudStorage(current_user) - document_crud = DocumentCrud(session=session, owner_id=current_user.id) + storage = get_cloud_storage( + session=session, project_id=current_user.project_id + ) + document_crud = 
DocumentCrud(session, current_user.project_id) document = document_crud.read_one(request.document_id) preprocessor = DataPreprocessor( document, storage, ratio, request.system_prompt ) result = preprocessor.process() train_data_temp_filepath = result["train_jsonl_temp_filepath"] - train_data_s3_url = result["train_csv_s3_url"] - test_data_s3_url = result["test_csv_s3_url"] + train_data_s3_object = result["train_csv_s3_object"] + test_data_s3_object = result["test_csv_s3_object"] try: with open(train_data_temp_filepath, "rb") as train_f: @@ -100,7 +102,8 @@ def process_fine_tuning_job( job=fine_tune, update=FineTuningUpdate( status=FineTuningStatus.failed, - error_message="Failed during background job processing", + error_message="Error while uploading file to openai : " + + error_msg, ), ) return @@ -128,7 +131,8 @@ def process_fine_tuning_job( job=fine_tune, update=FineTuningUpdate( status=FineTuningStatus.failed, - error_message="Failed during background job processing", + error_message="Error while creating an openai fine tuning job : " + + error_msg, ), ) return @@ -138,8 +142,8 @@ def process_fine_tuning_job( job=fine_tune, update=FineTuningUpdate( training_file_id=training_file_id, - train_data_s3_url=train_data_s3_url, - test_data_s3_url=test_data_s3_url, + train_data_s3_object=train_data_s3_object, + test_data_s3_object=test_data_s3_object, split_ratio=ratio, provider_job_id=job.id, status=FineTuningStatus.running, @@ -164,7 +168,8 @@ def process_fine_tuning_job( job=fine_tune, update=FineTuningUpdate( status=FineTuningStatus.failed, - error_message="Failed during background job processing", + error_message="Error while processing the background job : " + + str(e), ), ) @@ -186,6 +191,7 @@ def fine_tune_from_CSV( current_user.organization_id, current_user.project_id, ) + results = [] for ratio in request.split_ratio: @@ -235,21 +241,19 @@ def fine_tune_from_CSV( @router.get( - "/{job_id}/refresh", + "/{fine_tuning_id}/refresh", description=load_description("fine_tuning/retrieve.md"), response_model=APIResponse[FineTuningJobPublic], ) def refresh_fine_tune_status( - job_id: int, session: SessionDep, current_user: CurrentUserOrgProject + fine_tuning_id: int, session: SessionDep, current_user: CurrentUserOrgProject ): project_id = current_user.project_id - job = fetch_by_id(session, job_id, project_id) + job = fetch_by_id(session, fine_tuning_id, project_id) client = get_openai_client(session, current_user.organization_id, project_id) + storage = get_cloud_storage(session=session, project_id=current_user.project_id) - if job.provider_job_id is None: - return APIResponse.success_response(job) - - else: + if job.provider_job_id is not None: try: openai_job = client.fine_tuning.jobs.retrieve(job.provider_job_id) except openai.OpenAIError as e: @@ -257,7 +261,7 @@ def refresh_fine_tune_status( logger.error( f"[Retrieve_fine_tune_status] Failed to retrieve OpenAI job | " f"provider_job_id={mask_string(job.provider_job_id)}, " - f"error={error_msg}, job_id={job_id}, project_id={project_id}" + f"error={error_msg}, fine_tuning_id={fine_tuning_id}, project_id={project_id}" ) raise HTTPException( status_code=502, detail=f"OpenAI API error: {error_msg}" @@ -285,6 +289,17 @@ def refresh_fine_tune_status( ): job = update_finetune_job(session=session, job=job, update=update_payload) + job = job.model_copy( + update={ + "train_data_file_url": storage.get_signed_url(job.train_data_s3_object) + if job.train_data_s3_object + else None, + "test_data_file_url": 
storage.get_signed_url(job.test_data_s3_object) + if job.test_data_s3_object + else None, + } + ) + return APIResponse.success_response(job) diff --git a/backend/app/api/routes/model_evaluation.py b/backend/app/api/routes/model_evaluation.py index 3394a344..a1dd8ee8 100644 --- a/backend/app/api/routes/model_evaluation.py +++ b/backend/app/api/routes/model_evaluation.py @@ -22,7 +22,7 @@ ModelEvaluationPublic, ) from app.core.db import engine -from app.core.cloud import AmazonCloudStorage +from app.core.cloud import get_cloud_storage from app.core.finetune.evaluation import ModelEvaluator from app.utils import get_openai_client, APIResponse from app.api.deps import CurrentUserOrgProject, SessionDep @@ -46,7 +46,7 @@ def run_model_evaluation( client = get_openai_client( db, current_user.organization_id, current_user.project_id ) - storage = AmazonCloudStorage(current_user) + storage = get_cloud_storage(session=db, project_id=current_user.project_id) try: model_eval = update_model_eval( @@ -58,7 +58,7 @@ def run_model_evaluation( evaluator = ModelEvaluator( model_name=model_eval.model_name, - test_data_s3_url=model_eval.test_data_s3_url, + test_data_s3_object=model_eval.test_data_s3_object, system_prompt=model_eval.system_prompt, client=client, storage=storage, @@ -71,7 +71,7 @@ def run_model_evaluation( project_id=current_user.project_id, update=ModelEvaluationUpdate( score=result["evaluation_score"], - prediction_data_s3_url=result["prediction_data_s3_url"], + prediction_data_s3_object=result["prediction_data_s3_object"], status=ModelEvaluationStatus.completed, ), ) @@ -92,7 +92,7 @@ def run_model_evaluation( project_id=current_user.project_id, update=ModelEvaluationUpdate( status=ModelEvaluationStatus.failed, - error_message="failed during background job processing", + error_message="failed during background job processing:" + str(e), ), ) @@ -169,6 +169,7 @@ def evaluate_models( @router.get( "/{document_id}/top_model", response_model=APIResponse[ModelEvaluationPublic], + response_model_exclude_none=True, ) def get_top_model_by_doc_id( document_id: UUID, @@ -183,19 +184,47 @@ def get_top_model_by_doc_id( f"[get_top_model_by_doc_id] Fetching top model for document_id={document_id}, " f"project_id={current_user.project_id}" ) + top_model = fetch_top_model_by_doc_id(session, document_id, current_user.project_id) + storage = get_cloud_storage(session=session, project_id=current_user.project_id) + + s3_key = getattr(top_model, "prediction_data_s3_object", None) + prediction_data_file_url = storage.get_signed_url(s3_key) if s3_key else None + + top_model = top_model.model_copy( + update={"prediction_data_file_url": prediction_data_file_url} + ) + return APIResponse.success_response(top_model) -@router.get("/{document_id}", response_model=APIResponse[list[ModelEvaluationPublic]]) +@router.get( + "/{document_id}", + response_model=APIResponse[list[ModelEvaluationPublic]], + response_model_exclude_none=True, +) def get_evals_by_doc_id( - document_id: UUID, session: SessionDep, current_user: CurrentUserOrgProject + document_id: UUID, + session: SessionDep, + current_user: CurrentUserOrgProject, ): """ Return all model evaluations for the given document_id within the current project. 
""" logger.info( - f"[get_evals_by_doc_id]Fetching evaluations for document_id: {document_id}, project_id: {current_user.project_id}" + f"[get_evals_by_doc_id] Fetching evaluations for document_id={document_id}, " + f"project_id={current_user.project_id}" ) + evaluations = fetch_eval_by_doc_id(session, document_id, current_user.project_id) + storage = get_cloud_storage(session=session, project_id=current_user.project_id) + + for ev in evaluations: + s3_key = getattr(ev, "prediction_data_s3_object", None) + prediction_data_file_url = storage.get_signed_url(s3_key) if s3_key else None + + ev = ev.model_copy( + update={"prediction_data_file_url": prediction_data_file_url} + ) + return APIResponse.success_response(evaluations) diff --git a/backend/app/core/finetune/evaluation.py b/backend/app/core/finetune/evaluation.py index 1f34fddb..4c47a12f 100644 --- a/backend/app/core/finetune/evaluation.py +++ b/backend/app/core/finetune/evaluation.py @@ -26,13 +26,13 @@ class ModelEvaluator: def __init__( self, model_name: str, - test_data_s3_url: str, + test_data_s3_object: str, storage: AmazonCloudStorage, system_prompt: str, client: OpenAI, ): self.model_name = model_name - self.test_data_s3_url = test_data_s3_url + self.test_data_s3_object = test_data_s3_object self.storage = storage self.system_instruction = system_prompt self.client = client @@ -51,9 +51,9 @@ def load_labels_and_prompts(self) -> None: - 'label' """ logger.info( - f"[ModelEvaluator.load_labels_and_prompts] Loading CSV from: {self.test_data_s3_url}" + f"[ModelEvaluator.load_labels_and_prompts] Loading CSV from: {self.test_data_s3_object}" ) - file_obj = self.storage.stream(self.test_data_s3_url) + file_obj = self.storage.stream(self.test_data_s3_object) try: df = pd.read_csv(file_obj) df.columns = [c.strip().lower() for c in df.columns] @@ -182,28 +182,28 @@ def generate_predictions(self) -> tuple[list[str], str]: unique_id = uuid.uuid4().hex filename = f"predictions_{self.model_name}_{unique_id}.csv" - prediction_data_s3_url = DataPreprocessor.upload_csv_to_s3( + prediction_data_s3_object = DataPreprocessor.upload_csv_to_s3( self.storage, prediction_data, filename ) - self.prediction_data_s3_url = prediction_data_s3_url + self.prediction_data_s3_object = prediction_data_s3_object logger.info( - f"[generate_predictions] Predictions CSV uploaded to S3 | url={prediction_data_s3_url}" + f"[generate_predictions] Predictions CSV uploaded to S3 | url={prediction_data_s3_object}" ) - return predictions, prediction_data_s3_url + return predictions, prediction_data_s3_object def evaluate(self) -> dict: """Evaluate using the predictions CSV previously uploaded to S3.""" - if not getattr(self, "prediction_data_s3_url", None): + if not getattr(self, "prediction_data_s3_object", None): raise RuntimeError( - "[evaluate] predictions_s3_url not set. Call generate_predictions() first." + "[evaluate] predictions_s3_object not set. Call generate_predictions() first." 
) logger.info( - f"[evaluate] Streaming predictions CSV from: {self.prediction_data_s3_url}" + f"[evaluate] Streaming predictions CSV from: {self.prediction_data_s3_object}" ) - prediction_obj = self.storage.stream(self.prediction_data_s3_url) + prediction_obj = self.storage.stream(self.prediction_data_s3_object) try: df = pd.read_csv(prediction_obj) finally: @@ -229,12 +229,12 @@ def run(self) -> dict: """Run the full evaluation process: load data, generate predictions, evaluate results.""" try: self.load_labels_and_prompts() - predictions, prediction_data_s3_url = self.generate_predictions() + predictions, prediction_data_s3_object = self.generate_predictions() evaluation_results = self.evaluate() logger.info("[evaluate] Model evaluation completed successfully.") return { "evaluation_score": evaluation_results, - "prediction_data_s3_url": prediction_data_s3_url, + "prediction_data_s3_object": prediction_data_s3_object, } except Exception as e: logger.error(f"[evaluate] Error in running ModelEvaluator: {str(e)}") diff --git a/backend/app/core/finetune/preprocessing.py b/backend/app/core/finetune/preprocessing.py index 28fe9b23..52d652ee 100644 --- a/backend/app/core/finetune/preprocessing.py +++ b/backend/app/core/finetune/preprocessing.py @@ -47,9 +47,9 @@ def upload_csv_to_s3(storage, df, filename: str) -> str: ) try: - dest = storage.put(upload, basename=Path("datasets") / filename) + dest = storage.put(upload, file_path=Path("datasets") / filename) logger.info( - f"[upload_csv_to_s3] Upload successful | filename='{filename}', s3_url='{dest}'" + f"[upload_csv_to_s3] Upload successful | filename='{filename}', s3_object='{dest}'" ) return str(dest) except Exception as err: @@ -143,13 +143,15 @@ def process(self): test_csv_name = f"test_split_{test_percentage}_{unique_id}.csv" train_jsonl_name = f"train_data_{train_percentage}_{unique_id}.jsonl" - train_csv_url = self.upload_csv_to_s3(self.storage, train_data, train_csv_name) - test_csv_url = self.upload_csv_to_s3(self.storage, test_data, test_csv_name) + train_csv_object = self.upload_csv_to_s3( + self.storage, train_data, train_csv_name + ) + test_csv_object = self.upload_csv_to_s3(self.storage, test_data, test_csv_name) train_jsonl_path = self._save_to_jsonl(train_jsonl, train_jsonl_name) return { - "train_csv_s3_url": train_csv_url, - "test_csv_s3_url": test_csv_url, + "train_csv_s3_object": train_csv_object, + "test_csv_s3_object": test_csv_object, "train_jsonl_temp_filepath": train_jsonl_path, } diff --git a/backend/app/crud/model_evaluation.py b/backend/app/crud/model_evaluation.py index 7a1e2b3b..49f3ddcf 100644 --- a/backend/app/crud/model_evaluation.py +++ b/backend/app/crud/model_evaluation.py @@ -26,7 +26,7 @@ def create_model_evaluation( ) -> ModelEvaluation: fine_tuning_job = fetch_by_id(session, request.fine_tuning_id, project_id) - if fine_tuning_job.fine_tuned_model and fine_tuning_job.test_data_s3_url is None: + if fine_tuning_job.fine_tuned_model and fine_tuning_job.test_data_s3_object is None: logger.error( f"[create_model_evaluation] No fine tuned model or test data found for the given fine tuning ID | fine_tuning_id={request.fine_tuning_id}, project_id={project_id}" ) @@ -39,7 +39,7 @@ def create_model_evaluation( "split_ratio": fine_tuning_job.split_ratio, "model_name": fine_tuning_job.fine_tuned_model, "document_id": fine_tuning_job.document_id, - "test_data_s3_url": fine_tuning_job.test_data_s3_url, + "test_data_s3_object": fine_tuning_job.test_data_s3_object, "project_id": project_id, "organization_id": 
organization_id, "status": status, diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py index 17bc1ccd..a3b0e866 100644 --- a/backend/app/models/fine_tuning.py +++ b/backend/app/models/fine_tuning.py @@ -64,11 +64,11 @@ class Fine_Tuning(FineTuningJobBase, table=True): fine_tuned_model: str | None = Field( default=None, description="Final fine tuned model name from OpenAI" ) - train_data_s3_url: str | None = Field( - default=None, description="S3 url of the training data stored ins S3" + train_data_s3_object: str | None = Field( + default=None, description="S3 URI of the training data stored ins S3" ) - test_data_s3_url: str | None = Field( - default=None, description="S3 url of the testing data stored ins S3" + test_data_s3_object: str | None = Field( + default=None, description="S3 URI of the testing data stored ins S3" ) error_message: str | None = Field( default=None, description="error message for when something failed" @@ -91,8 +91,8 @@ class Fine_Tuning(FineTuningJobBase, table=True): class FineTuningUpdate(SQLModel): training_file_id: Optional[str] = None - train_data_s3_url: Optional[str] = None - test_data_s3_url: Optional[str] = None + train_data_s3_object: Optional[str] = None + test_data_s3_object: Optional[str] = None split_ratio: Optional[float] = None provider_job_id: Optional[str] = None fine_tuned_model: Optional[str] = None @@ -108,13 +108,12 @@ class FineTuningJobPublic(SQLModel): base_model: str document_id: UUID provider_job_id: str | None = None + train_data_file_url: str | None = None + test_data_file_url: str | None = None status: str error_message: str | None = None fine_tuned_model: str | None = None training_file_id: str | None = None - train_data_s3_url: str | None = None - test_data_s3_url: str | None = None inserted_at: datetime updated_at: datetime - deleted_at: datetime | None = None diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index da24cdcd..7a10c2ae 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -47,7 +47,9 @@ class ModelEvaluation(ModelEvaluationBase, table=True): nullable=False, ) model_name: str = Field(description="fine tuned model name from OpenAI") - test_data_s3_url: str = Field(description="S3 url of the testing data stored in S3") + test_data_s3_object: str = Field( + description="S3 URI of the testing data stored in S3" + ) base_model: str = Field(nullable=False, description="Base model for fine-tuning") split_ratio: float = Field( nullable=False, description="the ratio the dataset was divided in" @@ -57,7 +59,7 @@ class ModelEvaluation(ModelEvaluationBase, table=True): sa_column=Column(JSON, nullable=True), description="Evaluation scores per metric (e.g., {'mcc': 0.85})", ) - prediction_data_s3_url: str | None = Field( + prediction_data_s3_object: str | None = Field( default=None, description="S3 URL where the prediction data generated by the fine-tuned model is stored", ) @@ -87,7 +89,7 @@ class ModelEvaluationUpdate(SQLModel): score: Optional[dict[str, float]] = None status: Optional[ModelEvaluationStatus] = None error_message: Optional[str] = None - prediction_data_s3_url: Optional[str] = None + prediction_data_s3_object: Optional[str] = None class ModelEvaluationPublic(ModelEvaluationBase): @@ -98,10 +100,9 @@ class ModelEvaluationPublic(ModelEvaluationBase): model_name: str split_ratio: float base_model: str - prediction_data_s3_url: str | None + prediction_data_file_url: str | None = None score: dict[str, 
float] | None = None status: ModelEvaluationStatus inserted_at: datetime updated_at: datetime - deleted_at: datetime | None = None diff --git a/backend/app/seed_data/seed_data.py b/backend/app/seed_data/seed_data.py index 5b629d24..8139f9c8 100644 --- a/backend/app/seed_data/seed_data.py +++ b/backend/app/seed_data/seed_data.py @@ -335,7 +335,7 @@ def create_document(session: Session, document_data_raw: dict) -> Document: document = Document( fname=document_data.fname, object_store_url=document_data.object_store_url, - owner_id=user.id, + project_id=project.id, ) session.add(document) diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py index 477aea54..26270de3 100644 --- a/backend/app/tests/api/routes/test_fine_tuning.py +++ b/backend/app/tests/api/routes/test_fine_tuning.py @@ -1,8 +1,10 @@ import pytest + from unittest.mock import patch, MagicMock from app.tests.utils.test_data import create_test_fine_tuning_jobs from app.tests.utils.utils import get_document +from app.models import Fine_Tuning def create_file_mock(file_type): @@ -32,20 +34,20 @@ def test_finetune_from_csv_multiple_split_ratio( db, user_api_key_header, ): - document = get_document(db) - + document = get_document(db, "dalgo_sample.json") + print("document = ", document) for path in ["/tmp/train.jsonl", "/tmp/test.jsonl"]: with open(path, "w") as f: f.write("{}") - mock_preprocessor_cls.return_value = MagicMock( - process=MagicMock( - return_value={ - "train_file": "/tmp/train.jsonl", - "test_file": "/tmp/test.jsonl", - } - ) - ) + mock_preprocessor = MagicMock() + mock_preprocessor.process.return_value = { + "train_jsonl_temp_filepath": "/tmp/train.jsonl", + "train_csv_s3_object": "s3://bucket/train.csv", + "test_csv_s3_object": "s3://bucket/test.csv", + } + mock_preprocessor.cleanup = MagicMock() + mock_preprocessor_cls.return_value = mock_preprocessor mock_openai = MagicMock() mock_openai.files.create.side_effect = create_file_mock("fine-tune") @@ -61,16 +63,34 @@ def test_finetune_from_csv_multiple_split_ratio( "system_prompt": "you are a model able to classify", } - response = client.post( - "/api/v1/fine_tuning/fine_tune", json=body, headers=user_api_key_header - ) - assert response.status_code == 200 + with patch("app.api.routes.fine_tuning.Session") as SessionMock: + SessionMock.return_value.__enter__.return_value = db + SessionMock.return_value.__exit__.return_value = None + + response = client.post( + "/api/v1/fine_tuning/fine_tune", + json=body, + headers=user_api_key_header, + ) + assert response.status_code == 200 json_data = response.json() assert json_data["success"] is True assert json_data["data"]["message"] == "Fine-tuning job(s) started." 
assert json_data["metadata"] is None + jobs = db.query(Fine_Tuning).all() + assert len(jobs) == 3 + + for i, job in enumerate(jobs, start=1): + db.refresh(job) + assert job.status == "running" + assert job.provider_job_id == f"ft_mock_job_{i}" + assert job.training_file_id is not None + assert job.train_data_s3_object == "s3://bucket/train.csv" + assert job.test_data_s3_object == "s3://bucket/test.csv" + assert job.split_ratio in [0.5, 0.7, 0.9] + @pytest.mark.usefixtures("client", "db", "user_api_key_header") @patch("app.api.routes.fine_tuning.get_openai_client") diff --git a/backend/app/tests/api/routes/test_model_evaluation.py b/backend/app/tests/api/routes/test_model_evaluation.py index 9b969e47..3607dec4 100644 --- a/backend/app/tests/api/routes/test_model_evaluation.py +++ b/backend/app/tests/api/routes/test_model_evaluation.py @@ -1,30 +1,57 @@ -from unittest.mock import patch +from unittest.mock import patch, MagicMock from app.tests.utils.test_data import ( create_test_finetuning_job_with_extra_fields, create_test_model_evaluation, ) - - -@patch("app.api.routes.model_evaluation.run_model_evaluation") -def test_evaluate_model(mock_run_eval, client, db, user_api_key_header): +from app.models import ModelEvaluation + + +@patch("app.api.routes.model_evaluation.ModelEvaluator") +@patch("app.api.routes.model_evaluation.get_cloud_storage") +@patch("app.api.routes.model_evaluation.get_openai_client") +def test_evaluate_model_background_success( + mock_get_client, + mock_get_storage, + mock_evaluator_cls, + client, + db, + user_api_key_header, +): fine_tuned, _ = create_test_finetuning_job_with_extra_fields(db, [0.5]) body = {"fine_tuning_ids": [fine_tuned[0].id]} - resp = client.post( - "/api/v1/model_evaluation/evaluate_models/", - json=body, - headers=user_api_key_header, - ) + evaluator = MagicMock() + evaluator.run.return_value = { + "evaluation_score": 0.87, + "prediction_data_s3_object": "s3://bucket/preds.csv", + } + mock_evaluator_cls.return_value = evaluator + + with patch("app.api.routes.model_evaluation.Session") as SessionMock: + SessionMock.return_value.__enter__.return_value = db + SessionMock.return_value.__exit__.return_value = None + + resp = client.post( + "/api/v1/model_evaluation/evaluate_models/", + json=body, + headers=user_api_key_header, + ) + assert resp.status_code == 200, resp.text - j = resp.json() - evals = j["data"]["data"] - assert len(evals) == 1 - assert evals[0]["status"] == "pending" + payload = resp.json() + + eval_id = payload["data"]["data"][0]["id"] + + ev = db.get(ModelEvaluation, eval_id) + assert ev is not None, "evaluation row should exist after background task" + db.refresh(ev) - mock_run_eval.assert_called_once() - assert mock_run_eval.call_args[0][0] == evals[0]["id"] + assert ev.status == "completed" + assert ev.score == 0.87 + assert ev.prediction_data_s3_object == "s3://bucket/preds.csv" + assert not ev.error_message def test_evaluate_model_finetuning_not_found(client, user_api_key_header): diff --git a/backend/app/tests/crud/test_model_evaluation.py b/backend/app/tests/crud/test_model_evaluation.py index 0eaa72f7..57b27ded 100644 --- a/backend/app/tests/crud/test_model_evaluation.py +++ b/backend/app/tests/crud/test_model_evaluation.py @@ -37,7 +37,7 @@ def test_create_model_evaluation(db: Session): base_model=fine_tune.base_model, model_name=fine_tune.fine_tuned_model, document_id=fine_tune.document_id, - test_data_s3_url=fine_tune.test_data_s3_url, + test_data_s3_object=fine_tune.test_data_s3_object, status="pending", ) @@ -52,7 +52,7 @@ 
def test_create_model_evaluation(db: Session): assert created_eval.status == "pending" assert created_eval.document_id == fine_tune.document_id assert created_eval.model_name == fine_tune.fine_tuned_model - assert created_eval.test_data_s3_url == fine_tune.test_data_s3_url + assert created_eval.test_data_s3_object == fine_tune.test_data_s3_object def test_fetch_by_eval_id_success(db: Session): diff --git a/backend/app/tests/utils/test_data.py b/backend/app/tests/utils/test_data.py index edce345e..bd91cfbf 100644 --- a/backend/app/tests/utils/test_data.py +++ b/backend/app/tests/utils/test_data.py @@ -168,7 +168,7 @@ def create_test_finetuning_job_with_extra_fields( if jobs: for job in jobs: - job.test_data_s3_url = "test_data_s3_url_example" + job.test_data_s3_object = "test_data_s3_object_example" job.fine_tuned_model = "fine_tuned_model_name" return jobs, True @@ -186,7 +186,7 @@ def create_test_model_evaluation(db) -> list[ModelEvaluation]: base_model=fine_tune.base_model, model_name=fine_tune.fine_tuned_model, document_id=fine_tune.document_id, - test_data_s3_url=fine_tune.test_data_s3_url, + test_data_s3_object=fine_tune.test_data_s3_object, ) model_eval = create_model_evaluation( From a6a814625b9d8d388485c23dada847e977c6fec9 Mon Sep 17 00:00:00 2001 From: nishika26 Date: Tue, 2 Sep 2025 12:33:24 +0530 Subject: [PATCH 2/5] final fixes --- backend/app/api/routes/fine_tuning.py | 24 ++++++- backend/app/api/routes/model_evaluation.py | 45 ++++++++----- backend/app/crud/model_evaluation.py | 2 +- .../app/tests/api/routes/test_fine_tuning.py | 2 +- .../tests/api/routes/test_model_evaluation.py | 63 ++++++------------- .../app/tests/crud/test_model_evaluation.py | 2 +- 6 files changed, 74 insertions(+), 64 deletions(-) diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py index 0727f75a..a0bd738f 100644 --- a/backend/app/api/routes/fine_tuning.py +++ b/backend/app/api/routes/fine_tuning.py @@ -311,6 +311,7 @@ def refresh_fine_tune_status( def retrieve_jobs_by_document( document_id: UUID, session: SessionDep, current_user: CurrentUserOrgProject ): + storage = get_cloud_storage(session=session, project_id=current_user.project_id) project_id = current_user.project_id jobs = fetch_by_document_id(session, document_id, project_id) if not jobs: @@ -321,4 +322,25 @@ def retrieve_jobs_by_document( status_code=404, detail="No fine-tuning jobs found for the given document ID", ) - return APIResponse.success_response(jobs) + updated_jobs = [] + for job in jobs: + train_url = ( + storage.get_signed_url(job.train_data_s3_object) + if job.train_data_s3_object + else None + ) + test_url = ( + storage.get_signed_url(job.test_data_s3_object) + if job.test_data_s3_object + else None + ) + + updated_job = job.model_copy( + update={ + "train_data_file_url": train_url, + "test_data_file_url": test_url, + } + ) + updated_jobs.append(updated_job) + + return APIResponse.success_response(updated_jobs) diff --git a/backend/app/api/routes/model_evaluation.py b/backend/app/api/routes/model_evaluation.py index a1dd8ee8..451c77e3 100644 --- a/backend/app/api/routes/model_evaluation.py +++ b/backend/app/api/routes/model_evaluation.py @@ -33,6 +33,19 @@ router = APIRouter(prefix="/model_evaluation", tags=["model_evaluation"]) +def attach_prediction_file_url(model_obj, storage): + """ + Given a model-like object and a storage client, + attach a signed prediction data file URL (if available). 
+ """ + s3_key = getattr(model_obj, "prediction_data_s3_object", None) + prediction_data_file_url = storage.get_signed_url(s3_key) if s3_key else None + + return model_obj.model_copy( + update={"prediction_data_file_url": prediction_data_file_url} + ) + + def run_model_evaluation( eval_id: int, current_user: CurrentUserOrgProject, @@ -161,8 +174,19 @@ def evaluate_models( background_tasks.add_task(run_model_evaluation, model_eval.id, current_user) + response_data = [ + { + "id": ev.id, + "fine_tuning_id": ev.fine_tuning_id, + "model_name": getattr(ev, "model_name", None), + "document_id": getattr(ev, "document_id", None), + "status": ev.status, + } + for ev in evals + ] + return APIResponse.success_response( - {"message": "Model evaluation(s) started successfully", "data": evals} + {"message": "Model evaluation(s) started successfully", "data": response_data} ) @@ -188,12 +212,7 @@ def get_top_model_by_doc_id( top_model = fetch_top_model_by_doc_id(session, document_id, current_user.project_id) storage = get_cloud_storage(session=session, project_id=current_user.project_id) - s3_key = getattr(top_model, "prediction_data_s3_object", None) - prediction_data_file_url = storage.get_signed_url(s3_key) if s3_key else None - - top_model = top_model.model_copy( - update={"prediction_data_file_url": prediction_data_file_url} - ) + top_model = attach_prediction_file_url(top_model, storage) return APIResponse.success_response(top_model) @@ -219,12 +238,8 @@ def get_evals_by_doc_id( evaluations = fetch_eval_by_doc_id(session, document_id, current_user.project_id) storage = get_cloud_storage(session=session, project_id=current_user.project_id) - for ev in evaluations: - s3_key = getattr(ev, "prediction_data_s3_object", None) - prediction_data_file_url = storage.get_signed_url(s3_key) if s3_key else None - - ev = ev.model_copy( - update={"prediction_data_file_url": prediction_data_file_url} - ) + updated_evaluations = [ + attach_prediction_file_url(ev, storage) for ev in evaluations + ] - return APIResponse.success_response(evaluations) + return APIResponse.success_response(updated_evaluations) diff --git a/backend/app/crud/model_evaluation.py b/backend/app/crud/model_evaluation.py index 49f3ddcf..37f69add 100644 --- a/backend/app/crud/model_evaluation.py +++ b/backend/app/crud/model_evaluation.py @@ -128,7 +128,7 @@ def fetch_top_model_by_doc_id( for model_eval in model_evals: if model_eval.score is not None: - mcc = model_eval.score.get("mcc", None) + mcc = model_eval.score.get("mcc_score", None) if mcc is not None and mcc > highest_mcc: highest_mcc = mcc top_model = model_eval diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py index 26270de3..e49a1ee5 100644 --- a/backend/app/tests/api/routes/test_fine_tuning.py +++ b/backend/app/tests/api/routes/test_fine_tuning.py @@ -35,7 +35,7 @@ def test_finetune_from_csv_multiple_split_ratio( user_api_key_header, ): document = get_document(db, "dalgo_sample.json") - print("document = ", document) + for path in ["/tmp/train.jsonl", "/tmp/test.jsonl"]: with open(path, "w") as f: f.write("{}") diff --git a/backend/app/tests/api/routes/test_model_evaluation.py b/backend/app/tests/api/routes/test_model_evaluation.py index 3607dec4..8c4e6727 100644 --- a/backend/app/tests/api/routes/test_model_evaluation.py +++ b/backend/app/tests/api/routes/test_model_evaluation.py @@ -1,57 +1,30 @@ -from unittest.mock import patch, MagicMock +from unittest.mock import patch from app.tests.utils.test_data import ( 
create_test_finetuning_job_with_extra_fields, create_test_model_evaluation, ) -from app.models import ModelEvaluation - - -@patch("app.api.routes.model_evaluation.ModelEvaluator") -@patch("app.api.routes.model_evaluation.get_cloud_storage") -@patch("app.api.routes.model_evaluation.get_openai_client") -def test_evaluate_model_background_success( - mock_get_client, - mock_get_storage, - mock_evaluator_cls, - client, - db, - user_api_key_header, -): - fine_tuned, _ = create_test_finetuning_job_with_extra_fields(db, [0.5]) - body = {"fine_tuning_ids": [fine_tuned[0].id]} - - evaluator = MagicMock() - evaluator.run.return_value = { - "evaluation_score": 0.87, - "prediction_data_s3_object": "s3://bucket/preds.csv", - } - mock_evaluator_cls.return_value = evaluator - with patch("app.api.routes.model_evaluation.Session") as SessionMock: - SessionMock.return_value.__enter__.return_value = db - SessionMock.return_value.__exit__.return_value = None - resp = client.post( - "/api/v1/model_evaluation/evaluate_models/", - json=body, - headers=user_api_key_header, - ) +@patch("app.api.routes.model_evaluation.run_model_evaluation") +def test_evaluate_model(mock_run_eval, client, db, user_api_key_header): + fine_tuned, _ = create_test_finetuning_job_with_extra_fields(db, [0.5]) + body = {"fine_tuning_ids": [fine_tuned[0].id]} + resp = client.post( + "/api/v1/model_evaluation/evaluate_models/", + json=body, + headers=user_api_key_header, + ) assert resp.status_code == 200, resp.text - payload = resp.json() - - eval_id = payload["data"]["data"][0]["id"] - - ev = db.get(ModelEvaluation, eval_id) - assert ev is not None, "evaluation row should exist after background task" - db.refresh(ev) + j = resp.json() + evals = j["data"]["data"] + assert len(evals) == 1 + assert evals[0]["status"] == "pending" - assert ev.status == "completed" - assert ev.score == 0.87 - assert ev.prediction_data_s3_object == "s3://bucket/preds.csv" - assert not ev.error_message + mock_run_eval.assert_called_once() + assert mock_run_eval.call_args[0][0] == evals[0]["id"] def test_evaluate_model_finetuning_not_found(client, user_api_key_header): @@ -75,7 +48,7 @@ def test_top_model_by_doc(client, db, user_api_key_header): model_eval = model_evals[0] model_eval.score = { - "mcc": 0.85, + "mcc_score": 0.85, } db.flush() @@ -88,7 +61,7 @@ def test_top_model_by_doc(client, db, user_api_key_header): json_data = response.json() assert json_data["data"]["score"] == { - "mcc": 0.85, + "mcc_score": 0.85, } assert json_data["data"]["model_name"] == model_eval.model_name assert json_data["data"]["document_id"] == str(model_eval.document_id) diff --git a/backend/app/tests/crud/test_model_evaluation.py b/backend/app/tests/crud/test_model_evaluation.py index 57b27ded..9a3f7d55 100644 --- a/backend/app/tests/crud/test_model_evaluation.py +++ b/backend/app/tests/crud/test_model_evaluation.py @@ -92,7 +92,7 @@ def test_fetch_eval_by_doc_id_not_found(db: Session): def test_fetch_top_model_by_doc_id_success(db: Session): model_evals = create_test_model_evaluation(db) model_eval = model_evals[0] - model_eval.score = {"mcc": 0.8} + model_eval.score = {"mcc_score": 0.8} db.flush() doc_id = model_eval.document_id From 98eca505c2c18f75267b6c8aaa77a0c782570a83 Mon Sep 17 00:00:00 2001 From: nishika26 Date: Wed, 3 Sep 2025 16:22:25 +0530 Subject: [PATCH 3/5] changing model name to fine tuned model in model eval --- ...d6ed401847_add_fine_tuning_and_model_evaluation_.py | 4 +++- backend/app/api/routes/model_evaluation.py | 4 ++-- 
backend/app/core/finetune/evaluation.py | 10 +++++----- backend/app/crud/model_evaluation.py | 2 +- backend/app/models/model_evaluation.py | 4 ++-- backend/app/tests/api/routes/test_model_evaluation.py | 2 +- backend/app/tests/crud/test_model_evaluation.py | 4 ++-- backend/app/tests/utils/test_data.py | 2 +- 8 files changed, 17 insertions(+), 15 deletions(-) diff --git a/backend/app/alembic/versions/6ed6ed401847_add_fine_tuning_and_model_evaluation_.py b/backend/app/alembic/versions/6ed6ed401847_add_fine_tuning_and_model_evaluation_.py index 749996ed..dc925366 100644 --- a/backend/app/alembic/versions/6ed6ed401847_add_fine_tuning_and_model_evaluation_.py +++ b/backend/app/alembic/versions/6ed6ed401847_add_fine_tuning_and_model_evaluation_.py @@ -87,7 +87,9 @@ def upgrade(): sa.Column("fine_tuning_id", sa.Integer(), nullable=False), sa.Column("id", sa.Integer(), nullable=False), sa.Column("document_id", sa.Uuid(), nullable=False), - sa.Column("model_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column( + "fine_tuned_model", sqlmodel.sql.sqltypes.AutoString(), nullable=False + ), sa.Column( "test_data_s3_object", sqlmodel.sql.sqltypes.AutoString(), nullable=False ), diff --git a/backend/app/api/routes/model_evaluation.py b/backend/app/api/routes/model_evaluation.py index 451c77e3..bdb5fcb2 100644 --- a/backend/app/api/routes/model_evaluation.py +++ b/backend/app/api/routes/model_evaluation.py @@ -70,7 +70,7 @@ def run_model_evaluation( ) evaluator = ModelEvaluator( - model_name=model_eval.model_name, + fine_tuned_model=model_eval.fine_tuned_model, test_data_s3_object=model_eval.test_data_s3_object, system_prompt=model_eval.system_prompt, client=client, @@ -178,7 +178,7 @@ def evaluate_models( { "id": ev.id, "fine_tuning_id": ev.fine_tuning_id, - "model_name": getattr(ev, "model_name", None), + "fine_tuned_model": getattr(ev, "fine_tuned_model", None), "document_id": getattr(ev, "document_id", None), "status": ev.status, } diff --git a/backend/app/core/finetune/evaluation.py b/backend/app/core/finetune/evaluation.py index 4c47a12f..527087eb 100644 --- a/backend/app/core/finetune/evaluation.py +++ b/backend/app/core/finetune/evaluation.py @@ -25,13 +25,13 @@ class ModelEvaluator: def __init__( self, - model_name: str, + fine_tuned_model: str, test_data_s3_object: str, storage: AmazonCloudStorage, system_prompt: str, client: OpenAI, ): - self.model_name = model_name + self.fine_tuned_model = fine_tuned_model self.test_data_s3_object = test_data_s3_object self.storage = storage self.system_instruction = system_prompt @@ -41,7 +41,7 @@ def __init__( self.y_true: list[str] = [] self.prompts: list[str] = [] - logger.info(f"ModelEvaluator initialized with model: {model_name}") + logger.info(f"ModelEvaluator initialized with model: {fine_tuned_model}") def load_labels_and_prompts(self) -> None: """ @@ -133,7 +133,7 @@ def generate_predictions(self) -> tuple[list[str], str]: try: response = self.client.responses.create( - model=self.model_name, + model=self.fine_tuned_model, instructions=self.system_instruction, input=prompt, ) @@ -181,7 +181,7 @@ def generate_predictions(self) -> tuple[list[str], str]: ) unique_id = uuid.uuid4().hex - filename = f"predictions_{self.model_name}_{unique_id}.csv" + filename = f"predictions_{self.fine_tuned_model}_{unique_id}.csv" prediction_data_s3_object = DataPreprocessor.upload_csv_to_s3( self.storage, prediction_data, filename ) diff --git a/backend/app/crud/model_evaluation.py b/backend/app/crud/model_evaluation.py index 37f69add..d93bcce5 100644 
--- a/backend/app/crud/model_evaluation.py +++ b/backend/app/crud/model_evaluation.py @@ -37,7 +37,7 @@ def create_model_evaluation( "system_prompt": fine_tuning_job.system_prompt, "base_model": fine_tuning_job.base_model, "split_ratio": fine_tuning_job.split_ratio, - "model_name": fine_tuning_job.fine_tuned_model, + "fine_tuned_model": fine_tuning_job.fine_tuned_model, "document_id": fine_tuning_job.document_id, "test_data_s3_object": fine_tuning_job.test_data_s3_object, "project_id": project_id, diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index 7a10c2ae..900b57b6 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -46,7 +46,7 @@ class ModelEvaluation(ModelEvaluationBase, table=True): foreign_key="document.id", nullable=False, ) - model_name: str = Field(description="fine tuned model name from OpenAI") + fine_tuned_model: str = Field(description="fine tuned model name from OpenAI") test_data_s3_object: str = Field( description="S3 URI of the testing data stored in S3" ) @@ -97,7 +97,7 @@ class ModelEvaluationPublic(ModelEvaluationBase): id: int document_id: UUID - model_name: str + fine_tuned_model: str split_ratio: float base_model: str prediction_data_file_url: str | None = None diff --git a/backend/app/tests/api/routes/test_model_evaluation.py b/backend/app/tests/api/routes/test_model_evaluation.py index 8c4e6727..605775f6 100644 --- a/backend/app/tests/api/routes/test_model_evaluation.py +++ b/backend/app/tests/api/routes/test_model_evaluation.py @@ -63,7 +63,7 @@ def test_top_model_by_doc(client, db, user_api_key_header): assert json_data["data"]["score"] == { "mcc_score": 0.85, } - assert json_data["data"]["model_name"] == model_eval.model_name + assert json_data["data"]["fine_tuned_model"] == model_eval.fine_tuned_model assert json_data["data"]["document_id"] == str(model_eval.document_id) assert json_data["data"]["id"] == model_eval.id diff --git a/backend/app/tests/crud/test_model_evaluation.py b/backend/app/tests/crud/test_model_evaluation.py index 9a3f7d55..e0c6dba2 100644 --- a/backend/app/tests/crud/test_model_evaluation.py +++ b/backend/app/tests/crud/test_model_evaluation.py @@ -35,7 +35,7 @@ def test_create_model_evaluation(db: Session): fine_tuning_id=fine_tune.id, system_prompt=fine_tune.system_prompt, base_model=fine_tune.base_model, - model_name=fine_tune.fine_tuned_model, + fine_tuned_model=fine_tune.fine_tuned_model, document_id=fine_tune.document_id, test_data_s3_object=fine_tune.test_data_s3_object, status="pending", @@ -51,7 +51,7 @@ def test_create_model_evaluation(db: Session): assert created_eval.id is not None assert created_eval.status == "pending" assert created_eval.document_id == fine_tune.document_id - assert created_eval.model_name == fine_tune.fine_tuned_model + assert created_eval.fine_tuned_model == fine_tune.fine_tuned_model assert created_eval.test_data_s3_object == fine_tune.test_data_s3_object diff --git a/backend/app/tests/utils/test_data.py b/backend/app/tests/utils/test_data.py index bd91cfbf..cc9c840d 100644 --- a/backend/app/tests/utils/test_data.py +++ b/backend/app/tests/utils/test_data.py @@ -184,7 +184,7 @@ def create_test_model_evaluation(db) -> list[ModelEvaluation]: fine_tuning_id=fine_tune.id, system_prompt=fine_tune.system_prompt, base_model=fine_tune.base_model, - model_name=fine_tune.fine_tuned_model, + fine_tuned_model=fine_tune.fine_tuned_model, document_id=fine_tune.document_id, test_data_s3_object=fine_tune.test_data_s3_object, ) 
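[Reviewer note on the pattern above: the routes in this series repeatedly attach short-lived signed URLs to response objects via model_copy(update=...) instead of persisting URLs next to the S3 object keys. A minimal sketch of that pattern, using plain Pydantic and an illustrative stand-in for storage.get_signed_url; the names below are assumptions for illustration, not the project's API.]

from pydantic import BaseModel


class JobPublic(BaseModel):
    id: int
    train_data_s3_object: str | None = None
    train_data_file_url: str | None = None


def get_signed_url_stub(key: str) -> str:
    # Stand-in for storage.get_signed_url; a real implementation would
    # call something like S3 generate_presigned_url with an expiry.
    return f"https://example.invalid/{key}?X-Amz-Signature=stub"


job = JobPublic(id=1, train_data_s3_object="datasets/train_split_70_ab12.csv")

# model_copy(update=...) returns a new object with the derived field set,
# so the signed URL is computed per request and never written to the DB;
# only the stable S3 object key is stored.
job = job.model_copy(
    update={
        "train_data_file_url": get_signed_url_stub(job.train_data_s3_object)
        if job.train_data_s3_object
        else None
    }
)
assert job.train_data_file_url is not None

[Storing only the object key is the design choice the renames in this series (train_data_s3_URI / train_data_s3_url to train_data_s3_object) codify: presigned URLs expire, so they are derived at read time rather than persisted.]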
From 836440180c9e27c3127ccf93f384a81001c12850 Mon Sep 17 00:00:00 2001 From: nishika26 Date: Thu, 4 Sep 2025 00:18:48 +0530 Subject: [PATCH 4/5] better variable names --- backend/app/api/routes/fine_tuning.py | 3 ++- backend/app/api/routes/model_evaluation.py | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py index a0bd738f..23d47a00 100644 --- a/backend/app/api/routes/fine_tuning.py +++ b/backend/app/api/routes/fine_tuning.py @@ -159,6 +159,7 @@ def process_fine_tuning_job( ) except Exception as e: + error_msg = str(e) logger.error( f"[process_fine_tuning_job] Background job failure: {e} | " f"job_id={job_id}, project_id={project_id}|" @@ -169,7 +170,7 @@ def process_fine_tuning_job( update=FineTuningUpdate( status=FineTuningStatus.failed, error_message="Error while processing the background job : " - + str(e), + + error_msg, ), ) diff --git a/backend/app/api/routes/model_evaluation.py b/backend/app/api/routes/model_evaluation.py index bdb5fcb2..b38172dd 100644 --- a/backend/app/api/routes/model_evaluation.py +++ b/backend/app/api/routes/model_evaluation.py @@ -95,6 +95,7 @@ def run_model_evaluation( ) except Exception as e: + error_msg = str(e) logger.error( f"[run_model_evaluation] Failed | eval_id={eval_id}, project_id={current_user.project_id}: {e}" ) @@ -105,7 +106,8 @@ def run_model_evaluation( project_id=current_user.project_id, update=ModelEvaluationUpdate( status=ModelEvaluationStatus.failed, - error_message="failed during background job processing:" + str(e), + error_message="failed during background job processing:" + + error_msg, ), ) @@ -141,20 +143,20 @@ def evaluate_models( ) raise HTTPException(status_code=400, detail="No fine-tuned job IDs provided") - evals: list[ModelEvaluationPublic] = [] + evaluations: list[ModelEvaluationPublic] = [] for job_id in request.fine_tuning_ids: fine_tuning_job = fetch_by_id(session, job_id, current_user.project_id) - active_evals = fetch_active_model_evals( + active_evaluations = fetch_active_model_evals( session, job_id, current_user.project_id ) - if active_evals: + if active_evaluations: logger.info( f"[evaluate_model] Skipping creation for {job_id}. Active evaluation exists, project_id:{current_user.project_id}" ) - evals.extend( - ModelEvaluationPublic.model_validate(ev) for ev in active_evals + evaluations.extend( + ModelEvaluationPublic.model_validate(ev) for ev in active_evaluations ) continue @@ -166,7 +168,7 @@ def evaluate_models( status=ModelEvaluationStatus.pending, ) - evals.append(ModelEvaluationPublic.model_validate(model_eval)) + evaluations.append(ModelEvaluationPublic.model_validate(model_eval)) logger.info( f"[evaluate_model] Created evaluation for fine_tuning_id {job_id} with eval ID={model_eval.id}, project_id:{current_user.project_id}" @@ -182,7 +184,7 @@ def evaluate_models( "document_id": getattr(ev, "document_id", None), "status": ev.status, } - for ev in evals + for ev in evaluations ] return APIResponse.success_response( @@ -222,7 +224,7 @@ def get_top_model_by_doc_id( response_model=APIResponse[list[ModelEvaluationPublic]], response_model_exclude_none=True, ) -def get_evals_by_doc_id( +def get_evaluations_by_doc_id( document_id: UUID, session: SessionDep, current_user: CurrentUserOrgProject, @@ -231,7 +233,7 @@ def get_evals_by_doc_id( Return all model evaluations for the given document_id within the current project. 
""" logger.info( - f"[get_evals_by_doc_id] Fetching evaluations for document_id={document_id}, " + f"[get_evaluations_by_doc_id] Fetching evaluations for document_id={document_id}, " f"project_id={current_user.project_id}" ) From bd7c67662704c41b839d6da51251ec148ea38f79 Mon Sep 17 00:00:00 2001 From: nishika26 Date: Thu, 4 Sep 2025 13:19:06 +0530 Subject: [PATCH 5/5] error handling in get cloud storage and document not found error handling --- backend/app/core/cloud/storage.py | 9 ++++++++- backend/app/crud/fine_tuning.py | 6 ++++++ backend/app/tests/api/routes/test_fine_tuning.py | 2 +- backend/app/tests/crud/test_fine_tuning.py | 4 ++-- backend/app/tests/utils/test_data.py | 2 +- 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index a3248a01..95c7f0dd 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -269,4 +269,11 @@ def get_cloud_storage(session: Session, project_id: int) -> CloudStorage: storage_path = project.storage_path - return AmazonCloudStorage(project_id=project_id, storage_path=storage_path) + try: + return AmazonCloudStorage(project_id=project_id, storage_path=storage_path) + except Exception as err: + logger.error( + f"[get_cloud_storage] Failed to initialize storage for project_id={project_id}: {err}", + exc_info=True, + ) + raise diff --git a/backend/app/crud/fine_tuning.py b/backend/app/crud/fine_tuning.py index 1890665b..2b80e968 100644 --- a/backend/app/crud/fine_tuning.py +++ b/backend/app/crud/fine_tuning.py @@ -12,6 +12,7 @@ FineTuningUpdate, FineTuningStatus, ) +from app.crud import DocumentCrud logger = logging.getLogger(__name__) @@ -38,6 +39,11 @@ def create_fine_tuning_job( ) return existing, False + document_crud = DocumentCrud( + session, project_id + ) # to check if the given document is present in the document table or not + document = document_crud.read_one(request.document_id) + fine_tune_data = request.model_dump(exclude_unset=True) base_data = { **fine_tune_data, diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py index e49a1ee5..5582b73f 100644 --- a/backend/app/tests/api/routes/test_fine_tuning.py +++ b/backend/app/tests/api/routes/test_fine_tuning.py @@ -160,7 +160,7 @@ def test_retrieve_fine_tuning_job_failed( class TestFetchJob: def test_fetch_jobs_document(self, client, db, user_api_key_header): jobs, _ = create_test_fine_tuning_jobs(db, [0.3, 0.4]) - document = get_document(db) + document = get_document(db, "dalgo_sample.json") response = client.get( f"/api/v1/fine_tuning/{document.id}", headers=user_api_key_header diff --git a/backend/app/tests/crud/test_fine_tuning.py b/backend/app/tests/crud/test_fine_tuning.py index 387f6d55..d71a000b 100644 --- a/backend/app/tests/crud/test_fine_tuning.py +++ b/backend/app/tests/crud/test_fine_tuning.py @@ -17,7 +17,7 @@ def test_create_fine_tuning_job(db: Session): project = get_project(db, "Dalgo") - document = get_document(db) + document = get_document(db, "dalgo_sample.json") job_request = FineTuningJobCreate( document_id=document.id, @@ -100,7 +100,7 @@ def test_update_finetune_job(db: Session): def test_fetch_active_jobs_by_document_id(db: Session): project = get_project(db, "Dalgo") - document = get_document(db) + document = get_document(db, "dalgo_sample.json") job_request = FineTuningJobCreate( document_id=document.id, diff --git a/backend/app/tests/utils/test_data.py b/backend/app/tests/utils/test_data.py index 
cc9c840d..616904f2 100644 --- a/backend/app/tests/utils/test_data.py +++ b/backend/app/tests/utils/test_data.py @@ -135,7 +135,7 @@ def create_test_fine_tuning_jobs( ratios: list[float], ) -> tuple[list[Fine_Tuning], bool]: project = get_project(db, "Dalgo") - document = get_document(db) + document = get_document(db, "dalgo_sample.json") jobs = [] any_created = False
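[Reviewer note on PATCH 5/5: get_cloud_storage now wraps storage construction in a log-and-reraise block so a misconfigured project fails fast with a traceback in the logs. A minimal sketch of that pattern with illustrative names; FakeStorage stands in for AmazonCloudStorage and is not the project's class.]

import logging

logger = logging.getLogger(__name__)


class FakeStorage:
    # Stand-in for AmazonCloudStorage: construction may raise if the
    # project has no usable storage configuration.
    def __init__(self, project_id: int, storage_path: str):
        if not storage_path:
            raise ValueError("storage_path is not configured")
        self.project_id = project_id
        self.storage_path = storage_path


def get_storage(project_id: int, storage_path: str) -> FakeStorage:
    try:
        return FakeStorage(project_id=project_id, storage_path=storage_path)
    except Exception as err:
        # Log with the full traceback, then re-raise the original exception
        # so the caller (or FastAPI's error handler) decides the response;
        # swallowing it here would hide the root cause.
        logger.error(
            f"[get_storage] Failed to initialize storage for project_id={project_id}: {err}",
            exc_info=True,
        )
        raise


if __name__ == "__main__":
    try:
        get_storage(project_id=1, storage_path="")
    except ValueError:
        pass  # error was already logged with exc_info=True before propagating

[The bare raise, rather than raise err, preserves the original traceback, which matches the intent of the exc_info=True log line added to get_cloud_storage in this commit.]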