From c939d3a569a0b14688dd517bbb42f58b33a9b821 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Wed, 17 Sep 2025 18:51:20 +0530
Subject: [PATCH 01/18] experimenting with Claude

---
 CLAUDE.md | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 CLAUDE.md

diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 00000000..f8193593
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,105 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+Kaapi is an AI platform built with FastAPI (backend) and PostgreSQL (database), containerized with Docker. The platform provides AI capabilities including OpenAI assistants, fine-tuning, document processing, and collection management.
+
+## Key Commands
+
+### Development
+
+```bash
+# Start development environment with auto-reload
+source .venv/bin/activate
+fastapi run --reload app/main.py
+
+# Run backend tests
+uv run bash scripts/tests-start.sh
+
+# Seed data
+uv run python -m app.seed_data.seed_data
+
+# Run pre-commit
+uv run pre-commit run --all-files
+
+# Activate virtual environment
+source .venv/bin/activate
+
+# Run linting and type checking
+cd backend && bash scripts/lint.sh
+
+# Generate a new migration
+alembic revision --autogenerate -m 'Add new meta'
+```
+
+### Testing
+
+```bash
+# Run backend tests
+uv run bash scripts/tests-start.sh
+```
+
+## Architecture
+
+### Backend Structure
+
+The backend follows a layered architecture:
+
+- **API Layer** (`backend/app/api/`): FastAPI routes organized by domain
+  - Authentication (`login.py`)
+  - Core resources: `users.py`, `organizations.py`, `projects.py`
+  - AI features: `assistants.py`, `fine_tuning.py`, `openai_conversation.py`
+  - Document management: `documents.py`, `collections.py`, `doc_transformation_job.py`
+
+- **Models** (`backend/app/models/`): SQLModel entities representing database tables
+  - User system: User, Organization, Project, ProjectUser
+  - AI components: Assistant, Thread, Message, FineTuning
+  - Document system: Document, Collection, DocumentCollection, DocTransformationJob
+
+- **CRUD Operations** (`backend/app/crud/`): Database operations for each model
+
+- **Core Services** (`backend/app/core/`):
+  - `providers.py`: OpenAI client management
+  - `finetune/`: Fine-tuning pipeline (preprocessing, evaluation)
+  - `doctransform/`: Document transformation services
+  - `cloud/storage.py`: S3 storage integration
+  - `langfuse/`: Observability and tracing
+
+### Database
+
+PostgreSQL with Alembic migrations. Key relationships:
+- Organizations contain Projects
+- Projects have Users (many-to-many via ProjectUser)
+- Projects contain Collections and Documents
+- Documents can belong to Collections (many-to-many)
+- Projects have Assistants, Threads, and FineTuning jobs
+
+### Authentication & Security
+
+- JWT-based authentication
+- API key support for programmatic access
+- Role-based access control (User, Admin, Super Admin)
+- Organization and project-level permissions
+
+## Environment Configuration
+
+Critical environment variables:
+- `SECRET_KEY`: JWT signing key
+- `POSTGRES_*`: Database connection
+- `LOCAL_CREDENTIALS_ORG_OPENAI_API_KEY`: OpenAI API key
+- `AWS_S3_BUCKET_PREFIX`: S3 storage configuration
+- `LANGFUSE_*`: Observability configuration
+
+## Testing Strategy
+
+- Unit tests in `backend/app/tests/`
+- Test fixtures use factory pattern
+- Mock external services (OpenAI, S3) using `moto` and `openai_responses`
+- Coverage reports generated automatically
+
+## Code Standards
+
+- Python 3.11+ with type hints
+- Pre-commit hooks configured for consistency

From 1482a0c141acc84f88b21bf6b9392f0be765893f Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Thu, 18 Sep 2025 16:03:31 +0530
Subject: [PATCH 02/18] first stab with Claude

---
 backend/app/api/routes/fine_tuning.py | 66 ++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 6 deletions(-)

diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py
index 9d053393..c04a23a6 100644
--- a/backend/app/api/routes/fine_tuning.py
+++ b/backend/app/api/routes/fine_tuning.py
@@ -1,17 +1,19 @@
 from typing import Optional
 import logging
 import time
-from uuid import UUID
+from uuid import UUID, uuid4
+from pathlib import Path
 
 import openai
 from sqlmodel import Session
-from fastapi import APIRouter, HTTPException, BackgroundTasks
+from fastapi import APIRouter, HTTPException, BackgroundTasks, File, Form, UploadFile
 
 from app.models import (
     FineTuningJobCreate,
     FineTuningJobPublic,
     FineTuningUpdate,
     FineTuningStatus,
+    Document,
 )
 from app.core.cloud import get_cloud_storage
 from app.crud.document import DocumentCrud
@@ -179,12 +181,40 @@
     description=load_description("fine_tuning/create.md"),
     response_model=APIResponse,
 )
-def fine_tune_from_CSV(
+async def fine_tune_from_CSV(
     session: SessionDep,
     current_user: CurrentUserOrgProject,
-    request: FineTuningJobCreate,
     background_tasks: BackgroundTasks,
+    file: UploadFile = File(..., description="CSV file to use for fine-tuning"),
+    base_model: str = Form(
+        ..., description="Base model for fine-tuning (e.g., gpt-3.5-turbo)"
+    ),
+    split_ratio: str = Form(
+        ..., description="Comma-separated split ratios (e.g., '0.8' or '0.7,0.8,0.9')"
+    ),
+    system_prompt: str = Form(..., description="System prompt for the fine-tuning job"),
 ):
+    # Validate and parse split ratios
+    try:
+        split_ratios = [float(r.strip()) for r in split_ratio.split(",")]
+        for ratio in split_ratios:
+            if not (0 < ratio < 1):
+                raise ValueError(
+                    f"Invalid split_ratio: {ratio}. Must be between 0 and 1."
+                )
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
+    # Validate system prompt
+    if not system_prompt.strip():
+        raise HTTPException(
+            status_code=400, detail="System prompt must be a non-empty string"
+        )
+
+    # Validate file is CSV
+    if not file.filename.lower().endswith(".csv"):
+        raise HTTPException(status_code=400, detail="File must be a CSV file")
+
     client = get_openai_client(  # Used here only to validate the user's OpenAI key;
         # the actual client is re-initialized separately inside the background task
         session,
@@ -192,9 +222,31 @@
         current_user.organization_id,
         current_user.project_id,
     )
 
+    # Upload the file to storage and create document
+    storage = get_cloud_storage(session=session, project_id=current_user.project_id)
+    document_id = uuid4()
+    object_store_url = storage.put(file, Path(str(document_id)))
+
+    # Create document in database
+    document_crud = DocumentCrud(session, current_user.project_id)
+    document = Document(
+        id=document_id,
+        fname=file.filename,
+        object_store_url=str(object_store_url),
+    )
+    created_document = document_crud.update(document)
+
+    # Create FineTuningJobCreate request object
+    request = FineTuningJobCreate(
+        document_id=created_document.id,
+        base_model=base_model,
+        split_ratio=split_ratios,
+        system_prompt=system_prompt.strip(),
+    )
+
     results = []
-    for ratio in request.split_ratio:
+    for ratio in split_ratios:
         job, created = create_fine_tuning_job(
             session=session,
             request=request,
@@ -237,7 +289,9 @@
         else f"Started {created_count} job(s); {total - created_count} active fine-tuning job(s) already exists."
     )
 
-    return APIResponse.success_response({"message": message, "jobs": job_infos})
+    return APIResponse.success_response(
+        {"message": message, "document_id": str(created_document.id), "jobs": job_infos}
+    )
 
 
 @router.get(

From c3364d5b0dc81ef7e4c1aa1fe91346f2e1de3cd5 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Thu, 18 Sep 2025 16:03:41 +0530
Subject: [PATCH 03/18] first stab with Claude

---
 CLAUDE.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index f8193593..2dc7be8a 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -27,9 +27,6 @@ uv run pre-commit run --all-files
 # Activate virtual environment
 source .venv/bin/activate
 
-# Run linting and type checking
-cd backend && bash scripts/lint.sh
-
 # Generate a new migration
 alembic revision --autogenerate -m 'Add new meta'
 ```
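PATCH 02 above switches the create endpoint from a JSON body to multipart form data, one fine-tuning job per comma-separated split ratio. A minimal client-side sketch of the new call shape — the base URL and API-key header name here are assumptions for illustration, not taken from this series:

```python
import requests

API_BASE = "http://localhost:8000/api/v1"  # assumed local dev server
HEADERS = {"X-API-KEY": "your-project-api-key"}  # header name assumed

with open("training_data.csv", "rb") as f:
    response = requests.post(
        f"{API_BASE}/fine_tuning/fine_tune",
        headers=HEADERS,
        # Multipart upload: the CSV goes in `files`, scalars in `data`
        files={"file": ("training_data.csv", f, "text/csv")},
        data={
            "base_model": "gpt-4",
            "split_ratio": "0.5,0.7,0.9",  # one job per ratio
            "system_prompt": "you are a model able to classify",
        },
        timeout=60,
    )

response.raise_for_status()
payload = response.json()
print(payload["data"]["message"], payload["data"]["jobs"])
```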
From ebcd9a0ede5852bee87bb2f9f22adaa770a60826 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Mon, 22 Sep 2025 22:53:16 +0530
Subject: [PATCH 04/18] adding logic to trigger evaluation automatically when
 status changes from running to completed

---
 backend/app/api/routes/fine_tuning.py   | 68 +++++++++++++++++++++----
 backend/app/api/routes/responses.py     | 21 +-------
 backend/app/api/routes/threads.py       |  9 +---
 backend/app/core/finetune/evaluation.py |  2 +-
 backend/app/utils.py                    |  8 +++
 5 files changed, 70 insertions(+), 38 deletions(-)

diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py
index c04a23a6..e2c3b5fb 100644
--- a/backend/app/api/routes/fine_tuning.py
+++ b/backend/app/api/routes/fine_tuning.py
@@ -14,19 +14,30 @@
     FineTuningUpdate,
     FineTuningStatus,
     Document,
+    ModelEvaluationBase,
+    ModelEvaluationStatus,
 )
 from app.core.cloud import get_cloud_storage
 from app.crud.document import DocumentCrud
-from app.utils import get_openai_client, APIResponse, mask_string, load_description
+from app.utils import (
+    get_openai_client,
+    APIResponse,
+    mask_string,
+    load_description,
+    handle_openai_error,
+)
 from app.crud import (
     create_fine_tuning_job,
     fetch_by_id,
     update_finetune_job,
     fetch_by_document_id,
+    create_model_evaluation,
+    fetch_active_model_evals,
 )
 from app.core.db import engine
 from app.api.deps import CurrentUserOrgProject, SessionDep
 from app.core.finetune.preprocessing import DataPreprocessor
+from app.api.routes.model_evaluation import run_model_evaluation
 
 logger = logging.getLogger(__name__)
@@ -43,13 +54,6 @@
 }
 
 
-def handle_openai_error(e: openai.OpenAIError) -> str:
-    """Extract error message from OpenAI error."""
-    if isinstance(e.body, dict) and "message" in e.body:
-        return e.body["message"]
-    return str(e)
-
-
 def process_fine_tuning_job(
     job_id: int,
     ratio: float,
@@ -300,7 +304,10 @@
     response_model=APIResponse[FineTuningJobPublic],
 )
 def refresh_fine_tune_status(
-    fine_tuning_id: int, session: SessionDep, current_user: CurrentUserOrgProject
+    fine_tuning_id: int,
+    background_tasks: BackgroundTasks,
+    session: SessionDep,
+    current_user: CurrentUserOrgProject,
 ):
     project_id = current_user.project_id
     job = fetch_by_id(session, fine_tuning_id, project_id)
@@ -336,12 +343,55 @@
         error_message=openai_error_msg,
     )
 
+    # Check if status is changing from running to completed
+    is_newly_completed = (
+        job.status == FineTuningStatus.running
+        and update_payload.status == FineTuningStatus.completed
+    )
+
     if (
         job.status != update_payload.status
         or job.fine_tuned_model != update_payload.fine_tuned_model
     ):
         job = update_finetune_job(session=session, job=job, update=update_payload)
 
+    # If the job just completed, automatically trigger evaluation
+    if is_newly_completed:
+        logger.info(
+            f"[refresh_fine_tune_status] Fine-tuning job completed, triggering evaluation | "
+            f"fine_tuning_id={fine_tuning_id}, project_id={project_id}"
+        )
+
+        # Check if there's already an active evaluation for this job
+        active_evaluations = fetch_active_model_evals(
+            session, fine_tuning_id, project_id
+        )
+
+        if not active_evaluations:
+            # Create a new evaluation
+            model_eval = create_model_evaluation(
+                session=session,
+                request=ModelEvaluationBase(fine_tuning_id=fine_tuning_id),
+                project_id=project_id,
+                organization_id=current_user.organization_id,
+                status=ModelEvaluationStatus.pending,
+            )
+
+            # Queue the evaluation task
+            background_tasks.add_task(
+                run_model_evaluation, model_eval.id, current_user
+            )
+
+            logger.info(
+                f"[refresh_fine_tune_status] Created and queued evaluation | "
+                f"eval_id={model_eval.id}, fine_tuning_id={fine_tuning_id}, project_id={project_id}"
+            )
+        else:
+            logger.info(
+                f"[refresh_fine_tune_status] Skipping evaluation creation - active evaluation exists | "
+                f"fine_tuning_id={fine_tuning_id}, project_id={project_id}"
+            )
+
     job = job.model_copy(
         update={
             "train_data_file_url": storage.get_signed_url(job.train_data_s3_object)
diff --git a/backend/app/api/routes/responses.py b/backend/app/api/routes/responses.py
index 94e5f19d..d4e2389c 100644
--- a/backend/app/api/routes/responses.py
+++ b/backend/app/api/routes/responses.py
@@ -18,32 +18,13 @@
     get_conversation_by_ancestor_id,
 )
 from app.models import UserProjectOrg, OpenAIConversationCreate, OpenAIConversation
-from app.utils import APIResponse, mask_string
+from app.utils import APIResponse, mask_string, handle_openai_error
 from app.core.langfuse.langfuse import LangfuseTracer
 
 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["responses"])
 
 
-def handle_openai_error(e: openai.OpenAIError) -> str:
-    """Extract error message from OpenAI error."""
-    # Try to get error message from different possible attributes
-    if hasattr(e, "body") and isinstance(e.body, dict) and "message" in e.body:
-        return e.body["message"]
-    elif hasattr(e, "message"):
-        return e.message
-    elif hasattr(e, "response") and hasattr(e.response, "json"):
-        try:
-            error_data = e.response.json()
-            if isinstance(error_data, dict) and "error" in error_data:
-                error_info = error_data["error"]
-                if isinstance(error_info, dict) and "message" in error_info:
-                    return error_info["message"]
-        except:
-            pass
-    return str(e)
-
-
 class ResponsesAPIRequest(BaseModel):
     assistant_id: str
     question: str
diff --git a/backend/app/api/routes/threads.py b/backend/app/api/routes/threads.py
index 95630bfb..be7e0578 100644
--- a/backend/app/api/routes/threads.py
+++ b/backend/app/api/routes/threads.py
@@ -13,7 +13,7 @@
 from app.core import logging, settings
 from app.models import UserOrganization, OpenAIThreadCreate, UserProjectOrg
 from app.crud import upsert_thread_result, get_thread_result
-from app.utils import APIResponse, mask_string
+from app.utils import APIResponse, mask_string, handle_openai_error
 from app.crud.credentials import get_provider_credential
 from app.core.util import configure_openai
 from app.core.langfuse.langfuse import LangfuseTracer
@@ -49,13 +49,6 @@ def send_callback(callback_url: str, data: dict):
         return False
 
 
-def handle_openai_error(e: openai.OpenAIError) -> str:
-    """Extract error message from OpenAI error."""
-    if isinstance(e.body, dict) and "message" in e.body:
-        return e.body["message"]
-    return str(e)
-
-
 def validate_thread(client: OpenAI, thread_id: str) -> tuple[bool, str]:
     """Validate if a thread exists and has no active runs."""
     if not thread_id:
diff --git a/backend/app/core/finetune/evaluation.py b/backend/app/core/finetune/evaluation.py
index 527087eb..4a85e85c 100644
--- a/backend/app/core/finetune/evaluation.py
+++ b/backend/app/core/finetune/evaluation.py
@@ -11,7 +11,7 @@
     matthews_corrcoef,
 )
 from app.core.cloud import AmazonCloudStorage
-from app.api.routes.fine_tuning import handle_openai_error
+from app.utils import handle_openai_error
 from app.core.finetune.preprocessing import DataPreprocessor
 
 
diff --git a/backend/app/utils.py b/backend/app/utils.py
index 1c03839a..8f96fd95 100644
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@@ -7,6 +7,7 @@
 
 import jwt
 import emails
+import openai
 from jinja2 import Template
 from jwt.exceptions import InvalidTokenError
 from fastapi import HTTPException
@@ -48,6 +49,13 @@ def failure_response(
         return cls(success=False, data=None, error=error_message, metadata=metadata)
 
 
+def handle_openai_error(e: openai.OpenAIError) -> str:
+    """Extract error message from OpenAI error."""
+    if hasattr(e, "body") and isinstance(e.body, dict) and "message" in e.body:
+        return e.body["message"]
+    return str(e)
+
+
 @dataclass
 class EmailData:
     html_content: str
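The guard added in PATCH 04 fires evaluation only on the running → completed edge, so repeated refreshes of an already-completed job stay idempotent. A standalone sketch of that edge check — enum values mirrored from `app/models/fine_tuning.py` as of this patch, helper name illustrative:

```python
from enum import Enum


class FineTuningStatus(str, Enum):
    # Mirrors app.models.fine_tuning.FineTuningStatus at this point in the series
    pending = "pending"
    running = "running"
    completed = "completed"
    failed = "failed"


def is_newly_completed(old: FineTuningStatus, new: FineTuningStatus) -> bool:
    """True only on the running -> completed transition."""
    return old == FineTuningStatus.running and new == FineTuningStatus.completed


# Evaluation fires once, on the edge; refreshing a finished job is a no-op.
assert is_newly_completed(FineTuningStatus.running, FineTuningStatus.completed)
assert not is_newly_completed(FineTuningStatus.completed, FineTuningStatus.completed)
assert not is_newly_completed(FineTuningStatus.pending, FineTuningStatus.running)
```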
From 00b415f5bacef5707928115a230183b39be3beb1 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Tue, 23 Sep 2025 10:12:04 +0530
Subject: [PATCH 05/18] updating testcases

---
 .../app/tests/api/routes/test_fine_tuning.py | 121 +++++++++++-------
 1 file changed, 73 insertions(+), 48 deletions(-)

diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py
index 5582b73f..9162044a 100644
--- a/backend/app/tests/api/routes/test_fine_tuning.py
+++ b/backend/app/tests/api/routes/test_fine_tuning.py
@@ -1,10 +1,30 @@
+import os
+import io
 import pytest
-
+from moto import mock_aws
 from unittest.mock import patch, MagicMock
+import boto3
 
 from app.tests.utils.test_data import create_test_fine_tuning_jobs
 from app.tests.utils.utils import get_document
-from app.models import Fine_Tuning
+from app.models import (
+    Fine_Tuning,
+    FineTuningStatus,
+    ModelEvaluation,
+    ModelEvaluationStatus,
+)
+from app.core.config import settings
+
+
+@pytest.fixture(scope="function")
+def aws_credentials():
+    """Set up AWS credentials for moto."""
+    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+    os.environ["AWS_SECURITY_TOKEN"] = "testing"
+    os.environ["AWS_SESSION_TOKEN"] = "testing"
+    os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
+    os.environ["AWS_S3_BUCKET_PREFIX"] = "test-bucket"
 
 
 def create_file_mock(file_type):
@@ -22,73 +42,78 @@ def _side_effect(file=None, purpose=None):
 
     return _side_effect
 
 
-@pytest.mark.usefixtures("client", "db", "user_api_key_header")
-@patch("app.api.routes.fine_tuning.DataPreprocessor")
-@patch("app.api.routes.fine_tuning.get_openai_client")
+@pytest.mark.usefixtures("client", "db", "user_api_key_header", "aws_credentials")
 class TestCreateFineTuningJobAPI:
+    @mock_aws
     def test_finetune_from_csv_multiple_split_ratio(
         self,
-        mock_get_openai_client,
-        mock_preprocessor_cls,
         client,
         db,
         user_api_key_header,
     ):
-        document = get_document(db, "dalgo_sample.json")
+        # Setup S3 bucket for moto
+        s3 = boto3.client("s3", region_name="us-east-1")
+        s3.create_bucket(Bucket="test-bucket")
 
+        # Create a test CSV file content
+        csv_content = "prompt,label\ntest1,label1\ntest2,label2\ntest3,label3"
+
+        # Setup test files for preprocessing
         for path in ["/tmp/train.jsonl", "/tmp/test.jsonl"]:
             with open(path, "w") as f:
-                f.write("{}")
-
-        mock_preprocessor = MagicMock()
-        mock_preprocessor.process.return_value = {
-            "train_jsonl_temp_filepath": "/tmp/train.jsonl",
-            "train_csv_s3_object": "s3://bucket/train.csv",
-            "test_csv_s3_object": "s3://bucket/test.csv",
-        }
-        mock_preprocessor.cleanup = MagicMock()
-        mock_preprocessor_cls.return_value = mock_preprocessor
-
-        mock_openai = MagicMock()
-        mock_openai.files.create.side_effect = create_file_mock("fine-tune")
-        mock_openai.fine_tuning.jobs.create.side_effect = [
-            MagicMock(id=f"ft_mock_job_{i}", status="running") for i in range(1, 4)
-        ]
-        mock_get_openai_client.return_value = mock_openai
-
-        body = {
-            "document_id": str(document.id),
-            "base_model": "gpt-4",
-            "split_ratio": [0.5, 0.7, 0.9],
-            "system_prompt": "you are a model able to classify",
-        }
-
-        with patch("app.api.routes.fine_tuning.Session") as SessionMock:
-            SessionMock.return_value.__enter__.return_value = db
-            SessionMock.return_value.__exit__.return_value = None
-
-            response = client.post(
-                "/api/v1/fine_tuning/fine_tune",
-                json=body,
-                headers=user_api_key_header,
-            )
+                f.write('{"prompt": "test", "completion": "label"}')
+
+        with patch(
+            "app.api.routes.fine_tuning.get_cloud_storage"
+        ) as mock_get_cloud_storage:
+            with patch(
+                "app.api.routes.fine_tuning.get_openai_client"
+            ) as mock_get_openai_client:
+                with patch(
+                    "app.api.routes.fine_tuning.process_fine_tuning_job"
+                ) as mock_process_job:
+                    # Mock cloud storage
+                    mock_storage = MagicMock()
+                    mock_storage.put.return_value = "s3://test-bucket/test.csv"
+                    mock_get_cloud_storage.return_value = mock_storage
+
+                    # Mock OpenAI client (for validation only)
+                    mock_openai = MagicMock()
+                    mock_get_openai_client.return_value = mock_openai
+
+                    # Create file upload data
+                    csv_file = io.BytesIO(csv_content.encode())
+                    response = client.post(
+                        "/api/v1/fine_tuning/fine_tune",
+                        files={"file": ("test.csv", csv_file, "text/csv")},
+                        data={
+                            "base_model": "gpt-4",
+                            "split_ratio": "0.5,0.7,0.9",
+                            "system_prompt": "you are a model able to classify",
+                        },
+                        headers=user_api_key_header,
+                    )
 
         assert response.status_code == 200
         json_data = response.json()
         assert json_data["success"] is True
         assert json_data["data"]["message"] == "Fine-tuning job(s) started."
         assert json_data["metadata"] is None
+        assert "document_id" in json_data["data"]
+        assert "jobs" in json_data["data"]
+        assert len(json_data["data"]["jobs"]) == 3
+
+        # Verify that the background task was called for each split ratio
+        assert mock_process_job.call_count == 3
 
         jobs = db.query(Fine_Tuning).all()
         assert len(jobs) == 3
 
-        for i, job in enumerate(jobs, start=1):
+        for job in jobs:
             db.refresh(job)
-            assert job.status == "running"
-            assert job.provider_job_id == f"ft_mock_job_{i}"
-            assert job.training_file_id is not None
-            assert job.train_data_s3_object == "s3://bucket/train.csv"
-            assert job.test_data_s3_object == "s3://bucket/test.csv"
+            assert (
+                job.status == "pending"
+            )  # Since background processing is mocked, status remains pending
             assert job.split_ratio in [0.5, 0.7, 0.9]

From a2ef0050f3018fe0d8fa58f3f27d2cb2bdf24da3 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Tue, 23 Sep 2025 11:54:43 +0530
Subject: [PATCH 06/18] added more testcases

---
 .../app/tests/api/routes/test_fine_tuning.py | 264 ++++++++++++++++++
 1 file changed, 264 insertions(+)

diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py
index 9162044a..23a96157 100644
--- a/backend/app/tests/api/routes/test_fine_tuning.py
+++ b/backend/app/tests/api/routes/test_fine_tuning.py
@@ -203,3 +203,267 @@ def test_fetch_jobs_document(self, client, db, user_api_key_header):
         for job in json_data["data"]:
             assert job["document_id"] == str(document.id)
             assert job["status"] == "pending"
+
+
+@pytest.mark.usefixtures("client", "db", "user_api_key_header")
+@patch("app.api.routes.fine_tuning.get_openai_client")
+@patch("app.api.routes.fine_tuning.get_cloud_storage")
+@patch("app.api.routes.fine_tuning.run_model_evaluation")
+class TestAutoEvaluationTrigger:
+    """Test cases for automatic evaluation triggering when fine-tuning completes."""
+
+    def test_successful_auto_evaluation_trigger(
+        self,
+        mock_run_model_evaluation,
+        mock_get_cloud_storage,
+        mock_get_openai_client,
+        client,
+        db,
+        user_api_key_header,
+    ):
+        """Test that evaluation is automatically triggered when job status changes from running to completed."""
+        # Setup: Create a fine-tuning job with running status
+        jobs, _ = create_test_fine_tuning_jobs(db, [0.7])
+        job = jobs[0]
+        job.status = FineTuningStatus.running
+        job.provider_job_id = "ft_mock_job_123"
+        # Add required fields for model evaluation
+        job.test_data_s3_object = "test-bucket/test-data.csv"
+        job.system_prompt = "You are a helpful assistant"
+        db.add(job)
+        db.commit()
+        db.refresh(job)
+
+        # Mock cloud storage
+        mock_storage = MagicMock()
+        mock_storage.get_signed_url.return_value = (
+            "https://test.s3.amazonaws.com/signed-url"
+        )
+        mock_get_cloud_storage.return_value = mock_storage
+
+        # Mock OpenAI response indicating job completion
+        mock_openai_job = MagicMock(
+            status="succeeded",
+            fine_tuned_model="ft:gpt-4:custom-model:12345",
+            error=None,
+        )
+        mock_openai = MagicMock()
+        mock_openai.fine_tuning.jobs.retrieve.return_value = mock_openai_job
+        mock_get_openai_client.return_value = mock_openai
+
+        # Action: Refresh the fine-tuning job status
+        response = client.get(
+            f"/api/v1/fine_tuning/{job.id}/refresh", headers=user_api_key_header
+        )
+
+        # Verify response
+        assert response.status_code == 200
+        json_data = response.json()
+        assert json_data["data"]["status"] == "completed"
+        assert json_data["data"]["fine_tuned_model"] == "ft:gpt-4:custom-model:12345"
+
+        # Verify that model evaluation was triggered
+        mock_run_model_evaluation.assert_called_once()
+        call_args = mock_run_model_evaluation.call_args[0]
+        eval_id = call_args[0]
+
+        # Verify evaluation was created in database
+        model_eval = (
+            db.query(ModelEvaluation).filter(ModelEvaluation.id == eval_id).first()
+        )
+        assert model_eval is not None
+        assert model_eval.fine_tuning_id == job.id
+        assert model_eval.status == ModelEvaluationStatus.pending
+
+    def test_skip_evaluation_when_already_exists(
+        self,
+        mock_run_model_evaluation,
+        mock_get_cloud_storage,
+        mock_get_openai_client,
+        client,
+        db,
+        user_api_key_header,
+    ):
+        """Test that evaluation is skipped when an active evaluation already exists."""
+        # Setup: Create a fine-tuning job with running status
+        jobs, _ = create_test_fine_tuning_jobs(db, [0.7])
+        job = jobs[0]
+        job.status = FineTuningStatus.running
+        job.provider_job_id = "ft_mock_job_123"
+        # Add required fields for model evaluation
+        job.test_data_s3_object = "test-bucket/test-data.csv"
+        job.system_prompt = "You are a helpful assistant"
+        db.add(job)
+        db.commit()
+
+        # Create an existing active evaluation
+        existing_eval = ModelEvaluation(
+            fine_tuning_id=job.id,
+            status=ModelEvaluationStatus.pending,
+            project_id=job.project_id,
+            organization_id=job.organization_id,
+            document_id=job.document_id,
+            fine_tuned_model="ft:gpt-4:test-model:123",
+            test_data_s3_object="test-bucket/test-data.csv",
+            base_model="gpt-4",
+            split_ratio=0.7,
+            system_prompt="You are a helpful assistant",
+        )
+        db.add(existing_eval)
+        db.commit()
+
+        # Mock cloud storage
+        mock_storage = MagicMock()
+        mock_storage.get_signed_url.return_value = (
+            "https://test.s3.amazonaws.com/signed-url"
+        )
+        mock_get_cloud_storage.return_value = mock_storage
+
+        # Mock OpenAI response indicating job completion
+        mock_openai_job = MagicMock(
+            status="succeeded",
+            fine_tuned_model="ft:gpt-4:custom-model:12345",
+            error=None,
+        )
+        mock_openai = MagicMock()
+        mock_openai.fine_tuning.jobs.retrieve.return_value = mock_openai_job
+        mock_get_openai_client.return_value = mock_openai
+
+        # Action: Refresh the fine-tuning job status
+        response = client.get(
+            f"/api/v1/fine_tuning/{job.id}/refresh", headers=user_api_key_header
+        )
+
+        # Verify response
+        assert response.status_code == 200
+        json_data = response.json()
+        assert json_data["data"]["status"] == "completed"
+
+        # Verify that no new evaluation was triggered
+        mock_run_model_evaluation.assert_not_called()
+
+        # Verify only one evaluation exists in database
+        evaluations = (
+            db.query(ModelEvaluation)
+            .filter(ModelEvaluation.fine_tuning_id == job.id)
+            .all()
+        )
+        assert len(evaluations) == 1
+        assert evaluations[0].id == existing_eval.id
+
+    def test_evaluation_not_triggered_for_non_completion_status_changes(
+        self,
+        mock_run_model_evaluation,
+        mock_get_cloud_storage,
+        mock_get_openai_client,
+        client,
+        db,
+        user_api_key_header,
+    ):
+        """Test that evaluation is not triggered for status changes other than to completed."""
+        # Test Case 1: pending to running
+        jobs, _ = create_test_fine_tuning_jobs(db, [0.7])
+        job = jobs[0]
+        job.status = FineTuningStatus.pending
+        job.provider_job_id = "ft_mock_job_123"
+        db.add(job)
+        db.commit()
+
+        # Mock cloud storage
+        mock_storage = MagicMock()
+        mock_storage.get_signed_url.return_value = (
+            "https://test.s3.amazonaws.com/signed-url"
+        )
+        mock_get_cloud_storage.return_value = mock_storage
+
+        mock_openai_job = MagicMock(
+            status="running",
+            fine_tuned_model=None,
+            error=None,
+        )
+        mock_openai = MagicMock()
+        mock_openai.fine_tuning.jobs.retrieve.return_value = mock_openai_job
+        mock_get_openai_client.return_value = mock_openai
+
+        response = client.get(
+            f"/api/v1/fine_tuning/{job.id}/refresh", headers=user_api_key_header
+        )
+
+        assert response.status_code == 200
+        json_data = response.json()
+        assert json_data["data"]["status"] == "running"
+        mock_run_model_evaluation.assert_not_called()
+
+        # Test Case 2: running to failed
+        job.status = FineTuningStatus.running
+        db.add(job)
+        db.commit()
+
+        mock_openai_job.status = "failed"
+        mock_openai_job.error = MagicMock(message="Training failed")
+
+        response = client.get(
+            f"/api/v1/fine_tuning/{job.id}/refresh", headers=user_api_key_header
+        )
+
+        assert response.status_code == 200
+        json_data = response.json()
+        assert json_data["data"]["status"] == "failed"
+        mock_run_model_evaluation.assert_not_called()
+
+    def test_evaluation_not_triggered_for_already_completed_jobs(
+        self,
+        mock_run_model_evaluation,
+        mock_get_cloud_storage,
+        mock_get_openai_client,
+        client,
+        db,
+        user_api_key_header,
+    ):
+        """Test that evaluation is not triggered when refreshing an already completed job."""
+        # Setup: Create a fine-tuning job that's already completed
+        jobs, _ = create_test_fine_tuning_jobs(db, [0.7])
+        job = jobs[0]
+        job.status = FineTuningStatus.completed
+        job.provider_job_id = "ft_mock_job_123"
+        job.fine_tuned_model = "ft:gpt-4:custom-model:12345"
+        db.add(job)
+        db.commit()
+
+        # Mock cloud storage
+        mock_storage = MagicMock()
+        mock_storage.get_signed_url.return_value = (
+            "https://test.s3.amazonaws.com/signed-url"
+        )
+        mock_get_cloud_storage.return_value = mock_storage
+
+        # Mock OpenAI response (job remains succeeded)
+        mock_openai_job = MagicMock(
+            status="succeeded",
+            fine_tuned_model="ft:gpt-4:custom-model:12345",
+            error=None,
+        )
+        mock_openai = MagicMock()
+        mock_openai.fine_tuning.jobs.retrieve.return_value = mock_openai_job
+        mock_get_openai_client.return_value = mock_openai
+
+        # Action: Refresh the fine-tuning job status
+        response = client.get(
+            f"/api/v1/fine_tuning/{job.id}/refresh", headers=user_api_key_header
+        )
+
+        # Verify response
+        assert response.status_code == 200
+        json_data = response.json()
+        assert json_data["data"]["status"] == "completed"
+
+        # Verify that no evaluation was triggered (since it wasn't newly completed)
+        mock_run_model_evaluation.assert_not_called()
+
+        # Verify no evaluations exist in database for this job
+        evaluations = (
+            db.query(ModelEvaluation)
+            .filter(ModelEvaluation.fine_tuning_id == job.id)
+            .all()
+        )
+        assert len(evaluations) == 0
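The tests in PATCH 05 and PATCH 06 lean on moto's in-process S3 fake, so no real AWS calls ever leave the suite. Outside pytest, the same pattern reduces to a few lines — a sketch assuming moto >= 5 (which exposes the unified `mock_aws` decorator); the fake credentials mirror the `aws_credentials` fixture above:

```python
import os

import boto3
from moto import mock_aws

# Fake credentials keep boto3 happy; moto never contacts AWS.
os.environ.setdefault("AWS_ACCESS_KEY_ID", "testing")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "testing")


@mock_aws
def s3_roundtrip_demo() -> bytes:
    # Everything inside the decorated scope hits moto's in-memory S3.
    s3 = boto3.client("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="test-bucket")
    s3.put_object(Bucket="test-bucket", Key="test.csv", Body=b"prompt,label\n")
    return s3.get_object(Bucket="test-bucket", Key="test.csv")["Body"].read()


assert s3_roundtrip_demo().startswith(b"prompt")
```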
From f8e28e9028f5eb3547dafd200388e0fa8856f3c5 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Tue, 23 Sep 2025 12:26:22 +0530
Subject: [PATCH 07/18] added cancelled status in enum

---
 backend/app/api/routes/fine_tuning.py            |  3 ++-
 backend/app/models/fine_tuning.py                |  1 +
 backend/app/tests/api/routes/test_fine_tuning.py | 12 ++++++------
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py
index e2c3b5fb..670252c9 100644
--- a/backend/app/api/routes/fine_tuning.py
+++ b/backend/app/api/routes/fine_tuning.py
@@ -51,6 +51,7 @@
     "running": FineTuningStatus.running,
     "succeeded": FineTuningStatus.completed,
     "failed": FineTuningStatus.failed,
+    "cancelled": FineTuningStatus.cancelled,
 }
 
 
@@ -191,7 +192,7 @@ async def fine_tune_from_CSV(
     background_tasks: BackgroundTasks,
     file: UploadFile = File(..., description="CSV file to use for fine-tuning"),
     base_model: str = Form(
-        ..., description="Base model for fine-tuning (e.g., gpt-3.5-turbo)"
+        ..., description="Base model for fine-tuning (e.g., gpt-4.1-2025-04-14)"
     ),
     split_ratio: str = Form(
         ..., description="Comma-separated split ratios (e.g., '0.8' or '0.7,0.8,0.9')"
diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py
index a3b0e866..4e326ee5 100644
--- a/backend/app/models/fine_tuning.py
+++ b/backend/app/models/fine_tuning.py
@@ -15,6 +15,7 @@ class FineTuningStatus(str, Enum):
     running = "running"
     completed = "completed"
     failed = "failed"
+    cancelled = "cancelled"
 
 
 class FineTuningJobBase(SQLModel):
diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py
index 23a96157..321bb740 100644
--- a/backend/app/tests/api/routes/test_fine_tuning.py
+++ b/backend/app/tests/api/routes/test_fine_tuning.py
@@ -125,7 +125,7 @@ def test_retrieve_fine_tuning_job(
     ):
         jobs, _ = create_test_fine_tuning_jobs(db, [0.3])
         job = jobs[0]
-        job.provider_job_id = "ft_mock_job_123"
+        job.provider_job_id = "ftjob-mock_job_123"
         db.flush()
 
         mock_openai_job = MagicMock(
@@ -154,7 +154,7 @@ def test_retrieve_fine_tuning_job_failed(
     ):
         jobs, _ = create_test_fine_tuning_jobs(db, [0.3])
         job = jobs[0]
-        job.provider_job_id = "ft_mock_job_123"
+        job.provider_job_id = "ftjob-mock_job_123"
         db.flush()
 
         mock_openai_job = MagicMock(
@@ -226,7 +226,7 @@ def test_successful_auto_evaluation_trigger(
         jobs, _ = create_test_fine_tuning_jobs(db, [0.7])
         job = jobs[0]
         job.status = FineTuningStatus.running
-        job.provider_job_id = "ft_mock_job_123"
+        job.provider_job_id = "ftjob-mock_job_123"
         # Add required fields for model evaluation
         job.test_data_s3_object = "test-bucket/test-data.csv"
         job.system_prompt = "You are a helpful assistant"
@@ -289,7 +289,7 @@ def test_skip_evaluation_when_already_exists(
         jobs, _ = create_test_fine_tuning_jobs(db, [0.7])
         job = jobs[0]
         job.status = FineTuningStatus.running
-        job.provider_job_id = "ft_mock_job_123"
+        job.provider_job_id = "ftjob-mock_job_123"
         # Add required fields for model evaluation
         job.test_data_s3_object = "test-bucket/test-data.csv"
         job.system_prompt = "You are a helpful assistant"
@@ -365,7 +365,7 @@ def test_evaluation_not_triggered_for_non_completion_status_changes(
         jobs, _ = create_test_fine_tuning_jobs(db, [0.7])
         job = jobs[0]
         job.status = FineTuningStatus.pending
-        job.provider_job_id = "ft_mock_job_123"
+        job.provider_job_id = "ftjob-mock_job_123"
         db.add(job)
         db.commit()
 
@@ -425,7 +425,7 @@ def test_evaluation_not_triggered_for_already_completed_jobs(
         jobs, _ = create_test_fine_tuning_jobs(db, [0.7])
         job = jobs[0]
         job.status = FineTuningStatus.completed
-        job.provider_job_id = "ft_mock_job_123"
+        job.provider_job_id = "ftjob-mock_job_123"
         job.fine_tuned_model = "ft:gpt-4:custom-model:12345"
         db.add(job)
         db.commit()

From 397807dddffc8a3198490029688b665f452aaab4 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Tue, 23 Sep 2025 14:12:09 +0530
Subject: [PATCH 08/18] cleanups

---
 .../app/tests/api/routes/test_fine_tuning.py | 37 +++++++++----------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py
index 321bb740..34ba4b34 100644
--- a/backend/app/tests/api/routes/test_fine_tuning.py
+++ b/backend/app/tests/api/routes/test_fine_tuning.py
@@ -1,4 +1,3 @@
-import os
 import io
 import pytest
 from moto import mock_aws
@@ -16,17 +15,6 @@
 from app.core.config import settings
 
 
-@pytest.fixture(scope="function")
-def aws_credentials():
-    """Set up AWS credentials for moto."""
-    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
-    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
-    os.environ["AWS_SECURITY_TOKEN"] = "testing"
-    os.environ["AWS_SESSION_TOKEN"] = "testing"
-    os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
-    os.environ["AWS_S3_BUCKET_PREFIX"] = "test-bucket"
-
-
 def create_file_mock(file_type):
     counter = {"train": 0, "test": 0}
 
@@ -42,7 +30,7 @@ def _side_effect(file=None, purpose=None):
     return _side_effect
 
 
-@pytest.mark.usefixtures("client", "db", "user_api_key_header", "aws_credentials")
+@pytest.mark.usefixtures("client", "db", "user_api_key_header")
 class TestCreateFineTuningJobAPI:
     @mock_aws
     def test_finetune_from_csv_multiple_split_ratio(
@@ -52,8 +40,17 @@ def test_finetune_from_csv_multiple_split_ratio(
         user_api_key_header,
     ):
         # Setup S3 bucket for moto
-        s3 = boto3.client("s3", region_name="us-east-1")
-        s3.create_bucket(Bucket="test-bucket")
+        s3 = boto3.client("s3", region_name=settings.AWS_DEFAULT_REGION)
+        bucket_name = settings.AWS_S3_BUCKET_PREFIX
+        if settings.AWS_DEFAULT_REGION == "us-east-1":
+            s3.create_bucket(Bucket=bucket_name)
+        else:
+            s3.create_bucket(
+                Bucket=bucket_name,
+                CreateBucketConfiguration={
+                    "LocationConstraint": settings.AWS_DEFAULT_REGION
+                },
+            )
 
         # Create a test CSV file content
         csv_content = "prompt,label\ntest1,label1\ntest2,label2\ntest3,label3"
@@ -74,7 +71,9 @@ def test_finetune_from_csv_multiple_split_ratio(
                 ) as mock_process_job:
                     # Mock cloud storage
                     mock_storage = MagicMock()
-                    mock_storage.put.return_value = "s3://test-bucket/test.csv"
+                    mock_storage.put.return_value = (
+                        f"s3://{settings.AWS_S3_BUCKET_PREFIX}/test.csv"
+                    )
                     mock_get_cloud_storage.return_value = mock_storage
 
                     # Mock OpenAI client (for validation only)
@@ -228,7 +227,7 @@ def test_successful_auto_evaluation_trigger(
         job.status = FineTuningStatus.running
         job.provider_job_id = "ftjob-mock_job_123"
         # Add required fields for model evaluation
-        job.test_data_s3_object = "test-bucket/test-data.csv"
+        job.test_data_s3_object = f"{settings.AWS_S3_BUCKET_PREFIX}/test-data.csv"
         job.system_prompt = "You are a helpful assistant"
         db.add(job)
         db.commit()
@@ -291,7 +290,7 @@ def test_skip_evaluation_when_already_exists(
         job.status = FineTuningStatus.running
         job.provider_job_id = "ftjob-mock_job_123"
         # Add required fields for model evaluation
-        job.test_data_s3_object = "test-bucket/test-data.csv"
+        job.test_data_s3_object = f"{settings.AWS_S3_BUCKET_PREFIX}/test-data.csv"
         job.system_prompt = "You are a helpful assistant"
         db.add(job)
         db.commit()
@@ -304,7 +303,7 @@ def test_skip_evaluation_when_already_exists(
             organization_id=job.organization_id,
             document_id=job.document_id,
             fine_tuned_model="ft:gpt-4:test-model:123",
-            test_data_s3_object="test-bucket/test-data.csv",
+            test_data_s3_object=f"{settings.AWS_S3_BUCKET_PREFIX}/test-data.csv",
             base_model="gpt-4",
             split_ratio=0.7,
             system_prompt="You are a helpful assistant",
From ca862cf94be2aa57ace2e8eba250cab3e997f19e Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Tue, 23 Sep 2025 14:22:50 +0530
Subject: [PATCH 09/18] update CLAUDE.md

---
 CLAUDE.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CLAUDE.md b/CLAUDE.md
index 2dc7be8a..df2ca38b 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -33,6 +33,8 @@ alembic revision --autogenerate -m 'Add new meta'
 
 ### Testing
 
+We also use `.env.test` to keep environment variables separate for the test environment; test cases can rely on it.
+
 ```bash
 # Run backend tests
 uv run bash scripts/tests-start.sh

From 375eb5e2a2bf3a4082c7723742205d12a684e891 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Tue, 23 Sep 2025 19:20:15 +0530
Subject: [PATCH 10/18] CodeRabbit suggestion

---
 backend/app/api/routes/fine_tuning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py
index 670252c9..00b48b91 100644
--- a/backend/app/api/routes/fine_tuning.py
+++ b/backend/app/api/routes/fine_tuning.py
@@ -217,7 +217,7 @@ async def fine_tune_from_CSV(
     )
 
     # Validate file is CSV
-    if not file.filename.lower().endswith(".csv"):
+    if not file.filename.lower().endswith(".csv") and file.content_type != "text/csv":
         raise HTTPException(status_code=400, detail="File must be a CSV file")
 
     client = get_openai_client(  # Used here only to validate the user's OpenAI key;

From 38dcf456b1c51106d79d8133f37a20fb400136b1 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Thu, 25 Sep 2025 11:42:15 +0530
Subject: [PATCH 11/18] reverting unnecessary changes

---
 backend/app/api/routes/responses.py     | 21 ++++++++++++++++++++-
 backend/app/api/routes/threads.py       |  9 ++++++++-
 backend/app/core/finetune/evaluation.py |  3 +--
 backend/app/utils.py                    |  8 --------
 4 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/backend/app/api/routes/responses.py b/backend/app/api/routes/responses.py
index d4e2389c..94e5f19d 100644
--- a/backend/app/api/routes/responses.py
+++ b/backend/app/api/routes/responses.py
@@ -18,13 +18,32 @@
     get_conversation_by_ancestor_id,
 )
 from app.models import UserProjectOrg, OpenAIConversationCreate, OpenAIConversation
-from app.utils import APIResponse, mask_string, handle_openai_error
+from app.utils import APIResponse, mask_string
 from app.core.langfuse.langfuse import LangfuseTracer
 
 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["responses"])
 
 
+def handle_openai_error(e: openai.OpenAIError) -> str:
+    """Extract error message from OpenAI error."""
+    # Try to get error message from different possible attributes
+    if hasattr(e, "body") and isinstance(e.body, dict) and "message" in e.body:
+        return e.body["message"]
+    elif hasattr(e, "message"):
+        return e.message
+    elif hasattr(e, "response") and hasattr(e.response, "json"):
+        try:
+            error_data = e.response.json()
+            if isinstance(error_data, dict) and "error" in error_data:
+                error_info = error_data["error"]
+                if isinstance(error_info, dict) and "message" in error_info:
+                    return error_info["message"]
+        except:
+            pass
+    return str(e)
+
+
 class ResponsesAPIRequest(BaseModel):
     assistant_id: str
     question: str
diff --git a/backend/app/api/routes/threads.py b/backend/app/api/routes/threads.py
index be7e0578..95630bfb 100644
--- a/backend/app/api/routes/threads.py
+++ b/backend/app/api/routes/threads.py
@@ -13,7 +13,7 @@
 from app.core import logging, settings
 from app.models import UserOrganization, OpenAIThreadCreate, UserProjectOrg
 from app.crud import upsert_thread_result, get_thread_result
-from app.utils import APIResponse, mask_string, handle_openai_error
+from app.utils import APIResponse, mask_string
 from app.crud.credentials import get_provider_credential
 from app.core.util import configure_openai
 from app.core.langfuse.langfuse import LangfuseTracer
@@ -49,6 +49,13 @@ def send_callback(callback_url: str, data: dict):
         return False
 
 
+def handle_openai_error(e: openai.OpenAIError) -> str:
+    """Extract error message from OpenAI error."""
+    if isinstance(e.body, dict) and "message" in e.body:
+        return e.body["message"]
+    return str(e)
+
+
 def validate_thread(client: OpenAI, thread_id: str) -> tuple[bool, str]:
     """Validate if a thread exists and has no active runs."""
     if not thread_id:
diff --git a/backend/app/core/finetune/evaluation.py b/backend/app/core/finetune/evaluation.py
index 4a85e85c..4acc62e6 100644
--- a/backend/app/core/finetune/evaluation.py
+++ b/backend/app/core/finetune/evaluation.py
@@ -11,7 +11,6 @@
     matthews_corrcoef,
 )
 from app.core.cloud import AmazonCloudStorage
-from app.utils import handle_openai_error
 from app.core.finetune.preprocessing import DataPreprocessor
 
 
@@ -151,7 +150,7 @@ def generate_predictions(self) -> tuple[list[str], str]:
                     break
 
             except openai.OpenAIError as e:
-                error_msg = handle_openai_error(e)
+                error_msg = str(e)
                 logger.error(
                     f"[generate_predictions] OpenAI API error at prompt {idx}/{total_prompts}: {error_msg}"
                 )
diff --git a/backend/app/utils.py b/backend/app/utils.py
index 8f96fd95..1c03839a 100644
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@@ -7,7 +7,6 @@
 
 import jwt
 import emails
-import openai
 from jinja2 import Template
 from jwt.exceptions import InvalidTokenError
 from fastapi import HTTPException
@@ -49,13 +48,6 @@ def failure_response(
         return cls(success=False, data=None, error=error_message, metadata=metadata)
 
 
-def handle_openai_error(e: openai.OpenAIError) -> str:
-    """Extract error message from OpenAI error."""
-    if hasattr(e, "body") and isinstance(e.body, dict) and "message" in e.body:
-        return e.body["message"]
-    return str(e)
-
-
 @dataclass
 class EmailData:
     html_content: str

From 8a1b496c86602dccf5b12c08814392898382616a Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Thu, 25 Sep 2025 11:44:13 +0530
Subject: [PATCH 12/18] CodeRabbit suggestions

---
 backend/app/api/routes/fine_tuning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py
index 00b48b91..32a3a6e6 100644
--- a/backend/app/api/routes/fine_tuning.py
+++ b/backend/app/api/routes/fine_tuning.py
@@ -220,7 +220,7 @@ async def fine_tune_from_CSV(
     if not file.filename.lower().endswith(".csv") and file.content_type != "text/csv":
         raise HTTPException(status_code=400, detail="File must be a CSV file")
 
-    client = get_openai_client(  # Used here only to validate the user's OpenAI key;
+    get_openai_client(  # Used here only to validate the user's OpenAI key;
         # the actual client is re-initialized separately inside the background task
         session,
         current_user.organization_id,
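A note on PATCH 10's tightened CSV check: it is written as a negative condition, and by De Morgan's law it is equivalent to accepting a file when either signal looks like CSV. A sketch of the positive form — the helper name is illustrative, not from the codebase:

```python
def looks_like_csv(filename: str | None, content_type: str | None) -> bool:
    """Accept when either the extension or the declared MIME type says CSV."""
    name_ok = bool(filename) and filename.lower().endswith(".csv")
    type_ok = content_type == "text/csv"
    return name_ok or type_ok


# Equivalent to PATCH 10's: reject iff (not name_ok) and (not type_ok)
assert looks_like_csv("data.csv", "application/octet-stream")
assert looks_like_csv("upload.bin", "text/csv")
assert not looks_like_csv("upload.bin", "application/json")
```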
From 9e8d046edfdd7e02f4b406a415bd0a2599e32c2b Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Thu, 25 Sep 2025 12:24:45 +0530
Subject: [PATCH 13/18] remove import

---
 backend/app/api/routes/fine_tuning.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py
index 32a3a6e6..f5ff511f 100644
--- a/backend/app/api/routes/fine_tuning.py
+++ b/backend/app/api/routes/fine_tuning.py
@@ -19,13 +19,7 @@
 )
 from app.core.cloud import get_cloud_storage
 from app.crud.document import DocumentCrud
-from app.utils import (
-    get_openai_client,
-    APIResponse,
-    mask_string,
-    load_description,
-    handle_openai_error,
-)
+from app.utils import get_openai_client, APIResponse, mask_string, load_description
 from app.crud import (
     create_fine_tuning_job,
     fetch_by_id,

From 724497b02e479ea7ae9a2d035c09ec53983dd7fa Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Mon, 6 Oct 2025 10:27:16 +0530
Subject: [PATCH 14/18] merging endpoints

---
 CLAUDE.md | 104 ------------------------------------------------------
 1 file changed, 104 deletions(-)
 delete mode 100644 CLAUDE.md

diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index df2ca38b..00000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,104 +0,0 @@
-# CLAUDE.md
-
-This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
-
-## Project Overview
-
-Kaapi is an AI platform built with FastAPI (backend) and PostgreSQL (database), containerized with Docker. The platform provides AI capabilities including OpenAI assistants, fine-tuning, document processing, and collection management.
-
-## Key Commands
-
-### Development
-
-```bash
-# Start development environment with auto-reload
-source .venv/bin/activate
-fastapi run --reload app/main.py
-
-# Run backend tests
-uv run bash scripts/tests-start.sh
-
-# Seed data
-uv run python -m app.seed_data.seed_data
-
-# Run pre-commit
-uv run pre-commit run --all-files
-
-# Activate virtual environment
-source .venv/bin/activate
-
-# Generate a new migration
-alembic revision --autogenerate -m 'Add new meta'
-```
-
-### Testing
-
-We also use `.env.test` to keep environment variables separate for the test environment; test cases can rely on it.
-
-```bash
-# Run backend tests
-uv run bash scripts/tests-start.sh
-```
-
-## Architecture
-
-### Backend Structure
-
-The backend follows a layered architecture:
-
-- **API Layer** (`backend/app/api/`): FastAPI routes organized by domain
-  - Authentication (`login.py`)
-  - Core resources: `users.py`, `organizations.py`, `projects.py`
-  - AI features: `assistants.py`, `fine_tuning.py`, `openai_conversation.py`
-  - Document management: `documents.py`, `collections.py`, `doc_transformation_job.py`
-
-- **Models** (`backend/app/models/`): SQLModel entities representing database tables
-  - User system: User, Organization, Project, ProjectUser
-  - AI components: Assistant, Thread, Message, FineTuning
-  - Document system: Document, Collection, DocumentCollection, DocTransformationJob
-
-- **CRUD Operations** (`backend/app/crud/`): Database operations for each model
-
-- **Core Services** (`backend/app/core/`):
-  - `providers.py`: OpenAI client management
-  - `finetune/`: Fine-tuning pipeline (preprocessing, evaluation)
-  - `doctransform/`: Document transformation services
-  - `cloud/storage.py`: S3 storage integration
-  - `langfuse/`: Observability and tracing
-
-### Database
-
-PostgreSQL with Alembic migrations. Key relationships:
-- Organizations contain Projects
-- Projects have Users (many-to-many via ProjectUser)
-- Projects contain Collections and Documents
-- Documents can belong to Collections (many-to-many)
-- Projects have Assistants, Threads, and FineTuning jobs
-
-### Authentication & Security
-
-- JWT-based authentication
-- API key support for programmatic access
-- Role-based access control (User, Admin, Super Admin)
-- Organization and project-level permissions
-
-## Environment Configuration
-
-Critical environment variables:
-- `SECRET_KEY`: JWT signing key
-- `POSTGRES_*`: Database connection
-- `LOCAL_CREDENTIALS_ORG_OPENAI_API_KEY`: OpenAI API key
-- `AWS_S3_BUCKET_PREFIX`: S3 storage configuration
-- `LANGFUSE_*`: Observability configuration
-
-## Testing Strategy
-
-- Unit tests in `backend/app/tests/`
-- Test fixtures use factory pattern
-- Mock external services (OpenAI, S3) using `moto` and `openai_responses`
-- Coverage reports generated automatically
-
-## Code Standards
-
-- Python 3.11+ with type hints
-- Pre-commit hooks configured for consistency
From b6a7073f9f8497dc8983cf11c72150d2a20888b9 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Mon, 6 Oct 2025 11:29:19 +0530
Subject: [PATCH 15/18] following PEP8 standards

---
 backend/app/core/finetune/evaluation.py | 79 ++++++++++++++++---------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/backend/app/core/finetune/evaluation.py b/backend/app/core/finetune/evaluation.py
index 4acc62e6..560a4c75 100644
--- a/backend/app/core/finetune/evaluation.py
+++ b/backend/app/core/finetune/evaluation.py
@@ -1,18 +1,17 @@
 import difflib
-import time
 import logging
+import time
+import uuid
 from typing import Set
 
 import openai
 import pandas as pd
 from openai import OpenAI
-import uuid
-from sklearn.metrics import (
-    matthews_corrcoef,
-)
+from sklearn.metrics import matthews_corrcoef
+
 from app.core.cloud import AmazonCloudStorage
 from app.core.finetune.preprocessing import DataPreprocessor
-
+from app.utils import handle_openai_error
 
 logger = logging.getLogger(__name__)
@@ -50,7 +49,8 @@ def load_labels_and_prompts(self) -> None:
         - 'label'
         """
         logger.info(
-            f"[ModelEvaluator.load_labels_and_prompts] Loading CSV from: {self.test_data_s3_object}"
+            f"[ModelEvaluator.load_labels_and_prompts] Loading CSV from: "
+            f"{self.test_data_s3_object}"
         )
         file_obj = self.storage.stream(self.test_data_s3_object)
         try:
@@ -65,11 +65,13 @@ def load_labels_and_prompts(self) -> None:
 
             if not query_col or not label_col:
                 logger.error(
-                    "[ModelEvaluator.load_labels_and_prompts] CSV must contain a 'label' column "
-                    f"and one of: {possible_query_columns}"
+                    "[ModelEvaluator.load_labels_and_prompts] CSV must "
+                    "contain a 'label' column and one of: "
+                    f"{possible_query_columns}"
                 )
                 raise ValueError(
-                    f"CSV must contain a 'label' column and one of: {possible_query_columns}"
+                    f"CSV must contain a 'label' column and one of: "
+                    f"{possible_query_columns}"
                 )
 
             prompts = df[query_col].astype(str).tolist()
@@ -84,12 +86,15 @@ def load_labels_and_prompts(self) -> None:
 
             logger.info(
                 "[ModelEvaluator.load_labels_and_prompts] "
-                f"Loaded {len(self.prompts)} prompts and {len(self.y_true)} labels; "
-                f"query_col={query_col}, label_col={label_col}, allowed_labels={self.allowed_labels}"
+                f"Loaded {len(self.prompts)} prompts and "
+                f"{len(self.y_true)} labels; "
+                f"query_col={query_col}, label_col={label_col}, "
+                f"allowed_labels={self.allowed_labels}"
             )
         except Exception as e:
             logger.error(
-                f"[ModelEvaluator.load_labels_and_prompts] Failed to load/parse test CSV: {e}",
+                f"[ModelEvaluator.load_labels_and_prompts] "
+                f"Failed to load/parse test CSV: {e}",
                 exc_info=True,
             )
             raise
@@ -110,13 +115,15 @@ def normalize_prediction(self, text: str) -> str:
             return closest[0]
 
         logger.warning(
-            f"[normalize_prediction] No close match found for '{t}'. Using default label '{next(iter(self.allowed_labels))}'."
+            f"[normalize_prediction] No close match found for '{t}'. "
+            f"Using default label '{next(iter(self.allowed_labels))}'."
        )
         return next(iter(self.allowed_labels))
 
     def generate_predictions(self) -> tuple[list[str], str]:
         logger.info(
-            f"[generate_predictions] Generating predictions for {len(self.prompts)} prompts."
+            f"[generate_predictions] Generating predictions for "
+            f"{len(self.prompts)} prompts."
         )
         start_preds = time.time()
         predictions = []
@@ -127,7 +134,9 @@ def generate_predictions(self) -> tuple[list[str], str]:
             while attempt < self.retries:
                 start_time = time.time()
                 logger.info(
-                    f"[generate_predictions] Processing prompt {idx}/{total_prompts} (Attempt {attempt + 1}/{self.retries})"
+                    f"[generate_predictions] Processing prompt "
+                    f"{idx}/{total_prompts} "
+                    f"(Attempt {attempt + 1}/{self.retries})"
                 )
 
                 try:
@@ -140,7 +149,8 @@ def generate_predictions(self) -> tuple[list[str], str]:
                     elapsed_time = time.time() - start_time
                     if elapsed_time > self.max_latency:
                         logger.warning(
-                            f"[generate_predictions] Timeout exceeded for prompt {idx}/{total_prompts}. Retrying..."
+                            f"[generate_predictions] Timeout exceeded for "
+                            f"prompt {idx}/{total_prompts}. Retrying..."
                         )
                         continue
 
@@ -150,25 +160,31 @@ def generate_predictions(self) -> tuple[list[str], str]:
                     break
 
             except openai.OpenAIError as e:
-                error_msg = str(e)
+                error_msg = handle_openai_error(e)
                 logger.error(
-                    f"[generate_predictions] OpenAI API error at prompt {idx}/{total_prompts}: {error_msg}"
+                    f"[generate_predictions] OpenAI API error at prompt "
+                    f"{idx}/{total_prompts}: {error_msg}"
                 )
                 attempt += 1
                 if attempt == self.retries:
                     predictions.append("openai_error")
                     logger.error(
-                        f"[generate_predictions] Maximum retries reached for prompt {idx}/{total_prompts}. Appending 'openai_error'."
+                        f"[generate_predictions] Maximum retries reached "
+                        f"for prompt {idx}/{total_prompts}. "
+                        f"Appending 'openai_error'."
                    )
                 else:
                     logger.info(
-                        f"[generate_predictions] Retrying prompt {idx}/{total_prompts} after OpenAI error ({attempt}/{self.retries})."
+                        f"[generate_predictions] Retrying prompt "
+                        f"{idx}/{total_prompts} after OpenAI error "
+                        f"({attempt}/{self.retries})."
                     )
 
         total_elapsed = time.time() - start_preds
         logger.info(
-            f"[generate_predictions] Finished {total_prompts} prompts in {total_elapsed:.2f}s | "
-            f"Generated {len(predictions)} predictions."
+            f"[generate_predictions] Finished {total_prompts} prompts in "
+            f"{total_elapsed:.2f}s | Generated {len(predictions)} "
+            f"predictions."
         )
 
         prediction_data = pd.DataFrame(
@@ -187,7 +203,8 @@ def generate_predictions(self) -> tuple[list[str], str]:
         self.prediction_data_s3_object = prediction_data_s3_object
 
         logger.info(
-            f"[generate_predictions] Predictions CSV uploaded to S3 | url={prediction_data_s3_object}"
+            f"[generate_predictions] Predictions CSV uploaded to S3 | "
+            f"url={prediction_data_s3_object}"
         )
 
         return predictions, prediction_data_s3_object
@@ -196,11 +213,13 @@ def evaluate(self) -> dict:
         """Evaluate using the predictions CSV previously uploaded to S3."""
         if not getattr(self, "prediction_data_s3_object", None):
             raise RuntimeError(
-                "[evaluate] predictions_s3_object not set. Call generate_predictions() first."
+                "[evaluate] predictions_s3_object not set. "
+                "Call generate_predictions() first."
             )
 
         logger.info(
-            f"[evaluate] Streaming predictions CSV from: {self.prediction_data_s3_object}"
+            f"[evaluate] Streaming predictions CSV from: "
+            f"{self.prediction_data_s3_object}"
         )
         prediction_obj = self.storage.stream(self.prediction_data_s3_object)
         try:
@@ -210,7 +229,8 @@ def evaluate(self) -> dict:
 
         if "true_label" not in df.columns or "prediction" not in df.columns:
             raise ValueError(
-                "[evaluate] prediction data CSV must contain 'true_label' and 'prediction' columns."
+                "[evaluate] prediction data CSV must contain 'true_label' "
+                "and 'prediction' columns."
             )
 
         y_true = df["true_label"].astype(str).str.strip().str.lower().tolist()
@@ -225,7 +245,10 @@ def evaluate(self) -> dict:
             raise
 
     def run(self) -> dict:
-        """Run the full evaluation process: load data, generate predictions, evaluate results."""
+        """Run the full evaluation process.
+
+        Load data, generate predictions, and evaluate results.
+        """
         try:
             self.load_labels_and_prompts()
             predictions, prediction_data_s3_object = self.generate_predictions()

From c47b254cede639d0bcff4778314a60ed5a112ceb Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Wed, 8 Oct 2025 12:04:42 +0530
Subject: [PATCH 16/18] removed redundant checks

---
 backend/app/api/routes/fine_tuning.py | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py
index f5ff511f..401a761b 100644
--- a/backend/app/api/routes/fine_tuning.py
+++ b/backend/app/api/routes/fine_tuning.py
@@ -193,22 +193,11 @@ async def fine_tune_from_CSV(
     ),
     system_prompt: str = Form(..., description="System prompt for the fine-tuning job"),
 ):
-    # Validate and parse split ratios
+    # Parse split ratios (validation happens in FineTuningJobCreate model)
     try:
         split_ratios = [float(r.strip()) for r in split_ratio.split(",")]
-        for ratio in split_ratios:
-            if not (0 < ratio < 1):
-                raise ValueError(
-                    f"Invalid split_ratio: {ratio}. Must be between 0 and 1."
-                )
     except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-
-    # Validate system prompt
-    if not system_prompt.strip():
-        raise HTTPException(
-            status_code=400, detail="System prompt must be a non-empty string"
-        )
+        raise HTTPException(status_code=400, detail=f"Invalid split_ratio format: {e}")
 
     # Validate file is CSV
     if not file.filename.lower().endswith(".csv") and file.content_type != "text/csv":
         raise HTTPException(status_code=400, detail="File must be a CSV file")

From 0979fd84692c893022225cd4b719286e214771e5 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Fri, 10 Oct 2025 09:40:17 +0530
Subject: [PATCH 17/18] added as todo

---
 backend/app/api/routes/fine_tuning.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py
index 401a761b..66baa3ad 100644
--- a/backend/app/api/routes/fine_tuning.py
+++ b/backend/app/api/routes/fine_tuning.py
@@ -185,15 +185,11 @@ async def fine_tune_from_CSV(
     current_user: CurrentUserOrgProject,
     background_tasks: BackgroundTasks,
     file: UploadFile = File(..., description="CSV file to use for fine-tuning"),
-    base_model: str = Form(
-        ..., description="Base model for fine-tuning (e.g., gpt-4.1-2025-04-14)"
-    ),
-    split_ratio: str = Form(
-        ..., description="Comma-separated split ratios (e.g., '0.8' or '0.7,0.8,0.9')"
-    ),
-    system_prompt: str = Form(..., description="System prompt for the fine-tuning job"),
+    base_model: str = Form(...),
+    split_ratio: str = Form(...),
+    system_prompt: str = Form(...),
 ):
-    # Parse split ratios (validation happens in FineTuningJobCreate model)
+    # Parse split ratios
     try:
         split_ratios = [float(r.strip()) for r in split_ratio.split(",")]
     except ValueError as e:
@@ -211,6 +207,7 @@ async def fine_tune_from_CSV(
     )
 
     # Upload the file to storage and create document
+    # ToDo: create a helper function and then use it rather than doing things in router
     storage = get_cloud_storage(session=session, project_id=current_user.project_id)
     document_id = uuid4()
     object_store_url = storage.put(file, Path(str(document_id)))
@@ -277,9 +274,7 @@ async def fine_tune_from_CSV(
         else f"Started {created_count} job(s); {total - created_count} active fine-tuning job(s) already exists."
     )
 
-    return APIResponse.success_response(
-        {"message": message, "document_id": str(created_document.id), "jobs": job_infos}
-    )
+    return APIResponse.success_response({"message": message, "jobs": job_infos})

From 8801b39c4840ef6d0a0e8c19fe477fe759b4387a Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Fri, 10 Oct 2025 09:52:52 +0530
Subject: [PATCH 18/18] updated the testcase

---
 backend/app/tests/api/routes/test_fine_tuning.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py
index 34ba4b34..abe00680 100644
--- a/backend/app/tests/api/routes/test_fine_tuning.py
+++ b/backend/app/tests/api/routes/test_fine_tuning.py
@@ -98,7 +98,6 @@ def test_finetune_from_csv_multiple_split_ratio(
         assert json_data["success"] is True
         assert json_data["data"]["message"] == "Fine-tuning job(s) started."
         assert json_data["metadata"] is None
-        assert "document_id" in json_data["data"]
         assert "jobs" in json_data["data"]
         assert len(json_data["data"]["jobs"]) == 3