104 changes: 104 additions & 0 deletions CLAUDE.md
@@ -0,0 +1,104 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Kaapi is an AI platform built with FastAPI (backend) and PostgreSQL (database), containerized with Docker. The platform provides AI capabilities including OpenAI assistants, fine-tuning, document processing, and collection management.

## Key Commands

### Development

```bash
# Start development environment with auto-reload
source .venv/bin/activate
fastapi run --reload app/main.py

# Run backend tests
uv run bash scripts/tests-start.sh

# Seed data
uv run python -m app.seed_data.seed_data

# Run pre-commit
uv run pre-commit run --all-files

# Activate virtual environment
source .venv/bin/activate

# Generate a new migration
alembic revision --autogenerate -m 'Add new meta'
```

### Testing

We also use `.env.test` to keep environment variables separate for the test environment; test cases can load it directly.

```bash
# Run backend tests
uv run bash scripts/tests-start.sh
```

## Architecture

### Backend Structure

The backend follows a layered architecture:

- **API Layer** (`backend/app/api/`): FastAPI routes organized by domain
- Authentication (`login.py`)
- Core resources: `users.py`, `organizations.py`, `projects.py`
- AI features: `assistants.py`, `fine_tuning.py`, `openai_conversation.py`
- Document management: `documents.py`, `collections.py`, `doc_transformation_job.py`

- **Models** (`backend/app/models/`): SQLModel entities representing database tables
- User system: User, Organization, Project, ProjectUser
- AI components: Assistant, Thread, Message, FineTuning
- Document system: Document, Collection, DocumentCollection, DocTransformationJob

- **CRUD Operations** (`backend/app/crud/`): Database operations for each model

- **Core Services** (`backend/app/core/`):
- `providers.py`: OpenAI client management
- `finetune/`: Fine-tuning pipeline (preprocessing, evaluation)
- `doctransform/`: Document transformation services
- `cloud/storage.py`: S3 storage integration
- `langfuse/`: Observability and tracing

### Database

PostgreSQL with Alembic migrations. Key relationships:
- Organizations contain Projects
- Projects have Users (many-to-many via ProjectUser)
- Projects contain Collections and Documents
- Documents can belong to Collections (many-to-many)
- Projects have Assistants, Threads, and FineTuning jobs

### Authentication & Security

- JWT-based authentication
- API key support for programmatic access
- Role-based access control (User, Admin, Super Admin)
- Organization and project-level permissions

## Environment Configuration

Critical environment variables:
- `SECRET_KEY`: JWT signing key
- `POSTGRES_*`: Database connection
- `LOCAL_CREDENTIALS_ORG_OPENAI_API_KEY`: OpenAI API key
- `AWS_S3_BUCKET_PREFIX`: S3 storage configuration
- `LANGFUSE_*`: Observability configuration
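
A sketch of what a local `.env` might look like; the exact names behind `POSTGRES_*` and `LANGFUSE_*` are assumptions, and all values are placeholders:

```shell
SECRET_KEY=change-me                            # JWT signing key
POSTGRES_SERVER=localhost                       # assumed name under POSTGRES_*
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=kaapi
LOCAL_CREDENTIALS_ORG_OPENAI_API_KEY=sk-placeholder
AWS_S3_BUCKET_PREFIX=kaapi-dev
LANGFUSE_PUBLIC_KEY=pk-placeholder              # assumed name under LANGFUSE_*
LANGFUSE_SECRET_KEY=sk-placeholder
LANGFUSE_HOST=https://cloud.langfuse.com
```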

## Testing Strategy

- Unit tests in `backend/app/tests/`
- Test fixtures use factory pattern
- Mock external services (OpenAI, S3) using `moto` and `openai_responses`
- Coverage reports generated automatically

## Code Standards

- Python 3.11+ with type hints
- Pre-commit hooks configured for consistency
129 changes: 114 additions & 15 deletions backend/app/api/routes/fine_tuning.py
@@ -1,17 +1,21 @@
from typing import Optional
import logging
import time
from uuid import UUID
from uuid import UUID, uuid4
from pathlib import Path

import openai
from sqlmodel import Session
from fastapi import APIRouter, HTTPException, BackgroundTasks
from fastapi import APIRouter, HTTPException, BackgroundTasks, File, Form, UploadFile

from app.models import (
FineTuningJobCreate,
FineTuningJobPublic,
FineTuningUpdate,
FineTuningStatus,
Document,
ModelEvaluationBase,
ModelEvaluationStatus,
)
from app.core.cloud import get_cloud_storage
from app.crud.document import DocumentCrud
@@ -21,10 +25,13 @@
fetch_by_id,
update_finetune_job,
fetch_by_document_id,
create_model_evaluation,
fetch_active_model_evals,
)
from app.core.db import engine
from app.api.deps import CurrentUserOrgProject, SessionDep
from app.core.finetune.preprocessing import DataPreprocessor
from app.api.routes.model_evaluation import run_model_evaluation


logger = logging.getLogger(__name__)
@@ -38,16 +45,10 @@
"running": FineTuningStatus.running,
"succeeded": FineTuningStatus.completed,
"failed": FineTuningStatus.failed,
"cancelled": FineTuningStatus.cancelled,
}


def handle_openai_error(e: openai.OpenAIError) -> str:
"""Extract error message from OpenAI error."""
if isinstance(e.body, dict) and "message" in e.body:
return e.body["message"]
return str(e)


def process_fine_tuning_job(
job_id: int,
ratio: float,
@@ -179,22 +180,72 @@ def process_fine_tuning_job(
description=load_description("fine_tuning/create.md"),
response_model=APIResponse,
)
def fine_tune_from_CSV(
async def fine_tune_from_CSV(
session: SessionDep,
current_user: CurrentUserOrgProject,
request: FineTuningJobCreate,
background_tasks: BackgroundTasks,
file: UploadFile = File(..., description="CSV file to use for fine-tuning"),
Collaborator: the part from L187 to L194 should be moved to the fine-tuning model file.

base_model: str = Form(
..., description="Base model for fine-tuning (e.g., gpt-4.1-2025-04-14)"
),
split_ratio: str = Form(
..., description="Comma-separated split ratios (e.g., '0.8' or '0.7,0.8,0.9')"
),
system_prompt: str = Form(..., description="System prompt for the fine-tuning job"),
):
client = get_openai_client( # Used here only to validate the user's OpenAI key;
# Validate and parse split ratios
try:
split_ratios = [float(r.strip()) for r in split_ratio.split(",")]
for ratio in split_ratios:
if not (0 < ratio < 1):
raise ValueError(
f"Invalid split_ratio: {ratio}. Must be between 0 and 1."
)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

# Validate system prompt
if not system_prompt.strip():
raise HTTPException(
status_code=400, detail="System prompt must be a non-empty string"
)
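
The ratio parsing above could be factored into a small helper so the model file (or the route) can reuse it; a sketch, not code from the PR:

```python
def parse_split_ratios(raw: str) -> list[float]:
    """Parse a comma-separated ratio string, rejecting values outside (0, 1)."""
    try:
        ratios = [float(part.strip()) for part in raw.split(",")]
    except ValueError:
        raise ValueError(f"Invalid split_ratio: {raw!r}. Must be comma-separated floats.")
    for ratio in ratios:
        if not 0 < ratio < 1:
            raise ValueError(f"Invalid split_ratio: {ratio}. Must be between 0 and 1.")
    return ratios
```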

Collaborator: I thought we were also going to validate that the CSV has two columns, one with the query and another with the label; only then should the fine-tuning process start, otherwise the endpoint itself should throw an error.
# Validate file is CSV
if not file.filename.lower().endswith(".csv") and file.content_type != "text/csv":
raise HTTPException(status_code=400, detail="File must be a CSV file")
Comment on lines +213 to +215
🛠️ Refactor suggestion | 🟠 Major

CSV validation condition is inverted

The current `and` check only rejects the upload when both the extension and the MIME type are wrong, so a file named `data.csv` with an arbitrary content type slips through, undoing the MIME validation you meant to add. Flip the logic to fail when either check fails and accept the common CSV MIME types:

```diff
-    if not file.filename.lower().endswith(".csv") and file.content_type != "text/csv":
+    allowed_mime_types = {
+        "text/csv",
+        "application/csv",
+        "application/vnd.ms-excel",
+        "text/plain",
+    }
+    if (
+        not file.filename.lower().endswith(".csv")
+        or file.content_type not in allowed_mime_types
+    ):
         raise HTTPException(status_code=400, detail="File must be a CSV file")
```
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

🤖 Prompt for AI Agents:

```
In backend/app/api/routes/fine_tuning.py around lines 219 to 221, the CSV
validation uses an AND which only rejects when both extension and MIME type are
wrong; change it to fail when either check fails by replacing the condition with
an OR and validate against a small allowed MIME set (e.g., "text/csv",
"application/csv", "text/plain", "application/vnd.ms-excel") using lowercased
comparisons for both filename and content_type, and raise the HTTPException if
the filename doesn't end with ".csv" or the content_type is not in that allowed
set.
```


get_openai_client( # Used here only to validate the user's OpenAI key;
# the actual client is re-initialized separately inside the background task
session,
current_user.organization_id,
current_user.project_id,
)

# Upload the file to storage and create document
storage = get_cloud_storage(session=session, project_id=current_user.project_id)
document_id = uuid4()
object_store_url = storage.put(file, Path(str(document_id)))

# Create document in database
document_crud = DocumentCrud(session, current_user.project_id)
document = Document(
id=document_id,
fname=file.filename,
object_store_url=str(object_store_url),
)
created_document = document_crud.update(document)

# Create FineTuningJobCreate request object
request = FineTuningJobCreate(
document_id=created_document.id,
base_model=base_model,
split_ratio=split_ratios,
system_prompt=system_prompt.strip(),
)

results = []

for ratio in request.split_ratio:
for ratio in split_ratios:
job, created = create_fine_tuning_job(
session=session,
request=request,
Expand Down Expand Up @@ -237,7 +288,9 @@ def fine_tune_from_CSV(
else f"Started {created_count} job(s); {total - created_count} active fine-tuning job(s) already exists."
)

return APIResponse.success_response({"message": message, "jobs": job_infos})
return APIResponse.success_response(
{"message": message, "document_id": str(created_document.id), "jobs": job_infos}
)


@router.get(
@@ -246,7 +299,10 @@ def fine_tune_from_CSV(
response_model=APIResponse[FineTuningJobPublic],
)
def refresh_fine_tune_status(
fine_tuning_id: int, session: SessionDep, current_user: CurrentUserOrgProject
fine_tuning_id: int,
background_tasks: BackgroundTasks,
session: SessionDep,
current_user: CurrentUserOrgProject,
):
project_id = current_user.project_id
job = fetch_by_id(session, fine_tuning_id, project_id)
@@ -282,13 +338,56 @@ def refresh_fine_tune_status(
error_message=openai_error_msg,
)

# Check if status is changing from running to completed
is_newly_completed = (
job.status == FineTuningStatus.running
and update_payload.status == FineTuningStatus.completed
)

if (
job.status != update_payload.status
or job.fine_tuned_model != update_payload.fine_tuned_model
or job.error_message != update_payload.error_message
):
job = update_finetune_job(session=session, job=job, update=update_payload)

# If the job just completed, automatically trigger evaluation
if is_newly_completed:
logger.info(
f"[refresh_fine_tune_status] Fine-tuning job completed, triggering evaluation | "
f"fine_tuning_id={fine_tuning_id}, project_id={project_id}"
)

# Check if there's already an active evaluation for this job
active_evaluations = fetch_active_model_evals(
session, fine_tuning_id, project_id
)

if not active_evaluations:
# Create a new evaluation
model_eval = create_model_evaluation(
session=session,
request=ModelEvaluationBase(fine_tuning_id=fine_tuning_id),
project_id=project_id,
organization_id=current_user.organization_id,
status=ModelEvaluationStatus.pending,
)

# Queue the evaluation task
background_tasks.add_task(
run_model_evaluation, model_eval.id, current_user
Collaborator: `current_user` is an object, and objects should not be passed to background tasks.

)

logger.info(
f"[refresh_fine_tune_status] Created and queued evaluation | "
f"eval_id={model_eval.id}, fine_tuning_id={fine_tuning_id}, project_id={project_id}"
)
else:
logger.info(
f"[refresh_fine_tune_status] Skipping evaluation creation - active evaluation exists | "
f"fine_tuning_id={fine_tuning_id}, project_id={project_id}"
)

job = job.model_copy(
update={
"train_data_file_url": storage.get_signed_url(job.train_data_s3_object)
Expand Down
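
One way to act on the reviewer's point about not passing `current_user` into the background task is to hand the task only primitive IDs and rebuild context inside it. A sketch under assumed names; the real `run_model_evaluation` signature in this repo differs:

```python
def run_model_evaluation_task(eval_id: int, user_id: int, project_id: int) -> dict:
    # Inside the task, open a fresh session and re-fetch what is needed
    # from the primitive IDs instead of holding a detached ORM object.
    return {"eval_id": eval_id, "user_id": user_id, "project_id": project_id}

def queue_evaluation(add_task, eval_id: int, current_user) -> None:
    """Pass only primitives across the task boundary (sketch)."""
    add_task(
        run_model_evaluation_task,
        eval_id,
        current_user.id,
        current_user.project_id,
    )
```

With FastAPI, `add_task` would be `background_tasks.add_task`; here it is any callable with the same shape.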
3 changes: 1 addition & 2 deletions backend/app/core/finetune/evaluation.py
@@ -11,7 +11,6 @@
matthews_corrcoef,
)
from app.core.cloud import AmazonCloudStorage
from app.api.routes.fine_tuning import handle_openai_error
from app.core.finetune.preprocessing import DataPreprocessor


@@ -151,7 +150,7 @@ def generate_predictions(self) -> tuple[list[str], str]:
break

except openai.OpenAIError as e:
error_msg = handle_openai_error(e)
error_msg = str(e)
logger.error(
f"[generate_predictions] OpenAI API error at prompt {idx}/{total_prompts}: {error_msg}"
)
1 change: 1 addition & 0 deletions backend/app/models/fine_tuning.py
@@ -15,6 +15,7 @@ class FineTuningStatus(str, Enum):
running = "running"
completed = "completed"
failed = "failed"
cancelled = "cancelled"


class FineTuningJobBase(SQLModel):