diff --git a/backend/app/alembic/versions/f2589428c1d0_change_vector_store_id_to_vector_store_ids.py b/backend/app/alembic/versions/f2589428c1d0_change_vector_store_id_to_vector_store_ids.py new file mode 100644 index 00000000..f8c49fad --- /dev/null +++ b/backend/app/alembic/versions/f2589428c1d0_change_vector_store_id_to_vector_store_ids.py @@ -0,0 +1,58 @@ +"""Change vector_store_id to vector_store_ids in openai_assistant table + +Revision ID: f2589428c1d0 +Revises: 3389c67fdcb4 +Create Date: 2025-07-10 11:18:21.223114 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "f2589428c1d0" +down_revision = "3389c67fdcb4" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "openai_assistant", + sa.Column("vector_store_ids", postgresql.ARRAY(sa.String()), nullable=True), + ) + + op.execute( + """ + UPDATE openai_assistant + SET vector_store_ids = ARRAY[vector_store_id] + WHERE vector_store_id IS NOT NULL + """ + ) + + op.drop_column("openai_assistant", "vector_store_id") + # ### end Alembic commands ### + + +def downgrade(): + # Add back the single vector_store_id column as nullable for safe data migration + op.add_column( + "openai_assistant", + sa.Column( + "vector_store_id", + sa.VARCHAR(length=255), + autoincrement=False, + nullable=True, # Allow nulls temporarily for safe migration + ), + ) + + op.execute( + """ + UPDATE openai_assistant + SET vector_store_id = vector_store_ids[1] + WHERE vector_store_ids IS NOT NULL AND array_length(vector_store_ids, 1) > 0 + """ + ) + + op.drop_column("openai_assistant", "vector_store_ids") diff --git a/backend/app/api/main.py b/backend/app/api/main.py index e6a74971..7db3c3d5 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -2,6 +2,7 @@ from app.api.routes import ( api_keys, + assistants, collections, documents, login, @@ -20,6 +21,7 @@ api_router = APIRouter() api_router.include_router(api_keys.router) +api_router.include_router(assistants.router) api_router.include_router(collections.router) api_router.include_router(credentials.router) api_router.include_router(documents.router) diff --git a/backend/app/api/routes/assistants.py b/backend/app/api/routes/assistants.py new file mode 100644 index 00000000..35f66146 --- /dev/null +++ b/backend/app/api/routes/assistants.py @@ -0,0 +1,43 @@ +from typing import Annotated + +from fastapi import APIRouter, Depends, Path +from sqlmodel import Session + +from app.api.deps import get_db, get_current_user_org_project +from app.crud import ( + fetch_assistant_from_openai, + sync_assistant, +) +from app.models import UserProjectOrg +from app.utils import APIResponse, get_openai_client + +router = APIRouter(prefix="/assistant", tags=["Assistants"]) + + +@router.post( + "/{assistant_id}/ingest", + response_model=APIResponse, + status_code=201, +) +def ingest_assistant_route( + assistant_id: Annotated[str, Path(description="The ID of the assistant to ingest")], + session: Session = Depends(get_db), + current_user: UserProjectOrg = Depends(get_current_user_org_project), +): + """ + Ingest an assistant from OpenAI and store it in the platform. + """ + + client = get_openai_client( + session, current_user.organization_id, current_user.project_id + ) + + openai_assistant = fetch_assistant_from_openai(assistant_id, client) + assistant = sync_assistant( + session=session, + organization_id=current_user.organization_id, + project_id=current_user.project_id, + openai_assistant=openai_assistant, + ) + + return APIResponse.success_response(assistant) diff --git a/backend/app/api/routes/responses.py b/backend/app/api/routes/responses.py index 233cab35..50121fe0 100644 --- a/backend/app/api/routes/responses.py +++ b/backend/app/api/routes/responses.py @@ -125,11 +125,11 @@ def process_response( "input": [{"role": "user", "content": request.question}], } - if assistant.vector_store_id: + if assistant.vector_store_ids: params["tools"] = [ { "type": "file_search", - "vector_store_ids": [assistant.vector_store_id], + "vector_store_ids": assistant.vector_store_ids, "max_num_results": assistant.max_num_results, } ] diff --git a/backend/app/crud/__init__.py b/backend/app/crud/__init__.py index 5c6fd102..6ec996c2 100644 --- a/backend/app/crud/__init__.py +++ b/backend/app/crud/__init__.py @@ -5,6 +5,7 @@ update_user, ) from .collection import CollectionCrud + from .document import DocumentCrud from .document_collection import DocumentCollectionCrud @@ -43,4 +44,8 @@ from .thread_results import upsert_thread_result, get_thread_result -from .assistants import get_assistant_by_id +from .assistants import ( + get_assistant_by_id, + fetch_assistant_from_openai, + sync_assistant, +) diff --git a/backend/app/crud/assistants.py b/backend/app/crud/assistants.py index 5025c617..3f040508 100644 --- a/backend/app/crud/assistants.py +++ b/backend/app/crud/assistants.py @@ -1,8 +1,17 @@ -from typing import Optional, List, Tuple -from sqlmodel import Session, select, and_ +import logging + +from typing import Optional + +import openai +from fastapi import HTTPException +from openai import OpenAI +from openai.types.beta import Assistant as OpenAIAssistant +from sqlmodel import Session, and_, select -from app.core.util import now from app.models import Assistant +from app.utils import mask_string + +logger = logging.getLogger(__name__) def get_assistant_by_id( @@ -16,3 +25,89 @@ def get_assistant_by_id( ) ) return session.exec(statement).first() + + +def fetch_assistant_from_openai(assistant_id: str, client: OpenAI) -> OpenAIAssistant: + """ + Fetch an assistant from OpenAI. + Returns OpenAI Assistant model. + """ + + try: + assistant = client.beta.assistants.retrieve(assistant_id=assistant_id) + return assistant + except openai.NotFoundError as e: + logger.error( + f"[fetch_assistant_from_openai] Assistant not found: {mask_string(assistant_id)} | {e}" + ) + raise HTTPException(status_code=404, detail="Assistant not found in OpenAI.") + except openai.OpenAIError as e: + logger.error( + f"[fetch_assistant_from_openai] OpenAI API error while retrieving assistant {mask_string(assistant_id)}: {e}" + ) + raise HTTPException(status_code=502, detail=f"OpenAI API error: {e}") + + +def sync_assistant( + session: Session, + organization_id: int, + project_id: int, + openai_assistant: OpenAIAssistant, +) -> Assistant: + """ + Insert an assistant into the database by converting OpenAI Assistant to local Assistant model. + """ + assistant_id = openai_assistant.id + + existing_assistant = get_assistant_by_id(session, assistant_id, organization_id) + if existing_assistant: + logger.info( + f"[sync_assistant] Assistant with ID {mask_string(assistant_id)} already exists in the database." + ) + raise HTTPException( + status_code=409, + detail=f"Assistant with ID {assistant_id} already exists.", + ) + + if not openai_assistant.instructions: + raise HTTPException( + status_code=400, + detail="Assistant has no instruction.", + ) + + vector_store_ids = [] + if openai_assistant.tool_resources and hasattr( + openai_assistant.tool_resources, "file_search" + ): + file_search = openai_assistant.tool_resources.file_search + if file_search and hasattr(file_search, "vector_store_ids"): + vector_store_ids = file_search.vector_store_ids or [] + + max_num_results = 20 + for tool in openai_assistant.tools or []: + if tool.type == "file_search": + file_search = getattr(tool, "file_search", None) + if file_search and hasattr(file_search, "max_num_results"): + max_num_results = file_search.max_num_results + break + + db_assistant = Assistant( + assistant_id=openai_assistant.id, + name=openai_assistant.name or openai_assistant.id, + instructions=openai_assistant.instructions, + model=openai_assistant.model, + vector_store_ids=vector_store_ids, + temperature=openai_assistant.temperature or 0.1, + max_num_results=max_num_results, + project_id=project_id, + organization_id=organization_id, + ) + + session.add(db_assistant) + session.commit() + session.refresh(db_assistant) + + logger.info( + f"[sync_assistant] Successfully ingested assistant with ID {mask_string(assistant_id)}." + ) + return db_assistant diff --git a/backend/app/models/assistants.py b/backend/app/models/assistants.py index 4163297d..5647455d 100644 --- a/backend/app/models/assistants.py +++ b/backend/app/models/assistants.py @@ -1,6 +1,8 @@ from datetime import datetime from typing import Optional, List from sqlmodel import Field, Relationship, SQLModel +from sqlalchemy import Column, String +from sqlalchemy.dialects.postgresql import ARRAY from app.core.util import now @@ -10,7 +12,9 @@ class AssistantBase(SQLModel): name: str instructions: str model: str - vector_store_id: str + vector_store_ids: List[str] = Field( + default_factory=list, sa_column=Column(ARRAY(String)) + ) temperature: float = 0.1 max_num_results: int = 20 project_id: int = Field(foreign_key="project.id") diff --git a/backend/app/seed_data/seed_data.json b/backend/app/seed_data/seed_data.json index 8fb40af5..0ad39f5c 100644 --- a/backend/app/seed_data/seed_data.json +++ b/backend/app/seed_data/seed_data.json @@ -57,23 +57,42 @@ { "is_active": true, "provider": "openai", - "credential": "{\"openai\": {\"api_key\": \"sk-proj-YxK21qI3i5SCxN\"}}", + "credential": "{\"api_key\": \"sk-proj-GlificI3i5SCxN\"}", "project_name": "Glific", "organization_name": "Project Tech4dev", "deleted_at": null + }, + { + "is_active": true, + "provider": "openai", + "credential": "{\"api_key\": \"sk-proj-DalgoI3i5SCxN\"}", + "project_name": "Dalgo", + "organization_name": "Project Tech4dev", + "deleted_at": null } ], "assistants": [ { - "assistant_id": "assistant_123", - "name": "Test Assistant", + "assistant_id": "assistant_glific", + "name": "Test Assistant Glific", "instructions": "Test instructions", "model": "gpt-4o", - "vector_store_id": "vs_123", + "vector_store_ids": ["vs_glific"], "temperature": 0.1, "max_num_results": 20, "project_name": "Glific", "organization_name": "Project Tech4dev" + }, + { + "assistant_id": "assistant_dalgo", + "name": "Test Assistant Dalgo", + "instructions": "Test instructions", + "model": "gpt-4o", + "vector_store_ids": ["vs_dalgo"], + "temperature": 0.1, + "max_num_results": 20, + "project_name": "Dalgo", + "organization_name": "Project Tech4dev" } ] } diff --git a/backend/app/seed_data/seed_data.py b/backend/app/seed_data/seed_data.py index 059c0794..f8a1c2ea 100644 --- a/backend/app/seed_data/seed_data.py +++ b/backend/app/seed_data/seed_data.py @@ -57,7 +57,7 @@ class AssistantData(BaseModel): name: str instructions: str model: str - vector_store_id: str + vector_store_ids: list[str] temperature: float max_num_results: int project_name: str @@ -261,7 +261,7 @@ def create_assistant(session: Session, assistant_data_raw: dict) -> Assistant: name=assistant_data.name, instructions=assistant_data.instructions, model=assistant_data.model, - vector_store_id=assistant_data.vector_store_id, + vector_store_ids=assistant_data.vector_store_ids, temperature=assistant_data.temperature, max_num_results=assistant_data.max_num_results, organization_id=organization.id, diff --git a/backend/app/tests/api/routes/test_assistants.py b/backend/app/tests/api/routes/test_assistants.py new file mode 100644 index 00000000..936e0c07 --- /dev/null +++ b/backend/app/tests/api/routes/test_assistants.py @@ -0,0 +1,31 @@ +import pytest +from fastapi.testclient import TestClient +from unittest.mock import patch +from app.tests.utils.openai import mock_openai_assistant + + +@pytest.fixture +def normal_user_api_key_header(): + return {"X-API-KEY": "ApiKey Px8y47B6roJHin1lWLkR88eiDrFdXSJRZmFQazzai8j9"} + + +@patch("app.api.routes.assistants.fetch_assistant_from_openai") +def test_ingest_assistant_success( + mock_fetch_assistant, + client: TestClient, + normal_user_api_key_header: str, +): + """Test successful assistant ingestion from OpenAI.""" + mock_assistant = mock_openai_assistant() + + mock_fetch_assistant.return_value = mock_assistant + + response = client.post( + f"/api/v1/assistant/{mock_assistant.id}/ingest", + headers=normal_user_api_key_header, + ) + + assert response.status_code == 201 + response_json = response.json() + assert response_json["success"] is True + assert response_json["data"]["assistant_id"] == mock_assistant.id diff --git a/backend/app/tests/api/routes/test_responses.py b/backend/app/tests/api/routes/test_responses.py index e24b7a4d..cec03e26 100644 --- a/backend/app/tests/api/routes/test_responses.py +++ b/backend/app/tests/api/routes/test_responses.py @@ -49,7 +49,7 @@ def test_responses_endpoint_success( headers = {"X-API-KEY": original_api_key} request_data = { - "assistant_id": "assistant_123", + "assistant_id": "assistant_glific", "question": "What is Glific?", "callback_url": "http://example.com/callback", } @@ -80,7 +80,7 @@ def test_responses_endpoint_without_vector_store( mock_assistant.model = "gpt-4" mock_assistant.instructions = "Test instructions" mock_assistant.temperature = 0.1 - mock_assistant.vector_store_id = None # No vector store configured + mock_assistant.vector_store_ids = [] # No vector store configured mock_get_assistant.return_value = mock_assistant # Setup mock OpenAI client diff --git a/backend/app/tests/crud/test_assistants.py b/backend/app/tests/crud/test_assistants.py new file mode 100644 index 00000000..585aa025 --- /dev/null +++ b/backend/app/tests/crud/test_assistants.py @@ -0,0 +1,115 @@ +import pytest +from sqlmodel import Session +from fastapi import HTTPException + +from app.tests.utils.openai import mock_openai_assistant +from app.tests.utils.utils import get_project +from app.crud.assistants import sync_assistant + + +class TestAssistant: + def test_sync_assistant_success(self, db: Session): + project = get_project(db) + openai_assistant = mock_openai_assistant( + assistant_id="asst_success", + vector_store_ids=["vs_1", "vs_2"], + max_num_results=20, + ) + + result = sync_assistant( + db, project.organization_id, project.id, openai_assistant + ) + + assert result.assistant_id == openai_assistant.id + assert result.project_id == project.id + assert result.organization_id == project.organization_id + assert result.name == openai_assistant.name + assert result.instructions == openai_assistant.instructions + assert result.model == openai_assistant.model + assert result.vector_store_ids == ["vs_1", "vs_2"] + assert result.temperature == openai_assistant.temperature + assert result.max_num_results == 20 + + def test_sync_assistant_already_exists(self, db: Session): + project = get_project(db) + openai_assistant = mock_openai_assistant( + assistant_id="asst_exists", + ) + + sync_assistant(db, project.organization_id, project.id, openai_assistant) + + with pytest.raises(HTTPException) as exc_info: + sync_assistant(db, project.organization_id, project.id, openai_assistant) + + assert exc_info.value.status_code == 409 + assert "already exists" in exc_info.value.detail + + def test_sync_assistant_no_instructions(self, db: Session): + project = get_project(db) + openai_assistant = mock_openai_assistant( + assistant_id="asst_no_instructions", + ) + openai_assistant.instructions = None + + with pytest.raises(HTTPException) as exc_info: + sync_assistant(db, project.organization_id, project.id, openai_assistant) + + assert exc_info.value.status_code == 400 + assert "no instruction" in exc_info.value.detail + + def test_sync_assistant_no_name(self, db: Session): + project = get_project(db) + openai_assistant = mock_openai_assistant( + assistant_id="asst_no_name", + ) + openai_assistant.name = None + + result = sync_assistant( + db, project.organization_id, project.id, openai_assistant + ) + + assert result.name == openai_assistant.id + assert result.assistant_id == openai_assistant.id + assert result.project_id == project.id + + def test_sync_assistant_no_vector_stores(self, db: Session): + project = get_project(db) + openai_assistant = mock_openai_assistant( + assistant_id="asst_no_vectors", vector_store_ids=None + ) + + result = sync_assistant( + db, project.organization_id, project.id, openai_assistant + ) + + assert result.vector_store_ids == [] + assert result.assistant_id == openai_assistant.id + assert result.project_id == project.id + + def test_sync_assistant_no_tools(self, db: Session): + project = get_project(db) + openai_assistant = mock_openai_assistant(assistant_id="asst_no_tools") + + openai_assistant.tool_resources = None + result = sync_assistant( + db, project.organization_id, project.id, openai_assistant + ) + + assert result.vector_store_ids == [] # Default value + assert result.assistant_id == openai_assistant.id + assert result.project_id == project.id + + def test_sync_assistant_no_tool_resources(self, db: Session): + project = get_project(db) + openai_assistant = mock_openai_assistant( + assistant_id="asst_no_tool_resources", + ) + openai_assistant.tools = None + + result = sync_assistant( + db, project.organization_id, project.id, openai_assistant + ) + + assert result.max_num_results == 20 + assert result.assistant_id == openai_assistant.id + assert result.project_id == project.id diff --git a/backend/app/tests/utils/openai.py b/backend/app/tests/utils/openai.py new file mode 100644 index 00000000..778d4804 --- /dev/null +++ b/backend/app/tests/utils/openai.py @@ -0,0 +1,39 @@ +from typing import Optional +import time + +from openai.types.beta import Assistant as OpenAIAssistant +from openai.types.beta.assistant import ToolResources, ToolResourcesFileSearch +from openai.types.beta.assistant_tool import FileSearchTool +from openai.types.beta.file_search_tool import FileSearch + + +def mock_openai_assistant( + assistant_id: str = "assistant_mock", + vector_store_ids: Optional[list[str]] = ["vs_1", "vs_2"], + max_num_results: int = 30, +) -> OpenAIAssistant: + return OpenAIAssistant( + id=assistant_id, + created_at=int(time.time()), + description="Mock description", + instructions="Mock instructions", + metadata={}, + model="gpt-4o", + name="Mock Assistant", + object="assistant", + tools=[ + FileSearchTool( + type="file_search", + file_search=FileSearch( + max_num_results=max_num_results, + ), + ) + ], + temperature=1.0, + tool_resources=ToolResources( + code_interpreter=None, + file_search=ToolResourcesFileSearch(vector_store_ids=vector_store_ids), + ), + top_p=1.0, + reasoning_effort=None, + ) diff --git a/backend/app/tests/utils/utils.py b/backend/app/tests/utils/utils.py index a9eefe45..d4812424 100644 --- a/backend/app/tests/utils/utils.py +++ b/backend/app/tests/utils/utils.py @@ -1,18 +1,16 @@ import random import string from uuid import UUID -from typing import List +from typing import Type, TypeVar import pytest from fastapi.testclient import TestClient from sqlmodel import Session, select -from typing import Type, TypeVar from app.core.config import settings from app.crud.user import get_user_by_email -from app.models import APIKeyPublic, Credential -from app.crud import create_api_key, get_api_key_by_value -from uuid import uuid4 +from app.crud.api_key import get_api_key_by_value +from app.models import APIKeyPublic, Project T = TypeVar("T") @@ -65,6 +63,28 @@ def get_non_existent_id(session: Session, model: Type[T]) -> int: return (result or 0) + 1 +def get_project(session: Session, name: str | None = None) -> Project: + """ + Retrieve an active project from the database. + + If a project name is provided, fetch the active project with that name. + If no name is provided, fetch any random project. + """ + if name: + statement = ( + select(Project).where(Project.name == name, Project.is_active).limit(1) + ) + else: + statement = select(Project).where(Project.is_active).limit(1) + + project = session.exec(statement).first() + + if not project: + raise ValueError("No active projects found") + + return project + + class SequentialUuidGenerator: def __init__(self, start=0): self.start = start diff --git a/backend/app/utils.py b/backend/app/utils.py index b7f7f793..8f5be811 100644 --- a/backend/app/utils.py +++ b/backend/app/utils.py @@ -5,16 +5,18 @@ from pathlib import Path from typing import Any, Dict, Generic, Optional, TypeVar -import emails # type: ignore import jwt +import emails from jinja2 import Template from jwt.exceptions import InvalidTokenError +from fastapi import HTTPException +from openai import OpenAI +from pydantic import BaseModel +from sqlmodel import Session from app.core import security from app.core.config import settings - -from typing import Generic, Optional, TypeVar -from pydantic import BaseModel +from app.crud.credentials import get_provider_credential logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -163,6 +165,32 @@ def mask_string(value: str, mask_char: str = "*") -> str: return value[:start] + (mask_char * num_mask) + value[end:] +def get_openai_client(session: Session, org_id: int, project_id: int) -> OpenAI: + """ + Fetch OpenAI credentials for the current org/project and return a configured client. + """ + credentials = get_provider_credential( + session=session, + org_id=org_id, + provider="openai", + project_id=project_id, + ) + + if not credentials or "api_key" not in credentials: + raise HTTPException( + status_code=400, + detail="OpenAI credentials not configured for this organization/project.", + ) + + try: + return OpenAI(api_key=credentials["api_key"]) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to configure OpenAI client: {str(e)}", + ) + + @ft.singledispatch def load_description(filename: Path) -> str: if not filename.exists():