From d46e0284c14c5ce39a9402b40cead25ed493e59d Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Tue, 21 Oct 2025 08:38:16 +0530 Subject: [PATCH 01/15] intial commit v1 llm --- .../219033c644de_add_llm_im_jobs_table.py | 24 +++ backend/app/api/main.py | 2 + backend/app/api/routes/llm.py | 48 +++++ backend/app/models/__init__.py | 10 + backend/app/models/job.py | 1 + backend/app/models/llm/__init__.py | 19 ++ backend/app/models/llm/call.py | 75 +++++++ backend/app/services/llm/__init__.py | 23 +++ backend/app/services/llm/base_provider.py | 100 ++++++++++ backend/app/services/llm/jobs.py | 135 +++++++++++++ backend/app/services/llm/llm_service.py | 83 ++++++++ backend/app/services/llm/openai_provider.py | 185 ++++++++++++++++++ backend/app/services/llm/provider_factory.py | 89 +++++++++ 13 files changed, 794 insertions(+) create mode 100644 backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py create mode 100644 backend/app/api/routes/llm.py create mode 100644 backend/app/models/llm/__init__.py create mode 100644 backend/app/models/llm/call.py create mode 100644 backend/app/services/llm/__init__.py create mode 100644 backend/app/services/llm/base_provider.py create mode 100644 backend/app/services/llm/jobs.py create mode 100644 backend/app/services/llm/llm_service.py create mode 100644 backend/app/services/llm/openai_provider.py create mode 100644 backend/app/services/llm/provider_factory.py diff --git a/backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py b/backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py new file mode 100644 index 00000000..404d00e4 --- /dev/null +++ b/backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py @@ -0,0 +1,24 @@ +"""Add LLM in jobs table + +Revision ID: 219033c644de +Revises: e7c68e43ce6f +Create Date: 2025-10-17 15:38:33.565674 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '219033c644de' +down_revision = 'e7c68e43ce6f' +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute("ALTER TYPE jobtype ADD VALUE IF NOT EXISTS 'LLM_API'") + + +def downgrade(): + pass diff --git a/backend/app/api/main.py b/backend/app/api/main.py index 01892168..62d5db5b 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -7,6 +7,7 @@ documents, doc_transformation_job, login, + llm, organization, openai_conversation, project, @@ -31,6 +32,7 @@ api_router.include_router(credentials.router) api_router.include_router(documents.router) api_router.include_router(doc_transformation_job.router) +api_router.include_router(llm.router) api_router.include_router(login.router) api_router.include_router(onboarding.router) api_router.include_router(openai_conversation.router) diff --git a/backend/app/api/routes/llm.py b/backend/app/api/routes/llm.py new file mode 100644 index 00000000..513d6204 --- /dev/null +++ b/backend/app/api/routes/llm.py @@ -0,0 +1,48 @@ +import logging + +from fastapi import APIRouter, Depends +from sqlmodel import Session + +from app.api.deps import AuthContext, SessionDep +from app.models.llm import LLMCallRequest +from app.services.llm.jobs import start_job +from app.utils import APIResponse + +logger = logging.getLogger(__name__) +router = APIRouter(tags=["llm"]) + + +@router.post("/llm/call") +async def llm_call( + request: LLMCallRequest, + _session: SessionDep, + _current_user: AuthContext +): + """ + Endpoint to initiate an LLM call as a background job. 
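+
+    Example request body (illustrative only; "gpt-4o" and the vector store id
+    are placeholders, and the field layout follows the LLMCallRequest,
+    LLMConfig and LLMModelSpec models introduced in this PR):
+
+        {
+            "llm": {
+                "prompt": "Summarize our onboarding guide",
+                "vector_store_id": "vs_123",
+                "llm_model_spec": {"model": "gpt-4o", "provider": "openai"}
+            },
+            "max_num_results": 20
+        }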
+ """ + project_id = _current_user.project.id + organization_id = _current_user.organization.id + + logger.info( + f"[llm_call] Scheduling LLM call for provider: {request.llm.llm_model_spec.provider}, " + f"model: {request.llm.llm_model_spec.model}, " + f"project_id: {project_id}, org_id: {organization_id}" + ) + + # Start background job + job_id = start_job( + db=_session, + request=request, + project_id=project_id, + organization_id=organization_id, + ) + + logger.info( + f"[llm_call] LLM call job scheduled successfully | job_id={job_id}, " + f"project_id={project_id}" + ) + + return APIResponse.success_response( + data={"status": "processing", "message": "LLM call job scheduled"}, + ) diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 15b61428..82d55566 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -48,6 +48,16 @@ from .job import Job, JobType, JobStatus, JobUpdate +from .llm import ( + LLMCallRequest, + LLMCallResponse, + LLMConfig, + LLMModelSpec, + ProviderType, + ReasoningConfig, + TextConfig, +) + from .message import Message from .model_evaluation import ( ModelEvaluation, diff --git a/backend/app/models/job.py b/backend/app/models/job.py index 4ddbd3b3..62851f5f 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -15,6 +15,7 @@ class JobStatus(str, Enum): class JobType(str, Enum): RESPONSE = "RESPONSE" + LLM_API = "LLM_API" class Job(SQLModel, table=True): diff --git a/backend/app/models/llm/__init__.py b/backend/app/models/llm/__init__.py new file mode 100644 index 00000000..f05d125f --- /dev/null +++ b/backend/app/models/llm/__init__.py @@ -0,0 +1,19 @@ +from .call import ( + LLMCallRequest, + LLMCallResponse, + LLMConfig, + LLMModelSpec, + ProviderType, + ReasoningConfig, + TextConfig, +) + +__all__ = [ + "LLMCallRequest", + "LLMCallResponse", + "LLMConfig", + "LLMModelSpec", + "ProviderType", + "ReasoningConfig", + "TextConfig", +] diff --git a/backend/app/models/llm/call.py b/backend/app/models/llm/call.py new file mode 100644 index 00000000..3fff487e --- /dev/null +++ b/backend/app/models/llm/call.py @@ -0,0 +1,75 @@ +from typing import Optional, Literal +from sqlmodel import SQLModel + + +# Supported LLM providers +ProviderType = Literal["openai", "anthropic", "google", "azure"] + + +class ReasoningConfig(SQLModel): + """Configuration for reasoning parameters (e.g., o-series models).""" + + effort: str # "low", "medium", "high" + + +class TextConfig(SQLModel): + """Configuration for text generation parameters.""" + + verbosity: str # "low", "medium", "high" + + +class LLMModelSpec(SQLModel): + """Specification for the LLM model and its parameters. + + This contains the actual model configuration that will be sent to the provider. + Supports both standard OpenAI models and advanced configurations. + """ + + model: str + provider: ProviderType = "openai" + temperature: Optional[float] = None + reasoning: Optional[ReasoningConfig] = None + text: Optional[TextConfig] = None + max_tokens: Optional[int] = None + top_p: Optional[float] = None + + +class LLMConfig(SQLModel): + """LLM configuration containing model specification. + + This wraps the model spec and can be extended with additional + provider-agnostic configuration in the future. + """ + prompt: str + vector_store_id: Optional[str] = None + llm_model_spec: LLMModelSpec + + +class LLMCallRequest(SQLModel): + """Request model for /v1/llm/call endpoint. 
+ + This model decouples LLM calls from the assistants table, + allowing dynamic configuration per request. + + Structure: + - llm: LLMConfig (contains model_spec) + - prompt: The user's input + - vector_store_id: Optional vector store for RAG + - max_num_results: Number of results from vector store + """ + + llm: LLMConfig + max_num_results: int = 20 # For vector store file search + + +class LLMCallResponse(SQLModel): + """Response model for /v1/llm/call endpoint.""" + + status: str + response_id: str + message: str + model: str + input_tokens: int + output_tokens: int + total_tokens: int + file_search_results: Optional[list[dict]] = None diff --git a/backend/app/services/llm/__init__.py b/backend/app/services/llm/__init__.py new file mode 100644 index 00000000..c1b0b238 --- /dev/null +++ b/backend/app/services/llm/__init__.py @@ -0,0 +1,23 @@ +"""LLM services module. + +This module provides a provider-agnostic interface for executing LLM calls +through various providers (OpenAI, Anthropic, Google, etc.). + +Key components: +- BaseProvider: Abstract base class for all providers +- OpenAIProvider: OpenAI implementation +- ProviderFactory: Factory for creating provider instances +- execute_llm_call: Main entry point for LLM calls +""" + +from app.services.llm.base_provider import BaseProvider +from app.services.llm.llm_service import execute_llm_call +from app.services.llm.openai_provider import OpenAIProvider +from app.services.llm.provider_factory import ProviderFactory + +__all__ = [ + "BaseProvider", + "OpenAIProvider", + "ProviderFactory", + "execute_llm_call", +] diff --git a/backend/app/services/llm/base_provider.py b/backend/app/services/llm/base_provider.py new file mode 100644 index 00000000..39c75d57 --- /dev/null +++ b/backend/app/services/llm/base_provider.py @@ -0,0 +1,100 @@ +"""Base provider interface for LLM providers. + +This module defines the abstract base class that all LLM providers must implement. +It provides a provider-agnostic interface for executing LLM calls. +""" + +from abc import ABC, abstractmethod +from typing import Any + +from app.models.llm import LLMCallRequest, LLMCallResponse + + +class BaseProvider(ABC): + """Abstract base class for LLM providers. + + All provider implementations (OpenAI, Anthropic, etc.) must inherit from + this class and implement the required methods. + + Attributes: + client: The provider-specific client instance + """ + + def __init__(self, client: Any): + """Initialize the provider with client. + + Args: + client: Provider-specific client (e.g., OpenAI, Anthropic client) + """ + self.client = client + + @abstractmethod + def execute( + self, request: LLMCallRequest + ) -> tuple[LLMCallResponse | None, str | None]: + """Execute an LLM call using the provider. + + This is the main method that must be implemented by all providers. + It should handle the complete lifecycle of an LLM request: + 1. Build provider-specific parameters from the request + 2. Make the API call to the provider + 3. Extract results (including any additional features like RAG) + 4. 
Return standardized response + + Args: + request: LLM call request with configuration + + Returns: + Tuple of (response, error_message) + - If successful: (LLMCallResponse, None) + - If failed: (None, error_message) + + Raises: + NotImplementedError: If the provider hasn't implemented this method + """ + raise NotImplementedError("Providers must implement execute method") + + @abstractmethod + def build_params(self, request: LLMCallRequest) -> dict[str, Any]: + """Build provider-specific API parameters from the request. + + Convert the generic LLMCallRequest into provider-specific parameters. + This includes handling model names, temperature, tokens, and any + provider-specific features. + + Args: + request: LLM call request with configuration + + Returns: + Dictionary of provider-specific parameters + + Raises: + NotImplementedError: If the provider hasn't implemented this method + """ + raise NotImplementedError("Providers must implement build_params method") + + @abstractmethod + def supports_feature(self, feature: str) -> bool: + """Check if the provider supports a specific feature. + + Features might include: "reasoning", "text_config", "file_search", + "streaming", "function_calling", etc. + + Args: + feature: Feature name to check + + Returns: + True if the feature is supported, False otherwise + + Raises: + NotImplementedError: If the provider hasn't implemented this method + """ + raise NotImplementedError("Providers must implement supports_feature method") + + def get_provider_name(self) -> str: + """Get the name of the provider. + + Returns: + Provider name (e.g., "openai", "anthropic", "google") + """ + return self.__class__.__name__.replace("Provider", "").lower() diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py new file mode 100644 index 00000000..a0cbf21c --- /dev/null +++ b/backend/app/services/llm/jobs.py @@ -0,0 +1,135 @@ +import logging +from uuid import UUID + +from fastapi import HTTPException +from sqlmodel import Session +from asgi_correlation_id import correlation_id + +from app.celery.utils import start_high_priority_job +from app.crud import JobCrud +from app.core.db import engine + +from app.models import JobType, JobStatus, JobUpdate +from app.models.llm import LLMCallRequest, LLMCallResponse +from app.services.llm import execute_llm_call +from app.utils import get_openai_client + +logger = logging.getLogger(__name__) + + +def start_job( + db: Session, request: LLMCallRequest, project_id: int, organization_id: int +) -> UUID: + """Create an LLM job and schedule Celery task.""" + trace_id = correlation_id.get() or "N/A" + job_crud = JobCrud(session=db) + job = job_crud.create(job_type=JobType.LLM_API, trace_id=trace_id) + + try: + task_id = start_high_priority_job( + function_path="app.services.llm.jobs.execute_job", + project_id=project_id, + job_id=str(job.id), + trace_id=trace_id, + request_data=request.model_dump(), + organization_id=organization_id, + ) + except Exception as e: + logger.error( + f"[start_job] Error starting Celery task: {str(e)} | job_id={job.id}, project_id={project_id}", + exc_info=True, + ) + job_update = JobUpdate(status=JobStatus.FAILED, error_message=str(e)) + job_crud.update(job_id=job.id, job_update=job_update) + raise HTTPException( + status_code=500, detail="Internal server error while executing LLM call" + ) + + logger.info( + f"[start_job] Job scheduled for LLM call | job_id={job.id}, project_id={project_id}, task_id={task_id}" + ) + return job.id + + +def execute_job( + request_data: dict, + 
project_id: int, + organization_id: int, + job_id: str, + task_id: str, + task_instance, +) -> LLMCallResponse | None: + """Celery task to process an LLM request asynchronously.""" + request = LLMCallRequest(**request_data) + job_id_uuid = UUID(job_id) + + logger.info( + f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, " + f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}" + ) + + try: + # Update job status to PROCESSING + with Session(engine) as session: + job_crud = JobCrud(session=session) + job_crud.update( + job_id=job_id_uuid, job_update=JobUpdate(status=JobStatus.PROCESSING) + ) + + provider_type = request.llm.llm_model_spec.provider + + if provider_type == "openai": + client = get_openai_client(session, organization_id, project_id) + else: + error_msg = f"Provider '{provider_type}' is not yet supported" + logger.error(f"[execute_job] {error_msg} | job_id={job_id}") + job_crud = JobCrud(session=session) + job_crud.update( + job_id=job_id_uuid, + job_update=JobUpdate( + status=JobStatus.FAILED, error_message=error_msg + ), + ) + return None + + response, error = execute_llm_call( + request=request, + client=client, + ) + + with Session(engine) as session: + job_crud = JobCrud(session=session) + if response: + job_crud.update( + job_id=job_id_uuid, job_update=JobUpdate(status=JobStatus.SUCCESS) + ) + logger.info( + f"[execute_job] Successfully completed LLM job | job_id={job_id}, " + f"response_id={response.response_id}, tokens={response.total_tokens}" + ) + return response + else: + job_crud.update( + job_id=job_id_uuid, + job_update=JobUpdate( + status=JobStatus.FAILED, + error_message=error or "Unknown error occurred", + ), + ) + logger.error( + f"[execute_job] Failed to execute LLM job | job_id={job_id}, error={error}" + ) + return None + + except Exception as e: + error_message = f"Unexpected error in LLM job execution: {str(e)}" + logger.error( + f"[execute_job] {error_message} | job_id={job_id}", exc_info=True + ) + with Session(engine) as session: + job_crud = JobCrud(session=session) + job_crud.update( + job_id=job_id_uuid, + job_update=JobUpdate(status=JobStatus.FAILED, error_message=str(e)), + ) + raise diff --git a/backend/app/services/llm/llm_service.py b/backend/app/services/llm/llm_service.py new file mode 100644 index 00000000..62ef08bc --- /dev/null +++ b/backend/app/services/llm/llm_service.py @@ -0,0 +1,83 @@ +"""Main LLM service orchestration. + +This module provides the main entry point for executing LLM calls. +It uses the provider factory pattern to route requests to the appropriate +provider implementation (OpenAI, Anthropic, etc.). +""" + +import logging +from typing import Any + +from app.models.llm import LLMCallRequest, LLMCallResponse +from app.services.llm.provider_factory import ProviderFactory + +logger = logging.getLogger(__name__) + + +def execute_llm_call( + request: LLMCallRequest, + client: Any, +) -> tuple[LLMCallResponse | None, str | None]: + """Execute LLM call using the appropriate provider. + + This is the main orchestration function that routes requests to + provider-specific implementations. It uses the provider factory + to instantiate the correct provider based on the request configuration. + + The function is designed to be provider-agnostic, supporting multiple + LLM providers (OpenAI, Anthropic, Google, etc.) through a unified interface. 
+ + Args: + request: LLM call request with configuration (includes provider type) + client: Provider-specific client instance + + Returns: + Tuple of (response, error_message) + - If successful: (LLMCallResponse, None) + - If failed: (None, error_message) + + Example: + >>> request = LLMCallRequest( + ... llm=LLMConfig( + ... provider="openai", + ... prompt="Hello, world!", + ... llm_model_spec=LLMModelSpec(model="gpt-4") + ... ) + ... ) + >>> response, error = execute_llm_call(request, openai_client) + """ + provider_type = request.llm.llm_model_spec.provider + + logger.info( + f"[execute_llm_call] Processing LLM call for provider: {provider_type}, " + f"model: {request.llm.llm_model_spec.model}" + ) + + try: + # Create the appropriate provider using the factory + provider = ProviderFactory.create_provider( + provider_type=provider_type, + client=client, + ) + + # Execute the LLM call through the provider + response, error = provider.execute(request) + + if response: + logger.info( + f"[execute_llm_call] Successfully generated response: {response.response_id}" + ) + else: + logger.error(f"[execute_llm_call] Failed to generate response: {error}") + + return response, error + + except ValueError as e: + error_message = str(e) + logger.error(f"[execute_llm_call] Provider error: {error_message}") + return None, error_message + + except Exception as e: + error_message = f"Unexpected error in LLM service: {str(e)}" + logger.error(f"[execute_llm_call] {error_message}", exc_info=True) + return None, error_message diff --git a/backend/app/services/llm/openai_provider.py b/backend/app/services/llm/openai_provider.py new file mode 100644 index 00000000..4f9334e3 --- /dev/null +++ b/backend/app/services/llm/openai_provider.py @@ -0,0 +1,185 @@ +"""OpenAI provider implementation. + +This module implements the BaseProvider interface for OpenAI models, +including support for standard models, o-series models with reasoning, +and file search capabilities. +""" + +import logging +from typing import Any + +import openai +from openai import OpenAI +from openai.types.responses.response import Response + +from app.models.llm import LLMCallRequest, LLMCallResponse +from app.services.llm.base_provider import BaseProvider +from app.utils import handle_openai_error + +logger = logging.getLogger(__name__) + + +class OpenAIProvider(BaseProvider): + """OpenAI implementation of the LLM provider. + + Supports: + - Standard OpenAI models (GPT-4, GPT-3.5, etc.) + - O-series models with reasoning configuration + - Text configuration for verbosity control + - Vector store file search integration + """ + + def __init__(self, client: OpenAI): + """Initialize OpenAI provider with client. + + Args: + client: OpenAI client instance + """ + super().__init__(client) + + def supports_feature(self, feature: str) -> bool: + """Check if OpenAI provider supports a specific feature. + + Args: + feature: Feature name (reasoning, text_config, file_search, etc.) + + Returns: + True if the feature is supported + """ + supported_features = { + "reasoning", + "text_config", + "file_search", + "temperature", + "max_tokens", + "top_p", + } + return feature in supported_features + + def build_params(self, request: LLMCallRequest) -> dict[str, Any]: + """Build OpenAI API parameters from LLMCallRequest. + + Converts our generic LLM config into OpenAI-specific parameters, + including support for advanced features like reasoning and text configs. 
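+
+        Illustrative mapping (the model name and values below are examples,
+        not defaults):
+
+            LLMModelSpec(model="o1", reasoning=ReasoningConfig(effort="high"))
+            produces roughly:
+            {"model": "o1",
+             "input": [{"role": "user", "content": <prompt>}],
+             "reasoning": {"effort": "high"}}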
+ + Args: + request: LLM call request with configuration + + Returns: + Dictionary of OpenAI API parameters + """ + # Extract model spec for easier access + model_spec = request.llm.llm_model_spec + + params: dict[str, Any] = { + "model": model_spec.model, + "input": [{"role": "user", "content": request.llm.prompt}], + } + + # Add optional parameters if present + if model_spec.temperature is not None: + params["temperature"] = model_spec.temperature + + if model_spec.max_tokens is not None: + params["max_tokens"] = model_spec.max_tokens + + if model_spec.top_p is not None: + params["top_p"] = model_spec.top_p + + # Add advanced OpenAI configs (for o-series models, etc.) + if model_spec.reasoning: + params["reasoning"] = {"effort": model_spec.reasoning.effort} + + if model_spec.text: + params["text"] = {"verbosity": model_spec.text.verbosity} + + # Add vector store file search if provided + if request.llm.vector_store_id: + params["tools"] = [ + { + "type": "file_search", + "vector_store_ids": [request.llm.vector_store_id], + "max_num_results": request.max_num_results, + } + ] + params["include"] = ["file_search_call.results"] + + return params + + def _extract_file_search_results(self, response: Response) -> list[dict]: + """Extract file search results from OpenAI response. + + Args: + response: OpenAI response object + + Returns: + List of dicts with 'score' and 'text' fields + """ + results = [] + for tool_call in response.output: + if tool_call.type == "file_search_call": + results.extend( + {"score": hit.score, "text": hit.text} for hit in tool_call.results + ) + return results + + def execute( + self, request: LLMCallRequest + ) -> tuple[LLMCallResponse | None, str | None]: + """Execute OpenAI API call. + + Args: + request: LLM call request with configuration + + Returns: + Tuple of (response, error_message) + - If successful: (LLMCallResponse, None) + - If failed: (None, error_message) + """ + response: Response | None = None + error_message: str | None = None + + try: + # Extract model spec for easier access + model_spec = request.llm.llm_model_spec + + # Build parameters and make OpenAI call + params = self.build_params(request) + logger.info( + f"[OpenAIProvider] Making OpenAI call with model: {model_spec.model}" + ) + response = self.client.responses.create(**params) + + # Extract file search results if vector store was used + file_search_results = None + if request.llm.vector_store_id: + file_search_results = self._extract_file_search_results(response) + + # Build response + llm_response = LLMCallResponse( + status="success", + response_id=response.id, + message=response.output_text, + model=response.model, + input_tokens=response.usage.input_tokens, + output_tokens=response.usage.output_tokens, + total_tokens=response.usage.total_tokens, + file_search_results=file_search_results, + ) + + logger.info( + f"[OpenAIProvider] Successfully generated response: {response.id}" + ) + return llm_response, None + + except openai.OpenAIError as e: + error_message = handle_openai_error(e) + logger.error( + f"[OpenAIProvider] OpenAI API error: {error_message}", exc_info=True + ) + return None, error_message + + except Exception as e: + error_message = f"Unexpected error: {str(e)}" + logger.error(f"[OpenAIProvider] {error_message}", exc_info=True) + return None, error_message diff --git a/backend/app/services/llm/provider_factory.py b/backend/app/services/llm/provider_factory.py new file mode 100644 index 00000000..ba5b62eb --- /dev/null +++ b/backend/app/services/llm/provider_factory.py @@ 
-0,0 +1,89 @@ +"""Provider factory for creating LLM provider instances. + +This module provides a factory pattern for instantiating the appropriate +LLM provider based on the provider type specified in the request. +""" + +import logging +from typing import Any + +from app.models.llm import ProviderType +from app.services.llm.base_provider import BaseProvider +from app.services.llm.openai_provider import OpenAIProvider + +logger = logging.getLogger(__name__) + + +class ProviderFactory: + """Factory for creating provider instances. + + This class implements the factory pattern to instantiate the correct + provider based on the provider type. It maintains a registry of + available providers and their corresponding classes. + """ + + # Registry of provider types to their implementation classes + _PROVIDERS: dict[str, type[BaseProvider]] = { + "openai": OpenAIProvider, + # Future providers can be added here: + # "anthropic": AnthropicProvider, + # "google": GoogleProvider, + # "azure": AzureOpenAIProvider, + } + + @classmethod + def create_provider( + cls, provider_type: ProviderType, client: Any + ) -> BaseProvider: + """Create a provider instance based on the provider type. + + Args: + provider_type: Type of provider (openai, anthropic, etc.) + client: Provider-specific client instance + + Returns: + Instance of the appropriate provider + + Raises: + ValueError: If the provider type is not supported + """ + provider_class = cls._PROVIDERS.get(provider_type) + + if provider_class is None: + supported = ", ".join(cls._PROVIDERS.keys()) + raise ValueError( + f"Unsupported provider type: {provider_type}. " + f"Supported providers: {supported}" + ) + + logger.info(f"[ProviderFactory] Creating {provider_type} provider instance") + return provider_class(client=client) + + @classmethod + def get_supported_providers(cls) -> list[str]: + """Get list of supported provider types. + + Returns: + List of supported provider type strings + """ + return list(cls._PROVIDERS.keys()) + + @classmethod + def register_provider(cls, provider_type: str, provider_class: type[BaseProvider]): + """Register a new provider type. + + This allows for runtime registration of new providers, useful for + plugins or extensions. 
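+
+        Example (sketch; AnthropicProvider is hypothetical and not shipped
+        in this PR):
+
+            >>> ProviderFactory.register_provider("anthropic", AnthropicProvider)
+            >>> ProviderFactory.get_supported_providers()
+            ['openai', 'anthropic']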
+ + Args: + provider_type: Type identifier for the provider + provider_class: Provider class that implements BaseProvider + + Raises: + TypeError: If provider_class doesn't inherit from BaseProvider + """ + if not issubclass(provider_class, BaseProvider): + raise TypeError(f"{provider_class.__name__} must inherit from BaseProvider") + + logger.info(f"[ProviderFactory] Registering provider: {provider_type}") + cls._PROVIDERS[provider_type] = provider_class From e08fdcd5e2d7de795e883a60249135d4f169b748 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Tue, 21 Oct 2025 08:39:11 +0530 Subject: [PATCH 02/15] resolve using registry --- .../219033c644de_add_llm_im_jobs_table.py | 4 +- backend/app/api/routes/llm.py | 4 +- backend/app/models/llm/__init__.py | 31 +- backend/app/models/llm/call.py | 3 +- backend/app/models/llm/config.py | 74 +++ backend/app/models/llm/request.py | 23 + backend/app/models/llm/response.py | 32 ++ backend/app/models/llm/specs/__init__.py | 20 + backend/app/models/llm/specs/base.py | 192 +++++++ backend/app/models/llm/specs/registry.py | 89 +++ backend/app/services/llm/README.md | 353 ++++++++++++ backend/app/services/llm/__init__.py | 128 ++++- backend/app/services/llm/constants.py | 54 ++ backend/app/services/llm/exceptions.py | 130 +++++ backend/app/services/llm/jobs.py | 6 +- .../llm/{llm_service.py => orchestrator.py} | 7 +- .../app/services/llm/providers/__init__.py | 15 + .../{base_provider.py => providers/base.py} | 87 +-- .../factory.py} | 23 +- .../openai.py} | 95 +--- backend/app/services/llm/specs/__init__.py | 26 + backend/app/services/llm/specs/openai.py | 151 ++++++ .../app/services/llm/transformers/__init__.py | 16 + backend/app/services/llm/transformers/base.py | 84 +++ .../app/services/llm/transformers/factory.py | 89 +++ .../app/services/llm/transformers/openai.py | 77 +++ backend/docs/LLM_ARCHITECTURE.md | 512 ++++++++++++++++++ backend/test_new_architecture.py | 209 +++++++ backend/test_specs_only.py | 156 ++++++ 29 files changed, 2540 insertions(+), 150 deletions(-) create mode 100644 backend/app/models/llm/config.py create mode 100644 backend/app/models/llm/request.py create mode 100644 backend/app/models/llm/response.py create mode 100644 backend/app/models/llm/specs/__init__.py create mode 100644 backend/app/models/llm/specs/base.py create mode 100644 backend/app/models/llm/specs/registry.py create mode 100644 backend/app/services/llm/README.md create mode 100644 backend/app/services/llm/constants.py create mode 100644 backend/app/services/llm/exceptions.py rename backend/app/services/llm/{llm_service.py => orchestrator.py} (91%) create mode 100644 backend/app/services/llm/providers/__init__.py rename backend/app/services/llm/{base_provider.py => providers/base.py} (55%) rename backend/app/services/llm/{provider_factory.py => providers/factory.py} (78%) rename backend/app/services/llm/{openai_provider.py => providers/openai.py} (58%) create mode 100644 backend/app/services/llm/specs/__init__.py create mode 100644 backend/app/services/llm/specs/openai.py create mode 100644 backend/app/services/llm/transformers/__init__.py create mode 100644 backend/app/services/llm/transformers/base.py create mode 100644 backend/app/services/llm/transformers/factory.py create mode 100644 backend/app/services/llm/transformers/openai.py create mode 100644 backend/docs/LLM_ARCHITECTURE.md create mode 100644 backend/test_new_architecture.py create mode 100644 backend/test_specs_only.py diff --git 
a/backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py b/backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py index 404d00e4..056797f7 100644 --- a/backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py +++ b/backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py @@ -10,8 +10,8 @@ # revision identifiers, used by Alembic. -revision = '219033c644de' -down_revision = 'e7c68e43ce6f' +revision = "219033c644de" +down_revision = "e7c68e43ce6f" branch_labels = None depends_on = None diff --git a/backend/app/api/routes/llm.py b/backend/app/api/routes/llm.py index 513d6204..c3675419 100644 --- a/backend/app/api/routes/llm.py +++ b/backend/app/api/routes/llm.py @@ -14,9 +14,7 @@ @router.post("/llm/call") async def llm_call( - request: LLMCallRequest, - _session: SessionDep, - _current_user: AuthContext + request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext ): """ Endpoint to initiate an LLM call as a background job. diff --git a/backend/app/models/llm/__init__.py b/backend/app/models/llm/__init__.py index f05d125f..724d0790 100644 --- a/backend/app/models/llm/__init__.py +++ b/backend/app/models/llm/__init__.py @@ -1,19 +1,44 @@ -from .call import ( - LLMCallRequest, - LLMCallResponse, +"""LLM models module. + +This module provides all data models for LLM functionality including +requests, responses, configurations, and model specifications. +""" + +from app.models.llm.call import LLMCall, LLMCallCreate +from app.models.llm.config import ( LLMConfig, LLMModelSpec, ProviderType, ReasoningConfig, TextConfig, ) +from app.models.llm.request import LLMCallRequest +from app.models.llm.response import LLMCallResponse +from app.models.llm.specs import ( + ModelCapabilities, + ModelSpec, + ModelSpecRegistry, + ParameterSpec, + model_spec_registry, +) __all__ = [ + # Database models + "LLMCall", + "LLMCallCreate", + # Request/Response models "LLMCallRequest", "LLMCallResponse", + # Configuration models "LLMConfig", "LLMModelSpec", "ProviderType", "ReasoningConfig", "TextConfig", + # Specification models + "ModelSpec", + "ModelCapabilities", + "ParameterSpec", + "ModelSpecRegistry", + "model_spec_registry", ] diff --git a/backend/app/models/llm/call.py b/backend/app/models/llm/call.py index 3fff487e..61cfa334 100644 --- a/backend/app/models/llm/call.py +++ b/backend/app/models/llm/call.py @@ -26,7 +26,7 @@ class LLMModelSpec(SQLModel): """ model: str - provider: ProviderType = "openai" + provider: ProviderType = "openai" temperature: Optional[float] = None reasoning: Optional[ReasoningConfig] = None text: Optional[TextConfig] = None @@ -40,6 +40,7 @@ class LLMConfig(SQLModel): This wraps the model spec and can be extended with additional provider-agnostic configuration in the future. """ + prompt: str vector_store_id: Optional[str] = None llm_model_spec: LLMModelSpec diff --git a/backend/app/models/llm/config.py b/backend/app/models/llm/config.py new file mode 100644 index 00000000..3cbce300 --- /dev/null +++ b/backend/app/models/llm/config.py @@ -0,0 +1,74 @@ +"""LLM configuration models. + +This module contains all configuration-related models for LLM requests, +including model specifications and advanced configuration options. +""" + +from typing import Literal, Optional + +from sqlmodel import SQLModel + +# Type definitions +ProviderType = Literal["openai", "anthropic", "google", "azure"] + + +class ReasoningConfig(SQLModel): + """Configuration for reasoning parameters (e.g., o-series models). 
+ + Attributes: + effort: Reasoning effort level - "low", "medium", or "high" + """ + + effort: str # "low", "medium", "high" + + +class TextConfig(SQLModel): + """Configuration for text generation parameters. + + Attributes: + verbosity: Text verbosity level - "low", "medium", or "high" + """ + + verbosity: str # "low", "medium", "high" + + +class LLMModelSpec(SQLModel): + """Specification for the LLM model and its parameters. + + This contains the actual model configuration that will be sent to the provider. + Supports both standard models and advanced configurations. + + Attributes: + model: Model identifier (e.g., "gpt-4", "claude-3-opus") + provider: Provider type (openai, anthropic, google, azure) + temperature: Sampling temperature (0.0-2.0) + reasoning: Optional reasoning configuration for o-series models + text: Optional text verbosity configuration + max_tokens: Maximum number of tokens to generate + top_p: Nucleus sampling parameter (0.0-1.0) + """ + + model: str + provider: ProviderType = "openai" + temperature: Optional[float] = None + reasoning: Optional[ReasoningConfig] = None + text: Optional[TextConfig] = None + max_tokens: Optional[int] = None + top_p: Optional[float] = None + + +class LLMConfig(SQLModel): + """LLM configuration containing model specification and prompt. + + This wraps the model spec and can be extended with additional + provider-agnostic configuration in the future. + + Attributes: + prompt: The user's input prompt + vector_store_id: Optional vector store ID for RAG functionality + llm_model_spec: Model specification and parameters + """ + + prompt: str + vector_store_id: Optional[str] = None + llm_model_spec: LLMModelSpec diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py new file mode 100644 index 00000000..f8ef2da9 --- /dev/null +++ b/backend/app/models/llm/request.py @@ -0,0 +1,23 @@ +"""LLM request models. + +This module contains request models for LLM API calls. +""" + +from sqlmodel import SQLModel + +from app.models.llm.config import LLMConfig + + +class LLMCallRequest(SQLModel): + """Request model for /v1/llm/call endpoint. + + This model decouples LLM calls from the assistants table, + allowing dynamic configuration per request. + + Attributes: + llm: LLM configuration containing model spec and prompt + max_num_results: Number of results to return from vector store file search + """ + + llm: LLMConfig + max_num_results: int = 20 # For vector store file search diff --git a/backend/app/models/llm/response.py b/backend/app/models/llm/response.py new file mode 100644 index 00000000..fc21413a --- /dev/null +++ b/backend/app/models/llm/response.py @@ -0,0 +1,32 @@ +"""LLM response models. + +This module contains response models for LLM API calls. +""" + +from typing import Optional + +from sqlmodel import SQLModel + + +class LLMCallResponse(SQLModel): + """Response model for /v1/llm/call endpoint. + + Attributes: + status: Response status (success, error, etc.) 
+ response_id: Unique identifier for this response + message: The generated text response + model: Model identifier that was used + input_tokens: Number of input tokens consumed + output_tokens: Number of output tokens generated + total_tokens: Total tokens consumed (input + output) + file_search_results: Optional list of file search results from RAG + """ + + status: str + response_id: str + message: str + model: str + input_tokens: int + output_tokens: int + total_tokens: int + file_search_results: Optional[list[dict]] = None diff --git a/backend/app/models/llm/specs/__init__.py b/backend/app/models/llm/specs/__init__.py new file mode 100644 index 00000000..f048fa69 --- /dev/null +++ b/backend/app/models/llm/specs/__init__.py @@ -0,0 +1,20 @@ +"""Model specifications module.""" + +from app.models.llm.specs.base import ( + EffortLevel, + ModelCapabilities, + ModelSpec, + ParameterSpec, + VerbosityLevel, +) +from app.models.llm.specs.registry import ModelSpecRegistry, model_spec_registry + +__all__ = [ + "ModelSpec", + "ModelCapabilities", + "ParameterSpec", + "ModelSpecRegistry", + "model_spec_registry", + "EffortLevel", + "VerbosityLevel", +] diff --git a/backend/app/models/llm/specs/base.py b/backend/app/models/llm/specs/base.py new file mode 100644 index 00000000..e1b234d1 --- /dev/null +++ b/backend/app/models/llm/specs/base.py @@ -0,0 +1,192 @@ +"""Base model specification classes. + +This module defines the schema for LLM model specifications that declare: +- What parameters each model supports +- Valid ranges and types for parameters +- Provider-specific capabilities +- Validation rules for configurations +""" + +from typing import Any, Literal, Optional + +from pydantic import Field +from sqlmodel import SQLModel + + +# Parameter type definitions +EffortLevel = Literal["low", "medium", "high"] +VerbosityLevel = Literal["low", "medium", "high"] + + +class ParameterSpec(SQLModel): + """Specification for a single parameter. + + Attributes: + name: Parameter name + type: Parameter type (str, int, float, bool) + required: Whether parameter is required + default: Default value if not provided + min_value: Minimum value for numeric parameters + max_value: Maximum value for numeric parameters + allowed_values: List of allowed values for enum-like parameters + description: Human-readable parameter description + """ + + name: str = Field(description="Parameter name") + type: str = Field(description="Parameter type (str, int, float, bool)") + required: bool = Field(default=False, description="Whether parameter is required") + default: Optional[Any] = Field(default=None, description="Default value") + min_value: Optional[float] = Field( + default=None, description="Minimum value for numeric params" + ) + max_value: Optional[float] = Field( + default=None, description="Maximum value for numeric params" + ) + allowed_values: Optional[list[Any]] = Field( + default=None, description="List of allowed values" + ) + description: Optional[str] = Field(default=None, description="Parameter description") + + +class ModelCapabilities(SQLModel): + """Capabilities supported by a model. 
+ + Attributes: + supports_reasoning: Whether the model supports reasoning configuration + supports_text_config: Whether the model supports text verbosity config + supports_file_search: Whether the model supports vector store file search + supports_function_calling: Whether the model supports function calling + supports_streaming: Whether the model supports streaming responses + supports_vision: Whether the model supports image inputs + """ + + supports_reasoning: bool = Field( + default=False, description="Supports reasoning configuration" + ) + supports_text_config: bool = Field( + default=False, description="Supports text verbosity config" + ) + supports_file_search: bool = Field( + default=False, description="Supports vector store file search" + ) + supports_function_calling: bool = Field( + default=False, description="Supports function calling" + ) + supports_streaming: bool = Field( + default=False, description="Supports streaming responses" + ) + supports_vision: bool = Field(default=False, description="Supports image inputs") + + +class ModelSpec(SQLModel): + """Complete specification for an LLM model. + + This is the single source of truth for what a model supports. + It defines capabilities, parameter constraints, and validation rules. + + Attributes: + model_name: Model identifier (e.g., 'gpt-4', 'claude-3-opus') + provider: Provider name (openai, anthropic, google, azure) + capabilities: What features this model supports + parameters: List of supported parameters with their constraints + """ + + model_config = {"protected_namespaces": ()} # Allow model_ prefix + + model_name: str = Field( + description="Model identifier (e.g., 'gpt-4', 'claude-3-opus')" + ) + provider: str = Field(description="Provider name (openai, anthropic, google, azure)") + capabilities: ModelCapabilities = Field( + description="What features this model supports" + ) + parameters: list[ParameterSpec] = Field( + default_factory=list, description="Supported parameters" + ) + + def validate_config(self, config: dict[str, Any]) -> tuple[bool, Optional[str]]: + """Validate a configuration against this model spec. 
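+
+        Example (illustrative; assumes this spec declares a float
+        "temperature" parameter with max_value=2.0):
+
+            >>> spec.validate_config({"temperature": 3.0})
+            (False, "Parameter 'temperature' must be <= 2.0")
+            >>> spec.validate_config({"temperature": 0.7})
+            (True, None)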
+ + Args: + config: Configuration dictionary to validate + + Returns: + Tuple of (is_valid, error_message) + - If valid: (True, None) + - If invalid: (False, error_message) + """ + # Build parameter lookup + param_specs = {p.name: p for p in self.parameters} + + # Check for unknown parameters + for key in config.keys(): + if key not in param_specs and key not in [ + "model", + "provider", + "prompt", + "vector_store_id", + ]: + return False, f"Unknown parameter '{key}' for model {self.model_name}" + + # Validate each parameter + for param_spec in self.parameters: + value = config.get(param_spec.name) + + # Check required parameters + if param_spec.required and value is None: + return False, f"Required parameter '{param_spec.name}' is missing" + + # Skip validation if value is None and not required + if value is None: + continue + + # Type validation + if param_spec.type == "int" and not isinstance(value, int): + return False, f"Parameter '{param_spec.name}' must be an integer" + elif param_spec.type == "float" and not isinstance(value, (int, float)): + return False, f"Parameter '{param_spec.name}' must be a number" + elif param_spec.type == "bool" and not isinstance(value, bool): + return False, f"Parameter '{param_spec.name}' must be a boolean" + elif param_spec.type == "str" and not isinstance(value, str): + return False, f"Parameter '{param_spec.name}' must be a string" + + # Range validation for numeric types + if param_spec.type in ["int", "float"]: + if param_spec.min_value is not None and value < param_spec.min_value: + return ( + False, + f"Parameter '{param_spec.name}' must be >= {param_spec.min_value}", + ) + if param_spec.max_value is not None and value > param_spec.max_value: + return ( + False, + f"Parameter '{param_spec.name}' must be <= {param_spec.max_value}", + ) + + # Allowed values validation + if param_spec.allowed_values is not None and value not in param_spec.allowed_values: + return ( + False, + f"Parameter '{param_spec.name}' must be one of {param_spec.allowed_values}", + ) + + return True, None + + def supports_feature(self, feature: str) -> bool: + """Check if this model supports a specific feature. + + Args: + feature: Feature name to check + + Returns: + True if feature is supported + """ + feature_map = { + "reasoning": self.capabilities.supports_reasoning, + "text_config": self.capabilities.supports_text_config, + "file_search": self.capabilities.supports_file_search, + "function_calling": self.capabilities.supports_function_calling, + "streaming": self.capabilities.supports_streaming, + "vision": self.capabilities.supports_vision, + } + return feature_map.get(feature, False) diff --git a/backend/app/models/llm/specs/registry.py b/backend/app/models/llm/specs/registry.py new file mode 100644 index 00000000..0e895c3e --- /dev/null +++ b/backend/app/models/llm/specs/registry.py @@ -0,0 +1,89 @@ +"""Model specification registry. + +This module provides a centralized registry for managing model specifications. +""" + +from typing import Any, Optional + +from app.models.llm.specs.base import ModelSpec + + +class ModelSpecRegistry: + """Registry for managing model specifications. + + This is a singleton that holds all known model specs and provides + lookup and validation capabilities. 
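+
+    Example (sketch; gpt4_spec stands in for a ModelSpec built elsewhere with
+    provider="openai" and model_name="gpt-4"):
+
+        >>> registry = ModelSpecRegistry()
+        >>> registry.register(gpt4_spec)
+        >>> registry.get_spec("openai", "gpt-4") is gpt4_spec
+        True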
+ """ + + _instance: Optional["ModelSpecRegistry"] = None + _specs: dict[str, ModelSpec] = {} + + def __new__(cls) -> "ModelSpecRegistry": + """Ensure singleton pattern.""" + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def register(self, spec: ModelSpec) -> None: + """Register a model specification. + + Args: + spec: Model specification to register + """ + key = f"{spec.provider}:{spec.model_name}" + self._specs[key] = spec + + def get_spec(self, provider: str, model_name: str) -> Optional[ModelSpec]: + """Get a model specification. + + Args: + provider: Provider name + model_name: Model name + + Returns: + ModelSpec if found, None otherwise + """ + key = f"{provider}:{model_name}" + return self._specs.get(key) + + def validate_config( + self, provider: str, model_name: str, config: dict[str, Any] + ) -> tuple[bool, Optional[str]]: + """Validate a configuration against the model spec. + + Args: + provider: Provider name + model_name: Model name + config: Configuration to validate + + Returns: + Tuple of (is_valid, error_message) + """ + spec = self.get_spec(provider, model_name) + if spec is None: + # If no spec found, we can't validate - allow it through + # This maintains backward compatibility with models we haven't spec'd yet + return True, None + + return spec.validate_config(config) + + def list_models(self, provider: Optional[str] = None) -> list[ModelSpec]: + """List all registered model specs. + + Args: + provider: Optional provider filter + + Returns: + List of model specs + """ + if provider: + return [spec for spec in self._specs.values() if spec.provider == provider] + return list(self._specs.values()) + + def clear(self) -> None: + """Clear all registered specs (mainly for testing).""" + self._specs.clear() + + +# Global registry instance +model_spec_registry = ModelSpecRegistry() diff --git a/backend/app/services/llm/README.md b/backend/app/services/llm/README.md new file mode 100644 index 00000000..e2608bf4 --- /dev/null +++ b/backend/app/services/llm/README.md @@ -0,0 +1,353 @@ +# LLM Service Module + +A provider-agnostic interface for executing LLM calls. Currently supports OpenAI with an extensible architecture for future providers. + +## Architecture + +The LLM service follows a layered architecture with clear separation of concerns: + +``` +app/ +├── models/llm/ # Data models +│ ├── call.py # Database models +│ ├── config.py # Configuration models +│ ├── request.py # Request models +│ ├── response.py # Response models +│ └── specs/ # Model specifications +│ ├── base.py # Base spec classes +│ └── registry.py # Spec registry +│ +└── services/llm/ # Service layer + ├── __init__.py # Public API + ├── constants.py # Constants and enums + ├── exceptions.py # Custom exceptions + ├── orchestrator.py # Main entry point + ├── jobs.py # Celery job management + │ + ├── providers/ # Provider implementations + │ ├── base.py # Abstract base provider + │ ├── factory.py # Provider factory (extensible) + │ └── openai.py # OpenAI implementation + │ + ├── transformers/ # Request transformers + │ ├── base.py # Abstract transformer + │ ├── factory.py # Transformer factory (extensible) + │ └── openai.py # OpenAI transformer + │ + └── specs/ # Model specifications + ├── __init__.py # Spec initialization + └── openai.py # OpenAI model specs +``` + +## Key Components + +### 1. 
Orchestration Layer + +**`orchestrator.py`** - Main entry point for LLM calls +- Routes requests to appropriate providers +- Handles error handling and logging +- Provider-agnostic interface + +**`jobs.py`** - Celery job management +- Asynchronous job execution +- Job status tracking +- Integration with job queue + +### 2. Provider Layer + +**`BaseProvider`** - Abstract base class for all providers +- Defines standard interface +- Handles transformer integration +- Manages parameter building + +**`OpenAIProvider`** - OpenAI implementation +- GPT-4, GPT-3.5 models +- O-series reasoning models +- Vector store file search +- Full feature support + +**`ProviderFactory`** - Creates provider instances +- Supports provider registration for extensibility +- Runtime provider addition +- Currently registered: OpenAI + +### 3. Transformation Layer + +**`ConfigTransformer`** - Base transformer class +- Converts unified API to provider format +- Validates against model specs +- Extensible for new providers + +**`OpenAITransformer`** - OpenAI transformation +- Handles OpenAI Responses API format +- Supports reasoning configuration +- Vector store integration + +**`TransformerFactory`** - Creates transformer instances +- Loads model specs automatically +- Supports custom transformers + +### 4. Specification Layer + +**`ModelSpec`** - Model specification class +- Defines capabilities (reasoning, vision, etc.) +- Parameter constraints and validation +- Feature detection + +**`ModelSpecRegistry`** - Singleton registry +- Manages all model specs +- Provides lookup and validation +- Centralized spec storage + +### 5. Error Handling + +Custom exception hierarchy: +- `LLMServiceError` - Base exception +- `ProviderError` - Provider-specific errors +- `UnsupportedProviderError` - Unsupported provider +- `ValidationError` - Configuration validation +- `TransformationError` - Transformation failures +- `APICallError` - API call failures + +## Usage + +### Basic LLM Call + +```python +from app.services.llm import execute_llm_call +from app.models.llm import LLMCallRequest, LLMConfig, LLMModelSpec + +# Create request +request = LLMCallRequest( + llm=LLMConfig( + prompt="Explain quantum computing", + llm_model_spec=LLMModelSpec( + model="gpt-4", + provider="openai", + temperature=0.7, + max_tokens=500 + ) + ) +) + +# Execute call +response, error = execute_llm_call(request, openai_client) + +if response: + print(f"Response: {response.message}") + print(f"Tokens: {response.total_tokens}") +else: + print(f"Error: {error}") +``` + +### With Vector Store (RAG) + +```python +request = LLMCallRequest( + llm=LLMConfig( + prompt="What does the documentation say about authentication?", + vector_store_id="vs_abc123", + llm_model_spec=LLMModelSpec( + model="gpt-4", + provider="openai" + ) + ), + max_num_results=10 +) + +response, error = execute_llm_call(request, openai_client) + +# Access file search results +if response and response.file_search_results: + for result in response.file_search_results: + print(f"Score: {result['score']}, Text: {result['text']}") +``` + +### O-Series Models (Reasoning) + +```python +from app.models.llm import ReasoningConfig, TextConfig + +request = LLMCallRequest( + llm=LLMConfig( + prompt="Solve this complex problem...", + llm_model_spec=LLMModelSpec( + model="o1", + provider="openai", + reasoning=ReasoningConfig(effort="high"), + text=TextConfig(verbosity="medium") + ) + ) +) + +response, error = execute_llm_call(request, openai_client) +``` + +### Asynchronous Job + +```python +from 
app.services.llm.jobs import start_job + +# Schedule background job +job_id = start_job( + db=session, + request=request, + project_id=123, + organization_id=456 +) + +# Job runs asynchronously via Celery +print(f"Job scheduled: {job_id}") +``` + +## Adding New Providers + +### 1. Create Provider Implementation + +```python +# app/services/llm/providers/anthropic.py +from app.services.llm.providers.base import BaseProvider +from app.models.llm import LLMCallRequest, LLMCallResponse + +class AnthropicProvider(BaseProvider): + def execute(self, request: LLMCallRequest) -> tuple[LLMCallResponse | None, str | None]: + params = self.build_params(request) + response = self.client.messages.create(**params) + # Process response... + return llm_response, None +``` + +### 2. Create Transformer + +```python +# app/services/llm/transformers/anthropic.py +from app.services.llm.transformers.base import ConfigTransformer + +class AnthropicTransformer(ConfigTransformer): + def transform(self, request: LLMCallRequest) -> dict[str, Any]: + return { + "model": request.llm.llm_model_spec.model, + "messages": [{"role": "user", "content": request.llm.prompt}], + "max_tokens": request.llm.llm_model_spec.max_tokens or 1024, + # ... other Anthropic-specific params + } +``` + +### 3. Create Model Specs + +```python +# app/services/llm/specs/anthropic.py +from app.models.llm.specs import ModelSpec, ModelCapabilities, ParameterSpec + +def create_anthropic_specs() -> list[ModelSpec]: + return [ + ModelSpec( + model_name="claude-3-opus", + provider="anthropic", + capabilities=ModelCapabilities( + supports_streaming=True, + supports_vision=True, + # ... + ), + parameters=[ + ParameterSpec(name="temperature", type="float", min_value=0.0, max_value=1.0), + # ... + ] + ) + ] +``` + +### 4. Register Components + +```python +# Update factories +from app.services.llm.providers.factory import ProviderFactory +from app.services.llm.transformers.factory import TransformerFactory + +ProviderFactory.register_provider("anthropic", AnthropicProvider) +TransformerFactory.register_transformer("anthropic", AnthropicTransformer) +``` + +## Configuration + +### Constants + +Edit `constants.py` to update default values: + +```python +DEFAULT_TEMPERATURE = 1.0 +DEFAULT_TOP_P = 1.0 +DEFAULT_MAX_RESULTS = 20 +``` + +### Supported Providers + +Currently supported: `openai` + +The architecture is designed to be extensible. Future providers can be added following the pattern in "Adding New Providers" section. + +## Testing + +```python +# Test with mock client +from unittest.mock import Mock + +mock_client = Mock() +mock_client.responses.create.return_value = Mock( + id="resp_123", + output_text="Test response", + model="gpt-4", + usage=Mock(input_tokens=10, output_tokens=20, total_tokens=30) +) + +response, error = execute_llm_call(request, mock_client) +assert response.message == "Test response" +``` + +## Best Practices + +1. **Always use model specs** - Enable validation for production code +2. **Handle errors gracefully** - Check for both response and error +3. **Use type hints** - Maintain type safety throughout +4. **Log appropriately** - Use structured logging for debugging +5. **Follow the architecture** - Don't bypass the abstraction layers +6. 
**Add tests** - Test new providers and transformers thoroughly + +## Future Enhancements + +- [ ] Streaming response support +- [ ] Function calling for all providers +- [ ] Batch request processing +- [ ] Response caching +- [ ] Rate limiting +- [ ] Cost tracking +- [ ] Provider failover +- [ ] A/B testing between providers + +## Troubleshooting + +### Common Issues + +**Import errors after refactoring** +- Ensure old files are removed +- Check `__init__.py` exports +- Clear Python cache: `find . -type d -name __pycache__ -exec rm -r {} +` + +**Validation errors** +- Check model spec definitions +- Verify parameter constraints +- Use `model_spec.validate_config()` for debugging + +**Provider not found** +- Ensure provider is registered in factory +- Check provider name spelling +- Verify provider is in `SUPPORTED_PROVIDERS` + +## Contributing + +When adding new features: +1. Update relevant specs +2. Add comprehensive docstrings +3. Update this README +4. Add tests +5. Follow existing patterns diff --git a/backend/app/services/llm/__init__.py b/backend/app/services/llm/__init__.py index c1b0b238..c7571bd3 100644 --- a/backend/app/services/llm/__init__.py +++ b/backend/app/services/llm/__init__.py @@ -1,23 +1,129 @@ """LLM services module. -This module provides a provider-agnostic interface for executing LLM calls -through various providers (OpenAI, Anthropic, Google, etc.). +This module provides a provider-agnostic interface for executing LLM calls. +Currently supports OpenAI with an extensible architecture for future providers. -Key components: +Architecture: +----------- +The LLM service follows a layered architecture with clear separation of concerns: + +1. **Models Layer** (`app.models.llm`) + - Request/Response models + - Configuration models + - Model specifications + +2. **Orchestration Layer** + - `orchestrator.py`: Main entry point for LLM calls + - `jobs.py`: Celery job management + +3. **Provider Layer** (`providers/`) + - `base.py`: Abstract base provider + - `openai.py`: OpenAI implementation + - `factory.py`: Provider factory (extensible) + +4. **Transformation Layer** (`transformers/`) + - `base.py`: Abstract transformer + - `openai.py`: OpenAI transformer + - `factory.py`: Transformer factory (extensible) + +5. **Specification Layer** (`specs/`) + - `openai.py`: OpenAI model specs + - Model capability definitions + - Parameter validation rules + +Key Components: +-------------- +- execute_llm_call: Main entry point for LLM API calls - BaseProvider: Abstract base class for all providers -- OpenAIProvider: OpenAI implementation +- ConfigTransformer: Base class for request transformation +- ModelSpec: Model specification with validation - ProviderFactory: Factory for creating provider instances -- execute_llm_call: Main entry point for LLM calls +- TransformerFactory: Factory for creating transformers + +The architecture uses specification-driven configuration with: +1. Model specs defining capabilities and parameter constraints +2. Transformers converting unified API contracts to provider-specific formats +3. Automatic validation against model specifications +4. 
Custom exceptions for better error handling + +Usage Example: +------------- +```python +from app.services.llm import execute_llm_call +from app.models.llm import LLMCallRequest, LLMConfig, LLMModelSpec + +request = LLMCallRequest( + llm=LLMConfig( + prompt="Hello, world!", + llm_model_spec=LLMModelSpec( + model="gpt-4", + provider="openai", + temperature=0.7 + ) + ) +) + +response, error = execute_llm_call(request, openai_client) +``` """ -from app.services.llm.base_provider import BaseProvider -from app.services.llm.llm_service import execute_llm_call -from app.services.llm.openai_provider import OpenAIProvider -from app.services.llm.provider_factory import ProviderFactory +# Main orchestration +from app.services.llm.orchestrator import execute_llm_call + +# Providers +from app.services.llm.providers import ( + BaseProvider, + ProviderFactory, + OpenAIProvider, +) + +# Transformers +from app.services.llm.transformers import ( + ConfigTransformer, + TransformerFactory, + OpenAITransformer, +) + +# Constants and exceptions +from app.services.llm.constants import ( + ProviderType, + EffortLevel, + VerbosityLevel, + SUPPORTED_PROVIDERS, +) +from app.services.llm.exceptions import ( + LLMServiceError, + ProviderError, + UnsupportedProviderError, + ValidationError, + TransformationError, + APICallError, +) + +# Initialize model specs on module import +import app.services.llm.specs # noqa: F401 __all__ = [ + # Main entry point + "execute_llm_call", + # Providers "BaseProvider", - "OpenAIProvider", "ProviderFactory", - "execute_llm_call", + "OpenAIProvider", + # Transformers + "ConfigTransformer", + "TransformerFactory", + "OpenAITransformer", + # Constants + "ProviderType", + "EffortLevel", + "VerbosityLevel", + "SUPPORTED_PROVIDERS", + # Exceptions + "LLMServiceError", + "ProviderError", + "UnsupportedProviderError", + "ValidationError", + "TransformationError", + "APICallError", ] diff --git a/backend/app/services/llm/constants.py b/backend/app/services/llm/constants.py new file mode 100644 index 00000000..2aabd861 --- /dev/null +++ b/backend/app/services/llm/constants.py @@ -0,0 +1,54 @@ +"""Constants and type definitions for the LLM module. + +This module centralizes all constants, enums, and type definitions used +across the LLM service layer. +""" + +from typing import Literal + +# Provider type definitions +ProviderType = Literal["openai"] + +# Reasoning effort levels for o-series models +EffortLevel = Literal["low", "medium", "high"] + +# Text verbosity levels +VerbosityLevel = Literal["low", "medium", "high"] + +# Job status +JobStatusType = Literal["pending", "processing", "success", "failed"] + +# Default parameter values +DEFAULT_TEMPERATURE = 1.0 +DEFAULT_TOP_P = 1.0 +DEFAULT_MAX_RESULTS = 20 + +# Parameter constraints +MIN_TEMPERATURE = 0.0 +MAX_TEMPERATURE = 2.0 +MIN_TOP_P = 0.0 +MAX_TOP_P = 1.0 +MIN_MAX_TOKENS = 1 +MAX_MAX_TOKENS = 128000 + +# Supported providers +SUPPORTED_PROVIDERS = ["openai"] + +# Error messages +ERROR_UNSUPPORTED_PROVIDER = "Provider '{provider}' is not supported. 
Supported: {supported}" +ERROR_VALIDATION_FAILED = "Configuration validation failed: {details}" +ERROR_TRANSFORMATION_FAILED = "Failed to transform request: {details}" +ERROR_API_CALL_FAILED = "API call failed: {details}" +ERROR_UNKNOWN_PARAMETER = "Unknown parameter '{param}' for model {model}" +ERROR_REQUIRED_PARAMETER = "Required parameter '{param}' is missing" +ERROR_INVALID_TYPE = "Parameter '{param}' must be {expected_type}" +ERROR_OUT_OF_RANGE = "Parameter '{param}' must be between {min_val} and {max_val}" +ERROR_INVALID_VALUE = "Parameter '{param}' must be one of {allowed_values}" + +# Feature names for capability checks +FEATURE_REASONING = "reasoning" +FEATURE_TEXT_CONFIG = "text_config" +FEATURE_FILE_SEARCH = "file_search" +FEATURE_FUNCTION_CALLING = "function_calling" +FEATURE_STREAMING = "streaming" +FEATURE_VISION = "vision" diff --git a/backend/app/services/llm/exceptions.py b/backend/app/services/llm/exceptions.py new file mode 100644 index 00000000..045176af --- /dev/null +++ b/backend/app/services/llm/exceptions.py @@ -0,0 +1,130 @@ +"""Custom exceptions for the LLM module. + +This module defines all custom exceptions used throughout the LLM service layer, +providing better error handling and more descriptive error messages. +""" + + +class LLMServiceError(Exception): + """Base exception for all LLM service errors.""" + + pass + + +class ProviderError(LLMServiceError): + """Raised when there's an error with the provider configuration or execution.""" + + def __init__(self, provider: str, message: str): + self.provider = provider + self.message = message + super().__init__(f"Provider '{provider}' error: {message}") + + +class UnsupportedProviderError(ProviderError): + """Raised when an unsupported provider is requested.""" + + def __init__(self, provider: str, supported_providers: list[str]): + self.supported_providers = supported_providers + message = f"Unsupported provider. 
Supported: {', '.join(supported_providers)}" + super().__init__(provider, message) + + +class ValidationError(LLMServiceError): + """Raised when configuration validation fails.""" + + def __init__(self, message: str, parameter: str | None = None): + self.parameter = parameter + self.message = message + error_msg = f"Validation error" + if parameter: + error_msg += f" for parameter '{parameter}'" + error_msg += f": {message}" + super().__init__(error_msg) + + +class TransformationError(LLMServiceError): + """Raised when request transformation fails.""" + + def __init__(self, provider: str, message: str): + self.provider = provider + self.message = message + super().__init__(f"Transformation error for {provider}: {message}") + + +class ModelSpecNotFoundError(LLMServiceError): + """Raised when a model specification is not found.""" + + def __init__(self, provider: str, model_name: str): + self.provider = provider + self.model_name = model_name + super().__init__( + f"Model spec not found for provider '{provider}', model '{model_name}'" + ) + + +class APICallError(LLMServiceError): + """Raised when an API call to the provider fails.""" + + def __init__(self, provider: str, message: str, original_error: Exception | None = None): + self.provider = provider + self.message = message + self.original_error = original_error + super().__init__(f"API call failed for {provider}: {message}") + + +class ParameterError(ValidationError): + """Raised when there's an error with a specific parameter.""" + + def __init__(self, parameter: str, message: str): + super().__init__(message, parameter) + + +class RequiredParameterError(ParameterError): + """Raised when a required parameter is missing.""" + + def __init__(self, parameter: str): + super().__init__(parameter, f"Required parameter '{parameter}' is missing") + + +class InvalidParameterTypeError(ParameterError): + """Raised when a parameter has an invalid type.""" + + def __init__(self, parameter: str, expected_type: str, actual_type: str): + self.expected_type = expected_type + self.actual_type = actual_type + super().__init__( + parameter, + f"Must be {expected_type}, got {actual_type}" + ) + + +class ParameterOutOfRangeError(ParameterError): + """Raised when a parameter value is out of allowed range.""" + + def __init__(self, parameter: str, value: float, min_value: float | None, max_value: float | None): + self.value = value + self.min_value = min_value + self.max_value = max_value + + if min_value is not None and max_value is not None: + msg = f"Value {value} is out of range [{min_value}, {max_value}]" + elif min_value is not None: + msg = f"Value {value} must be >= {min_value}" + elif max_value is not None: + msg = f"Value {value} must be <= {max_value}" + else: + msg = f"Value {value} is invalid" + + super().__init__(parameter, msg) + + +class InvalidParameterValueError(ParameterError): + """Raised when a parameter has an invalid value.""" + + def __init__(self, parameter: str, value: any, allowed_values: list): + self.value = value + self.allowed_values = allowed_values + super().__init__( + parameter, + f"Value '{value}' is not allowed. 
Must be one of: {allowed_values}" + ) diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index a0cbf21c..5149531d 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -11,7 +11,7 @@ from app.models import JobType, JobStatus, JobUpdate from app.models.llm import LLMCallRequest, LLMCallResponse -from app.services.llm import execute_llm_call +from app.services.llm.orchestrator import execute_llm_call from app.utils import get_openai_client logger = logging.getLogger(__name__) @@ -123,9 +123,7 @@ def execute_job( except Exception as e: error_message = f"Unexpected error in LLM job execution: {str(e)}" - logger.error( - f"[execute_job] {error_message} | job_id={job_id}", exc_info=True - ) + logger.error(f"[execute_job] {error_message} | job_id={job_id}", exc_info=True) with Session(engine) as session: job_crud = JobCrud(session=session) job_crud.update( diff --git a/backend/app/services/llm/llm_service.py b/backend/app/services/llm/orchestrator.py similarity index 91% rename from backend/app/services/llm/llm_service.py rename to backend/app/services/llm/orchestrator.py index 62ef08bc..085c38e0 100644 --- a/backend/app/services/llm/llm_service.py +++ b/backend/app/services/llm/orchestrator.py @@ -9,7 +9,8 @@ from typing import Any from app.models.llm import LLMCallRequest, LLMCallResponse -from app.services.llm.provider_factory import ProviderFactory +from app.services.llm.providers.factory import ProviderFactory +from app.services.llm.exceptions import LLMServiceError logger = logging.getLogger(__name__) @@ -72,9 +73,9 @@ def execute_llm_call( return response, error - except ValueError as e: + except LLMServiceError as e: error_message = str(e) - logger.error(f"[execute_llm_call] Provider error: {error_message}") + logger.error(f"[execute_llm_call] LLM service error: {error_message}") return None, error_message except Exception as e: diff --git a/backend/app/services/llm/providers/__init__.py b/backend/app/services/llm/providers/__init__.py new file mode 100644 index 00000000..02841ce5 --- /dev/null +++ b/backend/app/services/llm/providers/__init__.py @@ -0,0 +1,15 @@ +"""LLM providers module. + +This module contains all provider implementations for different LLM services. +Currently supports OpenAI with an extensible factory pattern for future providers. +""" + +from app.services.llm.providers.base import BaseProvider +from app.services.llm.providers.factory import ProviderFactory +from app.services.llm.providers.openai import OpenAIProvider + +__all__ = [ + "BaseProvider", + "ProviderFactory", + "OpenAIProvider", +] diff --git a/backend/app/services/llm/base_provider.py b/backend/app/services/llm/providers/base.py similarity index 55% rename from backend/app/services/llm/base_provider.py rename to backend/app/services/llm/providers/base.py index 39c75d57..963e5f4c 100644 --- a/backend/app/services/llm/base_provider.py +++ b/backend/app/services/llm/providers/base.py @@ -1,13 +1,16 @@ """Base provider interface for LLM providers. This module defines the abstract base class that all LLM providers must implement. -It provides a provider-agnostic interface for executing LLM calls. +It provides a provider-agnostic interface for executing LLM calls with spec-based +transformation. 
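+Parameter building is delegated to a ConfigTransformer created via the TransformerFactory, so concrete providers normally only need to implement execute().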
""" from abc import ABC, abstractmethod -from typing import Any +from typing import Any, Optional from app.models.llm import LLMCallRequest, LLMCallResponse +from app.services.llm.transformers.base import ConfigTransformer +from app.services.llm.transformers.factory import TransformerFactory class BaseProvider(ABC): @@ -16,80 +19,86 @@ class BaseProvider(ABC): All provider implementations (OpenAI, Anthropic, etc.) must inherit from this class and implement the required methods. + This provider uses a transformer-based architecture where configuration + transformation is separated from the provider execution logic. + Attributes: client: The provider-specific client instance + transformer: ConfigTransformer for converting requests to provider format """ - def __init__(self, client: Any): - """Initialize the provider with client. + def __init__(self, client: Any, transformer: Optional[ConfigTransformer] = None): + """Initialize the provider with client and optional transformer. Args: client: Provider-specific client (e.g., OpenAI, Anthropic client) + transformer: Optional config transformer. If not provided, one will + be created using the TransformerFactory. """ self.client = client + self.transformer = transformer - @abstractmethod - def execute( - self, request: LLMCallRequest - ) -> tuple[LLMCallResponse | None, str | None]: - """Execute an LLM call using the provider. - - This is the main method that must be implemented by all providers. - It should handle the complete lifecycle of an LLM request: - 1. Build provider-specific parameters from the request - 2. Make the API call to the provider - 3. Extract results (including any additional features like RAG) - 4. Return standardized response + def _get_transformer(self, request: LLMCallRequest) -> ConfigTransformer: + """Get or create a transformer for this request. Args: - request: LLM call request with configuration + request: LLM call request Returns: - Tuple of (response, error_message) - - If successful: (LLMCallResponse, None) - - If failed: (None, error_message) - - Raises: - NotImplementedError: If the provider hasn't implemented this method + ConfigTransformer instance """ - raise NotImplementedError("Providers must implement execute method") + if self.transformer is None: + # Create transformer using factory + provider_name = self.get_provider_name() + model_name = request.llm.llm_model_spec.model + self.transformer = TransformerFactory.create_transformer( + provider=provider_name, + model_name=model_name, + use_spec=True, + ) + return self.transformer - @abstractmethod def build_params(self, request: LLMCallRequest) -> dict[str, Any]: """Build provider-specific API parameters from the request. - Convert the generic LLMCallRequest into provider-specific parameters. - This includes handling model names, temperature, tokens, and any - provider-specific features. + This method uses the transformer to convert the request. + Providers can override this if they need custom logic, but the + default implementation uses the transformer. Args: request: LLM call request with configuration Returns: Dictionary of provider-specific parameters - - Raises: - NotImplementedError: If the provider hasn't implemented this method """ - raise NotImplementedError("Providers must implement build_params method") + transformer = self._get_transformer(request) + return transformer.validate_and_transform(request) @abstractmethod - def supports_feature(self, feature: str) -> bool: - """Check if the provider supports a specific feature. 
+ def execute( + self, request: LLMCallRequest + ) -> tuple[LLMCallResponse | None, str | None]: + """Execute an LLM call using the provider. - Features might include: "reasoning", "text_config", "file_search", - "streaming", "function_calling", etc. + This is the main method that must be implemented by all providers. + It should handle the complete lifecycle of an LLM request: + 1. Build provider-specific parameters (using transformer) + 2. Make the API call to the provider + 3. Extract results (including any additional features like RAG) + 4. Return standardized response Args: - feature: Feature name to check + request: LLM call request with configuration Returns: - True if the feature is supported, False otherwise + Tuple of (response, error_message) + - If successful: (LLMCallResponse, None) + - If failed: (None, error_message) Raises: NotImplementedError: If the provider hasn't implemented this method """ - raise NotImplementedError("Providers must implement supports_feature method") + raise NotImplementedError("Providers must implement execute method") def get_provider_name(self) -> str: """Get the name of the provider. diff --git a/backend/app/services/llm/provider_factory.py b/backend/app/services/llm/providers/factory.py similarity index 78% rename from backend/app/services/llm/provider_factory.py rename to backend/app/services/llm/providers/factory.py index ba5b62eb..98c5f6a1 100644 --- a/backend/app/services/llm/provider_factory.py +++ b/backend/app/services/llm/providers/factory.py @@ -8,8 +8,9 @@ from typing import Any from app.models.llm import ProviderType -from app.services.llm.base_provider import BaseProvider -from app.services.llm.openai_provider import OpenAIProvider +from app.services.llm.exceptions import UnsupportedProviderError +from app.services.llm.providers.base import BaseProvider +from app.services.llm.providers.openai import OpenAIProvider logger = logging.getLogger(__name__) @@ -29,6 +30,7 @@ class ProviderFactory: # "anthropic": AnthropicProvider, # "google": GoogleProvider, # "azure": AzureOpenAIProvider, + # "cohere": CohereProvider, } @classmethod @@ -45,15 +47,14 @@ def create_provider( Instance of the appropriate provider Raises: - ValueError: If the provider type is not supported + UnsupportedProviderError: If the provider type is not supported """ provider_class = cls._PROVIDERS.get(provider_type) if provider_class is None: - supported = ", ".join(cls._PROVIDERS.keys()) - raise ValueError( - f"Unsupported provider type: {provider_type}. " - f"Supported providers: {supported}" + raise UnsupportedProviderError( + provider=provider_type, + supported_providers=cls.get_supported_providers() ) logger.info(f"[ProviderFactory] Creating {provider_type} provider instance") @@ -69,7 +70,9 @@ def get_supported_providers(cls) -> list[str]: return list(cls._PROVIDERS.keys()) @classmethod - def register_provider(cls, provider_type: str, provider_class: type[BaseProvider]): + def register_provider( + cls, provider_type: str, provider_class: type[BaseProvider] + ) -> None: """Register a new provider type. 
This allows for runtime registration of new providers, useful for @@ -83,7 +86,9 @@ def register_provider(cls, provider_type: str, provider_class: type[BaseProvider TypeError: If provider_class doesn't inherit from BaseProvider """ if not issubclass(provider_class, BaseProvider): - raise TypeError(f"{provider_class.__name__} must inherit from BaseProvider") + raise TypeError( + f"{provider_class.__name__} must inherit from BaseProvider" + ) logger.info(f"[ProviderFactory] Registering provider: {provider_type}") cls._PROVIDERS[provider_type] = provider_class diff --git a/backend/app/services/llm/openai_provider.py b/backend/app/services/llm/providers/openai.py similarity index 58% rename from backend/app/services/llm/openai_provider.py rename to backend/app/services/llm/providers/openai.py index 4f9334e3..b3da475a 100644 --- a/backend/app/services/llm/openai_provider.py +++ b/backend/app/services/llm/providers/openai.py @@ -3,17 +3,20 @@ This module implements the BaseProvider interface for OpenAI models, including support for standard models, o-series models with reasoning, and file search capabilities. + +Uses spec-based transformation for configuration conversion. """ import logging -from typing import Any +from typing import Optional import openai from openai import OpenAI from openai.types.responses.response import Response from app.models.llm import LLMCallRequest, LLMCallResponse -from app.services.llm.base_provider import BaseProvider +from app.services.llm.providers.base import BaseProvider +from app.services.llm.transformers.base import ConfigTransformer from app.utils import handle_openai_error logger = logging.getLogger(__name__) @@ -27,84 +30,18 @@ class OpenAIProvider(BaseProvider): - O-series models with reasoning configuration - Text configuration for verbosity control - Vector store file search integration + + Uses OpenAITransformer for configuration conversion. """ - def __init__(self, client: OpenAI): - """Initialize OpenAI provider with client. + def __init__(self, client: OpenAI, transformer: Optional[ConfigTransformer] = None): + """Initialize OpenAI provider with client and optional transformer. Args: client: OpenAI client instance + transformer: Optional config transformer (will auto-create if not provided) """ - super().__init__(client) - - def supports_feature(self, feature: str) -> bool: - """Check if OpenAI provider supports a specific feature. - - Args: - feature: Feature name (reasoning, text_config, file_search, etc.) - - Returns: - True if the feature is supported - """ - supported_features = { - "reasoning", - "text_config", - "file_search", - "temperature", - "max_tokens", - "top_p", - } - return feature in supported_features - - def build_params(self, request: LLMCallRequest) -> dict[str, Any]: - """Build OpenAI API parameters from LLMCallRequest. - - Converts our generic LLM config into OpenAI-specific parameters, - including support for advanced features like reasoning and text configs. 
- - Args: - request: LLM call request with configuration - - Returns: - Dictionary of OpenAI API parameters - """ - # Extract model spec for easier access - model_spec = request.llm.llm_model_spec - - params: dict[str, Any] = { - "model": model_spec.model, - "input": [{"role": "user", "content": request.llm.prompt}], - } - - # Add optional parameters if present - if model_spec.temperature is not None: - params["temperature"] = model_spec.temperature - - if model_spec.max_tokens is not None: - params["max_tokens"] = model_spec.max_tokens - - if model_spec.top_p is not None: - params["top_p"] = model_spec.top_p - - # Add advanced OpenAI configs (for o-series models, etc.) - if model_spec.reasoning: - params["reasoning"] = {"effort": model_spec.reasoning.effort} - - if model_spec.text: - params["text"] = {"verbosity": model_spec.text.verbosity} - - # Add vector store file search if provided - if request.llm.vector_store_id: - params["tools"] = [ - { - "type": "file_search", - "vector_store_ids": [request.llm.vector_store_id], - "max_num_results": request.max_num_results, - } - ] - params["include"] = ["file_search_call.results"] - - return params + super().__init__(client, transformer) def _extract_file_search_results(self, response: Response) -> list[dict]: """Extract file search results from OpenAI response. @@ -128,6 +65,9 @@ def execute( ) -> tuple[LLMCallResponse | None, str | None]: """Execute OpenAI API call. + Uses the transformer to convert the request to OpenAI format, + with automatic validation against model specs. + Args: request: LLM call request with configuration @@ -143,7 +83,7 @@ def execute( # Extract model spec for easier access model_spec = request.llm.llm_model_spec - # Build parameters and make OpenAI call + # Build parameters using transformer (includes validation) params = self.build_params(request) logger.info( f"[OpenAIProvider] Making OpenAI call with model: {model_spec.model}" @@ -172,6 +112,11 @@ def execute( ) return llm_response, None + except ValueError as e: + error_message = f"Configuration validation failed: {str(e)}" + logger.error(f"[OpenAIProvider] {error_message}", exc_info=True) + return None, error_message + except openai.OpenAIError as e: error_message = handle_openai_error(e) logger.error( diff --git a/backend/app/services/llm/specs/__init__.py b/backend/app/services/llm/specs/__init__.py new file mode 100644 index 00000000..87de64d4 --- /dev/null +++ b/backend/app/services/llm/specs/__init__.py @@ -0,0 +1,26 @@ +"""Model specifications initialization. + +This module initializes all model specifications and registers them +with the global registry. +""" + +from app.services.llm.specs.openai import register_openai_specs + + +def initialize_model_specs() -> None: + """Initialize and register all model specifications. + + This should be called during application startup to ensure all + model specs are available for validation and transformation. + """ + # Register OpenAI specs + register_openai_specs() + + # Future: Register other provider specs + # register_anthropic_specs() + # register_google_specs() + # register_azure_specs() + + +# Auto-initialize when module is imported +initialize_model_specs() diff --git a/backend/app/services/llm/specs/openai.py b/backend/app/services/llm/specs/openai.py new file mode 100644 index 00000000..655731c2 --- /dev/null +++ b/backend/app/services/llm/specs/openai.py @@ -0,0 +1,151 @@ +"""OpenAI model specifications. 
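+All specs created here are registered with the shared model_spec_registry via register_openai_specs().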
+ +This module contains specifications for OpenAI models including GPT-4, +GPT-3.5, and o-series models with reasoning capabilities. +""" + +from app.models.llm.specs import ( + ModelCapabilities, + ModelSpec, + ParameterSpec, + model_spec_registry, +) + + +def create_openai_specs() -> list[ModelSpec]: + """Create specifications for OpenAI models. + + Returns: + List of ModelSpec objects for OpenAI models + """ + specs = [] + + # Standard parameters for most OpenAI models + standard_params = [ + ParameterSpec( + name="temperature", + type="float", + required=False, + min_value=0.0, + max_value=2.0, + default=1.0, + description="Sampling temperature (0-2). Higher values make output more random.", + ), + ParameterSpec( + name="max_tokens", + type="int", + required=False, + min_value=1, + max_value=128000, + description="Maximum number of tokens to generate.", + ), + ParameterSpec( + name="top_p", + type="float", + required=False, + min_value=0.0, + max_value=1.0, + default=1.0, + description="Nucleus sampling parameter.", + ), + ] + + # GPT-4 models + gpt4_models = [ + "gpt-4", + "gpt-4-turbo", + "gpt-4-turbo-preview", + "gpt-4o", + "gpt-4o-mini", + ] + + for model_name in gpt4_models: + specs.append( + ModelSpec( + model_name=model_name, + provider="openai", + capabilities=ModelCapabilities( + supports_reasoning=False, + supports_text_config=False, + supports_file_search=True, + supports_function_calling=True, + supports_streaming=True, + supports_vision=True + if "4o" in model_name or "vision" in model_name + else False, + ), + parameters=standard_params.copy(), + ) + ) + + # GPT-3.5 models + gpt35_models = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"] + + for model_name in gpt35_models: + specs.append( + ModelSpec( + model_name=model_name, + provider="openai", + capabilities=ModelCapabilities( + supports_reasoning=False, + supports_text_config=False, + supports_file_search=True, + supports_function_calling=True, + supports_streaming=True, + supports_vision=False, + ), + parameters=standard_params.copy(), + ) + ) + + # O-series models (reasoning models) + o_series_params = standard_params.copy() + [ + ParameterSpec( + name="reasoning", + type="str", + required=False, + allowed_values=["low", "medium", "high"], + description="Reasoning effort level for o-series models.", + ), + ParameterSpec( + name="text", + type="str", + required=False, + allowed_values=["low", "medium", "high"], + description="Text verbosity level for o-series models.", + ), + ] + + o_series_models = [ + "o1", + "o1-preview", + "o1-mini", + "o3", + "o3-mini", + ] + + for model_name in o_series_models: + specs.append( + ModelSpec( + model_name=model_name, + provider="openai", + capabilities=ModelCapabilities( + supports_reasoning=True, + supports_text_config=True, + supports_file_search=True, + supports_function_calling=False, # o-series don't support functions yet + supports_streaming=True, + supports_vision=False, + ), + parameters=o_series_params.copy(), + ) + ) + + return specs + + +def register_openai_specs() -> None: + """Register all OpenAI model specs with the global registry.""" + specs = create_openai_specs() + for spec in specs: + model_spec_registry.register(spec) diff --git a/backend/app/services/llm/transformers/__init__.py b/backend/app/services/llm/transformers/__init__.py new file mode 100644 index 00000000..a43f8ff8 --- /dev/null +++ b/backend/app/services/llm/transformers/__init__.py @@ -0,0 +1,16 @@ +"""LLM transformers module. 
+ +This module contains all transformer implementations for converting +unified API contracts to provider-specific formats. +Currently supports OpenAI with an extensible factory pattern for future providers. +""" + +from app.services.llm.transformers.base import ConfigTransformer +from app.services.llm.transformers.factory import TransformerFactory +from app.services.llm.transformers.openai import OpenAITransformer + +__all__ = [ + "ConfigTransformer", + "TransformerFactory", + "OpenAITransformer", +] diff --git a/backend/app/services/llm/transformers/base.py b/backend/app/services/llm/transformers/base.py new file mode 100644 index 00000000..a29bd149 --- /dev/null +++ b/backend/app/services/llm/transformers/base.py @@ -0,0 +1,84 @@ +"""Base configuration transformer for LLM providers. + +This module provides the transformation logic to convert from the unified +API contract to provider-specific configurations. It uses model specs to +guide the transformation and validation process. +""" + +from abc import ABC, abstractmethod +from typing import Any, Optional + +from app.models.llm import LLMCallRequest +from app.models.llm.specs import ModelSpec, model_spec_registry + + +class ConfigTransformer(ABC): + """Base class for provider-specific config transformers. + + Each provider (OpenAI, Anthropic, etc.) should implement a transformer + that knows how to convert our unified API contract into that provider's + specific API format. + + Attributes: + model_spec: Optional model specification for validation + """ + + def __init__(self, model_spec: Optional[ModelSpec] = None): + """Initialize transformer with optional model spec. + + Args: + model_spec: Optional model specification for validation + """ + self.model_spec = model_spec + + @abstractmethod + def transform(self, request: LLMCallRequest) -> dict[str, Any]: + """Transform unified request to provider-specific parameters. + + Args: + request: Unified LLM call request + + Returns: + Provider-specific parameter dictionary + + Raises: + ValueError: If transformation fails or validation errors occur + """ + raise NotImplementedError("Transformers must implement transform method") + + def validate_and_transform(self, request: LLMCallRequest) -> dict[str, Any]: + """Validate request against model spec and transform. 
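+ If no model_spec was supplied to the transformer, validation is skipped and the request is passed straight to transform().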
+ + Args: + request: Unified LLM call request + + Returns: + Provider-specific parameter dictionary + + Raises: + ValueError: If validation fails + """ + # If we have a model spec, validate the config + if self.model_spec: + config = { + "model": request.llm.llm_model_spec.model, + "provider": request.llm.llm_model_spec.provider, + "temperature": request.llm.llm_model_spec.temperature, + "max_tokens": request.llm.llm_model_spec.max_tokens, + "top_p": request.llm.llm_model_spec.top_p, + } + + # Add advanced configs if present + if request.llm.llm_model_spec.reasoning: + config["reasoning"] = request.llm.llm_model_spec.reasoning.effort + + if request.llm.llm_model_spec.text: + config["text"] = request.llm.llm_model_spec.text.verbosity + + # Validate against spec + is_valid, error_msg = self.model_spec.validate_config(config) + if not is_valid: + raise ValueError(f"Configuration validation failed: {error_msg}") + + # Perform transformation + return self.transform(request) diff --git a/backend/app/services/llm/transformers/factory.py b/backend/app/services/llm/transformers/factory.py new file mode 100644 index 00000000..5ed7c0b8 --- /dev/null +++ b/backend/app/services/llm/transformers/factory.py @@ -0,0 +1,89 @@ +"""Transformer factory for creating configuration transformers. + +This module provides a factory for instantiating the appropriate +transformer based on the provider type. +""" + +from typing import Optional + +from app.models.llm.specs import model_spec_registry +from app.services.llm.exceptions import UnsupportedProviderError +from app.services.llm.transformers.base import ConfigTransformer +from app.services.llm.transformers.openai import OpenAITransformer + + +class TransformerFactory: + """Factory for creating transformer instances. + + This factory creates the appropriate transformer based on the provider type + and optionally uses model specs for validation. + """ + + _TRANSFORMERS: dict[str, type[ConfigTransformer]] = { + "openai": OpenAITransformer, + # Future transformers can be added here: + # "anthropic": AnthropicTransformer, + # "google": GoogleTransformer, + } + + @classmethod + def create_transformer( + cls, + provider: str, + model_name: Optional[str] = None, + use_spec: bool = True, + ) -> ConfigTransformer: + """Create a transformer instance for the given provider. + + Args: + provider: Provider name (openai, anthropic, google, azure) + model_name: Optional model name to load spec for validation + use_spec: Whether to use model spec for validation (default: True) + + Returns: + ConfigTransformer instance + + Raises: + UnsupportedProviderError: If provider is not supported + """ + transformer_class = cls._TRANSFORMERS.get(provider.lower()) + if transformer_class is None: + raise UnsupportedProviderError( + provider=provider, + supported_providers=cls.get_supported_providers() + ) + + # Load model spec if available and requested + model_spec = None + if use_spec and model_name: + model_spec = model_spec_registry.get_spec(provider.lower(), model_name) + + return transformer_class(model_spec=model_spec) + + @classmethod + def get_supported_providers(cls) -> list[str]: + """Get list of supported provider types. + + Returns: + List of supported provider type strings + """ + return list(cls._TRANSFORMERS.keys()) + + @classmethod + def register_transformer( + cls, provider: str, transformer_class: type[ConfigTransformer] + ) -> None: + """Register a custom transformer for a provider. 
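+ The provider name is lower-cased before it is stored, matching the lookup performed in create_transformer().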
+ + Args: + provider: Provider name + transformer_class: Transformer class to register + + Raises: + TypeError: If transformer_class doesn't inherit from ConfigTransformer + """ + if not issubclass(transformer_class, ConfigTransformer): + raise TypeError( + f"{transformer_class.__name__} must inherit from ConfigTransformer" + ) + cls._TRANSFORMERS[provider.lower()] = transformer_class diff --git a/backend/app/services/llm/transformers/openai.py b/backend/app/services/llm/transformers/openai.py new file mode 100644 index 00000000..c9ead3df --- /dev/null +++ b/backend/app/services/llm/transformers/openai.py @@ -0,0 +1,77 @@ +"""OpenAI configuration transformer. + +This module transforms unified API requests into OpenAI-specific format. +""" + +from typing import Any, Optional + +from app.models.llm import LLMCallRequest +from app.models.llm.specs import ModelSpec +from app.services.llm.transformers.base import ConfigTransformer + + +class OpenAITransformer(ConfigTransformer): + """Transformer for OpenAI API format. + + Converts unified API contract to OpenAI Responses API format. + Supports: + - Standard models (GPT-4, GPT-3.5) + - O-series models with reasoning configuration + - Text configuration for verbosity control + - Vector store file search integration + """ + + def __init__(self, model_spec: Optional[ModelSpec] = None): + """Initialize OpenAI transformer. + + Args: + model_spec: Optional model specification for validation + """ + super().__init__(model_spec) + + def transform(self, request: LLMCallRequest) -> dict[str, Any]: + """Transform request to OpenAI API parameters. + + Args: + request: Unified LLM call request + + Returns: + OpenAI API parameter dictionary + """ + model_spec = request.llm.llm_model_spec + + # Base parameters + params: dict[str, Any] = { + "model": model_spec.model, + "input": [{"role": "user", "content": request.llm.prompt}], + } + + # Add optional standard parameters + if model_spec.temperature is not None: + params["temperature"] = model_spec.temperature + + if model_spec.max_tokens is not None: + params["max_tokens"] = model_spec.max_tokens + + if model_spec.top_p is not None: + params["top_p"] = model_spec.top_p + + # Add advanced OpenAI configs (o-series models) + if model_spec.reasoning: + params["reasoning"] = {"effort": model_spec.reasoning.effort} + + if model_spec.text: + params["text"] = {"verbosity": model_spec.text.verbosity} + + # Add vector store file search if provided + if request.llm.vector_store_id: + params["tools"] = [ + { + "type": "file_search", + "vector_store_ids": [request.llm.vector_store_id], + "max_num_results": request.max_num_results, + } + ] + params["include"] = ["file_search_call.results"] + + return params diff --git a/backend/docs/LLM_ARCHITECTURE.md b/backend/docs/LLM_ARCHITECTURE.md new file mode 100644 index 00000000..e43240fe --- /dev/null +++ b/backend/docs/LLM_ARCHITECTURE.md @@ -0,0 +1,512 @@ +# LLM API Specification-Driven Architecture + +## Overview + +The LLM API now uses a **specification-driven architecture** that separates concerns between: + +1. **Model Specifications** - Define what each model supports +2. **Transformation Layer** - Convert unified API to provider-specific formats +3. **Validation** - Automatic validation against model specs +4. **Providers** - Execute API calls using transformed configurations + +This architecture eliminates the need for `build_params` logic in providers and centralizes configuration management. + +## Architecture Components + +### 1. 
Model Specifications (`app/models/llm/model_spec.py`) + +Model specifications are the **single source of truth** for what each LLM model supports. + +```python +from app.models.llm.model_spec import ModelSpec, ModelCapabilities, ParameterSpec + +spec = ModelSpec( + model_name="gpt-4", + provider="openai", + capabilities=ModelCapabilities( + supports_file_search=True, + supports_function_calling=True, + supports_streaming=True, + ), + parameters=[ + ParameterSpec( + name="temperature", + type="float", + min_value=0.0, + max_value=2.0, + default=1.0, + ), + ParameterSpec( + name="max_tokens", + type="int", + min_value=1, + max_value=128000, + ), + ], +) +``` + +**Key Features:** +- Declarative capability flags +- Parameter type and range constraints +- Automatic validation via `validate_config()` +- Feature detection via `supports_feature()` + +### 2. Model Registry (`ModelSpecRegistry`) + +The global registry manages all model specifications: + +```python +from app.models.llm.model_spec import model_spec_registry + +# Register a spec +model_spec_registry.register(spec) + +# Get a spec +spec = model_spec_registry.get_spec("openai", "gpt-4") + +# Validate config +is_valid, error = model_spec_registry.validate_config( + "openai", "gpt-4", {"temperature": 0.7} +) +``` + +### 3. Transformation Layer (`app/services/llm/transformer.py`) + +Transformers convert the unified API contract to provider-specific formats: + +```python +from app.services.llm.transformer import OpenAITransformer, TransformerFactory + +# Create transformer +transformer = TransformerFactory.create_transformer( + provider="openai", + model_name="gpt-4", + use_spec=True, # Enable validation +) + +# Transform and validate +params = transformer.validate_and_transform(request) +``` + +**Available Transformers:** +- `OpenAITransformer` - OpenAI Responses API format +- `AnthropicTransformer` - Anthropic Messages API format +- `GoogleTransformer` - Google Generative AI format +- `AzureOpenAITransformer` - Azure OpenAI (same as OpenAI) + +### 4. Updated Provider Interface (`BaseProvider`) + +Providers now use transformers automatically: + +```python +class BaseProvider(ABC): + def __init__(self, client: Any, transformer: Optional[ConfigTransformer] = None): + self.client = client + self.transformer = transformer + + def build_params(self, request: LLMCallRequest) -> dict[str, Any]: + """Uses transformer to build params with automatic validation.""" + transformer = self._get_transformer(request) + return transformer.validate_and_transform(request) +``` + +## Request Flow + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ 1. API Request: LLMCallRequest │ +│ - Unified API contract │ +│ - Provider + model specified │ +│ - Standard parameters (temp, max_tokens, etc.) │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ 2. Provider Detection │ +│ - Extract provider type from request │ +│ - Create provider instance via ProviderFactory │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ 3. Transformer Creation │ +│ - TransformerFactory creates appropriate transformer │ +│ - Loads model spec from registry (if available) │ +│ - Transformer initialized with spec for validation │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ 4. 
Config Validation │ +│ - Extract parameters from request │ +│ - Validate against model spec: │ +│ ✓ Type checking (int, float, str, bool) │ +│ ✓ Range validation (min/max values) │ +│ ✓ Allowed values (e.g., "low"/"medium"/"high") │ +│ ✓ Required parameter checking │ +│ - Raise ValueError if validation fails │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ 5. Transformation │ +│ - Convert unified API to provider format: │ +│ • OpenAI: {model, input, temperature, ...} │ +│ • Anthropic: {model, messages, max_tokens, ...} │ +│ • Google: {model, contents, generation_config, ...} │ +│ - Add provider-specific features (reasoning, tools, etc.) │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ 6. Provider Execution │ +│ - Provider calls build_params() (uses transformer) │ +│ - Makes API call to LLM provider │ +│ - Returns standardized LLMCallResponse │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## OpenAI Model Specifications + +Pre-configured specs for OpenAI models are in `app/services/llm/specs/openai_specs.py`: + +**Standard Models:** +- GPT-4 series: `gpt-4`, `gpt-4-turbo`, `gpt-4o`, `gpt-4o-mini` +- GPT-3.5 series: `gpt-3.5-turbo`, `gpt-3.5-turbo-16k` + +**O-series Models (Reasoning):** +- `o1`, `o1-preview`, `o1-mini` +- `o3`, `o3-mini` +- Support reasoning configuration and text verbosity + +**Capabilities by Model:** + +| Model | File Search | Functions | Streaming | Vision | Reasoning | +|-------|-------------|-----------|-----------|--------|-----------| +| GPT-4 | ✓ | ✓ | ✓ | ✗ | ✗ | +| GPT-4o | ✓ | ✓ | ✓ | ✓ | ✗ | +| GPT-3.5 | ✓ | ✓ | ✓ | ✗ | ✗ | +| O-series | ✓ | ✗ | ✓ | ✗ | ✓ | + +## Adding New Model Specs + +### Option 1: Add to Existing Provider Specs + +Edit `app/services/llm/specs/openai_specs.py` (or create new provider spec file): + +```python +def create_new_model_specs() -> list[ModelSpec]: + specs = [] + + specs.append( + ModelSpec( + model_name="new-model", + provider="provider-name", + capabilities=ModelCapabilities( + supports_streaming=True, + # ... other capabilities + ), + parameters=[ + ParameterSpec( + name="temperature", + type="float", + min_value=0.0, + max_value=1.0, + ), + # ... other parameters + ], + ) + ) + + return specs +``` + +### Option 2: Register Dynamically at Runtime + +```python +from app.models.llm.model_spec import model_spec_registry, ModelSpec + +spec = ModelSpec(...) +model_spec_registry.register(spec) +``` + +## Adding New Providers + +### Step 1: Create Provider Transformer + +```python +# app/services/llm/transformer.py + +class NewProviderTransformer(ConfigTransformer): + """Transformer for NewProvider API format.""" + + def transform(self, request: LLMCallRequest) -> dict[str, Any]: + """Transform to NewProvider API format.""" + model_spec = request.llm.llm_model_spec + + params = { + "model": model_spec.model, + # ... 
provider-specific format + } + + # Add parameters + if model_spec.temperature is not None: + params["temperature"] = model_spec.temperature + + return params +``` + +### Step 2: Register Transformer + +```python +# In TransformerFactory +_TRANSFORMERS = { + "openai": OpenAITransformer, + "anthropic": AnthropicTransformer, + "google": GoogleTransformer, + "newprovider": NewProviderTransformer, # Add here +} +``` + +### Step 3: Create Provider Implementation + +```python +# app/services/llm/newprovider_provider.py + +class NewProviderProvider(BaseProvider): + """NewProvider implementation.""" + + def execute(self, request: LLMCallRequest) -> tuple[LLMCallResponse | None, str | None]: + try: + # build_params() uses transformer automatically + params = self.build_params(request) + + # Make API call + response = self.client.generate(**params) + + # Return standardized response + return LLMCallResponse(...), None + except Exception as e: + return None, str(e) +``` + +### Step 4: Register Provider + +```python +# app/services/llm/provider_factory.py + +_PROVIDERS = { + "openai": OpenAIProvider, + "anthropic": AnthropicProvider, + "newprovider": NewProviderProvider, # Add here +} +``` + +### Step 5: Create Model Specs + +```python +# app/services/llm/specs/newprovider_specs.py + +def create_newprovider_specs() -> list[ModelSpec]: + return [ + ModelSpec( + model_name="newprovider-model-1", + provider="newprovider", + capabilities=ModelCapabilities(...), + parameters=[...], + ), + ] + +def register_newprovider_specs() -> None: + specs = create_newprovider_specs() + for spec in specs: + model_spec_registry.register(spec) +``` + +### Step 6: Initialize Specs + +```python +# app/services/llm/specs/__init__.py + +from app.services.llm.specs.newprovider_specs import register_newprovider_specs + +def initialize_model_specs() -> None: + register_openai_specs() + register_newprovider_specs() # Add here +``` + +## Configuration Validation Examples + +### Valid Configuration + +```python +request = LLMCallRequest( + llm=LLMConfig( + prompt="Hello", + llm_model_spec=LLMModelSpec( + model="gpt-4", + provider="openai", + temperature=0.7, # ✓ Within range [0.0, 2.0] + max_tokens=1000, # ✓ Within range [1, 128000] + ), + ), +) +# ✓ Passes validation +``` + +### Invalid Configuration (Out of Range) + +```python +request = LLMCallRequest( + llm=LLMConfig( + prompt="Hello", + llm_model_spec=LLMModelSpec( + model="gpt-4", + provider="openai", + temperature=5.0, # ✗ Out of range [0.0, 2.0] + ), + ), +) +# ✗ Raises ValueError: "Parameter 'temperature' must be <= 2.0" +``` + +### Invalid Configuration (Wrong Type) + +```python +request = LLMCallRequest( + llm=LLMConfig( + prompt="Hello", + llm_model_spec=LLMModelSpec( + model="gpt-4", + provider="openai", + temperature="high", # ✗ Should be float + ), + ), +) +# ✗ Raises ValueError: "Parameter 'temperature' must be a number" +``` + +### O-series Model Configuration + +```python +request = LLMCallRequest( + llm=LLMConfig( + prompt="Complex reasoning task", + llm_model_spec=LLMModelSpec( + model="o3-mini", + provider="openai", + temperature=0.5, + reasoning=ReasoningConfig(effort="high"), # ✓ Valid: "low", "medium", "high" + text=TextConfig(verbosity="medium"), + ), + ), +) +# ✓ Passes validation +``` + +## Benefits of This Architecture + +### 1. Separation of Concerns +- **Model Specs**: Define capabilities +- **Transformers**: Handle format conversion +- **Providers**: Execute API calls +- **Validation**: Centralized in specs + +### 2. 
Maintainability +- Add new models by adding specs (no code changes) +- Add new providers by implementing transformer +- Validation logic in one place + +### 3. Type Safety +- Compile-time type checking +- Runtime validation against specs +- Clear error messages + +### 4. Extensibility +- Easy to add new providers +- Easy to add new models +- Runtime spec registration + +### 5. Testability +- Test specs independently +- Test transformers independently +- Test providers independently +- Mock transformers for provider tests + +## Migration Guide + +### Before (Old Architecture) + +```python +class OpenAIProvider(BaseProvider): + def build_params(self, request: LLMCallRequest) -> dict[str, Any]: + # Manual parameter building + params = { + "model": request.llm.llm_model_spec.model, + "input": [{"role": "user", "content": request.llm.prompt}], + } + + # Manual parameter handling + if request.llm.llm_model_spec.temperature is not None: + params["temperature"] = request.llm.llm_model_spec.temperature + + # No validation! + return params +``` + +### After (New Architecture) + +```python +class OpenAIProvider(BaseProvider): + # build_params() inherited from BaseProvider + # Automatically uses OpenAITransformer + # Automatic validation via model specs + pass +``` + +The `build_params()` method is now in `BaseProvider` and automatically: +1. Creates appropriate transformer +2. Validates configuration against model spec +3. Transforms to provider format +4. Raises clear errors if validation fails + +## Testing + +Run the architecture test: + +```bash +uv run python test_specs_only.py +``` + +Expected output: +``` +Testing Core Specification and Transformation Logic +====================================================================== + +1. Model Specification... +✓ Valid config: True +✓ Invalid config rejected: True + +2. Transformers... +✓ OpenAI transform: model=gpt-4, temp=0.7 +✓ O-series transform: reasoning={'effort': 'high'} +✓ Anthropic transform: model=claude-3-opus, max_tokens=1024 + +3. OpenAI Model Specs... +✓ Created 12 OpenAI model specs +✓ GPT-4 spec: gpt-4, params=['temperature', 'max_tokens', 'top_p'] + +4. Spec-based Validation... +✓ Valid O-series config passed validation +✓ Invalid config correctly rejected + +====================================================================== +SUCCESS: Core architecture works correctly! +====================================================================== +``` + +## Future Enhancements + +1. **Database-backed Specs**: Store model specs in database for dynamic updates +2. **Spec Versioning**: Version model specs for backward compatibility +3. **Capability Discovery**: API endpoint to list available models and capabilities +4. **Advanced Validation**: Custom validators, cross-parameter validation +5. **Streaming Support**: Add streaming capability to transformers +6. **Function Calling**: Unified function calling across providers +7. **Cost Tracking**: Add pricing info to model specs diff --git a/backend/test_new_architecture.py b/backend/test_new_architecture.py new file mode 100644 index 00000000..b1027e84 --- /dev/null +++ b/backend/test_new_architecture.py @@ -0,0 +1,209 @@ +"""Standalone test script for the new specification-driven architecture. + +This script tests the core functionality without importing the full app, +avoiding circular import issues. 
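+It is meant to be run directly as a standalone script from the backend directory.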
+""" + +import sys +from pathlib import Path + +# Add backend to path +backend_path = Path(__file__).parent +sys.path.insert(0, str(backend_path)) + +print("=" * 70) +print("Testing Specification-Driven LLM Architecture") +print("=" * 70) + +# Test 1: Model Spec Creation and Validation +print("\n1. Testing Model Specification...") +from app.models.llm.model_spec import ( + ModelSpec, + ModelCapabilities, + ParameterSpec, + ModelSpecRegistry, +) + +spec = ModelSpec( + model_name="gpt-4", + provider="openai", + capabilities=ModelCapabilities( + supports_file_search=True, + supports_function_calling=True, + ), + parameters=[ + ParameterSpec( + name="temperature", + type="float", + min_value=0.0, + max_value=2.0, + ), + ParameterSpec( + name="max_tokens", + type="int", + min_value=1, + ), + ], +) + +print(f"✓ Created ModelSpec for {spec.model_name}") +print(f" - Provider: {spec.provider}") +print(f" - Supports file_search: {spec.capabilities.supports_file_search}") +print(f" - Parameters: {[p.name for p in spec.parameters]}") + +# Test 2: Configuration Validation +print("\n2. Testing Configuration Validation...") +valid_config = { + "model": "gpt-4", + "temperature": 0.7, + "max_tokens": 1000, +} + +is_valid, error = spec.validate_config(valid_config) +print(f"✓ Valid config validation: {is_valid} (error: {error})") + +invalid_config = { + "model": "gpt-4", + "temperature": 5.0, # Out of range +} + +is_valid, error = spec.validate_config(invalid_config) +print(f"✓ Invalid config validation: {is_valid} (error: {error})") + +# Test 3: Model Registry +print("\n3. Testing Model Registry...") +registry = ModelSpecRegistry() +registry.clear() +registry.register(spec) + +retrieved = registry.get_spec("openai", "gpt-4") +print(f"✓ Registry retrieval: {retrieved.model_name if retrieved else 'None'}") + +# Test 4: OpenAI Specs Registration +print("\n4. Testing OpenAI Specs...") +# Import directly to avoid circular import in __init__.py +import sys +sys.path.insert(0, str(backend_path / "app")) +from services.llm.specs.openai_specs import create_openai_specs + +specs = create_openai_specs() +print(f"✓ Created {len(specs)} OpenAI model specs") + +model_names = [s.model_name for s in specs[:5]] +print(f" - Sample models: {', '.join(model_names)}") + +# Test 5: Transformers +print("\n5. Testing Transformers...") +from app.models.llm import ( + LLMCallRequest, + LLMConfig, + LLMModelSpec, + ReasoningConfig, +) + +# Import transformers directly +from services.llm.transformer import OpenAITransformer, AnthropicTransformer + +openai_transformer = OpenAITransformer() +print(f"✓ Created OpenAITransformer") + +request = LLMCallRequest( + llm=LLMConfig( + prompt="Hello, world!", + llm_model_spec=LLMModelSpec( + model="gpt-4", + provider="openai", + temperature=0.7, + max_tokens=1000, + ), + ), +) + +params = openai_transformer.transform(request) +print(f"✓ Transformed request to OpenAI params:") +print(f" - Model: {params['model']}") +print(f" - Temperature: {params.get('temperature')}") +print(f" - Max tokens: {params.get('max_tokens')}") + +# Test 6: O-series model transformation +print("\n6. 
Testing O-series Model Transformation...") +o_series_request = LLMCallRequest( + llm=LLMConfig( + prompt="Complex reasoning task", + llm_model_spec=LLMModelSpec( + model="o3-mini", + provider="openai", + temperature=0.5, + reasoning=ReasoningConfig(effort="high"), + ), + ), +) + +o_params = openai_transformer.transform(o_series_request) +print(f"✓ Transformed o-series request:") +print(f" - Model: {o_params['model']}") +print(f" - Reasoning effort: {o_params.get('reasoning', {}).get('effort')}") + +# Test 7: Anthropic Transformer +print("\n7. Testing Anthropic Transformer...") +anthropic_transformer = AnthropicTransformer() + +anthropic_request = LLMCallRequest( + llm=LLMConfig( + prompt="Hello, Claude!", + llm_model_spec=LLMModelSpec( + model="claude-3-opus", + provider="anthropic", + max_tokens=2048, + ), + ), +) + +anthropic_params = anthropic_transformer.transform(anthropic_request) +print(f"✓ Transformed Anthropic request:") +print(f" - Model: {anthropic_params['model']}") +print(f" - Max tokens: {anthropic_params.get('max_tokens')}") +print(f" - Messages format: {type(anthropic_params.get('messages'))}") + +# Test 8: Validation with Transformer +print("\n8. Testing Validation in Transformer...") +o_spec = next((s for s in specs if s.model_name == "o3-mini"), None) +if o_spec: + transformer_with_spec = OpenAITransformer(model_spec=o_spec) + + # Valid request + try: + validated_params = transformer_with_spec.validate_and_transform(o_series_request) + print(f"✓ Valid request passed validation") + except ValueError as e: + print(f"✗ Valid request failed: {e}") + + # Invalid request (temperature out of range) + invalid_request = LLMCallRequest( + llm=LLMConfig( + prompt="Test", + llm_model_spec=LLMModelSpec( + model="o3-mini", + provider="openai", + temperature=10.0, # Out of range! + ), + ), + ) + + try: + transformer_with_spec.validate_and_transform(invalid_request) + print(f"✗ Invalid request should have failed validation") + except ValueError as e: + print(f"✓ Invalid request correctly rejected: {str(e)[:50]}...") + +print("\n" + "=" * 70) +print("All Tests Completed Successfully!") +print("=" * 70) +print("\nSummary of New Architecture:") +print("1. ✓ Model specs define capabilities and parameter constraints") +print("2. ✓ Validation happens at the spec level") +print("3. ✓ Transformers convert unified API to provider formats") +print("4. ✓ Automatic validation during transformation") +print("5. ✓ Registry manages all model specifications") +print("6. ✓ Supports OpenAI (standard & o-series), Anthropic, Google") +print("\nThe architecture is ready to use!") diff --git a/backend/test_specs_only.py b/backend/test_specs_only.py new file mode 100644 index 00000000..7893ba34 --- /dev/null +++ b/backend/test_specs_only.py @@ -0,0 +1,156 @@ +"""Simple test for model specs and transformers without full app imports.""" + +import sys +from pathlib import Path + +backend_path = Path(__file__).parent +sys.path.insert(0, str(backend_path)) + +print("=" * 70) +print("Testing Core Specification and Transformation Logic") +print("=" * 70) + +# Test Model Spec +print("\n1. 
Model Specification...") +from app.models.llm.model_spec import ModelSpec, ModelCapabilities, ParameterSpec + +spec = ModelSpec( + model_name="gpt-4", + provider="openai", + capabilities=ModelCapabilities(supports_file_search=True), + parameters=[ + ParameterSpec(name="temperature", type="float", min_value=0.0, max_value=2.0) + ], +) + +config = {"temperature": 0.7} +is_valid, error = spec.validate_config(config) +print(f"✓ Valid config: {is_valid}") + +config = {"temperature": 5.0} +is_valid, error = spec.validate_config(config) +print(f"✓ Invalid config rejected: {not is_valid} - {error}") + +# Test Transformers (import module directly by file, avoiding __init__) +print("\n2. Transformers...") + +# Import transformer module directly +import importlib.util +spec_tf = importlib.util.spec_from_file_location( + "transformer", + backend_path / "app" / "services" / "llm" / "transformer.py" +) +transformer = importlib.util.module_from_spec(spec_tf) +spec_tf.loader.exec_module(transformer) + +from app.models.llm.call import LLMCallRequest, LLMConfig, LLMModelSpec, ReasoningConfig + +openai_tf = transformer.OpenAITransformer() + +request = LLMCallRequest( + llm=LLMConfig( + prompt="Test", + llm_model_spec=LLMModelSpec( + model="gpt-4", + provider="openai", + temperature=0.7, + ), + ), +) + +params = openai_tf.transform(request) +print(f"✓ OpenAI transform: model={params['model']}, temp={params.get('temperature')}") + +# Test O-series +o_request = LLMCallRequest( + llm=LLMConfig( + prompt="Think hard", + llm_model_spec=LLMModelSpec( + model="o3", + provider="openai", + reasoning=ReasoningConfig(effort="high"), + ), + ), +) + +o_params = openai_tf.transform(o_request) +print(f"✓ O-series transform: reasoning={o_params.get('reasoning')}") + +# Test Anthropic +anthropic_tf = transformer.AnthropicTransformer() +anthropic_request = LLMCallRequest( + llm=LLMConfig( + prompt="Hello", + llm_model_spec=LLMModelSpec( + model="claude-3-opus", + provider="anthropic", + max_tokens=1024, + ), + ), +) + +anthropic_params = anthropic_tf.transform(anthropic_request) +print(f"✓ Anthropic transform: model={anthropic_params['model']}, max_tokens={anthropic_params['max_tokens']}") + +# Test OpenAI Specs +print("\n3. OpenAI Model Specs...") + +# Import openai_specs directly +spec_os = importlib.util.spec_from_file_location( + "openai_specs", + backend_path / "app" / "services" / "llm" / "specs" / "openai_specs.py" +) +openai_specs = importlib.util.module_from_spec(spec_os) +spec_os.loader.exec_module(openai_specs) + +specs = openai_specs.create_openai_specs() +print(f"✓ Created {len(specs)} OpenAI model specs") + +gpt4 = next(s for s in specs if s.model_name == "gpt-4") +print(f"✓ GPT-4 spec: {gpt4.model_name}, params={[p.name for p in gpt4.parameters[:3]]}") + +o3 = next(s for s in specs if s.model_name == "o3-mini") +print(f"✓ O3-mini spec: supports_reasoning={o3.capabilities.supports_reasoning}") + +# Test validation with spec +print("\n4. 
Spec-based Validation...") +tf_with_spec = transformer.OpenAITransformer(model_spec=o3) + +valid_o_request = LLMCallRequest( + llm=LLMConfig( + prompt="Test", + llm_model_spec=LLMModelSpec( + model="o3-mini", + provider="openai", + temperature=0.5, + reasoning=ReasoningConfig(effort="medium"), + ), + ), +) + +try: + params = tf_with_spec.validate_and_transform(valid_o_request) + print(f"✓ Valid O-series config passed validation") +except ValueError as e: + print(f"✗ Should have passed: {e}") + +invalid_o_request = LLMCallRequest( + llm=LLMConfig( + prompt="Test", + llm_model_spec=LLMModelSpec( + model="o3-mini", + provider="openai", + temperature=10.0, # Out of range + ), + ), +) + +try: + params = tf_with_spec.validate_and_transform(invalid_o_request) + print(f"✗ Should have failed validation") +except ValueError as e: + print(f"✓ Invalid config correctly rejected") + +print("\n" + "=" * 70) +print("SUCCESS: Core architecture works correctly!") +print("=" * 70) From 95be54b8d5d9630272bd2e3308e062693d470b46 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Tue, 21 Oct 2025 08:39:59 +0530 Subject: [PATCH 03/15] resolve using sqlModel --- backend/app/api/routes/llm.py | 14 +- backend/app/models/__init__.py | 2 - backend/app/models/llm/__init__.py | 23 -- backend/app/models/llm/call.py | 76 ---- backend/app/models/llm/config.py | 45 +-- backend/app/models/llm/response.py | 4 - backend/app/models/llm/specs/__init__.py | 20 - backend/app/models/llm/specs/base.py | 192 ---------- backend/app/models/llm/specs/registry.py | 89 ----- backend/app/services/llm/README.md | 353 ------------------ backend/app/services/llm/__init__.py | 107 ------ backend/app/services/llm/constants.py | 54 --- backend/app/services/llm/exceptions.py | 130 ------- backend/app/services/llm/orchestrator.py | 6 - backend/app/services/llm/providers/base.py | 52 +-- backend/app/services/llm/providers/factory.py | 40 +- backend/app/services/llm/providers/openai.py | 60 +-- backend/app/services/llm/specs/__init__.py | 27 +- backend/app/services/llm/specs/openai.py | 282 +++++++------- .../app/services/llm/transformers/__init__.py | 16 - backend/app/services/llm/transformers/base.py | 84 ----- .../app/services/llm/transformers/factory.py | 89 ----- .../app/services/llm/transformers/openai.py | 77 ---- backend/test_new_architecture.py | 209 ----------- backend/test_specs_only.py | 156 -------- 25 files changed, 187 insertions(+), 2020 deletions(-) delete mode 100644 backend/app/models/llm/call.py delete mode 100644 backend/app/models/llm/specs/__init__.py delete mode 100644 backend/app/models/llm/specs/base.py delete mode 100644 backend/app/models/llm/specs/registry.py delete mode 100644 backend/app/services/llm/README.md delete mode 100644 backend/app/services/llm/constants.py delete mode 100644 backend/app/services/llm/exceptions.py delete mode 100644 backend/app/services/llm/transformers/__init__.py delete mode 100644 backend/app/services/llm/transformers/base.py delete mode 100644 backend/app/services/llm/transformers/factory.py delete mode 100644 backend/app/services/llm/transformers/openai.py delete mode 100644 backend/test_new_architecture.py delete mode 100644 backend/test_specs_only.py diff --git a/backend/app/api/routes/llm.py b/backend/app/api/routes/llm.py index c3675419..e1d3ed04 100644 --- a/backend/app/api/routes/llm.py +++ b/backend/app/api/routes/llm.py @@ -1,7 +1,6 @@ import logging -from fastapi import APIRouter, Depends -from sqlmodel import Session +from fastapi 
import APIRouter from app.api.deps import AuthContext, SessionDep from app.models.llm import LLMCallRequest @@ -22,12 +21,6 @@ async def llm_call( project_id = _current_user.project.id organization_id = _current_user.organization.id - logger.info( - f"[llm_call] Scheduling LLM call for provider: {request.llm.llm_model_spec.provider}, " - f"model: {request.llm.llm_model_spec.model}, " - f"project_id: {project_id}, org_id: {organization_id}" - ) - # Start background job job_id = start_job( db=_session, @@ -36,11 +29,6 @@ async def llm_call( organization_id=organization_id, ) - logger.info( - f"[llm_call] LLM call job scheduled successfully | job_id={job_id}, " - f"project_id={project_id}" - ) - return APIResponse.success_response( data={"status": "processing", "message": "LLM call job scheduled"}, ) diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 82d55566..20cfac7f 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -54,8 +54,6 @@ LLMConfig, LLMModelSpec, ProviderType, - ReasoningConfig, - TextConfig, ) from .message import Message diff --git a/backend/app/models/llm/__init__.py b/backend/app/models/llm/__init__.py index 724d0790..a1807e83 100644 --- a/backend/app/models/llm/__init__.py +++ b/backend/app/models/llm/__init__.py @@ -4,41 +4,18 @@ requests, responses, configurations, and model specifications. """ -from app.models.llm.call import LLMCall, LLMCallCreate from app.models.llm.config import ( LLMConfig, LLMModelSpec, - ProviderType, - ReasoningConfig, - TextConfig, ) from app.models.llm.request import LLMCallRequest from app.models.llm.response import LLMCallResponse -from app.models.llm.specs import ( - ModelCapabilities, - ModelSpec, - ModelSpecRegistry, - ParameterSpec, - model_spec_registry, -) __all__ = [ - # Database models - "LLMCall", - "LLMCallCreate", # Request/Response models "LLMCallRequest", "LLMCallResponse", # Configuration models "LLMConfig", "LLMModelSpec", - "ProviderType", - "ReasoningConfig", - "TextConfig", - # Specification models - "ModelSpec", - "ModelCapabilities", - "ParameterSpec", - "ModelSpecRegistry", - "model_spec_registry", ] diff --git a/backend/app/models/llm/call.py b/backend/app/models/llm/call.py deleted file mode 100644 index 61cfa334..00000000 --- a/backend/app/models/llm/call.py +++ /dev/null @@ -1,76 +0,0 @@ -from typing import Optional, Literal -from sqlmodel import SQLModel - - -# Supported LLM providers -ProviderType = Literal["openai", "anthropic", "google", "azure"] - - -class ReasoningConfig(SQLModel): - """Configuration for reasoning parameters (e.g., o-series models).""" - - effort: str # "low", "medium", "high" - - -class TextConfig(SQLModel): - """Configuration for text generation parameters.""" - - verbosity: str # "low", "medium", "high" - - -class LLMModelSpec(SQLModel): - """Specification for the LLM model and its parameters. - - This contains the actual model configuration that will be sent to the provider. - Supports both standard OpenAI models and advanced configurations. - """ - - model: str - provider: ProviderType = "openai" - temperature: Optional[float] = None - reasoning: Optional[ReasoningConfig] = None - text: Optional[TextConfig] = None - max_tokens: Optional[int] = None - top_p: Optional[float] = None - - -class LLMConfig(SQLModel): - """LLM configuration containing model specification. - - This wraps the model spec and can be extended with additional - provider-agnostic configuration in the future. 
- """ - - prompt: str - vector_store_id: Optional[str] = None - llm_model_spec: LLMModelSpec - - -class LLMCallRequest(SQLModel): - """Request model for /v1/llm/call endpoint. - - This model decouples LLM calls from the assistants table, - allowing dynamic configuration per request. - - Structure: - - llm: LLMConfig (contains model_spec) - - prompt: The user's input - - vector_store_id: Optional vector store for RAG - - max_num_results: Number of results from vector store - """ - - llm: LLMConfig - max_num_results: int = 20 # For vector store file search - - -class LLMCallResponse(SQLModel): - """Response model for /v1/llm/call endpoint.""" - - status: str - response_id: str - message: str - model: str - input_tokens: int - output_tokens: int - total_tokens: int - file_search_results: Optional[list[dict]] = None diff --git a/backend/app/models/llm/config.py b/backend/app/models/llm/config.py index 3cbce300..e30bc93b 100644 --- a/backend/app/models/llm/config.py +++ b/backend/app/models/llm/config.py @@ -4,33 +4,10 @@ including model specifications and advanced configuration options. """ -from typing import Literal, Optional +from typing import Literal from sqlmodel import SQLModel -# Type definitions -ProviderType = Literal["openai", "anthropic", "google", "azure"] - - -class ReasoningConfig(SQLModel): - """Configuration for reasoning parameters (e.g., o-series models). - - Attributes: - effort: Reasoning effort level - "low", "medium", or "high" - """ - - effort: str # "low", "medium", "high" - - -class TextConfig(SQLModel): - """Configuration for text generation parameters. - - Attributes: - verbosity: Text verbosity level - "low", "medium", or "high" - """ - - verbosity: str # "low", "medium", "high" - class LLMModelSpec(SQLModel): """Specification for the LLM model and its parameters. 
@@ -42,19 +19,19 @@ class LLMModelSpec(SQLModel): model: Model identifier (e.g., "gpt-4", "claude-3-opus") provider: Provider type (openai, anthropic, google, azure) temperature: Sampling temperature (0.0-2.0) - reasoning: Optional reasoning configuration for o-series models - text: Optional text verbosity configuration + reasoning_effort: Reasoning effort level for o-series models ("low", "medium", "high") + text_verbosity: Text verbosity level ("low", "medium", "high") max_tokens: Maximum number of tokens to generate top_p: Nucleus sampling parameter (0.0-1.0) """ model: str - provider: ProviderType = "openai" - temperature: Optional[float] = None - reasoning: Optional[ReasoningConfig] = None - text: Optional[TextConfig] = None - max_tokens: Optional[int] = None - top_p: Optional[float] = None + provider: str = "openai" + temperature: float | None = None + reasoning_effort: Literal["low", "medium", "high"] | None = None + text_verbosity: Literal["low", "medium", "high"] | None = None + max_tokens: int | None = None + top_p: float | None = None class LLMConfig(SQLModel): @@ -65,10 +42,10 @@ class LLMConfig(SQLModel): Attributes: prompt: The user's input prompt - vector_store_id: Optional vector store ID for RAG functionality + vector_store_id: Vector store ID for RAG functionality llm_model_spec: Model specification and parameters """ prompt: str - vector_store_id: Optional[str] = None + vector_store_id: str | None = None llm_model_spec: LLMModelSpec diff --git a/backend/app/models/llm/response.py b/backend/app/models/llm/response.py index fc21413a..66159f60 100644 --- a/backend/app/models/llm/response.py +++ b/backend/app/models/llm/response.py @@ -2,9 +2,6 @@ This module contains response models for LLM API calls. """ - -from typing import Optional - from sqlmodel import SQLModel @@ -29,4 +26,3 @@ class LLMCallResponse(SQLModel): input_tokens: int output_tokens: int total_tokens: int - file_search_results: Optional[list[dict]] = None diff --git a/backend/app/models/llm/specs/__init__.py b/backend/app/models/llm/specs/__init__.py deleted file mode 100644 index f048fa69..00000000 --- a/backend/app/models/llm/specs/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Model specifications module.""" - -from app.models.llm.specs.base import ( - EffortLevel, - ModelCapabilities, - ModelSpec, - ParameterSpec, - VerbosityLevel, -) -from app.models.llm.specs.registry import ModelSpecRegistry, model_spec_registry - -__all__ = [ - "ModelSpec", - "ModelCapabilities", - "ParameterSpec", - "ModelSpecRegistry", - "model_spec_registry", - "EffortLevel", - "VerbosityLevel", -] diff --git a/backend/app/models/llm/specs/base.py b/backend/app/models/llm/specs/base.py deleted file mode 100644 index e1b234d1..00000000 --- a/backend/app/models/llm/specs/base.py +++ /dev/null @@ -1,192 +0,0 @@ -"""Base model specification classes. - -This module defines the schema for LLM model specifications that declare: -- What parameters each model supports -- Valid ranges and types for parameters -- Provider-specific capabilities -- Validation rules for configurations -""" - -from typing import Any, Literal, Optional - -from pydantic import Field -from sqlmodel import SQLModel - - -# Parameter type definitions -EffortLevel = Literal["low", "medium", "high"] -VerbosityLevel = Literal["low", "medium", "high"] - - -class ParameterSpec(SQLModel): - """Specification for a single parameter. 
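A quick illustration of the flattened `LLMModelSpec` introduced by the config.py hunk above — a minimal sketch, assuming the classes are imported directly from `app.models.llm.config`; the prompt and vector store ID are placeholders. Reasoning and verbosity are now plain literal fields instead of nested `ReasoningConfig`/`TextConfig` objects:

```python
from app.models.llm.config import LLMConfig, LLMModelSpec

# Flattened spec: reasoning/text options are plain literal fields now,
# so no nested ReasoningConfig/TextConfig objects are needed.
config = LLMConfig(
    prompt="Summarize the attached report",   # placeholder prompt
    vector_store_id="vs_example123",          # hypothetical ID
    llm_model_spec=LLMModelSpec(
        model="o3-mini",
        provider="openai",
        reasoning_effort="high",    # was ReasoningConfig(effort="high")
        text_verbosity="low",       # was TextConfig(verbosity="low")
        max_tokens=1024,
    ),
)

print(config.llm_model_spec.reasoning_effort)  # -> "high"
```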
- - Attributes: - name: Parameter name - type: Parameter type (str, int, float, bool) - required: Whether parameter is required - default: Default value if not provided - min_value: Minimum value for numeric parameters - max_value: Maximum value for numeric parameters - allowed_values: List of allowed values for enum-like parameters - description: Human-readable parameter description - """ - - name: str = Field(description="Parameter name") - type: str = Field(description="Parameter type (str, int, float, bool)") - required: bool = Field(default=False, description="Whether parameter is required") - default: Optional[Any] = Field(default=None, description="Default value") - min_value: Optional[float] = Field( - default=None, description="Minimum value for numeric params" - ) - max_value: Optional[float] = Field( - default=None, description="Maximum value for numeric params" - ) - allowed_values: Optional[list[Any]] = Field( - default=None, description="List of allowed values" - ) - description: Optional[str] = Field(default=None, description="Parameter description") - - -class ModelCapabilities(SQLModel): - """Capabilities supported by a model. - - Attributes: - supports_reasoning: Whether the model supports reasoning configuration - supports_text_config: Whether the model supports text verbosity config - supports_file_search: Whether the model supports vector store file search - supports_function_calling: Whether the model supports function calling - supports_streaming: Whether the model supports streaming responses - supports_vision: Whether the model supports image inputs - """ - - supports_reasoning: bool = Field( - default=False, description="Supports reasoning configuration" - ) - supports_text_config: bool = Field( - default=False, description="Supports text verbosity config" - ) - supports_file_search: bool = Field( - default=False, description="Supports vector store file search" - ) - supports_function_calling: bool = Field( - default=False, description="Supports function calling" - ) - supports_streaming: bool = Field( - default=False, description="Supports streaming responses" - ) - supports_vision: bool = Field(default=False, description="Supports image inputs") - - -class ModelSpec(SQLModel): - """Complete specification for an LLM model. - - This is the single source of truth for what a model supports. - It defines capabilities, parameter constraints, and validation rules. - - Attributes: - model_name: Model identifier (e.g., 'gpt-4', 'claude-3-opus') - provider: Provider name (openai, anthropic, google, azure) - capabilities: What features this model supports - parameters: List of supported parameters with their constraints - """ - - model_config = {"protected_namespaces": ()} # Allow model_ prefix - - model_name: str = Field( - description="Model identifier (e.g., 'gpt-4', 'claude-3-opus')" - ) - provider: str = Field(description="Provider name (openai, anthropic, google, azure)") - capabilities: ModelCapabilities = Field( - description="What features this model supports" - ) - parameters: list[ParameterSpec] = Field( - default_factory=list, description="Supported parameters" - ) - - def validate_config(self, config: dict[str, Any]) -> tuple[bool, Optional[str]]: - """Validate a configuration against this model spec. 
- - Args: - config: Configuration dictionary to validate - - Returns: - Tuple of (is_valid, error_message) - - If valid: (True, None) - - If invalid: (False, error_message) - """ - # Build parameter lookup - param_specs = {p.name: p for p in self.parameters} - - # Check for unknown parameters - for key in config.keys(): - if key not in param_specs and key not in [ - "model", - "provider", - "prompt", - "vector_store_id", - ]: - return False, f"Unknown parameter '{key}' for model {self.model_name}" - - # Validate each parameter - for param_spec in self.parameters: - value = config.get(param_spec.name) - - # Check required parameters - if param_spec.required and value is None: - return False, f"Required parameter '{param_spec.name}' is missing" - - # Skip validation if value is None and not required - if value is None: - continue - - # Type validation - if param_spec.type == "int" and not isinstance(value, int): - return False, f"Parameter '{param_spec.name}' must be an integer" - elif param_spec.type == "float" and not isinstance(value, (int, float)): - return False, f"Parameter '{param_spec.name}' must be a number" - elif param_spec.type == "bool" and not isinstance(value, bool): - return False, f"Parameter '{param_spec.name}' must be a boolean" - elif param_spec.type == "str" and not isinstance(value, str): - return False, f"Parameter '{param_spec.name}' must be a string" - - # Range validation for numeric types - if param_spec.type in ["int", "float"]: - if param_spec.min_value is not None and value < param_spec.min_value: - return ( - False, - f"Parameter '{param_spec.name}' must be >= {param_spec.min_value}", - ) - if param_spec.max_value is not None and value > param_spec.max_value: - return ( - False, - f"Parameter '{param_spec.name}' must be <= {param_spec.max_value}", - ) - - # Allowed values validation - if param_spec.allowed_values is not None and value not in param_spec.allowed_values: - return ( - False, - f"Parameter '{param_spec.name}' must be one of {param_spec.allowed_values}", - ) - - return True, None - - def supports_feature(self, feature: str) -> bool: - """Check if this model supports a specific feature. - - Args: - feature: Feature name to check - - Returns: - True if feature is supported - """ - feature_map = { - "reasoning": self.capabilities.supports_reasoning, - "text_config": self.capabilities.supports_text_config, - "file_search": self.capabilities.supports_file_search, - "function_calling": self.capabilities.supports_function_calling, - "streaming": self.capabilities.supports_streaming, - "vision": self.capabilities.supports_vision, - } - return feature_map.get(feature, False) diff --git a/backend/app/models/llm/specs/registry.py b/backend/app/models/llm/specs/registry.py deleted file mode 100644 index 0e895c3e..00000000 --- a/backend/app/models/llm/specs/registry.py +++ /dev/null @@ -1,89 +0,0 @@ -"""Model specification registry. - -This module provides a centralized registry for managing model specifications. -""" - -from typing import Any, Optional - -from app.models.llm.specs.base import ModelSpec - - -class ModelSpecRegistry: - """Registry for managing model specifications. - - This is a singleton that holds all known model specs and provides - lookup and validation capabilities. 
- """ - - _instance: Optional["ModelSpecRegistry"] = None - _specs: dict[str, ModelSpec] = {} - - def __new__(cls) -> "ModelSpecRegistry": - """Ensure singleton pattern.""" - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def register(self, spec: ModelSpec) -> None: - """Register a model specification. - - Args: - spec: Model specification to register - """ - key = f"{spec.provider}:{spec.model_name}" - self._specs[key] = spec - - def get_spec(self, provider: str, model_name: str) -> Optional[ModelSpec]: - """Get a model specification. - - Args: - provider: Provider name - model_name: Model name - - Returns: - ModelSpec if found, None otherwise - """ - key = f"{provider}:{model_name}" - return self._specs.get(key) - - def validate_config( - self, provider: str, model_name: str, config: dict[str, Any] - ) -> tuple[bool, Optional[str]]: - """Validate a configuration against the model spec. - - Args: - provider: Provider name - model_name: Model name - config: Configuration to validate - - Returns: - Tuple of (is_valid, error_message) - """ - spec = self.get_spec(provider, model_name) - if spec is None: - # If no spec found, we can't validate - allow it through - # This maintains backward compatibility with models we haven't spec'd yet - return True, None - - return spec.validate_config(config) - - def list_models(self, provider: Optional[str] = None) -> list[ModelSpec]: - """List all registered model specs. - - Args: - provider: Optional provider filter - - Returns: - List of model specs - """ - if provider: - return [spec for spec in self._specs.values() if spec.provider == provider] - return list(self._specs.values()) - - def clear(self) -> None: - """Clear all registered specs (mainly for testing).""" - self._specs.clear() - - -# Global registry instance -model_spec_registry = ModelSpecRegistry() diff --git a/backend/app/services/llm/README.md b/backend/app/services/llm/README.md deleted file mode 100644 index e2608bf4..00000000 --- a/backend/app/services/llm/README.md +++ /dev/null @@ -1,353 +0,0 @@ -# LLM Service Module - -A provider-agnostic interface for executing LLM calls. Currently supports OpenAI with an extensible architecture for future providers. - -## Architecture - -The LLM service follows a layered architecture with clear separation of concerns: - -``` -app/ -├── models/llm/ # Data models -│ ├── call.py # Database models -│ ├── config.py # Configuration models -│ ├── request.py # Request models -│ ├── response.py # Response models -│ └── specs/ # Model specifications -│ ├── base.py # Base spec classes -│ └── registry.py # Spec registry -│ -└── services/llm/ # Service layer - ├── __init__.py # Public API - ├── constants.py # Constants and enums - ├── exceptions.py # Custom exceptions - ├── orchestrator.py # Main entry point - ├── jobs.py # Celery job management - │ - ├── providers/ # Provider implementations - │ ├── base.py # Abstract base provider - │ ├── factory.py # Provider factory (extensible) - │ └── openai.py # OpenAI implementation - │ - ├── transformers/ # Request transformers - │ ├── base.py # Abstract transformer - │ ├── factory.py # Transformer factory (extensible) - │ └── openai.py # OpenAI transformer - │ - └── specs/ # Model specifications - ├── __init__.py # Spec initialization - └── openai.py # OpenAI model specs -``` - -## Key Components - -### 1. 
Orchestration Layer - -**`orchestrator.py`** - Main entry point for LLM calls -- Routes requests to appropriate providers -- Handles error handling and logging -- Provider-agnostic interface - -**`jobs.py`** - Celery job management -- Asynchronous job execution -- Job status tracking -- Integration with job queue - -### 2. Provider Layer - -**`BaseProvider`** - Abstract base class for all providers -- Defines standard interface -- Handles transformer integration -- Manages parameter building - -**`OpenAIProvider`** - OpenAI implementation -- GPT-4, GPT-3.5 models -- O-series reasoning models -- Vector store file search -- Full feature support - -**`ProviderFactory`** - Creates provider instances -- Supports provider registration for extensibility -- Runtime provider addition -- Currently registered: OpenAI - -### 3. Transformation Layer - -**`ConfigTransformer`** - Base transformer class -- Converts unified API to provider format -- Validates against model specs -- Extensible for new providers - -**`OpenAITransformer`** - OpenAI transformation -- Handles OpenAI Responses API format -- Supports reasoning configuration -- Vector store integration - -**`TransformerFactory`** - Creates transformer instances -- Loads model specs automatically -- Supports custom transformers - -### 4. Specification Layer - -**`ModelSpec`** - Model specification class -- Defines capabilities (reasoning, vision, etc.) -- Parameter constraints and validation -- Feature detection - -**`ModelSpecRegistry`** - Singleton registry -- Manages all model specs -- Provides lookup and validation -- Centralized spec storage - -### 5. Error Handling - -Custom exception hierarchy: -- `LLMServiceError` - Base exception -- `ProviderError` - Provider-specific errors -- `UnsupportedProviderError` - Unsupported provider -- `ValidationError` - Configuration validation -- `TransformationError` - Transformation failures -- `APICallError` - API call failures - -## Usage - -### Basic LLM Call - -```python -from app.services.llm import execute_llm_call -from app.models.llm import LLMCallRequest, LLMConfig, LLMModelSpec - -# Create request -request = LLMCallRequest( - llm=LLMConfig( - prompt="Explain quantum computing", - llm_model_spec=LLMModelSpec( - model="gpt-4", - provider="openai", - temperature=0.7, - max_tokens=500 - ) - ) -) - -# Execute call -response, error = execute_llm_call(request, openai_client) - -if response: - print(f"Response: {response.message}") - print(f"Tokens: {response.total_tokens}") -else: - print(f"Error: {error}") -``` - -### With Vector Store (RAG) - -```python -request = LLMCallRequest( - llm=LLMConfig( - prompt="What does the documentation say about authentication?", - vector_store_id="vs_abc123", - llm_model_spec=LLMModelSpec( - model="gpt-4", - provider="openai" - ) - ), - max_num_results=10 -) - -response, error = execute_llm_call(request, openai_client) - -# Access file search results -if response and response.file_search_results: - for result in response.file_search_results: - print(f"Score: {result['score']}, Text: {result['text']}") -``` - -### O-Series Models (Reasoning) - -```python -from app.models.llm import ReasoningConfig, TextConfig - -request = LLMCallRequest( - llm=LLMConfig( - prompt="Solve this complex problem...", - llm_model_spec=LLMModelSpec( - model="o1", - provider="openai", - reasoning=ReasoningConfig(effort="high"), - text=TextConfig(verbosity="medium") - ) - ) -) - -response, error = execute_llm_call(request, openai_client) -``` - -### Asynchronous Job - -```python -from 
app.services.llm.jobs import start_job - -# Schedule background job -job_id = start_job( - db=session, - request=request, - project_id=123, - organization_id=456 -) - -# Job runs asynchronously via Celery -print(f"Job scheduled: {job_id}") -``` - -## Adding New Providers - -### 1. Create Provider Implementation - -```python -# app/services/llm/providers/anthropic.py -from app.services.llm.providers.base import BaseProvider -from app.models.llm import LLMCallRequest, LLMCallResponse - -class AnthropicProvider(BaseProvider): - def execute(self, request: LLMCallRequest) -> tuple[LLMCallResponse | None, str | None]: - params = self.build_params(request) - response = self.client.messages.create(**params) - # Process response... - return llm_response, None -``` - -### 2. Create Transformer - -```python -# app/services/llm/transformers/anthropic.py -from app.services.llm.transformers.base import ConfigTransformer - -class AnthropicTransformer(ConfigTransformer): - def transform(self, request: LLMCallRequest) -> dict[str, Any]: - return { - "model": request.llm.llm_model_spec.model, - "messages": [{"role": "user", "content": request.llm.prompt}], - "max_tokens": request.llm.llm_model_spec.max_tokens or 1024, - # ... other Anthropic-specific params - } -``` - -### 3. Create Model Specs - -```python -# app/services/llm/specs/anthropic.py -from app.models.llm.specs import ModelSpec, ModelCapabilities, ParameterSpec - -def create_anthropic_specs() -> list[ModelSpec]: - return [ - ModelSpec( - model_name="claude-3-opus", - provider="anthropic", - capabilities=ModelCapabilities( - supports_streaming=True, - supports_vision=True, - # ... - ), - parameters=[ - ParameterSpec(name="temperature", type="float", min_value=0.0, max_value=1.0), - # ... - ] - ) - ] -``` - -### 4. Register Components - -```python -# Update factories -from app.services.llm.providers.factory import ProviderFactory -from app.services.llm.transformers.factory import TransformerFactory - -ProviderFactory.register_provider("anthropic", AnthropicProvider) -TransformerFactory.register_transformer("anthropic", AnthropicTransformer) -``` - -## Configuration - -### Constants - -Edit `constants.py` to update default values: - -```python -DEFAULT_TEMPERATURE = 1.0 -DEFAULT_TOP_P = 1.0 -DEFAULT_MAX_RESULTS = 20 -``` - -### Supported Providers - -Currently supported: `openai` - -The architecture is designed to be extensible. Future providers can be added following the pattern in "Adding New Providers" section. - -## Testing - -```python -# Test with mock client -from unittest.mock import Mock - -mock_client = Mock() -mock_client.responses.create.return_value = Mock( - id="resp_123", - output_text="Test response", - model="gpt-4", - usage=Mock(input_tokens=10, output_tokens=20, total_tokens=30) -) - -response, error = execute_llm_call(request, mock_client) -assert response.message == "Test response" -``` - -## Best Practices - -1. **Always use model specs** - Enable validation for production code -2. **Handle errors gracefully** - Check for both response and error -3. **Use type hints** - Maintain type safety throughout -4. **Log appropriately** - Use structured logging for debugging -5. **Follow the architecture** - Don't bypass the abstraction layers -6. 
**Add tests** - Test new providers and transformers thoroughly - -## Future Enhancements - -- [ ] Streaming response support -- [ ] Function calling for all providers -- [ ] Batch request processing -- [ ] Response caching -- [ ] Rate limiting -- [ ] Cost tracking -- [ ] Provider failover -- [ ] A/B testing between providers - -## Troubleshooting - -### Common Issues - -**Import errors after refactoring** -- Ensure old files are removed -- Check `__init__.py` exports -- Clear Python cache: `find . -type d -name __pycache__ -exec rm -r {} +` - -**Validation errors** -- Check model spec definitions -- Verify parameter constraints -- Use `model_spec.validate_config()` for debugging - -**Provider not found** -- Ensure provider is registered in factory -- Check provider name spelling -- Verify provider is in `SUPPORTED_PROVIDERS` - -## Contributing - -When adding new features: -1. Update relevant specs -2. Add comprehensive docstrings -3. Update this README -4. Add tests -5. Follow existing patterns diff --git a/backend/app/services/llm/__init__.py b/backend/app/services/llm/__init__.py index c7571bd3..f454b459 100644 --- a/backend/app/services/llm/__init__.py +++ b/backend/app/services/llm/__init__.py @@ -1,72 +1,3 @@ -"""LLM services module. - -This module provides a provider-agnostic interface for executing LLM calls. -Currently supports OpenAI with an extensible architecture for future providers. - -Architecture: ------------ -The LLM service follows a layered architecture with clear separation of concerns: - -1. **Models Layer** (`app.models.llm`) - - Request/Response models - - Configuration models - - Model specifications - -2. **Orchestration Layer** - - `orchestrator.py`: Main entry point for LLM calls - - `jobs.py`: Celery job management - -3. **Provider Layer** (`providers/`) - - `base.py`: Abstract base provider - - `openai.py`: OpenAI implementation - - `factory.py`: Provider factory (extensible) - -4. **Transformation Layer** (`transformers/`) - - `base.py`: Abstract transformer - - `openai.py`: OpenAI transformer - - `factory.py`: Transformer factory (extensible) - -5. **Specification Layer** (`specs/`) - - `openai.py`: OpenAI model specs - - Model capability definitions - - Parameter validation rules - -Key Components: --------------- -- execute_llm_call: Main entry point for LLM API calls -- BaseProvider: Abstract base class for all providers -- ConfigTransformer: Base class for request transformation -- ModelSpec: Model specification with validation -- ProviderFactory: Factory for creating provider instances -- TransformerFactory: Factory for creating transformers - -The architecture uses specification-driven configuration with: -1. Model specs defining capabilities and parameter constraints -2. Transformers converting unified API contracts to provider-specific formats -3. Automatic validation against model specifications -4. 
Custom exceptions for better error handling - -Usage Example: -------------- -```python -from app.services.llm import execute_llm_call -from app.models.llm import LLMCallRequest, LLMConfig, LLMModelSpec - -request = LLMCallRequest( - llm=LLMConfig( - prompt="Hello, world!", - llm_model_spec=LLMModelSpec( - model="gpt-4", - provider="openai", - temperature=0.7 - ) - ) -) - -response, error = execute_llm_call(request, openai_client) -``` -""" - # Main orchestration from app.services.llm.orchestrator import execute_llm_call @@ -77,28 +8,6 @@ OpenAIProvider, ) -# Transformers -from app.services.llm.transformers import ( - ConfigTransformer, - TransformerFactory, - OpenAITransformer, -) - -# Constants and exceptions -from app.services.llm.constants import ( - ProviderType, - EffortLevel, - VerbosityLevel, - SUPPORTED_PROVIDERS, -) -from app.services.llm.exceptions import ( - LLMServiceError, - ProviderError, - UnsupportedProviderError, - ValidationError, - TransformationError, - APICallError, -) # Initialize model specs on module import import app.services.llm.specs # noqa: F401 @@ -110,20 +19,4 @@ "BaseProvider", "ProviderFactory", "OpenAIProvider", - # Transformers - "ConfigTransformer", - "TransformerFactory", - "OpenAITransformer", - # Constants - "ProviderType", - "EffortLevel", - "VerbosityLevel", - "SUPPORTED_PROVIDERS", - # Exceptions - "LLMServiceError", - "ProviderError", - "UnsupportedProviderError", - "ValidationError", - "TransformationError", - "APICallError", ] diff --git a/backend/app/services/llm/constants.py b/backend/app/services/llm/constants.py deleted file mode 100644 index 2aabd861..00000000 --- a/backend/app/services/llm/constants.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Constants and type definitions for the LLM module. - -This module centralizes all constants, enums, and type definitions used -across the LLM service layer. -""" - -from typing import Literal - -# Provider type definitions -ProviderType = Literal["openai"] - -# Reasoning effort levels for o-series models -EffortLevel = Literal["low", "medium", "high"] - -# Text verbosity levels -VerbosityLevel = Literal["low", "medium", "high"] - -# Job status -JobStatusType = Literal["pending", "processing", "success", "failed"] - -# Default parameter values -DEFAULT_TEMPERATURE = 1.0 -DEFAULT_TOP_P = 1.0 -DEFAULT_MAX_RESULTS = 20 - -# Parameter constraints -MIN_TEMPERATURE = 0.0 -MAX_TEMPERATURE = 2.0 -MIN_TOP_P = 0.0 -MAX_TOP_P = 1.0 -MIN_MAX_TOKENS = 1 -MAX_MAX_TOKENS = 128000 - -# Supported providers -SUPPORTED_PROVIDERS = ["openai"] - -# Error messages -ERROR_UNSUPPORTED_PROVIDER = "Provider '{provider}' is not supported. 
Supported: {supported}" -ERROR_VALIDATION_FAILED = "Configuration validation failed: {details}" -ERROR_TRANSFORMATION_FAILED = "Failed to transform request: {details}" -ERROR_API_CALL_FAILED = "API call failed: {details}" -ERROR_UNKNOWN_PARAMETER = "Unknown parameter '{param}' for model {model}" -ERROR_REQUIRED_PARAMETER = "Required parameter '{param}' is missing" -ERROR_INVALID_TYPE = "Parameter '{param}' must be {expected_type}" -ERROR_OUT_OF_RANGE = "Parameter '{param}' must be between {min_val} and {max_val}" -ERROR_INVALID_VALUE = "Parameter '{param}' must be one of {allowed_values}" - -# Feature names for capability checks -FEATURE_REASONING = "reasoning" -FEATURE_TEXT_CONFIG = "text_config" -FEATURE_FILE_SEARCH = "file_search" -FEATURE_FUNCTION_CALLING = "function_calling" -FEATURE_STREAMING = "streaming" -FEATURE_VISION = "vision" diff --git a/backend/app/services/llm/exceptions.py b/backend/app/services/llm/exceptions.py deleted file mode 100644 index 045176af..00000000 --- a/backend/app/services/llm/exceptions.py +++ /dev/null @@ -1,130 +0,0 @@ -"""Custom exceptions for the LLM module. - -This module defines all custom exceptions used throughout the LLM service layer, -providing better error handling and more descriptive error messages. -""" - - -class LLMServiceError(Exception): - """Base exception for all LLM service errors.""" - - pass - - -class ProviderError(LLMServiceError): - """Raised when there's an error with the provider configuration or execution.""" - - def __init__(self, provider: str, message: str): - self.provider = provider - self.message = message - super().__init__(f"Provider '{provider}' error: {message}") - - -class UnsupportedProviderError(ProviderError): - """Raised when an unsupported provider is requested.""" - - def __init__(self, provider: str, supported_providers: list[str]): - self.supported_providers = supported_providers - message = f"Unsupported provider. 
Supported: {', '.join(supported_providers)}" - super().__init__(provider, message) - - -class ValidationError(LLMServiceError): - """Raised when configuration validation fails.""" - - def __init__(self, message: str, parameter: str | None = None): - self.parameter = parameter - self.message = message - error_msg = f"Validation error" - if parameter: - error_msg += f" for parameter '{parameter}'" - error_msg += f": {message}" - super().__init__(error_msg) - - -class TransformationError(LLMServiceError): - """Raised when request transformation fails.""" - - def __init__(self, provider: str, message: str): - self.provider = provider - self.message = message - super().__init__(f"Transformation error for {provider}: {message}") - - -class ModelSpecNotFoundError(LLMServiceError): - """Raised when a model specification is not found.""" - - def __init__(self, provider: str, model_name: str): - self.provider = provider - self.model_name = model_name - super().__init__( - f"Model spec not found for provider '{provider}', model '{model_name}'" - ) - - -class APICallError(LLMServiceError): - """Raised when an API call to the provider fails.""" - - def __init__(self, provider: str, message: str, original_error: Exception | None = None): - self.provider = provider - self.message = message - self.original_error = original_error - super().__init__(f"API call failed for {provider}: {message}") - - -class ParameterError(ValidationError): - """Raised when there's an error with a specific parameter.""" - - def __init__(self, parameter: str, message: str): - super().__init__(message, parameter) - - -class RequiredParameterError(ParameterError): - """Raised when a required parameter is missing.""" - - def __init__(self, parameter: str): - super().__init__(parameter, f"Required parameter '{parameter}' is missing") - - -class InvalidParameterTypeError(ParameterError): - """Raised when a parameter has an invalid type.""" - - def __init__(self, parameter: str, expected_type: str, actual_type: str): - self.expected_type = expected_type - self.actual_type = actual_type - super().__init__( - parameter, - f"Must be {expected_type}, got {actual_type}" - ) - - -class ParameterOutOfRangeError(ParameterError): - """Raised when a parameter value is out of allowed range.""" - - def __init__(self, parameter: str, value: float, min_value: float | None, max_value: float | None): - self.value = value - self.min_value = min_value - self.max_value = max_value - - if min_value is not None and max_value is not None: - msg = f"Value {value} is out of range [{min_value}, {max_value}]" - elif min_value is not None: - msg = f"Value {value} must be >= {min_value}" - elif max_value is not None: - msg = f"Value {value} must be <= {max_value}" - else: - msg = f"Value {value} is invalid" - - super().__init__(parameter, msg) - - -class InvalidParameterValueError(ParameterError): - """Raised when a parameter has an invalid value.""" - - def __init__(self, parameter: str, value: any, allowed_values: list): - self.value = value - self.allowed_values = allowed_values - super().__init__( - parameter, - f"Value '{value}' is not allowed. 
Must be one of: {allowed_values}" - ) diff --git a/backend/app/services/llm/orchestrator.py b/backend/app/services/llm/orchestrator.py index 085c38e0..2b0f8b22 100644 --- a/backend/app/services/llm/orchestrator.py +++ b/backend/app/services/llm/orchestrator.py @@ -10,7 +10,6 @@ from app.models.llm import LLMCallRequest, LLMCallResponse from app.services.llm.providers.factory import ProviderFactory -from app.services.llm.exceptions import LLMServiceError logger = logging.getLogger(__name__) @@ -73,11 +72,6 @@ def execute_llm_call( return response, error - except LLMServiceError as e: - error_message = str(e) - logger.error(f"[execute_llm_call] LLM service error: {error_message}") - return None, error_message - except Exception as e: error_message = f"Unexpected error in LLM service: {str(e)}" logger.error(f"[execute_llm_call] {error_message}", exc_info=True) diff --git a/backend/app/services/llm/providers/base.py b/backend/app/services/llm/providers/base.py index 963e5f4c..7492f5fe 100644 --- a/backend/app/services/llm/providers/base.py +++ b/backend/app/services/llm/providers/base.py @@ -6,11 +6,9 @@ """ from abc import ABC, abstractmethod -from typing import Any, Optional +from typing import Any from app.models.llm import LLMCallRequest, LLMCallResponse -from app.services.llm.transformers.base import ConfigTransformer -from app.services.llm.transformers.factory import TransformerFactory class BaseProvider(ABC): @@ -19,60 +17,20 @@ class BaseProvider(ABC): All provider implementations (OpenAI, Anthropic, etc.) must inherit from this class and implement the required methods. - This provider uses a transformer-based architecture where configuration - transformation is separated from the provider execution logic. + Each provider uses its own spec class for parameter validation and conversion + to the provider's API format. Attributes: client: The provider-specific client instance - transformer: ConfigTransformer for converting requests to provider format """ - def __init__(self, client: Any, transformer: Optional[ConfigTransformer] = None): - """Initialize the provider with client and optional transformer. + def __init__(self, client: Any): + """Initialize the provider with client. Args: client: Provider-specific client (e.g., OpenAI, Anthropic client) - transformer: Optional config transformer. If not provided, one will - be created using the TransformerFactory. """ self.client = client - self.transformer = transformer - - def _get_transformer(self, request: LLMCallRequest) -> ConfigTransformer: - """Get or create a transformer for this request. - - Args: - request: LLM call request - - Returns: - ConfigTransformer instance - """ - if self.transformer is None: - # Create transformer using factory - provider_name = self.get_provider_name() - model_name = request.llm.llm_model_spec.model - self.transformer = TransformerFactory.create_transformer( - provider=provider_name, - model_name=model_name, - use_spec=True, - ) - return self.transformer - - def build_params(self, request: LLMCallRequest) -> dict[str, Any]: - """Build provider-specific API parameters from the request. - - This method uses the transformer to convert the request. - Providers can override this if they need custom logic, but the - default implementation uses the transformer. 
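For context on the slimmed-down `BaseProvider` in the hunk above, here is a minimal sketch of how a future provider subclass could look — the class name, echo behaviour, and zeroed token counts are hypothetical and not part of this patch; only the constructor and `execute()` contract come from the code above:

```python
from app.models.llm import LLMCallRequest, LLMCallResponse
from app.services.llm.providers.base import BaseProvider


class ExampleProvider(BaseProvider):
    """Hypothetical provider illustrating the reduced interface."""

    def execute(
        self, request: LLMCallRequest
    ) -> tuple[LLMCallResponse | None, str | None]:
        spec = request.llm.llm_model_spec
        try:
            # A real provider would call its own SDK here; this sketch
            # simply echoes the prompt back with zeroed token counts.
            return (
                LLMCallResponse(
                    status="success",
                    response_id="resp_example",
                    message=f"echo: {request.llm.prompt}",
                    model=spec.model,
                    input_tokens=0,
                    output_tokens=0,
                    total_tokens=0,
                ),
                None,
            )
        except Exception as exc:
            return None, str(exc)
```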
- - Args: - request: LLM call request with configuration - - Returns: - Dictionary of provider-specific parameters - """ - transformer = self._get_transformer(request) - return transformer.validate_and_transform(request) @abstractmethod def execute( diff --git a/backend/app/services/llm/providers/factory.py b/backend/app/services/llm/providers/factory.py index 98c5f6a1..6dd9bb3b 100644 --- a/backend/app/services/llm/providers/factory.py +++ b/backend/app/services/llm/providers/factory.py @@ -8,7 +8,6 @@ from typing import Any from app.models.llm import ProviderType -from app.services.llm.exceptions import UnsupportedProviderError from app.services.llm.providers.base import BaseProvider from app.services.llm.providers.openai import OpenAIProvider @@ -34,9 +33,7 @@ class ProviderFactory: } @classmethod - def create_provider( - cls, provider_type: ProviderType, client: Any - ) -> BaseProvider: + def create_provider(cls, provider_type: ProviderType, client: Any) -> BaseProvider: """Create a provider instance based on the provider type. Args: @@ -44,17 +41,18 @@ def create_provider( client: Provider-specific client instance Returns: - Instance of the appropriate provider + Instance of the appropriate provider (BaseProvider) Raises: - UnsupportedProviderError: If the provider type is not supported + ValueError: If the provider type is not supported """ provider_class = cls._PROVIDERS.get(provider_type) if provider_class is None: - raise UnsupportedProviderError( - provider=provider_type, - supported_providers=cls.get_supported_providers() + supported = cls.get_supported_providers() + raise ValueError( + f"Provider '{provider_type}' is not supported. " + f"Supported providers: {', '.join(supported)}" ) logger.info(f"[ProviderFactory] Creating {provider_type} provider instance") @@ -68,27 +66,3 @@ def get_supported_providers(cls) -> list[str]: List of supported provider type strings """ return list(cls._PROVIDERS.keys()) - - @classmethod - def register_provider( - cls, provider_type: str, provider_class: type[BaseProvider] - ) -> None: - """Register a new provider type. - - This allows for runtime registration of new providers, useful for - plugins or extensions. - - Args: - provider_type: Type identifier for the provider - provider_class: Provider class that implements BaseProvider - - Raises: - TypeError: If provider_class doesn't inherit from BaseProvider - """ - if not issubclass(provider_class, BaseProvider): - raise TypeError( - f"{provider_class.__name__} must inherit from BaseProvider" - ) - - logger.info(f"[ProviderFactory] Registering provider: {provider_type}") - cls._PROVIDERS[provider_type] = provider_class diff --git a/backend/app/services/llm/providers/openai.py b/backend/app/services/llm/providers/openai.py index b3da475a..e680bbb4 100644 --- a/backend/app/services/llm/providers/openai.py +++ b/backend/app/services/llm/providers/openai.py @@ -4,19 +4,19 @@ including support for standard models, o-series models with reasoning, and file search capabilities. -Uses spec-based transformation for configuration conversion. +Uses OpenAISpec for parameter validation and API conversion. 
""" import logging -from typing import Optional import openai from openai import OpenAI from openai.types.responses.response import Response +from pydantic import ValidationError from app.models.llm import LLMCallRequest, LLMCallResponse from app.services.llm.providers.base import BaseProvider -from app.services.llm.transformers.base import ConfigTransformer +from app.services.llm.specs import OpenAISpec from app.utils import handle_openai_error logger = logging.getLogger(__name__) @@ -31,42 +31,24 @@ class OpenAIProvider(BaseProvider): - Text configuration for verbosity control - Vector store file search integration - Uses OpenAITransformer for configuration conversion. + Uses OpenAISpec for parameter validation and conversion. """ - def __init__(self, client: OpenAI, transformer: Optional[ConfigTransformer] = None): - """Initialize OpenAI provider with client and optional transformer. + def __init__(self, client: OpenAI): + """Initialize OpenAI provider with client. Args: client: OpenAI client instance - transformer: Optional config transformer (will auto-create if not provided) """ - super().__init__(client, transformer) - - def _extract_file_search_results(self, response: Response) -> list[dict]: - """Extract file search results from OpenAI response. - - Args: - response: OpenAI response object - - Returns: - List of dicts with 'score' and 'text' fields - """ - results = [] - for tool_call in response.output: - if tool_call.type == "file_search_call": - results.extend( - {"score": hit.score, "text": hit.text} for hit in tool_call.results - ) - return results + super().__init__(client) + self.client = client def execute( self, request: LLMCallRequest ) -> tuple[LLMCallResponse | None, str | None]: """Execute OpenAI API call. - Uses the transformer to convert the request to OpenAI format, - with automatic validation against model specs. + Uses OpenAISpec to validate and convert the request to OpenAI format. 
Args: request: LLM call request with configuration @@ -80,20 +62,14 @@ def execute( error_message: str | None = None try: - # Extract model spec for easier access - model_spec = request.llm.llm_model_spec + # Create and validate OpenAI spec from request + spec = OpenAISpec.from_llm_request(request) - # Build parameters using transformer (includes validation) - params = self.build_params(request) - logger.info( - f"[OpenAIProvider] Making OpenAI call with model: {model_spec.model}" - ) - response = self.client.responses.create(**params) + # Convert to API parameters (validation happens during spec creation) + params = spec.to_api_params() - # Extract file search results if vector store was used - file_search_results = None - if request.llm.vector_store_id: - file_search_results = self._extract_file_search_results(response) + logger.info(f"[OpenAIProvider] Making OpenAI call with model: {spec.model}") + response = self.client.responses.create(**params) # Build response llm_response = LLMCallResponse( @@ -104,7 +80,6 @@ def execute( input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens, total_tokens=response.usage.total_tokens, - file_search_results=file_search_results, ) logger.info( @@ -112,6 +87,11 @@ def execute( ) return llm_response, None + except ValidationError as e: + error_message = f"Configuration validation failed: {str(e)}" + logger.error(f"[OpenAIProvider] {error_message}", exc_info=True) + return None, error_message + except ValueError as e: error_message = f"Configuration validation failed: {str(e)}" logger.error(f"[OpenAIProvider] {error_message}", exc_info=True) diff --git a/backend/app/services/llm/specs/__init__.py b/backend/app/services/llm/specs/__init__.py index 87de64d4..f4f6c6cf 100644 --- a/backend/app/services/llm/specs/__init__.py +++ b/backend/app/services/llm/specs/__init__.py @@ -1,26 +1,3 @@ -"""Model specifications initialization. +from .openai import OpenAISpec -This module initializes all model specifications and registers them -with the global registry. -""" - -from app.services.llm.specs.openai import register_openai_specs - - -def initialize_model_specs() -> None: - """Initialize and register all model specifications. - - This should be called during application startup to ensure all - model specs are available for validation and transformation. - """ - # Register OpenAI specs - register_openai_specs() - - # Future: Register other provider specs - # register_anthropic_specs() - # register_google_specs() - # register_azure_specs() - - -# Auto-initialize when module is imported -initialize_model_specs() +__all__ = ["OpenAISpec"] diff --git a/backend/app/services/llm/specs/openai.py b/backend/app/services/llm/specs/openai.py index 655731c2..1b6d711b 100644 --- a/backend/app/services/llm/specs/openai.py +++ b/backend/app/services/llm/specs/openai.py @@ -1,151 +1,151 @@ -"""OpenAI model specifications. +"""OpenAI specification model. -This module contains specifications for OpenAI models including GPT-4, -GPT-3.5, and o-series models with reasoning capabilities. +This module defines the OpenAI-specific parameter specification with built-in +validation and conversion to API format. """ -from app.models.llm.specs import ( - ModelCapabilities, - ModelSpec, - ParameterSpec, - model_spec_registry, -) +from typing import Any, Literal +from pydantic import Field, model_validator +from sqlmodel import SQLModel -def create_openai_specs() -> list[ModelSpec]: - """Create specifications for OpenAI models. 
+from app.models.llm.request import LLMCallRequest - Returns: - List of ModelSpec objects for OpenAI models - """ - specs = [] - - # Standard parameters for most OpenAI models - standard_params = [ - ParameterSpec( - name="temperature", - type="float", - required=False, - min_value=0.0, - max_value=2.0, - default=1.0, - description="Sampling temperature (0-2). Higher values make output more random.", - ), - ParameterSpec( - name="max_tokens", - type="int", - required=False, - min_value=1, - max_value=128000, - description="Maximum number of tokens to generate.", - ), - ParameterSpec( - name="top_p", - type="float", - required=False, - min_value=0.0, - max_value=1.0, - default=1.0, - description="Nucleus sampling parameter.", - ), - ] - - # GPT-4 models - gpt4_models = [ - "gpt-4", - "gpt-4-turbo", - "gpt-4-turbo-preview", - "gpt-4o", - "gpt-4o-mini", - ] - - for model_name in gpt4_models: - specs.append( - ModelSpec( - model_name=model_name, - provider="openai", - capabilities=ModelCapabilities( - supports_reasoning=False, - supports_text_config=False, - supports_file_search=True, - supports_function_calling=True, - supports_streaming=True, - supports_vision=True - if "4o" in model_name or "vision" in model_name - else False, - ), - parameters=standard_params.copy(), - ) - ) - # GPT-3.5 models - gpt35_models = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"] - - for model_name in gpt35_models: - specs.append( - ModelSpec( - model_name=model_name, - provider="openai", - capabilities=ModelCapabilities( - supports_reasoning=False, - supports_text_config=False, - supports_file_search=True, - supports_function_calling=True, - supports_streaming=True, - supports_vision=False, - ), - parameters=standard_params.copy(), - ) - ) +class OpenAISpec(SQLModel): + """OpenAI API specification with validation. - # O-series models (reasoning models) - o_series_params = standard_params.copy() + [ - ParameterSpec( - name="reasoning", - type="str", - required=False, - allowed_values=["low", "medium", "high"], - description="Reasoning effort level for o-series models.", - ), - ParameterSpec( - name="text", - type="str", - required=False, - allowed_values=["low", "medium", "high"], - description="Text verbosity level for o-series models.", - ), - ] - - o_series_models = [ - "o1", - "o1-preview", - "o1-mini", - "o3", - "o3-mini", - ] - - for model_name in o_series_models: - specs.append( - ModelSpec( - model_name=model_name, - provider="openai", - capabilities=ModelCapabilities( - supports_reasoning=True, - supports_text_config=True, - supports_file_search=True, - supports_function_calling=False, # o-series don't support functions yet - supports_streaming=True, - supports_vision=False, - ), - parameters=o_series_params.copy(), - ) - ) - - return specs + This model defines all OpenAI-specific parameters with their constraints, + provides validation, and handles conversion to OpenAI API format. 
+ Attributes: + model: Model identifier (e.g., "gpt-4", "gpt-3.5-turbo", "o1-preview") + prompt: The user's input prompt + temperature: Sampling temperature (0.0-2.0) + max_tokens: Maximum number of tokens to generate (must be positive) + top_p: Nucleus sampling parameter (0.0-1.0) + reasoning_effort: Optional reasoning effort level for o-series models ("low", "medium", "high") + text_verbosity: Optional text verbosity level ("low", "medium", "high") + vector_store_id: Optional vector store ID for file search + max_num_results: Maximum number of file search results (1-50) + """ -def register_openai_specs() -> None: - """Register all OpenAI model specs with the global registry.""" - specs = create_openai_specs() - for spec in specs: - model_spec_registry.register(spec) + # Required parameters + model: str = Field(description="Model identifier") + prompt: str = Field(description="User input prompt") + + # Optional standard parameters + temperature: float | None = Field( + default=None, + ge=0.0, + le=2.0, + description="Sampling temperature between 0.0 and 2.0", + ) + max_tokens: int | None = Field( + default=None, gt=0, description="Maximum tokens to generate" + ) + top_p: float | None = Field( + default=None, ge=0.0, le=1.0, description="Nucleus sampling parameter" + ) + + # Advanced OpenAI-specific parameters + reasoning_effort: Literal["low", "medium", "high"] | None = Field( + default=None, description="Reasoning effort level for o-series models" + ) + text_verbosity: Literal["low", "medium", "high"] | None = Field( + default=None, description="Text verbosity level" + ) + + # Vector store file search + vector_store_id: str | None = Field( + default=None, description="Vector store ID for file search" + ) + max_num_results: int | None = Field( + default=None, ge=1, le=50, description="Max file search results" + ) + + @model_validator(mode="after") + def validate_vector_store(self) -> "OpenAISpec": + """Validate vector store configuration. + + Ensures that if vector_store_id is provided, it's a valid non-empty string. + + Returns: + Self for method chaining + + Raises: + ValueError: If vector_store_id is invalid + """ + if self.vector_store_id is not None and not self.vector_store_id.strip(): + raise ValueError("vector_store_id cannot be empty") + return self + + def to_api_params(self) -> dict[str, Any]: + """Convert to OpenAI API parameters. + + Transforms this spec into the format expected by OpenAI's Responses API. 
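For illustration, a small usage sketch of the spec defined above, assuming the non-table SQLModel behaves like a plain Pydantic model so the `Field` constraints and the `model_validator` run on construction; the model name and vector store id are invented.

```python
from pydantic import ValidationError

from app.services.llm.specs import OpenAISpec

# Well-formed spec: values are within the declared parameter ranges.
spec = OpenAISpec(
    model="gpt-4o",
    prompt="Summarise the attached report.",
    temperature=0.2,
    max_tokens=512,
    vector_store_id="vs_demo_123",  # hypothetical id
)

# Out-of-range temperature (> 2.0) is rejected by the Field constraint.
try:
    OpenAISpec(model="gpt-4o", prompt="hi", temperature=5.0)
except ValidationError as exc:
    print("rejected:", exc.errors()[0]["loc"])

# A whitespace-only vector_store_id trips the model validator above.
try:
    OpenAISpec(model="gpt-4o", prompt="hi", vector_store_id="   ")
except ValidationError:
    print("rejected: vector_store_id cannot be empty")
```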
+ + Returns: + Dictionary of API parameters ready for openai.responses.create() + """ + # Base parameters - always required + params: dict[str, Any] = { + "model": self.model, + "input": [{"role": "user", "content": self.prompt}], + } + + # Add optional standard parameters + if self.temperature is not None: + params["temperature"] = self.temperature + + if self.max_tokens is not None: + params["max_tokens"] = self.max_tokens + + if self.top_p is not None: + params["top_p"] = self.top_p + + # Add advanced OpenAI configurations + if self.reasoning_effort is not None: + params["reasoning"] = {"effort": self.reasoning_effort} + + if self.text_verbosity is not None: + params["text"] = {"verbosity": self.text_verbosity} + + # Add vector store file search if provided + if self.vector_store_id: + params["tools"] = [ + { + "type": "file_search", + "vector_store_ids": [self.vector_store_id], + "max_num_results": self.max_num_results or 20, + } + ] + params["include"] = ["file_search_call.results"] + + return params + + @classmethod + def from_llm_request(cls, request: "LLMCallRequest") -> "OpenAISpec": + """Create OpenAISpec from LLMCallRequest. + + Convenience method to convert from the unified API request format. + + Args: + request: Unified LLM call request + + Returns: + OpenAISpec instance + """ + model_spec = request.llm.llm_model_spec + + return cls( + model=model_spec.model, + prompt=request.llm.prompt, + temperature=model_spec.temperature, + max_tokens=model_spec.max_tokens, + top_p=model_spec.top_p, + reasoning_effort=model_spec.reasoning_effort, + text_verbosity=model_spec.text_verbosity, + vector_store_id=request.llm.vector_store_id, + max_num_results=request.max_num_results, + ) diff --git a/backend/app/services/llm/transformers/__init__.py b/backend/app/services/llm/transformers/__init__.py deleted file mode 100644 index a43f8ff8..00000000 --- a/backend/app/services/llm/transformers/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -"""LLM transformers module. - -This module contains all transformer implementations for converting -unified API contracts to provider-specific formats. -Currently supports OpenAI with an extensible factory pattern for future providers. -""" - -from app.services.llm.transformers.base import ConfigTransformer -from app.services.llm.transformers.factory import TransformerFactory -from app.services.llm.transformers.openai import OpenAITransformer - -__all__ = [ - "ConfigTransformer", - "TransformerFactory", - "OpenAITransformer", -] diff --git a/backend/app/services/llm/transformers/base.py b/backend/app/services/llm/transformers/base.py deleted file mode 100644 index a29bd149..00000000 --- a/backend/app/services/llm/transformers/base.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Base configuration transformer for LLM providers. - -This module provides the transformation logic to convert from the unified -API contract to provider-specific configurations. It uses model specs to -guide the transformation and validation process. -""" - -from abc import ABC, abstractmethod -from typing import Any, Optional - -from app.models.llm import LLMCallRequest -from app.models.llm.specs import ModelSpec, model_spec_registry - - -class ConfigTransformer(ABC): - """Base class for provider-specific config transformers. - - Each provider (OpenAI, Anthropic, etc.) should implement a transformer - that knows how to convert our unified API contract into that provider's - specific API format. 
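For illustration, the parameter dictionary that `to_api_params()` above would produce when file search is enabled; the vector store id is invented, and when `max_num_results` is omitted the method falls back to 20.

```python
from app.services.llm.specs import OpenAISpec

spec = OpenAISpec(
    model="gpt-4o",
    prompt="What does the handbook say about leave?",
    vector_store_id="vs_demo_123",  # hypothetical id
    max_num_results=5,
)

params = spec.to_api_params()
# Expected shape, per the hunk above:
# {
#     "model": "gpt-4o",
#     "input": [{"role": "user", "content": "What does the handbook say about leave?"}],
#     "tools": [
#         {
#             "type": "file_search",
#             "vector_store_ids": ["vs_demo_123"],
#             "max_num_results": 5,
#         }
#     ],
#     "include": ["file_search_call.results"],
# }
```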
- - Attributes: - model_spec: Optional model specification for validation - """ - - def __init__(self, model_spec: Optional[ModelSpec] = None): - """Initialize transformer with optional model spec. - - Args: - model_spec: Optional model specification for validation - """ - self.model_spec = model_spec - - @abstractmethod - def transform(self, request: LLMCallRequest) -> dict[str, Any]: - """Transform unified request to provider-specific parameters. - - Args: - request: Unified LLM call request - - Returns: - Provider-specific parameter dictionary - - Raises: - ValueError: If transformation fails or validation errors occur - """ - raise NotImplementedError("Transformers must implement transform method") - - def validate_and_transform(self, request: LLMCallRequest) -> dict[str, Any]: - """Validate request against model spec and transform. - - Args: - request: Unified LLM call request - - Returns: - Provider-specific parameter dictionary - - Raises: - ValueError: If validation fails - """ - # If we have a model spec, validate the config - if self.model_spec: - config = { - "model": request.llm.llm_model_spec.model, - "provider": request.llm.llm_model_spec.provider, - "temperature": request.llm.llm_model_spec.temperature, - "max_tokens": request.llm.llm_model_spec.max_tokens, - "top_p": request.llm.llm_model_spec.top_p, - } - - # Add advanced configs if present - if request.llm.llm_model_spec.reasoning: - config["reasoning"] = request.llm.llm_model_spec.reasoning.effort - - if request.llm.llm_model_spec.text: - config["text"] = request.llm.llm_model_spec.text.verbosity - - # Validate against spec - is_valid, error_msg = self.model_spec.validate_config(config) - if not is_valid: - raise ValueError(f"Configuration validation failed: {error_msg}") - - # Perform transformation - return self.transform(request) diff --git a/backend/app/services/llm/transformers/factory.py b/backend/app/services/llm/transformers/factory.py deleted file mode 100644 index 5ed7c0b8..00000000 --- a/backend/app/services/llm/transformers/factory.py +++ /dev/null @@ -1,89 +0,0 @@ -"""Transformer factory for creating configuration transformers. - -This module provides a factory for instantiating the appropriate -transformer based on the provider type. -""" - -from typing import Optional - -from app.models.llm.specs import model_spec_registry -from app.services.llm.exceptions import UnsupportedProviderError -from app.services.llm.transformers.base import ConfigTransformer -from app.services.llm.transformers.openai import OpenAITransformer - - -class TransformerFactory: - """Factory for creating transformer instances. - - This factory creates the appropriate transformer based on the provider type - and optionally uses model specs for validation. - """ - - _TRANSFORMERS: dict[str, type[ConfigTransformer]] = { - "openai": OpenAITransformer, - # Future transformers can be added here: - # "anthropic": AnthropicTransformer, - # "google": GoogleTransformer, - } - - @classmethod - def create_transformer( - cls, - provider: str, - model_name: Optional[str] = None, - use_spec: bool = True, - ) -> ConfigTransformer: - """Create a transformer instance for the given provider. 
- - Args: - provider: Provider name (openai, anthropic, google, azure) - model_name: Optional model name to load spec for validation - use_spec: Whether to use model spec for validation (default: True) - - Returns: - ConfigTransformer instance - - Raises: - UnsupportedProviderError: If provider is not supported - """ - transformer_class = cls._TRANSFORMERS.get(provider.lower()) - if transformer_class is None: - raise UnsupportedProviderError( - provider=provider, - supported_providers=cls.get_supported_providers() - ) - - # Load model spec if available and requested - model_spec = None - if use_spec and model_name: - model_spec = model_spec_registry.get_spec(provider.lower(), model_name) - - return transformer_class(model_spec=model_spec) - - @classmethod - def get_supported_providers(cls) -> list[str]: - """Get list of supported provider types. - - Returns: - List of supported provider type strings - """ - return list(cls._TRANSFORMERS.keys()) - - @classmethod - def register_transformer( - cls, provider: str, transformer_class: type[ConfigTransformer] - ) -> None: - """Register a custom transformer for a provider. - - Args: - provider: Provider name - transformer_class: Transformer class to register - - Raises: - TypeError: If transformer_class doesn't inherit from ConfigTransformer - """ - if not issubclass(transformer_class, ConfigTransformer): - raise TypeError( - f"{transformer_class.__name__} must inherit from ConfigTransformer" - ) - cls._TRANSFORMERS[provider.lower()] = transformer_class diff --git a/backend/app/services/llm/transformers/openai.py b/backend/app/services/llm/transformers/openai.py deleted file mode 100644 index c9ead3df..00000000 --- a/backend/app/services/llm/transformers/openai.py +++ /dev/null @@ -1,77 +0,0 @@ -"""OpenAI configuration transformer. - -This module transforms unified API requests into OpenAI-specific format. -""" - -from typing import Any, Optional - -from app.models.llm import LLMCallRequest -from app.models.llm.specs import ModelSpec -from app.services.llm.transformers.base import ConfigTransformer - - -class OpenAITransformer(ConfigTransformer): - """Transformer for OpenAI API format. - - Converts unified API contract to OpenAI Responses API format. - Supports: - - Standard models (GPT-4, GPT-3.5) - - O-series models with reasoning configuration - - Text configuration for verbosity control - - Vector store file search integration - """ - - def __init__(self, model_spec: Optional[ModelSpec] = None): - """Initialize OpenAI transformer. - - Args: - model_spec: Optional model specification for validation - """ - super().__init__(model_spec) - - def transform(self, request: LLMCallRequest) -> dict[str, Any]: - """Transform request to OpenAI API parameters. 
- - Args: - request: Unified LLM call request - - Returns: - OpenAI API parameter dictionary - """ - model_spec = request.llm.llm_model_spec - - # Base parameters - params: dict[str, Any] = { - "model": model_spec.model, - "input": [{"role": "user", "content": request.llm.prompt}], - } - - # Add optional standard parameters - if model_spec.temperature is not None: - params["temperature"] = model_spec.temperature - - if model_spec.max_tokens is not None: - params["max_tokens"] = model_spec.max_tokens - - if model_spec.top_p is not None: - params["top_p"] = model_spec.top_p - - # Add advanced OpenAI configs (o-series models) - if model_spec.reasoning: - params["reasoning"] = {"effort": model_spec.reasoning.effort} - - if model_spec.text: - params["text"] = {"verbosity": model_spec.text.verbosity} - - # Add vector store file search if provided - if request.llm.vector_store_id: - params["tools"] = [ - { - "type": "file_search", - "vector_store_ids": [request.llm.vector_store_id], - "max_num_results": request.max_num_results, - } - ] - params["include"] = ["file_search_call.results"] - - return params diff --git a/backend/test_new_architecture.py b/backend/test_new_architecture.py deleted file mode 100644 index b1027e84..00000000 --- a/backend/test_new_architecture.py +++ /dev/null @@ -1,209 +0,0 @@ -"""Standalone test script for the new specification-driven architecture. - -This script tests the core functionality without importing the full app, -avoiding circular import issues. -""" - -import sys -from pathlib import Path - -# Add backend to path -backend_path = Path(__file__).parent -sys.path.insert(0, str(backend_path)) - -print("=" * 70) -print("Testing Specification-Driven LLM Architecture") -print("=" * 70) - -# Test 1: Model Spec Creation and Validation -print("\n1. Testing Model Specification...") -from app.models.llm.model_spec import ( - ModelSpec, - ModelCapabilities, - ParameterSpec, - ModelSpecRegistry, -) - -spec = ModelSpec( - model_name="gpt-4", - provider="openai", - capabilities=ModelCapabilities( - supports_file_search=True, - supports_function_calling=True, - ), - parameters=[ - ParameterSpec( - name="temperature", - type="float", - min_value=0.0, - max_value=2.0, - ), - ParameterSpec( - name="max_tokens", - type="int", - min_value=1, - ), - ], -) - -print(f"✓ Created ModelSpec for {spec.model_name}") -print(f" - Provider: {spec.provider}") -print(f" - Supports file_search: {spec.capabilities.supports_file_search}") -print(f" - Parameters: {[p.name for p in spec.parameters]}") - -# Test 2: Configuration Validation -print("\n2. Testing Configuration Validation...") -valid_config = { - "model": "gpt-4", - "temperature": 0.7, - "max_tokens": 1000, -} - -is_valid, error = spec.validate_config(valid_config) -print(f"✓ Valid config validation: {is_valid} (error: {error})") - -invalid_config = { - "model": "gpt-4", - "temperature": 5.0, # Out of range -} - -is_valid, error = spec.validate_config(invalid_config) -print(f"✓ Invalid config validation: {is_valid} (error: {error})") - -# Test 3: Model Registry -print("\n3. Testing Model Registry...") -registry = ModelSpecRegistry() -registry.clear() -registry.register(spec) - -retrieved = registry.get_spec("openai", "gpt-4") -print(f"✓ Registry retrieval: {retrieved.model_name if retrieved else 'None'}") - -# Test 4: OpenAI Specs Registration -print("\n4. 
Testing OpenAI Specs...") -# Import directly to avoid circular import in __init__.py -import sys -sys.path.insert(0, str(backend_path / "app")) -from services.llm.specs.openai_specs import create_openai_specs - -specs = create_openai_specs() -print(f"✓ Created {len(specs)} OpenAI model specs") - -model_names = [s.model_name for s in specs[:5]] -print(f" - Sample models: {', '.join(model_names)}") - -# Test 5: Transformers -print("\n5. Testing Transformers...") -from app.models.llm import ( - LLMCallRequest, - LLMConfig, - LLMModelSpec, - ReasoningConfig, -) - -# Import transformers directly -from services.llm.transformer import OpenAITransformer, AnthropicTransformer - -openai_transformer = OpenAITransformer() -print(f"✓ Created OpenAITransformer") - -request = LLMCallRequest( - llm=LLMConfig( - prompt="Hello, world!", - llm_model_spec=LLMModelSpec( - model="gpt-4", - provider="openai", - temperature=0.7, - max_tokens=1000, - ), - ), -) - -params = openai_transformer.transform(request) -print(f"✓ Transformed request to OpenAI params:") -print(f" - Model: {params['model']}") -print(f" - Temperature: {params.get('temperature')}") -print(f" - Max tokens: {params.get('max_tokens')}") - -# Test 6: O-series model transformation -print("\n6. Testing O-series Model Transformation...") -o_series_request = LLMCallRequest( - llm=LLMConfig( - prompt="Complex reasoning task", - llm_model_spec=LLMModelSpec( - model="o3-mini", - provider="openai", - temperature=0.5, - reasoning=ReasoningConfig(effort="high"), - ), - ), -) - -o_params = openai_transformer.transform(o_series_request) -print(f"✓ Transformed o-series request:") -print(f" - Model: {o_params['model']}") -print(f" - Reasoning effort: {o_params.get('reasoning', {}).get('effort')}") - -# Test 7: Anthropic Transformer -print("\n7. Testing Anthropic Transformer...") -anthropic_transformer = AnthropicTransformer() - -anthropic_request = LLMCallRequest( - llm=LLMConfig( - prompt="Hello, Claude!", - llm_model_spec=LLMModelSpec( - model="claude-3-opus", - provider="anthropic", - max_tokens=2048, - ), - ), -) - -anthropic_params = anthropic_transformer.transform(anthropic_request) -print(f"✓ Transformed Anthropic request:") -print(f" - Model: {anthropic_params['model']}") -print(f" - Max tokens: {anthropic_params.get('max_tokens')}") -print(f" - Messages format: {type(anthropic_params.get('messages'))}") - -# Test 8: Validation with Transformer -print("\n8. Testing Validation in Transformer...") -o_spec = next((s for s in specs if s.model_name == "o3-mini"), None) -if o_spec: - transformer_with_spec = OpenAITransformer(model_spec=o_spec) - - # Valid request - try: - validated_params = transformer_with_spec.validate_and_transform(o_series_request) - print(f"✓ Valid request passed validation") - except ValueError as e: - print(f"✗ Valid request failed: {e}") - - # Invalid request (temperature out of range) - invalid_request = LLMCallRequest( - llm=LLMConfig( - prompt="Test", - llm_model_spec=LLMModelSpec( - model="o3-mini", - provider="openai", - temperature=10.0, # Out of range! - ), - ), - ) - - try: - transformer_with_spec.validate_and_transform(invalid_request) - print(f"✗ Invalid request should have failed validation") - except ValueError as e: - print(f"✓ Invalid request correctly rejected: {str(e)[:50]}...") - -print("\n" + "=" * 70) -print("All Tests Completed Successfully!") -print("=" * 70) -print("\nSummary of New Architecture:") -print("1. ✓ Model specs define capabilities and parameter constraints") -print("2. 
✓ Validation happens at the spec level") -print("3. ✓ Transformers convert unified API to provider formats") -print("4. ✓ Automatic validation during transformation") -print("5. ✓ Registry manages all model specifications") -print("6. ✓ Supports OpenAI (standard & o-series), Anthropic, Google") -print("\nThe architecture is ready to use!") diff --git a/backend/test_specs_only.py b/backend/test_specs_only.py deleted file mode 100644 index 7893ba34..00000000 --- a/backend/test_specs_only.py +++ /dev/null @@ -1,156 +0,0 @@ -"""Simple test for model specs and transformers without full app imports.""" - -import sys -from pathlib import Path - -backend_path = Path(__file__).parent -sys.path.insert(0, str(backend_path)) - -print("=" * 70) -print("Testing Core Specification and Transformation Logic") -print("=" * 70) - -# Test Model Spec -print("\n1. Model Specification...") -from app.models.llm.model_spec import ModelSpec, ModelCapabilities, ParameterSpec - -spec = ModelSpec( - model_name="gpt-4", - provider="openai", - capabilities=ModelCapabilities(supports_file_search=True), - parameters=[ - ParameterSpec(name="temperature", type="float", min_value=0.0, max_value=2.0) - ], -) - -config = {"temperature": 0.7} -is_valid, error = spec.validate_config(config) -print(f"✓ Valid config: {is_valid}") - -config = {"temperature": 5.0} -is_valid, error = spec.validate_config(config) -print(f"✓ Invalid config rejected: {not is_valid} - {error}") - -# Test Transformers (import module directly by file, avoiding __init__) -print("\n2. Transformers...") - -# Import transformer module directly -import importlib.util -spec_tf = importlib.util.spec_from_file_location( - "transformer", - backend_path / "app" / "services" / "llm" / "transformer.py" -) -transformer = importlib.util.module_from_spec(spec_tf) -spec_tf.loader.exec_module(transformer) - -from app.models.llm.call import LLMCallRequest, LLMConfig, LLMModelSpec, ReasoningConfig - -openai_tf = transformer.OpenAITransformer() - -request = LLMCallRequest( - llm=LLMConfig( - prompt="Test", - llm_model_spec=LLMModelSpec( - model="gpt-4", - provider="openai", - temperature=0.7, - ), - ), -) - -params = openai_tf.transform(request) -print(f"✓ OpenAI transform: model={params['model']}, temp={params.get('temperature')}") - -# Test O-series -o_request = LLMCallRequest( - llm=LLMConfig( - prompt="Think hard", - llm_model_spec=LLMModelSpec( - model="o3", - provider="openai", - reasoning=ReasoningConfig(effort="high"), - ), - ), -) - -o_params = openai_tf.transform(o_request) -print(f"✓ O-series transform: reasoning={o_params.get('reasoning')}") - -# Test Anthropic -anthropic_tf = transformer.AnthropicTransformer() -anthropic_request = LLMCallRequest( - llm=LLMConfig( - prompt="Hello", - llm_model_spec=LLMModelSpec( - model="claude-3-opus", - provider="anthropic", - max_tokens=1024, - ), - ), -) - -anthropic_params = anthropic_tf.transform(anthropic_request) -print(f"✓ Anthropic transform: model={anthropic_params['model']}, max_tokens={anthropic_params['max_tokens']}") - -# Test OpenAI Specs -print("\n3. 
OpenAI Model Specs...") - -# Import openai_specs directly -spec_os = importlib.util.spec_from_file_location( - "openai_specs", - backend_path / "app" / "services" / "llm" / "specs" / "openai_specs.py" -) -openai_specs = importlib.util.module_from_spec(spec_os) -spec_os.loader.exec_module(openai_specs) - -specs = openai_specs.create_openai_specs() -print(f"✓ Created {len(specs)} OpenAI model specs") - -gpt4 = next(s for s in specs if s.model_name == "gpt-4") -print(f"✓ GPT-4 spec: {gpt4.model_name}, params={[p.name for p in gpt4.parameters[:3]]}") - -o3 = next(s for s in specs if s.model_name == "o3-mini") -print(f"✓ O3-mini spec: supports_reasoning={o3.capabilities.supports_reasoning}") - -# Test validation with spec -print("\n4. Spec-based Validation...") -tf_with_spec = transformer.OpenAITransformer(model_spec=o3) - -valid_o_request = LLMCallRequest( - llm=LLMConfig( - prompt="Test", - llm_model_spec=LLMModelSpec( - model="o3-mini", - provider="openai", - temperature=0.5, - reasoning=ReasoningConfig(effort="medium"), - ), - ), -) - -try: - params = tf_with_spec.validate_and_transform(valid_o_request) - print(f"✓ Valid O-series config passed validation") -except ValueError as e: - print(f"✗ Should have passed: {e}") - -invalid_o_request = LLMCallRequest( - llm=LLMConfig( - prompt="Test", - llm_model_spec=LLMModelSpec( - model="o3-mini", - provider="openai", - temperature=10.0, # Out of range - ), - ), -) - -try: - params = tf_with_spec.validate_and_transform(invalid_o_request) - print(f"✗ Should have failed validation") -except ValueError as e: - print(f"✓ Invalid config correctly rejected") - -print("\n" + "=" * 70) -print("SUCCESS: Core architecture works correctly!") -print("=" * 70) From a7f63d85f2e569013c4954f4933f967415bba12b Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Tue, 21 Oct 2025 08:40:50 +0530 Subject: [PATCH 04/15] Implement from llm request --- backend/app/models/__init__.py | 1 - backend/app/services/llm/orchestrator.py | 16 +--- backend/app/services/llm/providers/factory.py | 3 +- backend/app/services/llm/providers/openai.py | 49 +++++++++- backend/app/services/llm/specs/openai.py | 94 ++++++++++++++----- 5 files changed, 120 insertions(+), 43 deletions(-) diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 20cfac7f..10f9cbec 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -53,7 +53,6 @@ LLMCallResponse, LLMConfig, LLMModelSpec, - ProviderType, ) from .message import Message diff --git a/backend/app/services/llm/orchestrator.py b/backend/app/services/llm/orchestrator.py index 2b0f8b22..ce1de4c1 100644 --- a/backend/app/services/llm/orchestrator.py +++ b/backend/app/services/llm/orchestrator.py @@ -21,11 +21,7 @@ def execute_llm_call( """Execute LLM call using the appropriate provider. This is the main orchestration function that routes requests to - provider-specific implementations. It uses the provider factory - to instantiate the correct provider based on the request configuration. - - The function is designed to be provider-agnostic, supporting multiple - LLM providers (OpenAI, Anthropic, Google, etc.) through a unified interface. + provider-specific implementations. Args: request: LLM call request with configuration (includes provider type) @@ -35,16 +31,6 @@ def execute_llm_call( Tuple of (response, error_message) - If successful: (LLMCallResponse, None) - If failed: (None, error_message) - - Example: - >>> request = LLMCallRequest( - ... 
llm=LLMConfig( - ... provider="openai", - ... prompt="Hello, world!", - ... llm_model_spec=LLMModelSpec(model="gpt-4") - ... ) - ... ) - >>> response, error = execute_llm_call(request, openai_client) """ provider_type = request.llm.llm_model_spec.provider diff --git a/backend/app/services/llm/providers/factory.py b/backend/app/services/llm/providers/factory.py index 6dd9bb3b..bf2881a4 100644 --- a/backend/app/services/llm/providers/factory.py +++ b/backend/app/services/llm/providers/factory.py @@ -7,7 +7,6 @@ import logging from typing import Any -from app.models.llm import ProviderType from app.services.llm.providers.base import BaseProvider from app.services.llm.providers.openai import OpenAIProvider @@ -33,7 +32,7 @@ class ProviderFactory: } @classmethod - def create_provider(cls, provider_type: ProviderType, client: Any) -> BaseProvider: + def create_provider(cls, provider_type: str, client: Any) -> BaseProvider: """Create a provider instance based on the provider type. Args: diff --git a/backend/app/services/llm/providers/openai.py b/backend/app/services/llm/providers/openai.py index e680bbb4..d552c527 100644 --- a/backend/app/services/llm/providers/openai.py +++ b/backend/app/services/llm/providers/openai.py @@ -43,6 +43,49 @@ def __init__(self, client: OpenAI): super().__init__(client) self.client = client + def _extract_message_from_output(self, output: list) -> str: + """Extract message text from response.output array. + + The Responses API returns output as a list that can contain various types: + - ResponseOutputMessage: Contains the assistant's text message + - ResponseFileSearchToolCall: File search results + - ResponseFunctionToolCall: Function call results + - ResponseReasoningItem: Reasoning traces + - etc. + + Args: + output: List of output items from the response + + Returns: + The extracted message text, or empty string if no message found + + Raises: + ValueError: If output format is unexpected + """ + if not output: + logger.warning("[OpenAIProvider] Empty output array in response") + return "" + + # Find the first ResponseOutputMessage in the output + for item in output: + # Check if it's a message type (has 'role' and 'content' attributes) + if hasattr(item, "type") and item.type == "message": + if hasattr(item, "content"): + # Content is a list of content items + if isinstance(item.content, list) and len(item.content) > 0: + # Get the first text content + first_content = item.content[0] + if hasattr(first_content, "text"): + return first_content.text + elif hasattr(first_content, "type") and first_content.type == "text": + return getattr(first_content, "text", "") + return "" + + logger.warning( + f"[OpenAIProvider] No message found in output array with {len(output)} items" + ) + return "" + def execute( self, request: LLMCallRequest ) -> tuple[LLMCallResponse | None, str | None]: @@ -71,11 +114,15 @@ def execute( logger.info(f"[OpenAIProvider] Making OpenAI call with model: {spec.model}") response = self.client.responses.create(**params) + # Extract message text from response.output array + # The output is a list that can contain various item types + message_text = self._extract_message_from_output(response.output) + # Build response llm_response = LLMCallResponse( status="success", response_id=response.id, - message=response.output_text, + message=message_text, model=response.model, input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens, diff --git a/backend/app/services/llm/specs/openai.py b/backend/app/services/llm/specs/openai.py 
index 1b6d711b..b49375d3 100644 --- a/backend/app/services/llm/specs/openai.py +++ b/backend/app/services/llm/specs/openai.py @@ -13,36 +13,27 @@ class OpenAISpec(SQLModel): - """OpenAI API specification with validation. + """OpenAI Responses API specification with validation. - This model defines all OpenAI-specific parameters with their constraints, + This model defines all OpenAI Responses API parameters with their constraints, provides validation, and handles conversion to OpenAI API format. - Attributes: - model: Model identifier (e.g., "gpt-4", "gpt-3.5-turbo", "o1-preview") - prompt: The user's input prompt - temperature: Sampling temperature (0.0-2.0) - max_tokens: Maximum number of tokens to generate (must be positive) - top_p: Nucleus sampling parameter (0.0-1.0) - reasoning_effort: Optional reasoning effort level for o-series models ("low", "medium", "high") - text_verbosity: Optional text verbosity level ("low", "medium", "high") - vector_store_id: Optional vector store ID for file search - max_num_results: Maximum number of file search results (1-50) + Aligns with OpenAI Responses API contract as of 2025. """ # Required parameters - model: str = Field(description="Model identifier") + model: str = Field(description="Model identifier (e.g., 'gpt-4o', 'gpt-4.1')") prompt: str = Field(description="User input prompt") - # Optional standard parameters + # Sampling parameters temperature: float | None = Field( default=None, ge=0.0, le=2.0, description="Sampling temperature between 0.0 and 2.0", ) - max_tokens: int | None = Field( - default=None, gt=0, description="Maximum tokens to generate" + max_output_tokens: int | None = Field( + default=None, gt=0, description="Maximum tokens to generate (Responses API uses max_output_tokens)" ) top_p: float | None = Field( default=None, ge=0.0, le=1.0, description="Nucleus sampling parameter" @@ -52,10 +43,29 @@ class OpenAISpec(SQLModel): reasoning_effort: Literal["low", "medium", "high"] | None = Field( default=None, description="Reasoning effort level for o-series models" ) + reasoning_generate_summary: bool | None = Field( + default=None, description="Whether to generate reasoning summary for o-series models" + ) text_verbosity: Literal["low", "medium", "high"] | None = Field( default=None, description="Text verbosity level" ) + # Conversation and state management + instructions: str | None = Field( + default=None, description="System instructions for the model" + ) + previous_response_id: str | None = Field( + default=None, description="Previous response ID for conversation continuity" + ) + store: bool | None = Field( + default=None, description="Whether to store the conversation with OpenAI" + ) + + # Tool configuration + parallel_tool_calls: bool | None = Field( + default=None, description="Whether to enable parallel tool calls" + ) + # Vector store file search vector_store_id: str | None = Field( default=None, description="Vector store ID for file search" @@ -64,6 +74,14 @@ class OpenAISpec(SQLModel): default=None, ge=1, le=50, description="Max file search results" ) + # Response configuration + truncation: Literal["auto", "disabled"] | None = Field( + default=None, description="Truncation strategy for long contexts" + ) + metadata: dict[str, str] | None = Field( + default=None, description="Custom metadata for the request" + ) + @model_validator(mode="after") def validate_vector_store(self) -> "OpenAISpec": """Validate vector store configuration. 
@@ -81,9 +99,10 @@ def validate_vector_store(self) -> "OpenAISpec": return self def to_api_params(self) -> dict[str, Any]: - """Convert to OpenAI API parameters. + """Convert to OpenAI Responses API parameters. Transforms this spec into the format expected by OpenAI's Responses API. + Uses the official API contract with correct parameter names and structure. Returns: Dictionary of API parameters ready for openai.responses.create() @@ -94,23 +113,42 @@ def to_api_params(self) -> dict[str, Any]: "input": [{"role": "user", "content": self.prompt}], } - # Add optional standard parameters + # Add optional sampling parameters if self.temperature is not None: params["temperature"] = self.temperature - if self.max_tokens is not None: - params["max_tokens"] = self.max_tokens + if self.max_output_tokens is not None: + params["max_output_tokens"] = self.max_output_tokens if self.top_p is not None: params["top_p"] = self.top_p + # Add conversation and state management + if self.instructions is not None: + params["instructions"] = self.instructions + + if self.previous_response_id is not None: + params["previous_response_id"] = self.previous_response_id + + if self.store is not None: + params["store"] = self.store + # Add advanced OpenAI configurations - if self.reasoning_effort is not None: - params["reasoning"] = {"effort": self.reasoning_effort} + if self.reasoning_effort is not None or self.reasoning_generate_summary is not None: + reasoning_config: dict[str, Any] = {} + if self.reasoning_effort is not None: + reasoning_config["effort"] = self.reasoning_effort + if self.reasoning_generate_summary is not None: + reasoning_config["generate_summary"] = self.reasoning_generate_summary + params["reasoning"] = reasoning_config if self.text_verbosity is not None: params["text"] = {"verbosity": self.text_verbosity} + # Add tool configuration + if self.parallel_tool_calls is not None: + params["parallel_tool_calls"] = self.parallel_tool_calls + # Add vector store file search if provided if self.vector_store_id: params["tools"] = [ @@ -122,13 +160,21 @@ def to_api_params(self) -> dict[str, Any]: ] params["include"] = ["file_search_call.results"] + # Add response configuration + if self.truncation is not None: + params["truncation"] = self.truncation + + if self.metadata is not None: + params["metadata"] = self.metadata + return params @classmethod - def from_llm_request(cls, request: "LLMCallRequest") -> "OpenAISpec": + def from_llm_request(cls, request: LLMCallRequest) -> "OpenAISpec": """Create OpenAISpec from LLMCallRequest. Convenience method to convert from the unified API request format. + Maps the provider-agnostic max_tokens to OpenAI's max_output_tokens. 
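For illustration, a hedged sketch of how the expanded spec maps the new conversation-state and reasoning fields onto Responses API parameters; the response id is invented and only fields shown in the hunks above are used.

```python
from app.services.llm.specs import OpenAISpec

spec = OpenAISpec(
    model="o3-mini",
    prompt="Walk through the proof again, briefly.",
    instructions="You are a terse maths tutor.",
    previous_response_id="resp_demo_123",  # hypothetical id
    store=True,
    reasoning_effort="high",
    text_verbosity="low",
    max_output_tokens=800,
)

params = spec.to_api_params()
# Per the hunk above:
# params["instructions"]         == "You are a terse maths tutor."
# params["previous_response_id"] == "resp_demo_123"
# params["store"]                is True
# params["reasoning"]            == {"effort": "high"}
# params["text"]                 == {"verbosity": "low"}
# params["max_output_tokens"]    == 800
```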
Args: request: Unified LLM call request @@ -142,7 +188,7 @@ def from_llm_request(cls, request: "LLMCallRequest") -> "OpenAISpec": model=model_spec.model, prompt=request.llm.prompt, temperature=model_spec.temperature, - max_tokens=model_spec.max_tokens, + max_output_tokens=model_spec.max_tokens, # Map max_tokens to max_output_tokens top_p=model_spec.top_p, reasoning_effort=model_spec.reasoning_effort, text_verbosity=model_spec.text_verbosity, From e76e2c83d0396ba66ed7f36dd1f68de5a3a4505f Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Tue, 21 Oct 2025 08:41:44 +0530 Subject: [PATCH 05/15] remove md --- backend/docs/LLM_ARCHITECTURE.md | 512 ------------------------------- 1 file changed, 512 deletions(-) delete mode 100644 backend/docs/LLM_ARCHITECTURE.md diff --git a/backend/docs/LLM_ARCHITECTURE.md b/backend/docs/LLM_ARCHITECTURE.md deleted file mode 100644 index e43240fe..00000000 --- a/backend/docs/LLM_ARCHITECTURE.md +++ /dev/null @@ -1,512 +0,0 @@ -# LLM API Specification-Driven Architecture - -## Overview - -The LLM API now uses a **specification-driven architecture** that separates concerns between: - -1. **Model Specifications** - Define what each model supports -2. **Transformation Layer** - Convert unified API to provider-specific formats -3. **Validation** - Automatic validation against model specs -4. **Providers** - Execute API calls using transformed configurations - -This architecture eliminates the need for `build_params` logic in providers and centralizes configuration management. - -## Architecture Components - -### 1. Model Specifications (`app/models/llm/model_spec.py`) - -Model specifications are the **single source of truth** for what each LLM model supports. - -```python -from app.models.llm.model_spec import ModelSpec, ModelCapabilities, ParameterSpec - -spec = ModelSpec( - model_name="gpt-4", - provider="openai", - capabilities=ModelCapabilities( - supports_file_search=True, - supports_function_calling=True, - supports_streaming=True, - ), - parameters=[ - ParameterSpec( - name="temperature", - type="float", - min_value=0.0, - max_value=2.0, - default=1.0, - ), - ParameterSpec( - name="max_tokens", - type="int", - min_value=1, - max_value=128000, - ), - ], -) -``` - -**Key Features:** -- Declarative capability flags -- Parameter type and range constraints -- Automatic validation via `validate_config()` -- Feature detection via `supports_feature()` - -### 2. Model Registry (`ModelSpecRegistry`) - -The global registry manages all model specifications: - -```python -from app.models.llm.model_spec import model_spec_registry - -# Register a spec -model_spec_registry.register(spec) - -# Get a spec -spec = model_spec_registry.get_spec("openai", "gpt-4") - -# Validate config -is_valid, error = model_spec_registry.validate_config( - "openai", "gpt-4", {"temperature": 0.7} -) -``` - -### 3. 
Transformation Layer (`app/services/llm/transformer.py`) - -Transformers convert the unified API contract to provider-specific formats: - -```python -from app.services.llm.transformer import OpenAITransformer, TransformerFactory - -# Create transformer -transformer = TransformerFactory.create_transformer( - provider="openai", - model_name="gpt-4", - use_spec=True, # Enable validation -) - -# Transform and validate -params = transformer.validate_and_transform(request) -``` - -**Available Transformers:** -- `OpenAITransformer` - OpenAI Responses API format -- `AnthropicTransformer` - Anthropic Messages API format -- `GoogleTransformer` - Google Generative AI format -- `AzureOpenAITransformer` - Azure OpenAI (same as OpenAI) - -### 4. Updated Provider Interface (`BaseProvider`) - -Providers now use transformers automatically: - -```python -class BaseProvider(ABC): - def __init__(self, client: Any, transformer: Optional[ConfigTransformer] = None): - self.client = client - self.transformer = transformer - - def build_params(self, request: LLMCallRequest) -> dict[str, Any]: - """Uses transformer to build params with automatic validation.""" - transformer = self._get_transformer(request) - return transformer.validate_and_transform(request) -``` - -## Request Flow - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ 1. API Request: LLMCallRequest │ -│ - Unified API contract │ -│ - Provider + model specified │ -│ - Standard parameters (temp, max_tokens, etc.) │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ 2. Provider Detection │ -│ - Extract provider type from request │ -│ - Create provider instance via ProviderFactory │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ 3. Transformer Creation │ -│ - TransformerFactory creates appropriate transformer │ -│ - Loads model spec from registry (if available) │ -│ - Transformer initialized with spec for validation │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ 4. Config Validation │ -│ - Extract parameters from request │ -│ - Validate against model spec: │ -│ ✓ Type checking (int, float, str, bool) │ -│ ✓ Range validation (min/max values) │ -│ ✓ Allowed values (e.g., "low"/"medium"/"high") │ -│ ✓ Required parameter checking │ -│ - Raise ValueError if validation fails │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ 5. Transformation │ -│ - Convert unified API to provider format: │ -│ • OpenAI: {model, input, temperature, ...} │ -│ • Anthropic: {model, messages, max_tokens, ...} │ -│ • Google: {model, contents, generation_config, ...} │ -│ - Add provider-specific features (reasoning, tools, etc.) │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ 6. 
Provider Execution │ -│ - Provider calls build_params() (uses transformer) │ -│ - Makes API call to LLM provider │ -│ - Returns standardized LLMCallResponse │ -└─────────────────────────────────────────────────────────────────┘ -``` - -## OpenAI Model Specifications - -Pre-configured specs for OpenAI models are in `app/services/llm/specs/openai_specs.py`: - -**Standard Models:** -- GPT-4 series: `gpt-4`, `gpt-4-turbo`, `gpt-4o`, `gpt-4o-mini` -- GPT-3.5 series: `gpt-3.5-turbo`, `gpt-3.5-turbo-16k` - -**O-series Models (Reasoning):** -- `o1`, `o1-preview`, `o1-mini` -- `o3`, `o3-mini` -- Support reasoning configuration and text verbosity - -**Capabilities by Model:** - -| Model | File Search | Functions | Streaming | Vision | Reasoning | -|-------|-------------|-----------|-----------|--------|-----------| -| GPT-4 | ✓ | ✓ | ✓ | ✗ | ✗ | -| GPT-4o | ✓ | ✓ | ✓ | ✓ | ✗ | -| GPT-3.5 | ✓ | ✓ | ✓ | ✗ | ✗ | -| O-series | ✓ | ✗ | ✓ | ✗ | ✓ | - -## Adding New Model Specs - -### Option 1: Add to Existing Provider Specs - -Edit `app/services/llm/specs/openai_specs.py` (or create new provider spec file): - -```python -def create_new_model_specs() -> list[ModelSpec]: - specs = [] - - specs.append( - ModelSpec( - model_name="new-model", - provider="provider-name", - capabilities=ModelCapabilities( - supports_streaming=True, - # ... other capabilities - ), - parameters=[ - ParameterSpec( - name="temperature", - type="float", - min_value=0.0, - max_value=1.0, - ), - # ... other parameters - ], - ) - ) - - return specs -``` - -### Option 2: Register Dynamically at Runtime - -```python -from app.models.llm.model_spec import model_spec_registry, ModelSpec - -spec = ModelSpec(...) -model_spec_registry.register(spec) -``` - -## Adding New Providers - -### Step 1: Create Provider Transformer - -```python -# app/services/llm/transformer.py - -class NewProviderTransformer(ConfigTransformer): - """Transformer for NewProvider API format.""" - - def transform(self, request: LLMCallRequest) -> dict[str, Any]: - """Transform to NewProvider API format.""" - model_spec = request.llm.llm_model_spec - - params = { - "model": model_spec.model, - # ... 
provider-specific format - } - - # Add parameters - if model_spec.temperature is not None: - params["temperature"] = model_spec.temperature - - return params -``` - -### Step 2: Register Transformer - -```python -# In TransformerFactory -_TRANSFORMERS = { - "openai": OpenAITransformer, - "anthropic": AnthropicTransformer, - "google": GoogleTransformer, - "newprovider": NewProviderTransformer, # Add here -} -``` - -### Step 3: Create Provider Implementation - -```python -# app/services/llm/newprovider_provider.py - -class NewProviderProvider(BaseProvider): - """NewProvider implementation.""" - - def execute(self, request: LLMCallRequest) -> tuple[LLMCallResponse | None, str | None]: - try: - # build_params() uses transformer automatically - params = self.build_params(request) - - # Make API call - response = self.client.generate(**params) - - # Return standardized response - return LLMCallResponse(...), None - except Exception as e: - return None, str(e) -``` - -### Step 4: Register Provider - -```python -# app/services/llm/provider_factory.py - -_PROVIDERS = { - "openai": OpenAIProvider, - "anthropic": AnthropicProvider, - "newprovider": NewProviderProvider, # Add here -} -``` - -### Step 5: Create Model Specs - -```python -# app/services/llm/specs/newprovider_specs.py - -def create_newprovider_specs() -> list[ModelSpec]: - return [ - ModelSpec( - model_name="newprovider-model-1", - provider="newprovider", - capabilities=ModelCapabilities(...), - parameters=[...], - ), - ] - -def register_newprovider_specs() -> None: - specs = create_newprovider_specs() - for spec in specs: - model_spec_registry.register(spec) -``` - -### Step 6: Initialize Specs - -```python -# app/services/llm/specs/__init__.py - -from app.services.llm.specs.newprovider_specs import register_newprovider_specs - -def initialize_model_specs() -> None: - register_openai_specs() - register_newprovider_specs() # Add here -``` - -## Configuration Validation Examples - -### Valid Configuration - -```python -request = LLMCallRequest( - llm=LLMConfig( - prompt="Hello", - llm_model_spec=LLMModelSpec( - model="gpt-4", - provider="openai", - temperature=0.7, # ✓ Within range [0.0, 2.0] - max_tokens=1000, # ✓ Within range [1, 128000] - ), - ), -) -# ✓ Passes validation -``` - -### Invalid Configuration (Out of Range) - -```python -request = LLMCallRequest( - llm=LLMConfig( - prompt="Hello", - llm_model_spec=LLMModelSpec( - model="gpt-4", - provider="openai", - temperature=5.0, # ✗ Out of range [0.0, 2.0] - ), - ), -) -# ✗ Raises ValueError: "Parameter 'temperature' must be <= 2.0" -``` - -### Invalid Configuration (Wrong Type) - -```python -request = LLMCallRequest( - llm=LLMConfig( - prompt="Hello", - llm_model_spec=LLMModelSpec( - model="gpt-4", - provider="openai", - temperature="high", # ✗ Should be float - ), - ), -) -# ✗ Raises ValueError: "Parameter 'temperature' must be a number" -``` - -### O-series Model Configuration - -```python -request = LLMCallRequest( - llm=LLMConfig( - prompt="Complex reasoning task", - llm_model_spec=LLMModelSpec( - model="o3-mini", - provider="openai", - temperature=0.5, - reasoning=ReasoningConfig(effort="high"), # ✓ Valid: "low", "medium", "high" - text=TextConfig(verbosity="medium"), - ), - ), -) -# ✓ Passes validation -``` - -## Benefits of This Architecture - -### 1. Separation of Concerns -- **Model Specs**: Define capabilities -- **Transformers**: Handle format conversion -- **Providers**: Execute API calls -- **Validation**: Centralized in specs - -### 2. 
Maintainability -- Add new models by adding specs (no code changes) -- Add new providers by implementing transformer -- Validation logic in one place - -### 3. Type Safety -- Compile-time type checking -- Runtime validation against specs -- Clear error messages - -### 4. Extensibility -- Easy to add new providers -- Easy to add new models -- Runtime spec registration - -### 5. Testability -- Test specs independently -- Test transformers independently -- Test providers independently -- Mock transformers for provider tests - -## Migration Guide - -### Before (Old Architecture) - -```python -class OpenAIProvider(BaseProvider): - def build_params(self, request: LLMCallRequest) -> dict[str, Any]: - # Manual parameter building - params = { - "model": request.llm.llm_model_spec.model, - "input": [{"role": "user", "content": request.llm.prompt}], - } - - # Manual parameter handling - if request.llm.llm_model_spec.temperature is not None: - params["temperature"] = request.llm.llm_model_spec.temperature - - # No validation! - return params -``` - -### After (New Architecture) - -```python -class OpenAIProvider(BaseProvider): - # build_params() inherited from BaseProvider - # Automatically uses OpenAITransformer - # Automatic validation via model specs - pass -``` - -The `build_params()` method is now in `BaseProvider` and automatically: -1. Creates appropriate transformer -2. Validates configuration against model spec -3. Transforms to provider format -4. Raises clear errors if validation fails - -## Testing - -Run the architecture test: - -```bash -uv run python test_specs_only.py -``` - -Expected output: -``` -Testing Core Specification and Transformation Logic -====================================================================== - -1. Model Specification... -✓ Valid config: True -✓ Invalid config rejected: True - -2. Transformers... -✓ OpenAI transform: model=gpt-4, temp=0.7 -✓ O-series transform: reasoning={'effort': 'high'} -✓ Anthropic transform: model=claude-3-opus, max_tokens=1024 - -3. OpenAI Model Specs... -✓ Created 12 OpenAI model specs -✓ GPT-4 spec: gpt-4, params=['temperature', 'max_tokens', 'top_p'] - -4. Spec-based Validation... -✓ Valid O-series config passed validation -✓ Invalid config correctly rejected - -====================================================================== -SUCCESS: Core architecture works correctly! -====================================================================== -``` - -## Future Enhancements - -1. **Database-backed Specs**: Store model specs in database for dynamic updates -2. **Spec Versioning**: Version model specs for backward compatibility -3. **Capability Discovery**: API endpoint to list available models and capabilities -4. **Advanced Validation**: Custom validators, cross-parameter validation -5. **Streaming Support**: Add streaming capability to transformers -6. **Function Calling**: Unified function calling across providers -7. 
**Cost Tracking**: Add pricing info to model specs From 2941118cb658002ef3addc033b8036738cd77a08 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Tue, 21 Oct 2025 15:08:33 +0530 Subject: [PATCH 06/15] rename OpenAISpec to OpenAIResponseSpec --- backend/app/services/llm/providers/openai.py | 15 +++++++++------ backend/app/services/llm/specs/__init__.py | 4 ++-- .../llm/specs/{openai.py => openai/response.py} | 10 +++++----- 3 files changed, 16 insertions(+), 13 deletions(-) rename backend/app/services/llm/specs/{openai.py => openai/response.py} (97%) diff --git a/backend/app/services/llm/providers/openai.py b/backend/app/services/llm/providers/openai.py index d552c527..5af7cddd 100644 --- a/backend/app/services/llm/providers/openai.py +++ b/backend/app/services/llm/providers/openai.py @@ -4,7 +4,7 @@ including support for standard models, o-series models with reasoning, and file search capabilities. -Uses OpenAISpec for parameter validation and API conversion. +Uses OpenAIResponseSpec for parameter validation and API conversion. """ import logging @@ -16,7 +16,7 @@ from app.models.llm import LLMCallRequest, LLMCallResponse from app.services.llm.providers.base import BaseProvider -from app.services.llm.specs import OpenAISpec +from app.services.llm.specs import OpenAIResponseSpec from app.utils import handle_openai_error logger = logging.getLogger(__name__) @@ -31,7 +31,7 @@ class OpenAIProvider(BaseProvider): - Text configuration for verbosity control - Vector store file search integration - Uses OpenAISpec for parameter validation and conversion. + Uses OpenAIResponseSpec for parameter validation and conversion. """ def __init__(self, client: OpenAI): @@ -77,7 +77,10 @@ def _extract_message_from_output(self, output: list) -> str: first_content = item.content[0] if hasattr(first_content, "text"): return first_content.text - elif hasattr(first_content, "type") and first_content.type == "text": + elif ( + hasattr(first_content, "type") + and first_content.type == "text" + ): return getattr(first_content, "text", "") return "" @@ -91,7 +94,7 @@ def execute( ) -> tuple[LLMCallResponse | None, str | None]: """Execute OpenAI API call. - Uses OpenAISpec to validate and convert the request to OpenAI format. + Uses OpenAIResponseSpec to validate and convert the request to OpenAI format. 
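For illustration, the `_extract_message_from_output()` helper touched just above walks `response.output` for the first message item and returns its first text content; a tiny sketch of that behaviour using `SimpleNamespace` stand-ins (the real SDK types differ).

```python
from types import SimpleNamespace

output = [
    SimpleNamespace(type="reasoning"),  # skipped: not a message item
    SimpleNamespace(
        type="message",
        content=[SimpleNamespace(type="output_text", text="Hello from the model")],
    ),
]

# provider._extract_message_from_output(output) would return
# "Hello from the model": the first item with type == "message" wins,
# and the text of its first content entry is extracted.
```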
Args: request: LLM call request with configuration @@ -106,7 +109,7 @@ def execute( try: # Create and validate OpenAI spec from request - spec = OpenAISpec.from_llm_request(request) + spec = OpenAIResponseSpec.from_llm_request(request) # Convert to API parameters (validation happens during spec creation) params = spec.to_api_params() diff --git a/backend/app/services/llm/specs/__init__.py b/backend/app/services/llm/specs/__init__.py index f4f6c6cf..c010a127 100644 --- a/backend/app/services/llm/specs/__init__.py +++ b/backend/app/services/llm/specs/__init__.py @@ -1,3 +1,3 @@ -from .openai import OpenAISpec +from .openai.response import OpenAIResponseSpec -__all__ = ["OpenAISpec"] +__all__ = ["OpenAIResponseSpec"] diff --git a/backend/app/services/llm/specs/openai.py b/backend/app/services/llm/specs/openai/response.py similarity index 97% rename from backend/app/services/llm/specs/openai.py rename to backend/app/services/llm/specs/openai/response.py index b49375d3..d6cdc948 100644 --- a/backend/app/services/llm/specs/openai.py +++ b/backend/app/services/llm/specs/openai/response.py @@ -12,7 +12,7 @@ from app.models.llm.request import LLMCallRequest -class OpenAISpec(SQLModel): +class OpenAIResponseSpec(SQLModel): """OpenAI Responses API specification with validation. This model defines all OpenAI Responses API parameters with their constraints, @@ -83,7 +83,7 @@ class OpenAISpec(SQLModel): ) @model_validator(mode="after") - def validate_vector_store(self) -> "OpenAISpec": + def validate_vector_store(self) -> "OpenAIResponseSpec": """Validate vector store configuration. Ensures that if vector_store_id is provided, it's a valid non-empty string. @@ -170,8 +170,8 @@ def to_api_params(self) -> dict[str, Any]: return params @classmethod - def from_llm_request(cls, request: LLMCallRequest) -> "OpenAISpec": - """Create OpenAISpec from LLMCallRequest. + def from_llm_request(cls, request: LLMCallRequest) -> "OpenAIResponseSpec": + """Create OpenAIResponseSpec from LLMCallRequest. Convenience method to convert from the unified API request format. Maps the provider-agnostic max_tokens to OpenAI's max_output_tokens. @@ -180,7 +180,7 @@ def from_llm_request(cls, request: LLMCallRequest) -> "OpenAISpec": request: Unified LLM call request Returns: - OpenAISpec instance + OpenAIResponseSpec instance """ model_spec = request.llm.llm_model_spec From a8b957788d80e72be1cd09c2a0b7621d7ba08050 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Wed, 22 Oct 2025 07:36:18 +0530 Subject: [PATCH 07/15] Enhanced OpenAISpec Configuration --- .../app/services/llm/specs/openai/response.py | 354 +++++++++++------- 1 file changed, 211 insertions(+), 143 deletions(-) diff --git a/backend/app/services/llm/specs/openai/response.py b/backend/app/services/llm/specs/openai/response.py index d6cdc948..e6532ae2 100644 --- a/backend/app/services/llm/specs/openai/response.py +++ b/backend/app/services/llm/specs/openai/response.py @@ -1,15 +1,89 @@ -"""OpenAI specification model. +"""OpenAI Responses API specification model. This module defines the OpenAI-specific parameter specification with built-in -validation and conversion to API format. +validation and conversion to API format based on the official OpenAI Responses API contract. 
+ +Reference: https://platform.openai.com/docs/api-reference/responses/create """ from typing import Any, Literal -from pydantic import Field, model_validator +from pydantic import Field, field_validator, model_validator from sqlmodel import SQLModel -from app.models.llm.request import LLMCallRequest + +class ReasoningConfig(SQLModel): + """Configuration options for reasoning models (gpt-5 and o-series models only).""" + + effort: Literal["minimal", "low", "medium", "high"] | None = Field( + default="medium", + description=( + "Constrains effort on reasoning for reasoning models. " + "Reducing reasoning effort can result in faster responses and fewer tokens used. " + "Note: The gpt-5-pro model defaults to (and only supports) high reasoning effort." + ), + ) + summary: Literal["auto", "concise", "detailed"] | None = Field( + default=None, + description=( + "A summary of the reasoning performed by the model. " + "This can be useful for debugging and understanding the model's reasoning process." + ), + ) + + +class TextFormatConfig(SQLModel): + """An object specifying the format that the model must output.""" + + type: Literal["text", "json_object", "json_schema"] = Field( + description=( + "The format type. " + "'text': Plain text output. " + "'json_object': Older JSON mode (not recommended for newer models). " + "'json_schema': Structured Outputs with JSON schema validation (recommended)." + ) + ) + json_schema: dict[str, Any] | None = Field( + default=None, + description="The JSON schema to validate against when type is 'json_schema'.", + ) + + +class TextConfig(SQLModel): + """Configuration options for text response from the model.""" + + format: TextFormatConfig | None = Field( + default=None, + description=( + "An object specifying the format that the model must output. " + "Default format is { 'type': 'text' }." + ), + ) + verbosity: Literal["low", "medium", "high"] | None = Field( + default="medium", + description=( + "Constrains the verbosity of the model's response. " + "Lower values result in more concise responses, higher values result in more verbose responses." + ), + ) + + +class FileSearchTool(SQLModel): + """Tool configuration for searching through vector stores.""" + + type: Literal["file_search"] = Field( + default="file_search", + description="The type of tool. Always 'file_search'.", + ) + vector_store_ids: list[str] = Field( + description="Vector store IDs to search through.", + ) + max_num_results: int | None = Field( + default=None, + ge=1, + le=50, + description="Maximum number of results for file_search tool.", + ) class OpenAIResponseSpec(SQLModel): @@ -18,180 +92,174 @@ class OpenAIResponseSpec(SQLModel): This model defines all OpenAI Responses API parameters with their constraints, provides validation, and handles conversion to OpenAI API format. - Aligns with OpenAI Responses API contract as of 2025. + Aligns with OpenAI Responses API contract (POST https://api.openai.com/v1/responses). + Reference: https://platform.openai.com/docs/api-reference/responses/create """ - # Required parameters - model: str = Field(description="Model identifier (e.g., 'gpt-4o', 'gpt-4.1')") - prompt: str = Field(description="User input prompt") + model: str | None = Field( + default="gpt-4o", + description=( + "Model ID used to generate the response, like gpt-4o or o3. " + "OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points." 
+ ), + ) - # Sampling parameters - temperature: float | None = Field( + input: str = Field( default=None, - ge=0.0, - le=2.0, - description="Sampling temperature between 0.0 and 2.0", - ) - max_output_tokens: int | None = Field( - default=None, gt=0, description="Maximum tokens to generate (Responses API uses max_output_tokens)" + description=( + "Text used to generate a response. " + "Can be a simple text string (equivalent to a user role message), or a list of input items with different content types." + ), ) - top_p: float | None = Field( - default=None, ge=0.0, le=1.0, description="Nucleus sampling parameter" + + # Conversation + conversation: str | None = Field( + default=None, + description=( + "The conversation that this response belongs to. Items from this conversation are prepended to input_items. " + "Can be a conversation ID (string) or a conversation object. Defaults to null." + ), ) - # Advanced OpenAI-specific parameters - reasoning_effort: Literal["low", "medium", "high"] | None = Field( - default=None, description="Reasoning effort level for o-series models" + previous_response_id: str | None = Field( + default=None, + description=( + "The unique ID of the previous response to the model. Use this to create multi-turn conversations. " + "Cannot be used in conjunction with conversation." + ), ) - reasoning_generate_summary: bool | None = Field( - default=None, description="Whether to generate reasoning summary for o-series models" + + # Instructions & Context + + instructions: str | None = Field( + default=None, + description=( + "A system (or developer) message inserted into the model's context. " + "When using with previous_response_id, the instructions from a previous response will not be carried over." + ), ) - text_verbosity: Literal["low", "medium", "high"] | None = Field( - default=None, description="Text verbosity level" + include: Literal["file_search_call.results"] | None = Field( + default=None, + description=( + "Specify additional output data to include in the model response. " + "Currently supported values are: " + "file_search_call.results, " + ), ) - # Conversation and state management - instructions: str | None = Field( - default=None, description="System instructions for the model" + # Sampling Parameters + + temperature: float | None = Field( + default=1.0, + ge=0.0, + le=2.0, + description=( + "What sampling temperature to use, between 0 and 2. " + "Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. " + "We generally recommend altering this or top_p but not both." + ), ) - previous_response_id: str | None = Field( - default=None, description="Previous response ID for conversation continuity" + + top_p: float | None = Field( + default=1.0, + ge=0.0, + le=1.0, + description=( + "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. " + "So 0.1 means only the tokens comprising the top 10% probability mass are considered. " + "We generally recommend altering this or temperature but not both." + ), ) - store: bool | None = Field( - default=None, description="Whether to store the conversation with OpenAI" + + max_output_tokens: int | None = Field( + default=None, + gt=0, + description=( + "An upper bound for the number of tokens that can be generated for a response, " + "including visible output tokens and reasoning tokens." 
+ ), ) - # Tool configuration - parallel_tool_calls: bool | None = Field( - default=None, description="Whether to enable parallel tool calls" + # Tools (File Search Only) + + tools: list[FileSearchTool] | None = Field( + default=None, + description="File search tools for searching through vector stores.", ) - # Vector store file search - vector_store_id: str | None = Field( - default=None, description="Vector store ID for file search" + # Response Configuration + + text: TextConfig | None = Field( + default=None, + description=( + "Configuration options for a text response from the model. " + "Can be plain text or structured JSON data." + ), ) - max_num_results: int | None = Field( - default=None, ge=1, le=50, description="Max file search results" + + reasoning: ReasoningConfig | None = Field( + default=None, + description=( + "Configuration options for reasoning models (gpt-5 and o-series models only). " + "Controls reasoning effort and summary generation." + ), ) - # Response configuration truncation: Literal["auto", "disabled"] | None = Field( - default=None, description="Truncation strategy for long contexts" + default="disabled", + description=( + "The truncation strategy to use for the model response. " + "'auto': If input exceeds context window, truncate by dropping items from beginning. " + "'disabled' (default): Request fails with 400 error if input exceeds context window." + ), ) - metadata: dict[str, str] | None = Field( - default=None, description="Custom metadata for the request" + + # Advanced Options + + prompt_cache_key: str | None = Field( + default=None, + description=( + "Used by OpenAI to cache responses for similar requests to optimize cache hit rates. " + ), ) @model_validator(mode="after") - def validate_vector_store(self) -> "OpenAIResponseSpec": - """Validate vector store configuration. - - Ensures that if vector_store_id is provided, it's a valid non-empty string. + def validate_conversation_previous_response_exclusivity( + self, + ) -> "OpenAIResponseSpec": + """Validate that conversation and previous_response_id are not used together. Returns: Self for method chaining Raises: - ValueError: If vector_store_id is invalid + ValueError: If both conversation and previous_response_id are provided """ - if self.vector_store_id is not None and not self.vector_store_id.strip(): - raise ValueError("vector_store_id cannot be empty") + if self.conversation is not None and self.previous_response_id is not None: + raise ValueError( + "Cannot use both 'conversation' and 'previous_response_id' parameters together" + ) + return self - def to_api_params(self) -> dict[str, Any]: - """Convert to OpenAI Responses API parameters. + @model_validator(mode="after") + def validate_temperature_top_p(self) -> "OpenAIResponseSpec": + """Warn if both temperature and top_p are altered from defaults. - Transforms this spec into the format expected by OpenAI's Responses API. - Uses the official API contract with correct parameter names and structure. + Note: This is a soft validation (warning), not a hard error. 
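
A self-contained sketch of the validation pattern used above: the conversation / previous_response_id pair is rejected outright, while temperature plus top_p together is only a soft recommendation. The model below is illustrative and not part of the patch; it keeps just the two fields involved in the hard check.

from pydantic import model_validator
from sqlmodel import SQLModel


class ConversationRefSketch(SQLModel):
    conversation: str | None = None
    previous_response_id: str | None = None

    @model_validator(mode="after")
    def check_exclusivity(self) -> "ConversationRefSketch":
        if self.conversation is not None and self.previous_response_id is not None:
            raise ValueError(
                "Cannot use both 'conversation' and 'previous_response_id' parameters together"
            )
        return self


ConversationRefSketch(conversation="conv_123")          # accepted
ConversationRefSketch(previous_response_id="resp_456")  # accepted
# Supplying both raises a validation error, matching the hard check above.
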
Returns: - Dictionary of API parameters ready for openai.responses.create() + Self for method chaining """ - # Base parameters - always required - params: dict[str, Any] = { - "model": self.model, - "input": [{"role": "user", "content": self.prompt}], - } - - # Add optional sampling parameters - if self.temperature is not None: - params["temperature"] = self.temperature - - if self.max_output_tokens is not None: - params["max_output_tokens"] = self.max_output_tokens - - if self.top_p is not None: - params["top_p"] = self.top_p - - # Add conversation and state management - if self.instructions is not None: - params["instructions"] = self.instructions - - if self.previous_response_id is not None: - params["previous_response_id"] = self.previous_response_id - - if self.store is not None: - params["store"] = self.store - - # Add advanced OpenAI configurations - if self.reasoning_effort is not None or self.reasoning_generate_summary is not None: - reasoning_config: dict[str, Any] = {} - if self.reasoning_effort is not None: - reasoning_config["effort"] = self.reasoning_effort - if self.reasoning_generate_summary is not None: - reasoning_config["generate_summary"] = self.reasoning_generate_summary - params["reasoning"] = reasoning_config - - if self.text_verbosity is not None: - params["text"] = {"verbosity": self.text_verbosity} - - # Add tool configuration - if self.parallel_tool_calls is not None: - params["parallel_tool_calls"] = self.parallel_tool_calls - - # Add vector store file search if provided - if self.vector_store_id: - params["tools"] = [ - { - "type": "file_search", - "vector_store_ids": [self.vector_store_id], - "max_num_results": self.max_num_results or 20, - } - ] - params["include"] = ["file_search_call.results"] - - # Add response configuration - if self.truncation is not None: - params["truncation"] = self.truncation - - if self.metadata is not None: - params["metadata"] = self.metadata - - return params - - @classmethod - def from_llm_request(cls, request: LLMCallRequest) -> "OpenAIResponseSpec": - """Create OpenAIResponseSpec from LLMCallRequest. - - Convenience method to convert from the unified API request format. - Maps the provider-agnostic max_tokens to OpenAI's max_output_tokens. 
- - Args: - request: Unified LLM call request + # OpenAI recommends altering temperature OR top_p, but not both + # We'll allow it but could log a warning in production + if ( + self.temperature is not None + and self.temperature != 1.0 + and self.top_p is not None + and self.top_p != 1.0 + ): + # In a production setting, you might want to log a warning here + pass - Returns: - OpenAIResponseSpec instance - """ - model_spec = request.llm.llm_model_spec - - return cls( - model=model_spec.model, - prompt=request.llm.prompt, - temperature=model_spec.temperature, - max_output_tokens=model_spec.max_tokens, # Map max_tokens to max_output_tokens - top_p=model_spec.top_p, - reasoning_effort=model_spec.reasoning_effort, - text_verbosity=model_spec.text_verbosity, - vector_store_id=request.llm.vector_store_id, - max_num_results=request.max_num_results, - ) + return self From 2d191ee79e7cdfe06beeb7b50665b2d79bc6320e Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:07:20 +0530 Subject: [PATCH 08/15] Define intial unified api contract --- backend/app/api/routes/llm.py | 6 +- backend/app/models/__init__.py | 2 - backend/app/models/llm/__init__.py | 8 -- backend/app/models/llm/request.py | 133 ++++++++++++++++-- backend/app/services/llm/jobs.py | 33 ++--- backend/app/services/llm/orchestrator.py | 6 +- backend/app/services/llm/providers/base.py | 2 +- backend/app/services/llm/providers/openai.py | 7 +- .../app/services/llm/specs/openai/response.py | 105 ++++++++------ 9 files changed, 201 insertions(+), 101 deletions(-) diff --git a/backend/app/api/routes/llm.py b/backend/app/api/routes/llm.py index e1d3ed04..29ae0b46 100644 --- a/backend/app/api/routes/llm.py +++ b/backend/app/api/routes/llm.py @@ -2,18 +2,18 @@ from fastapi import APIRouter -from app.api.deps import AuthContext, SessionDep +from app.api.deps import AuthContextDep, SessionDep from app.models.llm import LLMCallRequest from app.services.llm.jobs import start_job from app.utils import APIResponse logger = logging.getLogger(__name__) -router = APIRouter(tags=["llm"]) +router = APIRouter(tags=["LLM"]) @router.post("/llm/call") async def llm_call( - request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext + _current_user: AuthContextDep, _session: SessionDep, request: LLMCallRequest ): """ Endpoint to initiate an LLM call as a background job. diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 10f9cbec..791da4ba 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -51,8 +51,6 @@ from .llm import ( LLMCallRequest, LLMCallResponse, - LLMConfig, - LLMModelSpec, ) from .message import Message diff --git a/backend/app/models/llm/__init__.py b/backend/app/models/llm/__init__.py index a1807e83..de19cb90 100644 --- a/backend/app/models/llm/__init__.py +++ b/backend/app/models/llm/__init__.py @@ -3,11 +3,6 @@ This module provides all data models for LLM functionality including requests, responses, configurations, and model specifications. 
""" - -from app.models.llm.config import ( - LLMConfig, - LLMModelSpec, -) from app.models.llm.request import LLMCallRequest from app.models.llm.response import LLMCallResponse @@ -15,7 +10,4 @@ # Request/Response models "LLMCallRequest", "LLMCallResponse", - # Configuration models - "LLMConfig", - "LLMModelSpec", ] diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py index f8ef2da9..f50b7c6e 100644 --- a/backend/app/models/llm/request.py +++ b/backend/app/models/llm/request.py @@ -1,23 +1,126 @@ -"""LLM request models. +from sqlmodel import SQLModel, Field +from typing import Any, Literal +from pydantic import model_validator -This module contains request models for LLM API calls. -""" -from sqlmodel import SQLModel +class ReasoningOptions(SQLModel): + """Configuration for reasoning models (o-series, gpt-5).""" -from app.models.llm.config import LLMConfig + effort: Literal["minimal", "low", "medium", "high"] | None = Field( + default="medium", + description=( + "Constrains effort on reasoning for reasoning models. " + "Reducing reasoning effort can result in faster responses and fewer tokens used. " + "Note: The gpt-5-pro model defaults to (and only supports) high reasoning effort." + ), + ) + summary: Literal["auto", "concise", "detailed"] | None = Field( + default=None, + description=( + "A summary of the reasoning performed by the model. " + "This can be useful for debugging and understanding the model's reasoning process." + ), + ) -class LLMCallRequest(SQLModel): - """Request model for /v1/llm/call endpoint. +class CompletionConfig(SQLModel): + """Generic LLM completion configuration supporting multiple providers.""" + + provider: Literal["openai"] = Field( + default="openai", description="LLM provider to use" + ) + model: str = Field( + default="gpt-4o", + min_length=1, + description="Model name/identifier to use for completion", + ) + + input: str = Field( + ..., min_length=1, description="User input text/prompt for the model" + ) + + # RAG + vector_store_ids: list[str] | None = Field( + default=None, description="Vector store IDs to search through." + ) + max_num_results: int | None = Field( + default=None, + ge=1, + le=50, + description="Maximum number of results for RAG. Applies when vector_store_ids are provided.", + ) + + # Context + instructions: str | None = Field( + default=None, description="System instructions/prompt for the model" + ) + + conversation_id: str | None = Field( + default=None, description="Conversation ID to continue existing conversation" + ) + + previous_response_id: str | None = Field( + default=None, + description="ID of previous response for multi-turn conversations (mutually exclusive with conversation)", + ) + + # Response Configuration + reasoning: ReasoningOptions | None = Field( + default=None, + description="Reasoning configuration for models with reasoning capabilities (o-series, etc.)", + ) - This model decouples LLM calls from the assistants table, - allowing dynamic configuration per request. + # Sampling Parameters + temperature: float | None = Field( + default=None, + ge=0.0, + le=2.0, + description=( + "Sampling temperature (0-2): higher = more random, lower = more deterministic" + "We generally recommend altering this or top_p but not both." 
+ ), + ) - Attributes: - llm: LLM configuration containing model spec and prompt - max_num_results: Number of results to return from vector store file search - """ + top_p: float | None = Field( + default=None, + ge=0.0, + le=1.0, + description=( + "Nucleus sampling: consider tokens with top_p probability mass" + "We generally recommend altering this or temperature but not both." + ), + ) + + max_output_tokens: int | None = Field( + default=None, gt=0, description="Maximum tokens to generate in the response." + ) + + # Validators + @model_validator(mode="after") + def validate_conversation_exclusivity(self) -> "CompletionConfig": + """Ensure conversation_id and previous_response_id are not used together.""" + if self.conversation_id is not None and self.previous_response_id is not None: + raise ValueError( + "Cannot use both 'conversation_id' and 'previous_response_id' together" + ) + return self + + @model_validator(mode="after") + def vector_store_list_not_empty(self) -> "CompletionConfig": + """Ensure vector_store_ids is not an empty list if provided.""" + if self.vector_store_ids is not None and len(self.vector_store_ids) == 0: + raise ValueError("'vector_store_ids' cannot be an empty list") + return self + + +class LLMConfig(SQLModel): + completion: CompletionConfig = Field(..., description="Completion configuration") + + +class LLMCallRequest(SQLModel): + """User-facing API request for LLM completion.""" - llm: LLMConfig - max_num_results: int = 20 # For vector store file search + config: LLMConfig + metadata: dict[str, Any] | None = Field( + default=None, description="Optional metadata for tracking and context" + ) diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index 5149531d..a4553ab2 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -5,12 +5,12 @@ from sqlmodel import Session from asgi_correlation_id import correlation_id -from app.celery.utils import start_high_priority_job from app.crud import JobCrud from app.core.db import engine -from app.models import JobType, JobStatus, JobUpdate -from app.models.llm import LLMCallRequest, LLMCallResponse +from app.models import JobType, JobStatus, JobUpdate, LLMCallRequest, LLMCallResponse + +from app.celery.utils import start_high_priority_job from app.services.llm.orchestrator import execute_llm_call from app.utils import get_openai_client @@ -63,9 +63,12 @@ def execute_job( request = LLMCallRequest(**request_data) job_id_uuid = UUID(job_id) + provider = request.config.completion.provider + model = request.config.completion.model + logger.info( f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, " - f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}" + f"provider={provider}, model={model}" ) try: @@ -76,26 +79,12 @@ def execute_job( job_id=job_id_uuid, job_update=JobUpdate(status=JobStatus.PROCESSING) ) - provider_type = request.llm.llm_model_spec.provider - - if provider_type == "openai": + if provider == "openai": client = get_openai_client(session, organization_id, project_id) else: - error_msg = f"Provider '{provider_type}' is not yet supported" - logger.error(f"[execute_job] {error_msg} | job_id={job_id}") - job_crud = JobCrud(session=session) - job_crud.update( - job_id=job_id_uuid, - job_update=JobUpdate( - status=JobStatus.FAILED, error_message=error_msg - ), - ) - return None + raise ValueError(f"Unsupported provider: {provider}") - response, error = execute_llm_call( - request=request, - client=client, - 
) + response, error = execute_llm_call(request=request, client=client) with Session(engine) as session: job_crud = JobCrud(session=session) @@ -107,7 +96,7 @@ def execute_job( f"[execute_job] Successfully completed LLM job | job_id={job_id}, " f"response_id={response.response_id}, tokens={response.total_tokens}" ) - return response + return response.model_dump() else: job_crud.update( job_id=job_id_uuid, diff --git a/backend/app/services/llm/orchestrator.py b/backend/app/services/llm/orchestrator.py index ce1de4c1..af639fdb 100644 --- a/backend/app/services/llm/orchestrator.py +++ b/backend/app/services/llm/orchestrator.py @@ -8,7 +8,7 @@ import logging from typing import Any -from app.models.llm import LLMCallRequest, LLMCallResponse +from app.models import LLMCallRequest, LLMCallResponse from app.services.llm.providers.factory import ProviderFactory logger = logging.getLogger(__name__) @@ -32,11 +32,11 @@ def execute_llm_call( - If successful: (LLMCallResponse, None) - If failed: (None, error_message) """ - provider_type = request.llm.llm_model_spec.provider + provider_type = request.config.completion.provider logger.info( f"[execute_llm_call] Processing LLM call for provider: {provider_type}, " - f"model: {request.llm.llm_model_spec.model}" + f"model: {request.config.completion.model}" ) try: diff --git a/backend/app/services/llm/providers/base.py b/backend/app/services/llm/providers/base.py index 7492f5fe..acdd5825 100644 --- a/backend/app/services/llm/providers/base.py +++ b/backend/app/services/llm/providers/base.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from typing import Any -from app.models.llm import LLMCallRequest, LLMCallResponse +from app.models import LLMCallRequest, LLMCallResponse class BaseProvider(ABC): diff --git a/backend/app/services/llm/providers/openai.py b/backend/app/services/llm/providers/openai.py index 5af7cddd..a80d444c 100644 --- a/backend/app/services/llm/providers/openai.py +++ b/backend/app/services/llm/providers/openai.py @@ -17,7 +17,6 @@ from app.models.llm import LLMCallRequest, LLMCallResponse from app.services.llm.providers.base import BaseProvider from app.services.llm.specs import OpenAIResponseSpec -from app.utils import handle_openai_error logger = logging.getLogger(__name__) @@ -109,12 +108,13 @@ def execute( try: # Create and validate OpenAI spec from request - spec = OpenAIResponseSpec.from_llm_request(request) + spec = OpenAIResponseSpec.from_completion_config(request.config.completion) # Convert to API parameters (validation happens during spec creation) params = spec.to_api_params() logger.info(f"[OpenAIProvider] Making OpenAI call with model: {spec.model}") + response = self.client.responses.create(**params) # Extract message text from response.output array @@ -148,6 +148,9 @@ def execute( return None, error_message except openai.OpenAIError as e: + # imported here to avoid circular imports + from app.utils import handle_openai_error + error_message = handle_openai_error(e) logger.error( f"[OpenAIProvider] OpenAI API error: {error_message}", exc_info=True diff --git a/backend/app/services/llm/specs/openai/response.py b/backend/app/services/llm/specs/openai/response.py index e6532ae2..491dd9c4 100644 --- a/backend/app/services/llm/specs/openai/response.py +++ b/backend/app/services/llm/specs/openai/response.py @@ -7,10 +7,13 @@ """ from typing import Any, Literal +import typing -from pydantic import Field, field_validator, model_validator +from pydantic import Field, model_validator from sqlmodel import SQLModel +from 
app.models.llm.request import CompletionConfig + class ReasoningConfig(SQLModel): """Configuration options for reasoning models (gpt-5 and o-series models only).""" @@ -32,42 +35,6 @@ class ReasoningConfig(SQLModel): ) -class TextFormatConfig(SQLModel): - """An object specifying the format that the model must output.""" - - type: Literal["text", "json_object", "json_schema"] = Field( - description=( - "The format type. " - "'text': Plain text output. " - "'json_object': Older JSON mode (not recommended for newer models). " - "'json_schema': Structured Outputs with JSON schema validation (recommended)." - ) - ) - json_schema: dict[str, Any] | None = Field( - default=None, - description="The JSON schema to validate against when type is 'json_schema'.", - ) - - -class TextConfig(SQLModel): - """Configuration options for text response from the model.""" - - format: TextFormatConfig | None = Field( - default=None, - description=( - "An object specifying the format that the model must output. " - "Default format is { 'type': 'text' }." - ), - ) - verbosity: Literal["low", "medium", "high"] | None = Field( - default="medium", - description=( - "Constrains the verbosity of the model's response. " - "Lower values result in more concise responses, higher values result in more verbose responses." - ), - ) - - class FileSearchTool(SQLModel): """Tool configuration for searching through vector stores.""" @@ -189,14 +156,6 @@ class OpenAIResponseSpec(SQLModel): # Response Configuration - text: TextConfig | None = Field( - default=None, - description=( - "Configuration options for a text response from the model. " - "Can be plain text or structured JSON data." - ), - ) - reasoning: ReasoningConfig | None = Field( default=None, description=( @@ -263,3 +222,59 @@ def validate_temperature_top_p(self) -> "OpenAIResponseSpec": pass return self + + @classmethod + def from_completion_config(cls, config: CompletionConfig) -> "OpenAIResponseSpec": + """Convert generic CompletionConfig to OpenAI ResponseSpec. + + Args: + config: Generic completion configuration + + Returns: + OpenAI-specific response specification + """ + # Build tools list if vector stores are provided + tools = None + if config.vector_store_ids: + tools = [ + FileSearchTool( + vector_store_ids=config.vector_store_ids, + max_num_results=config.max_num_results, + ) + ] + + # Convert ReasoningOptions to ReasoningConfig if provided + reasoning = None + if config.reasoning: + reasoning = ReasoningConfig( + effort=config.reasoning.effort, + summary=config.reasoning.summary, + ) + + return cls( + model=config.model, + input=config.input, + instructions=config.instructions, + conversation=config.conversation_id, + previous_response_id=config.previous_response_id, + temperature=config.temperature, + top_p=config.top_p, + max_output_tokens=config.max_output_tokens, + tools=tools, + reasoning=reasoning, + ) + + def to_api_params(self) -> dict[str, Any]: + """Convert OpenAIResponseSpec to OpenAI API parameters. + + Converts the spec to a dictionary suitable for passing to the OpenAI API, + excluding None values and properly formatting nested objects. 
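
The to_api_params conversion added here leans on Pydantic's model_dump(exclude_none=True): unset fields are dropped and nested models are serialised recursively, so the populated spec becomes plain keyword arguments for the client. A small sketch with made-up values (not part of the patch):

from sqlmodel import SQLModel


class FileSearchToolSketch(SQLModel):
    type: str = "file_search"
    vector_store_ids: list[str]
    max_num_results: int | None = None


class SpecSketch(SQLModel):
    model: str = "gpt-4o"
    input: str
    temperature: float | None = None
    tools: list[FileSearchToolSketch] | None = None


params = SpecSketch(
    input="What does the refund policy say?",
    tools=[FileSearchToolSketch(vector_store_ids=["vs_example_123"])],
).model_dump(exclude_none=True)

# params == {
#     "model": "gpt-4o",
#     "input": "What does the refund policy say?",
#     "tools": [{"type": "file_search", "vector_store_ids": ["vs_example_123"]}],
# }
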
+ + Returns: + Dictionary of API parameters ready to be passed to openai.responses.create() + """ + params = self.model_dump(exclude_none=True) + + print(params) + + return params From 6e359d5a1b1a1d4df0d341dbd8137df2bcb24aca Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Thu, 23 Oct 2025 16:36:29 +0530 Subject: [PATCH 09/15] Use flexible json for config --- backend/app/models/llm/__init__.py | 2 +- backend/app/models/llm/config.py | 51 -------- backend/app/models/llm/request.py | 121 +++--------------- backend/app/services/llm/jobs.py | 7 +- backend/app/services/llm/orchestrator.py | 6 +- backend/app/services/llm/providers/factory.py | 5 +- backend/app/services/llm/providers/openai.py | 75 ++--------- 7 files changed, 44 insertions(+), 223 deletions(-) delete mode 100644 backend/app/models/llm/config.py diff --git a/backend/app/models/llm/__init__.py b/backend/app/models/llm/__init__.py index de19cb90..4360b253 100644 --- a/backend/app/models/llm/__init__.py +++ b/backend/app/models/llm/__init__.py @@ -3,7 +3,7 @@ This module provides all data models for LLM functionality including requests, responses, configurations, and model specifications. """ -from app.models.llm.request import LLMCallRequest +from app.models.llm.request import LLMCallRequest, CompletionConfig from app.models.llm.response import LLMCallResponse __all__ = [ diff --git a/backend/app/models/llm/config.py b/backend/app/models/llm/config.py deleted file mode 100644 index e30bc93b..00000000 --- a/backend/app/models/llm/config.py +++ /dev/null @@ -1,51 +0,0 @@ -"""LLM configuration models. - -This module contains all configuration-related models for LLM requests, -including model specifications and advanced configuration options. -""" - -from typing import Literal - -from sqlmodel import SQLModel - - -class LLMModelSpec(SQLModel): - """Specification for the LLM model and its parameters. - - This contains the actual model configuration that will be sent to the provider. - Supports both standard models and advanced configurations. - - Attributes: - model: Model identifier (e.g., "gpt-4", "claude-3-opus") - provider: Provider type (openai, anthropic, google, azure) - temperature: Sampling temperature (0.0-2.0) - reasoning_effort: Reasoning effort level for o-series models ("low", "medium", "high") - text_verbosity: Text verbosity level ("low", "medium", "high") - max_tokens: Maximum number of tokens to generate - top_p: Nucleus sampling parameter (0.0-1.0) - """ - - model: str - provider: str = "openai" - temperature: float | None = None - reasoning_effort: Literal["low", "medium", "high"] | None = None - text_verbosity: Literal["low", "medium", "high"] | None = None - max_tokens: int | None = None - top_p: float | None = None - - -class LLMConfig(SQLModel): - """LLM configuration containing model specification and prompt. - - This wraps the model spec and can be extended with additional - provider-agnostic configuration in the future. 
- - Attributes: - prompt: The user's input prompt - vector_store_id: Vector store ID for RAG functionality - llm_model_spec: Model specification and parameters - """ - - prompt: str - vector_store_id: str | None = None - llm_model_spec: LLMModelSpec diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py index f50b7c6e..fab477af 100644 --- a/backend/app/models/llm/request.py +++ b/backend/app/models/llm/request.py @@ -1,126 +1,45 @@ from sqlmodel import SQLModel, Field from typing import Any, Literal -from pydantic import model_validator -class ReasoningOptions(SQLModel): - """Configuration for reasoning models (o-series, gpt-5).""" +# Query Parameters (dynamic per request) +class QueryParams(SQLModel): + """Query-specific parameters for each LLM call.""" - effort: Literal["minimal", "low", "medium", "high"] | None = Field( - default="medium", - description=( - "Constrains effort on reasoning for reasoning models. " - "Reducing reasoning effort can result in faster responses and fewer tokens used. " - "Note: The gpt-5-pro model defaults to (and only supports) high reasoning effort." - ), - ) - summary: Literal["auto", "concise", "detailed"] | None = Field( + input: str = Field(..., min_length=1, description="User input text/prompt") + conversation_id: str | None = Field( default=None, - description=( - "A summary of the reasoning performed by the model. " - "This can be useful for debugging and understanding the model's reasoning process." - ), + description="Optional conversation ID. If not provided, a new conversation will be created.", ) class CompletionConfig(SQLModel): - """Generic LLM completion configuration supporting multiple providers.""" + """Completion configuration with provider and parameters.""" provider: Literal["openai"] = Field( default="openai", description="LLM provider to use" ) - model: str = Field( - default="gpt-4o", - min_length=1, - description="Model name/identifier to use for completion", - ) - - input: str = Field( - ..., min_length=1, description="User input text/prompt for the model" + params: dict[str, Any] = Field( + ..., description="Provider-specific parameters (schema varies by provider)" ) - # RAG - vector_store_ids: list[str] | None = Field( - default=None, description="Vector store IDs to search through." - ) - max_num_results: int | None = Field( - default=None, - ge=1, - le=50, - description="Maximum number of results for RAG. Applies when vector_store_ids are provided.", - ) - # Context - instructions: str | None = Field( - default=None, description="System instructions/prompt for the model" - ) +class LLMCallConfig(SQLModel): + """Complete configuration for LLM call including all processing stages.""" - conversation_id: str | None = Field( - default=None, description="Conversation ID to continue existing conversation" - ) - - previous_response_id: str | None = Field( - default=None, - description="ID of previous response for multi-turn conversations (mutually exclusive with conversation)", - ) - - # Response Configuration - reasoning: ReasoningOptions | None = Field( - default=None, - description="Reasoning configuration for models with reasoning capabilities (o-series, etc.)", - ) - - # Sampling Parameters - temperature: float | None = Field( - default=None, - ge=0.0, - le=2.0, - description=( - "Sampling temperature (0-2): higher = more random, lower = more deterministic" - "We generally recommend altering this or top_p but not both." 
- ), - ) - - top_p: float | None = Field( - default=None, - ge=0.0, - le=1.0, - description=( - "Nucleus sampling: consider tokens with top_p probability mass" - "We generally recommend altering this or temperature but not both." - ), - ) - - max_output_tokens: int | None = Field( - default=None, gt=0, description="Maximum tokens to generate in the response." - ) - - # Validators - @model_validator(mode="after") - def validate_conversation_exclusivity(self) -> "CompletionConfig": - """Ensure conversation_id and previous_response_id are not used together.""" - if self.conversation_id is not None and self.previous_response_id is not None: - raise ValueError( - "Cannot use both 'conversation_id' and 'previous_response_id' together" - ) - return self - - @model_validator(mode="after") - def vector_store_list_not_empty(self) -> "CompletionConfig": - """Ensure vector_store_ids is not an empty list if provided.""" - if self.vector_store_ids is not None and len(self.vector_store_ids) == 0: - raise ValueError("'vector_store_ids' cannot be an empty list") - return self - - -class LLMConfig(SQLModel): completion: CompletionConfig = Field(..., description="Completion configuration") + # Future additions: + # classifier: ClassifierConfig | None = None + # pre_filter: PreFilterConfig | None = None class LLMCallRequest(SQLModel): """User-facing API request for LLM completion.""" - config: LLMConfig - metadata: dict[str, Any] | None = Field( - default=None, description="Optional metadata for tracking and context" + query: QueryParams = Field(..., description="Query-specific parameters") + config: LLMCallConfig = Field( + ..., description="Configuration for the LLM call" + ) + callback_url: str | None = Field( + default=None, description="Webhook URL for async response delivery" ) diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index a4553ab2..f613920f 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -60,11 +60,14 @@ def execute_job( task_instance, ) -> LLMCallResponse | None: """Celery task to process an LLM request asynchronously.""" + + request = LLMCallRequest(**request_data) job_id_uuid = UUID(job_id) - provider = request.config.completion.provider - model = request.config.completion.model + config = request.config + provider = config.completion.provider + model = config.completion.params.get("model", "N/A") logger.info( f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, " diff --git a/backend/app/services/llm/orchestrator.py b/backend/app/services/llm/orchestrator.py index af639fdb..e2402658 100644 --- a/backend/app/services/llm/orchestrator.py +++ b/backend/app/services/llm/orchestrator.py @@ -32,12 +32,8 @@ def execute_llm_call( - If successful: (LLMCallResponse, None) - If failed: (None, error_message) """ - provider_type = request.config.completion.provider - logger.info( - f"[execute_llm_call] Processing LLM call for provider: {provider_type}, " - f"model: {request.config.completion.model}" - ) + provider_type = request.config.completion.provider try: # Create the appropriate provider using the factory diff --git a/backend/app/services/llm/providers/factory.py b/backend/app/services/llm/providers/factory.py index bf2881a4..47a93376 100644 --- a/backend/app/services/llm/providers/factory.py +++ b/backend/app/services/llm/providers/factory.py @@ -49,12 +49,13 @@ def create_provider(cls, provider_type: str, client: Any) -> BaseProvider: if provider_class is None: supported = 
cls.get_supported_providers() + logger.error( + f"[ProviderFactory] Unsupported provider type requested: {provider_type}" + ) raise ValueError( f"Provider '{provider_type}' is not supported. " f"Supported providers: {', '.join(supported)}" ) - - logger.info(f"[ProviderFactory] Creating {provider_type} provider instance") return provider_class(client=client) @classmethod diff --git a/backend/app/services/llm/providers/openai.py b/backend/app/services/llm/providers/openai.py index a80d444c..b0b751c7 100644 --- a/backend/app/services/llm/providers/openai.py +++ b/backend/app/services/llm/providers/openai.py @@ -4,7 +4,7 @@ including support for standard models, o-series models with reasoning, and file search capabilities. -Uses OpenAIResponseSpec for parameter validation and API conversion. +Directly passes user configuration to OpenAI API without transformation. """ import logging @@ -14,9 +14,8 @@ from openai.types.responses.response import Response from pydantic import ValidationError -from app.models.llm import LLMCallRequest, LLMCallResponse +from app.models.llm import CompletionConfig, LLMCallResponse, LLMCallRequest from app.services.llm.providers.base import BaseProvider -from app.services.llm.specs import OpenAIResponseSpec logger = logging.getLogger(__name__) @@ -30,7 +29,8 @@ class OpenAIProvider(BaseProvider): - Text configuration for verbosity control - Vector store file search integration - Uses OpenAIResponseSpec for parameter validation and conversion. + Directly passes user configuration to OpenAI API. + User is responsible for providing valid OpenAI parameters. """ def __init__(self, client: OpenAI): @@ -42,58 +42,12 @@ def __init__(self, client: OpenAI): super().__init__(client) self.client = client - def _extract_message_from_output(self, output: list) -> str: - """Extract message text from response.output array. - - The Responses API returns output as a list that can contain various types: - - ResponseOutputMessage: Contains the assistant's text message - - ResponseFileSearchToolCall: File search results - - ResponseFunctionToolCall: Function call results - - ResponseReasoningItem: Reasoning traces - - etc. - - Args: - output: List of output items from the response - - Returns: - The extracted message text, or empty string if no message found - - Raises: - ValueError: If output format is unexpected - """ - if not output: - logger.warning("[OpenAIProvider] Empty output array in response") - return "" - - # Find the first ResponseOutputMessage in the output - for item in output: - # Check if it's a message type (has 'role' and 'content' attributes) - if hasattr(item, "type") and item.type == "message": - if hasattr(item, "content"): - # Content is a list of content items - if isinstance(item.content, list) and len(item.content) > 0: - # Get the first text content - first_content = item.content[0] - if hasattr(first_content, "text"): - return first_content.text - elif ( - hasattr(first_content, "type") - and first_content.type == "text" - ): - return getattr(first_content, "text", "") - return "" - - logger.warning( - f"[OpenAIProvider] No message found in output array with {len(output)} items" - ) - return "" - def execute( self, request: LLMCallRequest ) -> tuple[LLMCallResponse | None, str | None]: """Execute OpenAI API call. - Uses OpenAIResponseSpec to validate and convert the request to OpenAI format. + Directly passes the user's config params to OpenAI API along with input. 
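
With this pass-through design, a POST /llm/call body is just the query plus whatever params the chosen provider accepts. An illustrative payload built from the request models in this patch (identifiers and values are made up; input is supplied via query and merged into params by the provider):

payload = {
    "query": {
        "input": "Summarise the key changes in the attached policy.",
    },
    "config": {
        "completion": {
            "provider": "openai",
            "params": {
                "model": "gpt-4o",
                "instructions": "Answer concisely.",
                "temperature": 0.2,
                "tools": [
                    {
                        "type": "file_search",
                        "vector_store_ids": ["vs_example_123"],
                    }
                ],
            },
        }
    },
}
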
Args: request: LLM call request with configuration @@ -107,25 +61,24 @@ def execute( error_message: str | None = None try: - # Create and validate OpenAI spec from request - spec = OpenAIResponseSpec.from_completion_config(request.config.completion) + completion_config = request.config.completion - # Convert to API parameters (validation happens during spec creation) - params = spec.to_api_params() + params = { + **completion_config.params, + } + params["input"] = request.query.input - logger.info(f"[OpenAIProvider] Making OpenAI call with model: {spec.model}") + # Add conversation_id if provided + if request.query.conversation_id: + params["conversation_id"] = request.query.conversation_id response = self.client.responses.create(**params) - # Extract message text from response.output array - # The output is a list that can contain various item types - message_text = self._extract_message_from_output(response.output) - # Build response llm_response = LLMCallResponse( status="success", response_id=response.id, - message=message_text, + message=response.output_text, model=response.model, input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens, From bb74ab61b8bdf38462b392b6ef2f8755ca4ce5f7 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Thu, 23 Oct 2025 19:58:43 +0530 Subject: [PATCH 10/15] Handle callback --- backend/app/models/llm/request.py | 4 +- backend/app/services/llm/jobs.py | 64 ++++++++++++++++++------------- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py index fab477af..e26e6b81 100644 --- a/backend/app/models/llm/request.py +++ b/backend/app/models/llm/request.py @@ -37,9 +37,7 @@ class LLMCallRequest(SQLModel): """User-facing API request for LLM completion.""" query: QueryParams = Field(..., description="Query-specific parameters") - config: LLMCallConfig = Field( - ..., description="Configuration for the LLM call" - ) + config: LLMCallConfig = Field(..., description="Configuration for the LLM call") callback_url: str | None = Field( default=None, description="Webhook URL for async response delivery" ) diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index f613920f..f52380c3 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -1,6 +1,7 @@ import logging from uuid import UUID +from aiohttp import request from fastapi import HTTPException from sqlmodel import Session from asgi_correlation_id import correlation_id @@ -12,7 +13,7 @@ from app.celery.utils import start_high_priority_job from app.services.llm.orchestrator import execute_llm_call -from app.utils import get_openai_client +from app.utils import get_openai_client, send_callback, APIResponse logger = logging.getLogger(__name__) @@ -51,6 +52,26 @@ def start_job( return job.id +def handle_job_error(job_id: UUID, callback_url: str | None, error: str): + """Handle job failure uniformly callback, and DB update.""" + with Session(engine) as session: + job_crud = JobCrud(session=session) + + callback = APIResponse.failure_response(error=error) + if callback_url: + send_callback( + callback_url=callback_url, + data=callback.model_dump(), + ) + + job_crud.update( + job_id=job_id, + job_update=JobUpdate(status=JobStatus.FAILED, error_message=error), + ) + + return callback.model_dump() + + def execute_job( request_data: dict, project_id: int, @@ -61,9 +82,8 @@ def execute_job( ) -> LLMCallResponse | None: 
"""Celery task to process an LLM request asynchronously.""" - request = LLMCallRequest(**request_data) - job_id_uuid = UUID(job_id) + job_id: UUID = UUID(job_id) config = request.config provider = config.completion.provider @@ -79,7 +99,7 @@ def execute_job( with Session(engine) as session: job_crud = JobCrud(session=session) job_crud.update( - job_id=job_id_uuid, job_update=JobUpdate(status=JobStatus.PROCESSING) + job_id=job_id, job_update=JobUpdate(status=JobStatus.PROCESSING) ) if provider == "openai": @@ -92,34 +112,26 @@ def execute_job( with Session(engine) as session: job_crud = JobCrud(session=session) if response: + callback = APIResponse.success_response(data=response) + send_callback( + callback_url=request.callback_url, + data=callback.model_dump(), + ) job_crud.update( - job_id=job_id_uuid, job_update=JobUpdate(status=JobStatus.SUCCESS) + job_id=job_id, job_update=JobUpdate(status=JobStatus.SUCCESS) ) logger.info( f"[execute_job] Successfully completed LLM job | job_id={job_id}, " f"response_id={response.response_id}, tokens={response.total_tokens}" ) - return response.model_dump() + return callback.model_dump() else: - job_crud.update( - job_id=job_id_uuid, - job_update=JobUpdate( - status=JobStatus.FAILED, - error_message=error or "Unknown error occurred", - ), - ) - logger.error( - f"[execute_job] Failed to execute LLM job | job_id={job_id}, error={error}" - ) - return None + return handle_job_error(job_id, request.callback_url, error) except Exception as e: - error_message = f"Unexpected error in LLM job execution: {str(e)}" - logger.error(f"[execute_job] {error_message} | job_id={job_id}", exc_info=True) - with Session(engine) as session: - job_crud = JobCrud(session=session) - job_crud.update( - job_id=job_id_uuid, - job_update=JobUpdate(status=JobStatus.FAILED, error_message=str(e)), - ) - raise + error = f"Unexpected error in LLM job execution: {str(e)}" + logger.error( + f"[execute_job] {error} | job_id={job_id}, task_id={task_id}", + exc_info=True, + ) + return handle_job_error(job_id, request.callback_url, error) From b6a3fd93475abafc271db667413f37d54ac053ec Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Thu, 23 Oct 2025 21:02:51 +0530 Subject: [PATCH 11/15] Refactor LLM provider architecture: remove factory pattern, introduce registry for provider instantiation, and update OpenAI provider execution logic. --- backend/app/models/llm/__init__.py | 2 +- backend/app/services/llm/__init__.py | 17 ----- backend/app/services/llm/jobs.py | 21 +++--- backend/app/services/llm/orchestrator.py | 60 ---------------- .../app/services/llm/providers/__init__.py | 17 ++--- backend/app/services/llm/providers/base.py | 4 +- backend/app/services/llm/providers/factory.py | 68 ------------------- backend/app/services/llm/providers/openai.py | 27 ++++---- .../app/services/llm/providers/registry.py | 62 +++++++++++++++++ 9 files changed, 95 insertions(+), 183 deletions(-) delete mode 100644 backend/app/services/llm/orchestrator.py delete mode 100644 backend/app/services/llm/providers/factory.py create mode 100644 backend/app/services/llm/providers/registry.py diff --git a/backend/app/models/llm/__init__.py b/backend/app/models/llm/__init__.py index 4360b253..9242693c 100644 --- a/backend/app/models/llm/__init__.py +++ b/backend/app/models/llm/__init__.py @@ -3,7 +3,7 @@ This module provides all data models for LLM functionality including requests, responses, configurations, and model specifications. 
""" -from app.models.llm.request import LLMCallRequest, CompletionConfig +from app.models.llm.request import LLMCallRequest, CompletionConfig, QueryParams from app.models.llm.response import LLMCallResponse __all__ = [ diff --git a/backend/app/services/llm/__init__.py b/backend/app/services/llm/__init__.py index f454b459..55443f64 100644 --- a/backend/app/services/llm/__init__.py +++ b/backend/app/services/llm/__init__.py @@ -1,22 +1,5 @@ -# Main orchestration -from app.services.llm.orchestrator import execute_llm_call - # Providers from app.services.llm.providers import ( BaseProvider, - ProviderFactory, OpenAIProvider, ) - - -# Initialize model specs on module import -import app.services.llm.specs # noqa: F401 - -__all__ = [ - # Main entry point - "execute_llm_call", - # Providers - "BaseProvider", - "ProviderFactory", - "OpenAIProvider", -] diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index f52380c3..9e0d78ea 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -1,19 +1,18 @@ import logging from uuid import UUID -from aiohttp import request from fastapi import HTTPException from sqlmodel import Session from asgi_correlation_id import correlation_id -from app.crud import JobCrud +from app.crud.jobs import JobCrud +from app.utils import send_callback, APIResponse from app.core.db import engine from app.models import JobType, JobStatus, JobUpdate, LLMCallRequest, LLMCallResponse from app.celery.utils import start_high_priority_job -from app.services.llm.orchestrator import execute_llm_call -from app.utils import get_openai_client, send_callback, APIResponse +from app.services.llm.providers.registry import get_llm_provider logger = logging.getLogger(__name__) @@ -102,12 +101,16 @@ def execute_job( job_id=job_id, job_update=JobUpdate(status=JobStatus.PROCESSING) ) - if provider == "openai": - client = get_openai_client(session, organization_id, project_id) - else: - raise ValueError(f"Unsupported provider: {provider}") + provider_instance = get_llm_provider( + session=session, + provider_type=provider, + project_id=project_id, + organization_id=organization_id, + ) - response, error = execute_llm_call(request=request, client=client) + response, error = provider_instance.execute( + completion_config=config.completion, query=request.query + ) with Session(engine) as session: job_crud = JobCrud(session=session) diff --git a/backend/app/services/llm/orchestrator.py b/backend/app/services/llm/orchestrator.py deleted file mode 100644 index e2402658..00000000 --- a/backend/app/services/llm/orchestrator.py +++ /dev/null @@ -1,60 +0,0 @@ -"""Main LLM service orchestration. - -This module provides the main entry point for executing LLM calls. -It uses the provider factory pattern to route requests to the appropriate -provider implementation (OpenAI, Anthropic, etc.). -""" - -import logging -from typing import Any - -from app.models import LLMCallRequest, LLMCallResponse -from app.services.llm.providers.factory import ProviderFactory - -logger = logging.getLogger(__name__) - - -def execute_llm_call( - request: LLMCallRequest, - client: Any, -) -> tuple[LLMCallResponse | None, str | None]: - """Execute LLM call using the appropriate provider. - - This is the main orchestration function that routes requests to - provider-specific implementations. 
- - Args: - request: LLM call request with configuration (includes provider type) - client: Provider-specific client instance - - Returns: - Tuple of (response, error_message) - - If successful: (LLMCallResponse, None) - - If failed: (None, error_message) - """ - - provider_type = request.config.completion.provider - - try: - # Create the appropriate provider using the factory - provider = ProviderFactory.create_provider( - provider_type=provider_type, - client=client, - ) - - # Execute the LLM call through the provider - response, error = provider.execute(request) - - if response: - logger.info( - f"[execute_llm_call] Successfully generated response: {response.response_id}" - ) - else: - logger.error(f"[execute_llm_call] Failed to generate response: {error}") - - return response, error - - except Exception as e: - error_message = f"Unexpected error in LLM service: {str(e)}" - logger.error(f"[execute_llm_call] {error_message}", exc_info=True) - return None, error_message diff --git a/backend/app/services/llm/providers/__init__.py b/backend/app/services/llm/providers/__init__.py index 02841ce5..8abb5441 100644 --- a/backend/app/services/llm/providers/__init__.py +++ b/backend/app/services/llm/providers/__init__.py @@ -1,15 +1,8 @@ -"""LLM providers module. - -This module contains all provider implementations for different LLM services. -Currently supports OpenAI with an extensible factory pattern for future providers. -""" - from app.services.llm.providers.base import BaseProvider -from app.services.llm.providers.factory import ProviderFactory from app.services.llm.providers.openai import OpenAIProvider -__all__ = [ - "BaseProvider", - "ProviderFactory", - "OpenAIProvider", -] +from app.services.llm.providers.registry import ( + get_llm_provider, + get_supported_providers, + PROVIDER_REGISTRY, +) diff --git a/backend/app/services/llm/providers/base.py b/backend/app/services/llm/providers/base.py index acdd5825..1979b7df 100644 --- a/backend/app/services/llm/providers/base.py +++ b/backend/app/services/llm/providers/base.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from typing import Any -from app.models import LLMCallRequest, LLMCallResponse +from app.models.llm import CompletionConfig, LLMCallResponse, QueryParams class BaseProvider(ABC): @@ -34,7 +34,7 @@ def __init__(self, client: Any): @abstractmethod def execute( - self, request: LLMCallRequest + self, completion_config: CompletionConfig, query: QueryParams ) -> tuple[LLMCallResponse | None, str | None]: """Execute an LLM call using the provider. diff --git a/backend/app/services/llm/providers/factory.py b/backend/app/services/llm/providers/factory.py deleted file mode 100644 index 47a93376..00000000 --- a/backend/app/services/llm/providers/factory.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Provider factory for creating LLM provider instances. - -This module provides a factory pattern for instantiating the appropriate -LLM provider based on the provider type specified in the request. -""" - -import logging -from typing import Any - -from app.services.llm.providers.base import BaseProvider -from app.services.llm.providers.openai import OpenAIProvider - -logger = logging.getLogger(__name__) - - -class ProviderFactory: - """Factory for creating provider instances. - - This class implements the factory pattern to instantiate the correct - provider based on the provider type. It maintains a registry of - available providers and their corresponding classes. 
- """ - - # Registry of provider types to their implementation classes - _PROVIDERS: dict[str, type[BaseProvider]] = { - "openai": OpenAIProvider, - # Future providers can be added here: - # "anthropic": AnthropicProvider, - # "google": GoogleProvider, - # "azure": AzureOpenAIProvider, - # "cohere": CohereProvider, - } - - @classmethod - def create_provider(cls, provider_type: str, client: Any) -> BaseProvider: - """Create a provider instance based on the provider type. - - Args: - provider_type: Type of provider (openai, anthropic, etc.) - client: Provider-specific client instance - - Returns: - Instance of the appropriate provider (BaseProvider) - - Raises: - ValueError: If the provider type is not supported - """ - provider_class = cls._PROVIDERS.get(provider_type) - - if provider_class is None: - supported = cls.get_supported_providers() - logger.error( - f"[ProviderFactory] Unsupported provider type requested: {provider_type}" - ) - raise ValueError( - f"Provider '{provider_type}' is not supported. " - f"Supported providers: {', '.join(supported)}" - ) - return provider_class(client=client) - - @classmethod - def get_supported_providers(cls) -> list[str]: - """Get list of supported provider types. - - Returns: - List of supported provider type strings - """ - return list(cls._PROVIDERS.keys()) diff --git a/backend/app/services/llm/providers/openai.py b/backend/app/services/llm/providers/openai.py index b0b751c7..ad2c7951 100644 --- a/backend/app/services/llm/providers/openai.py +++ b/backend/app/services/llm/providers/openai.py @@ -14,7 +14,12 @@ from openai.types.responses.response import Response from pydantic import ValidationError -from app.models.llm import CompletionConfig, LLMCallResponse, LLMCallRequest +from app.models.llm import ( + CompletionConfig, + LLMCallResponse, + LLMCallRequest, + QueryParams, +) from app.services.llm.providers.base import BaseProvider logger = logging.getLogger(__name__) @@ -43,7 +48,7 @@ def __init__(self, client: OpenAI): self.client = client def execute( - self, request: LLMCallRequest + self, completion_config: CompletionConfig, query: QueryParams ) -> tuple[LLMCallResponse | None, str | None]: """Execute OpenAI API call. 
@@ -61,16 +66,14 @@ def execute( error_message: str | None = None try: - completion_config = request.config.completion - params = { **completion_config.params, } - params["input"] = request.query.input + params["input"] = query.input # Add conversation_id if provided - if request.query.conversation_id: - params["conversation_id"] = request.query.conversation_id + if query.conversation_id: + params["conversation_id"] = query.conversation_id response = self.client.responses.create(**params) @@ -90,13 +93,9 @@ def execute( ) return llm_response, None - except ValidationError as e: - error_message = f"Configuration validation failed: {str(e)}" - logger.error(f"[OpenAIProvider] {error_message}", exc_info=True) - return None, error_message - - except ValueError as e: - error_message = f"Configuration validation failed: {str(e)}" + except TypeError as e: + # handle unexpected arguments gracefully + error_message = f"Invalid or unexpected parameter in Config: {str(e)}" logger.error(f"[OpenAIProvider] {error_message}", exc_info=True) return None, error_message diff --git a/backend/app/services/llm/providers/registry.py b/backend/app/services/llm/providers/registry.py new file mode 100644 index 00000000..4aee2791 --- /dev/null +++ b/backend/app/services/llm/providers/registry.py @@ -0,0 +1,62 @@ +"""Provider factory for creating LLM provider instances. + +This module provides a registry pattern for instantiating the appropriate +LLM provider based on the provider type specified in the request. +""" + +import logging +from typing import Any + +from sqlmodel import Session +from app.services.llm.providers.base import BaseProvider +from app.services.llm.providers.openai import OpenAIProvider + +logger = logging.getLogger(__name__) + + +# Registry of provider types to their implementation classes +PROVIDER_REGISTRY: dict[str, type[BaseProvider]] = { + "openai": OpenAIProvider, + # Future providers can be added here: + # "anthropic": AnthropicProvider, + # "google": GoogleProvider, +} + + +def get_llm_provider( + session: Session, provider_type: str, project_id: int, organization_id: int +) -> BaseProvider: + from app.utils import get_openai_client + + provider_class = PROVIDER_REGISTRY.get(provider_type) + + if provider_class is None: + supported = list(PROVIDER_REGISTRY.keys()) + logger.error( + f"[get_llm_provider] Unsupported provider type requested: {provider_type}" + ) + raise ValueError( + f"Provider '{provider_type}' is not supported. " + f"Supported providers: {', '.join(supported)}" + ) + + if provider_type == "openai": + client = get_openai_client( + session=session, org_id=organization_id, project_id=project_id + ) + else: + logger.error( + f"[get_llm_provider] Unsupported provider type requested: {provider_type}" + ) + raise ValueError(f"Provider '{provider_type}' is not supported.") + + return provider_class(client=client) + + +def get_supported_providers() -> list[str]: + """Get list of supported provider types. 
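
Adding a provider later amounts to subclassing BaseProvider and registering the class; a hypothetical sketch follows (the Anthropic names are placeholders, and get_llm_provider above would also need a branch that builds the matching client):

from typing import Any

from app.models.llm import CompletionConfig, LLMCallResponse, QueryParams
from app.services.llm.providers.base import BaseProvider
from app.services.llm.providers.registry import PROVIDER_REGISTRY


class AnthropicProvider(BaseProvider):  # placeholder name, not in the repo
    def __init__(self, client: Any):
        super().__init__(client)
        self.client = client

    def execute(
        self, completion_config: CompletionConfig, query: QueryParams
    ) -> tuple[LLMCallResponse | None, str | None]:
        # Map completion_config.params and query.input onto the provider's SDK
        # here and return an LLMCallResponse, or (None, error_message) on failure.
        return None, "anthropic support not implemented yet"


PROVIDER_REGISTRY["anthropic"] = AnthropicProvider
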
+ + Returns: + List of supported provider type strings + """ + return list(PROVIDER_REGISTRY.keys()) From 86f68552ec9f0d57188379848da41bcf1a58415e Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Thu, 23 Oct 2025 21:06:56 +0530 Subject: [PATCH 12/15] remove spec --- backend/app/services/llm/specs/__init__.py | 3 - .../app/services/llm/specs/openai/response.py | 280 ------------------ 2 files changed, 283 deletions(-) delete mode 100644 backend/app/services/llm/specs/__init__.py delete mode 100644 backend/app/services/llm/specs/openai/response.py diff --git a/backend/app/services/llm/specs/__init__.py b/backend/app/services/llm/specs/__init__.py deleted file mode 100644 index c010a127..00000000 --- a/backend/app/services/llm/specs/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .openai.response import OpenAIResponseSpec - -__all__ = ["OpenAIResponseSpec"] diff --git a/backend/app/services/llm/specs/openai/response.py b/backend/app/services/llm/specs/openai/response.py deleted file mode 100644 index 491dd9c4..00000000 --- a/backend/app/services/llm/specs/openai/response.py +++ /dev/null @@ -1,280 +0,0 @@ -"""OpenAI Responses API specification model. - -This module defines the OpenAI-specific parameter specification with built-in -validation and conversion to API format based on the official OpenAI Responses API contract. - -Reference: https://platform.openai.com/docs/api-reference/responses/create -""" - -from typing import Any, Literal -import typing - -from pydantic import Field, model_validator -from sqlmodel import SQLModel - -from app.models.llm.request import CompletionConfig - - -class ReasoningConfig(SQLModel): - """Configuration options for reasoning models (gpt-5 and o-series models only).""" - - effort: Literal["minimal", "low", "medium", "high"] | None = Field( - default="medium", - description=( - "Constrains effort on reasoning for reasoning models. " - "Reducing reasoning effort can result in faster responses and fewer tokens used. " - "Note: The gpt-5-pro model defaults to (and only supports) high reasoning effort." - ), - ) - summary: Literal["auto", "concise", "detailed"] | None = Field( - default=None, - description=( - "A summary of the reasoning performed by the model. " - "This can be useful for debugging and understanding the model's reasoning process." - ), - ) - - -class FileSearchTool(SQLModel): - """Tool configuration for searching through vector stores.""" - - type: Literal["file_search"] = Field( - default="file_search", - description="The type of tool. Always 'file_search'.", - ) - vector_store_ids: list[str] = Field( - description="Vector store IDs to search through.", - ) - max_num_results: int | None = Field( - default=None, - ge=1, - le=50, - description="Maximum number of results for file_search tool.", - ) - - -class OpenAIResponseSpec(SQLModel): - """OpenAI Responses API specification with validation. - - This model defines all OpenAI Responses API parameters with their constraints, - provides validation, and handles conversion to OpenAI API format. - - Aligns with OpenAI Responses API contract (POST https://api.openai.com/v1/responses). - Reference: https://platform.openai.com/docs/api-reference/responses/create - """ - - model: str | None = Field( - default="gpt-4o", - description=( - "Model ID used to generate the response, like gpt-4o or o3. " - "OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points." 
- ), - ) - - input: str = Field( - default=None, - description=( - "Text used to generate a response. " - "Can be a simple text string (equivalent to a user role message), or a list of input items with different content types." - ), - ) - - # Conversation - conversation: str | None = Field( - default=None, - description=( - "The conversation that this response belongs to. Items from this conversation are prepended to input_items. " - "Can be a conversation ID (string) or a conversation object. Defaults to null." - ), - ) - - previous_response_id: str | None = Field( - default=None, - description=( - "The unique ID of the previous response to the model. Use this to create multi-turn conversations. " - "Cannot be used in conjunction with conversation." - ), - ) - - # Instructions & Context - - instructions: str | None = Field( - default=None, - description=( - "A system (or developer) message inserted into the model's context. " - "When using with previous_response_id, the instructions from a previous response will not be carried over." - ), - ) - include: Literal["file_search_call.results"] | None = Field( - default=None, - description=( - "Specify additional output data to include in the model response. " - "Currently supported values are: " - "file_search_call.results, " - ), - ) - - # Sampling Parameters - - temperature: float | None = Field( - default=1.0, - ge=0.0, - le=2.0, - description=( - "What sampling temperature to use, between 0 and 2. " - "Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. " - "We generally recommend altering this or top_p but not both." - ), - ) - - top_p: float | None = Field( - default=1.0, - ge=0.0, - le=1.0, - description=( - "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. " - "So 0.1 means only the tokens comprising the top 10% probability mass are considered. " - "We generally recommend altering this or temperature but not both." - ), - ) - - max_output_tokens: int | None = Field( - default=None, - gt=0, - description=( - "An upper bound for the number of tokens that can be generated for a response, " - "including visible output tokens and reasoning tokens." - ), - ) - - # Tools (File Search Only) - - tools: list[FileSearchTool] | None = Field( - default=None, - description="File search tools for searching through vector stores.", - ) - - # Response Configuration - - reasoning: ReasoningConfig | None = Field( - default=None, - description=( - "Configuration options for reasoning models (gpt-5 and o-series models only). " - "Controls reasoning effort and summary generation." - ), - ) - - truncation: Literal["auto", "disabled"] | None = Field( - default="disabled", - description=( - "The truncation strategy to use for the model response. " - "'auto': If input exceeds context window, truncate by dropping items from beginning. " - "'disabled' (default): Request fails with 400 error if input exceeds context window." - ), - ) - - # Advanced Options - - prompt_cache_key: str | None = Field( - default=None, - description=( - "Used by OpenAI to cache responses for similar requests to optimize cache hit rates. " - ), - ) - - @model_validator(mode="after") - def validate_conversation_previous_response_exclusivity( - self, - ) -> "OpenAIResponseSpec": - """Validate that conversation and previous_response_id are not used together. 
- - Returns: - Self for method chaining - - Raises: - ValueError: If both conversation and previous_response_id are provided - """ - if self.conversation is not None and self.previous_response_id is not None: - raise ValueError( - "Cannot use both 'conversation' and 'previous_response_id' parameters together" - ) - - return self - - @model_validator(mode="after") - def validate_temperature_top_p(self) -> "OpenAIResponseSpec": - """Warn if both temperature and top_p are altered from defaults. - - Note: This is a soft validation (warning), not a hard error. - - Returns: - Self for method chaining - """ - # OpenAI recommends altering temperature OR top_p, but not both - # We'll allow it but could log a warning in production - if ( - self.temperature is not None - and self.temperature != 1.0 - and self.top_p is not None - and self.top_p != 1.0 - ): - # In a production setting, you might want to log a warning here - pass - - return self - - @classmethod - def from_completion_config(cls, config: CompletionConfig) -> "OpenAIResponseSpec": - """Convert generic CompletionConfig to OpenAI ResponseSpec. - - Args: - config: Generic completion configuration - - Returns: - OpenAI-specific response specification - """ - # Build tools list if vector stores are provided - tools = None - if config.vector_store_ids: - tools = [ - FileSearchTool( - vector_store_ids=config.vector_store_ids, - max_num_results=config.max_num_results, - ) - ] - - # Convert ReasoningOptions to ReasoningConfig if provided - reasoning = None - if config.reasoning: - reasoning = ReasoningConfig( - effort=config.reasoning.effort, - summary=config.reasoning.summary, - ) - - return cls( - model=config.model, - input=config.input, - instructions=config.instructions, - conversation=config.conversation_id, - previous_response_id=config.previous_response_id, - temperature=config.temperature, - top_p=config.top_p, - max_output_tokens=config.max_output_tokens, - tools=tools, - reasoning=reasoning, - ) - - def to_api_params(self) -> dict[str, Any]: - """Convert OpenAIResponseSpec to OpenAI API parameters. - - Converts the spec to a dictionary suitable for passing to the OpenAI API, - excluding None values and properly formatting nested objects. - - Returns: - Dictionary of API parameters ready to be passed to openai.responses.create() - """ - params = self.model_dump(exclude_none=True) - - print(params) - - return params From 47f4e250c4147fd03cbec0aaa0f749cb51a0787f Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Fri, 24 Oct 2025 09:05:55 +0530 Subject: [PATCH 13/15] Refactor LLM provider modules: remove unnecessary docstrings, simplify comments, and clarify provider responsibilities. --- backend/app/models/llm/__init__.py | 11 -------- backend/app/services/llm/jobs.py | 3 +-- backend/app/services/llm/providers/base.py | 26 +++++++------------ backend/app/services/llm/providers/openai.py | 24 ++--------------- .../app/services/llm/providers/registry.py | 7 +---- 5 files changed, 13 insertions(+), 58 deletions(-) diff --git a/backend/app/models/llm/__init__.py b/backend/app/models/llm/__init__.py index 9242693c..6a7a454f 100644 --- a/backend/app/models/llm/__init__.py +++ b/backend/app/models/llm/__init__.py @@ -1,13 +1,2 @@ -"""LLM models module. - -This module provides all data models for LLM functionality including -requests, responses, configurations, and model specifications. 
-""" from app.models.llm.request import LLMCallRequest, CompletionConfig, QueryParams from app.models.llm.response import LLMCallResponse - -__all__ = [ - # Request/Response models - "LLMCallRequest", - "LLMCallResponse", -] diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index 9e0d78ea..faf4a26f 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -86,11 +86,10 @@ def execute_job( config = request.config provider = config.completion.provider - model = config.completion.params.get("model", "N/A") logger.info( f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, " - f"provider={provider}, model={model}" + f"provider={provider}" ) try: diff --git a/backend/app/services/llm/providers/base.py b/backend/app/services/llm/providers/base.py index 1979b7df..303f7690 100644 --- a/backend/app/services/llm/providers/base.py +++ b/backend/app/services/llm/providers/base.py @@ -1,8 +1,7 @@ """Base provider interface for LLM providers. This module defines the abstract base class that all LLM providers must implement. -It provides a provider-agnostic interface for executing LLM calls with spec-based -transformation. +It provides a provider-agnostic interface for executing LLM calls. """ from abc import ABC, abstractmethod @@ -17,18 +16,18 @@ class BaseProvider(ABC): All provider implementations (OpenAI, Anthropic, etc.) must inherit from this class and implement the required methods. - Each provider uses its own spec class for parameter validation and conversion - to the provider's API format. + Providers directly pass user configuration to their respective APIs. + User is responsible for providing valid provider-specific parameters. Attributes: client: The provider-specific client instance """ def __init__(self, client: Any): - """Initialize the provider with client. + """Initialize provider with client. Args: - client: Provider-specific client (e.g., OpenAI, Anthropic client) + client: Provider-specific client instance """ self.client = client @@ -36,25 +35,18 @@ def __init__(self, client: Any): def execute( self, completion_config: CompletionConfig, query: QueryParams ) -> tuple[LLMCallResponse | None, str | None]: - """Execute an LLM call using the provider. + """Execute LLM API call. - This is the main method that must be implemented by all providers. - It should handle the complete lifecycle of an LLM request: - 1. Build provider-specific parameters (using transformer) - 2. Make the API call to the provider - 3. Extract results (including any additional features like RAG) - 4. Return standardized response + Directly passes the user's config params to provider API along with input. Args: - request: LLM call request with configuration + completion_config: LLM completion configuration + query: Query parameters including input and conversation_id Returns: Tuple of (response, error_message) - If successful: (LLMCallResponse, None) - If failed: (None, error_message) - - Raises: - NotImplementedError: If the provider hasn't implemented this method """ raise NotImplementedError("Providers must implement execute method") diff --git a/backend/app/services/llm/providers/openai.py b/backend/app/services/llm/providers/openai.py index ad2c7951..f1c992ca 100644 --- a/backend/app/services/llm/providers/openai.py +++ b/backend/app/services/llm/providers/openai.py @@ -1,12 +1,3 @@ -"""OpenAI provider implementation. 
- -This module implements the BaseProvider interface for OpenAI models, -including support for standard models, o-series models with reasoning, -and file search capabilities. - -Directly passes user configuration to OpenAI API without transformation. -""" - import logging import openai @@ -26,18 +17,6 @@ class OpenAIProvider(BaseProvider): - """OpenAI implementation of the LLM provider. - - Supports: - - Standard OpenAI models (GPT-4, GPT-3.5, etc.) - - O-series models with reasoning configuration - - Text configuration for verbosity control - - Vector store file search integration - - Directly passes user configuration to OpenAI API. - User is responsible for providing valid OpenAI parameters. - """ - def __init__(self, client: OpenAI): """Initialize OpenAI provider with client. @@ -55,7 +34,8 @@ def execute( Directly passes the user's config params to OpenAI API along with input. Args: - request: LLM call request with configuration + completion_config: Configuration for the completion request + query: Query parameters including input and optional conversation_id Returns: Tuple of (response, error_message) diff --git a/backend/app/services/llm/providers/registry.py b/backend/app/services/llm/providers/registry.py index 4aee2791..464123f6 100644 --- a/backend/app/services/llm/providers/registry.py +++ b/backend/app/services/llm/providers/registry.py @@ -1,9 +1,3 @@ -"""Provider factory for creating LLM provider instances. - -This module provides a registry pattern for instantiating the appropriate -LLM provider based on the provider type specified in the request. -""" - import logging from typing import Any @@ -26,6 +20,7 @@ def get_llm_provider( session: Session, provider_type: str, project_id: int, organization_id: int ) -> BaseProvider: + # Import here to avoid circular imports from app.utils import get_openai_client provider_class = PROVIDER_REGISTRY.get(provider_type) From 9780d6bad8c168b4989988893a567e40fccf255b Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Fri, 24 Oct 2025 14:00:49 +0530 Subject: [PATCH 14/15] Add support for including raw LLM provider response in API calls and update response handling --- backend/app/models/llm/request.py | 4 +++ backend/app/models/llm/response.py | 33 ++++++++--------- backend/app/services/llm/jobs.py | 6 ++-- backend/app/services/llm/providers/base.py | 6 +++- backend/app/services/llm/providers/openai.py | 37 ++++++++------------ 5 files changed, 42 insertions(+), 44 deletions(-) diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py index e26e6b81..ad5dc7cc 100644 --- a/backend/app/models/llm/request.py +++ b/backend/app/models/llm/request.py @@ -41,3 +41,7 @@ class LLMCallRequest(SQLModel): callback_url: str | None = Field( default=None, description="Webhook URL for async response delivery" ) + include_provider_response: bool = Field( + default=False, + description="Whether to include the raw LLM provider response in the output", + ) diff --git a/backend/app/models/llm/response.py b/backend/app/models/llm/response.py index 66159f60..969fe8c5 100644 --- a/backend/app/models/llm/response.py +++ b/backend/app/models/llm/response.py @@ -2,27 +2,22 @@ This module contains response models for LLM API calls. """ -from sqlmodel import SQLModel +from sqlmodel import SQLModel, Field -class LLMCallResponse(SQLModel): - """Response model for /v1/llm/call endpoint. - - Attributes: - status: Response status (success, error, etc.) 
- response_id: Unique identifier for this response - message: The generated text response - model: Model identifier that was used - input_tokens: Number of input tokens consumed - output_tokens: Number of output tokens generated - total_tokens: Total tokens consumed (input + output) - file_search_results: Optional list of file search results from RAG - """ - - status: str - response_id: str - message: str - model: str +class Diagnostics(SQLModel): input_tokens: int output_tokens: int total_tokens: int + model: str + provider: str + + +class LLMCallResponse(SQLModel): + id: str = Field(..., description="Unique id provided by the LLM provider.") + conversation_id: str | None = None + output: str + usage: Diagnostics + llm_response: dict | None = Field( + default=None, description="Raw Response from LLM provider." + ) diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index faf4a26f..5abf66aa 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -108,7 +108,9 @@ def execute_job( ) response, error = provider_instance.execute( - completion_config=config.completion, query=request.query + completion_config=config.completion, + query=request.query, + include_provider_response=request.include_provider_response, ) with Session(engine) as session: @@ -124,7 +126,7 @@ def execute_job( ) logger.info( f"[execute_job] Successfully completed LLM job | job_id={job_id}, " - f"response_id={response.response_id}, tokens={response.total_tokens}" + f"response_id={response.id}, tokens={response.usage.total_tokens}" ) return callback.model_dump() else: diff --git a/backend/app/services/llm/providers/base.py b/backend/app/services/llm/providers/base.py index 303f7690..b9895a4e 100644 --- a/backend/app/services/llm/providers/base.py +++ b/backend/app/services/llm/providers/base.py @@ -33,7 +33,10 @@ def __init__(self, client: Any): @abstractmethod def execute( - self, completion_config: CompletionConfig, query: QueryParams + self, + completion_config: CompletionConfig, + query: QueryParams, + include_provider_response: bool = False, ) -> tuple[LLMCallResponse | None, str | None]: """Execute LLM API call. @@ -42,6 +45,7 @@ def execute( Args: completion_config: LLM completion configuration query: Query parameters including input and conversation_id + include_provider_response: Whether to include the raw LLM provider response in the output Returns: Tuple of (response, error_message) diff --git a/backend/app/services/llm/providers/openai.py b/backend/app/services/llm/providers/openai.py index f1c992ca..b242b51a 100644 --- a/backend/app/services/llm/providers/openai.py +++ b/backend/app/services/llm/providers/openai.py @@ -27,21 +27,11 @@ def __init__(self, client: OpenAI): self.client = client def execute( - self, completion_config: CompletionConfig, query: QueryParams + self, + completion_config: CompletionConfig, + query: QueryParams, + include_provider_response: bool = False, ) -> tuple[LLMCallResponse | None, str | None]: - """Execute OpenAI API call. - - Directly passes the user's config params to OpenAI API along with input. 
- - Args: - completion_config: Configuration for the completion request - query: Query parameters including input and optional conversation_id - - Returns: - Tuple of (response, error_message) - - If successful: (LLMCallResponse, None) - - If failed: (None, error_message) - """ response: Response | None = None error_message: str | None = None @@ -59,14 +49,18 @@ def execute( # Build response llm_response = LLMCallResponse( - status="success", - response_id=response.id, - message=response.output_text, - model=response.model, - input_tokens=response.usage.input_tokens, - output_tokens=response.usage.output_tokens, - total_tokens=response.usage.total_tokens, + id=response.id, + output=response.output_text, + usage={ + "input_tokens": response.usage.input_tokens, + "output_tokens": response.usage.output_tokens, + "total_tokens": response.usage.total_tokens, + "model": response.model, + "provider": "openai", + }, ) + if include_provider_response: + llm_response.llm_response = response.model_dump() logger.info( f"[OpenAIProvider] Successfully generated response: {response.id}" @@ -76,7 +70,6 @@ def execute( except TypeError as e: # handle unexpected arguments gracefully error_message = f"Invalid or unexpected parameter in Config: {str(e)}" - logger.error(f"[OpenAIProvider] {error_message}", exc_info=True) return None, error_message except openai.OpenAIError as e: From 1cf27876d354c12bf641fa4e9ed755643ab745f0 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Fri, 24 Oct 2025 14:21:36 +0530 Subject: [PATCH 15/15] Refactor LLM API and provider modules: update response handling in llm_call, reorganize imports, and enhance logging in OpenAIProvider. --- backend/app/api/routes/llm.py | 12 +++--- backend/app/models/llm/request.py | 3 +- backend/app/services/llm/__init__.py | 5 +++ backend/app/services/llm/jobs.py | 37 ++++++++++--------- .../app/services/llm/providers/__init__.py | 3 +- backend/app/services/llm/providers/openai.py | 9 ++--- .../app/services/llm/providers/registry.py | 3 +- 7 files changed, 41 insertions(+), 31 deletions(-) diff --git a/backend/app/api/routes/llm.py b/backend/app/api/routes/llm.py index 29ae0b46..678d2500 100644 --- a/backend/app/api/routes/llm.py +++ b/backend/app/api/routes/llm.py @@ -3,15 +3,16 @@ from fastapi import APIRouter from app.api.deps import AuthContextDep, SessionDep -from app.models.llm import LLMCallRequest +from app.models import LLMCallRequest, Message from app.services.llm.jobs import start_job from app.utils import APIResponse + logger = logging.getLogger(__name__) router = APIRouter(tags=["LLM"]) -@router.post("/llm/call") +@router.post("/llm/call", response_model=APIResponse[Message]) async def llm_call( _current_user: AuthContextDep, _session: SessionDep, request: LLMCallRequest ): @@ -21,8 +22,7 @@ async def llm_call( project_id = _current_user.project.id organization_id = _current_user.organization.id - # Start background job - job_id = start_job( + start_job( db=_session, request=request, project_id=project_id, @@ -30,5 +30,7 @@ async def llm_call( ) return APIResponse.success_response( - data={"status": "processing", "message": "LLM call job scheduled"}, + data=Message( + message=f"Your response is being generated and will be delivered via callback." 
+ ), ) diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py index ad5dc7cc..cad87d57 100644 --- a/backend/app/models/llm/request.py +++ b/backend/app/models/llm/request.py @@ -1,6 +1,7 @@ -from sqlmodel import SQLModel, Field from typing import Any, Literal +from sqlmodel import Field, SQLModel + # Query Parameters (dynamic per request) class QueryParams(SQLModel): diff --git a/backend/app/services/llm/__init__.py b/backend/app/services/llm/__init__.py index 55443f64..2b376a86 100644 --- a/backend/app/services/llm/__init__.py +++ b/backend/app/services/llm/__init__.py @@ -3,3 +3,8 @@ BaseProvider, OpenAIProvider, ) +from app.services.llm.providers import ( + PROVIDER_REGISTRY, + get_llm_provider, + get_supported_providers, +) diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index 5abf66aa..daaab2e9 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -1,19 +1,18 @@ import logging from uuid import UUID +from asgi_correlation_id import correlation_id from fastapi import HTTPException from sqlmodel import Session -from asgi_correlation_id import correlation_id -from app.crud.jobs import JobCrud -from app.utils import send_callback, APIResponse from app.core.db import engine - -from app.models import JobType, JobStatus, JobUpdate, LLMCallRequest, LLMCallResponse - +from app.crud.jobs import JobCrud +from app.models import JobStatus, JobType, JobUpdate, LLMCallRequest, LLMCallResponse +from app.utils import APIResponse, send_callback from app.celery.utils import start_high_priority_job from app.services.llm.providers.registry import get_llm_provider + logger = logging.getLogger(__name__) @@ -113,14 +112,16 @@ def execute_job( include_provider_response=request.include_provider_response, ) - with Session(engine) as session: - job_crud = JobCrud(session=session) - if response: - callback = APIResponse.success_response(data=response) - send_callback( - callback_url=request.callback_url, - data=callback.model_dump(), - ) + if response: + callback = APIResponse.success_response(data=response) + send_callback( + callback_url=request.callback_url, + data=callback.model_dump(), + ) + + with Session(engine) as session: + job_crud = JobCrud(session=session) + job_crud.update( job_id=job_id, job_update=JobUpdate(status=JobStatus.SUCCESS) ) @@ -129,8 +130,10 @@ def execute_job( f"response_id={response.id}, tokens={response.usage.total_tokens}" ) return callback.model_dump() - else: - return handle_job_error(job_id, request.callback_url, error) + + return handle_job_error( + job_id, request.callback_url, error=error or "Unknown error occurred" + ) except Exception as e: error = f"Unexpected error in LLM job execution: {str(e)}" @@ -138,4 +141,4 @@ def execute_job( f"[execute_job] {error} | job_id={job_id}, task_id={task_id}", exc_info=True, ) - return handle_job_error(job_id, request.callback_url, error) + return handle_job_error(job_id, request.callback_url, error=error) diff --git a/backend/app/services/llm/providers/__init__.py b/backend/app/services/llm/providers/__init__.py index 8abb5441..8d31b3a7 100644 --- a/backend/app/services/llm/providers/__init__.py +++ b/backend/app/services/llm/providers/__init__.py @@ -1,8 +1,7 @@ from app.services.llm.providers.base import BaseProvider from app.services.llm.providers.openai import OpenAIProvider - from app.services.llm.providers.registry import ( + PROVIDER_REGISTRY, get_llm_provider, get_supported_providers, - PROVIDER_REGISTRY, ) diff --git 
a/backend/app/services/llm/providers/openai.py b/backend/app/services/llm/providers/openai.py index b242b51a..6bace6cf 100644 --- a/backend/app/services/llm/providers/openai.py +++ b/backend/app/services/llm/providers/openai.py @@ -3,16 +3,15 @@ import openai from openai import OpenAI from openai.types.responses.response import Response -from pydantic import ValidationError from app.models.llm import ( CompletionConfig, LLMCallResponse, - LLMCallRequest, QueryParams, ) from app.services.llm.providers.base import BaseProvider + logger = logging.getLogger(__name__) @@ -63,7 +62,7 @@ def execute( llm_response.llm_response = response.model_dump() logger.info( - f"[OpenAIProvider] Successfully generated response: {response.id}" + f"[OpenAIProvider.execute] Successfully generated response: {response.id}" ) return llm_response, None @@ -78,11 +77,11 @@ def execute( error_message = handle_openai_error(e) logger.error( - f"[OpenAIProvider] OpenAI API error: {error_message}", exc_info=True + f"[OpenAIProvider.execute] OpenAI API error: {error_message}", exc_info=True ) return None, error_message except Exception as e: error_message = f"Unexpected error: {str(e)}" - logger.error(f"[OpenAIProvider] {error_message}", exc_info=True) + logger.error(f"[OpenAIProvider.execute] {error_message}", exc_info=True) return None, error_message diff --git a/backend/app/services/llm/providers/registry.py b/backend/app/services/llm/providers/registry.py index 464123f6..3aea7803 100644 --- a/backend/app/services/llm/providers/registry.py +++ b/backend/app/services/llm/providers/registry.py @@ -1,10 +1,11 @@ import logging -from typing import Any from sqlmodel import Session + from app.services.llm.providers.base import BaseProvider from app.services.llm.providers.openai import OpenAIProvider + logger = logging.getLogger(__name__)
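
Note on extending the provider registry: after this series, adding a provider only requires a new BaseProvider subclass, a PROVIDER_REGISTRY entry, and a client-construction branch in get_llm_provider(). A minimal sketch, assuming a hypothetical AnthropicProvider and client helper (neither exists in this patch series):

    from app.services.llm.providers.base import BaseProvider

    class AnthropicProvider(BaseProvider):
        # Hypothetical: mirrors the final execute() signature from providers/base.py
        def execute(self, completion_config, query, include_provider_response=False):
            # Pass completion_config.params and query.input through to the
            # Anthropic SDK, then map the result into LLMCallResponse,
            # mirroring OpenAIProvider.execute. Left unimplemented here.
            raise NotImplementedError

    # In registry.py the class would then be registered:
    # PROVIDER_REGISTRY["anthropic"] = AnthropicProvider
    # and get_llm_provider() would build its client via a project-scoped helper
    # analogous to get_openai_client().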
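
Note on calling the new endpoint: the payload nesting below is inferred from the attribute accesses in jobs.py (request.config.completion.provider and .params) and providers/openai.py (query.input, query.conversation_id); request.py is not shown in full in this series, so exact field names may differ. The base URL and auth header are likewise assumptions for this sketch.

    import httpx

    payload = {
        "config": {
            "completion": {
                "provider": "openai",
                # params are forwarded unchanged to client.responses.create()
                "params": {"model": "gpt-4o", "instructions": "You are a helpful assistant."},
            }
        },
        "query": {"input": "Summarize the onboarding flow."},
        "callback_url": "https://example.com/llm-callback",  # hypothetical webhook receiver
        "include_provider_response": False,
    }

    httpx.post(
        "http://localhost:8000/api/v1/llm/call",  # assumed mount path
        json=payload,
        headers={"X-API-KEY": "<project api key>"},  # assumed auth scheme
    )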
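
Note on callback delivery: on success, execute_job wraps the LLMCallResponse in APIResponse.success_response() and POSTs it to callback_url via send_callback(). Based on the response.py models in this series, the data portion of that body would look roughly like the dict below; the outer APIResponse envelope is not part of this diff and its field names are assumed.

    # Rough shape of callback.model_dump()["data"], assuming the envelope
    # exposes a "data" key (APIResponse internals are not shown in this series).
    example_data = {
        "id": "resp_abc123",      # id returned by the LLM provider
        "conversation_id": None,
        "output": "generated text...",
        "usage": {
            "input_tokens": 42,
            "output_tokens": 128,
            "total_tokens": 170,
            "model": "gpt-4o",
            "provider": "openai",
        },
        "llm_response": None,     # raw provider dump when include_provider_response=True
    }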