From 90d5288463f839d0e79e18fb88d90741998b49f5 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Tue, 24 Jun 2025 14:25:12 +0530 Subject: [PATCH 01/12] logging middleware and logs for api key collection documents open ai and AWS storage --- backend/app/api/routes/api_keys.py | 22 +++--- backend/app/api/routes/collections.py | 67 ++++++++++++----- backend/app/api/routes/documents.py | 16 +++- backend/app/core/cloud/storage.py | 104 ++++++++++++++++++++++---- backend/app/core/logger.py | 26 +++++-- backend/app/core/middleware.py | 22 ++++++ backend/app/crud/api_key.py | 29 ++++++- backend/app/crud/collection.py | 35 ++++++++- backend/app/crud/document.py | 30 +++++++- backend/app/crud/rag/open_ai.py | 80 ++++++++++++++++++-- backend/app/main.py | 8 +- 11 files changed, 369 insertions(+), 70 deletions(-) create mode 100644 backend/app/core/middleware.py diff --git a/backend/app/api/routes/api_keys.py b/backend/app/api/routes/api_keys.py index 1afecfc7..b4db29df 100644 --- a/backend/app/api/routes/api_keys.py +++ b/backend/app/api/routes/api_keys.py @@ -1,4 +1,4 @@ -import uuid +import logging from fastapi import APIRouter, Depends, HTTPException from sqlmodel import Session from app.api.deps import get_db, get_current_active_superuser @@ -14,6 +14,7 @@ from app.utils import APIResponse from app.core.exception_handlers import HTTPException +logger = logging.getLogger(__name__) router = APIRouter(prefix="/apikeys", tags=["API Keys"]) @@ -27,17 +28,18 @@ def create_key( """ Generate a new API key for the user's organization. """ - # Validate organization project = validate_project(session, project_id) existing_api_key = get_api_key_by_project_user(session, project_id, user_id) if existing_api_key: + logger.warning( + f"[create_key] API key already exists | project_id={project_id}, user_id={user_id}" + ) raise HTTPException( status_code=400, detail="API Key already exists for this user and project.", ) - # Create and return API key api_key = create_api_key( session, organization_id=project.organization_id, @@ -57,21 +59,18 @@ def list_keys( Retrieve all API keys for the given project. Superusers get all keys; regular users get only their own. """ - # Validate project project = validate_project(session=session, project_id=project_id) if current_user.is_superuser: - # Superuser: fetch all API keys for the project api_keys = get_api_keys_by_project(session=session, project_id=project_id) else: - # Regular user: fetch only their own API key user_api_key = get_api_key_by_project_user( session=session, project_id=project_id, user_id=current_user.id ) api_keys = [user_api_key] if user_api_key else [] - # Raise an exception if no API keys are found for the project if not api_keys: + logger.warning(f"[list_keys] No API keys found | project_id={project_id}") raise HTTPException( status_code=404, detail="No API keys found for this project.", @@ -91,6 +90,7 @@ def get_key( """ api_key = get_api_key(session, api_key_id) if not api_key: + logger.warning(f"[get_key] API key not found | api_key_id={api_key_id}") raise HTTPException(404, "API Key does not exist") return APIResponse.success_response(api_key) @@ -106,10 +106,12 @@ def revoke_key( Soft delete an API key (revoke access). """ api_key = get_api_key(session, api_key_id) - if not api_key: + logger.warning( + f"[apikey.revoke] API key not found or already deleted | api_key_id={api_key_id}" + ) raise HTTPException(404, "API key not found or already deleted") delete_api_key(session, api_key_id) - - return APIResponse.success_response({"message": "API key revoked successfully"}) + logger.info(f"[revoke_key] API key revoked | api_key_id={api_key_id}") + return APIResponse.success_response({"message": "API key revoked successfully"}) \ No newline at end of file diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py index 7c903930..c5fdecf4 100644 --- a/backend/app/api/routes/collections.py +++ b/backend/app/api/routes/collections.py @@ -22,6 +22,7 @@ from app.models.collection import CollectionStatus from app.utils import APIResponse, load_description +logger = logging.getLogger(__name__) router = APIRouter(prefix="/collections", tags=["collections"]) @@ -59,6 +60,9 @@ def model_post_init(self, __context: Any): self.documents = list(set(self.documents)) def __call__(self, crud: DocumentCrud): + logger.info( + f"[DocumentOptions.call] Starting batch iteration for documents | {{'batch_size': {self.batch_size}, 'total_documents': {len(self.documents)}}}" + ) (start, stop) = (0, self.batch_size) while True: view = self.documents[start:stop] @@ -130,9 +134,15 @@ def success(self, body): class SilentCallback(CallbackHandler): def fail(self, body): + logger.info( + f"[SilentCallback.fail] Silent callback failure | {{'body': '{body}'}}" + ) return def success(self, body): + logger.info( + f"[SilentCallback.success] Silent callback success | {{'body': '{body}'}}" + ) return @@ -140,25 +150,37 @@ class WebHookCallback(CallbackHandler): def __init__(self, url: HttpUrl, payload: ResponsePayload): super().__init__(payload) self.url = url + logger.info( + f"[WebHookCallback.init] Initialized webhook callback | {{'url': '{url}'}}" + ) def __call__(self, response: APIResponse, status: str): time = ResponsePayload.now() payload = replace(self.payload, status=status, time=time) response.metadata = asdict(payload) - + logger.info( + f"[WebHookCallback.call] Posting callback | {{'url': '{self.url}', 'status': '{status}'}}" + ) post_callback(self.url, response) def fail(self, body): + logger.error(f"[WebHookCallback.fail] Callback failed | {{'body': '{body}'}}") self(APIResponse.failure_response(body), "incomplete") def success(self, body): + logger.info( + f"[WebHookCallback.success] Callback succeeded" + ) self(APIResponse.success_response(body), "complete") def _backout(crud: OpenAIAssistantCrud, assistant_id: str): try: crud.delete(assistant_id) - except OpenAIError: + except OpenAIError as err: + logger.error( + f"[backout] Failed to delete assistant | {{'assistant_id': '{assistant_id}', 'error': '{str(err)}'}}" + ) warnings.warn( ": ".join( [ @@ -200,18 +222,10 @@ def do_create_collection( storage.get_file_size_kb(doc.object_store_url) for doc in flat_docs ] - logging.info( - f"[VectorStore Update] Uploading {len(flat_docs)} documents to vector store {vector_store.id}" - ) list(vector_store_crud.update(vector_store.id, storage, docs)) - logging.info(f"[VectorStore Upload] Upload completed") assistant_options = dict(request.extract_super_type(AssistantOptions)) - logging.info( - f"[Assistant Create] Creating assistant with options: {assistant_options}" - ) assistant = assistant_crud.create(vector_store.id, **assistant_options) - logging.info(f"[Assistant Create] Assistant created: {assistant.id}") collection = collection_crud.read_one(UUID(payload.key)) collection.llm_service_id = assistant.id @@ -220,22 +234,21 @@ def do_create_collection( collection.updated_at = now() if flat_docs: - logging.info( - f"[DocumentCollection] Linking {len(flat_docs)} documents to collection {collection.id}" - ) DocumentCollectionCrud(session).create(collection, flat_docs) collection_crud._update(collection) elapsed = time.time() - start_time logging.info( - f"Collection created: {collection.id} | Time: {elapsed:.2f}s | " + f"[do_create_collection] Collection created: {collection.id} | Time: {elapsed:.2f}s | " f"Files: {len(flat_docs)} | Sizes: {file_sizes_kb} KB | Types: {list(file_exts)}" ) callback.success(collection.model_dump(mode="json")) except Exception as err: - logging.error(f"[Collection Creation Failed] {err} ({type(err).__name__})") + logger.error( + f"[do_create_collection] Collection Creation Failed | {{'collection_id': '{payload.key}', 'error': '{str(err)}'}}" + ) if "assistant" in locals(): _backout(assistant_crud, assistant.id) try: @@ -244,7 +257,9 @@ def do_create_collection( collection.updated_at = now() collection_crud._update(collection) except Exception as suberr: - logging.warning(f"[Collection Status Update Failed] {suberr}") + logger.warning( + f"[do_create_collection] Failed to update collection status | {{'collection_id': '{payload.key}', 'reason': '{str(suberr)}'}}" + ) callback.fail(str(err)) @@ -282,6 +297,10 @@ def create_collection( payload, ) + logger.info( + f"[create_collection] Background task for collection creation scheduled | " + f"{{'collection_id': '{collection.id}'}}" + ) return APIResponse.success_response(data=None, metadata=asdict(payload)) @@ -301,12 +320,18 @@ def do_delete_collection( collection = collection_crud.read_one(request.collection_id) assistant = OpenAIAssistantCrud() data = collection_crud.delete(collection, assistant) + logger.info( + f"[do_delete_collection] Collection deleted successfully | {{'collection_id': '{collection.id}'}}" + ) callback.success(data.model_dump(mode="json")) except (ValueError, PermissionError, SQLAlchemyError) as err: + logger.warning( + f"[do_delete_collection] Failed to delete collection | {{'collection_id': '{request.collection_id}', 'error': '{str(err)}'}}" + ) callback.fail(str(err)) except Exception as err: - warnings.warn( - 'Unexpected exception "{}": {}'.format(type(err).__name__, err), + logger.error( + f"[do_delete_collection] Unexpected error during deletion | {{'collection_id': '{request.collection_id}', 'error': '{str(err)}', 'error_type': '{type(err).__name__}'}}" ) callback.fail(str(err)) @@ -333,6 +358,10 @@ def delete_collection( payload, ) + logger.info( + f"[delete_collection] Background task for deletion scheduled | " + f"{{'collection_id': '{request.collection_id}'}}" + ) return APIResponse.success_response(data=None, metadata=asdict(payload)) @@ -381,4 +410,4 @@ def collection_documents( document_collection_crud = DocumentCollectionCrud(session) collection = collection_crud.read_one(collection_id) data = document_collection_crud.read(collection, skip, limit) - return APIResponse.success_response(data) + return APIResponse.success_response(data) \ No newline at end of file diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 9e6d1c2e..5eb8f93d 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -1,3 +1,4 @@ +import logging from uuid import UUID, uuid4 from typing import List from pathlib import Path @@ -12,6 +13,7 @@ from app.core.cloud import AmazonCloudStorage from app.crud.rag import OpenAIAssistantCrud +logger = logging.getLogger(__name__) router = APIRouter(prefix="/documents", tags=["documents"]) @@ -43,6 +45,7 @@ def upload_doc( ): storage = AmazonCloudStorage(current_user) document_id = uuid4() + object_store_url = storage.put(src, Path(str(document_id))) crud = DocumentCrud(session, current_user.id) @@ -52,6 +55,9 @@ def upload_doc( object_store_url=str(object_store_url), ) data = crud.update(document) + logger.info( + f"[upload_doc] Document uploaded successfully | {{'user_id': '{current_user.id}', 'document_id': '{document_id}'}}" + ) return APIResponse.success_response(data) @@ -71,6 +77,9 @@ def remove_doc( document = d_crud.delete(doc_id) data = c_crud.delete(document, a_crud) + logger.info( + f"[remove_doc] Document deleted successfully | {{'user_id': '{current_user.id}', 'document_id': '{doc_id}'}}" + ) return APIResponse.success_response(data) @@ -92,9 +101,14 @@ def permanent_delete_doc( document = d_crud.read_one(doc_id) c_crud.delete(document, a_crud) + storage.delete(document.object_store_url) d_crud.delete(doc_id) + logger.info( + f"[permanent_delete_doc] Document permanently deleted from Cloud and soft deleted from DB | " + f"{{'user_id': '{current_user.id}', 'document_id': '{doc_id}'}}" + ) return APIResponse.success_response(document) @@ -110,4 +124,4 @@ def doc_info( ): crud = DocumentCrud(session, current_user.id) data = crud.read_one(doc_id) - return APIResponse.success_response(data) + return APIResponse.success_response(data) \ No newline at end of file diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 0e8eaba4..fa7ad193 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -1,5 +1,5 @@ import os - +import logging import functools as ft from pathlib import Path from dataclasses import dataclass, asdict @@ -13,6 +13,8 @@ from app.api.deps import CurrentUser from app.core.config import settings +logger = logging.getLogger(__name__) + class CloudStorageError(Exception): pass @@ -21,6 +23,9 @@ class CloudStorageError(Exception): class AmazonCloudStorageClient: @ft.cached_property def client(self): + logger.info( + f"[AmazonCloudStorageClient.client] Initializing S3 client | {{'region': '{settings.AWS_DEFAULT_REGION}'}}" + ) kwargs = {} cred_params = ( ("aws_access_key_id", "AWS_ACCESS_KEY_ID"), @@ -31,25 +36,53 @@ def client(self): for i, j in cred_params: kwargs[i] = os.environ.get(j, getattr(settings, j)) - return boto3.client("s3", **kwargs) + client = boto3.client("s3", **kwargs) + logger.info( + f"[AmazonCloudStorageClient.client] S3 client initialized | {{'region': '{settings.AWS_DEFAULT_REGION}'}}" + ) + return client def create(self): + logger.info( + f"[AmazonCloudStorageClient.create] Checking/creating S3 bucket | {{'bucket': '{settings.AWS_S3_BUCKET}'}}" + ) try: # does the bucket exist... self.client.head_bucket(Bucket=settings.AWS_S3_BUCKET) + logger.info( + f"[AmazonCloudStorageClient.create] Bucket exists | {{'bucket': '{settings.AWS_S3_BUCKET}'}}" + ) except ValueError as err: + logger.error( + f"[AmazonCloudStorageClient.create] Invalid bucket configuration | {{'bucket': '{settings.AWS_S3_BUCKET}', 'error': '{str(err)}'}}" + ) raise CloudStorageError(err) from err except ClientError as err: response = int(err.response["Error"]["Code"]) if response != 404: + logger.error( + f"[AmazonCloudStorageClient.create] Unexpected AWS error | {{'bucket': '{settings.AWS_S3_BUCKET}', 'error': '{str(err)}', 'code': {response}}}" + ) raise CloudStorageError(err) from err # ... if not create it - self.client.create_bucket( - Bucket=settings.AWS_S3_BUCKET, - CreateBucketConfiguration={ - "LocationConstraint": settings.AWS_DEFAULT_REGION, - }, + logger.warning( + f"[AmazonCloudStorageClient.create] Bucket not found, creating | {{'bucket': '{settings.AWS_S3_BUCKET}'}}" ) + try: + self.client.create_bucket( + Bucket=settings.AWS_S3_BUCKET, + CreateBucketConfiguration={ + "LocationConstraint": settings.AWS_DEFAULT_REGION, + }, + ) + logger.info( + f"[AmazonCloudStorageClient.create] Bucket created successfully | {{'bucket': '{settings.AWS_S3_BUCKET}'}}" + ) + except ClientError as create_err: + logger.error( + f"[AmazonCloudStorageClient.create] Failed to create bucket | {{'bucket': '{settings.AWS_S3_BUCKET}', 'error': '{str(create_err)}'}}" + ) + raise CloudStorageError(create_err) from create_err @dataclass(frozen=True) @@ -69,7 +102,8 @@ def to_url(self): for k in ParseResult._fields: kwargs.setdefault(k) - return ParseResult(**kwargs) + url = ParseResult(**kwargs) + return url @classmethod def from_url(cls, url: str): @@ -78,7 +112,8 @@ def from_url(cls, url: str): if path.is_absolute(): path = path.relative_to(path.root) - return cls(Bucket=url.netloc, Key=str(path)) + instance = cls(Bucket=url.netloc, Key=str(path)) + return instance class CloudStorage: @@ -98,6 +133,9 @@ def __init__(self, user: CurrentUser): self.aws = AmazonCloudStorageClient() def put(self, source: UploadFile, basename: Path) -> SimpleStorageName: + logger.info( + f"[AmazonCloudStorage.put] Starting file upload | {{'user_id': '{self.user.id}', 'filename': '{source.filename}', 'basename': '{basename}'}}" + ) key = Path(str(self.user.id), basename) destination = SimpleStorageName(str(key)) @@ -111,30 +149,68 @@ def put(self, source: UploadFile, basename: Path) -> SimpleStorageName: }, **kwargs, ) + logger.info( + f"[AmazonCloudStorage.put] File uploaded successfully | {{'user_id': '{self.user.id}', 'bucket': '{destination.Bucket}', 'key': '{destination.Key}'}}" + ) except ClientError as err: + logger.error( + f"[AmazonCloudStorage.put] AWS upload error | {{'user_id': '{self.user.id}', 'bucket': '{destination.Bucket}', 'key': '{destination.Key}', 'error': '{str(err)}'}}" + ) raise CloudStorageError(f'AWS Error: "{err}"') from err return destination def stream(self, url: str) -> StreamingBody: + logger.info( + f"[AmazonCloudStorage.stream] Starting file stream | {{'user_id': '{self.user.id}', 'url': '{url}'}}" + ) name = SimpleStorageName.from_url(url) kwargs = asdict(name) try: - return self.aws.client.get_object(**kwargs).get("Body") + body = self.aws.client.get_object(**kwargs).get("Body") + logger.info( + f"[AmazonCloudStorage.stream] File streamed successfully | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}'}}" + ) + return body except ClientError as err: + logger.error( + f"[AmazonCloudStorage.stream] AWS stream error | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}', 'error': '{str(err)}'}}" + ) raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err def get_file_size_kb(self, url: str) -> float: + logger.info( + f"[AmazonCloudStorage.get_file_size_kb] Starting file size retrieval | {{'user_id': '{self.user.id}', 'url': '{url}'}}" + ) name = SimpleStorageName.from_url(url) kwargs = asdict(name) - response = self.aws.client.head_object(**kwargs) - size_bytes = response["ContentLength"] - return round(size_bytes / 1024, 2) + try: + response = self.aws.client.head_object(**kwargs) + size_bytes = response["ContentLength"] + size_kb = round(size_bytes / 1024, 2) + logger.info( + f"[AmazonCloudStorage.get_file_size_kb] File size retrieved successfully | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}', 'size_kb': {size_kb}}}" + ) + return size_kb + except ClientError as err: + logger.error( + f"[AmazonCloudStorage.get_file_size_kb] AWS head object error | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}', 'error': '{str(err)}'}}" + ) + raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err def delete(self, url: str) -> None: + logger.info( + f"[AmazonCloudStorage.delete] Starting file deletion | {{'user_id': '{self.user.id}', 'url': '{url}'}}" + ) name = SimpleStorageName.from_url(url) kwargs = asdict(name) try: self.aws.client.delete_object(**kwargs) + logger.info( + f"[AmazonCloudStorage.delete] File deleted successfully | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}'}}" + ) except ClientError as err: - raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err + logger.error( + f"[AmazonCloudStorage.delete] AWS delete error | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}', 'error': '{str(err)}'}}" + ) + raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err \ No newline at end of file diff --git a/backend/app/core/logger.py b/backend/app/core/logger.py index b51d207f..bc4179b7 100644 --- a/backend/app/core/logger.py +++ b/backend/app/core/logger.py @@ -4,18 +4,28 @@ from app.core.config import settings LOG_DIR = settings.LOG_DIR -if not os.path.exists(LOG_DIR): - os.makedirs(LOG_DIR) +os.makedirs(LOG_DIR, exist_ok=True) LOG_FILE_PATH = os.path.join(LOG_DIR, "app.log") LOGGING_LEVEL = logging.INFO -LOGGING_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" +LOGGING_FORMAT = "%(asctime)s - %(levelname)s - %(name)s - %(message)s" -logging.basicConfig(level=LOGGING_LEVEL, format=LOGGING_FORMAT) +# Create root logger +logger = logging.getLogger() +logger.setLevel(LOGGING_LEVEL) -file_handler = RotatingFileHandler(LOG_FILE_PATH, maxBytes=10485760, backupCount=5) -file_handler.setLevel(LOGGING_LEVEL) -file_handler.setFormatter(logging.Formatter(LOGGING_FORMAT)) +# Formatter +formatter = logging.Formatter(LOGGING_FORMAT) -logging.getLogger("").addHandler(file_handler) +# Stream handler (console) +stream_handler = logging.StreamHandler() +stream_handler.setFormatter(formatter) +logger.addHandler(stream_handler) + +# Rotating file handler +file_handler = RotatingFileHandler( + LOG_FILE_PATH, maxBytes=10 * 1024 * 1024, backupCount=5 +) +file_handler.setFormatter(formatter) +logger.addHandler(file_handler) \ No newline at end of file diff --git a/backend/app/core/middleware.py b/backend/app/core/middleware.py new file mode 100644 index 00000000..674c2d88 --- /dev/null +++ b/backend/app/core/middleware.py @@ -0,0 +1,22 @@ +import logging +import time +from fastapi import Request, Response + +logger = logging.getLogger("http_request_logger") + + +async def http_request_logger(request: Request, call_next) -> Response: + start_time = time.time() + try: + response = await call_next(request) + except Exception as e: + logger.exception("Unhandled exception during request") + raise + + process_time = (time.time() - start_time) * 1000 # ms + client_ip = request.client.host if request.client else "unknown" + + logger.info( + f"{request.method} {request.url.path} - {response.status_code} [{process_time:.2f}ms]" + ) + return response \ No newline at end of file diff --git a/backend/app/crud/api_key.py b/backend/app/crud/api_key.py index e787514f..95e06304 100644 --- a/backend/app/crud/api_key.py +++ b/backend/app/crud/api_key.py @@ -1,5 +1,6 @@ import uuid import secrets +import logging from sqlmodel import Session, select from app.core.security import ( get_password_hash, @@ -11,6 +12,8 @@ from app.core.exception_handlers import HTTPException from app.models.api_key import APIKey, APIKeyPublic +logger = logging.getLogger(__name__) + def generate_api_key() -> tuple[str, str]: """Generate a new API key and its hash.""" @@ -48,6 +51,9 @@ def create_api_key( api_key_dict = api_key.model_dump() api_key_dict["key"] = raw_key # Return the raw key to the user + logger.info( + f"[create_api_key] API key creation completed | {{'api_key_id': {api_key.id}, 'user_id': {user_id}, 'project_id': {project_id}}}" + ) return APIKeyPublic.model_validate(api_key_dict) @@ -66,8 +72,9 @@ def get_api_key(session: Session, api_key_id: int) -> APIKeyPublic | None: # Decrypt the key decrypted_key = decrypt_api_key(api_key.key) api_key_dict["key"] = decrypted_key - return APIKeyPublic.model_validate(api_key_dict) + + logger.warning(f"[get_api_key] API key not found | {{'api_key_id': {api_key_id}}}") return None @@ -77,12 +84,21 @@ def delete_api_key(session: Session, api_key_id: int) -> None: """ api_key = session.get(APIKey, api_key_id) + if not api_key: + logger.warning( + f"[delete_api_key] API key not found | {{'api_key_id': {api_key_id}}}" + ) + return + api_key.is_deleted = True api_key.deleted_at = now() api_key.updated_at = now() session.add(api_key) session.commit() + logger.info( + f"[delete_api_key] API key soft deleted successfully | {{'api_key_id': {api_key_id}}}" + ) def get_api_key_by_value(session: Session, api_key_value: str) -> APIKeyPublic | None: @@ -97,10 +113,12 @@ def get_api_key_by_value(session: Session, api_key_value: str) -> APIKeyPublic | decrypted_key = decrypt_api_key(api_key.key) if api_key_value == decrypted_key: api_key_dict = api_key.model_dump() - api_key_dict["key"] = decrypted_key - return APIKeyPublic.model_validate(api_key_dict) + + logger.warning( + f"[get_api_key_by_value] API key not found | {{'action': 'not_found'}}" + ) return None @@ -122,6 +140,9 @@ def get_api_key_by_project_user( api_key_dict["key"] = decrypt_api_key(api_key.key) return APIKeyPublic.model_validate(api_key_dict) + logger.warning( + f"[get_api_key_by_project_user] API key not found | {{'project_id': {project_id}, 'user_id': '{user_id}'}}" + ) return None @@ -140,4 +161,4 @@ def get_api_keys_by_project(session: Session, project_id: int) -> list[APIKeyPub key_dict["key"] = decrypt_api_key(key.key) result.append(APIKeyPublic.model_validate(key_dict)) - return result + return result \ No newline at end of file diff --git a/backend/app/crud/collection.py b/backend/app/crud/collection.py index fe3e1b03..5eb2edd2 100644 --- a/backend/app/crud/collection.py +++ b/backend/app/crud/collection.py @@ -1,3 +1,4 @@ +import logging import functools as ft from uuid import UUID from typing import Optional @@ -10,6 +11,8 @@ from .document_collection import DocumentCollectionCrud +logger = logging.getLogger(__name__) + class CollectionCrud: def __init__(self, session: Session, owner_id: int): @@ -24,11 +27,17 @@ def _update(self, collection: Collection): self.owner_id, collection.owner_id, ) + logger.error( + f"[CollectionCrud._update] Permission error | {{'collection_id': '{collection.id}', 'error': '{err}'}}" + ) raise PermissionError(err) self.session.add(collection) self.session.commit() self.session.refresh(collection) + logger.info( + f"[CollectionCrud._update] Collection updated successfully | {{'collection_id': '{collection.id}'}}" + ) return collection @@ -41,6 +50,9 @@ def _exists(self, collection: Collection): ) .scalar() ) + logger.info( + f"[CollectionCrud._exists] Existence check completed | {{'llm_service_id': '{collection.llm_service_id}', 'exists': {bool(present)}}}" + ) return bool(present) @@ -73,7 +85,8 @@ def read_one(self, collection_id: UUID): ) ) - return self.session.exec(statement).one() + collection = self.session.exec(statement).one() + return collection def read_all(self): statement = select(Collection).where( @@ -83,20 +96,31 @@ def read_all(self): ) ) - return self.session.exec(statement).all() + collections = self.session.exec(statement).all() + return collections @ft.singledispatchmethod def delete(self, model, remote): # remote should be an OpenAICrud + logger.error( + f"[CollectionCrud.delete] Invalid model type | {{'model_type': '{type(model).__name__}'}}" + ) raise TypeError(type(model)) @delete.register def _(self, model: Collection, remote): remote.delete(model.llm_service_id) model.deleted_at = now() - return self._update(model) + collection = self._update(model) + logger.info( + f"[CollectionCrud.delete] Collection deleted successfully | {{'collection_id': '{model.id}'}}" + ) + return collection @delete.register def _(self, model: Document, remote): + logger.info( + f"[CollectionCrud.delete] Starting document deletion from collections | {{'document_id': '{model.id}'}}" + ) statement = ( select(Collection) .join( @@ -110,5 +134,8 @@ def _(self, model: Document, remote): for c in self.session.execute(statement): self.delete(c.Collection, remote) self.session.refresh(model) + logger.info( + f"[CollectionCrud.delete] Document deletion from collections completed | {{'document_id': '{model.id}'}}" + ) - return model + return model \ No newline at end of file diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index ecb81890..615c1741 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -1,3 +1,4 @@ +import logging from uuid import UUID from typing import Optional, List @@ -7,6 +8,8 @@ from app.core.util import now from app.core.exception_handlers import HTTPException +logger = logging.getLogger(__name__) + class DocumentCrud: def __init__(self, session: Session, owner_id: int): @@ -23,6 +26,9 @@ def read_one(self, doc_id: UUID): result = self.session.exec(statement).one_or_none() if result is None: + logger.warning( + f"[DocumentCrud.read_one] Document not found | {{'doc_id': '{doc_id}', 'owner_id': {self.owner_id}}}" + ) raise HTTPException(status_code=404, detail="Document not found") return result @@ -40,14 +46,21 @@ def read_many( ) if skip is not None: if skip < 0: + logger.error( + f"[DocumentCrud.read_many] Invalid skip value | {{'owner_id': {self.owner_id}, 'skip': {skip}, 'error': 'Negative skip'}}" + ) raise ValueError(f"Negative skip: {skip}") statement = statement.offset(skip) if limit is not None: if limit < 0: + logger.error( + f"[DocumentCrud.read_many] Invalid limit value | {{'owner_id': {self.owner_id}, 'limit': {limit}, 'error': 'Negative limit'}}" + ) raise ValueError(f"Negative limit: {limit}") statement = statement.limit(limit) - return self.session.exec(statement).all() + documents = self.session.exec(statement).all() + return documents def read_each(self, doc_ids: List[UUID]): statement = select(Document).where( @@ -60,6 +73,9 @@ def read_each(self, doc_ids: List[UUID]): (m, n) = map(len, (results, doc_ids)) if m != n: + logger.error( + f"[DocumentCrud.read_each] Mismatch in retrieved documents | {{'owner_id': {self.owner_id}, 'requested_count': {n}, 'retrieved_count': {m}}}" + ) raise ValueError(f"Requested {n} retrieved {m}") return results @@ -72,6 +88,9 @@ def update(self, document: Document): self.owner_id, document.owner_id, ) + logger.error( + f"[DocumentCrud.update] Permission error | {{'doc_id': '{document.id}', 'error': '{error}'}}" + ) raise PermissionError(error) document.updated_at = now() @@ -79,6 +98,9 @@ def update(self, document: Document): self.session.add(document) self.session.commit() self.session.refresh(document) + logger.info( + f"[DocumentCrud.update] Document updated successfully | {{'doc_id': '{document.id}', 'owner_id': {self.owner_id}}}" + ) return document @@ -87,4 +109,8 @@ def delete(self, doc_id: UUID): document.deleted_at = now() document.updated_at = now() - return self.update(document) + updated_document = self.update(document) + logger.info( + f"[DocumentCrud.delete] Document deleted successfully | {{'doc_id': '{doc_id}', 'owner_id': {self.owner_id}}}" + ) + return updated_document \ No newline at end of file diff --git a/backend/app/crud/rag/open_ai.py b/backend/app/crud/rag/open_ai.py index ae2f58c7..3afda22a 100644 --- a/backend/app/crud/rag/open_ai.py +++ b/backend/app/crud/rag/open_ai.py @@ -11,6 +11,8 @@ from app.core.config import settings from app.models import Document +logger = logging.getLogger(__name__) + def vs_ls(client: OpenAI, vector_store_id: str): kwargs = {} @@ -43,13 +45,24 @@ def __str__(self): return type(self).__name__ def __call__(self, resource, retries=1): + logger.info( + f"[ResourceCleaner.call] Starting resource cleanup | {{'cleaner_type': '{self}', 'resource': '{resource}', 'retries': {retries}}}" + ) for i in range(retries): try: self.clean(resource) + logger.info( + f"[ResourceCleaner.call] Resource cleaned successfully | {{'cleaner_type': '{self}', 'resource': '{resource}'}}" + ) return except OpenAIError as err: - logging.error(err) + logger.error( + f"[ResourceCleaner.call] OpenAI error during cleanup | {{'cleaner_type': '{self}', 'resource': '{resource}', 'error': '{str(err)}'}}" + ) + logger.warning( + f"[ResourceCleaner.call] Cleanup failure after retries | {{'cleaner_type': '{self}', 'resource': '{resource}'}}" + ) warnings.warn(f"[{self} {resource}] Cleanup failure") def clean(self, resource): @@ -58,13 +71,22 @@ def clean(self, resource): class AssistantCleaner(ResourceCleaner): def clean(self, resource): + logger.info( + f"[AssistantCleaner.clean] Deleting assistant | {{'assistant_id': '{resource}'}}" + ) self.client.beta.assistants.delete(resource) class VectorStoreCleaner(ResourceCleaner): def clean(self, resource): + logger.info( + f"[VectorStoreCleaner.clean] Starting vector store cleanup | {{'vector_store_id': '{resource}'}}" + ) for i in vs_ls(self.client, resource): self.client.files.delete(i.id) + logger.info( + f"[VectorStoreCleaner.clean] Deleting vector store | {{'vector_store_id': '{resource}'}}" + ) self.client.vector_stores.delete(resource) @@ -75,9 +97,19 @@ def __init__(self, client=None): class OpenAIVectorStoreCrud(OpenAICrud): def create(self): - return self.client.vector_stores.create() + logger.info( + f"[OpenAIVectorStoreCrud.create] Creating vector store | {{'action': 'create'}}" + ) + vector_store = self.client.vector_stores.create() + logger.info( + f"[OpenAIVectorStoreCrud.create] Vector store created | {{'vector_store_id': '{vector_store.id}'}}" + ) + return vector_store def read(self, vector_store_id: str): + logger.info( + f"[OpenAIVectorStoreCrud.read] Reading files from vector store | {{'vector_store_id': '{vector_store_id}'}}" + ) yield from vs_ls(self.client, vector_store_id) def update( @@ -97,10 +129,16 @@ def update( files.append(f_obj) + logger.info( + f"[OpenAIVectorStoreCrud.update] Uploading files to vector store | {{'vector_store_id': '{vector_store_id}', 'file_count': {len(files)}}}" + ) req = self.client.vector_stores.file_batches.upload_and_poll( vector_store_id=vector_store_id, files=files, ) + logger.info( + f"[OpenAIVectorStoreCrud.update] File upload completed | {{'vector_store_id': '{vector_store_id}', 'completed_files': {req.file_counts.completed}, 'total_files': {req.file_counts.total}}}" + ) if req.file_counts.completed != req.file_counts.total: view = {x.fname: x for x in docs} for i in self.read(vector_store_id): @@ -112,25 +150,40 @@ def update( "error": "OpenAI document processing error", "documents": list(view.values()), } + logger.error( + f"[OpenAIVectorStoreCrud.update] Document processing error | {{'vector_store_id': '{vector_store_id}', 'error': '{error['error']}', 'failed_documents': {len(error['documents'])}}}" + ) raise InterruptedError(json.dumps(error, cls=BaseModelEncoder)) while files: f_obj = files.pop() f_obj.close() + logger.info( + f"[OpenAIVectorStoreCrud.update] Closed file stream | {{'vector_store_id': '{vector_store_id}', 'filename': '{f_obj.name}'}}" + ) yield from docs def delete(self, vector_store_id: str, retries: int = 3): if retries < 1: + logger.error( + f"[OpenAIVectorStoreCrud.delete] Invalid retries value | {{'vector_store_id': '{vector_store_id}', 'retries': {retries}}}" + ) raise ValueError("Retries must be greater-than 1") cleaner = VectorStoreCleaner(self.client) cleaner(vector_store_id) + logger.info( + f"[OpenAIVectorStoreCrud.delete] Vector store deleted | {{'vector_store_id': '{vector_store_id}'}}" + ) class OpenAIAssistantCrud(OpenAICrud): def create(self, vector_store_id: str, **kwargs): - return self.client.beta.assistants.create( + logger.info( + f"[OpenAIAssistantCrud.create] Creating assistant | {{'vector_store_id': '{vector_store_id}'}}" + ) + assistant = self.client.beta.assistants.create( tools=[ { "type": "file_search", @@ -145,8 +198,15 @@ def create(self, vector_store_id: str, **kwargs): }, **kwargs, ) + logger.info( + f"[OpenAIAssistantCrud.create] Assistant created | {{'assistant_id': '{assistant.id}', 'vector_store_id': '{vector_store_id}'}}" + ) + return assistant def delete(self, assistant_id: str): + logger.info( + f"[OpenAIAssistantCrud.delete] Starting assistant deletion | {{'assistant_id': '{assistant_id}'}}" + ) assistant = self.client.beta.assistants.retrieve(assistant_id) vector_stores = assistant.tool_resources.file_search.vector_store_ids try: @@ -154,13 +214,21 @@ def delete(self, assistant_id: str): except ValueError as err: if vector_stores: names = ", ".join(vector_stores) - msg = f"Too many attached vector stores: {names}" + logger.error( + f"[OpenAIAssistantCrud.delete] Too many vector stores attached | {{'assistant_id': '{assistant_id}', 'vector_stores': '{names}'}}" + ) + raise ValueError(f"Too many attached vector stores: {names}") else: - msg = "No vector stores found" - raise ValueError(msg) + logger.error( + f"[OpenAIAssistantCrud.delete] No vector stores found | {{'assistant_id': '{assistant_id}'}}" + ) + raise ValueError("No vector stores found") v_crud = OpenAIVectorStoreCrud(self.client) v_crud.delete(vector_store_id) cleaner = AssistantCleaner(self.client) cleaner(assistant_id) + logger.info( + f"[OpenAIAssistantCrud.delete] Assistant deleted | {{'assistant_id': '{assistant_id}'}}" + ) \ No newline at end of file diff --git a/backend/app/main.py b/backend/app/main.py index 6ae1d33c..16d4deb3 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,11 +1,13 @@ import sentry_sdk -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI from fastapi.routing import APIRoute from app.api.main import api_router from app.core.config import settings +import app.core.logger from app.core.exception_handlers import register_exception_handlers +from app.core.middleware import http_request_logger def custom_generate_unique_id(route: APIRoute) -> str: @@ -21,6 +23,8 @@ def custom_generate_unique_id(route: APIRoute) -> str: generate_unique_id_function=custom_generate_unique_id, ) +app.middleware("http")(http_request_logger) + app.include_router(api_router, prefix=settings.API_V1_STR) -register_exception_handlers(app) +register_exception_handlers(app) \ No newline at end of file From 5bcf0b77c463c6fb286abc42945416c40ed61408 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Tue, 24 Jun 2025 14:41:27 +0530 Subject: [PATCH 02/12] pre commit run --- backend/app/api/routes/api_keys.py | 2 +- backend/app/api/routes/collections.py | 6 ++---- backend/app/api/routes/documents.py | 2 +- backend/app/core/cloud/storage.py | 2 +- backend/app/core/logger.py | 2 +- backend/app/core/middleware.py | 2 +- backend/app/crud/api_key.py | 2 +- backend/app/crud/collection.py | 2 +- backend/app/crud/document.py | 2 +- backend/app/crud/rag/open_ai.py | 2 +- backend/app/main.py | 2 +- 11 files changed, 12 insertions(+), 14 deletions(-) diff --git a/backend/app/api/routes/api_keys.py b/backend/app/api/routes/api_keys.py index b4db29df..cd50e058 100644 --- a/backend/app/api/routes/api_keys.py +++ b/backend/app/api/routes/api_keys.py @@ -114,4 +114,4 @@ def revoke_key( delete_api_key(session, api_key_id) logger.info(f"[revoke_key] API key revoked | api_key_id={api_key_id}") - return APIResponse.success_response({"message": "API key revoked successfully"}) \ No newline at end of file + return APIResponse.success_response({"message": "API key revoked successfully"}) diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py index c5fdecf4..5b260e5b 100644 --- a/backend/app/api/routes/collections.py +++ b/backend/app/api/routes/collections.py @@ -168,9 +168,7 @@ def fail(self, body): self(APIResponse.failure_response(body), "incomplete") def success(self, body): - logger.info( - f"[WebHookCallback.success] Callback succeeded" - ) + logger.info(f"[WebHookCallback.success] Callback succeeded") self(APIResponse.success_response(body), "complete") @@ -410,4 +408,4 @@ def collection_documents( document_collection_crud = DocumentCollectionCrud(session) collection = collection_crud.read_one(collection_id) data = document_collection_crud.read(collection, skip, limit) - return APIResponse.success_response(data) \ No newline at end of file + return APIResponse.success_response(data) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 5eb8f93d..d75c6920 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -124,4 +124,4 @@ def doc_info( ): crud = DocumentCrud(session, current_user.id) data = crud.read_one(doc_id) - return APIResponse.success_response(data) \ No newline at end of file + return APIResponse.success_response(data) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index fa7ad193..b60fac70 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -213,4 +213,4 @@ def delete(self, url: str) -> None: logger.error( f"[AmazonCloudStorage.delete] AWS delete error | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}', 'error': '{str(err)}'}}" ) - raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err \ No newline at end of file + raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err diff --git a/backend/app/core/logger.py b/backend/app/core/logger.py index bc4179b7..1ee17613 100644 --- a/backend/app/core/logger.py +++ b/backend/app/core/logger.py @@ -28,4 +28,4 @@ LOG_FILE_PATH, maxBytes=10 * 1024 * 1024, backupCount=5 ) file_handler.setFormatter(formatter) -logger.addHandler(file_handler) \ No newline at end of file +logger.addHandler(file_handler) diff --git a/backend/app/core/middleware.py b/backend/app/core/middleware.py index 674c2d88..4ad3d12e 100644 --- a/backend/app/core/middleware.py +++ b/backend/app/core/middleware.py @@ -19,4 +19,4 @@ async def http_request_logger(request: Request, call_next) -> Response: logger.info( f"{request.method} {request.url.path} - {response.status_code} [{process_time:.2f}ms]" ) - return response \ No newline at end of file + return response diff --git a/backend/app/crud/api_key.py b/backend/app/crud/api_key.py index 95e06304..56fa3045 100644 --- a/backend/app/crud/api_key.py +++ b/backend/app/crud/api_key.py @@ -161,4 +161,4 @@ def get_api_keys_by_project(session: Session, project_id: int) -> list[APIKeyPub key_dict["key"] = decrypt_api_key(key.key) result.append(APIKeyPublic.model_validate(key_dict)) - return result \ No newline at end of file + return result diff --git a/backend/app/crud/collection.py b/backend/app/crud/collection.py index 5eb2edd2..a5f5c7cb 100644 --- a/backend/app/crud/collection.py +++ b/backend/app/crud/collection.py @@ -138,4 +138,4 @@ def _(self, model: Document, remote): f"[CollectionCrud.delete] Document deletion from collections completed | {{'document_id': '{model.id}'}}" ) - return model \ No newline at end of file + return model diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index 615c1741..f092c51c 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -113,4 +113,4 @@ def delete(self, doc_id: UUID): logger.info( f"[DocumentCrud.delete] Document deleted successfully | {{'doc_id': '{doc_id}', 'owner_id': {self.owner_id}}}" ) - return updated_document \ No newline at end of file + return updated_document diff --git a/backend/app/crud/rag/open_ai.py b/backend/app/crud/rag/open_ai.py index 3afda22a..e1f2e761 100644 --- a/backend/app/crud/rag/open_ai.py +++ b/backend/app/crud/rag/open_ai.py @@ -231,4 +231,4 @@ def delete(self, assistant_id: str): cleaner(assistant_id) logger.info( f"[OpenAIAssistantCrud.delete] Assistant deleted | {{'assistant_id': '{assistant_id}'}}" - ) \ No newline at end of file + ) diff --git a/backend/app/main.py b/backend/app/main.py index 16d4deb3..09c6f0e9 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -27,4 +27,4 @@ def custom_generate_unique_id(route: APIRoute) -> str: app.include_router(api_router, prefix=settings.API_V1_STR) -register_exception_handlers(app) \ No newline at end of file +register_exception_handlers(app) From 5fad65028d1b86156acf9f2f6f65744a759c7c21 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Wed, 25 Jun 2025 14:48:56 +0530 Subject: [PATCH 03/12] remove unnecessary logs --- backend/app/core/cloud/storage.py | 9 --------- backend/app/crud/collection.py | 3 --- 2 files changed, 12 deletions(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index b60fac70..426f6ff3 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -23,9 +23,6 @@ class CloudStorageError(Exception): class AmazonCloudStorageClient: @ft.cached_property def client(self): - logger.info( - f"[AmazonCloudStorageClient.client] Initializing S3 client | {{'region': '{settings.AWS_DEFAULT_REGION}'}}" - ) kwargs = {} cred_params = ( ("aws_access_key_id", "AWS_ACCESS_KEY_ID"), @@ -37,9 +34,6 @@ def client(self): kwargs[i] = os.environ.get(j, getattr(settings, j)) client = boto3.client("s3", **kwargs) - logger.info( - f"[AmazonCloudStorageClient.client] S3 client initialized | {{'region': '{settings.AWS_DEFAULT_REGION}'}}" - ) return client def create(self): @@ -179,9 +173,6 @@ def stream(self, url: str) -> StreamingBody: raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err def get_file_size_kb(self, url: str) -> float: - logger.info( - f"[AmazonCloudStorage.get_file_size_kb] Starting file size retrieval | {{'user_id': '{self.user.id}', 'url': '{url}'}}" - ) name = SimpleStorageName.from_url(url) kwargs = asdict(name) try: diff --git a/backend/app/crud/collection.py b/backend/app/crud/collection.py index a5f5c7cb..b596ba32 100644 --- a/backend/app/crud/collection.py +++ b/backend/app/crud/collection.py @@ -118,9 +118,6 @@ def _(self, model: Collection, remote): @delete.register def _(self, model: Document, remote): - logger.info( - f"[CollectionCrud.delete] Starting document deletion from collections | {{'document_id': '{model.id}'}}" - ) statement = ( select(Collection) .join( From 63adfb80b6187d99f502e4d596d378e258012479 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Wed, 25 Jun 2025 15:20:31 +0530 Subject: [PATCH 04/12] remove unnessecary logs from storage.py --- backend/app/core/cloud/storage.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 426f6ff3..1a4c1fb9 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -37,15 +37,9 @@ def client(self): return client def create(self): - logger.info( - f"[AmazonCloudStorageClient.create] Checking/creating S3 bucket | {{'bucket': '{settings.AWS_S3_BUCKET}'}}" - ) try: # does the bucket exist... self.client.head_bucket(Bucket=settings.AWS_S3_BUCKET) - logger.info( - f"[AmazonCloudStorageClient.create] Bucket exists | {{'bucket': '{settings.AWS_S3_BUCKET}'}}" - ) except ValueError as err: logger.error( f"[AmazonCloudStorageClient.create] Invalid bucket configuration | {{'bucket': '{settings.AWS_S3_BUCKET}', 'error': '{str(err)}'}}" @@ -127,9 +121,6 @@ def __init__(self, user: CurrentUser): self.aws = AmazonCloudStorageClient() def put(self, source: UploadFile, basename: Path) -> SimpleStorageName: - logger.info( - f"[AmazonCloudStorage.put] Starting file upload | {{'user_id': '{self.user.id}', 'filename': '{source.filename}', 'basename': '{basename}'}}" - ) key = Path(str(self.user.id), basename) destination = SimpleStorageName(str(key)) @@ -155,9 +146,6 @@ def put(self, source: UploadFile, basename: Path) -> SimpleStorageName: return destination def stream(self, url: str) -> StreamingBody: - logger.info( - f"[AmazonCloudStorage.stream] Starting file stream | {{'user_id': '{self.user.id}', 'url': '{url}'}}" - ) name = SimpleStorageName.from_url(url) kwargs = asdict(name) try: @@ -190,9 +178,6 @@ def get_file_size_kb(self, url: str) -> float: raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err def delete(self, url: str) -> None: - logger.info( - f"[AmazonCloudStorage.delete] Starting file deletion | {{'user_id': '{self.user.id}', 'url': '{url}'}}" - ) name = SimpleStorageName.from_url(url) kwargs = asdict(name) try: From bf18df886a3b3583cc1e7d809d3ac37a4f8e1017 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Thu, 26 Jun 2025 10:37:20 +0530 Subject: [PATCH 05/12] add mask to senstive data in logging --- backend/app/api/routes/collections.py | 6 ++- backend/app/core/cloud/storage.py | 53 +++++++++++++++------------ backend/app/utils.py | 12 ++++++ 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py index 5b260e5b..a7378e4a 100644 --- a/backend/app/api/routes/collections.py +++ b/backend/app/api/routes/collections.py @@ -135,13 +135,13 @@ def success(self, body): class SilentCallback(CallbackHandler): def fail(self, body): logger.info( - f"[SilentCallback.fail] Silent callback failure | {{'body': '{body}'}}" + f"[SilentCallback.fail] Silent callback failure" ) return def success(self, body): logger.info( - f"[SilentCallback.success] Silent callback success | {{'body': '{body}'}}" + f"[SilentCallback.success] Silent callback success" ) return @@ -309,6 +309,8 @@ def do_delete_collection( payload: ResponsePayload, ): if request.callback_url is None: + print("Using silent callback") + print(f"Payload: {payload}") callback = SilentCallback(payload) else: callback = WebHookCallback(request.callback_url, payload) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 1a4c1fb9..5431b116 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -12,6 +12,7 @@ from app.api.deps import CurrentUser from app.core.config import settings +from app.utils import mask_string logger = logging.getLogger(__name__) @@ -38,23 +39,24 @@ def client(self): def create(self): try: - # does the bucket exist... self.client.head_bucket(Bucket=settings.AWS_S3_BUCKET) except ValueError as err: logger.error( - f"[AmazonCloudStorageClient.create] Invalid bucket configuration | {{'bucket': '{settings.AWS_S3_BUCKET}', 'error': '{str(err)}'}}" + f"[AmazonCloudStorageClient.create] Invalid bucket configuration | " + f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}'}}" ) raise CloudStorageError(err) from err except ClientError as err: response = int(err.response["Error"]["Code"]) if response != 404: logger.error( - f"[AmazonCloudStorageClient.create] Unexpected AWS error | {{'bucket': '{settings.AWS_S3_BUCKET}', 'error': '{str(err)}', 'code': {response}}}" + f"[AmazonCloudStorageClient.create] Unexpected AWS error | " + f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}', 'code': {response}}}" ) raise CloudStorageError(err) from err - # ... if not create it logger.warning( - f"[AmazonCloudStorageClient.create] Bucket not found, creating | {{'bucket': '{settings.AWS_S3_BUCKET}'}}" + f"[AmazonCloudStorageClient.create] Bucket not found, creating | " + f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}'}}" ) try: self.client.create_bucket( @@ -64,11 +66,13 @@ def create(self): }, ) logger.info( - f"[AmazonCloudStorageClient.create] Bucket created successfully | {{'bucket': '{settings.AWS_S3_BUCKET}'}}" + f"[AmazonCloudStorageClient.create] Bucket created successfully | " + f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}'}}" ) except ClientError as create_err: logger.error( - f"[AmazonCloudStorageClient.create] Failed to create bucket | {{'bucket': '{settings.AWS_S3_BUCKET}', 'error': '{str(create_err)}'}}" + f"[AmazonCloudStorageClient.create] Failed to create bucket | " + f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(create_err)}'}}" ) raise CloudStorageError(create_err) from create_err @@ -89,9 +93,7 @@ def to_url(self): } for k in ParseResult._fields: kwargs.setdefault(k) - - url = ParseResult(**kwargs) - return url + return ParseResult(**kwargs) @classmethod def from_url(cls, url: str): @@ -99,9 +101,7 @@ def from_url(cls, url: str): path = Path(url.path) if path.is_absolute(): path = path.relative_to(path.root) - - instance = cls(Bucket=url.netloc, Key=str(path)) - return instance + return cls(Bucket=url.netloc, Key=str(path)) class CloudStorage: @@ -123,23 +123,24 @@ def __init__(self, user: CurrentUser): def put(self, source: UploadFile, basename: Path) -> SimpleStorageName: key = Path(str(self.user.id), basename) destination = SimpleStorageName(str(key)) - kwargs = asdict(destination) + try: self.aws.client.upload_fileobj( source.file, ExtraArgs={ - # 'Metadata': self.user.model_dump(), "ContentType": source.content_type, }, **kwargs, ) logger.info( - f"[AmazonCloudStorage.put] File uploaded successfully | {{'user_id': '{self.user.id}', 'bucket': '{destination.Bucket}', 'key': '{destination.Key}'}}" + f"[AmazonCloudStorage.put] File uploaded successfully | " + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}'}}" ) except ClientError as err: logger.error( - f"[AmazonCloudStorage.put] AWS upload error | {{'user_id': '{self.user.id}', 'bucket': '{destination.Bucket}', 'key': '{destination.Key}', 'error': '{str(err)}'}}" + f"[AmazonCloudStorage.put] AWS upload error | " + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}', 'error': '{str(err)}'}}" ) raise CloudStorageError(f'AWS Error: "{err}"') from err @@ -151,12 +152,14 @@ def stream(self, url: str) -> StreamingBody: try: body = self.aws.client.get_object(**kwargs).get("Body") logger.info( - f"[AmazonCloudStorage.stream] File streamed successfully | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}'}}" + f"[AmazonCloudStorage.stream] File streamed successfully | " + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}'}}" ) return body except ClientError as err: logger.error( - f"[AmazonCloudStorage.stream] AWS stream error | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}', 'error': '{str(err)}'}}" + f"[AmazonCloudStorage.stream] AWS stream error | " + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}" ) raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err @@ -168,12 +171,14 @@ def get_file_size_kb(self, url: str) -> float: size_bytes = response["ContentLength"] size_kb = round(size_bytes / 1024, 2) logger.info( - f"[AmazonCloudStorage.get_file_size_kb] File size retrieved successfully | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}', 'size_kb': {size_kb}}}" + f"[AmazonCloudStorage.get_file_size_kb] File size retrieved successfully | " + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'size_kb': {size_kb}}}" ) return size_kb except ClientError as err: logger.error( - f"[AmazonCloudStorage.get_file_size_kb] AWS head object error | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}', 'error': '{str(err)}'}}" + f"[AmazonCloudStorage.get_file_size_kb] AWS head object error | " + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}" ) raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err @@ -183,10 +188,12 @@ def delete(self, url: str) -> None: try: self.aws.client.delete_object(**kwargs) logger.info( - f"[AmazonCloudStorage.delete] File deleted successfully | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}'}}" + f"[AmazonCloudStorage.delete] File deleted successfully | " + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}'}}" ) except ClientError as err: logger.error( - f"[AmazonCloudStorage.delete] AWS delete error | {{'user_id': '{self.user.id}', 'bucket': '{name.Bucket}', 'key': '{name.Key}', 'error': '{str(err)}'}}" + f"[AmazonCloudStorage.delete] AWS delete error | " + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}" ) raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err diff --git a/backend/app/utils.py b/backend/app/utils.py index 63712ebd..b7f7f793 100644 --- a/backend/app/utils.py +++ b/backend/app/utils.py @@ -151,6 +151,18 @@ def verify_password_reset_token(token: str) -> str | None: return None +def mask_string(value: str, mask_char: str = "*") -> str: + if not value: + return "" + + length = len(value) + num_mask = length // 2 + start = (length - num_mask) // 2 + end = start + num_mask + + return value[:start] + (mask_char * num_mask) + value[end:] + + @ft.singledispatch def load_description(filename: Path) -> str: if not filename.exists(): From 1a56355282e1c05c81e773cb21e27f269fa0931a Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Thu, 26 Jun 2025 10:49:46 +0530 Subject: [PATCH 06/12] pre commit run --- backend/app/api/routes/collections.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py index a7378e4a..ffb47b17 100644 --- a/backend/app/api/routes/collections.py +++ b/backend/app/api/routes/collections.py @@ -134,15 +134,11 @@ def success(self, body): class SilentCallback(CallbackHandler): def fail(self, body): - logger.info( - f"[SilentCallback.fail] Silent callback failure" - ) + logger.info(f"[SilentCallback.fail] Silent callback failure") return def success(self, body): - logger.info( - f"[SilentCallback.success] Silent callback success" - ) + logger.info(f"[SilentCallback.success] Silent callback success") return @@ -309,8 +305,6 @@ def do_delete_collection( payload: ResponsePayload, ): if request.callback_url is None: - print("Using silent callback") - print(f"Payload: {payload}") callback = SilentCallback(payload) else: callback = WebHookCallback(request.callback_url, payload) From a1326350eab1045285acf225419c49300cf9a33f Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Thu, 26 Jun 2025 15:44:22 +0530 Subject: [PATCH 07/12] warning to error in collection --- backend/app/api/routes/collections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py index ffb47b17..1057cbdd 100644 --- a/backend/app/api/routes/collections.py +++ b/backend/app/api/routes/collections.py @@ -319,7 +319,7 @@ def do_delete_collection( ) callback.success(data.model_dump(mode="json")) except (ValueError, PermissionError, SQLAlchemyError) as err: - logger.warning( + logger.error( f"[do_delete_collection] Failed to delete collection | {{'collection_id': '{request.collection_id}', 'error': '{str(err)}'}}" ) callback.fail(str(err)) From df7ad757b8dbb15e82b859c49ae98837f1d79bd2 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Thu, 26 Jun 2025 15:51:59 +0530 Subject: [PATCH 08/12] removed unnecessary logs --- backend/app/api/routes/api_keys.py | 1 - backend/app/api/routes/documents.py | 10 ---------- 2 files changed, 11 deletions(-) diff --git a/backend/app/api/routes/api_keys.py b/backend/app/api/routes/api_keys.py index cd50e058..125df075 100644 --- a/backend/app/api/routes/api_keys.py +++ b/backend/app/api/routes/api_keys.py @@ -113,5 +113,4 @@ def revoke_key( raise HTTPException(404, "API key not found or already deleted") delete_api_key(session, api_key_id) - logger.info(f"[revoke_key] API key revoked | api_key_id={api_key_id}") return APIResponse.success_response({"message": "API key revoked successfully"}) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index d75c6920..03d2cd98 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -55,9 +55,6 @@ def upload_doc( object_store_url=str(object_store_url), ) data = crud.update(document) - logger.info( - f"[upload_doc] Document uploaded successfully | {{'user_id': '{current_user.id}', 'document_id': '{document_id}'}}" - ) return APIResponse.success_response(data) @@ -77,9 +74,6 @@ def remove_doc( document = d_crud.delete(doc_id) data = c_crud.delete(document, a_crud) - logger.info( - f"[remove_doc] Document deleted successfully | {{'user_id': '{current_user.id}', 'document_id': '{doc_id}'}}" - ) return APIResponse.success_response(data) @@ -105,10 +99,6 @@ def permanent_delete_doc( storage.delete(document.object_store_url) d_crud.delete(doc_id) - logger.info( - f"[permanent_delete_doc] Document permanently deleted from Cloud and soft deleted from DB | " - f"{{'user_id': '{current_user.id}', 'document_id': '{doc_id}'}}" - ) return APIResponse.success_response(document) From b031b49f2b7b1f0914511b45b500f7b58552d450 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Fri, 27 Jun 2025 11:56:53 +0530 Subject: [PATCH 09/12] Add exc_info=True for full trace in error --- backend/app/api/routes/collections.py | 25 ++++++++++++------------- backend/app/api/routes/responses.py | 9 ++++++--- backend/app/api/routes/threads.py | 2 +- backend/app/core/cloud/storage.py | 21 ++++++++++++++------- backend/app/crud/collection.py | 6 ++++-- backend/app/crud/document.py | 12 ++++++++---- backend/app/crud/rag/open_ai.py | 15 ++++++++++----- 7 files changed, 55 insertions(+), 35 deletions(-) diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py index 1057cbdd..a8331047 100644 --- a/backend/app/api/routes/collections.py +++ b/backend/app/api/routes/collections.py @@ -160,7 +160,10 @@ def __call__(self, response: APIResponse, status: str): post_callback(self.url, response) def fail(self, body): - logger.error(f"[WebHookCallback.fail] Callback failed | {{'body': '{body}'}}") + logger.error( + f"[WebHookCallback.fail] Callback failed | {{'body': '{body}'}}", + exc_info=True, + ) self(APIResponse.failure_response(body), "incomplete") def success(self, body): @@ -173,15 +176,8 @@ def _backout(crud: OpenAIAssistantCrud, assistant_id: str): crud.delete(assistant_id) except OpenAIError as err: logger.error( - f"[backout] Failed to delete assistant | {{'assistant_id': '{assistant_id}', 'error': '{str(err)}'}}" - ) - warnings.warn( - ": ".join( - [ - f"Unable to remove assistant {assistant_id}", - "platform DB may be out of sync with OpenAI", - ] - ) + f"[backout] Failed to delete assistant | {{'assistant_id': '{assistant_id}', 'error': '{str(err)}'}}", + exc_info=True, ) @@ -241,7 +237,8 @@ def do_create_collection( except Exception as err: logger.error( - f"[do_create_collection] Collection Creation Failed | {{'collection_id': '{payload.key}', 'error': '{str(err)}'}}" + f"[do_create_collection] Collection Creation Failed | {{'collection_id': '{payload.key}', 'error': '{str(err)}'}}", + exc_info=True, ) if "assistant" in locals(): _backout(assistant_crud, assistant.id) @@ -320,12 +317,14 @@ def do_delete_collection( callback.success(data.model_dump(mode="json")) except (ValueError, PermissionError, SQLAlchemyError) as err: logger.error( - f"[do_delete_collection] Failed to delete collection | {{'collection_id': '{request.collection_id}', 'error': '{str(err)}'}}" + f"[do_delete_collection] Failed to delete collection | {{'collection_id': '{request.collection_id}', 'error': '{str(err)}'}}", + exc_info=True, ) callback.fail(str(err)) except Exception as err: logger.error( - f"[do_delete_collection] Unexpected error during deletion | {{'collection_id': '{request.collection_id}', 'error': '{str(err)}', 'error_type': '{type(err).__name__}'}}" + f"[do_delete_collection] Unexpected error during deletion | {{'collection_id': '{request.collection_id}', 'error': '{str(err)}', 'error_type': '{type(err).__name__}'}}", + exc_info=True, ) callback.fail(str(err)) diff --git a/backend/app/api/routes/responses.py b/backend/app/api/routes/responses.py index ed098af6..efa0472a 100644 --- a/backend/app/api/routes/responses.py +++ b/backend/app/api/routes/responses.py @@ -163,7 +163,8 @@ def process_response( except openai.OpenAIError as e: error_message = handle_openai_error(e) logger.error( - f"OpenAI API error during response processing: {error_message}, project_id={request.project_id}, organization_id={organization_id}" + f"OpenAI API error during response processing: {error_message}, project_id={request.project_id}, organization_id={organization_id}", + exc_info=True, ) callback_response = ResponsesAPIResponse.failure_response(error=error_message) @@ -197,7 +198,8 @@ async def responses( ) if not assistant: logger.error( - f"Assistant not found: assistant_id={request.assistant_id}, project_id={request.project_id}, organization_id={_current_user.organization_id}" + f"Assistant not found: assistant_id={request.assistant_id}, project_id={request.project_id}, organization_id={_current_user.organization_id}", + exc_info=True, ) raise HTTPException( status_code=404, @@ -212,7 +214,8 @@ async def responses( ) if not credentials or "api_key" not in credentials: logger.error( - f"OpenAI API key not configured for org_id={_current_user.organization_id}, project_id={request.project_id}, organization_id={_current_user.organization_id}" + f"OpenAI API key not configured for org_id={_current_user.organization_id}, project_id={request.project_id}, organization_id={_current_user.organization_id}", + exc_info=True, ) return { "success": False, diff --git a/backend/app/api/routes/threads.py b/backend/app/api/routes/threads.py index 2019d0e8..6ef88d4d 100644 --- a/backend/app/api/routes/threads.py +++ b/backend/app/api/routes/threads.py @@ -43,7 +43,7 @@ def send_callback(callback_url: str, data: dict): response.raise_for_status() return True except requests.RequestException as e: - logger.error(f"Callback failed: {str(e)}") + logger.error(f"Callback failed: {str(e)}", exc_info=True) return False diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 5431b116..0c225d91 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -43,7 +43,8 @@ def create(self): except ValueError as err: logger.error( f"[AmazonCloudStorageClient.create] Invalid bucket configuration | " - f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}'}}" + f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}'}}", + exc_info=True, ) raise CloudStorageError(err) from err except ClientError as err: @@ -51,7 +52,8 @@ def create(self): if response != 404: logger.error( f"[AmazonCloudStorageClient.create] Unexpected AWS error | " - f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}', 'code': {response}}}" + f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}', 'code': {response}}}", + exc_info=True, ) raise CloudStorageError(err) from err logger.warning( @@ -72,7 +74,8 @@ def create(self): except ClientError as create_err: logger.error( f"[AmazonCloudStorageClient.create] Failed to create bucket | " - f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(create_err)}'}}" + f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(create_err)}'}}", + exc_info=True, ) raise CloudStorageError(create_err) from create_err @@ -140,7 +143,8 @@ def put(self, source: UploadFile, basename: Path) -> SimpleStorageName: except ClientError as err: logger.error( f"[AmazonCloudStorage.put] AWS upload error | " - f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}', 'error': '{str(err)}'}}" + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}', 'error': '{str(err)}'}}", + exc_info=True, ) raise CloudStorageError(f'AWS Error: "{err}"') from err @@ -159,7 +163,8 @@ def stream(self, url: str) -> StreamingBody: except ClientError as err: logger.error( f"[AmazonCloudStorage.stream] AWS stream error | " - f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}" + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}", + exc_info=True, ) raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err @@ -178,7 +183,8 @@ def get_file_size_kb(self, url: str) -> float: except ClientError as err: logger.error( f"[AmazonCloudStorage.get_file_size_kb] AWS head object error | " - f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}" + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}", + exc_info=True, ) raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err @@ -194,6 +200,7 @@ def delete(self, url: str) -> None: except ClientError as err: logger.error( f"[AmazonCloudStorage.delete] AWS delete error | " - f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}" + f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}", + exc_info=True, ) raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err diff --git a/backend/app/crud/collection.py b/backend/app/crud/collection.py index b596ba32..ffe4d75d 100644 --- a/backend/app/crud/collection.py +++ b/backend/app/crud/collection.py @@ -28,7 +28,8 @@ def _update(self, collection: Collection): collection.owner_id, ) logger.error( - f"[CollectionCrud._update] Permission error | {{'collection_id': '{collection.id}', 'error': '{err}'}}" + f"[CollectionCrud._update] Permission error | {{'collection_id': '{collection.id}', 'error': '{err}'}}", + exc_info=True, ) raise PermissionError(err) @@ -102,7 +103,8 @@ def read_all(self): @ft.singledispatchmethod def delete(self, model, remote): # remote should be an OpenAICrud logger.error( - f"[CollectionCrud.delete] Invalid model type | {{'model_type': '{type(model).__name__}'}}" + f"[CollectionCrud.delete] Invalid model type | {{'model_type': '{type(model).__name__}'}}", + exc_info=True, ) raise TypeError(type(model)) diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index f092c51c..06c079d5 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -47,14 +47,16 @@ def read_many( if skip is not None: if skip < 0: logger.error( - f"[DocumentCrud.read_many] Invalid skip value | {{'owner_id': {self.owner_id}, 'skip': {skip}, 'error': 'Negative skip'}}" + f"[DocumentCrud.read_many] Invalid skip value | {{'owner_id': {self.owner_id}, 'skip': {skip}, 'error': 'Negative skip'}}", + exc_info=True, ) raise ValueError(f"Negative skip: {skip}") statement = statement.offset(skip) if limit is not None: if limit < 0: logger.error( - f"[DocumentCrud.read_many] Invalid limit value | {{'owner_id': {self.owner_id}, 'limit': {limit}, 'error': 'Negative limit'}}" + f"[DocumentCrud.read_many] Invalid limit value | {{'owner_id': {self.owner_id}, 'limit': {limit}, 'error': 'Negative limit'}}", + exc_info=True, ) raise ValueError(f"Negative limit: {limit}") statement = statement.limit(limit) @@ -74,7 +76,8 @@ def read_each(self, doc_ids: List[UUID]): (m, n) = map(len, (results, doc_ids)) if m != n: logger.error( - f"[DocumentCrud.read_each] Mismatch in retrieved documents | {{'owner_id': {self.owner_id}, 'requested_count': {n}, 'retrieved_count': {m}}}" + f"[DocumentCrud.read_each] Mismatch in retrieved documents | {{'owner_id': {self.owner_id}, 'requested_count': {n}, 'retrieved_count': {m}}}", + exc_info=True, ) raise ValueError(f"Requested {n} retrieved {m}") @@ -89,7 +92,8 @@ def update(self, document: Document): document.owner_id, ) logger.error( - f"[DocumentCrud.update] Permission error | {{'doc_id': '{document.id}', 'error': '{error}'}}" + f"[DocumentCrud.update] Permission error | {{'doc_id': '{document.id}', 'error': '{error}'}}", + exc_info=True, ) raise PermissionError(error) diff --git a/backend/app/crud/rag/open_ai.py b/backend/app/crud/rag/open_ai.py index e1f2e761..2fcfca37 100644 --- a/backend/app/crud/rag/open_ai.py +++ b/backend/app/crud/rag/open_ai.py @@ -57,7 +57,8 @@ def __call__(self, resource, retries=1): return except OpenAIError as err: logger.error( - f"[ResourceCleaner.call] OpenAI error during cleanup | {{'cleaner_type': '{self}', 'resource': '{resource}', 'error': '{str(err)}'}}" + f"[ResourceCleaner.call] OpenAI error during cleanup | {{'cleaner_type': '{self}', 'resource': '{resource}', 'error': '{str(err)}'}}", + exc_info=True, ) logger.warning( @@ -151,7 +152,8 @@ def update( "documents": list(view.values()), } logger.error( - f"[OpenAIVectorStoreCrud.update] Document processing error | {{'vector_store_id': '{vector_store_id}', 'error': '{error['error']}', 'failed_documents': {len(error['documents'])}}}" + f"[OpenAIVectorStoreCrud.update] Document processing error | {{'vector_store_id': '{vector_store_id}', 'error': '{error['error']}', 'failed_documents': {len(error['documents'])}}}", + exc_info=True, ) raise InterruptedError(json.dumps(error, cls=BaseModelEncoder)) @@ -167,7 +169,8 @@ def update( def delete(self, vector_store_id: str, retries: int = 3): if retries < 1: logger.error( - f"[OpenAIVectorStoreCrud.delete] Invalid retries value | {{'vector_store_id': '{vector_store_id}', 'retries': {retries}}}" + f"[OpenAIVectorStoreCrud.delete] Invalid retries value | {{'vector_store_id': '{vector_store_id}', 'retries': {retries}}}", + exc_info=True, ) raise ValueError("Retries must be greater-than 1") @@ -215,12 +218,14 @@ def delete(self, assistant_id: str): if vector_stores: names = ", ".join(vector_stores) logger.error( - f"[OpenAIAssistantCrud.delete] Too many vector stores attached | {{'assistant_id': '{assistant_id}', 'vector_stores': '{names}'}}" + f"[OpenAIAssistantCrud.delete] Too many vector stores attached | {{'assistant_id': '{assistant_id}', 'vector_stores': '{names}'}}", + exc_info=True, ) raise ValueError(f"Too many attached vector stores: {names}") else: logger.error( - f"[OpenAIAssistantCrud.delete] No vector stores found | {{'assistant_id': '{assistant_id}'}}" + f"[OpenAIAssistantCrud.delete] No vector stores found | {{'assistant_id': '{assistant_id}'}}", + exc_info=True, ) raise ValueError("No vector stores found") From 629a234ff0be0d99c26eda2f16f764d6d7a7a7f8 Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Fri, 27 Jun 2025 12:38:13 +0530 Subject: [PATCH 10/12] use try catch properly --- backend/app/api/routes/collections.py | 5 +-- backend/app/api/routes/responses.py | 8 ++-- backend/app/crud/collection.py | 26 ++++++++----- backend/app/crud/document.py | 56 ++++++++++++++++----------- backend/app/crud/rag/open_ai.py | 47 ++++++++++++---------- 5 files changed, 80 insertions(+), 62 deletions(-) diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py index a8331047..68e4b6a8 100644 --- a/backend/app/api/routes/collections.py +++ b/backend/app/api/routes/collections.py @@ -160,10 +160,7 @@ def __call__(self, response: APIResponse, status: str): post_callback(self.url, response) def fail(self, body): - logger.error( - f"[WebHookCallback.fail] Callback failed | {{'body': '{body}'}}", - exc_info=True, - ) + logger.warning(f"[WebHookCallback.fail] Callback failed | {{'body': '{body}'}}") self(APIResponse.failure_response(body), "incomplete") def success(self, body): diff --git a/backend/app/api/routes/responses.py b/backend/app/api/routes/responses.py index efa0472a..e7832cd5 100644 --- a/backend/app/api/routes/responses.py +++ b/backend/app/api/routes/responses.py @@ -197,9 +197,8 @@ async def responses( _session, request.assistant_id, _current_user.organization_id ) if not assistant: - logger.error( + logger.warning( f"Assistant not found: assistant_id={request.assistant_id}, project_id={request.project_id}, organization_id={_current_user.organization_id}", - exc_info=True, ) raise HTTPException( status_code=404, @@ -213,9 +212,8 @@ async def responses( project_id=request.project_id, ) if not credentials or "api_key" not in credentials: - logger.error( - f"OpenAI API key not configured for org_id={_current_user.organization_id}, project_id={request.project_id}, organization_id={_current_user.organization_id}", - exc_info=True, + logger.warning( + f"OpenAI API key not configured for org_id={_current_user.organization_id}, project_id={request.project_id}, organization_id={_current_user.organization_id}" ) return { "success": False, diff --git a/backend/app/crud/collection.py b/backend/app/crud/collection.py index ffe4d75d..b08ddcf5 100644 --- a/backend/app/crud/collection.py +++ b/backend/app/crud/collection.py @@ -27,11 +27,14 @@ def _update(self, collection: Collection): self.owner_id, collection.owner_id, ) - logger.error( - f"[CollectionCrud._update] Permission error | {{'collection_id': '{collection.id}', 'error': '{err}'}}", - exc_info=True, - ) - raise PermissionError(err) + try: + raise PermissionError(err) + except PermissionError as e: + logger.error( + f"[CollectionCrud._update] Permission error | {{'collection_id': '{collection.id}', 'error': '{str(e)}'}}", + exc_info=True, + ) + raise self.session.add(collection) self.session.commit() @@ -102,11 +105,14 @@ def read_all(self): @ft.singledispatchmethod def delete(self, model, remote): # remote should be an OpenAICrud - logger.error( - f"[CollectionCrud.delete] Invalid model type | {{'model_type': '{type(model).__name__}'}}", - exc_info=True, - ) - raise TypeError(type(model)) + try: + raise TypeError(type(model)) + except TypeError as err: + logger.error( + f"[CollectionCrud.delete] Invalid model type | {{'model_type': '{type(model).__name__}'}}", + exc_info=True, + ) + raise @delete.register def _(self, model: Collection, remote): diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index 06c079d5..4ee87c8b 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -44,22 +44,29 @@ def read_many( Document.deleted_at.is_(None), ) ) + if skip is not None: if skip < 0: - logger.error( - f"[DocumentCrud.read_many] Invalid skip value | {{'owner_id': {self.owner_id}, 'skip': {skip}, 'error': 'Negative skip'}}", - exc_info=True, - ) - raise ValueError(f"Negative skip: {skip}") + try: + raise ValueError(f"Negative skip: {skip}") + except ValueError as err: + logger.error( + f"[DocumentCrud.read_many] Invalid skip value | {{'owner_id': {self.owner_id}, 'skip': {skip}, 'error': '{str(err)}'}}", + exc_info=True, + ) + raise statement = statement.offset(skip) + if limit is not None: if limit < 0: - logger.error( - f"[DocumentCrud.read_many] Invalid limit value | {{'owner_id': {self.owner_id}, 'limit': {limit}, 'error': 'Negative limit'}}", - exc_info=True, - ) - raise ValueError(f"Negative limit: {limit}") - statement = statement.limit(limit) + try: + raise ValueError(f"Negative limit: {limit}") + except ValueError as err: + logger.error( + f"[DocumentCrud.read_many] Invalid limit value | {{'owner_id': {self.owner_id}, 'limit': {limit}, 'error': '{str(err)}'}}", + exc_info=True, + ) + raise documents = self.session.exec(statement).all() return documents @@ -75,11 +82,14 @@ def read_each(self, doc_ids: List[UUID]): (m, n) = map(len, (results, doc_ids)) if m != n: - logger.error( - f"[DocumentCrud.read_each] Mismatch in retrieved documents | {{'owner_id': {self.owner_id}, 'requested_count': {n}, 'retrieved_count': {m}}}", - exc_info=True, - ) - raise ValueError(f"Requested {n} retrieved {m}") + try: + raise ValueError(f"Requested {n} retrieved {m}") + except ValueError as err: + logger.error( + f"[DocumentCrud.read_each] Mismatch in retrieved documents | {{'owner_id': {self.owner_id}, 'requested_count': {n}, 'retrieved_count': {m}}}", + exc_info=True, + ) + raise return results @@ -91,12 +101,14 @@ def update(self, document: Document): self.owner_id, document.owner_id, ) - logger.error( - f"[DocumentCrud.update] Permission error | {{'doc_id': '{document.id}', 'error': '{error}'}}", - exc_info=True, - ) - raise PermissionError(error) - + try: + raise PermissionError(error) + except PermissionError as err: + logger.error( + f"[DocumentCrud.update] Permission error | {{'doc_id': '{document.id}', 'error': '{str(err)}'}}", + exc_info=True, + ) + raise document.updated_at = now() self.session.add(document) diff --git a/backend/app/crud/rag/open_ai.py b/backend/app/crud/rag/open_ai.py index 2fcfca37..0e29f8e9 100644 --- a/backend/app/crud/rag/open_ai.py +++ b/backend/app/crud/rag/open_ai.py @@ -151,11 +151,14 @@ def update( "error": "OpenAI document processing error", "documents": list(view.values()), } - logger.error( - f"[OpenAIVectorStoreCrud.update] Document processing error | {{'vector_store_id': '{vector_store_id}', 'error': '{error['error']}', 'failed_documents': {len(error['documents'])}}}", - exc_info=True, - ) - raise InterruptedError(json.dumps(error, cls=BaseModelEncoder)) + try: + raise InterruptedError(json.dumps(error, cls=BaseModelEncoder)) + except InterruptedError as err: + logger.error( + f"[OpenAIVectorStoreCrud.update] Document processing error | {{'vector_store_id': '{vector_store_id}', 'error': '{error['error']}', 'failed_documents': {len(error['documents'])}}}", + exc_info=True, + ) + raise while files: f_obj = files.pop() @@ -168,11 +171,14 @@ def update( def delete(self, vector_store_id: str, retries: int = 3): if retries < 1: - logger.error( - f"[OpenAIVectorStoreCrud.delete] Invalid retries value | {{'vector_store_id': '{vector_store_id}', 'retries': {retries}}}", - exc_info=True, - ) - raise ValueError("Retries must be greater-than 1") + try: + raise ValueError("Retries must be greater-than 1") + except ValueError as err: + logger.error( + f"[OpenAIVectorStoreCrud.delete] Invalid retries value | {{'vector_store_id': '{vector_store_id}', 'retries': {retries}}}", + exc_info=True, + ) + raise cleaner = VectorStoreCleaner(self.client) cleaner(vector_store_id) @@ -212,22 +218,21 @@ def delete(self, assistant_id: str): ) assistant = self.client.beta.assistants.retrieve(assistant_id) vector_stores = assistant.tool_resources.file_search.vector_store_ids + try: (vector_store_id,) = vector_stores - except ValueError as err: + except ValueError: if vector_stores: names = ", ".join(vector_stores) - logger.error( - f"[OpenAIAssistantCrud.delete] Too many vector stores attached | {{'assistant_id': '{assistant_id}', 'vector_stores': '{names}'}}", - exc_info=True, - ) - raise ValueError(f"Too many attached vector stores: {names}") + err = ValueError(f"Too many attached vector stores: {names}") else: - logger.error( - f"[OpenAIAssistantCrud.delete] No vector stores found | {{'assistant_id': '{assistant_id}'}}", - exc_info=True, - ) - raise ValueError("No vector stores found") + err = ValueError("No vector stores found") + + logger.error( + f"[OpenAIAssistantCrud.delete] Invalid vector store state | {{'assistant_id': '{assistant_id}', 'vector_stores': '{vector_stores}'}}", + exc_info=True, + ) + raise err v_crud = OpenAIVectorStoreCrud(self.client) v_crud.delete(vector_store_id) From 7555a476c73b2fa4fa4714c7440a71959f4b999c Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Fri, 27 Jun 2025 12:40:07 +0530 Subject: [PATCH 11/12] remove warning module --- backend/app/crud/rag/open_ai.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/app/crud/rag/open_ai.py b/backend/app/crud/rag/open_ai.py index 0e29f8e9..69479888 100644 --- a/backend/app/crud/rag/open_ai.py +++ b/backend/app/crud/rag/open_ai.py @@ -1,6 +1,5 @@ import json import logging -import warnings import functools as ft from typing import Iterable @@ -64,7 +63,6 @@ def __call__(self, resource, retries=1): logger.warning( f"[ResourceCleaner.call] Cleanup failure after retries | {{'cleaner_type': '{self}', 'resource': '{resource}'}}" ) - warnings.warn(f"[{self} {resource}] Cleanup failure") def clean(self, resource): raise NotImplementedError() From 7c19029051ac20c5ff44d9553a680a98fa2065bb Mon Sep 17 00:00:00 2001 From: Aviraj <100823015+avirajsingh7@users.noreply.github.com> Date: Fri, 27 Jun 2025 12:49:04 +0530 Subject: [PATCH 12/12] Add offset in document crud --- backend/app/crud/document.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index 4ee87c8b..f2f2e3a0 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -67,6 +67,7 @@ def read_many( exc_info=True, ) raise + statement = statement.limit(limit) documents = self.session.exec(statement).all() return documents