From 1d53062ddd998c5df039efa0de94a45216a48162 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 12 Mar 2025 16:06:00 +0530 Subject: [PATCH 01/88] Cleaner syntax --- backend/app/api/routes/documents.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 63f0f3a8..d6f6762c 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -17,15 +17,15 @@ def list_docs( skip: int = 0, limit: int = 100, ): - statement = (select(Document) - .where( - and_( - Document.owner_id == current_user.id, - Document.deleted_at.is_(None), - ), - ) - .offset(skip) - .limit(limit)) + statement = ( + select(Document) + .where(and_( + Document.owner_id == current_user.id, + Document.deleted_at.is_(None), + )) + .offset(skip) + .limit(limit) + ) docs = (session .exec(statement) .all()) From e8f0d9cefe9692077283b5873b7dc9805f68845e Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 17 Mar 2025 12:37:40 +0530 Subject: [PATCH 02/88] Interface for cloud storage functionality --- backend/app/core/storage.py | 79 +++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 backend/app/core/storage.py diff --git a/backend/app/core/storage.py b/backend/app/core/storage.py new file mode 100644 index 00000000..f77048e8 --- /dev/null +++ b/backend/app/core/storage.py @@ -0,0 +1,79 @@ +from uuid import uuid4 +from typing import ClassVar +from pathlib import Path +from dataclasses import dataclass, asdict +from urllib.parse import ParseResult + +import boto3 +from fastapi import UploadFile +from botocore.exceptions import ClientError + +from app.api.deps import CurrentUser + +@dataclass(frozen=True) +class SimpleStorageName: + Bucket: str + Key: str + _bucket_delimiter: ClassVar[str] = '-' + + def __init__(self, user: CurrentUser, basename: Path): + org_proj = ( + user.organization, + user.project, + ) + parts = ('_'.join(x.strip().split()) for x in org_proj) + self.Bucket = self._bucket_delimiter.join(parts) + self.Key = str(basename.with_name(str(uuid4()))) + + def to_url(self): + kwargs = { + 'scheme': 's3', + 'netloc': self.Bucket, + 'path': self.Key, + } + for k in ParseResult._fields: + kwargs.setdefault(k) + + return ParseResult(**kwargs) + +class CloudStorage: + def put(self, user: CurrentUser, source: UploadFile): + raise NotImplementedError() + +class AmazonCloudStorage(CloudStorage): + def __init__(self): + super().__init__() + self.client = boto3.client('s3') + + def test_and_create(self, target: SimpleStorageName): + try: + # does the bucket exist... + self.client.head_bucket(Bucket=target.Bucket) + except ClientError as err: + response = int(err.response['Error']['Code']) + if response != 404: + raise + # ... if not create it + self.client.create_bucket(Bucket=target.Bucket) + + def put(self, user: CurrentUser, source: UploadFile): + fname_external = Path(source.filename) + assert not fname_external.parent.name, 'Source is not a basename' + destination = SimpleStorageName(user, fname_external) + + kwargs = asdict(destination) + metadata = user.dict() + try: + self.test_and_create(destination) + self.client.upload_fileobj( + source.file, + ExtraArgs={ + 'Metadata': metadata, + 'ContentType': source.content_type, + }, + **kwargs, + ) + except ClientError as err: + raise ConnectionError(f'AWS Error: "{err}"') from err + + return destination.to_url() From 3a8e62a62741c0046bd2d47b667d4be4e9d2d1e6 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 17 Mar 2025 12:40:25 +0530 Subject: [PATCH 03/88] Create module dedicated to cloud functionality --- backend/app/core/cloud/__init__.py | 1 + backend/app/core/{ => cloud}/storage.py | 0 2 files changed, 1 insertion(+) create mode 100644 backend/app/core/cloud/__init__.py rename backend/app/core/{ => cloud}/storage.py (100%) diff --git a/backend/app/core/cloud/__init__.py b/backend/app/core/cloud/__init__.py new file mode 100644 index 00000000..02003ecf --- /dev/null +++ b/backend/app/core/cloud/__init__.py @@ -0,0 +1 @@ +from .storage import AmazonCloudStorage diff --git a/backend/app/core/storage.py b/backend/app/core/cloud/storage.py similarity index 100% rename from backend/app/core/storage.py rename to backend/app/core/cloud/storage.py From 41e4652daa233a4fb5c81cc21e73e2206ad9a22c Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 17 Mar 2025 13:06:19 +0530 Subject: [PATCH 04/88] Take the user in the constructor --- backend/app/core/cloud/storage.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index f77048e8..ac3a254f 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -37,12 +37,15 @@ def to_url(self): return ParseResult(**kwargs) class CloudStorage: - def put(self, user: CurrentUser, source: UploadFile): + def __init__(self, user: CurrentUser): + self.user = user + + def put(self, source: UploadFile): raise NotImplementedError() class AmazonCloudStorage(CloudStorage): - def __init__(self): - super().__init__() + def __init__(self, user: CurrentUser): + super().__init__(user) self.client = boto3.client('s3') def test_and_create(self, target: SimpleStorageName): @@ -56,13 +59,13 @@ def test_and_create(self, target: SimpleStorageName): # ... if not create it self.client.create_bucket(Bucket=target.Bucket) - def put(self, user: CurrentUser, source: UploadFile): + def put(self, source: UploadFile): fname_external = Path(source.filename) assert not fname_external.parent.name, 'Source is not a basename' - destination = SimpleStorageName(user, fname_external) + destination = SimpleStorageName(self.user, fname_external) kwargs = asdict(destination) - metadata = user.dict() + metadata = self.user.dict() try: self.test_and_create(destination) self.client.upload_fileobj( From 6af4395c9970e7ca8e23718e01f19c0772d56517 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 17 Mar 2025 13:07:35 +0530 Subject: [PATCH 05/88] Rename the document list route Route names will conform to common Unix operations. --- backend/app/api/routes/documents.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index d6f6762c..03b61959 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -7,10 +7,7 @@ router = APIRouter(prefix="/documents", tags=["documents"]) -@router.get( - "/ls", - response_model=DocumentList, -) +@router.get("/ls", response_model=DocumentList) def list_docs( session: SessionDep, current_user: CurrentUser, From 4cbb94f2fcfd15f4ad8d0713884417e9270cb7d5 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 17 Mar 2025 13:09:15 +0530 Subject: [PATCH 06/88] Implementation of file upload --- backend/app/api/routes/documents.py | 31 ++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 03b61959..5ecec1ee 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -1,7 +1,11 @@ -from fastapi import APIRouter +from pathlib import Path +from urllib.parse import urlunparse + +from fastapi import APIRouter, File, UploadFile from sqlmodel import select, and_ from app.api.deps import CurrentUser, SessionDep +from app.core.cloud import AmazonCloudStorage from app.models import Document, DocumentList router = APIRouter(prefix="/documents", tags=["documents"]) @@ -28,3 +32,28 @@ def list_docs( .all()) return DocumentList(docs=docs) + +@router.post("/cp") +def upload_doc( + doc: UploadFile, + session: SessionDep, + current_user: CurrentUser, +): + storage = AmazonCloudStorage(current_user) + try: + object_store_url = storage.put(doc) + except ConnectionError as err: + raise HTTPException(status_code=500, detail=str(err)) + fname_internal = Path(object_store_url.path) + + document = Document( + owner_id=user.id, + fname_external=Path(doc.filename), + fname_internal=fname_internal.stem, + object_store_url=urlunparse(object_store_url), + ) + session.add(document) + session.commit() + session.refresh(document) + + return document.id From 56f2887b5a689b3f6396965a5e5a6bcd575f274d Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 17 Mar 2025 21:58:49 +0530 Subject: [PATCH 07/88] Default placeholders for AWS variables --- .env | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.env b/.env index 1d44286e..9df95374 100644 --- a/.env +++ b/.env @@ -43,3 +43,8 @@ SENTRY_DSN= # Configure these with your own Docker registry images DOCKER_IMAGE_BACKEND=backend DOCKER_IMAGE_FRONTEND=frontend + +# AWS +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_DEFAULT_REGION= From 235ef85addc7467fa4c803531b187afebd012613 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 17 Mar 2025 22:12:14 +0530 Subject: [PATCH 08/88] Take AWS credentials from .env --- backend/app/core/cloud/storage.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index ac3a254f..5aa8cc0b 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -9,6 +9,7 @@ from botocore.exceptions import ClientError from app.api.deps import CurrentUser +from app.core.config import settings @dataclass(frozen=True) class SimpleStorageName: @@ -46,7 +47,12 @@ def put(self, source: UploadFile): class AmazonCloudStorage(CloudStorage): def __init__(self, user: CurrentUser): super().__init__(user) - self.client = boto3.client('s3') + self.client = boto3.client( + 's3', + aws_access_key_id=settings.AWS_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, + region_name=settings.AWS_DEFAULT_REGION, + ) def test_and_create(self, target: SimpleStorageName): try: From 303c981ce9019ce3ea62a854262c993bfe080841 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 11:56:45 +0530 Subject: [PATCH 09/88] Perform bucket creation at startup --- backend/app/initial_storage.py | 27 +++++++++++++++++++++++++++ backend/scripts/prestart.sh | 11 +++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 backend/app/initial_storage.py diff --git a/backend/app/initial_storage.py b/backend/app/initial_storage.py new file mode 100644 index 00000000..a486ac22 --- /dev/null +++ b/backend/app/initial_storage.py @@ -0,0 +1,27 @@ +import logging + +from app.core.cloud import AmazonCloudStorageClient + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def init() -> None: + aws = AmazonCloudStorageClient() + try: + # does the bucket exist... + aws.client.head_bucket(Bucket=settings.AWS_S3_BUCKET) + except ClientError as err: + response = int(err.response['Error']['Code']) + if response != 404: + raise + # ... if not create it + aws.client.create_bucket(Bucket=settings.AWS_S3_BUCKET) + +def main() -> None: + logger.info("START: setup cloud storage") + init() + logger.info("END: setup cloud storage") + + +if __name__ == "__main__": + main() diff --git a/backend/scripts/prestart.sh b/backend/scripts/prestart.sh index 1b395d51..ce08c17e 100644 --- a/backend/scripts/prestart.sh +++ b/backend/scripts/prestart.sh @@ -9,5 +9,12 @@ python app/backend_pre_start.py # Run migrations alembic upgrade head -# Create initial data in DB -python app/initial_data.py +# Initialize services +services=( + app/initial_data.py + app/initial_storage.py +) + +for i in ${services[@]}; do + python $i +done From 11e3d548b16aec54419cec4dcb9a9be616395a9b Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 11:57:33 +0530 Subject: [PATCH 10/88] Use a single bucket for all clients https://github.com/ProjectTech4DevAI/ai-platform/pull/59#issuecomment-2729060338 --- backend/app/core/config.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index d58e03c8..a54bc669 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -95,6 +95,11 @@ def emails_enabled(self) -> bool: FIRST_SUPERUSER: EmailStr FIRST_SUPERUSER_PASSWORD: str + @computed_field # type: ignore[prop-decorator] + @property + def AWS_S3_BUCKET(self) -> str: + return f'ai-platform-documents-{self.ENVIRONMENT}' + def _check_default_secret(self, var_name: str, value: str | None) -> None: if value == "changethis": message = ( From 0973953be4625bc2067649802c9cf81cf146e744 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 11:58:52 +0530 Subject: [PATCH 11/88] Changes to support new bucket semantics * Since there is a single bucket per client: storage class no longer responsible for name generation * Since the bucket is created at platform startup: storage class no longer needs to worry about it, and client creation needs to be globally accessible. --- backend/app/core/cloud/__init__.py | 2 +- backend/app/core/cloud/storage.py | 44 +++++++++++------------------- 2 files changed, 17 insertions(+), 29 deletions(-) diff --git a/backend/app/core/cloud/__init__.py b/backend/app/core/cloud/__init__.py index 02003ecf..cf8fea60 100644 --- a/backend/app/core/cloud/__init__.py +++ b/backend/app/core/cloud/__init__.py @@ -1 +1 @@ -from .storage import AmazonCloudStorage +from .storage import AmazonCloudStorage, AmazonCloudStorageClient diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 5aa8cc0b..9b7ca50a 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -1,3 +1,4 @@ +import functools as ft from uuid import uuid4 from typing import ClassVar from pathlib import Path @@ -11,19 +12,23 @@ from app.api.deps import CurrentUser from app.core.config import settings +class AmazonCloudStorageClient: + @ft.cached_property + def client(self): + return boto3.client( + 's3', + aws_access_key_id=settings.AWS_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, + region_name=settings.AWS_DEFAULT_REGION, + ) + @dataclass(frozen=True) class SimpleStorageName: Bucket: str Key: str - _bucket_delimiter: ClassVar[str] = '-' - def __init__(self, user: CurrentUser, basename: Path): - org_proj = ( - user.organization, - user.project, - ) - parts = ('_'.join(x.strip().split()) for x in org_proj) - self.Bucket = self._bucket_delimiter.join(parts) + def __init__(self, basename: Path): + self.Bucket = settings.AWS_S3_BUCKET self.Key = str(basename.with_name(str(uuid4()))) def to_url(self): @@ -47,34 +52,17 @@ def put(self, source: UploadFile): class AmazonCloudStorage(CloudStorage): def __init__(self, user: CurrentUser): super().__init__(user) - self.client = boto3.client( - 's3', - aws_access_key_id=settings.AWS_ACCESS_KEY_ID, - aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, - region_name=settings.AWS_DEFAULT_REGION, - ) - - def test_and_create(self, target: SimpleStorageName): - try: - # does the bucket exist... - self.client.head_bucket(Bucket=target.Bucket) - except ClientError as err: - response = int(err.response['Error']['Code']) - if response != 404: - raise - # ... if not create it - self.client.create_bucket(Bucket=target.Bucket) + self.aws = AmazonCloudStorageClient() def put(self, source: UploadFile): fname_external = Path(source.filename) assert not fname_external.parent.name, 'Source is not a basename' - destination = SimpleStorageName(self.user, fname_external) + destination = SimpleStorageName(fname_external) kwargs = asdict(destination) metadata = self.user.dict() try: - self.test_and_create(destination) - self.client.upload_fileobj( + self.aws.client.upload_fileobj( source.file, ExtraArgs={ 'Metadata': metadata, From 0350ce255085760e4385ca537ef985dc21983454 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 12:12:20 +0530 Subject: [PATCH 12/88] Missing imports --- backend/app/initial_storage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/app/initial_storage.py b/backend/app/initial_storage.py index a486ac22..5c97012b 100644 --- a/backend/app/initial_storage.py +++ b/backend/app/initial_storage.py @@ -1,6 +1,9 @@ import logging +from botocore.exceptions import ClientError + from app.core.cloud import AmazonCloudStorageClient +from app.core.config import settings logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) From d1e97e1d6fb5ee3eb95723b51d9245e9b793f38b Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 12:13:16 +0530 Subject: [PATCH 13/88] Unused imports --- backend/app/core/cloud/storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 9b7ca50a..5cbaae93 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -1,6 +1,5 @@ import functools as ft from uuid import uuid4 -from typing import ClassVar from pathlib import Path from dataclasses import dataclass, asdict from urllib.parse import ParseResult From c5319cb1748732072602802266decd02baf293dd Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 12:33:48 +0530 Subject: [PATCH 14/88] Lift timestamp generation to common location --- backend/app/core/util.py | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 backend/app/core/util.py diff --git a/backend/app/core/util.py b/backend/app/core/util.py new file mode 100644 index 00000000..13fbc2aa --- /dev/null +++ b/backend/app/core/util.py @@ -0,0 +1,4 @@ +from datetime import datetime, timezone + +def now(): + return datetime.now(timezone.utc).replace(tzinfo=None) From 9607567a816d14efb4246e8b6f2f2cd6578e193e Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 12:34:42 +0530 Subject: [PATCH 15/88] Timestamp generation is global resource --- backend/app/models/document.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/app/models/document.py b/backend/app/models/document.py index 4f039d31..4eab87ad 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -1,13 +1,11 @@ from uuid import UUID, uuid4 from pathlib import Path -from datetime import datetime, timezone +from datetime import datetime from sqlmodel import Field, Relationship, SQLModel from .user import User - -def now(): - return datetime.now(timezone.utc).replace(tzinfo=None) +from app.core.util import now class Document(SQLModel, table=True): id: UUID = Field( From 4d7c7c7e809884bb1d9c2022fc958f0a7ec4eb07 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 12:34:57 +0530 Subject: [PATCH 16/88] Updating a document is not yet supported --- backend/app/models/document.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/models/document.py b/backend/app/models/document.py index 4eab87ad..8fd65cb5 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -23,7 +23,7 @@ class Document(SQLModel, table=True): created_at: datetime = Field( default_factory=now, ) - updated_at: datetime | None + # updated_at: datetime | None deleted_at: datetime | None owner: User = Relationship(back_populates='documents') From 6e118c60476a3bc854a387a0ac3fe03c1eeea181 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 13:33:21 +0530 Subject: [PATCH 17/88] Flesh out document remove and stat --- backend/app/api/routes/documents.py | 47 +++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 5ecec1ee..b2ca2b5b 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -6,6 +6,7 @@ from app.api.deps import CurrentUser, SessionDep from app.core.cloud import AmazonCloudStorage +from app.core.util import now from app.models import Document, DocumentList router = APIRouter(prefix="/documents", tags=["documents"]) @@ -57,3 +58,49 @@ def upload_doc( session.refresh(document) return document.id + +@router.get("/rm/{doc_id}") +def delete_doc( + session: SessionDep, + current_user: CurrentUser, +): + deleted_at = now() + statement = ( + update(Document) + .where(and_( + Document.id == doc_id, + Document.owner_id == current_user.id, + )) + .values(deleted_at=deleted_at) + ) + session.exec(statement) + + # TODO: perform delete on the collection + +@router.get("/stat/{doc_id}", response_model=Document) +def doc_info( + session: SessionDep, + current_user: CurrentUser, +): + statement = ( + select(Document) + .where(Document.id == doc_id) + ) + docs = (session + .exec(statement) + .all()) + n = len(docs) + if n == 1: + return docs[0] + + (status_code, reason) = (500, 'not unique') if n else (400, 'not found') + detail = f'Document "{doc_id}" {reason}' + + raise HTTPException(status_code=status_code, detail=detail) + +# @router.get("/assign", response_model=DocumentList) +# def assign_doc( +# session: SessionDep, +# current_user: CurrentUser, +# ): +# pass From a4510fffd685fab8d73fa58b9735bc467a304aee Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 14:41:01 +0530 Subject: [PATCH 18/88] Must specify region when creating a bucket See: https://stackoverflow.com/a/49665620 --- backend/app/initial_storage.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/app/initial_storage.py b/backend/app/initial_storage.py index 5c97012b..eb89f601 100644 --- a/backend/app/initial_storage.py +++ b/backend/app/initial_storage.py @@ -18,7 +18,12 @@ def init() -> None: if response != 404: raise # ... if not create it - aws.client.create_bucket(Bucket=settings.AWS_S3_BUCKET) + aws.client.create_bucket( + Bucket=settings.AWS_S3_BUCKET, + CreateBucketConfiguration={ + 'LocationConstraint': settings.AWS_DEFAULT_REGION, + }, + ) def main() -> None: logger.info("START: setup cloud storage") From 31e5fae63a800ee303cbb2f9736ad70a722199fb Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 14:41:31 +0530 Subject: [PATCH 19/88] Add boto3 requirement --- backend/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 1c77b83d..d51fd0de 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "pydantic-settings<3.0.0,>=2.2.1", "sentry-sdk[fastapi]<2.0.0,>=1.40.6", "pyjwt<3.0.0,>=2.8.0", + "boto3>1.37.14" ] [tool.uv] From 18654ba20bc950ead441b5f48ff64665ea36692d Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 14:49:12 +0530 Subject: [PATCH 20/88] Repeating AWS environment variables in the settings --- backend/app/core/config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index a54bc669..b98e104e 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -95,6 +95,10 @@ def emails_enabled(self) -> bool: FIRST_SUPERUSER: EmailStr FIRST_SUPERUSER_PASSWORD: str + AWS_ACCESS_KEY_ID: str + AWS_SECRET_ACCESS_KEY: str + AWS_DEFAULT_REGION: str + @computed_field # type: ignore[prop-decorator] @property def AWS_S3_BUCKET(self) -> str: From 67274e61a4b6394fbe6c50350ffe2d5fccd49ec2 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 14:50:12 +0530 Subject: [PATCH 21/88] Client expected to pass the basename of the destination Storage class builds bucket path based on user information and the basename. This allows the basename to follow the clients convention. --- backend/app/core/cloud/storage.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 5cbaae93..b6b15b89 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -1,8 +1,8 @@ +import logging import functools as ft -from uuid import uuid4 from pathlib import Path from dataclasses import dataclass, asdict -from urllib.parse import ParseResult +from urllib.parse import ParseResult, urlunparse import boto3 from fastapi import UploadFile @@ -23,12 +23,11 @@ def client(self): @dataclass(frozen=True) class SimpleStorageName: - Bucket: str Key: str + Bucket: str = settings.AWS_S3_BUCKET - def __init__(self, basename: Path): - self.Bucket = settings.AWS_S3_BUCKET - self.Key = str(basename.with_name(str(uuid4()))) + def __str__(self): + return urlunparse(self.to_url()) def to_url(self): kwargs = { @@ -45,7 +44,7 @@ class CloudStorage: def __init__(self, user: CurrentUser): self.user = user - def put(self, source: UploadFile): + def put(self, source: UploadFile, basename: str): raise NotImplementedError() class AmazonCloudStorage(CloudStorage): @@ -53,10 +52,10 @@ def __init__(self, user: CurrentUser): super().__init__(user) self.aws = AmazonCloudStorageClient() - def put(self, source: UploadFile): - fname_external = Path(source.filename) - assert not fname_external.parent.name, 'Source is not a basename' - destination = SimpleStorageName(fname_external) + def put(self, source: UploadFile, basename: str): + # key = Path(user.organization, user.project, basename) + key = Path('test-org', 'test-project', basename) + destination = SimpleStorageName(str(key)) kwargs = asdict(destination) metadata = self.user.dict() @@ -64,7 +63,7 @@ def put(self, source: UploadFile): self.aws.client.upload_fileobj( source.file, ExtraArgs={ - 'Metadata': metadata, + # 'Metadata': metadata, 'ContentType': source.content_type, }, **kwargs, @@ -72,4 +71,4 @@ def put(self, source: UploadFile): except ClientError as err: raise ConnectionError(f'AWS Error: "{err}"') from err - return destination.to_url() + return destination From fa146859c5e8cd4dec98f3e26558adcc81399162 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 14:51:53 +0530 Subject: [PATCH 22/88] Corrected upload file specification --- backend/app/api/routes/documents.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index b2ca2b5b..c341515b 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -1,7 +1,7 @@ +from uuid import uuid4 from pathlib import Path -from urllib.parse import urlunparse -from fastapi import APIRouter, File, UploadFile +from fastapi import APIRouter, File, UploadFile, HTTPException from sqlmodel import select, and_ from app.api.deps import CurrentUser, SessionDep @@ -36,9 +36,9 @@ def list_docs( @router.post("/cp") def upload_doc( - doc: UploadFile, session: SessionDep, current_user: CurrentUser, + src: UploadFile = File(...), ): storage = AmazonCloudStorage(current_user) try: From 483eb3d1cf2e36071be1b38ef8246f7d1921faf2 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 14:52:11 +0530 Subject: [PATCH 23/88] Build basename that matches the UUID expectation of the model --- backend/app/api/routes/documents.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index c341515b..0ebde357 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -41,17 +41,17 @@ def upload_doc( src: UploadFile = File(...), ): storage = AmazonCloudStorage(current_user) + basename = uuid4() try: - object_store_url = storage.put(doc) + object_store_url = storage.put(src, str(basename)) except ConnectionError as err: raise HTTPException(status_code=500, detail=str(err)) - fname_internal = Path(object_store_url.path) document = Document( - owner_id=user.id, - fname_external=Path(doc.filename), - fname_internal=fname_internal.stem, - object_store_url=urlunparse(object_store_url), + id=basename, + owner_id=current_user.id, + fname=src.filename, + object_store_url=str(object_store_url), ) session.add(document) session.commit() From a0f3e98dcb058a563ee9c0a30728e0e63168a084 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 14:52:57 +0530 Subject: [PATCH 24/88] SQLAlchemy cannot process Path types natively --- backend/app/models/document.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/app/models/document.py b/backend/app/models/document.py index 8fd65cb5..e40f03aa 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -17,8 +17,7 @@ class Document(SQLModel, table=True): nullable=False, ondelete='CASCADE', ) - fname_external: Path - fname_internal: Path + fname: str object_store_url: str created_at: datetime = Field( default_factory=now, From d4ba9b912d6456b16a5a57ec35a56786ac249f6c Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 16:36:18 +0530 Subject: [PATCH 25/88] Ensure document ID is passed to route body --- backend/app/api/routes/documents.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 0ebde357..98613947 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -1,8 +1,8 @@ -from uuid import uuid4 +from uuid import UUID, uuid4 from pathlib import Path from fastapi import APIRouter, File, UploadFile, HTTPException -from sqlmodel import select, and_ +from sqlmodel import select, update, and_ from app.api.deps import CurrentUser, SessionDep from app.core.cloud import AmazonCloudStorage @@ -63,6 +63,7 @@ def upload_doc( def delete_doc( session: SessionDep, current_user: CurrentUser, + doc_id: UUID, ): deleted_at = now() statement = ( @@ -81,6 +82,7 @@ def delete_doc( def doc_info( session: SessionDep, current_user: CurrentUser, + doc_id: UUID, ): statement = ( select(Document) From ca87b6b022c3649618b5d226428ba42d89bcd23a Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 16:45:32 +0530 Subject: [PATCH 26/88] Corrected database interaction when deleting --- backend/app/api/routes/documents.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 98613947..679e8b83 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -74,7 +74,11 @@ def delete_doc( )) .values(deleted_at=deleted_at) ) - session.exec(statement) + result = session.exec(statement) + if not result.rowcount: + detail = f'Item "{doc_id}" not found' + raise HTTPException(status_code=404, detail=detail) + session.commit() # TODO: perform delete on the collection From 7a883e623a35c13e7b49cc26e6d3a67448b40fb4 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 19 Mar 2025 16:57:38 +0530 Subject: [PATCH 27/88] More graceful handling of non singular results --- backend/app/api/routes/documents.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 679e8b83..1b65a919 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -3,6 +3,7 @@ from fastapi import APIRouter, File, UploadFile, HTTPException from sqlmodel import select, update, and_ +from sqlalchemy.exc import NoResultFound, MultipleResultsFound from app.api.deps import CurrentUser, SessionDep from app.core.cloud import AmazonCloudStorage @@ -92,17 +93,14 @@ def doc_info( select(Document) .where(Document.id == doc_id) ) - docs = (session - .exec(statement) - .all()) - n = len(docs) - if n == 1: - return docs[0] - - (status_code, reason) = (500, 'not unique') if n else (400, 'not found') - detail = f'Document "{doc_id}" {reason}' + result = session.exec(statement) - raise HTTPException(status_code=status_code, detail=detail) + try: + return result.one() + except NoResultFound as err: + raise HTTPException(status_code=404, detail=str(err)) + except MultipleResultsFound as err: + raise HTTPException(status_code=500, detail=str(err)) # @router.get("/assign", response_model=DocumentList) # def assign_doc( From df5338c36b49d8775d739ca390f293bab57e187b Mon Sep 17 00:00:00 2001 From: Jerome White Date: Sat, 22 Mar 2025 20:02:58 +0530 Subject: [PATCH 28/88] Move document database interactions to CRUD --- backend/app/api/routes/documents.py | 55 +++++++------------------ backend/app/crud/__init__.py | 4 +- backend/app/crud/document.py | 62 +++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 42 deletions(-) create mode 100644 backend/app/crud/document.py diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 1b65a919..9ff58b09 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -2,13 +2,13 @@ from pathlib import Path from fastapi import APIRouter, File, UploadFile, HTTPException -from sqlmodel import select, update, and_ + from sqlalchemy.exc import NoResultFound, MultipleResultsFound +from app.crud import DocumentCrud +from app.models import Document, DocumentList from app.api.deps import CurrentUser, SessionDep from app.core.cloud import AmazonCloudStorage -from app.core.util import now -from app.models import Document, DocumentList router = APIRouter(prefix="/documents", tags=["documents"]) @@ -20,20 +20,8 @@ def list_docs( skip: int = 0, limit: int = 100, ): - statement = ( - select(Document) - .where(and_( - Document.owner_id == current_user.id, - Document.deleted_at.is_(None), - )) - .offset(skip) - .limit(limit) - ) - docs = (session - .exec(statement) - .all()) - - return DocumentList(docs=docs) + crud = DocumentCrud(session) + return crud.read_many(current_user.id, skip, limit) @router.post("/cp") def upload_doc( @@ -48,15 +36,14 @@ def upload_doc( except ConnectionError as err: raise HTTPException(status_code=500, detail=str(err)) + crud = DocumentCrud(session) document = Document( id=basename, owner_id=current_user.id, fname=src.filename, object_store_url=str(object_store_url), ) - session.add(document) - session.commit() - session.refresh(document) + crud.update(document) return document.id @@ -66,20 +53,11 @@ def delete_doc( current_user: CurrentUser, doc_id: UUID, ): - deleted_at = now() - statement = ( - update(Document) - .where(and_( - Document.id == doc_id, - Document.owner_id == current_user.id, - )) - .values(deleted_at=deleted_at) - ) - result = session.exec(statement) - if not result.rowcount: - detail = f'Item "{doc_id}" not found' - raise HTTPException(status_code=404, detail=detail) - session.commit() + crud = DocumentCrud(session) + try: + crud.delete(doc_id, current_user.id) + except FileNotFoundError as err: + raise HTTPException(status_code=404, detail=str(err)) # TODO: perform delete on the collection @@ -89,14 +67,9 @@ def doc_info( current_user: CurrentUser, doc_id: UUID, ): - statement = ( - select(Document) - .where(Document.id == doc_id) - ) - result = session.exec(statement) - + crud = DocumentCrud(session) try: - return result.one() + return crud.read_one(doc_id) except NoResultFound as err: raise HTTPException(status_code=404, detail=str(err)) except MultipleResultsFound as err: diff --git a/backend/app/crud/__init__.py b/backend/app/crud/__init__.py index c0fbc4e7..85edaa12 100644 --- a/backend/app/crud/__init__.py +++ b/backend/app/crud/__init__.py @@ -4,4 +4,6 @@ create_user, get_user_by_email, update_user, -) \ No newline at end of file +) + +from .document import DocumentCrud diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py new file mode 100644 index 00000000..63f431b6 --- /dev/null +++ b/backend/app/crud/document.py @@ -0,0 +1,62 @@ +from uuid import UUID +from typing import Optional + +from sqlmodel import Session, select, update, and_ + +from app.models import Document, DocumentList +from app.core.util import now + +class CrudObject: + def __init__(self, session: Session): + self.session = session + +class DocumentCrud(CrudObject): + def read_one(self, doc_id: UUID): + statement = ( + select(Document) + .where(Document.id == doc_id) + ) + + return self.session.exec(statement).one() + + def read_many( + self, + owner_id: UUID, + skip: Optional[int] = None, + limit: Optional[int] = None, + ): + statement = ( + select(Document) + .where(and_( + Document.owner_id == owner_id, + Document.deleted_at.is_(None), + )) + ) + if skip is not None: + statement = statement.offset(skip) + if limit is not None: + statement = statement.limit(limit) + + docs = self.session.exec(statement).all() + + return DocumentList(docs=docs) + + def update(self, doc: Document): + self.session.add(document) + self.session.commit() + self.session.refresh(document) + + def delete(self, doc_id: UUID, owner_id: UUID): + statement = ( + update(Document) + .where(and_( + Document.id == doc_id, + Document.owner_id == owner_id, + )) + .values(deleted_at=now()) + ) + result = self.session.exec(statement) + if not result.rowcount: + raise FileNotFoundError(f'Item "{doc_id}" not found') + + self.session.commit() From 6d5dda7a3c473581bdd7d3efd6b82eb869304795 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 16:29:28 +0530 Subject: [PATCH 29/88] Ignore Emacs backup files --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c0b7dd63..c807732b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,7 @@ ENV/ # .DS_Store: macOS Finder metadata file that stores folder view settings and icon positions. -**/.DS_Store \ No newline at end of file +**/.DS_Store + +# Emacs +*~ From 0fd2bf74f1eafa4297bca0c783afdf8534e7cb94 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 16:29:54 +0530 Subject: [PATCH 30/88] Allow document list to be iterable --- backend/app/models/document.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/app/models/document.py b/backend/app/models/document.py index e40f03aa..fe9e9948 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -32,3 +32,6 @@ class DocumentList(SQLModel): def __len__(self): return len(self.docs) + + def __iter__(self): + yield from self.docs From 0a5c8969c407d0c42c170dd60ada13fe18a0276c Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 16:30:15 +0530 Subject: [PATCH 31/88] Corrected parameter naming --- backend/app/crud/document.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index 63f431b6..d4f37a48 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -41,7 +41,7 @@ def read_many( return DocumentList(docs=docs) - def update(self, doc: Document): + def update(self, document: Document): self.session.add(document) self.session.commit() self.session.refresh(document) From 7df677148b0c0feb62f3bf9c84342ba781a6275e Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 16:30:33 +0530 Subject: [PATCH 32/88] Initial document CRUD tests * Test for read_* methods * Infrastructure for making it work (utils) --- backend/app/tests/crud/documents/_utils.py | 52 ++++++++++++++ .../tests/crud/documents/test_read_many.py | 69 +++++++++++++++++++ .../app/tests/crud/documents/test_read_one.py | 35 ++++++++++ 3 files changed, 156 insertions(+) create mode 100644 backend/app/tests/crud/documents/_utils.py create mode 100644 backend/app/tests/crud/documents/test_read_many.py create mode 100644 backend/app/tests/crud/documents/test_read_one.py diff --git a/backend/app/tests/crud/documents/_utils.py b/backend/app/tests/crud/documents/_utils.py new file mode 100644 index 00000000..41dda2ae --- /dev/null +++ b/backend/app/tests/crud/documents/_utils.py @@ -0,0 +1,52 @@ +import functools as ft +from uuid import UUID +from pathlib import Path + +from sqlmodel import Session, delete +from sqlalchemy.exc import IntegrityError +from sqlalchemy.dialects.sqlite import insert + +from app.crud import DocumentCrud +from app.core.config import settings +from app.crud.user import get_user_by_email +from app.models import Document, UserCreate + +class Constants: + n_documents = 10 + +@ft.cache +def get_user_id_by_email(session: Session): + user = get_user_by_email(session=session, email=settings.FIRST_SUPERUSER) + return user.id + + +@ft.cache +def int_to_uuid(value): + return UUID(int=value) + +def rm_documents(session: Session): + session.exec(delete(Document)) + session.commit() + +def insert_documents(session: Session, n: int): + owner_id = get_user_id_by_email(session) + + crud = DocumentCrud(session) + for i in range(n): + args = str(int_to_uuid(i)).split('-') + fname = Path('/', *args).with_suffix('.xyz') + document = Document( + id=int_to_uuid(i), + owner_id=owner_id, + fname=fname.name, + object_store_url=fname.as_uri(), + ) + session.add(document) + session.commit() + session.refresh(document) + + yield document + +def insert_document(session: Session): + (document, ) = insert_documents(session, 1) + return document diff --git a/backend/app/tests/crud/documents/test_read_many.py b/backend/app/tests/crud/documents/test_read_many.py new file mode 100644 index 00000000..00765d5d --- /dev/null +++ b/backend/app/tests/crud/documents/test_read_many.py @@ -0,0 +1,69 @@ +import pytest +from sqlmodel import Session + +from app.crud import DocumentCrud +from app.core.config import settings + +from _utils import ( + Constants, + get_user_id_by_email, + insert_documents, + int_to_uuid, + rm_documents, +) + +@pytest.fixture +def document_collection(db: Session): + rm_documents(db) + return list(insert_documents(db, Constants.n_documents)) + +@pytest.fixture(scope='class') +def clean_db_fixture(db: Session): + yield + rm_documents(db) + +@pytest.mark.usefixtures('clean_db_fixture') +class TestDatabaseReadMany: + def test_number_read_is_expected( + self, + db: Session, + document_collection: list, + ): + crud = DocumentCrud(db) + owner_id = get_user_id_by_email(db) + documents = crud.read_many(owner_id) + + assert len(documents) == Constants.n_documents + + def test_deleted_docs_excluded( + self, + db: Session, + document_collection: list, + ): + assert all(x.deleted_at is None for x in document_collection) + + def test_skip_is_respected( + self, + db: Session, + document_collection: list, + ): + crud = DocumentCrud(db) + owner_id = get_user_id_by_email(db) + skip = Constants.n_documents // 2 + doc_ids = set(x.id for x in crud.read_many(owner_id, skip=skip)) + + for i in range(skip, Constants.n_documents): + doc = int_to_uuid(i) + assert doc in doc_ids + + def test_limit_is_respected( + self, + db: Session, + document_collection: list, + ): + crud = DocumentCrud(db) + owner_id = get_user_id_by_email(db) + limit = Constants.n_documents // 2 + documents = crud.read_many(owner_id, limit=limit) + + assert len(documents) == limit diff --git a/backend/app/tests/crud/documents/test_read_one.py b/backend/app/tests/crud/documents/test_read_one.py new file mode 100644 index 00000000..56698389 --- /dev/null +++ b/backend/app/tests/crud/documents/test_read_one.py @@ -0,0 +1,35 @@ +import pytest +from sqlmodel import Session +from sqlalchemy.exc import NoResultFound + +from app.crud import DocumentCrud + +from _utils import insert_document, int_to_uuid, rm_documents + +@pytest.fixture +def clean_db_fixture(db: Session): + rm_documents(db) + yield + rm_documents(db) + +class TestDatabaseReadOne: + def test_can_select_valid_id( + self, + db: Session, + clean_db_fixture: None, + ): + crud = DocumentCrud(db) + document = insert_document(db) + result = crud.read_one(document.id) + + assert result.id == document.id + + def test_cannot_select_invalid_id( + self, + db: Session, + clean_db_fixture: None, + ): + crud = DocumentCrud(db) + document_id = int_to_uuid(0) + with pytest.raises(NoResultFound): + crud.read_one(document_id) From 429a162606ae606d0d0f50b06e05de0f05ea8538 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 17:49:49 +0530 Subject: [PATCH 33/88] Document update returns inserted document --- backend/app/crud/document.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index d4f37a48..e9c77e4f 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -46,6 +46,8 @@ def update(self, document: Document): self.session.commit() self.session.refresh(document) + return document + def delete(self, doc_id: UUID, owner_id: UUID): statement = ( update(Document) From 83c08be88fbab348ba6360c12e76afd3b172e92d Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 17:50:22 +0530 Subject: [PATCH 34/88] Lift document creation function --- backend/app/tests/crud/documents/_utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/backend/app/tests/crud/documents/_utils.py b/backend/app/tests/crud/documents/_utils.py index 41dda2ae..6d75ca0b 100644 --- a/backend/app/tests/crud/documents/_utils.py +++ b/backend/app/tests/crud/documents/_utils.py @@ -28,19 +28,25 @@ def rm_documents(session: Session): session.exec(delete(Document)) session.commit() +def mk_document(owner_id, index=0): + doc_id = int_to_uuid(index) + + args = str(doc_id).split('-') + fname = Path('/', *args).with_suffix('.xyz') + return Document( + id=doc_id, + owner_id=owner_id, + fname=fname.name, + object_store_url=fname.as_uri(), + ) + def insert_documents(session: Session, n: int): owner_id = get_user_id_by_email(session) crud = DocumentCrud(session) for i in range(n): - args = str(int_to_uuid(i)).split('-') - fname = Path('/', *args).with_suffix('.xyz') - document = Document( - id=int_to_uuid(i), - owner_id=owner_id, - fname=fname.name, - object_store_url=fname.as_uri(), - ) + document = mk_document(owner_id, i) + session.add(document) session.commit() session.refresh(document) From 1eead727020a643e198763fce81f46125030154d Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 17:50:34 +0530 Subject: [PATCH 35/88] Test document update --- .../app/tests/crud/documents/test_update.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 backend/app/tests/crud/documents/test_update.py diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py new file mode 100644 index 00000000..c4b6d912 --- /dev/null +++ b/backend/app/tests/crud/documents/test_update.py @@ -0,0 +1,57 @@ +import pytest +from uuid import UUID +from typing import ClassVar +from sqlmodel import Session +from dataclasses import dataclass + +from app.crud import DocumentCrud +from app.core.config import settings + +from _utils import ( + Constants, + get_user_id_by_email, + insert_documents, + int_to_uuid, + mk_document, + rm_documents, +) + +@dataclass +class State: + crud: DocumentCrud + owner_id: UUID + doc_id: ClassVar[int] = 0 + + def add(self): + document = mk_document(self.owner_id, self.doc_id) + self.doc_id += 1 + + return self.crud.update(document) + + def get(self): + return self.crud.read_many(self.owner_id) + +@pytest.fixture +def state(db: Session): + rm_documents(db) + + crud = DocumentCrud(db) + owner_id = get_user_id_by_email(db) + + return State(crud, owner_id) + +class TestDatabaseUpdate: + def test_update_adds_one(self, state: State): + before = state.get() + state.add() + after = state.get() + + assert len(before) + 1 == len(after) + + def test_sequential_update_is_ordered(self, state: State): + (a, b) = map(state.add, range(2)) + assert a.create_at >= b.created_at + + def test_sequential_update_is_ordered(self, state: State): + document = state.add() + assert document.deleted_at is None From c2225b5aa21e95a9fced692c18bcb3854cc0d842 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 17:51:42 +0530 Subject: [PATCH 36/88] Whitespace --- backend/app/tests/crud/documents/test_update.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index c4b6d912..2f2dacaa 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -25,7 +25,6 @@ class State: def add(self): document = mk_document(self.owner_id, self.doc_id) self.doc_id += 1 - return self.crud.update(document) def get(self): From 912568db920708df9e8806ea04574dc2d27949c3 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 17:53:40 +0530 Subject: [PATCH 37/88] Linted --- backend/app/tests/crud/documents/test_update.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index 2f2dacaa..ef9d43c6 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -1,17 +1,14 @@ -import pytest from uuid import UUID from typing import ClassVar -from sqlmodel import Session from dataclasses import dataclass +import pytest +from sqlmodel import Session + from app.crud import DocumentCrud -from app.core.config import settings from _utils import ( - Constants, get_user_id_by_email, - insert_documents, - int_to_uuid, mk_document, rm_documents, ) @@ -51,6 +48,6 @@ def test_sequential_update_is_ordered(self, state: State): (a, b) = map(state.add, range(2)) assert a.create_at >= b.created_at - def test_sequential_update_is_ordered(self, state: State): + def test_insert_does_not_delete(self, state: State): document = state.add() assert document.deleted_at is None From 85742ed0662f77a6280233c0f2cb4976aec81f90 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 17:57:29 +0530 Subject: [PATCH 38/88] Corrected update_at test --- backend/app/tests/crud/documents/test_update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index ef9d43c6..abe899fd 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -45,8 +45,8 @@ def test_update_adds_one(self, state: State): assert len(before) + 1 == len(after) def test_sequential_update_is_ordered(self, state: State): - (a, b) = map(state.add, range(2)) - assert a.create_at >= b.created_at + (a, b) = (state.add() for _ in range(2)) + assert a.created_at <= b.created_at def test_insert_does_not_delete(self, state: State): document = state.add() From 8ad1ec8da13585085c2f7d97af52136210a26558 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 17:59:02 +0530 Subject: [PATCH 39/88] Appropriate class variable naming --- backend/app/tests/crud/documents/test_update.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index abe899fd..a447b221 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -17,11 +17,11 @@ class State: crud: DocumentCrud owner_id: UUID - doc_id: ClassVar[int] = 0 + _doc_id: ClassVar[int] = 0 def add(self): - document = mk_document(self.owner_id, self.doc_id) - self.doc_id += 1 + document = mk_document(self.owner_id, self._doc_id) + self._doc_id += 1 return self.crud.update(document) def get(self): From 7761ecd681aeb4786d6c7b0a000467d86f68ed44 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 18:16:58 +0530 Subject: [PATCH 40/88] Move document creation into a class --- backend/app/tests/crud/documents/_utils.py | 57 +++++++++++-------- .../app/tests/crud/documents/test_update.py | 31 ++++------ 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/backend/app/tests/crud/documents/_utils.py b/backend/app/tests/crud/documents/_utils.py index 6d75ca0b..6ad1ea9c 100644 --- a/backend/app/tests/crud/documents/_utils.py +++ b/backend/app/tests/crud/documents/_utils.py @@ -11,15 +11,11 @@ from app.crud.user import get_user_by_email from app.models import Document, UserCreate -class Constants: - n_documents = 10 - @ft.cache def get_user_id_by_email(session: Session): user = get_user_by_email(session=session, email=settings.FIRST_SUPERUSER) return user.id - @ft.cache def int_to_uuid(value): return UUID(int=value) @@ -28,31 +24,44 @@ def rm_documents(session: Session): session.exec(delete(Document)) session.commit() -def mk_document(owner_id, index=0): - doc_id = int_to_uuid(index) - - args = str(doc_id).split('-') - fname = Path('/', *args).with_suffix('.xyz') - return Document( - id=doc_id, - owner_id=owner_id, - fname=fname.name, - object_store_url=fname.as_uri(), - ) - def insert_documents(session: Session, n: int): - owner_id = get_user_id_by_email(session) - crud = DocumentCrud(session) - for i in range(n): - document = mk_document(owner_id, i) + docs = DocumentMaker(session) - session.add(document) + for (_, d) in zip(range(n), docs): + session.add(d) session.commit() - session.refresh(document) - - yield document + session.refresh(d) + yield d def insert_document(session: Session): (document, ) = insert_documents(session, 1) return document + +class Constants: + n_documents = 10 + +class DocumentMaker: + def __init__(self, session: Session): + self.owner_id = get_user_id_by_email(session) + self.index = 0 + + def __iter__(self): + return self + + def __next__(self): + doc_id = self.get_and_increment() + args = str(doc_id).split('-') + fname = Path('/', *args).with_suffix('.xyz') + + return Document( + id=doc_id, + owner_id=self.owner_id, + fname=fname.name, + object_store_url=fname.as_uri(), + ) + + def get_and_increment(self): + doc_id = int_to_uuid(self.index) + self.index += 1 + return doc_id diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index a447b221..f6a73312 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -1,6 +1,5 @@ from uuid import UUID from typing import ClassVar -from dataclasses import dataclass import pytest from sqlmodel import Session @@ -8,46 +7,38 @@ from app.crud import DocumentCrud from _utils import ( - get_user_id_by_email, - mk_document, + DocumentMaker, rm_documents, ) -@dataclass -class State: - crud: DocumentCrud - owner_id: UUID - _doc_id: ClassVar[int] = 0 +class TestState: + def __init__(self, db: Session): + self.crud = DocumentCrud(db) + self.documents = DocumentMaker(db) def add(self): - document = mk_document(self.owner_id, self._doc_id) - self._doc_id += 1 - return self.crud.update(document) + return self.crud.update(next(self.documents)) def get(self): - return self.crud.read_many(self.owner_id) + return self.crud.read_many(self.documents.owner_id) @pytest.fixture def state(db: Session): rm_documents(db) - - crud = DocumentCrud(db) - owner_id = get_user_id_by_email(db) - - return State(crud, owner_id) + return TestState(db) class TestDatabaseUpdate: - def test_update_adds_one(self, state: State): + def test_update_adds_one(self, state: TestState): before = state.get() state.add() after = state.get() assert len(before) + 1 == len(after) - def test_sequential_update_is_ordered(self, state: State): + def test_sequential_update_is_ordered(self, state: TestState): (a, b) = (state.add() for _ in range(2)) assert a.created_at <= b.created_at - def test_insert_does_not_delete(self, state: State): + def test_insert_does_not_delete(self, state: TestState): document = state.add() assert document.deleted_at is None From d3e0c409e15475e64633eeae8ac7cad469ad98e4 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 18:20:42 +0530 Subject: [PATCH 41/88] Linted --- backend/app/tests/crud/documents/_utils.py | 17 ++++++----------- .../app/tests/crud/documents/test_read_many.py | 1 - backend/app/tests/crud/documents/test_update.py | 3 --- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/backend/app/tests/crud/documents/_utils.py b/backend/app/tests/crud/documents/_utils.py index 6ad1ea9c..7d7f99c5 100644 --- a/backend/app/tests/crud/documents/_utils.py +++ b/backend/app/tests/crud/documents/_utils.py @@ -3,13 +3,10 @@ from pathlib import Path from sqlmodel import Session, delete -from sqlalchemy.exc import IntegrityError -from sqlalchemy.dialects.sqlite import insert -from app.crud import DocumentCrud from app.core.config import settings from app.crud.user import get_user_by_email -from app.models import Document, UserCreate +from app.models import Document @ft.cache def get_user_id_by_email(session: Session): @@ -25,14 +22,12 @@ def rm_documents(session: Session): session.commit() def insert_documents(session: Session, n: int): - crud = DocumentCrud(session) - docs = DocumentMaker(session) - - for (_, d) in zip(range(n), docs): - session.add(d) + documents = DocumentMaker(session) + for (_, doc) in zip(range(n), documents): + session.add(doc) session.commit() - session.refresh(d) - yield d + session.refresh(doc) + yield doc def insert_document(session: Session): (document, ) = insert_documents(session, 1) diff --git a/backend/app/tests/crud/documents/test_read_many.py b/backend/app/tests/crud/documents/test_read_many.py index 00765d5d..4dd06ef6 100644 --- a/backend/app/tests/crud/documents/test_read_many.py +++ b/backend/app/tests/crud/documents/test_read_many.py @@ -2,7 +2,6 @@ from sqlmodel import Session from app.crud import DocumentCrud -from app.core.config import settings from _utils import ( Constants, diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index f6a73312..393bbf54 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -1,6 +1,3 @@ -from uuid import UUID -from typing import ClassVar - import pytest from sqlmodel import Session From 0b8812396002f996fff8b2d46a65c123b3d89ee7 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 18:35:36 +0530 Subject: [PATCH 42/88] Test document CRUD delete --- .../app/tests/crud/documents/test_delete.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 backend/app/tests/crud/documents/test_delete.py diff --git a/backend/app/tests/crud/documents/test_delete.py b/backend/app/tests/crud/documents/test_delete.py new file mode 100644 index 00000000..5a099017 --- /dev/null +++ b/backend/app/tests/crud/documents/test_delete.py @@ -0,0 +1,34 @@ +import pytest +from sqlmodel import Session, select + +from app.crud import DocumentCrud +from app.models import Document + +from _utils import ( + insert_document, + rm_documents, +) + +@pytest.fixture +def document(db: Session): + rm_documents(db) + document = insert_document(db) + + crud = DocumentCrud(db) + crud.delete(document.id, document.owner_id) + + statement = ( + select(Document) + .where(Document.id == document.id) + ) + return db.exec(statement).one() + +class TestDatabaseUpdate: + def test_delete_is_soft(self, document: Document): + assert document is not None + + def test_delete_marks_deleted(self, document: Document): + assert document.deleted_at is not None + + def test_delete_follows_insert(self, document: Document): + assert document.created_at <= document.deleted_at From 6e161796b7927f2e3f6845f0d6022d768dd09655 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Mon, 24 Mar 2025 18:39:31 +0530 Subject: [PATCH 43/88] All test methods cleanup after themselves --- backend/app/tests/crud/documents/_utils.py | 7 +++++++ backend/app/tests/crud/documents/test_delete.py | 2 ++ backend/app/tests/crud/documents/test_read_many.py | 6 +----- backend/app/tests/crud/documents/test_read_one.py | 8 +++++++- backend/app/tests/crud/documents/test_update.py | 2 ++ 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/backend/app/tests/crud/documents/_utils.py b/backend/app/tests/crud/documents/_utils.py index 7d7f99c5..d6de7b11 100644 --- a/backend/app/tests/crud/documents/_utils.py +++ b/backend/app/tests/crud/documents/_utils.py @@ -2,6 +2,7 @@ from uuid import UUID from pathlib import Path +import pytest from sqlmodel import Session, delete from app.core.config import settings @@ -33,6 +34,12 @@ def insert_document(session: Session): (document, ) = insert_documents(session, 1) return document +@pytest.fixture(scope='class') +def clean_db_fixture(db: Session): + rm_documents(db) + yield + rm_documents(db) + class Constants: n_documents = 10 diff --git a/backend/app/tests/crud/documents/test_delete.py b/backend/app/tests/crud/documents/test_delete.py index 5a099017..6a68d404 100644 --- a/backend/app/tests/crud/documents/test_delete.py +++ b/backend/app/tests/crud/documents/test_delete.py @@ -5,6 +5,7 @@ from app.models import Document from _utils import ( + clean_db_fixture, insert_document, rm_documents, ) @@ -23,6 +24,7 @@ def document(db: Session): ) return db.exec(statement).one() +@pytest.mark.usefixtures('clean_db_fixture') class TestDatabaseUpdate: def test_delete_is_soft(self, document: Document): assert document is not None diff --git a/backend/app/tests/crud/documents/test_read_many.py b/backend/app/tests/crud/documents/test_read_many.py index 4dd06ef6..74b88350 100644 --- a/backend/app/tests/crud/documents/test_read_many.py +++ b/backend/app/tests/crud/documents/test_read_many.py @@ -5,6 +5,7 @@ from _utils import ( Constants, + clean_db_fixture, get_user_id_by_email, insert_documents, int_to_uuid, @@ -16,11 +17,6 @@ def document_collection(db: Session): rm_documents(db) return list(insert_documents(db, Constants.n_documents)) -@pytest.fixture(scope='class') -def clean_db_fixture(db: Session): - yield - rm_documents(db) - @pytest.mark.usefixtures('clean_db_fixture') class TestDatabaseReadMany: def test_number_read_is_expected( diff --git a/backend/app/tests/crud/documents/test_read_one.py b/backend/app/tests/crud/documents/test_read_one.py index 56698389..29ee08b6 100644 --- a/backend/app/tests/crud/documents/test_read_one.py +++ b/backend/app/tests/crud/documents/test_read_one.py @@ -4,7 +4,12 @@ from app.crud import DocumentCrud -from _utils import insert_document, int_to_uuid, rm_documents +from _utils import ( + clean_db_fixture, + insert_document, + int_to_uuid, + rm_documents, +) @pytest.fixture def clean_db_fixture(db: Session): @@ -12,6 +17,7 @@ def clean_db_fixture(db: Session): yield rm_documents(db) +@pytest.mark.usefixtures('clean_db_fixture') class TestDatabaseReadOne: def test_can_select_valid_id( self, diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index 393bbf54..0d0849de 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -4,6 +4,7 @@ from app.crud import DocumentCrud from _utils import ( + clean_db_fixture, DocumentMaker, rm_documents, ) @@ -24,6 +25,7 @@ def state(db: Session): rm_documents(db) return TestState(db) +@pytest.mark.usefixtures('clean_db_fixture') class TestDatabaseUpdate: def test_update_adds_one(self, state: TestState): before = state.get() From 6f9f6cdcaa0bd59d29fbcdf418b9a5f2e8631e1e Mon Sep 17 00:00:00 2001 From: Jerome White Date: Tue, 25 Mar 2025 16:45:14 +0530 Subject: [PATCH 44/88] Gracefully handle negative skip's and limit's --- backend/app/crud/document.py | 4 ++ backend/app/models/document.py | 3 ++ .../tests/crud/documents/test_read_many.py | 42 +++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index e9c77e4f..3d9880e1 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -33,8 +33,12 @@ def read_many( )) ) if skip is not None: + if skip < 0: + raise ValueError(f'Negative skip: {skip}') statement = statement.offset(skip) if limit is not None: + if limit < 0: + raise ValueError(f'Negative limit: {limit}') statement = statement.limit(limit) docs = self.session.exec(statement).all() diff --git a/backend/app/models/document.py b/backend/app/models/document.py index fe9e9948..e122bc71 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -30,6 +30,9 @@ class Document(SQLModel, table=True): class DocumentList(SQLModel): docs: list[Document] + def __bool__(self): + return bool(self.docs) + def __len__(self): return len(self.docs) diff --git a/backend/app/tests/crud/documents/test_read_many.py b/backend/app/tests/crud/documents/test_read_many.py index 74b88350..3a8ed542 100644 --- a/backend/app/tests/crud/documents/test_read_many.py +++ b/backend/app/tests/crud/documents/test_read_many.py @@ -51,6 +51,27 @@ def test_skip_is_respected( doc = int_to_uuid(i) assert doc in doc_ids + def test_zero_skip_includes_all( + self, + db: Session, + document_collection: list, + ): + crud = DocumentCrud(db) + owner_id = get_user_id_by_email(db) + documents = crud.read_many(owner_id, skip=0) + + assert len(documents) == Constants.n_documents + + def test_negative_skip_raises_exception( + self, + db: Session, + document_collection: list, + ): + crud = DocumentCrud(db) + owner_id = get_user_id_by_email(db) + with pytest.raises(ValueError): + crud.read_many(owner_id, skip=-1) + def test_limit_is_respected( self, db: Session, @@ -62,3 +83,24 @@ def test_limit_is_respected( documents = crud.read_many(owner_id, limit=limit) assert len(documents) == limit + + def test_zero_limit_includes_none( + self, + db: Session, + document_collection: list, + ): + crud = DocumentCrud(db) + owner_id = get_user_id_by_email(db) + documents = crud.read_many(owner_id, limit=0) + + assert not documents + + def test_negative_limit_raises_exception( + self, + db: Session, + document_collection: list, + ): + crud = DocumentCrud(db) + owner_id = get_user_id_by_email(db) + with pytest.raises(ValueError): + crud.read_many(owner_id, limit=-1) From 7d07223465086e8757b0a356ce705c9a17aaba55 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Tue, 25 Mar 2025 17:12:27 +0530 Subject: [PATCH 45/88] Fixture usage is more explicit and straightforward --- .../tests/crud/documents/test_read_many.py | 112 ++++++++++-------- 1 file changed, 63 insertions(+), 49 deletions(-) diff --git a/backend/app/tests/crud/documents/test_read_many.py b/backend/app/tests/crud/documents/test_read_many.py index 3a8ed542..8cab417b 100644 --- a/backend/app/tests/crud/documents/test_read_many.py +++ b/backend/app/tests/crud/documents/test_read_many.py @@ -1,3 +1,5 @@ +from uuid import UUID + import pytest from sqlmodel import Session @@ -12,38 +14,56 @@ rm_documents, ) +class DocumentManager: + def __init__(self, db: Session): + self.db = db + self.documents = None + + rm_documents(self.db) + + def __iter__(self): + if self.documents is None: + raise AttributeError() + yield from self.documents + + def add(self, n): + self.documents = list(insert_documents(self.db, n)) + +@pytest.fixture +def documents(db: Session): + return DocumentManager(db) + @pytest.fixture -def document_collection(db: Session): - rm_documents(db) - return list(insert_documents(db, Constants.n_documents)) +def crud(db: Session): + return DocumentCrud(db) + +@pytest.fixture +def owner_id(db: Session): + return get_user_id_by_email(db) @pytest.mark.usefixtures('clean_db_fixture') class TestDatabaseReadMany: def test_number_read_is_expected( self, - db: Session, - document_collection: list, + crud: DocumentCrud, + owner_id: UUID, + documents: DocumentManager, ): - crud = DocumentCrud(db) - owner_id = get_user_id_by_email(db) - documents = crud.read_many(owner_id) + documents.add(Constants.n_documents) + docs = crud.read_many(owner_id) + assert len(docs) == Constants.n_documents - assert len(documents) == Constants.n_documents - - def test_deleted_docs_excluded( - self, - db: Session, - document_collection: list, - ): - assert all(x.deleted_at is None for x in document_collection) + def test_deleted_docs_are_excluded(self, documents: DocumentManager): + documents.add(Constants.n_documents) + assert all(x.deleted_at is None for x in documents) def test_skip_is_respected( self, - db: Session, - document_collection: list, + crud: DocumentCrud, + owner_id: UUID, + documents: DocumentManager, ): - crud = DocumentCrud(db) - owner_id = get_user_id_by_email(db) + documents.add(Constants.n_documents) skip = Constants.n_documents // 2 doc_ids = set(x.id for x in crud.read_many(owner_id, skip=skip)) @@ -53,54 +73,48 @@ def test_skip_is_respected( def test_zero_skip_includes_all( self, - db: Session, - document_collection: list, + crud: DocumentCrud, + owner_id: UUID, + documents: DocumentManager, ): - crud = DocumentCrud(db) - owner_id = get_user_id_by_email(db) - documents = crud.read_many(owner_id, skip=0) - - assert len(documents) == Constants.n_documents + documents.add(Constants.n_documents) + docs = crud.read_many(owner_id, skip=0) + assert len(docs) == Constants.n_documents def test_negative_skip_raises_exception( self, - db: Session, - document_collection: list, + crud: DocumentCrud, + owner_id: UUID, ): - crud = DocumentCrud(db) - owner_id = get_user_id_by_email(db) with pytest.raises(ValueError): crud.read_many(owner_id, skip=-1) def test_limit_is_respected( self, - db: Session, - document_collection: list, + crud: DocumentCrud, + owner_id: UUID, + documents: DocumentManager, ): - crud = DocumentCrud(db) - owner_id = get_user_id_by_email(db) + documents.add(Constants.n_documents) limit = Constants.n_documents // 2 - documents = crud.read_many(owner_id, limit=limit) + docs = crud.read_many(owner_id, limit=limit) - assert len(documents) == limit + assert len(docs) == limit - def test_zero_limit_includes_none( + def test_zero_limit_includes_nothing( self, - db: Session, - document_collection: list, + crud: DocumentCrud, + owner_id: UUID, + documents: DocumentManager, ): - crud = DocumentCrud(db) - owner_id = get_user_id_by_email(db) - documents = crud.read_many(owner_id, limit=0) - - assert not documents + documents.add(Constants.n_documents) + docs = crud.read_many(owner_id, limit=0) + assert not docs def test_negative_limit_raises_exception( self, - db: Session, - document_collection: list, + crud: DocumentCrud, + owner_id: UUID, ): - crud = DocumentCrud(db) - owner_id = get_user_id_by_email(db) with pytest.raises(ValueError): crud.read_many(owner_id, limit=-1) From 0a57ce6b67ef975c445f0d7836d815beed5fc011 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Tue, 25 Mar 2025 17:21:51 +0530 Subject: [PATCH 46/88] Better usage of fixtures --- .../app/tests/crud/documents/test_update.py | 47 ++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index 0d0849de..a29044b8 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -9,35 +9,40 @@ rm_documents, ) -class TestState: - def __init__(self, db: Session): - self.crud = DocumentCrud(db) - self.documents = DocumentMaker(db) - - def add(self): - return self.crud.update(next(self.documents)) - - def get(self): - return self.crud.read_many(self.documents.owner_id) - @pytest.fixture -def state(db: Session): +def documents(db: Session): rm_documents(db) - return TestState(db) + return DocumentMaker(db) + +@pytest.fixture +def crud(db: Session): + return DocumentCrud(db) @pytest.mark.usefixtures('clean_db_fixture') class TestDatabaseUpdate: - def test_update_adds_one(self, state: TestState): - before = state.get() - state.add() - after = state.get() + def test_update_adds_one( + self, + crud: DocumentCrud, + documents: DocumentMaker, + ): + before = crud.read_many(documents.owner_id) + crud.update(next(documents)) + after = crud.read_many(documents.owner_id) assert len(before) + 1 == len(after) - def test_sequential_update_is_ordered(self, state: TestState): - (a, b) = (state.add() for _ in range(2)) + def test_sequential_update_is_ordered( + self, + crud: DocumentCrud, + documents: DocumentMaker, + ): + (a, b) = (crud.update(y) for (_, y) in zip(range(2), documents)) assert a.created_at <= b.created_at - def test_insert_does_not_delete(self, state: TestState): - document = state.add() + def test_insert_does_not_delete( + self, + crud: DocumentCrud, + documents: DocumentMaker, + ): + document = crud.update(next(documents)) assert document.deleted_at is None From 2ee4787eb0be4d6c46eed4ba477312f419bd5189 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Tue, 25 Mar 2025 17:22:40 +0530 Subject: [PATCH 47/88] Import reordering --- backend/app/tests/crud/documents/test_update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index a29044b8..fa4f9f2e 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -4,8 +4,8 @@ from app.crud import DocumentCrud from _utils import ( - clean_db_fixture, DocumentMaker, + clean_db_fixture, rm_documents, ) From 5e29a5a520a3316c484618895210a12559b34823 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Tue, 25 Mar 2025 18:19:18 +0530 Subject: [PATCH 48/88] Lift document crud testing utils to global testing utilities --- backend/app/tests/crud/documents/test_delete.py | 2 +- backend/app/tests/crud/documents/test_read_many.py | 2 +- backend/app/tests/crud/documents/test_read_one.py | 2 +- backend/app/tests/crud/documents/test_update.py | 2 +- .../app/tests/{crud/documents/_utils.py => utils/document.py} | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename backend/app/tests/{crud/documents/_utils.py => utils/document.py} (100%) diff --git a/backend/app/tests/crud/documents/test_delete.py b/backend/app/tests/crud/documents/test_delete.py index 6a68d404..5316eb3d 100644 --- a/backend/app/tests/crud/documents/test_delete.py +++ b/backend/app/tests/crud/documents/test_delete.py @@ -4,7 +4,7 @@ from app.crud import DocumentCrud from app.models import Document -from _utils import ( +from app.tests.utils.document import ( clean_db_fixture, insert_document, rm_documents, diff --git a/backend/app/tests/crud/documents/test_read_many.py b/backend/app/tests/crud/documents/test_read_many.py index 8cab417b..0990be87 100644 --- a/backend/app/tests/crud/documents/test_read_many.py +++ b/backend/app/tests/crud/documents/test_read_many.py @@ -5,7 +5,7 @@ from app.crud import DocumentCrud -from _utils import ( +from app.tests.utils.document import ( Constants, clean_db_fixture, get_user_id_by_email, diff --git a/backend/app/tests/crud/documents/test_read_one.py b/backend/app/tests/crud/documents/test_read_one.py index 29ee08b6..53cff829 100644 --- a/backend/app/tests/crud/documents/test_read_one.py +++ b/backend/app/tests/crud/documents/test_read_one.py @@ -4,7 +4,7 @@ from app.crud import DocumentCrud -from _utils import ( +from app.tests.utils.document import ( clean_db_fixture, insert_document, int_to_uuid, diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index fa4f9f2e..f547a79f 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -3,7 +3,7 @@ from app.crud import DocumentCrud -from _utils import ( +from app.tests.utils.document import ( DocumentMaker, clean_db_fixture, rm_documents, diff --git a/backend/app/tests/crud/documents/_utils.py b/backend/app/tests/utils/document.py similarity index 100% rename from backend/app/tests/crud/documents/_utils.py rename to backend/app/tests/utils/document.py From 11ee2c91ffa6fbf882217790ee6e7d3c51f52427 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 00:05:59 +0530 Subject: [PATCH 49/88] Take read error into account --- backend/app/api/routes/documents.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 9ff58b09..7da507bb 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -21,7 +21,10 @@ def list_docs( limit: int = 100, ): crud = DocumentCrud(session) - return crud.read_many(current_user.id, skip, limit) + try: + return crud.read_many(current_user.id, skip, limit) + except ValueError as err: + raise HTTPException(status_code=500, detail=str(err)) @router.post("/cp") def upload_doc( From a81bd493ede436a4c6732ea5b7e83da67c2a4ea7 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 00:06:25 +0530 Subject: [PATCH 50/88] Test document list route --- .../app/tests/api/routes/documents/test_ls.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 backend/app/tests/api/routes/documents/test_ls.py diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py new file mode 100644 index 00000000..f0cf3a8f --- /dev/null +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -0,0 +1,111 @@ +import logging +import itertools as it +import functools as ft +from uuid import UUID +from pathlib import Path +from datetime import datetime +from urllib.parse import ParseResult, urlunparse + +import pytest +from sqlmodel import Session +from fastapi.testclient import TestClient + +from app.models import Document +from app.core.config import settings +from app.tests.utils.document import insert_document, rm_documents + +class RouteCreator: + _empty = ParseResult(*it.repeat('', len(ParseResult._fields))) + + def __init__(self, endpoint): + self.endpoint = endpoint + self.root = Path(settings.API_V1_STR, 'documents') + + def __str__(self): + return urlunparse(self.to_url()) + + def to_url(self): + path = self.root.joinpath(self.endpoint) + return self._empty._replace(path=str(path)) + +@pytest.fixture +def url(): + creator = RouteCreator('ls') + return creator.to_url() + +@pytest.fixture +def document(db: Session): + return insert_document(db) + +@ft.singledispatch +def to_string(value): + return value + +@to_string.register +def _(value: UUID): + return str(value) + +@to_string.register +def _(value: datetime): + return value.isoformat() + +class TestDocumentRouteList: + def test_empty_db_returns_empty_list( + self, + db: Session, + url: ParseResult, + client: TestClient, + superuser_token_headers: dict[str, str], + ): + rm_documents(db) + docs = (client + .get(urlunparse(url), headers=superuser_token_headers) + .json() + .get('docs')) + + assert not docs + + def test_response_is_success( + self, + url: ParseResult, + client: TestClient, + superuser_token_headers: dict[str, str], + ): + response = client.get(urlunparse(url), headers=superuser_token_headers) + assert response.is_success + + def test_item_reflects_database( + self, + url: ParseResult, + document: Document, + client: TestClient, + superuser_token_headers: dict[str, str], + ): + source = { x: to_string(y) for (x, y) in dict(document).items() } + target = (client + .get(urlunparse(url), headers=superuser_token_headers) + .json() + .get('docs') + .pop()) + + assert target == source + + def test_negative_skip_produces_error( + self, + url: ParseResult, + client: TestClient, + superuser_token_headers: dict[str, str], + ): + route = urlunparse(url._replace(query='skip=-1')) + response = client.get(route, headers=superuser_token_headers) + assert response.is_error + + def test_negative_limit_produces_error( + self, + url: ParseResult, + client: TestClient, + superuser_token_headers: dict[str, str], + ): + route = urlunparse(url._replace(query='limit=-1')) + response = client.get(route, headers=superuser_token_headers) + assert response.is_error From ddc6cf5384a1099a15e36167cb8fe0eaddd0d95b Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 00:35:17 +0530 Subject: [PATCH 51/88] Simplify test parameters --- .../app/tests/api/routes/documents/test_ls.py | 69 +++++++++++-------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py index f0cf3a8f..3223bce0 100644 --- a/backend/app/tests/api/routes/documents/test_ls.py +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -4,6 +4,7 @@ from uuid import UUID from pathlib import Path from datetime import datetime +from dataclasses import dataclass from urllib.parse import ParseResult, urlunparse import pytest @@ -14,7 +15,7 @@ from app.core.config import settings from app.tests.utils.document import insert_document, rm_documents -class RouteCreator: +class Route: _empty = ParseResult(*it.repeat('', len(ParseResult._fields))) def __init__(self, endpoint): @@ -28,15 +29,36 @@ def to_url(self): path = self.root.joinpath(self.endpoint) return self._empty._replace(path=str(path)) +@dataclass +class WebCrawler: + client: TestClient + superuser_token_headers: dict[str, str] + + @ft.singledispatchmethod + def get(self, route: str): + return self.client.get(route, headers=self.superuser_token_headers) + + @get.register + def _(self, route: Route): + return self.get(str(route)) + + @get.register + def _(self, route: ParseResult): + return self.get(urlunparse(route)) + @pytest.fixture def url(): - creator = RouteCreator('ls') - return creator.to_url() + route = Route('ls') + return route.to_url() @pytest.fixture def document(db: Session): return insert_document(db) +@pytest.fixture +def crawler(client: TestClient, superuser_token_headers: dict[str, str]): + return WebCrawler(client, superuser_token_headers) + @ft.singledispatch def to_string(value): return value @@ -50,40 +72,33 @@ def _(value: datetime): return value.isoformat() class TestDocumentRouteList: + def test_response_is_success(self, url: ParseResult, crawler: Route): + response = crawler.get(url) + assert response.is_success + def test_empty_db_returns_empty_list( self, db: Session, url: ParseResult, - client: TestClient, - superuser_token_headers: dict[str, str], + crawler: Route, ): rm_documents(db) - docs = (client - .get(urlunparse(url), headers=superuser_token_headers) + docs = (crawler + .get(url) .json() .get('docs')) assert not docs - def test_response_is_success( - self, - url: ParseResult, - client: TestClient, - superuser_token_headers: dict[str, str], - ): - response = client.get(urlunparse(url), headers=superuser_token_headers) - assert response.is_success - def test_item_reflects_database( self, url: ParseResult, document: Document, - client: TestClient, - superuser_token_headers: dict[str, str], + crawler: Route, ): source = { x: to_string(y) for (x, y) in dict(document).items() } - target = (client - .get(urlunparse(url), headers=superuser_token_headers) + target = (crawler + .get(url) .json() .get('docs') .pop()) @@ -93,19 +108,17 @@ def test_item_reflects_database( def test_negative_skip_produces_error( self, url: ParseResult, - client: TestClient, - superuser_token_headers: dict[str, str], + crawler: Route, ): - route = urlunparse(url._replace(query='skip=-1')) - response = client.get(route, headers=superuser_token_headers) + target = url._replace(query='skip=-1') + response = crawler.get(urlunparse(target)) assert response.is_error def test_negative_limit_produces_error( self, url: ParseResult, - client: TestClient, - superuser_token_headers: dict[str, str], + crawler: Route, ): - route = urlunparse(url._replace(query='limit=-1')) - response = client.get(route, headers=superuser_token_headers) + target = url._replace(query='limit=-1') + response = crawler.get(urlunparse(target)) assert response.is_error From 0fefd197841ca4b91bb1e41a9232ac3deca9ebee Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 00:44:13 +0530 Subject: [PATCH 52/88] Lift common document endpoint testing resources to utils --- .../app/tests/api/routes/documents/test_ls.py | 47 +--------------- backend/app/tests/utils/document.py | 56 +++++++++++++++++-- 2 files changed, 51 insertions(+), 52 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py index 3223bce0..4315363c 100644 --- a/backend/app/tests/api/routes/documents/test_ls.py +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -1,64 +1,19 @@ -import logging -import itertools as it import functools as ft from uuid import UUID -from pathlib import Path from datetime import datetime -from dataclasses import dataclass from urllib.parse import ParseResult, urlunparse import pytest from sqlmodel import Session -from fastapi.testclient import TestClient from app.models import Document -from app.core.config import settings -from app.tests.utils.document import insert_document, rm_documents - -class Route: - _empty = ParseResult(*it.repeat('', len(ParseResult._fields))) - - def __init__(self, endpoint): - self.endpoint = endpoint - self.root = Path(settings.API_V1_STR, 'documents') - - def __str__(self): - return urlunparse(self.to_url()) - - def to_url(self): - path = self.root.joinpath(self.endpoint) - return self._empty._replace(path=str(path)) - -@dataclass -class WebCrawler: - client: TestClient - superuser_token_headers: dict[str, str] - - @ft.singledispatchmethod - def get(self, route: str): - return self.client.get(route, headers=self.superuser_token_headers) - - @get.register - def _(self, route: Route): - return self.get(str(route)) - - @get.register - def _(self, route: ParseResult): - return self.get(urlunparse(route)) +from app.tests.utils.document import Route, crawler, document, rm_documents @pytest.fixture def url(): route = Route('ls') return route.to_url() -@pytest.fixture -def document(db: Session): - return insert_document(db) - -@pytest.fixture -def crawler(client: TestClient, superuser_token_headers: dict[str, str]): - return WebCrawler(client, superuser_token_headers) - @ft.singledispatch def to_string(value): return value diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index d6de7b11..7429820a 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -1,9 +1,13 @@ +import itertools as it import functools as ft from uuid import UUID from pathlib import Path +from dataclasses import dataclass +from urllib.parse import ParseResult, urlunparse import pytest from sqlmodel import Session, delete +from fastapi.testclient import TestClient from app.core.config import settings from app.crud.user import get_user_by_email @@ -34,12 +38,6 @@ def insert_document(session: Session): (document, ) = insert_documents(session, 1) return document -@pytest.fixture(scope='class') -def clean_db_fixture(db: Session): - rm_documents(db) - yield - rm_documents(db) - class Constants: n_documents = 10 @@ -67,3 +65,49 @@ def get_and_increment(self): doc_id = int_to_uuid(self.index) self.index += 1 return doc_id + + +class Route: + _empty = ParseResult(*it.repeat('', len(ParseResult._fields))) + + def __init__(self, endpoint): + self.endpoint = endpoint + self.root = Path(settings.API_V1_STR, 'documents') + + def __str__(self): + return urlunparse(self.to_url()) + + def to_url(self): + path = self.root.joinpath(self.endpoint) + return self._empty._replace(path=str(path)) + +@dataclass +class WebCrawler: + client: TestClient + superuser_token_headers: dict[str, str] + + @ft.singledispatchmethod + def get(self, route: str): + return self.client.get(route, headers=self.superuser_token_headers) + + @get.register + def _(self, route: Route): + return self.get(str(route)) + + @get.register + def _(self, route: ParseResult): + return self.get(urlunparse(route)) + +@pytest.fixture(scope='class') +def clean_db_fixture(db: Session): + rm_documents(db) + yield + rm_documents(db) + +@pytest.fixture +def document(db: Session): + return insert_document(db) + +@pytest.fixture +def crawler(client: TestClient, superuser_token_headers: dict[str, str]): + return WebCrawler(client, superuser_token_headers) From db1d908ca7a3cebd5d07fdbe74fa95ed965262c6 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 01:53:29 +0530 Subject: [PATCH 53/88] Return number of rows deleted --- backend/app/api/routes/documents.py | 2 +- backend/app/crud/document.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 7da507bb..47834c75 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -58,7 +58,7 @@ def delete_doc( ): crud = DocumentCrud(session) try: - crud.delete(doc_id, current_user.id) + return crud.delete(doc_id, current_user.id) except FileNotFoundError as err: raise HTTPException(status_code=404, detail=str(err)) diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index 3d9880e1..4250dcd8 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -64,5 +64,6 @@ def delete(self, doc_id: UUID, owner_id: UUID): result = self.session.exec(statement) if not result.rowcount: raise FileNotFoundError(f'Item "{doc_id}" not found') - self.session.commit() + + return result.rowcount From d6854f09f3ae692a0fa4f7988557ec98a335f251 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 01:57:59 +0530 Subject: [PATCH 54/88] Test document endpoint deletion --- .../tests/api/routes/documents/test_delete.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 backend/app/tests/api/routes/documents/test_delete.py diff --git a/backend/app/tests/api/routes/documents/test_delete.py b/backend/app/tests/api/routes/documents/test_delete.py new file mode 100644 index 00000000..f49c9f08 --- /dev/null +++ b/backend/app/tests/api/routes/documents/test_delete.py @@ -0,0 +1,69 @@ +from pathlib import Path +from urllib.parse import ParseResult, urlunparse + +import pytest +from sqlmodel import Session, select +from fastapi.testclient import TestClient + +from app.models import Document +from app.core.config import settings +from app.tests.utils.document import ( + DocumentMaker, + Route, + crawler, + insert_document, + rm_documents, +) + +class DeletedRoute(Route): + def update(self, doc: Document): + endpoint = Path(self.endpoint, str(doc.id)) + return type(self)(endpoint) + +@pytest.fixture +def route(): + return DeletedRoute('rm') + +@pytest.fixture +def document(db: Session): + rm_documents(db) + return insert_document(db) + +class TestDocumentRouteDelete: + def test_response_is_success( + self, + db: Session, + route: DeletedRoute, + crawler: Route, + document: Document, + ): + response = crawler.get(route.update(document)) + assert response.is_success + + def test_item_is_soft_deleted( + self, + db: Session, + route: DeletedRoute, + crawler: Route, + document: Document, + ): + crawler.get(route.update(document)) + + statement = ( + select(Document) + .where(Document.id == document.id) + ) + result = db.exec(statement).one() + + return result.deleted_at is not None + + def test_cannot_delete_unknown_document( + self, + db: Session, + route: DeletedRoute, + crawler: Route, + ): + rm_documents(db) + maker = DocumentMaker(db) + response = crawler.get(route.update(next(maker))) + assert response.is_error From 0232620c730b0fb482411b6b6b1c2721da10e8f0 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 01:58:59 +0530 Subject: [PATCH 55/88] Consisten Session variable naming --- backend/app/tests/utils/document.py | 37 +++++++++++++---------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index 7429820a..06a34737 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -14,36 +14,36 @@ from app.models import Document @ft.cache -def get_user_id_by_email(session: Session): - user = get_user_by_email(session=session, email=settings.FIRST_SUPERUSER) +def get_user_id_by_email(db: Session): + user = get_user_by_email(session=db, email=settings.FIRST_SUPERUSER) return user.id @ft.cache def int_to_uuid(value): return UUID(int=value) -def rm_documents(session: Session): - session.exec(delete(Document)) - session.commit() +def rm_documents(db: Session): + db.exec(delete(Document)) + db.commit() -def insert_documents(session: Session, n: int): - documents = DocumentMaker(session) +def insert_documents(db: Session, n: int): + documents = DocumentMaker(db) for (_, doc) in zip(range(n), documents): - session.add(doc) - session.commit() - session.refresh(doc) + db.add(doc) + db.commit() + db.refresh(doc) yield doc -def insert_document(session: Session): - (document, ) = insert_documents(session, 1) +def insert_document(db: Session): + (document, ) = insert_documents(db, 1) return document class Constants: n_documents = 10 class DocumentMaker: - def __init__(self, session: Session): - self.owner_id = get_user_id_by_email(session) + def __init__(self, db: Session): + self.owner_id = get_user_id_by_email(db) self.index = 0 def __iter__(self): @@ -66,19 +66,18 @@ def get_and_increment(self): self.index += 1 return doc_id - class Route: _empty = ParseResult(*it.repeat('', len(ParseResult._fields))) + _root = Path(settings.API_V1_STR, 'documents') def __init__(self, endpoint): self.endpoint = endpoint - self.root = Path(settings.API_V1_STR, 'documents') def __str__(self): return urlunparse(self.to_url()) def to_url(self): - path = self.root.joinpath(self.endpoint) + path = self._root.joinpath(self.endpoint) return self._empty._replace(path=str(path)) @dataclass @@ -104,10 +103,6 @@ def clean_db_fixture(db: Session): yield rm_documents(db) -@pytest.fixture -def document(db: Session): - return insert_document(db) - @pytest.fixture def crawler(client: TestClient, superuser_token_headers: dict[str, str]): return WebCrawler(client, superuser_token_headers) From 4955bed3e55e8b9b90f0be96507c2dba10bd3b6d Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 01:59:25 +0530 Subject: [PATCH 56/88] Special list document type --- backend/app/tests/api/routes/documents/test_ls.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py index 4315363c..bab25c18 100644 --- a/backend/app/tests/api/routes/documents/test_ls.py +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -7,13 +7,22 @@ from sqlmodel import Session from app.models import Document -from app.tests.utils.document import Route, crawler, document, rm_documents +from app.tests.utils.document import ( + Route, + crawler, + insert_document, + rm_documents, +) @pytest.fixture def url(): route = Route('ls') return route.to_url() +@pytest.fixture +def document(db: Session): + return insert_document(db) + @ft.singledispatch def to_string(value): return value @@ -48,8 +57,8 @@ def test_empty_db_returns_empty_list( def test_item_reflects_database( self, url: ParseResult, - document: Document, crawler: Route, + document: Document, ): source = { x: to_string(y) for (x, y) in dict(document).items() } target = (crawler From d6acea78448a8215c10eb0ff85c935cb8306fcba Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 09:38:02 +0530 Subject: [PATCH 57/88] Ability to add component to URL path --- backend/app/tests/utils/document.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index 06a34737..d49514f1 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -80,6 +80,10 @@ def to_url(self): path = self._root.joinpath(self.endpoint) return self._empty._replace(path=str(path)) + def append(self, doc: Document): + endpoint = Path(self.endpoint, str(doc.id)) + return type(self)(endpoint) + @dataclass class WebCrawler: client: TestClient From 094d12263fef7f4bc5f5adb7a52af33a3dcee48c Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 09:38:24 +0530 Subject: [PATCH 58/88] Corrections to injected types --- .../tests/api/routes/documents/test_delete.py | 27 +++++------ .../app/tests/api/routes/documents/test_ls.py | 45 +++++++++++-------- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_delete.py b/backend/app/tests/api/routes/documents/test_delete.py index f49c9f08..371b8767 100644 --- a/backend/app/tests/api/routes/documents/test_delete.py +++ b/backend/app/tests/api/routes/documents/test_delete.py @@ -10,19 +10,15 @@ from app.tests.utils.document import ( DocumentMaker, Route, + WebCrawler, crawler, insert_document, rm_documents, ) -class DeletedRoute(Route): - def update(self, doc: Document): - endpoint = Path(self.endpoint, str(doc.id)) - return type(self)(endpoint) - @pytest.fixture def route(): - return DeletedRoute('rm') + return Route('rm') @pytest.fixture def document(db: Session): @@ -32,22 +28,21 @@ def document(db: Session): class TestDocumentRouteDelete: def test_response_is_success( self, - db: Session, - route: DeletedRoute, - crawler: Route, + route: Route, + crawler: WebCrawler, document: Document, ): - response = crawler.get(route.update(document)) + response = crawler.get(route.append(document)) assert response.is_success def test_item_is_soft_deleted( self, db: Session, - route: DeletedRoute, - crawler: Route, + route: Route, + crawler: WebCrawler, document: Document, ): - crawler.get(route.update(document)) + crawler.get(route.append(document)) statement = ( select(Document) @@ -60,10 +55,10 @@ def test_item_is_soft_deleted( def test_cannot_delete_unknown_document( self, db: Session, - route: DeletedRoute, - crawler: Route, + route: Route, + crawler: WebCrawler, ): rm_documents(db) maker = DocumentMaker(db) - response = crawler.get(route.update(next(maker))) + response = crawler.get(route.append(next(maker))) assert response.is_error diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py index bab25c18..619fb906 100644 --- a/backend/app/tests/api/routes/documents/test_ls.py +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -9,15 +9,22 @@ from app.models import Document from app.tests.utils.document import ( Route, + WebCrawler, crawler, insert_document, rm_documents, ) +class ListRoute(Route): + def pushq(self, key, value): + query = '='.join(map(str, (key, value))) + return (self + .to_url() + ._replace(query=query)) + @pytest.fixture -def url(): - route = Route('ls') - return route.to_url() +def route(): + return ListRoute('ls') @pytest.fixture def document(db: Session): @@ -36,19 +43,19 @@ def _(value: datetime): return value.isoformat() class TestDocumentRouteList: - def test_response_is_success(self, url: ParseResult, crawler: Route): - response = crawler.get(url) + def test_response_is_success(self, route: Route, crawler: WebCrawler): + response = crawler.get(route) assert response.is_success def test_empty_db_returns_empty_list( self, db: Session, - url: ParseResult, - crawler: Route, + route: Route, + crawler: WebCrawler, ): rm_documents(db) docs = (crawler - .get(url) + .get(route) .json() .get('docs')) @@ -56,13 +63,13 @@ def test_empty_db_returns_empty_list( def test_item_reflects_database( self, - url: ParseResult, - crawler: Route, + route: Route, + crawler: WebCrawler, document: Document, ): source = { x: to_string(y) for (x, y) in dict(document).items() } target = (crawler - .get(url) + .get(route) .json() .get('docs') .pop()) @@ -71,18 +78,18 @@ def test_item_reflects_database( def test_negative_skip_produces_error( self, - url: ParseResult, - crawler: Route, + route: ListRoute, + crawler: WebCrawler, ): - target = url._replace(query='skip=-1') - response = crawler.get(urlunparse(target)) + url = route.pushq('skip', -1) + response = crawler.get(urlunparse(url)) assert response.is_error def test_negative_limit_produces_error( self, - url: ParseResult, - crawler: Route, + route: ListRoute, + crawler: WebCrawler, ): - target = url._replace(query='limit=-1') - response = crawler.get(urlunparse(target)) + url = route.pushq('limit', -1) + response = crawler.get(urlunparse(url)) assert response.is_error From 86933f934a253d3234008d9edc1248aa2b4395ac Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 10:10:53 +0530 Subject: [PATCH 59/88] Simplification of Route semantics * Route type now handles all URL operations required for the crawler * Crawler assumes it will be called with a single type (Route) --- .../app/tests/api/routes/documents/test_ls.py | 19 +++------- backend/app/tests/utils/document.py | 36 +++++++++++-------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py index 619fb906..5acac745 100644 --- a/backend/app/tests/api/routes/documents/test_ls.py +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -15,16 +15,9 @@ rm_documents, ) -class ListRoute(Route): - def pushq(self, key, value): - query = '='.join(map(str, (key, value))) - return (self - .to_url() - ._replace(query=query)) - @pytest.fixture def route(): - return ListRoute('ls') + return Route('ls') @pytest.fixture def document(db: Session): @@ -78,18 +71,16 @@ def test_item_reflects_database( def test_negative_skip_produces_error( self, - route: ListRoute, + route: Route, crawler: WebCrawler, ): - url = route.pushq('skip', -1) - response = crawler.get(urlunparse(url)) + response = crawler.get(route.pushq('skip', -1)) assert response.is_error def test_negative_limit_produces_error( self, - route: ListRoute, + route: Route, crawler: WebCrawler, ): - url = route.pushq('limit', -1) - response = crawler.get(urlunparse(url)) + response = crawler.get(route.pushq('limit', -1)) assert response.is_error diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index d49514f1..b544b9e6 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -70,36 +70,44 @@ class Route: _empty = ParseResult(*it.repeat('', len(ParseResult._fields))) _root = Path(settings.API_V1_STR, 'documents') - def __init__(self, endpoint): + def __init__(self, endpoint, **qs_args): self.endpoint = endpoint + self.qs_args = qs_args def __str__(self): return urlunparse(self.to_url()) def to_url(self): path = self._root.joinpath(self.endpoint) - return self._empty._replace(path=str(path)) + kwargs = { + 'path': str(path), + } + if self.qs_args: + query = '&'.join(it.starmap('{}={}'.format, self.qs_args.items())) + kwargs['query'] = query + + return self._empty._replace(**kwargs) def append(self, doc: Document): endpoint = Path(self.endpoint, str(doc.id)) - return type(self)(endpoint) + return type(self)(endpoint, **self.qs_args) + + def pushq(self, key, value): + qs_args = self.qs_args | { + key: value, + } + return type(self)(self.endpoint, **qs_args) @dataclass class WebCrawler: client: TestClient superuser_token_headers: dict[str, str] - @ft.singledispatchmethod - def get(self, route: str): - return self.client.get(route, headers=self.superuser_token_headers) - - @get.register - def _(self, route: Route): - return self.get(str(route)) - - @get.register - def _(self, route: ParseResult): - return self.get(urlunparse(route)) + def get(self, route: Route): + return self.client.get( + str(route), + headers=self.superuser_token_headers, + ) @pytest.fixture(scope='class') def clean_db_fixture(db: Session): From a01db58363acd024a49a4b4b2bf4bd7d9f208fc2 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 10:16:19 +0530 Subject: [PATCH 60/88] Linted --- backend/app/tests/api/routes/documents/test_delete.py | 5 ----- backend/app/tests/api/routes/documents/test_ls.py | 1 - 2 files changed, 6 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_delete.py b/backend/app/tests/api/routes/documents/test_delete.py index 371b8767..0e57820a 100644 --- a/backend/app/tests/api/routes/documents/test_delete.py +++ b/backend/app/tests/api/routes/documents/test_delete.py @@ -1,12 +1,7 @@ -from pathlib import Path -from urllib.parse import ParseResult, urlunparse - import pytest from sqlmodel import Session, select -from fastapi.testclient import TestClient from app.models import Document -from app.core.config import settings from app.tests.utils.document import ( DocumentMaker, Route, diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py index 5acac745..b860d3c7 100644 --- a/backend/app/tests/api/routes/documents/test_ls.py +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -1,7 +1,6 @@ import functools as ft from uuid import UUID from datetime import datetime -from urllib.parse import ParseResult, urlunparse import pytest from sqlmodel import Session From 6253c51d1d82593062efe197a77ddc67d85471b2 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 11:45:41 +0530 Subject: [PATCH 61/88] Delete uses update --- backend/app/api/routes/documents.py | 2 +- backend/app/crud/document.py | 20 ++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 47834c75..29c1edbe 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -59,7 +59,7 @@ def delete_doc( crud = DocumentCrud(session) try: return crud.delete(doc_id, current_user.id) - except FileNotFoundError as err: + except (NoResultFound, PermissionError) as err: raise HTTPException(status_code=404, detail=str(err)) # TODO: perform delete on the collection diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index 4250dcd8..4622b08f 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -53,17 +53,9 @@ def update(self, document: Document): return document def delete(self, doc_id: UUID, owner_id: UUID): - statement = ( - update(Document) - .where(and_( - Document.id == doc_id, - Document.owner_id == owner_id, - )) - .values(deleted_at=now()) - ) - result = self.session.exec(statement) - if not result.rowcount: - raise FileNotFoundError(f'Item "{doc_id}" not found') - self.session.commit() - - return result.rowcount + document = self.read_one(doc_id) + if document.owner_id != owner_id: + error = f'User {owner_id} does not own document {doc_id}' + raise PermissionError(error) + document.deleted_at = now() + self.update(document) From 9349d8876155051e8e5f5312ecbb70a010b69c44 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 11:46:23 +0530 Subject: [PATCH 62/88] Lift document comparison to test utilities --- .../app/tests/api/routes/documents/test_ls.py | 21 ++------------ backend/app/tests/utils/document.py | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py index b860d3c7..12a299fe 100644 --- a/backend/app/tests/api/routes/documents/test_ls.py +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -1,12 +1,9 @@ -import functools as ft -from uuid import UUID -from datetime import datetime - import pytest from sqlmodel import Session from app.models import Document from app.tests.utils.document import ( + DocumentComparator, Route, WebCrawler, crawler, @@ -22,18 +19,6 @@ def route(): def document(db: Session): return insert_document(db) -@ft.singledispatch -def to_string(value): - return value - -@to_string.register -def _(value: UUID): - return str(value) - -@to_string.register -def _(value: datetime): - return value.isoformat() - class TestDocumentRouteList: def test_response_is_success(self, route: Route, crawler: WebCrawler): response = crawler.get(route) @@ -59,14 +44,14 @@ def test_item_reflects_database( crawler: WebCrawler, document: Document, ): - source = { x: to_string(y) for (x, y) in dict(document).items() } target = (crawler .get(route) .json() .get('docs') .pop()) + source = DocumentComparator(document) - assert target == source + assert source == target def test_negative_skip_produces_error( self, diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index b544b9e6..0d7e14b3 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -2,6 +2,7 @@ import functools as ft from uuid import UUID from pathlib import Path +from datetime import datetime from dataclasses import dataclass from urllib.parse import ParseResult, urlunparse @@ -109,6 +110,34 @@ def get(self, route: Route): headers=self.superuser_token_headers, ) +class DocumentComparator: + @ft.singledispatchmethod + @staticmethod + def to_string(value): + return value + + @to_string.register + @staticmethod + def _(value: UUID): + return str(value) + + @to_string.register + @staticmethod + def _(value: datetime): + return value.isoformat() + + def __init__(self, document: Document): + self.document = document + + def __eq__(self, other: dict): + this = dict(self.to_dict()) + return this == other + + def to_dict(self): + document = dict(self.document) + for (k, v) in document.items(): + yield (k, self.to_string(v)) + @pytest.fixture(scope='class') def clean_db_fixture(db: Session): rm_documents(db) From 2bcd66cd8d467ec7a246402886231c6b1d33846a Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 11:46:54 +0530 Subject: [PATCH 63/88] Tests assert --- backend/app/tests/api/routes/documents/test_delete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/tests/api/routes/documents/test_delete.py b/backend/app/tests/api/routes/documents/test_delete.py index 0e57820a..731e204a 100644 --- a/backend/app/tests/api/routes/documents/test_delete.py +++ b/backend/app/tests/api/routes/documents/test_delete.py @@ -45,7 +45,7 @@ def test_item_is_soft_deleted( ) result = db.exec(statement).one() - return result.deleted_at is not None + assert result.deleted_at is not None def test_cannot_delete_unknown_document( self, From 29f10c19b6d0a52bdd1086cdd47aaf0b1f84753a Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 11:47:09 +0530 Subject: [PATCH 64/88] Must refresh the session before interacting with the database --- backend/app/tests/api/routes/documents/test_delete.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/app/tests/api/routes/documents/test_delete.py b/backend/app/tests/api/routes/documents/test_delete.py index 731e204a..54a1d045 100644 --- a/backend/app/tests/api/routes/documents/test_delete.py +++ b/backend/app/tests/api/routes/documents/test_delete.py @@ -39,6 +39,7 @@ def test_item_is_soft_deleted( ): crawler.get(route.append(document)) + db.refresh(document) statement = ( select(Document) .where(Document.id == document.id) From ecf385ad94187540b772b1d1635dc003683a9308 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 11:48:18 +0530 Subject: [PATCH 65/88] Tests for document stat route --- .../tests/api/routes/documents/test_stat.py | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 backend/app/tests/api/routes/documents/test_stat.py diff --git a/backend/app/tests/api/routes/documents/test_stat.py b/backend/app/tests/api/routes/documents/test_stat.py new file mode 100644 index 00000000..0ce5c895 --- /dev/null +++ b/backend/app/tests/api/routes/documents/test_stat.py @@ -0,0 +1,63 @@ +import logging +from pathlib import Path +from urllib.parse import ParseResult, urlunparse + +import pytest +from sqlmodel import Session, select +from fastapi.testclient import TestClient + +from app.models import Document +from app.core.config import settings +from app.tests.utils.document import ( + DocumentComparator, + DocumentMaker, + Route, + WebCrawler, + crawler, + insert_document, + rm_documents, +) + +@pytest.fixture +def route(): + return Route('stat') + +@pytest.fixture +def document(db: Session): + rm_documents(db) + return insert_document(db) + +class TestDocumentRouteStat: + def test_response_is_success( + self, + route: Route, + crawler: WebCrawler, + document: Document, + ): + response = crawler.get(route.append(document)) + assert response.is_success + + def test_stat_reflects_database( + self, + route: Route, + crawler: WebCrawler, + document: Document, + ): + target = (crawler + .get(route.append(document)) + .json()) + logging.critical(target) + source = DocumentComparator(document) + + assert source == target + + def test_cannot_stat_unknown_document( + self, + db: Session, + route: Route, + crawler: Route, + ): + rm_documents(db) + maker = DocumentMaker(db) + response = crawler.get(route.append(next(maker))) + assert response.is_error From e9a280b4408a186324a656475cf18ed379bb88dd Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 12:49:07 +0530 Subject: [PATCH 66/88] Linted --- backend/app/tests/api/routes/documents/test_stat.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_stat.py b/backend/app/tests/api/routes/documents/test_stat.py index 0ce5c895..88b3f3e7 100644 --- a/backend/app/tests/api/routes/documents/test_stat.py +++ b/backend/app/tests/api/routes/documents/test_stat.py @@ -1,13 +1,7 @@ -import logging -from pathlib import Path -from urllib.parse import ParseResult, urlunparse - import pytest -from sqlmodel import Session, select -from fastapi.testclient import TestClient +from sqlmodel import Session from app.models import Document -from app.core.config import settings from app.tests.utils.document import ( DocumentComparator, DocumentMaker, @@ -46,7 +40,6 @@ def test_stat_reflects_database( target = (crawler .get(route.append(document)) .json()) - logging.critical(target) source = DocumentComparator(document) assert source == target From ade27ffbe5e77f89a0d8f3256476605321b3db34 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 13:38:26 +0530 Subject: [PATCH 67/88] Better temporary bucket naming --- backend/app/core/cloud/storage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index b6b15b89..50b57989 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -53,8 +53,7 @@ def __init__(self, user: CurrentUser): self.aws = AmazonCloudStorageClient() def put(self, source: UploadFile, basename: str): - # key = Path(user.organization, user.project, basename) - key = Path('test-org', 'test-project', basename) + key = Path(str(self.user.id), basename) destination = SimpleStorageName(str(key)) kwargs = asdict(destination) From 8dd5bc16fec016d68515a1ec1f479a37a41101d5 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 13:38:37 +0530 Subject: [PATCH 68/88] Move from deprecated way of Pydantic to dict --- backend/app/core/cloud/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 50b57989..eb17022d 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -57,7 +57,7 @@ def put(self, source: UploadFile, basename: str): destination = SimpleStorageName(str(key)) kwargs = asdict(destination) - metadata = self.user.dict() + metadata = self.user.model_dump() try: self.aws.client.upload_fileobj( source.file, From 43d8d40977d0552f3d532ae7070c971c60e06432 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 14:17:35 +0530 Subject: [PATCH 69/88] Lift bucket creation to global module --- backend/app/core/cloud/storage.py | 16 ++++++++++++++++ backend/app/initial_storage.py | 15 +-------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index eb17022d..4fd39e33 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -21,6 +21,22 @@ def client(self): region_name=settings.AWS_DEFAULT_REGION, ) + def create(self): + try: + # does the bucket exist... + self.client.head_bucket(Bucket=settings.AWS_S3_BUCKET) + except ClientError as err: + response = int(err.response['Error']['Code']) + if response != 404: + raise + # ... if not create it + self.client.create_bucket( + Bucket=settings.AWS_S3_BUCKET, + CreateBucketConfiguration={ + 'LocationConstraint': settings.AWS_DEFAULT_REGION, + }, + ) + @dataclass(frozen=True) class SimpleStorageName: Key: str diff --git a/backend/app/initial_storage.py b/backend/app/initial_storage.py index eb89f601..436fb81f 100644 --- a/backend/app/initial_storage.py +++ b/backend/app/initial_storage.py @@ -10,20 +10,7 @@ def init() -> None: aws = AmazonCloudStorageClient() - try: - # does the bucket exist... - aws.client.head_bucket(Bucket=settings.AWS_S3_BUCKET) - except ClientError as err: - response = int(err.response['Error']['Code']) - if response != 404: - raise - # ... if not create it - aws.client.create_bucket( - Bucket=settings.AWS_S3_BUCKET, - CreateBucketConfiguration={ - 'LocationConstraint': settings.AWS_DEFAULT_REGION, - }, - ) + aws.create() def main() -> None: logger.info("START: setup cloud storage") From a5623deb1a6130abbeac97c28866f7032d5cbff8 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 14:26:45 +0530 Subject: [PATCH 70/88] Unused import --- backend/app/core/cloud/storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 4fd39e33..55c4a55b 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -1,4 +1,3 @@ -import logging import functools as ft from pathlib import Path from dataclasses import dataclass, asdict From 172e0b2187d689871c3287d7fb331814af296bb1 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 14:40:54 +0530 Subject: [PATCH 71/88] Return all information about an uploaded document --- backend/app/api/routes/documents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 29c1edbe..802dc7b6 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -48,7 +48,7 @@ def upload_doc( ) crud.update(document) - return document.id + return document @router.get("/rm/{doc_id}") def delete_doc( From cf84aaf01824369ebd734758dda5d84dcb94c55b Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 14:41:18 +0530 Subject: [PATCH 72/88] Updates to Python packages * Bump boto3 version * Add moto dependency --- backend/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index d51fd0de..72f0143d 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -21,7 +21,8 @@ dependencies = [ "pydantic-settings<3.0.0,>=2.2.1", "sentry-sdk[fastapi]<2.0.0,>=1.40.6", "pyjwt<3.0.0,>=2.8.0", - "boto3>1.37.14" + "boto3>=1.37.20", + "moto[s3]>=5.1.1", ] [tool.uv] From 85b07be8231a0d7f6a007cbac4941495b38137da Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 14:42:08 +0530 Subject: [PATCH 73/88] Test upload endpoint --- .../tests/api/routes/documents/test_upload.py | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 backend/app/tests/api/routes/documents/test_upload.py diff --git a/backend/app/tests/api/routes/documents/test_upload.py b/backend/app/tests/api/routes/documents/test_upload.py new file mode 100644 index 00000000..d7ff7450 --- /dev/null +++ b/backend/app/tests/api/routes/documents/test_upload.py @@ -0,0 +1,90 @@ +import os +import mimetypes +from pathlib import Path +from tempfile import NamedTemporaryFile +from urllib.parse import urlparse + +import boto3 +import pytest +from sqlmodel import Session, select +from moto import mock_aws + +from app.core.cloud import AmazonCloudStorageClient +from app.core.config import settings +from app.models import Document +from app.tests.utils.document import ( + Route, + WebCrawler, + crawler, +) + +def upload(route: Route, scratch: Path, crawler: WebCrawler): + (mtype, _) = mimetypes.guess_type(str(scratch)) + with scratch.open('rb') as fp: + return crawler.client.post( + str(route), + headers=crawler.superuser_token_headers, + files={ + 'src': (str(scratch), fp, mtype), + }, + ) + +@pytest.fixture +def scratch(): + with NamedTemporaryFile(mode='w', suffix='.txt') as fp: + print('Hello World', file=fp, flush=True) + yield Path(fp.name) + +@pytest.fixture +def route(): + return Route('cp') + +@pytest.fixture(scope='class') +def aws_setup(): + os.environ['AWS_ACCESS_KEY_ID'] = 'testing' + os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing' + os.environ['AWS_SECURITY_TOKEN'] = 'testing' + os.environ['AWS_SESSION_TOKEN'] = 'testing' + os.environ['AWS_DEFAULT_REGION'] = settings.AWS_DEFAULT_REGION + +@mock_aws +@pytest.mark.usefixtures('aws_setup') +class TestDocumentRouteUpload: + def test_adds_to_database( + self, + db: Session, + route: Route, + scratch: Path, + crawler: WebCrawler, + ): + aws = AmazonCloudStorageClient() + aws.create() + + response = upload(route, scratch, crawler) + doc_id = (response + .json() + .get('id')) + statement = ( + select(Document) + .where(Document.id == doc_id) + ) + result = db.exec(statement).one() + + assert result.fname == str(scratch) + + def test_adds_to_S3( + self, + route: Route, + scratch: Path, + crawler: Route, + ): + aws = AmazonCloudStorageClient() + aws.create() + + response = upload(route, scratch, crawler) + url = urlparse(response.json().get('object_store_url')) + key = Path(url.path) + key = key.relative_to(key.root) + + client = boto3.client('s3') + assert client.head_object(Bucket=url.netloc, Key=str(key)) From 3293d07fbc63059bb26ec59aed591866a3724cd1 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Wed, 26 Mar 2025 15:25:28 +0530 Subject: [PATCH 74/88] Use object to upload documents --- .../tests/api/routes/documents/test_upload.py | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_upload.py b/backend/app/tests/api/routes/documents/test_upload.py index d7ff7450..512bd0c6 100644 --- a/backend/app/tests/api/routes/documents/test_upload.py +++ b/backend/app/tests/api/routes/documents/test_upload.py @@ -6,8 +6,9 @@ import boto3 import pytest -from sqlmodel import Session, select from moto import mock_aws +from sqlmodel import Session, select +from fastapi.testclient import TestClient from app.core.cloud import AmazonCloudStorageClient from app.core.config import settings @@ -15,19 +16,19 @@ from app.tests.utils.document import ( Route, WebCrawler, - crawler, ) -def upload(route: Route, scratch: Path, crawler: WebCrawler): - (mtype, _) = mimetypes.guess_type(str(scratch)) - with scratch.open('rb') as fp: - return crawler.client.post( - str(route), - headers=crawler.superuser_token_headers, - files={ - 'src': (str(scratch), fp, mtype), - }, - ) +class WebUploader(WebCrawler): + def put(self, route: Route, scratch: Path): + (mtype, _) = mimetypes.guess_type(str(scratch)) + with scratch.open('rb') as fp: + return self.client.post( + str(route), + headers=self.superuser_token_headers, + files={ + 'src': (str(scratch), fp, mtype), + }, + ) @pytest.fixture def scratch(): @@ -39,6 +40,10 @@ def scratch(): def route(): return Route('cp') +@pytest.fixture +def uploader(client: TestClient, superuser_token_headers: dict[str, str]): + return WebUploader(client, superuser_token_headers) + @pytest.fixture(scope='class') def aws_setup(): os.environ['AWS_ACCESS_KEY_ID'] = 'testing' @@ -55,12 +60,12 @@ def test_adds_to_database( db: Session, route: Route, scratch: Path, - crawler: WebCrawler, + uploader: WebUploader, ): aws = AmazonCloudStorageClient() aws.create() - response = upload(route, scratch, crawler) + response = uploader.put(route, scratch) doc_id = (response .json() .get('id')) @@ -76,12 +81,12 @@ def test_adds_to_S3( self, route: Route, scratch: Path, - crawler: Route, + uploader: WebUploader, ): aws = AmazonCloudStorageClient() aws.create() - response = upload(route, scratch, crawler) + response = uploader.put(route, scratch) url = urlparse(response.json().get('object_store_url')) key = Path(url.path) key = key.relative_to(key.root) From 397e316d5694c547a2ef1890e73f82f22a4f9c2f Mon Sep 17 00:00:00 2001 From: Jerome White Date: Thu, 27 Mar 2025 11:58:50 +0530 Subject: [PATCH 75/88] General document test cleanup * Fewer fixtures that are more general * Remove class fixtures; assume someone else manages database state * Consolidate manual database interaction into a single class * Fix test class name clashes --- .../tests/api/routes/documents/test_delete.py | 23 +++-- .../app/tests/api/routes/documents/test_ls.py | 33 ++++--- .../tests/api/routes/documents/test_stat.py | 24 ++--- .../tests/api/routes/documents/test_upload.py | 7 +- .../app/tests/crud/documents/test_delete.py | 13 +-- .../tests/crud/documents/test_read_many.py | 96 +++++++------------ .../app/tests/crud/documents/test_read_one.py | 34 ++----- .../app/tests/crud/documents/test_update.py | 11 +-- backend/app/tests/utils/document.py | 62 ++++++------ 9 files changed, 127 insertions(+), 176 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_delete.py b/backend/app/tests/api/routes/documents/test_delete.py index 54a1d045..9ec82356 100644 --- a/backend/app/tests/api/routes/documents/test_delete.py +++ b/backend/app/tests/api/routes/documents/test_delete.py @@ -4,30 +4,26 @@ from app.models import Document from app.tests.utils.document import ( DocumentMaker, + DocumentStore, Route, WebCrawler, crawler, - insert_document, - rm_documents, ) @pytest.fixture def route(): return Route('rm') -@pytest.fixture -def document(db: Session): - rm_documents(db) - return insert_document(db) - class TestDocumentRouteDelete: def test_response_is_success( self, + db: Session, route: Route, crawler: WebCrawler, - document: Document, ): - response = crawler.get(route.append(document)) + store = DocumentStore(db) + response = crawler.get(route.append(store.put())) + assert response.is_success def test_item_is_soft_deleted( @@ -35,10 +31,11 @@ def test_item_is_soft_deleted( db: Session, route: Route, crawler: WebCrawler, - document: Document, ): - crawler.get(route.append(document)) + store = DocumentStore(db) + document = store.put() + crawler.get(route.append(document)) db.refresh(document) statement = ( select(Document) @@ -54,7 +51,9 @@ def test_cannot_delete_unknown_document( route: Route, crawler: WebCrawler, ): - rm_documents(db) + DocumentStore.clear(db) + maker = DocumentMaker(db) response = crawler.get(route.append(next(maker))) + assert response.is_error diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py index 12a299fe..cad3811b 100644 --- a/backend/app/tests/api/routes/documents/test_ls.py +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -4,33 +4,35 @@ from app.models import Document from app.tests.utils.document import ( DocumentComparator, + DocumentStore, Route, WebCrawler, crawler, - insert_document, - rm_documents, ) -@pytest.fixture -def route(): - return Route('ls') +class QueryRoute(Route): + def pushq(self, key, value): + qs_args = self.qs_args | { + key: value, + } + return type(self)(self.endpoint, **qs_args) @pytest.fixture -def document(db: Session): - return insert_document(db) +def route(): + return QueryRoute('ls') class TestDocumentRouteList: - def test_response_is_success(self, route: Route, crawler: WebCrawler): + def test_response_is_success(self, route: QueryRoute, crawler: WebCrawler): response = crawler.get(route) assert response.is_success def test_empty_db_returns_empty_list( self, db: Session, - route: Route, + route: QueryRoute, crawler: WebCrawler, ): - rm_documents(db) + DocumentStore.clear(db) docs = (crawler .get(route) .json() @@ -40,22 +42,23 @@ def test_empty_db_returns_empty_list( def test_item_reflects_database( self, - route: Route, + db: Session, + route: QueryRoute, crawler: WebCrawler, - document: Document, ): + store = DocumentStore(db) + source = DocumentComparator(store.put()) target = (crawler .get(route) .json() .get('docs') .pop()) - source = DocumentComparator(document) assert source == target def test_negative_skip_produces_error( self, - route: Route, + route: QueryRoute, crawler: WebCrawler, ): response = crawler.get(route.pushq('skip', -1)) @@ -63,7 +66,7 @@ def test_negative_skip_produces_error( def test_negative_limit_produces_error( self, - route: Route, + route: QueryRoute, crawler: WebCrawler, ): response = crawler.get(route.pushq('limit', -1)) diff --git a/backend/app/tests/api/routes/documents/test_stat.py b/backend/app/tests/api/routes/documents/test_stat.py index 88b3f3e7..6071f672 100644 --- a/backend/app/tests/api/routes/documents/test_stat.py +++ b/backend/app/tests/api/routes/documents/test_stat.py @@ -5,42 +5,41 @@ from app.tests.utils.document import ( DocumentComparator, DocumentMaker, + DocumentStore, Route, WebCrawler, crawler, - insert_document, - rm_documents, ) @pytest.fixture def route(): return Route('stat') -@pytest.fixture -def document(db: Session): - rm_documents(db) - return insert_document(db) - class TestDocumentRouteStat: def test_response_is_success( self, + db: Session, route: Route, crawler: WebCrawler, - document: Document, ): - response = crawler.get(route.append(document)) + store = DocumentStore(db) + response = crawler.get(route.append(store.put())) + assert response.is_success def test_stat_reflects_database( self, + db: Session, route: Route, crawler: WebCrawler, - document: Document, ): + store = DocumentStore(db) + document = store.put() + source = DocumentComparator(document) + target = (crawler .get(route.append(document)) .json()) - source = DocumentComparator(document) assert source == target @@ -50,7 +49,8 @@ def test_cannot_stat_unknown_document( route: Route, crawler: Route, ): - rm_documents(db) + DocumentStore.clear(db) maker = DocumentMaker(db) response = crawler.get(route.append(next(maker))) + assert response.is_error diff --git a/backend/app/tests/api/routes/documents/test_upload.py b/backend/app/tests/api/routes/documents/test_upload.py index 512bd0c6..39a0ca72 100644 --- a/backend/app/tests/api/routes/documents/test_upload.py +++ b/backend/app/tests/api/routes/documents/test_upload.py @@ -45,7 +45,7 @@ def uploader(client: TestClient, superuser_token_headers: dict[str, str]): return WebUploader(client, superuser_token_headers) @pytest.fixture(scope='class') -def aws_setup(): +def aws_credentials(): os.environ['AWS_ACCESS_KEY_ID'] = 'testing' os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing' os.environ['AWS_SECURITY_TOKEN'] = 'testing' @@ -53,7 +53,7 @@ def aws_setup(): os.environ['AWS_DEFAULT_REGION'] = settings.AWS_DEFAULT_REGION @mock_aws -@pytest.mark.usefixtures('aws_setup') +@pytest.mark.usefixtures('aws_credentials') class TestDocumentRouteUpload: def test_adds_to_database( self, @@ -91,5 +91,4 @@ def test_adds_to_S3( key = Path(url.path) key = key.relative_to(key.root) - client = boto3.client('s3') - assert client.head_object(Bucket=url.netloc, Key=str(key)) + assert aws.client.head_object(Bucket=url.netloc, Key=str(key)) diff --git a/backend/app/tests/crud/documents/test_delete.py b/backend/app/tests/crud/documents/test_delete.py index 5316eb3d..ee354acf 100644 --- a/backend/app/tests/crud/documents/test_delete.py +++ b/backend/app/tests/crud/documents/test_delete.py @@ -4,16 +4,12 @@ from app.crud import DocumentCrud from app.models import Document -from app.tests.utils.document import ( - clean_db_fixture, - insert_document, - rm_documents, -) +from app.tests.utils.document import DocumentStore @pytest.fixture def document(db: Session): - rm_documents(db) - document = insert_document(db) + store = DocumentStore(db) + document = store.put() crud = DocumentCrud(db) crud.delete(document.id, document.owner_id) @@ -24,8 +20,7 @@ def document(db: Session): ) return db.exec(statement).one() -@pytest.mark.usefixtures('clean_db_fixture') -class TestDatabaseUpdate: +class TestDatabaseDelete: def test_delete_is_soft(self, document: Document): assert document is not None diff --git a/backend/app/tests/crud/documents/test_read_many.py b/backend/app/tests/crud/documents/test_read_many.py index 0990be87..385273ff 100644 --- a/backend/app/tests/crud/documents/test_read_many.py +++ b/backend/app/tests/crud/documents/test_read_many.py @@ -6,115 +6,89 @@ from app.crud import DocumentCrud from app.tests.utils.document import ( - Constants, - clean_db_fixture, - get_user_id_by_email, - insert_documents, + DocumentStore, int_to_uuid, - rm_documents, ) -class DocumentManager: - def __init__(self, db: Session): - self.db = db - self.documents = None - - rm_documents(self.db) - - def __iter__(self): - if self.documents is None: - raise AttributeError() - yield from self.documents - - def add(self, n): - self.documents = list(insert_documents(self.db, n)) - -@pytest.fixture -def documents(db: Session): - return DocumentManager(db) - @pytest.fixture def crud(db: Session): return DocumentCrud(db) @pytest.fixture -def owner_id(db: Session): - return get_user_id_by_email(db) +def store(db: Session): + ds = DocumentStore(db) + for _ in ds.fill(TestDatabaseReadMany._ndocs): + pass + + return ds -@pytest.mark.usefixtures('clean_db_fixture') class TestDatabaseReadMany: + _ndocs = 10 + def test_number_read_is_expected( self, crud: DocumentCrud, - owner_id: UUID, - documents: DocumentManager, + store: DocumentStore, ): - documents.add(Constants.n_documents) - docs = crud.read_many(owner_id) - assert len(docs) == Constants.n_documents + docs = crud.read_many(store.owner) + assert len(docs) == self._ndocs - def test_deleted_docs_are_excluded(self, documents: DocumentManager): - documents.add(Constants.n_documents) - assert all(x.deleted_at is None for x in documents) + def test_deleted_docs_are_excluded( + self, + crud: DocumentCrud, + store: DocumentStore, + ): + assert all(x.deleted_at is None for x in crud.read_many(store.owner)) def test_skip_is_respected( self, crud: DocumentCrud, - owner_id: UUID, - documents: DocumentManager, + store: DocumentStore, ): - documents.add(Constants.n_documents) - skip = Constants.n_documents // 2 - doc_ids = set(x.id for x in crud.read_many(owner_id, skip=skip)) + skip = self._ndocs // 2 + doc_ids = set(x.id for x in crud.read_many(store.owner, skip=skip)) - for i in range(skip, Constants.n_documents): - doc = int_to_uuid(i) + for i in range(skip, self._ndocs): + doc = int_to_uuid(i) # see DocumentMaker assert doc in doc_ids def test_zero_skip_includes_all( self, crud: DocumentCrud, - owner_id: UUID, - documents: DocumentManager, + store: DocumentStore, ): - documents.add(Constants.n_documents) - docs = crud.read_many(owner_id, skip=0) - assert len(docs) == Constants.n_documents + docs = crud.read_many(store.owner, skip=0) + assert len(docs) == self._ndocs def test_negative_skip_raises_exception( self, crud: DocumentCrud, - owner_id: UUID, + store: DocumentStore, ): with pytest.raises(ValueError): - crud.read_many(owner_id, skip=-1) + crud.read_many(store.owner, skip=-1) def test_limit_is_respected( self, crud: DocumentCrud, - owner_id: UUID, - documents: DocumentManager, + store: DocumentStore, ): - documents.add(Constants.n_documents) - limit = Constants.n_documents // 2 - docs = crud.read_many(owner_id, limit=limit) + limit = self._ndocs // 2 + docs = crud.read_many(store.owner, limit=limit) assert len(docs) == limit def test_zero_limit_includes_nothing( self, crud: DocumentCrud, - owner_id: UUID, - documents: DocumentManager, + store: DocumentStore, ): - documents.add(Constants.n_documents) - docs = crud.read_many(owner_id, limit=0) - assert not docs + assert not crud.read_many(store.owner, limit=0) def test_negative_limit_raises_exception( self, crud: DocumentCrud, - owner_id: UUID, + store: DocumentStore, ): with pytest.raises(ValueError): - crud.read_many(owner_id, limit=-1) + crud.read_many(store.owner, limit=-1) diff --git a/backend/app/tests/crud/documents/test_read_one.py b/backend/app/tests/crud/documents/test_read_one.py index 53cff829..1f050481 100644 --- a/backend/app/tests/crud/documents/test_read_one.py +++ b/backend/app/tests/crud/documents/test_read_one.py @@ -4,38 +4,24 @@ from app.crud import DocumentCrud -from app.tests.utils.document import ( - clean_db_fixture, - insert_document, - int_to_uuid, - rm_documents, -) +from app.tests.utils.document import DocumentStore @pytest.fixture -def clean_db_fixture(db: Session): - rm_documents(db) - yield - rm_documents(db) +def store(db: Session): + return DocumentStore(db) -@pytest.mark.usefixtures('clean_db_fixture') class TestDatabaseReadOne: - def test_can_select_valid_id( - self, - db: Session, - clean_db_fixture: None, - ): + def test_can_select_valid_id(self, db: Session, store: DocumentStore): + document = store.put() + crud = DocumentCrud(db) - document = insert_document(db) result = crud.read_one(document.id) assert result.id == document.id - def test_cannot_select_invalid_id( - self, - db: Session, - clean_db_fixture: None, - ): + def test_cannot_select_invalid_id(self, db: Session, store: DocumentStore): + document = next(store.maker) + crud = DocumentCrud(db) - document_id = int_to_uuid(0) with pytest.raises(NoResultFound): - crud.read_one(document_id) + crud.read_one(document.id) diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index f547a79f..06634a78 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -3,22 +3,17 @@ from app.crud import DocumentCrud -from app.tests.utils.document import ( - DocumentMaker, - clean_db_fixture, - rm_documents, -) +from app.tests.utils.document import DocumentMaker, DocumentStore @pytest.fixture def documents(db: Session): - rm_documents(db) - return DocumentMaker(db) + store = DocumentStore(db) + return store.maker @pytest.fixture def crud(db: Session): return DocumentCrud(db) -@pytest.mark.usefixtures('clean_db_fixture') class TestDatabaseUpdate: def test_update_adds_one( self, diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index 0d7e14b3..46acecc5 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -23,25 +23,6 @@ def get_user_id_by_email(db: Session): def int_to_uuid(value): return UUID(int=value) -def rm_documents(db: Session): - db.exec(delete(Document)) - db.commit() - -def insert_documents(db: Session, n: int): - documents = DocumentMaker(db) - for (_, doc) in zip(range(n), documents): - db.add(doc) - db.commit() - db.refresh(doc) - yield doc - -def insert_document(db: Session): - (document, ) = insert_documents(db, 1) - return document - -class Constants: - n_documents = 10 - class DocumentMaker: def __init__(self, db: Session): self.owner_id = get_user_id_by_email(db) @@ -67,6 +48,37 @@ def get_and_increment(self): self.index += 1 return doc_id +class DocumentStore: + @staticmethod + def clear(db: Session): + db.exec(delete(Document)) + db.commit() + + @property + def owner(self): + return self.maker.owner_id + + def __init__(self, db: Session): + self.db = db + self.maker = DocumentMaker(self.db) + self.clear(self.db) + + def put(self): + doc = next(self.maker) + + self.db.add(doc) + self.db.commit() + self.db.refresh(doc) + + return doc + + def extend(self, n: int): + for _ in range(n): + yield self.put() + + def fill(self, n: int): + return list(self.extend(n)) + class Route: _empty = ParseResult(*it.repeat('', len(ParseResult._fields))) _root = Path(settings.API_V1_STR, 'documents') @@ -93,12 +105,6 @@ def append(self, doc: Document): endpoint = Path(self.endpoint, str(doc.id)) return type(self)(endpoint, **self.qs_args) - def pushq(self, key, value): - qs_args = self.qs_args | { - key: value, - } - return type(self)(self.endpoint, **qs_args) - @dataclass class WebCrawler: client: TestClient @@ -138,12 +144,6 @@ def to_dict(self): for (k, v) in document.items(): yield (k, self.to_string(v)) -@pytest.fixture(scope='class') -def clean_db_fixture(db: Session): - rm_documents(db) - yield - rm_documents(db) - @pytest.fixture def crawler(client: TestClient, superuser_token_headers: dict[str, str]): return WebCrawler(client, superuser_token_headers) From 318d803aab4fa9f4df61cd33156f31204501e962 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Thu, 27 Mar 2025 12:16:29 +0530 Subject: [PATCH 76/88] Remove unused imports --- backend/app/tests/api/routes/documents/test_ls.py | 1 - backend/app/tests/api/routes/documents/test_stat.py | 1 - backend/app/tests/api/routes/documents/test_upload.py | 1 - backend/app/tests/crud/documents/test_read_many.py | 2 -- 4 files changed, 5 deletions(-) diff --git a/backend/app/tests/api/routes/documents/test_ls.py b/backend/app/tests/api/routes/documents/test_ls.py index cad3811b..3c5be821 100644 --- a/backend/app/tests/api/routes/documents/test_ls.py +++ b/backend/app/tests/api/routes/documents/test_ls.py @@ -1,7 +1,6 @@ import pytest from sqlmodel import Session -from app.models import Document from app.tests.utils.document import ( DocumentComparator, DocumentStore, diff --git a/backend/app/tests/api/routes/documents/test_stat.py b/backend/app/tests/api/routes/documents/test_stat.py index 6071f672..893430bc 100644 --- a/backend/app/tests/api/routes/documents/test_stat.py +++ b/backend/app/tests/api/routes/documents/test_stat.py @@ -1,7 +1,6 @@ import pytest from sqlmodel import Session -from app.models import Document from app.tests.utils.document import ( DocumentComparator, DocumentMaker, diff --git a/backend/app/tests/api/routes/documents/test_upload.py b/backend/app/tests/api/routes/documents/test_upload.py index 39a0ca72..73e03600 100644 --- a/backend/app/tests/api/routes/documents/test_upload.py +++ b/backend/app/tests/api/routes/documents/test_upload.py @@ -4,7 +4,6 @@ from tempfile import NamedTemporaryFile from urllib.parse import urlparse -import boto3 import pytest from moto import mock_aws from sqlmodel import Session, select diff --git a/backend/app/tests/crud/documents/test_read_many.py b/backend/app/tests/crud/documents/test_read_many.py index 385273ff..a3642673 100644 --- a/backend/app/tests/crud/documents/test_read_many.py +++ b/backend/app/tests/crud/documents/test_read_many.py @@ -1,5 +1,3 @@ -from uuid import UUID - import pytest from sqlmodel import Session From d39b03ef365497e8d9ea49042e0a524d849e3f84 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Thu, 27 Mar 2025 13:03:35 +0530 Subject: [PATCH 77/88] Remove extraneous code --- backend/app/api/routes/documents.py | 7 ------- backend/app/core/cloud/storage.py | 3 +-- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 802dc7b6..d96e027c 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -77,10 +77,3 @@ def doc_info( raise HTTPException(status_code=404, detail=str(err)) except MultipleResultsFound as err: raise HTTPException(status_code=500, detail=str(err)) - -# @router.get("/assign", response_model=DocumentList) -# def assign_doc( -# session: SessionDep, -# current_user: CurrentUser, -# ): -# pass diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 55c4a55b..e7a09427 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -72,12 +72,11 @@ def put(self, source: UploadFile, basename: str): destination = SimpleStorageName(str(key)) kwargs = asdict(destination) - metadata = self.user.model_dump() try: self.aws.client.upload_fileobj( source.file, ExtraArgs={ - # 'Metadata': metadata, + # 'Metadata': self.user.model_dump(), 'ContentType': source.content_type, }, **kwargs, From 847eed945c7b1ecfa2772d7b4deca60fddd1956b Mon Sep 17 00:00:00 2001 From: Jerome White Date: Thu, 27 Mar 2025 15:36:11 +0530 Subject: [PATCH 78/88] Document crud takes respects user throughout Each method takes into account whether the client is allowed to access the documents they are trying to touch. To do this, owner is dropped from the method signatures; instead being specified at construction. --- backend/app/crud/document.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index 4622b08f..295acd1d 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -6,29 +6,31 @@ from app.models import Document, DocumentList from app.core.util import now -class CrudObject: - def __init__(self, session: Session): +class DocumentCrud: + def __init__(self, session: Session, owner_id: UUID): self.session = session + self.owner_id = owner_id -class DocumentCrud(CrudObject): def read_one(self, doc_id: UUID): statement = ( select(Document) - .where(Document.id == doc_id) + .where(and_( + Document.owner_id == self.owner_id, + Document.id == doc_id, + )) ) return self.session.exec(statement).one() def read_many( self, - owner_id: UUID, skip: Optional[int] = None, limit: Optional[int] = None, ): statement = ( select(Document) .where(and_( - Document.owner_id == owner_id, + Document.owner_id == self.owner_id, Document.deleted_at.is_(None), )) ) @@ -40,22 +42,28 @@ def read_many( if limit < 0: raise ValueError(f'Negative limit: {limit}') statement = statement.limit(limit) - docs = self.session.exec(statement).all() return DocumentList(docs=docs) def update(self, document: Document): + if not document.owner_id: + document.owner_id = self.owner_id + elif document.owner_id != self.owner_id: + error = 'Invalid document ownership: owner={} attempter={}'.format( + self.owner_id, + document.owner_id, + ) + raise PermissionError(error) + self.session.add(document) self.session.commit() self.session.refresh(document) return document - def delete(self, doc_id: UUID, owner_id: UUID): + def delete(self, doc_id: UUID): document = self.read_one(doc_id) - if document.owner_id != owner_id: - error = f'User {owner_id} does not own document {doc_id}' - raise PermissionError(error) document.deleted_at = now() - self.update(document) + + return self.update(document) From cb5e9948e5a46c687c7ffabfe362e71e977c025b Mon Sep 17 00:00:00 2001 From: Jerome White Date: Thu, 27 Mar 2025 15:38:21 +0530 Subject: [PATCH 79/88] Routes respect new document CRUD interface --- backend/app/api/routes/documents.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index d96e027c..6b13d750 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -20,9 +20,9 @@ def list_docs( skip: int = 0, limit: int = 100, ): - crud = DocumentCrud(session) + crud = DocumentCrud(session, current_user.id) try: - return crud.read_many(current_user.id, skip, limit) + return crud.read_many(skip, limit) except ValueError as err: raise HTTPException(status_code=500, detail=str(err)) @@ -39,16 +39,14 @@ def upload_doc( except ConnectionError as err: raise HTTPException(status_code=500, detail=str(err)) - crud = DocumentCrud(session) + crud = DocumentCrud(session, current_user.id) document = Document( id=basename, - owner_id=current_user.id, fname=src.filename, - object_store_url=str(object_store_url), + object_store_url=str(object_store_url) ) - crud.update(document) - return document + return crud.update(document) @router.get("/rm/{doc_id}") def delete_doc( @@ -56,10 +54,10 @@ def delete_doc( current_user: CurrentUser, doc_id: UUID, ): - crud = DocumentCrud(session) + crud = DocumentCrud(session, current_user.id) try: - return crud.delete(doc_id, current_user.id) - except (NoResultFound, PermissionError) as err: + return crud.delete(doc_id) + except NoResultFound as err: raise HTTPException(status_code=404, detail=str(err)) # TODO: perform delete on the collection @@ -70,7 +68,7 @@ def doc_info( current_user: CurrentUser, doc_id: UUID, ): - crud = DocumentCrud(session) + crud = DocumentCrud(session, current_user.id) try: return crud.read_one(doc_id) except NoResultFound as err: From a2dbcdff402a7dd8b16b0ac4f9fd877d9e6ff114 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Thu, 27 Mar 2025 15:38:59 +0530 Subject: [PATCH 80/88] Integer to UUID now a standalone generator --- backend/app/tests/utils/document.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index 46acecc5..61710309 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -19,20 +19,31 @@ def get_user_id_by_email(db: Session): user = get_user_by_email(session=db, email=settings.FIRST_SUPERUSER) return user.id -@ft.cache -def int_to_uuid(value): - return UUID(int=value) +class DocumentIndexGenerator: + def __init__(self, start=0): + self.start = start + + def __iter__(self): + return self + + def __next__(self): + uu_id = self.peek() + self.start += 1 + return uu_id + + def peek(self): + return UUID(int=self.start) class DocumentMaker: def __init__(self, db: Session): self.owner_id = get_user_id_by_email(db) - self.index = 0 + self.index = DocumentIndexGenerator() def __iter__(self): return self def __next__(self): - doc_id = self.get_and_increment() + doc_id = next(self.index) args = str(doc_id).split('-') fname = Path('/', *args).with_suffix('.xyz') @@ -43,11 +54,6 @@ def __next__(self): object_store_url=fname.as_uri(), ) - def get_and_increment(self): - doc_id = int_to_uuid(self.index) - self.index += 1 - return doc_id - class DocumentStore: @staticmethod def clear(db: Session): From 3ad53f0e8ff426a9b413a0a92dff7d478859ed1c Mon Sep 17 00:00:00 2001 From: Jerome White Date: Thu, 27 Mar 2025 15:39:10 +0530 Subject: [PATCH 81/88] Better variable naming --- backend/app/tests/utils/document.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index 61710309..3ee4111c 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -62,15 +62,15 @@ def clear(db: Session): @property def owner(self): - return self.maker.owner_id + return self.documents.owner_id def __init__(self, db: Session): self.db = db - self.maker = DocumentMaker(self.db) + self.documents = DocumentMaker(self.db) self.clear(self.db) def put(self): - doc = next(self.maker) + doc = next(self.documents) self.db.add(doc) self.db.commit() From 8b7637fd116f3083a4c27449281d607b222548fa Mon Sep 17 00:00:00 2001 From: Jerome White Date: Thu, 27 Mar 2025 15:40:11 +0530 Subject: [PATCH 82/88] Tests take into account new document CRUD interface --- .../app/tests/crud/documents/test_delete.py | 14 ++++- .../tests/crud/documents/test_read_many.py | 53 ++++++++++--------- .../app/tests/crud/documents/test_read_one.py | 18 +++++-- .../app/tests/crud/documents/test_update.py | 50 ++++++++++++----- 4 files changed, 90 insertions(+), 45 deletions(-) diff --git a/backend/app/tests/crud/documents/test_delete.py b/backend/app/tests/crud/documents/test_delete.py index ee354acf..4d78c6f8 100644 --- a/backend/app/tests/crud/documents/test_delete.py +++ b/backend/app/tests/crud/documents/test_delete.py @@ -1,5 +1,6 @@ import pytest from sqlmodel import Session, select +from sqlalchemy.exc import NoResultFound from app.crud import DocumentCrud from app.models import Document @@ -11,8 +12,8 @@ def document(db: Session): store = DocumentStore(db) document = store.put() - crud = DocumentCrud(db) - crud.delete(document.id, document.owner_id) + crud = DocumentCrud(db, document.owner_id) + crud.delete(document.id) statement = ( select(Document) @@ -29,3 +30,12 @@ def test_delete_marks_deleted(self, document: Document): def test_delete_follows_insert(self, document: Document): assert document.created_at <= document.deleted_at + + def test_cannot_delete_others_documents(self, db: Session): + store = DocumentStore(db) + document = store.put() + other_owner_id = store.documents.index.peek() + + crud = DocumentCrud(db, other_owner_id) + with pytest.raises(NoResultFound): + crud.delete(document.id) diff --git a/backend/app/tests/crud/documents/test_read_many.py b/backend/app/tests/crud/documents/test_read_many.py index a3642673..bc1592b3 100644 --- a/backend/app/tests/crud/documents/test_read_many.py +++ b/backend/app/tests/crud/documents/test_read_many.py @@ -3,14 +3,7 @@ from app.crud import DocumentCrud -from app.tests.utils.document import ( - DocumentStore, - int_to_uuid, -) - -@pytest.fixture -def crud(db: Session): - return DocumentCrud(db) +from app.tests.utils.document import DocumentStore, DocumentIndexGenerator @pytest.fixture def store(db: Session): @@ -25,68 +18,76 @@ class TestDatabaseReadMany: def test_number_read_is_expected( self, - crud: DocumentCrud, + db: Session, store: DocumentStore, ): - docs = crud.read_many(store.owner) + crud = DocumentCrud(db, store.owner) + docs = crud.read_many() assert len(docs) == self._ndocs def test_deleted_docs_are_excluded( self, - crud: DocumentCrud, + db: Session, store: DocumentStore, ): - assert all(x.deleted_at is None for x in crud.read_many(store.owner)) + crud = DocumentCrud(db, store.owner) + assert all(x.deleted_at is None for x in crud.read_many()) def test_skip_is_respected( self, - crud: DocumentCrud, + db: Session, store: DocumentStore, ): + crud = DocumentCrud(db, store.owner) skip = self._ndocs // 2 - doc_ids = set(x.id for x in crud.read_many(store.owner, skip=skip)) + doc_ids = set(x.id for x in crud.read_many(skip=skip)) + index = DocumentIndexGenerator(skip) - for i in range(skip, self._ndocs): - doc = int_to_uuid(i) # see DocumentMaker + for (_, doc) in zip(range(skip, self._ndocs), index): assert doc in doc_ids def test_zero_skip_includes_all( self, - crud: DocumentCrud, + db: Session, store: DocumentStore, ): - docs = crud.read_many(store.owner, skip=0) + crud = DocumentCrud(db, store.owner) + docs = crud.read_many(skip=0) assert len(docs) == self._ndocs def test_negative_skip_raises_exception( self, - crud: DocumentCrud, + db: Session, store: DocumentStore, ): + crud = DocumentCrud(db, store.owner) with pytest.raises(ValueError): - crud.read_many(store.owner, skip=-1) + crud.read_many(skip=-1) def test_limit_is_respected( self, - crud: DocumentCrud, + db: Session, store: DocumentStore, ): + crud = DocumentCrud(db, store.owner) limit = self._ndocs // 2 - docs = crud.read_many(store.owner, limit=limit) + docs = crud.read_many(limit=limit) assert len(docs) == limit def test_zero_limit_includes_nothing( self, - crud: DocumentCrud, + db: Session, store: DocumentStore, ): - assert not crud.read_many(store.owner, limit=0) + crud = DocumentCrud(db, store.owner) + assert not crud.read_many(limit=0) def test_negative_limit_raises_exception( self, - crud: DocumentCrud, + db: Session, store: DocumentStore, ): + crud = DocumentCrud(db, store.owner) with pytest.raises(ValueError): - crud.read_many(store.owner, limit=-1) + crud.read_many(limit=-1) diff --git a/backend/app/tests/crud/documents/test_read_one.py b/backend/app/tests/crud/documents/test_read_one.py index 1f050481..0d25055e 100644 --- a/backend/app/tests/crud/documents/test_read_one.py +++ b/backend/app/tests/crud/documents/test_read_one.py @@ -14,14 +14,26 @@ class TestDatabaseReadOne: def test_can_select_valid_id(self, db: Session, store: DocumentStore): document = store.put() - crud = DocumentCrud(db) + crud = DocumentCrud(db, store.owner) result = crud.read_one(document.id) assert result.id == document.id def test_cannot_select_invalid_id(self, db: Session, store: DocumentStore): - document = next(store.maker) + document = next(store.documents) - crud = DocumentCrud(db) + crud = DocumentCrud(db, store.owner) + with pytest.raises(NoResultFound): + crud.read_one(document.id) + + def test_cannot_read_others_documents( + self, + db: Session, + store: DocumentStore, + ): + document = store.put() + other = DocumentStore(db) + + crud = DocumentCrud(db, other.owner) with pytest.raises(NoResultFound): crud.read_one(document.id) diff --git a/backend/app/tests/crud/documents/test_update.py b/backend/app/tests/crud/documents/test_update.py index 06634a78..6a14217c 100644 --- a/backend/app/tests/crud/documents/test_update.py +++ b/backend/app/tests/crud/documents/test_update.py @@ -8,36 +8,58 @@ @pytest.fixture def documents(db: Session): store = DocumentStore(db) - return store.maker - -@pytest.fixture -def crud(db: Session): - return DocumentCrud(db) + return store.documents class TestDatabaseUpdate: - def test_update_adds_one( - self, - crud: DocumentCrud, - documents: DocumentMaker, - ): - before = crud.read_many(documents.owner_id) + def test_update_adds_one(self, db: Session, documents: DocumentMaker): + crud = DocumentCrud(db, documents.owner_id) + + before = crud.read_many() crud.update(next(documents)) - after = crud.read_many(documents.owner_id) + after = crud.read_many() assert len(before) + 1 == len(after) def test_sequential_update_is_ordered( self, - crud: DocumentCrud, + db: Session, documents: DocumentMaker, ): + crud = DocumentCrud(db, documents.owner_id) (a, b) = (crud.update(y) for (_, y) in zip(range(2), documents)) + assert a.created_at <= b.created_at def test_insert_does_not_delete( self, - crud: DocumentCrud, + db: Session, documents: DocumentMaker, ): + crud = DocumentCrud(db, documents.owner_id) document = crud.update(next(documents)) + assert document.deleted_at is None + + def test_update_sets_default_owner( + self, + db: Session, + documents: DocumentMaker, + ): + crud = DocumentCrud(db, documents.owner_id) + document = next(documents) + document.owner_id = None + result = crud.update(document) + + assert result.owner_id == document.owner_id + + def test_update_respects_owner( + self, + db: Session, + documents: DocumentMaker, + ): + document = next(documents) + document.owner_id = documents.index.peek() + + crud = DocumentCrud(db, documents.owner_id) + with pytest.raises(PermissionError): + crud.update(document) From 96472e180671c741dc58f84bf3ee99277f0c6994 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Thu, 27 Mar 2025 15:46:11 +0530 Subject: [PATCH 83/88] Unused imports --- backend/app/api/routes/documents.py | 1 - backend/app/crud/document.py | 2 +- backend/app/models/document.py | 3 +-- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 6b13d750..4f0d1541 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -1,5 +1,4 @@ from uuid import UUID, uuid4 -from pathlib import Path from fastapi import APIRouter, File, UploadFile, HTTPException diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py index 295acd1d..d4ead6cb 100644 --- a/backend/app/crud/document.py +++ b/backend/app/crud/document.py @@ -1,7 +1,7 @@ from uuid import UUID from typing import Optional -from sqlmodel import Session, select, update, and_ +from sqlmodel import Session, select, and_ from app.models import Document, DocumentList from app.core.util import now diff --git a/backend/app/models/document.py b/backend/app/models/document.py index e122bc71..363eb6fb 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -1,11 +1,10 @@ from uuid import UUID, uuid4 -from pathlib import Path from datetime import datetime from sqlmodel import Field, Relationship, SQLModel -from .user import User from app.core.util import now +from .user import User class Document(SQLModel, table=True): id: UUID = Field( From 20ddd201b8828ea90c6f5aa04fad2b1a8065ed2c Mon Sep 17 00:00:00 2001 From: Jerome White Date: Fri, 28 Mar 2025 11:34:14 +0530 Subject: [PATCH 84/88] More descriptive AWS error type --- backend/app/core/cloud/__init__.py | 6 +++++- backend/app/core/cloud/storage.py | 7 +++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/backend/app/core/cloud/__init__.py b/backend/app/core/cloud/__init__.py index cf8fea60..cde54f38 100644 --- a/backend/app/core/cloud/__init__.py +++ b/backend/app/core/cloud/__init__.py @@ -1 +1,5 @@ -from .storage import AmazonCloudStorage, AmazonCloudStorageClient +from .storage import ( + AmazonCloudStorage, + AmazonCloudStorageClient, + CloudStorageError, +) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index e7a09427..7dc83fc9 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -10,6 +10,9 @@ from app.api.deps import CurrentUser from app.core.config import settings +class CloudStorageError(Exception): + pass + class AmazonCloudStorageClient: @ft.cached_property def client(self): @@ -27,7 +30,7 @@ def create(self): except ClientError as err: response = int(err.response['Error']['Code']) if response != 404: - raise + raise CloudStorageError(err) from err # ... if not create it self.client.create_bucket( Bucket=settings.AWS_S3_BUCKET, @@ -82,6 +85,6 @@ def put(self, source: UploadFile, basename: str): **kwargs, ) except ClientError as err: - raise ConnectionError(f'AWS Error: "{err}"') from err + raise CloudStorageError(f'AWS Error: "{err}"') from err return destination From 33cac516cecdd521c5c5a67e872434aa7bfbd3cc Mon Sep 17 00:00:00 2001 From: Jerome White Date: Fri, 28 Mar 2025 11:34:41 +0530 Subject: [PATCH 85/88] Catch generic exceptions to ensure service does not go down --- backend/app/api/routes/documents.py | 31 ++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 4f0d1541..cd1a54aa 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -1,16 +1,20 @@ +import warnings from uuid import UUID, uuid4 from fastapi import APIRouter, File, UploadFile, HTTPException -from sqlalchemy.exc import NoResultFound, MultipleResultsFound +from sqlalchemy.exc import NoResultFound, MultipleResultsFound, SQLAlchemyError from app.crud import DocumentCrud from app.models import Document, DocumentList from app.api.deps import CurrentUser, SessionDep -from app.core.cloud import AmazonCloudStorage +from app.core.cloud import AmazonCloudStorage, CloudStorageError router = APIRouter(prefix="/documents", tags=["documents"]) +def raise_from_unknown(error: Exception): + warnings.warn(f'Unexpected exception "{type(err).__name__}": {err}') + raise HTTPException(status_code=500, detail=str(err)) @router.get("/ls", response_model=DocumentList) def list_docs( @@ -22,8 +26,10 @@ def list_docs( crud = DocumentCrud(session, current_user.id) try: return crud.read_many(skip, limit) - except ValueError as err: + except (ValueError, SQLAlchemyError) as err: raise HTTPException(status_code=500, detail=str(err)) + except Exception as err: + raise_from_unknown(err) @router.post("/cp") def upload_doc( @@ -35,8 +41,10 @@ def upload_doc( basename = uuid4() try: object_store_url = storage.put(src, str(basename)) - except ConnectionError as err: - raise HTTPException(status_code=500, detail=str(err)) + except CloudStorageError as err: + raise HTTPException(status_code=503, detail=str(err)) + except Exception as err: + raise_from_unknown(err) crud = DocumentCrud(session, current_user.id) document = Document( @@ -45,7 +53,12 @@ def upload_doc( object_store_url=str(object_store_url) ) - return crud.update(document) + try: + return crud.update(document) + except SQLAlchemyError as err: + raise HTTPException(status_code=503, detail=str(err)) + except Exception as err: + raise_from_unknown(err) @router.get("/rm/{doc_id}") def delete_doc( @@ -58,6 +71,8 @@ def delete_doc( return crud.delete(doc_id) except NoResultFound as err: raise HTTPException(status_code=404, detail=str(err)) + except Exception as err: + raise_from_unknown(err) # TODO: perform delete on the collection @@ -73,4 +88,6 @@ def doc_info( except NoResultFound as err: raise HTTPException(status_code=404, detail=str(err)) except MultipleResultsFound as err: - raise HTTPException(status_code=500, detail=str(err)) + raise HTTPException(status_code=503, detail=str(err)) + except Exception as err: + raise_from_unknown(err) From 0bf097a68da5e973e3251327adee00b3c60a6360 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Fri, 28 Mar 2025 12:00:13 +0530 Subject: [PATCH 86/88] Ensure boto3 respects environment The preference for client creation is to take parameters values from the environment; platform "settings" are a backup. This ensures that when mocking S3, things work as expected. --- backend/app/core/cloud/storage.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/backend/app/core/cloud/storage.py b/backend/app/core/cloud/storage.py index 7dc83fc9..bb1a2a44 100644 --- a/backend/app/core/cloud/storage.py +++ b/backend/app/core/cloud/storage.py @@ -1,3 +1,4 @@ +import os import functools as ft from pathlib import Path from dataclasses import dataclass, asdict @@ -16,13 +17,18 @@ class CloudStorageError(Exception): class AmazonCloudStorageClient: @ft.cached_property def client(self): - return boto3.client( - 's3', - aws_access_key_id=settings.AWS_ACCESS_KEY_ID, - aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, - region_name=settings.AWS_DEFAULT_REGION, + kwargs = {} + cred_params = ( + ('aws_access_key_id', 'AWS_ACCESS_KEY_ID'), + ('aws_secret_access_key', 'AWS_SECRET_ACCESS_KEY'), + ('region_name', 'AWS_DEFAULT_REGION'), ) + for (i, j) in cred_params: + kwargs[i] = os.environ.get(j, getattr(settings, j)) + + return boto3.client('s3', **kwargs) + def create(self): try: # does the bucket exist... From 8eb7ae25ee214d48ef23d5b94d0379aad6a50bdf Mon Sep 17 00:00:00 2001 From: Jerome White Date: Fri, 28 Mar 2025 12:01:54 +0530 Subject: [PATCH 87/88] Log cloud storage errors during startup --- backend/app/initial_storage.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/app/initial_storage.py b/backend/app/initial_storage.py index 436fb81f..4b48a2a5 100644 --- a/backend/app/initial_storage.py +++ b/backend/app/initial_storage.py @@ -2,7 +2,7 @@ from botocore.exceptions import ClientError -from app.core.cloud import AmazonCloudStorageClient +from app.core.cloud import AmazonCloudStorageClient, CloudStorageError from app.core.config import settings logging.basicConfig(level=logging.INFO) @@ -10,7 +10,10 @@ def init() -> None: aws = AmazonCloudStorageClient() - aws.create() + try: + aws.create() + except CloudStorageError as err: + logging.error(err) def main() -> None: logger.info("START: setup cloud storage") From df9e0d14c26345c7f2a6cf17107113502b0a1625 Mon Sep 17 00:00:00 2001 From: Jerome White Date: Fri, 28 Mar 2025 12:07:49 +0530 Subject: [PATCH 88/88] Corrected variable naming --- backend/app/api/routes/documents.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index cd1a54aa..185a6144 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -13,8 +13,11 @@ router = APIRouter(prefix="/documents", tags=["documents"]) def raise_from_unknown(error: Exception): - warnings.warn(f'Unexpected exception "{type(err).__name__}": {err}') - raise HTTPException(status_code=500, detail=str(err)) + warnings.warn('Unexpected exception "{}": {}'.format( + type(error).__name__, + error, + )) + raise HTTPException(status_code=500, detail=str(error)) @router.get("/ls", response_model=DocumentList) def list_docs(