Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
88 commits
Select commit Hold shift + click to select a range
1d53062
Cleaner syntax
jerome-white Mar 12, 2025
e8f0d9c
Interface for cloud storage functionality
jerome-white Mar 17, 2025
3a8e62a
Create module dedicated to cloud functionality
jerome-white Mar 17, 2025
41e4652
Take the user in the constructor
jerome-white Mar 17, 2025
6af4395
Rename the document list route
jerome-white Mar 17, 2025
4cbb94f
Implementation of file upload
jerome-white Mar 17, 2025
56f2887
Default placeholders for AWS variables
jerome-white Mar 17, 2025
235ef85
Take AWS credentials from .env
jerome-white Mar 17, 2025
303c981
Perform bucket creation at startup
jerome-white Mar 19, 2025
11e3d54
Use a single bucket for all clients
jerome-white Mar 19, 2025
0973953
Changes to support new bucket semantics
jerome-white Mar 19, 2025
0350ce2
Missing imports
jerome-white Mar 19, 2025
d1e97e1
Unused imports
jerome-white Mar 19, 2025
c5319cb
Lift timestamp generation to common location
jerome-white Mar 19, 2025
9607567
Timestamp generation is global resource
jerome-white Mar 19, 2025
4d7c7c7
Updating a document is not yet supported
jerome-white Mar 19, 2025
6e118c6
Flesh out document remove and stat
jerome-white Mar 19, 2025
a4510ff
Must specify region when creating a bucket
jerome-white Mar 19, 2025
31e5fae
Add boto3 requirement
jerome-white Mar 19, 2025
18654ba
Repeating AWS environment variables in the settings
jerome-white Mar 19, 2025
67274e6
Client expected to pass the basename of the destination
jerome-white Mar 19, 2025
fa14685
Corrected upload file specification
jerome-white Mar 19, 2025
483eb3d
Build basename that matches the UUID expectation of the model
jerome-white Mar 19, 2025
a0f3e98
SQLAlchemy cannot process Path types natively
jerome-white Mar 19, 2025
d4ba9b9
Ensure document ID is passed to route body
jerome-white Mar 19, 2025
ca87b6b
Corrected database interaction when deleting
jerome-white Mar 19, 2025
7a883e6
More graceful handling of non singular results
jerome-white Mar 19, 2025
df5338c
Move document database interactions to CRUD
jerome-white Mar 22, 2025
6d5dda7
Ignore Emacs backup files
jerome-white Mar 24, 2025
0fd2bf7
Allow document list to be iterable
jerome-white Mar 24, 2025
0a5c896
Corrected parameter naming
jerome-white Mar 24, 2025
7df6771
Initial document CRUD tests
jerome-white Mar 24, 2025
429a162
Document update returns inserted document
jerome-white Mar 24, 2025
83c08be
Lift document creation function
jerome-white Mar 24, 2025
1eead72
Test document update
jerome-white Mar 24, 2025
c2225b5
Whitespace
jerome-white Mar 24, 2025
912568d
Linted
jerome-white Mar 24, 2025
85742ed
Corrected update_at test
jerome-white Mar 24, 2025
8ad1ec8
Appropriate class variable naming
jerome-white Mar 24, 2025
7761ecd
Move document creation into a class
jerome-white Mar 24, 2025
d3e0c40
Linted
jerome-white Mar 24, 2025
0b88123
Test document CRUD delete
jerome-white Mar 24, 2025
6e16179
All test methods cleanup after themselves
jerome-white Mar 24, 2025
6f9f6cd
Gracefully handle negative skips and limits
jerome-white Mar 25, 2025
7d07223
Fixture usage is more explicit and straightforward
jerome-white Mar 25, 2025
0a57ce6
Better usage of fixtures
jerome-white Mar 25, 2025
2ee4787
Import reordering
jerome-white Mar 25, 2025
5e29a5a
Lift document crud testing utils to global testing utilities
jerome-white Mar 25, 2025
11ee2c9
Take read error into account
jerome-white Mar 25, 2025
a81bd49
Test document list route
jerome-white Mar 25, 2025
ddc6cf5
Simplify test parameters
jerome-white Mar 25, 2025
0fefd19
Lift common document endpoint testing resources to utils
jerome-white Mar 25, 2025
db1d908
Return number of rows deleted
jerome-white Mar 25, 2025
d6854f0
Test document endpoint deletion
jerome-white Mar 25, 2025
0232620
Consistent Session variable naming
jerome-white Mar 25, 2025
4955bed
Special list document type
jerome-white Mar 25, 2025
d6acea7
Ability to add component to URL path
jerome-white Mar 26, 2025
094d122
Corrections to injected types
jerome-white Mar 26, 2025
86933f9
Simplification of Route semantics
jerome-white Mar 26, 2025
a01db58
Linted
jerome-white Mar 26, 2025
6253c51
Delete uses update
jerome-white Mar 26, 2025
9349d88
Lift document comparison to test utilities
jerome-white Mar 26, 2025
2bcd66c
Tests assert
jerome-white Mar 26, 2025
29f10c1
Must refresh the session before interacting with the database
jerome-white Mar 26, 2025
ecf385a
Tests for document stat route
jerome-white Mar 26, 2025
e9a280b
Linted
jerome-white Mar 26, 2025
ade27ff
Better temporary bucket naming
jerome-white Mar 26, 2025
8dd5bc1
Move from deprecated way of Pydantic to dict
jerome-white Mar 26, 2025
43d8d40
Lift bucket creation to global module
jerome-white Mar 26, 2025
a5623de
Unused import
jerome-white Mar 26, 2025
172e0b2
Return all information about an uploaded document
jerome-white Mar 26, 2025
cf84aaf
Updates to Python packages
jerome-white Mar 26, 2025
85b07be
Test upload endpoint
jerome-white Mar 26, 2025
3293d07
Use object to upload documents
jerome-white Mar 26, 2025
397e316
General document test cleanup
jerome-white Mar 27, 2025
318d803
Remove unused imports
jerome-white Mar 27, 2025
d39b03e
Remove extraneous code
jerome-white Mar 27, 2025
847eed9
Document crud respects user throughout
jerome-white Mar 27, 2025
cb5e994
Routes respect new document CRUD interface
jerome-white Mar 27, 2025
a2dbcdf
Integer to UUID now a standalone generator
jerome-white Mar 27, 2025
3ad53f0
Better variable naming
jerome-white Mar 27, 2025
8b7637f
Tests take into account new document CRUD interface
jerome-white Mar 27, 2025
96472e1
Unused imports
jerome-white Mar 27, 2025
20ddd20
More descriptive AWS error type
jerome-white Mar 28, 2025
33cac51
Catch generic exceptions to ensure service does not go down
jerome-white Mar 28, 2025
0bf097a
Ensure boto3 respects environment
jerome-white Mar 28, 2025
8eb7ae2
Log cloud storage errors during startup
jerome-white Mar 28, 2025
df9e0d1
Corrected variable naming
jerome-white Mar 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,8 @@ SENTRY_DSN=
# Configure these with your own Docker registry images
DOCKER_IMAGE_BACKEND=backend
DOCKER_IMAGE_FRONTEND=frontend

# AWS
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_DEFAULT_REGION=
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,7 @@ ENV/


# .DS_Store: macOS Finder metadata file that stores folder view settings and icon positions.
**/.DS_Store
**/.DS_Store

# Emacs
*~
105 changes: 84 additions & 21 deletions backend/app/api/routes/documents.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,96 @@
from fastapi import APIRouter
from sqlmodel import select, and_
import warnings
from uuid import UUID, uuid4

from app.api.deps import CurrentUser, SessionDep
from fastapi import APIRouter, File, UploadFile, HTTPException

from sqlalchemy.exc import NoResultFound, MultipleResultsFound, SQLAlchemyError

from app.crud import DocumentCrud
from app.models import Document, DocumentList
from app.api.deps import CurrentUser, SessionDep
from app.core.cloud import AmazonCloudStorage, CloudStorageError

router = APIRouter(prefix="/documents", tags=["documents"])

def raise_from_unknown(error: Exception):
    """Warn about an unanticipated exception, then re-raise it as HTTP 500.

    Emits a warning naming the exception type and its message, then raises
    ``HTTPException`` with ``str(error)`` as the detail. This function never
    returns normally.
    """
    kind = type(error).__name__
    warnings.warn(f'Unexpected exception "{kind}": {error}')
    raise HTTPException(status_code=500, detail=str(error))

# Lists the current user's documents that have not been soft-deleted
# (DocumentCrud.read_many filters on owner and on deleted_at being NULL).
# A plain comment is used instead of a docstring so the generated OpenAPI
# description of the route is left untouched.
@router.get("/ls", response_model=DocumentList)
def list_docs(
    session: SessionDep,
    current_user: CurrentUser,
    skip: int = 0,
    limit: int = 100,
):
    crud = DocumentCrud(session, current_user.id)
    try:
        return crud.read_many(skip, limit)
    except (ValueError, SQLAlchemyError) as err:
        # read_many raises ValueError for a negative skip or limit
        raise HTTPException(status_code=500, detail=str(err)) from err
    except Exception as err:
        # anything unanticipated: warn, then answer with HTTP 500
        raise_from_unknown(err)

# Uploads the submitted file to cloud storage, then records it as a Document.
# One freshly generated UUID is used both as the object's basename in storage
# and as the Document id. Storage errors and SQLAlchemy errors are both
# reported as HTTP 503; anything else goes through raise_from_unknown.
@router.post("/cp")
def upload_doc(
    session: SessionDep,
    current_user: CurrentUser,
    src: UploadFile = File(...),
):
    doc_id = uuid4()
    bucket = AmazonCloudStorage(current_user)
    try:
        location = bucket.put(src, str(doc_id))
    except CloudStorageError as err:
        raise HTTPException(status_code=503, detail=str(err))
    except Exception as err:
        raise_from_unknown(err)

    record = Document(
        id=doc_id,
        fname=src.filename,
        object_store_url=str(location),
    )
    documents = DocumentCrud(session, current_user.id)
    try:
        saved = documents.update(record)
    except SQLAlchemyError as err:
        raise HTTPException(status_code=503, detail=str(err))
    except Exception as err:
        raise_from_unknown(err)
    else:
        return saved

# Soft-deletes one of the current user's documents: DocumentCrud.delete stamps
# deleted_at and returns the saved document. A missing document yields 404.
# NOTE(review): this state-changing route is registered for GET; confirm
# whether clients depend on that before moving it to DELETE.
@router.get("/rm/{doc_id}")
def delete_doc(
    session: SessionDep,
    current_user: CurrentUser,
    doc_id: UUID,
):
    documents = DocumentCrud(session, current_user.id)
    try:
        removed = documents.delete(doc_id)
    except NoResultFound as err:
        raise HTTPException(status_code=404, detail=str(err))
    except Exception as err:
        raise_from_unknown(err)
    else:
        return removed

# TODO: perform delete on the collection

# Returns the stored record for a single document owned by the current user.
# No match -> 404; more than one match -> 503; anything else goes through
# raise_from_unknown.
@router.get("/stat/{doc_id}", response_model=Document)
def doc_info(
    session: SessionDep,
    current_user: CurrentUser,
    doc_id: UUID,
):
    documents = DocumentCrud(session, current_user.id)
    try:
        found = documents.read_one(doc_id)
    except NoResultFound as err:
        raise HTTPException(status_code=404, detail=str(err))
    except MultipleResultsFound as err:
        raise HTTPException(status_code=503, detail=str(err))
    except Exception as err:
        raise_from_unknown(err)
    else:
        return found
5 changes: 5 additions & 0 deletions backend/app/core/cloud/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .storage import (
AmazonCloudStorage,
AmazonCloudStorageClient,
CloudStorageError,
)
96 changes: 96 additions & 0 deletions backend/app/core/cloud/storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import os
import functools as ft
from pathlib import Path
from dataclasses import dataclass, asdict
from urllib.parse import ParseResult, urlunparse

import boto3
from fastapi import UploadFile
from botocore.exceptions import ClientError

from app.api.deps import CurrentUser
from app.core.config import settings

class CloudStorageError(Exception):
pass

class AmazonCloudStorageClient:
    """Holder for a lazily created boto3 S3 client, plus bucket bootstrap."""

    @ft.cached_property
    def client(self):
        """boto3 S3 client, built on first access and cached on the instance.

        Each credential/region value is taken from the process environment
        when the variable is set, otherwise from the same-named attribute
        on ``settings``.
        """
        cred_params = (
            ('aws_access_key_id', 'AWS_ACCESS_KEY_ID'),
            ('aws_secret_access_key', 'AWS_SECRET_ACCESS_KEY'),
            ('region_name', 'AWS_DEFAULT_REGION'),
        )
        kwargs = {
            param: os.environ.get(name, getattr(settings, name))
            for (param, name) in cred_params
        }

        return boto3.client('s3', **kwargs)

    def create(self):
        """Create the configured bucket unless it already exists.

        Raises:
            CloudStorageError: if the existence check fails for any reason
                other than "not found", or if bucket creation fails.
        """
        bucket = settings.AWS_S3_BUCKET
        try:
            # does the bucket exist...
            self.client.head_bucket(Bucket=bucket)
        except ClientError as err:
            # Compare as a string: the error code is not guaranteed to be
            # numeric (e.g. "AccessDenied"), so int() could itself raise.
            if str(err.response['Error']['Code']) != '404':
                raise CloudStorageError(err) from err
        else:
            return

        # ... if not create it
        create_kwargs = {}
        region = settings.AWS_DEFAULT_REGION
        # S3 rejects an explicit LocationConstraint for us-east-1 (its
        # default region); a blank region cannot be sent as a constraint.
        if region and region != 'us-east-1':
            create_kwargs['CreateBucketConfiguration'] = {
                'LocationConstraint': region,
            }
        try:
            self.client.create_bucket(Bucket=bucket, **create_kwargs)
        except ClientError as err:
            # keep one error type for callers that only catch
            # CloudStorageError
            raise CloudStorageError(err) from err

@dataclass(frozen=True)
class SimpleStorageName:
    """Immutable (Key, Bucket) pair that renders as an ``s3://`` URL.

    ``Bucket`` defaults to ``settings.AWS_S3_BUCKET``, read once when the
    class is defined.
    """

    Key: str
    Bucket: str = settings.AWS_S3_BUCKET

    def __str__(self):
        """Render the name as a URL string."""
        return urlunparse(self.to_url())

    def to_url(self):
        """Return a ``ParseResult`` with scheme ``s3``, the bucket as netloc
        and the key as path; every other component is ``None``."""
        parts = dict.fromkeys(ParseResult._fields)
        parts.update(scheme='s3', netloc=self.Bucket, path=self.Key)

        return ParseResult(**parts)

class CloudStorage:
    """Base class for storage backends bound to a single user."""

    def __init__(self, user: CurrentUser):
        # the user on whose behalf objects are stored
        self.user = user

    def put(self, source: UploadFile, basename: str):
        """Store ``source`` under ``basename``; subclasses must override."""
        raise NotImplementedError()

class AmazonCloudStorage(CloudStorage):
    """``CloudStorage`` backend that writes uploads to S3."""

    def __init__(self, user: CurrentUser):
        super().__init__(user)
        self.aws = AmazonCloudStorageClient()

    def put(self, source: UploadFile, basename: str):
        """Upload ``source`` to ``<user id>/<basename>`` in the bucket.

        Returns:
            The ``SimpleStorageName`` describing where the object was put.

        Raises:
            CloudStorageError: if the upload fails with a ``ClientError``.
        """
        # Object keys are "/"-separated on every platform; pathlib.Path
        # would produce backslashes when running on Windows.
        from pathlib import PurePosixPath

        key = PurePosixPath(str(self.user.id), basename)
        destination = SimpleStorageName(str(key))

        # Only forward a content type when the upload carries one; a None
        # value is rejected by botocore's parameter validation.
        # NOTE: attaching user metadata ('Metadata': self.user.model_dump())
        # was left disabled in the original code.
        extra_args = {}
        if source.content_type is not None:
            extra_args['ContentType'] = source.content_type

        try:
            self.aws.client.upload_fileobj(
                source.file,
                ExtraArgs=extra_args,
                # expands to the Key= and Bucket= keyword arguments
                **asdict(destination),
            )
        except ClientError as err:
            raise CloudStorageError(f'AWS Error: "{err}"') from err

        return destination
9 changes: 9 additions & 0 deletions backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,15 @@ def emails_enabled(self) -> bool:
FIRST_SUPERUSER: EmailStr
FIRST_SUPERUSER_PASSWORD: str

AWS_ACCESS_KEY_ID: str
AWS_SECRET_ACCESS_KEY: str
AWS_DEFAULT_REGION: str

# Bucket name for stored documents: a fixed prefix followed by the value of
# ENVIRONMENT. (Kept as a comment rather than a docstring so the computed
# field's schema description does not change.)
@computed_field  # type: ignore[prop-decorator]
@property
def AWS_S3_BUCKET(self) -> str:
    return 'ai-platform-documents-{}'.format(self.ENVIRONMENT)

def _check_default_secret(self, var_name: str, value: str | None) -> None:
if value == "changethis":
message = (
Expand Down
4 changes: 4 additions & 0 deletions backend/app/core/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from datetime import datetime, timezone

def now():
    """Return the current UTC time as a naive ``datetime`` (tzinfo removed)."""
    aware = datetime.now(tz=timezone.utc)
    return aware.replace(tzinfo=None)
4 changes: 3 additions & 1 deletion backend/app/crud/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@
create_user,
get_user_by_email,
update_user,
)
)

from .document import DocumentCrud
69 changes: 69 additions & 0 deletions backend/app/crud/document.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from uuid import UUID
from typing import Optional

from sqlmodel import Session, select, and_

from app.models import Document, DocumentList
from app.core.util import now

class DocumentCrud:
    """Database operations on ``Document`` rows, scoped to one owner."""

    def __init__(self, session: Session, owner_id: UUID):
        self.session = session
        self.owner_id = owner_id

    def read_one(self, doc_id: UUID):
        """Return the single document with ``doc_id`` owned by this owner.

        Uses ``.one()``, so zero or multiple matches raise. Soft-deleted
        rows are not filtered out here.
        """
        criteria = and_(
            Document.owner_id == self.owner_id,
            Document.id == doc_id,
        )
        query = select(Document).where(criteria)

        return self.session.exec(query).one()

    def read_many(
        self,
        skip: Optional[int] = None,
        limit: Optional[int] = None,
    ):
        """Return this owner's non-deleted documents as a ``DocumentList``.

        Raises:
            ValueError: if ``skip`` or ``limit`` is negative.
        """
        # validate in the same order the bounds are applied: skip, then limit
        for (label, value) in (('skip', skip), ('limit', limit)):
            if value is not None and value < 0:
                raise ValueError(f'Negative {label}: {value}')

        criteria = and_(
            Document.owner_id == self.owner_id,
            Document.deleted_at.is_(None),
        )
        query = select(Document).where(criteria)
        if skip is not None:
            query = query.offset(skip)
        if limit is not None:
            query = query.limit(limit)
        rows = self.session.exec(query).all()

        return DocumentList(docs=rows)

    def update(self, document: Document):
        """Persist ``document`` and return it refreshed from the database.

        A document without an owner is assigned to this owner.

        Raises:
            PermissionError: if the document already belongs to someone else.
        """
        if document.owner_id and document.owner_id != self.owner_id:
            # NOTE(review): "owner" is filled with this CRUD's owner_id and
            # "attempter" with the document's owner_id -- the labels look
            # swapped; confirm before changing the message.
            raise PermissionError(
                f'Invalid document ownership: owner={self.owner_id} '
                f'attempter={document.owner_id}'
            )
        if not document.owner_id:
            document.owner_id = self.owner_id

        db = self.session
        db.add(document)
        db.commit()
        db.refresh(document)

        return document

    def delete(self, doc_id: UUID):
        """Soft-delete: stamp ``deleted_at`` with the current time and save."""
        target = self.read_one(doc_id)
        target.deleted_at = now()

        return self.update(target)
25 changes: 25 additions & 0 deletions backend/app/initial_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import logging

from botocore.exceptions import ClientError

from app.core.cloud import AmazonCloudStorageClient, CloudStorageError
from app.core.config import settings

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def init() -> None:
    """Ensure the storage bucket exists.

    A ``CloudStorageError`` is logged rather than propagated, so a storage
    problem does not abort the caller.
    """
    aws = AmazonCloudStorageClient()
    try:
        aws.create()
    except CloudStorageError as err:
        # use the module logger, consistent with main(), not the root logger
        logger.error(err)

def main() -> None:
    """Run cloud-storage setup, logging a marker before and after."""
    logger.info("START: setup cloud storage")
    init()
    logger.info("END: setup cloud storage")


if __name__ == "__main__":
main()
Loading
Loading