-
Couldn't load subscription status.
- Fork 5
Implement Permanent File Deletion for Document Endpoints #240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8c99ec1
d55614e
dbce14e
e9de5d5
e99159f
55f1a71
34b209a
cbb95bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| This operation soft deletes the document — meaning its metadata and reference are retained in the database, but it is marked as deleted. The actual file stored in cloud storage (e.g., S3) is permanently deleted, and this action is irreversible. | ||
| If the document is part of an active collection, those collections | ||
| will be deleted using the collections delete interface. Noteably, this | ||
| means all OpenAI Vector Store's and Assistant's to which this document | ||
| belongs will be deleted. | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,6 +74,30 @@ def remove_doc( | |
| return APIResponse.success_response(data) | ||
|
|
||
|
|
||
| @router.delete( | ||
| "/remove/{doc_id}/permanent", | ||
| description=load_description("documents/permanent_delete.md"), | ||
| response_model=APIResponse[Document], | ||
| ) | ||
| def permanent_delete_doc( | ||
| session: SessionDep, | ||
| current_user: CurrentUser, | ||
| doc_id: UUID = FastPath(description="Document to permanently delete"), | ||
| ): | ||
| a_crud = OpenAIAssistantCrud() | ||
| d_crud = DocumentCrud(session, current_user.id) | ||
| c_crud = CollectionCrud(session, current_user.id) | ||
|
Comment on lines
+87
to
+89
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not a big fan of these names like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @AkhileshNegi that's right, |
||
| storage = AmazonCloudStorage(current_user) | ||
|
|
||
| document = d_crud.read_one(doc_id) | ||
|
|
||
| c_crud.delete(document, a_crud) | ||
| storage.delete(document.object_store_url) | ||
| d_crud.delete(doc_id) | ||
|
|
||
| return APIResponse.success_response(document) | ||
|
|
||
|
|
||
| @router.get( | ||
| "/info/{doc_id}", | ||
| description=load_description("documents/info.md"), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| import os | ||
| from pathlib import Path | ||
| from urllib.parse import urlparse | ||
|
|
||
| import pytest | ||
| from botocore.exceptions import ClientError | ||
| from moto import mock_aws | ||
| from sqlmodel import Session, select | ||
|
|
||
| import openai_responses | ||
|
|
||
| from app.core.cloud import AmazonCloudStorageClient | ||
| from app.core.config import settings | ||
| from app.models import Document | ||
| from app.tests.utils.document import ( | ||
| DocumentStore, | ||
| DocumentMaker, | ||
| Route, | ||
| WebCrawler, | ||
| crawler, | ||
| ) | ||
| from app.tests.utils.utils import openai_credentials | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def route(): | ||
| return Route("remove") | ||
|
|
||
|
|
||
| @pytest.fixture(scope="class") | ||
| def aws_credentials(): | ||
| os.environ["AWS_ACCESS_KEY_ID"] = "testing" | ||
| os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" | ||
| os.environ["AWS_SECURITY_TOKEN"] = "testing" | ||
| os.environ["AWS_SESSION_TOKEN"] = "testing" | ||
| os.environ["AWS_DEFAULT_REGION"] = settings.AWS_DEFAULT_REGION | ||
|
Comment on lines
+31
to
+36
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think at some point we also need to create .env.test as we add more testcases that may need similar behaviour. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. |
||
|
|
||
|
|
||
| @pytest.mark.usefixtures("openai_credentials", "aws_credentials") | ||
| @mock_aws | ||
| class TestDocumentRoutePermanentRemove: | ||
| @openai_responses.mock() | ||
| def test_permanent_delete_document_from_s3( | ||
| self, | ||
| db: Session, | ||
| route: Route, | ||
| crawler: WebCrawler, | ||
| ): | ||
| # Setup AWS | ||
| aws = AmazonCloudStorageClient() | ||
| aws.create() | ||
|
|
||
| # Setup document in DB and S3 | ||
| store = DocumentStore(db) | ||
| document = store.put() | ||
| s3_key = Path(urlparse(document.object_store_url).path).relative_to("/") | ||
| aws.client.put_object( | ||
| Bucket=settings.AWS_S3_BUCKET, Key=str(s3_key), Body=b"test" | ||
| ) | ||
|
|
||
| # Delete document | ||
| response = crawler.delete(route.append(document, suffix="permanent")) | ||
| assert response.is_success | ||
|
|
||
| db.refresh(document) | ||
|
|
||
| stmt = select(Document).where(Document.id == document.id) | ||
| doc_in_db = db.exec(stmt).first() | ||
| assert doc_in_db is not None | ||
| assert doc_in_db.deleted_at is not None | ||
|
|
||
| with pytest.raises(ClientError) as exc_info: | ||
| aws.client.head_object( | ||
| Bucket=settings.AWS_S3_BUCKET, | ||
| Key=str(s3_key), | ||
| ) | ||
| assert exc_info.value.response["Error"]["Code"] == "404" | ||
|
|
||
| @openai_responses.mock() | ||
| def test_cannot_delete_nonexistent_document( | ||
| self, | ||
| db: Session, | ||
| route: Route, | ||
| crawler: WebCrawler, | ||
| ): | ||
| DocumentStore.clear(db) | ||
|
|
||
| maker = DocumentMaker(db) | ||
| response = crawler.delete(route.append(next(maker), suffix="permanent")) | ||
|
|
||
| assert response.is_error | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not a very significant change but the line where it states that the file gets permanently deleted from S3 should come as the first line