Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions backend/app/api/docs/collections/create.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ pipeline:
* Attach the Vector Store to an OpenAI
[Assistant](https://platform.openai.com/docs/api-reference/assistants). Use
parameters in the request body relevant to an Assistant to flesh out
its configuration.
its configuration. Note that an Assistant is only created when you pass both
"model" and "instructions" in the request body; otherwise, only a vector store is
created from the given documents.

If any one of the OpenAI interactions fails, all OpenAI resources are
cleaned up. If a Vector Store is unable to be created, for example,
Expand All @@ -23,5 +25,5 @@ The immediate response from the endpoint is `collection_job` object which is
going to contain the collection "job ID", status and action type ("CREATE").
Once the collection has been created, information about the collection will
be returned to the user via the callback URL. If a callback URL is not provided,
clients can poll the `collection job info` endpoint with the `id` in the
clients can check the `collection job info` endpoint, using the `id` from the returned
`collection_job` object (this is the job ID), to retrieve the same information.
4 changes: 3 additions & 1 deletion backend/app/api/docs/collections/delete.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ documents can still be accessed via the documents endpoints. The response from t
endpoint will be a `collection_job` object which will contain the collection `job ID`,
status and action type ("DELETE"). When you pass the returned `id` to the collection job
info endpoint and the job has succeeded, the status will be successful and nothing will
be returned as the collection as it has been deleted and marked as deleted.
be returned for the collection as it has been deleted. Additionally, if a `callback_url` was
provided in the request body, you will receive a message indicating whether the deletion was
successful.
3 changes: 1 addition & 2 deletions backend/app/api/docs/collections/job_info.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Retrieve information about a collection job by the collection job ID. This endpoint can be considered the polling endpoint for collection creation job. This endpoint provides detailed status and metadata for a specific collection job
in the AI platform. It is especially useful for:
Retrieve information about a collection job by the collection job ID. This endpoint provides detailed status and metadata for a specific collection job in the AI platform. It is especially useful for:

* Fetching the collection job object, which contains the collection job ID, the collection ID, the status of the job, and any error message.

Expand Down
39 changes: 17 additions & 22 deletions backend/app/api/routes/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
from uuid import UUID
from typing import List

from fastapi import APIRouter, Query
from fastapi import APIRouter, Query, HTTPException
from fastapi import Path as FastPath


from app.api.deps import SessionDep, CurrentUserOrgProject
from app.crud import (
CollectionCrud,
Expand All @@ -20,13 +19,11 @@
CollectionJobCreate,
)
from app.models.collection import (
ResponsePayload,
CreationRequest,
DeletionRequest,
CollectionPublic,
)
from app.utils import APIResponse, load_description
from app.services.collections.helpers import extract_error_message
from app.services.collections import (
create_collection as create_service,
delete_collection as delete_service,
Expand Down Expand Up @@ -55,22 +52,31 @@ def create_collection(
)
)

this = inspect.currentframe()
route = router.url_path_for(this.f_code.co_name)
payload = ResponsePayload(
status="processing", route=route, key=str(collection_job.id)
# True iff both model and instructions were provided in the request body
with_assistant = bool(
getattr(request, "model", None) and getattr(request, "instructions", None)
)

create_service.start_job(
db=session,
request=request,
payload=payload,
collection_job_id=collection_job.id,
project_id=current_user.project_id,
organization_id=current_user.organization_id,
with_assistant=with_assistant,
)

return APIResponse.success_response(collection_job)
metadata = None
if not with_assistant:
metadata = {
"note": (
"This job will create a vector store only (no Assistant). "
"Assistant creation happens when both 'model' and 'instructions' are included."
),
"with_assistant": False,
}

return APIResponse.success_response(collection_job, metadata=metadata)


@router.post(
Expand All @@ -82,30 +88,19 @@ def delete_collection(
current_user: CurrentUserOrgProject,
request: DeletionRequest,
):
collection_crud = CollectionCrud(session, current_user.project_id)
collection = collection_crud.read_one(request.collection_id)

collection_job_crud = CollectionJobCrud(session, current_user.project_id)
collection_job = collection_job_crud.create(
CollectionJobCreate(
action_type=CollectionActionType.DELETE,
project_id=current_user.project_id,
status=CollectionJobStatus.PENDING,
collection_id=collection.id,
collection_id=request.collection_id,
)
)

this = inspect.currentframe()
route = router.url_path_for(this.f_code.co_name)
payload = ResponsePayload(
status="processing", route=route, key=str(collection_job.id)
)

delete_service.start_job(
db=session,
request=request,
payload=payload,
collection=collection,
collection_job_id=collection_job.id,
project_id=current_user.project_id,
organization_id=current_user.organization_id,
Expand Down
47 changes: 21 additions & 26 deletions backend/app/crud/collection/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,6 @@ def __init__(self, session: Session, project_id: int):
self.project_id = project_id

def _update(self, collection: Collection):
if not collection.project_id:
collection.project_id = self.project_id
elif collection.project_id != self.project_id:
err = (
f"Invalid collection ownership: owner_project={self.project_id} "
f"attempter={collection.project_id}"
)
logger.error(
"[CollectionCrud._update] Permission error | "
f"{{'collection_id': '{collection.id}', 'error': '{err}'}}"
)
raise PermissionError(err)

self.session.add(collection)
self.session.commit()
self.session.refresh(collection)
Expand All @@ -53,29 +40,28 @@ def _exists(self, collection: Collection) -> bool:
return present

def create(
self,
collection: Collection,
documents: Optional[list[Document]] = None,
):
self, collection: Collection, documents: Optional[list[Document]] = None
) -> Collection:
existing = None
try:
existing = self.read_one(collection.id)
except HTTPException as e:
if e.status_code == 404:
self.session.add(collection)
self.session.commit()
self.session.refresh(collection)
else:
if e.status_code != 404:
raise
else:

if existing is not None:
logger.warning(
"[CollectionCrud.create] Collection already present | "
f"{{'collection_id': '{collection.id}'}}"
)
return existing

self.session.add(collection)
self.session.commit()
self.session.refresh(collection)

if documents:
dc_crud = DocumentCollectionCrud(self.session)
dc_crud.create(collection, documents)
DocumentCollectionCrud(self.session).create(collection, documents)

return collection

Expand Down Expand Up @@ -116,6 +102,12 @@ def read_all(self):
collections = self.session.exec(statement).all()
return collections

def delete_by_id(self, collection_id: UUID) -> Collection:
    """Mark the collection with the given ID as deleted.

    The collection is looked up through ``read_one``, its ``deleted_at``
    field is stamped with the current time, and the change is persisted
    via ``_update``. The row itself is not removed here.

    Args:
        collection_id: Identifier of the collection to mark as deleted.

    Returns:
        Whatever ``_update`` returns for the modified collection.
    """
    target = self.read_one(collection_id)
    target.deleted_at = now()
    persisted = self._update(target)
    return persisted

@ft.singledispatchmethod
def delete(self, model, remote): # remote should be an OpenAICrud
try:
Expand Down Expand Up @@ -145,7 +137,10 @@ def _(self, model: Document, remote):
DocumentCollection,
DocumentCollection.collection_id == Collection.id,
)
.where(DocumentCollection.document_id == model.id)
.where(
DocumentCollection.document_id == model.id,
Collection.deleted_at.is_(None),
)
.distinct()
)

Expand Down
55 changes: 35 additions & 20 deletions backend/app/models/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Any, Optional

from sqlmodel import Field, Relationship, SQLModel
from pydantic import HttpUrl
from pydantic import HttpUrl, model_validator

from app.core.util import now
from .organization import Organization
Expand Down Expand Up @@ -36,21 +36,6 @@ class Collection(SQLModel, table=True):
project: Project = Relationship(back_populates="collections")


class ResponsePayload(SQLModel):
    """Response metadata for background jobs.

    Carries a status label, the route associated with the job, a string
    key (a freshly generated UUID4 string unless one is supplied), and a
    timestamp (taken from the module-level ``now`` helper unless supplied).
    """

    # Free-form status label for the job.
    status: str
    # Route path associated with the job.
    route: str
    # Job key; defaults to a new UUID4 rendered as a string.
    key: str = Field(default_factory=lambda: str(uuid4()))
    # Timestamp; defaults to the value returned by the imported ``now`` helper.
    time: datetime = Field(default_factory=now)

    @classmethod
    def now(cls):
        """Return the current time from the module-level ``now`` helper.

        NOTE(review): the earlier docstring described the value as UTC
        without timezone info; that depends on the helper's implementation,
        which is not visible here. Confirm before relying on it.
        """
        return now()


# pydantic models -
class DocumentOptions(SQLModel):
documents: list[UUID] = Field(
Expand All @@ -73,27 +58,57 @@ class AssistantOptions(SQLModel):
# Fields to be passed along to OpenAI. They must be a subset of
# parameters accepted by the OpenAI.client.beta.assistants.create
# API.
model: str = Field(
model: Optional[str] = Field(
default=None,
description=(
"**[To Be Deprecated]** "
"OpenAI model to attach to this assistant. The model "
"must be compatable with the assistants API; see the "
"OpenAI [model documentation](https://platform.openai.com/docs/models/compare) for more."
),
)
instructions: str = Field(

instructions: Optional[str] = Field(
default=None,
description=(
"Assistant instruction. Sometimes referred to as the " '"system" prompt.'
"**[To Be Deprecated]** "
"Assistant instruction. Sometimes referred to as the "
'"system" prompt.'
),
)
temperature: float = Field(
default=1e-6,
description=(
"**[To Be Deprecated]** "
"Model temperature. The default is slightly "
"greater-than zero because it is [unknown how OpenAI "
"handles zero](https://community.openai.com/t/clarifications-on-setting-temperature-0/886447/5)."
),
)

@model_validator(mode="before")
@classmethod
def _assistant_fields_all_or_none(cls, values: Any) -> Any:
    """Require 'model' and 'instructions' to be given together or not at all.

    Runs before field validation. String values are stripped, and blank
    strings are treated as absent (``None``). If exactly one of the two
    fields ends up present, validation fails.

    Args:
        values: Raw input handed to the model. This is usually a dict, but a
            ``mode="before"`` validator may receive any object.

    Returns:
        A new dict with normalized 'model' and 'instructions' entries, or the
        input unchanged when it is not a dict.

    Raises:
        ValueError: If only one of 'model' / 'instructions' is provided.
    """
    # A "before" validator sees the raw payload, which may be a list, a
    # string, or a model instance. Leave those for Pydantic to reject with a
    # proper validation error instead of failing on ``.get`` here.
    if not isinstance(values, dict):
        return values

    def norm(x: Any) -> Any:
        if x is None:
            return None
        if isinstance(x, str):
            s = x.strip()
            return s if s else None
        return x  # let Pydantic handle non-strings

    model = norm(values.get("model"))
    instructions = norm(values.get("instructions"))

    if (model is None) ^ (instructions is None):
        raise ValueError(
            "To create an Assistant, provide BOTH 'model' and 'instructions'. "
            "If you only want a vector store, remove both fields."
        )

    # Build a new mapping so the caller's input dict is not mutated.
    return {**values, "model": model, "instructions": instructions}


class CallbackRequest(SQLModel):
callback_url: Optional[HttpUrl] = Field(
Expand All @@ -108,7 +123,7 @@ class CreationRequest(
CallbackRequest,
):
def extract_super_type(self, cls: "CreationRequest"):
for field_name in cls.__fields__.keys():
for field_name in cls.model_fields.keys():
field_value = getattr(self, field_name)
yield (field_name, field_value)

Expand Down
Loading