Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DOCS0-2] Document all public classes, methods and functions #2050

Merged
merged 1 commit into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

#### Changed defaults / behaviours

- Add docstrings in component classes and methods.
- Run Tests from within the container
- Add model dict output indexing in graph
- Make lance upsert for added vectors
Expand All @@ -20,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- At the end of the test, drop the collection instead of the database
- Force load vector indices during backfill
- Fix pandas database (in-memory)
- Add docstrings in component classes and methods.

#### New Features & Functionality
- Add nightly image for pre-release testing in the cloud environment
Expand Down
16 changes: 14 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,21 @@ extend-select = [
#"W", # PyCode Warning
"E", # PyCode Error
#"N", # pep8-naming
#"D", # pydocstyle
"D", # pydocstyle
]
ignore = [
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which ruff settings are these?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"D100", # Missing docstring in public module
"D104", # Missing docstring in public package
"D107", # Missing docstring in __init__
"D105", # Missing docstring in magic method
"D212", # Multi-line docstring summary should start at the first line
"D213", # Multi-line docstring summary should start at the second line
"D401", # First line of docstring should be in imperative mood
"E402", # Module level import not at top of file
]
ignore = ["E402"]

[tool.ruff.isort]
combine-as-imports = true

[tool.ruff.per-file-ignores]
"test/**" = ["D"]
6 changes: 3 additions & 3 deletions superduperdb/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@


def run():
"""
Entrypoint for the CLI. This is the function that is called when the
user runs `python -m superduperdb`.
"""Entrypoint for the CLI.

This is the function that is called when the user runs `python -m superduperdb`.
"""
try:
app(standalone_mode=False)
Expand Down
43 changes: 28 additions & 15 deletions superduperdb/backends/base/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,36 @@ def __init__(

@property
def serializers(self):
"""Return the serializers."""
assert self._serializers is not None, 'Serializers not initialized!'
return self._serializers

@serializers.setter
def serializers(self, value):
"""Set the serializers.

:param value: The serializers.
"""
self._serializers = value

@abstractmethod
def url(self):
"""
Artifact store connection url
"""
"""Artifact store connection url."""
pass

@abstractmethod
def _delete_artifact(self, file_id: str):
"""
Delete artifact from artifact store
:param file_id: File id uses to identify artifact in store
"""Delete artifact from artifact store.

:param file_id: File id used to identify artifact in store.
"""

def delete(self, r: t.Dict):
"""Delete artifact from artifact store.

:param r: dictionary with mandatory fields
{'file_id'}
"""
if '_content' in r and 'file_id' in r['_content']:
return self._delete_artifact(r['_content']['file_id'])
for v in r.values():
Expand All @@ -76,6 +84,12 @@ def exists(
datatype: t.Optional[str] = None,
uri: t.Optional[str] = None,
):
"""Check if artifact exists in artifact store.

:param file_id: file id of artifact in the store
:param datatype: Datatype of the artifact
:param uri: URI of the artifact
"""
if file_id is None:
assert uri is not None, "if file_id is None, uri can\'t be None"
file_id = _construct_file_id_from_uri(uri)
Expand All @@ -91,7 +105,7 @@ def _save_bytes(self, serialized: bytes, file_id: str):

@abstractmethod
def _save_file(self, file_path: str, file_id: str) -> str:
"""Save file in artifact store and return file_id"""
"""Save file in artifact store and return file_id."""
pass

def save_artifact(self, r: t.Dict):
Expand Down Expand Up @@ -147,7 +161,7 @@ def _load_bytes(self, file_id: str) -> bytes:
@abstractmethod
def _load_file(self, file_id: str) -> str:
"""
Load file from artifact store and return path
Load file from artifact store and return path.

:param file_id: Identifier of artifact in the store
"""
Expand All @@ -159,7 +173,6 @@ def load_artifact(self, r):

:param r: Mandatory fields {'file_id', 'datatype'}
"""

datatype = self.serializers[r['datatype']]
file_id = r.get('file_id')
if r.get('encodable') == 'file':
Expand All @@ -174,9 +187,9 @@ def load_artifact(self, r):
return datatype.decode_data(x)

def save(self, r: t.Dict) -> t.Dict:
"""
Save list of artifacts and replace the artifacts with file reference
:param r: `dict` of artifacts
"""Save list of artifacts and replace the artifacts with file reference.

:param r: `dict` of artifacts.
"""
if isinstance(r, dict):
if '_content' in r and r['_content']['leaf_type'] in {
Expand All @@ -196,11 +209,11 @@ def save(self, r: t.Dict) -> t.Dict:

@abstractmethod
def disconnect(self):
"""
Disconnect the client
"""
"""Disconnect the client."""
pass


class ArtifactSavingError(Exception):
"""Error when saving artifact in artifact store fails."""

pass
34 changes: 10 additions & 24 deletions superduperdb/backends/base/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,20 @@


class ComputeBackend(ABC):
"""
Abstraction for sending jobs to a distributed compute platform.
"""
"""Abstraction for sending jobs to a distributed compute platform."""

@abstractproperty
def type(self) -> str:
"""
Return the type of compute engine
"""
"""Return the type of compute engine."""
pass

@abstractproperty
def name(self) -> str:
"""
Return the name of current compute engine
"""
"""Return the name of current compute engine."""
pass

def get_local_client(self):
'''Returns a local version of self'''
"""Returns a local version of self."""
pass

@abstractmethod
Expand All @@ -37,22 +31,18 @@ def submit(self, function: t.Callable, **kwargs) -> t.Any:

@abstractproperty
def tasks(self) -> t.Any:
"""
List for all tasks
"""
"""List all tasks."""
pass

@abstractmethod
def wait_all(self) -> None:
"""
Waits for all pending tasks to complete.
"""
"""Waits for all pending tasks to complete."""
pass

@abstractmethod
def result(self, identifier: str) -> t.Any:
"""
Retrieves the result of a previously submitted task.
"""Retrieves the result of a previously submitted task.

Note: This will block until the future is completed.

:param identifier: The identifier of the submitted task.
Expand All @@ -61,14 +51,10 @@ def result(self, identifier: str) -> t.Any:

@abstractmethod
def disconnect(self) -> None:
"""
Disconnect the client.
"""
"""Disconnect the client."""
pass

@abstractmethod
def shutdown(self) -> None:
"""
Shuts down the compute cluster.
"""
"""Shuts down the compute cluster."""
pass
55 changes: 36 additions & 19 deletions superduperdb/backends/base/data_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@


class BaseDataBackend(ABC):
"""Base data backend for the database.

:param conn: The connection to the databackend database.
:param name: The name of the databackend.
"""

db_type = None

def __init__(self, conn: t.Any, name: str):
Expand All @@ -16,27 +22,22 @@ def __init__(self, conn: t.Any, name: str):

@property
def db(self):
"""Return the datalayer."""
raise NotImplementedError

@abstractmethod
def url(self):
"""
Databackend connection url
"""
"""Databackend connection url."""
pass

@abstractmethod
def build_metadata(self):
"""
Build a default metadata store based on current connection.
"""
"""Build a default metadata store based on current connection."""
pass

@abstractmethod
def build_artifact_store(self):
"""
Build a default artifact store based on current connection.
"""
"""Build a default artifact store based on current connection."""
pass

@abstractmethod
Expand All @@ -46,41 +47,57 @@ def create_output_dest(
datatype: t.Union[None, DataType, FieldType],
flatten: bool = False,
):
"""Create an output destination for the database.

:param predict_id: The predict id of the output destination.
:param datatype: The datatype of the output destination.
:param flatten: Whether to flatten the output destination.
"""
pass

@abstractmethod
def check_output_dest(self, predict_id) -> bool:
"""Check if the output destination exists.

:param predict_id: The identifier of the output destination.
"""
pass

@abstractmethod
def get_table_or_collection(self, identifier):
"""Get a table or collection from the database.

:param identifier: The identifier of the table or collection.
"""
pass

def set_content_bytes(self, r, key, bytes_):
"""Set content bytes.

:param r: The row.
:param key: The key.
:param bytes_: The bytes.
"""
raise NotImplementedError

@abstractmethod
def drop(self, force: bool = False):
"""
Drop the databackend.
"""Drop the databackend.

:param force: If ``True``, don't ask for confirmation.
"""

@abstractmethod
def disconnect(self):
"""
Disconnect the client
"""
"""Disconnect the client."""

@abstractmethod
def list_tables_or_collections(self):
"""
List all tables or collections in the database.
"""
"""List all tables or collections in the database."""

@staticmethod
def infer_schema(data: t.Mapping[str, t.Any], identifier: t.Optional[str] = None):
"""
Infer a schema from a given data object
"""Infer a schema from a given data object.

:param data: The data object
:param identifier: The identifier for the schema, if None, it will be generated
Expand Down