Skip to content

Commit

Permalink
Remove UT dependent deadcode
Browse files Browse the repository at this point in the history
  • Loading branch information
kartik4949 committed May 23, 2024
1 parent 8b11bc9 commit f1c210b
Show file tree
Hide file tree
Showing 22 changed files with 38 additions and 603 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fix pandas database (in-memory)
- Add and update docstrings in component classes and methods.
- Changed the rest implementation to use new serialization
- Remove unused dead code from the project

#### New Features & Functionality

Expand Down
30 changes: 0 additions & 30 deletions superduperdb/backends/base/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,6 @@ def delete_artifact(self, r: t.Dict):
for file_path in r['_files']:
self._delete_bytes(file_path)

def update_artifact(self, old_r: t.Dict, new_r: t.Dict):
    """Replace a stored artifact with a new one.

    The artifact described by ``old_r`` is deleted first, then ``new_r`` is
    saved and the result of that save is returned.

    :param old_r: dictionary describing the artifact to delete
    :param new_r: dictionary describing the artifact to save
    """
    self.delete_artifact(old_r)
    saved = self.save_artifact(new_r)
    return saved

@abstractmethod
def get_bytes(self, file_id: str) -> bytes:
"""
Expand All @@ -158,25 +147,6 @@ def get_file(self, file_id: str) -> str:
"""
pass

def load_artifact(self, r):
    """
    Fetch an artifact from the store and decode it with its datatype.

    The serializer is looked up in ``self.serializers`` under
    ``r['datatype']``. If ``r['encodable']`` equals ``'file'`` the result of
    ``get_file`` is decoded; otherwise the result of ``get_bytes`` is decoded.

    :param r: Mandatory fields {'file_id', 'datatype'}
    """
    serializer = self.serializers[r['datatype']]
    identifier = r.get('file_id')
    if r.get('encodable') == 'file':
        return serializer.decode_data(self.get_file(identifier))

    # TODO We should always have file_id available at load time (because saved)
    source_uri = r.get('uri')
    if identifier is None:
        # Without a stored file id, the id is derived from the uri instead.
        assert source_uri is not None, '"uri" and "file_id" can\'t both be None'
        identifier = _construct_file_id_from_uri(source_uri)
    return serializer.decode_data(self.get_bytes(identifier))

@abstractmethod
def disconnect(self):
"""Disconnect the client."""
Expand Down
62 changes: 0 additions & 62 deletions superduperdb/backends/base/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,20 +155,6 @@ def show_component_versions(self, type_id: str, identifier: str):
"""
pass

def get_indexing_listener_of_vector_index(
    self, identifier: str, version: t.Optional[int] = None
):
    """Return the identifier of the listener that indexes a vector index.

    The ``'vector_index'`` component is fetched with ``get_component`` and the
    value at ``['dict']['indexing_listener']['identifier']`` is returned.

    :param identifier: identifier of vector index
    :param version: version of vector index
    """
    component = self.get_component(
        'vector_index', identifier=identifier, version=version
    )
    listener = component['dict']['indexing_listener']
    return listener['identifier']

@abstractmethod
def delete_component_version(self, type_id: str, identifier: str, version: int):
"""
Expand Down Expand Up @@ -197,16 +183,6 @@ def component_version_has_parents(
"""
pass

# TODO - do we still need this or can it be handled by configuration?
@abstractmethod
def get_metadata(self, key):
    """
    Look up a metadata entry in the metadata store.

    Abstract: this base definition has no behaviour of its own.

    :param key: key of metadata
    """

@abstractmethod
def get_latest_version(
self, type_id: str, identifier: str, allow_hidden: bool = False
Expand Down Expand Up @@ -274,34 +250,6 @@ def _update_object(
):
pass

def update_object(
    self,
    identifier: str,
    type_id: str,
    key: str,
    value: str,
    version: t.Optional[int] = None,
):
    """
    Update an object in the metadata store.

    When ``version`` is omitted, the version reported by
    ``get_latest_version`` is the one updated.

    :param identifier: identifier of object
    :param type_id: type of object
    :param key: key to be updated
    :param value: value to be updated
    :param version: version of object; defaults to the latest version
    """
    # Fall back to the latest version only when the caller did not pin one.
    # The previous ``is not None`` test discarded an explicit version and left
    # a missing one unresolved, so the assertion below always failed for it.
    if version is None:
        version = self.get_latest_version(type_id, identifier)
    assert isinstance(version, int)
    return self._update_object(
        identifier=identifier,
        type_id=type_id,
        key=key,
        value=value,
        version=version,
    )

@abstractmethod
def _replace_object(self, info, identifier, type_id, version):
    """Abstract hook with no behaviour in this base definition.

    NOTE(review): presumably replaces the stored record of the given
    component version with ``info`` -- confirm against the implementations.

    :param info: replacement information
    :param identifier: identifier of the object
    :param type_id: type of the object
    :param version: version of the object
    """
Expand Down Expand Up @@ -361,16 +309,6 @@ def hide_component_version(self, type_id: str, identifier: str, version: int):
"""
pass

@abstractmethod
def update_metadata(self, key, value):
    """
    Set a metadata entry in the metadata store.

    Abstract: this base definition has no behaviour of its own.

    :param key: Key of metadata
    :param value: Value of metadata
    """

def add_query(self, query: 'Select', model: str):
"""Add query id to query table.
Expand Down
58 changes: 14 additions & 44 deletions superduperdb/backends/mongodb/data_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from superduperdb.backends.ibis.field_types import FieldType
from superduperdb.backends.mongodb.artifacts import MongoArtifactStore
from superduperdb.backends.mongodb.metadata import MongoMetaDataStore
from superduperdb.base.document import Document
from superduperdb.base.enums import DBType
from superduperdb.components.datatype import DataType
from superduperdb.misc.colors import Colors
Expand Down Expand Up @@ -97,53 +96,10 @@ def get_table_or_collection(self, identifier):
"""
return self._db[identifier]

def unset_outputs(self, info: t.Dict):
    """Remove a model's output field from the selected documents.

    The select query is decoded from ``info['select']`` and an ``$unset`` of
    ``_outputs.<key>.<model>`` is applied through ``update_many`` on the
    query's collection.

    :param info: dictionary containing information about the output field;
        the keys ``'select'``, ``'key'`` and ``'model'`` are read
    """
    query = Document.decode(info['select']).unpack()
    output_field = f'_outputs.{info["key"]}.{info["model"]}'
    logging.info(f'unsetting output field {output_field}')
    unset_op = query.update({'$unset': {output_field: 1}})
    collection = self.db[query.collection]
    return collection.update_many(unset_op.filter, unset_op.update)

def list_vector_indexes(self):
    """List search indexes that map an ``_outputs`` field.

    For every collection, ``listSearchIndexes`` is issued and only the first
    entry of the returned first batch is inspected; its name is collected when
    ``_outputs`` appears among its mapped fields.
    """
    names = []
    for collection in self.db.list_collection_names():
        response = self.db.command({'listSearchIndexes': collection})
        try:
            first = response['cursor']['firstBatch'][0]
        except IndexError:
            # Collection has no search index at all.
            continue
        mapped_fields = first['latestDefinition']['mappings']['fields']
        if '_outputs' in mapped_fields:
            names.append(first['name'])
    return names

def list_tables_or_collections(self):
    """Return the collection names reported by the underlying database."""
    collection_names = self.db.list_collection_names()
    return collection_names

def delete_vector_index(self, vector_index):
    """
    Drop the search index backing a vector index (Atlas deployments).

    Issues a ``dropSearchIndex`` command against the collection referenced by
    the vector index's indexing listener.

    :param vector_index: vector index to delete
    """
    # see `VectorIndex` class for details
    # indexing_listener contains a `Select` object
    assert not isinstance(vector_index.indexing_listener, str)
    listener_select = vector_index.indexing_listener.select

    # TODO: probably MongoDB queries should all have a common base class
    drop_command = {
        "dropSearchIndex": listener_select.table_or_collection.identifier,
        "name": vector_index.identifier,
    }
    self.db.command(drop_command)

def disconnect(self):
"""Disconnect the client."""

Expand All @@ -165,6 +121,20 @@ def create_output_dest(
"""
pass

def exists(self, table_or_collection, id, key):
    """Check if a document exists in the data backend.

    A document counts as existing when one with the given ``_id`` has the
    field ``<key>._content.bytes`` present.

    :param table_or_collection: table or collection identifier
    :param id: document identifier
    :param key: key to check
    """
    query = {'_id': id, f'{key}._content.bytes': {'$exists': 1}}
    match = self.db[table_or_collection].find_one(query)
    return match is not None

def check_output_dest(self, predict_id) -> bool:
"""Check if the output destination exists.
Expand Down
15 changes: 0 additions & 15 deletions superduperdb/backends/mongodb/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,21 +89,6 @@ def get_job(self, identifier: str):
"""
return self.job_collection.find_one({'identifier': identifier})

def get_metadata(self, key: str):
    """Return the ``value`` of the metadata record stored under ``key``.

    :param key: key to be retrieved
    """
    record = self.meta_collection.find_one({'key': key})
    return record['value']

def update_metadata(self, key: str, value: str):
    """Set the ``value`` of the metadata record stored under ``key``.

    Returns whatever ``meta_collection.update_one`` returns.

    :param key: key to be updated
    :param value: value to be updated
    """
    selector = {'key': key}
    change = {'$set': {'value': value}}
    return self.meta_collection.update_one(selector, change)

def get_latest_version(
self, type_id: str, identifier: str, allow_hidden: bool = False
) -> int:
Expand Down
34 changes: 0 additions & 34 deletions superduperdb/backends/sqlalchemy/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
from superduperdb.backends.sqlalchemy.db_helper import get_db_config
from superduperdb.misc.colors import Colors

if t.TYPE_CHECKING:
pass


class SQLAlchemyMetadata(MetaDataStore):
"""
Expand Down Expand Up @@ -378,29 +375,6 @@ def _replace_object(self, info, identifier, type_id, version):
)
session.execute(stmt)

def replace_component(
    self,
    info: t.Dict[str, t.Any],
    identifier: str,
    type_id: str,
    version: t.Optional[int] = None,
) -> None:
    """Replace a component in the metadata store.

    When ``version`` is omitted, the version reported by
    ``get_latest_version`` is the one replaced.

    :param info: the information to replace
    :param identifier: the identifier of the component
    :param type_id: the type of the component
    :param version: the version of the component; defaults to the latest
    """
    # Resolve the latest version only when none was requested. The previous
    # ``is not None`` test overwrote an explicit version and passed ``None``
    # through when the version was omitted.
    if version is None:
        version = self.get_latest_version(type_id, identifier)
    return self._replace_object(
        info=info,
        identifier=identifier,
        type_id=type_id,
        version=version,
    )

def show_components(self, type_id: t.Optional[str] = None):
"""Show all components in the database.
Expand Down Expand Up @@ -482,14 +456,6 @@ def get_job(self, job_id: str):
res = self.query_results(self.job_table, stmt, session)
return res[0] if res else None

def listen_job(self, identifier: str):
    """Listen to a job; always raises because this is unsupported here.

    :param identifier: the identifier of the job
    :raises NotImplementedError: on every call
    """
    # Not supported currently
    raise NotImplementedError()

def show_jobs(
self,
component_identifier: t.Optional[str] = None,
Expand Down
3 changes: 0 additions & 3 deletions superduperdb/base/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class SuperDuperCursor:
:param id_field: the field to use as the document id
:param db: the datalayer to use to decode the documents
:param scores: a dict of scores to add to the documents
:param decode_function: a function to use to decode the documents
:param schema: the schema to use to decode the documents
:param _it: an iterator to keep track of the current position in the cursor,
Default is 0.
Expand All @@ -30,8 +29,6 @@ class SuperDuperCursor:
id_field: str
db: t.Optional['Datalayer'] = None
scores: t.Optional[t.Dict[str, float]] = None
# TODO: Remove the unused decode_function
decode_function: t.Optional[t.Callable] = None
schema: t.Optional['Schema'] = None

_it: int = 0
Expand Down
20 changes: 20 additions & 0 deletions superduperdb/base/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,26 @@ def _find_variables(r):
return []


def _find_variables_with_path(r):
    """Collect every ``Variable`` nested in ``r`` along with its location.

    Dicts, lists and tuples are walked recursively. Each hit is reported as
    ``{'path': [...], 'variable': <Variable>}`` where ``path`` is the list of
    dict keys / sequence indices leading from ``r`` to the variable (empty
    when ``r`` itself is the variable). Any other value yields no hits.

    :param r: arbitrarily nested structure to search
    """
    # The type checks keep the original precedence: dict, then list/tuple,
    # then Variable.
    if isinstance(r, dict):
        children = r.items()
    elif isinstance(r, (list, tuple)):
        children = enumerate(r)
    elif isinstance(r, Variable):
        return [{'path': [], 'variable': r}]
    else:
        return []

    # Prefix each hit found below a child with that child's key or index.
    return [
        {'path': [step, *found['path']], 'variable': found['variable']}
        for step, child in children
        for found in _find_variables_with_path(child)
    ]


def _replace_variables(x, db, **kwargs):
from .document import Document

Expand Down
Loading

0 comments on commit f1c210b

Please sign in to comment.