Skip to content

Commit

Permalink
Replace elide_data_returned with count timeout
Browse files (browse the repository at this point in the history)
  • Loading branch information
ml-evs committed Sep 27, 2023
1 parent e7ed485 commit 13521da
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 14 deletions.
7 changes: 4 additions & 3 deletions optimade/server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,10 @@ class ServerConfig(BaseSettings):
None, description="Host settings to pass through to the `Elasticsearch` class."
)

elide_data_returned: bool = Field(
False,
description="Whether to skip counting all the results for every query (to set the `data_returned` field), as this may be too strenuous for large databases. Currently only supports MongoDB.",
mongo_count_timeout: int = Field(
5,
description="""Number of seconds to allow MongoDB to perform a full database count before falling back to `null`.
This operation can require a full COLLSCAN for empty queries which can be prohibitively slow if the database does not fit into the active set, hence a timeout can drastically speed-up response times.""",
)

mongo_database: str = Field(
Expand Down
2 changes: 1 addition & 1 deletion optimade/server/entry_collections/entry_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def insert(self, data: List[EntryResource]) -> None:
"""

@abstractmethod
def count(self, **kwargs: Any) -> int:
def count(self, **kwargs: Any) -> Union[int, None]:
"""Returns the number of entries matching the query specified
by the keyword arguments.
Expand Down
24 changes: 14 additions & 10 deletions optimade/server/entry_collections/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

if CONFIG.database_backend.value == "mongodb":
from pymongo import MongoClient, version_tuple
from pymongo.errors import ExecutionTimeout

if version_tuple[0] < 4:
LOGGER.warning(
Expand Down Expand Up @@ -67,9 +68,9 @@ def __len__(self) -> int:
"""Returns the total number of entries in the collection."""
return self.collection.estimated_document_count()

def count(self, **kwargs: Any) -> int:
def count(self, **kwargs: Any) -> Union[int, None]:
"""Returns the number of entries matching the query specified
by the keyword arguments.
by the keyword arguments, or `None` if the count timed out.
Parameters:
**kwargs: Query parameters as keyword arguments. The keys
Expand All @@ -80,11 +81,15 @@ def count(self, **kwargs: Any) -> int:
for k in list(kwargs.keys()):
if k not in ("filter", "skip", "limit", "hint", "maxTimeMS"):
del kwargs[k]
if "filter" not in kwargs: # "filter" is needed for count_documents()
kwargs["filter"] = {}
if "filter" not in kwargs:
return self.collection.estimated_document_count()

Check warning on line 85 in optimade/server/entry_collections/mongo.py

View check run for this annotation

Codecov / codecov/patch

optimade/server/entry_collections/mongo.py#L85

Added line #L85 was not covered by tests
else:
return self.collection.count_documents(**kwargs)
if "maxTimeMS" not in kwargs:
kwargs["maxTimeMS"] = 1000 * CONFIG.mongo_count_timeout
try:
return self.collection.count_documents(**kwargs)
except ExecutionTimeout:
return None

Check warning on line 92 in optimade/server/entry_collections/mongo.py

View check run for this annotation

Codecov / codecov/patch

optimade/server/entry_collections/mongo.py#L91-L92

Added lines #L91 - L92 were not covered by tests

def insert(self, data: List[EntryResource]) -> None:
"""Add the given entries to the underlying database.
Expand Down Expand Up @@ -164,13 +169,12 @@ def _run_db_query(
criteria_nolimit = criteria.copy()
criteria_nolimit.pop("limit", None)
skip = criteria_nolimit.pop("skip", 0)
if CONFIG.elide_data_returned:
data_returned = None
# Only correct most of the time: if the total number of remaining results is exactly the page limit
# then this will incorrectly say there is more_data_available
data_returned = self.count(**criteria_nolimit)
# Only correct most of the time: if the total number of remaining results is exactly the page limit
# then this will incorrectly say there is more_data_available
if data_returned is None:
more_data_available = nresults_now == criteria.get("limit", 0)

Check warning on line 176 in optimade/server/entry_collections/mongo.py

View check run for this annotation

Codecov / codecov/patch

optimade/server/entry_collections/mongo.py#L176

Added line #L176 was not covered by tests
else:
data_returned = self.count(**criteria_nolimit)
more_data_available = nresults_now + skip < data_returned
else:
# SingleEntryQueryParams, e.g., /structures/{entry_id}
Expand Down

0 comments on commit 13521da

Please sign in to comment.