Skip to content

Commit

Permalink
Merge pull request #349 from AxFoundation/max_buffer_mongo_backend
Browse files Browse the repository at this point in the history
limit buffer for mongo backend
  • Loading branch information
JoranAngevaare committed Nov 13, 2020
2 parents 3f6216b + b16240e commit 152801b
Showing 1 changed file with 32 additions and 0 deletions.
32 changes: 32 additions & 0 deletions strax/storage/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@
from datetime import datetime
from pytz import utc as py_utc
from warnings import warn
from sys import getsizeof
export, __all__ = strax.exporter()

# Chunk metadata read from the database is buffered in memory by the
# backend. The oldest buffered entries are deleted when either of these
# limits is exceeded.
DEFAULT_MONGO_BACKEND_BUFFER_MB = 200
DEFAULT_MONGO_BACKEND_BUFFER_NRUNS = 5


@export
class MongoBackend(StorageBackend):
Expand All @@ -32,7 +38,12 @@ def __init__(self, uri, database, col_name=None):
self.client = MongoClient(uri)
self.db = self.client[database]
self.col_name = col_name

# Attributes for the chunks-buffer
self.chunks_registry = {}
self._buffered_backend_keys = []
self._buff_mb = DEFAULT_MONGO_BACKEND_BUFFER_MB
self._buff_nruns = DEFAULT_MONGO_BACKEND_BUFFER_NRUNS

def _read_chunk(self, backend_key, chunk_info, dtype, compressor):
"""See strax.Backend"""
Expand Down Expand Up @@ -115,6 +126,27 @@ def _build_chunk_registry(self, backend_key):
# issues or json-encoding headaches.
self.chunks_registry[backend_key + str(chunk_key)] = doc.copy()

# Some bookkeeping to make sure we don't buffer too much in this
# backend. The size limit alone never evicts the last remaining
# backend key (hence the 'and'), so there is always something to return.
# See: https://github.com/AxFoundation/strax/issues/346
if backend_key not in self._buffered_backend_keys:
self._buffered_backend_keys.append(backend_key)
while ((getsizeof(self.chunks_registry) / 1e6 > self._buff_mb
and len(self._buffered_backend_keys) > 1)
or len(self._buffered_backend_keys) > self._buff_nruns):
self._clean_first_key_from_registry()

def _clean_first_key_from_registry(self):
    """
    Evict the oldest buffered backend key from the chunks-buffer.

    Removes the first item of self._buffered_backend_keys and all the
    entries in self.chunks_registry that were registered for it, to
    limit RAM-usage. Does nothing when no backend key is buffered.
    """
    if not self._buffered_backend_keys:
        # Nothing is buffered, so there is nothing to evict.
        return
    # Only clean the first (oldest) entry from the list
    to_clean = self._buffered_backend_keys.pop(0)
    # Registry keys are built as backend_key + str(chunk_key), so match on
    # the prefix. A plain substring test would also evict the entries of
    # other backend keys that merely contain this one somewhere.
    # NOTE(review): a backend key that is itself a prefix of another
    # backend key would still match both.
    # Iterate over a copy of the keys since we delete while looping.
    for registry_key in list(self.chunks_registry):
        if registry_key.startswith(to_clean):
            del self.chunks_registry[registry_key]

@export
class MongoFrontend(StorageFrontend):
Expand Down

0 comments on commit 152801b

Please sign in to comment.