Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a backend for rucio #300

Merged
merged 16 commits into from
Aug 14, 2020
Merged
1 change: 1 addition & 0 deletions strax/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from .storage.common import *
from .storage.files import *
from .storage.rucio import *
from .storage.mongo import *
from .storage.s3 import *
from .storage.zipfiles import *
Expand Down
2 changes: 1 addition & 1 deletion strax/processing/pulse_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def _find_hits(records, min_amplitude, min_height_over_noise,
for i in range(n_samples):
# We can't use enumerate over r['data'],
# numba gives errors if we do.
# TODO: file issue?
# maybe file an issue?
x = r['data'][i]

satisfy_threshold = x >= threshold
Expand Down
48 changes: 48 additions & 0 deletions strax/storage/rucio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import json
import hashlib
import os.path as osp

import strax
from strax.storage.files import dirname_to_prefix

export, __all__ = strax.exporter()


@export
class rucio(strax.StorageBackend):
    """Read-only storage backend for data stored in a rucio directory.

    File locations below ``root_dir`` are computed with ``rucio_path``.
    Saving is not supported: ``_saver`` always raises.
    """

    def __init__(self, root_dir, *args, **kwargs):
        """
        :param root_dir: Directory under which the rucio files are looked up.

        All other arguments are passed on to ``strax.StorageBackend``.
        """
        super().__init__(*args, **kwargs)
        self.root_dir = root_dir

    def get_metadata(self, dirname: str, **kwargs):
        """Load and return the parsed metadata JSON belonging to dirname.

        :param dirname: Name of the dataset; its part before the first '-'
            selects the scope used by ``rucio_path``.
        :raises strax.DataNotAvailable: the folder that should hold the
            metadata file does not exist.
        :raises strax.DataCorrupted: the folder exists, but the metadata
            file is missing from it.
        """
        prefix = dirname_to_prefix(dirname)
        metadata_json = f'{prefix}-metadata.json'
        fn = rucio_path(self.root_dir, metadata_json, dirname)
        # osp.dirname keeps relative paths relative and respects the
        # platform's separator; rebuilding the folder by splitting on '/'
        # and joining onto '/' silently turned a relative root_dir into an
        # absolute path, so the existence check looked in the wrong place.
        folder = osp.dirname(fn)
        if not osp.exists(folder):
            raise strax.DataNotAvailable(f"No folder for metadata at {fn}")
        if not osp.exists(fn):
            raise strax.DataCorrupted(f"Folder exists but no metadata at {fn}")

        with open(fn, mode='r') as f:
            return json.load(f)

    def _read_chunk(self, dirname, chunk_info, dtype, compressor):
        """Load one chunk from the file named by chunk_info['filename']."""
        fn = rucio_path(self.root_dir, chunk_info['filename'], dirname)
        return strax.load_file(fn, dtype=dtype, compressor=compressor)

    def _saver(self, dirname, metadata):
        """Saving is unsupported for this backend; always raises."""
        raise NotImplementedError(
            "Cannot save directly into rucio, upload with admix instead")


def rucio_path(root_dir, filename, dirname):
    """Build the on-disk location of filename following the rucio layout.

    The scope is "xnt_" plus the part of dirname before its first '-'.
    The md5 hex digest of "<scope>:<filename>" supplies two nested
    two-character subdirectories, giving
    root_dir/scope/<digest[0:2]>/<digest[2:4]>/filename.
    """
    run_part = dirname.split('-')[0]
    scope = f"xnt_{run_part}"
    did = f"{scope}:{filename}"
    digest = hashlib.md5(did.encode('utf-8')).hexdigest()
    first_level, second_level = digest[:2], digest[2:4]
    return osp.join(root_dir, scope, first_level, second_level, filename)
1 change: 0 additions & 1 deletion strax/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ def profile_threaded(filename):
monitoring_gil = True
except (RuntimeError, ImportError):
monitoring_gil = False
pass

yappi.start()
yield
Expand Down