Skip to content

Commit

Permalink
Enable RTDC copier to accept a list of features (#247)
Browse files Browse the repository at this point in the history
* enh: enable selected features to be copied

* test: copy a list of features with rtdc_copy

* Update CHANGELOG

* reg: iterate over feature list

* Include negative control test

---------

Co-authored-by: ralajan <raghava.alajangi@mpl.mpg.de>
Co-authored-by: Paul Müller <paulmueller@users.noreply.github.com>
  • Loading branch information
3 people committed Jan 25, 2024
1 parent 6fef6c1 commit 03a84a0
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
0.57.2
- enh: allow specifying a list of features for the data copier
0.57.1
- fix: RTDCWriter.rectify_metadata fails when image feature is empty
- fix: handle empty write requests in export.hdf5 and RTDCWriter (#242)
Expand Down
46 changes: 39 additions & 7 deletions dclab/rtdc_dataset/copier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Helper methods for copying .rtdc data"""
from __future__ import annotations

from typing import Literal
from typing import List, Literal

import h5py
import h5py.h5o
Expand All @@ -15,11 +15,30 @@

def rtdc_copy(src_h5file: h5py.Group,
dst_h5file: h5py.Group,
features: Literal['all', 'scalar', 'none'] = "all",
features: List[str] | Literal['all', 'scalar', 'none'] = "all",
include_logs: bool = True,
include_tables: bool = True,
meta_prefix: str = ""):
"""Create a compressed copy of an RT-DC file"""
"""Create a compressed copy of an RT-DC file
Parameters
----------
src_h5file: h5py.Group
Input HDF5 file
dst_h5file: h5py.Group
Output HDF5 file
features: list of strings or one of ['all', 'scalar', 'none']
If this is a list then it specifies the features that are copied from
`src_h5file` to `dst_h5file`. Alternatively, you may specify 'all'
(copy all features), 'scalar' (copy only scalar features), or 'none'
(don't copy any features).
include_logs: bool
Copy the logs from `src_h5file` to `dst_h5file`.
include_tables: bool
Copy the tables from `src_h5file` to `dst_h5file`.
meta_prefix: str
Add this prefix to the name of the logs and tables in `dst_h5file`.
"""
# metadata
for akey in src_h5file.attrs:
dst_h5file.attrs[akey] = src_h5file.attrs[akey]
Expand Down Expand Up @@ -55,11 +74,24 @@ def rtdc_copy(src_h5file: h5py.Group,
**hdf5plugin.Zstd(clevel=5))

# events
if features != "none":
scalar_only = features == "scalar"
if isinstance(features, list):
feature_iter = features
elif features == "all":
feature_iter = list(src_h5file["events"])
elif features == "scalar":
feature_iter = [feat for feat in src_h5file["events"]
if feature_exists(feat, scalar_only=True)]
elif features == "none":
feature_iter = []
else:
raise ValueError(f"`features` must be either a list of feature names "
f"or one of 'all', 'scalar' or 'none', got "
f"'{features}'")

if feature_iter:
dst_h5file.require_group("events")
for feat in src_h5file["events"]:
if feature_exists(feat, scalar_only=scalar_only):
for feat in feature_iter:
if feature_exists(feat):
# Skip all defective features. These are features that
# are known to be invalid (e.g. ancillary features that
# were computed falsely) and must be recomputed by dclab.
Expand Down
25 changes: 23 additions & 2 deletions tests/test_rtdc_copier.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def test_copy_tables():
tab_data = np.zeros((10, len(columns)))
tab_data[:, 0] = np.arange(10)
tab_data[:, 1] = 1000
tab_data[:, 2] = np.linspace(np.pi, 2*np.pi, 10)
tab_data[:, 2] = np.linspace(np.pi, 2 * np.pi, 10)
rec_arr = np.rec.array(tab_data, dtype=ds_dt)
# sanity check
assert np.all(rec_arr["bread"][:].flatten() == np.arange(10))
Expand All @@ -140,7 +140,7 @@ def test_copy_tables():
assert np.all(tab_data["bread"][:].flatten() == np.arange(10))
assert np.all(tab_data["beer"][:].flatten() == 1000)
assert np.all(tab_data["chocolate"][:].flatten() == np.linspace(
np.pi, 2*np.pi, 10))
np.pi, 2 * np.pi, 10))


def test_copy_tables_hdf5_issue_3214():
Expand Down Expand Up @@ -209,3 +209,24 @@ def test_copy_scalar_features_only():
with h5py.File(path_copy) as hc:
assert "image" not in hc["events"]
assert "deform" in hc["events"]


def test_copy_specified_feature_list():
    """Passing a list as `features` copies exactly the listed features

    The source file must contain "image", "area_um" and "deform"; only
    "image" and "deform" are requested, so "area_um" serves as the
    negative control in the copied file.
    """
    src_path = retrieve_data("fmt-hdf5_image-bg_2020.zip")
    dst_path = src_path.with_name("test_copy.rtdc")

    with h5py.File(src_path) as h5, h5py.File(dst_path, "w") as hc:
        # sanity check: all features of interest exist in the source,
        # otherwise the negative control below would be meaningless
        for feat in ("image", "area_um", "deform"):
            assert feat in h5["events"]
        rtdc_copy(src_h5file=h5,
                  dst_h5file=hc,
                  features=["image", "deform"])

    # reopen the copy read-only and verify its contents
    with h5py.File(dst_path) as hc:
        copied = hc["events"]
        assert "image" in copied
        assert "area_um" not in copied
        assert "deform" in copied

0 comments on commit 03a84a0

Please sign in to comment.