Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ scan = NexusLoader('file.hdf')
scan('energy') # --> returns data from '/entry/instrument/monochromator/energy'
scan('signal') # --> returns data from default signal, e.g. '/entry/measurement/sum'
scan('axes') # --> returns data from default axes, e.g. '/entry/measurement/theta'
scan('image_data') # --> returns data from default >3D dataset containing image data
scan.map.get_path('energy') # -> returns '/entry/instrument/monochromator/energy'
[data1, data2] = scan.get_data(['dataset_name_1', 'dataset_name_2'])
data = scan.eval('dataset_name_1 * 100 + 2')
Expand Down
35 changes: 32 additions & 3 deletions docs/usage/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,44 @@ axes, signal = scan('axes, signal') # NeXus default signal and axes are in the n
```

#### Rules for names in eval/format spec:
- 'filename', 'filepath' - these are always available
- 'name' - returns value of dataset '/entry/group/name'
- 'group_name' - return value of dataset '/entry/group/name'
- 'class_name' - return value of dataset '/entry/group/name' where group has NXclass: class
- 'name@attr' - returns attribute 'attr' associated with dataset 'name'
- '_name' - retrun hdf path of dataset 'name'
- '__name' - return default name of dataset 'name' (used when requesting 'axes' or 'signal'
- 'filename', 'filepath' - these are always available
- '_name' - return hdf path of dataset 'name'
- '__name' - return default name of dataset 'name' (used when requesting 'axes' or 'signal')
- 's_*name*': string representation of dataset (includes units if available)
- '*name*@*attr*': returns attribute of dataset *name*
- '*name*?(*default*)': returns default if *name* doesn't exist
- '(name1|name2|name3)': returns the first available of the names
- '(name1|name2?(default))': returns the first available name or default


#### New in V0.8.1: local variables in eval/format
Additional variables can be assigned to the local namespace accessed during eval or format, either directly accessing
data, or as shorthand for a path or expression.

```python
from hdfmap import NexusLoader

scan = NexusLoader('file.nxs')

# add local data
scan.map.add_local(my_parameter=800.)
monitor = scan.eval('ic1monitor / my_parameter')
# add replacement path
scan.map.add_named_expression(cmd='/entry1/scan_command')
cmd = scan.eval('cmd')
# add short-hand expressions
expr = {
'cmd': 'scan_command',
'normby': 'Transmission/count_time/(ic1monitor/800.)',
}
scan.map.add_named_expression(**expr)
ydata = scan.eval('signal/normby')
```


### formatted strings from metadata
Format strings can also be parsed to obtain data from the hdf files.
Expand Down
4 changes: 2 additions & 2 deletions src/hdfmap/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@
'set_all_logging_level', 'version_info', 'module_info'
]

__version__ = "0.8.0"
__date__ = "2025/02/07"
__version__ = "0.8.1"
__date__ = "2025/03/10"


def version_info() -> str:
Expand Down
41 changes: 34 additions & 7 deletions src/hdfmap/eval_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ def subfun(m):
return re_long_floats.sub(subfun, string)


def is_image(shape: tuple[int]):
    """Return True/False if dataset shape is suitable for image data"""
    # image data needs at least 3 dimensions (scan axis + 2 image axes)
    if len(shape) < 3:
        return False
    # the last two axes must span more than a single row/column of pixels:
    # any axis of length 1 (or a 2x2 frame) is rejected as not a real image
    rows, cols = shape[-2], shape[-1]
    return (rows - 1) * (cols - 1) > 1


def dataset2data(dataset: h5py.Dataset, index: int | slice = (), direct_load=False) -> datetime.datetime | str | np.ndarray:
"""
Read the data from a h5py Dataset and convert to either datetime, str or squeezed numpy array
Expand Down Expand Up @@ -261,12 +266,11 @@ def generate_namespace(hdf_file: h5py.File, hdf_namespace: dict[str, str], ident
if name.startswith('_') and name[1:] in hdf_namespace}
hdf_names = {name: generate_identifier(hdf_namespace[name[2:]]) for name in identifiers
if name.startswith('__') and name[2:] in hdf_namespace}
# add extra params
extras = extra_hdf_data(hdf_file)
return {**defaults, **extras, **hdf_paths, **hdf_names, **strings, **namespace}
return {**defaults, **hdf_paths, **hdf_names, **strings, **namespace}


def eval_hdf(hdf_file: h5py.File, expression: str, hdf_namespace: dict[str, str],
data_namespace: dict[str, typing.Any], replace_names: dict[str, str],
default: typing.Any = DEFAULT) -> typing.Any:
"""
Evaluate an expression using the namespace of the hdf file
Expand All @@ -282,23 +286,39 @@ def eval_hdf(hdf_file: h5py.File, expression: str, hdf_namespace: dict[str, str]
- '(name1|name2|name3)': returns the first available of the names
- '(name1|name2?(default))': returns the first available name or default

Additional variables can be added to the evaluation local namespace using data_namespace.

Shorthand variables for expressions can be assigned using replace_names = {'new_name': 'favourite*expression'}

:param hdf_file: h5py.File object
:param expression: str expression to be evaluated
:param hdf_namespace: dict of {'variable name': '/hdf/dataset/path'}
:param data_namespace: dict of {'variable name': value}
:param replace_names: dict of {'variable_name': expression}
:param default: returned if varname not in namespace
:return: eval(expression)
"""
if not expression.strip(): # don't evaluate empty strings
return expression
if expression in hdf_file: # if expression is a hdf path, just return the data
# replace names with expressions
for name, replacement in replace_names.items():
expression = expression.replace(name, replacement)
# if expression is a hdf path, just return the data
if expression in hdf_file:
return dataset2data(hdf_file[expression])
# raise error if doing something unsafe
check_unsafe_eval(expression)
# get extra data
extra_data = extra_hdf_data(hdf_file)
# find name@attribute in expression
attributes = {
f"attr__{name}_{attr}": dataset_attribute(hdf_file[path], attr)
for name, attr in re_dataset_attributes.findall(expression)
if (path := hdf_namespace.get(name, '')) in hdf_file
}
extra_data.update(attributes)
# add data values
extra_data.update(data_namespace)
# replace name@attribute in expression
expression = re_dataset_attributes.sub(r'attr__\g<1>_\g<2>', expression)
# find values with defaults '..?(..)'
Expand All @@ -311,30 +331,37 @@ def eval_hdf(hdf_file: h5py.File, expression: str, hdf_namespace: dict[str, str]
# find alternate names '(opt1|opt2|opt3)'
for alt_names in re_dataset_alternate.findall(expression): # alt_names = 'opt1|opt2|opt3
names = alt_names.split('|')
name = next((n for n in names if n in hdf_namespace), names[-1]) # first available name or last name
# first available name in data_namespace or hdf_namespace or last name
name = next(
(n for n in names if n in attributes),
next((n for n in names if n in hdf_namespace), names[-1])
)
expression = expression.replace(f"({alt_names})", name) # replace parentheses
# find identifiers matching names in the namespace
identifiers = [name for name in hdf_namespace if name in re_special_characters.split(expression)]
# find other non-builtin identifiers
identifiers += [name for name in find_identifiers(expression) if name not in identifiers]
namespace = generate_namespace(hdf_file, hdf_namespace, identifiers, default)
namespace.update(attributes) # replace attributes
namespace.update(extra_data) # matching names in namespace are replaced by those in extra_data
logger.info(f"Expression: {expression}\nidentifiers: {identifiers}\n")
logger.debug(f"namespace: {namespace}\n")
return eval(expression, GLOBALS, namespace)


def format_hdf(hdf_file: h5py.File, expression: str, hdf_namespace: dict[str, str],
data_namespace: dict[str, typing.Any], replace_names: dict[str, str],
default: typing.Any = DEFAULT) -> str:
"""
Evaluate a formatted string expression using the namespace of the hdf file
:param hdf_file: h5py.File object
:param expression: str expression using {name} format specifiers
:param hdf_namespace: dict of {'variable name': '/hdf/dataset/path'}
:param data_namespace: dict of {'variable name': value}
:param replace_names: dict of {'variable_name': expression}
:param default: returned if varname not in namespace
:return: eval_hdf(f"expression")
"""
expression = 'f"""' + expression + '"""' # convert to fstr
return eval_hdf(hdf_file, expression, hdf_namespace, default)
return eval_hdf(hdf_file, expression, hdf_namespace, data_namespace, replace_names, default)


34 changes: 28 additions & 6 deletions src/hdfmap/hdfmap_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
from . import load_hdf
from .logging import create_logger
from .eval_functions import (expression_safe_name, extra_hdf_data, eval_hdf,
format_hdf, dataset2data, dataset2str, DEFAULT, SEP, generate_identifier, build_hdf_path)
format_hdf, dataset2data, dataset2str, is_image,
DEFAULT, SEP, generate_identifier, build_hdf_path)


# parameters
LOCAL_NAME = 'local_name' # dataset attribute name for alt_name
IMAGE_DATA = 'image_data' # namespace name for default image data

# logger
logger = create_logger(__name__)
Expand Down Expand Up @@ -134,6 +136,8 @@ class HdfMap:
- map.find_paths('string') -> return list of dataset paths containing string
- map.find_names('string') -> return list of dataset names containing string
- map.find_attr('attr_name') -> return list of paths of groups or datasets containing attribute 'attr_name'
- map.add_local(local_variable=value) -> add to the local namespace accessed by eval
- map.add_named_expression(alternate_name='expression') -> add local variables for expressions replaced during eval
### File Methods
- map.get_metadata(h5py.File) -> returns dict of value datasets
- map.get_scannables(h5py.File) -> returns dict of scannable datasets
Expand All @@ -158,6 +162,8 @@ def __init__(self, file: h5py.File | None = None):
self.scannables = {} # stores array dataset paths with given size, by name
self.combined = {} # stores array and value paths (arrays overwrite values)
self.image_data = {} # stores dataset paths of image data
self._local_data = {} # stores variables and data to be used in eval
self._alternate_names = {} # stores variable names for expressions to be evaluated
self._default_image_path = None

if isinstance(file, h5py.File):
Expand Down Expand Up @@ -264,7 +270,7 @@ def _store_dataset(self, hdf_dataset: h5py.Dataset, hdf_path: str, name: str):
shape=hdf_dataset.shape,
attrs=dict(hdf_dataset.attrs),
)
if hdf_dataset.ndim >= 3:
if is_image(hdf_dataset.shape):
self.image_data[name] = hdf_path
self.image_data[group_name] = hdf_path
self.arrays.update(names)
Expand Down Expand Up @@ -311,16 +317,28 @@ def _populate(self, hdf_group: h5py.Group, root: str = '',
elif isinstance(obj, h5py.Dataset) and not isinstance(link, h5py.SoftLink):
self._store_dataset(obj, hdf_path, name)

def add_local(self, **kwargs):
    """
    Add values to the local namespace used during eval/format.

    Each keyword argument becomes a variable available when evaluating
    expressions, e.g. ``map.add_local(my_parameter=800.)``.
    """
    self._local_data.update(kwargs)

def add_named_expression(self, **kwargs):
    """
    Add named shorthand expressions to the local namespace used during eval/format.

    Each keyword argument maps a variable name to an expression string,
    e.g. ``map.add_named_expression(cmd='/entry1/scan_command')``; occurrences
    of the name are replaced by the expression before evaluation.
    """
    self._alternate_names.update(kwargs)

def populate(self, hdf_file: h5py.File):
    """Populate all datasets from file"""
    self.filename = hdf_file.filename
    # add file-level values (presumably 'filename'/'filepath' etc.) to the
    # eval namespace — see extra_hdf_data; TODO confirm exact keys
    self._local_data.update(extra_hdf_data(hdf_file))
    self._populate(hdf_file)
    # scannables are the arrays matching the most common dataset size
    size = self.most_common_size()
    self.generate_scannables(size)

def generate_combined(self):
"""Finalise the mapped namespace by combining dataset names"""
self.combined = {**self.values, **self.arrays, **self.scannables}
if self.image_data:
# add default 'image_data'
self.image_data[IMAGE_DATA] = next(iter(self.image_data.values()))
self.combined = {**self.values, **self.arrays, **self.image_data, **self.scannables}

def all_attrs(self) -> dict:
"""Return dict of all attributes in self.datasets and self.groups"""
Expand Down Expand Up @@ -490,7 +508,11 @@ def find_datasets(self, *names_or_classes: str) -> list[str]:

[paths, ] = m.find_datasets('NXslit', 'x_gap')

Intended for use finding datasets assosiated with groups with a certain hierarchy
Intended for use finding datasets associated with groups with a certain hierarchy

Note that arguments are checked against the dataset namespace first, so if the argument appears
in both lists, it will be assumed to be a dataset.

:params names_or_classes: dataset names, group names or group class names
:returns: list of hdf dataset paths
"""
Expand Down Expand Up @@ -826,7 +848,7 @@ def eval(self, hdf_file: h5py.File, expression: str, default=DEFAULT):
:param default: returned if varname not in namespace
:return: eval(expression)
"""
return eval_hdf(hdf_file, expression, self.combined, default)
return eval_hdf(hdf_file, expression, self.combined, self._local_data, self._alternate_names, default)

def format_hdf(self, hdf_file: h5py.File, expression: str, default=DEFAULT) -> str:
"""
Expand All @@ -836,7 +858,7 @@ def format_hdf(self, hdf_file: h5py.File, expression: str, default=DEFAULT) -> s
:param default: returned if varname not in namespace
:return: eval_hdf(f"expression")
"""
return format_hdf(hdf_file, expression, self.combined, default)
return format_hdf(hdf_file, expression, self.combined, self._local_data, self._alternate_names, default)

def create_dataset_summary(self, hdf_file: h5py.File) -> str:
"""Create summary of all datasets in file"""
Expand Down
44 changes: 30 additions & 14 deletions src/hdfmap/nexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
Nexus Related functions and nexus class
"""

import os
import h5py

from .logging import create_logger
from .hdfmap_class import HdfMap, disp_dict
from .eval_functions import generate_identifier, build_hdf_path
from .hdfmap_class import HdfMap, disp_dict, IMAGE_DATA
from .eval_functions import generate_identifier, build_hdf_path, is_image

NX_CLASS = 'NX_class'
NX_ENTRY = 'NXentry'
NX_DATA = 'NXdata'
NX_DEFINITION = 'definition'
NX_LOCALNAME = 'local_name'
NX_DEFAULT = 'default'
NX_RUN = 'entry_identifier'
Expand Down Expand Up @@ -180,6 +182,7 @@ class NexusMap(HdfMap):
# Special behaviour
nxmap['axes'] -> return path of default axes dataset
nxmap['signal'] -> return path of default signal dataset
nxmap['image_data'] -> return path of first area detector data object
[axes_paths], [signal_paths] = nxmap.nexus_default_paths()
[axes_names], [signal_names] = nxmap.nexus_default_names() # returns default names in nxmap.scannables
"""
Expand Down Expand Up @@ -208,6 +211,12 @@ def info_nexus(self, scannables=True, image_data=True, metadata=False) -> str:
out += f""
return out

def _store_group(self, hdf_group: h5py.Group, path: str, name: str):
    """
    Store the group as in HdfMap, additionally registering any NeXus
    application definition (the 'definition' dataset, e.g. NXmx or NXxas)
    as a class name so groups can be found by definition.
    """
    super()._store_group(hdf_group, path, name)
    if NX_DEFINITION in hdf_group:
        definition = hdf_group[NX_DEFINITION].asstr()[()]  # e.g. NXmx or NXxas
        # NOTE(review): assumes self.classes behaves like defaultdict(list) — confirm
        self.classes[definition].append(path)

def _default_nexus_paths(self, hdf_file):
"""Load Nexus default axes and signal"""
try:
Expand Down Expand Up @@ -293,31 +302,38 @@ def generate_scannables_from_scan_fields_or_nxdata(self, hdf_file: h5py.File):

def generate_image_data_from_nxdetector(self):
"""find the NXdetector group and assign the image data"""
#TODO: add image_data to detector path if data not found
self.image_data = {}
scan_dim = len(self.scannables_shape())
if NX_DETECTOR in self.classes:
for group_path in self.classes[NX_DETECTOR]:
detector_name = generate_identifier(group_path)
# detector data is stored in NXdata in dataset 'data'
data_path = build_hdf_path(group_path, NX_DETECTOR_DATA)
image_data_path = build_hdf_path(group_path, NX_IMAGE_DATA)
logger.debug(f"Looking for image_data at: '{data_path}' or '{image_data_path}'")
if data_path in self.datasets and len(self.datasets[data_path].shape) > scan_dim:
if data_path in self.datasets and is_image(self.datasets[data_path].shape):
logger.info(f"Adding image_data ['{detector_name}'] = '{data_path}'")
self.image_data[detector_name] = data_path
self.arrays[detector_name] = data_path
# also save image_data if available
if image_data_path in self.datasets:
detector_name = f"{detector_name}_image_list"
logger.info(f"Adding image_data ['{detector_name}'] = '{image_data_path}'")
self.image_data[detector_name] = image_data_path
self.arrays[detector_name] = image_data_path
elif image_data_path in self.datasets:
logger.info(f"Adding image_data ['{detector_name}'] = '{image_data_path}'")
self.image_data[detector_name] = image_data_path
self.arrays[detector_name] = image_data_path
else:
# Use first dataset with > 2 dimensions
image_datasets = [
image_dataset = next((
path for name in self.get_group_datasets(group_path)
if len(self.datasets[path := build_hdf_path(group_path, name)].shape) >= 3
]
if image_datasets:
logger.info(f"Adding image_data ['{detector_name}'] = '{image_datasets[0]}'")
self.image_data[detector_name] = image_datasets[0]
if is_image(self.datasets[path := build_hdf_path(group_path, name)].shape)
), False)
if image_dataset:
logger.info(f"Adding image_data ['{detector_name}'] = '{image_dataset}'")
self.image_data[detector_name] = image_dataset
self.arrays[detector_name] = image_dataset

if not self.image_data:
logger.warning("No NXdetector found, image_data not populated!")
Expand Down Expand Up @@ -358,10 +374,10 @@ def populate(self, hdf_file: h5py.File, groups=None, default_entry_only=False):
if not self.datasets:
logger.warning("No datasets found!")

self.generate_scannables_from_scan_fields_or_nxdata(hdf_file)

# find the NXdetector group and assign the image data
self.generate_image_data_from_nxdetector()
# find the scannable arrays and generate self.combined
self.generate_scannables_from_scan_fields_or_nxdata(hdf_file)

def get_plot_data(self, hdf_file: h5py.File):
"""
Expand Down Expand Up @@ -393,7 +409,7 @@ def get_plot_data(self, hdf_file: h5py.File):
signal_units = [self.get_attr(path, NX_UNITS, '') for name, path in signals.items()]
axes_labels = [name + (f" [{unit}]" if unit else '') for name, unit in zip(axes, axes_units)]
signal_labels = [name + (f" [{unit}]" if unit else '') for name, unit in zip(signals, signal_units)]
title = f"{self.filename}\n{self.get_data(hdf_file, NX_TITLE)}"
title = f"{os.path.basename(self.filename)}\n{self.get_data(hdf_file, NX_TITLE)}"

xdata = (
self.get_data(hdf_file, next(iter(axes.values()))).flatten()
Expand Down
Loading