Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ scan = NexusLoader('file.hdf')
scan('energy') # --> returns data from '/entry/instrument/monochromator/energy'
scan('signal') # --> returns data from default signal, e.g. '/entry/measurement/sum'
scan('axes') # --> returns data from default axes, e.g. '/entry/measurement/theta'
scan('image_data') # --> returns data from default >3D dataset containing image data
scan.map.get_path('energy') # -> returns '/entry/instrument/monochromator/energy'
[data1, data2] = scan.get_data(['dataset_name_1', 'dataset_name_2'])
data = scan.eval('dataset_name_1 * 100 + 2')
Expand Down
35 changes: 32 additions & 3 deletions docs/usage/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,44 @@ axes, signal = scan('axes, signal') # NeXus default signal and axes are in the n
```

#### Rules for names in eval/format spec:
- 'filename', 'filepath' - these are always available
- 'name' - returns value of dataset '/entry/group/name'
- 'group_name' - return value of dataset '/entry/group/name'
- 'class_name' - return value of dataset '/entry/group/name' where group has NXclass: class
- 'name@attr' - returns attribute 'attr' associated with dataset 'name'
- '_name' - retrun hdf path of dataset 'name'
- '__name' - return default name of dataset 'name' (used when requesting 'axes' or 'signal'
- 'filename', 'filepath' - these are always available
- '_name' - return hdf path of dataset 'name'
- '__name' - return default name of dataset 'name' (used when requesting 'axes' or 'signal')
- 's_*name*': string representation of dataset (includes units if available)
- '*name*@*attr*': returns attribute of dataset *name*
- '*name*?(*default*)': returns default if *name* doesn't exist
- '(name1|name2|name3)': returns the first available of the names
- '(name1|name2?(default))': returns the first available name or default


#### New in V0.8.1: local variables in eval/format
Additional variables can be assigned to the local namespace accessed during eval or format, either directly accessing
data, or as shorthand for a path or expression.

```python
from hdfmap import NexusLoader

scan = NexusLoader('file.nxs')

# add local data
scan.map.add_local(my_parameter=800.)
monitor = scan.eval('ic1monitor / my_parameter')
# add replacement path
scan.map.add_named_expression(cmd='/entry1/scan_command')
cmd = scan.eval('cmd')
# add short-hand expressions
expr = {
'cmd': 'scan_command',
'normby': 'Transmission/count_time/(ic1monitor/800.)',
}
scan.map.add_named_expression(**expr)
ydata = scan.eval('signal/normby')
```


### formatted strings from metadata
Format strings can also be parsed to obtain data from the hdf files.
Expand Down
4 changes: 2 additions & 2 deletions src/hdfmap/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@
'set_all_logging_level', 'version_info', 'module_info'
]

__version__ = "0.8.0"
__date__ = "2025/02/07"
__version__ = "0.8.1"
__date__ = "2025/03/10"


def version_info() -> str:
Expand Down
41 changes: 34 additions & 7 deletions src/hdfmap/eval_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ def subfun(m):
return re_long_floats.sub(subfun, string)


def is_image(shape: tuple[int]):
    """Return True/False if dataset shape is suitable for image data"""
    # image data needs at least 3 dimensions (scan axis + 2 image axes)
    if len(shape) < 3:
        return False
    # the last two axes must span more than a single row/column of pixels:
    # any axis of length 1 (or a 2x2 frame) is rejected as not a real image
    rows, cols = shape[-2], shape[-1]
    return (rows - 1) * (cols - 1) > 1


def dataset2data(dataset: h5py.Dataset, index: int | slice = (), direct_load=False) -> datetime.datetime | str | np.ndarray:
"""
Read the data from a h5py Dataset and convert to either datetime, str or squeezed numpy array
Expand Down Expand Up @@ -261,12 +266,11 @@ def generate_namespace(hdf_file: h5py.File, hdf_namespace: dict[str, str], ident
if name.startswith('_') and name[1:] in hdf_namespace}
hdf_names = {name: generate_identifier(hdf_namespace[name[2:]]) for name in identifiers
if name.startswith('__') and name[2:] in hdf_namespace}
# add extra params
extras = extra_hdf_data(hdf_file)
return {**defaults, **extras, **hdf_paths, **hdf_names, **strings, **namespace}
return {**defaults, **hdf_paths, **hdf_names, **strings, **namespace}


def eval_hdf(hdf_file: h5py.File, expression: str, hdf_namespace: dict[str, str],
data_namespace: dict[str, typing.Any], replace_names: dict[str, str],
default: typing.Any = DEFAULT) -> typing.Any:
"""
Evaluate an expression using the namespace of the hdf file
Expand All @@ -282,23 +286,39 @@ def eval_hdf(hdf_file: h5py.File, expression: str, hdf_namespace: dict[str, str]
- '(name1|name2|name3)': returns the first available of the names
- '(name1|name2?(default))': returns the first available name or default

Additional variables can be added to the evaluation local namespace using data_namespace.

Shorthand variables for expressions can be assigned using replace_names = {'new_name': 'favourite*expression'}

:param hdf_file: h5py.File object
:param expression: str expression to be evaluated
:param hdf_namespace: dict of {'variable name': '/hdf/dataset/path'}
:param data_namespace: dict of {'variable name': value}
:param replace_names: dict of {'variable_name': expression}
:param default: returned if varname not in namespace
:return: eval(expression)
"""
if not expression.strip(): # don't evaluate empty strings
return expression
if expression in hdf_file: # if expression is a hdf path, just return the data
# replace names with expressions
for name, replacement in replace_names.items():
expression = expression.replace(name, replacement)
# if expression is a hdf path, just return the data
if expression in hdf_file:
return dataset2data(hdf_file[expression])
# raise error if doing something unsafe
check_unsafe_eval(expression)
# get extra data
extra_data = extra_hdf_data(hdf_file)
# find name@attribute in expression
attributes = {
f"attr__{name}_{attr}": dataset_attribute(hdf_file[path], attr)
for name, attr in re_dataset_attributes.findall(expression)
if (path := hdf_namespace.get(name, '')) in hdf_file
}
extra_data.update(attributes)
# add data values
extra_data.update(data_namespace)
# replace name@attribute in expression
expression = re_dataset_attributes.sub(r'attr__\g<1>_\g<2>', expression)
# find values with defaults '..?(..)'
Expand All @@ -311,30 +331,37 @@ def eval_hdf(hdf_file: h5py.File, expression: str, hdf_namespace: dict[str, str]
# find alternate names '(opt1|opt2|opt3)'
for alt_names in re_dataset_alternate.findall(expression): # alt_names = 'opt1|opt2|opt3
names = alt_names.split('|')
name = next((n for n in names if n in hdf_namespace), names[-1]) # first available name or last name
# first available name in data_namespace or hdf_namespace or last name
name = next(
(n for n in names if n in attributes),
next((n for n in names if n in hdf_namespace), names[-1])
)
expression = expression.replace(f"({alt_names})", name) # replace parentheses
# find identifiers matching names in the namespace
identifiers = [name for name in hdf_namespace if name in re_special_characters.split(expression)]
# find other non-builtin identifiers
identifiers += [name for name in find_identifiers(expression) if name not in identifiers]
namespace = generate_namespace(hdf_file, hdf_namespace, identifiers, default)
namespace.update(attributes) # replace attributes
namespace.update(extra_data) # matching names in namespace are replaced by those in extra_data
logger.info(f"Expression: {expression}\nidentifiers: {identifiers}\n")
logger.debug(f"namespace: {namespace}\n")
return eval(expression, GLOBALS, namespace)


def format_hdf(hdf_file: h5py.File, expression: str, hdf_namespace: dict[str, str],
data_namespace: dict[str, typing.Any], replace_names: dict[str, str],
default: typing.Any = DEFAULT) -> str:
"""
Evaluate a formatted string expression using the namespace of the hdf file
:param hdf_file: h5py.File object
:param expression: str expression using {name} format specifiers
:param hdf_namespace: dict of {'variable name': '/hdf/dataset/path'}
:param data_namespace: dict of {'variable name': value}
:param replace_names: dict of {'variable_name': expression}
:param default: returned if varname not in namespace
:return: eval_hdf(f"expression")
"""
expression = 'f"""' + expression + '"""' # convert to fstr
return eval_hdf(hdf_file, expression, hdf_namespace, default)
return eval_hdf(hdf_file, expression, hdf_namespace, data_namespace, replace_names, default)


34 changes: 28 additions & 6 deletions src/hdfmap/hdfmap_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
from . import load_hdf
from .logging import create_logger
from .eval_functions import (expression_safe_name, extra_hdf_data, eval_hdf,
format_hdf, dataset2data, dataset2str, DEFAULT, SEP, generate_identifier, build_hdf_path)
format_hdf, dataset2data, dataset2str, is_image,
DEFAULT, SEP, generate_identifier, build_hdf_path)


# parameters
LOCAL_NAME = 'local_name' # dataset attribute name for alt_name
IMAGE_DATA = 'image_data' # namespace name for default image data

# logger
logger = create_logger(__name__)
Expand Down Expand Up @@ -134,6 +136,8 @@ class HdfMap:
- map.find_paths('string') -> return list of dataset paths containing string
- map.find_names('string') -> return list of dataset names containing string
- map.find_attr('attr_name') -> return list of paths of groups or datasets containing attribute 'attr_name'
- map.add_local(local_variable=value) -> add to the local namespace accessed by eval
- map.add_named_expression(alternate_name='expression') -> add local variables for expressions replaced during eval
### File Methods
- map.get_metadata(h5py.File) -> returns dict of value datasets
- map.get_scannables(h5py.File) -> returns dict of scannable datasets
Expand All @@ -158,6 +162,8 @@ def __init__(self, file: h5py.File | None = None):
self.scannables = {} # stores array dataset paths with given size, by name
self.combined = {} # stores array and value paths (arrays overwrite values)
self.image_data = {} # stores dataset paths of image data
self._local_data = {} # stores variables and data to be used in eval
self._alternate_names = {} # stores variable names for expressions to be evaluated
self._default_image_path = None

if isinstance(file, h5py.File):
Expand Down Expand Up @@ -264,7 +270,7 @@ def _store_dataset(self, hdf_dataset: h5py.Dataset, hdf_path: str, name: str):
shape=hdf_dataset.shape,
attrs=dict(hdf_dataset.attrs),
)
if hdf_dataset.ndim >= 3:
if is_image(hdf_dataset.shape):
self.image_data[name] = hdf_path
self.image_data[group_name] = hdf_path
self.arrays.update(names)
Expand Down Expand Up @@ -311,16 +317,28 @@ def _populate(self, hdf_group: h5py.Group, root: str = '',
elif isinstance(obj, h5py.Dataset) and not isinstance(link, h5py.SoftLink):
self._store_dataset(obj, hdf_path, name)

def add_local(self, **kwargs):
    """
    Add values to the local namespace used during eval/format.

    Each keyword argument becomes a variable available when evaluating
    expressions, e.g. ``map.add_local(my_parameter=800.)``.
    """
    self._local_data.update(kwargs)

def add_named_expression(self, **kwargs):
    """
    Add named shorthand expressions to the local namespace used during eval/format.

    Each keyword argument maps a variable name to an expression string,
    e.g. ``map.add_named_expression(cmd='/entry1/scan_command')``; occurrences
    of the name are replaced by the expression before evaluation.
    """
    self._alternate_names.update(kwargs)

def populate(self, hdf_file: h5py.File):
    """Populate all datasets from file"""
    self.filename = hdf_file.filename
    # add file-level values (presumably 'filename'/'filepath' etc.) to the
    # eval namespace — see extra_hdf_data; TODO confirm exact keys
    self._local_data.update(extra_hdf_data(hdf_file))
    self._populate(hdf_file)
    # scannables are the arrays matching the most common dataset size
    size = self.most_common_size()
    self.generate_scannables(size)

def generate_combined(self):
"""Finalise the mapped namespace by combining dataset names"""
self.combined = {**self.values, **self.arrays, **self.scannables}
if self.image_data:
# add default 'image_data'
self.image_data[IMAGE_DATA] = next(iter(self.image_data.values()))
self.combined = {**self.values, **self.arrays, **self.image_data, **self.scannables}

def all_attrs(self) -> dict:
"""Return dict of all attributes in self.datasets and self.groups"""
Expand Down Expand Up @@ -490,7 +508,11 @@ def find_datasets(self, *names_or_classes: str) -> list[str]:

[paths, ] = m.find_datasets('NXslit', 'x_gap')

Intended for use finding datasets assosiated with groups with a certain hierarchy
Intended for use finding datasets associated with groups with a certain hierarchy

Note that arguments are checked against the dataset namespace first, so if the argument appears
in both lists, it will be assumed to be a dataset.

:params names_or_classes: dataset names, group names or group class names
:returns: list of hdf dataset paths
"""
Expand Down Expand Up @@ -826,7 +848,7 @@ def eval(self, hdf_file: h5py.File, expression: str, default=DEFAULT):
:param default: returned if varname not in namespace
:return: eval(expression)
"""
return eval_hdf(hdf_file, expression, self.combined, default)
return eval_hdf(hdf_file, expression, self.combined, self._local_data, self._alternate_names, default)

def format_hdf(self, hdf_file: h5py.File, expression: str, default=DEFAULT) -> str:
"""
Expand All @@ -836,7 +858,7 @@ def format_hdf(self, hdf_file: h5py.File, expression: str, default=DEFAULT) -> s
:param default: returned if varname not in namespace
:return: eval_hdf(f"expression")
"""
return format_hdf(hdf_file, expression, self.combined, default)
return format_hdf(hdf_file, expression, self.combined, self._local_data, self._alternate_names, default)

def create_dataset_summary(self, hdf_file: h5py.File) -> str:
"""Create summary of all datasets in file"""
Expand Down
44 changes: 30 additions & 14 deletions src/hdfmap/nexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
Nexus Related functions and nexus class
"""

import os
import h5py

from .logging import create_logger
from .hdfmap_class import HdfMap, disp_dict
from .eval_functions import generate_identifier, build_hdf_path
from .hdfmap_class import HdfMap, disp_dict, IMAGE_DATA
from .eval_functions import generate_identifier, build_hdf_path, is_image

NX_CLASS = 'NX_class'
NX_ENTRY = 'NXentry'
NX_DATA = 'NXdata'
NX_DEFINITION = 'definition'
NX_LOCALNAME = 'local_name'
NX_DEFAULT = 'default'
NX_RUN = 'entry_identifier'
Expand Down Expand Up @@ -180,6 +182,7 @@ class NexusMap(HdfMap):
# Special behaviour
nxmap['axes'] -> return path of default axes dataset
nxmap['signal'] -> return path of default signal dataset
nxmap['image_data'] -> return path of first area detector data object
[axes_paths], [signal_paths] = nxmap.nexus_default_paths()
[axes_names], [signal_names] = nxmap.nexus_default_names() # returns default names in nxmap.scannables
"""
Expand Down Expand Up @@ -208,6 +211,12 @@ def info_nexus(self, scannables=True, image_data=True, metadata=False) -> str:
out += f""
return out

def _store_group(self, hdf_group: h5py.Group, path: str, name: str):
    """
    Store the group as in HdfMap, additionally registering any NeXus
    application definition (the 'definition' dataset, e.g. NXmx or NXxas)
    as a class name so groups can be found by definition.
    """
    super()._store_group(hdf_group, path, name)
    if NX_DEFINITION in hdf_group:
        definition = hdf_group[NX_DEFINITION].asstr()[()]  # e.g. NXmx or NXxas
        # NOTE(review): assumes self.classes behaves like defaultdict(list) — confirm
        self.classes[definition].append(path)

def _default_nexus_paths(self, hdf_file):
"""Load Nexus default axes and signal"""
try:
Expand Down Expand Up @@ -293,31 +302,38 @@ def generate_scannables_from_scan_fields_or_nxdata(self, hdf_file: h5py.File):

def generate_image_data_from_nxdetector(self):
"""find the NXdetector group and assign the image data"""
#TODO: add image_data to detector path if data not found
self.image_data = {}
scan_dim = len(self.scannables_shape())
if NX_DETECTOR in self.classes:
for group_path in self.classes[NX_DETECTOR]:
detector_name = generate_identifier(group_path)
# detector data is stored in NXdata in dataset 'data'
data_path = build_hdf_path(group_path, NX_DETECTOR_DATA)
image_data_path = build_hdf_path(group_path, NX_IMAGE_DATA)
logger.debug(f"Looking for image_data at: '{data_path}' or '{image_data_path}'")
if data_path in self.datasets and len(self.datasets[data_path].shape) > scan_dim:
if data_path in self.datasets and is_image(self.datasets[data_path].shape):
logger.info(f"Adding image_data ['{detector_name}'] = '{data_path}'")
self.image_data[detector_name] = data_path
self.arrays[detector_name] = data_path
# also save image_data if available
if image_data_path in self.datasets:
detector_name = f"{detector_name}_image_list"
logger.info(f"Adding image_data ['{detector_name}'] = '{image_data_path}'")
self.image_data[detector_name] = image_data_path
self.arrays[detector_name] = image_data_path
elif image_data_path in self.datasets:
logger.info(f"Adding image_data ['{detector_name}'] = '{image_data_path}'")
self.image_data[detector_name] = image_data_path
self.arrays[detector_name] = image_data_path
else:
# Use first dataset with > 2 dimensions
image_datasets = [
image_dataset = next((
path for name in self.get_group_datasets(group_path)
if len(self.datasets[path := build_hdf_path(group_path, name)].shape) >= 3
]
if image_datasets:
logger.info(f"Adding image_data ['{detector_name}'] = '{image_datasets[0]}'")
self.image_data[detector_name] = image_datasets[0]
if is_image(self.datasets[path := build_hdf_path(group_path, name)].shape)
), False)
if image_dataset:
logger.info(f"Adding image_data ['{detector_name}'] = '{image_dataset}'")
self.image_data[detector_name] = image_dataset
self.arrays[detector_name] = image_dataset

if not self.image_data:
logger.warning("No NXdetector found, image_data not populated!")
Expand Down Expand Up @@ -358,10 +374,10 @@ def populate(self, hdf_file: h5py.File, groups=None, default_entry_only=False):
if not self.datasets:
logger.warning("No datasets found!")

self.generate_scannables_from_scan_fields_or_nxdata(hdf_file)

# find the NXdetector group and assign the image data
self.generate_image_data_from_nxdetector()
# find the scannable arrays and generate self.combined
self.generate_scannables_from_scan_fields_or_nxdata(hdf_file)

def get_plot_data(self, hdf_file: h5py.File):
"""
Expand Down Expand Up @@ -393,7 +409,7 @@ def get_plot_data(self, hdf_file: h5py.File):
signal_units = [self.get_attr(path, NX_UNITS, '') for name, path in signals.items()]
axes_labels = [name + (f" [{unit}]" if unit else '') for name, unit in zip(axes, axes_units)]
signal_labels = [name + (f" [{unit}]" if unit else '') for name, unit in zip(signals, signal_units)]
title = f"{self.filename}\n{self.get_data(hdf_file, NX_TITLE)}"
title = f"{os.path.basename(self.filename)}\n{self.get_data(hdf_file, NX_TITLE)}"

xdata = (
self.get_data(hdf_file, next(iter(axes.values()))).flatten()
Expand Down
Loading