feat: support series files in SeriesFolder format and deprecate holo_kw

RI-imaging · Mar 24, 2022
commit 9903cbc · 1 parent b16958d

Showing 14 changed files with 133 additions and 97 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,8 +1,10 @@
 0.13.0
  - BREAKING CHANGE: renamed all file format classes
  - feat: add file format class for QLSI data
- - setup: bump qpimage from 0.7.7 to 0.8.0
+ - feat: support series files in SeriesFolder format
+ - setup: bump qpimage from 0.7.7 to 0.8.3
  - ref: simplify submodule structure and detection of file formats
+ - ref: deprecate `holo_kw`
 0.12.1
  - maintenance release
 0.12.0

diff --git a/qpformat/core.py b/qpformat/core.py
@@ -18,7 +18,8 @@ def guess_format(path):
 
 
 def load_data(path, fmt=None, bg_data=None, bg_fmt=None,
-              meta_data=None, holo_kw=None, as_type="float32"):
+              meta_data=None, holo_kw=None, qpretrieve_kw=None,
+              as_type="float32"):
     """Load experimental data
 
     Parameters
@@ -33,12 +34,15 @@ def load_data(path, fmt=None, bg_data=None, bg_fmt=None,
     bg_fmt: str
         The file format to use (see `file_formats.formats`)
         for the background. If set to `None`, the file format
-        is be guessed.
+        is guessed.
     meta_data: dict
         Meta data (see `qpimage.meta.META_KEYS`)
     holo_kw: dict
-        Keyword arguments for hologram data; See
-        :func:`qpimage.holo.get_field` for valid keyword arguments.
+        Deprecated, please use `qpretrieve_kw` instead!
+    qpretrieve_kw: dict
+        Keyword arguments passed to
+        :ref:`qpretrieve <qpretrieve:index>` for
+        phase retrieval from interferometric data.
     as_type: str
         Defines the data type that the input data is casted to.
         The default is "float32" which saves memory. If high
@@ -51,8 +55,6 @@ def load_data(path, fmt=None, bg_data=None, bg_fmt=None,
     dataobj: SeriesData or SingleData
         Object that gives lazy access to the experimental data.
     """
-    if holo_kw is None:
-        holo_kw = {}
     if meta_data is None:
         meta_data = {}
     path = pathlib.Path(path).resolve()
@@ -77,6 +79,7 @@ def load_data(path, fmt=None, bg_data=None, bg_fmt=None,
     dataobj = formats_dict[fmt](path=path,
                                 meta_data=meta_data,
                                 holo_kw=holo_kw,
+                                qpretrieve_kw=qpretrieve_kw,
                                 as_type=as_type)
 
     if bg_data is not None:
@@ -91,6 +94,7 @@ def load_data(path, fmt=None, bg_data=None, bg_fmt=None,
                 bgobj = formats_dict[bg_fmt](path=bg_path,
                                              meta_data=meta_data,
                                              holo_kw=holo_kw,
+                                             qpretrieve_kw=qpretrieve_kw,
                                              as_type=as_type)
                 dataobj.set_bg(bgobj)
 

diff --git a/qpformat/file_formats/fmt_series_folder.py b/qpformat/file_formats/fmt_series_folder.py
@@ -25,14 +25,14 @@ def __init__(self, *args, **kwargs):
         super(SeriesFolder, self).__init__(*args, **kwargs)
         self._files = None
         self._formats = None
-        self._dataset = None
-
+        self._series = None
         self.format_dict = get_format_dict()
 
+    @lru_cache()
     def __len__(self):
-        return len(self.files)
+        return len(self._get_sub_image_mapping())
 
-    @lru_cache(maxsize=32)
+    @lru_cache()
     def _get_cropped_file_names(self):
         """self.files with common path prefix/suffix removed"""
         files = [ff.name for ff in self.files]
@@ -41,26 +41,29 @@ def _get_cropped_file_names(self):
         cropped = [f[len(prefix):-len(suffix)] for f in files]
         return cropped
 
-    def _get_dataset(self, idx):
-        if self._dataset is None:
-            self._dataset = [None] * len(self)
-        if self._dataset[idx] is None:
-            format_class = self.format_dict[self._formats[idx]]
-            self._dataset[idx] = format_class(path=self._files[idx],
-                                              meta_data=self.meta_data,
-                                              as_type=self.as_type,
-                                              holo_kw=self.holo_kw)
-        if len(self._dataset[idx]) != 1:
-            msg = "Multiple images per file are not supported in the " \
-                  + "SeriesFolder file format! Besides the fact that it " \
-                  + "would add unnecessary complexity, it is also really a " \
-                  + "bad idea to do this. Please restructure your " \
-                  + "workflow accordingly. The offending file is " \
-                  + "'{}'.".format(self.files[idx])
-            raise NotImplementedError(msg)
-        return self._dataset[idx]
-
-    @lru_cache(maxsize=32)
+    def _get_series_from_file(self, file_idx):
+        if self._series is None:
+            self._series = [None] * len(self.files)
+        if self._series[file_idx] is None:
+            format_class = self.format_dict[self._formats[file_idx]]
+            self._series[file_idx] = format_class(
+                path=self._files[file_idx],
+                meta_data=self.meta_data,
+                as_type=self.as_type,
+                qpretrieve_kw=self.qpretrieve_kw)
+        return self._series[file_idx]
+
+    @lru_cache()
+    def _get_sub_image_mapping(self):
+        mapping = []
+        for file_idx in range(len(self.files)):
+            ds = self._get_series_from_file(file_idx)
+            for jj in range(len(ds)):
+                # index of file, index of image in file
+                mapping.append((file_idx, jj))
+        return mapping
+
+    @lru_cache()
     def _identifier_data(self):
         """Return a unique identifier for the folder data"""
         # Use only file names
@@ -79,6 +82,9 @@ def _search_files(path):
 
         .. versionchanged:: 0.6.0
             `path` is not searched recursively anymore
+
+        .. versionchanged:: 0.13.0
+            series file formats are now supported
         """
         path = pathlib.Path(path)
         fifo = []
@@ -87,31 +93,33 @@ def _search_files(path):
             if fp.is_dir():
                 continue
             for fmt in get_format_classes():
-                # series data is not supported in SeriesFolder
-                if not fmt.is_series and fmt.verify(fp):
+                if fmt.verify(fp):
                     fifo.append((fp, fmt.__name__))
                     break
 
-        # ignore qpimage formats if multiple formats were
+        # Ignore qpimage formats if multiple formats were
         # detected.
         theformats = [ff[1] for ff in fifo]
         formset = set(theformats)
         if len(formset) > 1:
             fmts_qpimage = ["SinglePhaseQpimageHDF5",
                             "SeriesPhaseQpimageHDF5"]
             fifo = [ff for ff in fifo if ff[1] not in fmts_qpimage]
-        # ignore raw tif files if single_tif_phasics is detected
+
+        # Ignore raw tif files if SinglePhasePhasicsTif is detected
         if len(formset) > 1 and "SinglePhasePhasicsTif" in theformats:
             fmts_badtif = "SingleRawOAHTif"
             fifo = [ff for ff in fifo if ff[1] not in fmts_badtif]
-        # otherwise, prevent multiple file formats
+
+        # Otherwise, prevent multiple file formats in one directory.
         theformats2 = [ff[1] for ff in fifo]
         formset2 = set(theformats2)
         if len(formset2) > 1:
             msg = "Qpformat does not support multiple different file " \
                   + "formats within one directory: {}".format(formset2)
             raise MultipleFormatsNotSupportedError(msg)
-        # sort the lists
+
+        # Finally, sort the list.
         fifo = sorted(fifo)
         return fifo
 
@@ -127,7 +135,7 @@ def files(self):
     @property
     def storage_type(self):
         """The storage type depends on the wrapped file format"""
-        ds = self._get_dataset(0)
+        ds = self._get_series_from_file(0)
         return ds.storage_type
 
     def get_identifier(self, idx):
@@ -136,24 +144,28 @@ def get_identifier(self, idx):
         .. versionchanged:: 0.4.2
             indexing starts at 1 instead of 0
         """
-        name = self._get_cropped_file_names()[idx]
-        return "{}:{}:{}".format(self.identifier, name, idx + 1)
+        file_idx, jj = self._get_sub_image_mapping()[idx]
+        name = self._get_cropped_file_names()[file_idx]
+        return f"{self.identifier}:{name}:{jj}:{idx}"
 
     def get_name(self, idx):
         """Return name of data at index `idx`
 
         .. versionadded:: 0.4.2
         """
-        return "{}".format(self.path / self.files[idx])
+        file_idx, jj = self._get_sub_image_mapping()[idx]
+        return f"{self.path / self.files[file_idx]}:{jj}"
 
     def get_time(self, idx):
-        ds = self._get_dataset(idx)
-        return ds.get_time()
+        file_idx, jj = self._get_sub_image_mapping()[idx]
+        ds = self._get_series_from_file(file_idx)
+        return ds.get_time(jj)
 
     def get_qpimage_raw(self, idx):
         """Return QPImage without background correction"""
-        ds = self._get_dataset(idx)
-        qpi = ds.get_qpimage_raw()
+        file_idx, jj = self._get_sub_image_mapping()[idx]
+        ds = self._get_series_from_file(file_idx)
+        qpi = ds.get_qpimage_raw(jj)
         qpi["identifier"] = self.get_identifier(idx)
         return qpi
 

diff --git a/qpformat/file_formats/fmts_raw_oah/series_raw_oah_hyperspy_hdf5.py b/qpformat/file_formats/fmts_raw_oah/series_raw_oah_hyperspy_hdf5.py
@@ -92,7 +92,7 @@ def get_qpimage_raw(self, idx=0):
         qpi = qpimage.QPImage(data=data,
                               which_data="raw-oah",
                               meta_data=meta_data,
-                              holo_kw=self.holo_kw,
+                              qpretrieve_kw=self.qpretrieve_kw,
                               h5dtype=self.as_type)
         # set identifier
         qpi["identifier"] = self.get_identifier(idx)

diff --git a/qpformat/file_formats/fmts_raw_oah/series_raw_oah_qpformat_hdf5.py b/qpformat/file_formats/fmts_raw_oah/series_raw_oah_qpformat_hdf5.py
@@ -44,7 +44,7 @@ def get_qpimage_raw(self, idx):
         qpi = qpimage.QPImage(data=data,
                               which_data="raw-oah",
                               meta_data=meta_data,
-                              holo_kw=self.holo_kw,
+                              qpretrieve_kw=self.qpretrieve_kw,
                               h5dtype=self.as_type)
         # set identifier
         qpi["identifier"] = self.get_identifier(idx)

diff --git a/qpformat/file_formats/fmts_raw_oah/series_raw_oah_tif_zip.py b/qpformat/file_formats/fmts_raw_oah/series_raw_oah_tif_zip.py
@@ -34,10 +34,11 @@ def _get_dataset(self, idx):
             zf = zipfile.ZipFile(self.path)
             pt = zf.open(self.files[idx])
             fd = io.BytesIO(pt.read())
-            self._dataset[idx] = SingleRawOAHTif(path=fd,
-                                                 meta_data=self.meta_data,
-                                                 as_type=self.as_type,
-                                                 holo_kw=self.holo_kw)
+            self._dataset[idx] = SingleRawOAHTif(
+                path=fd,
+                meta_data=self.meta_data,
+                as_type=self.as_type,
+                qpretrieve_kw=self.qpretrieve_kw)
         return self._dataset[idx]
 
     @staticmethod

diff --git a/qpformat/file_formats/fmts_raw_oah/single_raw_oah_qpformat_hdf5.py b/qpformat/file_formats/fmts_raw_oah/single_raw_oah_qpformat_hdf5.py
@@ -38,7 +38,7 @@ def get_qpimage_raw(self, idx=0):
         qpi = qpimage.QPImage(data=holo,
                               which_data="raw-oah",
                               meta_data=meta_data,
-                              holo_kw=self.holo_kw,
+                              qpretrieve_kw=self.qpretrieve_kw,
                               h5dtype=self.as_type)
         # set identifier
         qpi["identifier"] = self.get_identifier()

diff --git a/qpformat/file_formats/fmts_raw_oah/single_raw_oah_tif.py b/qpformat/file_formats/fmts_raw_oah/single_raw_oah_tif.py
@@ -46,7 +46,7 @@ def get_qpimage_raw(self, idx=0):
         qpi = qpimage.QPImage(data=holo,
                               which_data="raw-oah",
                               meta_data=meta_data,
-                              holo_kw=self.holo_kw,
+                              qpretrieve_kw=self.qpretrieve_kw,
                               h5dtype=self.as_type)
         # set identifier
         qpi["identifier"] = self.get_identifier()

diff --git a/qpformat/file_formats/fmts_raw_qlsi/series_raw_qlsi_qpformat_hdf5.py b/qpformat/file_formats/fmts_raw_qlsi/series_raw_qlsi_qpformat_hdf5.py
@@ -44,7 +44,7 @@ def get_qpimage_raw(self, idx):
         qpi = qpimage.QPImage(data=data,
                               which_data="raw-qlsi",
                               meta_data=meta_data,
-                              holo_kw=self.holo_kw,
+                              qpretrieve_kw=self.qpretrieve_kw,
                               h5dtype=self.as_type)
         # set identifier
         qpi["identifier"] = self.get_identifier(idx)

diff --git a/qpformat/file_formats/fmts_raw_qlsi/single_raw_qlsi_qpformat_hdf5.py b/qpformat/file_formats/fmts_raw_qlsi/single_raw_qlsi_qpformat_hdf5.py
@@ -38,7 +38,7 @@ def get_qpimage_raw(self, idx=0):
         qpi = qpimage.QPImage(data=holo,
                               which_data="raw-qlsi",
                               meta_data=meta_data,
-                              holo_kw=self.holo_kw,
+                              qpretrieve_kw=self.qpretrieve_kw,
                               h5dtype=self.as_type)
         # set identifier
         qpi["identifier"] = self.get_identifier()

diff --git a/qpformat/file_formats/series_base.py b/qpformat/file_formats/series_base.py
@@ -3,6 +3,7 @@
 import functools
 import io
 import pathlib
+import warnings
 
 import numpy as np
 import qpimage
@@ -17,7 +18,8 @@ class SeriesData(object):
     is_series = True
     priority = 0  # decrease to get higher priority
 
-    def __init__(self, path, meta_data=None, holo_kw=None, as_type="float32"):
+    def __init__(self, path, meta_data=None, holo_kw=None, qpretrieve_kw=None,
+                 as_type="float32"):
         """
         Parameters
         ----------
@@ -26,18 +28,43 @@ def __init__(self, path, meta_data=None, holo_kw=None, as_type="float32"):
         meta_data: dict
             Dictionary containing meta data.
             see :py:class:`qpimage.META_KEYS`.
+        holo_kw: dict
+            Deprecated, please use `qpretrieve_kw` instead!
+        qpretrieve_kw: dict
+            Keyword arguments passed to
+            :ref:`qpretrieve <qpretrieve:index>` for
+            phase retrieval from interferometric data.
         as_type: str
             Defines the data type that the input data is casted to.
             The default is "float32" which saves memory. If high
             numerical accuracy is required (does not apply for a
             simple 2D phase analysis), set this to double precision
             ("float64").
         """
-        #: Enforced dtype via keyword arguments
-        if holo_kw is None:
-            holo_kw = {}
+        if qpretrieve_kw is None:
+            qpretrieve_kw = {}
+
+        if holo_kw is not None:
+            warnings.warn(
+                "`holo_kw` is deprecated! Please use `qpretrieve_kw` instead",
+                DeprecationWarning)
+            # map deprecated parameters to `qpretrieve_kw`
+            for key in holo_kw:
+                if key == "sideband":
+                    if holo_kw[key] in [-1, 1]:
+                        qpretrieve_kw["invert_phase"] = holo_kw[key] == -1
+                    else:
+                        qpretrieve_kw["sideband_freq"] = holo_kw[key]
+                        qpretrieve_kw["invert_phase"] = False
+                elif key == "zero_pad":
+                    qpretrieve_kw["padding"] = holo_kw["zero_pad"]
+                else:
+                    qpretrieve_kw[key] = holo_kw[key]
+
         if meta_data is None:
             meta_data = {}
+
+        #: Enforced dtype via keyword arguments
         self.as_type = as_type
         if isinstance(path, io.IOBase):
             # io.IOBase
@@ -54,9 +81,8 @@ def __init__(self, path, meta_data=None, holo_kw=None, as_type="float32"):
                 raise ValueError(msg)
         #: Enforced metadata via keyword arguments
         self.meta_data = copy.copy(meta_data)
-        #: Hologram retrieval; keyword arguments for
-        #: :func:`qpimage.holo.get_field`.
-        self.holo_kw = holo_kw
+        #: Keyword arguments for interferometric phase retrieval
+        self.qpretrieve_kw = qpretrieve_kw
         self._bgdata = []
         #: Unique string that identifies the background data that
         #: was set using `set_bg`.
@@ -142,10 +168,9 @@ def _identifier_meta(self):
         for key in sorted(list(self.meta_data.keys())):
             value = self.meta_data[key]
             data.append("{}={}".format(key, value))
-        # hologram info
-        for key in sorted(list(self.holo_kw.keys())):
-            value = self.holo_kw[key]
-            data.append("{}={}".format(key, value))
+        # qpretrieve keywords
+        for key in sorted(list(self.qpretrieve_kw.keys())):
+            data.append(f"{key}={self.qpretrieve_kw[key]}")
         return hash_obj(data)
 
     @property

diff --git a/setup.py b/setup.py
@@ -25,7 +25,7 @@
     long_description=open('README.rst').read() if exists('README.rst') else '',
     install_requires=["h5py>=2.7.0",
                       "numpy>=1.12.0",
-                      "qpimage>=0.8.0",
+                      "qpimage>=0.8.3",
                       "tifffile>=2020.5.25",
                       ],
     python_requires='>=3.9, <4',