Skip to content

Commit

Permalink
feat: add 'storage_type' property describing the type of data stored …
Browse files Browse the repository at this point in the history
…in a dataset
  • Loading branch information
paulmueller committed Mar 8, 2018
1 parent 853c103 commit a680f5b
Show file tree
Hide file tree
Showing 12 changed files with 113 additions and 24 deletions.
24 changes: 13 additions & 11 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
0.1.4
- add hologram file formats: HyperSpy and tif-based
- use hologram keyword arguments to generate dataset identifier
- feat: add "storage_type" property describing which type of data
is stored originally in a dataset
- feat: add hologram file formats: HyperSpy and tif-based
- fix: use hologram keyword arguments to generate dataset identifier
0.1.3
- format series and single hdf5: override raw meta data
- include background data in determination of data set identifiers
- save memory by hard-linking background image data in QPSeries
- feat: save memory by hard-linking background image data in QPSeries
- fix: format series and single hdf5: override raw meta data
- fix: include background data in determination of data set identifiers
0.1.2
- implement SeriesData.saveh5 (export as qpimage.QPSeries) (#2)
- add unique part of file name to SeriesFolder image identifiers (#2)
- change API for SingleData ("idx=0" for user convenience)
- extract identifiers from hdf5 files
- feat: change API for SingleData ("idx=0" for user convenience)
- feat: implement SeriesData.saveh5 (export as qpimage.QPSeries) (#2)
- feat: add unique part of file name to SeriesFolder image identifiers (#2)
- feat: extract identifiers from hdf5 files
0.1.1
- support pathlib on high level
- add SeriesData.identifier
- feat: support pathlib
- feat: add SeriesData.identifier
0.1.0
- initial release
1 change: 1 addition & 0 deletions qpformat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from ._version import version as __version__ # noqa: F401
from .core import load_data # noqa: F401
from . import file_formats # noqa: F401
21 changes: 21 additions & 0 deletions qpformat/file_formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@
from .single_npy_numpy import SingleNpyNumpy
from .single_tif_holo import SingleTifHolo
from .single_tif_phasics import SingleTifPhasics
from flake8.style_guide import lru_cache


class MultipleFormatsNotSupportedError(BaseException):
    """Raised when one directory contains several different file formats

    The class derives from `BaseException`, so handlers written as
    `except Exception` do not intercept it.
    """


class SeriesFolder(SeriesData):
# storage_type is implemented as a property

def __init__(self, *args, **kwargs):
super(SeriesFolder, self).__init__(*args, **kwargs)
self._files = None
Expand Down Expand Up @@ -58,6 +65,7 @@ def _identifier_data(self):
return hash_obj(data)

@staticmethod
@lru_cache(maxsize=32)
def _search_files(path):
fifo = []
for root, _dirs, files in os.walk(path):
Expand All @@ -66,13 +74,21 @@ def _search_files(path):
for fmt in formats:
if fmt.verify(fp):
fifo.append((fp, fmt.__name__))
break
# ignore qpimage formats if multiple formats were
# detected.
theformats = [ff[1] for ff in fifo]
formset = set(theformats)
if len(formset) > 1:
fmts_qpimage = ["SingleHdf5Qpimage", "SeriesHdf5Qpimage"]
fifo = [ff for ff in fifo if ff[1] not in fmts_qpimage]
# otherwise, prevent multiple file formats
theformats2 = [ff[1] for ff in fifo]
formset2 = set(theformats2)
if len(formset) > 1:
msg = "Qpformat does not support multiple different file " \
+ "formats within one directory: {}".format(formset2)
raise MultipleFormatsNotSupportedError(msg)
# sort the lists
fifo = sorted(fifo)
return fifo
Expand All @@ -85,6 +101,11 @@ def files(self):
self._formats = [ff[1] for ff in fifo]
return self._files

@property
def storage_type(self):
    """Storage type of the series, as reported by its first dataset"""
    return self._get_dataset(0).storage_type

def get_identifier(self, idx):
"""Return an identifier for the data at index `idx`"""
name = self._get_cropped_file_names()[idx]
Expand Down
1 change: 1 addition & 0 deletions qpformat/file_formats/series_hdf5_hyperspy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class SeriesHdf5HyperSpy(SeriesData):
hyperspy.io_plugins.hspy
"""
storage_type = "hologram"

def __len__(self):
    """Return the number of experiments found by `_get_experiments`"""
    experiments = self._get_experiments()
    return len(experiments)
Expand Down
2 changes: 2 additions & 0 deletions qpformat/file_formats/series_hdf5_qpimage.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@


class SeriesHdf5Qpimage(SeriesData):
storage_type = "phase,amplitude"

def __init__(self, *args, **kwargs):
super(SeriesHdf5Qpimage, self).__init__(*args, **kwargs)
self._qpseries = qpimage.QPSeries(h5file=self.path,
Expand Down
2 changes: 2 additions & 0 deletions qpformat/file_formats/series_zip_tif_phasics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@


class SeriesZipTifPhasics(SeriesData):
storage_type = "phase,intensity"

def __init__(self, *args, **kwargs):
super(SeriesZipTifPhasics, self).__init__(*args, **kwargs)
self._files = None
Expand Down
2 changes: 2 additions & 0 deletions qpformat/file_formats/single_hdf5_qpimage.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@


class SingleHdf5Qpimage(SingleData):
storage_type = "phase,amplitude"

@property
def identifier(self):
with qpimage.QPImage(h5file=self.path, h5mode="r") as qpi:
Expand Down
18 changes: 13 additions & 5 deletions qpformat/file_formats/single_npy_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import qpimage

from .dataset import SingleData
from flake8.style_guide import lru_cache


class SingleNpyNumpy(SingleData):
Expand All @@ -13,18 +14,25 @@ class SingleNpyNumpy(SingleData):
2D ndarray (no pickled objects). The ndarray is either
complex-valued (scattered field) or real-valued (phase).
"""
# storage_type is implemented as a property

@property
def storage_type(self):
    """Type of data stored in the file: "field" or "phase"

    A complex-valued array is reported as "field", any other array
    as "phase". The file is inspected on first access only; the
    result is then kept on the instance.
    """
    # Cache per instance rather than with `lru_cache` on the getter:
    # a function-level cache is keyed on `self` and would keep every
    # dataset instance alive for as long as the cache exists.
    try:
        return self._storage_type
    except AttributeError:
        pass
    # Memory-map the file (copy-on-write); pickled objects are refused.
    nf = np.load(self.path, mmap_mode="c", allow_pickle=False)
    self._storage_type = "field" if np.iscomplexobj(nf) else "phase"
    return self._storage_type

def get_qpimage_raw(self, idx=0):
    """Return QPImage without background correction

    `idx` is not evaluated in this method.
    """
    # Load experimental data (memory-mapped, pickled objects refused)
    nf = np.load(self.path, mmap_mode="c", allow_pickle=False)
    # Hand a shallow copy of the meta data to the QPImage
    meta_data = copy.copy(self.meta_data)
    # `storage_type` already distinguishes complex ("field") from
    # real-valued ("phase") data, so it is passed exactly once here.
    qpi = qpimage.QPImage(data=nf,
                          which_data=self.storage_type,
                          meta_data=meta_data)
    return qpi

Expand Down
2 changes: 2 additions & 0 deletions qpformat/file_formats/single_tif_holo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

class SingleTifHolo(SingleData):
"""DataSet for single hologram images"""
storage_type = "hologram"

@staticmethod
def _get_tif(path):
if not isinstance(path, str):
Expand Down
2 changes: 2 additions & 0 deletions qpformat/file_formats/single_tif_phasics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class LoadTifPhasicsError(BaseException):


class SingleTifPhasics(SingleData):
storage_type = "phase,intensity"

def __init__(self, path, meta_data={}, *args, **kwargs):
"""DataSet for single "SID PHA*.tif" files by Phasics S.A.
Expand Down
12 changes: 4 additions & 8 deletions tests/test_single_tif_phasics.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,26 +38,22 @@
extratags=extratags,
append=True)
"""

from os.path import abspath, dirname, join
import sys
import pathlib

import numpy as np

# Add parent directory to beginning of path variable
sys.path.insert(0, dirname(dirname(abspath(__file__))))
import qpformat # noqa: E402
import qpformat


def test_load_data():
    """Loading the Phasics tif file yields a `SingleTifPhasics` dataset"""
    # Only the pathlib-based path is used; the dataset must report it as-is.
    path = pathlib.Path(__file__).parent / "data" / "single_phasics.tif"
    ds = qpformat.load_data(path)
    assert ds.path == path
    assert "SingleTifPhasics" in repr(ds)


def test_data_content():
path = join(dirname(abspath(__file__)), "data/single_phasics.tif")
path = pathlib.Path(__file__).parent / "data" / "single_phasics.tif"
ds = qpformat.load_data(path)
assert ds.get_time() == 1461951095.00827
qpi = ds.get_qpimage()
Expand Down
50 changes: 50 additions & 0 deletions tests/test_storage_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import pathlib
import tempfile
import shutil

import qpformat


def test_phasics():
    """The single Phasics tif file reports "phase,intensity" storage"""
    data_dir = pathlib.Path(__file__).parent / "data"
    dataset = qpformat.load_data(data_dir / "single_phasics.tif")
    assert dataset.storage_type == "phase,intensity"


def test_phasics_zip():
    """The zipped Phasics series reports "phase,intensity" storage"""
    data_dir = pathlib.Path(__file__).parent / "data"
    dataset = qpformat.load_data(data_dir / "series_phasics.zip")
    assert dataset.storage_type == "phase,intensity"


def test_qpimage():
    """The single qpimage hdf5 file reports "phase,amplitude" storage"""
    data_dir = pathlib.Path(__file__).parent / "data"
    dataset = qpformat.load_data(data_dir / "single_qpimage.h5")
    assert dataset.storage_type == "phase,amplitude"


def test_bad_folder():
    """Loading the mixed-format test data folder must be rejected"""
    path = pathlib.Path(__file__).parent / "data"
    try:
        qpformat.load_data(path)
    except qpformat.file_formats.MultipleFormatsNotSupportedError:
        # expected: the folder holds several different file formats
        pass
    else:
        # Reaching this branch means no error was raised at all, so
        # say that explicitly and fail as an assertion.
        raise AssertionError("Expected MultipleFormatsNotSupportedError "
                             "for a folder with multiple file formats")


def test_good_folder():
    """A folder holding only qpimage files reports "phase,amplitude" """
    path = pathlib.Path(__file__).parent / "data"
    dpath = pathlib.Path(tempfile.mkdtemp(prefix="qpformat_test_"))
    try:
        # two copies of the same file form a single-format folder
        shutil.copy(str(path / "single_qpimage.h5"), str(dpath / "1.h5"))
        shutil.copy(str(path / "single_qpimage.h5"), str(dpath / "2.h5"))
        ds = qpformat.load_data(dpath)
        assert ds.storage_type == "phase,amplitude"
    finally:
        # mkdtemp never removes the directory itself. Clean up on a
        # best-effort basis so a leftover file cannot mask the result.
        shutil.rmtree(str(dpath), ignore_errors=True)


if __name__ == "__main__":
    # Run every callable in this module whose name starts with "test_"
    namespace = locals()
    for name in list(namespace):
        candidate = namespace[name]
        if name.startswith("test_") and hasattr(candidate, "__call__"):
            candidate()

0 comments on commit a680f5b

Please sign in to comment.