In [1]:
import os
import contextlib
import glob

import numpy as np
import hyperspy.api as hs
import pyxem
import mrcfile
import stemtool

import libertem.api as lt
from libertem.executor.inline import InlineJobExecutor
from libertem.udf.base import UDF
from libertem.common.buffers import reshaped_view



In [2]:
# Root folders for this conversion run: the original test files are read from
# `sourcefolder`, the flat raw reference files are written below `targetfolder`.
sourcefolder = '/cachedata/users/clausen/libertem-test-data/'
targetfolder = '/cachedata/users/weber/libertem-test-data-raw/'
# Fail early and name the offending folder instead of failing somewhere inside
# a reader later on. isdir() also rejects a path that exists but is a regular file.
assert os.path.isdir(sourcefolder), f'source folder not found: {sourcefolder}'
assert os.path.isdir(targetfolder), f'target folder not found: {targetfolder}'

In [3]:
@contextlib.contextmanager
def targetmap(targetfolder, relpath, shape, dtype):
    """Context manager yielding a writable memory map for one raw reference file.

    The file is created at ``<targetfolder>/<relpath>_<shape>_<dtype>.raw``,
    where shape and dtype are formatted with their default string
    representation. Missing parent folders are created first.

    Parameters
    ----------
    targetfolder : str
        Root folder for the generated files.
    relpath : str
        Path used as the stem of the target file name, relative to
        `targetfolder`; may contain sub-folders.
    shape : tuple of int
        Shape of the array to create.
    dtype : str or numpy.dtype
        Data type of the array to create.

    Yields
    ------
    numpy.memmap
        Array backed by the target file, opened with ``mode='w+'``.
    """
    targetname = os.path.join(targetfolder, f'{relpath}_{shape}_{dtype}.raw')
    os.makedirs(os.path.dirname(targetname), exist_ok=True)
    target = np.memmap(targetname, mode='w+', shape=shape, dtype=dtype)
    try:
        yield target
    finally:
        # Dropping the local name alone writes nothing: the caller's `as`
        # variable still references the map after the `with` block ends.
        # Flush explicitly, and do so even when the body raised.
        target.flush()
        del target

In [4]:
# Files that are loaded through HyperSpy: copy each array unchanged into a raw
# file of identical shape and dtype.
hyperspy_files = ('default.blo', 'default.ser')
for path in hyperspy_files:
    print(path)
    signal = hs.load(os.path.join(sourcefolder, path))
    pixels = signal.data
    with targetmap(targetfolder, path, pixels.shape, pixels.dtype) as target:
        target[...] = pixels

default.blo
default.ser


In [5]:
relpath = 'default.mib'
mib_path = os.path.join(sourcefolder, relpath)
assert os.path.exists(mib_path)
data = pyxem.utils.io_utils.load_mib(mib_path)
# The loader reports that it returns the stack without reshaping, so the 4D
# shape is imposed here by hand.
shape = (32, 32, 256, 256)
with targetmap(targetfolder, relpath, shape, data.data.dtype) as target:
    # presumably a lazy (dask) array, hence the .compute() before copying - confirm
    stack = data.data.reshape(shape)
    target[:] = stack.compute()

This mib file appears to be TEM data. The stack is returned with no reshaping.


In [6]:
# Stack the individual DM4 files into a single 3D float32 array, one file per
# index along the first axis.
shape = (10, 3838, 3710)
# Sorted so that the position in the stack follows the file name order.
dm4_files = sorted(glob.glob(os.path.join(sourcefolder, 'dm/*.dm4')))
# The target is pre-sized from the hard-coded shape: without this check, missing
# files would silently leave unwritten frames in the reference, and surplus
# files would only fail after most of the data had already been written.
assert len(dm4_files) == shape[0], f'expected {shape[0]} DM4 files, found {len(dm4_files)}'
with targetmap(targetfolder, 'dm/2018-7-17 15_29.dm4', shape, 'float32') as target:
    for i, f in enumerate(dm4_files):
        data = hs.load(f)
        print(i, f, data.data.shape)
        target[i] = data.data

0 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0000.dm4 (3838, 3710)
1 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0001.dm4 (3838, 3710)
2 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0002.dm4 (3838, 3710)
3 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0003.dm4 (3838, 3710)
4 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0004.dm4 (3838, 3710)
5 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0005.dm4 (3838, 3710)
6 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0006.dm4 (3838, 3710)
7 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0007.dm4 (3838, 3710)
8 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0008.dm4 (3838, 3710)
9 /cachedata/users/clausen/libertem-test-data/dm/2018-7-17 15_29_0009.dm4 (3838, 3710)


In [7]:
# Concatenate the DM3 stacks along the first axis into one 3D float32 array.
shape = (40, 3838, 3710)
with targetmap(targetfolder, 'dm/3D/alpha-50.dm3', shape, 'float32') as target:
    offset = 0
    for i, f in enumerate(sorted(glob.glob(os.path.join(sourcefolder, 'dm/3D/*.dm3')))):
        data = hs.load(f)
        print(i, f, data.data.shape)
        # Advance by the frame count actually found in the file instead of
        # assuming a fixed 20 frames per file.
        frame_count = data.data.shape[0]
        target[offset:offset + frame_count] = data.data
        offset += frame_count
    # Every frame of the pre-sized target must have been written.
    assert offset == shape[0], f'expected {shape[0]} frames in total, got {offset}'

0 /cachedata/users/clausen/libertem-test-data/dm/3D/alpha-50_obj.dm3 (20, 3838, 3710)
1 /cachedata/users/clausen/libertem-test-data/dm/3D/alpha-50_ref.dm3 (20, 3838, 3710)


In [8]:
class ExportUDF(UDF):
    """UDF that copies every tile of a dataset into a caller-supplied array.

    It declares no result buffers; its only effect is writing into the
    `targetmap` parameter.

    NOTE(review): presumably only meaningful with an in-process executor, since
    the destination array is handed over as a plain UDF parameter - confirm.
    """

    def __init__(self, targetmap):
        """Register the writable destination array as the parameter `targetmap`."""
        super().__init__(targetmap=targetmap)

    def get_preferred_input_dtype(self):
        """Ask for the data in its native dtype (``UDF.USE_NATIVE_DTYPE``)."""
        return UDF.USE_NATIVE_DTYPE

    def get_result_buffers(self):
        """Declare no result buffers."""
        return dict()

    def process_tile(self, tile):
        """Write `tile` into the destination at the position given by its slice."""
        dataset_shape = self.meta.dataset_shape
        # Reshape the destination so that all navigation axes are collapsed into one.
        flat_shape = (np.prod(dataset_shape.nav), *dataset_shape.sig)
        destination = reshaped_view(self.params.targetmap, flat_shape)
        destination[self.meta.slice.get()] = tile

In [9]:
# K2IS: for now the reference is generated with LiberTEM itself, because only two
# alternative readers are available: one written in Matlab by Colin Ophus, and
# the one integrated in GMS.

relpath = 'Capture52/Capture52_.gtg'
ctx = lt.Context(executor=InlineJobExecutor())
ds = ctx.load('auto', path=os.path.join(sourcefolder, relpath))
k2is_shape = tuple(ds.shape)
with targetmap(targetfolder, relpath, k2is_shape, ds.dtype) as target:
    exporter = ExportUDF(targetmap=target)
    ctx.run_udf(udf=exporter, dataset=ds, progress=True)

100%|██████████| 34/34 [00:19<00:00,  1.75it/s]


In [10]:
relpath = 'frms6/C16_15_24_151203_019.hdr'
ctx = lt.Context(executor=InlineJobExecutor())
ds = ctx.load('frms6', path=os.path.join(sourcefolder, relpath), enable_offset_correction=False)
assert tuple(ds.shape) == (256, 256, 264, 264)
with targetmap(
        targetfolder=targetfolder,
        relpath=relpath,
        shape=tuple(ds.shape),
        dtype='uint16') as target:
    # stemtool opens data as 3D stack, therefore flatten nav
    v = reshaped_view(target, (np.prod(ds.shape.nav), ) + tuple(ds.shape.sig))
    # Running write position in the flattened navigation axis
    offset = 0
    # We skip the first file, which contains a zero reference
    for i in range(1, 4):
        fname = os.path.join(sourcefolder, f'frms6/C16_15_24_151203_019_00{i}.frms6')
        raw_shape = stemtool.util.pnccd.Frms6Reader.getDataShape(fname)
        # The frame count is the last entry of the shape reported by the reader
        frame_count = raw_shape[-1]
        data = stemtool.util.pnccd.Frms6Reader.readData(
            fname,
            image_range=(0,frame_count),
            pixels_x = raw_shape[0],
            pixels_y = raw_shape[1]
        )
        v[offset:offset+frame_count] = np.moveaxis(  # undo the transpose that pycroscopy does
            np.repeat(  # unbinning 4x in x direction
                # invert lower half and attach right of upper half
                # The detector consists of two chips that are arranged head-to-head
                # The outputs of the two chips are just concatenated in the file, while LiberTEM
                # re-assembles the data taking the spatial relation into account
                np.concatenate((data[:264], np.flip(data[264:], axis=(0, 1,))), axis=1),
                4, axis=1  # repeat options
            ),
            (0, 1, 2), (2, 1, 0)  # moveaxis options
        )
        offset += frame_count
    
    # Consistency check since the reader above is not trivial: compare ten
    # frames against what LiberTEM's pick analysis returns for the same position.
    # A seeded generator picks the same frames on every run, so that a failing
    # check can be reproduced.
    rng = np.random.default_rng(0)
    nav_shape = tuple(ds.shape.nav)
    for count in range(10):
        i = int(rng.integers(0, np.prod(nav_shape)))
        # Convert the flat frame index back to (y, x) scan coordinates
        y, x = divmod(i, nav_shape[1])
        a = ctx.create_pick_analysis(dataset=ds, y=y, x=x)
        res = ctx.run(a)
        assert np.allclose(res['intensity'].raw_data, target[y, x]), f'mismatch at y={y}, x={x}'

In [11]:
# EMPAD: the source is mapped with 130 rows per frame, of which only the first
# 128 are copied into the reference.
dtype = 'float32'
for size in (4, 256):
    relpath = f'EMPAD/scan_11_x{size}_y{size}.raw'
    shape = (size, size, 128, 128)
    with targetmap(targetfolder, relpath, shape, dtype) as target:
        source = np.memmap(
            os.path.join(sourcefolder, relpath),
            shape=(size, size, 130, 128),
            dtype=dtype,
            mode='r',
        )
        target[:] = source[..., :128, :]

In [12]:
relpath = 'mrc/20200821_92978_movie.mrc'
# Open the MRC file in a `with` block so that it is closed again as soon as the
# data has been copied, instead of staying open for the rest of the session.
with mrcfile.open(os.path.join(sourcefolder, relpath)) as mrc:
    # The reference takes shape and dtype directly from the loaded array.
    with targetmap(
            targetfolder=targetfolder,
            relpath=relpath,
            shape=mrc.data.shape,
            dtype=mrc.data.dtype) as target:
        target[:] = mrc.data