In [1]:
from pathlib import Path

from czitools import read_tools
from czitools import metadata_tools as czimd

from pylibCZIrw import czi as pyczi

import dask.array as da
import numpy as np

import pandas as pd
from pandarallel import pandarallel

from tqdm.auto import tqdm

In [2]:
# Input CZI file; point this at another dataset to convert a different stack.
filepath = Path('./data/20xObjective-Fused-noor-adult.czi')

# Parse the complete metadata tree in one go (not used again in the cells below).
mdata = czimd.CziMetadata(filepath)

# The per-dimension sizes are what the conversion actually relies on.
czi_dimensions = czimd.CziDimensions(filepath)
for dim_name in ("SizeS", "SizeT", "SizeZ", "SizeC", "SizeY", "SizeX"):
    print(f"{dim_name}: ", getattr(czi_dimensions, dim_name))

SizeS:  1
SizeT:  1
SizeZ:  704
SizeC:  1
SizeY:  5760
SizeX:  5760


In [3]:
# One row per z-plane; every row carries the same file path and stack geometry.
z_planes = np.arange(czi_dimensions.SizeZ)

image_info = pd.DataFrame()
image_info['z_index'] = z_planes
image_info['file_path'] = filepath

# Stack-wide sizes are scalars, so pandas broadcasts them down all rows.
stack_sizes = {
    'size_x': czi_dimensions.SizeX,
    'size_y': czi_dimensions.SizeY,
    'size_z': czi_dimensions.SizeZ,
    'size_t': czi_dimensions.SizeT,
    'size_channels': czi_dimensions.SizeC,
    'size_scenes': czi_dimensions.SizeS,
}
for column, value in stack_sizes.items():
    image_info[column] = value

image_info.sample(5)

Unnamed: 0,z_index,file_path,size_x,size_y,size_z,size_t,size_channels,size_scenes
542,542,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1
79,79,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1
617,617,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1
696,696,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1
613,613,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1


In [4]:
# Read a single plane so the metadata can be cross-checked against real pixel data.
with pyczi.open_czi(str(filepath)) as czidoc:
    plane = {'C': 0, 'Z': 0, 'T': 0}
    frame_0 = czidoc.read(plane=plane)

# The frame is laid out as (rows, columns, channels) = (Y, X, C): the width (X)
# is axis 1 and the height (Y) is axis 0. Comparing X against axis 0 only
# passes for square images.
assert image_info['size_x'][0] == frame_0.shape[1], 'sanity check failed in x dim'
assert image_info['size_y'][0] == frame_0.shape[0], 'sanity check failed in y dim'
# if it fails look at notes in next cell

In [5]:
# Metadata and pixel data do not always agree on the in-plane size, so the
# frame that was actually read is taken as the ground truth.
# The frame is (Y, X, C): the height is axis 0 and the width is axis 1. Taking
# them the other way round swaps size_x and size_y for non-square images and
# the zarr array, which is shaped (z, y, x), no longer fits the planes.
image_info['size_y'] = frame_0.shape[0]
image_info['size_x'] = frame_0.shape[1]
image_info.sample(5)

Unnamed: 0,z_index,file_path,size_x,size_y,size_z,size_t,size_channels,size_scenes
538,538,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1
300,300,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1
537,537,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1
178,178,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1
628,628,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1


In [6]:
def optimal_size(current_size, res_levels):
    """Round ``current_size`` up to the next multiple of ``2**res_levels``.

    A size that is an exact multiple of ``2**res_levels`` can be halved
    ``res_levels`` times without leaving a remainder.

    Parameters
    ----------
    current_size : int
        Size of the axis in pixels.
    res_levels : int
        Number of 2x downsampling steps the axis has to support.

    Returns
    -------
    int
        ``current_size`` itself when it is already a multiple, otherwise the
        next larger multiple. Both intermediate values are printed.
    """
    div_factor = np.power(2, res_levels)
    leftover = np.mod(current_size, div_factor)
    print(f'current size: {current_size}, factor: {div_factor}, reminder: {leftover}')

    # Padding is only needed when the size is not already an exact multiple.
    padding = div_factor - leftover if leftover > 0 else 0
    padded_size = current_size + padding
    print(f'we need to add: {padding}, so new size is: {padded_size}')

    return padded_size


In [7]:
# Number of 2x downsampling steps each axis has to support.
res_levels_x, res_levels_y, res_levels_z = 4, 4, 2

# Stack dimensions as reported by the CZI metadata.
x_size, y_size, z_size = czi_dimensions.SizeX, czi_dimensions.SizeY, czi_dimensions.SizeZ

# Pad every axis up to a size that survives the requested number of halvings.
padded_sizes = {
    axis: int(optimal_size(image_info[f'size_{axis}'][0], levels))
    for axis, levels in (('x', res_levels_x), ('y', res_levels_y), ('z', res_levels_z))
}
total_x, total_y, total_z = padded_sizes['x'], padded_sizes['y'], padded_sizes['z']

for axis in ('x', 'y', 'z'):
    image_info[f'zarr_size_{axis}'] = padded_sizes[axis]

image_info.sample(5)

current size: 5760, factor: 16, reminder: 0
we need to add: 0, so new size is: 5760
current size: 5760, factor: 16, reminder: 0
we need to add: 0, so new size is: 5760
current size: 704, factor: 4, reminder: 0
we need to add: 0, so new size is: 704


Unnamed: 0,z_index,file_path,size_x,size_y,size_z,size_t,size_channels,size_scenes,zarr_size_x,zarr_size_y,zarr_size_z
129,129,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704
164,164,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704
30,30,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704
405,405,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704
409,409,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704


In [8]:
from functools import partial

def get_quantiles_df(row, qvalues, ch = 0):
    """Return a lower and an upper intensity quantile of one z-plane.

    Meant to be applied row-wise to the per-plane table (``axis='columns'``).

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series``)
        Must provide ``'z_index'`` (plane to read) and ``'file_path'``
        (CZI file to open).
    qvalues : sequence of two floats
        Lower and upper quantile, in that order, with ``qvalues[0] < qvalues[1]``.
    ch : int, optional
        Channel index to read. Defaults to 0.

    Returns
    -------
    tuple
        ``(current_min, current_max)``: the two quantiles cast back to the
        dtype of the frame.

    Raises
    ------
    ValueError
        If ``qvalues`` is empty or ``None``.
    AssertionError
        If ``qvalues`` does not hold exactly two increasing values.
    """
    # Validate before touching the file: a bad argument should fail
    # immediately instead of after a full plane has been read.
    if not qvalues:
        # A bare string cannot be raised in Python 3; use a real exception.
        raise ValueError('No quantile deff')
    assert len(qvalues) == 2, 'max 2 values'
    assert qvalues[0] < qvalues[1]

    # Imported locally, presumably so the function stays self-contained when
    # it is executed in worker processes.
    from numpy import quantile
    from pylibCZIrw.czi import open_czi

    zidx = row['z_index']

    with open_czi(str(row['file_path'])) as czidoc:
        plane = {'C': ch, 'Z': zidx, 'T': 0}
        frame = czidoc.read(plane=plane)

    val = quantile(frame, [qvalues[0], qvalues[1]])
    current_min = val[0].astype(frame.dtype)
    current_max = val[1].astype(frame.dtype)

    return current_min, current_max

def get_int_limits_df(row, ch = 0):
    """Return the smallest and largest pixel value of one z-plane.

    Meant to be applied row-wise to the per-plane table (``axis='columns'``).

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series``)
        Must provide ``'z_index'`` (plane to read) and ``'file_path'``
        (CZI file to open).
    ch : int, optional
        Channel index to read. Defaults to 0.

    Returns
    -------
    tuple
        ``(minimum, maximum)`` of the plane.
    """
    from pylibCZIrw.czi import open_czi

    z_position = row['z_index']
    print(z_position)

    source = str(row['file_path'])
    with open_czi(source) as reader:
        pixels = reader.read(plane={'C': ch, 'Z': z_position, 'T': 0})
        brightest = pixels.max()
        darkest = pixels.min()

    return darkest, brightest

def test_df(row):
    print(row)

In [9]:
# Important user input: the channel that is analysed and converted.
ch = 0
pandarallel.initialize(use_memory_fs=False, progress_bar=True)

# Absolute intensity range of every plane.
plane_limits = partial(get_int_limits_df, ch=ch)
image_info[['min', 'max']] = image_info.parallel_apply(
    plane_limits, axis='columns', result_type='expand'
)

# Quantile-based intensity range of every plane.
# NOTE(review): the upper quantile is 0.995 although its column is labelled 'q0.99'.
plane_quantiles = partial(get_quantiles_df, qvalues=[0.05, 0.995], ch=ch)
image_info[['q0.05', 'q0.99']] = image_info.parallel_apply(
    plane_quantiles, axis='columns', result_type='expand'
)

image_info.sample(10)

INFO: Pandarallel will run on 20 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.

https://nalepae.github.io/pandarallel/troubleshooting/


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=36), Label(value='0 / 36'))), HBox…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=36), Label(value='0 / 36'))), HBox…

Unnamed: 0,z_index,file_path,size_x,size_y,size_z,size_t,size_channels,size_scenes,zarr_size_x,zarr_size_y,zarr_size_z,min,max,q0.05,q0.99
408,408,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,30,1696,89,265
536,536,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,43,9207,89,268
412,412,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,44,2683,89,272
208,208,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,49,520,91,273
236,236,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,33,538,90,261
67,67,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,57,533,91,268
297,297,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,42,1845,90,273
621,621,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,58,1422,88,202
689,689,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,46,4053,88,216
137,137,data\20xObjective-Fused-noor-adult.czi,5760,5760,704,1,1,1,5760,5760,704,55,544,91,264


In [10]:
# Global intensity window for the rescaling step further down.
# The quantile columns ('q0.05', 'q0.99') were previously assigned here as well,
# but were overwritten straight away and never took effect, so only the window
# that is actually used is kept.

# Lower bound: the darkest pixel found in any plane.
min_val = image_info['min'].min()
# Upper bound: the 95th percentile of the per-plane maxima (presumably so that
# a few planes with extreme bright pixels do not compress everything else).
max_val = image_info['max'].quantile(.95).astype(np.uint)

print(max_val)
print(min_val)

7497
18


In [11]:
import zarr
def rm_tree(pth):
    """Recursively delete the directory ``pth`` together with all its contents.

    Delegates to ``shutil.rmtree``, which removes symbolic links found inside
    the tree without following them. The previous hand-written recursion
    descended into linked directories, deleted the files of the link target and
    then failed when trying to ``rmdir`` the link itself.

    Parameters
    ----------
    pth : str or pathlib.Path
        Directory to remove.

    Raises
    ------
    FileNotFoundError
        If ``pth`` does not exist.
    NotADirectoryError
        If ``pth`` is a regular file.
    """
    # Local import so the helper does not depend on another cell's imports.
    import shutil

    shutil.rmtree(Path(pth))

In [12]:
# The scratch store lives next to the source file.
data_path = filepath.parent
czi_name = filepath.name

# Everything before the first '.czi' plus the channel index names the store.
zarr_name = czi_name.split('.czi')[0] + '_ch' + str(ch) + '.zarr'
z0_path = data_path / zarr_name

print(z0_path)

data\20xObjective-Fused-noor-adult_ch0.zarr


In [13]:
# Output pixel type and in-plane chunk edge of the scratch array.
z_dtype = np.uint8
chunk_size = 512

# Always start from an empty store: remove leftovers of a previous run.
if z0_path.exists():
    rm_tree(z0_path)

# One chunk never spans more than a single z-plane.
store = zarr.DirectoryStore(z0_path)
z = zarr.creation.open_array(
    store=store,
    mode='a',
    shape=(total_z, total_y, total_x),
    chunks=(1, chunk_size, chunk_size),
    dtype=z_dtype,
)
z

<zarr.core.Array (704, 5760, 5760) uint8>

# dynamically fill in values

In [14]:
# dynamically fill in values
from functools import partial
from multiprocess import Pool

def correct_write(zidx, zarr_store, filepath, min_val, max_val, bitdepth, ch=0):
    """Rescale one z-plane to the output bit depth and store it in ``zarr_store``.

    The plane is mapped linearly so that ``min_val`` becomes 0 and ``max_val``
    becomes the largest value of ``bitdepth``. Values outside that window are
    clipped. The result is written to ``zarr_store[zidx]``.

    Parameters
    ----------
    zidx : int
        Index of the z-plane to read and of the slot to write.
    zarr_store : array-like
        Target that supports ``zarr_store[z, y, x] = ...`` assignment.
    filepath : str or path-like
        CZI file to read from.
    min_val, max_val : number
        Intensity window of the input; ``max_val`` must be larger than ``min_val``.
    bitdepth : dtype-like
        Output type; 8-bit or 16-bit unsigned integer (e.g. ``numpy.uint8``).
    ch : int, optional
        Channel index to read. Defaults to 0.

    Raises
    ------
    ValueError
        If ``bitdepth`` is not an 8- or 16-bit unsigned integer type, or if
        ``max_val`` is not larger than ``min_val``.
    """
    from numpy import clip, dtype, float64, iinfo

    # Check the cheap arguments first so a bad call fails before any file I/O.
    try:
        out_dtype = dtype(bitdepth)
    except TypeError as err:
        raise ValueError(f'unsupported bitdepth: {bitdepth!r}') from err
    if out_dtype not in (dtype('uint8'), dtype('uint16')):
        raise ValueError(f'unsupported bitdepth: {bitdepth!r} (expected uint8 or uint16)')
    out_max = int(iinfo(out_dtype).max)

    lower, upper = float(min_val), float(max_val)
    if not upper > lower:
        raise ValueError(f'max_val ({max_val}) must be larger than min_val ({min_val})')

    # Imported locally, presumably so the function stays self-contained when
    # it is executed in worker processes.
    from pylibCZIrw.czi import open_czi

    with open_czi(str(filepath)) as czidoc:
        plane = {'C': ch, 'Z': zidx, 'T': 0}
        frame = czidoc.read(plane=plane)

    # Work in float64: subtracting in the unsigned source dtype can wrap around
    # for pixels darker than min_val instead of producing a negative value.
    scaled = (frame.astype(float64) - lower) * (out_max / (upper - lower))
    clip(scaled, 0, out_max, out=scaled)

    # astype truncates the fractional part, as the rescaling always did.
    out = scaled.astype(out_dtype)

    # The frame has a trailing third axis; only its first entry is stored.
    zarr_store[zidx, :out.shape[0], :out.shape[1]] = out[:, :, 0]


# Rescale and store every z-plane, spread over 20 worker processes.
# np.uint8 is the same type the scratch array was created with (z_dtype).
write_plane = partial(
    correct_write,
    zarr_store=z,
    filepath=filepath,
    min_val=min_val,
    max_val=max_val,
    bitdepth=np.uint8,
    ch=ch,
)
with Pool(20) as pool:
    seq = list(range(z_size))
    pool.map(write_plane, seq)


In [15]:
import dask.array as da
# like numpy.mean, but maintains dtype, helper function
def mean_dtype(arr, **kwargs):
    """Average ``arr`` like ``numpy.mean`` but return the result in ``arr``'s dtype.

    All keyword arguments are forwarded to ``numpy.mean``. The cast back to the
    input dtype drops the fractional part for integer inputs.
    """
    averaged = np.mean(arr, **kwargs)
    return averaged.astype(arr.dtype)

In [16]:
# The data has to be rechunked at this stage; without it the zarr writing
# later on fails. The reason for that is not fully understood yet.
d0 = (
    da.from_zarr(store)
    .rechunk((64, 512, 512))
)
d0

Unnamed: 0,Array,Chunk
Bytes,21.75 GiB,16.00 MiB
Shape,"(704, 5760, 5760)","(64, 512, 512)"
Dask graph,1584 chunks in 3 graph layers,1584 chunks in 3 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray
"Array Chunk Bytes 21.75 GiB 16.00 MiB Shape (704, 5760, 5760) (64, 512, 512) Dask graph 1584 chunks in 3 graph layers Data type uint8 numpy.ndarray",5760  5760  704,

Unnamed: 0,Array,Chunk
Bytes,21.75 GiB,16.00 MiB
Shape,"(704, 5760, 5760)","(64, 512, 512)"
Dask graph,1584 chunks in 3 graph layers,1584 chunks in 3 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray


In [18]:
from dask.diagnostics import ProgressBar

# Show a progress bar for every dask computation from here on.
ProgressBar().register()

# Extremes of the rescaled stack; each one is computed and printed in turn.
min_dask_val, max_dask_val = d0.min(), d0.max()
for lazy_extreme in (min_dask_val, max_dask_val):
    print(lazy_extreme.compute())

[########################################] | 100% Completed | 105.65 s
0
[########################################] | 100% Completed | 104.68 s
255


In [19]:
# Read the scaling information of the file and keep the size of one pixel
# along every axis.
czi_scale = czimd.CziScaling(filepath)
x_pix_size, y_pix_size, z_pix_size = czi_scale.X, czi_scale.Y, czi_scale.Z
print(f'pixel size in microns (x, y, z): {x_pix_size:.3f}, {y_pix_size:.3f}, {z_pix_size:.3f}')

pixel size in microns (x, y, z): 0.320, 0.320, 1.410


In [20]:
# Downsampling factors per pyramid level, ordered (z, y, x).
d1_s = (1, 2, 2)    # only rescale in x-y
d2_s = (1, 4, 4)    # approaching isotropic, only scale in x-y
d3_s = (1, 8, 8)    # close to isotropic now
d4_s = (2, 16, 16)  # one further scale for fast 3D rendering

# Every level is block-averaged directly from the full-resolution array;
# dict(enumerate(...)) maps axis index -> factor, as da.coarsen expects.
d1 = da.coarsen(mean_dtype, d0, dict(enumerate(d1_s))).rechunk((64, 512, 512))
d2 = da.coarsen(mean_dtype, d0, dict(enumerate(d2_s))).rechunk((64, 256, 256))
d3 = da.coarsen(mean_dtype, d0, dict(enumerate(d3_s))).rechunk((64, 64, 64))
d4 = da.coarsen(mean_dtype, d0, dict(enumerate(d4_s))).rechunk((64, 64, 64))
d4

Unnamed: 0,Array,Chunk
Bytes,43.51 MiB,256.00 kiB
Shape,"(352, 360, 360)","(64, 64, 64)"
Dask graph,216 chunks in 5 graph layers,216 chunks in 5 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray
"Array Chunk Bytes 43.51 MiB 256.00 kiB Shape (352, 360, 360) (64, 64, 64) Dask graph 216 chunks in 5 graph layers Data type uint8 numpy.ndarray",360  360  352,

Unnamed: 0,Array,Chunk
Bytes,43.51 MiB,256.00 kiB
Shape,"(352, 360, 360)","(64, 64, 64)"
Dask graph,216 chunks in 5 graph layers,216 chunks in 5 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray


In [21]:
from ome_zarr.io import parse_url
from ome_zarr.writer import write_multiscale
from ome_zarr.writer import write_multiscales_metadata

In [22]:
# Coordinate transformations and axes for the multiscale metadata.
# There is one entry per pyramid level: the full-resolution level followed by
# the four downscaled levels.
initial_pix_unit = 'micrometer'

# Factors are ordered (z, y, x); level 0 is not downscaled.
level_factors = [(1, 1, 1), d1_s, d2_s, d3_s, d4_s]
coordtfs = [
    [
        {'type': 'scale', 'scale': [z_pix_size * fz, y_pix_size * fy, x_pix_size * fx]},
        {'type': 'translation', 'translation': [0, 0, 0]},
    ]
    for fz, fy, fx in level_factors
]
axes = [
    {'name': axis_name, 'type': 'space', 'unit': initial_pix_unit}
    for axis_name in ('z', 'y', 'x')
]

In [23]:
# The output is named after the source file and the channel.
omezarr_name = czi_name.split('.czi')[0] + '_ch' + str(ch) + '.ome.zarr'
path = data_path / omezarr_name

# Remove the result of a previous run before writing.
if path.exists():
    rm_tree(path)

# Open the zarr group manually.
# NOTE: this rebinds `store`, which referred to the scratch store until now.
store = parse_url(path, mode='w').store
root = zarr.group(store=store)

# Use OME write multiscale; levels go from full resolution to coarsest.
write_multiscale(
    [d0, d1, d2, d3, d4],
    group=root,
    axes=axes,
    coordinate_transformations=coordtfs,
)

# Add omero metadata: the napari ome-zarr plugin uses this to pass rendering
# options to napari.
channel_rendering = {
    'color': 'ffffff',
    'label': 'ch' + str(ch),
    'active': True,
}
root.attrs['omero'] = {'channels': [channel_rendering]}

[########################################] | 100% Completed | 118.73 s
[########################################] | 100% Completed | 119.26 s
[########################################] | 100% Completed | 108.06 s
[########################################] | 100% Completed | 105.87 s
[########################################] | 100% Completed | 103.76 s


In [24]:
# Delete the scratch store if it is still on disk.
if z0_path.exists():
    rm_tree(z0_path)