In [1]:
import numpy as np
import openslide as os
import itk
import zarr
from numcodecs import blosc
from numcodecs import Blosc
from napari_lazy_openslide.lazy_openslide import OpenSlideStore
from matplotlib import pyplot as plt
from histomics_stream.codecs import kwjpeg, jpeg2k
from zarr_jpeg import jpeg
%matplotlib inline
# Show which compressors the installed Blosc build offers (used to pick 'zstd' further down).
print(blosc.list_compressors())

['blosclz', 'lz4', 'lz4hc', 'snappy', 'zlib', 'zstd']


In [2]:
# Path of the .svs slide that every later cell reads from.
filename = './TCGA-BH-A0BZ-01Z-00-DX1.45EB3E93-A871-49C6-9EAE-90D98AE01913.svs'

In [3]:
# NOTE: `os` is the alias given to `openslide` in the import cell, not the standard-library `os` module.
slide = os.OpenSlide(filename)

In [4]:
# Size of the slide as reported by OpenSlide; the zarr arrays opened later list the two axes in the opposite order.
slide.dimensions

(112334, 85047)

In [5]:
# Number of pyramid levels stored in the slide file.
slide.level_count

4

In [6]:
# Size of every pyramid level, in the same axis order as `slide.dimensions`.
slide.level_dimensions

((112334, 85047), (28083, 21261), (7020, 5315), (3510, 2657))

In [7]:
# Print the objective power, the downsample factor of each level, and the
# resulting per-level magnification (objective power / downsample factor).
print(slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER])
print(slide.level_downsamples)
print(
    np.float32(slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER])
    / np.asarray(slide.level_downsamples)
)

40
(1.0, 4.000106160445668, 16.001655664637788, 32.006322491681914)
[40.          9.99973461  2.49974133  1.24975308]


In [8]:
# Display the slide's full property map (vendor metadata plus OpenSlide's own keys).
slide.properties

<_PropertyMap {'aperio.AppMag': '40', 'aperio.DSR ID': 'resc3-dsr2', 'aperio.Date': '06/08/11', 'aperio.DisplayColor': '0', 'aperio.Exposure Scale': '0.000001', 'aperio.Exposure Time': '109', 'aperio.Filename': 'TCGA-BH-A0BZ-01Z-00-DX1', 'aperio.Focus Offset': '0.000000', 'aperio.ICC Profile': 'ScanScope v1', 'aperio.ImageID': '67763', 'aperio.Left': '32.302200', 'aperio.LineAreaXOffset': '0.000000', 'aperio.LineAreaYOffset': '0.000000', 'aperio.LineCameraSkew': '0.001550', 'aperio.MPP': '0.2505', 'aperio.OriginalHeight': '85147', 'aperio.OriginalWidth': '117000', 'aperio.Parmset': 'GOG136 on RESBPCLACIE01', 'aperio.ScanScope ID': 'SS1248CNTLR', 'aperio.StripeWidth': '1000', 'aperio.Time': '18:52:43', 'aperio.Time Zone': 'GMT-04:00', 'aperio.Title': 'TCGA-BH-A0BZ-01Z-00-DX1', 'aperio.Top': '22.371477', 'aperio.User': 'e7a90227-eba8-4fea-a625-16cb7e474785', 'openslide.comment': 'Aperio Image Library v10.2.41\r\n117000x85147 [0,0 112334x85047] (256x256) JPEG/RGB Q=30|AppMag = 40|StripeWi

In [9]:
# Wrap the slide in a store that zarr can open (see the next cell).
# tilesize=256 is presumably the chunk edge length in pixels - TODO confirm against OpenSlideStore.
store256 = OpenSlideStore(filename, tilesize=256)

In [10]:
# Open the slide-backed store read-only and inspect it: the level names, then the
# element type, shape and memory order of the full-resolution level '0'.
source_group256 = zarr.open(store256, mode='r')
print(list(source_group256.keys()))
level0 = source_group256['0']
print(type(level0[0, 0, 0]))
print(level0.shape)
print(level0.order)

['0', '1', '2', '3']
<class 'numpy.uint8'>
(85047, 112334, 4)
C


In [11]:
# Summarise the hierarchy: one array per pyramid level with its shape and dtype.
print(source_group256.tree())

/
 ├── 0 (85047, 112334, 4) uint8
 ├── 1 (21261, 28083, 4) uint8
 ├── 2 (5315, 7020, 4) uint8
 └── 3 (2657, 3510, 4) uint8


In [12]:
# Export every pyramid level to 'default256.zarr' without an explicit compressor
# (zarr's default settings), keeping only the first three of the four channels.
dest256 = zarr.DirectoryStore('default256.zarr')
dest_group256 = zarr.group(store=dest256, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group256.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group256.attrs['level_downsamples'] = slide.level_downsamples
print(dest_group256.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER])
for scale in source_group256.keys():
    levelIn = source_group256[scale]
    arrayOut = dest_group256.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(256,256), dtype='|u1')
    # Copy one chunk-aligned band of rows at a time. Reading the whole level at once
    # needs tens of GB of RAM for level '0'; band-wise copying writes the same chunks.
    for row in range(0, levelIn.shape[0], 256):
        arrayIn = levelIn[row:row + 256, :, :3]
        arrayOut[row:row + 256, :, :] = arrayIn
print(type(dest_group256['0'][0,0,0]))

40
<class 'numpy.uint8'>


In [13]:
# Export every pyramid level to 'default1024.zarr' without an explicit compressor,
# keeping only the first three of the four channels.
store1024 = OpenSlideStore(filename, tilesize=1024)
source_group1024 = zarr.open(store1024, mode='r')
dest1024 = zarr.DirectoryStore('default1024.zarr')
dest_group1024 = zarr.group(store=dest1024, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group1024.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group1024.attrs['level_downsamples'] = slide.level_downsamples
for scale in source_group1024.keys():
    levelIn = source_group1024[scale]
    arrayOut = dest_group1024.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(1024,1024), dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 1024):
        arrayIn = levelIn[row:row + 1024, :, :3]
        arrayOut[row:row + 1024, :, :] = arrayIn

In [14]:
# Export every pyramid level to 'default2048.zarr' without an explicit compressor,
# keeping only the first three of the four channels.
store2048 = OpenSlideStore(filename, tilesize=2048)
source_group2048 = zarr.open(store2048, mode='r')
dest2048 = zarr.DirectoryStore('default2048.zarr')
dest_group2048 = zarr.group(store=dest2048, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group2048.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group2048.attrs['level_downsamples'] = slide.level_downsamples
for scale in source_group2048.keys():
    levelIn = source_group2048[scale]
    arrayOut = dest_group2048.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(2048,2048), dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 2048):
        arrayIn = levelIn[row:row + 2048, :, :3]
        arrayOut[row:row + 2048, :, :] = arrayIn

In [15]:
# Export every pyramid level to 'default4096.zarr' without an explicit compressor,
# keeping only the first three of the four channels.
store4096 = OpenSlideStore(filename, tilesize=4096)
source_group4096 = zarr.open(store4096, mode='r')
dest4096 = zarr.DirectoryStore('default4096.zarr')
dest_group4096 = zarr.group(store=dest4096, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group4096.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group4096.attrs['level_downsamples'] = slide.level_downsamples
for scale in source_group4096.keys():
    levelIn = source_group4096[scale]
    arrayOut = dest_group4096.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(4096,4096), dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 4096):
        arrayIn = levelIn[row:row + 4096, :, :3]
        arrayOut[row:row + 4096, :, :] = arrayIn

In [16]:
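# TODO: zarr.convenience.copy_store did not work for this conversion (cause not yet
# investigated), so the levels are copied explicitly in the cells above. Attempted call:
# zarr.convenience.copy_store(store, dest, if_exists='replace')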

In [17]:
# Apparent size of the original slide file (du reports it in units of 1M), as the baseline for the exports.
!du --apparent-size -sBM {filename}

1120M	./TCGA-BH-A0BZ-01Z-00-DX1.45EB3E93-A871-49C6-9EAE-90D98AE01913.svs


In [18]:
# Apparent on-disk size of each default-compressor export, smallest first.
!du --apparent-size -sBM ./default*.zarr | sort -h

9571M	./default1024.zarr
9571M	./default256.zarr
9591M	./default2048.zarr
9639M	./default4096.zarr


In [19]:
# Export every pyramid level to 'zstd_compressor1024.zarr' using Blosc/zstd
# (level 5, byte shuffle), keeping only the first three of the four channels.
store1024 = OpenSlideStore(filename, tilesize=1024)
source_group1024 = zarr.open(store1024, mode='r')
print([source_group1024[scale].shape for scale in source_group1024.keys()])
dest1024 = zarr.DirectoryStore('zstd_compressor1024.zarr')
dest_group1024 = zarr.group(store=dest1024, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group1024.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group1024.attrs['level_downsamples'] = slide.level_downsamples
compressor = Blosc(cname='zstd', clevel=5, shuffle=Blosc.SHUFFLE)
for scale in source_group1024.keys():
    levelIn = source_group1024[scale]
    arrayOut = dest_group1024.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(1024,1024), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 1024):
        arrayIn = levelIn[row:row + 1024, :, :3]
        arrayOut[row:row + 1024, :, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [20]:
# Export every pyramid level to 'zstd_compressor2048.zarr' using Blosc/zstd
# (level 5, byte shuffle), keeping only the first three of the four channels.
store2048 = OpenSlideStore(filename, tilesize=2048)
source_group2048 = zarr.open(store2048, mode='r')
print([source_group2048[scale].shape for scale in source_group2048.keys()])
dest2048 = zarr.DirectoryStore('zstd_compressor2048.zarr')
dest_group2048 = zarr.group(store=dest2048, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group2048.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group2048.attrs['level_downsamples'] = slide.level_downsamples
compressor = Blosc(cname='zstd', clevel=5, shuffle=Blosc.SHUFFLE)
for scale in source_group2048.keys():
    levelIn = source_group2048[scale]
    arrayOut = dest_group2048.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(2048,2048), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 2048):
        arrayIn = levelIn[row:row + 2048, :, :3]
        arrayOut[row:row + 2048, :, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [21]:
# Export every pyramid level to 'jpeg75_compressor1024.zarr' with the zarr_jpeg codec
# at quality 75. The destination stores the full transpose of the source's first
# three channels, i.e. (3, cols, rows) instead of (rows, cols, 3).
store1024 = OpenSlideStore(filename, tilesize=1024)
source_group1024 = zarr.open(store1024, mode='r')
print([source_group1024[scale].shape for scale in source_group1024.keys()])
dest1024 = zarr.DirectoryStore('jpeg75_compressor1024.zarr')
dest_group1024 = zarr.group(store=dest1024, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group1024.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group1024.attrs['level_downsamples'] = slide.level_downsamples
compressor = jpeg(quality=75)
for scale in source_group1024.keys():
    levelIn = source_group1024[scale]
    height, width = levelIn.shape[:2]
    arrayOut = dest_group1024.create_dataset(scale, shape=(3, width, height), chunks=(3, 1024,1024), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of source columns at a time (they become axis 1 of
    # the destination), so peak memory is a single band rather than the whole level.
    for col in range(0, width, 1024):
        arrayIn = np.transpose(levelIn[:, col:col + 1024, :3])
        arrayOut[:, col:col + 1024, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [22]:
# Export every pyramid level to 'jpeg75_compressor2048.zarr' with the zarr_jpeg codec
# at quality 75. The destination stores the full transpose of the source's first
# three channels, i.e. (3, cols, rows) instead of (rows, cols, 3).
store2048 = OpenSlideStore(filename, tilesize=2048)
source_group2048 = zarr.open(store2048, mode='r')
print([source_group2048[scale].shape for scale in source_group2048.keys()])
dest2048 = zarr.DirectoryStore('jpeg75_compressor2048.zarr')
dest_group2048 = zarr.group(store=dest2048, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group2048.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group2048.attrs['level_downsamples'] = slide.level_downsamples
compressor = jpeg(quality=75)
for scale in source_group2048.keys():
    levelIn = source_group2048[scale]
    height, width = levelIn.shape[:2]
    arrayOut = dest_group2048.create_dataset(scale, shape=(3, width, height), chunks=(3, 2048,2048), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of source columns at a time (they become axis 1 of
    # the destination), so peak memory is a single band rather than the whole level.
    for col in range(0, width, 2048):
        arrayIn = np.transpose(levelIn[:, col:col + 2048, :3])
        arrayOut[:, col:col + 2048, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [23]:
# Export every pyramid level to 'jpeg50_compressor1024.zarr' with the zarr_jpeg codec
# at quality 50. The destination stores the full transpose of the source's first
# three channels, i.e. (3, cols, rows) instead of (rows, cols, 3).
store1024 = OpenSlideStore(filename, tilesize=1024)
source_group1024 = zarr.open(store1024, mode='r')
print([source_group1024[scale].shape for scale in source_group1024.keys()])
dest1024 = zarr.DirectoryStore('jpeg50_compressor1024.zarr')
dest_group1024 = zarr.group(store=dest1024, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group1024.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group1024.attrs['level_downsamples'] = slide.level_downsamples
compressor = jpeg(quality=50)
for scale in source_group1024.keys():
    levelIn = source_group1024[scale]
    height, width = levelIn.shape[:2]
    arrayOut = dest_group1024.create_dataset(scale, shape=(3, width, height), chunks=(3, 1024,1024), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of source columns at a time (they become axis 1 of
    # the destination), so peak memory is a single band rather than the whole level.
    for col in range(0, width, 1024):
        arrayIn = np.transpose(levelIn[:, col:col + 1024, :3])
        arrayOut[:, col:col + 1024, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [24]:
# Export every pyramid level to 'jpeg50_compressor2048.zarr' with the zarr_jpeg codec
# at quality 50. The destination stores the full transpose of the source's first
# three channels, i.e. (3, cols, rows) instead of (rows, cols, 3).
store2048 = OpenSlideStore(filename, tilesize=2048)
source_group2048 = zarr.open(store2048, mode='r')
print([source_group2048[scale].shape for scale in source_group2048.keys()])
dest2048 = zarr.DirectoryStore('jpeg50_compressor2048.zarr')
dest_group2048 = zarr.group(store=dest2048, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group2048.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group2048.attrs['level_downsamples'] = slide.level_downsamples
compressor = jpeg(quality=50)
for scale in source_group2048.keys():
    levelIn = source_group2048[scale]
    height, width = levelIn.shape[:2]
    arrayOut = dest_group2048.create_dataset(scale, shape=(3, width, height), chunks=(3, 2048,2048), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of source columns at a time (they become axis 1 of
    # the destination), so peak memory is a single band rather than the whole level.
    for col in range(0, width, 2048):
        arrayIn = np.transpose(levelIn[:, col:col + 2048, :3])
        arrayOut[:, col:col + 2048, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [25]:
# Export every pyramid level to 'kwjpeg30_compressor2048.zarr' with the kwjpeg codec
# at quality 30, keeping only the first three of the four channels.
store2048 = OpenSlideStore(filename, tilesize=2048)
source_group2048 = zarr.open(store2048, mode='r')
print([source_group2048[scale].shape for scale in source_group2048.keys()])
dest2048 = zarr.DirectoryStore('kwjpeg30_compressor2048.zarr')
dest_group2048 = zarr.group(store=dest2048, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group2048.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group2048.attrs['level_downsamples'] = slide.level_downsamples
compressor = kwjpeg(quality=30)
for scale in source_group2048.keys():
    levelIn = source_group2048[scale]
    arrayOut = dest_group2048.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(2048,2048), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 2048):
        arrayIn = levelIn[row:row + 2048, :, :3]
        arrayOut[row:row + 2048, :, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [26]:
# Export every pyramid level to 'kwjpeg30_compressor256.zarr' with the kwjpeg codec
# at quality 30, keeping only the first three of the four channels.
store256 = OpenSlideStore(filename, tilesize=256)
source_group256 = zarr.open(store256, mode='r')
print([source_group256[scale].shape for scale in source_group256.keys()])
dest256 = zarr.DirectoryStore('kwjpeg30_compressor256.zarr')
dest_group256 = zarr.group(store=dest256, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group256.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group256.attrs['level_downsamples'] = slide.level_downsamples
compressor = kwjpeg(quality=30)
for scale in source_group256.keys():
    levelIn = source_group256[scale]
    arrayOut = dest_group256.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(256,256), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 256):
        arrayIn = levelIn[row:row + 256, :, :3]
        arrayOut[row:row + 256, :, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [27]:
# Export every pyramid level to 'kwjpegVarious_compressor256.zarr' with the kwjpeg
# codec, using a different quality for each level, and keeping only the first three
# of the four channels.
store256 = OpenSlideStore(filename, tilesize=256)
source_group256 = zarr.open(store256, mode='r')
print([source_group256[scale].shape for scale in source_group256.keys()])
dest256 = zarr.DirectoryStore('kwjpegVarious_compressor256.zarr')
dest_group256 = zarr.group(store=dest256, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group256.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group256.attrs['level_downsamples'] = slide.level_downsamples

# (level name, kwjpeg quality) pairs: quality rises as the levels get smaller.
# Level names are strings, matching the keys used by every other cell.
for scale, quality in (('0', 30), ('1', 65), ('2', 82), ('3', 91)):
    levelIn = source_group256[scale]
    arrayOut = dest_group256.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(256,256), compressor=kwjpeg(quality=quality), dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 256):
        arrayIn = levelIn[row:row + 256, :, :3]
        arrayOut[row:row + 256, :, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [28]:
# Export every pyramid level to 'kwjpeg75_compressor2048.zarr' with the kwjpeg codec
# at quality 75, keeping only the first three of the four channels.
store2048 = OpenSlideStore(filename, tilesize=2048)
source_group2048 = zarr.open(store2048, mode='r')
print([source_group2048[scale].shape for scale in source_group2048.keys()])
dest2048 = zarr.DirectoryStore('kwjpeg75_compressor2048.zarr')
dest_group2048 = zarr.group(store=dest2048, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group2048.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group2048.attrs['level_downsamples'] = slide.level_downsamples
compressor = kwjpeg(quality=75)
for scale in source_group2048.keys():
    levelIn = source_group2048[scale]
    arrayOut = dest_group2048.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(2048,2048), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 2048):
        arrayIn = levelIn[row:row + 2048, :, :3]
        arrayOut[row:row + 2048, :, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [29]:
# Export every pyramid level to 'kwjpeg100_compressor2048.zarr' with the kwjpeg codec
# at quality 100, keeping only the first three of the four channels.
store2048 = OpenSlideStore(filename, tilesize=2048)
source_group2048 = zarr.open(store2048, mode='r')
print([source_group2048[scale].shape for scale in source_group2048.keys()])
dest2048 = zarr.DirectoryStore('kwjpeg100_compressor2048.zarr')
dest_group2048 = zarr.group(store=dest2048, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group2048.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group2048.attrs['level_downsamples'] = slide.level_downsamples
compressor = kwjpeg(quality=100)
for scale in source_group2048.keys():
    levelIn = source_group2048[scale]
    arrayOut = dest_group2048.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(2048,2048), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 2048):
        arrayIn = levelIn[row:row + 2048, :, :3]
        arrayOut[row:row + 2048, :, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [30]:
# Export every pyramid level to 'kwjpeg100_compressor256.zarr' with the kwjpeg codec
# at quality 100, keeping only the first three of the four channels.
store256 = OpenSlideStore(filename, tilesize=256)
source_group256 = zarr.open(store256, mode='r')
print([source_group256[scale].shape for scale in source_group256.keys()])
dest256 = zarr.DirectoryStore('kwjpeg100_compressor256.zarr')
dest_group256 = zarr.group(store=dest256, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group256.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group256.attrs['level_downsamples'] = slide.level_downsamples
compressor = kwjpeg(quality=100)
for scale in source_group256.keys():
    levelIn = source_group256[scale]
    arrayOut = dest_group256.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(256,256), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 256):
        arrayIn = levelIn[row:row + 256, :, :3]
        arrayOut[row:row + 256, :, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [37]:
# Export every pyramid level to 'jpeg2klevel80_compressor2048.zarr' with the jpeg2k
# codec at quality 80, keeping only the first three of the four channels.
store2048 = OpenSlideStore(filename, tilesize=2048)
source_group2048 = zarr.open(store2048, mode='r')
print([source_group2048[scale].shape for scale in source_group2048.keys()])
dest2048 = zarr.DirectoryStore('jpeg2klevel80_compressor2048.zarr')
dest_group2048 = zarr.group(store=dest2048, overwrite=True)
# Keep the metadata needed to recover each level's magnification.
dest_group2048.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER] = slide.properties[os.PROPERTY_NAME_OBJECTIVE_POWER]
dest_group2048.attrs['level_downsamples'] = slide.level_downsamples
compressor = jpeg2k(quality=80)
for scale in source_group2048.keys():
    levelIn = source_group2048[scale]
    arrayOut = dest_group2048.create_dataset(scale, shape=levelIn.shape[:2] + (3,), chunks=(2048,2048), compressor=compressor, dtype='|u1')
    # Copy one chunk-aligned band of rows at a time so peak memory is a single band
    # rather than the whole level (tens of GB for level '0').
    for row in range(0, levelIn.shape[0], 2048):
        arrayIn = levelIn[row:row + 2048, :, :3]
        arrayOut[row:row + 2048, :, :] = arrayIn

[(85047, 112334, 4), (21261, 28083, 4), (5315, 7020, 4), (2657, 3510, 4)]


In [None]:
# Apparent on-disk size of every export written with an explicit compressor, smallest first.
!du --apparent-size -sBM ./*_compressor*.zarr | sort -h

In [None]:
# NOTE(review): relies on hidden state - `arrayOut` and `compressor` are whatever the most
# recently executed export cell left behind, so the result depends on execution order.
a2 = zarr.array(arrayOut, compressor=compressor)
a2

In [None]:
# Re-open the 'default2048.zarr' export from disk and check that the metadata and
# level layout survived the round trip.
# NOTE(review): `dest_group2048` is whatever the last executed 2048 export cell left behind.
store2048B = zarr.DirectoryStore('default2048.zarr')
source_group2048B = zarr.open(store2048B, mode='r')
print(dest_group2048.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER])
print(source_group2048B.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER])
print(list(dest_group2048.keys()))
print(list(source_group2048B.keys()))
print([source_group2048B[level_name].shape for level_name in source_group2048B.keys()])
# Per-level magnification recomputed from the stored attributes.
objective = np.float32(source_group2048B.attrs[os.PROPERTY_NAME_OBJECTIVE_POWER])
estimated = np.array(objective / source_group2048B.attrs['level_downsamples'])
print(estimated)
# Read the top-left 2048 x 2048 region of level '0' back from disk.
chunk = source_group2048B['0'][0:2048, 0:2048]
print(chunk.shape)
print(chunk[..., :3].shape)
print(list(source_group2048B.attrs.keys()))

In [None]:
# Interactive help lookup left over from development; it produces no analysis output.
zarr.group?

In [None]:
# Interactive help lookup left over from development; it produces no analysis output.
zarr.DirectoryStore?