## Exercise 2: Working with Zarr Arrays in Dask

In [1]:
import dask.array as da
import os

data_path = r'/archive/shared/MIL/2d.zarr/'
os.listdir(data_path)

FileNotFoundError: [Errno 2] No such file or directory: '/archive/shared/MIL/2d.zarr/'

In [22]:
import zarr

# Open as zarr group to inspect structure
zarr_group = zarr.open(data_path, mode='r')
print("Top level components:", list(zarr_group.keys()))

# Check what's inside the '0' component
if '0' in zarr_group:
    level_0 = zarr_group['0']
    print("Level 0 type:", type(level_0))
    if hasattr(level_0, 'keys'):
        print("Level 0 contents:", list(level_0.keys()))
    else:
        print("Level 0 is an array with shape:", level_0.shape)

arr = da.from_zarr(data_path, component='0/0')
print("Data array created from Zarr component '0/0'")

Top level components: ['OME', '0']
Level 0 type: <class 'zarr.core.group.Group'>
Level 0 contents: ['7', '6', '3', '5', '2', '4', '1', '0', '8']
Data array created from Zarr component '0/0'


In [28]:
# Inspecting Array Structure
print(f"Shape: {arr.shape}")           # (1, 46, 1, 48960, 32640)
print(f"Chunks: {arr.chunks}")         #
print(f"Dtype: {arr.dtype}")           # uint16
print(f"Size: {arr.nbytes / 1e9:.2f} GB")  # 147 GB

t, c, z, y, x = arr.shape
print(f"Dimensions: Time: {t}, Channels: {c}, Z-slices: {z}, Height: {y}, Width: {x}")

Shape: (1, 46, 1, 48960, 32640)
Chunks: ((1,), (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), (1,), (1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 832), (1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 896))
Dtype: >u2
Size: 147.02 GB
Dimensions: Time: 1, Channels: 46, Z-slices: 1, Height: 48960, Width: 32640


In [27]:
# Rechunk for different access patterns
spatial_chunks = arr.rechunk((1, 1, 256, 256, 256))      # Spatial operations

Shape: (1, 46, 1, 48960, 32640)
Chunks: ((1,), (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), (1,), (1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 832), (1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 896))
Dtype: >u2
Size: 147.02 GB
Dimensions: Time: 1, Channels: 46, Z-slices: 1, Height: 48960, Width: 32640


In [None]:
temporal_chunks = arr.rechunk((-1, c, z, 256, 256))   # Time-series analysis

In [None]:
balanced_chunks = arr.rechunk((t, 6, z, 256, 256))    # Balanced approach

In [25]:
# Save rechunked arrays to Zarr
balanced_chunks.to_zarr('optimized.zarr')