In [11]:

# import xarray as xr
from pathlib import Path
import numpy as np
import zarr

class Data:
    """Attribute-style facade over a Zarr group stored on disk.

    Public attribute access is routed to the underlying Zarr group:

    * ``obj.name = value`` stores ``value`` in the group. Dict values are
      written to the group's ``attrs`` (JSON metadata); anything else is
      written as a group member via ``group[name] = value``.
    * ``obj.name`` returns the group member called ``name`` if one exists,
      otherwise the ``attrs`` entry of that name, otherwise raises
      ``AttributeError``.

    Names starting with an underscore, and names defined on the class
    (``zarr_path``, ``dataset``), keep ordinary Python attribute semantics.

    Parameters
    ----------
    zarr_path : str or os.PathLike
        Location of the Zarr store. It is created if it does not exist.
    """

    def __init__(self, zarr_path):
        self._zarr_path = Path(zarr_path)
        self._ds = None
        self._loaded = False

        # Open (or create) the store right away so path problems surface here.
        self._ds = self.dataset

    @property
    def zarr_path(self):
        """Path of the Zarr store backing this object."""
        return self._zarr_path

    @zarr_path.setter
    def zarr_path(self, new_path):
        self._zarr_path = Path(new_path)
        self._loaded = False  # force reload on next access

    @property
    def dataset(self):
        """Return the open Zarr handle, creating/opening the store on demand."""
        if not self._zarr_path.exists():
            self._zarr_path.mkdir(parents=True, exist_ok=True)
            zarr.open(self._zarr_path, mode='w')  # Initialize an empty Zarr dataset
        if not self._loaded:
            self._ds = zarr.open(self._zarr_path, mode='a')  # Open in append mode for read/write access
            self._loaded = True
        return self._ds

    def __getattr__(self, name):
        """Resolve unknown public attributes from the Zarr store.

        Python calls this only after normal attribute lookup has failed, so
        real instance attributes and properties are never shadowed.

        Raises
        ------
        AttributeError
            If ``name`` is private or is not present in the store.
        """
        # Private/dunder probes (copy, pickle, IPython display hooks, or a
        # half-initialised instance) must fail fast instead of touching the
        # store, otherwise a missing ``_zarr_path`` would recurse forever.
        if name.startswith('_'):
            raise AttributeError(name)
        store = self.dataset
        if name in store:
            return store[name]
        if name in store.attrs:
            return store.attrs[name]
        raise AttributeError(
            f"{type(self).__name__!r} object has no attribute {name!r}"
        )

    def __setattr__(self, name, value):
        """Store public attributes in the Zarr store instead of on the instance."""
        # Internal state and class-level properties (e.g. the ``zarr_path``
        # setter) keep normal Python semantics.
        if name.startswith('_') or hasattr(type(self), name):
            object.__setattr__(self, name, value)
        elif isinstance(value, dict):
            # Dicts are metadata, not array data: keep them in the group attrs.
            self.dataset.attrs[name] = value
        else:
            self.dataset[name] = value






In [12]:
# Container object shared by the cells below
zimg = Data("zarr_output/big_image_stack.zarr")

# Attach free-form metadata and echo it back
zimg.metadata = dict(project="neuro123", date="2025-04-27")
print(zimg.metadata)

# Synthetic 100x100 frame, assigned with a leading axis of length 1
fake_image = np.random.rand(100, 100)
zimg.images = np.expand_dims(fake_image, axis=0)

# First frame of the stack
slice_stack = zimg.images[0]

# Mean along axis 1 of that frame, assigned back onto the container
zimg.mean = np.mean(slice_stack, axis=1)

{'project': 'neuro123', 'date': '2025-04-27'}


In [13]:
from concurrent.futures import ThreadPoolExecutor
import numpy as np

# Function to simulate parallel access to the Data class
def parallel_task(zimg, task_id):
    """Write a random 10x10 array under a per-task key, read it back, report.

    Parameters
    ----------
    zimg : object exposing a ``dataset`` mapping
        Container whose ``dataset`` supports item assignment and lookup.
    task_id : Any
        Identifier interpolated into the key and the returned message.

    Returns
    -------
    str
        Completion message that includes the shape of the value read back.
    """
    key = f"task_{task_id}_data"
    zimg.dataset[key] = np.random.rand(10, 10)
    stored = zimg.dataset[key]
    return f"Task {task_id} completed. Data shape: {stored.shape}"

# Fan four tasks out over a small thread pool
with ThreadPoolExecutor(max_workers=4) as executor:
    # Queue every task up front so they can run concurrently
    futures = []
    for task_id in range(4):
        futures.append(executor.submit(parallel_task, zimg, task_id))

    # Report results in submission order
    for future in futures:
        print(future.result())

Task 0 completed. Data shape: (10, 10)
Task 1 completed. Data shape: (10, 10)
Task 2 completed. Data shape: (10, 10)
Task 3 completed. Data shape: (10, 10)


In [14]:
# Function to perform computation on a slice of the image and save the result
def process_and_save(zimg, image_slice, index):
    """Square ``image_slice`` element-wise and store it under an indexed key.

    Parameters
    ----------
    zimg : object exposing a ``dataset`` mapping
        Container whose ``dataset`` supports item assignment.
    image_slice : array-like
        Input passed to ``np.square``.
    index : Any
        Identifier interpolated into the key and the returned message.

    Returns
    -------
    str
        Message that includes the shape of the squared result.
    """
    squared = np.square(image_slice)
    target_key = f"processed_slice_{index}"
    zimg.dataset[target_key] = squared
    return f"Processed slice {index} saved. Shape: {squared.shape}"

# Partition the image along axis 0 into `num_chunks` pieces, one per worker
num_chunks = 4
image_chunks = np.array_split(fake_image, num_chunks)

# One worker thread per chunk
with ThreadPoolExecutor(max_workers=num_chunks) as executor:
    futures = []
    for index, chunk in enumerate(image_chunks):
        futures.append(executor.submit(process_and_save, zimg, chunk, index))

    # Report results in submission order
    for future in futures:
        print(future.result())

Processed slice 0 saved. Shape: (25, 100)
Processed slice 1 saved. Shape: (25, 100)
Processed slice 2 saved. Shape: (25, 100)
Processed slice 3 saved. Shape: (25, 100)


In [15]:
# Function to scale a chunk and save it to the Zarr dataset
def scale_and_save(zimg, chunk, index):
    """Multiply ``chunk`` by 1000 and store the result under an indexed key.

    Parameters
    ----------
    zimg : object exposing a ``dataset`` mapping
        Container whose ``dataset`` supports item assignment.
    chunk : array-like supporting ``* 1000`` and ``.shape``
        Data to scale.
    index : Any
        Identifier interpolated into the key and the returned message.

    Returns
    -------
    str
        Message that includes the shape of the scaled result.
    """
    target_key = f"scaled_slice_{index}"
    scaled = chunk * 1000
    zimg.dataset[target_key] = scaled
    return f"Scaled slice {index} saved. Shape: {scaled.shape}"

# One worker thread per chunk: scale each chunk and store it
with ThreadPoolExecutor(max_workers=num_chunks) as executor:
    futures = []
    for index, chunk in enumerate(image_chunks):
        futures.append(executor.submit(scale_and_save, zimg, chunk, index))

    # Report results in submission order
    for future in futures:
        print(future.result())

Scaled slice 0 saved. Shape: (25, 100)
Scaled slice 1 saved. Shape: (25, 100)
Scaled slice 2 saved. Shape: (25, 100)
Scaled slice 3 saved. Shape: (25, 100)


In [16]:
# Example masks: random 0/1 arrays of shape (100, 100),
# one for the nucleus ("nuc_mask") and one for the cytoplasm ("cyto_mask")
masks = {
    name: np.random.randint(0, 2, size=(100, 100))
    for name in ("nuc_mask", "cyto_mask")
}

# Write every mask into the dataset under a "mask_" prefixed key
for mask_name, mask_array in masks.items():
    zimg.dataset[f"mask_{mask_name}"] = mask_array

# Read each mask back and summarise it
for mask_name in masks:
    saved_mask = zimg.dataset[f"mask_{mask_name}"][:]
    print(
        f"Saved Mask: {mask_name}",
        f"Shape: {saved_mask.shape}",
        f"Unique values: {np.unique(saved_mask)}",  # Should be 0 and 1 for binary masks
        "",
        sep="\n",
    )

Saved Mask: nuc_mask
Shape: (100, 100)
Unique values: [0 1]

Saved Mask: cyto_mask
Shape: (100, 100)
Unique values: [0 1]

