In [2]:
%load_ext autoreload
%autoreload 2

import os
from datetime import datetime
from loguru import logger
from pathlib import Path
import numpy as np
import spatialdata as sd
from napari_spatialdata import Interactive

from multiplex_pipeline.utils.config_loaders import load_analysis_settings
from multiplex_pipeline.processors import build_processor
from multiplex_pipeline.processors.controller import ResourceBuildingController

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load analysis settings

In [3]:
# Analysis configuration: the overwrite flag and the YAML settings file to load.
overwrite_mask = True  # presumably: replace masks that already exist - confirm where this is consumed

# location of the analysis settings file for this dataset
settings_path = r'C:\BLCA-7_Analysis\analysis_settings_BLCA7.yaml'
settings = load_analysis_settings(settings_path)

### Define the logger

In [4]:
def _print_sink(message):
    """Echo a formatted log record to the cell output (the record already ends with a newline)."""
    print(message, end="")


# one log file per run, named after the start time
run_started = datetime.now()
log_file = settings.log_dir_path / f"cores_segmenation_{run_started:%Y-%m-%d_%H-%M-%S}.log"

# drop the handlers registered so far, then attach the notebook sink and the file sink
logger.remove()
logger.add(_print_sink)
l = logger.add(log_file, level="DEBUG", enqueue=True)  # `l` keeps the id of the file handler

### Define cores for the analysis

In [5]:
# Collect the per-core stores that will be processed.
core_dir = settings.analysis_dir / 'cores'

# Keep only '*.zarr' entries: a plain directory listing would also return any
# stray file or folder, which would then be handed to sd.read_zarr further down.
# iterdir() still raises if the cores directory does not exist.
path_list = sorted(p for p in core_dir.iterdir() if p.suffix == '.zarr')
path_list

[WindowsPath('C:/BLCA-7_Analysis/cores/Core_000.zarr')]

### Setup

In [7]:
# Set up builders of additional data elements.
#
# For every entry in `settings.additional_elements` a processor is built and
# wrapped in a ResourceBuildingController; the controllers are collected in
# `builders_list` in the order in which they are listed in the settings.
# `builders_list` stays empty when no additional elements are configured.
builders_list = []

for builder_settings in getattr(settings, 'additional_elements', None) or []:

    # 'parameters' is optional: fall back to an empty dict BEFORE calling dict(),
    # otherwise a missing/None value raises "TypeError: 'NoneType' object is not iterable".
    params = dict(getattr(builder_settings, 'parameters', None) or {})

    builder = build_processor(builder_settings.category, builder_settings.type, **params)

    builder_controller = ResourceBuildingController(
        builder=builder,
        input_names=builder_settings.input,
        output_names=builder_settings.output,
        keep=builder_settings.keep,
        # honour the notebook-level flag instead of a hard-coded True
        overwrite=overwrite_mask,
        pyramid_levels=settings.sdata_storage.max_pyramid_level,
        downscale=settings.sdata_storage.downscale,
        chunk_size=settings.sdata_storage.chunk_size,
    )

    logger.info(f"Image transformer of type '{builder_settings.type}' for image '{builder_settings.input}' has been created.")

    builders_list.append(builder_controller)

if not builders_list:
    logger.info("No resource builders specified.")

2025-10-17 14:22:07.484 | INFO     | __main__:<module>:23 - Image transformer of type 'normalize' for image 'DAPI' has been created.
2025-10-17 14:22:07.484 | INFO     | __main__:<module>:23 - Image transformer of type 'normalize' for image 'CD45' has been created.
2025-10-17 14:22:07.484 | INFO     | __main__:<module>:23 - Image transformer of type 'normalize' for image 'CD44' has been created.
2025-10-17 14:22:07.485 | INFO     | __main__:<module>:23 - Image transformer of type 'normalize' for image 'HLA1' has been created.
2025-10-17 14:22:07.485 | INFO     | __main__:<module>:23 - Image transformer of type 'normalize' for image 'NaKATPase' has been created.
2025-10-17 14:22:07.485 | INFO     | __main__:<module>:23 - Image transformer of type 'normalize' for image 'CD11C' has been created.
2025-10-17 14:22:07.485 | INFO     | __main__:<module>:23 - Image transformer of type 'normalize' for image 'pCK26' has been created.
2025-10-17 14:22:07.485 | INFO     | __main__:<module>:23 - Im

### Processing

In [None]:
# # Optional - to detect problems early (runs <10s per sdata object)
# for sd_path in path_list:
    
#     logger.info(f"Validating {sd_path.name}")

#     # get sdata
#     sdata = sd.read_zarr(sd_path)

#     # check that the pipeline can run on the provided sdata
#     settings.validate_pipeline(sdata)

version mismatch: detected: RasterFormatV02, requested: FormatV04
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04


2025-10-17 13:34:57.535 | INFO     | __main__:<module>:4 - Validating Core_000.zarr


version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04


2025-10-17 13:34:58.191 | INFO     | multiplex_pipeline.utils.config_schema:validate_pipeline:184 - ✅ Pipeline validation successful.


In [9]:
# Process every core in `path_list`: read it, validate it against the settings,
# then run all configured builders on it.
# NOTE: `sdata` stays bound to the last processed core after the loop and is
# displayed by a later cell.
for sd_path in path_list:
    
    logger.info(f"Processing {sd_path.name}")

    # get sdata
    sdata = sd.read_zarr(sd_path)

    # check that the pipeline can run on the provided sdata
    settings.validate_pipeline(sdata)

    # run builders of additional elements
    # (each controller receives the object returned by the previous one)
    for builder_controller in builders_list:
        sdata = builder_controller.run(sdata)

version mismatch: detected: RasterFormatV02, requested: FormatV04
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04


2025-10-17 14:24:07.933 | INFO     | __main__:<module>:3 - Processing Core_000.zarr


version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04
version mismatch: detected: RasterFormatV02, requested: FormatV04


2025-10-17 14:24:08.285 | INFO     | multiplex_pipeline.utils.config_schema:validate_pipeline:184 - ✅ Pipeline validation successful.
2025-10-17 14:24:08.285 | INFO     | multiplex_pipeline.processors.controller:validate_resolution_present:85 - All channels have required resolution level: 0
2025-10-17 14:24:09.025 | INFO     | multiplex_pipeline.processors.image_transformers:run:73 - Applied normalization (percentiles 1.0–99.5) → [98.0, 4311.0]
2025-10-17 14:24:09.026 | INFO     | multiplex_pipeline.processors.controller:run:208 - New element(s) '['DAPI_norm']' have been created.
2025-10-17 14:24:09.154 | INFO     | multiplex_pipeline.processors.controller:validate_resolution_present:85 - All channels have required resolution level: 0
2025-10-17 14:24:10.110 | INFO     | multiplex_pipeline.processors.image_transformers:run:73 - Applied normalization (percentiles 1.0–99.5) → [98.0, 338.0]
2025-10-17 14:24:10.111 | INFO     | multiplex_pipeline.processors.controller:run:208 - New element

  intersection = torch.sparse.mm(onehot1, onehot2.T).to_dense()


2025-10-17 14:24:40.985 | INFO     | multiplex_pipeline.processors.controller:run:208 - New element(s) '['instanseg_nucleus_org', 'instanseg_cell_org']' have been created.
2025-10-17 14:24:42.193 | INFO     | multiplex_pipeline.processors.controller:run:226 - Mask 'instanseg_nucleus_org' has been saved to disk.
2025-10-17 14:24:43.575 | INFO     | multiplex_pipeline.processors.controller:run:226 - Mask 'instanseg_cell_org' has been saved to disk.
2025-10-17 14:24:43.690 | INFO     | multiplex_pipeline.processors.controller:validate_resolution_present:85 - All channels have required resolution level: 0
2025-10-17 14:24:47.231 | INFO     | multiplex_pipeline.processors.controller:run:208 - New element(s) '['blob']' have been created.
2025-10-17 14:24:48.475 | INFO     | multiplex_pipeline.processors.controller:run:226 - Mask 'blob' has been saved to disk.
2025-10-17 14:24:48.509 | INFO     | multiplex_pipeline.processors.controller:validate_resolution_present:85 - All channels have requi

### Sneak peek

In [26]:
# re-read the first core from disk so the displayed object reflects what is stored
first_core_path = path_list[0]
sdata_org = sd.read_zarr(first_core_path)
sdata_org

version mismatch: detected: RasterFormatV02, requested: FormatV04
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)


SpatialData object, with associated Zarr store: C:\BLCA-2_Analysis_todel\cores\Core_000.zarr
├── Images
│     └── 'DAPI': DataTree[cyx] (1, 5696, 5568), (1, 2848, 2784)
├── Labels
│     ├── 'blob': DataTree[yx] (5696, 5568), (2848, 2784), (1424, 1392)
│     ├── 'cytoplasm': DataTree[yx] (5696, 5568), (2848, 2784), (1424, 1392)
│     ├── 'instanseg_cell': DataTree[yx] (5696, 5568), (2848, 2784), (1424, 1392)
│     ├── 'instanseg_nucleus': DataTree[yx] (5696, 5568), (2848, 2784), (1424, 1392)
│     └── 'ring': DataTree[yx] (5696, 5568), (2848, 2784), (1424, 1392)
└── Tables
      ├── 'instanseg_data': AnnData (11394, 6)
      └── 'instanseg_table': AnnData (11394, 4)
with coordinate systems:
    ▸ 'global', with elements:
        DAPI (Images), blob (Labels), cytoplasm (Labels), instanseg_cell (Labels), instanseg_nucleus (Labels), ring (Labels)

In [None]:
# hand the re-loaded core to napari_spatialdata's Interactive for visual inspection
Interactive(sdata_org)

<napari_spatialdata._interactive.Interactive at 0x20d16ba0810>

In [10]:
# show the in-memory object left bound by the processing loop (the last core processed)
sdata

SpatialData object, with associated Zarr store: C:\BLCA-7_Analysis\cores\Core_000.zarr
├── Images
│     ├── 'CD11C': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'CD11C_norm': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'CD44': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'CD44_norm': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'CD45': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'CD45_norm': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'DAPI': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'DAPI_norm': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'HES1': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'HES1_norm': DataTree[cyx] (1, 4992, 5312), (1, 2496, 2656), (1, 1248, 1328)
│     ├── 'HLA1': DataTree[cyx] (1, 4992, 5312), (1, 24