# Cut Whole Slide Images into SpatialData objects (with Globus sourcing)

In [1]:
%load_ext autoreload
%autoreload 2

from datetime import datetime
from pathlib import Path

import pandas as pd
from loguru import logger

from plex_pipe.utils.config_loaders import load_analysis_settings
from plex_pipe.utils.globus_utils import GlobusConfig
from plex_pipe.core_cutting.input_strategy import GlobusFileStrategy
from plex_pipe.core_cutting.controller import CorePreparationController

## Read in config

In [2]:
# load analysis configuration
# The path is resolved two directory levels above the current working directory,
# so the notebook only finds the file when the kernel is started from a folder at
# that depth (presumably the notebook's own directory in the repository - confirm).
config_path = Path.cwd().parents[1] / "examples/example_pipeline_config_globus.yaml"

config = load_analysis_settings(config_path)



In [None]:
# Build the Globus configuration from YAML.
# In this example it is read from the same file as the analysis settings above.
globus_config_path = Path.cwd().parents[1] / "examples/example_pipeline_config_globus.yaml"

# Despite the "_id" suffix, these values are handed to from_yaml() as
# source_key / dest_key, i.e. they act as lookup keys (presumably naming the
# entries in the YAML file that hold the actual collection IDs - confirm).
from_collection_id = "r_collection_id"
to_collection_id = "cbi_collection_id"

gc = GlobusConfig.from_yaml(
    globus_config_path,
    source_key=from_collection_id,
    dest_key=to_collection_id,
)

## Define the logger

In [4]:
# Each run gets its own log file, named with a timestamp (second resolution).
log_file = config.log_dir_path / f"rois_cutting_{datetime.now():%Y-%m-%d_%H-%M-%S}.log"

# Remove every handler registered so far, so that re-running this cell does not
# stack duplicate sinks and print each message several times.
logger.remove()

# logger.add() returns the id of the new handler. Each id is kept under its own
# descriptive name (instead of one reused variable) so a single sink can later be
# detached with logger.remove(<id>). Assigning the result also keeps the id from
# being echoed as the cell output.
console_handler_id = logger.add(lambda msg: print(msg, end=""))  # echo into the notebook
file_handler_id = logger.add(log_file, level="DEBUG", enqueue=True)  # persistent DEBUG log

## Define ROIs for processing

In [5]:
# Location of the ROI table, taken from the analysis config and resolved against
# the parent of the current working directory.
# NOTE(review): the config files above are resolved two levels up (parents[1]),
# this path only one level up (parents[0]) - confirm the difference is intended.
df_path = Path.cwd().parents[0] / config.roi_info_file_path

# SECURITY: read_pickle unpickles the file, which can execute arbitrary code.
# Only load ROI tables that were produced by a trusted source.
df = pd.read_pickle(df_path)
df.head()

Unnamed: 0,roi_name,row_start,row_stop,column_start,column_stop,poly_type,polygon_vertices
0,ROI_000,256.0,5056.0,256.0,5120.0,rectangle,"[[5056.0, 5120.0], [5056.0, 256.0], [256.0, 25..."
1,ROI_001,128.0,4992.0,6912.0,11776.0,rectangle,"[[4992.0, 11776.0], [4992.0, 6912.0], [128.0, ..."


## Discover marker files based on config and submit Globus transfers

In [6]:
# File-sourcing strategy that fetches the input images through Globus.
# cleanup_enabled=False: presumably leaves the transferred files in place after
# cutting (the run log reports "cleanup is disabled") - confirm.
strategy = GlobusFileStrategy(
    config=config,
    gc=gc,
    cleanup_enabled=False,
)

# Show the transfer map the strategy holds for Globus.
print("\n TRANSFER MAP: \n")
print(strategy.transfer_map)

# Submit the transfer(s).
# NOTE(review): batch_size=1 presumably means one file per batch - confirm.
strategy.submit_all_transfers(batch_size=1)

2026-02-19 11:48:40.664 | INFO     | plex_pipe.core_cutting.channel_scanner:scan_channels_from_list:73 - Discovered 131 channels:
2026-02-19 11:48:40.664 | INFO     | plex_pipe.core_cutting.channel_scanner:scan_channels_from_list:75 - 001_CDC25C <- /CellDive/BLCA-1A/BLCA-1A_Final/BLCA-1A_1.0.4_R000_FITC_CDC25C-AF488_FINAL_AFR_F.ome.tif
2026-02-19 11:48:40.664 | INFO     | plex_pipe.core_cutting.channel_scanner:scan_channels_from_list:75 - 001_DAPI <- /CellDive/BLCA-1A/BLCA-1A_Final/BLCA-1A_1.0.4_R000_DAPI__FINAL_F.ome.tif
2026-02-19 11:48:40.664 | INFO     | plex_pipe.core_cutting.channel_scanner:scan_channels_from_list:75 - 001_cycD3 <- /CellDive/BLCA-1A/BLCA-1A_Final/BLCA-1A_1.0.4_R000_Cy7_cycD3-AF750_FINAL_AFR_F.ome.tif
2026-02-19 11:48:40.664 | INFO     | plex_pipe.core_cutting.channel_scanner:scan_channels_from_list:75 - 001_pH2AX <- /CellDive/BLCA-1A/BLCA-1A_Final/BLCA-1A_1.0.4_R000_Cy3_pH2AX-AF555_FINAL_AFR_F.ome.tif
2026-02-19 11:48:40.664 | INFO     | plex_pipe.core_cutting.ch

In [7]:
# Wire the ROI table, the Globus file strategy and the config-driven cutting and
# storage settings into the controller.
controller = CorePreparationController(
    metadata_df=df,
    file_strategy=strategy,
    temp_dir=config.roi_dir_tif_path,
    output_dir=config.roi_dir_output_path,
    margin=config.roi_cutting.margin,
    mask_value=config.roi_cutting.mask_value,
    max_pyramid_levels=config.sdata_storage.max_pyramid_level,
    chunk_size=config.sdata_storage.chunk_size,
    downscale=config.sdata_storage.downscale,
    # presumably removes the intermediate per-ROI TIFFs once a ROI is assembled
    # (the run log shows "Deleted intermediate TIFF") - confirm
    temp_roi_delete=True,
)

# Per the run log: cuts each channel as it becomes ready, then assembles the ROIs.
controller.run()

2026-02-19 11:48:47.342 | INFO     | plex_pipe.core_cutting.controller:run:98 - Starting ROI preparation controller...
2026-02-19 11:50:19.581 | INFO     | plex_pipe.core_cutting.controller:run:103 - Channel DAPI ready. Starting cutting...
2026-02-19 11:50:20.704 | DEBUG    | plex_pipe.core_cutting.controller:cut_channel:84 - Cut and saved ROI ROI_000, channel DAPI.
2026-02-19 11:50:21.038 | DEBUG    | plex_pipe.core_cutting.controller:cut_channel:84 - Cut and saved ROI ROI_001, channel DAPI.
2026-02-19 11:50:21.040 | DEBUG    | plex_pipe.core_cutting.controller:cut_channel:89 - Closed file handle for channel DAPI.
2026-02-19 11:50:21.041 | INFO     | plex_pipe.core_cutting.input_strategy:cleanup:160 - Skipping cleanup for D:\plex-pipe\examples\output\sample_analysis_globus\temp\BLCA-1A_1.0.4_R000_DAPI__FINAL_F.ome.tif; cleanup is disabled.
2026-02-19 11:50:21.041 | INFO     | plex_pipe.core_cutting.controller:run:110 - All channels processed. Starting assembly...


  da_delayed = da.to_zarr(


2026-02-19 11:50:22.192 | INFO     | plex_pipe.core_cutting.assembler:assemble_core:112 - ROI 'ROI_000' assembled with channels: ['DAPI']
2026-02-19 11:50:22.210 | DEBUG    | plex_pipe.core_cutting.assembler:_cleanup_core_files:130 - Deleted intermediate TIFF: D:\plex-pipe\examples\output\sample_analysis_globus\temp\ROI_000\DAPI.tiff


  da_delayed = da.to_zarr(


2026-02-19 11:50:23.127 | INFO     | plex_pipe.core_cutting.assembler:assemble_core:112 - ROI 'ROI_001' assembled with channels: ['DAPI']
2026-02-19 11:50:23.137 | DEBUG    | plex_pipe.core_cutting.assembler:_cleanup_core_files:130 - Deleted intermediate TIFF: D:\plex-pipe\examples\output\sample_analysis_globus\temp\ROI_001\DAPI.tiff
2026-02-19 11:50:23.140 | INFO     | plex_pipe.core_cutting.controller:run:117 - All cores assembled. Controller run complete.
