In [2]:
%load_ext autoreload
%autoreload 2

import os
from datetime import datetime
from tifffile import imread, imwrite
import numpy as np
import pandas as pd
from skimage.draw import polygon
from pathlib import Path
from loguru import logger
from IPython.display import clear_output

from multiplex_pipeline.utils.config_loaders import load_analysis_settings
from multiplex_pipeline.core_cutting.channel_scanner import discover_channels, build_transfer_map
from multiplex_pipeline.core_cutting.controller import CorePreparationController
from multiplex_pipeline.core_cutting.file_io import GlobusFileStrategy
from multiplex_pipeline.utils.globus_utils import GlobusConfig, create_globus_tc
from multiplex_pipeline.utils.file_utils import GlobusPathConverter

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# define pathways
settings_path = r'C:\BLCA-2_Analysis_todel\analysis_settings_BLCA2_todel.yaml'

globus_config_path = r'D:\globus_config\globus_config.yaml'

### Load analysis settings

In [4]:
# load analysis configuration
settings = load_analysis_settings(settings_path)
settings

{'image_dir': 'R:/CellDive/BLCA-2/BLCA-2_Final',
 'analysis_name': 'BLCA-2_Analysis_todel',
 'local_analysis_dir': 'C:/',
 'remote_analysis_dir': '/ix1/kkedziora/blca_analysis',
 'log_dir': WindowsPath('C:/BLCA-2_Analysis_todel/logs'),
 'detection_image': 'BLCA-2_1.0.4_R000_DAPI__FINAL_F.ome.tif',
 'core_info_file_path': WindowsPath('C:/BLCA-2_Analysis_todel/cores.csv'),
 'cores_dir_tif': WindowsPath('C:/BLCA-2_Analysis_todel/temp'),
 'cores_dir_output': WindowsPath('C:/BLCA-2_Analysis_todel/cores'),
 'include_channels': None,
 'exclude_channels': ['008_ECad'],
 'use_markers': 'DAPI',
 'ignore_markers': ['Antibody1',
  'TNFa',
  'Snail1',
  'SKP2',
  'ProgRc',
  'Plk1',
  'PH3',
  'PDL1',
  'p65',
  'p130',
  'p-p130',
  'p-Cdc6',
  'LAG3',
  'IL-8',
  'HER2',
  'ERa',
  'EpCAM',
  'E2F1',
  'cycD3',
  'cycB2',
  'CDC25C',
  'CD86',
  'CD73',
  'CD69',
  'CD62L',
  'CD56',
  'CD4',
  'CD25',
  'CD19',
  'CD27',
  'CCR7',
  'cCASP3'],
 'additional_elements': [{'category': 'image_transfo

### Define the logger

In [5]:
log_file = settings['log_dir'] / f"cores_cutting_{datetime.now():%Y-%m-%d_%H-%M-%S}.log"

logger.remove()
logger.add(lambda msg: print(msg, end=""))
logger.add(log_file, level="DEBUG", enqueue=True)

2

### Set up Globus

In [6]:
# get globus config
gc = GlobusConfig.from_config_files(globus_config_path, from_collection = 'r_collection_id', to_collection = 'cbi_collection_id')
tc = create_globus_tc(gc.client_id, gc.transfer_tokens)

In [7]:
# if Windows paths change to Globus
image_path = settings['image_dir']
if ":/" in settings['image_dir'] or ":\\" in settings['image_dir']:
    conv = GlobusPathConverter(layout="single_drive")
    image_path = conv.windows_to_globus(image_path)

image_path

'/CellDive/BLCA-2/BLCA-2_Final'

In [8]:
channel_map = discover_channels(image_path, 
                                include_channels=settings['include_channels'], 
                                exclude_channels=settings['exclude_channels'], 
                                use_markers=settings.get('use_markers'), 
                                ignore_markers=settings.get('ignore_markers'),
                                gc=gc)

2025-10-15 14:17:08.667 | INFO     | multiplex_pipeline.core_cutting.channel_scanner:scan_channels_from_list:84 - Discovered 131 channels:
2025-10-15 14:17:08.668 | INFO     | multiplex_pipeline.core_cutting.channel_scanner:scan_channels_from_list:86 - 001_CDC25C <- /CellDive/BLCA-2/BLCA-2_Final/BLCA-2_1.0.4_R000_FITC_CDC25C-AF488_FINAL_AFR_F.ome.tif
2025-10-15 14:17:08.668 | INFO     | multiplex_pipeline.core_cutting.channel_scanner:scan_channels_from_list:86 - 001_DAPI <- /CellDive/BLCA-2/BLCA-2_Final/BLCA-2_1.0.4_R000_DAPI__FINAL_F.ome.tif
2025-10-15 14:17:08.668 | INFO     | multiplex_pipeline.core_cutting.channel_scanner:scan_channels_from_list:86 - 001_cycD3 <- /CellDive/BLCA-2/BLCA-2_Final/BLCA-2_1.0.4_R000_Cy7_cycD3-AF750_FINAL_AFR_F.ome.tif
2025-10-15 14:17:08.668 | INFO     | multiplex_pipeline.core_cutting.channel_scanner:scan_channels_from_list:86 - 001_pH2AX <- /CellDive/BLCA-2/BLCA-2_Final/BLCA-2_1.0.4_R000_Cy3_pH2AX-AF555_FINAL_AFR_F.ome.tif
2025-10-15 14:17:08.668 | INF

In [9]:
df_path = settings['core_info_file_path'].with_suffix('.pkl')

df = pd.read_pickle(df_path)
df.head()

Unnamed: 0,core_name,row_start,row_stop,column_start,column_stop,poly_type,polygon_vertices
0,Core_000,832.0,6528.0,7552.0,13120.0,rectangle,"[[832.0, 13120.0], [6528.0, 13120.0], [6528.0,..."
1,Core_001,256.0,6144.0,27520.0,33408.0,rectangle,"[[256.0, 33408.0], [6144.0, 33408.0], [6144.0,..."
2,Core_002,384.0,6208.0,34496.0,40384.0,rectangle,"[[384.0, 40384.0], [6208.0, 40384.0], [6208.0,..."
3,Core_003,768.0,6336.0,41408.0,46976.0,rectangle,"[[768.0, 46976.0], [6336.0, 46976.0], [6336.0,..."
4,Core_004,1088.0,6272.0,14336.0,19840.0,rectangle,"[[1088.0, 19840.0], [6272.0, 19840.0], [6272.0..."


In [10]:
# build transfer map
transfer_cache_dir = settings['temp_dir']
transfer_map = build_transfer_map(channel_map, transfer_cache_dir)

# build a dict for transfered images
image_paths = {
    ch: str(Path(transfer_cache_dir) / Path(remote).name)
    for ch, (remote, _) in transfer_map.items()
}
image_paths

{'DAPI': 'C:\\BLCA-2_Analysis_todel\\temp\\BLCA-2_1.0.4_R000_DAPI__FINAL_F.ome.tif'}

In [11]:
transfer_map

{'DAPI': ('/CellDive/BLCA-2/BLCA-2_Final/BLCA-2_1.0.4_R000_DAPI__FINAL_F.ome.tif',
  '/C/BLCA-2_Analysis_todel/temp/BLCA-2_1.0.4_R000_DAPI__FINAL_F.ome.tif')}

In [None]:
strategy = GlobusFileStrategy(tc=tc, transfer_map=transfer_map, gc=gc, cleanup_enabled = True) # submits transfers when initialized, should it stay like this?

controller = CorePreparationController(
    metadata_df = df[1:2],
    image_paths = image_paths,
    temp_dir = settings['cores_dir_tif'],
    output_dir = settings['cores_dir_output'],
    file_strategy = strategy,
    margin = settings['core_cutting']['margin'],
    mask_value = settings['core_cutting']['mask_value'],
    max_pyramid_levels = settings['sdata_storage']['max_pyramid_level'],
    chunk_size = settings['sdata_storage']['chunk_size'],
    downscale = settings['sdata_storage']['downscale'],
)

controller.run()

2025-10-15 14:17:19.958 | INFO     | multiplex_pipeline.core_cutting.file_io:submit_all_transfers:91 - Submitted transfer for DAPI to /C/BLCA-2_Analysis_todel/temp/BLCA-2_1.0.4_R000_DAPI__FINAL_F.ome.tif (task_id=2d38c8cd-a9f3-11f0-904e-0e092d85c59b)


KeyError: 'chunk_size'