In [1]:

import os
import glob
from pathlib import Path
import shutil

import numpy as np
import pandas as pd
from tqdm import tqdm
# from unittest.mock import patch

from lib.utils.io_utils import get_tif_list
from lib.fstack import stack_cyc
from lib.cidre import cidre_correct, cidre_walk
from lib.register import register_meta
from lib.stitch import patch_tiles
from lib.stitch import template_stitch
from lib.stitch import stitch_offset

from lib.register import register_manual
from lib.stitch import stitch_manual
from lib.os_snippets import try_mkdir
    
from skimage.io import imread
from skimage.io import imsave
from skimage.util import img_as_uint

c:\Users\Mingchuan\anaconda3\envs\cell-typing\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\Mingchuan\anaconda3\envs\cell-typing\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


In [2]:
from skimage.transform import resize


def resize_pad(img, size):
    """Resize ``img`` to ``size`` and centre it on a zero canvas of the input's shape.

    Args:
        img: 2-D image array; its shape defines the shape of the returned canvas.
        size: ``(height, width)`` of the resized image placed on the canvas.

    Returns:
        The padded image after conversion with ``img_as_uint``.
    """
    target_h, target_w = size[0], size[1]
    canvas = np.zeros(img.shape)

    # Offsets that centre the resized image inside the original frame.
    top = (img.shape[0] - target_h) // 2
    left = (img.shape[1] - target_w) // 2

    canvas[top:top + target_h, left:left + target_w] = resize(img, size, anti_aliasing=True)
    return img_as_uint(canvas)


def resize_dir(in_dir, out_dir, chn):
    """Apply ``resize_pad`` to every ``*.tif`` in ``in_dir`` and save it under ``out_dir``.

    Args:
        in_dir: Directory holding the input ``.tif`` tiles.
        out_dir: Output directory; created if missing (its parent must exist).
        chn: Channel name selecting the square target size; one of
            ``'cy3'``, ``'TxRed'``, ``'FAM'``, ``'DAPI'``.

    Raises:
        KeyError: If ``chn`` has no entry in the size table.
    """
    src, dst = Path(in_dir), Path(out_dir)
    dst.mkdir(exist_ok=True)

    # Per-channel edge length (pixels) of the resized image.
    side = {'cy3': 2302, 'TxRed': 2303, 'FAM': 2301, 'DAPI': 2300}[chn]

    tif_paths = list(src.glob('*.tif'))
    for tif_path in tqdm(tif_paths, desc=src.name):
        resized = resize_pad(imread(tif_path), (side, side))
        imsave(dst / tif_path.name, resized, check_contrast=False)


def resize_batch(in_dir, out_dir):
    """Resize every ``cyc_*_*`` directory of ``in_dir`` into ``out_dir``.

    The channel is taken from the last ``_``-separated token of the directory
    name. ``cy5`` directories are copied unchanged; every other channel is
    passed to ``resize_dir``.

    Args:
        in_dir: Directory containing the per-cycle/per-channel sub-directories.
        out_dir: Destination root; one sub-directory per input directory.
    """
    try_mkdir(out_dir)
    out_root = Path(out_dir)
    for cyc_path in list(Path(in_dir).glob('cyc_*_*')):
        chn = cyc_path.name.split('_')[-1]
        target = out_root / cyc_path.name
        if chn == 'cy5':
            # dirs_exist_ok lets the cell be re-run: without it copytree raises
            # FileExistsError as soon as the destination exists, whereas the
            # resize_dir branch already tolerates an existing output directory.
            shutil.copytree(cyc_path, target, dirs_exist_ok=True)
        else:
            resize_dir(cyc_path, target, chn)

In [3]:
# Root locations of the raw acquisitions and of the processed results.
SRC_DIR = Path(r'E:\TMC\spatial_data\raw')
BASE_DIR = Path(r'E:\TMC\spatial_data\processed')

# Name of the run folder under SRC_DIR; results go to '<RUN_ID>_processed'.
RUN_ID = '20240418_100um_Gel_Actb_test2_new_sample'
src_dir = SRC_DIR.joinpath(RUN_ID)
dest_dir = BASE_DIR.joinpath(RUN_ID + '_processed')

# 2D workflow: one output folder per stage
aif_dir = dest_dir.joinpath('focal_stacked')
sdc_dir = dest_dir.joinpath('background_corrected')
rgs_dir = dest_dir.joinpath('registered')
stc_dir = dest_dir.joinpath('stitched')
rsz_dir = dest_dir.joinpath('resized')

# 3D workflow
cid_dir = dest_dir.joinpath('cidre')
air_dir = dest_dir.joinpath('airlocalize_stack')

# process2D

In [4]:
def process_2d():
    """Run the 2D workflow: background correction, registration, resize, stitch.

    Reads the focal-stacked tiles in ``aif_dir`` and writes each stage to its
    own module-level directory (``sdc_dir``, ``rgs_dir``, ``rsz_dir``,
    ``stc_dir``). Also writes ``integer_offsets.csv`` into ``rgs_dir``.
    """
    # Focal stacking is currently disabled; aif_dir is expected to be populated.
    # raw_cyc_list = list(src_dir.glob('cyc_*'))
    # for cyc in raw_cyc_list:
    #   cyc_num = int(cyc.name.split('_')[1])
    #   stack_cyc(src_dir, aif_dir, cyc_num)

    ref_cyc = 1
    ref_chn = 'cy3'
    ref_chn_1 = 'cy5'
    grid_rows, grid_cols = 28, 22  # tile grid passed to patch_tiles / template_stitch

    cidre_walk(str(aif_dir), str(sdc_dir))

    # Register the cy3/cy5 channels against the reference cycle and channel.
    rgs_dir.mkdir(exist_ok=True)
    reference_name = f'cyc_{ref_cyc}_{ref_chn}'
    im_names = get_tif_list(sdc_dir / reference_name)
    meta_df = register_meta(str(sdc_dir), str(rgs_dir), ['cy3', 'cy5'], im_names, ref_cyc, ref_chn)
    meta_df.to_csv(rgs_dir / 'integer_offsets.csv')

    # Remaining cycle-1 channels are registered against the registered cy3 tiles.
    # register_manual(rgs_dir/'cyc_1_cy3', sdc_dir/'cyc_1_cy5', rgs_dir/'cyc_1_cy5') #
    # NOTE: the DAPI entry was marked by the author as temporary
    # ("0103 revised! Please remove this !").
    for manual_name in ('cyc_1_FAM', 'cyc_1_TxRed', 'cyc_1_DAPI'):
        register_manual(rgs_dir / 'cyc_1_cy3', sdc_dir / manual_name, rgs_dir / manual_name)

    patch_tiles(rgs_dir / reference_name, grid_rows * grid_cols)

    resize_batch(rgs_dir, rsz_dir)

    stc_dir.mkdir(exist_ok=True)
    template_stitch(rsz_dir / f'cyc_{ref_cyc}_{ref_chn_1}', stc_dir, grid_rows, grid_cols)

    # offset_df = pd.read_csv(rgs_dir / 'integer_offsets.csv', index_col=0)
    # stitch_offset(rgs_dir, stc_dir, offset_df)

# process3D

In [5]:
# Define your per-slice and per-stack programs
def process_slice(slice_2d, channel):
    """Resize and pad one 2-D slice according to its channel.

    The channel name is matched case-insensitively, so tokens such as
    ``'TxRed'`` or ``'FAM'`` resolve to the same entry as their lower-case
    form.

    Args:
        slice_2d: 2-D image slice.
        channel: Channel name; ``cy5`` slices are returned untouched.

    Returns:
        ``slice_2d`` itself for ``cy5``, otherwise the output of ``resize_pad``.

    Raises:
        KeyError: If the channel has no entry in the size table.
    """
    # Per-channel edge length (pixels) of the resized image; keys are lower-case.
    chn_sizes = {'cy3': 2302, 'txred': 2303, 'fam': 2301, 'dapi': 2300}

    key = channel.lower()
    if key == 'cy5':
        return slice_2d

    size = chn_sizes[key]
    return resize_pad(slice_2d, (size, size))

# Adjust shift_correction
def shift_correction(signal_df, shift_df, meta_df, tile, cyc, ref_cyc=1):
    """Shift spot coordinates of one tile by that cycle's registration offset.

    Args:
        signal_df: DataFrame with ``x_in_pix`` and ``y_in_pix`` columns. It is
            modified in place and also returned.
        shift_df: Offsets addressed as ``shift_df.loc[cyc, file]``; each entry
            is a string ``"<y_shift> <x_shift>"`` of integers.
        meta_df: Unused; kept so existing call sites keep working.
        tile: Integer tile number, used to build ``FocalStack_<tile:03d>.tif``.
        cyc: Cycle the coordinates belong to.
        ref_cyc: Reference cycle, whose coordinates are left unchanged.

    Returns:
        ``signal_df`` with the shifted coordinates.
    """
    # The reference cycle needs no correction, and an empty frame needs no
    # offset lookup.
    if cyc == ref_cyc or signal_df.empty:
        return signal_df

    # The offset depends only on (cycle, tile), so look it up once.
    file = f'FocalStack_{tile:03d}.tif'
    shift_entry = shift_df.loc[cyc, file]
    y_shift, x_shift = map(int, shift_entry.split())

    # Column arithmetic keeps the frame's own index; rebuilding the columns
    # from a fresh RangeIndex frame would misalign on any non-default index.
    signal_df['x_in_pix'] = signal_df['x_in_pix'] + x_shift
    signal_df['y_in_pix'] = signal_df['y_in_pix'] + y_shift
    return signal_df

def stitch_3d(signal_df, meta_df, tile):
    """Translate tile-local spot coordinates by the tile's stitched position.

    Args:
        signal_df: DataFrame with ``x_in_pix`` and ``y_in_pix`` columns. It is
            modified in place and also returned.
        meta_df: Stitching metadata with ``file``, ``x`` and ``y`` columns; the
            first row whose ``file`` equals ``FocalStack_<tile:03d>.tif`` gives
            the offset added to the coordinates.
        tile: Integer tile number.

    Returns:
        ``signal_df`` with the offset added to both coordinate columns.

    Raises:
        IndexError: If ``meta_df`` has no row for the tile's file name.
    """
    if signal_df.empty:
        return signal_df

    # The tile offset is the same for every spot, so fetch it once.
    file = f'FocalStack_{tile:03d}.tif'
    meta_row = meta_df.loc[meta_df['file'] == file].iloc[0]

    # Column arithmetic keeps the frame's own index; rebuilding the columns
    # from a fresh RangeIndex frame would misalign on any non-default index.
    signal_df['x_in_pix'] = signal_df['x_in_pix'] + meta_row['x']
    signal_df['y_in_pix'] = signal_df['y_in_pix'] + meta_row['y']
    return signal_df

In [6]:
import sys
import re
from collections import defaultdict
from zmq import CHANNEL

import tifffile
from lib.stitch import read_meta
from lib.AIRLOCALIZE.airlocalize import airlocalize


# Cycle labels (as they appear in the file names) on which spots are detected.
extract_points_cycle = ['C001']
CHANNELS = ['cy3', 'cy5', 'fam', 'txred']

def process_3d():
    """Run the 3D workflow: build z-stacks, detect spots, stitch coordinates.

    Steps, all driven by the module-level directories:

    1. Group the ``*.tif`` files of ``src_dir`` by (cycle, tile, channel) and
       write one z-ordered stack per group to
       ``air_dir/<tile>/<cycle>/<channel>.tif``. File names are split on ``-``
       into cycle, tile and channel; the z index is the integer between the
       last ``Z`` and the following ``.``.
    2. Run ``airlocalize`` on every tile/cycle whose cycle is listed in
       ``extract_points_cycle`` and merge the resulting ``*spots.csv`` files
       column-wise into ``air_dir/<tile>/intensity_local.csv``.
    3. Offset each tile's coordinates with ``stitch_3d`` and write the
       concatenated table to ``air_dir/intensity.csv``.

    Raises:
        FileNotFoundError: If ``src_dir`` contains no ``.tif`` file.
    """
    # generate corrected 3d image of each tile
    # cidre_correct(str(src_dir), str(cid_dir))
    tile_cycles = defaultdict(set)
    file_groups = defaultdict(list)
    for file_path in glob.glob(os.path.join(src_dir, '*.tif')):
        filename = os.path.basename(file_path)
        parts = filename.split('-')
        cycle, tile, channel = parts[0], parts[1], parts[2]
        z_index = int(filename.split('Z')[-1].split('.')[0])
        file_groups[(cycle, tile, channel)].append((z_index, file_path))
        # A set keeps each cycle once per tile; a list would collect one entry
        # per file and make the detection step run repeatedly per cycle.
        tile_cycles[tile].add(cycle)

    if not file_groups:
        raise FileNotFoundError(f'No .tif files found in {src_dir}')

    # Cycles ordered by the number following their one-letter prefix.
    stack_name = {tile: sorted(cycles, key=lambda c: int(c[1:])) for tile, cycles in tile_cycles.items()}
    file_groups = {k: sorted(v) for k, v in file_groups.items()}  # Sort by Z index within each group

    for (cycle, tile, channel), files in tqdm(file_groups.items(), desc='Processing stacks'):
        stack = np.array([process_slice(imread(file_path), channel) for _, file_path in files])
        os.makedirs(air_dir / tile / cycle, exist_ok=True)
        imsave(air_dir / tile / cycle / f"{channel.lower()}.tif", stack, check_contrast=False)


    # extract spot candidates from the cycles listed in extract_points_cycle
    for tile, cycles in tqdm(stack_name.items(), desc='Detecting candidate points'):
        for cycle in cycles:
            if cycle not in extract_points_cycle:
                continue
            tile_cycle_dir = air_dir / tile / cycle
            # perform airlocalization
            airlocalize(parameters_filename='Image_process/lib/AIRLOCALIZE/parameters.yaml',
                        default_parameters='Image_process/lib/AIRLOCALIZE/parameters_default.yaml',
                        update={'dataFileName': tile_cycle_dir, 'saveDirName': tile_cycle_dir, 'verbose':False, 'multiChannelCandidates': True})

            # Sorted so the column order does not depend on directory listing order.
            spots_file = sorted(name for name in os.listdir(tile_cycle_dir) if name.endswith('spots.csv'))
            frames = [pd.read_csv(tile_cycle_dir / name) for name in spots_file]
            local_csv = air_dir / tile / 'intensity_local.csv'
            if local_csv.exists():
                # Columns from earlier runs/cycles stay in front.
                frames.insert(0, pd.read_csv(local_csv))
            pd.concat(frames, axis=1).to_csv(local_csv, index=False)


    # NOTE: disabled work-in-progress, kept for reference.
    # # multi-channel read
    # shift_df = pd.read_csv(rgs_dir / 'integer_offsets.csv', index_col=0)

    # for tile in tqdm(stack_name.keys(), desc='Reading spots'):
    #     combined_candidates = pd.read_csv(air_dir / tile / 'combined_candidates.csv')
    #     intensity_read = combined_candidates[['z_in_pix', 'x_in_pix', 'y_in_pix']].round().astype(np.uint16).drop_duplicates()
    #     intensity_read = intensity_read.reset_index()

    #     for cycle in stack_name[tile]:
    #         with tifffile.TiffFile(air_dir / tile / 'cy3.tif') as tif:
    #             shape = tif.series[0].shape

    #         coordinates = intensity_read[['z_in_pix', 'x_in_pix', 'y_in_pix']]
    #         coordinates = shift_correction(coordinates, shift_df, cyc=int(cycle[1:]), ref_cyc=1)
    #         coordinates = coordinates[
    #             (0 <= coordinates['x_in_pix'] < shape[1]) &
    #             (0 <= coordinates['y_in_pix'] < shape[2]) &
    #             (0 <= coordinates['z_in_pix'] < shape[0]) ]
    #         z_coords = coordinates['z_in_pix'].to_numpy()
    #         y_coords = coordinates['y_in_pix'].to_numpy()
    #         x_coords = coordinates['x_in_pix'].to_numpy()

    #         for channel in CHANNELS:
    #             image = imread(air_dir/tile/f'{channel}.tif')
    #             coordinates[f'{cycle}_{channel}'] = image[z_coords, y_coords, x_coords]
    #             intensity_read['cyc{}_{}'.format(int(cycle[1:]), channel)] = coordinates[f'{cycle}_{channel}']

    #     intensity_read.to_csv(aif_dir / tile / 'intensity_local.csv')


    # stitch the intensity
    meta_df = read_meta(stc_dir)
    # 'position' is parsed as "(x, y)" with non-negative integers.
    pattern = r'\((\d+)\, *(\d+)\)'
    meta_df['match'] = meta_df['position'].apply(lambda x: re.match(pattern, x))
    meta_df['y'] = meta_df['match'].apply(lambda x: int(x.group(2)))
    meta_df['x'] = meta_df['match'].apply(lambda x: int(x.group(1)))

    # NOTE(review): stitch_3d formats `tile` with ':03d', which needs an int,
    # while `tile` here is the raw string token from the file name. Confirm how
    # tile labels map to FocalStack numbers before relying on this step.
    stitched = []
    for tile in tqdm(stack_name.keys(), desc='Stitching'):
        # Read from air_dir: that is where the detection step above writes
        # intensity_local.csv.
        signal_df = pd.read_csv(air_dir / tile / 'intensity_local.csv')
        stitched.append(stitch_3d(signal_df, meta_df, tile))

    # Concatenate once instead of growing the frame tile by tile.
    intensity = pd.concat(stitched)
    intensity.to_csv(air_dir / 'intensity.csv')

# main

In [7]:
# def main():
#     process_2d()
#     process_3d()

# if __name__ == '__main__':
#     main()

# test

In [8]:
import sys
import re
from collections import defaultdict
from zmq import CHANNEL

import tifffile
from lib.stitch import read_meta
from lib.AIRLOCALIZE.airlocalize import airlocalize

extract_points_cycle = ['C001']
def process_3d():
    """Test variant of the 3D workflow: spot detection only.

    Scans ``src_dir/cyc_1_tile_stitch_test`` for ``*.tif`` files to find the
    tiles and cycles, then runs ``airlocalize`` on the stacks under ``air_dir``
    for every cycle listed in ``extract_points_cycle``. The resulting
    ``*spots.csv`` files are merged column-wise into
    ``air_dir/<tile>/intensity_local.csv``. Stack writing is disabled here.

    NOTE: this redefines the ``process_3d`` declared in an earlier cell.
    """
    # cidre_correct(str(src_dir), str(cid_dir))
    cycles_by_tile = dict()
    grouped = defaultdict(list)
    for tif_path in glob.glob(str(src_dir / 'cyc_1_tile_stitch_test' / '*.tif')):
        base = os.path.basename(tif_path)
        tokens = base.split('-')
        cycle, tile, channel = tokens[0], tokens[1], tokens[2]
        # z index: the integer between the last 'Z' and the following '.'
        z_index = int(base.split('Z')[-1].split('.')[0])
        grouped[(cycle, tile, channel)].append((z_index, tif_path))
        cycles_by_tile.setdefault(tile, set()).add(cycle)

    # Cycles ordered by the number following their one-letter prefix.
    stack_name = {
        tile: sorted(cycles, key=lambda label: int(label[1:]))
        for tile, cycles in cycles_by_tile.items()
    }
    # Sort by Z index within each group
    file_groups = {group: sorted(members) for group, members in grouped.items()}

    # for (cycle, tile, channel), files in tqdm(file_groups.items(), desc='Processing stacks'):
    #     stack = np.array([process_slice(imread(file_path), channel) for _, file_path in files])
    #     os.makedirs(air_dir / tile / cycle, exist_ok=True)
    #     imsave(air_dir / tile / cycle / f"{channel.lower()}.tif", stack, check_contrast=False)


    # extract spot candidates from the cycles listed in extract_points_cycle
    for tile in tqdm(stack_name.keys(), desc='Detecting candidate points'):
        for cycle in stack_name[tile]:
            if cycle not in extract_points_cycle:
                continue

            tile_cycle_dir = air_dir / tile / cycle
            # perform airlocalization
            airlocalize(
                parameters_filename='./lib/AIRLOCALIZE/parameters.yaml',
                default_parameters='./lib/AIRLOCALIZE/parameters_default.yaml',
                update={
                    'dataFileName': tile_cycle_dir,
                    'saveDirName': tile_cycle_dir,
                    'verbose': True,
                    'multiChannelCandidates': True,
                },
            )

            spots_file = [name for name in os.listdir(tile_cycle_dir) if name.endswith('spots.csv')]
            frames = []
            if 'intensity_local.csv' in os.listdir(air_dir / tile):
                # Columns already collected for this tile stay in front.
                frames.append(pd.read_csv(air_dir / tile / 'intensity_local.csv'))
            for name in spots_file:
                frames.append(pd.read_csv(tile_cycle_dir / name))
            merged = pd.concat(frames, axis=1)
            merged.to_csv(air_dir / tile / 'intensity_local.csv', index=False)

process_3d()

Detecting candidate points:   0%|          | 0/4 [00:00<?, ?it/s]

Found 1 files to analyze.
Analyzing file: cy5.tif...
Retrieving image for cy5.tif...
Image scaled from 226.0 to 2288.0
Smoothing cy5.tif, mode: LoG...
Image scaled from -2725.379194488983 to 5014.253662665929
Image scaled from 0.0 to 32767.0
Smoothing cy5.tif done.
Threshold value is 14548.717142145391 in absolute units
Predetected 34996 spots;
Found 34996 spot candidates.
Localizing spots...
Retrieving image for cy5.tif...
Image scaled from 226.0 to 2288.0


Fit predetected spots in cy5.tif: 100%|██████████| 34996/34996 [01:26<00:00, 404.89it/s]
Detecting candidate points:  25%|██▌       | 1/4 [09:51<29:35, 591.73s/it]

Found 1 files to analyze.
Analyzing file: cy5.tif...
Retrieving image for cy5.tif...
Image scaled from 218.0 to 3237.0
Smoothing cy5.tif, mode: LoG...
Image scaled from -2766.4469706295135 to 5072.422666331294
Image scaled from 0.0 to 32767.0
Smoothing cy5.tif done.
Threshold value is 14572.554711848476 in absolute units
Predetected 36500 spots;
Found 36500 spot candidates.
Localizing spots...
Retrieving image for cy5.tif...
Image scaled from 218.0 to 3237.0


Fit predetected spots in cy5.tif: 100%|██████████| 36500/36500 [01:27<00:00, 416.04it/s]
Detecting candidate points:  50%|█████     | 2/4 [19:40<19:40, 590.13s/it]

Found 1 files to analyze.
Analyzing file: cy5.tif...
Retrieving image for cy5.tif...
Image scaled from 177.0 to 1073.0
Smoothing cy5.tif, mode: LoG...
Image scaled from -2996.9531597865084 to 4396.179600600583
Image scaled from 0.0 to 32767.0
Smoothing cy5.tif done.
Threshold value is 16194.135210149994 in absolute units
Predetected 34248 spots;
Found 34248 spot candidates.
Localizing spots...
Retrieving image for cy5.tif...
Image scaled from 177.0 to 1073.0


Fit predetected spots in cy5.tif: 100%|██████████| 34248/34248 [01:27<00:00, 392.63it/s]
Detecting candidate points:  75%|███████▌  | 3/4 [29:21<09:45, 585.67s/it]

Found 1 files to analyze.
Analyzing file: cy5.tif...
Retrieving image for cy5.tif...
Image scaled from 194.0 to 2902.0
Smoothing cy5.tif, mode: LoG...
Image scaled from -2845.862285801606 to 4580.24960915822
Image scaled from 0.0 to 32767.0
Smoothing cy5.tif done.
Threshold value is 15143.907423608096 in absolute units
Predetected 34922 spots;
Found 34922 spot candidates.
Localizing spots...
Retrieving image for cy5.tif...
Image scaled from 194.0 to 2902.0


Fit predetected spots in cy5.tif: 100%|██████████| 34922/34922 [01:20<00:00, 432.52it/s]
Detecting candidate points: 100%|██████████| 4/4 [38:49<00:00, 582.33s/it]
