In [1]:
import cv2 
import tifffile 
import numpy as np
import math
import pandas as pd
import json

from typing import List, Dict

import logging 

import skimage 
import skimage.io
import skimage.measure
import skimage.morphology
from scipy.io import loadmat

import os 


import skimage.io as io
from datetime import datetime
from matplotlib import pyplot as plt
date = datetime.today().strftime('%Y-%m-%d')

from tqdm import tqdm

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


In [2]:
# Load the whole-slide GeoMx OME-TIFF images and the per-core position tables
# for both cohorts. The position tables supply the 'Core', 'x1', 'x2', 'y1'
# and 'y2' columns that the cropping cell reads to cut each core out of the
# slide image.
# NOTE(review): the cropping cell indexes the images as [:, y1:y2, x1:x2], so
# they are presumably channel-first (C, Y, X) -- confirm against the files.
DFCI_geomx = tifffile.imread('../data/geomx/DFCI_geomx.ome.tiff')
DFCI_geomx_position = pd.read_csv('../data/DFCI_geomx_position.csv')
Rochester_geomx = tifffile.imread('../data/geomx/Rochester_geomx.ome.tiff')
Rochester_geomx_position = pd.read_csv('../data/Rochester_geomx_position.csv')

In [3]:
# Per-core gating thresholds. gate_markers() looks rows up by the 'Core'
# column and reads one column per marker name.
LOWERBOUND_DF = pd.read_csv('../data/threshold_csv/lowerBound.csv')
UPPERBOUND_DF = pd.read_csv('../data/threshold_csv/upperBound.csv')
# Cores to process, written as '<cohort>_<core id>'; the cropping cell builds
# the same string from each position-table 'Core' value to test membership.
SELECTED_CORES = ["Rochester_4", "Rochester_6",
                    "Rochester_7", "Rochester_9", "Rochester_11", "Rochester_12",
                    "Rochester_13", "Rochester_14",
                    "Rochester_15", "Rochester_16", "Rochester_17", "Rochester_18",
                    "Rochester_19", "Rochester_21", "Rochester_23",
                    "Rochester_25", "DFCI_2.2", "DFCI_3.2",
                    "DFCI_4.1", "DFCI_7.1", "DFCI_8.1",
                    "DFCI_12.1", "DFCI_13.2", "DFCI_14.1", "DFCI_15.2", "DFCI_17.1",
                    "DFCI_18.2", "DFCI_19.2", "DFCI_22.2", "DFCI_23.2"]
# Markers written to the per-cell CSVs (passed to extract_single_cell_info).
MARKERS_TO_EXTRACT = ['Syto13', 'Tox', 'PD-L1']
# Markers that are thresholded (passed to gate_markers); 'Syto13' is extracted
# but not gated.
CHANNELS = ['Tox', 'PD-L1']



In [4]:
# Channel names, in the order the channels are taken from axis 0 of the slide images.
GEOMX_CHANNEL = ['Syto13', 'Tox', 'PD-L1', 'Myc']


def _crop_selected_cores(slide_img, position_df, cohort, selected_cores, channel_names):
    """Crop every selected core out of a slide image and split it by channel.

    Args:
        slide_img: Array indexed as ``[channel, y, x]``.
        position_df: Table with 'Core', 'x1', 'x2', 'y1' and 'y2' columns.
        cohort: Prefix used to build the ``'<cohort>_<core>'`` name that is
            looked up in ``selected_cores``.
        selected_cores: Collection of core names to keep.
        channel_names: Names assigned to channels 0..len-1 of ``slide_img``.

    Returns:
        ``{core: {channel_name: 2-D crop}}`` keyed by the raw 'Core' values.
        The crops are slices of ``slide_img``, not copies.
    """
    cores = {}
    for core in position_df['Core']:
        if f'{cohort}_{core}' not in selected_cores:
            continue
        # Filter the table once per core; the first matching row wins.
        row = position_df[position_df['Core'] == core]
        x1, x2 = row['x1'].values[0], row['x2'].values[0]
        y1, y2 = row['y1'].values[0], row['y2'].values[0]
        core_crop = slide_img[:, y1:y2, x1:x2]
        print(f"Processing core {core}")
        cores[core] = {name: core_crop[idx] for idx, name in enumerate(channel_names)}
    return cores


DFCI_dict = _crop_selected_cores(
    DFCI_geomx, DFCI_geomx_position, 'DFCI', SELECTED_CORES, GEOMX_CHANNEL)

Rochester_dict = _crop_selected_cores(
    Rochester_geomx, Rochester_geomx_position, 'Rochester', SELECTED_CORES, GEOMX_CHANNEL)
            

    

Processing core 2.2
Processing core 3.2
Processing core 4.1
Processing core 7.1
Processing core 8.1
Processing core 12.1
Processing core 13.2
Processing core 14.1
Processing core 15.2
Processing core 17.1
Processing core 18.2
Processing core 19.2
Processing core 22.2
Processing core 23.2
Processing core 4
Processing core 6
Processing core 7
Processing core 9
Processing core 11
Processing core 12
Processing core 13
Processing core 14
Processing core 15
Processing core 16
Processing core 17
Processing core 18
Processing core 19
Processing core 21
Processing core 23
Processing core 25


In [5]:
from typing import Dict, List
def gate_markers(core_img: Dict[str, np.ndarray], coreName, channels, lowerBound_df, upperBound_df):
    """Zero out pixels that fall outside each marker's per-core intensity gate.

    For every marker in ``channels`` the thresholds are read from the row of
    ``lowerBound_df`` / ``upperBound_df`` whose 'Core' column equals
    ``coreName``. Pixels ``<= lowerBound`` are set to 0. Pixels
    ``>= upperBound`` are also set to 0, unless the upper bound is 0, which
    means "no upper gate".

    Args:
        core_img: Mapping of marker name to image array. It is NOT modified.
        coreName: Value to match in the 'Core' column of the threshold tables.
        channels: Marker names to gate.
        lowerBound_df: Table with a 'Core' column and one column per marker.
        upperBound_df: Same layout as ``lowerBound_df``.

    Returns:
        A new dict with the same keys as ``core_img``. Gated markers map to
        freshly copied arrays; all other entries are the original objects.

    Raises:
        ValueError: If a threshold table does not contain exactly one row
            for ``coreName``.
    """
    def _threshold(bound_df, marker):
        # Exactly one row must match, otherwise the gate is ambiguous or absent.
        matches = bound_df.loc[bound_df['Core'] == coreName, marker].values
        if matches.size != 1:
            raise ValueError(
                f"Expected exactly one threshold row for core {coreName!r} and "
                f"marker {marker!r}, found {matches.size}")
        return matches.item()

    # Shallow copy so the caller's dict keeps its raw (ungated) channels.
    gated = dict(core_img)
    for marker in channels:
        lowerBound = _threshold(lowerBound_df, marker)
        upperBound = _threshold(upperBound_df, marker)

        img_filtered = np.copy(core_img[marker])
        img_filtered[img_filtered <= lowerBound] = 0
        if upperBound != 0:
            img_filtered[img_filtered >= upperBound] = 0
        gated[marker] = img_filtered
    return gated


def extract_single_cell_info(core_img: Dict[str, np.ndarray], segmentation_mask: np.ndarray,
                             interested_markers: List[str], output_path: str):
    """Write per-cell marker intensities for one core to CSV.

    Each labelled region of ``segmentation_mask`` becomes one row. Two files
    are written into ``output_path`` (created if missing):

    * ``data.csv``: summed intensity of every marker over the region's pixels.
    * ``dataScaleSize.csv``: the same sums divided by the region's area.

    Both files start with the columns ``cellLabel``, ``Y_cent``, ``X_cent``
    (region label and ``region.centroid[0]`` / ``[1]``) and ``cellSize``
    (``region.area``), followed by one column per marker.

    Args:
        core_img: Mapping of marker name to image array.
        segmentation_mask: Label image passed to ``skimage.measure.regionprops``.
            NOTE(review): assumed to be 2-D and spatially aligned with the
            marker images -- confirm for every mask source.
        interested_markers: Markers to extract; entries equal to ``'Empty'``
            are skipped.
        output_path: Directory receiving the two CSV files.

    Raises:
        ValueError: If no marker other than ``'Empty'`` is requested.
    """
    # Drop 'Empty' before stacking so the data columns and their names always
    # line up (previously only the names were filtered).
    col_names = [marker for marker in interested_markers if marker != 'Empty']
    if not col_names:
        raise ValueError("interested_markers must contain at least one marker other than 'Empty'")

    # (Y, X, channel) stack so one fancy-index call returns all channels per pixel.
    counts_no_noise = np.stack([core_img[marker] for marker in col_names], axis=2)

    stats = skimage.measure.regionprops(segmentation_mask)
    label_num = len(stats)
    channel_num = len(col_names)

    data = np.zeros((label_num, channel_num))
    data_scale_size = np.zeros((label_num, channel_num))
    cell_sizes = np.zeros((label_num, 1))
    cell_props = np.zeros((label_num, 3))

    for i, region in enumerate(stats):
        coords = region.coords
        # Gather every pixel of the region at once instead of one Python-level
        # lookup per pixel.
        data[i] = counts_no_noise[coords[:, 0], coords[:, 1], :].sum(axis=0)
        data_scale_size[i] = data[i] / region.area
        cell_sizes[i] = region.area
        cell_props[i] = [region.label, region.centroid[0], region.centroid[1]]

    # Identification columns shared by both output tables.
    cell_meta = pd.concat([
        pd.DataFrame(cell_props, columns=["cellLabel", "Y_cent", "X_cent"]),
        pd.DataFrame(cell_sizes, columns=["cellSize"]),
    ], axis=1)

    data_full = pd.concat([cell_meta, pd.DataFrame(data, columns=col_names)], axis=1)
    data_scale_size_full = pd.concat(
        [cell_meta, pd.DataFrame(data_scale_size, columns=col_names)], axis=1)

    os.makedirs(output_path, exist_ok=True)
    data_full.to_csv(os.path.join(output_path, "data.csv"), index=False)
    data_scale_size_full.to_csv(os.path.join(output_path, "dataScaleSize.csv"), index=False)






In [6]:
# Gate and extract per-cell information for every selected DFCI core.
for core_num, core_img in tqdm(DFCI_dict.items(), desc="Processing cores"):
    # Gate markers
    core_img_filtered = gate_markers(core_img, f'DFCI_{core_num}', CHANNELS, LOWERBOUND_DF, UPPERBOUND_DF)

    # Load segmentation mask
    seg_mask_path = f'../output/seg_results/DFCI_geomx/core_{core_num}/MESMER_mask.tiff'
    try:
        segmentation_mask = tifffile.imread(seg_mask_path)
    except FileNotFoundError:
        # Skipping is deliberate, but report it so a core missing from the
        # output can be traced back to its missing mask.
        logging.warning(f"Segmentation mask not found for DFCI core {core_num} ({seg_mask_path}). Skipping.")
        continue

    # Extract single cell information
    info_output_path = f'../output/extracted_info_geomx_gated/DFCI/{core_num}'
    extract_single_cell_info(core_img_filtered, segmentation_mask, MARKERS_TO_EXTRACT, info_output_path)

Processing cores: 100%|██████████| 14/14 [02:30<00:00, 10.78s/it]


In [7]:
# Gate and extract per-cell information for every selected Rochester core.
for core_num, core_img in tqdm(Rochester_dict.items(), desc="Processing cores"):
    # Gate markers
    core_img_filtered = gate_markers(core_img, f'Rochester_{core_num}', CHANNELS, LOWERBOUND_DF, UPPERBOUND_DF)

    # Load segmentation mask (registered onto the GeoMx image); cast to an
    # unsigned integer label image before region extraction.
    seg_mask_path = f'../output/img_registration/fusion_to_geomx/Rochester/Rochester_{core_num}/Rochester_aligned_MESMER_mask.tiff'
    try:
        segmentation_mask = tifffile.imread(seg_mask_path).astype('uint32')
    except FileNotFoundError:
        # Skipping is deliberate, but report it so a core missing from the
        # output can be traced back to its missing mask.
        logging.warning(f"Segmentation mask not found for Rochester core {core_num} ({seg_mask_path}). Skipping.")
        continue

    # Extract single cell information
    info_output_path = f'../output/extracted_info_geomx_gated/Rochester/{core_num}'
    extract_single_cell_info(core_img_filtered, segmentation_mask, MARKERS_TO_EXTRACT, info_output_path)

Processing cores: 100%|██████████| 16/16 [05:53<00:00, 22.12s/it]


In [None]:
# Scratch check: show the 'Syto13' channel of `core_img`, i.e. whichever core
# the most recently executed loop left bound to that name.
tifffile.imshow(core_img['Syto13'])

In [None]:
# Scratch check: show the 'Tox' channel stored for Rochester core '25'.
# NOTE(review): Rochester_dict is keyed by the raw values of the position
# table's 'Core' column; if pandas parsed that column as integers, the string
# key '25' raises KeyError -- confirm the key type.
tifffile.imshow(Rochester_dict['25']['Tox'])

In [None]:
# Scratch check: show the gated 'Tox' channel from the last iteration of the
# most recently executed extraction loop.
tifffile.imshow(core_img_filtered['Tox'])

In [None]:
# Scratch check: show which 'Tox' pixels differ between the stored channel of
# Rochester core '25' and the gated channel left over from the last loop
# iteration (vmax=1 makes any non-zero difference saturate).
# NOTE(review): only informative if `core_img_filtered` holds arrays distinct
# from those in `Rochester_dict` -- confirm gate_markers does not write back
# into its input, and that the last processed core really is '25'.
tifffile.imshow((Rochester_dict['25']['Tox'] - core_img_filtered['Tox']), cmap = 'gray', vmax = 1)

In [None]:
# Scratch check: element-wise difference between the 'PD-L1' channel of
# `core_img` and of `core_img_filtered` (both left over from the last loop
# iteration).
# NOTE(review): evaluates to all zeros if both names refer to the same dict --
# confirm gate_markers returns a separate mapping.
(core_img['PD-L1'] - core_img_filtered['PD-L1'])

In [None]:
# Scratch check: display the lower-bound threshold table.
LOWERBOUND_DF

In [None]:
# Debug view of the gating on a single core: for every gated marker, print the
# thresholds and the pixel values at or below the lower bound before and after
# gating, then show a 500x500 crop of the raw and gated images.
# NOTE(review): Rochester_dict is keyed by the raw 'Core' values from the
# position table; if those are integers the string key '4' raises KeyError.
for marker in CHANNELS:
    print(marker)
    channel_img = Rochester_dict['4'][marker]
    lowerBound = LOWERBOUND_DF[LOWERBOUND_DF['Core'] == 'Rochester_4'][marker].values.item()
    # Select the row with a mask built from UPPERBOUND_DF itself; the previous
    # mask came from LOWERBOUND_DF and was only right if both tables shared
    # the same row order and index.
    upperBound = UPPERBOUND_DF[UPPERBOUND_DF['Core'] == 'Rochester_4'][marker].values.item()
    print(f'{lowerBound}, {upperBound}')
    print([i for i in np.unique(channel_img) if i <= lowerBound])

    img_filtered = np.copy(channel_img)
    img_filtered[img_filtered <= lowerBound] = 0
    # An upper bound of 0 means "no upper gate".
    if upperBound != 0:
        img_filtered[img_filtered >= upperBound] = 0

    print([i for i in np.unique(img_filtered) if i <= lowerBound])
    tifffile.imshow(channel_img[2500:3000, 2500:3000], vmax = 500, cmap = 'gray')
    tifffile.imshow(img_filtered[2500:3000, 2500:3000], vmax = 500, cmap = 'gray')