In [1]:
import os
from pathlib import Path
import pandas as pd
import numpy as np

from tifffile import TiffFile, imwrite
from scipy.spatial import ConvexHull
from numba import njit
from concurrent.futures import ThreadPoolExecutor

c:\Users\Mingchuan\anaconda3\envs\cell-typing\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\Mingchuan\anaconda3\envs\cell-typing\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


In [None]:
# --- Run configuration -------------------------------------------------------
# Channel names for this data set.
CHANNELS = ['cy5', 'TxRed', 'cy3', 'FAM']

# Root of the processed data sets and the identifier of the run to analyse.
BASE_DIR = Path('E:/TMC/PRISM_pipeline/dataset/processed')
RUN_ID = '20230227_test'

# Every folder used below lives under "<BASE_DIR>/<RUN_ID>_processed".
src_dir = BASE_DIR.joinpath(f'{RUN_ID}_processed')
stc_dir = src_dir.joinpath('stitched')
read_dir = src_dir.joinpath('readout')
seg_dir = src_dir.joinpath('segmented')
cel_typ_dir = src_dir.joinpath('analysis_cell_typing')

# The segmentation folder is created up front (parents included) if it is missing.
seg_dir.mkdir(parents=True, exist_ok=True)

# Read the RNA table and the DAPI prediction to get the output shape

In [2]:
def extract_points(rna_info):
    """Group RNA coordinates by the cell they belong to.

    Parameters
    ----------
    rna_info : pandas.DataFrame
        Must provide the columns "Cell Index", "z_in_pix", "x_in_pix" and
        "y_in_pix".

    Returns
    -------
    dict
        Maps each cell index (as ``int``, keys inserted in ascending order) to
        the list of its ``[z, x, y]`` points. Coordinates are truncated to
        ``int`` and points keep the row order of ``rna_info``.

    Raises
    ------
    ValueError
        If a cell index or coordinate is NaN (``int()`` cannot convert it).
    """
    # Work on plain column lists instead of DataFrame.iterrows(): iterrows
    # builds one Series per row and upcasts the integer cell index to float,
    # so the previous lookup only worked because 5.0 and 5 hash alike.
    cell_ids = [int(cell_id) for cell_id in rna_info["Cell Index"].tolist()]
    z_values = rna_info["z_in_pix"].tolist()
    x_values = rna_info["x_in_pix"].tolist()
    y_values = rna_info["y_in_pix"].tolist()

    # Pre-create the buckets in sorted order so the key order is deterministic.
    cell_points = {cell_id: [] for cell_id in sorted(set(cell_ids))}
    for cell_id, z, x, y in zip(cell_ids, z_values, x_values, y_values):
        cell_points[cell_id].append([int(z), int(x), int(y)])
    return cell_points

In [3]:
# The DAPI prediction stack defines the shape of every output volume.
with TiffFile(seg_dir/'dapi_predict.tif') as tif:
    image = tif.asarray()
    shape = image.shape

# Load the RNA table and rescale its coordinates: z is divided by 3.36,
# x and y are left unchanged (scale factor 1).
rna_info = pd.read_csv(read_dir/'mapped_genes_preprocessed.csv')
zscale, xscale, yscale = (1/3.36, 1, 1)
rna_info["z_in_pix"] = rna_info["z_in_pix"] * zscale
rna_info["x_in_pix"] = rna_info["x_in_pix"] * xscale
rna_info["y_in_pix"] = rna_info["y_in_pix"] * yscale
# Only z is truncated to whole slice numbers here.
rna_info["z_in_pix"] = rna_info["z_in_pix"].astype(np.int64)
# Largest coordinate seen on each axis, printed next to the image shape below.
min_shape = tuple(rna_info[axis].max() for axis in ("z_in_pix", "x_in_pix", "y_in_pix"))

input_cells = extract_points(rna_info=rna_info)

print(f"shape={shape}; min_shape={min_shape}; cell_num={len(input_cells)}")

shape=(121, 1929, 6415); min_shape=(116, 1926, 6410); cell_num=8420


# Fill the convex hull of each cell

In [4]:
@njit
def get_filled_hull_numba(mask, equations, tol=1e-9):
    """Mark every voxel of ``mask`` that lies inside a convex hull.

    Parameters
    ----------
    mask : 3-D boolean array indexed as ``mask[z, x, y]``
        Modified in place; voxels inside the hull are set to True, all other
        voxels are left untouched.
    equations : 2-D float array, one row ``(a, b, c, d)`` per hull facet
        A voxel is inside when ``a*z + b*x + c*y + d <= tol`` holds for every
        row (the layout of ``scipy.spatial.ConvexHull.equations``).
    tol : float, optional
        Slack for the facet test. With a strict ``> 0`` test, voxels lying
        exactly on a facet (including the hull's own vertices) can be rejected
        because of floating-point rounding in the plane equations.

    Returns
    -------
    The same ``mask`` array.
    """
    depth, height, width = mask.shape
    n_facets = equations.shape[0]
    # Iterate with the last axis innermost so memory is walked contiguously
    # (the caller allocates the mask with np.zeros, i.e. C order).
    for z in range(depth):
        for x in range(height):
            for y in range(width):
                inside_hull = True
                for i in range(n_facets):
                    signed_dist = (equations[i, 0] * z + equations[i, 1] * x
                                   + equations[i, 2] * y + equations[i, 3])
                    if signed_dist > tol:
                        # One violated facet is enough to be outside.
                        inside_hull = False
                        break
                if inside_hull:
                    mask[z, x, y] = True
    return mask


def get_filled_hull(hull, shape):
    """Rasterise ``hull`` into a new boolean volume.

    Parameters
    ----------
    hull : object exposing an ``equations`` array (e.g. scipy ConvexHull)
    shape : tuple of int
        Shape of the volume to allocate.

    Returns
    -------
    Boolean array of ``shape``; True where ``get_filled_hull_numba`` found the
    voxel to be inside the hull.
    """
    empty_volume = np.zeros(shape, dtype=bool)
    return get_filled_hull_numba(empty_volume, hull.equations)

In [19]:
# Rule that assigns a gray value to each cell (cell index -> gray value).
gray_scale = {}

cell_info = pd.read_csv(read_dir/'cell_info.csv', index_col=0)

# Disabled alternative: one gray value per annotated subtype.
# tmp_category = [line.strip() for line in open(cel_typ_dir/'annotated_subtype.txt', 'r')]
# for _, subtype in enumerate(tmp_category):
#     for cell_index in cell_info[cell_info.subtype==subtype].index:
#         gray_scale[cell_index] = _ + 1

# Active rule: every cell whose subtype is not 'other' gets (cell index + 1),
# so cells with subtype 'other' have no entry at all.
gray_scale.update(
    (cell_index, cell_index + 1)
    for cell_index in cell_info[cell_info['subtype'] != 'other'].index
)

In [17]:
# import scanpy as sc

# adata = sc.read_h5ad(os.path.join(workdir, 'cell_typing', 'direct', 'adata.h5ad'))
# adata.obs.index = [int(_) for _ in adata.obs.index]
# cell_info = adata.obs.copy()
# tmp_category = [line.strip() for line in open(os.path.join(workdir, 'cell_typing', 'direct', 'annotated_subtype.txt'), 'r')]

In [18]:
# gray_scale = dict()
# for _, subtype in enumerate(tmp_category):
#     for cell_index in cell_info[cell_info.subtype==subtype].index:
#         gray_scale[cell_index] = _ + 1

In [21]:
labels = list(cell_info.index)
filled_hull = np.zeros(shape, dtype=np.uint16)


def process_label(label):
    """Paint the filled convex hull of one cell into ``filled_hull``.

    The hull is built from the cell's points in ``input_cells`` and written
    into the matching bounding-box window of ``filled_hull`` using the gray
    value ``gray_scale[label]``.

    Cells without an entry in ``gray_scale`` are left as background; they are
    skipped before the hull is computed.

    Raises
    ------
    KeyError
        If ``label`` has no points in ``input_cells``.
    Exception
        Whatever ``ConvexHull`` raises for degenerate input, or an indexing
        error if the points do not fit inside ``filled_hull``.
    """
    gray_value = gray_scale.get(label)
    if gray_value is None:
        # No gray value was assigned to this cell: nothing to draw.
        return

    points = np.array(input_cells[label])
    lower = np.min(points, axis=0)
    upper = np.max(points, axis=0) + 1  # exclusive upper corner of the bounding box
    # Build the hull in bounding-box coordinates so the mask stays small.
    hull = ConvexHull(points - lower)

    mask = get_filled_hull(hull, tuple(upper - lower))
    filled_hull[lower[0]:upper[0],
                lower[1]:upper[1],
                lower[2]:upper[2]][mask] = gray_value


# Executor.map() only re-raises a worker's exception when its result is read,
# so the previous fire-and-forget call dropped every failure silently.
# Collect the futures and report the cells that could not be drawn instead.
skipped_labels = {}
with ThreadPoolExecutor(max_workers=16) as executor:
    futures = {label: executor.submit(process_label, label) for label in labels}
    for label, future in futures.items():
        try:
            future.result()
        except Exception as err:  # keep best-effort behaviour, but make it visible
            skipped_labels[label] = repr(err)

if skipped_labels:
    preview = list(skipped_labels.items())[:5]
    print(f"{len(skipped_labels)} of {len(labels)} cells could not be filled; first failures: {preview}")

imwrite(seg_dir/'cell_convex.tif', filled_hull)

## Single-cell projection

In [21]:
# Folder named 'analysis_subcellular' under this run's processed directory.
single_cell_dir = src_dir/'analysis_subcellular'

In [22]:
# Per-cell table, indexed by its first column. This rebinds `cell_info`, which
# an earlier cell loaded from read_dir/'cell_info.csv'.
# NOTE(review): the file name here is 'cell_Info.csv' (capital I) - confirm it
# matches the file on disk; this matters on case-sensitive file systems.
cell_info = pd.read_csv(cel_typ_dir/'cell_Info.csv', index_col=0)
# Per-RNA table, indexed by its first column.
rna_df = pd.read_csv(read_dir/'mapped_genes_processed.csv', index_col=0)

  rna_df = pd.read_csv(os.path.join(workdir, 'mapped_genes_processed.csv'), index_col=0)


In [23]:
import tifffile

# Load both volumes as uint8 masks: np.sign maps every positive value to 1 and
# leaves 0 as 0.
# NOTE(review): assumes neither file holds negative values (np.sign would give
# -1 there, which does not fit in uint8) - confirm the stored dtypes.
with tifffile.TiffFile(seg_dir/'dapi_predict.tif') as tif:
    dapi_predict = np.sign(tif.asarray()).astype(np.uint8)
with tifffile.TiffFile(seg_dir/'cell_convex.tif') as tif:
    cell_convex = np.sign(tif.asarray()).astype(np.uint8)

In [24]:
# Ordered list of subtypes, one per line of annotated_subtype.txt.
# NOTE(review): the previous path was built from `workdir`, which is not defined
# anywhere in this notebook (NameError on a fresh kernel). The file is now read
# from cel_typ_dir, the location used by the commented-out gray-scale rule
# above - confirm that this is where the file lives.
with open(cel_typ_dir / 'annotated_subtype.txt', 'r') as subtype_file:
    # Blank lines are dropped so they do not become empty categories.
    type_of_interest = [line.strip() for line in subtype_file if line.strip()]
# Ordered categorical: the category order is the order of the lines in the file.
cell_info['subtype'] = pd.Categorical(cell_info['subtype'], categories=type_of_interest, ordered=True)

In [28]:
from tqdm import tqdm
# Order of the genes drawn by the projection cell below. The 0-based position
# of a gene determines the pixel value written for its RNAs (position + 1 or
# position + 51), so reordering this list changes the encoding of the outputs.
gene_order_list = [
    'Gapdh', 'Slc1a3', 'Slc17a7', 'Snap25',
    'Rasgrf2', 'Rgs4', 'Prox1', 'Plcxd2',
    'Vxn', 'Pcp4', 'Nr4a2', 'Ctgf',
    'Gad1', 'Gad2', 'Pvalb', 'Sst', 'Vip', 'Lamp5',
    'Aqp4', 'Apod', 'Plp1', 'Cx3cr1', 'Pmch', 'Gfap',
    'Cck', 'Mbp', 'Rprm', 'Enpp2', 'Nov', 'Rorb',
]

In [29]:
from scipy.ndimage import zoom
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="tifffile")

# Constants of the per-cell projection.
MARGIN = 20            # border kept around each cell's RNA bounding box (RNA-table pixels)
Z_RATIO = 3.36         # RNA-table z coordinates are floor-divided by this to index the image stacks
CENTROID_VALUE = 50    # pixel value written at the cell centroid
OUTSIDE_OFFSET = 50    # added to the gene code when the RNA's `in_nu` flag is false
COORD_COLS = ['z_in_pix', 'x_in_pix', 'y_in_pix']


def _crop_with_zero_padding(volume, starts, stops):
    """Return the window ``volume[starts:stops]``, zero-filled where it leaves the volume.

    Plain slicing is not enough here: a negative start wraps around to the end
    of the axis, which yields an empty or unrelated crop for cells close to
    the origin of the image. The result always has shape ``stops - starts``.
    """
    window = np.zeros([max(stop - start, 0) for start, stop in zip(starts, stops)],
                      dtype=volume.dtype)
    src, dst = [], []
    for start, stop, size in zip(starts, stops, volume.shape):
        lo, hi = max(start, 0), min(stop, size)
        if hi <= lo:
            # The window does not overlap the volume on this axis at all.
            return window
        src.append(slice(lo, hi))
        dst.append(slice(lo - start, hi - start))
    window[tuple(dst)] = volume[tuple(src)]
    return window


# Row positions of each cell's RNAs, computed once instead of scanning the
# whole RNA table for every cell.
rna_rows_by_cell = rna_df.groupby('Cell Index').indices

# valid_cells = cell_info.loc[valid_index]
valid_cells = cell_info.copy()
for type_rank, tmp_cell_type in enumerate(type_of_interest):
    # NOTE(review): the previous path was built from `workdir`, which is not
    # defined in this notebook; outputs now go under `single_cell_dir`
    # (defined above and otherwise unused) - confirm the intended location.
    out_path = single_cell_dir / 'single_cell_projection_in_out' / f'{type_rank+1}_{tmp_cell_type}'
    os.makedirs(out_path, exist_ok=True)
    for index in tqdm(valid_cells[valid_cells['subtype']==tmp_cell_type].index, desc=tmp_cell_type):
        row_positions = rna_rows_by_cell.get(index)
        if row_positions is None:
            # Without RNAs there is no bounding box to project.
            print(f'cell_{index} has no RNA; skipped.')
            continue
        cell_rnas = rna_df.iloc[row_positions]
        # if len(cell_rnas)<10:
        #     continue
        centroid = valid_cells.loc[index, ['ce_z_in_pix','ce_x_in_pix','ce_y_in_pix']].to_numpy(dtype=float)
        rna_coords = cell_rnas[COORD_COLS].values
        min_pos = np.min(rna_coords, axis=0)
        max_pos = np.max(rna_coords, axis=0)

        # Local name on purpose: the global `shape` holds the full image shape
        # used by the convex-hull cell and must not be overwritten here.
        cell_shape = tuple(int(v) for v in np.uint16(max_pos - min_pos + 2 * MARGIN))
        cell_array = np.zeros(shape=cell_shape, dtype=np.uint8)

        # Same window in image-stack coordinates (z is rescaled, x/y are not).
        starts = (int((min_pos[0] - MARGIN) // Z_RATIO), int(min_pos[1] - MARGIN), int(min_pos[2] - MARGIN))
        stops = (int((max_pos[0] + MARGIN) // Z_RATIO), int(max_pos[1] + MARGIN), int(max_pos[2] + MARGIN))
        nucleus_array = _crop_with_zero_padding(dapi_predict, starts, stops).astype(np.uint8)
        convex_array = _crop_with_zero_padding(cell_convex, starts, stops).astype(np.uint8)

        # Stretch z back so the crops get the same depth as cell_array.
        zoom_factor = (cell_shape[0] / (stops[0] - starts[0]), 1, 1)
        nucleus_array = zoom(nucleus_array, zoom_factor, order=3)
        convex_array = zoom(convex_array, zoom_factor, order=3)

        # Centroid in window coordinates, truncated to whole voxels.
        centroid = (centroid - min_pos + MARGIN).astype(np.int64)
        if np.all(centroid >= 0) and np.all(centroid < np.array(cell_shape)):
            cell_array[centroid[0], centroid[1], centroid[2]] = CENTROID_VALUE
        else:
            print(f'cell_{index} centroid out of bound.')
            # continue

        # Genes are drawn in list order, so a later gene overwrites an earlier
        # one that falls on the same voxel.
        for gene_rank, gene in enumerate(gene_order_list):
            gene_rows = cell_rnas[cell_rnas['Gene'] == gene]
            gene_voxels = np.uint16(gene_rows[COORD_COLS].values - min_pos + MARGIN)
            for pos, in_nu_tmp in zip(gene_voxels, gene_rows['in_nu'].values):
                if in_nu_tmp:
                    cell_array[pos[0], pos[1], pos[2]] = gene_rank + 1
                else:
                    cell_array[pos[0], pos[1], pos[2]] = gene_rank + 1 + OUTSIDE_OFFSET

        # File names: "<cell index>,<number of RNAs>,<1|2|3>.tif"
        # (1 = RNA/centroid codes, 2 = DAPI crop, 3 = convex-hull crop).
        tifffile.imwrite(os.path.join(out_path, f'{index},{len(cell_rnas)},1.tif'), cell_array)
        tifffile.imwrite(os.path.join(out_path, f'{index},{len(cell_rnas)},2.tif'), nucleus_array)
        tifffile.imwrite(os.path.join(out_path, f'{index},{len(cell_rnas)},3.tif'), convex_array)

Ex-thalamus:   0%|          | 0/2429 [00:00<?, ?it/s]

Ex-thalamus:   2%|▏         | 59/2429 [00:37<24:01,  1.64it/s]

cell_105 centroid out of bound.


Ex-thalamus:  37%|███▋      | 910/2429 [08:04<09:40,  2.62it/s]

cell_2462 centroid out of bound.


Ex-thalamus:  43%|████▎     | 1050/2429 [09:18<10:06,  2.27it/s]

cell_2705 centroid out of bound.


Ex-thalamus:  65%|██████▍   | 1571/2429 [13:47<04:53,  2.93it/s]

cell_5161 centroid out of bound.


Ex-thalamus:  66%|██████▌   | 1599/2429 [13:56<03:17,  4.21it/s]

cell_5605 centroid out of bound.


Ex-thalamus:  70%|███████   | 1702/2429 [14:49<06:13,  1.95it/s]

cell_5822 centroid out of bound.


Ex-thalamus:  79%|███████▉  | 1923/2429 [16:44<02:39,  3.18it/s]

cell_6609 centroid out of bound.


Ex-thalamus:  94%|█████████▍| 2290/2429 [19:59<00:57,  2.40it/s]

cell_7758 centroid out of bound.


Ex-thalamus:  99%|█████████▊| 2395/2429 [20:49<00:07,  4.79it/s]

cell_8297 centroid out of bound.


Ex-thalamus: 100%|██████████| 2429/2429 [21:02<00:00,  1.92it/s]
In-Pvalb: 100%|██████████| 707/707 [06:25<00:00,  1.84it/s]
In-Sst: 100%|██████████| 433/433 [03:34<00:00,  2.02it/s]
In-Vip:  97%|█████████▋| 339/351 [02:59<00:05,  2.09it/s]

cell_8263 centroid out of bound.


In-Vip: 100%|██████████| 351/351 [03:04<00:00,  1.90it/s]
Glial-Astrocyte:  38%|███▊      | 252/657 [01:41<02:02,  3.31it/s]

cell_4354 centroid out of bound.


Glial-Astrocyte:  55%|█████▌    | 363/657 [02:21<01:18,  3.73it/s]

cell_5340 centroid out of bound.


Glial-Astrocyte:  73%|███████▎  | 480/657 [03:07<00:42,  4.15it/s]

cell_6937 centroid out of bound.


Glial-Astrocyte: 100%|██████████| 657/657 [04:08<00:00,  2.65it/s]
Glial-Microglia: 100%|██████████| 205/205 [01:33<00:00,  2.19it/s]
Glial-Oligodendrocyte:  31%|███▏      | 185/588 [01:17<02:06,  3.19it/s]

cell_3174 centroid out of bound.


Glial-Oligodendrocyte:  38%|███▊      | 221/588 [01:33<02:33,  2.40it/s]

cell_3648 centroid out of bound.


Glial-Oligodendrocyte:  58%|█████▊    | 340/588 [02:16<01:28,  2.82it/s]

cell_4890 centroid out of bound.


Glial-Oligodendrocyte: 100%|██████████| 588/588 [03:46<00:00,  2.60it/s]
