In [1]:
import os, sys
sys.path.append('..')
from os.path import abspath, dirname
import zarr
import z5py
import numpy as np
import pandas as pd
from glob import glob 
from skimage.measure import regionprops
from skimage.io import imread, imsave
from scipy import stats
from scipy.stats import skewnorm, lognorm
from scipy.optimize import minimize
import itertools

from easi_fish import n5_metadata_utils as n5mu
from easi_fish import roi_prop, spot, intensity
import warnings
warnings.filterwarnings('ignore')

import importlib
importlib.reload(spot)
importlib.reload(roi_prop)
importlib.reload(intensity)

"""
Spot counts for cells with highly expressed genes (dense spots)
1. Measure total intensity of every ROI after bleed-through correction and background subtraction.
2. Calculate the number of spot from total intensity based on unit-spot intensity
3. Correlate the number of spots (from air-localize) with the total fluorescence intensity/voxel in each ROI and determine a 'cutoff'. 
   Spot count > cutoff: use spot count converted based on total fluorescence intensity; 
   Spot count < cutoff: use spot count from Airlocalize
"""

"\nSpot counts for cells with highly expressed genes (dense spots)\n1. Measure total intensity of every ROI after bleed-through correction and background subtraction.\n2. Calculate the number of spot from total intensity based on unit-spot intensity\n3. Correlate the number of spots (from air-localize) with the total fluorescence intensity/voxel in each ROI and determine a 'cutoff'. \n   Spot count > cutoff: use spot count converted based on total fluorescence intensity; \n   Spot count < cutoff: use spot count from Airlocalize\n"

In [2]:
def get_unit_spot_intn(path_spot, vox=(0.92, 0.92, 0.84),
                       bounds=((250, 1500), (250, 1500), (150, 650)),
                       invalid_intn=-8.0, bins=5000):
    """Estimate the intensity of a single (unit) spot from a spot table.

    The spot file is a comma-separated table whose first four columns are
    (x, y, z, I). Coordinates are divided by ``vox`` to obtain pixel
    coordinates, spots outside ``bounds`` are discarded, and the most
    populated bin of the intensity histogram is taken as the unit intensity.

    Parameters
    ----------
    path_spot : str
        Path to the comma-separated spot file.
    vox : sequence of 3 floats, optional
        Voxel size (x, y, z) used to convert coordinates to pixel units.
        Defaults to the value that was previously hard-coded.
    bounds : sequence of 3 (low, high) pairs, optional
        Per-axis pixel range to keep; a spot is kept when
        ``low < coordinate <= high`` on every axis. Used to drop spots near
        the image edges (false detections).
    invalid_intn : float, optional
        Intensity value excluded before building the histogram.
        NOTE(review): presumably a sentinel written by the spot detector for
        failed fits -- confirm against the detector's output format.
    bins : int, optional
        Number of histogram bins.

    Returns
    -------
    float
        Left edge of the most populated intensity bin.

    Raises
    ------
    ValueError
        If the file has fewer than four columns, or if no spot is left after
        filtering (the histogram of an empty array would otherwise silently
        yield a unit intensity of 0 and divide-by-zero downstream).
    """
    # ndmin=2 keeps a file containing a single spot as a (1, n_columns) table.
    spots = np.loadtxt(path_spot, delimiter=',', ndmin=2)
    if spots.shape[1] < 4:
        raise ValueError(f"expected at least 4 columns (x, y, z, I) in {path_spot}")

    # (x, y, z, I): convert from physical units to pixel units
    spots[:, :3] = spots[:, :3] / np.asarray(vox, dtype=float)

    # remove spots on edges (eliminate false detections), one axis at a time
    for axis, (low, high) in enumerate(bounds):
        coord = spots[:, axis]
        spots = spots[np.logical_and(coord <= high, coord > low)]

    # assign the most frequent intensity as the single-spot intensity
    spot_int = spots[:, 3]
    spot_int = spot_int[spot_int != invalid_intn]
    if spot_int.size == 0:
        raise ValueError(f"no usable spots left after filtering {path_spot}")

    counts, edges = np.histogram(spot_int, bins=bins)
    unit_intn = edges[np.argmax(counts)]

    return unit_intn

def get_spot_counts_from_intn(path_intn, path_spot, roi_meta):
    """Estimate per-ROI spot counts from per-ROI mean intensities.

    The unit-spot intensity is estimated first from ``path_spot``; the count
    of each ROI is then ``max(mean - background, 0) * area / unit_intensity``.

    Parameters
    ----------
    path_intn : str
        CSV file indexed by ROI id (first column) with a ``mean_intensity``
        column.
    path_spot : str
        Spot file passed to ``get_unit_spot_intn``.
    roi_meta : pandas.DataFrame
        Table of the ROIs to report, indexed by ROI id, with an ``area``
        column. Its index defines the order and length of the result.

    Returns
    -------
    numpy.ndarray
        One estimated count per row of ``roi_meta``. ROIs that are missing
        from the intensity file get NaN.
    """
    unit_intn = get_unit_spot_intn(path_spot)  # get unit intn

    cell_int = pd.read_csv(path_intn, sep=',', index_col=0)
    # only include intact ROIs; ROIs absent from the CSV become NaN rows
    cell_int = cell_int.reindex(roi_meta.index)

    vec_mean = cell_int['mean_intensity'].values
    vec_area = roi_meta['area'].values

    # background = most populated bin of the mean-intensity histogram.
    # np.histogram cannot auto-range over NaN, so rows introduced by the
    # reindex above are left out of the background estimate only.
    finite_mean = vec_mean[np.isfinite(vec_mean)]
    n, b = np.histogram(finite_mean, bins=1000)
    bg = b[np.argmax(n)]

    # count: background-subtracted total intensity in units of one spot
    vec_count = np.clip(vec_mean - bg, 0, None) * vec_area / unit_intn
    return vec_count

In [3]:
## input
# NOTE(review): absolute, machine-specific Windows path; every other path in
# this cell is derived from it.
input_dir = "D:\\SWAP\\Vincent\\lt171_FlpO\\gene_new_4tile\\outputs"
bleed_thru_epsilon = 1
output_dir = os.path.join(input_dir, f"testfull-aug25_epsilon{bleed_thru_epsilon}")

# fixed image (directory - n5 format)
fix_dir = os.path.join(input_dir, "r2\\export_sigma3.n5")
# dataset (channel / scale level) to read inside the n5 containers
subpath='\\c3\\s2'

# registered image (directory - n5 format)
reg_dir = os.path.join(input_dir, "r1\\registration\\lt171_gene_4tile_r1-to-lt171_gene_4tile_r2\\warped")

# segmentation mask (tif format accepted here)
lb_dir  = os.path.join(input_dir, "r2\\segmentation\\lt171_gene_4tile_r2-c3.tif" )

# spot dir
spot_dir = os.path.join(input_dir, "spots_pooled") # pooled spots; warped and fixed
intn_dir = os.path.join(input_dir, "intensities_pooled") # pooled intensities; warped and fixed
# for every gene
rounds = ['r1', 'r2']
channels = ['c0', 'c1', 'c2', 'c4']
# r1 should be the warped one
# One spot file and one intensity file per (round, channel), in
# itertools.product order; later cells index these lists by that order.
fx_spots = [os.path.join(spot_dir, f'spots_{rnd}_{chn}.txt')
                 for rnd, chn in itertools.product(rounds, channels)]
fx_intns = [os.path.join(intn_dir, f'{rnd}_{chn}_intensity.csv')
                 for rnd, chn in itertools.product(rounds, channels)]

# Fail early, naming the offending path, if any expected input is missing.
for f in fx_spots:
    assert os.path.isfile(f), f"spot file not found: {f}"
for f in fx_intns:
    assert os.path.isfile(f), f"intensity file not found: {f}"

## output
# out_mask = os.path.join(output_dir, 'mask.tif')
out_badroi = os.path.join(output_dir, 'bad_roi_list.npy')
out_allroi = os.path.join(output_dir, "roi_all.csv")
out_roi = os.path.join(output_dir, "roi.csv")
out_spots = os.path.join(output_dir, "spotcount.csv")
out_spots_intn = os.path.join(output_dir, "spotcount_intn.csv")
# out_intensity = os.path.join(output_dir, "intensity_c0_r2.csv")
out_spots_merged = os.path.join(output_dir, 'spotcount_merged.csv')

In [4]:
%%time
# voxel size in µm (x, y, z) (post-expansion)
vox = n5mu.read_voxel_spacing(fix_dir, subpath)
# image size in pixel (x, y, z)
grid = n5mu.read_voxel_grid(fix_dir, subpath)
# image size in physical space (x, y, z) (post-expansion)
size = grid * vox
print('voxel size is:', vox)
print('image size in pixel unit is:', grid)
print('image size in um unit is:', size)

# get appropriate image data
print("loading images...")
# fix = zarr.open(store=zarr.N5Store(fix_dir), mode='r')
# img1 = fix[subpath][:, :, :]

reg = zarr.open(store=zarr.N5Store(reg_dir), mode='r')
img2 = reg[subpath][:, :, :]
print("all images loaded")

if not os.path.isdir(output_dir):
    print(output_dir)
    # makedirs also creates missing parents and tolerates a concurrent create
    os.makedirs(output_dir, exist_ok=True)

# The registered volume must match the fixed image grid, given as (x, y, z)
# while arrays are indexed (z, y, x).
expected_shape = (int(grid[2]), int(grid[1]), int(grid[0]))
if tuple(img2.shape) != expected_shape:
    raise ValueError(
        f"registered image shape {tuple(img2.shape)} does not match fixed grid {expected_shape}"
    )

# mask is 1 where the registered image has data and 0 where it is exactly 0.
# Built directly as uint8 instead of filling a default-int volume first,
# which keeps the same 0/1 values at a fraction of the memory.
mask = (img2 != 0).astype(np.uint8)
# imsave(out_mask, mask)
print("mask generated")
print("mask dimension is:", mask.shape)

lb = imread(lb_dir)
print(lb.shape)
roi = np.max(lb)
print(roi)

# Get list of ROIs that are fully or partially outside the mask
### Make sure to only include ROIs that are intact and in the overlapping regions across all rounds of FISH
bad_roi = np.unique(lb[mask == 0])
# drop the background label; a boolean filter is also safe when no label
# touches the masked-out region (bad_roi empty)
bad_roi = bad_roi[bad_roi != 0]
np.save(out_badroi, bad_roi)
print("# of ROIs rejected:", len(bad_roi))

voxel size is: [0.92 0.92 0.84]
image size in pixel unit is: [ 916  915 1019]
image size in um unit is: [842.72003 841.8     855.95996]
loading images...
all images loaded
D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\testfull-aug25_epsilon1
mask generated
mask dimension is: (1019, 915, 916)
(1019, 915, 916)
13758
# of ROIs rejected: 891
CPU times: total: 29 s
Wall time: 1min 41s


In [5]:
%%time
# Properties of every labelled ROI, saved before any filtering.
roi_meta_all = roi_prop.roi_prop_v2(lb)
roi_meta_all.to_csv(out_allroi)

# Index by ROI id and keep only the ids that are not in bad_roi
# (Index.difference returns the remaining ids sorted and unique).
roi_by_id = roi_meta_all.set_index('roi')
intact_ids = roi_by_id.index.difference(bad_roi)
roi_meta = roi_by_id.loc[intact_ids]
roi_meta.to_csv(out_roi)

CPU times: total: 18.4 s
Wall time: 18.4 s


In [6]:
# remove bleed through!
# For every round, query-channel spots are compared against the
# reference-channel spots; the kept ones are held in memory (keyed
# "<round>_<channel>") and the removed ones are written next to the inputs.
spots_bld_thru_removed = {}
c_qry = 'c0'
c_ref = 'c4'
for r in rounds:
    qry_tag = f"{r}_{c_qry}"
    f_ref = os.path.join(spot_dir, f'spots_{r}_{c_ref}.txt')
    f_qry = os.path.join(spot_dir, f'spots_{qry_tag}.txt')
    f_qry_removed = os.path.join(spot_dir, f'removed_spots_{qry_tag}.txt')

    ref_dots, qry_dots = (np.loadtxt(path, delimiter=',') for path in (f_ref, f_qry))
    qry_kept, qry_removed = spot.remove_bleed_thru_spots(
        ref_dots, qry_dots, epsilon=bleed_thru_epsilon
    )

    # keep kept
    spots_bld_thru_removed[qry_tag] = qry_kept
    # save removed
    np.savetxt(f_qry_removed, qry_removed, delimiter=",")

7035/93393 = 7.5% removed
7198/93393 = 7.7% removed


In [7]:
%%time
# count spots
# Label ids handed to the worker: background 0 first, then every non-zero
# label present in the segmentation.
lb_id = np.hstack([[0], np.unique(lb[lb!=0])])
spotcount = pd.DataFrame(index=lb_id)
for i, (r, c) in enumerate(itertools.product(rounds, channels)):
    gene = f"{r}_{c}"
    if gene in spots_bld_thru_removed:
        # bleed-through-corrected spots take precedence over the raw file
        print(f"{gene}: load from bleed_thru_corrected spots")
        spots_rc = spots_bld_thru_removed[gene]
    else:
        # fx_spots is ordered like itertools.product(rounds, channels)
        f_spots = fx_spots[i]
        print(f"{gene}: load from {f_spots}")
        spots_rc = np.loadtxt(f_spots, delimiter=',')

    res = spot.spot_counts_worker(
        lb, spots_rc,
        lb_id=lb_id,
        remove_emptymask=True,
        verbose=True,
    )
    spotcount[gene] = res
spotcount = spotcount.iloc[1:] # remove 0 (the background row placed first above)
spotcount.to_csv(out_spots)
spotcount

r1_c0: load from bleed_thru_corrected spots
removed 1 due to nan
84,358/86,358 spots in range (916, 915, 1019)
r1_c1: load from D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c1.txt
removed 7 due to nan
315,425/327,944 spots in range (916, 915, 1019)
r1_c2: load from D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c2.txt
removed 0 due to nan
36,062/36,786 spots in range (916, 915, 1019)
r1_c4: load from D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c4.txt
removed 1 due to nan
34,949/36,009 spots in range (916, 915, 1019)
r2_c0: load from bleed_thru_corrected spots
removed 0 due to nan
86,180/86,195 spots in range (916, 915, 1019)
r2_c1: load from D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r2_c1.txt
removed 0 due to nan
327,929/327,944 spots in range (916, 915, 1019)
r2_c2: load from D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r2_c2.txt
removed 0 due to nan
36,77

Unnamed: 0,r1_c0,r1_c1,r1_c2,r1_c4,r2_c0,r2_c1,r2_c2,r2_c4
1,15.0,46.0,37.0,25.0,0.0,1.0,0.0,0.0
2,4.0,13.0,0.0,0.0,0.0,2.0,2.0,1.0
3,5.0,20.0,0.0,1.0,2.0,5.0,0.0,0.0
4,2.0,14.0,0.0,2.0,0.0,4.0,0.0,1.0
5,4.0,23.0,0.0,4.0,14.0,12.0,0.0,2.0
...,...,...,...,...,...,...,...,...
13754,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13755,0.0,2.0,0.0,1.0,0.0,2.0,0.0,0.0
13756,0.0,0.0,0.0,1.0,3.0,4.0,0.0,0.0
13757,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0


In [8]:
# spot count calculated from total fluorescence intensity
# One column per (round, channel), one row per intact ROI.
# NOTE(review): the unit-spot intensity is estimated from the raw spot file in
# fx_spots, not from the bleed-through-corrected spots -- confirm intended.
spotcount_intn = pd.DataFrame(index=roi_meta.index, dtype=float)
for i, (r, c) in enumerate(itertools.product(rounds, channels)):
    # both file lists follow itertools.product(rounds, channels) order
    f_spots, f_intns = fx_spots[i], fx_intns[i]
    print(r, c, f_spots, f_intns)

    vec_count = get_spot_counts_from_intn(f_intns, f_spots, roi_meta)
    spotcount_intn[f'{r}_{c}'] = vec_count
spotcount_intn.to_csv(out_spots_intn)
spotcount_intn

r1 c0 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c0.txt D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r1_c0_intensity.csv
r1 c1 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c1.txt D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r1_c1_intensity.csv
r1 c2 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c2.txt D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r1_c2_intensity.csv
r1 c4 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c4.txt D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r1_c4_intensity.csv
r2 c0 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r2_c0.txt D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r2_c0_intensity.csv
r2 c1 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r2_c1.txt D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\out

Unnamed: 0_level_0,r1_c0,r1_c1,r1_c2,r1_c4,r2_c0,r2_c1,r2_c2,r2_c4
roi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,159.088134,1.696923,128.996454,28.162674,110.971892,26.697905,131.621644,115.916887
2,0.633554,0.000000,0.000000,0.000000,1.903046,17.455683,3.499464,0.964036
3,2.779255,6.333779,1.746320,1.945363,6.107428,17.821450,2.853233,2.977236
4,1.108907,0.000000,0.000000,0.000000,0.175173,18.729604,2.699448,0.000000
5,1.623296,4.023971,1.140955,2.262592,7.055661,20.491200,6.391334,3.859741
...,...,...,...,...,...,...,...,...
13754,0.000000,0.000000,0.000000,0.000000,0.000000,1.898004,0.072911,0.000000
13755,0.000000,6.526348,0.000000,0.000000,0.000000,6.352342,0.000000,0.000000
13756,0.000000,0.000000,0.000000,0.000000,0.000000,2.306024,0.225444,0.000000
13757,0.220968,0.000000,0.116865,0.291333,0.101884,1.581346,0.288074,0.342997


In [9]:
# Merge the two estimates: keep the detected spot count where spots are
# sparse, switch to the intensity-derived count where they are dense.
spotcount_sub = spotcount.reindex(roi_meta.index)
# NOTE(review): the 2*2*2 factor and the 0.92*0.92*0.84 voxel size are
# hard-coded here; the original comment describes this as a conversion between
# um^3 and voxel units -- confirm the direction and that it matches `vox`.
volumes = roi_meta['area'] * 8 / (0.92*0.92*0.84)
density = spotcount_sub.div(volumes, axis=0)
# this threshold corresponds to spot-spot distance ~1.3 um apart
cond = density.le(0.01)
is_dense = ~cond
print(is_dense.sum())
spotcount_merged = spotcount_sub.mask(is_dense, spotcount_intn)
spotcount_merged.to_csv(out_spots_merged)
spotcount_merged

r1_c0     5
r1_c1    27
r1_c2     7
r1_c4     1
r2_c0     1
r2_c1     9
r2_c2     1
r2_c4     0
dtype: int64


Unnamed: 0_level_0,r1_c0,r1_c1,r1_c2,r1_c4,r2_c0,r2_c1,r2_c2,r2_c4
roi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,159.088134,1.696923,128.996454,28.162674,0.0,1.0,0.0,0.0
2,4.000000,13.000000,0.000000,0.000000,0.0,2.0,2.0,1.0
3,5.000000,20.000000,0.000000,1.000000,2.0,5.0,0.0,0.0
4,2.000000,14.000000,0.000000,2.000000,0.0,4.0,0.0,1.0
5,4.000000,23.000000,0.000000,4.000000,14.0,12.0,0.0,2.0
...,...,...,...,...,...,...,...,...
13754,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
13755,0.000000,2.000000,0.000000,1.000000,0.0,2.0,0.0,0.0
13756,0.000000,0.000000,0.000000,1.000000,3.0,4.0,0.0,0.0
13757,0.000000,0.000000,0.000000,2.000000,0.0,0.0,0.0,0.0
