In [1]:
import os, sys
sys.path.append('..')
from os.path import abspath, dirname
import zarr
import z5py
import numpy as np
import pandas as pd
from glob import glob 
from skimage.measure import regionprops
from skimage.io import imread, imsave
from scipy import stats
from scipy.stats import skewnorm, lognorm
from scipy.optimize import minimize
import itertools

from easi_fish import n5_metadata_utils as n5mu
from easi_fish import roi_prop, spot, intensity
import warnings
warnings.filterwarnings('ignore')

import importlib
importlib.reload(spot)
importlib.reload(roi_prop)
importlib.reload(intensity)

<module 'easi_fish.intensity' from '/data/qlyu/v1/easifish-proc/data_processing/../easi_fish/intensity.py'>

"""
Spot counts for cells with highly expressed genes (dense spots)
1. Measure total intensity of every ROI after bleed-through correction and background subtraction.
2. Calculate the number of spot from total intensity based on unit-spot intensity
3. Correlate the number of spots (from air-localize) with the total fluorescence intensity/voxel in each ROI and determine a 'cutoff'. 
   Spot count > cutoff: use spot count converted based on total fluorescence intensity; 
   Spot count < cutoff: use spot count from Airlocalize
"""

In [2]:
def get_unit_spot_intn(path_spot):
    """estimate unit spot intensity
    """
    spot = np.loadtxt(path_spot, delimiter=',')
    # (x, y, z, I)
    vox=[0.92,0.92,0.84]
    spot[:,:3]=spot[:,:3]/vox  # convert from physical unit to pixel unit
    ##remove spots on edges (eliminate false detection)
    spot = spot[np.logical_and(spot[:,0]<=1500, spot[:,0]>250)]
    spot = spot[np.logical_and(spot[:,1]<=1500, spot[:,1]>250)]
    spot = spot[np.logical_and(spot[:,2]<=650,  spot[:,2]>150)]   
    
    ## assign the most frequent intensity as the single-spot-intensity
    spot_int = spot[:,3]
    spot_int = spot_int[spot_int!=-8.0] # ???
    n,b=np.histogram(spot_int, bins=5000)
    unit_intn = b[np.argmax(n)]
    
    return unit_intn

def get_spot_counts_from_intn(path_intn, path_spot, roi_meta):
    """estimate spot_counts from cell_intensities; estimate unit-spot intensity first
    """
    unit_intn = get_unit_spot_intn(path_spot) # get unit intn
    
    cell_int = pd.read_csv(path_intn, sep=',', index_col=0)
    cell_int = cell_int.reindex(roi_meta.index) ## only include intact ROIs###

    vec_mean = cell_int['mean_intensity'].values
    vec_area = roi_meta['area'].values

    # background
    n,b = np.histogram(vec_mean, bins=1000)
    bg = b[np.argmax(n)]    
    
    # count
    vec_count = np.clip(vec_mean - bg, 0, None)*vec_area/unit_intn
    return vec_count

In [3]:
## input
theround = 'r1'
rounds = [theround]
input_dir = f"/data/qlyu/v1/data/easifish/lt186/{theround}"
output_dir = os.path.join(input_dir, "proc")  

channels = ['c0', 'c1', 'c2', 'c4']
bleed_thru_epsilon = 1

# fixed image
fix_dir = os.path.join(input_dir, "stitching/export.n5")
subpath='/c3/s3' 

# segmentation mask (tif format accepted here)
lb_dir  = os.path.join(input_dir, f"segmentation/{theround}-c3.tif" ) 

# spot dir for every gene
spot_dir = os.path.join(input_dir, "spots") 
intn_dir = os.path.join(input_dir, "intensities") 
fx_spots = [os.path.join(spot_dir, f'spots_{c}.txt') 
                 for c in channels]
fx_intns = [os.path.join(intn_dir, f'{theround}_{c}_intensity.csv') 
                 for c in channels]

for f in fx_spots:
    assert os.path.isfile(f)
for f in fx_intns:
    assert os.path.isfile(f)

## output
out_badroi = os.path.join(output_dir, 'bad_roi_list.npy')
out_allroi = os.path.join(output_dir, "roi_all.csv") 
out_roi = os.path.join(output_dir, "roi.csv") 
out_spots = os.path.join(output_dir, "spotcount.csv")
out_spots_intn = os.path.join(output_dir, "spotcount_intn.csv")
out_spots_merged = os.path.join(output_dir, 'spotcount_merged.csv')

In [4]:
%%time
# output dir
if not os.path.isdir(output_dir):
    print(output_dir)
    os.mkdir(output_dir)
    
#voxel size in µm (x, y, z) (post-expansion)
vox  = n5mu.read_voxel_spacing(fix_dir, subpath)
#image size in pixel (x, y, z)
grid = n5mu.read_voxel_grid(fix_dir, subpath)
#image size in physical space (x, y, z) (post-expansion)
size=grid*vox
print('voxel size is:',vox)
print('image size in pixel unit is:',grid)
print('image size in um unit is:',size)

# get appropriate image data
print("loading images...")
# fix = zarr.open(store=zarr.N5Store(fix_dir), mode='r')     
# img1 = fix[subpath][:, :, :]

lb=imread(lb_dir)
print(lb.shape)
roi = np.max(lb)
print(roi)

# mask=np.full((grid[2], grid[1], grid[0]),1)
# for reg_dir in reg_dirs:
#     reg = zarr.open(store=zarr.N5Store(reg_dir), mode='r')     
#     img2 = reg[subpath][:, :, :]
#     print("image loaded")
#     mask[img2==0]=0
    
# # imsave(out_mask, mask)
# print("mask generated")
# print("mask dimension is:", mask.shape)

# # # Get list of ROIs that are fully or partially outside the mask 
# ### Make sure to only include ROIs that are intact and in the overlapping regions across all rounds of FISH
# bad_roi=np.unique(lb[mask==0])
# if bad_roi[0] == 0:
#     bad_roi = bad_roi[1:]
# np.save(out_badroi, bad_roi)
# print("# of ROIs rejected:", len(bad_roi))

/data/qlyu/v1/data/easifish/lt186/r1/proc
voxel size is: [1.84 1.84 1.68]
image size in pixel unit is: [1131  913  648]
image size in um unit is: [2081.04 1679.92 1088.64]
loading images...
(648, 913, 1131)
28228
CPU times: user 3.18 s, sys: 1.64 s, total: 4.81 s
Wall time: 3.8 s


In [5]:
%%time
roi_meta_all = roi_prop.roi_prop_v2(lb)
roi_meta_all.to_csv(out_allroi)

roi_meta = roi_meta_all.set_index('roi').copy()
roi_meta = roi_meta #.loc[roi_meta.index.difference(bad_roi)]
roi_meta.to_csv(out_roi)

CPU times: user 13.2 s, sys: 96.2 ms, total: 13.3 s
Wall time: 13.3 s


In [6]:
# remove bleed through!
spots_bld_thru_removed = {}
c_qry = 'c0'
c_ref = 'c4'

f_ref = os.path.join(spot_dir, f'spots_{c_ref}.txt')
f_qry = os.path.join(spot_dir, f'spots_{c_qry}.txt')
f_qry_removed = os.path.join(output_dir, f'removed_spots_{c_qry}.txt')

ref_dots = np.loadtxt(f_ref, delimiter=',')
qry_dots = np.loadtxt(f_qry, delimiter=',')
qry_kept, qry_removed = spot.remove_bleed_thru_spots(ref_dots, qry_dots, epsilon=bleed_thru_epsilon)

# keep kept 
spots_bld_thru_removed[f"{theround}_{c_qry}"] = qry_kept
    
# save removed
np.savetxt(f_qry_removed, qry_removed, delimiter=",")

24/157916 = 0.0% removed


In [7]:
%%time

# count spots
lb_id = np.unique(lb[lb!=0]) # exclude 0
lb_id = np.hstack([[0], lb_id]) # include 0
spotcount = pd.DataFrame(index=lb_id)
for i, (r, c) in enumerate(itertools.product(rounds, channels)):
    if f"{r}_{c}" in spots_bld_thru_removed.keys():
        print(f"{r}_{c}: load from bleed_thru_corrected spots")
        spots_rc = spots_bld_thru_removed[f"{theround}_{c}"]
    else:
        f_spots = fx_spots[i]
        print(f"{r}_{c}: load from {f_spots}")
        spots_rc = np.loadtxt(f_spots, delimiter=',')
        
    res = spot.spot_counts_worker(lb, spots_rc, lb_id=lb_id, 
                             remove_emptymask=True, 
                             verbose=True,
                             )
    spotcount[f"{r}_{c}"] = res 
spotcount = spotcount.iloc[1:] # remove 0
spotcount.to_csv(out_spots)
spotcount

r1_c0: load from bleed_thru_corrected spots
removed 0 due to nan
31,338/157,892 spots in range (1131, 913, 648)
r1_c1: load from /data/qlyu/v1/data/easifish/lt186/r1/spots/spots_c1.txt
removed 0 due to nan
126/810 spots in range (1131, 913, 648)
r1_c2: load from /data/qlyu/v1/data/easifish/lt186/r1/spots/spots_c2.txt
removed 0 due to nan
5,885/62,806 spots in range (1131, 913, 648)
r1_c4: load from /data/qlyu/v1/data/easifish/lt186/r1/spots/spots_c4.txt
removed 0 due to nan
124/675 spots in range (1131, 913, 648)
CPU times: user 4.94 s, sys: 114 ms, total: 5.05 s
Wall time: 5.04 s


Unnamed: 0,r1_c0,r1_c1,r1_c2,r1_c4
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0
...,...,...,...,...
28224,0.0,0.0,0.0,0.0
28225,0.0,0.0,0.0,0.0
28226,0.0,0.0,0.0,0.0
28227,0.0,0.0,0.0,0.0


In [8]:
# spot count calculated from total fluorescence intensity
spotcount_intn = pd.DataFrame(index=roi_meta.index, dtype=float)
for i, (r, c) in enumerate(itertools.product(rounds, channels)):
    f_spots = fx_spots[i]
    f_intns = fx_intns[i]
    print(r, c, f_spots, f_intns)
    
    vec_count = get_spot_counts_from_intn(f_intns, f_spots, roi_meta)
    spotcount_intn[f'{r}_{c}'] = vec_count
spotcount_intn.to_csv(out_spots_intn)
spotcount_intn

r1 c0 /data/qlyu/v1/data/easifish/lt186/r1/spots/spots_c0.txt /data/qlyu/v1/data/easifish/lt186/r1/intensities/r1_c0_intensity.csv
r1 c1 /data/qlyu/v1/data/easifish/lt186/r1/spots/spots_c1.txt /data/qlyu/v1/data/easifish/lt186/r1/intensities/r1_c1_intensity.csv
r1 c2 /data/qlyu/v1/data/easifish/lt186/r1/spots/spots_c2.txt /data/qlyu/v1/data/easifish/lt186/r1/intensities/r1_c2_intensity.csv
r1 c4 /data/qlyu/v1/data/easifish/lt186/r1/spots/spots_c4.txt /data/qlyu/v1/data/easifish/lt186/r1/intensities/r1_c4_intensity.csv


Unnamed: 0_level_0,r1_c0,r1_c1,r1_c2,r1_c4
roi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,3.098025,0.000000,0.000000,0.0
2,18.225710,0.000000,0.000000,0.0
3,0.000000,0.000000,0.000000,0.0
4,0.000000,0.000000,0.000000,0.0
5,2.955263,0.000000,0.000000,0.0
...,...,...,...,...
28224,0.000000,0.000000,0.034886,0.0
28225,0.737193,4.357239,0.033043,0.0
28226,0.000000,1.877237,0.000000,0.0
28227,0.000000,0.000000,0.000000,0.0


In [9]:
# update spotcount using df_count
spotcount_sub = spotcount.reindex(roi_meta.index)
volumes = (roi_meta['area']*2*2*2/(0.92*0.92*0.84)) # convert um^3 to voxel values
density = spotcount_sub.divide(volumes, axis=0)
cond = density <= 0.01  ##this threshold corresponds to spot-spot distance ~1.3 um apart
print((~cond).sum())
spotcount_merged = spotcount_sub.where(~cond, spotcount_intn)  
spotcount_merged.to_csv(out_spots_merged)
spotcount_merged

r1_c0    0
r1_c1    0
r1_c2    0
r1_c4    0
dtype: int64


Unnamed: 0_level_0,r1_c0,r1_c1,r1_c2,r1_c4
roi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,3.098025,0.000000,0.000000,0.0
2,18.225710,0.000000,0.000000,0.0
3,0.000000,0.000000,0.000000,0.0
4,0.000000,0.000000,0.000000,0.0
5,2.955263,0.000000,0.000000,0.0
...,...,...,...,...
28224,0.000000,0.000000,0.034886,0.0
28225,0.737193,4.357239,0.033043,0.0
28226,0.000000,1.877237,0.000000,0.0
28227,0.000000,0.000000,0.000000,0.0
