In [1]:
import os, sys
from os.path import abspath, dirname
import zarr
import z5py
import numpy as np
import pandas as pd
from glob import glob 
from skimage.measure import regionprops
from skimage.io import imread, imsave
from scipy import stats
from scipy.stats import skewnorm, lognorm
from scipy.optimize import minimize
import itertools
import collections

# Add the directory two levels up to the import path so that the local
# `easi_fish` package can be imported from this notebook's location.
sys.path.append('../../')
from easi_fish import n5_metadata_utils as n5mu
from easi_fish import roi_prop, spot, intensity
import warnings
# Silence every warning for the rest of the session (this also hides
# deprecation warnings from the libraries used below).
warnings.filterwarnings('ignore')

import importlib
# Re-import the local modules so that edits made to their source since the
# kernel started take effect without a restart.
importlib.reload(spot)
importlib.reload(roi_prop)
importlib.reload(intensity)

<module 'easi_fish.intensity' from '/u/project/zipursky/f7xiesnm/code/easifish-proc/bydatasets/lt172/../../easi_fish/intensity.py'>

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# High-resolution ("retina") inline figures.
# IPython.display.set_matplotlib_formats is deprecated since IPython 7.23 in
# favour of the matplotlib_inline package; use the new location when it is
# available and fall back to the old one on older installations.
try:
    from matplotlib_inline.backend_inline import set_matplotlib_formats
except ImportError:
    from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

# Seaborn look: white background with axis grid enabled, "talk" context.
sns.set_style('white', rc={'axes.grid': True})
sns.set_context('talk')

Spot counts for cells with highly expressed genes (dense spots)
1. Measure total intensity of every ROI after bleed-through correction and background subtraction.
2. Calculate the number of spots from the total intensity, based on the unit-spot intensity.
3. Correlate the number of spots (from Airlocalize) with the total fluorescence intensity/voxel in each ROI and determine a 'cutoff'. 
   Spot count > cutoff: use spot count converted based on total fluorescence intensity; 
   Spot count < cutoff: use spot count from Airlocalize

### On units
- all images are in pixel units
- the ROI meta file (output) is in physical units (um, pre-expansion)
- spot files are in physical units (um, post-expansion)

In [4]:
## input

# theround = 'r2'
# rounds = [theround]
# channels = ['c0', 'c1', 'c2', 'c4']

# Dataset root: inputs are read from <ddir>/outputs, results go below <ddir>/proc.
ddir = '/u/home/f/f7xiesnm/project-zipursky/easifish/lt172'
# Pure-Python equivalent of the shell call `mkdir -p $ddir/proc`: no shell
# dependency, and a failure raises instead of passing unnoticed.
os.makedirs(os.path.join(ddir, 'proc'), exist_ok=True)
output_dir = ddir + '/proc/v1-v2'

# Reference round, and the rounds that are registered to it.
fix_round = 'r3'
mov_rounds = ['r1', 'r2', 'r4', 'r5']
# Channels to quantify in each round; the iteration order of this mapping
# defines the order of the per-(round, channel) lists used further down.
round_channels = collections.OrderedDict({
    'r1': ('c0', 'c1', 'c2', 'c4'),
    'r2': ('c0', 'c1', 'c2', 'c4'),
    'r3': ('c0', 'c1', 'c2', 'c4'),
    'r4': ('c0', 'c1', 'c2', 'c4'),
    'r5': ('c0', 'c1', 'c2', 'c4'),
})

# NOTE(review): presumably the registered DAPI channel (it matches the
# '/c3_reg/...' dataset path); the original comment was left unfinished.
dapi_channel = 'c3_reg'
lb_scale = 's3'
# Voxel size of the label image, passed to roi_prop_v2 and spot_counts_worker.
lb_res = [1.84,1.84,1.68]
# NOTE(review): presumably the expansion factor (post- vs pre-expansion um) - confirm.
ex = 2

# images
# N5 dataset paths of channel c3 at scale `lb_scale`, before and after
# registration.  They are built from `lb_scale` / `dapi_channel` so that those
# settings and the paths actually read cannot drift apart
# (values are unchanged: '/c3/s3' and '/c3_reg/s3').
subpath     = f'/c3/{lb_scale}'
subpath_reg = f'/{dapi_channel}/{lb_scale}'
# Stitched volume and segmentation (label image) of the reference round.
fix_dir = ddir + f"/outputs/{fix_round}/stitching/export.n5"
lb_dir  = ddir + f"/outputs/{fix_round}/segmentation/{fix_round}-c3.tif"
# One warped volume per moving round, in the order of `mov_rounds`.
reg_dirs = [
    ddir + f"/outputs/{r}/registration/{r}-to-{fix_round}/warped"
    for r in mov_rounds
]


# spot dir for every gene
# One spot file per (round, channel), generated in exactly the order in which
# `round_channels` is iterated, because the counting loop pairs files with
# (round, channel) by position.  The reference round uses its own spot files;
# every other round uses the "-warped" files from its registration folder.
spots_reg_version = 'v1'


def _spot_file(r, c):
    """Return the spot file path for channel `c` of round `r`."""
    if r == fix_round:
        return ddir + f'/outputs/{r}/spots/spots_{c}.txt'
    return ddir + (f'/outputs/{r}/spots/{r}-to-{fix_round}-{spots_reg_version}'
                   f'/spots_{c}-warped.txt')


fx_spots = [_spot_file(r, c)
            for r, chs in round_channels.items()
            for c in chs]

# Intensity tables (currently unused):
# fx_intns = [ddir + f'/outputs/{r}/intensities/{r}_{c}_intensity.csv'
#             for r, chs in round_channels.items() for c in chs]

# One intensity threshold per spot file; spots whose 4th column is not above
# the threshold are dropped before counting.
# Earlier settings, kept for reference: 110 for every file, or a per-file
# list mixing 150 / 110 / 200.
intn_threshs = [10]*len(fx_spots)

# Fail early, and say which files are missing.
missing_spot_files = [f for f in fx_spots if not os.path.isfile(f)]
assert not missing_spot_files, f"spot files not found: {missing_spot_files}"
assert len(intn_threshs) == len(fx_spots), \
    "need exactly one intensity threshold per spot file"
# for f in fx_intns:
#     assert os.path.isfile(f)

## output
# Every result file is written directly into `output_dir`.
out_badroi, out_allroi, out_roi, out_spots = (
    os.path.join(output_dir, file_name)
    for file_name in (
        'bad_roi_list.npy',   # labels of rejected ROIs
        "roi_all.csv",        # properties of every ROI
        "roi.csv",            # properties of the ROIs used for counting
        "spotcount.csv",      # ROI x (round, channel) spot counts
    )
)

In [5]:
# Optional step: drop spots of a query channel that are bleed-through from a
# reference channel.  Disabled by default.
REMOVE_BLEEDTHRU = False

# remove bleed through!
if REMOVE_BLEEDTHRU:

    # `theround` was only ever set in a commented-out line of the input cell,
    # so enabling this step used to fail with a NameError.
    # NOTE(review): 'r2' is taken from that commented-out setting - confirm
    # which round should be cleaned.
    theround = 'r2'

    bleed_thru_epsilon = 1
    c_qry = 'c0'
    c_ref = 'c4'

    f_ref = ddir+f'/outputs/{theround}/spots/spots_{c_ref}.txt'
    f_qry = ddir+f'/outputs/{theround}/spots/spots_{c_qry}.txt'
    f_qry_kept = os.path.join(output_dir, f'kept_spots_{theround}_{c_qry}.txt')

    ref_dots = np.loadtxt(f_ref, delimiter=',')
    qry_dots = np.loadtxt(f_qry, delimiter=',')
    # Presumably returns the query spots to keep and the ones judged to be
    # bleed-through from the reference channel - see spot.remove_bleed_thru_spots.
    qry_kept, qry_removed = spot.remove_bleed_thru_spots(ref_dots, qry_dots, epsilon=bleed_thru_epsilon)

    # save
    # The output folder is otherwise only created in a later cell; make sure it
    # exists before writing into it.
    os.makedirs(output_dir, exist_ok=True)
    np.savetxt(f_qry_kept, qry_kept, delimiter=",")

    ### TODO - replace f_qry with f_qry_kept in spot list

In [6]:
%%time
# output dir
if not os.path.isdir(output_dir):
    print(output_dir)
    os.mkdir(output_dir)
    
# image size in pixel (x, y, z)
grid = n5mu.read_voxel_grid(fix_dir, subpath)
# voxel resolution in µm (x, y, z) (post-expansion)
vox  = n5mu.read_voxel_spacing(fix_dir, subpath)
# image size in physical space (x, y, z) (post-expansion)
size = grid*vox
print('subpath: ', subpath)
print('voxel size: ', vox)
print('image size (pixel): ', grid)
print('image size (um post-ex): ', size)

# get image data
# print("loading images...")
# fix = zarr.open(store=zarr.N5Store(fix_dir), mode='r')     
# img1 = fix[subpath][:, :, :]

# get segmentation mask
lb = imread(lb_dir)
# roi = np.max(lb) # this is only correct if this lb is uncropped
roi = len(np.unique(lb[lb!=0])) # this would be better
print(lb.shape)
print('num roi: ', roi)

/u/home/f/f7xiesnm/project-zipursky/easifish/lt172/proc/v1-v2
subpath:  /c3/s3
voxel size:  [1.84 1.84 1.68]
image size (pixel):  [457 454 452]
image size (um post-ex):  [840.88 835.36 759.36]
(452, 454, 457)
num roi:  6606
CPU times: user 1.43 s, sys: 506 ms, total: 1.94 s
Wall time: 1.81 s


In [7]:
# %%time
# bad_roi = []
# for reg_dir in reg_dirs:
#     reg = zarr.open(store=zarr.N5Store(reg_dir), mode='r')     
#     img2 = reg[subpath][...]
#     print("image loaded")
    
#     # get bad pixels -> bad roi
#     bad_roi_thisround = np.unique(lb[np.nonzero(img2==0)])
#     bad_roi.append(bad_roi_thisround)
#     print("# of ROIs rejected this round:", len(bad_roi_thisround))
    
# bad_roi = np.unique(np.hstack(bad_roi))
# print("# of ROIs rejected:", len(bad_roi))
# bad_roi

In [8]:
%%time
# # Get list of ROIs that are fully or partially outside the mask 
### Make sure to only include ROIs that are intact and in the overlapping regions across all rounds of FISH
### remove any unregistered parts
mask = np.ones(grid[::-1])
for reg_dir in reg_dirs:
    reg = zarr.open(store=zarr.N5Store(reg_dir), mode='r')     
    img2 = reg[subpath_reg][...]
    print("image loaded")
    mask[img2==0]=0
print("mask generated")

bad_roi=np.unique(lb[mask==0])
if bad_roi[0] == 0: # remove the label 0 - extracellular space
    bad_roi = bad_roi[1:]
np.save(out_badroi, bad_roi)
print("# of ROIs rejected:", len(bad_roi))

image loaded
image loaded
image loaded
image loaded
mask generated
# of ROIs rejected: 283
CPU times: user 2.76 s, sys: 1.56 s, total: 4.31 s
Wall time: 27.1 s


In [9]:
%%time
# get cell locations (in um - pre-expansion) from segmentation mask
roi_meta_all = roi_prop.roi_prop_v2(lb, lb_res, ex)
roi_meta_all.to_csv(out_allroi)

roi_meta = roi_meta_all.set_index('roi').copy()
roi_meta = roi_meta #.loc[roi_meta.index.difference(bad_roi)]
roi_meta.to_csv(out_roi)

CPU times: user 3.42 s, sys: 226 ms, total: 3.64 s
Wall time: 3.67 s


In [10]:
%%time

# count spots for every cell (roi)

# all labels
lb_id = np.unique(lb[lb!=0]) # exclude 0
lb_id = np.hstack([[0], lb_id]) # include 0 - noncell
# selected cells
lb_id_selected = roi_meta.index.values

# prep
spotcount = pd.DataFrame(index=lb_id_selected, dtype=int)
# spotcount_intn = pd.DataFrame(index=lb_id_selected, dtype=float)

i = 0
for r,chs in round_channels.items():
    for c in chs:
        # f_intns = fx_intns[i]
        f_spots = fx_spots[i]
        intn_th = intn_threshs[i]
        print(r, c, f_spots) #, f_intns)

        # spots
        spots_rc = np.loadtxt(f_spots, delimiter=',')
        print(len(spots_rc))

        # filter
        filter_cond = spots_rc[:,3] > intn_th
        spots_rc = spots_rc[filter_cond]
        print(len(spots_rc))

        # count spots
        res = spot.spot_counts_worker(lb, spots_rc, lb_res,
                                      lb_id=lb_id, 
                                      remove_noncell=True, 
                                      selected_roi_list=lb_id_selected,
                                      )
        spotcount[f"{r}_{c}"] = res 

        # # count spots by intensity
        # res = spot.get_spot_counts_from_intn(f_intns, f_spots, roi_meta, lb_res)
        # spotcount_intn[f'{r}_{c}'] = res
        i = i + 1
    
# save results
spotcount.to_csv(out_spots)
# spotcount_intn.to_csv(out_spots_intn)

r1 c0 /u/home/f/f7xiesnm/project-zipursky/easifish/lt172/outputs/r1/spots/r1-to-r3-v1/spots_c0-warped.txt
448387
444742
removed 31 spots due to nan
296,942/444,742 spots in range (457, 454, 452)
r1 c1 /u/home/f/f7xiesnm/project-zipursky/easifish/lt172/outputs/r1/spots/r1-to-r3-v1/spots_c1-warped.txt
688827
685079
removed 39 spots due to nan
534,995/685,079 spots in range (457, 454, 452)
r1 c2 /u/home/f/f7xiesnm/project-zipursky/easifish/lt172/outputs/r1/spots/r1-to-r3-v1/spots_c2-warped.txt
432190
428470
removed 41 spots due to nan
280,844/428,470 spots in range (457, 454, 452)
r1 c4 /u/home/f/f7xiesnm/project-zipursky/easifish/lt172/outputs/r1/spots/r1-to-r3-v1/spots_c4-warped.txt
409947
406372
removed 28 spots due to nan
258,387/406,372 spots in range (457, 454, 452)
r2 c0 /u/home/f/f7xiesnm/project-zipursky/easifish/lt172/outputs/r2/spots/r2-to-r3-v1/spots_c0-warped.txt
732585
729309
removed 43 spots due to nan
520,270/729,309 spots in range (457, 454, 452)
r2 c1 /u/home/f/f7xiesnm/

In [11]:
# # reconcile two spot counts
# dist_cutoff = 2 # um pre-expansion
# density = spotcount.divide(roi_meta['area'], axis=0)
# cond = density < 1/(dist_cutoff**3) # keep spots if density is low

# print("# cells have high density: ", (~cond).sum()) 
# spotcount_merged = spotcount.where(cond, spotcount_intn)  # replace where the condition is False
# spotcount_merged.to_csv(out_spots_merged)
# spotcount_merged