In [1]:
"""
Spot counts for cells with highly expressed genes (dense spots)
1. Measure total intensity of every ROI after bleed-through correction and background subtraction.
2. Calculate the number of spots from the total intensity, based on the unitary (single-spot) fluorescence intensity.
3. Correlate the number of spots (from Airlocalize) with the total fluorescence intensity/voxel in each ROI and determine a 'cutoff'.
   Spot count > cutoff: use the spot count converted from the total fluorescence intensity;
   Spot count <= cutoff: use the spot count from Airlocalize.
"""

import os, sys,z5py
import numpy as np
import pandas as pd
from glob import glob 
from skimage.measure import regionprops
from skimage.io import imread, imsave
from os.path import abspath, dirname
from scipy import stats
from scipy.stats import skewnorm,lognorm
from scipy.optimize import minimize

In [2]:
### Specify directories to data files
# Root folder holding the pipeline outputs. NOTE: this is a machine-specific
# absolute path; edit it to point at your own data before running.
input_dir = "D:\\SWAP\\Vincent\\lt171_FlpO\\gene_new_4tile\\outputs"

# Each path component is passed to os.path.join separately (instead of
# embedding "\\" inside a component) so the separator is chosen by the
# operating system. On Windows the resulting paths are unchanged.
spot_dir = os.path.join(input_dir, "r2", "spots")            # folder with the spot tables (*.txt)
spotcount_csv = os.path.join(spot_dir, 'spots_r2.csv')       # spot counts, one row per ROI
roi_csv = os.path.join(spot_dir, 'roi.csv')                  # ROI table (provides the 'area' column)
genename_csv = os.path.join(input_dir, 'GeneName.csv')       # gene names (provides the 'r2' column)
intensity_dir = os.path.join(input_dir, "r2", "intensities") # folder with *_<channel>_intensity.csv files
### end input

### outputs
# Spot counts with the dense-spot correction applied.
out_csv = os.path.join(spot_dir, 'spotcount_dense_spot_corrected.csv')

In [3]:
# Read the spot-count table and the ROI table; in both files the first
# column is used as the row index.
spotcount = pd.read_csv(spotcount_csv, index_col=0)
roi = pd.read_csv(roi_csv, index_col=0)

# From the gene-name table keep only the 'r2' column, then prefix every row
# label with 'spots_' (e.g. 'c0' -> 'spots_c0').
gene_table = pd.read_csv(genename_csv, index_col=0)
GeneName = gene_table[['r2']]
GeneName.index = np.char.add('spots_', GeneName.index)

# Quick sanity check of the table sizes, followed by the gene table itself.
print(spotcount.shape, roi.shape)
GeneName

(62, 4) (58, 9)


Unnamed: 0,r2
spots_c0,Kcnip3
spots_c1,Rorb
spots_c2,Cdh13
spots_c4,Cntn5


In [4]:
### Identify unitary spot fluorescence intensity for every gene
# Voxel size used to convert the first three columns of a spot table from
# physical units to voxel units.
VOXEL_SIZE = (0.92, 0.92, 0.84)
# Per-axis (lower, upper) limits in voxel units; a spot is kept only when
# lower < coordinate <= upper on every axis. Removes spots on the edges
# (eliminates false detections).
SPOT_BOUNDS = ((250, 1500), (250, 1500), (150, 650))
# Intensity value that is excluded from the estimate.
# NOTE(review): presumably a sentinel written by the spot detector - confirm.
INVALID_INTENSITY = -8.0
# Number of histogram bins used to locate the most frequent spot intensity.
N_INTENSITY_BINS = 5000


def _single_spot_intensity(spots):
    """Estimate the unitary (single-spot) fluorescence intensity.

    The estimate is the left edge of the most populated bin of the intensity
    histogram (the first such bin if several tie). An alternative that was
    explored is fitting a skewed normal (``skewnorm.fit``) or log-normal
    distribution and locating the peak of the fitted pdf (for example with
    ``minimize(..., method='Powell')``).

    Parameters
    ----------
    spots : numpy.ndarray
        2-D array with one row per spot; columns 0-2 are the coordinates in
        physical units and column 3 is the spot intensity.

    Returns
    -------
    float
        The estimated single-spot intensity, or NaN when no spot is left
        after filtering.
    """
    if spots.size == 0:
        return np.nan

    coords = spots[:, :3] / VOXEL_SIZE  # convert from physical unit to pixel unit
    keep = np.ones(len(spots), dtype=bool)
    for axis, (lower, upper) in enumerate(SPOT_BOUNDS):
        keep &= (coords[:, axis] > lower) & (coords[:, axis] <= upper)

    intensities = spots[keep, 3]
    intensities = intensities[intensities != INVALID_INTENSITY]
    if intensities.size == 0:
        # An empty histogram would silently report 0.0 and later cause a
        # division by zero; NaN makes the missing estimate explicit.
        return np.nan

    counts, edges = np.histogram(intensities, bins=N_INTENSITY_BINS)
    return edges[np.argmax(counts)]  # argmax returns the first maximum


for path in sorted(glob(os.path.join(spot_dir, '*.txt'))):
    # The file name up to the first '.' is the row label in GeneName.
    label = os.path.basename(path).split('.')[0]
    print(label)
    # ndmin=2 keeps a file with a single spot as a 2-D (1, n_columns) array.
    spots = np.loadtxt(path, delimiter=',', ndmin=2)
    intensity = _single_spot_intensity(spots)
    if np.isnan(intensity):
        print('  no usable spots found in %s; single_spot_intensity set to NaN' % path)
    GeneName.loc[label, 'single_spot_intensity'] = intensity


spots_c0
spots_c1
spots_c2
spots_c4


In [5]:
# Display the gene table, which now carries the 'single_spot_intensity'
# column filled in by the previous cell.
GeneName

Unnamed: 0,r2,single_spot_intensity
spots_c0,Kcnip3,134.172289
spots_c1,Rorb,135.834766
spots_c2,Cdh13,132.043173
spots_c4,Cntn5,131.390024


In [6]:
# df_mean  : mean fluorescence intensity per ROI after background subtraction
# df_total : total fluorescence intensity per ROI (background-subtracted mean * ROI area)
# df_count : spot count calculated from the total fluorescence intensity

CHANNELS = ['c0', 'c1', 'c2', 'c4']
N_BACKGROUND_BINS = 1000          # histogram bins used to locate the background level
VOXEL_VOLUME = 0.92 * 0.92 * 0.84 # product of the voxel dimensions
# Spot-density threshold; corresponds to spots ~1.3 um apart.
DENSITY_CUTOFF = 0.01

df_mean = pd.DataFrame(index=roi.index, dtype=float)
df_total = pd.DataFrame(index=roi.index, dtype=float)
df_count = pd.DataFrame(index=roi.index, dtype=float)

r = 'spots'  # column-name prefix; matches the row labels of GeneName

# over all channels and rounds
for c in CHANNELS:
    column = '%s_%s' % (r, c)
    # all intensity files for that channel
    for f in sorted(glob(os.path.join(intensity_dir, f"*_{c}_intensity.csv"))):
        print(os.path.basename(f))
        print(r, c)
        cell_int = pd.read_csv(f, sep=',', index_col=0)
        cell_int = cell_int[cell_int.index.isin(roi.index)]  # only include intact ROIs

        # Identify background: left edge of the most populated bin of the
        # per-ROI mean-intensity histogram (first bin on ties).
        counts, edges = np.histogram(cell_int['mean_intensity'], bins=N_BACKGROUND_BINS)
        bg = edges[np.argmax(counts)]

        # Negative values after background subtraction are clipped to 0.
        df_mean[column] = np.maximum(0, cell_int['mean_intensity'] - bg)
        df_total[column] = df_mean[column] * roi['area']
        df_count[column] = df_total[column] / GeneName.loc[column, 'single_spot_intensity']

# proc and save
df_cutoff = spotcount.copy()

# Denominator of the spot density (original note: "convert um^3 to voxel values").
# NOTE(review): the factor 8 (= 2*2*2) presumably reflects a 2x scaling per
# axis of the ROI area - confirm.
roi_volume = roi['area'] * 8 / VOXEL_VOLUME

for column in df_count.columns:
    density = spotcount[column] / roi_volume
    # ROIs whose density exceeds the cutoff; NaN densities compare False and
    # are therefore left untouched.
    dense_rois = density.index[density > DENSITY_CUTOFF]
    if len(dense_rois) == 0:
        continue
    # The replacement values are floats; cast first so that writing them into
    # an integer column does not rely on implicit upcasting during assignment.
    df_cutoff[column] = df_cutoff[column].astype(float)
    df_cutoff.loc[dense_rois, column] = df_count.loc[dense_rois, column]

df_cutoff.to_csv(out_csv)

lt171_gene_4tile_r2_c0_intensity.csv
spots c0
lt171_gene_4tile_r2_c1_intensity.csv
spots c1
lt171_gene_4tile_r2_c2_intensity.csv
spots c2
lt171_gene_4tile_r2_c4_intensity.csv
spots c4
