In [1]:
"""
Spot counts for cells with highly expressed genes (dense spots)
1. Measure total intensity of every ROI after bleed-through correction and background subtraction.
2. Calculate the number of spots from the total intensity, based on the unitary (single-spot) fluorescence intensity.
3. Correlate the number of spots (from Airlocalize) with the total fluorescence intensity/voxel in each ROI and determine a 'cutoff'.
   Spot count > cutoff: use spot count converted based on total fluorescence intensity; 
   Spot count < cutoff: use spot count from Airlocalize
"""

import os, sys,z5py
import numpy as np
import pandas as pd
from glob import glob 
from skimage.measure import regionprops
from skimage.io import imread, imsave
from os.path import abspath, dirname
from scipy import stats
from scipy.stats import skewnorm,lognorm
from scipy.optimize import minimize
import itertools

In [2]:
### Specify directories to data files
input_dir = "D:\\SWAP\\Vincent\\lt171_FlpO\\gene_new_4tile\\outputs"
out_dir = os.path.join(input_dir, "testfull-aug5-v2")
roi_csv = os.path.join(out_dir, 'roi.csv')      # ROI table read in a later cell
f_spots = os.path.join(out_dir, 'spots.csv')    # per-ROI spot counts read in a later cell
out_csv = os.path.join(out_dir, 'spotcount_dense_spot_corrected.csv')

# for every gene (each round/channel pair is handled as one gene below)
rounds = ['r1', 'r2']
channels = ['c0', 'c1', 'c2', 'c4']

# r1 should be the "wrapped" one
# NOTE(review): original note read "wrappped"; possibly means the warped/registered round — TODO confirm

# One spot file and one intensity file per (round, channel), in itertools.product order.
# Path components are joined with os.path.join instead of hard-coded '\\' separators.
fx_spots = [os.path.join(input_dir, 'spots_pooled', f'spots_{r}_{c}.txt')
            for r, c in itertools.product(rounds, channels)]
fx_intns = [os.path.join(input_dir, 'intensities_pooled', f'{r}_{c}_intensity.csv')
            for r, c in itertools.product(rounds, channels)]

# Fail early and name every missing input (roi_csv included, since a later cell reads it).
missing_inputs = [f for f in [roi_csv, f_spots, *fx_spots, *fx_intns] if not os.path.isfile(f)]
if missing_inputs:
    raise FileNotFoundError("Missing input file(s):\n  " + "\n  ".join(missing_inputs))

In [3]:
# ROI table: the first CSV column is used as the index (comma is read_csv's default separator)
roi = pd.read_csv(roi_csv, index_col=0)
roi

Unnamed: 0_level_0,z,y,x,area
roi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,363.489055,17.368291,73.106487,102.469416
2,125.553784,51.560521,396.296073,1692.922728
3,111.095382,104.351716,254.707325,1133.917848
4,55.515114,52.433029,164.453027,1471.898064
5,324.728038,91.253685,221.069806,1436.260392
...,...,...,...,...
13754,206.419432,212.102989,10.116867,234.888696
13755,252.961215,197.694189,313.463300,412.632696
13756,265.635180,65.784569,393.811699,170.012136
13757,193.609296,72.832144,206.992864,160.413960


In [4]:
# Per-ROI spot counts; remove the 'spots_' substring from every column label
spotcount = pd.read_csv(f_spots, index_col=0).rename(
    columns=lambda label: label.replace('spots_', '')
)
spotcount

Unnamed: 0,r1_c0,r1_c1,r1_c2,r1_c4,r2_c0,r2_c1,r2_c2,r2_c4
1,34.0,46.0,37.0,25.0,0.0,1.0,0.0,0.0
2,4.0,13.0,0.0,0.0,0.0,2.0,2.0,1.0
3,5.0,20.0,0.0,1.0,2.0,5.0,0.0,0.0
4,3.0,14.0,0.0,2.0,0.0,4.0,0.0,1.0
5,5.0,23.0,0.0,4.0,14.0,12.0,0.0,2.0
...,...,...,...,...,...,...,...,...
13754,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13755,0.0,2.0,0.0,1.0,0.0,2.0,0.0,0.0
13756,0.0,0.0,0.0,1.0,3.0,4.0,0.0,0.0
13757,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0


In [5]:
### Identify unit spot intensity for every gene

# Divisor applied to the (x, y, z) columns to convert from physical unit to pixel unit
VOXEL_SIZE = np.array([0.92, 0.92, 0.84])
# Per-axis (lower, upper) limits in pixel units; a spot is kept when lower < coord <= upper.
# Removes spots on edges (eliminate false detection).
SPOT_BOUNDS = [(250, 1500), (250, 1500), (150, 650)]   # x, y, z
# Intensity value excluded before building the histogram.
# NOTE(review): the original code marked this "???"; meaning of -8.0 is unknown — TODO confirm
INVALID_INTENSITY = -8.0
N_INTENSITY_BINS = 5000


def estimate_unit_intensity(spot):
    """Estimate the single-spot intensity from one table of detected spots.

    Parameters
    ----------
    spot : array-like, columns (x, y, z, I)
        Spot coordinates in physical units followed by the spot intensity.
        A single row (1-D input) is accepted.

    Returns
    -------
    float
        Left edge of the most populated bin of the intensity histogram,
        computed over spots inside SPOT_BOUNDS whose intensity is not
        INVALID_INTENSITY.

    Raises
    ------
    ValueError
        If the table has fewer than four columns, or if no spot is left after
        filtering (the histogram mode would otherwise silently come out as 0
        and cause a division by zero when counts are derived from it).
    """
    spot = np.atleast_2d(np.asarray(spot, dtype=float))
    if spot.size == 0 or spot.shape[1] < 4:
        raise ValueError("spot table must have at least 4 columns (x, y, z, I) and one row")

    xyz = spot[:, :3] / VOXEL_SIZE  # convert from physical unit to pixel unit
    keep = np.ones(len(spot), dtype=bool)
    for axis, (lower, upper) in enumerate(SPOT_BOUNDS):
        keep &= (xyz[:, axis] > lower) & (xyz[:, axis] <= upper)

    spot_int = spot[keep, 3]
    spot_int = spot_int[spot_int != INVALID_INTENSITY]
    if spot_int.size == 0:
        raise ValueError("no spots left after edge/intensity filtering")

    ## assign the most frequent intensity as the single-spot-intensity
    counts, edges = np.histogram(spot_int, bins=N_INTENSITY_BINS)
    return edges[np.argmax(counts)]


unit_intns = []
for i, (r, c) in enumerate(itertools.product(rounds, channels)):
    f = fx_spots[i]
    print(r, c, f)

    # (x, y, z, I); ndmin=2 keeps a one-row file two-dimensional
    spot = np.loadtxt(f, delimiter=',', ndmin=2)
    unit_intns.append(estimate_unit_intensity(spot))

unit_intns

r1 c0 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c0.txt
r1 c1 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c1.txt
r1 c2 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c2.txt
r1 c4 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r1_c4.txt
r2 c0 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r2_c0.txt
r2 c1 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r2_c1.txt
r2 c2 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r2_c2.txt
r2 c4 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\spots_pooled\spots_r2_c4.txt


[134.68825386657716,
 142.20793122253417,
 137.06713259735108,
 135.11447283935547,
 134.68825055999997,
 142.20793256000002,
 137.06713656,
 137.09174240000002]

In [6]:
# df_count is spot count calculated from total fluorescence intensity:
#   count = max(mean_intensity - background, 0) * area / unit_intensity
# NOTE(review): 'area' is multiplied in directly here, while the cutoff cell rescales it
# ("convert um^3 to voxel values") before use — confirm the units are the intended ones.
df_count = pd.DataFrame(index=roi.index, dtype=float)
vec_area = roi['area'].values   # identical for every gene, so computed once
for i, (r, c) in enumerate(itertools.product(rounds, channels)):
    f = fx_intns[i]
    unit_int = unit_intns[i]
    print(r, c, f)

    # A zero/negative/NaN unit intensity would turn every count into inf, NaN or a negative number.
    if not (np.isfinite(unit_int) and unit_int > 0):
        raise ValueError(f"invalid unit spot intensity for {r}_{c}: {unit_int}")

    cell_int = pd.read_csv(f, sep=',', index_col=0)
    cell_int = cell_int.reindex(roi.index) ## only include intact ROIs###

    vec_mean = cell_int['mean_intensity'].values

    # background = left edge of the most populated bin of the mean-intensity histogram.
    # reindex() leaves NaN for ROIs absent from the intensity file, and np.histogram
    # raises on a non-finite range, so the histogram is built from finite values only.
    finite_mean = vec_mean[np.isfinite(vec_mean)]
    if finite_mean.size == 0:
        raise ValueError(f"no finite mean_intensity values for {r}_{c} in {f}")
    n, b = np.histogram(finite_mean, bins=1000)
    bg = b[np.argmax(n)]

    # count (stays NaN for ROIs without an intensity measurement)
    vec_count = np.clip(vec_mean - bg, 0, None) * vec_area / unit_int
    df_count[f'{r}_{c}'] = vec_count

df_count

r1 c0 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r1_c0_intensity.csv
r1 c1 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r1_c1_intensity.csv
r1 c2 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r1_c2_intensity.csv
r1 c4 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r1_c4_intensity.csv
r2 c0 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r2_c0_intensity.csv
r2 c1 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r2_c1_intensity.csv
r2 c2 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r2_c2_intensity.csv
r2 c4 D:\SWAP\Vincent\lt171_FlpO\gene_new_4tile\outputs\intensities_pooled\r2_c4_intensity.csv


Unnamed: 0_level_0,r1_c0,r1_c1,r1_c2,r1_c4,r2_c0,r2_c1,r2_c2,r2_c4
roi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,159.088134,1.696923,128.996454,28.162674,110.971892,26.697905,131.621644,115.916887
2,0.633554,0.000000,0.000000,0.000000,1.903046,17.455683,3.499464,0.964036
3,2.779255,6.333779,1.746320,1.945363,6.107428,17.821450,2.853233,2.977236
4,1.108907,0.000000,0.000000,0.000000,0.175173,18.729604,2.699448,0.000000
5,1.623296,4.023971,1.140955,2.262592,7.055661,20.491200,6.391334,3.859741
...,...,...,...,...,...,...,...,...
13754,0.000000,0.000000,0.000000,0.000000,0.000000,1.898004,0.072911,0.000000
13755,0.000000,6.526348,0.000000,0.000000,0.000000,6.352342,0.000000,0.000000
13756,0.000000,0.000000,0.000000,0.000000,0.000000,2.306024,0.225444,0.000000
13757,0.220968,0.000000,0.116865,0.291333,0.101884,1.581346,0.288074,0.342997


In [7]:
# proc and save
# update spotcount using df_count:
#   ROIs whose Airlocalize spot density exceeds DENSITY_CUTOFF get the intensity-derived
#   count from df_count; all other ROIs keep the Airlocalize count.
VOXEL_VOLUME = 0.92 * 0.92 * 0.84   # product of the voxel dimensions used in the original conversion
# NOTE(review): the 2*2*2 factor is kept from the original expression; presumably a 2x
# per-axis scale difference between the ROI table and the spot coordinates — TODO confirm
AREA_SCALE = 2 * 2 * 2
DENSITY_CUTOFF = 0.01   ##this threshold corresponds to spot-spot distance ~1.3 um apart

roi_voxels = roi['area'] * AREA_SCALE / VOXEL_VOLUME   # convert um^3 to voxel values

# astype(float) returns a copy and guarantees fractional estimates fit the column dtype
df_cutoff = spotcount.astype(float)
# loop over genes
for gene in df_count.columns:
    # Align everything to df_cutoff's index so the boolean mask can be used with .loc
    density = (spotcount[gene] / roi_voxels).reindex(df_cutoff.index)
    estimate = df_count[gene].reindex(df_cutoff.index)
    # Keep the Airlocalize count when no intensity-derived estimate exists (NaN),
    # instead of overwriting a valid count with NaN.
    cond = (density > DENSITY_CUTOFF) & estimate.notna()
    print(gene, cond.sum())
    df_cutoff.loc[cond, gene] = estimate[cond]

df_cutoff

r1_c0 6
r1_c1 27
r1_c2 7
r1_c4 1
r2_c0 1
r2_c1 9
r2_c2 1
r2_c4 0


Unnamed: 0,r1_c0,r1_c1,r1_c2,r1_c4,r2_c0,r2_c1,r2_c2,r2_c4
1,159.088134,1.696923,128.996454,28.162674,0.0,1.0,0.0,0.0
2,4.000000,13.000000,0.000000,0.000000,0.0,2.0,2.0,1.0
3,5.000000,20.000000,0.000000,1.000000,2.0,5.0,0.0,0.0
4,3.000000,14.000000,0.000000,2.000000,0.0,4.0,0.0,1.0
5,5.000000,23.000000,0.000000,4.000000,14.0,12.0,0.0,2.0
...,...,...,...,...,...,...,...,...
13754,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
13755,0.000000,2.000000,0.000000,1.000000,0.0,2.0,0.0,0.0
13756,0.000000,0.000000,0.000000,1.000000,3.0,4.0,0.0,0.0
13757,0.000000,0.000000,0.000000,2.000000,0.0,0.0,0.0,0.0


In [8]:
# Write the corrected per-ROI counts, index column included, to out_csv
df_cutoff.to_csv(path_or_buf=out_csv, index=True)

In [9]:
# Read the saved table back and print the largest value of each column
test = pd.read_csv(out_csv, index_col=0)
for _label, column in test.items():
    print(column.max())
test

930.0
1615.0
1115.0
66.0
96.0
247.0
79.0
32.0


Unnamed: 0,r1_c0,r1_c1,r1_c2,r1_c4,r2_c0,r2_c1,r2_c2,r2_c4
1,159.088134,1.696923,128.996454,28.162674,0.0,1.0,0.0,0.0
2,4.000000,13.000000,0.000000,0.000000,0.0,2.0,2.0,1.0
3,5.000000,20.000000,0.000000,1.000000,2.0,5.0,0.0,0.0
4,3.000000,14.000000,0.000000,2.000000,0.0,4.0,0.0,1.0
5,5.000000,23.000000,0.000000,4.000000,14.0,12.0,0.0,2.0
...,...,...,...,...,...,...,...,...
13754,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
13755,0.000000,2.000000,0.000000,1.000000,0.0,2.0,0.0,0.0
13756,0.000000,0.000000,0.000000,1.000000,3.0,4.0,0.0,0.0
13757,0.000000,0.000000,0.000000,2.000000,0.0,0.0,0.0,0.0
