# Dot detection using DAOStarFinder

In [None]:
#custom function
from daostarfinder_dotdetection import *
from util import pil_imread
#enhance figure display
%config InlineBackend.figure_format = 'retina'

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

def plot_2d_locs_on_2d_image(df_locs_2d_1, df_locs_2d_2, img_2d, add_trace=True, zmax=1000):
    """Display a 2D image with one or two sets of dot locations overlaid.

    Parameters
    ----------
    df_locs_2d_1 : object exposing ``x`` and ``y`` attributes (e.g. a DataFrame)
        First set of locations. Always drawn, with legend label "Gaussian".
    df_locs_2d_2 : same kind of object as ``df_locs_2d_1``, or None
        Second set of locations. Drawn with legend label "LoG" only when
        ``add_trace`` is truthy and this argument is not None.
    img_2d : array-like
        2D image handed to ``plotly.express.imshow``.
    add_trace : bool, default True
        Whether to overlay the second set of locations.
    zmax : number, default 1000
        Forwarded to ``plotly.express.imshow`` as ``zmax``.

    Returns
    -------
    None
        The figure is shown with ``fig.show()`` and not returned.
    """
    # Background image
    #-------------------------------------------
    fig = px.imshow(
        img_2d,
        width=700,
        height=700,
        binary_string=True,
        binary_compression_level=4,
        binary_backend='pil',
        zmax=zmax,
    )
    #-------------------------------------------

    # Dot overlays: the first set is always shown; the second one is optional.
    # A missing second set is skipped instead of raising AttributeError.
    #-------------------------------------------
    overlays = [(df_locs_2d_1, "Gaussian")]
    if add_trace and df_locs_2d_2 is not None:
        overlays.append((df_locs_2d_2, "LoG"))

    for locs, label in overlays:
        fig.add_trace(
            go.Scattergl(
                x=locs.x,
                y=locs.y,
                mode='markers',
                marker_symbol='cross',
                marker=dict(size=4),
                name=label,
            )
        )
    #-------------------------------------------

    fig.show()
    
def _dots_near_z(df_tiff, z):
    """Return the rows of ``df_tiff`` whose ``z`` lies strictly between z-1 and z+1."""
    return df_tiff[(df_tiff.z > z - 1) & (df_tiff.z < z + 1)]


def plot_3d_locs_on_2d_image(df_tiff_1, df_tiff_2, tiff, channel,
                             raw_src=None, raw_image=False, add_trace=True, zmax=10):
    """Plot detected dots on every z-slice of one channel of an image stack.

    Parameters
    ----------
    df_tiff_1 : DataFrame-like with ``x``, ``y`` and ``z`` columns
        First set of dot locations; always plotted.
    df_tiff_2 : DataFrame-like with ``x``, ``y`` and ``z`` columns, or None
        Second set of dot locations; plotted only when ``add_trace`` is
        truthy and this argument is not None.
    tiff : array indexed as ``tiff[z, channel - 1]``
        Image stack to plot on. Ignored (and replaced) when ``raw_image``
        is truthy.
    channel : int
        1-based channel number; ``channel - 1`` is used as the index.
    raw_src : path, optional
        Image file read with ``tf.imread`` when ``raw_image`` is truthy.
    raw_image : bool, default False
        If truthy, load the stack from ``raw_src`` instead of using ``tiff``.
    add_trace : bool, default True
        Whether to overlay the second set of dots.
    zmax : number, default 10
        Forwarded to ``plot_2d_locs_on_2d_image``.

    Raises
    ------
    ValueError
        If ``raw_image`` is truthy but ``raw_src`` is None, or if
        ``raw_image`` is falsy but ``tiff`` is None.
    """
    if raw_image:
        if raw_src is None:
            raise ValueError("raw_src must be given when raw_image is True")
        # read raw image
        # NOTE(review): ``tf`` is not imported in the visible cells; presumably
        # it is tifffile exposed by the star import from
        # daostarfinder_dotdetection -- confirm.
        tiff = tf.imread(raw_src)
        if len(tiff.shape) == 3:
            # Add a leading axis of length 1 so the stack can be indexed as
            # tiff[z, channel - 1] like a 4D stack.
            tiff = tiff.reshape(1, tiff.shape[0], tiff.shape[1], tiff.shape[2])
        print("shape =", tiff.shape)
    elif tiff is None:
        raise ValueError("tiff must be given when raw_image is False")

    # A missing second dot set is treated the same as add_trace=False.
    show_second = bool(add_trace) and df_tiff_2 is not None

    # Plot all z's that had dot detection
    #-------------------------------------------
    for z in range(tiff.shape[0]):
        df_locs_2d_1 = _dots_near_z(df_tiff_1, z)
        df_locs_2d_2 = _dots_near_z(df_tiff_2, z) if show_second else None
        plot_2d_locs_on_2d_image(df_locs_2d_1, df_locs_2d_2, tiff[z, channel - 1],
                                 add_trace=show_second, zmax=zmax)

In [None]:
import time
# Run dot detection on one image and report how long it took.
# Start the wall-clock timer before the detection call below.
start = time.time()
# hyb and pos are substituted into the two image paths below;
# hyb is also passed to dot_detection as HybCycle.
hyb = 0
pos = 0
#image sources
# img_src is the image handed to dot_detection; img_raw is a second path
# defined here but not used by the detection call in this cell.
img_src = f"/groups/CaiLab/personal/Lex/raw/052922_4kgene/notebook_pyfiles/pre_processed_images/HybCycle_{hyb}/MMStack_Pos{pos}.ome.tif"
img_raw = f'/groups/CaiLab/personal/Lex/raw/052922_4kgene/notebook_pyfiles/dapi_aligned/fiducial_aligned/HybCycle_{hyb}/MMStack_Pos{pos}.ome.tif'
# raw_src is set to None here and not referenced again in this cell.
raw_src=None

#img_src: path to image
#HybCycle: which hybcycle are we looking at
#size_cutoff: number of standard deviations away from the mean size area
#threshold: absolute pixel intensity the spot must be greater than
#channel: which channel you want to analyze
#swapaxes: forwarded to dot_detection; NOTE(review): its meaning is not visible here -- confirm in daostarfinder_dotdetection

dots = dot_detection(img_src, HybCycle=hyb, size_cutoff=4, threshold=0.01,channel=1, swapaxes=False)
# Elapsed wall-clock time since `start`, converted from seconds to minutes.
print(f"This task took {(time.time() - start)/60} minutes")


In [None]:
# Plot dots on top of the image stored at img_raw.
# raw_image=True makes the plotting function read the file given as raw_src
# itself, so tiff is passed as None. add_trace=False draws only the first
# dot set (the second one is passed as None).
plot_3d_locs_on_2d_image(dots,None, tiff=None, channel=1, raw_src = img_raw, 
                         raw_image = True, add_trace=False, zmax=5000)

In [None]:
# Plot dots on top of the image stored at img_src.
# The stack is loaded here and passed in directly (raw_image=False), with only
# the first dot set drawn (add_trace=False).
# NOTE(review): the image is read with swapaxes=True while dot_detection was
# called with swapaxes=False -- confirm the axis order matches the dot coordinates.
tiff = pil_imread(img_src, swapaxes=True)
plot_3d_locs_on_2d_image(dots,None, tiff=tiff, channel=1, raw_src = None, 
                         raw_image = False, add_trace=False, zmax=0.1)

# Quick colocalization check

For colocalization between the 1st and last hyb, use the "hyb_coloc" scripts in colocalization_files. The necessary batch files are there for analyzing multiple channels and positions in parallel.

# Check if there are weird dropouts in a certain hyb for each threshold

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Load the dot-location table of each of the 11 thresholds (0-10) for one
# channel; comb_dots_list[i] holds the table read for Threshold_i.
channel = 4
comb_dots_list = [
    pd.read_csv(
        f"/groups/CaiLab/personal/Lex/raw/020422_20kdash_3t3/notebook_pyfiles/dots_comb/Channel_{channel}/MMStack_Pos0/Threshold_{i}/Dot_Locations/locations_z_0.csv"
    )
    for i in range(11)
]

In [None]:
# For every threshold table, count the rows belonging to each distinct "hyb"
# value. Counts are listed in the order the hyb values first appear in the
# table, one list per threshold.
final_counts = []
for df in comb_dots_list:
    hyb_column = df["hyb"]
    final_counts.append(
        [len(df[hyb_column == hyb]) for hyb in hyb_column.unique()]
    )

In [None]:
# Draw one bar chart per threshold on an x-by-x grid of subplots.
#generate subplot canvas
x = 4
fig, ax = plt.subplots(x, x, figsize=(8,10), sharex = True, constrained_layout=True)
fig.text(0.5, 0.2, 'HybCycles', ha='center')
fig.text(-0.03, 0.6, 'Total Counts', va='center', rotation='vertical')

#fill subplots (row by row, same order as iterating rows then columns)
for i, col in enumerate(ax.flat):
    # Grid cells beyond the number of thresholds are removed explicitly rather
    # than by catching IndexError, so a genuine IndexError raised while
    # plotting is no longer swallowed.
    if i >= len(final_counts):
        col.remove()
        continue
    counts = final_counts[i]
    # Bars are placed at positions 1..N in the order the counts were collected.
    col.bar(x=np.arange(1, len(counts) + 1, 1), height=counts)
    col.set_title(f"Threshold {i}")

# Without arguments, despine acts on all axes of the current figure, so one
# call after the loop replaces the per-axis calls.
sns.despine()
plt.show()

# Combine threshold channels (for dash or across channel data)

In [None]:
from pathlib import Path
import pandas as pd

In [None]:
# Merge the per-channel dot-location files of every threshold into one file.
channel = [1,2,3,4]   # channels used
num_z = 1             # number of z's
num_thresholds = 11   # number of thresholds

# For each threshold and z: read the locations file of every channel, stack
# the tables with a fresh 0..n-1 index, and write the result under
# channels_combined/Threshold_<i>/.
for i in range(num_thresholds):
    for z in range(num_z):
        thresh_df = [
            pd.read_csv(
                f"/groups/CaiLab/personal/Lex/raw/020422_20kdash_3t3/notebook_pyfiles/dots_comb/Channel_{c}/MMStack_Pos0/Threshold_{i}/Dot_Locations/locations_z_{z}.csv"
            )
            for c in channel
        ]
        df_comb = pd.concat(thresh_df, ignore_index=True)

        out_dir = Path(f"/groups/CaiLab/personal/Lex/raw/020422_20kdash_3t3/notebook_pyfiles/dots_comb/channels_combined/Threshold_{i}")
        # Create the output directory (and any missing parents) if needed.
        out_dir.mkdir(parents=True, exist_ok=True)
        output = out_dir / f"locations_z_{z}.csv"
        df_comb.to_csv(str(output))

In [None]:
# Check: load one combined locations file (Threshold_0, z 0) for inspection.
# NOTE(review): the path is relative, so it depends on the working directory;
# presumably it points at a file written by the combine step -- confirm.
import pandas as pd
df = pd.read_csv("../../dots_comb/channels_combined/Threshold_0/locations_z_0.csv")

In [None]:
# Distinct values of the "hyb" column of df, in ascending order.
sorted(df["hyb"].unique())

# Combine channels for all pos (after picking best set) if you encoded across channels

In [None]:
from tqdm import tqdm

# Merge the per-channel dot-location files of every position into one file.
channel = [1,2,3,4]   # channels used
pos_tot = 107         # total number of pos
num_z = 2             # number of z's
num_thresholds = 11   # number of thresholds (not used by the loop below)

# For each position and z: read the locations file of every channel, stack
# the tables with a fresh 0..n-1 index, and write the result under
# channels_combined_daostar/MMStack_Pos<i>/.
for i in tqdm(range(pos_tot)):
    for z in range(num_z):
        df_list = [
            pd.read_csv(
                f"/groups/CaiLab/personal/Lex/raw/112221_20kdash_3t3/notebook_pyfiles/dots_comb/final/Channel_{c}/MMStack_Pos{i}/locations_z_{z}.csv"
            )
            for c in channel
        ]
        df_comb = pd.concat(df_list, ignore_index=True)

        out_dir = Path(f"/groups/CaiLab/personal/Lex/raw/112221_20kdash_3t3/notebook_pyfiles/dots_comb/final/channels_combined_daostar/MMStack_Pos{i}")
        # Create the output directory (and any missing parents) if needed.
        out_dir.mkdir(parents=True, exist_ok=True)
        output = out_dir / f"locations_z_{z}.csv"
        df_comb.to_csv(str(output))

In [None]:
# Check: load one combined locations file (MMStack_Pos0, z 0) for inspection.
# NOTE(review): the path is relative, so it depends on the working directory;
# presumably it points at a file written by the combine step -- confirm.
import pandas as pd
df = pd.read_csv("../../dots_comb/final/channels_combined_daostar/MMStack_Pos0/locations_z_0.csv")

In [None]:
# Distinct values of the "hyb" column of df, in order of first appearance.
df["hyb"].unique()