In [None]:
#data wrangling and analysis packages
import pandas as pd
import tifffile as tf
import dash_radial_decoding as drd
from importlib import reload
import numpy as np
#plotting packages and linear regression packages
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress
#check ram
import psutil

In [None]:
#get paths to the dot locations, codebook and output directory
#NOTE(review): these are absolute cluster paths; edit them before running anywhere else
location_path = "/groups/CaiLab/personal/Lex/raw/20k_dash_063021_3t3/notebook_pyfiles/dots_comb/final/MMStack_Pos0/Dot_Locations/locations_z_0.csv"
#reference decoding (MATLAB decoder) is loaded here as a table, not stored as a path
decoded_truth = pd.read_csv("/groups/CaiLab/personal/Lex/raw/20k_dash_063021_3t3/notebook_pyfiles/decoded/Pos_0/Z_Slice_0/pre_seg_diff_0_minseeds_4_filtered.csv")
codebook_path = "/groups/CaiLab/personal/Lex/raw/20k_dash_063021_3t3/barcode_key/codebook_4channel_12hyb_140nt.csv"
#directory handed to the decoder as output_dir
output_dir = "/groups/CaiLab/personal/Lex/data_analysis_code/decoding_files/DASH_Radial_Decoding/decoded/Pos0"

In [None]:
#preview the codebook
pd.read_csv(codebook_path )

In [None]:
#12**4 = 20736; presumably the size of the barcode space (hybs = 12, num_barcodes = 4 below) - TODO confirm
12**4

In [None]:
#run the DASH radial decoder (parallel version) with the settings below
drd.radial_decoding_parallel(
    location_path,
    codebook_path,
    n_neighbors=4,
    num_barcodes=4,
    radius=np.sqrt(3),
    diff=0,
    min_seed=4,
    hybs=12,
    output_dir=output_dir,
    ignore_errors=True,
)

For sbatch submissions, request 4 cores and 32-50 GB of RAM for maximum speed. Decoding should generally take less than 5 min.

In [None]:
#read in output
#the path is built from output_dir (the directory given to the decoder) instead of a
#"./decoded/Pos0" relative path, so the cell no longer depends on the working directory
gene_locations = pd.read_csv(f"{output_dir}/diff_0_minseed_4_z_0_finalgenes.csv", index_col=0)

In [None]:
#display the DASH decoder output
gene_locations

In [None]:
#display the reference (MATLAB decoder) output
decoded_truth

In [None]:
#DASH decoder dots for the gene stored at index label 0, ordered by intensity
first_gene = gene_locations["genes"][0]
gene_locations.loc[gene_locations["genes"] == first_gene].sort_values(by="intensity")

In [None]:
#reference dots for the same gene (index label 0 of the DASH output), ordered by intensity
first_gene = gene_locations["genes"][0]
decoded_truth.loc[decoded_truth["gene"] == first_gene].sort_values(by="intensity")

In [None]:
#number of decoded dots per gene, one two-column table per decoder
gene_counts_dash_decoder = (
    gene_locations.groupby("genes").size()
    .rename("DASH Decoder")
    .rename_axis("Genes")
    .reset_index()
)
gene_counts_matlab_decoder = (
    decoded_truth.groupby("gene").size()
    .rename("MATLAB Decoder")
    .rename_axis("Genes")
    .reset_index()
)

In [None]:
#number of distinct genes called by the DASH decoder
gene_counts_dash_decoder.shape[0]

In [None]:
#number of distinct genes called by the MATLAB decoder
gene_counts_matlab_decoder.shape[0]

In [None]:
#inner join on the shared "Genes" column: genes called by only one decoder are dropped
comb = gene_counts_matlab_decoder.merge(gene_counts_dash_decoder, on="Genes", how="inner")

In [None]:
#display per-gene counts from both decoders
comb

In [None]:
#plotting packages and linear regression packages
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress

In [None]:
#pearson r between MATLAB and DASH per-gene counts, rounded to 2 decimals
linreg = linregress(comb["MATLAB Decoder"], comb["DASH Decoder"])
pearsonr = round(linreg.rvalue, 2)
pearsonr

In [None]:
#per-gene counts of the two decoders against each other, full range
fig, ax = plt.subplots()
ax.scatter(comb["MATLAB Decoder"], comb["DASH Decoder"], s=5, alpha=0.5)
ax.set_ylabel("DASH Decoder Counts", fontsize=12)
ax.set_xlabel("MATLAB Decoder Counts", fontsize=12)
ax.tick_params(axis="both", labelsize=12, labelrotation=0)
ax.annotate(f"Pearson's r= {pearsonr}", (1,100), fontsize=12)
sns.despine(ax=ax)

In [None]:
#same scatter restricted to counts between 0 and 40 on both axes
fig, ax = plt.subplots()
ax.scatter(comb["MATLAB Decoder"], comb["DASH Decoder"], s=5, alpha=0.5)
ax.set_ylabel("DASH Decoder Counts", fontsize=12)
ax.set_xlabel("MATLAB Decoder Counts", fontsize=12)
ax.set_xlim(0,40)
ax.set_ylim(0,40)
ax.tick_params(axis="both", labelsize=12, labelrotation=0)
ax.annotate(f"Pearson's r= {pearsonr}", (1,35), fontsize=12)
sns.despine(ax=ax)

In [None]:
#separate fake genes (names starting with "fake") for each decoder
is_fake_dash = gene_locations["genes"].str.startswith("fake")
is_fake_mat = decoded_truth["gene"].str.startswith("fake")
fake_genes = gene_locations.loc[is_fake_dash]
fake_genes_mat = decoded_truth.loc[is_fake_mat]

In [None]:
#separate true genes: every row whose gene name does not start with "fake"
#selected with a boolean mask rather than drop(index), because drop removes every row
#that shares an index label with a fake row and would lose real rows on a non-unique index
true_genes = gene_locations[~gene_locations["genes"].str.startswith("fake")]
true_genes_mat = decoded_truth[~decoded_truth["gene"].str.startswith("fake")]

In [None]:
#difference in number of true dots (dash decoder minus matlab decoder)
true_genes.shape[0] - true_genes_mat.shape[0]

In [None]:
#fold change in true dots (dash decoder over matlab decoder)
true_genes.shape[0] / true_genes_mat.shape[0]

In [None]:
#fraction (0-1, not a percentage) of dash decoder dots that are fake
n_fake_dash = len(fake_genes)
n_fake_dash / (len(true_genes) + n_fake_dash)

In [None]:
#fraction (0-1, not a percentage) of matlab decoder dots that are fake
n_fake_mat = len(fake_genes_mat)
n_fake_mat / (len(true_genes_mat) + n_fake_mat)

In [None]:
#load codebook with the barcode names as index
codebook = pd.read_csv(codebook_path, index_col=0)
#calculate false positive rate for dash output
#off-target barcodes are the codebook entries whose name starts with "fake"
fakebook = codebook[codebook.index.str.startswith("fake")]

#number of off-target and on-target barcodes
#M_on excludes the fake entries; len(codebook) alone would count them as on-target
M_off = len(fakebook)
M_on = len(codebook) - M_off
#number of decoded off-target and on-target dots
N_off = len(fake_genes)
N_on = len(true_genes)
#average number of dots per off-target barcode
false_count_freq = N_off/M_off
#expected number of false dots among the on-target barcodes
false_positive_counts = M_on*false_count_freq
#expected false dots as a fraction of all on-target dots
norm_false_positive_rate = false_positive_counts/N_on

In [None]:
#normalized false positive rate for the dash decoder
norm_false_positive_rate

In [None]:
#calculate false positive rate for matlab decoder
#off-target barcodes are the codebook entries whose name starts with "fake"
fakebook = codebook[codebook.index.str.startswith("fake")]

#number of off-target and on-target barcodes
#M_on excludes the fake entries; len(codebook) alone would count them as on-target
M_off = len(fakebook)
M_on = len(codebook) - M_off
#number of decoded off-target and on-target dots
N_off = len(fake_genes_mat)
N_on = len(true_genes_mat)
#average number of dots per off-target barcode
false_count_freq = N_off/M_off
#expected number of false dots among the on-target barcodes
false_positive_counts = M_on*false_count_freq
#expected false dots as a fraction of all on-target dots
norm_false_positive_rate_mat = false_positive_counts/N_on

In [None]:
#normalized false positive rate for the matlab decoder
norm_false_positive_rate_mat

In [None]:
#number of true genes found by the matlab decoder but not by the dash decoder
len(set(true_genes_mat["gene"]).difference(true_genes["genes"]))

In [None]:
#the true genes found by the matlab decoder but not by the dash decoder
set(true_genes_mat["gene"]).difference(true_genes["genes"])

# Check RNA-seq

In [None]:
#load the FPKM table (NIH3T3, per the file name)
rnaseq = pd.read_csv("/groups/CaiLab/personal/Lex/data_analysis_code/post_analysis_files/nih3t3_FPKM.csv")

In [None]:
#keep the gene id and "3T3 B1" columns, renamed to Genes/FPKM, with lower-case gene names
#(presumably lower-cased to match the decoder gene names - TODO confirm)
rnaseq = (
    rnaseq[["tracking_id", "3T3 B1"]]
    .rename(columns={"tracking_id": "Genes", "3T3 B1": "FPKM"})
    .assign(Genes=lambda fpkm: fpkm["Genes"].str.lower())
)

In [None]:
#attach FPKM to each decoder's per-gene counts (inner join on "Genes")
comb_matlab = rnaseq.merge(gene_counts_matlab_decoder, on="Genes")
comb_dash = rnaseq.merge(gene_counts_dash_decoder, on="Genes")

In [None]:
#order both tables by ascending FPKM
sort_matlab = comb_matlab.sort_values(by="FPKM")
sort_dash = comb_dash.sort_values(by="FPKM")

In [None]:
#pearson r between RNA-seq FPKM and MATLAB decoder counts, rounded to 2 decimals
#kept under its own name so the decoder-vs-decoder `pearsonr` that the scatter plots
#annotate with is not overwritten when cells are re-run
linreg_matlab_fpkm = linregress(x = sort_matlab["FPKM"], y = sort_matlab["MATLAB Decoder"])
pearsonr_matlab_fpkm = round(linreg_matlab_fpkm.rvalue, 2)
pearsonr_matlab_fpkm

In [None]:
#pearson r between RNA-seq FPKM and DASH decoder counts, rounded to 2 decimals
#kept under its own name so the decoder-vs-decoder `pearsonr` that the scatter plots
#annotate with is not overwritten when cells are re-run
linreg_dash_fpkm = linregress(x = sort_dash["FPKM"], y = sort_dash["DASH Decoder"])
pearsonr_dash_fpkm = round(linreg_dash_fpkm.rvalue, 2)
pearsonr_dash_fpkm

# Overlay on top of raw

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

def plot_2d_locs_on_2d_image(df_locs_2d_1, img_2d, zmax=1000, name="Gaussian"):
    """
    Overlay dot locations on an image in an interactive plotly figure.

    Parameters
    ----------
    df_locs_2d_1 : object exposing ``x`` and ``y`` (e.g. a DataFrame with those columns)
        Dot coordinates, drawn as cross markers on top of the image.
    img_2d : array-like
        Either a single 2D image or a stack. For input with more than two
        dimensions, axis 0 is shown as animation frames.
    zmax : number
        Passed to px.imshow as zmax.
    name : str
        Legend label of the dot trace. Defaults to "Gaussian".

    Returns
    -------
    None. The figure is displayed with fig.show().
    """
    #a plain 2D image has no frame axis to animate over
    animation_frame = 0 if np.ndim(img_2d) > 2 else None

    #For Plotting 2d image
    #-------------------------------------------
    fig = px.imshow(
        img_2d,
        width=700,
        height=700,
        binary_string=True,
        binary_compression_level=4,
        binary_backend='pil',
        zmax = zmax,
        animation_frame=animation_frame
    )
    #-------------------------------------------

    #For Plotting 2d dots
    #-------------------------------------------
    dots = go.Scattergl(
        x=df_locs_2d_1.x,
        y=df_locs_2d_1.y,
        mode='markers',
        marker_symbol='cross',
        marker=dict(
            #maxdisplayed=1000,
            size=4
            ),
        name = name
    )
    #add_trace with row/col replaces the deprecated append_trace(trace, 1, 1)
    fig.add_trace(dots, row=1, col=1)
    #-------------------------------------------
    fig.show()

In [None]:
#dots of the 4th unique gene in the DASH output, taken from both decoders
gene_of_interest = gene_locations["genes"].unique()[3]
rik = gene_locations.loc[gene_locations["genes"] == gene_of_interest]
rik_mat = decoded_truth.loc[decoded_truth["gene"] == gene_of_interest]

In [None]:
#DASH decoder dots for the selected gene
rik

In [None]:
#name of the selected gene (4th unique entry of the DASH output)
gene_locations["genes"].unique()[3]

In [None]:
#make collection of images across hyb cycles
#one entry per listed hyb cycle: slice z of that cycle's image for position pos
pos=0
z=0
hyb_cycles = [0,3,6,11]
image_hyb = [
    tf.imread(f'/groups/CaiLab/personal/Lex/raw/20k_dash_063021_3t3/notebook_pyfiles/aberr_corrected/HybCycle_{i}/MMStack_Pos{pos}.ome.tif')[z]
    for i in hyb_cycles
]

In [None]:
#take certain channels
#channel indices are paired in order with the four loaded hyb images
image_hyb_ch = [hyb_img[ch] for hyb_img, ch in zip(image_hyb, [2,2,1,0])]

In [None]:
#collect the selected planes into one array (replaces the list of per-hyb images)
image_hyb = np.asarray(image_hyb_ch)

In [None]:
#reference (MATLAB decoder) dots for the selected gene
rik_mat

In [None]:
#DASH decoder dots for the selected gene
rik

In [None]:
#overlay the DASH decoder dots of the selected gene on the chosen hyb/channel images
plot_2d_locs_on_2d_image(rik, image_hyb, zmax=4000)

In [None]:
#overlay the MATLAB decoder dots of the selected gene on the same images
plot_2d_locs_on_2d_image(rik_mat, image_hyb, zmax=4000)

In [None]:
#reference (MATLAB decoder) dots called as pmp2
mat = decoded_truth.loc[decoded_truth["gene"] == "pmp2"]

In [None]:
#display the pmp2 dots from the MATLAB decoder
mat

In [None]:
#DASH decoder dots called as pmp2
gene_locations.loc[gene_locations["genes"] == "pmp2"]

In [None]:
#make collection of images across hyb cycles
#one entry per listed hyb cycle: slice z of that cycle's image for position pos
pos=0
z=0
hyb_cycles = [2,3,6,11]
image_hyb = [
    tf.imread(f'/groups/CaiLab/personal/Lex/raw/20k_dash_063021_3t3/notebook_pyfiles/aberr_corrected/HybCycle_{i}/MMStack_Pos{pos}.ome.tif')[z]
    for i in hyb_cycles
]

In [None]:
#take certain channels
#channel indices are paired in order with the four loaded hyb images
image_hyb_ch = [hyb_img[ch] for hyb_img, ch in zip(image_hyb, [2,3,3,1])]

In [None]:
#collect the selected planes into one array (replaces the list of per-hyb images)
image_hyb = np.asarray(image_hyb_ch)

In [None]:
#overlay the MATLAB decoder pmp2 dots on the chosen hyb/channel images
plot_2d_locs_on_2d_image(mat, image_hyb, zmax=4000)