# plot intensity variance vs time for 5D images
This notebook is designed for looking at a single batch of data and do various diagnostic plots to decide how to filter the data.


In [2]:
import napari
import torch
from skimage.measure import regionprops_table, regionprops
import scipy as sp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
from matplotlib_scalebar.scalebar import ScaleBar
import gc
import seaborn as sns
from sklearn.mixture import GaussianMixture
from bioio import BioImage
import bioio_nd2
import bioio_tifffile
from bioio.writers import OmeTiffWriter


anisotropy = (3.2,1,1) # Relative scale of (Z,X,Y) axes

nucChannel = 0 # red emerin rings
spotChannel = 1 # green spots

# in lucien's original scripts:
# channel 0 is green spots
# channel 1 is red emerin

pd.set_option('display.max_columns', None)

In [None]:
# on server
base_path = '/mnt/external.data/MeisterLab/jsemple/lhinder/segmentation_Dario/'
df_paths = pd.DataFrame.from_dict({'data_path': [base_path+'/SDC1/1273/20241108_hs/',
                                                base_path+'/DPY27/1268/20241107_hs/']})

output_path=base_path+'/heatshock/'

if not os.path.exists(output_path+"/plots"):
    os.makedirs(output_path+"/plots")

df=pd.DataFrame()
dist=pd.DataFrame()
for path in df_paths['data_path']:
    df_tmp=pd.read_csv(path+"nuclei_analysis_v001.csv")
    df = pd.concat([df,df_tmp])
    dist_tmp=pd.read_pickle(path+"dist_analysis_v001.pkl")
    dist = pd.concat([dist,dist_tmp])

df.head()
dist.head()

Unnamed: 0,label,bb_dimZ,bb_dimY,bb_dimX,centroid_z,centroid_y,centroid_x,major_axis_length,solidity,mean,median,std,sum,variance,max,min,volume,id,timepoint,intensity_dist_nuclei,intensity_dist_spots,intensity_dist,zproj_spots,zproj_nuclei
0,1,7,94,86,2.595147,636.169992,693.925964,92.824887,0.872761,107.571959,107.0,6.484235,3391206,42.045298,136,84,31525,DPY27_tl_20241010_1268_E_early_3h_5min_5um,1,"[106.1374269005848, 105.04518072289157, 104.00...","[107.00877192982456, 107.25602409638554, 107.9...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
0,1,12,109,105,12.47402,496.429384,694.701003,101.528729,0.830031,107.529925,107.0,6.489472,6841699,42.113248,133,85,63626,DPY27_tl_20241010_1268_E_early_3h_5min_5um,2,"[106.60734463276836, 106.06997084548105, 105.3...","[107.32768361581921, 106.99708454810495, 107.9...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
0,2,12,79,86,11.454992,719.387144,850.006206,90.298296,0.924498,106.020521,106.0,6.155991,5347251,37.89622,131,83,50436,DPY27_tl_20241010_1268_E_early_3h_5min_5um,2,"[107.51757188498402, 106.41447368421052, 105.7...","[106.10543130990415, 105.83223684210526, 106.3...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
0,3,4,92,78,15.633955,713.228358,532.298057,91.633967,0.873681,107.080584,107.0,6.356482,1878943,40.404859,130,84,17547,DPY27_tl_20241010_1268_E_early_3h_5min_5um,2,"[103.67024128686327, 102.51445086705202, 103.1...","[107.02144772117963, 106.85260115606937, 106.3...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
0,1,13,98,95,5.480531,732.856621,844.87412,103.079933,0.919313,107.605859,107.0,6.522628,8190958,42.544679,133,84,76120,DPY27_tl_20241010_1268_E_early_3h_5min_5um,3,"[109.30933333333333, 108.14402173913044, 106.9...","[107.288, 107.23097826086956, 107.581749049429...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."


## Add some variables
Radius might scale more linearly than volume
Coefficient of variation is independent of the mean signal

In [4]:
def radiusFromVolume(volume):
    return (3*volume/(4*np.pi))**(1/3)

def coefficientOfVariation(std,mean):
    return std/mean

timeInterval=5

df['cv'] = coefficientOfVariation(df['std'],df['mean'])
df['time'] = df['timepoint']*timeInterval

df.head()


Unnamed: 0,label,bb_dimZ,bb_dimY,bb_dimX,centroid_z,centroid_y,centroid_x,major_axis_length,solidity,mean,median,std,sum,variance,max,min,volume,id,timepoint,zproj_spots,zproj_nuclei,filename,date,experiment,strain,protein,raw_filepath,denoised_filepath,cv,time
0,1,7,94,86,2.595147,636.169992,693.925964,92.824887,0.872761,107.571959,107.0,6.484235,3391206,42.045298,136,84,31525,DPY27_tl_20241010_1268_E_early_3h_5min_5um,1,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241010_1268_E_early_3h_5min_5um,20241010,tl,1268,DPY27,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,0.060278,5
1,1,12,109,105,12.47402,496.429384,694.701003,101.528729,0.830031,107.529925,107.0,6.489472,6841699,42.113248,133,85,63626,DPY27_tl_20241010_1268_E_early_3h_5min_5um,2,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241010_1268_E_early_3h_5min_5um,20241010,tl,1268,DPY27,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,0.06035,10
2,2,12,79,86,11.454992,719.387144,850.006206,90.298296,0.924498,106.020521,106.0,6.155991,5347251,37.89622,131,83,50436,DPY27_tl_20241010_1268_E_early_3h_5min_5um,2,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241010_1268_E_early_3h_5min_5um,20241010,tl,1268,DPY27,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,0.058064,10
3,3,4,92,78,15.633955,713.228358,532.298057,91.633967,0.873681,107.080584,107.0,6.356482,1878943,40.404859,130,84,17547,DPY27_tl_20241010_1268_E_early_3h_5min_5um,2,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241010_1268_E_early_3h_5min_5um,20241010,tl,1268,DPY27,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,0.059362,10
4,1,13,98,95,5.480531,732.856621,844.87412,103.079933,0.919313,107.605859,107.0,6.522628,8190958,42.544679,133,84,76120,DPY27_tl_20241010_1268_E_early_3h_5min_5um,3,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241010_1268_E_early_3h_5min_5um,20241010,tl,1268,DPY27,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,/mnt/external.data/MeisterLab/Dario/Imaging/DP...,0.060616,15


# Plot by protein

In [5]:
proteins = df.protein.unique()

for p in proteins:
    df_protein = df[df['protein'] == p]
    plt.figure(figsize = (12,5),dpi = 200)
    sns.violinplot(data=df_protein, y="cv", x="time",color="skyblue", inner=None)
    plt.xticks(rotation=45)
    plt.xlabel("Time (min)")
    plt.ylabel("CV of mSG::"+p+" intensity")
    
    # add box for heatshock
    plt.hlines(y=df_protein.cv.max(), xmin=1, xmax=7, color='red', linewidth=2)
    plt.text(x=3, y=df_protein.cv.max()*1.01, s='37$^\circ$C', color='red', ha='center')
    plt.axvspan(1, 7, color='pink', alpha=0.3)

    # add horizontal line for t=0
    t0_mean = df_protein[df_protein['time'] == 0]['cv'].median()
    plt.axhline(y=t0_mean, color='grey', linestyle='--', linewidth=0.5)
    
    # add median points
    medians_df = df_protein.groupby(['timepoint','time'])['cv'].median().reset_index(name='Median')
    plt.scatter(x=medians_df.timepoint, y=medians_df.Median, color='black', s=10)

    plt.title(p+'::mSG signal upon heatshock') 
    plt.savefig(output_path+"/plots/"+p+"_cv.png")
    plt.close()


In [None]:
proteins = df.protein.unique()

for p in proteins:
    df_protein = df[df['protein'] == p]
    plt.figure(figsize = (12,5),dpi = 200)
    sns.violinplot(data=df_protein, y="sum", x="time",color="light grey blue", inner=None)
    plt.xticks(rotation=45)
    plt.xlabel("Time (min)")
    plt.ylabel("Sum of mSG::"+p+" intensity")

    # add box for heatshock
    plt.hlines(y=df_protein['sum'].max(), xmin=1, xmax=7, color='red', linewidth=2)
    plt.text(x=3, y=df_protein['sum'].max()*1.01, s='37$^\circ$C', color='red', ha='center')
    plt.axvspan(1, 7, color='pink', alpha=0.3)

    # add horizontal line for t=0
    t0_mean = df_protein[df_protein['time'] == 0]['sum'].median()
    plt.axhline(y=t0_mean, color='grey', linestyle='--', linewidth=0.5)

    # add median points
    medians_df = df_protein.groupby(['timepoint','time'])['sum'].median().reset_index(name='Median')
    plt.scatter(x=medians_df.timepoint, y=medians_df.Median, color='black', s=10)

    plt.title(p+'::mSG signal upon heatshock') 
    plt.savefig(output_path+"/plots/"+p+"_sum.png")
    plt.close()
