# plot intensity variance vs time for 5D images
This notebook looks at a single batch of data and produces various diagnostic plots to help decide how to filter the data.


In [26]:
import napari
import torch
from skimage.measure import regionprops_table, regionprops
import scipy as sp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
from matplotlib_scalebar.scalebar import ScaleBar
import gc
import seaborn as sns
from sklearn.mixture import GaussianMixture
from bioio import BioImage
import bioio_nd2
import bioio_tifffile
from bioio.writers import OmeTiffWriter


# Relative voxel scale along the (Z,X,Y) axes; the two in-plane axes share
# the same scale, so only the Z factor differs from 1.
anisotropy = (3.2, 1, 1)

# Channel layout used by this notebook.
nucChannel = 0   # red emerin rings
spotChannel = 1  # green spots

# NOTE: the order is swapped relative to Lucien's original scripts, where
#   channel 0 is green spots
#   channel 1 is red emerin

# Show every column when a DataFrame is displayed (no truncation).
pd.set_option("display.max_columns", None)

In [27]:
# Data locations on the analysis server.
base_path = '/mnt/external.data/MeisterLab/jsemple/lhinder/segmentation_Dario/'

# One row per dataset directory; add entries to the list to pool several
# acquisitions into the same tables.
df_paths = pd.DataFrame.from_dict(
    {'data_path': [os.path.join(base_path, 'DPY27/1268/20241010_tl/')]}
)

# Trailing separator kept so that later cells can keep appending to this path.
output_path = os.path.join(base_path, 'timelapse/')

# exist_ok=True replaces the "check, then create" pattern and creates any
# missing parent directory as well.
os.makedirs(os.path.join(output_path, 'plots'), exist_ok=True)

# Read every per-dataset table first and concatenate once at the end:
# growing a DataFrame with pd.concat inside the loop is quadratic, and
# concatenating onto an empty DataFrame emits a FutureWarning on recent pandas.
nuclei_tables = []
dist_tables = []
for path in df_paths['data_path']:
    nuclei_tables.append(pd.read_csv(os.path.join(path, "nuclei_analysis_v001.csv")))
    # NOTE(review): unpickling can execute arbitrary code; only load pickle
    # files produced by a trusted pipeline.
    dist_tables.append(pd.read_pickle(os.path.join(path, "dist_analysis_v001.pkl")))

# Fall back to an empty frame when no dataset is listed, as the original
# accumulate-from-empty loop did.
df = pd.concat(nuclei_tables) if nuclei_tables else pd.DataFrame()
dist = pd.concat(dist_tables) if dist_tables else pd.DataFrame()

# Only the last expression of a cell is rendered, so the former mid-cell
# `df.head()` never produced any output and has been dropped.
dist.head()

Unnamed: 0,label,bb_dimZ,bb_dimY,bb_dimX,centroid_z,centroid_y,centroid_x,major_axis_length,solidity,mean,median,std,sum,variance,max,min,volume,id,timepoint,intensity_dist_nuclei,intensity_dist_spots,intensity_dist,zproj_spots,zproj_nuclei
0,1,15,62,62,7.305231,466.06973,753.641967,65.735915,0.95324,107.3838,107.0,6.99599,4154894,48.943879,142,84,38692,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,"[108.00420168067227, 111.53744493392071, 113.3...","[106.8655462184874, 106.48017621145374, 105.17...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
0,2,7,56,46,2.810326,484.216725,247.815991,58.494876,0.957811,108.796521,108.0,8.401155,1363438,70.579407,159,86,12532,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,"[105.9738219895288, 107.92391304347827, 108.99...","[106.79057591623037, 106.33695652173913, 107.4...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
0,3,8,66,65,3.140306,495.362486,409.269104,70.796049,0.953176,106.554075,106.0,6.43893,2518299,41.45982,139,85,23634,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,"[106.7520325203252, 108.14285714285714, 109.27...","[106.34146341463415, 105.73949579831933, 106.3...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
0,4,2,43,44,0.452809,491.301138,336.767169,50.233617,0.946472,109.892398,109.0,8.778071,299237,77.054525,157,87,2723,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,"[107.24025974025975, 107.13793103448276, 104.9...","[109.57792207792208, 109.98620689655172, 110.0...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 107, 113...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 108..."
0,5,9,62,63,3.554125,500.626306,706.707218,66.476736,0.955589,109.380087,108.0,8.733336,2480631,76.27116,176,86,22679,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,"[106.71615720524018, 108.47111111111111, 109.1...","[107.09170305676857, 105.68888888888888, 106.1...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."


## Add some variables
- Radius might scale more linearly than volume.
- The coefficient of variation (std/mean) is independent of the mean signal.

In [28]:
def radiusFromVolume(volume):
    """Return the radius of the sphere whose volume equals `volume`.

    Inverts V = 4/3 * pi * r**3, i.e. r = (3V / (4*pi)) ** (1/3). The
    arithmetic is elementwise, so scalars, NumPy arrays and pandas Series
    are all accepted and the result has the same shape as the input.
    """
    scaled = 3 * volume / (4 * np.pi)
    return scaled ** (1 / 3)

def coefficientOfVariation(std, mean):
    """Return the coefficient of variation, i.e. `std` divided by `mean`.

    The division is elementwise, so scalars, NumPy arrays and pandas Series
    are all accepted.
    """
    ratio = std / mean
    return ratio

# Spacing between consecutive timepoints; the plots below label the
# resulting axis "Time (min)".
timeInterval = 5

# Derive both columns in one step:
#   cv   - per-nucleus coefficient of variation of the intensity (std / mean)
#   time - elapsed time, computed as timepoint index * timeInterval
df = df.assign(
    cv=coefficientOfVariation(df['std'], df['mean']),
    time=df['timepoint'] * timeInterval,
)

df.head()


Unnamed: 0,label,bb_dimZ,bb_dimY,bb_dimX,centroid_z,centroid_y,centroid_x,major_axis_length,solidity,mean,median,std,sum,variance,max,min,volume,id,timepoint,zproj_spots,zproj_nuclei,filename,date,experiment,strain,protein,raw_filepath,denoised_filepath,cv,time
0,1,15,62,62,7.305231,466.06973,753.641967,65.735915,0.95324,107.3838,107.0,6.99599,4154894,48.943879,142,84,38692,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241107_1273_E_30minHS_3h_5min_5um,20241108,hs,1273,SDC1,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,0.065149,0
1,2,7,56,46,2.810326,484.216725,247.815991,58.494876,0.957811,108.796521,108.0,8.401155,1363438,70.579407,159,86,12532,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241107_1273_E_30minHS_3h_5min_5um,20241108,hs,1273,SDC1,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,0.077219,0
2,3,8,66,65,3.140306,495.362486,409.269104,70.796049,0.953176,106.554075,106.0,6.43893,2518299,41.45982,139,85,23634,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241107_1273_E_30minHS_3h_5min_5um,20241108,hs,1273,SDC1,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,0.060429,0
3,4,2,43,44,0.452809,491.301138,336.767169,50.233617,0.946472,109.892398,109.0,8.778071,299237,77.054525,157,87,2723,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241107_1273_E_30minHS_3h_5min_5um,20241108,hs,1273,SDC1,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,0.079879,0
4,5,9,62,63,3.554125,500.626306,706.707218,66.476736,0.955589,109.380087,108.0,8.733336,2480631,76.27116,176,86,22679,SDC1_hs_20241107_1273_E_30minHS_3h_5min_5um,0,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...,20241107_1273_E_30minHS_3h_5min_5um,20241108,hs,1273,SDC1,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,/mnt/external.data/MeisterLab/Dario/Imaging/SD...,0.079844,0


# Plot by protein

In [29]:
# One violin plot of the per-nucleus coefficient of variation (cv) versus
# time for every protein found in `df`, saved as <protein>_cv.png.
proteins = df.protein.unique()

# Heat-shock annotation, expressed in x-axis category positions (violin i is
# drawn at x = i, so these equal timepoint indices when the timepoints are
# contiguous and start at 0).
hs_start, hs_end = 1, 7
hs_label_pos = 3

for p in proteins:
    df_protein = df[df['protein'] == p]

    # violinplot treats x as categorical. Fix the category order explicitly
    # and remember each time's position, so the overlays below land on the
    # right violin even if some timepoints are missing.
    time_order = sorted(df_protein['time'].unique())
    time_to_pos = {t: i for i, t in enumerate(time_order)}

    fig, ax = plt.subplots(figsize=(12, 5), dpi=200)
    sns.violinplot(data=df_protein, y="cv", x="time", order=time_order,
                   color="skyblue", inner=None, ax=ax)
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_xlabel("Time (min)")
    ax.set_ylabel("CV of mSG::"+p+" intensity")

    # Mark the heat-shock window: red bar at the top of the data, label just
    # above it, and a shaded band over the full height.
    cv_max = df_protein.cv.max()
    ax.hlines(y=cv_max, xmin=hs_start, xmax=hs_end, color='red', linewidth=2)
    # Raw string: '\c' is not a valid Python escape sequence and raises a
    # SyntaxWarning on recent Python versions when written in a normal string.
    ax.text(x=hs_label_pos, y=cv_max*1.01, s=r'37$^\circ$C', color='red', ha='center')
    ax.axvspan(hs_start, hs_end, color='pink', alpha=0.3)

    # Reference line at the median cv of the first timepoint (time == 0).
    t0_median = df_protein.loc[df_protein['time'] == 0, 'cv'].median()
    ax.axhline(y=t0_median, color='grey', linestyle='--', linewidth=0.5)

    # Median cv per timepoint, drawn at the position of the matching violin.
    medians_df = df_protein.groupby('time')['cv'].median().reset_index(name='Median')
    ax.scatter(x=medians_df['time'].map(time_to_pos), y=medians_df.Median, color='black', s=10)

    ax.set_title(p+'::mSG signal upon heatshock')
    fig.savefig(output_path+"/plots/"+p+"_cv.png")
    # Close explicitly so figures do not accumulate in memory across the loop.
    plt.close(fig)


In [None]:
# One violin plot of the per-nucleus summed intensity versus time for every
# protein found in `df`, saved as <protein>_sum.png.
proteins = df.protein.unique()

# Heat-shock annotation, expressed in x-axis category positions (violin i is
# drawn at x = i, so these equal timepoint indices when the timepoints are
# contiguous and start at 0).
hs_start, hs_end = 1, 7
hs_label_pos = 3

for p in proteins:
    df_protein = df[df['protein'] == p]

    # violinplot treats x as categorical. Fix the category order explicitly
    # and remember each time's position, so the overlays below land on the
    # right violin even if some timepoints are missing.
    time_order = sorted(df_protein['time'].unique())
    time_to_pos = {t: i for i, t in enumerate(time_order)}

    fig, ax = plt.subplots(figsize=(12, 5), dpi=200)
    # XKCD colour names are only recognised by matplotlib with the "xkcd:"
    # prefix; the bare name "light grey blue" is rejected as an invalid colour.
    sns.violinplot(data=df_protein, y="sum", x="time", order=time_order,
                   color="xkcd:light grey blue", inner=None, ax=ax)
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_xlabel("Time (min)")
    ax.set_ylabel("Sum of mSG::"+p+" intensity")

    # add box for heatshock: red bar at the top of the data, label just above
    # it, and a shaded band over the full height
    sum_max = df_protein['sum'].max()
    ax.hlines(y=sum_max, xmin=hs_start, xmax=hs_end, color='red', linewidth=2)
    # Raw string: '\c' is not a valid Python escape sequence and raises a
    # SyntaxWarning on recent Python versions when written in a normal string.
    ax.text(x=hs_label_pos, y=sum_max*1.01, s=r'37$^\circ$C', color='red', ha='center')
    ax.axvspan(hs_start, hs_end, color='pink', alpha=0.3)

    # add horizontal line at the median of the first timepoint (time == 0)
    t0_median = df_protein.loc[df_protein['time'] == 0, 'sum'].median()
    ax.axhline(y=t0_median, color='grey', linestyle='--', linewidth=0.5)

    # add median points, drawn at the position of the matching violin
    medians_df = df_protein.groupby('time')['sum'].median().reset_index(name='Median')
    ax.scatter(x=medians_df['time'].map(time_to_pos), y=medians_df.Median, color='black', s=10)

    ax.set_title(p+'::mSG signal upon heatshock')
    fig.savefig(output_path+"/plots/"+p+"_sum.png")
    # Close explicitly so figures do not accumulate in memory across the loop.
    plt.close(fig)
