This is a testing notebook for processing IRE1 cluster data from the CSV output files produced by CellProfiler.

In [None]:
# load modules

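# Debugging aid: autoreload re-imports edited modules before each cell runs (comment out the two autoreload lines when not needed)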
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os, sys, inspect
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from scipy import stats
import pprint
import re
import time
import seaborn as sns
import warnings


# Disable future warnings for seaborn
warnings.simplefilter(action='ignore', category=FutureWarning)


# Add source code directory (src) to path to enable module import.
# The kernel's working directory is used as the anchor, which is the same
# assumption the relative data paths in this notebook rely on. Asking `inspect`
# for the file of the current frame is avoided: inside a notebook cell it yields
# the kernel's pseudo-filename for the cell (on newer kernels this may be a
# temp-file path), so '../src' would not reliably point at the project.
curr_dir = os.getcwd()
parent_dir = os.path.dirname(curr_dir)
module_dir = os.path.join(parent_dir, 'src')
# Guard keeps sys.path free of duplicate entries when this cell is re-run.
if module_dir not in sys.path:
    sys.path.insert(0, module_dir)

import cellprofiler_tools as cpt


In [None]:
# Read every CellProfiler CSV export used by this notebook into memory
data_dir = '../data/processed/CellProfiler_FociQuant04/csv_outputs'

# File names of the individual exports (all located in data_dir)
image_file_csv = 'FociQuant04_Image.csv'
er_masks_csv = 'FociQuant04_ER_masks_accepted.csv'
ire1_clust_csv = 'FociQuant04_Clusters_in_ER_masks_masked.csv'
nuclei_all_csv = 'FociQuant04_Nuclei_all.csv'
er_masks_all_csv = 'FociQuant04_ER_masks_all.csv'
nuclei_accepted_csv = 'FociQuant04_Nuclei_accepted.csv'


def _read_cp_table(csv_name):
    """Load one CSV export from data_dir through cellprofiler_tools."""
    return cpt.get_data_cp_csv(os.path.join(data_dir, csv_name))


# Image table; the full path is kept in its own variable for reuse
image_full_file = os.path.join(data_dir, image_file_csv)
images = cpt.get_data_cp_csv(image_full_file)
# Variant restricted to selected columns:
#images = cpt.get_data_cp_csv(image_full_file, data_fields=['ImageNumber','FileName_DNA_DAPI'])

# Object tables, one per export
er_masks = _read_cp_table(er_masks_csv)
ire1_clust = _read_cp_table(ire1_clust_csv)
nuclei_all = _read_cp_table(nuclei_all_csv)
er_masks_all = _read_cp_table(er_masks_all_csv)
nuclei_accepted = _read_cp_table(nuclei_accepted_csv)

print('Loaded')

In [None]:
# Box plot of a per-cell IRE1 intensity measurement, grouped by condition

# Table of cells included in the analysis: a copy of er_masks, so the loaded
# table itself is not modified by the columns added to `cells` later on
cells = er_masks.copy()
cells.index.name = 'Cell_ID'

# Measurement to plot (alternatives kept for quick switching)
result_name = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'
#result_name = 'Intensity_MeanIntensity_IRE1_mNeonGreen'
#result_name = 'Intensity_StdIntensity_IRE1_mNeonGreen'

# Optional removal of high-intensity outliers (currently disabled)
#max_median_intensity = 0.005
#cells = cells.loc[cells[result_name] < max_median_intensity]

# Bring the per-image condition value onto the cell table
# NOTE(review): presumably adds a `condition` column to `cells` in place - confirm in cellprofiler_tools
condition = 'Metadata_hours_Tm'
cpt.add_image_prop_to_cells(cells, images, condition)

box_style = dict(color='steelblue', showfliers=False)
ax = sns.boxplot(x=condition, y=result_name, data=cells, **box_style)
ax.set(title=result_name, xlabel=condition, ylabel=result_name)
ax.set_ylim(bottom=0)
plt.show()



In [None]:
# Fraction of cells that contain at least one IRE1 cluster, per condition
condition = 'Metadata_hours_Tm'

# Per-cell cluster count column; any non-zero count becomes True
# (alternative column name kept for reference: 'Children_IRE1_clusters_Count')
cluster_count_col = 'Children_Clusters_in_ER_masks_masked_Count'
cells['Has_IRE1_clusters'] = cells[cluster_count_col].astype('bool')

# NOTE(review): presumably returns bootstrap estimates with one column per condition - confirm in cellprofiler_tools
frac_clust = cpt.bootstrap_cell_prop(cells, 'Has_IRE1_clusters', condition)

# Error bars show the standard deviation of the values in each column
ax = sns.barplot(data=frac_clust, color='steelblue', ci="sd")
ax.set(
    title='Formation and dissolution of IRE1 clusters in stressed cells',
    xlabel='Hours of Tm treatment',
    ylabel='Fraction of cells with clusters',
)
ax.set_ylim(bottom=0)
plt.show()

In [None]:
# Add the condition column to the cluster table and plot a cluster property per condition
condition = 'Metadata_hours_Tm'
result_name = 'AreaShape_Area'
#result_name2 = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'
#result_name = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'

# NOTE(review): presumably adds a `condition` column to ire1_clust in place - confirm in cellprofiler_tools
cpt.add_image_prop_to_cells(ire1_clust, images, condition)

# Conditions left out of the plot
excluded_conditions = [32]

# Single boolean mask instead of filtering once per excluded value;
# .copy() keeps clust_filt independent of ire1_clust
clust_filt = ire1_clust.loc[~ire1_clust[condition].isin(excluded_conditions)].copy()

#ax = sns.regplot(x=result_name, y=result_name2, data=ire1_clust)
ax = sns.barplot(x=condition, y=result_name, data=clust_filt, color='steelblue', ci=68)
#ax = sns.swarmplot(x=condition, y=result_name, data=clust_filt, color=".25")
# Title and labels match the other plotting cells so the figure stands alone
ax.set_title(result_name)
ax.set_xlabel(condition)
ax.set_ylabel(result_name)
plt.show()

In [None]:
# Link each cluster to a nucleus and copy nuclear geometry onto the cluster table

prop = 'ObjectNumber'
rel_col = 'Parent_Nuclei_all'
n = 'Nuclei_Accepted_ObjID'   # name of the column that carries the nucleus link

# Step 1: record, on nuclei_all, the ObjectNumber of the related accepted nucleus
# NOTE(review): argument semantics inferred from the helper's name - confirm in cellprofiler_tools
cpt.add_child_prop_to_parents(nuclei_all, nuclei_accepted, prop, rel_col, n)

# Step 2: hand column `n` down the object hierarchy. Each call reads the
# column written by the previous one, so the order of this list matters.
parent_chain = [
    (er_masks_all, nuclei_all, 'Parent_Nuclei_all'),
    (er_masks, er_masks_all, 'Parent_ER_masks_all'),
    (ire1_clust, er_masks, 'Parent_ER_masks_accepted'),
]
for child, parent, link_col in parent_chain:
    cpt.add_parent_prop(child, parent, n, link_col, n)

# Step 3: copy nucleus centre coordinates and mean radius to the clusters,
# storing each under the original name with a '_Nucleus' suffix
props = ['AreaShape_Center_X',
         'AreaShape_Center_Y',
         'AreaShape_MeanRadius']
props_mod = [name + '_Nucleus' for name in props]
for prop, result_name in zip(props, props_mod):
    cpt.add_parent_prop(ire1_clust, nuclei_accepted, prop, n, result_name)

In [None]:
# Calculate and plot cluster-to-nucleus distances
condition = 'Metadata_hours_Tm'
result_1 = 'Dist_to_Nucleus_Edge'
result_2 = 'AreaShape_Area'

# Centre coordinates of the nucleus assigned to each cluster, and of the cluster itself
n_x = ire1_clust['AreaShape_Center_X_Nucleus']
n_y = ire1_clust['AreaShape_Center_Y_Nucleus']
c_x = ire1_clust['AreaShape_Center_X']
c_y = ire1_clust['AreaShape_Center_Y']

# Euclidean centre-to-centre distance. The edge distance subtracts the nucleus
# mean radius, so it can be negative for clusters closer to the centre than that radius.
d = np.sqrt(np.square(n_x-c_x)+np.square(n_y-c_y))
ire1_clust['Dist_to_Nucleus_Center'] = d
ire1_clust[result_1] = d - ire1_clust['AreaShape_MeanRadius_Nucleus']

# Conditions left out of the plot
excluded_conditions = [0, 32]
clust_filt = ire1_clust.loc[~ire1_clust[condition].isin(excluded_conditions)].copy()

#ax = sns.regplot(x=result_2, y=result_1, data=clust_filt)
#ax = sns.barplot(x=condition, y=result_1, data=clust_filt, color='steelblue', ci=68)

# Individual clusters as points, with a box plot drawn on the same axes
ax = sns.swarmplot(x=condition, y=result_1, data=clust_filt, color=".25", size=3)
sns.boxplot(x=condition, y=result_1, data=clust_filt, showfliers=False, ax=ax)
ax.set_title(result_1)

# Welch's t-test (unequal variances) between two conditions
cond1 = 1
cond2 = 2
# ttest_ind propagates NaN by default, so rows without a distance are dropped first
# (presumably clusters with no matched nucleus carry NaN here - verify)
data1 = ire1_clust.loc[ire1_clust[condition] == cond1, result_1].dropna()
data2 = ire1_clust.loc[ire1_clust[condition] == cond2, result_1].dropna()

print(stats.ttest_ind(data1, data2, equal_var=False))
plt.show()

In [None]:
# Aggregate one cluster property per cell and plot it by condition

# Cluster measurement to aggregate (alternatives kept for quick switching)
#prop = 'AreaShape_Compactness'
#prop = 'AreaShape_Area'
prop = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'
stat='sum'

condition = 'Metadata_hours_Tm'
rel_col = 'Parent_ER_masks_accepted'
# Name of the per-cell column that receives the aggregated value
result_name = 'IRE1_clust_'+prop+'_'+stat

# NOTE(review): presumably writes column `result_name` onto `cells` in place - confirm in cellprofiler_tools
cpt.add_child_prop_to_parents(
    cells, ire1_clust, prop, rel_col, result_name, statistic=stat)

# Only cells that received a value are plotted
cells_valid = cells.dropna(subset=[result_name])

bar_style = dict(color='steelblue', ci=68)
ax = sns.barplot(x=condition, y=result_name, data=cells_valid, **bar_style)
#ax = sns.swarmplot(x=condition, y=result_name, data=cells_valid, color=".25")
ax.set(title=result_name, xlabel=condition, ylabel=result_name)
ax.set_ylim(bottom=0)
plt.show()

In [None]:
# Plot parent (cell) property vs. aggregated child (cluster) property, one regression series per group

prop_parent = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'
prop_child = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'
#prop_child = 'AreaShape_Area'
stat='sum'

# Per-cell column that receives the aggregated cluster value
child_result = 'IRE1_clust_'+prop_child+'_'+stat
rel_col = 'Parent_ER_masks_accepted'
group_name = 'Metadata_hours_Tm'

# String version of the group value, used for legend labels and exclusion
group_str = group_name + '_str'
cells[group_str] = cells[group_name].astype(str) + ' h Tm'
excluded_groups = ['32 h Tm']

# NOTE(review): presumably writes column `child_result` onto `cells` in place - confirm in cellprofiler_tools
cpt.add_child_prop_to_parents (cells, ire1_clust, prop_child, rel_col, 
                             child_result, statistic=stat)
cells_valid = cells.dropna(subset=[child_result])

# Create the axes before the loop and draw every series on them explicitly.
# Otherwise `ax` is only bound inside the loop, and the legend/limit calls
# below would hit a stale or undefined `ax` when no group ends up plotted.
fig, ax = plt.subplots()
for group in cells_valid[group_str].unique():
    if group in excluded_groups:
        continue
    cells_group = cells_valid.loc[cells_valid[group_str] == group]
    sns.regplot(x=prop_parent, y=child_result, data=cells_group, ci=68,
                x_estimator=np.mean, scatter=True, label=group, ax=ax)
ax.legend(loc="best")

#ax = sns.scatterplot(x=prop_parent, y=child_result, data=cells_valid, hue=group_str)

#sns.kdeplot(cells_valid[prop_parent], cells_valid[child_result])
#sns.jointplot(x=prop_parent, y=child_result, data=cells_valid,
#             kind="reg")

#ax.set_title(child_result+' vs '+ prop_parent)
#ax.set_xlabel(prop_parent)
#ax.set_ylabel(prop_child)
ax.set_xlim(left=0)
plt.show()

In [None]:
# Plot fraction of IRE1 in clusters per cell

prop_parent = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'
prop_child = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'
stat='sum'

# Per-cell column that receives the summed cluster intensity
child_result = 'IRE1_clust_'+prop_child+'_'+stat
rel_col = 'Parent_ER_masks_accepted'
group_name = 'Metadata_hours_Tm'
group_str = group_name + '_str'
fraction_clust = 'Fraction_IRE1_in_clusters'

cells[group_str] = cells[group_name].astype(str) + ' h Tm'

# NOTE(review): presumably writes column `child_result` onto `cells` in place - confirm in cellprofiler_tools
cpt.add_child_prop_to_parents (cells, ire1_clust, prop_child, rel_col, 
                             child_result, statistic=stat)

# Ratio of cluster intensity to whole-cell intensity; missing ratios become 0
# (presumably cells without clusters have NaN in `child_result` - verify).
# The result is assigned back explicitly: `cells[col].fillna(0, inplace=True)`
# acts on a temporary under pandas Copy-on-Write and would leave `cells`
# unchanged, and the FutureWarning filter set at the top of this notebook
# would hide the warning about it.
cells[fraction_clust] = (cells[child_result] / cells[prop_parent]).fillna(0)

ax = sns.barplot(x=group_name, y=fraction_clust, 
                 data=cells, color='steelblue', ci=68)
#ax = sns.swarmplot(x=group_name, y=fraction_clust, data=cells, color=".25")

ax.set_title(fraction_clust)
ax.set_xlabel(group_name)
ax.set_ylabel(fraction_clust)
plt.show()

In [None]:
# Mean cluster count per cell, restricted to cells that have at least one cluster

condition = 'Metadata_hours_Tm'
result_name = 'Children_Clusters_in_ER_masks_masked_Count'
#result_name = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'

# Drop cells with a missing value first, then keep strictly positive ones
cells_valid = (
    cells
    .dropna(subset=[result_name])
    .loc[lambda frame: frame[result_name] > 0]
)

bar_style = dict(color='steelblue', ci=68)
ax = sns.barplot(x=condition, y=result_name, data=cells_valid, **bar_style)
#ax = sns.swarmplot(x=condition, y=result_name, data=cells_valid, color="0.25")
#ax = sns.violinplot(x=condition, y=result_name, data=cells_valid)
ax.set(title=result_name, xlabel=condition, ylabel=result_name)
ax.set_ylim(bottom=0)
plt.show()



In [None]:
# Scratch cell. The string literal below is an inert timer snippet kept for copy-paste.
"""
# Uncomment for a quick timer
start = time.time()
print("Start timer")

end = time.time()
print(end - start)
"""

# Example file name used to try out the treatment-duration pattern
string = 'C1-AVG_vVB_190416_03_Well03_02hTm_1_MMStack_Pos0.ome.tif'

# Capture the digits that directly precede 'hTm_' and follow an underscore
hours_pattern = re.compile(r".*_(\d+)hTm_.*")
z = hours_pattern.match(string)

# Print the captured digits as text (leading zeros are kept)
print(z.group(1))

