Figure(s) in the manuscript created by this notebook: Fig. 3A, 3B, 3C, 3D.

This notebook extracts cluster data from CellProfiler measurements of cluster formation in fixed cells and plots various cluster and cell parameters over a timecourse of stress.

In [None]:
# User-defined parameters for analysis:
# Every name assigned in this cell is read by later cells of the notebook;
# edit the values here rather than further down.

# Plotting and figure saving params
# save_figs: when True, later cells write each figure as a PDF under
# <save_dir>/pdf; when False, figures are only displayed.
save_figs = False
save_dir = '../reports/figures/Fig3A-D_Fixed_cell_clusters'
# Script executed with %run in the figure-setup cell to load the plot style.
plot_settings = '../src/plotting_settings.py'

# Source data metadata
# CellProfiler outputs everything in pixels. Input size of pixel in microns
# (used below to convert cluster areas from pixels to square microns).
pixel_size = 0.206 # um per pixel

# Source data location
# Directory holding the CellProfiler CSV exports named below.
data_dir = '../data/processed/Fig3A-D_Fixed_cell_clusters/csv_outputs'

# Per-image table (source of the Metadata_* columns), accepted ER masks
# (one row per analysed cell) and the clusters found inside those masks.
image_file_csv = 'FociQuant06_Image.csv'
er_masks_csv = 'FociQuant06_ER_masks_accepted.csv'
ire1_clust_csv = 'FociQuant06_Clusters_in_ER_masks_masked.csv'

# Tables used only to relate each cluster back to its accepted nucleus.
nuclei_all_csv = 'FociQuant06_Nuclei_all.csv'
er_masks_all_csv = 'FociQuant06_ER_masks_all.csv'
nuclei_accepted_csv = 'FociQuant06_Nuclei_accepted.csv'


In [None]:
# load modules

# uncomment block below for debugging
"""
%load_ext autoreload
%autoreload 2
%matplotlib inline
from IPython.core.debugger import set_trace
"""

import os, sys, inspect
import matplotlib
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from scipy import stats
import pprint
import re
import time
import seaborn as sns
import warnings

# Silence FutureWarning messages (seaborn emits many of them)
warnings.simplefilter('ignore', FutureWarning)

# Put the project's source directory (src) first on the import path so that
# the local analysis modules can be imported below
module_dir = '../src'
sys.path.insert(0, module_dir)

import cellprofiler_tools as cpt

In [None]:
# Set up figure save dirs and load plotting style
if save_figs:
    %matplotlib
    %run $plot_settings save
    
    # Make directory for saving figures
    save_dir_pdf = os.path.join(save_dir, 'pdf')
    if not os.path.exists(save_dir_pdf):
        os.makedirs(save_dir_pdf)
else:
    %matplotlib inline
    %run $plot_settings plot_only

In [None]:
# Load data from CSV files
# Every table is read from data_dir with cpt.get_data_cp_csv.

# Image-level table (its full path is kept in image_full_file)
image_full_file = os.path.join(data_dir, image_file_csv)
images = cpt.get_data_cp_csv(image_full_file)

# Accepted ER masks and the clusters detected inside them
er_masks, ire1_clust = (
    cpt.get_data_cp_csv(os.path.join(data_dir, csv_name))
    for csv_name in (er_masks_csv, ire1_clust_csv)
)

# Tables needed to relate clusters back to nuclei
nuclei_all, er_masks_all, nuclei_accepted = (
    cpt.get_data_cp_csv(os.path.join(data_dir, csv_name))
    for csv_name in (nuclei_all_csv, er_masks_all_csv, nuclei_accepted_csv)
)

print('Loaded')

In [None]:
# Create a dataframe for all cells and plot whole-cell intensity per timepoint

# The accepted ER masks table is copied and used as the per-cell table
cells = er_masks.copy()
cells.index.name = 'Cell_ID'

result_name = 'Intensity_IntegratedIntensity_Corr_mNeonGreen'
condition = 'Metadata_hours_Tm'
condition2 = 'Metadata_dox'

# Bring both image-level metadata columns onto the cells table
for image_prop in (condition, condition2):
    cpt.add_image_prop_to_objects(cells, images, image_prop)

# Assign -1 hrs Tm to the no-dox condition (for convenience of representation)
no_dox = cells[condition2] == 0
cells.loc[no_dox, condition] = -1

fig, ax = plt.subplots()
fig.tight_layout(pad=2)

sns.boxplot(data=cells, x=condition, y=result_name,
            color='steelblue', showfliers=False, ax=ax)
ax.set(title=result_name, xlabel=condition, ylabel=result_name)
ax.set_ylim(bottom=0)

if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'Cell_Intensity_vs_timepoint.pdf')
    fig.savefig(fig_filename_pdf)

plt.show()

In [None]:
# Plot fraction of cells with clusters per condition
excluded_conditions = [-1]
condition = 'Metadata_hours_Tm'

# Flag cells by casting their child-cluster count to bool
# (earlier pipeline versions used 'Children_IRE1_clusters_Count')
cluster_count_col = 'Children_Clusters_in_ER_masks_masked_Count'
cells['Has_IRE1_clusters'] = cells[cluster_count_col].astype('bool')

# Drop the excluded timepoints
is_kept = ~cells[condition].isin(excluded_conditions)
cells_filt = cells.loc[is_kept, :].copy()

frac_clust = cpt.bootstrap_cell_prop(cells_filt, 'Has_IRE1_clusters', condition)

fig, ax = plt.subplots()
fig.tight_layout(pad=2)

# Bars drawn from the bootstrap table, with standard-deviation error bars
ax = sns.barplot(data=frac_clust, color='steelblue', ci="sd")
ax.set(title='Fraction of cells with clusters over time',
       xlabel='Hours of Tm treatment',
       ylabel='Fraction of cells with clusters')
ax.set_ylim(bottom=0)

if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'Fraction_cell_with_clusters.pdf')
    plt.savefig(fig_filename_pdf)

plt.show()
print(len(frac_clust))

In [None]:
# Plot cluster area distribution over conditions
excluded_conditions = []

condition = 'Metadata_hours_Tm'
result_name = 'AreaShape_Area'
# Alternative measurement: 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'

# Bring the timepoint column from the image table onto the clusters table
cpt.add_image_prop_to_objects(ire1_clust, images, condition)

# Drop the excluded timepoints (none by default)
is_kept = ~ire1_clust[condition].isin(excluded_conditions)
clust_filt = ire1_clust.loc[is_kept, :].copy()

# Convert areas from pixels to square microns
result_name_microns = 'Cluster_area_um2'
pixel_area = pixel_size ** 2
clust_filt[result_name_microns] = clust_filt[result_name] * pixel_area

fig, ax = plt.subplots()
fig.tight_layout(pad=2)

ax = sns.barplot(data=clust_filt, x=condition, y=result_name_microns,
                 color='steelblue', ci=68)

if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'Cluster_areas_vs_timepoint.pdf')
    plt.savefig(fig_filename_pdf)

plt.show()
print(len(clust_filt))

In [None]:
# Relate clusters to nuclei and add nuclear geometry params to clusters

# Step 1: record, for every entry in nuclei_all, the ObjectNumber of its
# accepted-nucleus child under the column name stored in n
prop = 'ObjectNumber'
rel_col = 'Parent_Nuclei_all'
n = 'Nuclei_Accepted_ObjID'
cpt.add_child_prop_to_parents(nuclei_all, nuclei_accepted, prop, rel_col, n)

# Step 2: hand that ID down the parent chain, one level per call
# (nuclei_all -> er_masks_all -> er_masks -> ire1_clust); order matters
cpt.add_parent_prop(er_masks_all, nuclei_all, n, 'Parent_Nuclei_all', n)
cpt.add_parent_prop(er_masks, er_masks_all, n, 'Parent_ER_masks_all', n)
cpt.add_parent_prop(ire1_clust, er_masks, n, 'Parent_ER_masks_accepted', n)

# Step 3: add nucleus coordinates and radii to clusters; each new column is
# the nucleus property name with a '_Nucleus' suffix
props = ['AreaShape_Center_X',
         'AreaShape_Center_Y',
         'AreaShape_MeanRadius']
props_mod = [nucleus_prop + '_Nucleus' for nucleus_prop in props]
for prop, result_name in zip(props, props_mod):
    cpt.add_parent_prop(ire1_clust, nuclei_accepted, prop, n, result_name)

In [None]:
# Calculate and plot cluster to nucleus distances
excluded_conditions = [0, 32]

condition = 'Metadata_hours_Tm'
result_1 = 'Dist_to_Nucleus_Edge'
result_2 = 'AreaShape_Area'

# Nucleus and cluster center coordinates (pixel_size is not applied here,
# so the distances stay in CellProfiler's units)
n_x = ire1_clust['AreaShape_Center_X_Nucleus']
n_y = ire1_clust['AreaShape_Center_Y_Nucleus']
c_x = ire1_clust['AreaShape_Center_X']
c_y = ire1_clust['AreaShape_Center_Y']

# Euclidean center-to-center distance; the edge distance subtracts the
# nucleus mean radius
d = np.sqrt(np.square(n_x - c_x) + np.square(n_y - c_y))
ire1_clust['Dist_to_Nucleus_Center'] = d
ire1_clust['Dist_to_Nucleus_Edge'] = d - ire1_clust['AreaShape_MeanRadius_Nucleus']

# Drop the excluded timepoints before plotting
is_kept = ~ire1_clust[condition].isin(excluded_conditions)
clust_filt = ire1_clust.loc[is_kept, :].copy()

fig, ax = plt.subplots()
fig.tight_layout(pad=2)

# A swarmplot overlay (color=".25", size=1) was tried here and left disabled
ax = sns.boxplot(data=clust_filt, x=condition, y=result_1, showfliers=False)

if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'Cluster_to_nucleus_distance.pdf')
    plt.savefig(fig_filename_pdf)

# Unequal-variance t-test between two timepoints, taken from the unfiltered
# clusters table.
# NOTE(review): any NaN distance would propagate into the test statistic --
# confirm every cluster has a matched nucleus.
cond1 = 1
cond2 = 2
data1, data2 = (
    ire1_clust.loc[ire1_clust[condition] == cond_value, 'Dist_to_Nucleus_Edge']
    for cond_value in (cond1, cond2)
)

print(stats.ttest_ind(data1, data2, equal_var=False))

In [None]:
# Plot mean cluster intensity by cell
excluded_conditions = [-1, 0, 24]

# Cluster property to aggregate per cell; alternatives that were tried:
# 'AreaShape_Compactness', 'AreaShape_Area'
prop = 'Intensity_IntegratedIntensity_Corr_mNeonGreen'
stat = 'mean'

result_name = 'IRE1_clust_' + prop + '_' + stat
rel_col = 'Parent_ER_masks_accepted'
condition = 'Metadata_hours_Tm'

# Drop the excluded timepoints; .copy() gives an independent table that the
# aggregation below can add a column to
cells_filt = cells.loc[~cells[condition].isin(excluded_conditions), :].copy()

# Aggregate the cluster property per parent cell, then keep only cells that
# received a value
cpt.add_child_prop_to_parents(cells_filt, ire1_clust, prop, rel_col,
                              result_name, statistic=stat)
cells_valid = cells_filt.dropna(subset=[result_name])

fig, ax = plt.subplots()
fig.tight_layout(pad=2)

ax = sns.barplot(x=condition, y=result_name, data=cells_valid,
                 color='steelblue', ci=68, ax=ax)
ax.set_title(result_name)
ax.set_xlabel(condition)
ax.set_ylabel(result_name)
ax.set_ylim(bottom=0)

# Save BEFORE showing: on backends that close the figure in plt.show(), a
# later plt.savefig() would write a new, empty figure. Saving through `fig`
# also pins the output to this cell's figure.
# NOTE(review): the file name says 'Sum' while stat is 'mean' -- confirm
# which one is intended before renaming.
if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'Sum_cluster_intensity_per_cell.pdf')
    fig.savefig(fig_filename_pdf)

plt.show()


In [None]:
# Plot fraction of IRE1 in clusters per cell
excluded_conditions = [-1]

prop_parent = 'Intensity_IntegratedIntensity_Corr_mNeonGreen'
prop_child = 'Intensity_IntegratedIntensity_Corr_mNeonGreen'
stat = 'sum'

child_result = 'IRE1_clust_' + prop_child + '_' + stat
rel_col = 'Parent_ER_masks_accepted'
group_name = 'Metadata_hours_Tm'
group_str = group_name + '_str'
fraction_clust = 'Fraction_IRE1_in_clusters'

# Human-readable timepoint label
cells[group_str] = cells[group_name].astype(str) + ' h Tm'

# Sum the cluster intensity per parent cell
cpt.add_child_prop_to_parents(cells, ire1_clust, prop_child, rel_col,
                              child_result, statistic=stat)

# Fraction = summed cluster intensity / whole-cell intensity. Missing values
# become 0. The result is assigned back explicitly: calling
# fillna(inplace=True) on the selected column is a chained assignment that
# is not guaranteed to update `cells`.
cells[fraction_clust] = (cells[child_result] / cells[prop_parent]).fillna(0)

# Drop the excluded timepoints, filtering on this cell's own grouping column
# rather than on a variable left over from an earlier cell
cells_filt = cells.loc[~cells[group_name].isin(excluded_conditions), :].copy()

fig, ax = plt.subplots()
fig.tight_layout(pad=2)

ax = sns.barplot(x=group_name, y=fraction_clust,
                 data=cells_filt, color='steelblue', ci=68, ax=ax)
# A swarmplot of the same data (color=".25") was tried here and left disabled

ax.set_title(fraction_clust)
ax.set_xlabel(group_name)
ax.set_ylabel(fraction_clust)

# Save BEFORE showing: on backends that close the figure in plt.show(), a
# later plt.savefig() would write a new, empty figure.
if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'Fraction_IRE1_in_clusters.pdf')
    fig.savefig(fig_filename_pdf)

plt.show()

In [None]:
# Plot number of cluster per cell for cells that have clusters
excluded_conditions = [-1]

result_name = 'Children_Clusters_in_ER_masks_masked_Count'
# Alternative measurement: 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'
condition = 'Metadata_hours_Tm'

# Drop the excluded timepoints
cells_filt = cells.loc[~cells[condition].isin(excluded_conditions), :].copy()

# Keep only cells with a defined, strictly positive cluster count
cells_valid = cells_filt.dropna(subset=[result_name])
cells_valid = cells_valid.loc[cells_valid[result_name] > 0]

fig, ax = plt.subplots()
fig.tight_layout(pad=2)

ax = sns.barplot(x=condition, y=result_name, data=cells_valid,
                 color='steelblue', ci=68, ax=ax)
ax.set_title(result_name)
ax.set_xlabel(condition)
ax.set_ylabel(result_name)
ax.set_ylim(bottom=0)

# Save BEFORE showing: on backends that close the figure in plt.show(), a
# later plt.savefig() would write a new, empty figure.
if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf,
                                    'Number_IRE1_clusters_per_cell_with_clusters.pdf')
    fig.savefig(fig_filename_pdf)

plt.show()


In [None]:
# Plot scatterplot of parent vs. child properties for each cell
excluded_groups = ['32 h Tm']

prop_parent = 'Intensity_IntegratedIntensity_Corr_mNeonGreen'
prop_child = 'Intensity_IntegratedIntensity_Corr_mNeonGreen'
# Alternative child property: 'AreaShape_Area'
stat = 'sum'

child_result = 'IRE1_clust_' + prop_child + '_' + stat
rel_col = 'Parent_ER_masks_accepted'
group_name = 'Metadata_hours_Tm'

# Human-readable timepoint label, also used for the legend and for matching
# against excluded_groups.
# NOTE(review): the label is built with astype(str), so a float-typed hours
# column would give '32.0 h Tm' and not match '32 h Tm' -- confirm dtype.
group_str = group_name + '_str'
cells[group_str] = cells[group_name].astype(str) + ' h Tm'

# Aggregate the child property per parent cell; keep cells that got a value
cpt.add_child_prop_to_parents(cells, ire1_clust, prop_child, rel_col,
                              child_result, statistic=stat)
cells_valid = cells.dropna(subset=[child_result])

# Draw on a dedicated figure. Without it, regplot targets whatever axes is
# current, which with a non-inline backend is the previous cell's plot, and
# `ax` would be a stale reference if every group were excluded.
fig, ax = plt.subplots()

# One regression overlay per timepoint group
for group in cells_valid[group_str].unique():
    if group in excluded_groups:
        continue
    cells_group = cells_valid.loc[cells_valid[group_str] == group]
    sns.regplot(x=prop_parent, y=child_result, data=cells_group, ci=68,
                x_estimator=np.mean, scatter=True, label=group, ax=ax)
ax.legend(loc="best")

ax.set_xlim(left=0)
plt.show()

In [None]:
# Get number of cells for each data point

times = 'Metadata_hours_Tm'
# The loop variable is deliberately not called `time`: this notebook imports
# the `time` module, and a loop variable of that name would overwrite it for
# every cell executed afterwards.
for timepoint in cells[times].unique():
    cells_in_time = cells.loc[cells[times] == timepoint]
    print('Cells in time ', timepoint, ': ', len(cells_in_time))
