This notebook extracts cluster data from CellProfiler .csv measurements of cluster-formation movies, in which cells are tracked frame-to-frame over the time course of stress, and constructs cell-by-cell time-course plots.

In [None]:
# load modules

# debugging / development helpers (comment out when not needed)
%load_ext autoreload
%autoreload 2
%matplotlib inline
from IPython.core.debugger import set_trace

import os, sys, inspect
import matplotlib
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from scipy import stats
import itertools
import pprint
import re
import time
import seaborn as sns
import warnings
from IPython.core.debugger import set_trace


# Disable future warnings for seaborn
warnings.simplefilter(action='ignore', category=FutureWarning)


# Add source code directory (src) to path to enable module import.
# The kernel's working directory is used as the anchor; the relative
# '../data' and '../reports' paths used in this notebook rely on the same
# anchor. inspect.getfile(inspect.currentframe()) is not a dependable anchor
# inside a notebook: depending on the IPython/ipykernel version the cell is
# compiled under a pseudo-name or under a temporary file path, and in the
# latter case the derived 'src' directory would point away from the project.
curr_dir = os.getcwd()
parent_dir = os.path.dirname(curr_dir)
module_dir = os.path.join(parent_dir, 'src')
# Re-running this cell must not pile up duplicate entries in sys.path
if module_dir not in sys.path:
    sys.path.insert(0, module_dir)

import cellprofiler_tools as cpt


In [None]:
# Set up plot export and plotting styles

# Plotting and figure saving params
save_figs = True
save_dir = '../reports/figures/CellProfiler_FociQuant05_LiveCells'
    
# create save figure dir and set up figure/font sizes
if save_figs:
    %matplotlib
    matplotlib.rcParams['figure.figsize'] = 2.75, 1.5
    save_dir_pdf = os.path.join(save_dir, 'pdf')
    if not os.path.exists(save_dir_pdf):
        os.makedirs(save_dir_pdf)
    
    # Set up fonts
    matplotlib.rc("font", family="Arial")

    matplotlib.rcParams['pdf.fonttype'] = 42 # Make fonts editable
    matplotlib.rcParams['axes.linewidth']= 0.5
    matplotlib.rcParams['lines.linewidth'] = 0.5

    SMALL_SIZE = 5
    MEDIUM_SIZE = 6
    BIGGER_SIZE = 7

    plt.rc('font', size=MEDIUM_SIZE)          # controls default text sizes
    plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
    plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
    plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
    plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
    plt.rc('legend', fontsize=MEDIUM_SIZE)    # legend fontsize
    plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title
    
    
else:
    %matplotlib inline
    matplotlib.rcParams['figure.figsize'] = 8, 6



In [None]:
# Read the CellProfiler measurement tables (one CSV per table) into memory

# Folder holding the CSV files
data_dir = '../data/processed/CellProfiler_FociQuant05_LiveCells_20movies/csv_outputs'

# Physical calibration: CellProfiler reports everything in pixels
pixel_size = 0.206 # um per pixel
# Duration of a single movie frame
time_step = 5 # In minutes

# File names of the individual tables
image_file_csv = 'FociQuant05_Image.csv'
er_masks_csv = 'FociQuant05_ER_masks_accepted.csv'
ire1_clust_csv = 'FociQuant05_Clusters_in_ER_masks_masked.csv'
nuclei_all_csv = 'FociQuant05_Nuclei_all.csv'
er_masks_all_csv = 'FociQuant05_ER_masks_all.csv'
nuclei_accepted_csv = 'FociQuant05_Nuclei_accepted.csv'


def _read_cp_table(csv_name):
    """Load the CSV called csv_name from data_dir with cpt.get_data_cp_csv."""
    return cpt.get_data_cp_csv(os.path.join(data_dir, csv_name))


# Per-image table
image_full_file = os.path.join(data_dir, image_file_csv)
images = cpt.get_data_cp_csv(image_full_file)
# Alternative call kept for reference (presumably loads only the listed fields - confirm in cpt):
#images = cpt.get_data_cp_csv(image_full_file, data_fields=['ImageNumber','FileName_DNA_DAPI'])

# Accepted objects used by the analysis cells
er_masks = _read_cp_table(er_masks_csv)
ire1_clust = _read_cp_table(ire1_clust_csv)

# Remaining object tables
nuclei_all = _read_cp_table(nuclei_all_csv)
er_masks_all = _read_cp_table(er_masks_all_csv)
nuclei_accepted = _read_cp_table(nuclei_accepted_csv)

print('Loaded')

***

Data loading is finished at this point. Analysis cells follow.

***

In [None]:
# Organize cells into single-cell trajectories and build the cleaned-up
# dataframe 'cells_filt', which excludes short trajectories, duplicated
# trajectories (duplications can arise from CellProfiler incorrectly splitting
# nuclei during tracking) and trajectories with more than max_final_clust
# clusters in their final frame.
# Both 'cells' and 'cells_filt' carry a column 'Track_and_group' holding a
# unique ID for that particular cell's trajectory.

min_traj_frames = 100 # minimum length, in frames, of a valid trajectory
max_final_clust = 0 # Maximum number of clusters that are allowed to be left
                    # at the end of the trajectory

# Work on a copy of the accepted ER masks so the loaded table stays untouched
cells = er_masks.copy()
cells.index.name = 'Cell_ID'

# Copy per-image properties onto every cell
for image_prop in ('Metadata_Frame', 'Group_Number'):
    cpt.add_image_prop_to_objects(cells, images, image_prop)

# Frame number -> hours (time_step is the frame duration in minutes)
cells['tStress_hrs'] = cells['Metadata_Frame'] * time_step / 60

# Track label comes from the parent nucleus; combined with the group number
# it forms the trajectory ID
cpt.add_parent_prop(cells, nuclei_accepted, 'TrackObjects_Label_25',
                    'Parent_Nuclei_accepted', 'Track_Label')
group_str = cells['Group_Number'].astype(str)
cells['Track_Label_str'] = 'Track_' + cells['Track_Label'].astype(str)
cells['Track_and_group'] = cells['Track_Label_str'] + '_Group_' + group_str

# Column names used by the filters below
final_age = 'TrackObjects_FinalAge_25'
num_clust = 'Children_Clusters_in_ER_masks_masked_Count'

cpt.add_parent_prop(cells, nuclei_accepted, final_age,
                    'Parent_Nuclei_accepted', final_age)

# Rows with a final age recorded (presumably the last frame of each track -
# TODO confirm against the TrackObjects output)
final_frames = cells.loc[cells[final_age].notna()]

# One boolean mask per criterion, all evaluated on the final-frame rows
duration_filt = final_frames[final_age].ge(min_traj_frames)
declust_filt = final_frames[num_clust].le(max_final_clust)
unique_filt = ~final_frames.duplicated(subset='Track_and_group', keep=False)

# Keep every row whose trajectory ID passes all three criteria
keep_tracks = duration_filt & unique_filt & declust_filt
track_labels_filt = final_frames.loc[keep_tracks, 'Track_and_group']
cells_filt = cells.loc[cells['Track_and_group'].isin(track_labels_filt)].copy()

print('Done')

In [None]:
# Restrict the data further to trajectories that start and end with no
# clusters, and build a dataframe of trajectories with start and end points.

num_clust = 'Children_Clusters_in_ER_masks_masked_Count'
min_clust = 5 # Minimum number of clusters per frame to count the cell as clustering

# Drop invalid trajectories and normalize trajectory times
cells_filt2 = cpt.norm_clust_time_by_track(cells_filt, num_clust, time_step, min_clust)

# Column shown on the y axis
result_name = 'Children_Clusters_in_ER_masks_masked_Count'

# Binned scatter of cluster count against normalized time
fig, ax = plt.subplots()
fig.tight_layout(pad=2)

binned_scatter = dict(x_bins=50, fit_reg=False, scatter_kws={'s':2})
ax = sns.regplot(x='Time_Norm', y=result_name, data=cells_filt2, ax=ax,
                 **binned_scatter)
ax.set_xlim(-0.1, 1.5)

if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'Norm_time_num_clust.pdf')
    fig.savefig(fig_filename_pdf)

plt.show()

In [None]:
# Cluster count per cell and mean cluster intensity, both plotted against
# the normalized stress timecourse on a shared x axis

# Adds the column 'Intensity_Clust_Mean' to cells_filt2 (mean over each
# cell's child clusters of their integrated IRE1 intensity)
cpt.add_child_prop_to_parents(
    cells_filt2, ire1_clust,
    'Intensity_IntegratedIntensity_IRE1_mNeonGreen',
    'Parent_ER_masks_accepted', 'Intensity_Clust_Mean',
    statistic='mean')

fig, ax = plt.subplots()
fig.tight_layout(pad=2)

# First two colors of the current seaborn palette, one per series
palette = itertools.cycle(sns.color_palette())
count_color = next(palette)
intensity_color = next(palette)

# Left y axis: number of clusters
ax = sns.regplot(x='Time_Norm', y=num_clust, data=cells_filt2, x_bins=60,
                 ax=ax, fit_reg=False, color=count_color,
                 scatter_kws={'s':2})

# Right y axis: mean cluster intensity
ax2 = sns.regplot(x='Time_Norm', y='Intensity_Clust_Mean', data=cells_filt2,
                  x_bins=60, ax=ax.twinx(), fit_reg=False,
                  color=intensity_color, scatter_kws={'s':2})

ax.set_xlim(-0.1, 1.2)

if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'Norm_time_num_clust_vs_mean_intensity.pdf')
    fig.savefig(fig_filename_pdf)

plt.show()


In [None]:
# Cell intensity as a function of time under stress, shown side by side for
# all cells and for the filtered trajectories

result_name_1 = 'Intensity_IntegratedIntensity_IRE1_mNeonGreen'
result_name_2 = 'Children_Clusters_in_ER_masks_masked_Count'  # not plotted in this cell

fig, ax = plt.subplots(1,2)
fig.tight_layout(pad=2)

# One panel per dataset, left to right
panels = [(cells, 'All cells'), (cells_filt2, 'Filtered cells')]
for i, (panel_data, panel_title) in enumerate(panels):
    ax[i] = sns.lineplot(x='tStress_hrs', y=result_name_1, data=panel_data, ax=ax[i])
    ax[i].set_ylim(bottom=0)
    ax[i].set_title(panel_title)

if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'Cell_Intensity_over_time.pdf')
    fig.savefig(fig_filename_pdf)

plt.show()