Data needed in input:
- Chords extracted from segmentation masks of epifluorescence timelapses (.npy archives, here generally called `cld_results.npy`) in `cld_results_dir`. To obtain these: 
    - First get segmentation masks from epifluorescence timelapses by using the FIJI macro "Fiji utils/RNA_epifluorescence_timelapse_segmentation.ijm". 
    - Then, perform CLD extraction using the notebook at "CLD/CLD_from_Binary_Masks.ipynb"
- Particle Analysis results from binary segmentation (obtained from the first sub-step mentioned above) in `segmentation_results_dir`.

This analysis applies both to experiments in bulk and within synthetic cells, as well as to single sticky constructs or binary systems with all sticky or sticky/non-sticky components. This script gives an example of this analysis applied to bulk assembly of single sticky constructs (A, B, C) (segmentation performed with Li thresholding).

In [None]:
# Import dependencies
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns

from tqdm.notebook import tqdm

# Mean CLD and condensate number

In [None]:
# Helper function to process chord lengths while keeping information for different sample repeats
# (i.e. microscopy FOVs) separate
def cld_to_chords_unj(cld_dict, binning = 1, px_um_conv = 3.0852): 
    """
    Merge the X and Y chord lengths of every repeat at every timepoint and convert them
    from px to um, keeping repeats (i.e. microscopy FOVs) separate.

    params: 
    cld_dict: input dictionary of the form 
        {sample: {repeat: {'count_x': [...], 'count_y': [...]}}}, where 'count_x' and 
        'count_y' are indexed by timepoint and hold the chord lengths (in px)
    binning: if images (and masks) have been binned, change binning factor, default is 1 
        (image analysed in original format and resolution, 2044px x 2048px)
    px_um_conv: px/um conversion factor, default is for the 20x lens used on Nikon Ti2 
        -- CHANGE TO MATCH YOUR SETUP
    
    returns: chords_vs_time_unj: dictionary containing merged XY chord lengths vs time 
        for each repeat (i.e. microscopy field-of-view) of each sample

    notes:
    - A sample with exactly 6 repeats is treated as 3 FOVs imaged in two channels: 
      repeat k is merged with repeat k+3 (this requires integer repeat keys). 
      Any other number of repeats is kept as-is, one output entry per repeat.
    - Only the timepoints available in every repeat of a sample are processed, 
      so that all repeats of a sample have the same number of timepoints.
    """
    # Chord lengths are in px -> need to convert to um to extract physical size information.
    # If we have binning, the px/um factor is divided by the binning factor
    um_per_px = 1/(px_um_conv/binning)

    def merged_xy(repeat_data, timepoint): 
        # X and Y chords of one repeat at one timepoint, converted to um
        return list(um_per_px*np.concatenate(
            (repeat_data['count_x'][timepoint], 
             repeat_data['count_y'][timepoint])
        ))

    chords_vs_time_unj = {}
    # Looping through samples in experiment
    for sample, repeats in cld_dict.items(): 
        print(sample)
        repeat_keys = list(repeats)
        # Number of timepoints shared by all repeats of this sample
        n_timepoints = min(
            (len(repeats[key]['count_x']) for key in repeat_keys), 
            default = 0
        )
        if len(repeat_keys) == 6: 
            # Same FOVs, but different channels -- i.e. RNA nanostar C
            print('More than repeats - will merge 1-4, 2-5, 3-6')
            groups = {key: (key, key + 3) for key in repeat_keys[:3]}
        else: 
            # Keeping repeats separate
            groups = {key: (key,) for key in repeat_keys}

        chords_vs_time_unj[sample] = {}
        for out_key, members in groups.items(): 
            chords_vs_time = []
            # Looping through timepoints
            for timepoint in tqdm(range(n_timepoints)): 
                merged = []
                for member in members: 
                    merged.extend(merged_xy(repeats[member], timepoint))
                chords_vs_time.append(merged)
            chords_vs_time_unj[sample][out_key] = chords_vs_time
    return chords_vs_time_unj

In [None]:
# Helper function to compute mean number of condensates in a given FOV 
# and the corresponding standard error of the mean
def get_mean_stderr_number(df_list): 
    """
    Compute the mean number of condensates per FOV, and its standard error, for every slice.

    params:
    df_list: list of pandas dataframes from CSVs of Particle Analysis results performed in ImageJ/FIJI.
        Each dataframe (one per FOV) needs a 'Slice' column and an 'Area' column; 
        the number of non-null 'Area' entries per 'Slice' is taken as the number of condensates.
    
    returns: 
    mean, standard error of the mean of number of condensates per FOV 
    (two arrays with one entry per slice, slices in ascending order)

    raises:
    ValueError if df_list is empty.

    notes:
    FOVs are aligned on the 'Slice' value, not on position: a slice that does not appear 
    in a dataframe counts as 0 condensates for that FOV, whether it is missing at the 
    start, in the middle or at the end of the series. With a single FOV the standard 
    error is NaN (sample standard deviation with ddof = 1 is undefined).
    """
    if len(df_list) == 0: 
        raise ValueError('df_list is empty: no Particle Analysis results to average')
    # Number of detected particles per slice for each FOV; slices without any row 
    # in the table simply do not appear in the groupby result
    counts_per_fov = [df.groupby('Slice')['Area'].count() for df in df_list]
    # All slices seen in at least one FOV, in ascending order
    all_slices = sorted(set().union(*(counts.index for counts in counts_per_fov)))
    # Align every FOV on the common slice axis, filling absent slices with 0 condensates
    num_array = np.array(
        [counts.reindex(all_slices, fill_value = 0).to_numpy() for counts in counts_per_fov], 
        dtype = float
    )
    mean = np.mean(num_array, axis = 0)
    stderr = np.std(num_array, ddof = 1, axis = 0)/(len(num_array)**0.5)
    return mean, stderr

In [None]:
# CHANGE THESE TO ABSOLUTE PATHS TO CORRECT DIRECTORIES
# Keep the trailing '/' so that the directories also work when used as plain string prefixes.
# Directory holding the .npy archives with the extracted chords (CLD results)
cld_results_dir = "/ABSOLUTE/PATH/TO/CLD/RESULTS/"
# Directory holding the Particle Analysis CSVs obtained from the binary segmentation
segmentation_results_dir = "/ABSOLUTE/PATH/TO/SEGMENTATION/RESULTS/"

In [None]:
# Load results and process them
# The archive stores a pickled dictionary, hence allow_pickle = True and .item().
# NOTE: unpickling can execute arbitrary code -- only load archives you produced or trust.
cld_single_bulk = np.load(
    os.path.join(cld_results_dir, 'cld_single_bulk.npy'),  # works with or without trailing '/'
    allow_pickle = True
).item()
chords_single_bulk_unj = cld_to_chords_unj(cld_single_bulk, binning = 1)

In [None]:
# Extract mean and standard deviation vs time for all sample repeats
mean_single_bulk_unj, std_single_bulk_unj = {}, {}
# Loop through samples in dictionary
for sample, repeats in chords_single_bulk_unj.items(): 
    # Initialise per-repeat dictionaries within the output dictionaries
    mean_single_bulk_unj[sample], std_single_bulk_unj[sample] = {}, {}
    # Loop through the repeat keys actually present (no assumption that they are 1..n)
    for repeat, chords_vs_time in tqdm(repeats.items()):
        mean_single_bulk_unj[sample][repeat], std_single_bulk_unj[sample][repeat] = [], []
        # Loop through timepoints - one CLD per sample per timepoint
        for chords in tqdm(chords_vs_time):
            mean_single_bulk_unj[sample][repeat].append(np.mean(chords))
            # Sample standard deviation (ddof = 1) of the chord lengths at this timepoint
            std_single_bulk_unj[sample][repeat].append(np.std(chords, ddof = 1))

In [None]:
# Combine the repeats of each sample: mean and sample standard deviation (ddof = 1) 
# of the per-repeat mean chord length, computed across repeats at every timepoint
mean_single_bulk_comb, std_single_bulk_comb = {}, {}
for sample, means_per_repeat in mean_single_bulk_unj.items(): 
    # One array (mean chord length vs time) per repeat
    means_list = [np.array(trace) for trace in means_per_repeat.values()]
    mean_single_bulk_comb[sample] = np.mean(means_list, axis = 0)
    std_single_bulk_comb[sample] = np.std(means_list, axis = 0, ddof = 1)

In [None]:
# Plotting Number of Condensates vs Time

# Helper function to read corresponding CSVs
def read_segmentation_csvs(directory, sample_tag): 
    """
    Read every file in `directory` whose name contains `sample_tag` as a CSV.

    params:
    directory: path to the folder containing the Particle Analysis CSVs
    sample_tag: substring identifying the sample in the file names (e.g. '_A_')

    returns:
    list of pandas dataframes, one per matching file, in alphabetical order of file name
    (empty list if no file name matches)
    """
    return [
        # os.listdir returns bare file names: join with the directory so that reading 
        # does not depend on the current working directory
        pd.read_csv(os.path.join(directory, file_name)) 
        for file_name in sorted(os.listdir(directory)) 
        if sample_tag in file_name
    ]

# Particle Analysis tables for A and B only (one dataframe per matching CSV) - 
# C is left out as it does not form discrete condensates
segm_a_unj, segm_b_unj = (
    read_segmentation_csvs(segmentation_results_dir, tag) 
    for tag in ('_A_', '_B_')
)

In [None]:
# Number of condensates per sample as (mean, standard error) tuples, keyed by sample name
num_bulk = {
    'NS_A': get_mean_stderr_number(segm_a_unj), 
    'NS_B': get_mean_stderr_number(segm_b_unj), 
}

# Keep the individual results available under their own names as well
num_a, num_b = num_bulk['NS_A'], num_bulk['NS_B']

In [None]:
# Time axis in minutes: one point every 15 min from 0 to 10 h (inclusive), 
# then one point every 30 min starting at 10 h 30 min
first_phase = np.arange(0, 10*60 + 15, 15)
second_phase = np.arange(10*60 + 30, 10*60 + 15 + 38*60 + 30, 30)
time = np.concatenate((first_phase, second_phase)) # minutes

# Colour and legend label used for each sample in the plots
colours = dict(NS_A = 'orangered', NS_B = 'cyan', NS_C = 'gray')
labels = dict(NS_A = 'A', NS_B = 'B', NS_C = 'C')

# Two vertically stacked panels: mean CLD (top) and number of condensates (bottom)
plt.subplots(2, 1, figsize = (3, 5))
plt.subplots_adjust(hspace = .05)

# Top panel: mean chord length vs time
plt.subplot(211)
time_h = time/60 # minutes -> hours
cld_ticks = [0, 25, 50, 75]
for ind, (sample, mean_trace) in enumerate(mean_single_bulk_comb.items()): 
    mean_trace = np.array(mean_trace)
    spread = np.array(std_single_bulk_comb[sample])
    # Solid line: mean profile across repeats
    plt.plot(
        time_h, 
        mean_trace, 
        lw = 2.0, 
        label = labels[sample], 
        color = colours[sample]
    )
    # Shaded band: +/- standard deviation across repeats at each timepoint 
    # (used as a proxy for the standard error)
    plt.fill_between(
        time_h, 
        mean_trace - spread, 
        mean_trace + spread, 
        color = colours[sample], 
        alpha = 0.2
    )
    # Axis formatting (x tick labels are hidden here; the bottom panel carries them)
    ax = plt.gca()
    ax.tick_params(direction = 'in', length = 6)
    plt.ylabel(r'$\rm\mu_{CLD}$ [$\rm\mu$m]', fontsize = 20)
    plt.yticks(cld_ticks, cld_ticks, fontsize = 20)
    plt.ylim([-5, 85])
    plt.xticks([0, 12, 24, 36, 48], [], fontsize = 20)
plt.legend(frameon = False, fontsize = 20)

# Number of condensates per FOV
plt.subplot(212)
for sample, (num_mean, num_stderr) in num_bulk.items(): 
    # C does not form discrete condensates, so it is never plotted here
    if sample == 'NS_C':
        continue
    # Plot mean number of condensates
    plt.plot(
        time/60, 
        num_mean, 
        lw = 2.0, 
        label = labels[sample], 
        color = colours[sample]
    )
    # Plot shaded regions = standard error of the mean
    plt.fill_between(
        time/60, 
        num_mean - num_stderr, 
        num_mean + num_stderr, 
        color = colours[sample], 
        alpha = 0.2
    )
    ax = plt.gca()
    ax.tick_params(direction = 'in', length = 6, which = 'major')
    ax.tick_params(direction = 'in', length = 3, which = 'minor')
    plt.ylabel('N', fontsize = 20)
    # NOTE(review): the log scale is applied after these ticks are set; switching scale 
    # appears to reset the tick locators in matplotlib, so this tick list may not be the 
    # one displayed. Call order is kept unchanged to preserve the figure -- confirm.
    plt.yticks(
        [0, 500, 1000, 1500, 2000, 2500], 
        [0, 500, 1000, 1500, 2000, 2500], 
        fontsize = 20
    )
    plt.ylim([20, 5000])
    plt.yscale('log')
    plt.xlabel('Time [h]', fontsize = 20)

# Time ticks every 12 h, labelled every 24 h. The per-sample x-tick settings that used 
# to live inside the loop were always overridden by this call, so they have been removed.
plt.xticks([0, 12, 24, 36, 48], [0, '', 24, '', 48], fontsize = 20)
plt.show()

# Ridge Plots

In [None]:
# Helper function to process CLD results for Ridge Plots
def mean_std_stderr_cl(cld_dict, px_um_conv = 3.0852, binning = 1): 
    """
    Pool the chord lengths of all repeats of each sample at each timepoint (in um) and 
    estimate the mean, standard deviation and standard error of the pooled distribution.

    params: 
    cld_dict: input dictionary of the form 
        {sample: {repeat: {'count_x': [...], 'count_y': [...]}}}, where 'count_x' and 
        'count_y' are indexed by timepoint and hold the chord lengths (in px)
    
    px_um_conv: pixel to micron conversion factor (px/um), default is for the 20x lens 
        used on Nikon Ti2 -- CHANGE TO MATCH YOUR SETUP
    
    binning: if images (and masks) have been binned, change binning factor, default is 1 
    (image analysed in original format and resolution, 2044px x 2048px)
    
    returns: 
    chords_vs_time: dictionary {sample: [chords at t0, chords at t1, ...]} containing chord 
    lengths merged across repeats and XY chords vs time for each sample
    
    mean_std: dictionary with keys 'mean', 'std', 'stderr', each mapping sample -> array vs 
    time. The statistics are estimated from a histogram of the chord lengths (np.histogram 
    with its default binning) using the bin mid-points, so they are binned approximations 
    rather than exact sample moments. 'stderr' is std/sqrt(number of chords). 
    Timepoints without any chord give NaN.

    notes:
    Only the timepoints available in every repeat of a sample are processed.
    """
    # Chord Lengths are in px -> need to convert to um to extract physical size information
    # If we have binning, we need to adjust this px_um_conv factor
    pxum_conv_bin = px_um_conv/binning 
    
    # Step 1: pool chords_x and chords_y of all repeats (= imaged FOVs) per timepoint
    chords_vs_time = {}
    # Looping through samples in experiment
    for sample, repeats in cld_dict.items(): 
        print(sample)
        # Number of timepoints shared by all repeats of this sample
        n_timepoints = min(
            (len(repeat_data['count_x']) for repeat_data in repeats.values()), 
            default = 0
        )
        chords_vs_time[sample] = []
        # Looping through timepoints
        for timepoint in tqdm(range(n_timepoints)):
            chords_xy_time = []
            # Looping through repeats (FOVs in same sample/capillary)
            for repeat_data in repeats.values(): 
                # Merge xy chords for the particular repeat and convert to um
                chords_xy = (1/pxum_conv_bin)*np.concatenate(
                    (repeat_data['count_x'][timepoint], 
                     repeat_data['count_y'][timepoint])
                )
                # Grow the pooled list in place for this particular timepoint
                chords_xy_time.extend(chords_xy)
            chords_vs_time[sample].append(chords_xy_time)
    
    # Step 2: histogram-based mean, stddev and stderr of the chord length vs time
    mean_std = {'mean' : {}, 'std' : {}, 'stderr' : {}}
    for sample, chords_per_time in tqdm(chords_vs_time.items()): 
        means, sds, stderrs = [], [], []
        for x in tqdm(chords_per_time): 
            if len(x) == 0: 
                # No chords at this timepoint: statistics are undefined
                means.append(np.nan)
                sds.append(np.nan)
                stderrs.append(np.nan)
                continue
            counts, bins = np.histogram(x)
            mids = 0.5*(bins[1:] + bins[:-1])
            # Normalised bin occupancy = probability of each bin
            probs = counts / np.sum(counts)
            mean = np.sum(probs * mids) 
            sd = np.sqrt(np.sum(probs * (mids - mean)**2))
            means.append(mean)
            sds.append(sd)
            stderrs.append(sd/np.sqrt(len(x)))
        mean_std['mean'][sample] = np.array(means)
        mean_std['std'][sample] = np.array(sds)
        mean_std['stderr'][sample] = np.array(stderrs)
    return chords_vs_time, mean_std

In [None]:
# Pool the chords of the CLD archive loaded above across repeats and 
# compute the summary statistics used for the ridge plots
chords_vs_time_single_bulk, mean_std_stderr_bulk = mean_std_stderr_cl(
    cld_single_bulk, 
    binning = 1, 
)

In [None]:
# Helper function to get KDE plots for various timepoints
def get_kdes(chords_vs_time_dict, sample, timepoints): 
    """
    Compute the KDE curve of the chord lengths of one sample at the requested timepoints.

    params:
    chords_vs_time_dict: dictionary {sample: [chords at t0, chords at t1, ...]}
    sample: key of the sample in chords_vs_time_dict
    timepoints: iterable of timepoint indices to process

    returns:
    xs, ys: lists (one entry per timepoint) of arrays with the x and y coordinates 
    of the KDE curve as drawn by seaborn's kdeplot
    """
    xs, ys = [], []
    for timepoint in timepoints: 
        # Draw on a private, throw-away figure: the curve read back is then guaranteed to be 
        # this KDE, and no figure that was already open gets drawn on or closed
        fig, ax = plt.subplots()
        sns.kdeplot(np.array(chords_vs_time_dict[sample][timepoint]), ax = ax)
        x, y = ax.lines[0].get_data()
        plt.close(fig)
        xs.append(np.array(x))
        ys.append(np.array(y))
    return xs, ys

# Settings shared by all ridge plots for bulk sticky constructs, one entry per ridge: 
# timepoint index, fill opacity (increasing with time) and nominal time label
timepoints = [1, 25, 44, 56, 68, 80, 92, 104, 116]
alphas = [tenth/10 for tenth in range(1, 10)]
hours = list(range(0, 49, 6))
texts = [f'{hour} h' for hour in hours]

In [None]:
# A - Figure S17
xs, ys = get_kdes(chords_vs_time_single_bulk, 'NS_A', timepoints)

plt.figure(figsize = (10, 6))
n_ridges = len(xs)
for i, (x_kde, y_kde) in enumerate(zip(xs, ys)): 
    # The first timepoint sits on the top ridge; later ones are stacked below it
    level = n_ridges - i - 1
    baseline = level*0.015
    # KDE outline, shifted up to its ridge
    plt.plot(
        x_kde, 
        y_kde + baseline, 
        color = 'k'
    )
    # Filled area between the ridge baseline and the KDE
    plt.fill_between(
        x_kde, 
        baseline, y_kde + baseline, 
        color = 'orangered', 
        alpha = alphas[i]
    )
    plt.axhline(baseline, color = 'black')
    # Time label just above the baseline; the first ridge is labelled '15 min'
    ridge_label = '15 min' if i == 0 else texts[i]
    plt.text(230, level*0.0153, ridge_label, fontsize = 20)
plt.ylim([0, 0.20])
sns.despine(left = True)
plt.yticks([])
# Ticks every 25 um, labelled every 50 um
size_ticks = list(range(0, 251, 25))
plt.xticks(
    size_ticks, 
    [tick if tick % 50 == 0 else '' for tick in size_ticks], 
    fontsize = 20
)
plt.xlabel(r'Size [$\rm\mu$m]', fontsize = 20)
plt.show()

In [None]:
# B - Figure S17
xs, ys = get_kdes(chords_vs_time_single_bulk, 'NS_B', timepoints)

plt.figure(figsize = (10, 6))
n_ridges = len(xs)
for i, (x_kde, y_kde) in enumerate(zip(xs, ys)): 
    # The first timepoint sits on the top ridge; later ones are stacked below it
    level = n_ridges - i - 1
    baseline = level*0.017
    # KDE outline, shifted up to its ridge
    plt.plot(
        x_kde, 
        y_kde + baseline, 
        color = 'k'
    )
    # Filled area between the ridge baseline and the KDE
    plt.fill_between(
        x_kde, 
        baseline, y_kde + baseline, 
        color = 'cyan', 
        alpha = alphas[i]
    )
    plt.axhline(baseline, color = 'black')
    # Time label just above the baseline; the first ridge is labelled '15 min'
    ridge_label = '15 min' if i == 0 else texts[i]
    plt.text(215, level*0.0173, ridge_label, fontsize = 20)
plt.ylim([0, 0.22])
sns.despine(left = True)
plt.yticks([])
# Ticks every 25 um, labelled every 50 um
size_ticks = list(range(0, 251, 25))
plt.xticks(
    size_ticks, 
    [tick if tick % 50 == 0 else '' for tick in size_ticks], 
    fontsize = 20
)
plt.xlabel(r'Size [$\rm\mu$m]', fontsize = 20)
plt.show()

In [None]:
# C - Figure S17
xs, ys = get_kdes(chords_vs_time_single_bulk, 'NS_C', timepoints)

plt.figure(figsize = (10, 6))
n_ridges = len(xs)
for i, (x_kde, y_kde) in enumerate(zip(xs, ys)): 
    # The first timepoint sits on the top ridge; later ones are stacked below it
    level = n_ridges - i - 1
    baseline = level*0.015
    # KDE outline, shifted up to its ridge
    plt.plot(
        x_kde, 
        y_kde + baseline, 
        color = 'k'
    )
    # Filled area between the ridge baseline and the KDE
    plt.fill_between(
        x_kde, 
        baseline, y_kde + baseline, 
        color = 'gray', 
        alpha = alphas[i]
    )
    plt.axhline(baseline, color = 'black')
    # Time label just above the baseline; the first ridge is labelled '15 min'
    ridge_label = '15 min' if i == 0 else texts[i]
    plt.text(430, level*0.0153, ridge_label, fontsize = 20)
plt.ylim([0, 0.24])
sns.despine(left = True)
plt.yticks([])
# Ticks every 50 um, labelled every 100 um
size_ticks = list(range(0, 501, 50))
plt.xticks(
    size_ticks, 
    [tick if tick % 100 == 0 else '' for tick in size_ticks], 
    fontsize = 20
)
plt.xlabel(r'Size [$\rm\mu$m]', fontsize = 20)
plt.show()