Required input data:
- Confocal micrographs (20x objective; available in the data repository under "Confocal_micrographs/Droplets - Confocal Micrographs/20x FOVs/Protein_Capturing_Systems/") and the corresponding droplet ROI masks.
    - The masks can be generated by manually annotating in-focus emulsion droplets/synthetic cells in FIJI with the circular selection tool, and then exporting the ROIs with the `Masks from ROIs` FIJI plugin (https://github.com/LauLauThom/MaskFromRois-Fiji).
 
The script assumes both images and masks are located in `img_dir`.

# Imports and function definitions

In [None]:
# Import dependencies
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
import tiffile
import skimage

from tqdm import tqdm

In [None]:
# Analysis function to compute protein fluorescence intensity ratio 
# within/outside the organelle for a single droplet/synthetic cell
def compute_inside_outside_ratio(
    mask_channel_img, 
    signal_channel_img, 
    roi_mask, 
    blur=True, 
    plot=False
):
    """
    Compute the ratio between the mean protein signal inside the organelle
    and the mean protein signal in the surrounding lumen, for the
    droplet(s)/synthetic cell(s) selected by `roi_mask`.

    The organelle is segmented by Li thresholding of `mask_channel_img`
    restricted to the ROI; the lumen is the remaining part of the ROI after a
    morphological opening. Each mean is taken over the pixels of its own
    region only, so the ratio does not depend on the relative areas of
    organelle and lumen.

    params: 
        mask_channel_img: image corresponding to the fluorescent channel of the RNA nanostar aptamer
            cognate dye (MG, DFHBI). Used to threshold and segment the organelles within the 
            examined synthetic cells/droplets.
        signal_channel_img: image corresponding to the fluorescent channel of the fluorescent protein
            of interest (EYFP, TxRed-STV, Alexa405-STV). This is the signal intensity we are interested
            in comparing inside the organelle and within the droplet lumen. 
        roi_mask: ROI mask of synthetic cells/droplets to examine. It is divided by 255 to
            normalise it, i.e. foreground pixels are expected to have value 255.
        blur: optional boolean flag enabling gaussian denoising.
        plot: optional boolean flag enabling plots to be displayed during the analysis.

    returns: 
        The ratio (mean signal per organelle pixel) / (mean signal per lumen pixel).
        NaN is returned when the organelle or the lumen region is empty; a lumen
        with zero mean signal yields a non-finite value.
    """
    # Normalise the droplet ROI mask (0/255 -> 0/1)
    normalised_roi_mask = roi_mask / 255

    # Optional denoising step. The result is kept under its own name so that
    # the boolean `blur` flag is not overwritten by an image.
    if blur:
        # Denoise via Gaussian Blur - kernel is (3,3)
        denoised = cv2.GaussianBlur(mask_channel_img, (3, 3), 0)
    else:
        denoised = mask_channel_img

    # Restrict the (denoised) mask channel to the droplet ROI
    masked = denoised * normalised_roi_mask

    # Threshold the RNA organelle - Li unsupervised thresholding works best 
    # when you have removed out of focus droplets, etc.
    threshold = skimage.filters.threshold_li(masked)

    # Get organelle mask - above threshold + intersected with normalised droplet roi mask.
    # NOTE(review): the threshold is derived from the denoised image but applied to the
    # raw mask channel, as in the original analysis - confirm this is intended.
    binarised = (mask_channel_img > threshold) * normalised_roi_mask

    # Get droplet lumen mask: below threshold + intersected with normalised droplet roi mask
    rev_binarised = (mask_channel_img <= threshold) * normalised_roi_mask
    # Apply morphological opening to remove thin halos from organelle segmentation
    outside_binary = skimage.morphology.opening(rev_binarised)

    # Compute signal within organelle mask and droplet lumen mask
    inside = binarised * signal_channel_img
    outside = outside_binary * signal_channel_img

    # Optional plots to check output of calculation in image format
    if plot:
        plt.subplots(1, 3, figsize = (12, 4))
        plt.subplot(131)
        plt.imshow(inside)
        plt.subplot(132)
        plt.imshow(outside)
        plt.subplot(133)
        plt.hist(inside.flatten(), label = 'Inside')
        plt.hist(outside.flatten(), label = 'Outside')
        plt.legend(frameon = False)
        plt.show()

    # Area (sum of mask weights) of each region. Averaging over the whole frame
    # instead would make the common pixel count cancel out and reduce the ratio
    # to sum(inside) / sum(outside), which scales with the region areas.
    inside_area = binarised.sum()
    outside_area = outside_binary.sum()
    if inside_area == 0 or outside_area == 0:
        # No organelle or no lumen was segmented: the ratio is undefined
        return np.nan

    # Mean signal per pixel inside the organelle
    mean_inside = inside.sum() / inside_area
    # Mean signal per pixel in the lumen - avoids bias due to size of lumen 
    # compared to size of condensates
    mean_outside = outside.sum() / outside_area
    # Return the ratio between the two mean signals
    return mean_inside / mean_outside

In [None]:
def analyse_individual_droplets(
    mask_channel_img,
    signal_channel_img,
    roi_mask,
    blur=True,
    plot=False,
):
    """
    Run the inside/outside signal-ratio analysis separately on every droplet
    found in `roi_mask`.

    The ROI mask is binarised and split into contours; each contour is drawn,
    filled, onto its own blank mask, which is then passed to
    `compute_inside_outside_ratio` together with the two channel images.

    params:
        mask_channel_img: Image array for the mask channel.
        signal_channel_img: Image array for the signal channel.
        roi_mask: ROI mask indicating regions of interest for droplet analysis.
        blur: boolean flag to enable gaussian denoising.
        plot: boolean flag to enable plots during the analysis.

    Returns:
        an array with one inside-outside ratio per droplet, from which
        non-finite entries (NaN, inf) have been removed.
    """
    # Binarise the ROI mask at mid-range
    _, binary_rois = cv2.threshold(roi_mask, 127, 255, cv2.THRESH_BINARY)
    # One contour per annotated region
    droplet_contours, _ = cv2.findContours(
        binary_rois, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )

    mask_shape = (roi_mask.shape[0], roi_mask.shape[1])
    per_droplet_ratios = []
    for contour in tqdm(droplet_contours):
        # Fresh blank canvas holding only the current droplet, filled in
        droplet_mask = np.zeros(shape=mask_shape)
        cv2.drawContours(droplet_mask, [contour], -1, (255, 255, 255), cv2.FILLED)
        per_droplet_ratios.append(
            compute_inside_outside_ratio(
                mask_channel_img=mask_channel_img,
                signal_channel_img=signal_channel_img,
                roi_mask=droplet_mask,
                blur=blur,
                plot=plot,
            )
        )

    # Keep finite ratios only
    per_droplet_ratios = np.array(per_droplet_ratios)
    finite = np.isfinite(per_droplet_ratios)
    return per_droplet_ratios[finite]

In [None]:
# Helper function to produce boxplots for Figure 5c
def boxplot_with_jittered_datapoints(data, palette, order, ylabels): 
    """
    Draw a horizontal boxplot of 'Ratio' per 'Condition' with the individual
    datapoints overlaid as a jittered strip plot, then show the figure.

    params:
        data: pd.DataFrame with columns 'Ratio', 'Condition' and 'hue'
        palette: colour dictionary
        order: order in which to display 'Condition' boxes
        ylabels: labels to use on y-axis

    returns: 
        nothing; the plot is produced and shown
    """
    fig, ax = plt.subplots(figsize=(3, 1.33))

    # Arguments common to both seaborn layers
    layer_kwargs = dict(
        data=data,
        x='Ratio',
        y='Condition',
        hue='hue',
        palette=palette,
        order=order,
    )

    # Boxes first (outlier markers hidden), then the individual datapoints on top
    sns.boxplot(saturation=0.7, fliersize=0, notch=False, **layer_kwargs)
    sns.stripplot(edgecolor='black', linewidth=1.0, alpha=0.6, **layer_kwargs)

    # Axis labels, ticks and limits
    tick_values = [0, 1, 2, 3, 4, 5]
    ax.set_xlabel(r'$\rm \xi$', fontsize=20)
    ax.set_yticklabels(ylabels, fontsize=20)
    ax.set_xticks(tick_values, labels=list(tick_values), fontsize=20)
    ax.set_xlim([-0.5, 5.5])
    ax.tick_params(direction='in', length=8)
    ax.set_ylabel('')
    plt.show()

# Define directories and perform analysis

In [None]:
# CHANGE TO ABSOLUTE PATH OF DIRECTORY CONTAINING IMAGES
# (the placeholder below must be edited before running this cell)
img_dir = "/ABSOLUTE/PATH/TO/IMAGE/DIRECTORY/"
# Make img_dir the working directory; the bare filenames defined below
# are given without a directory component.
os.chdir(img_dir)


# Filenames are consistent with those used in the data repository;
# when generating the masks, make sure to name them accordingly
# (each mask filename below is the image filename with a `_mask` suffix).

# A_YFP and YFP_apt system: channels: 0 - MG, 1 - BF, 2 - YFP/EYFP
# +YFP_apt-T
yfp_apt_image_file = 'A_OH1+YFP_Apt+EYFP_20x_bin8x8.tif'
yfp_apt_mask_file = 'A_OH1+YFP_Apt+EYFP_20x_bin8x8_mask.tif'

# -YFP_apt-T
no_yfp_apt_image_file = 'A_OH1-YFP_Apt+EYFP_20x_bin8x8.tif'
no_yfp_apt_mask_file = 'A_OH1-YFP_Apt+EYFP_20x_bin8x8_mask.tif'


# B_STV and Biotin_DNA system: channels: 0 - DFHBI, 1 - TxRed-STV, 2 - BF
# +Biotin_DNA (two images, two masks - i.e. two fields of view)
biotin_image_file1 = 'B_OH2+Biotin_DNA_TxRedSTV_20x_bin8x8.tif'
biotin_mask_file1 = 'B_OH2+Biotin_DNA_TxRedSTV_20x_bin8x8_mask.tif'
biotin_image_file2 = 'B_OH2+Biotin_DNA_TxRedSTV_20x_bin8x8_2.tif'
biotin_mask_file2 = 'B_OH2+Biotin_DNA_TxRedSTV_20x_bin8x8_2_mask.tif'

# -Biotin_DNA
no_biotin_image_file = 'B_OH2-Biotin_DNA_TxRedSTV_20x_bin8x8.tif'
no_biotin_mask_file = 'B_OH2-Biotin_DNA_TxRedSTV_20x_bin8x8_mask.tif'


# B_STV and STV_apt system: channels: 0 - BF, 1 - DFHBI, 2 - Alexa405-STV
# NOTE: the channel order differs from the two systems above.
# +STV_apt-T
stv_apt_image_file = 'B_OH2+STV_Apt+Alexa405-STV_20x_bin4x4.tif'
stv_apt_mask_file = 'B_OH2+STV_Apt+Alexa405-STV_20x_bin4x4_mask.tif'

# -STV_apt-T
no_stv_apt_image_file = 'B_OH2-STV_Apt+Alexa405-STV_20x_bin4x4.tif'
no_stv_apt_mask_file = 'B_OH2-STV_Apt+Alexa405-STV_20x_bin4x4_mask.tif'

In [None]:
# A_YFP + YFP_apt system (channels: 0 - MG, 2 - EYFP)
# Each multi-channel stack is read from disk once and then sliced per channel.

# In the presence of YFP_apt-T
yfp_apt_stack = tiffile.imread(yfp_apt_image_file)
yfp_apt_mg = np.array(yfp_apt_stack[0])  # MG channel
yfp_apt_yfp = np.array(yfp_apt_stack[2])  # EYFP channel
yfp_apt_roi_mask = np.array(tiffile.imread(yfp_apt_mask_file))  # Droplet ROI mask
yfp_apt_ratios = analyse_individual_droplets(
    yfp_apt_mg,
    yfp_apt_yfp,
    yfp_apt_roi_mask,
    plot=True,
)

# In the absence of YFP_apt-T
no_yfp_apt_stack = tiffile.imread(no_yfp_apt_image_file)
no_yfp_apt_mg = np.array(no_yfp_apt_stack[0])  # MG channel
no_yfp_apt_yfp = np.array(no_yfp_apt_stack[2])  # EYFP channel
no_yfp_apt_roi_mask = np.array(tiffile.imread(no_yfp_apt_mask_file))  # Droplet ROI mask
no_yfp_apt_ratios = analyse_individual_droplets(
    no_yfp_apt_mg,
    no_yfp_apt_yfp,
    no_yfp_apt_roi_mask,
    plot=True,
)

In [None]:
# B_STV and TxRed-STV via Biotin_DNA system (channels: 0 - DFHBI, 1 - TxRed-STV)
# Each multi-channel stack is read from disk once and then sliced per channel.

# In the presence of Biotin_DNA (two fields of view)
biotin_stack_1 = tiffile.imread(biotin_image_file1)
biotin_dfhbi_1 = np.array(biotin_stack_1[0])  # DFHBI
biotin_stv_1 = np.array(biotin_stack_1[1])  # TexasRed-STV
biotin_mask_1 = np.array(tiffile.imread(biotin_mask_file1))

biotin_stack_2 = tiffile.imread(biotin_image_file2)
biotin_dfhbi_2 = np.array(biotin_stack_2[0])  # DFHBI
biotin_stv_2 = np.array(biotin_stack_2[1])  # TexasRed-STV
biotin_mask_2 = np.array(tiffile.imread(biotin_mask_file2))

biotin_ratios_1 = analyse_individual_droplets(
    biotin_dfhbi_1,
    biotin_stv_1,
    biotin_mask_1,
    plot=True,
)
biotin_ratios_2 = analyse_individual_droplets(
    biotin_dfhbi_2,
    biotin_stv_2,
    biotin_mask_2,
    plot=True,
)
# Concatenate results from 2 FOVs
biotin_ratios = np.concatenate([biotin_ratios_1, biotin_ratios_2])

# In the absence of Biotin_DNA
no_biotin_stack = tiffile.imread(no_biotin_image_file)
no_biotin_dfhbi = np.array(no_biotin_stack[0])  # DFHBI
no_biotin_stv = np.array(no_biotin_stack[1])  # TexasRed-STV
no_biotin_mask = np.array(tiffile.imread(no_biotin_mask_file))
no_biotin_ratios = analyse_individual_droplets(
    no_biotin_dfhbi,
    no_biotin_stv,
    no_biotin_mask,
    plot=True,
)

In [None]:
# B_STV and Alexa405-STV via STV_apt system (channels: 1 - DFHBI, 2 - Alexa405-STV)
# Each multi-channel stack is read from disk once and then sliced per channel.

# In the presence of STV_apt-T
stv_apt_stack = tiffile.imread(stv_apt_image_file)
stv_apt_dfhbi = np.array(stv_apt_stack[1])  # DFHBI is channel 1
stv_apt_stv = np.array(stv_apt_stack[2])  # Alexa405-STV is channel 2
stv_apt_roi_mask = np.array(tiffile.imread(stv_apt_mask_file))
stv_apt_ratios = analyse_individual_droplets(
    stv_apt_dfhbi,
    stv_apt_stv,
    stv_apt_roi_mask,
    plot=True,
)

# In the absence of STV_apt-T
no_stv_apt_stack = tiffile.imread(no_stv_apt_image_file)
no_stv_apt_dfhbi = np.array(no_stv_apt_stack[1])  # DFHBI is channel 1
no_stv_apt_stv = np.array(no_stv_apt_stack[2])  # Alexa405-STV is channel 2
no_stv_apt_roi_mask = np.array(tiffile.imread(no_stv_apt_mask_file))
no_stv_apt_ratios = analyse_individual_droplets(
    no_stv_apt_dfhbi,
    no_stv_apt_stv,
    no_stv_apt_roi_mask,
    plot=True,
)

# Process results into pandas DataFrames and plot

In [None]:
# Single mapping from condition label to its array of per-droplet ratios.
# Both dataframe columns are derived from it, so labels and values cannot
# get out of step (dicts preserve insertion order).
ratios_by_condition = {
    '+YFP_apt': yfp_apt_ratios,
    '-YFP_apt': no_yfp_apt_ratios,
    '+Biotin_DNA': biotin_ratios,
    '-Biotin_DNA': no_biotin_ratios,
    '+STV_apt': stv_apt_ratios,
    '-STV_apt': no_stv_apt_ratios,
}

# Create sample 'name' array to use as dataframe column:
# each label is repeated once per ratio measured in that condition
conditions = np.repeat(
    list(ratios_by_condition),
    [len(ratios) for ratios in ratios_by_condition.values()],
)

# Build dictionary
data = {
    'Condition': conditions,
    'Ratio': np.concatenate(list(ratios_by_condition.values())),
}
# Convert to pandas DataFrame
df = pd.DataFrame(data)
# Visualise a simple boxplot (all systems at once) using seaborn
sns.boxplot(data=df, y='Condition', x='Ratio')

In [None]:
# Figure 5c-i: A_YFP and YFP_apt system

# Subset dataframe. An explicit copy is taken so that adding the 'hue'
# column below modifies an independent frame rather than a slice of `df`
# (avoids pandas' SettingWithCopyWarning).
eyfp_data = df[df.Condition.isin(['-YFP_apt', '+YFP_apt'])].copy()
eyfp_data['hue'] = eyfp_data.Condition

# Choose colour palette
palette = {
    '-YFP_apt': 'darkorange',
    '+YFP_apt': 'gold',
}

boxplot_with_jittered_datapoints(
    data=eyfp_data, 
    palette=palette,
    order=['-YFP_apt', '+YFP_apt'], 
    ylabels=[r'$\rm -YFP_{apt}$-T', r'$\rm +YFP_{apt}$-T'],
)

In [None]:
# Figure 5c-ii: B_STV and STV_apt system

# Subset dataframe. An explicit copy is taken so that adding the 'hue'
# column below modifies an independent frame rather than a slice of `df`
# (avoids pandas' SettingWithCopyWarning).
stv_data = df[df.Condition.isin(['-STV_apt', '+STV_apt'])].copy()
stv_data['hue'] = stv_data.Condition

# Choose colour palette
palette = {
    '-STV_apt': 'mediumslateblue',
    '+STV_apt': 'magenta',
}

boxplot_with_jittered_datapoints(
    data=stv_data, 
    palette=palette,
    order=['-STV_apt', '+STV_apt'], 
    ylabels=[r'$\rm -STV_{apt}$-T', r'$\rm +STV_{apt}$-T'],
)

In [None]:
# Figure 5c-iii: B_STV and Biotin_DNA system

# Subset dataframe. An explicit copy is taken so that adding the 'hue'
# column below modifies an independent frame rather than a slice of `df`
# (avoids pandas' SettingWithCopyWarning).
biotin_data = df[df.Condition.isin(['-Biotin_DNA', '+Biotin_DNA'])].copy()
biotin_data['hue'] = biotin_data.Condition

# Choose colour palette
palette = {
    '-Biotin_DNA': 'thistle',
    '+Biotin_DNA': 'firebrick',
}


boxplot_with_jittered_datapoints(
    data=biotin_data, 
    palette=palette,
    order=['-Biotin_DNA', '+Biotin_DNA'], 
    ylabels=[r'$\rm -Biotin_{DNA}$', r'$\rm +Biotin_{DNA}$'],
)