# Analyze Single Cell Fluorescence Intensity from embryo sections

This code was used to analyze BRE activity within single neural crest nuclei from section images.

Required inputs for this script:

1. .csv file containing source data for each image documenting the area, mean, intden, and raw intden for individual cell regions of interest (ROIs) masked and measured in Fiji

Script prepared by Mike Piacentino, March 2021

### Import Packages

In [1]:
# Import data handling and analysis packages
import os
import pandas as pd
import glob
from scipy import stats
import scikit_posthocs

# Import plotting packages
import iqplot
import bokeh.io
from bokeh.io import output_file, show
from bokeh.layouts import column, row
bokeh.io.output_notebook()

### Import Data

In [4]:
source_data = pd.read_csv('Fig1_source_data.csv')
source_data.head()

Unnamed: 0.1,Unnamed: 0,ExptDate,Electroporation,Stains,Embryo,Somites,Section,Population,Channel,ROI,Area,Mean,IntDen,RawIntDen,SecID
0,1,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,Pax7,0001-0505,40.605,322.387,13090.479,254041.0,20210121_Emb6_11ss_sec3
1,2,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,Pax7,0002-0494,19.684,435.673,8575.817,166427.0,20210121_Emb6_11ss_sec3
2,3,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,Pax7,0003-0508,26.125,398.142,10401.541,201858.0,20210121_Emb6_11ss_sec3
3,4,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,Pax7,0004-0516,23.806,258.158,6145.812,119269.0,20210121_Emb6_11ss_sec3
4,5,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,Pax7,0005-0516,10.46,270.182,2826.211,54847.0,20210121_Emb6_11ss_sec3


### Calculate Corrected Total Cellular Fluorescence (CTCF) for each ROI

This will determine the fluorescence in each ROI (expression level), corrected for size and background intensity in the image

<center>$CTCF = IntDen_{ROI} - (Area_{ROI} * Mean_{background})$</center>

In [5]:
# Determine channels and sections in dataframe to prepare loops
channels_list = source_data['Channel'].unique().tolist()
sec_list = source_data['SecID'].unique().tolist()

# Initialize output dataframe
df_ctcf = pd.DataFrame()
output_list = []

# Loop through sections...
for sec in sec_list:
    df_sec = pd.DataFrame(source_data[source_data['SecID'] == sec])
    
    # ... then loop through channels
    for ch in channels_list:
        df_ch = pd.DataFrame(df_sec[df_sec['Channel'] == ch])
        # Save out mean background intensity
        background_mean = df_ch.loc[df_ch['ROI'] == 'background']['Mean']
        
        # Make an ROI list, remove the 'background' ROI, then loop through ROIs
        roi_list = df_ch['ROI'].unique().tolist()
        roi_list.remove('background')
        for roi in roi_list:
            df_roi = pd.DataFrame(df_ch[df_ch['ROI'] == roi])
            # Calculate CTCF
            df_roi['CTCF'] = float(df_roi['IntDen']) - (float(df_roi['Area']) * float(background_mean))
            # Append list to populate output dataframe
            output_list.append(df_roi)

# Compile dataframe
df_ctcf = pd.concat(output_list)
df_ctcf.head(3)

Unnamed: 0.1,Unnamed: 0,ExptDate,Electroporation,Stains,Embryo,Somites,Section,Population,Channel,ROI,Area,Mean,IntDen,RawIntDen,SecID,CTCF
0,1,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,Pax7,0001-0505,40.605,322.387,13090.479,254041.0,20210121_Emb6_11ss_sec3,12667.009455
1,2,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,Pax7,0002-0494,19.684,435.673,8575.817,166427.0,20210121_Emb6_11ss_sec3,8370.532564
2,3,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,Pax7,0003-0508,26.125,398.142,10401.541,201858.0,20210121_Emb6_11ss_sec3,10129.083375


### Now determine the ratio of BRE CTCF/RFP CTCF to normalize BRE intensity to an electroporation control

In [6]:
# Determine sections in dataframe to prepare loop, and initialize output dataframe
sec_list = df_ctcf['SecID'].unique().tolist()
norm_output = []
df_norm_bre = pd.DataFrame()

# Loop through sections...
for sec in sec_list:
    df_sec = pd.DataFrame(df_ctcf[df_ctcf['SecID'] == sec])

    # Make an ROI list, then loop through ROIs
    roi_list = df_sec['ROI'].unique().tolist()
    for roi in roi_list:
        # Extract dataframe specific to ROI
        roi_df = df_sec.loc[df_sec['ROI'] == roi]
        # Record CTCF values for BRE and RFP within this ROI
        bre = float(roi_df.loc[roi_df['Channel']=='BRE']['CTCF'])
        rfp = float(roi_df.loc[roi_df['Channel']=='RFP']['CTCF'])
        # Extract just the BRE row
        roi_bre_df = roi_df.loc[roi_df['Channel']=='BRE'].reset_index().drop(['index'], axis=1)
        # Add Normalized CTCF to the BRE row
        roi_bre_df.loc[0, 'NormCTCF'] = bre/rfp
        # Append list to populate output dataframe
        norm_output.append(roi_bre_df)

# Compile dataframe
df_norm_bre = pd.concat(norm_output)
df_norm_bre.head()

Unnamed: 0.1,Unnamed: 0,ExptDate,Electroporation,Stains,Embryo,Somites,Section,Population,Channel,ROI,Area,Mean,IntDen,RawIntDen,SecID,CTCF,NormCTCF
0,139,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,BRE,0001-0505,40.605,268.901,10918.686,211894.0,20210121_Emb6_11ss_sec3,10630.187475,1.84886
0,140,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,BRE,0002-0494,19.684,408.516,8041.255,156053.0,20210121_Emb6_11ss_sec3,7901.40018,0.964088
0,141,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,BRE,0003-0508,26.125,22.329,583.36,11321.0,20210121_Emb6_11ss_sec3,397.741875,0.577789
0,142,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,BRE,0004-0516,23.806,486.71,11586.811,224860.0,20210121_Emb6_11ss_sec3,11417.66937,2.798568
0,143,20210121,BRE;H2Bd2EGFP,Pax7;RFP;d2EGFP,Emb6,11ss,sec3,pNC,BRE,0005-0516,10.46,378.911,3963.559,76919.0,20210121_Emb6_11ss_sec3,3889.2407,5.512238


## Exploratory Plotting in ECDF and Stripbox Formats

In [12]:
# Define data subset to plot
data = df_norm_bre[~df_norm_bre['ROI'].isin(['background'])]  #Selects all single-cell ROIs

# Build ECDF plot
ecdf = iqplot.ecdf(
                    # Data to plot
                        data=data,
                        q='NormCTCF', q_axis='x',
                        cats=['Somites'],

                    # Plot details
                        style='staircase',
                        conf_int=True, ptiles=[2.5, 97.5],        #95% CIs, use [16,84] for SEM (68% CI)
                        x_axis_type='log',
                        frame_width=500, frame_height=400,

                    # Plot Customizations
                        order=['6ss','7ss','8ss','9ss','10ss','11ss'],
                        palette=['#9ecae1','#6baed6','#4292c6','#2171b5','#08519c','#08306b'],
                        title='Norm. BRE CTCF',
                        y_axis_label='Empirical Cumulative Distribution Frequency',
                        x_axis_label='Log Normalized BRE activity',
    
)
ecdf.axis.axis_label_text_font_size = '14px'
ecdf.axis.major_label_text_font_size = '14px'
ecdf.axis.axis_label_text_font_style = 'normal'

# Build Stripbox plot
stripbox = iqplot.stripbox(
                    # Data to plot
                        data=data,
                        q='NormCTCF', q_axis='y',
                        cats=['Somites'], 

                    # Plot details
                        jitter=True, jitter_kwargs=dict(width=0.3),
                        marker_kwargs=dict(alpha=0.2),
                        box_kwargs=dict(line_color='black', line_width=1.5),
                        whisker_kwargs=dict(line_color='black', line_width=1.5),
                        median_kwargs=dict(line_color='black', line_width=2),
                        top_level='box',
                        y_axis_type='log',
                        frame_width=500, frame_height=400,

                    # Plot customizations
                        order=['6ss','7ss','8ss','9ss','10ss','11ss'],
                        color_column='Population',
                        title='Norm. BRE CTCF',
                        y_axis_label='Log Normalized BRE Activity',
                        x_axis_label='Embryo stage',
                        show_legend=True,
)
stripbox.axis.axis_label_text_font_size = '14px'
stripbox.axis.major_label_text_font_size = '14px'
stripbox.axis.axis_label_text_font_style = 'normal'
stripbox.legend.location = 'bottom_center'

# Display plots
show(row(ecdf, stripbox))

# Statistical Analysis

#### One-Way ANOVA with Tukey's Post-hoc for Multiple Comparisons

In [13]:
# Determine samples to compare
samples_list = data['Somites'].unique().tolist()
samples_list

['11ss', '10ss', '6ss', '8ss', '7ss', '9ss']

In [20]:
### One-way ANOVA

# Define metric to test
metric = 'NormCTCF'
sample_column = 'Somites'
# Define samples to compare
sample1 = '6ss'
sample2 = '7ss'
sample3 = '8ss'
sample4 = '9ss'
sample5 = '10ss'
sample6 = '11ss'
# ... can add more

# Run One-way ANOVA test
anova_result = stats.f_oneway(
                data.loc[data[sample_column]==sample1][metric]
               ,data.loc[data[sample_column]==sample2][metric]
               ,data.loc[data[sample_column]==sample3][metric]
               ,data.loc[data[sample_column]==sample4][metric]
               ,data.loc[data[sample_column]==sample5][metric]
    )

# Tukey's post-hoc for multiple comparisons
mult_compar = scikit_posthocs.posthoc_tukey(data, val_col=metric, group_col=sample_column).round(6)

# Display test results
print('One-way Anova test results: \n\t\t\t statistic=' + str(anova_result[0]) + 
    '\n\t\t\t p-value=' + str(anova_result[1]))
print("\nTukey's post-hoc multiple comparison result: \n" + str(mult_compar) +'\n')
# mult_compar.to_csv("Results of Tukey's Posthoc.csv")

# Get number of cells within this test
for sample in data[sample_column].unique().tolist():
    temp_df = data.loc[data[sample_column] == sample]
    print('n = ' + str(len(temp_df)) + ' cells in the ' + str(sample) + ' dataset.')

One-way Anova test results: 
			 statistic=17.307450949273257
			 p-value=6.163481412788096e-14

Tukey's post-hoc multiple comparison result: 
          11ss      10ss       6ss    8ss       7ss       9ss
11ss  1.000000  0.900000  0.019195  0.001  0.900000  0.019009
10ss  0.900000  1.000000  0.011133  0.001  0.900000  0.033013
6ss   0.019195  0.011133  1.000000  0.001  0.076756  0.001000
8ss   0.001000  0.001000  0.001000  1.000  0.001000  0.900000
7ss   0.900000  0.900000  0.076756  0.001  1.000000  0.037638
9ss   0.019009  0.033013  0.001000  0.900  0.037638  1.000000

n = 1097 cells in the 11ss dataset.
n = 942 cells in the 10ss dataset.
n = 152 cells in the 6ss dataset.
n = 277 cells in the 8ss dataset.
n = 305 cells in the 7ss dataset.
n = 93 cells in the 9ss dataset.
