# Analyze Fluorescent Intensity Data

## Import Modules

In [13]:
# Import data handling and analysis packages
import os
import glob
import pandas as pd
from scipy import stats

# Import plotting packages
import iqplot
import bokeh.io
from bokeh.io import output_file, show
from bokeh.layouts import column, row
bokeh.io.output_notebook()

## Import and parse raw data

In [14]:
# Add experiment date here to apply to dataframe
path = os.path.abspath('')+'/raw_data_csvs/'
full_df = pd.DataFrame()
list_ = []

# For loop to bring in files and concatenate them into a single dataframe


for file_ in glob.glob(path + "/*.csv"):
    df = pd.read_csv(file_)
    # Determine Image name from file name
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]
    # Split values in ROI label
    df['Fluor'], df['ROI'] = zip(*df['Label'].map(lambda x: x.split(':')))
    # Split values in Image name column
    (df['ExptDate'], df['Treatment'], df['Dose'], df['Stains'], df['Embryo'], 
        df['Somites'], df['Section']) = zip(*df['Image'].map(lambda x: x.split('_')))
    list_.append(df)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,IntDen,RawIntDen,Image,Fluor,ROI,ExptDate,Treatment,Dose,Stains,Embryo,Somites,Section
0,1,H2BRFP:background,11.594,8.973,104.037,2019.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,background,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1
1,2,H2BRFP:background,14.067,8.707,122.484,2377.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,background,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1
2,3,H2BRFP:background,7.42,9.153,67.915,1318.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,background,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1
3,4,H2BRFP:Cntl,4296.436,66.936,287585.668,5581045.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,Cntl,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1
4,5,H2BRFP:Expt,4491.216,68.131,305989.869,5938207.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,Expt,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1


## Calculate Corrected Total Cellular Fluorescence (CTCF) for each ROI

This will determine the fluorescence in each ROI (expression level), corrected for size and background intensity in the image, and normalize signaling reporter intensity (TCFLef::H2B-d2EGFP) over ubiquitous marker (H2B-RFP) to account for electroporation efficiency

In [27]:
# Define channels
expt_channel = 'TCFLefH2Bd2EGFP'
cntl_channel = 'H2BRFP'
cntl_construct = 'H2BRFP'
expt_construct = 'SMPD3FLAG'

# Get a list of treatments
treatment_list = full_df.Treatment.unique().tolist()
date_list = full_df.ExptDate.unique().tolist()

# Mean background values and group by Treatment, Embryo, Fluor, ROI and Section
mean_sections = ((full_df.groupby(['ExptDate','Treatment', 'Embryo', 'Fluor', 'ROI', 'Section', 'ExptDate'])
                  ['Area', 'Mean', 'IntDen']).mean())

# Loop trough treatments, performing each analysis and exporting CSV file for each treatment
full_results = pd.DataFrame()
list_ = []

for j in date_list:
    date = j
    df_date = pd.DataFrame(mean_sections.xs(date))
    
    for i in treatment_list:
        # Slice dataframe to process only embryos with given treatment
        treatment = i
        df_treatment = pd.DataFrame(df_date.xs(treatment))

        # Determine CTCF values = ROI IntDen - (background mean * ROI area)
        # Calculate background (background mean * ROI area)
        background_corr_cntl = (df_treatment.xs('background', level='ROI')['Mean'] 
                        * df_treatment.xs('Cntl', level='ROI')['Area'])
        background_corr_expt = (df_treatment.xs('background', level='ROI')['Mean'] 
                        * df_treatment.xs('Expt', level='ROI')['Area'])

        # Slice out only Cntl or Expt values in IntDen
        intdens_cntl = df_treatment.xs('Cntl', level='ROI')['IntDen'] 
        intdens_expt = df_treatment.xs('Expt', level='ROI')['IntDen'] 

        # Subtract background from IntDens to determine CTCF and concatenate into single dataframe
        sub_cntl = pd.DataFrame(intdens_cntl - background_corr_cntl)
        sub_expt = pd.DataFrame(intdens_expt - background_corr_expt)
        full_ctcf = pd.concat([sub_cntl, sub_expt], keys = ['Cntl', 'Expt'])
        full_ctcf.columns = ['CTCF']

        # Pull out TCFLef and H2BRFP values
        ctcf_expt_channel = full_ctcf.xs(expt_channel, level='Fluor')['CTCF'] 
        ctcf_cntl_channel = full_ctcf.xs(cntl_channel, level='Fluor')['CTCF'] 

        # Normalize for electroporation efficiency by determining TCFLef/RFP
        electroporation_norm = pd.DataFrame(ctcf_expt_channel / ctcf_cntl_channel)
        electroporation_norm.columns = ['CTCF']
        electroporation_norm.index.names = ['Side', 'Embryo', 'Section', 'ExptDate']

        # Average sections grouped by embryos before generating Expt/Cntl ratio
        averaged_sections = electroporation_norm.groupby(['Side','Embryo', 'ExptDate']).mean()

        # Pull out Cntl and Expt CTCFs
        ctcf_cntl = averaged_sections.xs('Cntl', level='Side')['CTCF'] 
        ctcf_expt = averaged_sections.xs('Expt', level='Side')['CTCF'] 

        # Generate ratios as Expt/Cntl
        ratios_sections = pd.DataFrame(ctcf_expt / ctcf_cntl)
        ratios_sections.columns = ['Expt/Cntl CTCF']

        # Normalize individual values to mean of control group
        norm_cntl = ctcf_cntl/(float(ctcf_cntl.mean()))
        norm_cntl = pd.DataFrame(norm_cntl)
        norm_cntl.columns = [cntl_construct + ' normCTCF']
        norm_expt = ctcf_expt/(float(ctcf_cntl.mean()))
        norm_expt = pd.DataFrame(norm_expt)
        norm_expt.columns = [expt_construct + ' normCTCF']

        # Combine processed values into single dataframe and output as csv file 'Results.csv'
        ctcf_cntl = pd.DataFrame(ctcf_cntl)
        ctcf_cntl.columns = ['Cntl CTCF']
        ctcf_expt = pd.DataFrame(ctcf_expt)
        ctcf_expt.columns = ['Expt CTCF']
        results = (pd.concat([ctcf_cntl, ctcf_expt, ratios_sections, norm_cntl, norm_expt], axis=1, sort=True)).reset_index()
        results['ID'] = results.ExptDate.str.cat(results.Embryo)
        results['Readout'] = str(expt_channel)
        list_.append(results)
    
full_results = pd.concat(list_)
full_results.to_csv(expt_construct + ' ' + expt_channel + ' normCTCF Results.csv')
full_results.head(10)

Unnamed: 0,Embryo,ExptDate,Cntl CTCF,Expt CTCF,Expt/Cntl CTCF,H2BRFP normCTCF,SMPD3FLAG normCTCF,ID,Readout
0,Emb1,20210830,0.784439,0.497574,0.634306,0.941358,0.597109,20210830Emb1,TCFLefH2Bd2EGFP
1,Emb3,20210830,1.378069,0.606512,0.440117,1.653738,0.727839,20210830Emb3,TCFLefH2Bd2EGFP
2,Emb4,20210830,0.337408,0.334423,0.991152,0.404903,0.401321,20210830Emb4,TCFLefH2Bd2EGFP
0,Emb3,20210824,3.482912,3.932463,1.129073,1.281019,1.446364,20210824Emb3,TCFLefH2Bd2EGFP
1,Emb4,20210824,1.107882,1.187456,1.071825,0.40748,0.436748,20210824Emb4,TCFLefH2Bd2EGFP
2,Emb5,20210824,3.115905,2.99343,0.960694,1.146033,1.100987,20210824Emb5,TCFLefH2Bd2EGFP
3,Emb6,20210824,2.912831,2.81711,0.967138,1.071342,1.036136,20210824Emb6,TCFLefH2Bd2EGFP
4,Emb7,20210824,2.974776,2.68383,0.902196,1.094126,0.987116,20210824Emb7,TCFLefH2Bd2EGFP


## Plot and perform statistical analysis

In [30]:
data = full_results.reset_index()

# Build Stripbox plot
stripbox = iqplot.stripbox(
                    # Data to plot
                        data=data,
                        q='Expt/Cntl CTCF', q_axis='y',
                        cats=['Readout'], 

                    # Plot details
                        jitter=True, jitter_kwargs=dict(width=0.3),
                        marker_kwargs=dict(alpha=0.8, size=8
#                                            ,color='darkgray'
                                          ),
                        box_kwargs=dict(line_color='black', line_width=1.5),
                        whisker_kwargs=dict(line_color='black', line_width=1.5),
                        median_kwargs=dict(line_color='black', line_width=2),
                        top_level='box',
                        frame_width=150, frame_height=300,

                    # Plot customizations
#                         order=targets,
                        y_range=(0,1.5),
                        y_axis_label='Normalized CTCF (Expt/Cntl)',
                        x_axis_label=expt_construct,
                        show_legend=False,
)

# Final customizations
stripbox.axis.axis_label_text_font_size = '16px'
stripbox.axis.major_label_text_font_size = '16px'
stripbox.axis.axis_label_text_font_style = 'bold'
stripbox.xaxis.major_label_text_font_style = 'italic'

# View plot
show(stripbox)

In [31]:
################### Isolate data for analysis ###################
# Pull out only cells and treaments of interest, and rename ROIs with the appropriate constructs
data=full_results.reset_index()
data=data.loc[data['Readout'] == expt_channel]
data=data.filter(['ID', cntl_construct + ' normCTCF', expt_construct + ' normCTCF'])
data=data.melt(id_vars=['ID'], var_name='ROI', value_name='Norm CTCF')
data['ROI'] = data['ROI'].str.replace(' normCTCF', '')

################### Plot as strip plot ###################
# Plot as strip plot
p1 = iqplot.strip(data=data
                ,q='Norm CTCF', q_axis='y'
                ,cats=['ROI'], parcoord_column='ID'
                ,y_range=(0,2)
                ,frame_height = 300, frame_width = 150
                ,y_axis_label= 'Normalized CTCF'
#                 ,color_column='Image'
                ,marker_kwargs=dict(size=5,color='black')
                ,parcoord_kwargs=dict(line_width=1,color='gray')
#                 ,show_legend=True
              )

# p1.axis.axis_label_text_font_style = 'bold italic'
p1.axis.axis_label_text_font_size = '14px'
p1.axis.major_label_text_font_size = '12px'
p1.axis.axis_label_text_font_style = 'normal'
p1.xaxis.major_label_orientation = 7

show(row(p1))

################### Perform statistical analysis ###################

# Perform Paired t test 
cntl = data.loc[data['ROI'] == cntl_construct]['Norm CTCF']
expt = data.loc[data['ROI'] == expt_construct]['Norm CTCF']
ttest = stats.ttest_rel(cntl,expt)

# Display test results
print('Paired t-test results: \n\t\t statistic=' + str(ttest[0]) + 
    '\n\t\t p-value=' + str(ttest[1]))

Paired t-test results: 
		 statistic=1.3095521076205425
		 p-value=0.2316888357457469
