# SMPD3 Enh3 Activity after Sox10 Bing Site Mutagenesis
## Fluorescence intensity measurements

Analyze whole mount fluorescent intensity measurements. This script with import raw measurements from Fiji, then normalize results based on electroporation efficiency.

In [31]:
# Import data handling and analysis packages
import os
import glob
import pandas as pd
from scipy import stats


# Import plotting packages
import iqplot
import bokeh.io
from bokeh.models import Span, Title
from bokeh.io import output_file, show
from bokeh.layouts import column, row
bokeh.io.output_notebook()

## Import source data

In [32]:
## Navigate to CSV path
path = os.path.abspath('')+'/raw_source_data/'
full_df = pd.DataFrame()
list_ = []

for file_ in glob.glob(path + "/*.csv"):         # For loop to bring in files and concatenate them into a single dataframe
    df = pd.read_csv(file_)
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]                      # Determine Image name from file name
    (df['ExptDate'], df['Treatment'], df['Stains'], df['Embryo'],                   # Split values in Image name column
         df['Somites'], df['Mag']) = zip(*df['Image'].map(lambda x: x.split('_')))
    df['EmbID'] = df['ExptDate'] + '_' + df['Embryo']
    df['Fluor'] = df['Label'].map(lambda x: x.split(':')[0])                       # Split values in ROI label
    df['ROI'] = df['Label'].map(lambda x: x.split(':')[1])                          # Split values in ROI label
    list_.append(df)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,IntDen,RawIntDen,Image,ExptDate,Treatment,Stains,Embryo,Somites,Mag,EmbID,Fluor,ROI
0,1,RFP:CntlArea,106625.456,49.483,5276103.0,6399434.0,20211116_Enh3delSox10_BF;Pax7;RFP;GFP_Emb5_9ss...,20211116,Enh3delSox10,BF;Pax7;RFP;GFP,Emb5,9ss,5xMIP,20211116_Emb5,RFP,CntlArea
1,2,RFP:ExptArea,96808.563,13.074,1265660.0,1535131.0,20211116_Enh3delSox10_BF;Pax7;RFP;GFP_Emb5_9ss...,20211116,Enh3delSox10,BF;Pax7;RFP;GFP,Emb5,9ss,5xMIP,20211116_Emb5,RFP,ExptArea
2,3,EGFP:CntlArea,106625.456,197.519,21060590.0,25544590.0,20211116_Enh3delSox10_BF;Pax7;RFP;GFP_Emb5_9ss...,20211116,Enh3delSox10,BF;Pax7;RFP;GFP,Emb5,9ss,5xMIP,20211116_Emb5,EGFP,CntlArea
3,4,EGFP:ExptArea,96808.563,172.385,16688310.0,20241411.0,20211116_Enh3delSox10_BF;Pax7;RFP;GFP_Emb5_9ss...,20211116,Enh3delSox10,BF;Pax7;RFP;GFP,Emb5,9ss,5xMIP,20211116_Emb5,EGFP,ExptArea
0,1,RFP:CntlArea,94233.762,111.944,10548920.0,12794887.0,20211116_Enh3delSox10_BF;Pax7;RFP;GFP_Emb9_8ss...,20211116,Enh3delSox10,BF;Pax7;RFP;GFP,Emb9,8ss,5xMIP,20211116_Emb9,RFP,CntlArea


## Extract, organize, and analyse results

- Pull out raw results and calculate Corrected Total Cellular Fluorescence (CTCF) -> output = no_norm_results
- Normalize results to electroporation efficiency by calculating RFP/GFP signal -> output = elec_norm_results
- Generate table with ratios of Experiment/Control values -> output = ratio_results

In [33]:
# Define control and experimental constructs and copy out raw data to analyze
cntl_construct = 'Enh3::RFP'
expt_construct = 'Enh3ΔSox10::RFP'
data_df = full_df.copy()
 
# Initialize for final dataframe collection
no_norm_results_list = []

# Loop through fluorescent target channels:
fluor_list = full_df.Fluor.unique().tolist()
for fluor in fluor_list:
    df_fluor = data_df.loc[data_df['Fluor'] == fluor][['ExptDate','Fluor','EmbID','Treatment',
                                                       'Somites','ROI','IntDen','RawIntDen','Mean','Area']]

    # Loop through embryos:
    embryo_results_list = []          
    embryo_list = df_fluor.EmbID.unique().tolist()
    for embryo in embryo_list:
        df_embryo = df_fluor.loc[df_fluor['EmbID'] == embryo]
        cntl_intden = float(df_embryo.loc[df_embryo['ROI'] == 'CntlArea']['IntDen'])
        expt_intden = float(df_embryo.loc[df_embryo['ROI'] == 'ExptArea']['IntDen'])

        # Assemble output df from specific values in each embryo dataset, and append to building list of embryo dfs
        data = {'Fluor': [fluor, fluor], 'EmbID': [embryo, embryo] 
                ,'Treatment': [df_embryo.tail(1)['Treatment'].values[0], df_embryo.tail(1)['Treatment'].values[0]]
                ,'Somites': [df_embryo.tail(1)['Somites'].values[0], df_embryo.tail(1)['Somites'].values[0]]
                ,'ROI': [cntl_construct, expt_construct]
                ,'IntDen': [cntl_intden, expt_intden]
               }
        embryo_results_list.append(pd.DataFrame(data))

    # Combine embryo 
    no_norm_results_list.append(pd.concat(embryo_results_list, sort=False).reset_index().drop('index', axis=1))

# Assemble the final unnormalized results
no_norm_results = pd.concat(no_norm_results_list,sort=False).reset_index().drop('index', axis=1)

# Pull out separate channel DFs for electroporation normalization "ElecNorm" (Enh3 RFP/GFP)
elec_norm_results = no_norm_results.loc[no_norm_results['Fluor'] == 'RFP'].reset_index().drop(['index'], axis=1)
gfp_results = no_norm_results.loc[no_norm_results['Fluor'] == 'EGFP'].reset_index().drop(['index'], axis=1)
elec_norm_results['ElecNormIntDen'] = elec_norm_results['IntDen']/gfp_results['IntDen']

# Pull out ROIs to produce ratio results (Experiment/Control values)
ratio_results = elec_norm_results.loc[elec_norm_results['ROI'] == expt_construct].reset_index().drop(['index'], axis=1)
cntl_side_results = elec_norm_results.loc[elec_norm_results['ROI'] == cntl_construct].reset_index().drop(['index'], axis=1)
ratio_results['ratio IntDen'] = ratio_results['IntDen']/cntl_side_results['IntDen']
ratio_results['ratio ElecNormIntDen'] = ratio_results['ElecNormIntDen']/cntl_side_results['ElecNormIntDen']
ratio_results = ratio_results.drop(['IntDen'], axis=1)
ratio_results.head(10)

Unnamed: 0,Fluor,EmbID,Treatment,Somites,ROI,ElecNormIntDen,ratio IntDen,ratio ElecNormIntDen
0,RFP,20211116_Emb5,Enh3delSox10,9ss,Enh3ΔSox10::RFP,0.075841,0.239885,0.302735
1,RFP,20211116_Emb9,Enh3delSox10,8ss,Enh3ΔSox10::RFP,0.071124,0.108157,0.2317
2,RFP,20211206_Emb1,Enh3delSox10,8ss,Enh3ΔSox10::RFP,0.279064,0.307666,0.589961
3,RFP,20211206_Emb5,Enh3delSox10,7ss,Enh3ΔSox10::RFP,0.45372,0.338117,0.712951
4,RFP,20211116_Emb3,Enh3delSox10,8ss,Enh3ΔSox10::RFP,0.068707,0.127315,0.299935
5,RFP,20211116_Emb10,Enh3delSox10,11ss,Enh3ΔSox10::RFP,0.085204,0.217456,0.406476
6,RFP,20211116_Emb2,Enh3delSox10,11ss,Enh3ΔSox10::RFP,0.077828,0.73865,0.407974
7,RFP,20211116_Emb6,Enh3delSox10,10ss,Enh3ΔSox10::RFP,0.058906,0.398594,0.249257
8,RFP,20211116_Emb11,Enh3delSox10,9ss,Enh3ΔSox10::RFP,0.157281,0.232105,0.404578
9,RFP,20211206_Emb4,Enh3delSox10,10ss,Enh3ΔSox10::RFP,0.546622,0.640123,0.692664


## Plot ratio values

- Plot measured vallues as parallel coordinate plot showing control vs experimental sides
- Plot stripbox plot with the corresponding ratio results
- Perform two-tailed paired t test to determine if we can reject the null hypothesis

In [34]:
################### Isolate data for analysis ###################
# Annotate data further to plot 
cntl_construct = 'Enh3::RFP'
expt_construct = 'Enh3ΔSox10::RFP'
metric_parcoord = 'ElecNormIntDen'
metric_ratio = 'ratio ElecNormIntDen'
dataset_to_plot_parcoord = elec_norm_results 
dataset_to_plot_ratio = ratio_results

# Target to parse:
target = ['RFP']
stages = ['8ss', '9ss', '10ss', '11ss']

# Pull out only cells and treaments of interest, and rename ROIs with the appropriate constructs
df_parcoord = dataset_to_plot_parcoord.loc[dataset_to_plot_parcoord['Fluor'].isin(target)].copy()
df_parcoord.replace(to_replace = {'Cntl': cntl_construct, 'Expt': expt_construct}, inplace=True)
df_parcoord = df_parcoord.loc[df_parcoord['Somites'].isin(stages)].copy()

df_ratio = dataset_to_plot_ratio.loc[dataset_to_plot_ratio['Fluor'].isin(target)].copy()
df_ratio = df_ratio.loc[df_ratio['Somites'].isin(stages)].copy()

################### Plot as parallel coordinate plot ###################
# Plot as strip plot
# p1 = iqplot.strip(
#                 # Data to plot
#                 data=df_parcoord
#                 ,q=metric_parcoord, q_axis='y'
#                 ,cats='ROI', parcoord_column='EmbID'
#                 # Plot customizations
#                 ,marker_kwargs=dict(alpha=1, size=10, color='dimgray', line_color='white')
#                 ,parcoord_kwargs=dict(line_width=1,color='gray')
#                 ,show_legend=True   
# #                 ,y_range=(-2,2)
#                 ,frame_height = 400, frame_width = 300
#                 ,x_axis_label='Treatment'
# #                 ,tooltips=[("Embryo", "@EmbID"), ]
#               )
# # Final customizations
# p1.axis.axis_label_text_font_size = '16px'
# p1.axis.major_label_text_font_size = '16px'
# p1.axis.axis_label_text_font_style = 'bold'
# p1.xaxis.major_label_text_font_style = 'italic'
# p1.legend.location = 'top_right'

################### Plot as stripbox plot ###################
# Build Stripbox plot
p2 = iqplot.stripbox(
                # Data to plot
                data=df_ratio
                ,q=metric_ratio, q_axis='y'
                ,cats='ROI' 
                # Plot customizations
                ,jitter=True ,jitter_kwargs=dict(width=0.3)
                ,marker_kwargs=dict(alpha=1, size=10, color='dimgray', line_color='white')
                ,box_kwargs=dict(line_color='black', line_width=1.5,fill_color='white', fill_alpha=1)
                ,whisker_kwargs=dict(line_color='black', line_width=1.5)
                ,median_kwargs=dict(line_color='black', line_width=2)
                ,top_level='strip'
                ,show_legend=False
                ,y_range=(0,1.5)
                ,min_data=3
                ,frame_height = 400, frame_width = 150
                ,x_axis_label='Enhancer Construct'
                ,y_axis_label='RFP Expression Relative to Control'
                ,tooltips=[("Embryo", "@EmbID"), ]
)
# Final customizations
p2.axis.axis_label_text_font_size = '16px'
p2.axis.major_label_text_font_size = '16px'
p2.axis.axis_label_text_font_style = 'bold'
p2.xaxis.major_label_text_font_style = 'italic'
vline = Span(location=1,dimension='width', level='underlay',
             line_color='darkgray',line_width=2)
p2.add_layout(vline)

################### Display plots ###################
show(row( p2))

################### Perform statistical analysis ###################
# Perform Paired t test 
cntl = df_parcoord.loc[df_parcoord['ROI'] == cntl_construct][metric_parcoord]
expt = df_parcoord.loc[df_parcoord['ROI'] == expt_construct][metric_parcoord]
ttest = stats.ttest_rel(cntl,expt)

# Display test results
print('Paired t-test results: \n\t\t statistic = ' + str(ttest[0]) + 
    '\n\t\t p-value = ' + str(ttest[1]))
print('n = ' + str(len(cntl)) + ' embryos')



Paired t-test results: 
		 statistic = 11.70026189643754
		 p-value = 2.5990512609339157e-06
n = 9 embryos


In [36]:
expt_side = 'Enh3ΔSox10::RFP'
cntl_side = 'Enh3::RFP'
output_file = 'Enh3_delSox10_Data'

# Prepare data for exporting to Prism
df_prism = df_parcoord.pivot(index=('Fluor','EmbID','Treatment','Somites'), 
                             columns='ROI', values='ElecNormIntDen').reset_index()
df_prism['Norm '+ cntl_side] = df_prism[cntl_side]/df_prism[cntl_side].mean()
df_prism['Norm '+ expt_side] = df_prism[expt_side]/df_prism[cntl_side].mean()
df_prism['Norm Ratio'] = (df_prism[expt_side]/df_prism[cntl_side])

df_prism.to_csv(output_file+'.csv')
