# Analyze Sox9 and Msx1 HCR Intensity

## Import Modules

In [1]:
# Import data handling and analysis packages
import os
import glob
import pandas as pd
from scipy import stats

# Import plotting packages
import iqplot
import bokeh.io
from bokeh.io import output_file, show
from bokeh.layouts import column, row
bokeh.io.output_notebook()

## Import and parse raw data

In [2]:
# Navigate to CSV path
path = os.path.abspath('')+'/raw_data_csvs/'

# Metadata fields encoded, in this order, in each underscore-delimited file name
image_fields = ['ExptDate', 'Treatment', 'Dose', 'Stains', 'Embryo', 'Somites', 'Mag']

# glob returns files in arbitrary order; sort so the combined dataframe is reproducible between runs
csv_files = sorted(glob.glob(os.path.join(path, '*.csv')))
if not csv_files:
    # Fail with a clear message instead of pandas' "No objects to concatenate"
    raise FileNotFoundError('No CSV files found in ' + path)

list_ = []
for file_ in csv_files:                          # Bring in each file and collect it for concatenation
    df = pd.read_csv(file_)

    # Determine Image name from file name, and split it once per file (it is constant for every row)
    image_name = os.path.splitext(os.path.basename(file_))[0]
    image_values = image_name.split('_')
    if len(image_values) != len(image_fields):
        raise ValueError('Expected ' + str(len(image_fields)) + ' underscore-separated fields ('
                         + '_'.join(image_fields) + ') in file name, got '
                         + str(len(image_values)) + ': ' + image_name)

    df['Image'] = image_name
    # Split values in ROI label ("<Stain>:<ROI>")
    df['Stain'], df['ROI'] = zip(*df['Label'].map(lambda x: x.split(':')))
    # Store the file-name metadata as one column per field
    for field, value in zip(image_fields, image_values):
        df[field] = value
    list_.append(df)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,Min,Max,IntDen,RawIntDen,Image,Stain,ROI,ExptDate,Treatment,Dose,Stains,Embryo,Somites,Mag
0,1,Sox9:background,98.317,26.589,23,31,2614.169,12683.0,"20200115_nSMase2MO_0,8mM_Sox9,Msx1,GFP,BF_Emb4...",Sox9,background,20200115,nSMase2MO,"0,8mM","Sox9,Msx1,GFP,BF",Emb4,8ss,10x
1,2,Sox9:background,204.673,26.344,23,31,5391.995,26160.0,"20200115_nSMase2MO_0,8mM_Sox9,Msx1,GFP,BF_Emb4...",Sox9,background,20200115,nSMase2MO,"0,8mM","Sox9,Msx1,GFP,BF",Emb4,8ss,10x
2,3,Sox9:background,247.958,26.501,22,31,6571.184,31881.0,"20200115_nSMase2MO_0,8mM_Sox9,Msx1,GFP,BF_Emb4...",Sox9,background,20200115,nSMase2MO,"0,8mM","Sox9,Msx1,GFP,BF",Emb4,8ss,10x
3,4,Sox9:Cntl,21012.083,363.043,50,1339,7628282.874,37009659.0,"20200115_nSMase2MO_0,8mM_Sox9,Msx1,GFP,BF_Emb4...",Sox9,Cntl,20200115,nSMase2MO,"0,8mM","Sox9,Msx1,GFP,BF",Emb4,8ss,10x
4,5,Sox9:Expt,14998.237,143.132,47,834,2146729.264,10415151.0,"20200115_nSMase2MO_0,8mM_Sox9,Msx1,GFP,BF_Emb4...",Sox9,Expt,20200115,nSMase2MO,"0,8mM","Sox9,Msx1,GFP,BF",Emb4,8ss,10x


## Calculate Corrected Total Cellular Fluorescence (CTCF) for each ROI

For each stain in the dataset, this step computes the fluorescence (expression level) in each ROI, corrected for the ROI area and for the background intensity of the image: CTCF = ROI IntDen − (mean background intensity × ROI area).

In [3]:
# Get a list of treatments and stains
treatment_list = full_df['Treatment'].unique().tolist()
stain_list = full_df['Stain'].unique().tolist()

# Average Area, Mean and IntDen over all rows sharing the same Stain, Treatment, Embryo, ROI and ExptDate
# (this collapses repeated 'background' measurements into one mean background value).
# NOTE: the column selection must be a list. Selecting with a bare tuple
# (groupby(...)['Area', 'Mean', 'IntDen']) was deprecated in pandas 1.0 and raises in pandas 2.0+.
mean_sections = (full_df.groupby(['Stain', 'Treatment', 'Embryo', 'ROI', 'ExptDate'])
                 [['Area', 'Mean', 'IntDen']].mean())

# Loop through stains, performing the following analysis
for stain in stain_list:
    df_stain = mean_sections.xs(stain)

    # Loop through treatments, performing each analysis and exporting a CSV file for each treatment
    for treatment in treatment_list:
        # Slice dataframe to process only embryos with given treatment
        df_treatment = df_stain.xs(treatment)

        # Each slice below is indexed by (Embryo, ExptDate)
        background_mean = df_treatment.xs('background', level='ROI')['Mean']
        cntl = df_treatment.xs('Cntl', level='ROI')
        expt = df_treatment.xs('Expt', level='ROI')

        # Determine CTCF values = ROI IntDen - (background mean * ROI area).
        # Building the frame from index-aligned Series pairs each embryo's Cntl and Expt
        # values by (Embryo, ExptDate) rather than by row position.
        results = pd.DataFrame({
            'Cntl CTCF': cntl['IntDen'] - background_mean * cntl['Area'],
            'Expt CTCF': expt['IntDen'] - background_mean * expt['Area'],
        })
        results['Expt/Cntl CTCF'] = results['Expt CTCF'] / results['Cntl CTCF']
        results = results.reset_index()

        # Normalize all CTCF values to the mean CTCF of the control (Cntl) group
        cntl_mean = results['Cntl CTCF'].mean()
        results['Norm Cntl CTCF'] = results['Cntl CTCF'] / cntl_mean
        results['Norm Expt CTCF'] = results['Expt CTCF'] / cntl_mean

        # Unique embryo identifier (ExptDate and Embryo are both strings parsed from the file name)
        results['EmbID'] = results['ExptDate'] + results['Embryo']
        results.to_csv(stain + '_' + treatment + '_CTCF.csv')

## Plot and perform statistical analysis - Sox9

In [4]:
# Load the CTCF table exported for Sox9 / nSMase2MO
data = pd.read_csv('Sox9_nSMase2MO_CTCF.csv').reset_index()

# Gather every stripbox option in one place, then build the plot
stripbox_options = dict(
    # Data to plot: the Expt/Cntl ratio on the y axis
    data=data,
    q='Expt/Cntl CTCF',
    q_axis='y',
    # Marker appearance
    jitter=True,
    jitter_kwargs={'width': 0.3},
    marker_kwargs={'alpha': 0.8, 'size': 8},
    # Box, whisker and median lines are all drawn in black, on top of the markers
    box_kwargs={'line_color': 'black', 'line_width': 1.5},
    whisker_kwargs={'line_color': 'black', 'line_width': 1.5},
    median_kwargs={'line_color': 'black', 'line_width': 2},
    top_level='box',
    # Frame size, axis range and labelling
    frame_width=150,
    frame_height=300,
    y_range=(0, 2),
    y_axis_label='Normalized CTCF (Expt/Cntl)',
    show_legend=False,
)
stripbox = iqplot.stripbox(**stripbox_options)

# Final customizations: font settings shared by all axes...
for axis in stripbox.axis:
    axis.axis_label_text_font_size = '16px'
    axis.major_label_text_font_size = '16px'
    axis.axis_label_text_font_style = 'bold'
# ...and italic tick labels on the x axis only
for x_axis in stripbox.xaxis:
    x_axis.major_label_text_font_style = 'italic'

# View plot
show(stripbox)

In [5]:
################### Isolate data for analysis ###################
# Keep the embryo ID and the two normalized CTCF columns, then reshape to long format:
# one row per (EmbID, ROI), with the column name stored in 'ROI' and its value in 'Norm CTCF'
data = (
    pd.read_csv('Sox9_nSMase2MO_CTCF.csv')
    .reset_index()
    .filter(items=['EmbID', 'Norm Cntl CTCF', 'Norm Expt CTCF'])
    .melt(id_vars=['EmbID'], var_name='ROI', value_name='Norm CTCF')
)

################### Plot as strip plot ###################
# Strip plot with one category per ROI; a gray line joins the two values sharing an EmbID
p1 = iqplot.strip(
    data=data,
    q='Norm CTCF',
    q_axis='y',
    cats=['ROI'],
    parcoord_column='EmbID',
    y_range=(0, 3.5),
    frame_height=300,
    frame_width=150,
    y_axis_label='Normalized CTCF',
    marker_kwargs={'size': 5, 'color': 'black'},
    parcoord_kwargs={'line_width': 1, 'color': 'gray'},
)

# Axis text styling
p1.axis.axis_label_text_font_size = '14px'
p1.axis.major_label_text_font_size = '12px'
p1.axis.axis_label_text_font_style = 'normal'
p1.xaxis.major_label_orientation = 7

show(row(p1))

################### Perform statistical analysis ###################

# Paired t test between the Cntl and Expt values (both subsets keep the row order of the melted table)
cntl = data.loc[data['ROI'] == 'Norm Cntl CTCF', 'Norm CTCF']
expt = data.loc[data['ROI'] == 'Norm Expt CTCF', 'Norm CTCF']
ttest = stats.ttest_rel(cntl, expt)

# Display test results
print(f'Paired t-test results: \n\t\t statistic={ttest[0]!s}\n\t\t p-value={ttest[1]!s}')

Paired t-test results: 
		 statistic=3.187747931995409
		 p-value=0.0188899865088981


## Plot and perform statistical analysis - Msx1

In [6]:
# Load the CTCF table exported for Msx1 / nSMase2MO
data = pd.read_csv('Msx1_nSMase2MO_CTCF.csv').reset_index()

# Gather every stripbox option in one place, then build the plot
stripbox_options = dict(
    # Data to plot: the Expt/Cntl ratio on the y axis
    data=data,
    q='Expt/Cntl CTCF',
    q_axis='y',
    # Marker appearance
    jitter=True,
    jitter_kwargs={'width': 0.3},
    marker_kwargs={'alpha': 0.8, 'size': 8},
    # Box, whisker and median lines are all drawn in black, on top of the markers
    box_kwargs={'line_color': 'black', 'line_width': 1.5},
    whisker_kwargs={'line_color': 'black', 'line_width': 1.5},
    median_kwargs={'line_color': 'black', 'line_width': 2},
    top_level='box',
    # Frame size, axis range and labelling
    frame_width=150,
    frame_height=300,
    y_range=(0, 2),
    y_axis_label='Normalized CTCF (Expt/Cntl)',
    show_legend=False,
)
stripbox = iqplot.stripbox(**stripbox_options)

# Final customizations: font settings shared by all axes...
for axis in stripbox.axis:
    axis.axis_label_text_font_size = '16px'
    axis.major_label_text_font_size = '16px'
    axis.axis_label_text_font_style = 'bold'
# ...and italic tick labels on the x axis only
for x_axis in stripbox.xaxis:
    x_axis.major_label_text_font_style = 'italic'

# View plot
show(stripbox)

In [7]:
################### Isolate data for analysis ###################
# Keep the embryo ID and the two normalized CTCF columns, then reshape to long format:
# one row per (EmbID, ROI), with the column name stored in 'ROI' and its value in 'Norm CTCF'
data = (
    pd.read_csv('Msx1_nSMase2MO_CTCF.csv')
    .reset_index()
    .filter(items=['EmbID', 'Norm Cntl CTCF', 'Norm Expt CTCF'])
    .melt(id_vars=['EmbID'], var_name='ROI', value_name='Norm CTCF')
)

################### Plot as strip plot ###################
# Strip plot with one category per ROI; a gray line joins the two values sharing an EmbID
p1 = iqplot.strip(
    data=data,
    q='Norm CTCF',
    q_axis='y',
    cats=['ROI'],
    parcoord_column='EmbID',
    y_range=(0, 2),
    frame_height=300,
    frame_width=150,
    y_axis_label='Normalized CTCF',
    marker_kwargs={'size': 5, 'color': 'black'},
    parcoord_kwargs={'line_width': 1, 'color': 'gray'},
)

# Axis text styling
p1.axis.axis_label_text_font_size = '14px'
p1.axis.major_label_text_font_size = '12px'
p1.axis.axis_label_text_font_style = 'normal'
p1.xaxis.major_label_orientation = 7

show(row(p1))

################### Perform statistical analysis ###################

# Paired t test between the Cntl and Expt values (both subsets keep the row order of the melted table)
cntl = data.loc[data['ROI'] == 'Norm Cntl CTCF', 'Norm CTCF']
expt = data.loc[data['ROI'] == 'Norm Expt CTCF', 'Norm CTCF']
ttest = stats.ttest_rel(cntl, expt)

# Display test results
print(f'Paired t-test results: \n\t\t statistic={ttest[0]!s}\n\t\t p-value={ttest[1]!s}')

Paired t-test results: 
		 statistic=2.6014156408304454
		 p-value=0.04058509801704345
