# Import Modules

In [32]:
import os
import glob
import pandas as pd
import numpy as np
from scipy import stats

import seaborn as sns
import matplotlib.pyplot as plt

In [33]:
# Add experiment date here to apply to dataframe
analysis_date = '20210903'

path = os.path.abspath('')+'/csvs/'
full_df = pd.DataFrame()
list_ = []

# For loop to bring in files and concatenate them into a single dataframe


for file_ in glob.glob(path + "/*.csv"):
    df = pd.read_csv(file_)
    # Determine Image name from file name
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]
    # Split values in ROI label
    df['Fluor'], df['ROI'] = zip(*df['Label'].map(lambda x: x.split(':')))
    # Split values in Image name column
    (df['ExptDate'], df['Treatment'], df['Dose'], df['Stains'], df['Embryo'], 
        df['Somites'], df['Section']) = zip(*df['Image'].map(lambda x: x.split('_')))
    list_.append(df)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,IntDen,RawIntDen,Image,Fluor,ROI,ExptDate,Treatment,Dose,Stains,Embryo,Somites,Section
0,1,H2BRFP:background,11.594,8.973,104.037,2019.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,background,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1
1,2,H2BRFP:background,14.067,8.707,122.484,2377.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,background,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1
2,3,H2BRFP:background,7.42,9.153,67.915,1318.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,background,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1
3,4,H2BRFP:Cntl,4296.436,66.936,287585.668,5581045.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,Cntl,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1
4,5,H2BRFP:Expt,4491.216,68.131,305989.869,5938207.0,20210830_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,Expt,20210830,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP,Emb1,8ss,sec1


In [42]:
# Get a list of treatments and dates
treatment_list = full_df.Treatment.unique().tolist()
date_list = full_df.ExptDate.unique().tolist()

control_treat = 'H2B-RFP'
experiment_treat = 'nSMase2-FLAG'

# Parse out specific somite stages
data = full_df.loc[full_df['Somites'].isin(['7ss','8ss', '9ss'])]

# Mean background values and group by Treatment, Embryo, Fluor, ROI and Section
mean_sections = ((data.groupby(['ExptDate', 'Treatment', 'Dose', 'Embryo', 'Somites', 'Fluor', 'ROI', 'Section'])
                  ['Area', 'Mean', 'IntDen']).mean())

# Loop trough treatments, performing each analysis and exporting CSV file for each treatment
full_results = pd.DataFrame()
list_ = []

for j in date_list:
    date = j
    df_date = pd.DataFrame(mean_sections.xs(date))
    
    for i in treatment_list:
        # Slice dataframe to process only embryos with given treatment
        treatment = i
        df_treatment = pd.DataFrame(df_date.xs(treatment))

        # Determine CTCF values = ROI IntDen - (background mean * ROI area)
        # Calculate background (background mean * ROI area)
        background_corr_cntl = (df_treatment.xs('background', level='ROI')['Mean'] 
                        * df_treatment.xs('Cntl', level='ROI')['Area'])
        background_corr_expt = (df_treatment.xs('background', level='ROI')['Mean'] 
                        * df_treatment.xs('Expt', level='ROI')['Area'])

        # Slice out only Cntl or Expt values in IntDen
        intdens_cntl = df_treatment.xs('Cntl', level='ROI')['IntDen'] 
        intdens_expt = df_treatment.xs('Expt', level='ROI')['IntDen'] 

        # Subtract background from IntDens to determine CTCF and concatenate into single dataframe
        sub_cntl = pd.DataFrame(intdens_cntl - background_corr_cntl)
        sub_expt = pd.DataFrame(intdens_expt - background_corr_expt)
        full_ctcf = pd.concat([sub_cntl, sub_expt], keys = ['Cntl', 'Expt'])
        full_ctcf.columns = ['CTCF']

        # Pull out TCFLef and H2BRFP values
        ctcf_tcflef = full_ctcf.xs('TCFLefH2Bd2EGFP', level='Fluor')['CTCF'] 
        ctcf_RFP = full_ctcf.xs('H2BRFP', level='Fluor')['CTCF'] 

        # Normalize for electroporation efficiency by determining TCFLef/RFP
        electroporation_norm = pd.DataFrame(ctcf_tcflef / ctcf_RFP)
        electroporation_norm.columns = ['CTCF']
        electroporation_norm.index.names = ['Side', 'Dose', 'Embryo', 'Somites', 'Section']

        # Average sections grouped by embryos before generating Expt/Cntl ratio
        averaged_sections = electroporation_norm.groupby(['Side','Embryo', 'Somites']).mean()

        # Pull out Cntl and Expt CTCFs
        ctcf_cntl = averaged_sections.xs('Cntl', level='Side')['CTCF'] 
        ctcf_expt = averaged_sections.xs('Expt', level='Side')['CTCF'] 

        # Generate ratios as Expt/Cntl
        ratios_sections = pd.DataFrame(ctcf_expt / ctcf_cntl)
        ratios_sections.columns = ['Expt/Cntl CTCF']

        # Normalize individual values to mean of control group
        norm_cntl = ctcf_cntl/(float(ctcf_cntl.mean()))
        norm_cntl = pd.DataFrame(norm_cntl)
        norm_cntl.columns = [control_treat]
        norm_expt = ctcf_expt/(float(ctcf_cntl.mean()))
        norm_expt = pd.DataFrame(norm_expt)
        norm_expt.columns = [experiment_treat]

        # Combine processed values into single dataframe and output as csv file 'Results.csv'
        ctcf_cntl = pd.DataFrame(ctcf_cntl)
        ctcf_cntl.columns = ['Cntl CTCF']
        ctcf_expt = pd.DataFrame(ctcf_expt)
        ctcf_expt.columns = ['Expt CTCF']
        results = (pd.concat([ctcf_cntl, ctcf_expt, ratios_sections, norm_cntl, norm_expt], axis=1, sort=True)).reset_index()
        list_.append(results)
        
full_results = pd.concat(list_)
full_results.to_csv(analysis_date + '_' + treatment + '_CTCFResults.csv')

full_results

Unnamed: 0,Embryo,Somites,Cntl CTCF,Expt CTCF,Expt/Cntl CTCF,H2B-RFP,nSMase2-FLAG
0,Emb1,8ss,0.784439,0.497574,0.634306,0.827669,0.524995
1,Emb2,7ss,1.291158,1.147399,0.888659,1.362314,1.210632
2,Emb3,8ss,1.378069,0.606512,0.440117,1.454014,0.639937
3,Emb4,8ss,0.337408,0.334423,0.991152,0.356003,0.352853
0,Emb2,7ss,2.363766,2.730684,1.155226,0.888741,1.026697
1,Emb3,8ss,3.482912,3.932463,1.129073,1.309524,1.478548
2,Emb4,8ss,1.107882,1.187456,1.071825,0.416547,0.446466
3,Emb5,8ss,3.115905,2.99343,0.960694,1.171534,1.125486
4,Emb6,8ss,2.912831,2.81711,0.967138,1.095181,1.059192
5,Emb7,8ss,2.974776,2.68383,0.902196,1.118472,1.009081


In [36]:

pd.DataFrame(mean_sections.xs(date))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Area,Mean,IntDen
Treatment,ExptDate,Dose,Embryo,Somites,Fluor,ROI,Section,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
SMPD3FLAG,20210824,3ugul,Emb3,8ss,H2BRFP,Cntl,sec1,4058.836000,87.646000,3.557425e+05
SMPD3FLAG,20210824,3ugul,Emb3,8ss,H2BRFP,Cntl,sec2,6258.609000,86.210000,5.395557e+05
SMPD3FLAG,20210824,3ugul,Emb3,8ss,H2BRFP,Cntl,sec3,5357.367000,127.443000,6.827616e+05
SMPD3FLAG,20210824,3ugul,Emb3,8ss,H2BRFP,Expt,sec1,3065.151000,50.094000,1.535449e+05
SMPD3FLAG,20210824,3ugul,Emb3,8ss,H2BRFP,Expt,sec2,5014.648000,60.936000,3.055724e+05
SMPD3FLAG,20210824,3ugul,Emb3,8ss,H2BRFP,Expt,sec3,3347.272000,40.632000,1.360048e+05
SMPD3FLAG,20210824,3ugul,Emb3,8ss,H2BRFP,background,sec1,22.260667,8.811667,1.967377e+02
SMPD3FLAG,20210824,3ugul,Emb3,8ss,H2BRFP,background,sec2,34.730333,8.861667,3.076110e+02
SMPD3FLAG,20210824,3ugul,Emb3,8ss,H2BRFP,background,sec3,24.424667,8.847667,2.145323e+02
SMPD3FLAG,20210824,3ugul,Emb3,8ss,TCFLefH2Bd2EGFP,Cntl,sec1,4058.836000,306.951000,1.245864e+06
