# Analyze Snai2 and Cad6B Staining Intensity

### Import Modules

In [2]:
# Import packages
import datetime
import os
import glob
import pandas as pd
import numpy as np

# Import plotting packages
import matplotlib as mpl
import seaborn as sns

# Report the plotting library versions used for this analysis
for plotting_pkg in (mpl, sns):
    print(f"{plotting_pkg.__name__} v{plotting_pkg.__version__}")

matplotlib v3.3.2
seaborn v0.11.0


### Assemble image data into a single dataframe

In [7]:
# Collect every per-image measurement CSV in ./csvs into a single dataframe.
# Each file name encodes the image metadata as underscore-separated fields, and
# each row's 'Label' column encodes '<stain>:<ROI>'.
path = os.path.abspath('')+'/csvs/'
list_ = []

# Metadata fields encoded in the file name, in order
name_fields = ['ExptDate', 'Treatment', 'Dose', 'Stains', 'Embryo', 'Somites', 'Section']

# glob returns matches in arbitrary order; sort so the row order is reproducible
for file_ in sorted(glob.glob(os.path.join(path, '*.csv'))):
    df = pd.read_csv(file_)

    # Determine Image name from file name (base name without extension)
    image_name = os.path.splitext(os.path.basename(file_))[0]
    df['Image'] = image_name

    # Split values in ROI label into stain and ROI name
    df['Stain'], df['ROI'] = zip(*df['Label'].map(lambda x: x.split(':')))

    # Split the Image name into its metadata fields; fail with a clear message
    # if the file name does not follow the expected pattern
    name_values = image_name.split('_')
    if len(name_values) != len(name_fields):
        raise ValueError(
            "File name '{}' has {} underscore-separated fields; expected {} ({})".format(
                image_name, len(name_values), len(name_fields), '_'.join(name_fields)))
    for field, value in zip(name_fields, name_values):
        df[field] = value

    # Identifier shared by all sections of the same embryo
    df['EmbID'] = df['ExptDate'] + '_' + df['Stains'] + '_' + df['Embryo'] + '_' + df['Somites']
    list_.append(df)

# pd.concat raises an unhelpful error on an empty list; report the real problem instead
if not list_:
    raise FileNotFoundError("No CSV files found in {}".format(path))

# ignore_index gives every row a unique label instead of repeating 0..n for each file
full_df = pd.concat(list_, ignore_index=True)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,IntDen,RawIntDen,Image,Stain,ROI,ExptDate,Treatment,Dose,Stains,Embryo,Somites,Section,EmbID
0,1,H2BRFP:background,19.787,8.708,172.313,3344.0,20210824_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,background,20210824,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2,Emb4,8ss,sec1,20210824_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...
1,2,H2BRFP:background,27.516,8.712,239.713,4652.0,20210824_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,background,20210824,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2,Emb4,8ss,sec1,20210824_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...
2,3,H2BRFP:background,18.705,8.807,164.738,3197.0,20210824_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,background,20210824,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2,Emb4,8ss,sec1,20210824_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...
3,4,H2BRFP:Cntl,4622.306,361.988,1673220.729,32471438.0,20210824_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,Cntl,20210824,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2,Emb4,8ss,sec1,20210824_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...
4,5,H2BRFP:Expt,3066.851,165.055,506198.069,9823557.0,20210824_SMPD3FLAG_3ugul_BF;Pax7;H2BRFP;TCFLef...,H2BRFP,Expt,20210824,SMPD3FLAG,3ugul,BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2,Emb4,8ss,sec1,20210824_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...


In [13]:
# Date stamp (YYYYMMDD) of this analysis run, used as the prefix of the exported CSV files
now = datetime.datetime.now()
analysis_date = now.strftime("%Y%m%d")

# Get a list of treatments and stains
treatment_list = full_df.Treatment.unique().tolist()
stain_list = full_df.Stain.unique().tolist()

# NOTE(review): `data` (7-9 somite embryos only) is never used below -- the averages
# are computed from the unfiltered `full_df`. Confirm whether the somite filter was
# meant to be applied before changing this, since it would alter the exported values.
data = full_df.loc[full_df['Somites'].isin(['7ss', '8ss', '9ss'])]

# Average repeated measurements of the same ROI (e.g. several background ROIs) within
# each section. The columns are selected with a list: tuple-style selection on a
# groupby was removed in pandas 2.0.
mean_sections = (full_df.groupby(['Stain', 'Treatment', 'Embryo', 'EmbID', 'ROI', 'Section', 'ExptDate'])
                 [['Area', 'Mean', 'IntDen']].mean())

# Numeric per-section columns that get averaged per embryo
ctcf_columns = ['Cntl CTCF', 'Expt CTCF', 'Expt/Cntl CTCF']

# Loop through stains, performing the following analysis
for stain in stain_list:
    df_stain = mean_sections.xs(stain)

    # Loop through treatments, performing each analysis and exporting one CSV file
    # per stain/treatment combination
    for treatment in treatment_list:
        # Not every stain is necessarily present for every treatment
        if treatment not in df_stain.index.get_level_values('Treatment'):
            print("Skipping {} / {}: no measurements".format(stain, treatment))
            continue

        # Slice dataframe to process only embryos with given treatment
        df_treatment = df_stain.xs(treatment)

        # Per-section values for each ROI, indexed by (Embryo, EmbID, Section, ExptDate)
        background_mean = df_treatment.xs('background', level='ROI')['Mean']
        cntl = df_treatment.xs('Cntl', level='ROI')
        expt = df_treatment.xs('Expt', level='ROI')

        # Determine CTCF values = ROI IntDen - (background mean * ROI area)
        cntl_ctcf = (cntl['IntDen'] - background_mean * cntl['Area']).rename('Cntl CTCF')
        expt_ctcf = (expt['IntDen'] - background_mean * expt['Area']).rename('Expt CTCF')

        # Combine Cntl and Expt values side by side, aligned on the section index
        # (not on row position), then compute the per-section ratio
        results = pd.concat([cntl_ctcf, expt_ctcf], axis=1)
        results['Expt/Cntl CTCF'] = results['Expt CTCF'] / results['Cntl CTCF']
        results = results.reset_index()

        # Average sections per embryo. Only the numeric columns are averaged;
        # 'Section' is a string column and cannot be.
        results = (results.groupby(['Embryo', 'ExptDate', 'EmbID'])[ctcf_columns]
                   .mean().reset_index())

        # Normalize all CTCF values to the mean of the control (Cntl) side
        cntl_mean = float(results['Cntl CTCF'].mean())
        results['Norm Cntl CTCF'] = results['Cntl CTCF'] / cntl_mean
        results['Norm Expt CTCF'] = results['Expt CTCF'] / cntl_mean

        results.to_csv(analysis_date + '_' + stain + '_' + treatment + '_Intensity.csv')

# Display the table from the last stain/treatment combination processed
results

Unnamed: 0,Embryo,ExptDate,EmbID,Cntl CTCF,Expt CTCF,Expt/Cntl CTCF,Norm Cntl CTCF,Norm Expt CTCF
0,Emb1,20210824,20210824_BF;Pax7;H2BRFP;BREH2Bd2EGFP;Snai2_Emb...,903491.6,406733.8,0.45018,1.124905,0.50641
1,Emb1,20210830,20210830_BF;Pax7;H2BRFP;BREH2Bd2EGFP;Snai2_Emb...,1034452.0,1174323.0,1.229714,1.287959,1.462108
2,Emb1,20210830,20210830_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...,641253.0,644318.5,1.004781,0.798401,0.802218
3,Emb2,20210830,20210830_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...,612422.1,721116.4,1.177482,0.762505,0.897836
4,Emb3,20210824,20210824_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...,710031.4,459592.8,0.653131,0.884035,0.572223
5,Emb3,20210830,20210830_BF;Pax7;H2BRFP;BREH2Bd2EGFP;Snai2_Emb...,796192.2,741036.7,0.949004,0.991311,0.922639
6,Emb3,20210830,20210830_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...,503349.2,472031.6,0.950915,0.626702,0.58771
7,Emb4,20210824,20210824_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...,1133085.0,774520.9,0.68133,1.410764,0.964329
8,Emb4,20210830,20210830_BF;Pax7;H2BRFP;TCFLefH2Bd2EGFP;Snai2_...,569579.9,429600.0,0.75424,0.709164,0.53488
9,Emb5,20210824,20210824_BF;Pax7;H2BRFP;BREH2Bd2EGFP;Snai2_Emb...,604786.5,610194.0,0.991401,0.752998,0.759731
