# Analyze Ceramide Staining Intensity

### Import Modules

In [1]:
# Import packages
import datetime
import os
import glob
import pandas as pd
import numpy as np

# Import plotting packages
import matplotlib as mpl
import seaborn as sns
import iqplot
import bokeh.io
bokeh.io.output_notebook()
# import dabest

# Record the plotting-library versions so the analysis environment can be reproduced
print(f"matplotlib v{mpl.__version__}")
print(f"seaborn v{sns.__version__}")
# print(f"dabest v{dabest.__version__}")  # enable together with `import dabest`

matplotlib v3.3.2
seaborn v0.11.0


### Assemble image data into a single dataframe

In [2]:
# Folder (next to this notebook) holding one measurement CSV per image
path = os.path.abspath('')+'/raw_data_csvs/'
list_ = []

# Read every CSV into its own dataframe and annotate it with metadata parsed
# from the file name and the ROI label. glob returns files in arbitrary order,
# so sort them to make the row order of full_df reproducible between runs.
for file_ in sorted(glob.glob(os.path.join(path, "*.csv"))):
    df = pd.read_csv(file_)

    # Image name = file name without directory or extension
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]

    # 'Label' is expected to look like '<Stain>:<ROI>' (e.g. 'Ceramide:background')
    df['Stain'], df['ROI'] = zip(*df['Label'].map(lambda x: x.split(':')))

    # Image name is expected to have six '_'-separated fields:
    # ExptDate_Treatment_Stains_Embryo_Somites_Section
    (df['ExptDate'], df['Treatment'], df['Stains'], df['Embryo'],
        df['Somites'], df['Section']) = zip(*df['Image'].map(lambda x: x.split('_')))
    list_.append(df)

# Fail with a clear message instead of pd.concat's generic
# "No objects to concatenate" when the folder is missing or empty
if not list_:
    raise FileNotFoundError("No CSV files found in " + path)

# Stack the per-image dataframes into one table and preview it
full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,Min,Max,IntDen,RawIntDen,Image,Stain,ROI,ExptDate,Treatment,Stains,Embryo,Somites,Section
0,1,Ceramide:background,13.758,23.903,10,45,328.858,6382.0,"20190322_SMPD3MO0,8mM_Pax7TCFLefRFPGFPCeramide...",Ceramide,background,20190322,"SMPD3MO0,8mM",Pax7TCFLefRFPGFPCeramide,Emb7,8ss,sec1
1,2,Ceramide:background,12.882,24.1,11,45,310.462,6025.0,"20190322_SMPD3MO0,8mM_Pax7TCFLefRFPGFPCeramide...",Ceramide,background,20190322,"SMPD3MO0,8mM",Pax7TCFLefRFPGFPCeramide,Emb7,8ss,sec1
2,3,Ceramide:background,12.315,24.427,12,47,300.826,5838.0,"20190322_SMPD3MO0,8mM_Pax7TCFLefRFPGFPCeramide...",Ceramide,background,20190322,"SMPD3MO0,8mM",Pax7TCFLefRFPGFPCeramide,Emb7,8ss,sec1
3,4,Ceramide:Cntl,2951.014,85.652,7,364,252761.184,4905222.0,"20190322_SMPD3MO0,8mM_Pax7TCFLefRFPGFPCeramide...",Ceramide,Cntl,20190322,"SMPD3MO0,8mM",Pax7TCFLefRFPGFPCeramide,Emb7,8ss,sec1
4,5,Ceramide:Expt,2184.211,79.073,11,292,172712.326,3351750.0,"20190322_SMPD3MO0,8mM_Pax7TCFLefRFPGFPCeramide...",Ceramide,Expt,20190322,"SMPD3MO0,8mM",Pax7TCFLefRFPGFPCeramide,Emb7,8ss,sec1


In [3]:
# Date this analysis is run (YYYYMMDD); used to stamp the exported CSV file names
now = datetime.datetime.now()
analysis_date = now.strftime("%Y%m%d")

# Get a list of treatments
treatment_list = full_df.Treatment.unique().tolist()

# Average repeated measurements (e.g. the multiple background ROIs per image)
# within each Treatment / Embryo / Stain / ROI / Section / ExptDate group.
# The columns are selected with a list: tuple-style selection on a groupby
# (groupby(...)['Area', 'Mean', 'IntDen']) is deprecated and fails on pandas 2.x.
mean_sections = (full_df.groupby(['Treatment', 'Embryo', 'Stain', 'ROI', 'Section', 'ExptDate'])
                 [['Area', 'Mean', 'IntDen']].mean())

# Numeric result columns that are averaged per embryo
ctcf_columns = ['Cntl CTCF', 'Expt CTCF', 'Expt/Cntl CTCF']

# Loop through treatments, performing each analysis and exporting a CSV file for each treatment.
# NOTE: `results` is reassigned on every iteration, so after the loop it holds
# only the last treatment processed.
for treatment in treatment_list:
    # Slice dataframe to process only embryos with the given treatment
    df_treatment = mean_sections.xs(treatment, level='Treatment')

    # Per-section rows for each ROI type, all indexed by Embryo / Stain / Section / ExptDate
    background_mean = df_treatment.xs('background', level='ROI')['Mean']
    roi_cntl = df_treatment.xs('Cntl', level='ROI')
    roi_expt = df_treatment.xs('Expt', level='ROI')

    # CTCF = ROI IntDen - (background mean * ROI area)
    # Building the frame from index-aligned Series pairs each Cntl value with
    # the Expt value of the same embryo/section. Pairing by row position would
    # silently mismatch rows whenever a section lacks one of the two ROIs.
    section_ctcf = pd.DataFrame({
        'Cntl CTCF': roi_cntl['IntDen'] - background_mean * roi_cntl['Area'],
        'Expt CTCF': roi_expt['IntDen'] - background_mean * roi_expt['Area'],
    })
    section_ctcf['Expt/Cntl CTCF'] = section_ctcf['Expt CTCF'] / section_ctcf['Cntl CTCF']

    # Average the sections of each embryo. Only the numeric CTCF columns are
    # averaged; the text 'Section' column is deliberately left out.
    results = (section_ctcf.reset_index()
               .groupby(['Embryo', 'Stain', 'ExptDate'])[ctcf_columns]
               .mean()
               .reset_index())

    # Normalize both Cntl and Expt CTCF values to the mean of the control group
    cntl_mean = float(results['Cntl CTCF'].mean())
    results['Norm Cntl CTCF'] = results['Cntl CTCF'] / cntl_mean
    results['Norm Expt CTCF'] = results['Expt CTCF'] / cntl_mean

    # Export one results file per treatment: <analysis_date>_<treatment>_Results.csv
    results.to_csv(analysis_date + '_' + treatment + '_Results.csv')

In [4]:
# Preview the first rows of the per-embryo results table
# (`results` holds only the last treatment processed by the export loop)
results.head(n=5)

Unnamed: 0,Embryo,Stain,ExptDate,Cntl CTCF,Expt CTCF,Expt/Cntl CTCF,Norm Cntl CTCF,Norm Expt CTCF
0,Emb1,Ceramide,20190322,233662.249525,215138.021947,0.89389,1.151444,1.06016
1,Emb10,Ceramide,20190322,112782.028563,139258.228538,1.225986,0.555769,0.686238
2,Emb2,Ceramide,20190322,226603.895711,167773.042112,0.764506,1.116661,0.826754
3,Emb3,Ceramide,20190322,199501.590887,152238.302963,0.810064,0.983106,0.750202
4,Emb4,Ceramide,20190322,194906.420719,177976.859888,1.013061,0.960462,0.877037
