# Analyze Fluorescence Intensity in Sections

Performing this analysis for TCF/Lef::H2B-d2EGFP signal in m3::Dyn1(K44A)-electroporated crest

## Import Modules

In [39]:
# Import packages
import datetime
import os
import glob
import pandas as pd
import numpy as np
from scipy import stats

# Import plotting packages
import iqplot
import bokeh.io
import matplotlib as mpl
import seaborn as sns

print("matplotlib v{}".format(mpl.__version__))
print("seaborn v{}".format(sns.__version__))
bokeh.io.output_notebook()


# Add experiment date here to apply to dataframe
now = datetime.datetime.now()
analysis_date = now.strftime("%Y%m%d")

matplotlib v3.3.2
seaborn v0.11.0


## Collect Data from Fiji Analysis

In [40]:
# Navigate to CSV path
path = os.path.abspath('')+'/raw_data_csvs/'
full_df = pd.DataFrame()
list_ = []

for file_ in glob.glob(path + "/*.csv"):         # For loop to bring in files and concatenate them into a single dataframe
    df = pd.read_csv(file_)
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]                      # Determine Image name from file name
    (df['ExptDate'], df['Treatment'], df['Stains'], df['Embryo'],                   # Split values in Image name column
         df['Somites'], df['Section']) = zip(*df['Image'].map(lambda x: x.split('_')))
    df['EmbID'] = df['ExptDate'] + '_' + df['Embryo']
    df['SectionID'] = df['EmbID'] + '_' + df['Section']
    df['Target'] = df['Label'].map(lambda x: x.split(':')[0])                       # Split values in ROI label
    df['ROI'] = df['Label'].map(lambda x: x.split(':')[1])                          # Split values in ROI label
    list_.append(df)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,IntDen,RawIntDen,Image,ExptDate,Treatment,Stains,Embryo,Somites,Section,EmbID,SectionID,Target,ROI
0,1,TCF/Lef:background:c:4/5 - 20210909_m3Dyn1KA_B...,10.976,6.798,74.614,1448.0,20210909_m3Dyn1KA_BF;Pax7;RFP;TCFLef;DAPI_Emb6...,20210909,m3Dyn1KA,BF;Pax7;RFP;TCFLef;DAPI,Emb6,8ss,sec3,20210909_Emb6,20210909_Emb6_sec3,TCF/Lef,background
1,2,TCF/Lef:background:c:4/5 - 20210909_m3Dyn1KA_B...,13.449,7.031,94.556,1835.0,20210909_m3Dyn1KA_BF;Pax7;RFP;TCFLef;DAPI_Emb6...,20210909,m3Dyn1KA,BF;Pax7;RFP;TCFLef;DAPI,Emb6,8ss,sec3,20210909_Emb6,20210909_Emb6_sec3,TCF/Lef,background
2,3,TCF/Lef:background:c:4/5 - 20210909_m3Dyn1KA_B...,23.806,7.24,172.365,3345.0,20210909_m3Dyn1KA_BF;Pax7;RFP;TCFLef;DAPI_Emb6...,20210909,m3Dyn1KA,BF;Pax7;RFP;TCFLef;DAPI,Emb6,8ss,sec3,20210909_Emb6,20210909_Emb6_sec3,TCF/Lef,background
3,4,TCF/Lef:Expt:c:4/5 - 20210909_m3Dyn1KA_BF;Pax7...,5117.603,129.235,661371.881,12834945.0,20210909_m3Dyn1KA_BF;Pax7;RFP;TCFLef;DAPI_Emb6...,20210909,m3Dyn1KA,BF;Pax7;RFP;TCFLef;DAPI,Emb6,8ss,sec3,20210909_Emb6,20210909_Emb6_sec3,TCF/Lef,Expt
4,5,TCF/Lef:Cntl:c:4/5 - 20210909_m3Dyn1KA_BF;Pax7...,6071.714,290.363,1763002.333,34213789.0,20210909_m3Dyn1KA_BF;Pax7;RFP;TCFLef;DAPI_Emb6...,20210909,m3Dyn1KA,BF;Pax7;RFP;TCFLef;DAPI,Emb6,8ss,sec3,20210909_Emb6,20210909_Emb6_sec3,TCF/Lef,Cntl


## Analyze Data
Calculates CTCF (CTCF = ROI IntDen - (background mean * ROI area)), then normalizes CTCF to the mean of the control group for each treatment in the dataframe

In [41]:
# Define control and experimental constructs
cntl_construct = 'pBS'
expt_construct = 'm3Dyn1KA'

# Get a list of treatments
treatment_list = full_df.Treatment.unique()
treatment_list = treatment_list.tolist()

# Mean background values and group by Treatment, Embryo, Fluor, ROI and Section
mean_sections = ((full_df.groupby(['Treatment', 'Target', 'EmbID', 'Somites', 'SectionID', 'ROI'])
                  ['Area', 'Mean', 'IntDen']).mean())
mean_sections.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Area,Mean,IntDen
Treatment,Target,EmbID,Somites,SectionID,ROI,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
m3Dyn1KA,TCF/Lef,20200807_Emb1,8ss,20200807_Emb1_sec1,Cntl,2569.175,2925.196,7515341.0
m3Dyn1KA,TCF/Lef,20200807_Emb1,8ss,20200807_Emb1_sec1,Expt,2371.162,2536.338,6014067.0
m3Dyn1KA,TCF/Lef,20200807_Emb1,8ss,20200807_Emb1_sec1,background,7.525333,511.915,3890.643
m3Dyn1KA,TCF/Lef,20200807_Emb1,8ss,20200807_Emb1_sec2,Cntl,1778.156,3291.851,5853425.0
m3Dyn1KA,TCF/Lef,20200807_Emb1,8ss,20200807_Emb1_sec2,Expt,1581.521,2299.651,3636948.0


In [42]:
# Initialize for final dataframe collection
full_results = pd.DataFrame()
list_ = []

# Loop trough treatments, performing each analysis and exporting CSV file for each treatment
for i in treatment_list:
    # Slice dataframe to process only embryos with given treatment
    treatment = i
    df_treatment = pd.DataFrame(mean_sections.xs(treatment))
    
    # Calculate CTCF
    # Calculate background (background mean * ROI area)
    background_corr_cntl = (df_treatment.xs('background', level='ROI')['Mean'] 
                    * df_treatment.xs('Cntl', level='ROI')['Area'])
    background_corr_expt = (df_treatment.xs('background', level='ROI')['Mean'] 
                    * df_treatment.xs('Expt', level='ROI')['Area'])
    # Slice out only Cntl or Expt values in IntDen
    intdens_cntl = df_treatment.xs('Cntl', level='ROI')['IntDen'] 
    intdens_expt = df_treatment.xs('Expt', level='ROI')['IntDen'] 
    
    # Subtract background from IntDens to determine CTCF and concatenate into single dataframe
    sub_cntl = pd.DataFrame(intdens_cntl - background_corr_cntl).reset_index()
    sub_cntl.rename(columns={0:'Cntl CTCF'}, inplace=True)
    sub_expt = pd.DataFrame(intdens_expt - background_corr_expt).reset_index()
    sub_expt.rename(columns={0:'Expt CTCF'}, inplace=True)
    full_ctcf = sub_cntl.merge(sub_expt)
    full_ctcf['CTCF Ratio'] = full_ctcf['Expt CTCF'] / full_ctcf['Cntl CTCF']
    list_.append(full_ctcf)
    
# full_results.head()
full_results_by_section = pd.concat(list_, sort=False)
# Optional: Save out data
full_results_by_section.to_csv('m3Dyn1KA_TCFLef_CTCF_Results_by_section.csv')
full_results_by_section.head()

Unnamed: 0,Target,EmbID,Somites,SectionID,Cntl CTCF,Expt CTCF,CTCF Ratio
0,TCF/Lef,20200807_Emb1,8ss,20200807_Emb1_sec1,6200142.0,4800234.0,0.774214
1,TCF/Lef,20200807_Emb1,8ss,20200807_Emb1_sec2,4850855.0,2745246.0,0.56593
2,TCF/Lef,20200807_Emb1,8ss,20200807_Emb1_sec3,3318874.0,3056243.0,0.920868
3,TCF/Lef,20200817_Emb1,8ss,20200817_Emb1_sec1,7930794.0,4885535.0,0.616021
4,TCF/Lef,20200817_Emb1,8ss,20200817_Emb1_sec2,6335538.0,4176907.0,0.659282


In [44]:
# Mean sections for a given embryo
full_results_by_bro = ((full_results_by_section.groupby(['Target', 'EmbID', 'Somites'])
                  ['Cntl CTCF', 'Expt CTCF', 'CTCF Ratio']).mean()).reset_index()

# Normalize results to the mean of the control dataset
# Note: Experiment on 20210909 imaged on a different microscope and thus should be normalized internally
normalized_list = []
sept9_experiment = full_results_by_bro.loc[full_results_by_bro['EmbID'].isin(['20210909_Emb3','20210909_Emb4','20210909_Emb5','20210909_Emb6'])]
sept9_experiment['Norm Cntl CTCF'] = sept9_experiment['Cntl CTCF']/(sept9_experiment['Cntl CTCF'].mean())
sept9_experiment['Norm Expt CTCF'] = sept9_experiment['Expt CTCF']/(sept9_experiment['Cntl CTCF'].mean())
normalized_list.append(sept9_experiment)
other_experiments = full_results_by_bro.loc[full_results_by_bro['EmbID'].isin(['20200807_Emb1','20200817_Emb1','20200821_Emb1'])]
other_experiments['Norm Cntl CTCF'] = other_experiments['Cntl CTCF']/(other_experiments['Cntl CTCF'].mean())
other_experiments['Norm Expt CTCF'] = other_experiments['Expt CTCF']/(other_experiments['Cntl CTCF'].mean())
normalized_list.append(other_experiments)

# Compile into final DF
full_results = pd.concat(normalized_list)
full_results.to_csv('m3Dyn1KA_TCFLef_CTCF_Results_by_embryo.csv')
full_results.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation:

Unnamed: 0,Target,EmbID,Somites,Cntl CTCF,Expt CTCF,CTCF Ratio,Norm Cntl CTCF,Norm Expt CTCF
3,TCF/Lef,20210909_Emb3,8ss,1324174.0,595787.8,0.486446,1.374951,0.618634
4,TCF/Lef,20210909_Emb4,8ss,612177.0,434880.9,0.724473,0.635652,0.451557
5,TCF/Lef,20210909_Emb5,8ss,812466.0,574122.8,0.703251,0.843621,0.596138
6,TCF/Lef,20210909_Emb6,8ss,1103464.0,501329.7,0.552731,1.145777,0.520554
0,TCF/Lef,20200807_Emb1,8ss,4789957.0,3533907.0,0.75367,0.881222,0.650143
