In [2]:
# Import packages
import datetime
import os
import glob
import pandas as pd
import numpy as np

# Import plotting packages
import matplotlib as mpl
import seaborn as sns

print("matplotlib v{}".format(mpl.__version__))
print("seaborn v{}".format(sns.__version__))

# Add experiment date here to apply to dataframe
now = datetime.datetime.now()
analysis_date = now.strftime("%Y%m%d")

matplotlib v3.3.2
seaborn v0.11.0


In [3]:
# Navigate to CSV path
path = os.path.abspath('')+'/raw_data_csvs/'
full_df = pd.DataFrame()
list_ = []

for file_ in glob.glob(path + "/*.csv"):         # For loop to bring in files and concatenate them into a single dataframe
    df = pd.read_csv(file_)
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]                      # Determine Image name from file name
    df['Stain'], df['ROI'] = zip(*df['Label'].map(lambda x: x.split(':')))          # Split values in ROI label
    (df['ExptDate'], df['Treatment'], df['Stains'], df['Embryo'],                   # Split values in Image name column
        df['Somites'], df['Mag']) = zip(*df['Image'].map(lambda x: x.split('_')))
    list_.append(df)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,Min,Max,IntDen,RawIntDen,Image,Stain,ROI,ExptDate,Treatment,Stains,Embryo,Somites,Mag
0,1,TFAP2b:Expt,16524.891,8720.56,0,58188,144106300.0,209049276.0,20200619_SMPD3gRNA1_TFAP2b;SMPD3;Cas9eGFP_Emb1...,TFAP2b,Expt,20200619,SMPD3gRNA1,TFAP2b;SMPD3;Cas9eGFP,Emb1,6ss,10x
1,2,TFAP2b:Cntl,18390.248,9905.307,0,59028,182161100.0,264253792.0,20200619_SMPD3gRNA1_TFAP2b;SMPD3;Cas9eGFP_Emb1...,TFAP2b,Cntl,20200619,SMPD3gRNA1,TFAP2b;SMPD3;Cas9eGFP,Emb1,6ss,10x
2,3,SMPD3:Expt,16524.891,4850.079,0,49930,80147020.0,116266087.0,20200619_SMPD3gRNA1_TFAP2b;SMPD3;Cas9eGFP_Emb1...,SMPD3,Expt,20200619,SMPD3gRNA1,TFAP2b;SMPD3;Cas9eGFP,Emb1,6ss,10x
3,4,SMPD3:Cntl,18390.248,6475.689,0,60501,119089500.0,172758434.0,20200619_SMPD3gRNA1_TFAP2b;SMPD3;Cas9eGFP_Emb1...,SMPD3,Cntl,20200619,SMPD3gRNA1,TFAP2b;SMPD3;Cas9eGFP,Emb1,6ss,10x
0,1,TFAP2b:Expt,24042.847,7615.361,0,34718,183095000.0,265608549.0,20200609_SMPD3gRNA1_SMPD3;TFAP2b;Cas9eGFP_Emb3...,TFAP2b,Expt,20200609,SMPD3gRNA1,SMPD3;TFAP2b;Cas9eGFP,Emb3,7ss,10x


In [20]:
probe = 'TFAP2b'
probe_df = full_df.loc[full_df['Stain'] == probe]

# Get a list of treatments
treatment_list = probe_df.Treatment.unique().tolist()
date_list = probe_df.ExptDate.unique().tolist()

# Mean counts across sections
trimmed_df = pd.DataFrame((probe_df.groupby(['Treatment', 'ExptDate', 'ExptDate', 'Embryo', 'ROI', 'Stain'])['IntDen']).mean())

# Initialize list to append results
results_list = []

# Loop trough treatments, performing each analysis and exporting CSV file for each treatment
for i in treatment_list:
    # Slice dataframe to process only embryos with given treatment
    treatment = i
    df_treatment = pd.DataFrame(trimmed_df.xs(treatment))
    
    for j in date_list:
        date = j
        df_date = pd.DataFrame(df_treatment.xs(date))

        # Extract means for Cntl and Expt counts
        intden_cntl = df_date.xs('Cntl', level='ROI')['IntDen'] 
        intden_expt = df_date.xs('Expt', level='ROI')['IntDen']

        # Generate ratios as Expt/Cntl
        intden_ratios = pd.DataFrame(intden_expt / intden_cntl)
        intden_ratios.columns = ['Expt/Cntl']

        # Normalize individual values to mean of control group
        norm_cntl = intden_cntl/(float(intden_cntl.mean()))
        norm_expt = intden_expt/(float(intden_cntl.mean()))

        # Combine processed values into single dataframe and output as csv file
        intden_cntl = pd.DataFrame(intden_cntl)
        intden_cntl.columns = ['Cntl IntDen']
        intden_expt = pd.DataFrame(intden_expt)
        intden_expt.columns = ['Expt IntDen']
        intden_ratios = pd.DataFrame(intden_ratios)
        intden_ratios.columns = ['Expt/Cntl IntDen']
        norm_cntl = pd.DataFrame(norm_cntl)
        norm_cntl.columns = ['Norm Cntl IntDen']
        norm_expt = pd.DataFrame(norm_expt)
        norm_expt.columns = ['Norm Expt IntDen']
        results = (pd.concat([intden_cntl, intden_expt, intden_ratios, norm_cntl, norm_expt], axis=1, sort=True)).reset_index()
        results['Embryo ID'] = results.ExptDate.str.cat(results.Embryo)
        results_list.append(results)

final_results = pd.concat(results_list)
    
# Save out results at CSV file, update file name
final_results.to_csv(analysis_date + '_' + treatment + '_' + probe +'_HCR_IntDen.csv')
final_results

Unnamed: 0,ExptDate,Embryo,Stain,Cntl IntDen,Expt IntDen,Expt/Cntl IntDen,Norm Cntl IntDen,Norm Expt IntDen,Embryo ID
0,20200619,Emb1,TFAP2b,182161100.0,144106300.0,0.791093,0.480861,0.380406,20200619Emb1
1,20200619,Emb2,TFAP2b,383169000.0,382284300.0,0.997691,1.011473,1.009138,20200619Emb2
2,20200619,Emb3,TFAP2b,622233900.0,521828400.0,0.838637,1.642547,1.3775,20200619Emb3
3,20200619,Emb4,TFAP2b,203482400.0,184920100.0,0.908777,0.537144,0.488144,20200619Emb4
4,20200619,Emb5,TFAP2b,503067000.0,388654000.0,0.772569,1.327975,1.025952,20200619Emb5
0,20200609,Emb2,TFAP2b,146088600.0,163841400.0,1.121521,0.925798,1.038302,20200609Emb2
1,20200609,Emb3,TFAP2b,169506400.0,183095000.0,1.080165,1.074202,1.160316,20200609Emb3


In [22]:
probe = 'SMPD3'
probe_df = full_df.loc[full_df['Stain'] == probe]

# Get a list of treatments
treatment_list = probe_df.Treatment.unique().tolist()
date_list = probe_df.ExptDate.unique().tolist()

# Mean counts across sections
trimmed_df = pd.DataFrame((probe_df.groupby(['Treatment', 'ExptDate', 'ExptDate', 'Embryo', 'ROI', 'Stain'])['IntDen']).mean())

# Initialize list to append results
results_list = []

# Loop trough treatments, performing each analysis and exporting CSV file for each treatment
for i in treatment_list:
    # Slice dataframe to process only embryos with given treatment
    treatment = i
    df_treatment = pd.DataFrame(trimmed_df.xs(treatment))
    
    for j in date_list:
        date = j
        df_date = pd.DataFrame(df_treatment.xs(date))

        # Extract means for Cntl and Expt counts
        intden_cntl = df_date.xs('Cntl', level='ROI')['IntDen'] 
        intden_expt = df_date.xs('Expt', level='ROI')['IntDen']

        # Generate ratios as Expt/Cntl
        intden_ratios = pd.DataFrame(intden_expt / intden_cntl)
        intden_ratios.columns = ['Expt/Cntl']

        # Normalize individual values to mean of control group
        norm_cntl = intden_cntl/(float(intden_cntl.mean()))
        norm_expt = intden_expt/(float(intden_cntl.mean()))

        # Combine processed values into single dataframe and output as csv file
        intden_cntl = pd.DataFrame(intden_cntl)
        intden_cntl.columns = ['Cntl IntDen']
        intden_expt = pd.DataFrame(intden_expt)
        intden_expt.columns = ['Expt IntDen']
        intden_ratios = pd.DataFrame(intden_ratios)
        intden_ratios.columns = ['Expt/Cntl IntDen']
        norm_cntl = pd.DataFrame(norm_cntl)
        norm_cntl.columns = ['Norm Cntl IntDen']
        norm_expt = pd.DataFrame(norm_expt)
        norm_expt.columns = ['Norm Expt IntDen']
        results = (pd.concat([intden_cntl, intden_expt, intden_ratios, norm_cntl, norm_expt], axis=1, sort=True)).reset_index()
        results['Embryo ID'] = results.ExptDate.str.cat(results.Embryo)
        results_list.append(results)

final_results = pd.concat(results_list)
    
# Save out results at CSV file, update file name
final_results.to_csv(analysis_date + '_' + treatment + '_' + probe +'_HCR_IntDen.csv')
final_results

Unnamed: 0,ExptDate,Embryo,Stain,Cntl IntDen,Expt IntDen,Expt/Cntl IntDen,Norm Cntl IntDen,Norm Expt IntDen,Embryo ID
0,20200619,Emb1,SMPD3,119089500.0,80147020.0,0.672998,0.308108,0.207356,20200619Emb1
1,20200619,Emb2,SMPD3,396321800.0,312083600.0,0.78745,1.025363,0.807422,20200619Emb2
2,20200619,Emb3,SMPD3,668289900.0,391103900.0,0.585231,1.728998,1.011863,20200619Emb3
3,20200619,Emb4,SMPD3,241346000.0,146100500.0,0.605357,0.62441,0.377991,20200619Emb4
4,20200619,Emb5,SMPD3,507545800.0,324916200.0,0.640171,1.313121,0.840622,20200619Emb5
0,20200609,Emb2,SMPD3,119779900.0,104337700.0,0.871079,0.984358,0.857453,20200609Emb2
1,20200609,Emb3,SMPD3,123586700.0,81225220.0,0.657232,1.015642,0.667513,20200609Emb3
