## Analysis of Transferrin-633 Intensity in Neural Crest Explants

In [10]:
# Import necessary packages
import os
import glob
import pandas as pd
import numpy as np
from scipy import stats
import scikit_posthocs

# Import plotting packages
import iqplot
import bokeh.io
from bokeh.io import output_file, show
from bokeh.layouts import column, row
bokeh.io.output_notebook()

### Import, concatenate, and annotate the measurement data from Fiji

In [11]:
# Define path to directory with measurements
path = os.path.abspath('raw_source_data/')
list_summary = []

# Sort the matches so the row order of df_summary (and the preview below) is
# reproducible; glob.glob returns files in arbitrary, filesystem-dependent order.
intensity_files = sorted(glob.glob(os.path.join(path, '*_Intensity.csv')))
if not intensity_files:
    # Fail with an actionable message instead of pd.concat's
    # "No objects to concatenate".
    raise FileNotFoundError(
        "No '*_Intensity.csv' files found in {}".format(path)
    )

# Read each file, annotate it, and collect the pieces for a single concatenation
for file_ in intensity_files:
    df = pd.read_csv(file_)

    # The Image name is the file name without its extension. It must contain
    # exactly six underscore-separated fields:
    # Date_Treatment_Explant_Stains_FOV_Measurement.
    # It is constant for the whole file, so it is parsed once rather than per row.
    image = os.path.splitext(os.path.basename(file_))[0]
    fields = image.split('_')
    if len(fields) != 6:
        raise ValueError(
            "Expected 6 '_'-separated fields in image name {!r}, found {}"
            .format(image, len(fields))
        )

    # Stains (fields[3]) and Measurement (fields[5]) are not kept in the output,
    # so they are never added as columns.
    df = df.assign(
        Image=image,
        Date=fields[0],
        Treatment=fields[1],
        Explant=fields[2],
        FOV=fields[4],
    )

    # Each Label is expected to be 'Channel:ROI'; any other number of
    # ':'-separated parts makes this two-column assignment fail.
    df[['Channel', 'ROI']] = df['Label'].str.split(':', expand=True)
    df['ROI_ID'] = df['Image'] + '_' + df['ROI']

    # Drop unnecessary columns for tidiness
    df = df.drop(columns=['Label', 'RawIntDen', 'IntDen'])
    # Rows whose ROI is named 'background' are excluded from the summary
    df = df.loc[df['ROI'] != 'background']
    df = df.drop_duplicates()
    list_summary.append(df)

df_summary = pd.concat(list_summary, sort=False)
# ' ' is the blank-named column of the CSVs (presumably the row-number column
# Fiji writes -- TODO confirm). Removing it lets otherwise identical rows be
# de-duplicated; errors='ignore' tolerates exports that lack the column.
df_summary = df_summary.drop(columns=' ', errors='ignore').drop_duplicates()

# Preview dataframe to confirm import successful
df_summary.head(2)

Unnamed: 0,Area,Mean,Image,Date,Treatment,Explant,FOV,Channel,ROI,ROI_ID
1,329.138,4172.373,20221001_nSMase2FLAG_Explant1_RFP;TPMT;PMGFP;C...,20221001,nSMase2FLAG,Explant1,FOV2,H2BRFP,0004-0526-0740,20221001_nSMase2FLAG_Explant1_RFP;TPMT;PMGFP;C...
2,574.472,1461.648,20221001_nSMase2FLAG_Explant1_RFP;TPMT;PMGFP;C...,20221001,nSMase2FLAG,Explant1,FOV2,H2BRFP,0004-0610-0795,20221001_nSMase2FLAG_Explant1_RFP;TPMT;PMGFP;C...


### Examine the distribution of mean intensity in the selected channel across treatments

In [12]:
# Plot settings: the column to plot, the channel to keep, and the grouping column
plotting_metric = 'Mean'
channel = 'Ceramide'
categories = 'Treatment'

# Treatments to include; also used as the category order in both plots
treatment_list = ['ControlMO', 'SMPD3MO', 'H2BRFP', 'nSMase2FLAG']

# Keep only rows that belong to a listed treatment AND the chosen channel
df_subset = df_summary.loc[
    df_summary['Treatment'].isin(treatment_list)
    & (df_summary['Channel'] == channel)
]

# Staircase ECDF per treatment on a log x-axis, with confidence intervals
# requested at the 2.5 / 97.5 percentiles
test_ecdf = iqplot.ecdf(
    data=df_subset,
    q=plotting_metric,
    cats=categories,
    order=treatment_list,
    style='staircase',
    conf_int=True,
    ptiles=(2.5, 97.5),
    show_legend=False,
    x_axis_type='log',
)

# Jittered strip plot overlaid with a box plot, quantitative axis vertical and log-scaled
test_stripbox = iqplot.stripbox(
    data=df_subset,
    q=plotting_metric,
    q_axis='y',
    cats=categories,
    order=treatment_list,
    spread='jitter',
    jitter_kwargs={'width': 0.25},
    box_kwargs={'line_color': 'black', 'line_width': 1.5},
    whisker_kwargs={'line_color': 'black', 'line_width': 1.5},
    median_kwargs={'line_color': 'maroon', 'line_width': 4},
    top_level='box',
    y_axis_type='log',
)

# Show both figures side by side, then save the plotted subset to CSV
show(row(test_ecdf, test_stripbox))
df_subset.to_csv('source_data.csv')