In [11]:
# Import necessary packages
import os
import glob
import pandas as pd
import numpy as np
from scipy import stats
import scikit_posthocs

# Import plotting packages
import iqplot
import bokeh.io
from bokeh.io import output_file, show
from bokeh.layouts import column, row
# Send Bokeh figures to the notebook output (runs once, as a side effect of this cell)
bokeh.io.output_notebook()

# Import data from directory of measurement tables, collected from Fiji

In [12]:
# Define path to directory with measurements
path = os.path.abspath('raw_data_csvs/')

# Collect the measurement tables to import. glob returns matches in arbitrary order,
# so sort them to keep the row order of the combined dataframe reproducible.
puncta_files = sorted(glob.glob(os.path.join(path, '*_Puncta.csv')))
if not puncta_files:
    # Fail early with a readable message; pd.concat on an empty list would
    # otherwise raise an opaque "No objects to concatenate" error.
    raise FileNotFoundError('No *_Puncta.csv files found in ' + path)

# Measurement columns that are not needed downstream
unused_columns = ['Slice', 'Total Area', 'Average Size', '%Area', 'Mean', 'IntDen']

# For loop to bring in files and collect them for concatenation
list_summary = []
for file_ in puncta_files:
    df = pd.read_csv(file_)

    # Determine Image name from file name, then parse experiment details from Image name.
    # The name is identical for every row of a file, so it is split once here rather
    # than once per row; this also keeps tables with zero rows from breaking the import.
    image_name = os.path.splitext(os.path.basename(file_))[0]
    fields = image_name.split('_')
    if len(fields) != 6:
        raise ValueError(
            'Expected 6 underscore-separated fields in image name, got '
            + str(len(fields)) + ': ' + image_name
        )
    # The sixth field (the trailing "Puncta" suffix matched by the glob) is not kept
    date, embryo, treatment, stains, fov, _ = fields

    df = df.assign(
        Image=image_name,
        Date=date,
        Embryo=embryo,
        Treatment=treatment,
        Stains=stains,
        FOV=fov,
    )

    # Drop unnecessary columns for tidiness
    df = df.drop(columns=unused_columns)

    # Compile data
    list_summary.append(df)

# Concatenate the per-image tables into a single dataframe
df_summary = pd.concat(list_summary, sort=False)

# Assign import to full_results df with shortened treatment labels.
# replace() returns a new dataframe, so df_summary keeps the labels as parsed.
full_results = df_summary.replace({'DMSO;1percent': 'DMSO', 'GW4869;25uM': 'GW4869'})

# Preview dataframe to confirm import successful
full_results.head()

Unnamed: 0,Count,Image,Date,Embryo,Treatment,Stains,FOV
0,2,20210804_Emb4_GW4869;25uM_Phalloidin;TPMT;DAPI...,20210804,Emb4,GW4869,Phalloidin;TPMT;DAPI;Tf633,FOV2
1,1,20210804_Emb4_GW4869;25uM_Phalloidin;TPMT;DAPI...,20210804,Emb4,GW4869,Phalloidin;TPMT;DAPI;Tf633,FOV2
2,0,20210804_Emb4_GW4869;25uM_Phalloidin;TPMT;DAPI...,20210804,Emb4,GW4869,Phalloidin;TPMT;DAPI;Tf633,FOV2
3,1,20210804_Emb4_GW4869;25uM_Phalloidin;TPMT;DAPI...,20210804,Emb4,GW4869,Phalloidin;TPMT;DAPI;Tf633,FOV2
4,0,20210804_Emb4_GW4869;25uM_Phalloidin;TPMT;DAPI...,20210804,Emb4,GW4869,Phalloidin;TPMT;DAPI;Tf633,FOV2


# Plot data as an ECDF plot

In [13]:
# Make ECDF plot using iqplot

# Order in which the treatments are drawn and listed in the legend.
# Defined here so the cell runs on a fresh kernel (it was not defined anywhere above).
# NOTE(review): assumes these are the only two treatments and that the control
# should come first - confirm against the original analysis.
treatment_list = ['DMSO', 'GW4869']

data_ecdf = iqplot.ecdf(
    data=full_results,
    q='Count',
    cats='Treatment',
    q_axis='x',
    style='staircase',
    order=treatment_list,
    # x_range=(-1.5, 35),
    line_kwargs=dict(line_width=3),
    conf_int=True,
    n_bs_reps=1000,
    ptiles=[2.5, 97.5],   # ptiles values equate to 95% CIs
    # Other customization parameters
    frame_height=350,
    frame_width=450,
    x_axis_label='Transferrin-633 Puncta Count',
    y_axis_label='Cumulative Distribution Frequency',
    show_legend=True,
)

# Axis text styling
data_ecdf.axis.axis_label_text_font_size = '20px'
data_ecdf.axis.axis_label_text_font_style = 'normal'
data_ecdf.axis.major_label_text_font_size = '18px'

# Optional: uncomment to render with the SVG backend
# data_ecdf.output_backend = "svg"
show(row(data_ecdf))

# Kolmogorov-Smirnov Test
Useful for comparing exactly two samples; it does not account for multiple comparisons.

In [10]:
### Kolmogorov-Smirnov test - NO MULTIPLE COMPARISONS
# Define samples to compare
sample1 = 'DMSO'
sample2 = 'GW4869'
metric = 'Count'

# Build the table used for the test inside this cell so it runs on a fresh kernel
# (df_subset was not defined anywhere above).
# NOTE(review): the original definition of df_subset is not visible in this notebook;
# this assumes it was full_results limited to the two treatments compared - confirm.
df_subset = full_results.loc[full_results['Treatment'].isin([sample1, sample2])]

# Run 2-sample Kolmogorov-Smirnov Test on the chosen metric
ks_result = stats.ks_2samp(
    df_subset.loc[df_subset['Treatment'] == sample1, metric],
    df_subset.loc[df_subset['Treatment'] == sample2, metric],
)

# Display results of Kolmogorov-Smirnov test
print('Two-sample Kolmogorov-Smirnov test results for ' + sample1 + ' vs ' + sample2
      + ': \n\t\t\t\t statistic=' + str(ks_result[0])
      + '\n\t\t\t\t p-value=' + str(ks_result[1]))

# Report the number of rows per treatment within this test
for treatment in df_subset['Treatment'].unique().tolist():
    n_rows = int((df_subset['Treatment'] == treatment).sum())
    print('n = ' + str(n_rows) + ' cells in the ' + str(treatment) + ' dataset.')

Two-sample Kolmogorov-Smirnov test results for DMSO vs GW4869: 
				 statistic=0.8829774326619753
				 p-value=8.444019006889688e-158
n = 364 cells in the GW4869 dataset.
n = 317 cells in the DMSO dataset.
