### Import necessary packages

In [1]:
import os
import glob
import pandas as pd
import numpy as np
from scipy import stats
import scikit_posthocs
import iqplot
import bokeh.io
bokeh.io.output_notebook()

### Import data from directory of measurement tables, collected from Fiji

In [6]:
# Define path to directory with measurements
path = os.path.abspath('3_Intensity/')
df_summary = pd.DataFrame()
list_summary = []

# For loop to bring in files and concatenate them into a single dataframe
for file_ in glob.glob(path + "/*Intensity.csv"):
    df = pd.read_csv(file_)
    # Determine Image name from file name, then parse experiment details from Image name
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]
    (df['Date'], df['CellLine'], df['Treatment'], df['Rep'], df['Channels'], 
     df['FOV'], df['Measurement']) = zip(*df['Image'].map(lambda x:x.split('_')))

    (df['DeleteMe'], df['ROI']) = zip(*df['Label'].map(lambda x:x.split(':')))

    # Drop unnecessary columns for tidiness
    df = df.drop(['Label', 'Channels', 'Measurement', 'DeleteMe'], axis = 1)
    
    # Compile data
    list_summary.append(df)
df_summary = pd.concat(list_summary, sort=False)


# Preview dataframe to confirm import successful
df_summary.head()

Unnamed: 0,Unnamed: 1,Area,Mean,IntDen,RawIntDen,Image,Date,CellLine,Treatment,Rep,FOV,ROI
0,1,36.836,191.84,7066.613,290446.0,20200820_U2OS_SMPD3_Rep1_RFP;GPIGFP;Tf_FOV12_I...,20200820,U2OS,SMPD3,Rep1,FOV12,background
1,2,400.475,1010.79,404796.591,16637611.0,20200820_U2OS_SMPD3_Rep1_RFP;GPIGFP;Tf_FOV12_I...,20200820,U2OS,SMPD3,Rep1,FOV12,0003-0399-0824
0,1,24.817,499.493,12395.829,509483.0,20200820_U2OS_SGMS2_Rep1_RFP;GPIGFP;Tf_FOV4_In...,20200820,U2OS,SGMS2,Rep1,FOV4,background
1,2,621.686,986.842,613505.401,25215786.0,20200820_U2OS_SGMS2_Rep1_RFP;GPIGFP;Tf_FOV4_In...,20200820,U2OS,SGMS2,Rep1,FOV4,0003-0725-0662
0,1,12.043,145.871,1756.787,72206.0,20200820_U2OS_SMPD3N130A_Rep1_RFP;GPIGFP;Tf_FO...,20200820,U2OS,SMPD3N130A,Rep1,FOV6,background


### Calculate Corrected Total Cellular Fluorescence (CTCF)

CTCF calculation accounts for differences in image background value and cell size
- CTCF = ROI IntDen - (ROI Area * Background ROI Mean)

In [40]:
# Initialize for final dataframe collection
full_results = pd.DataFrame()
full_results_list = []

# Loop through each image to calculate CTCF:
for image in df_summary['Image']:
    df_image = df_summary.loc[df_summary['Image'] == image]
    background = float(df_image.loc[df_image['ROI'] == 'background']['Mean'])
    df_cells = df_image.loc[df_image['ROI'] != 'background'].copy()
    df_cells['CTCF'] = df_cells['IntDen'] - (df_cells['Area']*background)
    full_results_list.append(df_cells)

# Assemble and view the final results
full_results = pd.concat(full_results_list,sort=False).reset_index().drop('index', axis=1)
full_results.head()

Unnamed: 0,Unnamed: 1,Area,Mean,IntDen,RawIntDen,Image,Date,CellLine,Treatment,Rep,FOV,ROI,CTCF
0,2,400.475,1010.79,404796.591,16637611.0,20200820_U2OS_SMPD3_Rep1_RFP;GPIGFP;Tf_FOV12_I...,20200820,U2OS,SMPD3,Rep1,FOV12,0003-0399-0824,327969.467
1,2,400.475,1010.79,404796.591,16637611.0,20200820_U2OS_SMPD3_Rep1_RFP;GPIGFP;Tf_FOV12_I...,20200820,U2OS,SMPD3,Rep1,FOV12,0003-0399-0824,327969.467
2,2,621.686,986.842,613505.401,25215786.0,20200820_U2OS_SGMS2_Rep1_RFP;GPIGFP;Tf_FOV4_In...,20200820,U2OS,SGMS2,Rep1,FOV4,0003-0725-0662,302977.595802
3,2,621.686,986.842,613505.401,25215786.0,20200820_U2OS_SGMS2_Rep1_RFP;GPIGFP;Tf_FOV4_In...,20200820,U2OS,SGMS2,Rep1,FOV4,0003-0725-0662,302977.595802
4,2,540.642,962.766,520511.538,21393630.0,20200820_U2OS_SMPD3N130A_Rep1_RFP;GPIGFP;Tf_FO...,20200820,U2OS,SMPD3N130A,Rep1,FOV6,0003-0517-0263,441647.548818


# Parse dataframe by desired 'cell line' and 'treatment' combinations, then plot results

In [42]:
# Generate and display list of cell lines and treatments present in this dataset
cell_list = full_results['CellLine'].unique().tolist()
treatment_list = full_results['Treatment'].unique().tolist()

print('Cells lines: ' + str(cell_list))
print('Treatments: ' + str(treatment_list))

Cells lines: ['U2OS']
Treatments: ['SMPD3', 'SGMS2', 'SMPD3N130A', '2aRFP', 'Dyn1K44A']


In [64]:
# Prepare for parsing data:
# To populate this "comment on/off" code block, copy the results of th cell lists above
cells = [
    'U2OS', 
]
treatments = [
        '2aRFP',
        'Dyn1K44A',
        'SMPD3',
        'SMPD3N130A', 
        ]

# Copy dataset to not disrupt raw data
df_subset = full_results

# Pull out only cells and treaments of interest
df_subset = df_subset.loc[df_subset['CellLine'].isin(cells)]
df_subset = df_subset.loc[df_subset['Treatment'].isin(treatments)]

# Make stripbox plot using iqplot
data_stripbox = iqplot.stripbox(
    data=df_subset, q='CTCF', cats='Treatment', q_axis='x'
    
# Plot details
    ,jitter=True, jitter_kwargs=dict(width=0.4)
    ,marker_kwargs=dict(alpha=0.5, size=5
                    ,color='darkgray'
                      )
    ,box_kwargs=dict(line_color='black', line_width=1.5)
    ,whisker_kwargs=dict(line_color='black', line_width=1.5)
    ,median_kwargs=dict(line_color='black', line_width=2)
    ,top_level='box'
    ,frame_width=400, frame_height=200
    
# Other customization parameters
    ,order = treatments
#     ,palette = ['#1f77b4', '#d62728', '#2ca02c', '#ff7f0e']
#     ,y_axis_label='Treatment'
#     ,y_axis_label='Transferrin-633 Puncta Count'
#     ,x_range=(0,50)
)

# Other customization parameters
data_stripbox.axis.axis_label_text_font_size = '20px'
data_stripbox.axis.axis_label_text_font_style = 'normal'
data_stripbox.axis.major_label_text_font_size = '18px'
data_stripbox.xaxis.major_label_orientation = 7

# data_stripbox.output_backend = "svg"
bokeh.io.show(data_stripbox)

# Parse dataframe by desired 'cell line' and 'treatment' combinations, then plot results

In [7]:
# Generate and display list of cell lines and treatments present in this dataset
cell_list = df_summary['CellLine'].unique().tolist()
treatment_list = df_summary['Treatment'].unique().tolist()

print('Cells lines: ' + str(cell_list))
print('Treatments: ' + str(treatment_list))

Cells lines: ['U2OS']
Treatments: ['SMPD3', 'SGMS2', 'SMPD3N130A', '2aRFP', 'Dyn1K44A']


In [17]:
# Prepare for parsing data:
# To populate this "comment on/off" code block, copy the results of th cell lists above
cells = [
    'U2OS', 
]
treatments = [
        '2aRFP',
        'Dyn1K44A',
        'SMPD3',
        'SMPD3N130A', 
        ]

# Copy dataset to not disrupt raw data
df_subset = df_summary

# Pull out only cells and treaments of interest
df_subset = df_subset.loc[df_subset['CellLine'].isin(cells)]
df_subset = df_subset.loc[df_subset['Treatment'].isin(treatments)]
df_subset = df_subset.loc[df_subset['ROI'] == 'background']
# df_subset = df_subset['Count'].dropna()

# Make stripbox plot using iqplot
data_stripbox = iqplot.stripbox(
    data=df_subset, q='Area', cats='Treatment', q_axis='y'
    
# Plot details
    ,jitter=True, jitter_kwargs=dict(width=0.4)
    ,marker_kwargs=dict(alpha=0.8, size=6
                    ,color='gray'
                      )
    ,box_kwargs=dict(line_color='black', line_width=1.5)
    ,whisker_kwargs=dict(line_color='black', line_width=1.5)
    ,median_kwargs=dict(line_color='black', line_width=2)
    ,top_level='box'
    ,frame_width=250, frame_height=350


    # Other customization parameters

    ,order = treatments
#     ,palette = ['#1f77b4', '#d62728', '#2ca02c', '#ff7f0e']
    ,x_axis_label='Treatment'
#     ,y_axis_label='Transferrin-633 Puncta Count'
#     ,x_range=(0,50)
)

# Other customization parameters
data_stripbox.axis.axis_label_text_font_size = '20px'
data_stripbox.axis.axis_label_text_font_style = 'normal'
data_stripbox.axis.major_label_text_font_size = '18px'
data_stripbox.xaxis.major_label_orientation = 7

# data_stripbox.output_backend = "svg"
bokeh.io.show(data_stripbox)

In [39]:
# Prepare for parsing data:
# To populate this "comment on/off" code block, copy the results of th cell lists above
cells = [
    'U2OS', 
]
treatments = [
        '2aRFP',
        'Dyn1K44A',
        'SMPD3',
        'SMPD3N130A', 
        ]

# Copy dataset to not disrupt raw data
df_subset = df_summary

# Pull out only cells and treaments of interest
df_subset = df_subset.loc[df_subset['CellLine'].isin(cells)]
df_subset = df_subset.loc[df_subset['Treatment'].isin(treatments)]
# df_subset = df_subset['Count'].dropna()

# Make stripbox plot using iqplot
data_stripbox = iqplot.stripbox(
    data=df_subset, q='Count', cats='Treatment', q_axis='y'
    
# Plot details
    ,jitter=True, jitter_kwargs=dict(width=0.4)
    ,marker_kwargs=dict(alpha=0.8, size=6
                    ,color='gray'
                      )
    ,box_kwargs=dict(line_color='black', line_width=1.5)
    ,whisker_kwargs=dict(line_color='black', line_width=1.5)
    ,median_kwargs=dict(line_color='black', line_width=2)
    ,top_level='box'
    ,frame_width=250, frame_height=350


    # Other customization parameters

    ,order = treatments
    ,palette = ['#1f77b4', '#d62728', '#2ca02c', '#ff7f0e']
    ,x_axis_label='Treatment'
    ,y_axis_label='Transferrin-633 Puncta Count'
    ,x_range=(0,50)
)

# Other customization parameters
data_stripbox.axis.axis_label_text_font_size = '20px'
data_stripbox.axis.axis_label_text_font_style = 'normal'
data_stripbox.axis.major_label_text_font_size = '18px'
data_stripbox.xaxis.major_label_orientation = 7

# data_stripbox.output_backend = "svg"
bokeh.io.show(data_stripbox)

# Kruskal-Wallis Test with Dunn's Multiple Comparisons Correction
Useful for comparing multiple datasets

In [13]:
# Reminder of treatments to compare
treatment_list

['SMPD3', 'SMPD3N130A', 'Dyn1K44A', '2aRFP']

In [15]:
### Kruskal-Wallis test
# Define samples to compare
sample1 = '2aRFP'
sample2 = 'Dyn1K44A'
sample3 = 'SMPD3'
sample4 = 'SMPD3N130A'

# Run Kruskal-Wallis test
kw_result = stats.kruskal(
                df_subset.loc[df_subset['Treatment']==sample1]['Count']
               ,df_subset.loc[df_subset['Treatment']==sample2]['Count']
               ,df_subset.loc[df_subset['Treatment']==sample3]['Count']
               ,df_subset.loc[df_subset['Treatment']==sample4]['Count']
    )

# Dunn's Posthoc for Multiple Comparisons
mult_compar = scikit_posthocs.posthoc_dunn(df_subset, val_col='Count', group_col='Treatment'
                                           , sort=False, p_adjust='bonferroni').round(10)

# Display test results
print('Kruskal-Wallis test results: \n\t\t\t\t statistic=' + str(kw_result[0]) + 
    '\n\t\t\t\t p-value=' + str(kw_result[1]))
print("\nDunn's posthoc multiple comparison result: \n" + str(mult_compar) +'\n')
# mult_compar.to_csv("Results of Dunn's Posthoc.csv")

# Get number of cells within this test
for treatment in df_subset['Treatment'].unique().tolist():
    temp_df = df_subset.loc[df_subset['Treatment'] == treatment]
    print('n = ' + str(len(temp_df)) + ' cells in the ' + str(treatment) + ' dataset.')

Kruskal-Wallis test results: 
				 statistic=42.81823141572586
				 p-value=2.689693437699311e-09

Dunn's posthoc multiple comparison result: 
                   SMPD3  SMPD3N130A      Dyn1K44A     2aRFP
SMPD3       1.000000e+00    0.034371  1.000000e-09  0.047485
SMPD3N130A  3.437108e-02    1.000000  6.998710e-04  1.000000
Dyn1K44A    1.000000e-09    0.000700  1.000000e+00  0.000048
2aRFP       4.748527e-02    1.000000  4.791080e-05  1.000000

n = 29 cells in the SMPD3 dataset.
n = 36 cells in the SMPD3N130A dataset.
n = 35 cells in the Dyn1K44A dataset.
n = 50 cells in the 2aRFP dataset.
