In [1]:
import numpy as np
import pandas as pd
import json 

import bokeh
from bokeh.models.widgets import Panel, Tabs
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.sampledata.commits import data
from bokeh.transform import jitter

from scipy import stats

# Configure Bokeh so that show() renders the plots inline in this notebook
bokeh.io.output_notebook()

The purpose of this notebook is to plot the data from the dataframes generated by `image_analysis.ipynb`, after validating the analysis parameters with `single_image_analysis.ipynb`.

We can start by importing the dataframe of interest and printing it.

In [2]:
# Read the quantification results; lines starting with '#' in the csv are skipped as comments
quantification_csv = '../Lung_Paper_Images/Quantification/Lung_Quantification.csv'
df = pd.read_csv(quantification_csv, comment='#')

# Preview the first rows
df.head()

Unnamed: 0,Date,Tissue,Size Threshold,Gaussian Size,Truncation,Intensity Threshold,Minimum Size,Virus,Animal,Replicate,Count,Cells Quantified,Brightness List,Applied Threshold,Image Multiplication Factor,Minimum Pixel Value,Maximum Pixel Value,Area Threshold,Total Area
0,6/30/2020,Lung,0.62,5,2,1000,10,AAV5,1,1,84,[ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ...,"{'1': 0.09374644248769036, '2': 0.396884378149...",0.301449,0.706774,-0.184533,0.5276,0.020282,31273793.26
1,6/30/2020,Lung,0.62,5,2,1000,10,AAV5,1,2,105,[ 1 2 3 4 5 6 7 8 9 10 11 ...,"{'1': 0.34954984628418456, '2': 0.301035344397...",0.32245,0.69957,-0.204931,0.543257,0.019231,32916921.38
2,6/30/2020,Lung,0.62,5,2,1000,10,AAV5,2,1,101,[ 1 2 3 4 5 6 7 8 9 10 11 ...,"{'1': 0.040592697357633566, '2': 0.07469444680...",0.243228,0.591669,-0.151784,0.571073,0.020107,48020362.14
3,6/30/2020,Lung,0.62,5,2,1000,10,AAV5,2,2,108,[ 1 2 3 4 5 6 7 8 9 10 11 ...,"{'1': 0.08277810712698083, '2': 0.062237664806...",0.396656,0.584836,-0.267212,0.474624,0.019407,42443016.38
4,6/30/2020,Lung,0.62,5,2,1000,10,AAV5,3,1,28,[ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ...,"{'1': 0.07123424141933186, '2': 0.198157759863...",0.411377,0.589563,-0.211362,0.348148,0.019425,26451775.48


Here we can perform some transformations on the data. From the counts and the area data, we want to determine the counts per millimeter squared of tissue. Since the current tissue area is in square micrometers, we will convert it to millimeters squared simultaneously.

Next, we determine the median of the brightness values of all the cells.

We can then re-sort the data (so that the plots are generated identically between runs, even if the variants weren't run in the same order). Additionally, since we abbreviated AAV.CAP-A4 as CAPA4, we replace the instances of this string with the full name to generate more formal plots.

In [3]:
# Determine the transduction as a function of tissue area. 'Total Area' is in
# square micrometers, so scaling by 1e6 gives counts per square millimeter.
df['Cells Transduced per mm2'] = df['Count'] / df['Total Area'] * 1000000


def _median_brightness(brightness_text):
    """Return the median of the values of one stringified brightness dictionary.

    The 'Brightness List' column stores each dictionary as text with
    single-quoted keys, so the single quotation marks are replaced with double
    quotation marks for json compliance before parsing.

    Returns NaN for an empty dictionary (filled with 0 by the caller below).
    """
    brightness_dict = json.loads(brightness_text.replace("\'", "\""))
    brightness_values = list(brightness_dict.values())
    return np.median(brightness_values) if brightness_values else np.nan


# Get the median brightness of every row. Using apply (rather than looping over
# positional integers with .loc) does not depend on the dataframe's index labels.
df['Brightness Median'] = df['Brightness List'].apply(_median_brightness).fillna(0)

# Sort the rows. The sort key always uses the abbreviated virus name, so that
# re-running this cell after the rename below yields the same row order as the
# first run. The plotting cells take their category order from this row order.
df = (
    df.assign(_virus_sort_key=df['Virus'].replace('AAV.CAP-A4', 'CAPA4'))
      .sort_values(by=['_virus_sort_key', 'Animal', 'Replicate'])
      .drop(columns='_virus_sort_key')
)

# Rename the specific rows for plotting purposes.
df['Virus'] = df['Virus'].replace('CAPA4','AAV.CAP-A4')

With our updated dataframe, we can determine the average values of the replicates for both _Cells Transduced per mm2_ and _Brightness Median_. We generate new dataframes from these averages to plot from.

In [4]:
# Group the replicate rows of each animal together, per virus
animal_keys = ['Virus', 'Animal']
grouped = df.groupby(animal_keys)

# Average the replicates into one row per (Virus, Animal): one dataframe for
# cell transduction and one for median cell brightness
df_counts, df_brightness = (
    grouped[metric].mean().reset_index()
    for metric in ('Cells Transduced per mm2', 'Brightness Median')
)

Finally, we can generate our plots using the Bokeh package. 

First, we plot the _Cells Transduced per mm2_:

In [5]:
# Uncomment to also save the generated plot as a standalone html file
#output_file("../Lung_Paper_Images/Quantification/Lung/lung_transduction_per_sq_mm.html")

# Store the data for plotting: one point per (Virus, Animal)
source = ColumnDataSource(df_counts)

# Categories of the x-axis, in the order the viruses appear in df
catagories = list(df['Virus'].unique())

# Initialize the figure directly at its final size
p = figure(plot_width=350, plot_height=350, x_range=catagories, y_axis_label = 'GFP+ cells per mm\u00b2 tissue area')

# Plot the data as a jittered scatter plot
p.circle(x=jitter('Virus', width=0.2, range=p.x_range), y='Cells Transduced per mm2', source=source, alpha=1)

# Add the mean and standard error of the mean (SEM) lines for every category.
mean_width = 0.25      # half-width of the mean line, in category units
whisker_width = 0.05   # half-width of the SEM whisker caps, in category units

# Iterating over the axis categories themselves keeps each set of lines aligned
# with its own category: the n-th category is centered at n + 0.5 on the axis.
for position, virus in enumerate(catagories):
    center = position + 0.5
    values = df_counts.loc[df_counts['Virus'] == virus, 'Cells Transduced per mm2']
    if values.empty:
        continue

    mean = np.mean(values)
    # Divide by the actual number of animals in this group rather than a fixed
    # sample size.
    # NOTE(review): np.std uses the population standard deviation (ddof=0); the
    # SEM is conventionally based on the sample standard deviation (ddof=1).
    # Left unchanged here - confirm which is intended for the figure.
    std_eot_mean = np.std(values) / np.sqrt(len(values))

    # Mean line
    p.hbar(y=mean, height=0, left=center - mean_width, right=center + mean_width, color='black')
    # Whisker caps at mean +/- SEM
    p.hbar(y=mean + std_eot_mean, height=0, left=center - whisker_width, right=center + whisker_width, color='black')
    p.hbar(y=mean - std_eot_mean, height=0, left=center - whisker_width, right=center + whisker_width, color='black')
    # Vertical whisker connecting the two caps through the mean
    p.segment(center, mean, center, mean + std_eot_mean, color='black')
    p.segment(center, mean, center, mean - std_eot_mean, color='black')

# Simply modify plot characteristics
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

p.yaxis.minor_tick_line_color = None

p.xaxis.major_label_text_font_size = '12pt'
p.yaxis.major_label_text_font_size = '12pt'

p.xaxis.axis_label_text_font_size = '18pt'
p.yaxis.axis_label_text_font_size = '18pt'

# Transparent background and border
p.background_fill_color = None
p.border_fill_color = None

p.output_backend = "svg"

show(p)

In [6]:
# Average the per-animal transduction values into one row per virus,
# discarding any virus without a valid mean
grouped = df_counts.groupby(['Virus'])

df_counts_grouped = (
    grouped['Cells Transduced per mm2']
    .mean()
    .reset_index()
    .dropna()
)

df_counts_grouped

Unnamed: 0,Virus,Cells Transduced per mm2
0,AAV.CAP-A4,112.379617
1,AAV5,1.862014
2,AAV9,6.216616


In [7]:
# Express every virus' mean transduction relative to each reference virus
reference_columns = (
    ('Relative Transduction to AAV9', 'AAV9'),
    ('Relative Transduction to AAV5', 'AAV5'),
)

for column_name, reference_virus in reference_columns:
    # Boolean mask selecting the row of the reference virus
    inds = df_counts_grouped['Virus'] == reference_virus
    reference_value = df_counts_grouped.loc[inds, 'Cells Transduced per mm2'].values
    df_counts_grouped[column_name] = df_counts_grouped['Cells Transduced per mm2'] / reference_value

In [8]:
# Display the per-virus means together with the relative transduction columns
df_counts_grouped

Unnamed: 0,Virus,Cells Transduced per mm2,Relative Transduction to AAV9,Relative Transduction to AAV5
0,AAV.CAP-A4,112.379617,18.077299,60.353806
1,AAV5,1.862014,0.299522,1.0
2,AAV9,6.216616,1.0,3.338652


In [9]:
# Split the per-animal transduction values into one population per virus
pop_AAV5, pop_AAV9, pop_CAPA4 = (
    df_counts[df_counts['Virus'] == virus_name]
    for virus_name in ('AAV5', 'AAV9', 'AAV.CAP-A4')
)

In [10]:
# Pairwise t-tests between the virus populations, without assuming equal
# variances (equal_var=False)
metric = 'Cells Transduced per mm2'

stat_5_9 = stats.ttest_ind(pop_AAV5[metric], pop_AAV9[metric], equal_var=False)
stat_A4_5 = stats.ttest_ind(pop_CAPA4[metric], pop_AAV5[metric], equal_var=False)
stat_A4_9 = stats.ttest_ind(pop_CAPA4[metric], pop_AAV9[metric], equal_var=False)

# Report the p value of each comparison
p_values = (stat_5_9.pvalue, stat_A4_5.pvalue, stat_A4_9.pvalue)
print("P values between groups are:\n AAV5 and AAV9: %f \n AAV5 and CAPA4: %f \n AAV9 and CAPA4: %f" % p_values)

P values between groups are:
 AAV5 and AAV9: 0.000128 
 AAV5 and CAPA4: 0.001676 
 AAV9 and CAPA4: 0.002001


Second, we plot the _Brightness Median_:

In [11]:
# Uncomment to also save the generated plot as a standalone html file
#output_file("../Lung_Paper_Images/Quantification/Lung/lung_median_brightness.html")

# Store the data for plotting: one point per (Virus, Animal)
source = ColumnDataSource(df_brightness)

# Categories of the x-axis, in the order the viruses appear in df
catagories = list(df['Virus'].unique())

# Initialize the figure directly at its final size
p = figure(plot_width=350, plot_height=350, x_range=catagories, y_axis_label = 'Median Cell Brightness')

# Plot the data as a jittered scatter plot
p.circle(x=jitter('Virus', width=0.2, range=p.x_range), y='Brightness Median', source=source, alpha=1)

# Add the mean and standard error of the mean (SEM) lines for every category.
mean_width = 0.25      # half-width of the mean line, in category units
whisker_width = 0.05   # half-width of the SEM whisker caps, in category units

# Iterating over the axis categories themselves keeps each set of lines aligned
# with its own category: the n-th category is centered at n + 0.5 on the axis.
for position, virus in enumerate(catagories):
    center = position + 0.5
    # Select the rows from df_brightness itself, so the mask always matches the
    # dataframe it is applied to.
    values = df_brightness.loc[df_brightness['Virus'] == virus, 'Brightness Median']
    if values.empty:
        continue

    mean = np.mean(values)
    # Divide by the actual number of animals in this group rather than a fixed
    # sample size.
    # NOTE(review): np.std uses the population standard deviation (ddof=0); the
    # SEM is conventionally based on the sample standard deviation (ddof=1).
    # Left unchanged here - confirm which is intended for the figure.
    std_eot_mean = np.std(values) / np.sqrt(len(values))

    # Mean line
    p.hbar(y=mean, height=0, left=center - mean_width, right=center + mean_width, color='black')
    # Whisker caps at mean +/- SEM
    p.hbar(y=mean + std_eot_mean, height=0, left=center - whisker_width, right=center + whisker_width, color='black')
    p.hbar(y=mean - std_eot_mean, height=0, left=center - whisker_width, right=center + whisker_width, color='black')
    # Vertical whisker connecting the two caps through the mean
    p.segment(center, mean, center, mean + std_eot_mean, color='black')
    p.segment(center, mean, center, mean - std_eot_mean, color='black')

# Simply modify plot characteristics
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

p.yaxis.minor_tick_line_color = None

p.xaxis.major_label_text_font_size = '12pt'
p.yaxis.major_label_text_font_size = '12pt'

p.xaxis.axis_label_text_font_size = '18pt'
p.yaxis.axis_label_text_font_size = '18pt'

# Transparent background and border
p.background_fill_color = None
p.border_fill_color = None

p.output_backend = "svg"

show(p)

In [12]:
# Average the per-animal brightness medians into one row per virus,
# discarding any virus without a valid mean
grouped = df_brightness.groupby(['Virus'])

df_brightness_grouped = (
    grouped['Brightness Median']
    .mean()
    .reset_index()
    .dropna()
)

df_brightness_grouped

Unnamed: 0,Virus,Brightness Median
0,AAV.CAP-A4,0.136621
1,AAV5,0.102483
2,AAV9,0.142874


In [13]:
# Express every virus' mean brightness relative to each reference virus
reference_columns = (
    ('Relative Brightness to AAV9', 'AAV9'),
    ('Relative Brightness to AAV5', 'AAV5'),
)

for column_name, reference_virus in reference_columns:
    # Boolean mask selecting the row of the reference virus
    inds = df_brightness_grouped['Virus'] == reference_virus
    reference_value = df_brightness_grouped.loc[inds, 'Brightness Median'].values
    df_brightness_grouped[column_name] = df_brightness_grouped['Brightness Median'] / reference_value

In [14]:
# Display the per-virus means together with the relative brightness columns
df_brightness_grouped

Unnamed: 0,Virus,Brightness Median,Relative Brightness to AAV9,Relative Brightness to AAV5
0,AAV.CAP-A4,0.136621,0.956231,1.333111
1,AAV5,0.102483,0.717293,1.0
2,AAV9,0.142874,1.0,1.39413


In [15]:
# Split the per-animal brightness medians into one population per virus
pop_AAV5, pop_AAV9, pop_CAPA4 = (
    df_brightness[df_brightness['Virus'] == virus_name]
    for virus_name in ('AAV5', 'AAV9', 'AAV.CAP-A4')
)

In [16]:
# Pairwise t-tests between the virus populations, without assuming equal
# variances (equal_var=False)
metric = 'Brightness Median'

stat_5_9 = stats.ttest_ind(pop_AAV5[metric], pop_AAV9[metric], equal_var=False)
stat_A4_5 = stats.ttest_ind(pop_CAPA4[metric], pop_AAV5[metric], equal_var=False)
stat_A4_9 = stats.ttest_ind(pop_CAPA4[metric], pop_AAV9[metric], equal_var=False)

# Report the p value of each comparison
p_values = (stat_5_9.pvalue, stat_A4_5.pvalue, stat_A4_9.pvalue)
print("P values between groups are:\n AAV5 and AAV9: %f \n AAV5 and CAPA4: %f \n AAV9 and CAPA4: %f" % p_values)

P values between groups are:
 AAV5 and AAV9: 0.005023 
 AAV5 and CAPA4: 0.004081 
 AAV9 and CAPA4: 0.531225
