# Analysis of BioTek plate reader growth curves

## Date: 2020/07/11

(c) 2020 Manuel Razo. This work is licensed under a [Creative Commons Attribution License CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/). All code contained herein is licensed under an [MIT license](https://opensource.org/licenses/MIT)

---

In [1]:
import os
import itertools
import git

# Our numerical workhorses
import numpy as np
import scipy as sp
import scipy.signal
import pandas as pd

# Import matplotlib stuff for plotting
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib as mpl

# Seaborn, useful for graphics
import seaborn as sns

# Import Interactive plot libraries
import bokeh.plotting
import bokeh.layouts
from bokeh.themes import Theme
import holoviews as hv
import hvplot
import hvplot.pandas
import panel as pn

# Import the project utils
import fit_seq 

# Render inline matplotlib figures in the high-resolution 'retina' format
%config InlineBackend.figure_format = 'retina'

# Direct Bokeh output to the notebook and select Bokeh as the HoloViews
# plotting backend
bokeh.io.output_notebook()
hv.extension('bokeh')

In [2]:
# Set PBoC plotting format for matplotlib (project style helper)
fit_seq.viz.pboc_style_mpl()
# Increase dpi of the matplotlib figures
mpl.rcParams['figure.dpi'] = 110

# Set PBoC style for the interactive plots: build a Bokeh theme from the
# project's style definition and attach it to the HoloViews Bokeh renderer
theme = Theme(json=fit_seq.viz.pboc_style_bokeh())
hv.renderer('bokeh').theme = theme

## Purpose
The purpose of this experiment is to address the lack of reproducibility across
wells in the plate. With a single strain growing in the same medium, the
objective was to observe the variability in the growth curves as a function of
the position in a 96-well plate.

## Strain Information

| Plasmid | Genotype | Host Strain | Shorthand |
| :------ | :------- | ----------: | --------: |
| `pZS4*5-CFP`| `galK<>25O1+11-tetA-C51m` |  HG105 |`WT - CFP` |

### Run metadata

In [3]:
import re

# Find home directory for repo
repo = git.Repo("./", search_parent_directories=True)
homedir = repo.working_dir

# The name of the current working directory encodes the run metadata as
# "<date>_<run token>...". `os.path.basename` extracts the directory name
# regardless of the path separator used by the operating system.
workdir = os.path.basename(os.getcwd())
date_token, run_token = workdir.split('_')[:2]

# Find date
DATE = int(date_token)

# Find run number. All trailing digits of the run token are used so that run
# numbers of 10 and above are not truncated to their last digit.
run_match = re.search(r"\d+$", run_token)
if run_match is None:
    raise ValueError(
        f"Cannot find a run number at the end of '{run_token}' "
        f"(working directory '{workdir}')"
    )
RUN_NO = int(run_match.group())

## Per-well growth rate analysis

Let's begin by importing the growth rates as inferred with the Gaussian process method. We will start with the per-well analysis.

In [4]:
# Load the per-well Gaussian process inference for this date and run
gp_csv = f"./output/{DATE}_r{RUN_NO}_gp_per_well.csv"
df_gp = pd.read_csv(gp_csv, index_col=False)

# Split every well name into its plate row (first character) and its plate
# column (remaining characters)
df_gp["row"] = [well[0] for well in df_gp["well"]]
df_gp["col"] = [well[1:] for well in df_gp["well"]]


def sort_by_time(df, time="time_min"):
    """
    Return the rows of a data frame ordered by a time column.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to sort, e.g. the measurements of a single well.
    time : str, default "time_min"
        Name of the column to sort by (ascending).

    Returns
    -------
    pandas.DataFrame
        Sorted copy of `df`; the input is left untouched and the original
        index labels are kept.
    """
    ordered = df.sort_values(time)
    return ordered


# Sort the rows of every well by time. Applying the function per group yields
# a multiindex, so the outer level is dropped afterwards
per_well_sorted = df_gp.groupby("well").apply(sort_by_time)
df_gp = per_well_sorted.droplevel(level=0)

df_gp.head()

Unnamed: 0,time_min,temp_C,OD600,strain,well,media,pos_selection,promoter,volume_marker,date,run_number,gp_OD600,gp_OD600_std,gp_growth_rate,gp_growth_rate_std,gp_doubling_time,gp_doubling_time_std,row,col
0,24.166667,37.0,0.066,WTlac_T_CFP,A01,M9_0.5%_glucose,1.5_ugmL_tetracycline,WTlac,CFP,20200711,1,0.064239,0.014977,5e-05,0.000127,3811.71,237722.825515,A,1
1,49.166667,37.1,0.068,WTlac_T_CFP,A01,M9_0.5%_glucose,1.5_ugmL_tetracycline,WTlac,CFP,20200711,1,0.065744,0.013186,7.7e-05,0.000104,4914.54,156006.114461,A,1
2,74.166667,37.0,0.07,WTlac_T_CFP,A01,M9_0.5%_glucose,1.5_ugmL_tetracycline,WTlac,CFP,20200711,1,0.06862,0.013027,0.000103,8.2e-05,5545.725,282554.842684,A,1
3,99.166667,37.0,0.072,WTlac_T_CFP,A01,M9_0.5%_glucose,1.5_ugmL_tetracycline,WTlac,CFP,20200711,1,0.070954,0.012735,0.00012,6.4e-05,5557.85,284176.052629,A,1
4,124.166667,37.1,0.074,WTlac_T_CFP,A01,M9_0.5%_glucose,1.5_ugmL_tetracycline,WTlac,CFP,20200711,1,0.073509,0.012741,0.000134,4.9e-05,5166.33,41050.043019,A,1


### Whole-plate growth curves

Let's quickly take a look at all raw data from each well. This is just a rough look at the kind of data we are looking at.

In [5]:
# Set the HoloViews output size option for the grid of panels
hv.output(size=50)
# Generate hvplot: one OD600 vs. time panel per well.
# NOTE(review): the facet rows are taken from the plate column ("col") and the
# facet columns from the plate row ("row"), so the grid looks transposed with
# respect to the physical plate layout — confirm this is intended.
df_gp.hvplot(
    x="time_min",
    y="OD600",
    row="col",
    col="row",
    xlabel="time (min)",
    ylabel="OD600",
    xticks=3,
    yticks=3,
)

### Whole-plate growth rates

Now let's take a look at all of the growth rates.

In [6]:
# Set the HoloViews output size option for the grid of panels
hv.output(size=50)
# Generate hvplot: one growth rate vs. time panel per well, using the
# "gp_growth_rate" column.
# NOTE(review): the facet rows are taken from the plate column ("col") and the
# facet columns from the plate row ("row"), so the grid looks transposed with
# respect to the physical plate layout — confirm this is intended.
df_gp.hvplot(
    x="time_min",
    y="gp_growth_rate",
    row="col",
    col="row",
    xlabel="time (min)",
    ylabel="λ (min\u207B\u00B9)",
    xticks=3,
    yticks=3,
)

### Per-well growth curves (with `HoloViews`)

These measurements are really noisy, especially at the beginning of the growth curves. Let's take a look at the individual trajectories. For this we will use `HoloViews` rather than `hvplot` to quickly get an interactive widget with which to change the curve.

In [None]:
# Generate curves per well with dropdown menu: grouping by "well" produces one
# curve per well
hv_OD = hv.Curve(
    data=df_gp,
    kdims=[("time_min", "time (min)",), ("OD600", "OD600")],
    vdims=["well"],
).groupby("well")

# The inferred growth rate is stored in the "gp_growth_rate" column of the
# data frame (there is no "growth_rate" column)
hv_gr = hv.Curve(
    data=df_gp,
    kdims=[
        ("time_min", "time (min)",),
        ("gp_growth_rate", "growth rate (min\u207B\u00B9)"),
    ],
    vdims=["well"],
).groupby("well")

# Generate layout for plots on top of each other (single column); the x-axis
# label of the top plot is blanked
hv_layout = hv.Layout(
    hv_OD.opts(width=800, height=400, xlabel="")
    + hv_gr.opts(width=800, height=400)
).cols(1)
hv_layout

### Grouped curves (with `Panel`)

Another way to display these data is to overlay the curves of several wells, selected by plate row and column and colored by either one, rather than looking at a single well at a time.

In [31]:
# Well selectors: the available plate rows and columns are read off the well
# names (first character = row, remaining characters = column)
plate_rows = list(np.unique([well[0] for well in df_gp["well"]]))
plate_columns = list(np.unique([well[1:] for well in df_gp["well"]]))

row_select = pn.widgets.CrossSelector(name="rows", options=plate_rows)
column_select = pn.widgets.CrossSelector(
    name="columns", options=plate_columns
)

# Choice of the plate dimension used to color the curves
color_by = pn.widgets.RadioBoxGroup(
    name="color by", options=["Row", "Column"], inline=True
)

# Generate function to plot the data
@pn.depends(
    row_select.param.value,
    column_select.param.value,
    color_by.param.value,
)
def plot_groups(row_select, column_select, color_by):
    """
    Function to generate interactive plot

    Draws the OD600 curve and the growth rate ("gp_growth_rate") of every well
    formed by combining the selected plate rows with the selected plate
    columns, as two vertically stacked Bokeh figures sharing the x range.

    Parameters
    ----------
    row_select : list
        Values currently selected in the row selector widget.
    column_select : list
        Values currently selected in the column selector widget.
    color_by : str
        Either "Row" or "Column"; determines whether curves are colored
        according to the plate row or the plate column of their well.

    Returns
    -------
    panel.pane.Bokeh or None
        Pane with both figures, or None when no well is selected.

    Raises
    ------
    ValueError
        If wells are selected and `color_by` is neither "Row" nor "Column".
    """
    # Generate all pairs of groups (every selected row with every selected
    # column) and join them into well names
    wells = [
        row + column
        for row, column in itertools.product(row_select, column_select)
    ]

    # Nothing to plot until at least one row and one column are selected
    if not wells:
        return None

    # Choose the grouping variable and the part of the well name holding it:
    # the first character is the row, the remaining characters the column
    if color_by == "Row":
        groups, group_key = row_select, slice(0, 1)
    elif color_by == "Column":
        groups, group_key = column_select, slice(1, None)
    else:
        raise ValueError(
            f"color_by must be 'Row' or 'Column', got {color_by!r}"
        )

    # Define colors for groups and set them in a dictionary
    colors = bokeh.palettes.Category20_12[0:len(groups)]
    col_dict = dict(zip(groups, colors))

    # Initialize figure for OD600
    od_fig = bokeh.plotting.figure(
        width=800,
        height=250,
        x_axis_label="",
        y_axis_label="OD600",
        toolbar_location="above",
        tools=["box_zoom", "pan", "wheel_zoom", "reset"],
    )
    # Add legend outside plot
    od_fig.add_layout(bokeh.models.Legend(), 'right')

    # Initialize figure for growth rate; it shares the x range of the OD600
    # figure so that both plots zoom and pan together along the time axis
    gr_fig = bokeh.plotting.figure(
        width=800,
        height=250,
        x_axis_label="time (min)",
        y_axis_label="growth rate",
        toolbar_location=None,
        x_range=od_fig.x_range,
        tools=""
    )

    # Loop through wells and plot each of them
    for well in wells:
        # Extract data
        data = df_gp[(df_gp["well"] == well)].sort_values(by="time_min")

        # Declare bokeh data source
        source = bokeh.models.ColumnDataSource(data)

        # Define color from the row or column part of the well name
        color = col_dict[well[group_key]]

        # Plot growth curve
        od_fig.line(
            x="time_min",
            y="OD600",
            source=source,
            line_width=1.5,
            color=color,
        )
        # Plot growth rate
        gr_fig.line(
            x="time_min",
            y="gp_growth_rate",
            source=source,
            line_width=1.5,
            color=color,
        )

    # Apply PBoC format
    fit_seq.viz.pboc_single(od_fig)
    fit_seq.viz.pboc_single(gr_fig)

    return pn.pane.Bokeh(bokeh.layouts.column([od_fig, gr_fig]))

# Stack the three selection widgets on top of the plot. `plot_groups` is
# declared (via `pn.depends`) to depend on the values of these widgets
pn.Column(
    row_select,
    column_select,
    color_by,
    plot_groups,
)

## Conclusions

From the data we can see that the range of wells we usually use, C03-F10 or even B03-G10, is in the clear for reproducible analysis. 
This might not be the best concentration of tetracycline to have tested. Probably 1 µg/mL would have been a better choice.