# Image analysis of expression level of three promoters

(c) 2019 Manuel Razo. This work is licensed under a 
[Creative Commons Attribution License CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/). 
All code contained herein is licensed under an 
[MIT license](https://opensource.org/licenses/MIT).

In [14]:
import os
import git

# Our numerical workhorses
import numpy as np
import scipy as sp
import pandas as pd

# Import Interactive plot libraries
import bokeh.plotting
import bokeh.layouts
from bokeh.themes import Theme
import holoviews as hv
import hvplot
import hvplot.pandas
import panel as pn
import bokeh_catplot

# Import project package
import fit_seq

# Have bokeh render its figures inline in the notebook output cells
bokeh.io.output_notebook()
# Load the bokeh plotting backend for holoviews
hv.extension('bokeh')

In [3]:
# Build a bokeh Theme from the PBoC style definition shipped with the
# project package
pboc_style = fit_seq.viz.pboc_style_bokeh()
theme = Theme(json=pboc_style)

# Use the same theme for plain bokeh documents...
bokeh.io.curdoc().theme = theme

# ...and for everything rendered through the holoviews bokeh backend
hv.renderer('bokeh').theme = theme

| | |
|-|-|
| __Date__ | 2020-07-18 |
| __Equipment__ | Tenjin Nikon Microscope |
| __User__ | mrazomej |

# Description
The objective of this experiment was to quickly test whether the expression
levels of the three promoters not only follow the expected rank order, but
also agree with the predictions from the Sort-Seq energy matrix and the
thermodynamic model.

## Strain information
| Genotype | plasmid | Host Strain | Shorthand |
| :------- | :------ | :---------- | :-------- |
| `none` | `pZS4-mCherry` | HG105 | `auto-mCh` |
| `galK<>2*lacUV5-tetA-C51m` | `pZS4-mCherry` | HG105 | `UV5-mCh` |
| `galK<>2*WTlac-tetA-C51m` | `pZS4-mCherry` | HG105 | `WT-mCh` |
| `galK<>2*3.19kBT-tetA-C51m` | `pZS4-mCherry` | HG105 | `3.19-mCh` |
| `none` | `pZS4-CFP` | HG105 | `auto-CFP` |
| `galK<>2*lacUV5-tetA-C51m` | `pZS4-CFP` | HG105 | `UV5-CFP` |
| `galK<>2*WTlac-tetA-C51m` | `pZS4-CFP` | HG105 | `WT-CFP` |
| `galK<>2*3.19kBT-tetA-C51m` | `pZS4-CFP` | HG105 | `3.19-CFP` |

The images were automatically segmented in the `image_processing.py` notebook. Let's read the output of this analysis into memory.

In [23]:
# Location of the per-cell measurements written by the segmentation step
segmentation_csv = "./output/20200718_raw_segmentation.csv"

# Load the table into memory
df = pd.read_csv(segmentation_csv)

# Report (rows, columns) and preview the first few cells
print(df.shape)
df.head()

(4706, 8)


Unnamed: 0,area,eccentricity,solidity,mean_intensity,date,username,promoter,volume_marker
0,2.480075,0.954723,0.94373,2742.045997,20200718,mrazomej,auto,mCherry
1,2.028,0.945876,0.924855,2776.589583,20200718,mrazomej,auto,mCherry
2,1.998425,0.946719,0.947896,2817.976744,20200718,mrazomej,auto,mCherry
3,2.25615,0.949699,0.936842,2745.93633,20200718,mrazomej,auto,mCherry
4,1.76605,0.918067,0.981221,2828.901914,20200718,mrazomej,auto,mCherry


Let's apply size and eccentricity filters.

In [27]:
# Keep objects whose area lies strictly between 0.5 and 6.0 ...
area_in_bounds = (df["area"] > 0.5) & (df["area"] < 6.0)
# ... and whose eccentricity is above 0.8
eccentric_enough = df["eccentricity"] > 0.8

# Apply both bounds at once
df_filt = df[area_in_bounds & eccentric_enough]

# Report how many objects survive the filters and preview them
print(df_filt.shape)
df_filt.head()

(4637, 8)


Unnamed: 0,area,eccentricity,solidity,mean_intensity,date,username,promoter,volume_marker
0,2.480075,0.954723,0.94373,2742.045997,20200718,mrazomej,auto,mCherry
1,2.028,0.945876,0.924855,2776.589583,20200718,mrazomej,auto,mCherry
2,1.998425,0.946719,0.947896,2817.976744,20200718,mrazomej,auto,mCherry
3,2.25615,0.949699,0.936842,2745.93633,20200718,mrazomej,auto,mCherry
4,1.76605,0.918067,0.981221,2828.901914,20200718,mrazomej,auto,mCherry


In order to compare the measurements we need to compute two things:
1. Multiply mean_intensity $\times$ area to get total fluorescent counts within the cell.
2. Subtract the mean autofluorescence for each volume marker.

In [28]:
# Total fluorescence per cell: mean pixel intensity times the cell area
total_intensity = df_filt["mean_intensity"] * df_filt["area"]
df_filt = df_filt.assign(intensity=total_intensity)

# Define function to subtract the mean autofluorescence
def subtract_auto(data):
    """Return a copy of `data` with a background-subtracted intensity column.

    The background is the mean of the "intensity" column over the rows whose
    "promoter" entry equals "auto". It is subtracted from every row's
    "intensity" and stored in a new "intensity_bgsub" column. The input
    dataframe is not modified. If `data` contains no "auto" rows the mean is
    NaN, and so is the whole new column.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with at least the columns "promoter" and "intensity".

    Returns
    -------
    pandas.DataFrame
        `data` plus the "intensity_bgsub" column.
    """
    # Boolean mask selecting the autofluorescence control rows
    is_auto = data["promoter"] == "auto"
    # Mean total intensity of the control
    background = data.loc[is_auto, "intensity"].mean()
    # Background-subtracted intensity for every row
    corrected = data["intensity"] - background
    return data.assign(intensity_bgsub=corrected)

# Subtract the autofluorescence separately for each volume marker, since
# each marker has its own autofluorescence control
by_marker = df_filt.groupby("volume_marker")
df_filt = by_marker.apply(subtract_auto)

# Remove the outermost index level left behind by the grouped apply
df_filt = df_filt.droplevel(0, axis=0)
# Renumber the rows; reset_index() keeps the previous row labels as an
# "index" column
df_filt = df_filt.reset_index()

df_filt.head()

Unnamed: 0,index,area,eccentricity,solidity,mean_intensity,date,username,promoter,volume_marker,intensity,intensity_bgsub
0,1138,2.27305,0.960151,0.945518,2840.540892,20200718,mrazomej,auto,CFP,6456.691475,-500.324141
1,1139,2.032225,0.956512,0.948718,2889.081081,20200718,mrazomej,auto,CFP,5871.2628,-1085.752816
2,1140,2.632175,0.966652,0.834003,3128.725522,20200718,mrazomej,auto,CFP,8235.3531,1278.337484
3,1141,4.08135,0.870526,0.673171,3232.980331,20200718,mrazomej,auto,CFP,13194.924275,6237.908659
4,1142,2.133625,0.946099,0.926606,3088.015842,20200718,mrazomej,auto,CFP,6588.6678,-368.347816


Let's look at the background-subtracted intensity ECDFs for each of the strains.

In [32]:
# Figure options shared by both volume-marker panels
ecdf_fig_kwargs = dict(
    width=400,
    height=300,
    x_axis_label="intensity (a.u.)",
    y_axis_label="ECDF",
)

# One panel per volume marker
pmch = bokeh.plotting.figure(title="mCherry", **ecdf_fig_kwargs)
pcfp = bokeh.plotting.figure(title="CFP", **ecdf_fig_kwargs)

# Draw the ECDF of the background-subtracted intensity on each panel,
# one curve per promoter
for marker_name, panel in (("mCherry", pmch), ("CFP", pcfp)):
    bokeh_catplot.ecdf(
        data=df_filt[df_filt["volume_marker"] == marker_name],
        cats="promoter",
        val="intensity_bgsub",
        p=panel,
    )

# Show both panels side by side
ecdf_row = bokeh.layouts.row([pmch, pcfp])
bokeh.io.show(ecdf_row)

Let's plot the mean intensity as a function of the predicted binding energy.

In [51]:
# Predicted binding energy (in kBT) for each promoter label. The
# autofluorescence control gets NaN and is excluded from the plot below.
energy_dict = {
    "lacUV5": -7.93,
    "WTlac": -5.19,
    "3.19kBT": -3.19,
    "auto": np.nan,
}

# Add energy column to dataframe. The explicit lookup raises a KeyError if a
# promoter label has no entry in energy_dict.
df_filt = df_filt.assign(
    energy_kBT=[energy_dict[x] for x in df_filt["promoter"]]
)

# Define color palettes for volume markers
colors = {
    "mCherry": bokeh.palettes.Reds8,
    "CFP": bokeh.palettes.Blues8,
}

# Keep only the strains that carry a promoter
df_promoters = df_filt[df_filt["promoter"] != "auto"]

# Map every binding energy to a fixed palette position. The groups below are
# iterated energy-major (both markers for one energy, then the next energy),
# so a running loop counter would give the same promoter a different shade in
# each palette; ranking the energies keeps the shade consistent.
energy_rank = {
    energy: rank
    for rank, energy in enumerate(sorted(df_promoters["energy_kBT"].unique()))
}

# Group by binding energy and volume marker
df_group = df_promoters.groupby(["energy_kBT", "volume_marker"])

# Initialize bokeh plot
p_energy = bokeh.plotting.figure(
    width=500,
    height=350,
    x_axis_label="energy (kʙT)",
    y_axis_label="<expression> (a.u.)",
)

# Loop through groups
for (energy, marker_name), data in df_group:
    palette = colors[marker_name]
    # Plot mean background-subtracted fluorescence for this promoter/marker.
    # The modulo keeps the index valid if there are more energies than
    # palette entries.
    p_energy.scatter(
        x=[energy],
        y=[data["intensity_bgsub"].mean()],
        color=palette[energy_rank[energy] % len(palette)],
        marker="o",
        size=8,
    )

bokeh.io.show(p_energy)

## Conclusion

The measurements were too close to the autofluorescent strains. These were done using the YFP filter cube, but the strains have an sfGFP construct. The scaling of the mCherry strains looks like it is going in the right direction, but it is hard to conclude anything from this data given the wrong filter cube.