In [1]:
import numpy as np
import pandas as pd
import scipy.special

import bokeh.io
import bokeh.plotting

bokeh.io.output_notebook()

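Tuning the number of RNAP molecules. Throughout, the probability that RNAP is bound to the promoter is $p_\mathrm{bound} = \left(1 + \frac{N_{NS}}{P}\, e^{\Delta\varepsilon}\right)^{-1}$, where $P$ is the RNAP copy number, $N_{NS}$ is given in bp, and the binding energy $\Delta\varepsilon$ is in units of $k_BT$: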

In [2]:
# parameters needed for p_bound
N_NS = 4.6E6       # bp
delta_e_P1 = -2.9  # kBT
delta_e_T7 = -8.1  # kBT

# polymerase copy numbers to plot: 200 whole-number values spanning 1 to 10000
P = np.floor(np.linspace(1, 10000, 200))

# evaluate p_bound over the whole range of P, once per binding energy
p_bound_P1, p_bound_T7 = (
    1 / (1 + N_NS / P * np.exp(delta_e))
    for delta_e in (delta_e_P1, delta_e_T7)
)

In [3]:
# p_bound against RNAP copy number, with a log-scaled y axis
p = bokeh.plotting.figure(
    x_axis_label='number of RNA polymerase molecules',
    y_axis_label='probability RNAP bound',
    y_axis_type="log",
    frame_width=350,
    frame_height=250,
)

# one curve per promoter: lac P1 first, then T7 A1
for curve, color, label in (
    (p_bound_P1, 'red', "lac P1"),
    (p_bound_T7, 'blue', "T7 A1"),
):
    p.line(
        x=P,
        y=curve,
        line_join='bevel',
        line_width=2,
        line_color=color,
        legend_label=label,
    )

p.legend.location = "bottom_right"

bokeh.io.show(p)

Instead, let's tune the binding energy, which is more relevant for our situation of changing sequences:

In [32]:
# parameters needed for p_bound
# (P is a single copy number from here on; it replaces the array of copy numbers used in the cells above)
N_NS = 4.6E6 #bp
P = 3000

# make a range of binding energies (kBT) to plot
e = np.linspace(-5, -1, 400)

# compute p_bound from range of e values and parameters
p_bound = 1 / (1 + (N_NS / P) * np.exp(e))

In [33]:
# p_bound as a function of binding energy, on linear axes
p = bokeh.plotting.figure(
    x_axis_label='binding energy (kBT)',
    y_axis_label='probability RNAP bound',
    y_axis_type="linear",
    frame_width=350,
    frame_height=250,
)

p.line(x=e, y=p_bound, line_color='red', line_width=2, line_join='bevel')

bokeh.io.show(p)

Now let's consider our data of binding energy and growth rates, computing p_bound from the binding energy.

In [34]:
# load the measured strains and append the p_bound computed from each binding energy
# (uses the N_NS and P currently defined in the notebook)
data = pd.read_csv("./growth_rates.txt", sep=",").assign(
    **{"p bound": lambda strains: 1 / (1 + N_NS/P * np.exp(strains["binding energy (kBT)"]))}
)
data

Unnamed: 0,binding energy (kBT),growth rate (per hr),strain,p bound
0,-1.43,0.087,RandSeq3 unevolved,0.002718
1,-3.31,0.242,RandSeq3 evolved,0.017546
2,-4.04,0.341,RandSeq29,0.035736


Just for reference, let's place these strains on the plot from before.

In [35]:
# p_bound curve over binding energy, with the strains from `data` overlaid as points
p = bokeh.plotting.figure(
    frame_height=250,
    frame_width=350,
    x_axis_label='binding energy (kBT)',
    y_axis_label='probability RNAP bound',
    y_axis_type="linear",
)

p.line(
    x=e,
    y=p_bound,
    line_join='bevel',
    line_width=2,
    line_color='red',
)

# scatter() draws the same default circle markers; the size-based circle()
# call it replaces is deprecated in Bokeh 3.4 and later
p.scatter(
    x=data['binding energy (kBT)'],
    y=data['p bound'],
)


bokeh.io.show(p)

Let's now see how p_bound and growth rate are related from our data:

In [36]:
# growth rate against p_bound for the strains in `data`
p = bokeh.plotting.figure(
    frame_height=250,
    frame_width=350,
    y_axis_label='growth rate (per hr)',
    x_axis_label='probability RNAP bound',
    y_axis_type="linear",
)


# scatter() draws the same default circle markers; the size-based circle()
# call it replaces is deprecated in Bokeh 3.4 and later
p.scatter(
    x=data['p bound'],
    y=data['growth rate (per hr)'],
)

bokeh.io.show(p)

Let's fit a line to this data:

In [37]:
# degree-1 least-squares fit of growth rate against p bound;
# polyfit returns the coefficients highest power first, i.e. slope then intercept
m, b = np.polyfit(data['p bound'], data['growth rate (per hr)'], deg=1)

# evaluate the fitted line on an evenly spaced grid of p_bound values for plotting
x = np.linspace(0.0001, 0.04, 200)
y = b + x * m
print(m, b)

7.6088459999403515 0.08130010110490518


In [38]:
# growth rate against p_bound: points from `data` plus the fitted line (x, y)
p = bokeh.plotting.figure(
    frame_height=250,
    frame_width=350,
    y_axis_label='growth rate (per hr)',
    x_axis_label='probability RNAP bound',
    y_axis_type="linear",
)


# scatter() draws the same default circle markers; the size-based circle()
# call it replaces is deprecated in Bokeh 3.4 and later
p.scatter(
    x=data['p bound'],
    y=data['growth rate (per hr)'],
)

p.line(
    x=x,
    y=y,
)

bokeh.io.show(p)

Let's put it all together now. Below is Tiba's code for parsing an energy matrix:

In [39]:
# read in the energy matrix: space-separated values, no header row, lines starting with '#' skipped
# NOTE: this rebinds `data`, which held the growth-rate table in the cells above
data = (
    pd.read_csv("../../data/brewster_matrixS2.txt", sep=" ", comment="#", header=None)
    .iloc[5:-6]               # trimming matrix to 30 bp
    .reset_index(drop=True)   # renumber the remaining rows from 0
)
data.columns = ['A','C','G','T']
data.head()

Unnamed: 0,A,C,G,T
0,0.305961,0.681616,0.36014,-0.313427
1,0.122283,0.247441,0.171605,-0.313427
2,1.500683,1.490967,-0.313427,0.633869
3,-0.313427,1.032246,-0.138758,0.699062
4,1.064641,-0.214039,1.119622,-0.313427


In [40]:
# RandSeq1 sequence; it contains the 30-base stretch CGTTCAGGTTCTGGTTCTCCATGCTATAGT,
# which is the literal scored as "evolved" RandSeq1 in a later cell.
RandSeq1 = "ATAGGAGCGTCATCAAACGCGCCGTTCAGGTTCTGGTTCTCCATGCTATAGTTAAGCCGCACAACGGGTACTACCACTCCCTGTAGTCCGCTTTACCGTTCTC"
# 30-base window; the same literal is scored as "unevolved" RandSeq1 in a later cell.
# NOTE(review): this window differs from the matching stretch inside RandSeq1 at one base
# (...CCATGCCATAGT here vs ...CCATGCTATAGT in RandSeq1) - confirm which string is the
# unevolved sequence.
RandSeq1_trimmed = 'CGTTCAGGTTCTGGTTCTCCATGCCATAGT'

In [41]:
def energy(sequence, matrix=None):
    """
    Sum the per-position energies of a promoter sequence from an energy matrix.

    Input:
        sequence: string of bases, one per row of the energy matrix
            (30 bp for the promoter region); lower-case letters are accepted
        matrix: optional DataFrame with rows labelled 0..n-1 (one per position)
            and one column per base; defaults to the notebook-level `data`
    Output:
        total_energy: the total energy for the given sequence in K_bT
    Raises:
        ValueError: if the sequence length differs from the number of matrix
            rows, or if it contains a letter that is not a matrix column
    """
    # `data` is rebound several times in the notebook, so allow the caller to
    # pass the energy matrix explicitly instead of relying on the global.
    if matrix is None:
        matrix = data

    sequence = sequence.upper()

    # A shorter sequence would otherwise be scored silently on a partial sum.
    if len(sequence) != len(matrix):
        raise ValueError(
            f"sequence has {len(sequence)} bases but the energy matrix has "
            f"{len(matrix)} positions"
        )

    unknown_bases = set(sequence) - set(matrix.columns)
    if unknown_bases:
        raise ValueError(f"sequence contains unsupported bases: {sorted(unknown_bases)}")

    # Adds the energy value for each base together for the entire sequence,
    # left to right starting from 0.
    return sum(matrix.loc[position, letter] for position, letter in enumerate(sequence))

And finally, let's write a growth rate function that will give the predicted growth rate from a given sequence.

In [22]:
def growth_rate(sequence, m=7.61, b=0.0813, n_rnap=None, n_ns=None):
    """
    Given a sequence, returns the predicted growth rate (in units of hr^-1).

    Uses the predicted binding energy of the sequence and the linear relationship
    between probability bound and growth rate (as parameterized by m and b).

    Input:
        sequence: promoter sequence, passed unchanged to energy()
        m, b: slope and intercept of the line growth rate = m * p_bound + b
        n_rnap: number of RNA polymerase molecules; defaults to the
            notebook-level P
        n_ns: the N_NS term of p_bound; defaults to the notebook-level N_NS
    Output:
        m * p_bound + b, where p_bound = 1 / (1 + n_ns/n_rnap * exp(energy))
    """
    # Fall back to the notebook globals so existing calls give the same result.
    # P is an array in the first cells and a scalar later on, so pass n_rnap
    # explicitly when the cell order is in doubt.
    if n_rnap is None:
        n_rnap = P
    if n_ns is None:
        n_ns = N_NS

    binding_energy = energy(sequence)
    p_bound = 1 / (1 + n_ns / n_rnap * np.exp(binding_energy))
    return m * p_bound + b
    

In [42]:
# unevolved RandSeq1
growth_rate(sequence='CGTTCAGGTTCTGGTTCTCCATGCCATAGT')

0.11139560114884847

In [45]:
# evolved RandSeq1
growth_rate(sequence='CGTTCAGGTTCTGGTTCTCCATGCTATAGT')

0.17146827234507406