# Estimates - Protein translation, number of ribosomes
(c) 2020 The Authors. All creative work is published under a [CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/) permissive license. All software is released under a standard [MIT](https://opensource.org/licenses/MIT) license. **This software is provided as-is and may be subject to change.**

In [119]:
import numpy as np
import pandas as pd
import panel as pn 
import altair as alt
import tqdm
import prot.viz
prot.viz.altair_theme()
pn.extension('vega')
_ = alt.data_transformers.enable('default')

In this notebook, we provide a series of estimates for the copy numbers of various protein complexes needed to support bacterial cell doubling. The aim is to provide some expectation about how these numbers may vary with growth rate and then to compare to available experimental data.


# Load in data 

In [120]:
# Per-subunit records annotated with the complex each subunit belongs to.
subunits = pd.read_csv('../../data/compiled_annotated_complexes.csv')

# Compiled absolute protein measurements (gene-name lower-casing left disabled).
data = pd.read_csv('../../data/compiled_absolute_measurements.csv')
# data['gene_name'] = data['gene_name'].str.lower()

# One value per complex and condition: the mean of `n_units` over all rows
# that share the grouping keys below.
complex_keys = [
    'dataset',
    'dataset_name',
    'condition',
    'growth_rate_hr',
    'complex_annotation',
    'complex',
]
complex_count = (
    subunits
    .groupby(complex_keys)['n_units']
    .mean()
    .reset_index()
)

In [121]:
# Raw EcoCyc complex annotations; pull out the unique gene names assigned to
# complex CPLX0-3964 (the complex this notebook treats as the ribosome).
annotated_cplxs = pd.read_csv('../../data/ecocyc_raw_data/annotated_complexes.csv')
ribosome_genes = annotated_cplxs.loc[
    annotated_cplxs['complex'] == 'CPLX0-3964', 'gene_name'
].unique()

In [122]:
# Number of distinct gene names annotated to complex CPLX0-3964.
annotated_cplxs.loc[annotated_cplxs['complex'] == 'CPLX0-3964', 'gene_name'].unique().size

55

# Limits of protein synthesis

Given the requirement for the ribosomes to replicate themselves and all other proteins in order for a cell to divide, it'll be insightful to look at the maximum rate of duplication at all the growth rates we're considering. One hypothesis is that the growth rate of a bacterium is set by the duplication time required for a complete ribosome complex, plus some time required to also replicate all the other proteins in the cell. It's not clear to me how this might play out at different growth rates and relative fractions of ribosomal protein, so let's take a look.


Given some number of ribosomes and some number of proteins, lets try to estimate the time required to replicate a cell. Use numbers from data at each growth rate. So our baseline time will be the time required to replicate itself. 

The time to replicate non-ribosomal proteins = total number of amino acids / (rate of translation [aa / (sec · ribosome)] × number of ribosomes).

The total number of amino acids is going to be given from the mass of non-ribosomal protein and average molecular weight of an amino acid,

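$f$ / (110 g/mol) * $N_A$, where $f$ is the non-ribosomal protein mass per cell (in grams) and $N_A$ is Avogadro's number. The code below uses 110 g/mol as the average mass of an amino acid.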

Assume translation rate is 20 aa/sec.

In [123]:

# Translation-limited doubling time for every (dataset, condition, growth rate)
# group, assuming a fixed elongation rate of 20 aa/s per ribosome.
#
# Rows are collected in a list and the DataFrame is built once at the end:
# `DataFrame.append` was removed in pandas 2.0 and re-copied the whole frame
# on every iteration.
ribo_limit_rows = []
for c, d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    # Proteins whose GO annotation includes GO:0005840 are counted as ribosomal.
    is_ribo = d['go_terms'].astype(str).str.contains('GO:0005840')
    frac_ribo = d[is_ribo].fg_per_cell.sum() / d.fg_per_cell.sum()

    # Amino acids in the non-ribosomal proteome: fg -> g, then divide by
    # 110 g/mol per amino acid and multiply by Avogadro's number.
    N_aa = d[~is_ribo].fg_per_cell.sum() * 1E-15 * 6.022E23 / 110

    # Ribosome copy number for this exact group; raises IndexError if the
    # complex table has no CPLX0-3964 entry for it.
    N_ribosome = complex_count[(complex_count.dataset == c[0]) &
                               (complex_count.condition == c[1]) &
                               (complex_count.growth_rate_hr == c[2]) &
                               (complex_count.complex == 'CPLX0-3964')].n_units.values[0]

    # Time (min) for N_ribosome ribosomes at 20 aa/s to synthesize N_aa residues,
    # and the corresponding growth rate (hr^-1).
    t = ((N_aa / (20.0 * N_ribosome)) / 60)
    lambda_max = np.log(2) / (t / 60)

    ribo_limit_rows.append({'frac_ribo': frac_ribo,
                            'N_aa': N_aa,
                            'N_ribosome': N_ribosome,
                            't_min': t,
                            'dataset': c[0],
                            'condition': c[1],
                            'growth_rate_hr': c[2],
                            # measured doubling time in minutes
                            'cell_cycle_t': 60 * (np.log(2) / c[2]),
                            'lambda_max': lambda_max})

ribo_limit_df = pd.DataFrame(ribo_limit_rows)
    

In [124]:
# Scatter of the measured growth rate against the translation-limited maximum.
tooltip_fields = ['dataset', 'condition', 'lambda_max', 'growth_rate_hr', 'frac_ribo']
p = alt.Chart(ribo_limit_df).mark_point().encode(
    x=alt.X('growth_rate_hr:Q',
            scale=alt.Scale(domain=[0, 2.6]),
            title='growth rate (hr-1)'),
    y=alt.Y('lambda_max:Q',
            scale=alt.Scale(domain=[0, 2.6]),
            title='maximum growth rate, translation limited (hr-1)'),
    color=alt.Color('dataset:N'),
    tooltip=tooltip_fields,
)

# Shaded area between y = 0 and y = x over the plotted growth-rate range.
x_grid = np.linspace(0, 2.6, 200)
source = pd.DataFrame(
    {'x': x_grid, 'y1': np.zeros(x_grid.size)},
    columns=['x', 'y1'],
)
trans_max = (
    alt.Chart(source)
    .mark_area(opacity=0.2, line=False, strokeOpacity=0)
    .encode(x='x:Q', y='y1:Q', y2='x:Q')
)

# Shaded region underneath, data points on top.
axis_fonts = {'labelFontSize': 16, 'titleFontSize': 16}
alt.layer(trans_max, p).configure_axis(**axis_fonts)


In [125]:
# main plot
p = alt.Chart(ribo_limit_df).mark_point().encode(
        x = alt.X('cell_cycle_t:Q', scale = alt.Scale(domain=[0,400.0]), title = 'measured cell cycle time (min.)'),
        y = alt.Y('t_min:Q',scale = alt.Scale(domain=[0,400]), title = 'cell cycle time, translation limited (min.)'),
        color = alt.Color('frac_ribo:Q',  scale=alt.Scale(scheme='viridis'), title = 'ribosome mass fraction'),
        tooltip = ['dataset', 'condition', 'lambda_max', 'growth_rate_hr', 'frac_ribo' ])

# boundary for making a ribosome (and everything else needed to translate)
source = pd.DataFrame({'x' : np.linspace(0,14.2, 400), 
                       'y1' : np.zeros(len(np.arange(0,400))),
                       'y2' : np.ones(len(np.arange(0,400))) * 400}, 
                      columns = ['x', 'y1', 'y2'])

ribo_max = alt.Chart(source).mark_area(opacity=0.4, line=False, strokeOpacity = 0).encode(
    x='x:Q',
    y='y1:Q',
    y2='y2:Q',
)

# boundary for translation rate given number of ribosome
source = pd.DataFrame({'x' : np.arange(0,400), 
                       'y2' : np.ones(len(np.arange(0,400))) * 400}, columns = ['x', 
                                                                  'y2'])
trans_max = alt.Chart(source).mark_area(opacity=0.2, line=False, strokeOpacity = 0).encode(
    x='x:Q',
    y='x:Q',
    y2='y2:Q' 
)

# plot everything together
alt.layer(
    ribo_max, trans_max, p
).configure_axis(
    labelFontSize=16,
    titleFontSize=16
    )


In [126]:
# Same comparison as above, coloured by dataset with the condition as tooltip.
p = alt.Chart(ribo_limit_df).mark_point().encode(
    x=alt.X('cell_cycle_t:Q',
            scale=alt.Scale(domain=[0, 400.0]),
            title='measured cell cycle time (min.)'),
    y=alt.Y('t_min:Q',
            scale=alt.Scale(domain=[0, 400]),
            title='cell cycle time, translation limited (min.)'),
    color=alt.Color('dataset:N'),
    tooltip=['condition'],
)

# Area between the diagonal y = x and y = 400.
n_pts = 400
source = pd.DataFrame(
    {'x': np.arange(0, n_pts), 'y2': np.full(n_pts, 400.0)},
    columns=['x', 'y2'],
)
line = (
    alt.Chart(source)
    .mark_area(opacity=0.2, line=False, strokeOpacity=0)
    .encode(x='x:Q', y='x:Q', y2='y2:Q')
)

# Points first, shaded area drawn over them (original layer order).
axis_fonts = {'labelFontSize': 16, 'titleFontSize': 16}
alt.layer(p, line).configure_axis(**axis_fonts)



From the plots above, it doesn't appear that the slower growth rates (longer cell cycle times) are translation limited. Indeed, we know from other work that the rate of translation apparently slows down at slower growth rates. So one hypothesis is that the cell approaches the translation-limited regime at fast growth, but is otherwise limited by something else (in this case, my guess is the nutrient).

From the data, it should also be possible to estimate the effective translation rate from each data point (sort of the inverse of what I've done.). Lets take a look.

In [127]:

# Effective elongation rate implied by each measurement: the rate every
# ribosome would need in order to synthesize the whole proteome within the
# measured doubling time.
#
# Rows are collected in a list and the DataFrame is built once at the end:
# `DataFrame.append` was removed in pandas 2.0 and re-copied the whole frame
# on every iteration.
elongation_rows = []
for c, d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    # Proteins whose GO annotation includes GO:0005840 are counted as ribosomal.
    is_ribo = d['go_terms'].astype(str).str.contains('GO:0005840')
    frac_ribo = d[is_ribo].fg_per_cell.sum() / d.fg_per_cell.sum()

    # Amino acids in the *total* proteome here (ribosomal proteins included):
    # fg -> g, 110 g/mol per amino acid, times Avogadro's number.
    N_aa = d.fg_per_cell.sum() * 1E-15 * 6.022E23 / 110

    # Ribosome copy number for this exact group; raises IndexError if the
    # complex table has no CPLX0-3964 entry for it.
    N_ribosome = complex_count[(complex_count.dataset == c[0]) &
                               (complex_count.condition == c[1]) &
                               (complex_count.growth_rate_hr == c[2]) &
                               (complex_count.complex == 'CPLX0-3964')].n_units.values[0]

    # Measured doubling time in minutes.
    t = (np.log(2) / c[2]) * 60

    # aa per ribosome per minute, converted to aa per second.
    e_rate = (N_aa / (t * N_ribosome)) / 60

    elongation_rows.append({'frac_ribo': frac_ribo,
                            'N_aa': N_aa,
                            'N_ribosome': N_ribosome,
                            't_min': t,
                            'dataset': c[0],
                            'condition': c[1],
                            'growth_rate_hr': c[2],
                            'e_rate': e_rate})

# The variable name keeps its original spelling because later cells read it.
elangation_rate_est_df = pd.DataFrame(elongation_rows)



In [128]:
# Estimated per-ribosome translation rate against measured growth rate.
axis_fonts = {'labelFontSize': 16, 'titleFontSize': 16}
(
    alt.Chart(elangation_rate_est_df)
    .mark_point()
    .encode(
        x=alt.X('growth_rate_hr:Q', title='growth rate (hr-1)'),
        y=alt.Y('e_rate:Q', title='estimated translation rate (aa/sec.)'),
        color=alt.Color('dataset:N'),
    )
    .configure_axis(**axis_fonts)
)

As a next step, with strong evidence from literature that the translation rate is indeed decreasing at slow growth conditions, lets try to take this into account. Ultimately, this seems to suggest that the cell cannot take advantage of its ribosomal capacity (since it should otherwise use it to grow faster), and I think we need to try to pinpoint the source of this. But for now it is useful to just see how the predicted growth rate compares to the measured rates when we take this into account. Specifically, Dai et al. 2016 find a clear Michaelis-Menten relationship between the elongation rate and RNA/protein ratio (which can also be related to total ribosomal mass). 
First, since I don't have easy access to RNA/protein ratio, we are going to make use of a linear expectation between RNA/protein ratio and ribosomal (r-protein) abundance. They consider this in Supplementary Figure 8, which appears to give (0.2105 - 0.0515) / (0.508 - 0.121) = 0.411 relative r-protein per RNA/protein ratio (or 1/0.411 = 2.4 slope reported in caption). 

To keep consistent with their notation, use $k$ for elongation rate in aa/s instead of $\tau_r$. In their model, they propose that elongation rate be divided into two coarse-grained timescales: A) binding of the ternary complex (TC) to the ribosome (TCs, comprising aminoacyl-tRNA, elongation factor Tu (EF-Tu) and guanosine triphosphate (GTP)), which depends inversely on the effective TC concentration $[TC_{eff}]$ due to limitation on TC diffusion and, B) other enzymatic processes (for example, translocation), the rate of which does not depend on TC concentration. If we let these two timescales be 1/($k_{on} \cdot [TC_{eff}]$) and 1/$k_{elong}$, then the elongation rate is given by,

\begin{equation}
\frac{1}{k} = \frac{1}{k_{on} \cdot [TC_{eff}]} + \frac{1}{k_{elong}} 
\end{equation}
where $k_{elong}$ is the maximal rate of peptide elongation and $k_{elong}/k_{on}$ is the binding constant of the effective TC with the ribosome. Further taking $[TC_{eff}]$ is proportional to the RNA/protein ratio,

\begin{equation}
[TC_{eff}] = C \cdot (R/P), 
\end{equation}

They find that $k_{elong}$ = 22 aa/s, $k_{on}$ = 6.4 $\mu M^{-1}s^{-1}$, and $C$ = 31 $\mu M$. So in summary, with (R/P) = r-protein fraction / 0.411 (equivalently, r-protein fraction x 2.4), we have,

\begin{equation}
\frac{1}{k} = \frac{1}{k_{on} \cdot C \cdot (\text{r-protein fraction} / 0.411)} + \frac{1}{k_{elong}}
\end{equation}

One important point to note is that I don't know exactly what is considered the r-protein. Looking into their Supplemental Table 9, it looks like it's just the core ribosomal subunits.

Dai, X., Zhu, M., Warren, M., Balakrishnan, R., Patsalo, V., Okano, H., et al. (2016). Reduction of translating ribosomes enables Escherichia coli to maintain elongation rates during slow growth. Nature Microbiology, 2(2), 16231. http://doi.org/10.1038/nmicrobiol.2016.231

In [247]:
# Translation-limited growth rate per (dataset, condition, growth rate) group,
# computed from the ribosomal mass fraction alone, for two elongation rates:
#   * a constant 20 aa/s                          -> lambda_max, t_min
#   * the fraction-dependent rate of Dai et al.   -> lambda_max_dai, t_min_dai
#
# Rows are collected in a list and the DataFrame is built once at the end:
# `DataFrame.append` was removed in pandas 2.0 and re-copied the whole frame
# on every iteration.
ribo_limit_rows_2 = []
for c, d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    # Proteins whose GO annotation includes GO:0005840 are counted as ribosomal.
    is_ribo = d['go_terms'].astype(str).str.contains('GO:0005840')
    frac_ribo = d[is_ribo].fg_per_cell.sum() / d.fg_per_cell.sum()

    # Recorded for reference only; the growth rates below do not use them.
    N_aa = d[~is_ribo].fg_per_cell.sum() * 1E-15 * 6.022E23 / 110
    N_ribosome = complex_count[(complex_count.dataset == c[0]) &
                               (complex_count.condition == c[1]) &
                               (complex_count.growth_rate_hr == c[2]) &
                               (complex_count.complex == 'CPLX0-3964')].n_units.values[0]

    # Elongation rate (aa/s) with k_on = 6.4, C = 31, k_elong = 22 and
    # R/P = frac_ribo / 0.411.
    r_t = 1 / (1 / (6.4 * 31 * frac_ribo / 0.411) + 1 / (22))

    # Growth rate (hr^-1) using 7459 aa per ribosome, and the matching
    # doubling time in minutes.
    lambda_max_dai = (np.log(2) * r_t * frac_ribo / 7459) * 3600
    t_dai = 60 * np.log(2) / lambda_max_dai

    # Same expressions with a constant 20 aa/s elongation rate.
    lambda_max = (np.log(2) * 20.0 * frac_ribo / 7459) * 3600
    t = 60 * np.log(2) / lambda_max

    ribo_limit_rows_2.append({'frac_ribo': frac_ribo,
                              'N_aa': N_aa,
                              'N_ribosome': N_ribosome,
                              't_min': t,
                              't_min_dai': t_dai,
                              'dataset': c[0],
                              'condition': c[1],
                              'growth_rate_hr': c[2],
                              # measured doubling time in minutes
                              'cell_cycle_t': 60 * (np.log(2) / c[2]),
                              'lambda_max': lambda_max,
                              'lambda_max_dai': lambda_max_dai})

ribo_limit_df_2 = pd.DataFrame(ribo_limit_rows_2)
    

In [248]:
# main plot
p = alt.Chart(ribo_limit_df_2).mark_point().encode(
        x = alt.X('cell_cycle_t:Q', scale = alt.Scale(domain=[0,400.0]), title = 'measured cell cycle time (min.)'),
        y = alt.Y('t_min_dai:Q',scale = alt.Scale(domain=[0,400]), title = 'cell cycle time, translation limited (min.)'),
        color = alt.Color('dataset:N'),
        tooltip = ['dataset', 'condition', 'lambda_max', 'growth_rate_hr', 'frac_ribo' ])

# boundary for making a ribosome (and everything else needed to translate)
source1 = pd.DataFrame({'x' : np.linspace(0,14.2, 400), 
                       'y1' : np.zeros(len(np.arange(0,400))),
                       'y2' : np.ones(len(np.arange(0,400))) * 400}, 
                      columns = ['x', 'y1', 'y2'])

ribo_max = alt.Chart(source1).mark_area(opacity=0.4, line=False, strokeOpacity = 0).encode(
    x='x:Q',
    y='y1:Q',
    y2='y2:Q',
)

# boundary for translation rate given number of ribosome
source2 = pd.DataFrame({'x' : np.arange(0,400), 
                       'y2' : np.ones(len(np.arange(0,400))) * 400}, columns = ['x', 
                                                                  'y2'])
trans_max_dai = alt.Chart(source2).mark_area(opacity=0.2, line=False, strokeOpacity = 0).encode(
    x='x:Q',
    y='x:Q',
    y2='y2:Q' 
)

# plot everything together
dai_corr = alt.layer(
    ribo_max, trans_max_dai, p
)


In [249]:
# main plot
p = alt.Chart(ribo_limit_df_2).mark_point().encode(
        x = alt.X('cell_cycle_t:Q', scale = alt.Scale(domain=[0,400.0]), title = 'measured cell cycle time (min.)'),
        y = alt.Y('t_min:Q',scale = alt.Scale(domain=[0,400]), title = 'cell cycle time, translation limited (min.)'),
        color = alt.Color('dataset:N'),
        tooltip = ['dataset', 'condition', 'lambda_max', 'growth_rate_hr', 'frac_ribo' ])

# boundary for making a ribosome (and everything else needed to translate)
source = pd.DataFrame({'x' : np.linspace(0,14.2, 400), 
                       'y1' : np.zeros(len(np.arange(0,400))),
                       'y2' : np.ones(len(np.arange(0,400))) * 400}, 
                      columns = ['x', 'y1', 'y2'])

ribo_max = alt.Chart(source).mark_area(opacity=0.4, line=False, strokeOpacity = 0).encode(
    x='x:Q',
    y='y1:Q',
    y2='y2:Q',
)

# boundary for translation rate given number of ribosome
source = pd.DataFrame({'x' : np.arange(0,400), 
                       'y2' : np.ones(len(np.arange(0,400))) * 400}, columns = ['x', 
                                                                  'y2'])
trans_max = alt.Chart(source).mark_area(opacity=0.2, line=False, strokeOpacity = 0).encode(
    x='x:Q',
    y='x:Q',
    y2='y2:Q' 
)


ribo_limit_df_3 = pd.DataFrame()
for c, d in data[data.dataset == 'schmidt_2016'].groupby(['dataset', 'condition', 'growth_rate_hr']):
    frac_ribo = d[d['go_terms'].astype(str).str.contains('GO:0005840')].fg_per_cell.sum()/ d.fg_per_cell.sum()
    
    N_aa = d[~d['go_terms'].astype(str).str.contains('GO:0005840')].fg_per_cell.sum() * 1E-15 * 6.022E23 / 110
    N_ribosome = complex_count[(complex_count.dataset == c[0]) & \
                              (complex_count.condition == c[1]) & \
                              (complex_count.growth_rate_hr == c[2]) &
                              (complex_count.complex == 'CPLX0-3964')].n_units.values[0]

    tau_r = 1/ (1/(6.4 * 31*frac_ribo / 0.411) + 1/(22))
    t_dai = ((N_aa / (tau_r * N_ribosome)) /60)
    lambda_max_dai = np.log(2) / (t_dai / 60)
                 
    data_list = {'frac_ribo' : frac_ribo,
                'N_aa' : N_aa,
                'N_ribosome' : N_ribosome,
                 't_min_dai' : t_dai,
                'dataset' : c[0],
                'condition' : c[1],
                'growth_rate_hr' : c[2],
                 'cell_cycle_t' : 60 * (np.log(2) / c[2]),
                'lambda_max_dai' : lambda_max_dai}
    ribo_limit_df_3 = ribo_limit_df_3.append(data_list,
                                        ignore_index = True)
    

# plot everything together
constant_k = alt.layer(
    ribo_max, trans_max, p
)

In [250]:
# Constant-rate chart on the left, fraction-dependent-rate chart on the right.
axis_fonts = {'labelFontSize': 16, 'titleFontSize': 16}
alt.hconcat(constant_k, dai_corr).configure_axis(**axis_fonts)

In [251]:
# The fraction-dependent-rate chart on its own, with enlarged axis text.
axis_fonts = {'labelFontSize': 16, 'titleFontSize': 16}
dai_corr.configure_axis(**axis_fonts)

Lastly, lets plot the  maximum growth rate as a function of the ribosomal fraction. 

In [265]:
# Measured growth rate versus ribosomal mass fraction for every dataset.
p = alt.Chart(ribo_limit_df_2).mark_point().encode(
        x = alt.X('frac_ribo:Q', scale = alt.Scale(domain=[0,0.4]),
                  title = 'ribosomal fraction (by mass)'),
        y = alt.Y('growth_rate_hr:Q',scale = alt.Scale(domain=[0,2.5]),
                  title = 'growth rate (translation limited, hr-1)'),
    color = alt.Color('dataset:N')
    )

# Both model curves are evaluated on one shared grid of ribosomal fractions.
# The expressions are vectorised rather than grown with `np.append` inside a
# loop, which re-allocated the array on each of the 5000 iterations and also
# overwrote the global `frac_ribo`.
frac_ribo_all = np.linspace(0.01, 0.35, 5000)

# Curve 1: fraction-dependent elongation rate (k_on = 6.4, C = 31,
# k_elong = 22, R/P = fraction / 0.411), with 7459 aa per ribosome.
r_t = 1 / (1 / (6.4 * 31 * frac_ribo_all / 0.411) + 1 / (22))
lambda_max_dai_all = (np.log(2) * r_t * frac_ribo_all / 7459) * 3600

source = pd.DataFrame({'max_growth_rate_hr' : lambda_max_dai_all,
                       'frac_ribo' : frac_ribo_all},
                      columns = ['max_growth_rate_hr', 'frac_ribo'])

l = alt.Chart(source).mark_line().encode(
        x = alt.X('frac_ribo:Q', scale = alt.Scale(domain=[0,0.4]),
                  title = 'ribosomal fraction (by mass)'),
        y = alt.Y('max_growth_rate_hr:Q',scale = alt.Scale(domain=[0,2.5]),
                  title = 'growth rate (translation limited, hr-1)')
    )

# Curve 2: constant elongation rate of 20 aa/s (dashed line).
r_t = 20
lambda_max_dai_all = (np.log(2) * r_t * frac_ribo_all / 7459) * 3600

source = pd.DataFrame({'max_growth_rate_hr' : lambda_max_dai_all,
                       'frac_ribo' : frac_ribo_all},
                      columns = ['max_growth_rate_hr', 'frac_ribo'])

l2 = alt.Chart(source).mark_line(strokeDash = [2,2]).encode(
        x = alt.X('frac_ribo:Q', scale = alt.Scale(domain=[0,0.4]),
                  title = 'ribosomal fraction (by mass)'),
        y = alt.Y('max_growth_rate_hr:Q',scale = alt.Scale(domain=[0,2.5]),
                  title = 'growth rate (translation limited, hr-1)')
    )

alt.layer(p,l, l2).configure_axis(
    labelFontSize=16,
    titleFontSize=16
    )


It would also be great if we could plot the contributions to the growth limits as we are seeing here. Lets start:

In [133]:
# Main plot: Schmidt et al. rows followed by Li et al. rows.
# `pd.concat` replaces `DataFrame.append`, which was removed in pandas 2.0;
# row order and index labels are the same as before.
ribo_limit_df_ = pd.concat([
    ribo_limit_df_2[(ribo_limit_df_2.dataset == 'schmidt_2016')],
    ribo_limit_df_2[(ribo_limit_df_2.dataset == 'li_2014')],
])

p = alt.Chart(ribo_limit_df_).mark_point().encode(
        x = alt.X('cell_cycle_t:Q', scale = alt.Scale(domain=[0,400.0]), title = 'measured cell cycle time (min.)'),
        y = alt.Y('t_min_dai:Q',scale = alt.Scale(domain=[0,400]), title = 'cell cycle time, translation limited (min.)'),
        color = alt.Color('dataset:N'),
        tooltip = ['dataset', 'condition', 'lambda_max', 'growth_rate_hr', 'frac_ribo' ])

# boundary for making a ribosome (and everything else needed to translate):
# horizontal band y in [0, 14.2] across the whole x range
source = pd.DataFrame({'x' : np.linspace(0,400, 400),
                       'y1' : np.zeros(400),
                       'y2' : np.ones(400) * 14.2},
                      columns = ['x', 'y1', 'y2'])

ribo_max = alt.Chart(source).mark_area(opacity=0.6, line=False, strokeOpacity = 0).encode(
    x='x:Q',
    y='y1:Q',
    y2='y2:Q',
)

# boundary for translation rate as a function of R/P ratio, traced through the
# Schmidt et al. conditions ordered by growth rate (sorted once, reused below)
df_temp = ribo_limit_df_2[ribo_limit_df_2.dataset == 'schmidt_2016']
array_len = len(df_temp.cell_cycle_t.values)
df_sorted = df_temp.sort_values('growth_rate_hr')
source = pd.DataFrame({'x' : np.linspace(0,400, array_len),
                       'cell_cycle_t' : df_sorted.cell_cycle_t.values,
                       't_min_dai' : df_sorted.t_min_dai.values,
                       't_min' : df_sorted.t_min.values,
                       'y' : np.ones(array_len) * 14.2},
                      columns = ['x','cell_cycle_t', 't_min_dai', 't_min', 'y'])

# area between the 14.2 line and the constant-rate limit
trans_max = alt.Chart(source).mark_area(opacity=0.4, line=False, strokeOpacity = 0).encode(
    x='cell_cycle_t:Q',
    y='y:Q',
    y2='t_min:Q'
)

# area between the constant-rate limit and the fraction-dependent-rate limit
trans_max_dai = alt.Chart(source).mark_area(opacity=0.2, line=False, strokeOpacity = 0).encode(
    x='cell_cycle_t:Q',
    y='t_min:Q',
    y2='t_min_dai:Q'
)

alt.layer(
    p, ribo_max, trans_max_dai, trans_max
).configure_axis(
    labelFontSize=16,
    titleFontSize=16
    )

In [None]:
# note: something is screwy re the genes listed in ribosome_genes ; 
# I need to look into this to make sure n_units is correct

# Toy model

Let's make a toy model to look at how the growth rate may vary under translation limitation. Specifically, let's say that a cell must consist of some fraction of proteins that make up the cell, plus the ribosomes needed to make these proteins.

The minimal time to replicate the proteome of an entire cell, assuming that translation is limiting, can be estimated from the number of amino acids, $N_{aa}$ that make up the cell, the copies of ribosomes per cell $N_{R}$, and the translation rate of a ribosome. This time will be given by the sum of translation times needed to duplicate the pool of proteins and the pool of ribosomes. If we assume a translation rate of 20 aa/sec, or 1,200 aa / min, this can be estimated as,

\begin{equation}
t = \frac{N_{aa}}{1200 \frac{aa}{\text{min}} * N_R} + \frac{7459 aa * N_R}{1200 \frac{aa}{\text{min}} * N_R},
\end{equation}

where the first fraction is the time to translate all the proteins that make up the cell, while the second fraction is the time to replicate the ribosomes. Note that 7459 aa refers to the number of amino acids in a complete ribosome (BioNumber 101175). We can then calculate a translation-limited growth rate from,


\begin{equation}
\lambda =  \frac{ln(2)} {t}.
\end{equation}

We can see from this that the only way to increase the growth rate is to increase the number of ribosomes. For simplicity we have assumed that the number or total mass of all the other proteins stays constant. Given that the number of ribosomes will increase as growth rate increases, the cell will also need to get larger. I would expect that the pool of other proteins, or some fraction of these, would then need to increase in proportion to the cell size. In any case, this provides a first approximation. 
 
Lets take a look at how the number of ribosomes needs to vary in order to reach the maximum growth rate. For this, lets begin with the conditions found in Schmidt *et al.* with cells grown with minimal media and glucose supplementation. 

In [7]:
# Show the Schmidt et al. row(s) for the glucose condition.
is_schmidt = ribo_limit_df.dataset == 'schmidt_2016'
is_glucose = ribo_limit_df.condition == 'glucose'
ribo_limit_df[is_schmidt & is_glucose]

Unnamed: 0,N_aa,N_ribosome,cell_cycle_t,condition,dataset,frac_ribo,growth_rate_hr,lambda_max,t_min
33,1330646000.0,20656.3,71.704881,glucose,schmidt_2016,0.119859,0.58,0.774725,53.682028


In [215]:
# Pull the reference quantities for the Schmidt et al. glucose condition.
# Every group is visited; if several groups match, the last one wins.
for c, d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    if (c[0], c[1]) != ('schmidt_2016', 'glucose'):
        continue

    # Proteins whose GO annotation includes GO:0005840 are counted as ribosomal.
    is_ribo = d['go_terms'].astype(str).str.contains('GO:0005840')

    cell_mass = d.fg_per_cell.sum()               # total protein, fg per cell
    cell_mass_nr = d[~is_ribo].fg_per_cell.sum()  # non-ribosomal part
    frac_ribo = d[is_ribo].fg_per_cell.sum() / cell_mass

    # Non-ribosomal amino acids: fg -> g, 110 g/mol per residue, Avogadro.
    N_aa = cell_mass_nr * 1E-15 * 6.022E23 / 110

    same_group = (
        (complex_count.dataset == c[0])
        & (complex_count.condition == c[1])
        & (complex_count.growth_rate_hr == c[2])
    )
    is_ribosome = complex_count.complex == 'CPLX0-3964'
    N_ribosome = complex_count[same_group & is_ribosome].n_units.values[0]

In [266]:
# Sweep the ribosome copy number and compute the translation-limited growth
# rate of the toy model. The arithmetic is vectorised rather than grown with
# `np.append` inside a loop, which re-allocated three arrays on each of the
# 5000 iterations; the element-wise values are unchanged.
#
# 7459 is the number of amino acids in one E. coli ribosome (BioNumbers);
# `N_aa` is the non-ribosomal amino-acid count taken from the Schmidt et al.
# glucose condition in the cell above.
N_ribosome_all = np.linspace(10000, 150000, 5000)

# Doubling time (min) at 20 aa/s: time to make the non-ribosomal proteome plus
# the time to make the ribosomes themselves.
t_all = ((N_aa / (20.0 * N_ribosome_all)) / 60) \
    + (((7459 * N_ribosome_all) / (20.0 * N_ribosome_all)) / 60)

# Convert amino-acid counts back to mass (fg) to get the ribosomal fraction.
mass_nonribo = (N_aa / (1E-15 * 6.022E23 / 110))
mass_ribo_all = (7459 * N_ribosome_all) / (1E-15 * 6.022E23 / 110)
frac_ribo_all = mass_ribo_all / (mass_ribo_all + mass_nonribo)

# Growth rate in hr^-1.
lambda_max_all = np.log(2) / (t_all / 60)
    

Above, we have calculated the maximum growth rate we should be able to achieve under translation-limited growth. Let's begin by plotting this as a function of the ribosomal fraction and the number of ribosomes.

In [267]:
# Toy-model results over the full sweep of ribosome copy numbers.
toy_columns = ['max_growth_rate_hr', 'frac_ribo', 'N_ribosomes']
source = pd.DataFrame(
    {'max_growth_rate_hr': lambda_max_all,
     'frac_ribo': frac_ribo_all,
     'N_ribosomes': N_ribosome_all},
    columns=toy_columns,
)
rate_title = 'maximum growth rate (translation limited, hr-1)'

# Growth rate against ribosomal mass fraction.
l = alt.Chart(source).mark_line().encode(
    x=alt.X('frac_ribo:Q',
            scale=alt.Scale(domain=[0, 0.6]),
            title='ribosomal fraction (by mass)'),
    y=alt.Y('max_growth_rate_hr:Q',
            scale=alt.Scale(domain=[0, 6.2]),
            title=rate_title),
)

# Growth rate against ribosome copy number.
r = alt.Chart(source).mark_line().encode(
    x=alt.X('N_ribosomes:Q', title='number of ribosomes'),
    y=alt.Y('max_growth_rate_hr:Q',
            scale=alt.Scale(domain=[0, 7]),
            title=rate_title),
)

# Dashed horizontal line at ln(2) * 20 / 7459 * 3600: the ribosome synthesis
# limit.
ribo_limit_rate = (np.log(2) * 20 / 7459) * 3600
source = pd.DataFrame(
    {'x': N_ribosome_all,
     'y': np.full(len(frac_ribo_all), ribo_limit_rate)},
    columns=['x', 'y'],
)
r_ = alt.Chart(source).mark_line(strokeDash=[3, 3]).encode(
    x=alt.X('x:Q'),
    y=alt.Y('y:Q'),
)

rr = alt.layer(r, r_)

axis_fonts = {'labelFontSize': 16, 'titleFontSize': 16}
alt.hconcat(rr, l).configure_axis(**axis_fonts)




In [268]:
# Display the array of translation-limited growth rates from the sweep above.
lambda_max_all

array([0.41037269, 0.41145128, 0.41252951, ..., 3.31119061, 3.31150297,
       3.31181526])

In [26]:
# Same two charts restricted to the first 150 points of the sweep.
n_keep = 150
source = pd.DataFrame(
    {'max_growth_rate_hr': lambda_max_all[:n_keep],
     'frac_ribo': frac_ribo_all[:n_keep],
     'N_ribosomes': N_ribosome_all[:n_keep]},
    columns=['max_growth_rate_hr', 'frac_ribo', 'N_ribosomes'],
)
rate_title = 'maximum growth rate (translation limited, hr-1)'

# Growth rate against ribosomal mass fraction.
l = alt.Chart(source).mark_line().encode(
    x=alt.X('frac_ribo:Q',
            scale=alt.Scale(domain=[0, 0.6]),
            title='ribosomal fraction (by mass)'),
    y=alt.Y('max_growth_rate_hr:Q',
            scale=alt.Scale(domain=[0, 2]),
            title=rate_title),
)

# Growth rate against ribosome copy number.
r = alt.Chart(source).mark_line().encode(
    x=alt.X('N_ribosomes:Q',
            scale=alt.Scale(domain=[1, 100000]),
            title='number of ribosomes'),
    y=alt.Y('max_growth_rate_hr:Q',
            scale=alt.Scale(domain=[0, 2]),
            title=rate_title),
)

axis_fonts = {'labelFontSize': 16, 'titleFontSize': 16}
alt.hconcat(r, l).configure_axis(**axis_fonts)




While somewhat a given, it is very interesting that the only way to increase growth rate would be to increase the number of ribosomes (though, also the translation rate, if that is possible). Lets also take a look at the plot with flipped axes.

In [202]:

# Full sweep again with the axes flipped: growth rate on x.
source = pd.DataFrame(
    {'max_growth_rate_hr': lambda_max_all,
     'frac_ribo': frac_ribo_all,
     'N_ribosomes': N_ribosome_all},
    columns=['max_growth_rate_hr', 'frac_ribo', 'N_ribosomes'],
)
rate_title = 'maximum growth rate (translation limited, hr-1)'

# Ribosomal mass fraction as a function of growth rate.
l = alt.Chart(source).mark_line().encode(
    x=alt.X('max_growth_rate_hr:Q',
            scale=alt.Scale(domain=[0.4, 6.2]),
            title=rate_title),
    y=alt.Y('frac_ribo:Q',
            scale=alt.Scale(domain=[0, 1]),
            title='ribosomal fraction (by mass)'),
)

# Ribosome copy number as a function of growth rate.
r = alt.Chart(source).mark_line().encode(
    x=alt.X('max_growth_rate_hr:Q',
            scale=alt.Scale(domain=[0.4, 6.2]),
            title=rate_title),
    y=alt.Y('N_ribosomes:Q', title='number of ribosomes'),
)

axis_fonts = {'labelFontSize': 16, 'titleFontSize': 16}
alt.hconcat(l, r).configure_axis(**axis_fonts)




Here I've plotted the entire range considered, which goes to a maximum growth rate when essentially the entire cell is made up of ribosomes. It's interesting to see that the ribosomal fraction grows linearly with growth rate (something shown in bacterial growth laws), and that the total mass is going to grow exponentially. This is also consistent with the expected scaling relationships. One caveat, given that the rate of translation isn't maximal under nutrient limitation, is how this might change the above trends. Specifically, does this mean that the cell is making more ribosomes than it can truly utilize, on the chance that it comes across a more nutrient-rich environment and needs to speed up growth quickly?

We only see growth in the range of about 0.2 hr$^{-1}$ to 2 hr$^{-1}$, so let's focus in on that range.

In [203]:
# Keep only the first 150 points of the sweep so that the plot covers the
# range of growth rates discussed in the text.
first_points = slice(None, 150)
source = pd.DataFrame(
    {
        'max_growth_rate_hr': lambda_max_all[first_points],
        'frac_ribo': frac_ribo_all[first_points],
        'N_ribosomes': N_ribosome_all[first_points],
    },
    columns=['max_growth_rate_hr', 'frac_ribo', 'N_ribosomes'],
)

# Shared x encoding (0 to 2 per hour) and shared line mark for both panels.
growth_axis = alt.X(
    'max_growth_rate_hr:Q',
    scale=alt.Scale(domain=[0, 2.]),
    title='maximum growth rate (translation limited, hr-1)',
)
base = alt.Chart(source).mark_line()

# Left panel: ribosomal mass fraction.
l = base.encode(
    x=growth_axis,
    y=alt.Y(
        'frac_ribo:Q',
        scale=alt.Scale(domain=[0, 1]),
        title='ribosomal fraction (by mass)',
    ),
)

# Right panel: ribosome copy number, with the axis limited to 1-100,000.
r = base.encode(
    x=growth_axis,
    y=alt.Y(
        'N_ribosomes:Q',
        scale=alt.Scale(domain=[1, 100000]),
        title='number of ribosomes',
    ),
)

# Show the two panels side by side with enlarged axis text.
(l | r).configure_axis(labelFontSize=16, titleFontSize=16)



It may make sense to consider the time needed to make all the ribosomal proteins, if those are also directly involved in synthesis of a protein. One question I want to check is how long it takes to replicate this entire mass of protein. Let's check.

In [204]:
# Collect, for the Schmidt 2016 glucose condition, the amino-acid content of
# the translation-related proteins and the ribosome copy number.
for c, d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    if (c[0], c[1]) != ('schmidt_2016', 'glucose'):
        continue

    # Protein mass (fg per cell) annotated with the translation COG category,
    # converted to a number of amino acids.
    # NOTE(review): 1E-15 * 6.022E23 / 110 looks like fg -> g -> Da -> residues
    # at ~110 Da per amino acid; confirm the assumed residue mass.
    in_translation_cog = (
        d['cog_category'] == 'translation, ribosomal structure and biogenesis'
    )
    n_aa = d[in_translation_cog].fg_per_cell.sum() * 1E-15 * 6.022E23 / 110

    # Ribosome copy number (complex CPLX0-3964) for this exact group.
    matches_group = (
        (complex_count.dataset == c[0])
        & (complex_count.condition == c[1])
        & (complex_count.growth_rate_hr == c[2])
        & (complex_count.complex == 'CPLX0-3964')
    )
    N_ribosome = complex_count[matches_group].n_units.values[0]

# Amino acids per ribosome divided by a rate of 20 per second (presumably the
# elongation rate -- confirm), then converted from seconds to minutes.
aa_per_ribosome = n_aa / N_ribosome
t = (aa_per_ribosome / 20.0) / 60
print(round(t, 2), 'min.')

14.17 min.


Let's also estimate the cell volume as the number of ribosomes increases. Let's assume that the mass consists of 214 fg of non-ribosomal mass, the mass of ribosomes, and ribosomal RNA (1.66 MDa per ribosome). Let's also assume that the dry mass is 30 percent of the total cell mass, and that the cell mass density is 1.1 g/ml. 

At 1.66 MDa RNA per ribosome, the RNA mass for a single ribosome is found from 1.66 MDa / $N_A$.  

In [209]:
# Sweep the ribosome copy number from one fifth of the measured value to 100
# times the measured value. For each point compute the translation-limited
# growth rate, the ribosomal mass fraction and the minimum cell volume.
#
# The sweep is evaluated with vectorised numpy expressions; growing arrays
# with np.append inside a loop copies them on every iteration.
#
# quick google search bionumber says there are 7,459 aa in a ribosome in E. coli
# (an absolute grid, np.linspace(10000, 150000, 5000), was also tried here)
N_ribosome_all = np.linspace(N_ribosome/5, N_ribosome*100, 5000)

# Time (minutes) to synthesise the non-ribosomal amino acids plus the
# ribosomes' own amino acids.
# NOTE(review): 20.0 is presumably the elongation rate in aa/s per ribosome;
# confirm.
t_ = ((N_aa / (20.0 * N_ribosome_all)) / 60) + (((7459 * N_ribosome_all) / (20.0 * N_ribosome_all)) / 60)

# Convert amino-acid counts back to a mass with the inverse of the
# 1E-15 * 6.022E23 / 110 factor (presumably fg at ~110 Da per residue).
mass_nonribo = (N_aa / (1E-15 * 6.022E23 / 110))
mass_ribo = (7459 * N_ribosome_all) / (1E-15 * 6.022E23 / 110)

frac_ribo_all = mass_ribo / (mass_ribo + mass_nonribo)
# ln(2) / doubling time, with the doubling time converted to hours.
lambda_max_all = np.log(2) / (t_ / 60)

# Dry mass in grams: 214 fg of non-ribosomal mass plus two per-ribosome terms
# of 0.8 MDa and 1.6 MDa.
# NOTE(review): the accompanying text quotes 1.66 MDa of rRNA per ribosome but
# 1600000 is used here; confirm which value is intended.
Av_num = 6.022*1E23
cell_mass = 214*1E-15 + (N_ribosome_all/ Av_num)*800000 + (N_ribosome_all/ Av_num)*1600000
# dry mass / 0.3 -> total mass (g); / 1.1 g/ml -> volume in ml; * 1E12 -> fL
vol = 1E12*((cell_mass/0.3)/1.1 )  # in fL



In [210]:
# Gather the sweep results into one table for plotting.
plot_columns = ['N_ribosome', 'cell volume', 'lambda_max']
source = pd.DataFrame(
    dict(zip(plot_columns, (N_ribosome_all, vol, lambda_max_all))),
    columns=plot_columns,
)

# Log-log plot of the minimum cell volume against the ribosome copy number.
ribosome_axis = alt.X(
    'N_ribosome:Q',
    title='Number of ribosomes',
    scale=alt.Scale(type='log'),
)
volume_axis = alt.Y(
    'cell volume:Q',
    title='minimum cell volume (fL)',
    scale=alt.Scale(type='log'),
)
volume_chart = alt.Chart(source).mark_line().encode(x=ribosome_axis, y=volume_axis)

# Enlarge the axis text and draw short black ticks.
volume_chart.configure_axis(
    labelFontSize=16,
    titleFontSize=16,
    tickWidth=1,
    tickSize=3,
    tickColor='black',
)



In [181]:
# Display the estimated cell-volume array for inspection.
vol

array([ 0.70702485,  0.71286831,  0.71871177, ..., 29.90679976,
       29.91264322, 29.91848668])

I'm very curious about the apparently linear scaling between growth rate and ribosomal mass fraction. I need to go through the algebra. For now, let's just look at how this changes for different amounts of cellular protein for a given number of ribosomes.

In [150]:
# Grab pertinent information from the Schmidt minimal media + glucose dataset.
for c, d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    if [c[0], c[1]] == ['schmidt_2016', 'glucose']:
        # Proteins whose GO terms include GO:0005840 are counted as ribosomal.
        # The mask is built once and reused for every quantity below.
        is_ribosomal = d['go_terms'].astype(str).str.contains('GO:0005840')

        cell_mass = d.fg_per_cell.sum()
        cell_mass_nr = d[~is_ribosomal].fg_per_cell.sum()
        frac_ribo = d[is_ribosomal].fg_per_cell.sum() / cell_mass

        # Number of amino acids in the non-ribosomal protein mass.
        # NOTE(review): 1E-15 * 6.022E23 / 110 looks like fg -> g -> Da ->
        # residues at ~110 Da per amino acid; confirm.
        N_aa = cell_mass_nr * 1E-15 * 6.022E23 / 110

        # Ribosome copy number (complex CPLX0-3964) for this exact group.
        N_ribosome = complex_count[(complex_count.dataset == c[0]) &
                                   (complex_count.condition == c[1]) &
                                   (complex_count.growth_rate_hr == c[2]) &
                                   (complex_count.complex == 'CPLX0-3964')].n_units.values[0]

# Repeat the ribosome sweep with the non-ribosomal protein content scaled by
# each factor. One frame is built per factor and the frames are concatenated
# once at the end: DataFrame.append was removed in pandas 2.0, and appending
# 15,000 rows one at a time copies the whole frame on every row.
sweep_frames = []
for factor in [0.1, 1, 10]:
    N_aa_ = N_aa*factor
    # quick google search bionumber says there are 7,459 aa in a ribosome in E. coli
    N_ribosome_all = np.linspace(N_ribosome/5, N_ribosome*100, 5000)

    # Time (minutes) to synthesise the scaled non-ribosomal amino acids plus
    # the ribosomes' own amino acids.
    # NOTE(review): 20.0 is presumably the elongation rate in aa/s; confirm.
    t_ = ((N_aa_ / (20.0 * N_ribosome_all)) /60) + (((7459 * N_ribosome_all)/ (20.0 * N_ribosome_all)) /60)
    mass_nonribo = (N_aa_ / (1E-15 * 6.022E23 / 110))
    mass_ribo = (7459 * N_ribosome_all) / (1E-15 * 6.022E23 / 110)

    # As before, these hold the values for the last factor once the loop ends.
    frac_ribo_all = mass_ribo / (mass_ribo + mass_nonribo)
    lambda_max_all = np.log(2) / (t_ / 60)

    sweep_frames.append(pd.DataFrame({'max_growth_rate_hr': lambda_max_all,
                                      'frac_ribo': frac_ribo_all,
                                      'N_ribosomes': N_ribosome_all,
                                      'factor': float(factor)}))

# Rows are ordered by factor, then by increasing ribosome number.
source = pd.concat(sweep_frames, ignore_index=True)[
    ['max_growth_rate_hr', 'frac_ribo', 'N_ribosomes', 'factor']]

In [179]:
# The table has more rows than Altair's default limit, so lift the limit.
alt.data_transformers.disable_max_rows()


def _ribo_fraction_curve(factor_value, dash_pattern):
    """Return the ribosomal-fraction vs. growth-rate line for one factor."""
    subset = source[source.factor == factor_value]
    line = alt.Chart(subset).mark_line(
        strokeDash=dash_pattern, size=5, opacity=0.4
    )
    return line.encode(
        x=alt.X(
            'max_growth_rate_hr:Q',
            scale=alt.Scale(domain=[0.4, 6.2]),
            title='maximum growth rate (translation limited, hr-1)',
        ),
        y=alt.Y(
            'frac_ribo:Q',
            scale=alt.Scale(domain=[0, 1]),
            title='ribosomal fraction (by mass)',
        ),
        color='factor:N',
    )


# One curve per scaling factor, each with its own dash pattern.
low = _ribo_fraction_curve(0.1, [4, 3])
normal = _ribo_fraction_curve(1, [3, 1])
high = _ribo_fraction_curve(10, [2, 2])

# Overlay the three curves on shared axes with enlarged axis text.
alt.layer(low, normal, high).configure_axis(
    labelFontSize=16,
    titleFontSize=16,
)
