# Estimates - Protein translation, number of ribosomes
(c) 2020 The Authors. All creative work is published under a [CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/) permissive license. All software is released under a standard [MIT](https://opensource.org/licenses/MIT) license. **This software is provided as-is and may be subject to change.**

In [2]:
import numpy as np
import pandas as pd
import panel as pn 
import altair as alt
import tqdm
import prot.viz
prot.viz.altair_theme()
pn.extension('vega')
_ = alt.data_transformers.enable('default')

  import pandas.util.testing as tm


In this notebook, we provide a series of estimates for the copy numbers of various protein complexes needed to support bacterial cell doubling. The aim is to provide some expectation about how these numbers may vary with growth rate and then to compare to available experimental data.

The specific numbers related to translation include:
- Ribosomes
- Synthesis of tRNA
- Enzymes for amino acid synthesis

# Ribosomes

In [73]:
# Complex subunit counts (read as-is from the compiled annotation table).
subunits = pd.read_csv('../../data/compiled_annotated_complexes.csv')

# Compiled absolute measurements, with gene names normalised to lower case.
data = (
    pd.read_csv('../../data/compiled_absolute_measurements.csv')
    .assign(gene_name=lambda df: df['gene_name'].str.lower())
)

# One copy-number estimate per complex and growth condition: the mean of
# `n_units` over all rows that share the grouping keys below.
complex_keys = ['dataset', 'dataset_name', 'condition', 'growth_rate_hr',
                'complex_annotation', 'complex']
complex_count = subunits.groupby(complex_keys)['n_units'].mean().reset_index()

In [74]:
# Preview the first five rows of the compiled measurements.
data.head(n=5)

Unnamed: 0,gene_name,b_number,condition,go_terms,cog_class,cog_category,cog_letter,growth_rate_hr,gene_product,tot_per_cell,fg_per_cell,dataset,dataset_name,strain
0,insh-1,b0259,lb_miller,GO:0005829; GO:0032196; GO:0006313; GO:0006310...,cellular processes and signaling,"mobilome: prophages, transposons",X,1.9,CP4-6 prophage; IS5 transposase and trans-acti...,376.512099,0.02458,schmidt_2016,Schmidt et al. 2016,BW25113
1,insh-1,b0259,glycerol_pAA,GO:0005829; GO:0032196; GO:0006313; GO:0006310...,cellular processes and signaling,"mobilome: prophages, transposons",X,1.27,CP4-6 prophage; IS5 transposase and trans-acti...,150.630122,0.009834,schmidt_2016,Schmidt et al. 2016,BW25113
2,insh-1,b0259,acetate,GO:0005829; GO:0032196; GO:0006313; GO:0006310...,cellular processes and signaling,"mobilome: prophages, transposons",X,0.3,CP4-6 prophage; IS5 transposase and trans-acti...,18.205011,0.001188,schmidt_2016,Schmidt et al. 2016,BW25113
3,insh-1,b0259,fumarate,GO:0005829; GO:0032196; GO:0006313; GO:0006310...,cellular processes and signaling,"mobilome: prophages, transposons",X,0.42,CP4-6 prophage; IS5 transposase and trans-acti...,59.163413,0.003862,schmidt_2016,Schmidt et al. 2016,BW25113
4,insh-1,b0259,galactose,GO:0005829; GO:0032196; GO:0006313; GO:0006310...,cellular processes and signaling,"mobilome: prophages, transposons",X,0.26,CP4-6 prophage; IS5 transposase and trans-acti...,38.830065,0.002535,schmidt_2016,Schmidt et al. 2016,BW25113


In [75]:
# Complex annotations from the EcoCyc raw-data folder; keep the distinct gene
# names whose annotation is exactly 'ribosome'.
annotated_cplxs = pd.read_csv('../../data/ecocyc_raw_data/annotated_complexes.csv')
is_ribosome = annotated_cplxs['annotation'] == 'ribosome'
ribosome_genes = annotated_cplxs.loc[is_ribosome, 'gene_name'].unique()

In [76]:
# Number of distinct genes annotated as 'ribosome'. Reuses `ribosome_genes`
# from the previous cell instead of repeating the same filter, so the count
# always matches the gene list.
len(ribosome_genes)

55

# Limits of protein synthesis

Given the requirement for the ribosomes to replicate themselves and all other proteins in order for a cell to divide, it will be insightful to look at the maximum rate of duplication at all the growth rates we're considering. One hypothesis is that the growth rate of a bacterium is set by the duplication time required for a complete ribosome complex, plus some time required to also replicate all the other proteins in the cell. It's not clear to me how this might play out at different growth rates and relative fractions of ribosomal protein, so let's take a look.


Given some number of ribosomes and some number of proteins, let's try to estimate the time required to replicate a cell, using numbers from the data at each growth rate. Our baseline time will be the time required for the cell to replicate itself.

The time to replicate non-ribosomal proteins = total number of amino acids / (rate of translation (aa / sec / ribosome) * number of ribosomes).

The total number of amino acids is going to be given from the mass of non-ribosomal protein and average molecular weight of an amino acid,

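$$N_{aa} = \frac{m}{110\ \text{g/mol}} \, N_A,$$

where $m$ is the protein mass per cell (in grams) and $N_A$ is Avogadro's number. The code below uses 110 g/mol as the average mass of an amino acid.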

Assume translation rate is 20 aa/sec.

In [140]:

# Translation-limited doubling time for every (dataset, condition, growth rate)
# group in `data`.
#
# For each group the loop computes:
#   frac_ribo  - mass fraction of proteins whose GO terms include the ribosome
#                term (GO:0005840)
#   N_aa       - amino acids per cell, from the summed `fg_per_cell` of *all*
#                proteins in the group (ribosomal ones included)
#   N_ribosome - ribosome copy number looked up in `complex_count`
#   t_min      - minutes needed for N_ribosome ribosomes to polymerise N_aa
#   lambda_max - the corresponding growth rate, ln(2) / t, in hr^-1
#
# An alternative definition of "ribosomal" that was tried previously is the COG
# category 'translation, ribosomal structure and biogenesis'.

# Constants / modelling assumptions.
AVOGADRO = 6.022E23             # molecules per mole
G_PER_FG = 1E-15                # grams per femtogram
AA_MASS_G_PER_MOL = 110         # assumed average amino acid mass
TRANSLATION_RATE_AA_S = 20.0    # assumed elongation rate, aa / s / ribosome
RIBOSOME_GO_TERM = 'GO:0005840'

# Collect plain dict records and build the frame once at the end:
# `DataFrame.append` in a loop is quadratic and no longer exists in pandas >= 2.0.
ribo_limit_records = []
for (dataset, condition, growth_rate), d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    total_fg = d.fg_per_cell.sum()
    is_ribosomal = d['go_terms'].astype(str).str.contains(RIBOSOME_GO_TERM)
    frac_ribo = d[is_ribosomal].fg_per_cell.sum() / total_fg

    N_aa = total_fg * G_PER_FG * AVOGADRO / AA_MASS_G_PER_MOL

    ribosome_rows = complex_count[(complex_count.dataset == dataset) &
                                  (complex_count.condition == condition) &
                                  (complex_count.growth_rate_hr == growth_rate) &
                                  (complex_count.complex_annotation == 'ribosome')]
    if ribosome_rows.empty:
        # Same exception type as the bare `.values[0]` lookup, but with context.
        raise IndexError(
            f'no ribosome entry in complex_count for dataset={dataset!r}, '
            f'condition={condition!r}, growth_rate_hr={growth_rate!r}')
    N_ribosome = ribosome_rows.n_units.values[0]

    # seconds -> minutes, then minutes -> hours for the rate.
    t = (N_aa / (TRANSLATION_RATE_AA_S * N_ribosome)) / 60
    lambda_max = np.log(2) / (t / 60)

    ribo_limit_records.append({
        'frac_ribo': frac_ribo,
        'N_aa': N_aa,
        'N_ribosome': N_ribosome,
        't_min': t,
        'dataset': dataset,
        'condition': condition,
        'growth_rate_hr': growth_rate,
        'cell_cycle_t': 60 * (np.log(2) / growth_rate),
        'lambda_max': lambda_max,
    })

ribo_limit_df = pd.DataFrame(ribo_limit_records)


In [141]:
# Translation-limited maximum growth rate against the measured growth rate,
# coloured by ribosomal mass fraction.
# Axis titles are plain text: Vega-Lite does not render LaTeX, so the previous
# '$hr^{-1}$' was displayed literally. 'hr-1' matches the other charts here.
measured_rate_x = alt.X('growth_rate_hr:Q',
                        scale=alt.Scale(domain=[0, 2.0]),
                        title='growth rate (hr-1)')
limited_rate_y = alt.Y('lambda_max:Q',
                       scale=alt.Scale(domain=[0, 2.0]),
                       title='maximum growth rate, translation limited (hr-1)')

(
    alt.Chart(ribo_limit_df)
    .mark_point()
    .encode(
        x=measured_rate_x,
        y=limited_rate_y,
        color=alt.Color('frac_ribo:Q'),
        tooltip=['dataset', 'condition', 'lambda_max', 'growth_rate_hr'],
    )
    .configure_axis(labelFontSize=16, titleFontSize=16)
)

In [142]:
# Measured cell-cycle time against the translation-limited estimate, both in
# minutes, with one colour per dataset.
measured_time_x = alt.X('cell_cycle_t:Q',
                        scale=alt.Scale(domain=[0, 400.0]),
                        title='cell cycle time (min.)')
limited_time_y = alt.Y('t_min:Q',
                       scale=alt.Scale(domain=[0, 400]),
                       title='cell cycle time, translation limited (min.)')
dataset_color = alt.Color('dataset:N')

(
    alt.Chart(ribo_limit_df)
    .mark_point()
    .encode(x=measured_time_x, y=limited_time_y, color=dataset_color)
    .configure_axis(labelFontSize=16, titleFontSize=16)
)

In [None]:
# TODO: something looks off with the genes listed in `ribosome_genes`;
# check them to make sure `n_units` is correct.

# Toy model

Let's make a toy model to look at how the growth rate may vary under translation limitation. Specifically, let's say that a cell must consist of some fraction of proteins that make up the cell, plus the ribosomes needed to make these proteins.

The minimal time to replicate the proteome of an entire cell, assuming that translation is limiting, can be estimated from the number of amino acids $N_{aa}$ that make up the cell, the number of ribosomes per cell $N_{R}$, and the translation rate of a ribosome. This time is the sum of the translation times needed to duplicate the pool of proteins and the pool of ribosomes. If we assume a translation rate of 20 aa/sec, or 1,200 aa/min, this can be estimated as,

\begin{equation}
t = \frac{N_{aa}}{1200\,\frac{\text{aa}}{\text{min}} \cdot N_R} + \frac{7459\,\text{aa} \cdot N_R}{1200\,\frac{\text{aa}}{\text{min}} \cdot N_R},
\end{equation}

where the first fraction is the time to translate all the proteins that make up the cell, while the second fraction is the time to replicate the ribosomes. Note that 7459 aa refers to the number of amino acids in a complete ribosome (BioNumber 101175). We can then calculate a translation-limited growth rate from,


\begin{equation}
\lambda = \frac{\ln(2)}{t}.
\end{equation}

We can see from this that the only way to increase the growth rate is to increase the number of ribosomes. For simplicity we have assumed that the number or total mass of all the other proteins stays constant. Given that the number of ribosomes will increase as growth rate increases, the cell will also need to get larger. I would expect that the pool of other proteins, or some fraction of these, would then need to increase in proportion to the cell size. In any case, this provides a first approximation. 
 
Let's take a look at how the number of ribosomes needs to vary in order to reach the maximum growth rate. For this, let's begin with the conditions found in Schmidt *et al.* with cells grown with minimal media and glucose supplementation.

In [139]:
# Show the translation-limit estimate for the Schmidt glucose condition.
is_schmidt_glucose = (
    (ribo_limit_df['dataset'] == 'schmidt_2016')
    & (ribo_limit_df['condition'] == 'glucose')
)
ribo_limit_df.loc[is_schmidt_glucose]

Unnamed: 0,N_aa,N_ribosome,cell_cycle_t,condition,dataset,frac_ribo,growth_rate_hr,lambda_max,t_min
33,1330646000.0,20656.3,71.704881,glucose,schmidt_2016,0.119859,0.58,0.852198,48.801844


In [148]:
# grab pertinent information from the Schmidt minimal media + glucose dataset
for c, d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    if [c[0], c[1]] == ['schmidt_2016', 'glucose']:
        cell_mass = d.fg_per_cell.sum()
        frac_ribo = d[d['go_terms'].astype(str).str.contains('GO:0005840')].fg_per_cell.sum()/ d.fg_per_cell.sum()
        N_aa = d.fg_per_cell.sum() * 1E-15 * 6.022E23 / 110
        N_ribosome = complex_count[(complex_count.dataset == c[0]) & \
                                  (complex_count.condition == c[1]) & \
                                  (complex_count.growth_rate_hr == c[2]) &
                                  (complex_count.complex_annotation == 'ribosome')].n_units.values[0]

In [218]:
# Toy model: sweep the ribosome copy number and compute the translation-limited
# growth rate and the resulting ribosomal mass fraction.
#
# Inputs taken from earlier cells: `N_aa` (amino acids per cell) and
# `N_ribosome` (measured ribosome copy number) for the reference condition.
# NOTE: `N_aa` is computed upstream from the summed mass of all measured
# proteins, so `mass_nonribo` below is that total rather than a strictly
# non-ribosomal mass.
AA_PER_RIBOSOME = 7459           # amino acids in a complete E. coli ribosome (BioNumber 101175)
TRANSLATION_RATE_AA_S = 20.0     # assumed elongation rate, aa / s / ribosome
AA_PER_FG = 1E-15 * 6.022E23 / 110   # amino acids per fg of protein at 110 g/mol

# Vectorised sweep from 1/5x to 100x the measured ribosome count. This replaces
# growing three arrays with np.append inside a 5000-step loop.
N_ribosome_all = np.linspace(N_ribosome / 5, N_ribosome * 100, 5000)

# Minutes to translate the proteome with N ribosomes ...
t_proteome = (N_aa / (TRANSLATION_RATE_AA_S * N_ribosome_all)) / 60
# ... plus minutes for the ribosomes to copy themselves. The ribosome count
# cancels in (7459 * N) / (20 * N), so this term is the same for every N.
t_ribosome = (AA_PER_RIBOSOME / TRANSLATION_RATE_AA_S) / 60
lambda_max_all = np.log(2) / ((t_proteome + t_ribosome) / 60)   # hr^-1

# Convert amino acid counts back to mass (fg) for the ribosomal mass fraction.
mass_nonribo = N_aa / AA_PER_FG
mass_ribo = (AA_PER_RIBOSOME * N_ribosome_all) / AA_PER_FG
frac_ribo_all = mass_ribo / (mass_ribo + mass_nonribo)

source = pd.DataFrame({'max_growth_rate_hr': lambda_max_all,
                       'frac_ribo': frac_ribo_all,
                       'N_ribosomes': N_ribosome_all})

# Both panels share the same growth-rate axis.
growth_rate_x = alt.X('max_growth_rate_hr:Q',
                      scale=alt.Scale(domain=[0.4, 6.2]),
                      title='maximum growth rate (translation limited, hr-1)')

l = alt.Chart(source).mark_line().encode(
    x=growth_rate_x,
    y=alt.Y('frac_ribo:Q', scale=alt.Scale(domain=[0, 1]),
            title='ribosomal fraction (by mass)'),
)

r = alt.Chart(source).mark_line().encode(
    x=growth_rate_x,
    y=alt.Y('N_ribosomes:Q', title='number of ribosomes'),
)

(l | r).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)



We only see growth in the range of about 0.2 $hr^{-1}$ to 2 $hr^{-1}$, so let's focus in on that range.

In [219]:
# Same toy-model curves, restricted to the growth rates seen experimentally.
# The points are selected by growth rate instead of a fixed `[:150]` slice, so
# the selection no longer depends on how finely the ribosome sweep was sampled.
MAX_OBSERVED_GROWTH_RATE_HR = 2.0
in_observed_range = lambda_max_all <= MAX_OBSERVED_GROWTH_RATE_HR

source = pd.DataFrame({'max_growth_rate_hr': lambda_max_all[in_observed_range],
                       'frac_ribo': frac_ribo_all[in_observed_range],
                       'N_ribosomes': N_ribosome_all[in_observed_range]})

# Both panels share the same growth-rate axis.
growth_rate_x = alt.X('max_growth_rate_hr:Q',
                      scale=alt.Scale(domain=[0, MAX_OBSERVED_GROWTH_RATE_HR]),
                      title='maximum growth rate (translation limited, hr-1)')

l = alt.Chart(source).mark_line().encode(
    x=growth_rate_x,
    y=alt.Y('frac_ribo:Q', scale=alt.Scale(domain=[0, 1]),
            title='ribosomal fraction (by mass)'),
)

r = alt.Chart(source).mark_line().encode(
    x=growth_rate_x,
    y=alt.Y('N_ribosomes:Q', scale=alt.Scale(domain=[1, 100000]),
            title='number of ribosomes'),
)

(l | r).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)

