The simulation was done by:  
(1) block the free uptake of amino acids and set the non-growth-associated maintenance (NGAM) to 0  
(2) set the glucose uptake as 1 mmol/gDW/h  
(3) add a sink reaction `met -->`, where met is one of the amino acids, dNTPs or NTPs  
(4) maximize the flux in (3)  
(5) the objective value is the maximal amount of met (aa, dNTP or NTP) that can be produced from 1 mmol/gDW/h glucose  
(6) the cost is defined as the amount of glucose required to produce 1 mmol/gDW/h of met, i.e. cost = 1 / objective value. 


In [1]:
import cobra
import pandas as pd
import sys, os
import xlrd
from Bio.PDB.Polypeptide import one_to_three

In [2]:
# Put the ecpy directory on the import path so the two imports below can be
# resolved (presumably `utils` and `ecpy` live in that directory — confirm).
ecpy_path = '../../../ecpy/'
sys.path.append(os.path.abspath(ecpy_path))
import utils
import ecpy

In [3]:
# Load the COBRA model from its JSON file; `model` is reused by every cost
# calculation below.
model_file = '../../../ModelFiles/json/Halo_GEM_v1.json'
model = cobra.io.load_json_model(model_file)

Using license file /Users/gangl/gurobi.lic
Academic license - for non-commercial use only


In [4]:
def get_cost(met_id,model):
    """Return the glucose cost of producing one unit of a metabolite.

    A temporary sink reaction that consumes ``met_id`` is added to the model
    and its flux is maximized while the glucose exchange is capped at 1.
    The cost is the reciprocal of that maximal flux. All modifications happen
    inside ``with model:`` so they are reverted when the block exits.

    Parameters
    ----------
    met_id : str
        Identifier of the target metabolite in ``model.metabolites``.
    model : cobra.Model
        Model providing the ``NGAM`` and ``Exchange_Glucopyranose`` reactions.

    Returns
    -------
    float
        ``1 / max_flux``: glucose needed per unit of metabolite produced.

    Raises
    ------
    ValueError
        If the solver does not report an optimal solution, in which case the
        objective value would not be a meaningful maximal flux.
    ZeroDivisionError
        If the metabolite cannot be produced at all (no positive sink flux),
        so the reciprocal cost is undefined.
    """
    with model:
        # presumably blocks the free amino-acid uptake from yeast extract;
        # verify against utils.set_yeast_extraction
        utils.set_yeast_extraction(model,lb=0,ub=0)

        # set NGAM as 0 so no glucose is spent on maintenance
        ngam = model.reactions.NGAM
        ngam.lower_bound = 0
        ngam.upper_bound = 0

        # set glucose uptake as 1
        model.reactions.Exchange_Glucopyranose.upper_bound = 1

        # add met sink reaction: met -->
        rxn_id = 'tmp'
        rxn_sink = cobra.Reaction(rxn_id)
        rxn_sink.add_metabolites({model.metabolites.get_by_id(met_id):-1})
        # add_reactions is the non-deprecated spelling of add_reaction
        model.add_reactions([rxn_sink])
        model.objective = rxn_id
        model.objective_direction = 'max'

        solution = model.optimize()
        max_flux = solution.objective_value

    if solution.status != 'optimal':
        raise ValueError(
            'optimization for {} ended with status {!r}'.format(met_id, solution.status)
        )
    if max_flux <= 0:
        # Same exception type a plain 1/0 would raise, but naming the metabolite.
        raise ZeroDivisionError(
            '{} cannot be produced from glucose (maximal sink flux is {})'.format(met_id, max_flux)
        )
    return 1/max_flux

In [5]:
def format_aa(aa_id):
    """Map a model-specific amino-acid name to its three-letter code.

    Only the names listed below are translated; any other identifier is
    returned unchanged.
    """
    renames = {
        'GLT': 'GLU',
        'L-ALPHA-ALANINE': 'ALA',
        'L-ASPARTATE': 'ASP',
    }
    if aa_id in renames:
        return renames[aa_id]
    return aa_id

In [6]:
# Glucose cost of every reactant of the protein synthesis reaction, indexed by
# the amino-acid name returned by format_aa.
AAcost = pd.DataFrame()
for precursor in model.reactions.Protein_synthesis.reactants:
    residue_name = format_aa(precursor.id.replace('_c',''))
    residue_cost = get_cost(precursor.id,model)
    AAcost.loc[residue_name,'cost(mmolGlc/mmolMet)'] = residue_cost
AAcost.to_csv('../ProteinDNACost/aa_cost.csv')
print(AAcost)

     cost(mmolGlc/mmolMet)
ARG               3.129890
ASN               1.564945
CYS               2.699531
GLN               2.034429
GLU               1.877934
GLY               0.704225
HIS               3.247261
ILE               3.129890
ALA               1.095462
ASP               1.251956
LEU               2.816901
LYS               2.973396
MET               4.342723
PHE               4.733959
PRO               2.347418
SER               1.251956
THR               1.877934
TRP               5.985915
TYR               4.577465
VAL               2.190923


In [7]:
# Glucose cost of every reactant of the DNA synthesis reaction, indexed by the
# metabolite id with '_c' removed.
dNTPcost = pd.DataFrame()
for precursor in model.reactions.DNA_synthesis.reactants:
    dntp_name = precursor.id.replace('_c','')
    dntp_cost = get_cost(precursor.id,model)
    dNTPcost.loc[dntp_name,'cost(mmolGlc/mmolMet)'] = dntp_cost
dNTPcost.to_csv('../ProteinDNACost/dNTP_cost.csv')
print(dNTPcost)

      cost(mmolGlc/mmolMet)
DATP               5.594679
DCTP               4.851330
DGTP               5.594679
TTP                5.555556


In [9]:
# Glucose cost of every reactant of the RNA synthesis reaction, indexed by the
# metabolite id with '_c' removed.
NTPcost = pd.DataFrame()
for met in model.reactions.RNA_synthesis.reactants:
    NTPcost.loc[met.id.replace('_c',''),'cost(mmolGlc/mmolMet)'] = get_cost(met.id,model)
# Write the NTP table itself; saving dNTPcost here would put the dNTP costs
# into NTP_cost.csv.
NTPcost.to_csv('../ProteinDNACost/NTP_cost.csv')
print(NTPcost)

     cost(mmolGlc/mmolMet)
ATP               5.438185
CTP               4.694836
GTP               5.438185
UTP               4.381847


#### Calculate the DNA, RNA and protein cost of each CDS

In [10]:
# Lowercase one-letter DNA base -> name of the matching dNTP, used as the row
# label when looking up costs in get_dna_cost.
one2dNTP = dict(
    a='DATP',
    c='DCTP',
    t='TTP',
    g='DGTP',
)

In [11]:
# Lowercase one-letter DNA base -> name of the NTP used in the transcript;
# 't' maps to UTP because the lookup is fed the DNA sequence (see get_rna_cost).
one2NTP = dict(
    a='ATP',
    c='CTP',
    t='UTP',
    g='GTP',
)

In [12]:
def get_protein_cost(seq):
    """Sum the per-residue glucose cost over a one-letter protein sequence.

    Each residue is converted to its three-letter code with ``one_to_three``
    and looked up in the ``AAcost`` table. An empty sequence costs 0.
    """
    return sum(
        AAcost.loc[one_to_three(residue),'cost(mmolGlc/mmolMet)']
        for residue in seq
    )

In [13]:
def get_dna_cost(seq):
    """Sum the per-nucleotide glucose cost over a DNA sequence.

    Each character of ``seq`` must be a key of ``one2dNTP``; the mapped dNTP
    name is looked up in the ``dNTPcost`` table. An empty sequence costs 0.
    """
    return sum(
        dNTPcost.loc[one2dNTP[base],'cost(mmolGlc/mmolMet)']
        for base in seq
    )

In [17]:
def get_rna_cost(seq):
    """Sum the per-nucleotide glucose cost of transcribing a DNA sequence.

    Each character of ``seq`` must be a key of ``one2NTP``; the mapped NTP
    name is looked up in the ``NTPcost`` table. An empty sequence costs 0.
    """
    return sum(
        NTPcost.loc[one2NTP[base],'cost(mmolGlc/mmolMet)']
        for base in seq
    )

In [18]:
# Open the genome annotation workbook and select its 'CDS' sheet.
# NOTE(review): xlrd >= 2.0 only reads .xls files; opening this .xlsx workbook
# needs xlrd < 2.0 — confirm the pinned version.
annofile = '../../../../Halo-Omics/genomics/Tsinghua/genomic annotation of TD Chromosome-3rd seq-Version3.0-20200527.xlsx'
book = xlrd.open_workbook(annofile)
sh = book.sheet_by_name('CDS')


In [19]:
# Per-CDS cost table: one row per ID with 'dna', 'rna' and 'protein' columns,
# each the summed glucose cost of the corresponding sequence.
protein_cost = pd.DataFrame()
# Rows 0 and 1 of the sheet are skipped (presumably header rows — confirm).
for i in range(2, sh.nrows):
    ID = sh.cell(i,2).value
    # Column 12 is used as the DNA sequence and column 13 as the protein
    # sequence; '*' characters are removed from both before costing.
    dna_seq = sh.cell(i,12).value.replace('*','')
    prot_seq = sh.cell(i,13).value.replace('*','')

    # The RNA cost is computed from the same DNA sequence.
    dna_cost = get_dna_cost(dna_seq)
    prot_cost = get_protein_cost(prot_seq)
    rna_cost = get_rna_cost(dna_seq)

    for column, value in (('dna', dna_cost), ('rna', rna_cost), ('protein', prot_cost)):
        protein_cost.loc[ID,column] = value

print(protein_cost.head(5))

                       dna           rna      protein
TD01GL000001   7939.593114   7337.715180  1119.561815
TD01GL000002   5981.611894   5514.866980   812.989045
TD01GL000003   6751.056338   6240.884194   993.192488
TD01GL000004  13118.348983  12129.147105  1850.743349
TD01GL000005   3128.873239   2891.784038   424.178404


In [20]:
# Save the per-CDS dna/rna/protein cost table.
protein_cost.to_csv('../ProteinDNACost/protein_cost.csv')