In [2]:
import cobra
import xlwt
from Bio.PDB import Polypeptide as pp
import pandas as pd
import os

In [3]:
# Directory (relative to the notebook's working directory) that holds the
# shared input data; the SBML model is read from here.
DATA_DIR = '../../../ComplementaryData/'

In [4]:
# Load the iML1515 model from its SBML file inside DATA_DIR.
model = cobra.io.read_sbml_model(os.path.join(DATA_DIR,'iML1515.xml'))

Academic license - for non-commercial use only


In [5]:
# Peek at the first gene to see which id, name and annotation fields the
# model provides.
g = model.genes[0]
print(g.id,g.name,g.annotation)

b2551 b2551 {'uniprot': 'P0A825', 'asap': 'ABE-0008389', 'ecogene': 'EG10408', 'ncbigene': '947022', 'ncbigi': 'gi:16130476'}


#### 1. Biomass equation

In [6]:
# Collect every reaction whose name mentions "biomass" (case-insensitive),
# in model order. Reactions whose name is missing or not a string are
# skipped explicitly; the previous bare ``except`` hid every other error too.
BOFs = [
    rxn for rxn in model.reactions
    if isinstance(rxn.name, str) and 'biomass' in rxn.name.lower()
]

In [13]:
# For each biomass reaction found above, print its name and the number of
# metabolites taking part in it.
for rxn in BOFs:
    print(rxn.name,len(rxn.metabolites))
    # print(rxn.reaction)  # uncomment to also print the reaction itself

E. coli biomass objective function (iML1515) - core - with 75.37 GAM estimate 70
E. coli biomass objective function (iML1515) - WT - with 75.37 GAM estimate 99


In [7]:
# Number of metabolites in the second biomass reaction (BOFs[1]).
print(len(BOFs[1].metabolites))

99


In [14]:
# Print the metabolites that take part in BOFs[1] but not in BOFs[0].
# The BOFs[0] metabolites are looked up once instead of on every iteration.
core_metabolites = BOFs[0].metabolites
for met in BOFs[1].metabolites:
    if met in core_metabolites:
        continue
    print(met)

2dmmql8_c
5mthf_c
accoa_c
adocbl_c
chor_c
clpn160_p
clpn161_p
clpn181_p
colipa_e
enter_c
glycogen_c
gthrd_c
hemeO_c
lipopb_c
malcoa_c
mococdp_c
mocogdp_c
mql8_c
murein3p3p_p
murein3px4p_p
murein4p4p_p
murein4px4p_p
murein4px4px4p_p
nadh_c
nadph_c
pe181_p
pg160_p
pg161_p
pg181_p
ptrc_c
q8h2_c
spmd_c


In [15]:
def save_biomass_xlsx(sh,rxn):
    """Write the metabolites of one reaction to a worksheet.

    Row 0 is the header: 'name', 'id', 'coeff', followed by every annotation
    key found on the reaction's metabolites, in first-seen order. Each later
    row describes one metabolite: its name, id, coefficient in ``rxn`` and
    its value for every annotation column (empty when it has none).

    Parameters
    ----------
    sh : object with a ``write(row, col, value)`` method
        Target worksheet (an xlwt sheet in this notebook).
    rxn : object with ``metabolites`` (iterable) and ``get_coefficient(met)``
        Reaction whose metabolites are exported.

    Notes
    -----
    Annotation values that are lists or tuples are joined with ';' so that
    they fit into a single cell; other values are written unchanged.
    """
    metabolites = list(rxn.metabolites)

    # Build the header: fixed columns first, then annotation keys as found.
    head = ['name','id','coeff']
    for met in metabolites:
        for anno in met.annotation:
            if anno not in head:
                head.append(anno)

    for col, title in enumerate(head):
        sh.write(0, col, title)

    for row, met in enumerate(metabolites, start=1):
        sh.write(row, 0, met.name)
        sh.write(row, 1, met.id)
        sh.write(row, 2, rxn.get_coefficient(met))

        for col, anno in enumerate(head[3:], start=3):
            value = met.annotation.get(anno)
            # A multi-valued annotation cannot be stored in one cell as-is.
            if isinstance(value, (list, tuple)):
                value = ';'.join(str(item) for item in value)
            sh.write(row, col, value)

In [16]:
def save_medium(sh,met_ids,model):
    """Write one row per requested metabolite id to a worksheet.

    Row 0 is the header: 'name', 'id', followed by every annotation key found
    on the selected metabolites. The following rows keep the order of
    ``met_ids`` (repeated ids produce repeated rows) and hold the
    metabolite's name, id and its value for each annotation column.

    Parameters
    ----------
    sh : object with a ``write(row, col, value)`` method
        Target worksheet (an xlwt sheet in this notebook).
    met_ids : iterable of str
        Metabolite ids to export.
    model : object with an iterable ``metabolites`` attribute
        Model in which the ids are looked up.

    Raises
    ------
    KeyError
        If any id in ``met_ids`` is not found in ``model.metabolites``. The
        check runs before anything is written and the message lists every
        missing id.

    Notes
    -----
    Annotation values that are lists or tuples are joined with ';' so that
    they fit into a single cell; other values are written unchanged.
    """
    met_ids = list(met_ids)
    wanted = set(met_ids)  # set lookup instead of scanning the list per metabolite
    mets = {met.id: met for met in model.metabolites if met.id in wanted}

    # Fail early and name all unknown ids rather than stopping on the first
    # one with a half-written sheet.
    missing = [met_id for met_id in met_ids if met_id not in mets]
    if missing:
        raise KeyError('metabolite ids not found in model: '
                       + ', '.join(str(met_id) for met_id in missing))

    # Build the header: fixed columns first, then annotation keys as found.
    head = ['name','id']
    for met in mets.values():
        for anno in met.annotation:
            if anno not in head:
                head.append(anno)

    for col, title in enumerate(head):
        sh.write(0, col, title)

    for row, met_id in enumerate(met_ids, start=1):
        met = mets[met_id]
        sh.write(row, 0, met.name)
        sh.write(row, 1, met.id)

        for col, anno in enumerate(head[2:], start=2):
            value = met.annotation.get(anno)
            # A multi-valued annotation cannot be stored in one cell as-is.
            if isinstance(value, (list, tuple)):
                value = ';'.join(str(item) for item in value)
            sh.write(row, col, value)

In [21]:
# Export both biomass reactions of iML1515 to one workbook (one sheet each),
# then convert each sheet to a tab-separated file.
# The TSVs are read back from the workbook saved in this cell. The previous
# version read '../Results/biomass_iJO1366.xls' (a file this notebook never
# writes) and labelled the TSVs with the wrong model name.
biomass_xls = '../Results/biomass_iML1515.xls'

book = xlwt.Workbook()
sh1 = book.add_sheet('WT')
sh2 = book.add_sheet('Core')
# Per the reaction names printed earlier, BOFs[0] is the "core" biomass
# reaction and BOFs[1] the "WT" one.
save_biomass_xlsx(sh1,BOFs[1])
save_biomass_xlsx(sh2,BOFs[0])

book.save(biomass_xls)

for sheet_name in ('WT', 'Core'):
    biomass_table = pd.read_excel(biomass_xls, sheet_name)
    biomass_table.to_csv('../Results/biomass_{}_iML1515.tsv'.format(sheet_name), sep='\t')

#### 2. Find ids for medium

In [18]:
# Model metabolite ids for the components of the medium.
# NOTE(review): 'pi_c', 'cobalt2_c' and 'k_c' carry the '_c' suffix while
# every other id in this list ends in '_e' -- confirm that this is intended.
medium_ids = [
    'na1_e', 'cl_e',            # NaCl
    'glc__D_e',                 # glucose
    'urea_e',                   # urea
    'mg2_e', 'so4_e',           # MgSO4
    'pi_c',                     # PO4 is used instead of HPO4
    'fe3_e', 'nh4_e', 'cit_e',  # Fe(III)-NH4-citrate
    'fe2_e',
    'ca2_e',                    # CaCl2
    'zn2_e',                    # ZnSO4
    'mn2_e',                    # MnCl2
    'cobalt2_c',                # CoCl2
    'k_c',                      # KH2PO4
    # H3BO3 is missing
    'cu2_e',                    # CuSO4
    'ni2_e',                    # NiCl2
    'mobd_e',                   # Na2MoO4
    'o2_e',                     # O2
]

# Add one id per amino-acid code in pp.aa3: glycine maps to 'gly_e', every
# other code to its '<code>__L_e' id.
for aa in pp.aa3:
    medium_ids.append('gly_e' if aa == 'GLY' else '{}__L_e'.format(aa.lower()))

# According to https://onlinelibrary.wiley.com/doi/epdf/10.1002/bit.10608,
# yeast extract contains adenine, trehalose and lactate.
medium_ids.extend([
    'ade_e',     # adenine
    'tre_e',     # trehalose
    'lac__L_e',  # L-lactate
])
# Open question: for the 19 amino acids other than glycine and for lactate,
# should the D-forms be considered as well?

In [19]:
# Secreted metabolites. Known secretion products are citrate, acetate and
# pyruvate; CO2 and water are listed here as well.

sec_ids = [ # citrate is not repeated here: 'cit_e' is already in medium_ids
           'ac_e',# acetate
           'pyr_e',# pyruvate
           'co2_e',# CO2
           'h2o_e' # water
             ]

In [20]:
# Write the medium and secretion id lists to one workbook (one sheet each),
# then convert each sheet to its own tab-separated file.
book = xlwt.Workbook()
for sheet_name, ids in (('medium', medium_ids), ('secretion', sec_ids)):
    sh = book.add_sheet(sheet_name)
    save_medium(sh, ids, model)

book.save('../Results/medium.xls')

for sheet_name, tsv_path in (('medium', '../Results/medium.tsv'),
                             ('secretion', '../Results/secretion.tsv')):
    medium = pd.read_excel('../Results/medium.xls', sheet_name)
    medium.to_csv(tsv_path, sep='\t')