In [1]:
import cameo
from cobra import Model, Reaction, Metabolite
from cobra.io import read_sbml_model
from cobra.io import save_json_model
from cameo.flux_analysis.simulation import pfba
import cobra.test
import os
from Functions_Modules.curation_tools import *

In [2]:
# Remember the directory the notebook was started from; the same variable is
# used again to build the export paths at the end of the notebook.
relative_directory = os.getcwd()
filename = relative_directory + '/Reconstructions/MethylococcusModel8.xml'
# Load version 8 of the reconstruction through cameo.
model = cameo.load_model(filename)

Final 'clean-up' step. I'll get rid of all MNXR or MNXM tagged reactions or metabolites.

In [4]:
# Remove every MNXM-tagged metabolite together with the reactions it takes part in.
# Iterate over a copy because the model is modified inside the loop.
for met in list(model.metabolites):
    if not met.id.startswith('MNXM'):
        continue
    print(met.id, met.reactions)
    model.remove_reactions(met.reactions)
    # Only remove the metabolite explicitly if it is still part of the model
    # after its reactions are gone.
    if met.id not in model.metabolites:
        continue
    met.remove_from_model('destructive')

('MNXM21342_c', frozenset([<Reaction MNXR85991_c at 0x10dacb750>, <Reaction MNXM21342_out at 0x10dc36f50>]))
('MNXM5285_c', frozenset([<Reaction MNXM5285_out at 0x10dc36fd0>]))
('MNXM7549_c', frozenset([<Reaction MNXM7549_out at 0x10dc3f090>]))
('MNXM4463_c', frozenset([<Reaction MNXM4463_out at 0x10dc3f110>]))
('MNXM6386_c', frozenset([<Reaction MNXM6386_out at 0x10dc3f190>]))
('MNXM2142_c', frozenset([<Reaction MNXM2142_out at 0x10dc3f590>]))
('MNXM7231_c', frozenset([<Reaction MNXM7231_out at 0x10dc3f810>, <Reaction MNXR68614_c at 0x10dad7e50>]))
('MNXM95609_c', frozenset([<Reaction MNXM95609_out at 0x10dc492d0>]))
('MNXM96150_c', frozenset([<Reaction MNXM96150_out at 0x10dc499d0>, <Reaction MNXR85732_c at 0x10dae9950>]))
('MNXM92688_c', frozenset([<Reaction MNXM92688_out at 0x10dc53290>, <Reaction MNXR59223_c at 0x10db0e0d0>]))
('MNXM1740_c', frozenset([<Reaction MNXM1740_out at 0x10dc53390>, <Reaction MNXR73718_c at 0x10db0e310>]))
('MNXM2120_c', frozenset([<Reaction MNXM2120_out 

In [5]:
# Collect the remaining MNXR-tagged reactions first, then remove them in one call.
MNXR_rxns = []
for rxn in model.reactions:
    if rxn.id.startswith('MNXR'):
        MNXR_rxns.append(rxn)
model.remove_reactions(MNXR_rxns)

Check for functioning FBA after import:

In [7]:
# Display the solver variables that make up the current objective.
model.objective.variables

{0 <= BIOMASS_REACTION_reverse_42726 <= 0, 0.1 <= BIOMASS_REACTION <= 1000.0}

In [8]:
# Print the uptake/excretion fluxes for the biomass objective.
# NOTE(review): show_uptake_excretion is not defined in this notebook; presumably
# it comes from the star import of Functions_Modules.curation_tools.
solution = show_uptake_excretion(model,model.reactions.get_by_id('BIOMASS_REACTION'))

so4_in 0.0273340396411
pi_in 0.0742559453479
o2_in 27.6123082969
co2_out 9.73776818209
h2o_out 29.7680791941
ACP_out 3.15502832193e-17
hco3_out 2.37162203479
h2o2_out 0.00108844802029
co_out 4.66477722983e-06
2dhp_out 0.045398233971
5prdmbz_out 0.000155492574328
EX_no3_e -1.3686798476
ch4_in 18.405
Fe_in 0.000951614554886
BIOMASS_REACTION 0.155492574328


In [9]:
# Remove the ATPM reaction from the model.
# NOTE(review): the notebook gives no rationale for dropping ATPM - confirm this is intended.
model.remove_reactions([model.reactions.ATPM])


Demand and sink reactions in the model are tagged with either an \_out or an \_in suffix, while they should bear the prefix EX\_. Transporters in the model have not been predicted properly. So far, the only transport reactions that have been added describe simple diffusion of a few basic compounds. Transporter information will be inferred from Merlin's curated TCDB selection or from the output of http://www.membranetransport.org/.

In [10]:
demands = [rxn for rxn in model.reactions if rxn.id.endswith('_out')]

for rxn in demands:
    if rxn.reactants != []:
        new_id = rxn.reactants[0].id
        descriptive_name = rxn.reactants[0].name
                
        try: 
            model.reactions.get_by_id('EX_' + new_id)
        except KeyError:
            rxn.name = '{} exchange'.format(descriptive_name)
            rxn.id = 'EX_' + new_id
            print rxn

EX_fdp_c: fdp_c --> 
EX_g3p_c: g3p_c --> 
EX_dhap_c: dhap_c --> 
EX_h_c: h_c --> 
EX_nadh_c: nadh_c --> 
EX_acald_c: acald_c --> 
EX_etoh_c: etoh_c --> 
EX_nad_c: nad_c --> 
EX_pyr_c: pyr_c --> 
EX_thmpp_c: thmpp_c --> 
EX_co2_c: co2_c --> 
EX_2ahethmpp_c: 2ahethmpp_c --> 
EX_dhlpro_c: dhlpro_c --> 
EX_accoa_c: accoa_c --> 
EX_coa_c: coa_c --> 
EX_atp_c: atp_c --> 
EX_adp_c: adp_c --> 
EX_pep_c: pep_c --> 
EX_h2o_c: h2o_c --> 
EX_2pg_c: 2pg_c --> 
EX_3pg_c: 3pg_c --> 
EX_13dpg_c: 13dpg_c --> 
EX_pi_c: pi_c --> 
EX_f6p_c: f6p_c --> 
EX_g6p_A_c: g6p_A_c --> 
EX_g1p_c: g1p_c --> 
EX_g6p_c: g6p_c --> 
EX_glc_bD_c: glc_bD_c --> 
EX_Glc_aD_c: Glc_aD_c --> 
EX_ppi_c: ppi_c --> 
EX_amp_c: amp_c --> 
EX_ac_c: ac_c --> 
EX_fum_c: fum_c --> 
EX_succ_c: succ_c --> 
EX_akg_c: akg_c --> 
EX_succoa_c: succoa_c --> 
EX_icit_c: icit_c --> 
EX_mal__L_c: mal__L_c --> 
EX_acon_C_c: acon_C_c --> 
EX_cit_c: cit_c --> 
EX_oaa_c: oaa_c --> 
EX_2ddg6p_c: 2ddg6p_c --> 
EX_prpp_c: prpp_c --> 
EX_r5p_c: r5p_c -->

In [11]:
# Sanity check: objective value after renaming the demand reactions.
model.solve().f

0.1554925743277103

In [12]:
exchanges = [rxn for rxn in model.reactions if rxn.id.endswith('_in')]
make_reversible = []
for rxn in exchanges:
    if rxn.products != []:
        new_id = rxn.products[0].id
        descriptive_name = rxn.products[0].name
                
        try: 
            model.reactions.get_by_id('EX_' + new_id)
            make_reversible.append(rxn)
        except KeyError:
            print rxn.id

ch4_in
glycogen_in
Fe_in


In [13]:
# Fe_in has no EX_ counterpart: turn it into the exchange reaction of its product.
rxn = model.reactions.Fe_in
new_id, descriptive_name = rxn.products[0].id, rxn.products[0].name
rxn.id = 'EX_' + new_id
rxn.name = '{} exchange'.format(descriptive_name)

In [14]:
# glycogen_in has no EX_ counterpart: turn it into the exchange reaction of its product.
rxn = model.reactions.glycogen_in
new_id, descriptive_name = rxn.products[0].id, rxn.products[0].name
rxn.id = 'EX_' + new_id
rxn.name = '{} exchange'.format(descriptive_name)

In [15]:
# ch4_in has no EX_ counterpart: turn it into the exchange reaction of its product.
rxn = model.reactions.ch4_in
new_id, descriptive_name = rxn.products[0].id, rxn.products[0].name
rxn.id = 'EX_' + new_id
rxn.name = '{} exchange'.format(descriptive_name)

In [16]:
# Sanity check: objective value after renaming the three `_in` reactions.
model.solve().f

0.1554925743277103

In [17]:
# Remove the first two collected `_in` reactions outright and display the rest.
# NOTE(review): the positional slice depends on the order of model.reactions;
# which two reactions sit in slots 0-1 is not shown here - confirm before re-running.
model.remove_reactions(make_reversible[0:2])
make_reversible[2:]

[<Reaction nh3_in at 0x10db46610>,
 <Reaction na1_in at 0x10db46690>,
 <Reaction k_in at 0x10db46710>,
 <Reaction mg2_in at 0x10db46790>,
 <Reaction ca2_in at 0x10db46810>,
 <Reaction so4_in at 0x10db46890>,
 <Reaction cl_in at 0x10db46910>,
 <Reaction pi_in at 0x10db46990>,
 <Reaction h_in at 0x10db46a10>,
 <Reaction h2o_in at 0x10db46a90>,
 <Reaction co2_in at 0x10db46b10>,
 <Reaction o2_in at 0x10db46b90>]

In [18]:
for rxn in make_reversible[2:]:
    rxn.lower_bound = 0
    rxn.upper_bound = 0
    try:
        solution = model.solve()
        print rxn.id,solution.f
    except:
        print rxn.id
    rxn.lower_bound = -1000
    rxn.upper_bound = 1000

nh3_in 0.155492574328
na1_in 0.22201063896
k_in 0.22201063896
mg2_in 0.22201063896
ca2_in 0.22201063896
so4_in
cl_in 0.22201063896
pi_in
h_in 0.22201063896
h2o_in 0.22201063896
co2_in 0.22201063896
o2_in


In [19]:
# Open the lower bound so the sulfate exchange can carry negative flux
# (the so4_in knock-out above printed no objective value).
model.reactions.EX_so4_c.lower_bound = -1000

In [20]:
# Open the lower bound so the phosphate exchange can carry negative flux
# (the pi_in knock-out above printed no objective value).
model.reactions.EX_pi_c.lower_bound = -1000

In [21]:
# Open the lower bound so the oxygen exchange can carry negative flux
# (the o2_in knock-out above printed no objective value).
model.reactions.EX_o2_c.lower_bound = -1000

In [22]:
# Remove the remaining `_in` reactions that duplicate an existing EX_ reaction.
model.remove_reactions(make_reversible[2:])

In [23]:
# Sanity check: objective value after removing the duplicate `_in` reactions.
model.solve().f

0.1554925743277102

In [24]:
# Print the uptake/excretion fluxes again, now through the EX_ reactions.
# NOTE(review): show_uptake_excretion is presumably provided by the star import
# of Functions_Modules.curation_tools.
solution = show_uptake_excretion(model,model.reactions.get_by_id('BIOMASS_REACTION'))

EX_co2_c 9.80917347274
EX_h2o_c 29.839484494
EX_pi_c -0.0742559453479
EX_ACP_c 2.23345647532e-17
EX_hco3_c 2.30021674414
EX_o2_c -27.6123082969
EX_trdox_c 4.62223186653e-33
EX_so4_c -0.0273340396411
EX_h2o2_c 0.00108844802029
EX_co_c 4.66477722983e-06
EX_2dhp_c 0.045398233971
EX_dhpt_c 8.67361737989e-19
EX_5prdmbz_c 0.000155492574328
EX_no3_e -1.3686798476
EX_ch4_p 18.405
EX_fe2_p_None_ 0.000951614554886
BIOMASS_REACTION 0.155492574328


In [25]:
# Remove every reaction that still carries the `_out` suffix.
leftover_out_rxns = []
for rxn in model.reactions:
    if rxn.id.endswith('_out'):
        leftover_out_rxns.append(rxn)
model.remove_reactions(leftover_out_rxns)

Fix up those reactions and metabolites that for some reason have a \_None\_ suffix.

In [26]:
None_reactions = [rxn for rxn in model.reactions if rxn.id.endswith('_None_')]

for rxn in None_reactions:
    print rxn
    rxn.id = rxn.id.replace('_None_','')
    
None_metabolites = [met for met in model.metabolites if met.id.endswith('_None_')]

for met in None_metabolites:
    print met
    met.id = met.id.replace('_None_','')

EX_fe2_p_None_:  --> fe2_p_None_
EX_ttdca_c_None_: ttdca_c_None_ --> 
EX_ptdca_c_None_: ptdca_c_None_ --> 
EX_hpdca_c_None_: hpdca_c_None_ --> 
EX_ttdcea_c_None_: ttdcea_c_None_ --> 
EX_cpoa2h_c_None_: cpoa2h_c_None_ --> 
ttdca_c_None_
ptdca_c_None_
ttdcea_c_None_
hpdca_c_None_
cpoa2h_c_None_
pc_MC_c_None_
pe_MC_c_None_
pg_MC_c_None_
clpn_MC_c_None_
peptido_MC_c_None_
colipa_e_None_
fe2_p_None_
tdecoa_c_None_
tdcoa_c_None_
ppACP_c_None_
ptdcalACP_c_None_
hpdcalACP_c_None_
cpoa2hcoa_c_None_
2ombzl_c_None_
2ommbl_c_None_
2omhmbl_c_None_
Ssq23epx_c_None_
codhpre6_c_None_
thrp_c_None_
4hba_c_None_
mc_fattyacid_c_None_
mc_fattyacidcoa_c_None_
1agpgafa_c_None_
pa_MC_c_None_
cdpdag_MC_c_None_
pgp_MC_c_None_
ps_MC_c_None_
pme_c_None_
pdme_c_None_
glyc_c_None_
udcpp_c_None_
adphep_LD_c_None_


In [27]:
None_reactions = [rxn for rxn in model.reactions if rxn.id.endswith('_None')]

for rxn in None_reactions:
    print rxn
    rxn.id = rxn.id.replace('_None','')
    
None_metabolites = [met for met in model.metabolites if met.id.endswith('_None')]

for met in None_metabolites:
    print met
    met.id = met.id.replace('_None','')

urea_c_None
doxopa_c_None
allphn_c_None
no3_e_None


In [28]:
# Sanity check: objective value after stripping the `_None_` / `_None` suffixes.
model.solve().f

0.15549257432771105

### Add transport reactions based on TMHMM prediction and BLAST against TCDB

Import TMHMM as a pandas dataframe, clean up the 'IDs' which currently are FASTA line markers (>lcl|AE017282.2_prot_MCAXXXX_XXXX), but I want to reduce them to just the gene IDs (MCAXXXX).

In [29]:
from pandas import read_csv
import pandas as pd

In [30]:
# Read the tab-separated TMHMM summary and reduce each FASTA-style header in
# the ID column to its second-to-last underscore-separated token.
filepath = '/Users/clie/Desktop/EFPro2/Experimental Data & Standards/Transmembrane Domain Analysis TMHMM/AE017282.2 - TMHMM result short.csv'
TMHMM_raw = read_csv(filepath, sep='\t')
TMHMM_raw['ID'] = TMHMM_raw['ID'].apply(lambda header: header.split('_')[-2])

Import PSORT results as a pandas dataframe, clean up the 'IDs' which currently are FASTA line markers, same as above.

In [31]:
# Read the comma-separated PSORTb result and reduce each FASTA-style header in
# the SeqID column to its third-from-last underscore-separated token.
filepath = '/Users/clie/Desktop/EFPro2/Experimental Data & Standards/PsortB - Compartment Prediction/PSORTb.csv'
PSORT_raw = read_csv(filepath, sep=',')
PSORT_raw['SeqID'] = PSORT_raw['SeqID'].apply(lambda header: header.split('_')[-3])

Reshape the TMHMM_raw pandas dataframe (now with cleaned up IDs) so that it no longer contains:

- Rows with an ExpAA value lower than 18. Any value above that and it is very likely to be a transmembrane protein (OR have a signal peptide) according to the [TMHMM 1.0 User Guide.](/Users/clie/Desktop/EFPro2/Experimental Data & Standards/Transmembrane Domain Analysis TMHMM/TMHMM1.0-UserGuide.pdf)

- Rows with a PredHel value lower than 1. Any value above that and it is very likely to be a transmembrane protein (OR have a signal peptide) according to the [TMHMM 1.0 User Guide.](/Users/clie/Desktop/EFPro2/Experimental Data & Standards/Transmembrane Domain Analysis TMHMM/TMHMM1.0-UserGuide.pdf)

This will give us a list of IDs of putative transporter proteins.

In [32]:
# Keep only rows that pass both TMHMM thresholds: ExpAA >= 18 and PredHel >= 1.
passes_expaa_threshold = TMHMM_raw['ExpAA'] >= 18
has_predicted_helix = TMHMM_raw['PredHel'] >= 1
TMHMM_helix_predicted = TMHMM_raw.loc[passes_expaa_threshold & has_predicted_helix]
# TMHMM_ID2Index = TMHMM_raw.set_index('ID')
# TMHMM_dictionary = TMHMM_ID2Index.to_dict('index')

In [33]:
# Gene ids of all rows that passed the TMHMM filter, as a plain Python list.
putative_transporter_gene_IDs = TMHMM_helix_predicted['ID'].tolist()

Next we construct the BLAST DB from the TCDB FAA file using Ipython Magic/ Shell commands. First we move the current working directory to the corresponding folder and then run BLAST+'s makeblastdb shell command.

In [34]:
# Switch the working directory: the BLAST database, FASTA and result files
# below are all addressed by relative file names.
%cd '/Users/clie/Desktop/EFPro2/Experimental Data & Standards/TCDB Exports/'

/Users/clie/Desktop/EFPro2/Experimental Data & Standards/TCDB Exports


In [35]:
if os.path.isfile('TCDBSeqDB.pin'):
    pass
else:
    !makeblastdb -in TCDBSeq.faa -dbtype prot -out TCDBSeqDB

Now, for each putative transporter gene ID from the TMHMM_helix_predicted dataframe, let's grab the AA sequence and compile them all into a FAA file in preparation for BLASTing them against the TCDB BLAST DB.

In [36]:
# Load the central mapping table; its 'AA Sequence' column is used below to
# build the FASTA query file.
full_central_map = pd.read_csv('/Users/clie/Desktop/EFPro2/Genome/UniProtExport/FullCentralMappingOriginalGenBank.csv')

In [37]:
# Turn the table into {row key: {column: value}}. The unnamed first CSV column
# becomes the key; it is looked up by gene id further down.
ID_2_AASeq = full_central_map.set_index('Unnamed: 0').to_dict('index')

In [38]:
# Write the amino-acid sequences of all putative transporters to a FASTA file
# (60 residues per line). Genes without a usable sequence are reported and
# collected in manual_check_bin.
manual_check_bin = []

if not os.path.isfile('putative_transporters.faa'):
    n=60
    fasta_lines = []
    for gene_id in putative_transporter_gene_IDs:
        try:
            sequence = ID_2_AASeq[gene_id]['AA Sequence']
            seq_snippets = [sequence[i:i+n] for i in range(0,len(sequence),n)]
        except (KeyError, TypeError):
            # KeyError: gene id or column missing; TypeError: sequence is not a
            # string (e.g. a missing value read from the CSV).
            print ("No sequence found for {0}".format(gene_id))
            manual_check_bin.append(gene_id)
            continue
        fasta_lines.append('>' + gene_id)
        fasta_lines.extend(seq_snippets)
    # The file is created only after every record has been assembled, so an
    # interrupted run cannot leave a partial file that later runs would skip.
    with open('putative_transporters.faa', 'w') as fasta_file:
        fasta_file.write(''.join(line + '\n' for line in fasta_lines))
    

The FASTA file for our query sequences has now been created, so next we BLAST all the query sequences against the TCDBSeqDB in order to identify transporters more clearly. We will apply strong similarity criteria for the BLAST operation so as to avoid getting (m)any false positives.

The criteria will be:
- E-value threshold = 1e-9
- Max target sequences = 1, i.e. I am choosing to ignore multiple matches as I want to receive unambiguous mappings.
- Max alignments (HSPs) per target = 1 (`-max_hsps 1`), i.e. I am choosing to ignore multiple matches as I want to receive unambiguous mappings.

In [39]:
if os.path.isfile('annotated_putative_transporters.csv'):
    pass
else:
    !blastp -db TCDBSeqDB -query putative_transporters.faa -max_hsps 1 -max_target_seqs 1 -evalue 1e-9 -outfmt 10 -out annotated_putative_transporters.csv 

In [40]:
# Column names for the BLAST result table: the twelve tabular BLAST fields
# followed by an extra TC_Number column that is filled in after import.
column_IDs = [
    'qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen',
    'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore',
]
column_IDs.append('TC_Number')

Import the BLAST results back in the form of a Pandas Dataframe.

After a manual check of the matches with the three lowest bitscores and the three lowest identities, I decided not to restrict this selection any further. This is because, even though the identity/overall score was low, the descriptions or E. coli gene names of the qseqid on UniProt matched those of the sseqid in the TCDB.

In [41]:
# Read the BLAST CSV with explicit column names.
# NOTE(review): column_IDs has one more name (TC_Number) than the blastp
# outfmt 10 output presumably has columns; that column is filled in the next cell.
BLAST_results = pd.read_csv('annotated_putative_transporters.csv', names = column_IDs)

In [42]:
# The subject id is a '|'-separated string: its last field goes to TC_Number,
# its second-to-last field replaces sseqid.
sseqid_fields = BLAST_results['sseqid'].apply(lambda x: x.split('|'))
BLAST_results['TC_Number'] = sseqid_fields.apply(lambda fields: fields[-1])
BLAST_results['sseqid'] = sseqid_fields.apply(lambda fields: fields[-2])

In [43]:
# Display only: show the hits ordered by TC number (descending); BLAST_results itself is not changed.
BLAST_results.sort_values('TC_Number', ascending=False)

Unnamed: 0,qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,TC_Number
96,MCA0963,B9DIS8,30.114,352,231,9,89,427,14,363,8.570000e-34,136.0,9.B.97.5.1
242,MCA2175,V4NQP3,42.241,116,67,0,1,116,1,116,1.750000e-22,84.0,9.B.72.1.6
259,MCA2301,A5W2Y3,51.149,174,85,0,7,180,55,228,2.280000e-54,171.0,9.B.69.1.1
260,MCA2302,A5W2Y4,50.000,58,28,1,33,89,5,62,1.490000e-10,50.8,9.B.69.1.1
4,MCA0019,D5QHE9,36.364,143,85,2,4,146,42,178,2.480000e-16,71.2,9.B.62.5.2
320,MCA2966,V5C7X7,64.681,235,80,2,1,235,1,232,4.960000e-101,292.0,9.B.43.2.1
183,MCA1650,D4ZZR3,36.538,208,116,5,355,556,196,393,1.110000e-26,112.0,9.B.34.1.3
134,MCA1237,D4ZZR3,35.882,170,101,3,275,439,225,391,4.580000e-23,100.0,9.B.34.1.3
179,MCA1583,D4ZZR3,32.948,173,107,2,304,470,220,389,1.280000e-23,101.0,9.B.34.1.3
174,MCA1533,D4ZZR3,34.104,173,107,2,522,689,221,391,2.470000e-20,93.6,9.B.34.1.3


Now that we have the Gene ID matched with corresponding TC-Numbers, we will group genes with identical functions.

In [44]:
# Two-column view of the hits: query gene id and matched TC number.
BLAST_results_refocused = BLAST_results.loc[:, ['qseqid', 'TC_Number']].copy()

In [45]:
# Group the gene ids by TC number and expose the result as
# {TC number: {'qseqid': [gene ids]}}.
TC_number_genes_grouped = BLAST_results_refocused.groupby('TC_Number')
TC_to_gene = TC_number_genes_grouped.agg(lambda members: list(members))
TC_to_gene_dict = TC_to_gene.to_dict(orient='index')

Now we will try to find the TC-Numbers in the curated TCDB-extract from MERLIN (Provided by Bruno and Óscar)

In [46]:
# Load the MERLIN-curated TCDB export.
# NOTE(review): the file name starts with a space (' MERLIN_...'); confirm this
# matches the file on disk and is not a typo.
MERLIN_curated_TCDB_export = read_csv('/Users/clie/Desktop/EFPro2/Experimental Data & Standards/TCDB Exports/ MERLIN_metabolites_transport_database-Curated Export.csv')

In [47]:
# Index the curated export by TC number: {TCDB ID: {column: value}}.
TCDB_Dict = MERLIN_curated_TCDB_export.set_index('TCDB ID').to_dict('index')

In [48]:
non_curated_transporters = []

for key in TC_to_gene_dict.keys():
    if key in TCDB_Dict.keys():
        print key, TCDB_Dict[key]['metabolite'],TCDB_Dict[key]['direction'],TCDB_Dict[key]['reversibility'],TCDB_Dict[key]['reacting_metabolites']
    else:
        non_curated_transporters.append(key) # Check manually at later stage!

1.A.11.2.7 NH3; methylamine in True --
3.A.12.1.2 DNA in False 1:ATP; 1:water || 1:ADP; 1:orthophosphate
3.D.9.1.1 proton out True 1: Oxidized donor || 1:Reduced acceptor
2.A.7.3.29 drugs out True --
3.D.4.10.2 proton:proton:proton:proton out False 2:nitric oxide; 2:Ferrocytochrome c; 2:proton || 1:nitrous oxide; 2:Ferricytochrome c; 1:water
9.B.102.5.2 unknown in True --
2.A.9.3.1 protein out False --
1.A.77.3.9 Mg2+; Ca2+ in False --
1.A.23.1.1 K+ out True --
3.A.1.102.1 polysaccharide out False 1:ATP; 1:water || 1:ADP; 1:orthophosphate
2.A.3.8.12 L-threonine // L-serine in // out True --
3.A.1.107.3 heme out False 1:ATP; 1:water || 1:ADP; 1:orthophosphate
2.A.1.42.1 lysophospholipid in True --
3.A.3.5.18 copper out False 1:ATP; 1:water || 1:ADP; 1:orthophosphate
2.A.4.7.7 proton // Mn2+ in // out False --
3.A.3.5.20 gold out False 1:ATP; 1:water || 1:ADP; 1:orthophosphate
3.A.1.114.1 glycolipids out False 1:ATP; 1:water || 1:ADP; 1:orthophosphate
9.B.102.5.5 unknown in True --
1.B.5

Here should now follow resolving the TC Numbers to Reaction strings and translating the metabolite names to metabolite IDs, but as it turned out I had no clearance to use the MERLIN-curated TCDB export. Hence this is postponed.

Instead, I will write all entries from the Notes field to the Annotation fields so that they are included in the most recent version of the SBML export. Newer, non-legacy formats do not support the Notes field.

I will try to match the MIRIAM nomenclature.

In [49]:
# Copy database cross-references from each metabolite's notes into
# met.annotation (which is replaced as a whole), using MIRIAM-style keys
# where the notebook defines one.
#
# Every notes value is read as a list whose first item is a string; surrounding
# single quotes are stripped and the string is split on ','. A single value is
# stored as a string, several values as a list.
for met in model.metabolites:
    try:
        met_notes = met.notes
        anno_dict = {}
        for key in met_notes.keys():
            if key in ('SMILES', 'BRENDA'):
                # Deliberately not exported as annotation.
                continue
            unquoted = met_notes[key][0].strip('\'')
            if key == 'INCHI':
                # Stored unsplit.
                anno_dict['inchi'] = unquoted
                continue
            value_entries = unquoted.split(',')
            if key == 'BIGG':
                # Only the first BiGG id is kept.
                anno_dict['bigg.metabolite'] = value_entries[0]
                continue

            stripped = [x.lstrip(' ') for x in value_entries]
            if key == 'KEGG':
                target, values = 'kegg.compound', stripped
            elif key == 'SEED':
                # Leading blanks are now stripped here too, as the reaction
                # conversion already does for its SEED values.
                target, values = 'seed.compound', stripped
            elif key == 'MXNREF':
                # NOTE(review): several values get a 'MXNREF:' prefix while a
                # single value is stored unprefixed and unstripped; kept as is.
                # Also confirm the notes key is really 'MXNREF' and not 'MNXREF'.
                target = 'metanetx.chemical'
                if len(value_entries) > 1:
                    values = [key+':'+x for x in stripped]
                else:
                    values = value_entries
            elif key == 'METACYC':
                target, values = 'biocyc', ['META:'+x for x in stripped]
            elif key == 'HMDB':
                target, values = key.lower(), stripped
            else:
                # Unknown databases: lower-cased key, values prefixed with the key.
                target, values = key.lower(), [key+':'+x for x in stripped]

            anno_dict[target] = values if len(values) > 1 else values[0]
        met.annotation = anno_dict
    except Exception as error:
        # Best-effort: a metabolite whose notes cannot be converted keeps its
        # existing annotation, but the failure is reported instead of hidden.
        print ('Could not convert notes of {0}: {1!r}'.format(met.id, error))

In [50]:
# Copy database cross-references from each reaction's notes into
# rxn.annotation (which is replaced as a whole), using MIRIAM-style keys
# where the notebook defines one.
#
# Every notes value is read as a list whose first item is a string; surrounding
# single quotes are stripped and the string is split on ',' (';' for EC numbers).
# A single value is stored as a string, several values as a list.
for rxn in model.reactions:
    try:
        rxn_notes = rxn.notes
        anno_dict = {}
        for key in rxn_notes.keys():
            if key in ('GENE ASSOCIATION', 'CONFIDENCE SCORE', 'INHIBITOR',
                       'LOCALIZATION', 'COFACTOR', 'SUBSYSTEM', 'BRENDA'):
                # These stay in the notes field and are not exported as annotation.
                continue
            separator = ';' if key == 'EC NUMBER' else ','
            value_entries = rxn_notes[key][0].strip('\'').split(separator)
            if key == 'BIGG':
                # Only the first BiGG id is kept.
                anno_dict['bigg.reaction'] = value_entries[0]
                continue

            stripped = [x.lstrip(' ') for x in value_entries]
            if key == 'KEGG':
                target, values = 'kegg.reaction', stripped
            elif key == 'SEED':
                target, values = 'seed', stripped
            elif key == 'MXNREF':
                # NOTE(review): several values get a 'MXNREF:' prefix while a
                # single value is stored unprefixed and unstripped; kept as is.
                # Also confirm the notes key is really 'MXNREF' and not 'MNXREF'.
                target = 'metanetx.reaction'
                if len(value_entries) > 1:
                    values = [key+':'+x for x in stripped]
                else:
                    values = value_entries
            elif key == 'METACYC':
                target, values = 'biocyc', ['META:'+x for x in stripped]
            elif key == 'EC NUMBER':
                # EC numbers are filed under the 'brenda' key.
                target, values = 'brenda', stripped
            elif key == 'REACTOME':
                target, values = 'reactome', stripped
            elif key == 'RHEA':
                target, values = 'rhea', stripped
            else:
                # Unknown databases: lower-cased key, values prefixed with the key.
                target, values = key.lower(), [key+':'+x for x in stripped]

            anno_dict[target] = values if len(values) > 1 else values[0]
        rxn.annotation = anno_dict
    except Exception as error:
        # Best-effort: a reaction whose notes cannot be converted keeps its
        # existing annotation, but the failure is reported instead of hidden.
        print ('Could not convert notes of {0}: {1!r}'.format(rxn.id, error))

In [51]:
# Reduce reaction notes to the fields that have no annotation equivalent and
# unpack their values; every other notes entry is dropped.
# NOTE(review): not idempotent - the unpacked values replace the raw ones, so
# running this cell a second time would index into already-cleaned values.
KEPT_REACTION_NOTES = ['CONFIDENCE SCORE', 'LOCALIZATION', 'SUBSYSTEM','INHIBITOR','COFACTOR']

for rxn in model.reactions:
    if isinstance(rxn.notes, dict):
        # Iterate over a snapshot of the keys because entries are popped in the loop.
        for key in list(rxn.notes.keys()):
            if key in KEPT_REACTION_NOTES:
                value_entries = rxn.notes[key][0].strip('\'').split(',')
                if len(value_entries) > 1:
                    rxn.notes[key] = [x.lstrip(' ') for x in value_entries]
                else:
                    rxn.notes[key] = value_entries[0]
            else:
                rxn.notes.pop(key)

In [52]:
for met in model.metabolites:
    try:
        for key in met.notes.keys():
            if key not in ['SMILES']:
                met.notes.pop(key)
    except:
        print '{1} has no notes section'.format(met)

Based on errors I received from the SBML Model Validator, here are some last fixes.

Metabolites must be exported with having a compartment at met.compartment!

In [53]:
# Metabolites without a usable compartment: covers both '' and None.
mets_with_missing_compartments = [met for met in model.metabolites if not met.compartment]

In [54]:
# Derive the compartment from the id: it is the token after the last underscore.
for met in mets_with_missing_compartments:
    compartment_tag = met.id.rsplit('_', 1)[-1]
    met.compartment = compartment_tag

All metabolites must have either no formula or a chemically viable one. 'nan' or 'None' or symbols like '(', ')' and '.' are not accepted with fbc:chemicalFormula

In [55]:
# Metabolites whose formula is a stringified missing value ('nan' or 'None').
mets_with_faulty_formula = [met for met in model.metabolites if met.formula in ('nan', 'None')]

In [56]:
# Replace the placeholder formula with an empty string.
for met in mets_with_faulty_formula:
    met.formula = ''

In [57]:
# Metabolites whose formula contains '.', '(' or ')'.
# Each symbol is tested separately: ('.' or '(' or ')') evaluates to '.',
# so the previous expression never detected parentheses.
mets_with_faulty_formula = [met for met in model.metabolites
                            if met.formula and any(symbol in met.formula for symbol in '.()')]

In [58]:
# Replace each flagged formula with an empty string.
for met in mets_with_faulty_formula:
    met.formula = ''

In [60]:
# Verify that the model still behaves as it did before the clean-up:

solution = show_uptake_excretion(model,model.reactions.get_by_id('BIOMASS_REACTION'))

EX_co2_c 9.80917347274
EX_h2o_c 29.839484494
EX_pi_c -0.0742559453479
EX_ACP_c 2.23345647532e-17
EX_hco3_c 2.30021674414
EX_o2_c -27.6123082969
EX_trdox_c 4.62223186653e-33
EX_so4_c -0.0273340396411
EX_h2o2_c 0.00108844802029
EX_co_c 4.66477722983e-06
EX_2dhp_c 0.045398233971
EX_dhpt_c 8.67361737989e-19
EX_5prdmbz_c 0.000155492574328
EX_no3_e -1.3686798476
EX_ch4_p 18.405
EX_fe2_p 0.000951614554886
BIOMASS_REACTION 0.155492574328


Export the model in the most recent SBML version: Level 3, Version 1, with the fbc package.

In [61]:
# Write version 9 of the model next to the input model: SBML (with the fbc
# package) first, then pretty-printed JSON.
target_filename_xml = relative_directory + '/Reconstructions/MethylococcusModel9.xml'
target_filename_json = relative_directory + '/Reconstructions/MethylococcusModel9.json'
cobra.io.write_sbml_model(model, target_filename_xml, use_fbc_package=True)
save_json_model(model, target_filename_json, pretty=True)