In [1]:
import pandas as pd
import numpy as np
from cobra  import Model, Reaction, Metabolite

In [2]:
model_name = 'iMAC868'
sheets = ['reactions', 'metabolites', 'genes', 'reaction bounds', 'std. transformed Gibbs energies', 'proteomics data']

In [3]:
loaded = [pd.read_excel('./Data/iMAC868/iMAC868.xlsx', sheet_name=s) for s in sheets];

In [4]:
saved = [loaded[i].to_csv('./Data/iMAC868/iMAC868_' + sheets[i] + '.csv', index=False) for i in range(len(loaded))];

In [5]:
sheets_dict = dict(enumerate(sheets))
sheets_dict = {v: k for k, v in sheets_dict.items()}

In [6]:
loaded[sheets_dict['reactions']].head()

Unnamed: 0,Abbreviation,Name,Reaction,GPR,Subsystem,Reversible,Lower bound,Upper bound,Objective
0,ADSL1,adenylosuccinate lyase,dcamp[c] <==> amp[c] + fum[c],MA3971,Alanine and Aspartate Metabolism,0,0.0,1000,0
1,ADSS,adenylosuccinate synthase,asp-L[c] + gtp[c] + imp[c] <==> dcamp[c] + gdp...,(MA1919 or MA4118),Alanine and Aspartate Metabolism,0,0.0,1000,0
2,ALATA_L,L-alanine transaminase,akg[c] + ala-L[c] <==> glu-L[c] + pyr[c],(MA0636 or MA0925 or MA1385 or MA1712 or MA1819),Alanine and Aspartate Metabolism,1,-1000.0,1000,0
3,ALATRS,Alanyl-tRNA synthetase,ala-L[c] + atp[c] + trnaala[c] <==> amp[c] + p...,(MA0194 or MA2014),Alanine and Aspartate Metabolism,0,0.0,1000,0
4,APAT2,beta-alanine:2-oxoglutarate aminotransferase,akg[c] + ala-B[c] <==> glu-L[c] + msa[c],MA2859,Alanine and Aspartate Metabolism,0,0.0,1000,0


### Load reactions properties

In [7]:
reactions_abbreviations       = loaded[0]['Abbreviation'].tolist()
reaction_names      = loaded[0]['Name'].tolist()
reaction_subsystem  = loaded[0]['Subsystem'].tolist()
lower_bound         = loaded[0]['Lower bound'].tolist()
upper_bound         = loaded[0]['Upper bound'].tolist()
objective           = loaded[0]['Objective'].tolist()
reaction_arr        = loaded[0]['Reaction'].tolist()
gene_reaction_rules = loaded[0]['GPR'].tolist()

### Load metabolite properties

In [8]:
met_abbreviations = loaded[1]['Abbreviation'].tolist()
met_names = loaded[1]['Name'].tolist()
met_formula = loaded[1]['Formula (charged)'].tolist()
met_charge = loaded[1]['Charge'].tolist()
met_KEGG_id = loaded[1]['KEGG ID'].tolist()

### Load genes properties

In [9]:
gene_loci = loaded[2]['Loci'].tolist()
gene_annotation = loaded[2]['Annotation'].tolist()
gene_ec_number = loaded[2]['EC #'].tolist()
gene_protein_abbreviation = loaded[2]['Protein abbreviation'].tolist()

In [10]:
model = Model('iMAC868')


metabolites = {Metabolite(met_abbreviations[m], formula=met_formula[m], name=met_names[m], compartment='c'):met_charge[m] for m in range(len(met_abbreviations))}
gene_reaction_rules = [str(gene_reaction_rules[i]).replace('(', '').replace(')' , '').replace('\n', '') for i in range(len(gene_reaction_rules))]

for i in range(len(reactions_abbreviations)):

    reaction             = Reaction(reactions_abbreviations[i])

    reaction.name        = str(reaction_names[i])
    reaction.subsystem   = str(reaction_subsystem[i])
    reaction.lower_bound = lower_bound[i]
    reaction.upper_bound = upper_bound[i]
    reaction.gene_reaction_rule =  gene_reaction_rules[i]

    reaction.add_metabolites(metabolites) if i == len(reactions_abbreviations)-1 else None    
    model.add_reaction(reaction)

Scaling...
 A: min|aij| =  1.000e+00  max|aij| =  1.000e+00  ratio =  1.000e+00
Problem data seem to be well scaled


In [11]:
objective_name = [reactions_abbreviations[i] for i in range(len(objective)) if objective[i] == 1][0]

In [12]:
model.objective = objective_name
model.objective.expression

1.0*overall - 1.0*overall_reverse_00a0e

### Model Validation

In [13]:
import tempfile
from pprint import pprint
from cobra.io import write_sbml_model, validate_sbml_model

with tempfile.NamedTemporaryFile(suffix='.xml') as f_sbml:
    write_sbml_model(model, filename=f_sbml.name)
    report = validate_sbml_model(filename=f_sbml.name)

pprint(report)

SBML errors in validation, check error log for details.


(<Model iMAC868 at 0x7fc7df6833d0>,
 {'COBRA_CHECK': ["Metabolite 'lac-D[c]' formula 'C3H5O3 ' not alphanumeric",
                  "Metabolite 'lgt-S[c]' formula 'C13H20N3O8S ' not "
                  'alphanumeric',
                  "Metabolite 'mercppyr[c]' formula 'C3H3O3S ' not "
                  'alphanumeric'],
  'COBRA_ERROR': [],
  'COBRA_FATAL': [],
  'SBML_ERROR': ['E0 (Error): SBML component consistency (fbc, L377); Chemical '
                 'formula must be string; The value of attribute '
                 "'fbc:chemicalFormula' on the SBML <species> object must be "
                 'set to a string consisting only of atomic names or user '
                 'defined compounds and their occurrence.\n'
                 'Reference: L3V1 Fbc V3 Section 3.4\n'
                 " Encountered ' ' when expecting a capital letter.The "
                 "chemicalFormula 'C3H5O3 ' has incorrect syntax.\n",
                 'E1 (Error): SBML component consistency (fbc, L381); Che