In [1]:
import pandas as pd
import cobra
from cobra import Model, Reaction, Metabolite

## Read in and edit the metabolite data

In [2]:
metabolites = pd.read_excel('metabolites_extended.xlsx')   #Import
metabolites.head()                                         #Show Table

Unnamed: 0,Abbreviation,Name,Formula (neutral),Formula (Charged),Charge,Compartment,KEGG ID,Bigg ID,PubChemID,CheBI ID,Inchi String,Smiles,Remark
0,cJB00034[e],L-Sorbose,,C6H12O6,0,,C00247,srb__L,6904.0,,,,cpd00212
1,cJB00124[c],3-Demethylubiquinol-9,,C53H82O4,-1,,C03226,,86583403.0,,,,cpd24737
2,cJB00125[c],Ubiquinol-9,,C54H84O4,0,,C01967,q9h2,45281170.0,,,,
3,cJB00126[c],Dihydroaeruginoic acid,,C10H9NO3S,0,,,,5484365.0,,,,
4,cJB00127[c],cis-2-Decenoic acid,,C10H17O2,-1,,,,5282724.0,,,,


In [3]:
#Delete Columns that are not needed 
metabolites=metabolites.drop(['PubChemID', 'CheBI ID', 'Inchi String', 'Smiles', 'Formula (neutral)'], axis=1)
metabolites.head()

Unnamed: 0,Abbreviation,Name,Formula (Charged),Charge,Compartment,KEGG ID,Bigg ID,Remark
0,cJB00034[e],L-Sorbose,C6H12O6,0,,C00247,srb__L,cpd00212
1,cJB00124[c],3-Demethylubiquinol-9,C53H82O4,-1,,C03226,,cpd24737
2,cJB00125[c],Ubiquinol-9,C54H84O4,0,,C01967,q9h2,
3,cJB00126[c],Dihydroaeruginoic acid,C10H9NO3S,0,,,,
4,cJB00127[c],cis-2-Decenoic acid,C10H17O2,-1,,,,


In [4]:
# Add the right compartment to the column - according to [c] or [e]-tag on the metabolites
metabolites.loc[metabolites['Abbreviation'].str.contains('\[c\]'), 'Compartment'] = 'cytosol'
metabolites.loc[metabolites['Abbreviation'].str.contains('\[e\]'), 'Compartment'] = 'extracellular'
metabolites.tail()

Unnamed: 0,Abbreviation,Name,Formula (Charged),Charge,Compartment,KEGG ID,Bigg ID,Remark
1290,cpd01700[c],D-Citramalate,C5H6O,-2,cytosol,C02612,citm,
1291,cpd03593[c],D-erythro-3-Methylmalate,C5H6O5,-2,cytosol,C06032,r3mmal,
1292,cpd02605[c],2-isopropyl-3-oxosuccinate,C7H8O5,-2,cytosol,C04236,3c4mop,
1293,cpd10162[c],(R)-3-Hydroxy-3-methyl-2-oxopentanoate,C6H9O4,-1,cytosol,C14463,3hmop,
1294,cpd02569[c],2-Oxo-3-hydroxyisovalerate,C5H7O4,-1,cytosol,C04181,3hmoa,


In [5]:
#Change the compartment tag from ""[c]"" to "_c" and "["e]"" to "_e"
metabolites.loc[metabolites['Abbreviation'].str.contains('\[e\]'), 'Abbreviation'] = metabolites['Abbreviation'].str[:-3]+"_e"
metabolites.loc[metabolites['Abbreviation'].str.contains('\[c\]'), 'Abbreviation'] = metabolites['Abbreviation'].str[:-3]+"_c"
metabolites.tail()

Unnamed: 0,Abbreviation,Name,Formula (Charged),Charge,Compartment,KEGG ID,Bigg ID,Remark
1290,cpd01700_c,D-Citramalate,C5H6O,-2,cytosol,C02612,citm,
1291,cpd03593_c,D-erythro-3-Methylmalate,C5H6O5,-2,cytosol,C06032,r3mmal,
1292,cpd02605_c,2-isopropyl-3-oxosuccinate,C7H8O5,-2,cytosol,C04236,3c4mop,
1293,cpd10162_c,(R)-3-Hydroxy-3-methyl-2-oxopentanoate,C6H9O4,-1,cytosol,C14463,3hmop,
1294,cpd02569_c,2-Oxo-3-hydroxyisovalerate,C5H7O4,-1,cytosol,C04181,3hmoa,


In [6]:
# Get rid of float(NaN) in Bigg IDs -> otherwise adding annotation fails
for i in range(len(metabolites)):
    if type(metabolites.loc[i]['Bigg ID']) != str:
        metabolites.loc[i, 'Bigg ID'] = str('')

## Read in the reactions file

In [7]:
reactions = pd.read_excel('reactions_extended.xlsx')
reactions.head(2)

Unnamed: 0,Abbreviation,Name,Reaction,Reactions with MetNames,GPR,Genes,Protein,Subsystems,Reversible,Lower bound,Upper bound,Objective,Confidence Score,EC. Number,Notes,References
0,rxn03253,acyl-CoA dehydrogenase (decanoyl-CoA),cpd00015[c] + cpd03128[c] -> cpd00982[c] + cpd...,FAD + Decanoyl-CoA -> FADH2 + (2E)-Decenoyl-CoA,((PA14_52900) or (PA14_06600) or (PA14_31580)),PA14_52900 PA14_06600 PA14_31580,,Fatty acid metabolism,0,0.0,1000.0,0,,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."
1,rxn02720,acyl-CoA dehydrogenase (dodecanoyl-CoA),cpd00015[c] + cpd01260[c] -> cpd00982[c] + cpd...,FAD + Lauroyl-CoA -> FADH2 + (2E)-Dodecenoyl-CoA,((PA14_52900) or (PA14_06600) or (PA14_31580)),PA14_52900 PA14_06600 PA14_31580,,Fatty acid metabolism,0,0.0,1000.0,0,,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."


In [8]:
#Delete columns that are not needed
reactions = reactions.drop(['Reactions with MetNames', 'Genes', 'Protein', 'Confidence Score'], axis=1)
reactions.head(2)

Unnamed: 0,Abbreviation,Name,Reaction,GPR,Subsystems,Reversible,Lower bound,Upper bound,Objective,EC. Number,Notes,References
0,rxn03253,acyl-CoA dehydrogenase (decanoyl-CoA),cpd00015[c] + cpd03128[c] -> cpd00982[c] + cpd...,((PA14_52900) or (PA14_06600) or (PA14_31580)),Fatty acid metabolism,0,0.0,1000.0,0,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."
1,rxn02720,acyl-CoA dehydrogenase (dodecanoyl-CoA),cpd00015[c] + cpd01260[c] -> cpd00982[c] + cpd...,((PA14_52900) or (PA14_06600) or (PA14_31580)),Fatty acid metabolism,0,0.0,1000.0,0,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."


In [9]:
#Change compartment-tag
for i in range(len(reactions)):
    if '[c]' in reactions['Reaction'][i]:
        reactions.loc[i, 'Reaction'] = reactions['Reaction'][i].replace('[c]', '_c')
    
    if '[e]' in reactions['Reaction'][i]:
        reactions.loc[i, 'Reaction'] =  reactions['Reaction'][i].replace('[e]', '_e')
        
reactions.head(2)

Unnamed: 0,Abbreviation,Name,Reaction,GPR,Subsystems,Reversible,Lower bound,Upper bound,Objective,EC. Number,Notes,References
0,rxn03253,acyl-CoA dehydrogenase (decanoyl-CoA),cpd00015_c + cpd03128_c -> cpd00982_c + cpd031...,((PA14_52900) or (PA14_06600) or (PA14_31580)),Fatty acid metabolism,0,0.0,1000.0,0,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."
1,rxn02720,acyl-CoA dehydrogenase (dodecanoyl-CoA),cpd00015_c + cpd01260_c -> cpd00982_c + cpd020...,((PA14_52900) or (PA14_06600) or (PA14_31580)),Fatty acid metabolism,0,0.0,1000.0,0,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."


In [10]:
#Change GPR annotation
for i in range(len(reactions)):
    if type(reactions['GPR'][i]) is float:
        continue
    else:
        if '(' in reactions['GPR'][i]:
            reactions.loc[i, 'GPR'] = reactions['GPR'][i].replace('(', '')
        if ')' in reactions['GPR'][i]:
            reactions.loc[i, 'GPR'] = reactions['GPR'][i].replace(')', '')
reactions.head(2)

Unnamed: 0,Abbreviation,Name,Reaction,GPR,Subsystems,Reversible,Lower bound,Upper bound,Objective,EC. Number,Notes,References
0,rxn03253,acyl-CoA dehydrogenase (decanoyl-CoA),cpd00015_c + cpd03128_c -> cpd00982_c + cpd031...,PA14_52900 or PA14_06600 or PA14_31580,Fatty acid metabolism,0,0.0,1000.0,0,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."
1,rxn02720,acyl-CoA dehydrogenase (dodecanoyl-CoA),cpd00015_c + cpd01260_c -> cpd00982_c + cpd020...,PA14_52900 or PA14_06600 or PA14_31580,Fatty acid metabolism,0,0.0,1000.0,0,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."


In [11]:
#Change compartment-tag
for i in range(len(reactions)):
    if '(e)' in reactions['Abbreviation'][i]:
        reactions.loc[i, 'Abbreviation'] =  reactions['Abbreviation'][i].replace('(e)', '_e')

    if '(c)' in reactions['Abbreviation'][i]:
        reactions.loc[i, 'Abbreviation'] =  reactions['Abbreviation'][i].replace('(c)', '_c')

In [12]:
def get_sub_and_prod(index):
    '''
    Get the substrate and product of a reaction using the index of the reaction
    '''
    
    react = reactions.loc[index]['Reaction']
    if '->' in react:
        split = react.split('->')
    elif ('<=>') in react: 
        split = react.split('<=>')
    substrates = split[0].split('+')
    products = split[1].split('+')

    substrates = [item.strip() for item in substrates]
    products = [item.strip() for item in products]

    return(substrates, products)

In [13]:
def get_factors(substrates):
    '''
    Get the factors of the substrates of the reactions
    '''
    substrates_factor = []
    plane_substrates = []

    for sub in substrates: 
        splitting = sub.split()
        #print(len(splitting))
        if len(splitting)== 1:
            substrates_factor.append(1)
            plane_substrates.append(sub)
        elif len(splitting) == 2:
            substrates_factor.append(float(splitting[0]))
            plane_substrates.append(splitting[1])
        elif len(splitting) == 0:
            continue
        else:
            print('ERROR!', sub)

    #print(substrates)
    #print(plane_substrates)
    #print(substrates_factor)
    return(plane_substrates, substrates_factor)

In [14]:
#Testing of the function
substrates, products = get_sub_and_prod(323)
print(substrates)
print(products)

['0.5 cpd00080_c', '0.13 cpd15239_c', '0.03 cpd15268_c', '0.33 cpd15277_c', '0.51 cpd15572_c']
['0.01 cpd11422_c', 'cpd11493_c']


In [15]:
plane_sub, sub_factor = get_factors(substrates)
print(plane_sub)
print(sub_factor)

['cpd00080_c', 'cpd15239_c', 'cpd15268_c', 'cpd15277_c', 'cpd15572_c']
[0.5, 0.13, 0.03, 0.33, 0.51]


## Create a new, empty model

In [16]:
PA14_model = Model('PA14_model')
print('%i reactions initially' % len(PA14_model.reactions))
print('%i metabolites initially' % len(PA14_model.metabolites))
print('%i genes initially' % len(PA14_model.genes))

0 reactions initially
0 metabolites initially
0 genes initially


In [17]:
for index in range(len(reactions)):
    ID = reactions.loc[index]['Abbreviation']
    reaction = Reaction(ID)
    reaction.name = reactions.loc[index]['Name']
    reaction.subsystem = reactions.loc[index]['Subsystems']
    reaction.lower_bound = reactions.loc[index]['Lower bound']
    reaction.upper_bound = reactions.loc[index]['Upper bound']
    if type(reactions.loc[index]['GPR']) != float:
        reaction.gene_reaction_rule = reactions.loc[index]['GPR']
    
    substrates, products = get_sub_and_prod(index)

    plane_sub, factors_sub = get_factors(substrates)
    plane_prod, factors_prod = get_factors(products)
    
    for i in range(len(plane_sub)):
        reaction.add_metabolites({
            Metabolite(plane_sub[i]): -factors_sub[i]
        })
    for i in range(len(plane_prod)):
        reaction.add_metabolites({
            Metabolite(plane_prod[i]): factors_prod[i]
        })
    #print(index, ID, reaction.reaction)
    PA14_model.add_reactions([reaction])

In [18]:
print('%i reactions' % len(PA14_model.reactions))
print('%i metabolites' % len(PA14_model.metabolites))
print('%i genes' % len(PA14_model.genes))

1518 reactions
1295 metabolites
1137 genes


In [19]:
#Create a metabolite object for all the listed metabolites
for index in range(len(metabolites)): 
    ID = metabolites.loc[index]['Abbreviation']
    PA14_model.metabolites.get_by_id(ID).name = metabolites.loc[index]['Name']
    PA14_model.metabolites.get_by_id(ID).formula = metabolites.loc[index]['Formula (Charged)']
    PA14_model.metabolites.get_by_id(ID).compartment = metabolites.loc[index]['Compartment']

In [20]:
#PA14_model.objective = "PA14_Biomass"
PA14_model.optimize().objective_value

0.0

In [21]:
for react in PA14_model.reactions:
    if 'EX' in react.id:
        react.lower_bound = 0
        react.upper_bound = 1000

In [22]:
'''Definition of Medium and non-active reactions'''
medium = ['EX CO2 e', 'EX D-Glucose e', 'EX D-Lactate e', 'EX Fe2+ e', 'EX Glycine e', 
          'EX H+ e', 'EX H2O e', 'EX K+ e', 'EX L-Alanine e', 'EX L-Arginine e', 'EX L-Aspartate e', 
          'EX L-Cysteine e', 'EX L-Glutamate e', 'EX L-Histidine e', 'EX L-Isoleucine e', 'EX L-Leucine e', 
          'EX L-Lysine e', 'EX L-Methionine e', 'EX L-Phenylalanine e', 'EX L-Proline e', 'EX L-Serine e', 
          'EX L-Threonine e', 'EX L-Tryptophan e', 'EX L-Tyrosine e', 'EX L-Valine e', 'EX Mg e', 'EX Na+ e', 
          'EX NH3 e', 'EX Nitrate e', 'EX Ornithine e', 'EX Phosphate e', 'EX Sulfate e']
non_active_exchange =['Gluconate secretion', '2-ketogluconate secretion']

In [23]:
for react in PA14_model.reactions:
    if react.name in medium: 
        react.lower_bound = -10
    elif react.name == 'EX O2 e':
        react.lower_bound = -20
    elif react.name in non_active_exchange:
        react.lower_bound = 0
        react.upper_bound = 0

In [24]:
PA14_model.objective = "PA14_Biomass"
PA14_model.optimize().objective_value

18.95808787453769

In [25]:
cobra.io.write_sbml_model(PA14_model, "created_PA14.xml")

#### Analysing cpd11416_c = Biomass compound without a formula

In [26]:
#from cobra.flux_analysis import (single_reaction_deletion)

In [27]:
#with PA14_model:
#    PA14_model.reactions.EX_cpd11416_c.knock_out()
#    print('pfk knocked out: ', PA14_model.optimize())

## LibSBML - extensions

In [26]:
from libsbml import * 
reader = SBMLReader()
document = reader.readSBMLFromFile('created_PA14.xml')
model = document.getModel()

In [27]:
print('# Errors: ', document.getNumErrors())
document.checkConsistency()
document.checkL3v1Compatibility() #-> compatibility given 

# Errors:  0


0

In [28]:
print('Species:', model.getNumSpecies())
print('Reactions: ', model.getNumReactions())
print('Compartments: ', model.getNumCompartments())

Species: 1295
Reactions:  1518
Compartments:  2


# Species

### Add Charge to species

In [29]:
species_list = model.getListOfSpecies()
for species in species_list:
    splugin = species.getPlugin('fbc')
    species_id = species.getId()[2:]
    charge = int(metabolites.loc[metabolites['Abbreviation'] == species_id]['Charge'])
    splugin.setCharge(charge)

### Add Annotations to species

In [30]:
### First: MetaID is needed to add a CV-Term
species_list = model.getListOfSpecies()
for species in species_list:
    species.setMetaId(species.getId())

In [50]:
#species_list = model.getListOfSpecies()
#for species in species_list:
#    species_id = species.getId()[2:]
#    annot = metabolites.loc[metabolites['Abbreviation'] == species_id]['Bigg ID'].iloc[0]
#    species.setAnnotation(annot)

In [31]:
success = 0
bigg_ids = 0
for species in species_list:
    species_id = species.getId()[2:]     #get the id of the reaction, without the Prefix 'R_'
    bigg_col = metabolites.loc[metabolites['Abbreviation'] == species_id]['Bigg ID'].iloc[0] #get the content of the pandas df
    #kegg_col = metabolites.loc[metabolites['Abbreviation'] == species_id]['KEGG ID'].iloc[0]
    if type(bigg_col) == str:
        bigg_ids += 1
        cv = CVTerm()
        cv.setQualifierType(BIOLOGICAL_QUALIFIER)
        cv.setBiologicalQualifierType(BQB_IS)
        cv.addResource('http://identifiers.org/bigg.metabolite/'+bigg_col)
                
        status = species.addCVTerm(cv)
        if status == LIBSBML_OPERATION_SUCCESS:
            success += 1
        else:
            print(species, bigg_col)
            
    #if type(kegg_col) == str:
        #bigg_ids += 1
    #    cv = CVTerm()
     #   cv.setQualifierType(BIOLOGICAL_QUALIFIER)
      #  cv.setBiologicalQualifierType(BQB_IS)
       # cv.addResource('http://identifiers.org/kegg.compound/'+kegg_col)
                
        #status = species.addCVTerm(cv)
        #if status == LIBSBML_OPERATION_SUCCESS:
        #    success += 1
        #else:
        #    print(species, bigg_col)

print(bigg_ids)           
print(success)

1295
1295


In [32]:
newdocument = model.getSBMLDocument()
writeSBMLToFile(newdocument,'created_PA14.xml') # 1 means success, 0 means failure

1

# Groups Extension

In [33]:
#gets the uri for the proper group versions
#http://sbml.org/Software/libSBML/5.13.0/docs//python-api/classlibsbml_1_1_groups_extension.html
groupextension = GroupsExtension()

#http://sbml.org/Software/libSBML/5.17.0/docs//python-api/classlibsbml_1_1_groups_extension.html#a4329b4cd00a64df7b695a6d170d0a802
#sbml level, sbml version, package version
groupURI = groupextension.getURI(3,1,1)

In [34]:
#enables the package
document.enablePackage(groupURI, 'groups', True)

# Makes it such that having groups is not required
result = document.setPkgRequired('groups', False)

#prints whether it was successful or not
OperationReturnValue_toString(result)

# if you write out the document now, it should have two additional expressions in the "<sbml>"
# 'http://www.sbml.org/sbml/level3/version1/groups/version1'
# 'groups:required='false'

'The operation was successful. '

In [55]:
#writeSBMLToFile(document, 'group_testing.xml')

In [35]:
#document = reader.readSBMLFromFile('group_testing.xml')
group_model = document.getModel()

In [36]:
n_plugins = group_model.getNumPlugins()
print(n_plugins)
plugin1 = group_model.getPlugin(0)
print(plugin1)
plugin2 = group_model.getPlugin(1)
print(plugin2)

2
<libsbml.FbcModelPlugin; proxy of <Swig Object of type 'FbcModelPlugin *' at 0x000000558EFB89C0> >
<libsbml.GroupsModelPlugin; proxy of <Swig Object of type 'GroupsModelPlugin *' at 0x000000558EFB8840> >


In [37]:
#document.enablePackage(groupURI, 'groups', True)
mplugin = document.getPlugin('groups')
print(mplugin)

<libsbml.SBMLDocumentPlugin; proxy of <Swig Object of type 'SBMLDocumentPlugin *' at 0x000000558EFB8AE0> >


### Create dictionary with subsystems and their respective reactions 

In [38]:
subsystems = list(set(list(reactions['Subsystems'])))
print(len(subsystems))

group_dict = {}

for sub in subsystems:
    sub_list = []
    for i in range(len(reactions)):
        if reactions.loc[i]['Subsystems'] == sub:
            sub_list.append(reactions.loc[i]['Abbreviation'])
    group_dict[sub] = sub_list
    
print(len(group_dict))

78
78


In [39]:
print(group_dict['Glycerophospholipid metabolism'])

['rxn00616', 'rxn00611', 'rxn00612', 'rxn08669', 'rPY00184', 'rxn08229', 'rxn08230', 'rPY00183', 'rxn08231', 'rxn08232', 'rxn00539', 'rxn01073', 'rxn00758', 'rxn13817', 'rxn13815', 'rxn09108', 'rxn09109', 'rxn09110', 'rxn09111', 'rxn09112', 'rxn09113', 'rxn09114', 'rPY00215', 'rxn13889', 'rPY00214', 'rxn09101', 'rxn09102', 'rxn09103', 'rxn09104', 'rxn09105', 'rxn09106', 'rxn09107', 'rxn13887', 'rxn09205', 'rxn09206', 'rxn09207', 'rxn09208', 'rxn09209', 'rxn09210', 'rxn09211', 'rxn10233', 'rxn10236', 'rxn10232', 'rxn10237', 'rxn10235', 'rxn10234', 'rxn13894', 'rxn09197', 'rxn09198', 'rxn09199', 'rPY00165', 'rxn09200', 'rxn09201', 'rPY00170', 'rxn09202', 'rxn09203', 'rxn13818', 'rxn05964', 'rxn06079', 'rxn06087', 'rxn05901', 'rxn06140', 'rxn06043', 'rxn04684', 'rxn00538', 'rxn05829', 'rxn05824']


### And now add the dictionary to the groups

In [40]:
groups = group_model.getPlugin('groups')
print(groups.getNumGroups())

#print(groups)
group_list = groups.getListOfGroups()
print(len(group_list))

0
0


In [41]:
keys = list(group_dict.keys())
num_reactions = [len(sub) for sub in list(group_dict.values())]
group_id = 0

for i in range(0, len(group_dict)):     #94 reactions included 
    group_list.createGroup()
    group_list[i].setName(keys[i])
    group_list[i].setKind('partonomy')
    group_list[i].setSBOTerm("SBO:0000633")
    for j in range(0, num_reactions[i]):
        group_list[i].createMember()       #Number of reactions participating in this pathway 
        
n_groups = groups.getNumGroups()
print('Number of groups: {}'.format(n_groups)) 

Number of groups: 78


In [42]:
#Corresponding number of members is created in the previous step. 
# The reaction id is set as ID 
group_list = groups.getListOfGroups()

for i in range(0, n_groups):
    group =group_list[i].getName()
    num_members = group_list[i].getNumMembers()
    member_list = group_list[i].getListOfMembers()
    reaction_list = group_dict[group]
    for j in range(0, num_members): 
        #member_list[j].setId(reaction_list[j])  #setRefId? 
        member_list[j].setIdRef('R_' + reaction_list[j])

In [43]:
#Example: 
group_list = groups.getListOfGroups()
print('First group is: {}'.format(group_list[0].getName()))

sulfur_num = group_list[0].getNumMembers()
print('This group has {} members'.format(sulfur_num))



print('\nThe members (reactions) are:')

sulfur_reactions = group_list[0].getListOfMembers()
for i in range(0, sulfur_num):
    print(sulfur_reactions[i].getIdRef())

First group is: Tyrosine metabolism
This group has 8 members

The members (reactions) are:
R_rxn01945
R_rJB00104
R_rxn03055
R_rxn02276
R_rxn00527
R_rxn00993
R_rxn01825
R_rxn02366


In [44]:
newdocument = group_model.getSBMLDocument()
writeSBMLToFile(newdocument,'created_PA14.xml') # 1 means success, 0 means failure

1

# Reactions

In [45]:
document = reader.readSBMLFromFile('created_PA14.xml')
reaction_model = document.getModel()

In [46]:
reactions.head(2)

Unnamed: 0,Abbreviation,Name,Reaction,GPR,Subsystems,Reversible,Lower bound,Upper bound,Objective,EC. Number,Notes,References
0,rxn03253,acyl-CoA dehydrogenase (decanoyl-CoA),cpd00015_c + cpd03128_c -> cpd00982_c + cpd031...,PA14_52900 or PA14_06600 or PA14_31580,Fatty acid metabolism,0,0.0,1000.0,0,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."
1,rxn02720,acyl-CoA dehydrogenase (dodecanoyl-CoA),cpd00015_c + cpd01260_c -> cpd00982_c + cpd020...,PA14_52900 or PA14_06600 or PA14_31580,Fatty acid metabolism,0,0.0,1000.0,0,,*JAB 06/23/14 - switched out NAD for FAD based...,"Guzik, M. W., Narancic, T., Ilic-Tomic, T., Vo..."


In [47]:
# first add meta_ids to the reactions to enable CV-Terms addition
reaction_list = reaction_model.getListOfReactions()
for react in reaction_list:
    react.setMetaId(react.getId())

### Add E.C. Numbers

In [48]:
#Add the E.C. Number as Biological Qualifier Resource 
for react in reaction_list:
    react_id = react.getId()[2:]     #get the id of the reaction, without the Prefix 'R_'
    ec_col = reactions.loc[reactions['Abbreviation'] == react_id]['EC. Number'].iloc[0] #get the content of the pandas df
    if type(ec_col) == str:
        if ec_col == 'Undetermined': 
            continue
        elif 'TC-' in ec_col:
            ec = ec_col.split(',')
            EC_num = ec[1]
            cv = CVTerm()
            cv.setQualifierType(BIOLOGICAL_QUALIFIER)
            cv.setBiologicalQualifierType(BQB_IS)
            cv.addResource('http://identifiers.org/tcdb/'+EC_num)
                
            react.addCVTerm(cv)
            
        else:
            ec = ec_col.split(',')
            if len(ec) > 1:
                for i in range(len(ec)):
                    EC_num = ec[i].strip()
                    cv = CVTerm()
                    cv.setQualifierType(BIOLOGICAL_QUALIFIER)
                    cv.setBiologicalQualifierType(BQB_IS)
                    cv.addResource('http://identifiers.org/ec-code/'+EC_num)
                
                    react.addCVTerm(cv)
                      
            else: 
                EC_num = ec[0]
                cv = CVTerm()
                cv.setQualifierType(BIOLOGICAL_QUALIFIER)
                cv.setBiologicalQualifierType(BQB_IS)
                cv.addResource('http://identifiers.org/ec-code/'+EC_num)
                
                react.addCVTerm(cv)

### Add Authors/References

In [49]:
url = '"http://www.w3.org/1999/xhtml"'

for react in reaction_list:
    react_id = react.getId()[2:]     #get the id of the reaction, withour the Prefix 'R_'
    ref_col = reactions.loc[reactions['Abbreviation'] == react_id]['References'].iloc[0] #get the content of the pandas df
    
    if type(ref_col) == str:
        if '&' in ref_col:
            ref_col = ref_col.replace('&', 'and')
        
        note = '<body xmlns='+url+'><p>REFERENCES: '+ ref_col +' </p></body>'
        
        react.setNotes(note)

### Add Notes from Excel file

In [50]:
num_notes = 0
appended = 0
created = 0

url = '"http://www.w3.org/1999/xhtml"'

for react in reaction_list:
    react_id = react.getId()[2:]     #get the id of the reaction, without the Prefix 'R_'
    notes_col = reactions.loc[reactions['Abbreviation'] == react_id]['Notes'].iloc[0] #get the content of the pandas df
    
    if type(notes_col) == str:
        num_notes += 1
        
        if '>' in notes_col or '<' in notes_col:
                notes_col = notes_col.replace('->', 'rightarrow')
                notes_col = notes_col.replace('=>', 'rightarrow')
                notes_col = notes_col.replace('<-', 'leftarrow')
                notes_col = notes_col.replace('<=', 'leftarrow')
                notes_col = notes_col.replace('<=>', 'bidirectional arrow')
                notes_col = notes_col.replace('<', 'leq')
                notes_col = notes_col.replace('>', 'geq')
                
        if react.isSetNotes():
            note = '<body xmlns='+url+'><p>NOTES: '+ notes_col +' </p></body>'
            status = react.appendNotes(note)
            
            if status == LIBSBML_OPERATION_SUCCESS:
                appended += 1 
            else:
                print(react.getId(), note)
                
        else:
            note = '<body xmlns='+url+'><p>NOTES: '+ notes_col +' </p></body>'
            status = react.setNotes(note)
            if status == LIBSBML_OPERATION_SUCCESS:
                created += 1 
            else:
                print(react.getId(), '\n', notes_col)
            
        
        #eact.getNotesString()
            
print(num_notes)
print(appended)
print(created)

808
93
715


In [51]:
newdocument = reaction_model.getSBMLDocument()
writeSBMLToFile(newdocument,'created_PA14.xml') # 1 means success, 0 means failure

1