# Explore SMETANA-relevant models

In [3]:
model1 = kbase_api.get_from_ws("E_iAH991V2",40576)
model2 = kbase_api.get_from_ws("E_iML1515.kb",40576)
models = [model1, model2]
%run ../../../modelseedpy/community/mscompatibility.py
new_models = MSCompatibility.align_exchanges(models, True, "smetana_conflicts.json")




Standardize exchange reactions in Bacteroides_thetaiotaomicron_VPI-5482.fbamdl.23 


{'original': {'reaction': 'core2_e0 <=> '},
 'new': {'reaction': 'cpd36802_e0 <=> '},
 'justification': 'The cpd36802_e0 ID already exists in model '
                  'Bacteroides_thetaiotaomicron_VPI-5482.fbamdl.23, so each '
                  'reaction (here EX_core2_e0) must be updated.'}


{'original': {'id': 'core2_e0', 'name': 'core2_e0'},
 'new': {'id': 'cpd36802_e0', 'name': 'Core2_e0'},
 'justification': 'The core2_e0 and cpd36802_e0 distinction in '
                  'Bacteroides_thetaiotaomicron_VPI-5482.fbamdl.23 is '
                  'incompatible. The core2_e0 ID is not a ModelSEED Database '
                  'ID. The core2_e0 and cpd36802_e0 metabolites were matched '
                  'via their name.'}


{'original': {'reaction': 'cpd01399_e0 <=> '},
 'new': {'reaction': 'cpd00537_e0 <=> '},
 'justification': 'The cpd00537_e0 ID already exists in model '
                  'Bacter

In [6]:
from cobra.medium import minimal_medium

# Print the minimal medium that COBRApy computes for every aligned model,
# asking it to minimize the number of medium components.
for model in new_models:
    medium = minimal_medium(model, minimize_components=True)
    print(medium)

None
None


# Load the models

In [1]:
# define the environment path
from getpass import getpass
from pprint import pprint
import os

# The cobrakbase directory is machine-specific, so it can be overridden with the
# COBRAKBASE_HOME environment variable. The fallback keeps the original location
# but anchors it at the drive root: os.path.join('C:', 'Users', ...) produces the
# drive-relative path 'C:Users\...' on Windows instead of 'C:\Users\...'.
local_cobrakbase_path = os.environ.get(
    "COBRAKBASE_HOME",
    os.path.join('C:' + os.sep, 'Users', 'Andrew Freiburger', 'Documents', 'Argonne', 'cobrakbase'),
)
os.environ["HOME"] = local_cobrakbase_path

# import the models
import cobrakbase
# Credentials must never be stored in the notebook: read the KBase token from
# the KBASE_TOKEN environment variable, or prompt for it without echoing.
token = os.environ.get("KBASE_TOKEN") or getpass("KBase token: ")
kbase_api = cobrakbase.KBaseAPI(token)
model1 = kbase_api.get_from_ws("iML1515",76994)
model2 = kbase_api.get_from_ws("iSB1139.kb.gf",30650)
models = [model1, model2]



cobrakbase 0.2.8


# Comparison of the new formulations

In [2]:
%run ../../../modelseedpy/community/mscompatibility.py  # the static method logic
new_models, unknown_met_ids = MSCompatibility.standardize(models, conflicts_file_name="orig_conflicts.json")




Standardize exchange reactions in iML1515 


{'original': {'id': 'metsox-R-L-e_e0', 'name': 'L-methionine-R-sulfoxide'},
 'new': {'id': 'cpd14720_e0', 'name': 'L-methionine-R-sulfoxide_e0'},
 'justification': 'The metsox-R-L-e_e0 and cpd14720_e0 distinction in iML1515 '
                  'is incompatible. The metsox-R-L-e_e0 ID is not a ModelSEED '
                  'Database ID. The metsox-R-L-e_e0 and cpd14720_e0 '
                  'metabolites were matched via their name.'}


{'original': {'id': 'cpd03191_e0', 'name': 'D-Glucuronate 1-phosphate_e0'},
 'new': {'id': 'cpd00880_e0', 'name': 'D-Glucuronate 1-phosphate_e0'},
 'justification': 'The cpd03191_e0 and cpd00880_e0 distinction in iML1515 is '
                  'incompatible. The cpd03191_e0 and cpd00880_e0 metabolites '
                  'were matched via their name. The ID match was verified with '
                  "the ['BiGG'] cross-reference(s)."}


{'original': {'id': 'sq-e_e0', 'name': 'Sulphoquinovose'},
 'new': {'i

In [3]:
# Show the standardized models and the unmatched metabolite IDs, one per line.
print(new_models, unknown_met_ids, sep="\n")

[<FBAModel iML1515 at 0x1c21630f070>, <FBAModel iSB1139 at 0x1c21afbc580>]
['metglcur-e_e0']


### Compatibilization validation 

In [3]:
from modelseedpy import FBAHelper

# For every standardized model, gather the metabolite IDs that still do not
# contain "cpd", print how many remain per category, and print the result of
# optimizing the model.
residual_metabolites = {}
for model in new_models:
    model_residuals = residual_metabolites[model.id] = {}
    divider = "="*10

    ## model metabolites
    print("\n", model.id, "\n", divider)
    print("\nmetabolites\n", divider)
    model_residuals["metabolites"] = [
        met.id for met in model.metabolites if "cpd" not in met.id
    ]
    print(len(model_residuals["metabolites"]))

    ## model exchanges
    print("\nexchanges\n", divider)
    model_residuals["exchanges"] = [
        met.id
        for ex_rxn in FBAHelper.exchange_reactions(model)
        for met in ex_rxn.metabolites
        if "cpd" not in met.id
    ]
    print(len(model_residuals["exchanges"]))

    ## model reactions (a set, so each residual ID is counted once)
    print("\n\nreactions\n", divider)
    model_residuals["reactions"] = {
        met.id
        for rxn in model.reactions
        for met in rxn.metabolites
        if "cpd" not in met.id
    }
    print(len(model_residuals["reactions"]))

    ## objective value
    print(model.optimize())


 iML1515 

metabolites
90

exchanges
1


reactions
90
<Solution 75.545 at 0x2135cf6d940>

 iSB1139 

metabolites
319

exchanges
0


reactions
319
<Solution 22.211 at 0x2135cf6dac0>


In [4]:
# List, per model, the exchange metabolite IDs that were recorded as residual.
residual_exchanges = [
    residual_metabolites[member.id]["exchanges"] for member in new_models
]
print(residual_exchanges)

[['metglcur-e_e0'], []]


# Comparison of the old formulations

In [5]:
# data = kbase.get_object("iML1515",76994) 

In [22]:
%run ../../../modelseedpy/community/mscompatibility_original.py  # actually the original class-based logic
model1 = kbase_api.get_from_ws("iML1515",76994)
model2 = kbase_api.get_from_ws("iSB1139.kb.gf",30650)
models = [model1, model2]

# msdb_path = os.path.join("..", "..", "..", "..", "ModelSEEDDatabase")
msdb_path = os.path.join("..", "..", "..", "..", "ModelSEEDDatabase")
mscompat = MSCompatibility(msdb_path)
mscompat.align_exchanges(models, standardize=True, conflicts_file_name="orig_conflicts.json")

iML1515 original optimization: <Solution 75.502 at 0x25ce02aceb0>
iML1515 optimization after _correct_met(): <Solution 75.502 at 0x25c4c9a3d60>


{'original': {'id': 'tartr-D-e_e0', 'name': 'D-tartrate'},
 'new': {'id': 'cpd19018_e0', 'name': 'D-tartrate_e0'},
 'justification': 'The tartr-D-e_e0 ID is not a ModelSEED Database ID.'}
iML1515 optimization after _correct_met(): <Solution 75.502 at 0x25ce02acf10>
iML1515 optimization after _correct_met(): <Solution 75.502 at 0x25ce02acd30>
iML1515 optimization after _correct_met(): <Solution 75.502 at 0x25ce02acfd0>
iML1515 optimization after _correct_met(): <Solution 75.502 at 0x25ce02acd90>
iML1515 optimization after _correct_met(): <Solution 75.502 at 0x25ce02ace50>
iML1515 optimization after _correct_met(): <Solution 75.502 at 0x25ce02acf10>
iML1515 optimization after _correct_met(): <Solution 75.502 at 0x25ce02acd30>
iML1515 optimization after _correct_met(): <Solution 75.502 at 0x25ce02acfd0>
iML1515 optimization after _correct_met():

[<FBAModel iML1515 at 0x25c4c9a3fd0>, <FBAModel iSB1139 at 0x25bba9b3f70>]

In [26]:
# Optimize every model in `old_models` and print each solution.
# NOTE(review): `old_models` is not assigned in any visible cell — confirm where it is defined.
for model in old_models:
    solution = model.optimize()
    print(solution)

<Solution 0.000 at 0x7fe902317590>
<Solution 0.000 at 0x7fe902317d90>


# Export the MSDB objects

In [5]:
from collections import OrderedDict
import os, json


def _split_alias(alias):
    """Split an alias entry such as 'AraCyc: OH; WATER' into ('AraCyc', ['OH', 'WATER']).

    Only the first ':' separates the label from the ';'-separated values, so a
    value that itself contains ':' is kept whole. Returns (None, []) when the
    entry has no ':' separator at all.
    """
    label, separator, values = alias.partition(':')
    if not separator:
        return None, []
    return label.strip(), [value.strip() for value in values.split(';')]


# import and parse ModelSEED Database reactions and compounds
reaction_ids = OrderedDict()
msdb = os.path.join("..", "..", "..", "..", "ModelSEEDDatabase")
for num in range(0,49):
    with open(os.path.join(msdb, 'Biochemistry', f'reaction_{num:0>2}.json'), 'r') as rxns:
        reactions = json.load(rxns)
    for rxn in reactions:
        reaction_ids[rxn['id']] = rxn['name']

compounds_cross_references, compound_names = OrderedDict(), OrderedDict()
for num in range(0,38):
    with open(os.path.join(msdb, 'Biochemistry', f'compound_{num:0>2}.json'), 'r') as cpds:
        try:
            compounds = json.load(cpds)
        except json.JSONDecodeError:
            # Only a parsing failure is tolerated; any other error still surfaces.
            print(f'compound_{num:0>2}.json is probably empty.')
            continue
    for cpd in compounds:
        compounds_cross_references[cpd['id']] = {}
        for category in cpd['aliases'] or []:
            label, entries = _split_alias(category)
            if label is None:
                print(f"Skipping malformed alias {category!r} of {cpd['id']}.")
                continue
            if 'Name' in label:
                # Previously the label itself ("Name") replaced the first name;
                # now every listed name is kept. The first ID seen for a name wins.
                for name in entries + [cpd['name']]:
                    if name not in compound_names:
                        compound_names[name] = cpd['id']
            else:
                compounds_cross_references[cpd['id']][label] = entries

# export the content
with open("compound_Xrefs.json" , 'w') as cpdXRefs_out:
    json.dump(compounds_cross_references, cpdXRefs_out, indent=3)
with open("compoundNames.json" , 'w') as cpdNames_out:
    json.dump(compound_names, cpdNames_out, indent=3)
with open("reactionIDs.json" , 'w') as rxnID_out:
    json.dump(reaction_ids, rxnID_out, indent=3)

# Align exchange reactions

In [6]:
%run ../../../modelseedpy/core/mscompatibility.py
mscompat = MSCompatibility(modelseed_db_path = os.path.join('..', '..', '..', '..', 'ModelSEEDDatabase'))

model1 = kbase_api.get_from_ws("iML1515",76994)
model2 = kbase_api.get_from_ws("iSB1139.kb.gf",30650)
models = [model1, model2]

before_model_rxns = after_model_rxns = []
for model in models:
    for rxn in model.reactions:
        before_model_rxns.append(rxn.reaction)
    if 'r293_e0' in model.metabolites:
        print('The metabolite r293_e0 is in model', models.index(model))

models = mscompat.align_exchanges(models, conflicts_file_name='exchanges_conflicts.json', model_names = ["iML1515", "iSB1139.kb.gf"])

print('\n\n')
for model in models:
    for rxn in model.reactions:
        after_model_rxns.append(rxn.reaction)
    if 'r293_e0' not in model.metabolites:
        print('The metabolite r293_e0 is not in model', models.index(model))
        
    # export JSON versions of the models
    
        
for new_rxn in after_model_rxns:
    if new_rxn not in before_model_rxns:
        print('new rxn\t',  new_rxn, 'old rxn\t', before_model_rxns[after_model_rxns.index(new_rxn)])

Exception: File `'../../../modelseedpy/core/mscompatibility.py'` not found.

# Standardize exchange metabolites

In [None]:
%run ../../../modelseedpy/core/mscompatibility.py

model1 = kbase_api.get_from_ws("iML1515",76994)
model2 = kbase_api.get_from_ws("iSB1139.kb.gf",30650)
mscompat = MSCompatibility(modelseed_db_path = os.path.join('..', '..', '..', '..', 'ModelSEEDDatabase'))
models = [model1, model2]

before_model_rxns, after_model_rxns = [], []
for model in models:
    for rxn in model.reactions:
        before_model_rxns.append(rxn.reaction)
    if 'r293_e0' in model.metabolites:
        print('The metabolite r293_e0 is in model', models.index(model))

models = mscompat.standardize(models, conflicts_file_name = 'standardized_exchange_metabolites.json', model_names = ["iML1515", "iSB1139.kb.gf"])

for model in models:
    for rxn in model.reactions:
        after_model_rxns.append(rxn.reaction)
    if 'r293_e0' not in model.metabolites:
        print('The metabolite r293_e0 is not in model', models.index(model))
        
new_rxn_count = 0
for new_rxn in after_model_rxns:
    if new_rxn not in before_model_rxns:
        new_rxn_count += 1
print(f'{new_rxn_count} reactions were changed.')

# Standardize all metabolites

In [None]:
%run ../../../modelseedpy/core/mscompatibility.py

model1 = kbase_api.get_from_ws("iML1515",76994)
model2 = kbase_api.get_from_ws("iSB1139.kb.gf",30650)
mscompat = MSCompatibility(modelseed_db_path = os.path.join('..', '..', '..', 'ModelSEEDDatabase'))
models = [model1, model2]

before_model_rxns, after_model_rxns = [], []
for model in models:
    for rxn in model.reactions:
        before_model_rxns.append(rxn.reaction)
    if 'r293_e0' in model.metabolites:
        print('The metabolite r293_e0 is in model', models.index(model))

model1, model2 = mscompat.standardize(models, exchanges = False, conflicts_file_name = 'standardized_metabolites.json')
# models = mscompat.standardize_MSD(models,conflicts_file_name = 'standardized_metabolites.txt', metabolites = False)

for model in models:
    for rxn in model.reactions:
        after_model_rxns.append(rxn.reaction)
    if 'r293_e0' not in model.metabolites:
        print('The metabolite r293_e0 is not in model', models.index(model))
        
new_rxn_count = 0
for new_rxn in after_model_rxns:
    if new_rxn not in before_model_rxns:
        new_rxn_count += 1
print(f'{new_rxn_count} reactions were changed.')

# Brainstorming

In [None]:
# Distinct compartment values found on the metabolites of model1.
compartments = {met.compartment for met in model1.metabolites}
pprint(compartments)

In [None]:
# Write one listing of exchange reactions per model: a header line followed by
# the ID and reaction string of every exchange, separated by two tabs.
for model_position, file_name in enumerate(('iML1515_exchanges.txt', 'iSB1139_exchanges.txt')):
    with open(file_name, 'w') as exchange_reactions:
        exchange_reactions.write('IDs\t\t\t\tReaction\n')
        for ex_rxn in models[model_position].exchanges:
            exchange_reactions.write(ex_rxn.id+'\t\t'+ex_rxn.reaction+'\n')

In [None]:
from pandas import DataFrame
# FIXME: the original cell ended with an unfinished `DataFrame = ` assignment,
# which is a SyntaxError and would also have shadowed the imported class.
# Decide which table was meant to be built here and bind it to a new name.

In [None]:
from itertools import combinations

# Scratch check: iterate over every unordered pair of dictionary keys.
species = {1:3, 2:3, 3:4, 4:5}
for species_pair in combinations(species, 2):
    print(*species_pair)

In [None]:
# Scratch check of the alias parsing: "<database>: <id>; <id>" becomes
# {database: [id, id]}.
test = 'AraCyc: OH; WATER'
content = test.split(';')
label_and_first = content[0].split(':')
db, content[0] = label_and_first[0], label_and_first[1]
dic = {db: list(map(str.strip, content))}
print(dic)

In [None]:
# For each metabolite of the first model whose ID lacks "cpd", print the ID
# followed by the reaction string of every reaction it takes part in.
non_cpd_mets = (met for met in models[0].metabolites if 'cpd' not in met.id)
for met in non_cpd_mets:
    print(met.id, *(rxn.reaction for rxn in met.reactions), sep='\n')
    print('\n\n')

In [None]:
# Total number of reactions across all models. Summing the reaction objects
# themselves raises a TypeError, so the per-model lengths are added instead.
print(sum(len(model.reactions) for model in models))

In [None]:
# Scratch check of the "either flag is unset" condition. Identity comparison
# (`is None`) is used because `== None` can be overridden by a custom __eq__.
pfba, self_pfba = None, False
if pfba is None or self_pfba is None:
    print('yes')

In [None]:
# Report whether the ID 'cpd00035' is still found among model1's metabolites.
still_present = 'cpd00035' in model1.metabolites
if still_present:
    print('not replaced')

In [None]:
# model1 = kbase_api.get_from_ws("iML1515",76994)
# model2 = kbase_api.get_from_ws("iSB1139.kb.gf",30650)
import re

# Show the ID of the first reaction only.
for rxn in model1.reactions:
    pprint(rxn.id)
    break

# Preview the first four modelreactions entries. enumerate() supplies the
# position directly; looking it up with .index(rxn) on every pass rescans the
# collection and returns the position of the first *equal* entry, which can
# miss the stop condition when entries compare equal.
for position, rxn in enumerate(model1.modelreactions):
    pprint(rxn)
    if position == 3:
        break
    # print(rxn.reactants)
    # print(rxn.products, '\n\n')
    # for index, met_id in enumerate([re.sub('_\w\d', '', met.id) for met in rxn.reactants]):
    #     if met_id in [re.sub('_\w\d', '', met.id) for met in rxn.products]:
    #         print(rxn.metabolites[rxn.reactants[index]])
    #         print(rxn)
    #         break
            
# # exclude reactions where the erroneous ID is both a reactant and product (which is currently an evasive condition)
# for met_id in [re.sub('_\w\d', '', react_met.id) for react_met in rxn.reactants]:
#     print(met_id)
#     if met_id in [re.sub('_\w\d', '', pro_met.id) for pro_met in rxn.products] and met_id == original_id:
#         warn(f'ReactionError: The {met.id} metabolite in the {rxn.id} reaction cannot be substituted as both a reactant and product.')
#         continue

In [None]:
from cobra.core.dictlist import DictList

# Scratch check: append a plain dictionary to an empty DictList and print it.
# NOTE(review): confirm that DictList accepts entries without an `id` attribute.
test = DictList()
entry = {'a':4, 'b':5}
test.append(entry)
print(test)

In [None]:
from collections import OrderedDict

# Scratch check: remove a key from an OrderedDict, then show the remaining
# mapping, the position of key 'x', and the available attributes.
x = OrderedDict([('x', 2), ('c', 4)])
x.pop('c')
remaining_keys = list(x.keys())
print(x, remaining_keys.index('x'), dir(x), sep='\n')

In [None]:
print(model1.annotation)
# print(dir(model1))

# for met in model.metabolites:
#     print(dir(met))
#     print(met.summary())
#     if met == model.metabolites[4]:
#         break


def _count_modelseed_links(model, preview=2):
    """Count the modelcompounds entries of `model` whose 'dblinks' contain 'ModelSeed'.

    The first `preview` entries are pretty-printed for inspection. Positions
    come from enumerate() instead of a .index() lookup per entry, which
    rescanned the collection on every iteration.
    """
    linked = 0
    for position, compound in enumerate(model.modelcompounds):
        if position < preview:
            pprint(compound)
        if 'ModelSeed' in compound['dblinks']:
            linked += 1
    return linked


mnx_count = _count_modelseed_links(model1)
print(f'\n\nmodel1 {mnx_count}/{len(model1.modelcompounds)}\n\n')

mnx_count = _count_modelseed_links(model2)
print(f'\n\nmodel2 {mnx_count}/{len(model2.modelcompounds)}')

In [None]:
from periodictable import formula

# print(dir(Formula))

# Scratch check: parse a multi-part formula and print three of its attributes.
fm = formula('K2SO4 CaSO4 H2O')
print(fm.mass_fraction, fm.atoms, fm.mass, sep='\n')

In [None]:
from numpy import negative
import re

# parse the reaction string
reaction = "(1) cpd00002[0] + (1) cpd00222[0] => (1) cpd00008[0] + (1) cpd00067[0] + (1) cpd00284[0]"
compounds = reaction.split('=>')
reactant, product = compounds[0], compounds[1]
reactants = [x.strip() for x in reactant.split('+')]
products = [x.strip() for x in product.split('+')]
# each term becomes ['(coefficient)', 'compound[index]']
reactant_met = [x.split(' ') for x in reactants]
product_met = [x.split(' ') for x in products]

# Raw string: '\d' in a plain literal is an invalid escape sequence. The optional
# fractional part keeps a coefficient such as "(0.5)" from being read as 0.
stoich_pattern = re.compile(r'(\d+(?:\.\d+)?)')

# assemble a dictionary for the reaction: reactants negative, products positive
reaction_dict = {}
for is_reactant, side in ((True, reactant_met), (False, product_met)):
    for met in side:
        stoich = float(stoich_pattern.search(met[0]).group())
        # NOTE(review): both the [0] and [1] indices are mapped to "_c0" — confirm
        # that [1] is not meant to be a different compartment.
        met[1] = met[1].replace('[0]', '_c0').replace('[1]', '_c0')
        reaction_dict[met[1]] = negative(stoich) if is_reactant else stoich


print(reactant_met, product_met)
print(reaction_dict)