In [2]:
import os
import cobra
import cobrakbase
import json
import csv
import logging
import cplex
import optlang
import re
from optlang.symbolics import Zero, add
import cobra.util.solver as sutil
from cobrakbase.core.converters import KBaseFBAModelToCobraBuilder
from cobrakbase.Workspace.WorkspaceClient import Workspace as WorkspaceClient
from cobrakbase.core.kbase_object_factory import KBaseObjectFactory
from cobrakbase.core.fba_utilities import KBaseFBAUtilities
from cobra.core.dictlist import DictList
from cobra.core import Gene, Metabolite, Model, Reaction
from IPython.core.display import HTML


cobrakbase 0.2.7


In [46]:
params = {
    "workspace": 29280,
    "fbamodel_id" : 'MMSyn3',
    "fbamodel_workspace" : 29280,
	"media_id" : "Complete",
	"media_workspace" : "KBaseMedia",
	"target_reaction" : "bio1",
	"source_fbamodel_id" : "MMSyn3Expansion",
	"source_fbamodel_workspace" : 29280,
	"feature_ko_list" : [],
	"reaction_ko_list" : [],
	"blacklist" : [],
	"custom_bound_list" : [],
	"media_supplement_list" : [],
	"minimum_target_flux" : 0.1,
    "max" : 1,
    "default_uptake" : 0,
    "default_excretion" : 100,
    "add_modelseed_reactions" : 1,
    "use_modelseed_peak_hits": 1,
    "gapfilling_annotation_sources" : []
}
kbase_api = cobrakbase.KBaseAPI()
media = kbase_api.get_from_ws("ArgonneLBMedia","KBaseMedia")
kbmodel = kbase_api.get_from_ws(params["fbamodel_id"],params["fbamodel_workspace"])
sourcemodel = kbase_api.get_from_ws(params["source_fbamodel_id"],params["source_fbamodel_workspace"])
builder = KBaseFBAModelToCobraBuilder(kbmodel)
builder = builder.with_media(media)
model = builder.build()

model.solver = 'cplex'

metabolomics = kbase_api.get_from_ws("JCVI_Syn3_metabolomics",29280)
#Matching metabolomics data up to the ModelSEED
modelseed_hash = {}
if  params["add_modelseed_reactions"]*params["use_modelseed_peak_hits"] == 1:
    modelseed = cobrakbase.modelseed.from_local('/Users/chenry/code/ModelSEEDDatabase')
    modelseed_hash = metabolomics.map_to_modelseed(modelseed,"_c0")
print(len(modelseed_hash))

#Saving the list of original reactions
original_reactions = {}
for reaction in model.reactions:
    original_reactions[reaction.id] = 1
original_metabolites = {}
for metabolite in model.metabolites:
    original_metabolites[metabolite.id] = 1

#Building metabolite hash and printing for viz in escher
metabolites_with_peaks = dict()
for met in sourcemodel.modelcompounds:
    if hasattr(met,'dblinks'):
        if "KBWS/29280/JCVI_Syn3_metabolomics" in met.dblinks:
            array = met.dblinks["KBWS/29280/JCVI_Syn3_metabolomics"]
            metabolites_with_peaks[met.id] = 1
for peak in modelseed_hash:
    for cpd in modelseed_hash[peak]:
        metabolites_with_peaks[cpd] = 1
#Printing metabolites in TSV format
file = open("expanded_jcvi_metabolites.csv","w") 
file.write("metabolite,concentration\n")
for compound in metabolites_with_peaks:
    if compound in original_metabolites:
        file.write(compound+",1\n") 
    elif re.search('cpd', compound):
        file.write(compound+",10\n")
    elif re.search('enzc', compound):
        file.write(compound+",100\n")
    elif re.search('spontc', compound):
        file.write(compound+",100\n")    
file.close()

#Creating peak string from model
peakstring = sourcemodel.build_peakstring("KBWS/29280/JCVI_Syn3_metabolomics",modelseed_hash)

utilities = KBaseFBAUtilities(model,kbmodel,kbase_api,media = None,
                 default_uptake = 100,default_excretion = 100,blacklist = [])

#Extending model for gapfilling
utilities.default_uptake = 0
penalties = utilities.build_model_extended_for_gapfilling(1,[sourcemodel],[],10)

#Adding drain fluxes for co-reactants used by reaction rules
coreactants = [
    "cpd01569","cpd00203","cpd00364","cpd00071","cpd00022","cpd00196","cpd00013","cpd00051","cpd00059","cpd00132","cpd00146","cpd00011","cpd00010","cpd01160","cpd00084","cpd00335","cpd00055","cpd00106","cpd00027","cpd00023","cpd00053","cpd30635","cpd00229","cpd10147","cpd00040","cpd00025","cpd00119","cpd22829","cpd00039","cpd20696","cpd00428","cf00002","cpd00075","cf00003","cpd00007","cpd00009","cpd00016","cpd00019","cpd00017","cpd00078","cpd00081","cpd00073","cpd00001","cpd00044","cpd00202","cpd00004","cpd00003","cpd00024","cpd09249","cpd00127","cpd00045","cpd00029","cpd00936","cpd00012","cpd00895","cpd00113","cpd00359","cpd00002","cpd00008","cpd00018"
]
coreactant_drains = []
coreactant_drain_hash = {}
for reactant in coreactants:
    reactant = reactant+"_c0"
    if reactant not in model.metabolites:
        print("Reactant not found:"+reactant)
        next
    drain_reaction = utilities.add_drain_from_metabolite_id(
        reactant,
        -100,
        100,
        "DM_",
        "Demand for "
    )
    if drain_reaction != None and drain_reaction.id not in model.reactions:
        coreactant_drains.append(drain_reaction)
    coreactant_drain_hash[drain_reaction.id] = [reactant,drain_reaction] 
model.add_reactions(coreactant_drains)
print("1-done")

#Adding thermodynamic constraints
utilities.add_simple_thermo_constraints()
print("2-done")

#Adding metabolomics constraints
drain_fluxes = utilities.add_intracellular_metabolomics_constraints(peakstring,builder)
print("3-done")

#Setting objective function
target_reaction = None
if params["target_reaction"][0:3] == "bio":
    target_reaction = model.reactions.get_by_id(params["target_reaction"]+"_biomass")
else:
    target_reaction = model.reactions.get_by_id(params["target_reaction"])
sense = "max"
if params["max"] == 0:
    sense = "min"
biomass_objective = model.problem.Objective(
    1 * target_reaction.flux_expression,
    direction=sense)
model.objective = biomass_objective
print("4-done")

#Setting metabolomics objective
if target_reaction != None:
    target_reaction.lower_bound = 0.1
metabolite_objective = model.problem.Objective(
    Zero,
    direction="max")
obj_coef = dict()
for peak in utilities.metabolomics_peak_variables:
    obj_coef[utilities.metabolomics_peak_variables[peak]] = 1
model.objective = metabolite_objective
metabolite_objective.set_linear_coefficients(obj_coef)
print("6-done")

#Printing LP file
with open('GapfillSimpleThermoMetabolite.lp', 'w') as out:
    out.write(str(model.solver))

217
261
1-done
2-done
3-done
4-done
6-done


In [25]:
#Optimizing active metabolites
maxmet_solution = model.optimize()
print(maxmet_solution.objective_value)

In [28]:
#Constraining objective
max_metabolite_constraint = model.problem.Constraint(
    #Zero,lb=maxmet_solution.objective_value,ub=maxmet_solution.objective_value,name="max_metabolite_constraint"
    Zero,lb=236,ub=236,name="max_metabolite_constraint"
)
model.add_cons_vars(max_metabolite_constraint)
model.solver.update()
obj_coef = dict()
for peak in utilities.metabolomics_peak_variables:
    obj_coef[utilities.metabolomics_peak_variables[peak]] = 1
max_metabolite_constraint.set_linear_coefficients(obj_coef)

#Printing LP file
with open('GapfillSimpleThermoMetaboliteFixed.lp', 'w') as out:
    out.write(str(model.solver))


In [29]:
#Creating new objective to minimize reactions of all types
reaction_objective = model.problem.Objective(
    Zero,
    direction="min")
obj_coef = dict()
for reaction in model.reactions:
    if reaction.id in drain_fluxes:
        #REALLY minimizing drain fluxes
        obj_coef[reaction.reverse_variable] = 1000
        obj_coef[reaction.forward_variable] = 1000
    elif reaction.id[0:5] == "spont":
        obj_coef[reaction.reverse_variable] = 1
        obj_coef[reaction.forward_variable] = 1
    elif reaction.id in coreactant_drain_hash:
        if coreactant_drain_hash[reaction.id][0] in original_metabolites:
            obj_coef[reaction.reverse_variable] = 100
            obj_coef[reaction.forward_variable] = 100
        else:
            obj_coef[reaction.reverse_variable] = 3
            obj_coef[reaction.forward_variable] = 3
    elif reaction.id in penalties:
        #Minimizing gapfilled reactions
        if "reverse" in penalties[reaction.id]:
            obj_coef[reaction.reverse_variable] = abs(penalties[reaction.id]["reverse"])
        else:
            obj_coef[reaction.reverse_variable] = 0.1
        if "forward" in penalties[reaction.id]:
            obj_coef[reaction.forward_variable] = abs(penalties[reaction.id]["forward"])
        else:
            obj_coef[reaction.forward_variable] = 0.1
    elif reaction.id[0:3].lower() != "ex_" and reaction.id[0:3].lower() != "dm_":
        #Minimizing all other reactions
        obj_coef[reaction.reverse_variable] = 0.1
        obj_coef[reaction.forward_variable] = 0.1
model.objective = reaction_objective
reaction_objective.set_linear_coefficients(obj_coef)

#Printing LP file
with open('GapfillSimpleThermoMetaboliteFixedObj.lp', 'w') as out:
    out.write(str(model.solver))
    

In [None]:
#Optimizing active reactions
minrxnsolution = model.optimize()


In [44]:
solution_data = ""
with open('/Users/chenry/Dropbox/workspace/KBase Project/MetabolomicsMethods/JCVI_fluxes.txt', 'r') as solution_file:
    solution_data = solution_file.read()

line_array = solution_data.split("\n")
reaction_fluxes = dict()
drain_fluxes = dict()
exchange_fluxes = dict()
modelflux = 0
modelseedflux = 0
exchange = 0
drain = 0
enz = 0
spont = 0
for line in line_array:
    array = line.split("\t")
    varname = array[0]
    varname = varname.split("#")[0]
    vartype = "rxn"
    if len(array) >= 2:
        flux = float(array[1])
        if re.search('^rxn\d+_[a-z]\d+$', varname) != None: 
            if varname in original_reactions:
                modelflux += 1
            else:
                modelseedflux += 1
        elif re.search('^(rxn\d+_[a-z]\d+)_reverse_[a-z0-9]+$', varname) != None:
            m = re.search('^(rxn\d+_[a-z]\d+)_reverse_[a-z0-9]+$', varname)
            varname = m[1]
            flux = -1*float(array[1])
            if varname in original_reactions:
                modelflux += 1
            else:
                modelseedflux += 1
        elif re.search('^(DM_.+)$', varname) != None:
            m = re.search('^(DM_.+)$', varname)
            vartype = "dm"
            varname = m[1]
            drain += 1
        elif re.search('^_(EX_.+)_reverse_[a-z0-9]+$', varname) != None:
            m = re.search('^_(EX_.+)_reverse_[a-z0-9]+$', varname)
            vartype = "ex"
            varname = m[1]
            flux = -1*float(array[1])
            exchange += 1
        elif re.search('^_(EX_.+)$', varname) != None:
            m = re.search('^_(EX_.+)$', varname)
            vartype = "ex"
            varname = m[1]
            exchange += 1
        elif re.search('^_(enzr\d+_c0)$', varname) != None:
            m = re.search('^_(enzr\d+_c0)$', varname)
            varname = m[1]
            enz += 1
        elif re.search('^(spontr\d+_c0)$', varname) != None:
            m = re.search('^(spontr\d+_c0)$', varname)
            varname = m[1]
            spont += 1
        elif re.search('^_(enzr\d+_c0_reverse_[a-z0-9]+)$', varname) != None:
            m = re.search('^_(enzr\d+_c0_reverse_[a-z0-9]+)$', varname)
            varname = m[1]
            flux = -1*float(array[1])
            enz += 1
        elif re.search('^(spontr\d+_c0)_reverse_[a-z0-9]+$', varname) != None:
            m = re.search('^(spontr\d+_c0)_reverse_[a-z0-9]+$', varname)
            varname = m[1]
            flux = -1*float(array[1])
            spont += 1
        elif re.search('^_*([A-Za-z0-9-]+_c0)$', varname) != None:
            m = re.search('^_*([A-Za-z0-9-]+_c0)$', varname)
            varname = m[1]
            modelflux += 1
        elif re.search('^_*([A-Za-z0-9-]+_c0)_reverse_[a-z0-9]+$', varname) != None:
            m = re.search('^_*([A-Za-z0-9-]+_c0)_reverse_[a-z0-9]+$', varname)
            varname = m[1]
            flux = -1*float(array[1])
            modelflux += 1
        if vartype == "rxn":
            if varname in model.reactions:
                reaction_fluxes[varname] = flux
            #else:
                #print(varname+" reaction not in model!")
        elif vartype == "ex":
            if varname in model.metabolites:
                exchange_fluxes[varname] = flux
            #else:
                #print(varname+" exchange not in model!")
        elif vartype == "dm":
            if varname in model.metabolites:
                drain_fluxes[varname] = flux
            #else:
                #print(varname+" drain not in model!")
                
#Removing gapfilled reactions that have no flux
metabolite_hash = {}
reaction_list = []
for reaction in model.reactions:
    if reaction.id in original_reactions or reaction.id in reaction_fluxes or reaction.id in drain_fluxes or reaction.id in exchange_fluxes:
        reaction_list.append(reaction)
        for metabolite in reaction.metabolites:
            metabolite_hash[metabolite.id] = metabolite
new_model = Model("Expanded MMSyn3 model")
new_model.genes = model.genes
new_model.add_metabolites(metabolite_hash.values())
new_model.add_reactions(reaction_list)

#Printing model in json format
cobra.io.save_json_model(new_model, "expanded_jcvi.json")
#Printing fluxes in TSV format
file = open("expanded_jcvi_fluxes.csv","w") 
file.write("reaction,flux\n")
for reaction in reaction_fluxes:
    flux = reaction_fluxes[reaction]
    if reaction in original_reactions:
        flux += 10
    elif re.search('spontr', reaction):
        flux += 100
    elif re.search('enzr', reaction):
        flux += 10000    
    elif re.search('rxn\d\d\d\d\d', reaction):
        flux += 1000
    file.write(reaction+","+str(flux)+"\n")
for reaction in exchange_fluxes:
    flux = exchange_fluxes[reaction]
    if reaction in original_reactions:
        flux += 10
    else:
        flux += 100
    file.write(reaction+","+str(flux)+"\n")
for reaction in drain_fluxes:
    flux = drain_fluxes[reaction]
    if reaction in original_reactions:
        flux += 10
    else:
        flux += 100
    file.write(reaction+","+str(flux)+"\n")
file.close()

print("modelflux:"+str(modelflux))
print("modelseedflux:"+str(modelseedflux))
print("exchange:"+str(exchange))
print("drain:"+str(drain))
print("enz:"+str(enz))
print("spont:"+str(spont))

modelflux:152
modelseedflux:208
exchange:166
drain:7
enz:84
spont:74


In [3]:
kbase_api = cobrakbase.KBaseAPI()

#Loading fluxes into a hash
flux_hash = {}
solution_data = ""
with open('/Users/chenry/Dropbox/workspace/KBase Project/MetabolomicsMethods/JCVI_fluxes.txt', 'r') as solution_file:
    solution_data = solution_file.read()
line_array = solution_data.split("\n")
count = 0
peak_solution_hash = {}
for line in line_array:
    array = line.split("\t")
    varname = array[0]
    varname = varname.split("#")[0] 
    if len(array) >= 2:
        flux = float(array[1])
        vartype = "rxn"
        if re.search('^(peak\.\d+)',varname) != None:
            m = re.search('^(peak\.\d+)', varname)
            vartype = "peak"
            varname = m[1]
        elif re.search('^rxn\d+_[a-z]\d+$', varname) != None: 
            vartype = "rxn"
        elif re.search('^(rxn\d+_[a-z]\d+)_reverse_[a-z0-9]+$', varname) != None:
            m = re.search('^(rxn\d+_[a-z]\d+)_reverse_[a-z0-9]+$', varname)
            vartype = "rxn"
            varname = m[1]
            flux = -1*float(array[1])
        elif re.search('^(DM_.+)$', varname) != None:
            m = re.search('^(DM_.+)$', varname)
            vartype = "dm"
            varname = m[1]
        elif re.search('^_(EX_.+)_reverse_[a-z0-9]+$', varname) != None:
            m = re.search('^_(EX_.+)_reverse_[a-z0-9]+$', varname)
            vartype = "ex"
            varname = m[1]
            flux = -1*float(array[1])
        elif re.search('^_(EX_.+)$', varname) != None:
            m = re.search('^_(EX_.+)$', varname)
            vartype = "ex"
            varname = m[1]
        elif re.search('^_(enzr\d+_c0)$', varname) != None:
            m = re.search('^_(enzr\d+_c0)$', varname)
            vartype = "rxn"
            varname = m[1]
        elif re.search('^(spontr\d+_c0)$', varname) != None:
            m = re.search('^(spontr\d+_c0)$', varname)
            vartype = "rxn"
            varname = m[1]
        elif re.search('^_(enzr\d+_c0_reverse_[a-z0-9]+)$', varname) != None:
            m = re.search('^_(enzr\d+_c0_reverse_[a-z0-9]+)$', varname)
            vartype = "rxn"
            varname = m[1]
            flux = -1*float(array[1])
        elif re.search('^(spontr\d+_c0)_reverse_[a-z0-9]+$', varname) != None:
            m = re.search('^(spontr\d+_c0)_reverse_[a-z0-9]+$', varname)
            vartype = "rxn"
            varname = m[1]
            flux = -1*float(array[1])
        elif re.search('^_*([A-Za-z0-9-]+_c0)$', varname) != None:
            m = re.search('^_*([A-Za-z0-9-]+_c0)$', varname)
            vartype = "rxn"
            varname = m[1]
        elif re.search('^_*([A-Za-z0-9-]+_c0)_reverse_[a-z0-9]+$', varname) != None:
            m = re.search('^_*([A-Za-z0-9-]+_c0)_reverse_[a-z0-9]+$', varname)
            vartype = "rxn"
            varname = m[1]
            flux = -1*float(array[1])
            modelflux += 1
        varname = varname.replace("__DASH__","-")
        if vartype == "rxn":
            count += 1
            corrected_flux = None
            if varname in flux_hash:
                print(varname+" repeated:"+str(flux_hash[varname])+";"+str(flux))
                if flux_hash[varname] == -1*flux:
                     corrected_flux = 0
            flux_hash[varname] = flux
            if corrected_flux != None:
                flux_hash[varname] = 0
        elif vartype == "peak":
            if varname in peak_solution_hash:
                print("Repeat peak:"+varname)
            peak_solution_hash[varname] = flux

print("Peaks:"+str(len(peak_solution_hash.keys())))
print("Reactions:"+str(count))
print("Distinct reaction:"+str(len(flux_hash.keys())))       
#Loading ModelSEED reactions and compounds
baseinchi_all = {}
baseinchi_modelseed_hash = {}
modelseed_rxn_hash = {}
modelseed_cpd_hash = {}
with open('/Users/chenry/code/fba_tools/data/Reactions.json') as json_file:
    input_data = json.load(json_file)
    for rxn in input_data:
        modelseed_rxn_hash[rxn["id"]+"_c0"] = rxn
with open('/Users/chenry/code/fba_tools/data/Compounds.json') as json_file:
    input_data = json.load(json_file)
    for cpd in input_data:
        modelseed_cpd_hash[cpd["id"]] = cpd
        cpd["reactions"] = 0
        cpd["peaks"] = {}
        cpd["baseinchi"] = ""
        if "inchikey" in cpd:
            cpd["baseinchi"] = cpd["inchikey"].split("-")[0]
    #Adding reactions counts to ModelSEED compounds
    for rxnid in modelseed_rxn_hash:
        if "compound_ids" in modelseed_rxn_hash[rxnid]:
            for cpd in modelseed_rxn_hash[rxnid]["compound_ids"]:
                modelseed_cpd_hash[cpd]["reactions"] += 1 
    #Loading ModelSEED compounds into base inchi hash - if collisions occur, keep version with more reactions
    #modelseed_cpd_hash["cpd00027"]["reactions"] = 10000000000
    for cpd in input_data:
        if len(cpd["baseinchi"]) > 0:
            if cpd["baseinchi"] not in baseinchi_all:
                baseinchi_all[cpd["baseinchi"]] = []
            baseinchi_all[cpd["baseinchi"]].append(cpd)
            if cpd["baseinchi"] in baseinchi_modelseed_hash:
                if cpd["reactions"] > baseinchi_modelseed_hash[cpd["baseinchi"]]["reactions"]:
                    baseinchi_modelseed_hash[cpd["baseinchi"]] = cpd
            else:
                baseinchi_modelseed_hash[cpd["baseinchi"]] = cpd

baseinchi_modelseed_hash["WQZGKKKJIJFFOK"] = modelseed_cpd_hash["cpd00027"]
#Printing supplementary data
model_compounds = []
model_reactions = []
compound_hash = {}
reaction_hash = {}
basemodel = kbase_api.get_object("MMSyn3",29280)
basemdl_cpd_hash = {}
mdl_rxn_hash = {}
mdl_cpd_hash = {}
baseinchi_hash = {}
for cpd in basemodel["modelcompounds"]:
    data = {
        "id":cpd["id"],
        "name":cpd["name"],
        "formula":cpd["formula"],
        "charge":cpd["charge"],
        "smiles":cpd["smiles"],
        "inchikey":cpd["inchikey"],
        "baseinchi":cpd["inchikey"].split("-")[0],
        "modelseed":"",
        "peaks":{},
        "flux":0,
        "generation":0,
        "reactions":0,
        "exp_rxns":0
    }
    if re.search("cpd\d+",cpd["id"]):
        data["modelseed"] = cpd["id"].split("_")[0]
    if cpd["id"] not in compound_hash:
        model_compounds.append(cpd["id"])
        compound_hash[cpd["id"]] = data
        mdl_cpd_hash[cpd["id"]] = data
        basemdl_cpd_hash[cpd["id"]] = data
for rxn in basemodel["modelreactions"]:
    original_id = rxn["string_attributes"]["original_id"]
    original_id = original_id.replace("R_","")
    data = {
        "id":rxn["id"],
        "modelseed":"",
        "name":rxn["name"],
        "operators":[],
        "equation":"",
        "definition":"",
        "generation":0,
        "flux":0,
        "genes":"",
        "original_id":original_id,
        "compounds":{}
    }
    model_reactions.append(rxn["id"])
    reaction_hash[rxn["id"]] = data
    mdl_rxn_hash[rxn["id"]] = data
    if re.search("rxn\d+",rxn["id"]):
        data["modelseed"] = rxn["id"].split("_")[0]
    if rxn["id"] in flux_hash:
        data["flux"] = flux_hash[rxn["id"]]
    if "imported_gpr" in rxn:
        data["genes"] = rxn["imported_gpr"]
    for rgt in rxn["modelReactionReagents"]:
        cpd_id = rgt["modelcompound_ref"].split("/")[-1]
        compound_hash[cpd_id]["reactions"] += 1
        compound_hash[cpd_id]["flux"] += abs(data["flux"])
        data["compounds"][cpd_id] = rgt["coefficient"]
for cpdid in compound_hash:
    cpd = compound_hash[cpdid]
    if len(cpd["baseinchi"]) > 0:
        if cpd["baseinchi"] not in baseinchi_all:
            baseinchi_all[cpd["baseinchi"]] = []
        baseinchi_all[cpd["baseinchi"]].append(cpd)
        if re.search("_e0",cpdid) == None:
            if cpd["baseinchi"] not in baseinchi_hash:
                baseinchi_hash[cpd["baseinchi"]] = cpd
            elif cpd["reactions"] > baseinchi_hash[cpd["baseinchi"]]["reactions"]:
                baseinchi_hash[cpd["baseinchi"]] = cpd
for biocpd in basemodel["biomasses"][0]["biomasscompounds"]:
    biocpdid = biocpd["modelcompound_ref"].split("/").pop()
    if biocpdid in compound_hash:
        compound_hash[biocpdid]["flux"] += flux_hash["bio1_biomass"]
    else:
        print("Not found biomass cpd:"+biocpdid)
    
#Loading the expansion and adding compounds to the base inchi hash
expansion_cpd_hash = {}
expansion_rxn_hash = {}
expansion_compounds = []
expansion_reactions = []
expmodel = kbase_api.get_object("MMSyn3Expansion",29280)
substitutions = {}
for cpd in expmodel["modelcompounds"]:
    data = {
        "id":cpd["id"],
        "name":cpd["name"],
        "formula":cpd["formula"],
        "charge":cpd["charge"],
        "smiles":cpd["smiles"],
        "inchikey":cpd["inchikey"],
        "baseinchi":cpd["inchikey"].split("-")[0],
        "modelseed":"",
        "peaks":{},
        "generation":0,
        "flux":0,
        "exp_rxns":0,
        "reactions":0
    }
    if "numerical_attributes" in cpd and "generation" in cpd["numerical_attributes"]:
        data["generation"] = cpd["numerical_attributes"]["generation"]
    #Checking if this is a match for an existing compound
    if cpd["id"] in basemdl_cpd_hash:
        test = 1
        #print(cpd["id"])
    elif len(data["baseinchi"]) > 0:
        if data["baseinchi"] in baseinchi_hash:
            data = baseinchi_hash[data["baseinchi"]]
            substitutions[cpd["id"]] = data["id"]
            print("Previous cpd match:"+cpd["id"]+";"+substitutions[cpd["id"]])
        elif data["baseinchi"] in baseinchi_modelseed_hash:
            #This is a new compound, but it is a match for the modelseed
            data["id"] = baseinchi_modelseed_hash[data["baseinchi"]]["id"]+"_c0"
            data["modelseed"] = baseinchi_modelseed_hash[data["baseinchi"]]["id"]
            substitutions[cpd["id"]] = data["id"]
            #print("ModelSEED cpd match:"+cpd["id"]+";"+substitutions[cpd["id"]])
        if data["baseinchi"] not in baseinchi_hash:
            baseinchi_hash[data["baseinchi"]] = data
        if data["baseinchi"] not in baseinchi_all:
            baseinchi_all[data["baseinchi"]] = []
        if data not in baseinchi_all[data["baseinchi"]]:
            baseinchi_all[data["baseinchi"]].append(data)
    #Adding compound expanded compound hash
    if data["id"] not in expansion_cpd_hash:
        expansion_cpd_hash[data["id"]] = data
        expansion_compounds.append(data["id"])
    #Adding compound to all compound hash
    if data["id"] not in compound_hash:
        compound_hash[data["id"]] = data
for rxn in expmodel["modelreactions"]:
    gen = 0
    if "generation" in rxn["numerical_attributes"]:
        gen = rxn["numerical_attributes"]["generation"]
    data = {
        "id":rxn["id"],
        "name":rxn["name"],
        "operators":[],
        "equation":"",
        "definition":"",
        "generation":gen,
        "flux":0,
        "compounds":{},
        "genes":""
    }
    reaction_hash[rxn["id"]] = data
    expansion_rxn_hash[rxn["id"]] = data
    expansion_reactions.append(data["id"])
    if "dblinks" in rxn and "PickAxe" in rxn["dblinks"]:
        for op in rxn["dblinks"]["PickAxe"]:
            op = op.replace("enzymatic.","")
            op = op.replace("spontaneous.","")
            data["operators"].append(op)
    if rxn["id"] in flux_hash:
        data["flux"] = flux_hash[rxn["id"]]
        model_reactions.append(rxn["id"])
        mdl_rxn_hash[rxn["id"]] = data
    for rgt in rxn["modelReactionReagents"]:
        cpd_id = rgt["modelcompound_ref"].split("/")[-1]
        if cpd_id in substitutions:
            cpd_id = substitutions[cpd_id]
        compound_hash[cpd_id]["exp_rxns"] += 1
        data["compounds"][cpd_id] = rgt["coefficient"]
        if rxn["id"] in flux_hash:
            compound_hash[cpd_id]["flux"] += abs(data["flux"])
            if cpd_id not in mdl_cpd_hash:
                mdl_cpd_hash[cpd_id] = compound_hash[cpd_id]
                model_compounds.append(cpd_id)
        
#Adding ModelSEED reactions and compounds with flux
model_rxn = 0
ms_rxn = 0
for rxn in flux_hash:
    if abs(flux_hash[rxn]) > 0:
        if rxn in mdl_rxn_hash:
            model_rxn += 1
        elif rxn not in mdl_rxn_hash and rxn in modelseed_rxn_hash:
            ms_rxn += 1
            data = {
                "id":rxn,
                "name":modelseed_rxn_hash[rxn]["name"],
                "operators":[],
                "equation":"",
                "definition":"",
                "generation":0,
                "flux":flux_hash[rxn],
                "genes":"",
                "compounds":{},
                "modelseed":rxn,
                "kbase_id":rxn
            }
            reaction_hash[rxn] = data
            model_reactions.append(rxn)
            mdl_rxn_hash[rxn] = data
            for item in modelseed_rxn_hash[rxn]["stoichiometry"]:
                array = item.split(":")
                cpd = array[1]
                suffix = "_c0"
                if array[2] == "1":
                    suffix = "_e0"
                cpdid = cpd+suffix
                if cpdid in substitutions:
                    cpdid = substitutions[cpdid]
                data["compounds"][cpdid] = float(array[0])
                if cpdid not in compound_hash:
                    cpddata = {
                        "id":cpdid,
                        "name":modelseed_cpd_hash[cpd]["name"]+suffix,
                        "formula":modelseed_cpd_hash[cpd]["formula"],
                        "charge":modelseed_cpd_hash[cpd]["charge"],
                        "inchikey":"",
                        "baseinchi":"",
                        "modelseed":cpd,
                        "peaks":{},
                        "flux":0,
                        "reactions":0
                    }
                    if "inchikey" in modelseed_cpd_hash[cpd]:
                        cpddata["inchikey"] = modelseed_cpd_hash[cpd]["inchikey"]
                        cpddata["baseinchi"] = modelseed_cpd_hash[cpd]["inchikey"].split("-")[0]
                    compound_hash[cpdid] = cpddata
                compound_hash[cpdid]["reactions"] += 1
                compound_hash[cpdid]["flux"] += abs(flux_hash[rxn])
                if cpdid not in mdl_cpd_hash:
                    mdl_cpd_hash[cpdid] = compound_hash[cpdid]
                    model_compounds.append(cpdid)
        #else:
            #print("No match:"+rxn)
            
#Loading metabolomics, matching to the ModelSEED, matching to the model, matching to the expansion, and printing
peaks = []
metabolomics = kbase_api.get_from_ws("JCVI_Syn3_metabolomics",29280)
fluxcount = 0
peaks_found = {}
peak_active = 0
for peak in metabolomics.peaks:
    data = {
        "id":peak.id,
        "name":peak.attributes["name"],
        "rt":peak.attributes["retention_time"],
        "mz":peak.attributes["aggregate_mz"],
        "polarity":peak.attributes["polarity"],
        "formula":peak.attributes["formula"],
        "smiles":peak.attributes["smiles"],
        "inchikey":peak.attributes["inchikey"],
        "modelseed_compounds":{},
        "model_compounds":{},
        "expansion_compounds":{},
        "flux":0,
        "solution":0
    }
    peaks_found[peak.id] = 1
    if peak.id in peak_solution_hash:
        data["solution"] = 1
        peak_active += 1
    peaks.append(data)
    if len(data["inchikey"]) > 0:
        baseinchi = data["inchikey"].split("-")[0]
        if baseinchi in baseinchi_all:
            for cpd in baseinchi_all[baseinchi]:
                cpdid = cpd["id"]
                cpdid = cpdid.replace("_c0","")
                cpdid = cpdid.replace("_e0","")
                if cpdid+"_c0" in compound_hash:
                    cpd = compound_hash[cpdid+"_c0"]
                cpd["peaks"][data["id"]] = 1
                if "flux" in cpd:
                    data["flux"] += abs(cpd["flux"])
                if cpdid in modelseed_cpd_hash:
                    data["modelseed_compounds"][cpdid] = 1
                if cpdid+"_c0" in basemdl_cpd_hash:
                    data["model_compounds"][cpdid] = 1
                if cpdid+"_c0" in expansion_cpd_hash:
                    data["expansion_compounds"][cpdid] = 1
    if data["flux"] > 0:
        fluxcount += 1
print("Flux count:"+str(fluxcount))
print("Active peak count:"+str(peak_active))

for peak in peak_solution_hash:
    if peak not in peaks_found:
        print("Peak not found:"+peak)

#Building reaction equations
for rxn in reaction_hash:
    data = reaction_hash[rxn]
    new_hash = {}
    products = ""
    def_products = ""
    for cpdid in data["compounds"]:
        cpddata = compound_hash[cpdid]
        if data["compounds"][cpdid] < 0:
            if len(data["equation"]) > 0:
                data["equation"] += " + "
                data["definition"] += " + "
            if data["compounds"][cpdid] != -1:
                data["equation"] += "("+str(-1*data["compounds"][cpdid])+") "
                data["definition"] += "("+str(-1*data["compounds"][cpdid])+") "    
            data["equation"] += cpddata["id"]
            data["definition"] += cpddata["name"]
        elif data["compounds"][cpdid] > 0:
            if len(products) > 0:
                products += " + "
                def_products += " + "
            if data["compounds"][cpdid] != 1:
                products += "("+str(data["compounds"][cpdid])+") "
                def_products += "("+str(data["compounds"][cpdid])+") "
            products += cpddata["id"]
            def_products += cpddata["name"]
    data["equation"] += " => "+products
    data["definition"] += " => "+def_products                   
  
#Loading COBRA model and swapping out missed compound matches
model= cobra.io.load_json_model("JCVISyn3Exp-cobrapy_model.json")
met_rep = {}
remove_list = []
for metabolite in substitutions:
    if metabolite in model.metabolites:
        met = model.metabolites.get_by_id(metabolite)
        if substitutions[metabolite] in model.metabolites:
            rep = model.metabolites.get_by_id(substitutions[metabolite])
            met_rep[met] = rep
            remove_list.append(met)
for reaction in model.reactions:
    for met in met_rep:
        if met in reaction.metabolites:
            reaction.add_metabolites({met_rep[met]:reaction.metabolites[met]},combine=True)    
model.remove_metabolites(remove_list,False)

#Printing final data
file = open("JCVISyn3Exp_model_reactions.tsv","w") 
file.write("Reaction ID\tName\tOperators\tEquation\tDefinition\tGenes\tFlux\n")
columns = ["name","operators","equation","definition","genes","flux"]
for rxnid in model_reactions:
    item = reaction_hash[rxnid]
    item["operators"] = ", ".join(item["operators"])
    line = item["id"]
    item["flux"] = str(item["flux"])
    for column in columns:
            line += "\t"
            if column in item:
                line += item[column]
    file.write(line+"\n")
file.close()

file = open("JCVISyn3Exp_model_compounds.tsv","w") 
file.write("Compound ID\tName\tFormula\tCharge\tSmiles\tInchikey\tModelSEED\tPeaks\tFlux\n")
columns = ["name","formula","charge","smiles","inchikey","modelseed","peaks","flux"]
for cpdid in model_compounds:
    item = compound_hash[cpdid]
    line = item["id"]  
    item["peaks"] = ", ".join(item["peaks"].keys())
    item["flux"] = str(item["flux"])
    item["charge"] = str(item["charge"])
    for column in columns:
            line += "\t"
            if column in item:
                line += item[column]
    file.write(line+"\n")
file.close()
    
file = open("JCVISyn3_full_expansion_reactions.tsv","w") 
file.write("Reaction ID\tName\tOperators\tEquation\tDefinition\tGeneration\n")
columns = ["name","operators","equation","definition","generation"]
for rxnid in expansion_reactions:
    item = reaction_hash[rxnid]
    item["operators"] = ", ".join(item["operators"])
    line = item["id"]
    item["generation"] = str(item["generation"])
    for column in columns:
            line += "\t"
            if column in item:
                line += item[column]
    file.write(line+"\n")
file.close()
    
file = open("JCVISyn3_full_expansion_compounds.tsv","w") 
file.write("Compound ID\tName\tFormula\tCharge\tSmiles\tInchikey\tModelSEED\tPeaks\tGeneration\n")
columns = ["name","formula","charge","smiles","inchikey","modelseed","peaks","generation"]
for cpdid in expansion_compounds:
    item = compound_hash[cpdid]
    line = item["id"]
    if not isinstance(item["charge"], str):
        item["charge"] = str(item["charge"])
    if not isinstance(item["generation"], str):
        item["generation"] = str(item["generation"])
    if not isinstance(item["peaks"], str):
        item["peaks"] = ", ".join(item["peaks"].keys())
    for column in columns:
            line += "\t"
            if column in item:
                line += item[column]
    file.write(line+"\n")
file.close()
    
file = open("JCVISyn3_peaks.tsv","w") 
file.write("Peak ID\tName\tRT\tM/Z\tPolarity\tFormula\tSmiles\tInchikey\tModelSEED compounds\tModel compounds\tExpansion compounds\tFlux\tSolution\n")
columns = ["name","rt","mz","polarity","formula","smiles","inchikey","modelseed_compounds","model_compounds","expansion_compounds","flux","solution"]
for item in peaks:
    line = item["id"]
    item["modelseed_compounds"] = ", ".join(item["modelseed_compounds"].keys())
    item["model_compounds"] = ", ".join(item["model_compounds"].keys())
    item["expansion_compounds"] = ", ".join(item["expansion_compounds"].keys())
    item["flux"] = str(item["flux"])
    item["solution"] = str(item["solution"])
    for column in columns:
            line += "\t"
            if column in item:
                line += item[column]
    file.write(line+"\n")
file.close()
    
#Print SBML
modelseed = cobrakbase.modelseed.from_local('/Users/chenry/code/ModelSEEDDatabase')
cobrakbase.annotate_model_with_modelseed(model, modelseed)
filename = "JCVISyn3Exp_model.xml"
cobra.io.write_sbml_model(model,filename)
filename = "JCVISyn3Exp_model.json"
cobra.io.save_json_model(model,filename)

rxn01548_c0 repeated:0.0005;-0.0005
rxn01859_c0 repeated:6.9e-05;-0.000406
rxn01446_c0 repeated:0.0005;-0.0005
rxn01513_c0 repeated:0.0005;-0.000337
rxn01127_c0 repeated:0.000406;-0.0005
rxn01322_c0 repeated:0.0005;-0.0005
rxn01374_c0 repeated:0.0005;-0.0005
rxn12846_c0 repeated:0.0005;-0.0005
rxn01729_c0 repeated:0.0005;-0.0005
rxn08471_c0 repeated:0.0005;-0.0005
rxn00514_c0 repeated:0.0005;-0.0005
rxn09265_c0 repeated:0.0005;-0.0005
rxn00758_c0 repeated:0.0005;-0.0005
rxn12847_c0 repeated:0.0005;-0.0005
rxn04068_c0 repeated:0.0005;-0.0005
rxn00879_c0 repeated:0.0005;-0.0005
rxn02275_c0 repeated:0.0005;-0.0005
rxn01531_c0 repeated:0.0005;-0.0005
rxn03484_c0 repeated:0.0005;-0.0005
rxn02414_c0 repeated:0.0005;-0.0005
rxn02028_c0 repeated:0.00025;-0.00075
rxn00751_c0 repeated:0.0005;-0.0005
rxn00941_c0 repeated:0.000163;-0.000174
rxn06077_c0 repeated:0.0005;-0.0005
rxn00469_c0 repeated:0.00025;-0.00025
rxn00160_c0 repeated:0.00025;-0.00025
rxn03339_c0 repeated:0.0005;-0.0005
rxn01537_c0

Flux count:182
Active peak count:236


In [51]:

        
   


if len(data["inchikey"]) > 0:
        if cpd["id"] not in mdl_id_hash:
            #First check if this compound matches an existing compound
            if data["baseinchi"] in baseinchi_hash:
                replacements[cpd["id"]] =  baseinchi_hash[data["baseinchi"]]["id"]
            else:
                baseinchi_hash[data["baseinchi"]] = data
                baseinchi_expansion_hash[data["baseinchi"]] = data
                expansion_compounds.append(data)
                compound_hash[cpd["id"]] = data
                #Checking if this compound matches the ModelSEED
                if data["baseinchi"] in baseinchi_modelseed_hash:
                    data["modelseed"] = baseinchi_modelseed_hash[data["baseinchi"]["id"]
                    if baseinchi_modelseed_hash[data["baseinchi"]["id"] != cpd["id"]:
                        print("ModelSEED ID mismatch:"+baseinchi_modelseed_hash[data["baseinchi"]["id"]+";"+cpd["id"])
        else:
            baseinchi_expansion_hash[data["baseinchi"]] = compound_hash[cpd["id"]]
            expansion_compounds.append(compound_hash[cpd["id"]])

SyntaxError: invalid syntax (<ipython-input-51-b7ba028d2426>, line 15)