In [1]:
import platform
print("python version " + platform.python_version())
import sys
import json
import cobra
import cplex
import re
import os
from os.path import exists
import logging
from configparser import ConfigParser
config = ConfigParser()
config.read("config.cfg")
paths = config.get("script", "syspaths").split(";")
for path in paths:
    sys.path.append(path)
import cobrakbase
from escher import Builder
from optlang.symbolics import Zero, add
from modelseedpy import MSPackageManager, MSGapfill, FBAHelper, MSGrowthPhenotypes, MSModelUtil, MSATPCorrection
from cobrakbase.core.kbasefba.newmodeltemplate_builder import NewModelTemplateBuilder
from annotation_ontology_api.annotation_ontology_apiServiceClient import annotation_ontology_api
from modelseedpy.helpers import get_template
from sklearn.metrics import r2_score
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import HTML
print("Required modules loaded")

python version 3.7.6
cobrakbase 0.2.8
Required modules loaded


In [3]:
#This code saves the Biolog simulation results from each model into a dataframe
from cobra.flux_analysis import flux_variability_analysis
#kbase_api = cobrakbase.KBaseAPI()
#Reading genome list
lines = []
genomes = []
with open('FullGenomeList.txt') as f:
    lines = f.readlines()
for line in lines:
    line = line.rstrip("\n")
    genomes.append(line.split("\t"))
data = {"GenomeID":[],"ModelType":[]}
mdltypes = ["DRAM"]#,"DRAM","DRAM.RAST"
biologs = kbase_api.list_objects(101536, object_type="KBasePhenotypes.PhenotypeSet", include_metadata=True)
genome_biologs = {}
for item in biologs:
    if "GenomeID" in item[10]:
        kbpheno = kbase_api.get_object(item[1], 101536)
        genome_biologs[item[10]["GenomeID"]] = MSGrowthPhenotypes.from_kbase_object(kbpheno,kbase_api)
print("Biologs loaded!")
data = {"GenomeID/Phenotype":[],"ModelType":[],"Accuracy":[],"TP":[],"TN":[],"FP":[],"FN":[]}
phenotype_data = {}
#with open('Phenotypes.json', 'r') as f:
#    phenotype_data = json.load(f)
count = 0
for mdltype in mdltypes:
    print(mdltype)
    for genome in genomes:
        genomeid = genome[0]
        print(genomeid,count)
        count += 1
        pheno = genome_biologs[genomeid]
        if exists("GFModels/RAST/"+genomeid+".json") and exists("GFModels/DRAM/"+genomeid+".json") and exists("GFModels/DRAM.RAST/"+genomeid+".json"):
            modelws = 101658
            if genome[2] == "Refseq":
                modelws = 101656
            gfmodel = cobra.io.load_json_model("GFModels/"+mdltype+"/"+genomeid+".json")
            output = pheno.simulate_phenotypes(gfmodel,"bio1",True)
            data["GenomeID/Phenotype"].append(genomeid)
            data["ModelType"].append(mdltype)
            total = output["summary"]["Count"][1] + output["summary"]["Count"][2] + output["summary"]["Count"][3] + output["summary"]["Count"][4]
            data["Accuracy"].append((output["summary"]["Count"][1]+output["summary"]["Count"][2])/total)
            data["TP"].append(output["summary"]["Count"][1]/total)
            data["TN"].append(output["summary"]["Count"][2]/total)
            data["FP"].append(output["summary"]["Count"][3]/total)
            data["FN"].append(output["summary"]["Count"][4]/total)
            for index, row in output["details"].iterrows():
                if row["Phenotype"] not in phenotype_data:
                    phenotype_data[row["Phenotype"]] = {}
                if mdltype not in phenotype_data[row["Phenotype"]]:
                    phenotype_data[row["Phenotype"]][mdltype] = {"Accuracy":0,"TP":0,"TN":0,"FP":0,"FN":0,"Count":0}
                phenotype_data[row["Phenotype"]][mdltype]["Count"] += 1
                if row["Class"] == "CP":
                    phenotype_data[row["Phenotype"]][mdltype]["TP"] += 1
                    phenotype_data[row["Phenotype"]][mdltype]["Accuracy"] += 1
                if row["Class"] == "CN":
                    phenotype_data[row["Phenotype"]][mdltype]["TN"] += 1
                    phenotype_data[row["Phenotype"]][mdltype]["Accuracy"] += 1
                if row["Class"] == "FP":
                    phenotype_data[row["Phenotype"]][mdltype]["FP"] += 1
                if row["Class"] == "FN":
                    phenotype_data[row["Phenotype"]][mdltype]["FN"] += 1
            with open('Phenotypes-DRAM.json', 'w') as f:
                json.dump(phenotype_data, f, indent=4)
            df = pd.DataFrame(data)
            df.to_csv("biolog_simulations_DRAM.csv")
for pheno in phenotype_data:
    for mdltype in phenotype_data[pheno]:
        data["GenomeID/Phenotype"].append(pheno)
        data["ModelType"].append(mdltype)
        data["Accuracy"].append(phenotype_data[pheno][mdltype]["Accuracy"]/phenotype_data[pheno][mdltype]["Count"])
        data["TP"].append(phenotype_data[pheno][mdltype]["TP"]/phenotype_data[pheno][mdltype]["Count"])
        data["TN"].append(phenotype_data[pheno][mdltype]["TN"]/phenotype_data[pheno][mdltype]["Count"])
        data["FP"].append(phenotype_data[pheno][mdltype]["FP"]/phenotype_data[pheno][mdltype]["Count"])
        data["FN"].append(phenotype_data[pheno][mdltype]["FN"]/phenotype_data[pheno][mdltype]["Count"])
df = pd.DataFrame(data)
df.to_csv("biolog_simulations_DRAM.csv")
HTML(df.to_html(render_links=True, escape=False))

Biologs loaded!
DRAM
1341683.3 0
1341683.5 1
1217677.3 2
487316.5 3
202950.11 4
62977.6 5
1341679.5 6
1217667.3 7
1217693.3 8
1120925.3 9
1120925.4 10
1217655.3 11
981324.3 12
1120929.6 13
1217715.3 14
1120926.3 15
1217692.3 16
1217653.3 17
1148157.7 18
520708.3 19
1217672.3 20
520709.3 21
106654.22 22
1217628.3 23
1217635.3 24
1217643.3 25
470.9146 26
575584.19 27
575584.33 28
470.9249 29
1120928.3 30
202955.4 31
1217658.3 32
52133.21 33
52133.21 34
1197884.5 35
52133.20 36
1191460.12 37
52133.18 38
225937.3 39
208964.12 40
303.176 41
303.176 42
321662.16 43
1148509.10 44
1148509.5 45
1148509.9 46
702115.9 47
702115.10 48
321846.3 49
286.2272 50
294.204 51
294.160 52
2054929.3 53
294.203 54
294.161 55
294.165 56
286.2315 57
286.2260 58
286.2262 59
379731.5 60
644801.3 61
323850.11 62
326297.10 63
325240.15 64
402882.13 65
693973.6 66
407976.7 67
211586.12 68
94122.6 69
243277.254 70
345073.21 71
754260.3 72
416269.6 73
754259.3 74
754262.3 75
149539.1451 76
90371.3209 77
99287.12 78
1

Unnamed: 0,GenomeID/Phenotype,ModelType,Accuracy,TP,TN,FP,FN
0,1341683.3,DRAM,0.828125,0.046875,0.78125,0.15625,0.015625
1,1341683.5,DRAM,0.828125,0.046875,0.78125,0.15625,0.015625
2,1217677.3,DRAM,0.78125,0.25,0.53125,0.046875,0.171875
3,487316.5,DRAM,0.609375,0.234375,0.375,0.03125,0.359375
4,202950.11,DRAM,0.828125,0.21875,0.609375,0.078125,0.09375
5,62977.6,DRAM,0.734375,0.015625,0.71875,0.265625,0.0
6,1341679.5,DRAM,0.8125,0.046875,0.765625,0.109375,0.078125
7,1217667.3,DRAM,0.828125,0.046875,0.78125,0.140625,0.03125
8,1217693.3,DRAM,0.8125,0.046875,0.765625,0.140625,0.046875
9,1120925.3,DRAM,0.765625,0.03125,0.734375,0.140625,0.09375


In [2]:
import json
kbase_api = cobrakbase.KBaseAPI()
genome_hash = {}
biologs = kbase_api.list_objects(101536, object_type="KBasePhenotypes.PhenotypeSet", include_metadata=True)
for item in biologs:
    if "GenomeID" in item[10]:
        genome_hash[item[10]["GenomeID"]] = {}
refgenomes = kbase_api.list_objects(89638, object_type="KBaseGenomes.Genome", include_metadata=True)
for genome in refgenomes:
    if genome[1] in genome_hash:
        genome_hash[genome[1]]["info"] = genome[1]
        genome_hash[genome[1]]["source"] = "Refseq"
        genome_hash[genome[1]]["genes"] = genome[10]["Number of Protein Encoding Genes"]
        genome_hash[genome[1]]["taxonomy"] = genome[10]["Taxonomy"]
        genome_hash[genome[1]]["species"] = genome[10]["Name"]
        genome_hash[genome[1]]["domain"] = genome[10]["Domain"]
ccsrgenomes = kbase_api.list_objects(89639, object_type="KBaseGenomes.Genome", include_metadata=True)
for genome in ccsrgenomes:
    if genome[1] in genome_hash:
        genome_hash[genome[1]]["info"] = genome[1]
        genome_hash[genome[1]]["source"] = "CCESR"
        genome_hash[genome[1]]["genes"] = genome[10]["Number of Protein Encoding Genes"]
        genome_hash[genome[1]]["taxonomy"] = genome[10]["Taxonomy"]
        genome_hash[genome[1]]["species"] = genome[10]["Name"]
        genome_hash[genome[1]]["domain"] = genome[10]["Domain"]
with open('OrderedGenomes.tsv') as f:
    lines = f.readlines() 
for i in range(0,len(lines)):
    line = lines[i].rstrip("\n")
    array = line.split("\t")
    if array[0] in genome_hash:
        genome_hash[array[0]]["found"] = 1
        data = genome_hash[array[0]]
        print(array[0],array[1],data["source"],data["genes"],data["domain"],data["species"],data["taxonomy"], sep = '\t')
for g in genome_hash.keys():
    if "found" not in genome_hash[g]:
        data = genome_hash[g]
        print(g,"",data["source"],data["genes"],data["domain"],data["species"],data["taxonomy"], sep = '\t')

1341683.3	Acinetobacter brisouii	Refseq	2916	Bacteria	Acinetobacter brisouii CIP 110357	cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter brisouii:Acinetobacter brisouii CIP 110357
1341683.5	Acinetobacter brisouii	Refseq	2957	Bacteria	Acinetobacter brisouii CIP 110357 strain DSM 18516	cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter brisouii:Acinetobacter brisouii CIP 110357
1217677.3	Acinetobacter soli	Refseq	3110	Bacteria	Acinetobacter soli NIPH 2899	cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter soli:Acinetobacter soli NIPH 2899
487316.5	Acinetobacter soli LUH 14692	Refseq	3115	Bacteria	Acinetobacter soli strain KCTC 22184	cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter soli
202950.11	Acinetobacter

407148.6	Campylobacter jejuni 81116	Refseq	1599	Bacteria	Campylobacter jejuni subsp. jejuni 81116	cellular organisms:Bacteria:Proteobacteria:delta/epsilon subdivisions:Epsilonproteobacteria:Campylobacterales:Campylobacteraceae:Campylobacter:Campylobacter jejuni:Campylobacter jejuni subsp. jejuni:Campylobacter jejuni subsp. jejuni 81116
1365661.3	Campylobacter jejuni K1 	Refseq	1661	Bacteria	Campylobacter jejuni K1	cellular organisms:Bacteria:Proteobacteria:delta/epsilon subdivisions:Epsilonproteobacteria:Campylobacterales:Campylobacteraceae:Campylobacter:Campylobacter jejuni:Campylobacter jejuni K1
85963.7	Helicobacter pylori J99	Refseq	1587	Bacteria	Helicobacter pylori J99	cellular organisms:Bacteria:Proteobacteria:delta/epsilon subdivisions:Epsilonproteobacteria:Campylobacterales:Helicobacteraceae:Helicobacter:Helicobacter pylori:Helicobacter pylori J99
1321939.6	Helicobacter pylori UM037	Refseq	1695	Bacteria	Helicobacter pylori UM037	cellular organisms:Bacteria:Proteobacteria:delta/

In [34]:
kbase_api = cobrakbase.KBaseAPI()
anno_api = annotation_ontology_api()
gmm = kbase_api.get_from_ws("Carbon-D-Glucose","KBaseMedia")
#Reading genome list
lines = []
with open('GenomeList2.txt') as f:
    lines = f.readlines() 
for i in range(0,len(lines)):
    line = lines[i]. rstrip("\n")
    array = line.split("\t")
    genomeid = array[0]
    genomews = int(array[1])
    modelws = int(array[2])
    mdlid = genomeid+".RAST.mdl"
    rastid = genomeid+".fbamodel"
    if not exists("GFModels/RAST/"+genomeid+".json"):
        model = kbase_api.get_from_ws(mdlid,modelws)
        orig_model = kbase_api.get_from_ws(rastid,modelws)  
        #Computing reaction scores
        reaction_genes = {}
        output = anno_api.get_annotation_ontology_events({
            "input_ref" : str(genomews)+"/"+genomeid,
        })
        events = output["events"]
        for event in events:
            for gene in event["ontology_terms"]:
                for term in event["ontology_terms"][gene]:
                    if "modelseed_ids" in term:
                        for rxn in term["modelseed_ids"]:
                            newrxn = re.sub("^MSRXN:","",rxn)
                            if newrxn not in reaction_genes:
                                reaction_genes[newrxn] = {}
                            if gene not in reaction_genes[newrxn]:
                                reaction_genes[newrxn][gene] = 0            
                            reaction_genes[newrxn][gene] += 1
        #Computing reactions to add
        add_reactions = []
        for rxn in orig_model.reactions:
            if rxn.id in model.reactions:
                #Matching bounds for overlapping reactions
                other_rxn = model.reactions.get_by_id(rxn.id)
                other_rxn.upper_bound = rxn.upper_bound
                other_rxn.lower_bound = rxn.lower_bound
            else:
                coreid = rxn.id.split("_")[0]
                if coreid in reaction_genes:
                    bestgene = None
                    for gene in reaction_genes[coreid]:
                        if bestgene == None or reaction_genes[coreid][gene] > reaction_genes[coreid][bestgene]:
                            bestgene = gene
                    rxn.gene_reaction_rule = bestgene
                add_reactions.append(rxn)
        model.add_reactions(add_reactions)
        pkgmgr = MSPackageManager.get_pkg_mgr(model)
        pkgmgr.getpkg("KBaseMediaPkg").build_package(gmm)
        solution = model.optimize()
        if solution.objective_value > 0.01:
            print(genomeid+"\t"+str(solution.objective_value))
            cobra.io.save_json_model(model,"GFModels/RAST/"+genomeid+".json")
        else:
            print(genomeid+" failed validation and will not be saved!")

384.349	0.4025036208370415
242606.49	0.6101142502762357
1822464.28	0.761631469671809
242605.50 failed validation and will not be saved!
237.94	0.6599233455868145
110932.16	0.6116441175494638
242605.55	0.46686131609583764
381.96	0.7519033532028354
242606.50	0.5958188585379929
1883.1831	0.6329534263749929


IndexError: list index out of range

In [37]:
kbase_api = cobrakbase.KBaseAPI()
anno_api = annotation_ontology_api()
core = NewModelTemplateBuilder.from_dict(get_template('template_core'), None).build()
gmm = kbase_api.get_from_ws("Carbon-D-Glucose","KBaseMedia")
glc_o2_atp_media = kbase_api.get_from_ws("Glc.O2.atp",94026)
#Reading genome list
lines = []
with open('GenomeList2.txt') as f:
    lines = f.readlines()
mdltypes = ["DRAM.RAST"]#,"DRAM"
for other_type in mdltypes:
    for i in range(0,len(lines)):
        line = lines[i]. rstrip("\n")
        array = line.split("\t")
        genomeid = array[0]
        genomews = int(array[1])
        modelws = int(array[2])
        #Retrieving models
        if exists("GFModels/RAST/"+genomeid+".json") and not exists("GFModels/"+other_type+"/"+genomeid+".json"):
            rast_model = cobra.io.load_json_model("GFModels/RAST/"+genomeid+".json")
            other_model = kbase_api.get_from_ws(genomeid+"."+other_type+".mdl",modelws)
            pkgmgr = MSPackageManager.get_pkg_mgr(rast_model)
            otherpkgmgr = MSPackageManager.get_pkg_mgr(other_model)
            #Computing the ATP production of base production
            atpmethod = MSATPCorrection(rast_model,core,[glc_o2_atp_media])
            evaluation = atpmethod.evaluate_growth_media()
            atpmethod.restore_noncore_reactions()
            threshold = 4*evaluation[glc_o2_atp_media.id]
            if threshold > 50:
                threshold = 50
            output = FBAHelper.add_atp_hydrolysis(other_model,"c0")
            tests = [{"media":glc_o2_atp_media,"objective":output["reaction"].id,"is_max_threshold":True,"threshold":threshold}]
            #Running pFBA to get miminal active fluxes needed for growth
            pkgmgr.getpkg("KBaseMediaPkg").build_package(gmm)
            solution = cobra.flux_analysis.pfba(rast_model)
            #Adding active reactions from RAST model to other model if not already there
            add_reactions = []
            for reaction in rast_model.reactions:  
                if reaction.id in solution.fluxes and abs(solution.fluxes[reaction.id]) > 0:
                    if reaction.id not in other_model.reactions:
                        add_reactions.append(reaction)
                    else:
                        #Synchronizing bounds but leaving genes in other model intact
                        other_rxn = other_model.reactions.get_by_id(reaction.id)
                        other_rxn.lower_bound = reaction.lower_bound
                        other_rxn.upper_bound = reaction.upper_bound
            other_model.add_reactions(add_reactions)
            #Now extending the core reactions from RAST model with all other reactions
            extend_reactions = []
            for reaction in other_model.reactions:
                if reaction.id not in solution.fluxes or abs(solution.fluxes[reaction.id]) == 0:
                    if reaction.lower_bound < 0:
                        extend_reactions.append([reaction,"<"])
                    if reaction.upper_bound > 0:
                        extend_reactions.append([reaction,">"])
                elif solution.fluxes[reaction.id] < 0 and reaction.upper_bound > 0:
                    extend_reactions.append([reaction,">"])
                elif solution.fluxes[reaction.id] > 0 and reaction.lower_bound < 0:
                    extend_reactions.append([reaction,"<"])
            filtered_list = FBAHelper.reaction_expansion_test(other_model,extend_reactions,tests,otherpkgmgr)
            #Removing all filtered reactions
            remove_list = []
            for item in filtered_list:
                other_rxn = other_model.reactions.get_by_id(item[0].id)
                if item[1] == ">":
                    other_rxn.upper_bound = 0
                else:
                    other_rxn.lower_bound = 0
                if other_rxn.upper_bound == 0 and other_rxn.lower_bound == 0:
                    remove_list.append(other_rxn)
            if len(remove_list) > 0:
                other_model.remove_reactions(remove_list)
            other_model.objective = "bio1"
            other_model.objective.direction = "max"
            otherpkgmgr.getpkg("KBaseMediaPkg").build_package(gmm)
            solution = other_model.optimize()
            if solution.objective_value > 0.01:
                print("ONE:"+genomeid+"\t"+str(solution.objective_value))
                otherpkgmgr.getpkg("ObjConstPkg").build_package(0.05,None)
                reaction_objective = other_model.problem.Objective(
                    Zero,
                    direction="min")
                obj_coef = dict()
                for reaction in add_reactions:
                    reaction = other_model.reactions.get_by_id(reaction.id)
                    if reaction.upper_bound > 0:
                        obj_coef[reaction.forward_variable] = 1
                    if reaction.lower_bound < 0:
                        obj_coef[reaction.reverse_variable] = 1
                other_model.objective = reaction_objective
                reaction_objective.set_linear_coefficients(obj_coef)
                solution = other_model.optimize()
                remove_list = []
                for reaction in add_reactions:
                    if reaction.id not in solution.fluxes or abs(solution.fluxes[reaction.id]) < 0.0000001:
                        remove_list.append(reaction)
                other_model.remove_reactions(remove_list)
                other_model.objective = "bio1"
                other_model.objective.direction = "max"
                solution = other_model.optimize()
            if solution.objective_value > 0.01:
                print(genomeid+"\t"+other_type+"\t"+str(solution.objective_value))
                cobra.io.save_json_model(other_model,"GFModels/"+other_type+"/"+genomeid+".json")
            else:
                print(genomeid+" failed validation and will not be saved!")

ONE:33882.144	0.6841047274544663
33882.144	DRAM.RAST	0.6841047274544659
ONE:1663.231	0.613727921047004
1663.231	DRAM.RAST	0.44962579799396807
ONE:384.349	0.46738707401993046
384.349	DRAM.RAST	0.41957690988566515
ONE:1663.232	0.6206902483461237
1663.232	DRAM.RAST	0.5957099802265265
ONE:32008.572	0.6110376338276252
32008.572	DRAM.RAST	0.602275513264378
ONE:242606.49	0.6101142502762407
242606.49	DRAM.RAST	0.6017926231018406
ONE:34073.57	0.6195969069533789
34073.57	DRAM.RAST	0.6138212554195764
ONE:1822464.28	0.7616314696718077
1822464.28	DRAM.RAST	0.6730922287176991
ONE:1822464.31	0.5928037078261122
1822464.31	DRAM.RAST	0.5821970146637384
ONE:1663.236	0.745206539271809
1663.236	DRAM.RAST	0.6890120902775752
ONE:104336.22	0.6256573530794918
104336.22	DRAM.RAST	0.6256573530794914
ONE:2590785.4	0.664706676365502
2590785.4	DRAM.RAST	0.6237817803382977
ONE:1663.237	0.6211743072334591
1663.237	DRAM.RAST	0.6139988039314356
ONE:1883.1832	0.6127708910194071
1883.1832	DRAM.RAST	0.6035607701720241
ONE

IndexError: list index out of range

In [3]:
#This code prints the data required for the model dashboard
from cobra.flux_analysis import flux_variability_analysis
kbase_api = cobrakbase.KBaseAPI()
gmm = kbase_api.get_from_ws("Carbon-D-Glucose","KBaseMedia")
#Reading genome list
lines = []
genomes = []
with open('FullGenomeList.txt') as f:
    lines = f.readlines()
for line in lines:
    line = line.rstrip("\n")
    genomes.append(line.split("\t"))
data = {"GenomeID":[],"Label":[],"Domain":[],"Taxa":[],"Species":[],"ModelType":[],
        "Type":[],"Genes":[],"MdlGenes":[],"GFGenes":[],"MdlReactions":[],"GFReactions":[],
        "NoGeneGFRxns":[],"Blocked":[],"Growth":[]}
mdltypes = ["RAST","DRAM","DRAM.RAST"]
for mdltype in mdltypes:
    for genome in genomes:
        genomeid = genome[0]
        if exists("GFModels/RAST/"+genomeid+".json") and exists("GFModels/DRAM/"+genomeid+".json") and exists("GFModels/DRAM.RAST/"+genomeid+".json"):
            modelws = 101658
            if genome[2] == "Refseq":
                modelws = 101656
            data["Label"].append(genome[1])
            data["Domain"].append(genome[4])
            data["Taxa"].append(genome[6])
            data["Species"].append(genome[5])
            data["Genes"].append(int(genome[3]))
            data["Type"].append(genome[2])
            data["GenomeID"].append(genome[0])
            data["ModelType"].append(mdltype)
            #Getting ungapfilled genome
            model = kbase_api.get_from_ws(genomeid+"."+mdltype+".mdl",modelws)
            if model == None:
                print("Missing model",genomeid,mdltype,modelws)
            else:
                data["MdlGenes"].append(len(model.genes))
                data["MdlReactions"].append(len(model.reactions))
                #Getting gapfilled genome
                gfmodel = cobra.io.load_json_model("GFModels/"+mdltype+"/"+genomeid+".json")
                data["GFGenes"].append(len(gfmodel.genes)-len(model.genes))
                data["GFReactions"].append(len(gfmodel.reactions)-len(model.reactions))
                nogenegf = 0
                for reaction in gfmodel.reactions:
                    if reaction.id not in model.reactions and len(reaction.genes) == 0:
                        nogenegf += 1
                pkgmgr = MSPackageManager.get_pkg_mgr(gfmodel)
                pkgmgr.getpkg("KBaseMediaPkg").build_package(gmm)
                solution = gfmodel.optimize()
                fva = flux_variability_analysis(gfmodel, gfmodel.reactions,fraction_of_optimum=0.1)
                blocked = 0
                for reaction in gfmodel.reactions:
                    if fva["maximum"][reaction.id] == 0 and fva["minimum"][reaction.id] == 0:
                        blocked += 1
                data["NoGeneGFRxns"].append(nogenegf)
                data["Blocked"].append(blocked)
                data["Growth"].append(solution.objective_value)
df = pd.DataFrame(data)
df.to_csv("biolog_model_dashboard.csv")
HTML(df.to_html(render_links=True, escape=False))

Unnamed: 0,GenomeID,Label,Domain,Taxa,Species,ModelType,Type,Genes,MdlGenes,GFGenes,MdlReactions,GFReactions,NoGeneGFRxns,Blocked,Growth
0,1341683.3,Acinetobacter brisouii,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter brisouii:Acinetobacter brisouii CIP 110357,Acinetobacter brisouii CIP 110357,RAST,Refseq,2916,799,9,1156,92,66,533,0.543375
1,1341683.5,Acinetobacter brisouii,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter brisouii:Acinetobacter brisouii CIP 110357,Acinetobacter brisouii CIP 110357 strain DSM 18516,RAST,Refseq,2957,789,10,1158,106,80,540,0.181317
2,1217677.3,Acinetobacter soli,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter soli:Acinetobacter soli NIPH 2899,Acinetobacter soli NIPH 2899,RAST,Refseq,3110,858,12,1235,108,79,579,0.193542
3,487316.5,Acinetobacter soli LUH 14692,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter soli,Acinetobacter soli strain KCTC 22184,RAST,Refseq,3115,865,9,1249,95,70,583,0.198777
4,202950.11,Acinetobacter baylyi,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter baylyi,Acinetobacter baylyi strain AB93A21,RAST,Refseq,3474,891,10,1262,103,76,587,0.228109
5,62977.6,Acinetobacter sp. ADP1,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter baylyi:Acinetobacter baylyi ADP1,Acinetobacter sp. ADP1,RAST,Refseq,3209,882,11,1258,108,82,572,0.228198
6,1341679.5,Acinetobacter indicus,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter indicus:Acinetobacter indicus CIP 110367,Acinetobacter indicus CIP 110367 strain DSM 25388,RAST,Refseq,2922,744,12,1086,112,83,496,0.178335
7,1217667.3,Acinetobacter lwoffii,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter lwoffii:Acinetobacter lwoffii ATCC 9957 = CIP 70.31,Acinetobacter lwoffii CIP 70.31,RAST,Refseq,3236,769,8,1206,115,87,591,0.167541
8,1217693.3,Acinetobacter variabilis,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter variabilis,Acinetobacter sp. NIPH 2171,RAST,Refseq,3200,796,10,1156,106,76,527,0.607526
9,1120925.3,Acinetobacter bouvetii,Bacteria,cellular organisms:Bacteria:Proteobacteria:Gammaproteobacteria:Pseudomonadales:Moraxellaceae:Acinetobacter:Acinetobacter bouvetii:Acinetobacter bouvetii DSM 14964 = CIP 107468,Acinetobacter bouvetii DSM 14964 = CIP 107468 [PRJNA201594],RAST,Refseq,3292,752,9,1135,110,84,521,0.181303


In [None]:
#This code runs the biolog fitting on the RAST models and saves the results
#This code saves the Biolog simulation results from each model into a dataframe
from cobra.flux_analysis import flux_variability_analysis
kbase_api = cobrakbase.KBaseAPI()
#Reading genome list
lines = []
genomes = []
with open('FullGenomeList.txt') as f:
    lines = f.readlines()
for line in lines:
    line = line.rstrip("\n")
    genomes.append(line.split("\t"))
data = {"GenomeID":[],"ModelType":[]}
mdltypes = ["RAST","DRAM","DRAM.RAST"]
biologs = kbase_api.list_objects(101536, object_type="KBasePhenotypes.PhenotypeSet", include_metadata=True)
genome_biologs = {}
for item in biologs:
    if "GenomeID" in item[10]:
        genome_biologs[item[10]["GenomeID"]] = item[1]
data = {"GenomeID/Phenotype":[],"ModelType":[],"Accuracy":[],"TP":[],"TN":[],"FP":[],"FN":[]}
phenotype_data = {}
for mdltype in mdltypes:
    for genome in genomes:
        genomeid = genome[0]
        biolog_name = genome_biologs[genomeid]
        if exists("GFModels/RAST/"+genomeid+".json") and exists("GFModels/DRAM/"+genomeid+".json") and exists("GFModels/DRAM.RAST/"+genomeid+".json"):
            modelws = 101658
            if genome[2] == "Refseq":
                modelws = 101656
            gfmodel = cobra.io.load_json_model("GFModels/"+mdltype+"/"+genomeid+".json")
            kbpheno = kbase_api.get_object(biolog_name, 101536)
            pheno = MSGrowthPhenotypes.from_kbase_object(kbpheno,kbase_api)
            output = pheno.simulate_phenotypes(gfmodel,"bio1",True)
            data["GenomeID/Phenotype"].append(genomeid)
            data["ModelType"].append(mdltype)
            total = output["summary"]["Count"]["CP"] +
                output["summary"]["Count"]["CN"] +
                output["summary"]["Count"]["FP"] +
                output["summary"]["Count"]["FN"]
            data["Accuracy"].append((output["summary"]["Count"]["CP"]+output["summary"]["Count"]["CN"])/total)
            data["TP"].append(output["summary"]["Count"]["CP"]/total)
            data["TN"].append(output["summary"]["Count"]["CN"]/total)
            data["FP"].append(output["summary"]["Count"]["FP"]/total)
            data["FN"].append(output["summary"]["Count"]["FN"]/total)
            for row in output["details"].rows:
                if row["Phenotype"] not in phenotype_data:
                    phenotype_data[row["Phenotype"]] = {}
                if mdltype not in phenotype_data[row["Phenotype"]]:
                    phenotype_data[row["Phenotype"]][mdltype] = {"Accuracy":0,"TP":0,"TN":0,"FP":0,"FN":0,"Count":0}
                phenotype_data[row["Phenotype"]][mdltype]["Count"] += 1
                if row["Class"] == "CP":
                    phenotype_data[row["Phenotype"]][mdltype]["TP"] += 1
                    phenotype_data[row["Phenotype"]][mdltype]["Accuracy"] += 1
                if row["Class"] == "CN":
                    phenotype_data[row["Phenotype"]][mdltype]["TN"] += 1
                    phenotype_data[row["Phenotype"]][mdltype]["Accuracy"] += 1
                if row["Class"] == "FP":
                    phenotype_data[row["Phenotype"]][mdltype]["FP"] += 1
                if row["Class"] == "FN":
                    phenotype_data[row["Phenotype"]][mdltype]["FN"] += 1
for pheno in phenotype_data:
    for mdltype in phenotype_data[pheno]:
        data["GenomeID/Phenotype"].append(pheno)
        data["ModelType"].append(mdltype)
        data["Accuracy"].append(phenotype_data[pheno][mdltype]["Accuracy"]/phenotype_data[pheno][mdltype]["Count"])
        data["TP"].append(phenotype_data[pheno][mdltype]["TP"]/phenotype_data[pheno][mdltype]["Count"])
        data["TN"].append(phenotype_data[pheno][mdltype]["TN"]/phenotype_data[pheno][mdltype]["Count"])
        data["FP"].append(phenotype_data[pheno][mdltype]["FP"]/phenotype_data[pheno][mdltype]["Count"])
        data["FN"].append(phenotype_data[pheno][mdltype]["FN"]/phenotype_data[pheno][mdltype]["Count"])
df = pd.DataFrame(data)
df.to_csv("biolog_simulations.csv")
HTML(df.to_html(render_links=True, escape=False))

In [16]:
sys.quit()
import json
kbase_api = cobrakbase.KBaseAPI()
#Computing reaction scores
reaction_genes = {}
anno_api = annotation_ontology_api()
output = anno_api.get_annotation_ontology_events({
    "input_ref" : "89639/1663.230",
})
events = output["events"]
for event in events:
    for gene in event["ontology_terms"]:
        for term in event["ontology_terms"][gene]:
            if "modelseed_ids" in term:
                for rxn in term["modelseed_ids"]:
                    newrxn = re.sub("^MSRXN:","",rxn)
                    if newrxn not in reaction_genes:
                        reaction_genes[newrxn] = {}
                    if gene not in reaction_genes[newrxn]:
                        reaction_genes[newrxn][gene] = 0            
                    reaction_genes[newrxn][gene] += 1
json_string = json.dumps(reaction_genes, indent=4, sort_keys=True)      
with open('json_data.json', 'w') as outfile:
    outfile.write(json_string)
#Getting model, media, templates
model = kbase_api.get_from_ws("1663.230.RAST.mdl",101658)
model.solver = 'optlang-cplex'
glc_o2_atp_media = kbase_api.get_from_ws("Glc.O2.atp",94026)
gmm = kbase_api.get_from_ws("Carbon-D-Glucose","KBaseMedia")
template = kbase_api.get_from_ws("GramNegModelTemplateV4","NewKBaseModelTemplates")
core = NewModelTemplateBuilder.from_dict(get_template('template_core'), None).build()
#Computing ATP to build tests to ensure gapfilling doesn't break ATP
atpmethod = MSATPCorrection(model,core,[glc_o2_atp_media])
evaluation = atpmethod.evaluate_growth_media()
atpmethod.restore_noncore_reactions()
tests = [{"media":glc_o2_atp_media,"objective":atpmethod.atp_hydrolysis.id,"is_max_threshold":True,"threshold":1.2*evaluation[glc_o2_atp_media.id]}]
#Gapfilling
msgapfill = MSGapfill(model,[template],[],tests,reaction_genes,[])
msgapfill.lp_filename = "Gapfill.lp"
gfresults = msgapfill.run_gapfilling(gmm,"bio1")
print(json.dumps(gfresults, indent=4, sort_keys=True))
model = msgapfill.integrate_gapfill_solution(gfresults)
cobra.io.save_json_model(model,"GFModels/RAST/1663.230.json")

AttributeError: module 'sys' has no attribute 'quit'

In [10]:
model = cobra.io.load_json_model("GFModels/RAST/1663.230.json")
pkgmgr = MSPackageManager.get_pkg_mgr(model)
pkgmgr.getpkg("KBaseMediaPkg").build_package(gmm)
solution = model.optimize()
model.summary()

Unnamed: 0_level_0,IN_FLUXES,IN_FLUXES,OUT_FLUXES,OUT_FLUXES,OBJECTIVES,OBJECTIVES
Unnamed: 0_level_1,ID,FLUX,ID,FLUX,ID,FLUX
0,cpd00013_e0,5.115616,cpd00001_e0,11.056228,bio1,0.165813
1,cpd00027_e0,5.0,cpd00067_e0,7.292117,,
2,cpd10516_e0,2.484073,cpd00011_e0,4.901437,,
3,cpd00009_e0,0.134732,cpd00156_e0,3.691705,,
4,,,cpd10515_e0,2.482212,,
5,,,cpd11416_c0,0.165813,,


In [3]:
import json
kbase_api = cobrakbase.KBaseAPI()
anno_api = annotation_ontology_api()
genome_ws = {
    101656:89638,
    101658:89639
}
gmm = kbase_api.get_from_ws("Carbon-D-Glucose","KBaseMedia")
biologs = kbase_api.list_objects(101536, object_type="KBasePhenotypes.PhenotypeSet", include_metadata=True)
mdl_hash = {}
refmodels = kbase_api.list_objects(101656, object_type="KBaseFBA.FBAModel")
for mdl in refmodels:
    mdl_hash[mdl[1]] = mdl[6]
ccsrmodels = kbase_api.list_objects(101658, object_type="KBaseFBA.FBAModel")
for mdl in ccsrmodels:
    mdl_hash[mdl[1]] = mdl[6]
types = ["RAST","DRAM","DRAM.RAST"]
#data = ["Type":[],"Genome":[],"Objective":[]]
for item in biologs:
    if "GenomeID" in item[10]:
        genomeid = item[10]["GenomeID"]
        for mdltype in types:
            mdlid = genomeid+"."+mdltype+".mdl"
            if exists("GFModels/"+mdltype+"/"+genomeid+".json"):
                model = cobra.io.load_json_model("GFModels/"+mdltype+"/"+genomeid+".json")
                solution = model.optimize()
                print(mdltype+"\t"+genomeid+"\t"+str(solution.objective_value))

RAST	149698.22	0.0
DRAM	149698.22	45.28165804792266
DRAM.RAST	149698.22	0.0
RAST	702115.9	0.0
DRAM	702115.9	0.0
DRAM.RAST	702115.9	0.0
RAST	1663.218	0.0
DRAM	1663.218	0.0
DRAM.RAST	1663.218	0.0
RAST	242605.18	0.0
DRAM	242605.18	0.0
DRAM.RAST	242605.18	0.0
RAST	134536.40	0.0
DRAM	134536.40	0.0
DRAM.RAST	134536.40	0.0
RAST	379.417	0.0
DRAM	379.417	0.0
DRAM.RAST	379.417	0.0
RAST	1883.1827	0.0
DRAM	1883.1827	0.0
DRAM.RAST	1883.1827	0.0
RAST	1663.219	0.0
DRAM	1663.219	0.0
DRAM.RAST	1663.219	0.0
RAST	1883.1829	0.0
DRAM	1883.1829	0.0


In [14]:
kbase_api = cobrakbase.KBaseAPI()
gmm = kbase_api.get_from_ws("Carbon-D-Glucose","KBaseMedia")
cpd_hash = {"cpd00011":{}}
for cpd in gmm.mediacompounds:
    cpd_hash[cpd.id] = cpd
cpd_hash.pop("cpd00027")
biologs = kbase_api.list_objects(101536, object_type="KBasePhenotypes.PhenotypeSet", include_metadata=True)
msdb = cobrakbase.modelseed.from_local('/Users/chenry/code/ModelSEEDDatabase')
for item in biologs:
    if "GenomeID" in item[10] and item[10]["GenomeID"] == "393305.7":
        kbpheno = kbase_api.get_object(item[1], item[6])
        phenoset = MSGrowthPhenotypes.from_kbase_object(kbpheno,kbase_api)
        data = {"Name":[],"ModelSEED":[],"Inchi":[],"Smiles":[]}
        for pheno in phenoset.phenotypes:
            if pheno.media.name[0:7] == "Carbon-":
                data["Name"].append(pheno.media.name[7:])
                cpdids = []
                for cpd in pheno.media.mediacompounds:
                    if cpd.id not in cpd_hash:
                        cpdids.append(cpd.id)
                
                data["ModelSEED"].append(";".join(cpdids))
                inchi = ""
                smiles = ""
                if len(cpdids) > 0 and cpdids[0] in msdb.compounds:
                    if "inchikey" in msdb.compounds[cpdids[0]]:
                        inchi = msdb.compounds[cpdids[0]]["inchikey"]
                    if "smiles" in msdb.compounds[cpdids[0]]:
                        smiles = msdb.compounds[cpdids[0]]["smiles"]
                data["Inchi"].append(inchi)
                data["Smiles"].append(smiles)
df = pd.DataFrame(data)
df.to_csv("media_modelseed_translation.csv")
HTML(df.to_html(render_links=True, escape=False))

Unnamed: 0,Name,ModelSEED,Inchi,Smiles
0,D-Trehalose,cpd00794,HDTRYLNUVZCQOY-LIZSDCNHSA-N,OC[C@H]1O[C@H](O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@@H](O)[C@@H]1O
1,N-Acetyl-b-D-Mannosamine,cpd00492,OVRNDRQMDRJTHS-ZTVVOAFPSA-N,CC(=O)N[C@@H]1C(O)O[C@H](CO)[C@@H](O)[C@@H]1O
2,Salicin,cpd01030,NGFMICBWJRZIBI-UJPOAAIJSA-N,OCc1ccccc1O[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O
3,Pectin,cpd11601,,
4,Dextrin,cpd11594,,
5,a-Hydroxy-Butyric Acid,cpd03561,AFENDNXGAFYKQO-VKHMYHEASA-M,CC[C@H](O)C(=O)[O-]
6,L-Fucose,cpd00751,SHZGCJCMOBCMKK-DHVFOXMCSA-N,C[C@@H]1OC(O)[C@@H](O)[C@H](O)[C@@H]1O
7,a-D-Lactose,cpd00208,GUBGYTABKSRVRQ-QKKXKWKRSA-N,OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O[C@@H]1O[C@H](CO)[C@H](O)[C@H](O)[C@H]1O
8,Citric Acid,cpd00137,KRKNYBCHXYNGOX-UHFFFAOYSA-K,O=C([O-])CC(O)(CC(=O)[O-])C(=O)[O-]
9,Acetic Acid,cpd00029,QTBSBXVTEAMEQO-UHFFFAOYSA-M,CC(=O)[O-]


In [None]:
kbase_api = cobrakbase.KBaseAPI()
mdl_hash = {}
models = kbase_api.list_objects(101658, object_type="KBaseFBA.FBAModel", include_metadata=True)
for model in models:
    if len(model[1]) > 14 and model[1][-13:] == "DRAM.RAST.mdl":
        genomeid = model[1][0:-14]
        if genomeid not in mdl_hash:
            mdl_hash[genomeid] = {}
        mdl_hash[genomeid]["DRAM+RAST"] = model
    elif model[1][-8:] == "DRAM.mdl":
        genomeid = model[1][0:-9]
        if genomeid not in mdl_hash:
            mdl_hash[genomeid] = {}
        mdl_hash[genomeid]["DRAM"] = model
    elif model[1][-8:] == "fbamodel":
        genomeid = model[1][0:-9]
        if genomeid not in mdl_hash:
            mdl_hash[genomeid] = {}
        mdl_hash[genomeid]["RAST"] = model
models = kbase_api.list_objects(101656, object_type="KBaseFBA.FBAModel", include_metadata=True)
for model in models:
    if len(model[1]) > 14 and model[1][-13:] == "DRAM.RAST.mdl":
        genomeid = model[1][0:-14]
        if genomeid not in mdl_hash:
            mdl_hash[genomeid] = {}
        mdl_hash[genomeid]["DRAM+RAST"] = model
    elif model[1][-8:] == "DRAM.mdl":
        genomeid = model[1][0:-9]
        if genomeid not in mdl_hash:
            mdl_hash[genomeid] = {}
        mdl_hash[genomeid]["DRAM"] = model
    elif model[1][-8:] == "RAST.mdl":
        genomeid = model[1][0:-9]
        if genomeid not in mdl_hash:
            mdl_hash[genomeid] = {}
        mdl_hash[genomeid]["RAST"] = model
biologs = kbase_api.list_objects(101536, object_type="KBasePhenotypes.PhenotypeSet", include_metadata=True)
mdltypes = ["RAST","DRAM","DRAM+RAST"]
glc_o2_atp_media = kbase_api.get_from_ws("Glc.O2",94026)
gmm = kbase_api.get_from_ws("Carbon-D-Glucose","KBaseMedia")
template = kbase_api.get_from_ws("GramNegModelTemplateV3","NewKBaseModelTemplates")
for item in biologs:
    if "GenomeID" in item[10] and item[10]["GenomeID"] in mdl_hash:
        for mdltype in mdltypes:
            if mdltype in mdl_hash[item[10]["GenomeID"]]:
                #Computing ATP production
                model = kbase_api.get_from_ws(mdl_hash[item[10]["GenomeID"]][mdltype][1],mdl_hash[item[10]["GenomeID"]][mdltype][6])
                atpmethod = MSATPCorrection(model,None,[glc_o2_atp_media])
                evaluation = atpmethod.evaluate_growth_media()
                #Gapfilling in minimal media
                msgapfill = MSGapfill(model,[template],[],[],{},[]):
                gfresults = msgapfill.run_gapfilling(gmm,"bio1")
                model = msgapfill.integrate_gapfill_solution(gfresults)
                #Saving gapfilling model
                cobra.io.save_json_model(model, mdltype+"mdls/"+item[10]["GenomeID"]+".json")
                #Simulating phenotypes
                kbpheno = kbase_api.get_object(item[1], item[6])
                pheno = MSGrowthPhenotypes.from_kbase_object(kbpheno,kbase_api)
                output = pheno.simulate_phenotypes(model,"bio1",True,True,template)
                #Saving phenotype data        

In [2]:
kbase_api = cobrakbase.KBaseAPI()
model = kbase_api.get_from_ws('93062.19.fbamodel', 101656)
kbpheno = kbase_api.get_object('Staphylococcus_aureus_subsp._aureus_COL', 101536)
pheno = MSGrowthPhenotypes.from_kbase_object(kbpheno,kbase_api)
template = kbase_api.get_from_ws("GramNegModelTemplateV3","NewKBaseModelTemplates")
output = pheno.simulate_phenotypes(model,"bio1",True,True,template)
HTML(output["summary"].to_html(render_links=True, escape=False))

Unnamed: 0,Label,Count
0,Accuracy,1.0
1,CP,26.0
2,CN,0.0
3,FP,0.0
4,FN,0.0


In [3]:
HTML(output["details"].to_html(render_links=True, escape=False))

Unnamed: 0,Phenotype,Observed growth,Simulated growth,Class,Transports missing,Gapfilled reactions
0,D-Trehalose,1.0,0.332784,CP,,
1,N-Acetyl-b-D-Mannosamine,1.0,0.059761,CP,,
2,Dextrin,1.0,0.582895,CP,,>rxn05531_c0;>rxn09989_c0;>rxn21635_c0;>rxn22816_c0;>rxn11279_c0;>rxn16218_c0;>rxn10504_c0;>rxn22817_c0;>rxn20834_c0;>rxn16217_c0;>rxn14424_c0;>rxn12930_c0;>rxn12932_c0;>rxn12940_c0;>rxn12939_c0;>rxn12929_c0;>rxn25637_c0;>rxn18935_c0;>rxn25638_c0;>rxn12928_c0;>rxn12942_c0;>rxn25474_c0;>rxn12941_c0;>rxn11251_c0;>rxn30633_c0;>rxn19728_c0;>rxn20235_c0;>rxn25479_c0;>rxn22114_c0;>rxn16114_c0;>rxn15060_c0;>rxn15270_c0;>rxn15940_c0;>rxn30917_c0;>rxn08921_c0;>rxn47493_c0;>rxn14423_c0;>rxn10120_c0;>rxn43975_c0;>rxn42732_c0;>rxn41263_c0;>rxn20839_c0;
3,a-Hydroxy-Butyric-Acid,1.0,0.30065,CP,,rxn22816_c0;>rxn11279_c0;>rxn16218_c0;>rxn10504_c0;>rxn22817_c0;>rxn20834_c0;>rxn16217_c0;>rxn12932_c0;>rxn25637_c0;>rxn18935_c0;>rxn25638_c0;>rxn12942_c0;>rxn02485_c0;>rxn30633_c0;>rxn16622_c0;>rxn15060_c0;>rxn15064_c0;>rxn20508_c0;>rxn30917_c0;>rxn15939_c0;>rxn40674_c0;>rxn08921_c0;>rxn10120_c0
4,a-D-Lactose,1.0,0.332784,CP,,
5,b-Methyl-D-Glucoside,1.0,0.313419,CP,,rxn22816_c0;>rxn11279_c0;>rxn10504_c0;>rxn22817_c0;>rxn20834_c0;>rxn12932_c0;>rxn12939_c0;>rxn12928_c0;>rxn12943_c0;>rxn12942_c0;>rxn02485_c0;>rxn12934_c0;>rxn11251_c0;>rxn30633_c0;>rxn16622_c0;>rxn15060_c0;>rxn11285_c0;>rxn20508_c0;>rxn15940_c0;>rxn30917_c0;>rxn40674_c0;>rxn08921_c0;>rxn47493_c0;>rxn08473_c0;>rxn10120_c0
6,Maltose,1.0,0.254482,CP,,
7,D-Galactose,1.0,0.515625,CP,,rxn11279_c0;>rxn16218_c0;>rxn16217_c0;>rxn14424_c0;>rxn12930_c0;>rxn12932_c0;>rxn12940_c0;>rxn12929_c0;>rxn25637_c0;>rxn18935_c0;>rxn25743_c0;>rxn12942_c0;>rxn02485_c0;>rxn12941_c0;>rxn11251_c0;>rxn30633_c0;>rxn16114_c0;>rxn16622_c0;>rxn15060_c0;>rxn15064_c0;>rxn20508_c0;>rxn15940_c0;>rxn30917_c0;>rxn41492_c0;>rxn44493_c0;>rxn42332_c0;>rxn40674_c0;>rxn47493_c0;>rxn14423_c0
8,D-Fructose-6-Phosphate,1.0,0.275812,CP,,
9,Formic-Acid,1.0,0.58385,CP,,rxn22816_c0;>rxn11279_c0;>rxn10504_c0;>rxn22817_c0;>rxn20834_c0;>rxn14424_c0;>rxn12930_c0;>rxn12932_c0;>rxn12940_c0;>rxn12929_c0;>rxn12938_c0;>rxn12927_c0;>rxn12921_c0;>rxn12942_c0;>rxn02485_c0;>rxn12941_c0;>rxn11251_c0;>rxn30633_c0;>rxn16114_c0;>rxn16622_c0;>rxn15060_c0;>rxn20508_c0;>rxn15940_c0;>rxn30917_c0;>rxn40674_c0;>rxn08921_c0;>rxn47493_c0;>rxn14423_c0;>rxn10120_c0;>rxn43975_c0;>rxn42732_c0;>rxn41263_c0;>rxn20839_c0


In [4]:
#dextrin = pheno.phenotypes.get_by_id("Dextrin")
#modelutl = MSModelUtil(model)
#template = kbase_api.get_from_ws("GramNegModelTemplateV3","NewKBaseModelTemplates")
#dextrin.gapfill_model_for_phenotype(modelutl,[template],None)

In [5]:
#output = pheno.simulate_phenotypes(model,"bio1",True)
#HTML(output["summary"].to_html(render_links=True, escape=False))

In [6]:
#HTML(output["details"].to_html(render_links=True, escape=False))

In [None]:
Selected media
for all genomes:
    simulate all phenotypes
    for all media:
        fit model to media
        resimulate phenotypes
        save the phenotype dataframe and the gapfilled models
Pick the best compound