In [1]:
import platform
print("python version " + platform.python_version())
import sys
import json
import cobra
import cplex
import re
import os
from os.path import exists
import logging
from configparser import ConfigParser
config = ConfigParser()
config.read("config.cfg")
paths = config.get("script", "syspaths").split(";")
for path in paths:
    sys.path.append(path)
import cobrakbase
from escher import Builder
from optlang.symbolics import Zero, add
from modelseedpy import MSPackageManager, MSGenome, MSMedia, MSModelUtil, MSBuilder, MSGapfill, FBAHelper, MSGrowthPhenotypes, MSModelUtil, MSATPCorrection
from cobrakbase.core.kbasefba.newmodeltemplate_builder import NewModelTemplateBuilder
from annotation_ontology_api.annotation_ontology_apiServiceClient import annotation_ontology_api
from cobra.flux_analysis import flux_variability_analysis
from modelseedpy.helpers import get_classifier
from modelseedpy.helpers import get_template, get_classifier
from modelseedpy.core.mstemplate import MSTemplateBuilder
from cobrakbase.sdk.sdkhelper import SDKHelper
from sklearn.metrics import r2_score
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import HTML
print("Required modules loaded")
kbase_api = cobrakbase.KBaseAPI()

python version 3.7.6
cobrakbase 0.2.8
Required modules loaded


In [None]:
def run_gapfilling(params)
    #Processing parameters
    SDKHelper.validate_args(params,["media_list","workspace"],{
        "model_list":None,
        "model_objs":{},
        "atp_objs":{}
        "atp_safe":True,
        "suffix":".gf",
        "atp_media_list":[],
        "objective":"bio1"
    })
    #Retrieving models if not provided already
    if "model_objs" not in params or len(params["model_objs"]) == 0:
        params["model_objs"] = []
        for mdl_ref in params["model_list"]:
            model = kbase_api.get_from_ws(mdl_ref,None)
            mdlutl = MSModelUtil(model)
            mdl_hash[mdlutl] = kbase_api.get_object(mdl_ref,None)
            params["model_objs"].append(mdlutl)
    #Retrieving media objects from references
    media_objs = []
    for media_ref in params["media_list"]:
        media = kbase_api.get_from_ws(media_ref,None)
        media_objs.append(media)
    atp_media_objs = []
    for media_ref in params["atp_media_list"]:
        media = kbase_api.get_from_ws(media_ref,None)
        atp_media_objs.append(media)
    #Iterating over each model and running gapfilling
    for mdlutl in params["model_objs"]:
        mdlutl.model.solver = config["solver"]
        tests = []
        if params["atp_safe"]:
            if mdlutl not in atp_objs:
                atp_objs[mdlutl] = MSATPCorrection(mdlutl,core,atp_media_objs)
                atp_objs[mdlutl].evaluate_growth_media()
                atp_objs[mdlutl].determine_growth_media()
                atp_objs[mdlutl].apply_growth_media_gapfilling()
                atp_objs[mdlutl].evaluate_growth_media()
                atp_objs[mdlutl].expand_model_to_genome_scale()
            tests = atp_objs[mdlutl].build_tests()
        #Iterating over all media specified for gapfilling
        for media in media_objs:
            #Gapfilling
            msgapfill = MSGapfill(mdlutl,[template],[],tests,{},[])
            gfresults = msgapfill.run_gapfilling(media,params["objective"])
            model = msgapfill.integrate_gapfill_solution(gfresults)
            mdlutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media)
            solution = model.optimize()
            print("GS objective value:",solution.objective_value)
            newmodel = mdl_hash[mdlutl]
            modelutl.add_gapfilling_solution_to_kbase_model(mdl_hash[mdlutl],gfresults,media_ref = media.info.workspace_id+"/"+media.info.id)
        #Saving completely gapfilled model
        kbase_api.save_object(mdlutl.model.info.id+params["suffix"], mdlutl.model.info.workspace_id, "KBaseFBA.FBAModel", newmodel)

In [6]:
df = pd.DataFrame({})  
newrow = {'Name': 'Amy', 'Maths': 89, 'Science': 93}
newrow["Test1"] = "None"
df = df.append(newrow, ignore_index = True)
df.loc[df['Name'] == "Amy",'Maths'] = 90
display(df)

Unnamed: 0,Maths,Name,Science,Test1
0,90.0,Amy,93.0,


In [2]:
params = {
    "genome_refs":["80735/GCF_002999235.1.RAST"],
    "genome_annotation_priorities":[],
    "run_gapfilling":False,
    "atp_safe":True,
    "atp_media_list":["94026/Glc.O2.atp"],
    "gapfilling_media_list":["KBaseMedia/Carbon-Pyruvic-Acid"],
    "suffix":".mdl",
    "workspace":119529,
    "core_template":"auto",
    "gs_template":"auto",
    "gs_template_ref":None,
    "template_reactions_only":True,
    "output_core_models":False
}
modelobjs = []
#Preloading core and preselected template
templates = {
    "core" : NewModelTemplateBuilder.from_dict(get_template('template_core'), None).build(),
    "gp" : None,
    "gn" : None,
    "custom": None
}
if params["gs_template"] == "custom":
    templates["custom"] = kbase_api.get_from_ws(params["gs_template_ref"],None)
#Retrieving ATP media objects
for media_ref in params["atp_media_list"]:
    media = kbase_api.get_from_ws(media_ref,None)
    atp_media_objs.append(media)
#Initializing ontology API, classifier, and SSO ontology
anno_api = annotation_ontology_api()
genome_classifier = get_classifier('knn_ACNP_RAST_filter')
sso_ontology = {}
with open("SSO_dictionary.json") as json_file:
    sso_ontology = json.load(json_file)
#Initializing output data tables
result_table = pd.DataFrame({})
default_output = {"Model":None,"Genome":None,"Genes":None,"Class":None,
                  "Model genes":None,"Reactions":None,"ATP yeilds":None,
                  "Core GF":None,"GS GF":None,"Auxotrophy":None,"Growth":None,"Comments":None}
#Retrieving genomes and building models one by one
count = 0
gen_infos = kbase_api.get_obj_info()#TODO
for genome in params["genome_refs"]:
    #Initializing output row
    current_output = default_output.copy()
    comments = []
    current_output["Model"] = gid+params["suffix"]
    current_output["Genome"] = ghash[gid]["Name"]
    current_output["Genes"] = ghash[gid]["Number of Protein Encoding Genes"]
    #Pulling annotation priority
    anno_priority = None
    if count < len(params["genome_annotation_priorities"]):
        anno_priority = params["genome_annotation_priorities"][count]
    if not anno_priority:
        comments.append("No annotation priorities provided. Using RAST by default.")
    #Retrieving genome annotations
    annotations = anno_api.get_annotation_ontology_events({"input_ref":genome})
    template_type = params["gs_template"]
    if template_type == "auto":
        #Loading SSO terms and classifying genome
        func_hash = None
        for event in annotations["events"]:
            if event["ontology_id"] == "SSO":
                func_hash = set()
                for gene in event["ontology_terms"]:
                    if gene["term"] in sso_ontology["term_hash"]:
                        func_hash.append(sso_ontology["term_hash"][gene["term"]]["name"])
        if func_hash = None:
            if len(anno_priority) == 0 and anno_priority == None:
                current_output["Comments"] = "No RAST annotation and no other annotation source selected - cannot build model"
                result_table.append(current_output)
                next
            comments.append("No RAST annotation. Cannot classify genome. Assuming gram negative template.")
            current_output["Class"] = "--"
        else:
            current_output["Class"] = genome_classifier.classify({"genome":list(func_hash)})
        if current_output["Class"] == "P":
            template_type = "gp"
        elif current_output["Class"] == "N" or current_output["Class"] == "--":
            template_type = "gn"
    if template_type not in templates:
        current_output["Comments"] = "Template type "+template_type+" not recognized"
        result_table.append(current_output)
        next
    elif templates[template_type] == None:
        if template_type == "gn":
            templates[template_type] = kbase_api.get_from_ws("GramNegModelTemplateV4","NewKBaseModelTemplates")
        if template_type == "gp":
            templates[template_type] = kbase_api.get_from_ws("GramPosModelTemplateV4","NewKBaseModelTemplates")
    curr_template = templates[template_type]
    #Building model
    builder = MSBuilder(genobj,template)
    model = builder.build(gen_infos[count].id+params["suffix"],index='0',allow_all_non_grp_reactions=False,annotate_with_rast=False)
    mdlutl = MSModelUtil(model)
    kbjson = mdlutl.kbjson()
    kbjson["genome_ref"] = #TODO
    kbjson["template_ref"] = #TODO
    #Running ATP method
    if params["atp_safe"]:
        mdlutl.atputl = MSATPCorrection(mdlutl,templates["core"],atp_media_objs)
        mdlutl.atputl.evaluate_growth_media()
        mdlutl.atputl.determine_growth_media()
        mdlutl.atputl.apply_growth_media_gapfilling()
        mdlutl.atputl.evaluate_growth_media()
        mdlutl.atputl.expand_model_to_genome_scale()
        current_output["ATP yeilds"] = #TODO
        current_output["Core GF"] = #TODO
    else:
        current_output["ATP yeilds"] = "NA"
        current_output["Core GF"] = "NA"
    #Running gapfilling
    current_output["GS GF"] = "NA"
    current_output["Auxotrophy"] = "NA"
    if params["run_gapfilling"]:
        run_gapfilling({
            "media_list":params["gapfilling_media_list"],
            "model_objs":[mdlutl],
            "atp_safe":params["atp_safe"],
            "workspace":params["workspace"],
            "suffix":params["suffix"],
            "atp_media_list":params["atp_media_list"],
            "objective":"bio1",
            "output_data":{mdlutl:current_output}
        })
    #Filling in model output
    current_output["Reactions"] = len(model.reactions)
    current_output["Model genes"] = len(model.genes)
    model.objective = "bio1"
    current_output["Growth"] = model.slim_optimize()
    #Saving model
    
    result_table = result_table.append(current_output, ignore_index = True)
display(result_table)

C3V41_RS00005 function None
C3V41_RS00010 function None
C3V41_RS00015 function None
C3V41_RS00020 function None
C3V41_RS00025 function None
C3V41_RS00030 function None
C3V41_RS00035 function None
C3V41_RS00040 function None
C3V41_RS00045 function None
C3V41_RS00050 function None
C3V41_RS00055 function None
C3V41_RS12690 function None
C3V41_RS00070 function None
C3V41_RS00075 function None
C3V41_RS00080 function None
C3V41_RS00085 function None
C3V41_RS00090 function None
C3V41_RS00095 function None
C3V41_RS12695 function None
C3V41_RS12700 function None
C3V41_RS00105 function None
C3V41_RS00110 function None
C3V41_RS00115 function None
C3V41_RS00120 function None
C3V41_RS00125 function None
C3V41_RS00130 function None
C3V41_RS00135 function None
C3V41_RS00140 function None
C3V41_RS00145 function None
C3V41_RS00150 function None
C3V41_RS00155 function None
C3V41_RS00160 function None
C3V41_RS00165 function None
C3V41_RS00170 function None
C3V41_RS00180 function None
C3V41_RS00185 functi

C3V41_RS03345 function None
C3V41_RS03350 function None
C3V41_RS03355 function None
C3V41_RS03360 function None
C3V41_RS03365 function None
C3V41_RS03370 function None
C3V41_RS03375 function None
C3V41_RS03380 function None
C3V41_RS03385 function None
C3V41_RS03390 function None
C3V41_RS03395 function None
C3V41_RS03400 function None
C3V41_RS03405 function None
C3V41_RS03430 function None
C3V41_RS03435 function None
C3V41_RS03440 function None
C3V41_RS03445 function None
C3V41_RS03450 function None
C3V41_RS12760 function None
C3V41_RS03460 function None
C3V41_RS03465 function None
C3V41_RS03470 function None
C3V41_RS03475 function None
C3V41_RS03480 function None
C3V41_RS13035 function None
C3V41_RS03485 function None
C3V41_RS03490 function None
C3V41_RS03495 function None
C3V41_RS03500 function None
C3V41_RS03505 function None
C3V41_RS03510 function None
C3V41_RS03515 function None
C3V41_RS03520 function None
C3V41_RS03525 function None
C3V41_RS03530 function None
C3V41_RS03535 functi

C3V41_RS05935 function None
C3V41_RS13095 function None
C3V41_RS05945 function None
C3V41_RS05950 function None
C3V41_RS05955 function None
C3V41_RS05960 function None
C3V41_RS05965 function None
C3V41_RS05970 function None
C3V41_RS05975 function None
C3V41_RS05980 function None
C3V41_RS05985 function None
C3V41_RS05990 function None
C3V41_RS05995 function None
C3V41_RS06000 function None
C3V41_RS06005 function None
C3V41_RS06010 function None
C3V41_RS06015 function None
C3V41_RS06020 function None
C3V41_RS06025 function None
C3V41_RS06030 function None
C3V41_RS06035 function None
C3V41_RS06040 function None
C3V41_RS06045 function None
C3V41_RS06050 function None
C3V41_RS06055 function None
C3V41_RS06060 function None
C3V41_RS06065 function None
C3V41_RS06070 function None
C3V41_RS06075 function None
C3V41_RS06080 function None
C3V41_RS06085 function None
C3V41_RS06090 function None
C3V41_RS06095 function None
C3V41_RS06100 function None
C3V41_RS06105 function None
C3V41_RS06110 functi

C3V41_RS08525 function None
C3V41_RS08530 function None
C3V41_RS08535 function None
C3V41_RS08540 function None
C3V41_RS13140 function None
C3V41_RS08550 function None
C3V41_RS08555 function None
C3V41_RS08565 function None
C3V41_RS08570 function None
C3V41_RS08575 function None
C3V41_RS08580 function None
C3V41_RS08585 function None
C3V41_RS08595 function None
C3V41_RS08610 function None
C3V41_RS08615 function None
C3V41_RS08620 function None
C3V41_RS08625 function None
C3V41_RS08630 function None
C3V41_RS08635 function None
C3V41_RS08640 function None
C3V41_RS08645 function None
C3V41_RS08650 function None
C3V41_RS08655 function None
C3V41_RS08660 function None
C3V41_RS08665 function None
C3V41_RS08670 function None
C3V41_RS08675 function None
C3V41_RS08680 function None
C3V41_RS08685 function None
C3V41_RS08690 function None
C3V41_RS08695 function None
C3V41_RS08700 function None
C3V41_RS08705 function None
C3V41_RS08710 function None
C3V41_RS08715 function None
C3V41_RS08720 functi

C3V41_RS12920 function None
C3V41_RS10360 function None
C3V41_RS10365 function None
C3V41_RS10370 function None
C3V41_RS10375 function None
C3V41_RS10380 function None
C3V41_RS10385 function None
C3V41_RS10390 function None
C3V41_RS10395 function None
C3V41_RS10400 function None
C3V41_RS10405 function None
C3V41_RS10415 function None
C3V41_RS10420 function None
C3V41_RS10425 function None
C3V41_RS10430 function None
C3V41_RS10435 function None
C3V41_RS10440 function None
C3V41_RS10445 function None
C3V41_RS10450 function None
C3V41_RS10455 function None
C3V41_RS10460 function None
C3V41_RS10465 function None
C3V41_RS10470 function None
C3V41_RS10475 function None
C3V41_RS10480 function None
C3V41_RS10485 function None
C3V41_RS10490 function None
C3V41_RS10495 function None
C3V41_RS10500 function None
C3V41_RS10505 function None
C3V41_RS10510 function None
C3V41_RS10515 function None
C3V41_RS10520 function None
C3V41_RS10525 function None
C3V41_RS10530 function None
C3V41_RS13210 functi



IndexError: The genomes or genomeSet that you have submitted wasn’t annotated using the                 RAST annotation pipeline. Please annotate the genomes via ‘Annotate Microbial Genome’ app                 (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genome/release)or                 genomeSets via Annotate Multiple Microbial Genomes’ app                 (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genomes/release) and                 resubmit the RAST annotated genome/genomeSets into the Predict Phenotype app. (

In [1]:
def build_models(params)
    params = {
        "genome_refs":[],
        "run_gapfilling":True,
        "atp_safe":True,
        "atp_media_list":[],
        "gapfilling_media_list":[],
        "suffix":".mdl",
        "workspace":,
        "core_template":"auto",
        "gs_template":"auto"
    }
    output = {}
    modelobjs = []
    model_atp_objs = {}
    #Retrieving gs template if specified
    template = None
    if not params["gs_template"] == "auto":
        template = kbase_api.get_from_ws(core_template,None)
    #Retrieving ATP media objects
    atp_media_objs = []
    for media_ref in params["atp_media_list"]:
        media = kbase_api.get_from_ws(media_ref,None)
        atp_media_objs.append(media)
    #Retrieving genomes and building models one by one
    for genome in params["genome_refs"]:
        #Retrieving genome
        genobj = kbase_api.get_from_ws(genome,None)
        #Building model
        builder = MSBuilder(genobj,template)
        model = builder.build(genome.info.id+params["suffix"],index='0',allow_all_non_grp_reactions=False,annotate_with_rast=False):
        #Running ATP method
        mdlutl = MSModelUtil(model)
        atp_objs[mdlutl] = MSATPCorrection(mdlutl,core,atp_media_objs)
        atp_objs[mdlutl].evaluate_growth_media()
        atp_objs[mdlutl].determine_growth_media()
        atp_objs[mdlutl].apply_growth_media_gapfilling()
        atp_objs[mdlutl].evaluate_growth_media()
        atp_objs[mdlutl].expand_model_to_genome_scale()
    #Running gapfilling if requested, otherwise saving
    if run_gapfilling:
        gf_params = {
            "media_list":params["gapfilling_media_list"],
            "model_objs":modelobjs,
            "atp_objs":model_atp_objs,
            "atp_safe":params["atp_safe"],
            "workspace":params["workspace"],
            "suffix":params["suffix"],
            "atp_media_list":params["atp_media_list"],
            "objective":"bio1",
            "output_df":output
        }
        #Running gapfillings, which will trigger a save of the model
        return run_gapfilling(gf_params)    
    else:
        #Saving completely gapfilled model
        for mdl in modelobjs:
            kbase_api.save_object(mdl.info.id+recon_params["suffix"],recon_params["workspace"], "KBaseFBA.FBAModel", mdl)
    return output


SyntaxError: invalid syntax (<ipython-input-1-476aa3025aa5>, line 8)

In [None]:
f = open('AuxoProblemModels.txt')
genome_list = f.read().split("\n")
filter_hash = {}
for item in genome_list:
    filter_hash[item] = 1
#mdlws = 114104#Glucose minimal media workspace
#Loading cached data
#f = open('GMMHash.json')
#genome_hash = json.load(f)
mdlws = 114650#Auxotrophy media workspace
models = kbase_api.list_objects(mdlws, object_type="KBaseFBA.FBAModel", include_metadata=True)
for item in models:
    genomeid = item[1][0:-4]
    if genomeid in filter_hash and "reactions" not in genome_hash[genomeid]:
        print(genomeid)
        model = kbase_api.get_from_ws(item[1],mdlws)
        pkgmgr = MSPackageManager.get_pkg_mgr(model)
        pkgmgr.getpkg("KBaseMediaPkg").build_package(None)
        fva = flux_variability_analysis(model,model.reactions,fraction_of_optimum=0.1)
        gfcount = 0
        blocked = 0
        for reaction in model.reactions:
            if len(reaction.genes) == 0:
                gfcount += 1
            if fva["maximum"][reaction.id] == 0 and fva["minimum"][reaction.id] == 0:
                blocked += 1
        gfcount += -22
        genome_hash[genomeid]["reactions"] = len(model.reactions)
        genome_hash[genomeid]["mdlgenes"] = len(model.genes)
        genome_hash[genomeid]["gfreactions"] = gfcount
        genome_hash[genomeid]["blocked"] = blocked
        with open('problem_hash.json', 'w') as outfile:
            json.dump(genome_hash, outfile)

In [2]:
universals = {}
template = kbase_api.get_from_ws('GramPosModelTemplateV4', 12998)
for reaction in template.reactions:
    if reaction.type == "universal" or reaction.type == "spontaneous":
        universals[reaction.id+"0"] = 1
auxotrans = [
   "rxn00068","rxn09693","rxn05663","rxn05301","rxn05306","rxn05669","rxn05244","rxn05243","rxn05496","rxn05217","rxn05508","rxn05307","rxn05300","rxn05582","rxn09672","rxn09696","rxn09690","rxn05297","rxn09678","rxn05305","rxn05299","rxn05303","rxn05638","rxn05687","rxn05652","rxn12666","rxn08192","rxn10147","rxn05310","rxn05645","rxn05308","rxn05255","rxn09657","rxn05148" 
]
for rxn in auxotrans:
    universals[rxn+"_c0"] = 1
f = open('auxo_hash.json')
genome_hash = json.load(f)
mdlws = 114104#Minimal media workspace
#Creating auxotrophy media
compound_sets = [
    ["cpd00065","cpd00069","cpd00066","cpd00393"],#AAA+folate
    ["cpd00118","cpd00264"],#put,sper
    ["cpd00028","cpd00557","cpd00635"],#heme,cbl
    ["cpd00033","cpd00054"],#gly,ser
    ["cpd00039","cpd00161"],#lys,thre
    ["cpd00107","cpd00156","cpd00322"],#leu,val,isoleu
    ["cpd00065"],
    ["cpd00069"],
    ["cpd00066"],
    ["cpd00156"],
    ["cpd00322"],
    ["cpd00107"],
    ["cpd00132"],
    ["cpd00054"],
    ["cpd00161"],
    ["cpd00033"],
    ["cpd00060"],
    ["cpd00084"],
    ["cpd00039"],
    ["cpd00119"],
    ["cpd00051"],
    ["cpd00129"],
    ["cpd00118"],
    ["cpd00264"],
    ["cpd00028"],
    ["cpd00557"],
    ["cpd00635"],
    ["cpd00218"],
    ["cpd00220"],
    ["cpd00644"],
    ["cpd00393"],
    ["cpd00305"],
    ["cpd00104"],
    ["cpd00215"]
]
gmmedia = kbase_api.get_from_ws("Carbon-D-Glucose","KBaseMedia")
input_dictionary = {}
for cpdset in compound_sets:
    for cpd in cpdset:
        input_dictionary[cpd] = 1
auxomedia = MSMedia.from_dict(input_dictionary)
auxomedia.merge(gmmedia)
#Iterating over models and checking each for auxotrophy analysis
models = kbase_api.list_objects(mdlws, object_type="KBaseFBA.FBAModel", include_metadata=True)
for item in models:
    genomeid = item[1][0:-4]
    if genomeid in genome_hash and "reactions" not in genome_hash[genomeid]:
        print(genomeid)
        model = kbase_api.get_from_ws(item[1],mdlws)
        mdlutl = MSModelUtil(model)
        mdlutl.add_missing_exchanges(auxomedia)
        genome_hash[genomeid]["auxo"] = {}
        model.reactions.bio1.lower_bound = 0.1
        gfreactions = []
        model.objective = model.problem.Objective(
            Zero,
            direction="min")
        obj_coef = dict()
        for reaction in model.reactions:
            if len(reaction.genes) == 0 and reaction.id not in universals:
                obj_coef[reaction.reverse_variable] = 1
                obj_coef[reaction.forward_variable] = 1
                gfreactions.append(reaction)
        genome_hash[genomeid]["Gapfilled reactions"] = len(gfreactions)
        model.objective.set_linear_coefficients(obj_coef)
        mdlutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(gmmedia)
        solution = model.optimize()
        basegf = 0
        for rxn in gfreactions:
            if abs(solution.fluxes[rxn.id]) < 0.00000001:
                basegf += 1
        genome_hash[genomeid]["Base useless gapfill"] = basegf
        for cpdset in compound_sets:
            input_dictionary = {}
            for cpd in cpdset:
                input_dictionary[cpd] = 1
            newmedia = MSMedia.from_dict(input_dictionary)
            newmedia.merge(gmmedia)
            mdlutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(newmedia)
            solution = model.optimize()
            gapfilling = 0
            for rxn in gfreactions:
                if abs(solution.fluxes[rxn.id]) < 0.00000001:
                    gapfilling += 1
            genome_hash[genomeid]["auxo"][",".join(cpdset)] = gapfilling-genome_hash[genomeid]["Base useless gapfill"]
        genome_hash[genomeid]["reactions"] = len(model.reactions)
        genome_hash[genomeid]["mdlgenes"] = len(model.genes)
        with open('auxo_hash.json', 'w') as outfile:
            json.dump(genome_hash, outfile)

NameError: name 'kbase_api' is not defined

In [3]:
auxotrophy_data = {"GenomeID":[],"Reactions":[],"MdlGenes":[],"Useless gapfill":[],"Original gapfill":[]}
for cpdset in compound_sets:
    cpdstring = ",".join(cpdset)
    auxotrophy_data[cpdstring] = []
items = []
with open('genome_list.json') as json_file:
    items = json.load(json_file)
for item in items:
    item = item+".RAST"
    if item in genome_hash:
        data = genome_hash[item]
        auxotrophy_data["GenomeID"].append(item)
        if "Gapfilled reactions" in data:
            auxotrophy_data["Original gapfill"].append(data["Gapfilled reactions"])
        else:
            auxotrophy_data["Original gapfill"].append("")
        if "Base useless gapfill" in data:
            auxotrophy_data["Useless gapfill"].append(data["Base useless gapfill"])
        else:
            auxotrophy_data["Useless gapfill"].append("")
        if "reactions" in data:
            auxotrophy_data["Reactions"].append(data["reactions"])
        else:
            auxotrophy_data["Reactions"].append("")
        if "mdlgenes" in data:
            auxotrophy_data["MdlGenes"].append(data["mdlgenes"])
        else:
            auxotrophy_data["MdlGenes"].append("")  
        if "auxo" in data:
            for cpdset in compound_sets:
                cpdstring = ",".join(cpdset)
                if cpdstring in data["auxo"]:
                    auxotrophy_data[cpdstring].append(data["auxo"][cpdstring])
                else:
                    auxotrophy_data[cpdstring].append("")
        else:
            for cpdset in compound_sets:
                cpdstring = ",".join(cpdset)
                auxotrophy_data[cpdstring].append("")
df = pd.DataFrame(auxotrophy_data)
df.to_csv("AuxotrophyData.csv")
HTML(df.to_html(render_links=True, escape=False))

NameError: name 'compound_sets' is not defined

In [2]:
#Printing dataframe
data = {"GenomeID":[],"Taxonomy":[],"Species":[],"Genes":[],"MdlGenes":[],"MdlReactions":[],
        "GFReactions":[],"Blocked":[],"OldReactions":[],"OldGenes":[],"OldGFReactions":[]}

genome_hash = {}
with open('oldmodel.json') as json_file:
    genome_hash = json.load(json_file)

items = []
with open('genome_list.json') as json_file:
    items = json.load(json_file)

for item in items:
    item = item+".RAST"
    if item not in genome_hash:
        print(item)
    if item in genome_hash:
        gdata = genome_hash[item]
        data["GenomeID"].append(item)
        data["Genes"].append(gdata["genes"])
        data["Taxonomy"].append(gdata["taxonomy"])
        data["Species"].append(gdata["species"])
        if "mdlgenes" in gdata:
            data["MdlGenes"].append(gdata["mdlgenes"])
            data["MdlReactions"].append(gdata["reactions"])
            data["GFReactions"].append(gdata["gfreactions"])
            data["Blocked"].append(gdata["blocked"])
        else:
            data["MdlGenes"].append(0)
            data["MdlReactions"].append(0)
            data["GFReactions"].append(0)
            data["Blocked"].append(0)
        if "old_mdlgenes" in gdata:
            data["OldReactions"].append(gdata["old_reactions"])
            data["OldGenes"].append(gdata["old_mdlgenes"])
            data["OldGFReactions"].append(gdata["old_gfreactions"])
        else:
            data["OldReactions"].append(0)
            data["OldGenes"].append(0)
            data["OldGFReactions"].append(0)
df = pd.DataFrame(data)
df.to_csv("OldModels.csv")
HTML(df.to_html(render_links=True, escape=False))

Unnamed: 0,GenomeID,Taxonomy,Species,Genes,MdlGenes,MdlReactions,GFReactions,Blocked,OldReactions,OldGenes,OldGFReactions
0,GCF_000513475.1.RAST,cellular organisms; Bacteria; unclassified Bacteria; Bacteria candidate phyla; Candidatus Dependentiae; Candidatus Babeliae; Candidatus Babeliales; Candidatus Babeliaceae; Candidatus Babela,Candidatus Babela massiliensis,985,128,643,458,117,0,0,0
1,GCF_000266885.1.RAST,cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Turneriella; Turneriella parva,Turneriella parva DSM 21527,4140,551,925,271,273,0,0,0
2,GCF_000017605.1.RAST,cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira; Leptospira biflexa; Leptospira biflexa serovar Patoc,Leptospira biflexa serovar Patoc strain 'Patoc 1 (Ames)',3671,523,913,247,309,0,0,0
3,GCF_000017685.1.RAST,cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira; Leptospira biflexa; Leptospira biflexa serovar Patoc,Leptospira biflexa serovar Patoc strain 'Patoc 1 (Paris)',3684,523,911,247,307,0,0,0
4,GCF_001729245.1.RAST,cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira,Leptospira tipperaryensis,4096,536,889,268,283,0,0,0
5,GCF_003722295.1.RAST,cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira,Leptospira kmetyi,3978,529,889,266,286,0,0,0
6,GCF_000941035.1.RAST,cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira; Leptospira interrogans; Leptospira interrogans serovar Linhai,Leptospira interrogans serovar Linhai str. 56609,4074,493,877,257,272,0,0,0
7,GCF_000007685.1.RAST,cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira; Leptospira interrogans; Leptospira interrogans serovar Copenhageni,Leptospira interrogans serovar Copenhageni str. Fiocruz L1-130,3686,469,864,270,257,0,0,0
8,GCF_000092565.1.RAST,cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira; Leptospira interrogans; Leptospira interrogans serovar Lai,Leptospira interrogans serovar Lai str. 56601,3849,471,864,271,258,0,0,0
9,GCF_000231175.1.RAST,cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira; Leptospira interrogans; Leptospira interrogans serovar Lai,Leptospira interrogans serovar Lai str. IPAV,3795,471,864,271,259,0,0,0
