In [1]:
import platform
print("python version " + platform.python_version())
import sys
import json
import cobra
import cplex
import re
import os
from os.path import exists
import logging
from configparser import ConfigParser
# Read the ';'-separated directory list stored under [script] syspaths in
# config.cfg and add every entry to the module search path, so the
# project-local packages imported below can be resolved.
config = ConfigParser()
config.read("config.cfg")
paths = config.get("script", "syspaths").split(";")
sys.path.extend(paths)
import cobrakbase
from escher import Builder
from optlang.symbolics import Zero, add
from modelseedpy import MSPackageManager, MSMedia, MSModelUtil, MSGapfill, FBAHelper, MSGrowthPhenotypes, MSModelUtil, MSATPCorrection
from cobrakbase.core.kbasefba.newmodeltemplate_builder import NewModelTemplateBuilder
from annotation_ontology_api.annotation_ontology_apiServiceClient import annotation_ontology_api
from cobra.flux_analysis import flux_variability_analysis
from modelseedpy.helpers import get_template
from sklearn.metrics import r2_score
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import HTML
print("Required modules loaded")
# Shared KBase client: later cells call list_objects() and get_from_ws() on it
# to enumerate and download workspace objects.
kbase_api = cobrakbase.KBaseAPI()

python version 3.7.6
cobrakbase 0.2.8


KeyboardInterrupt: 

In [2]:
# Index every KBaseGenomes.Genome object listed in workspace 80735.
# Each listing entry is indexed positionally: element 1 is used as the
# dictionary key (presumably the object name -- confirm) and element 10 is the
# metadata mapping from which gene count, species name and taxonomy are read.
genomes = kbase_api.list_objects(80735, object_type="KBaseGenomes.Genome", include_metadata=True)
genome_hash = {}
for genome in genomes:
    metadata = genome[10]
    genome_hash[genome[1]] = dict(
        genes=metadata["Number of Protein Encoding Genes"],
        species=metadata["Name"],
        taxonomy=metadata["Taxonomy"],
    )

In [None]:
# Collect model statistics (reaction/gene counts, gene-less reactions and
# FVA-blocked reactions) for the genomes listed in AuxoProblemModels.txt and
# checkpoint them to problem_hash.json.
#
# The list file is read inside a context manager so its handle is closed
# as soon as the content has been loaded.
with open('AuxoProblemModels.txt') as f:
    genome_list = f.read().split("\n")
# Membership lookup table: one key per line of the list file.
filter_hash = dict.fromkeys(genome_list, 1)
#mdlws = 114104#Glucose minimal media workspace
#Loading cached data
#f = open('GMMHash.json')
#genome_hash = json.load(f)
mdlws = 114650#Auxotrophy media workspace
# NOTE(review): fixed offset subtracted from the count of gene-less reactions;
# presumably the number of reactions that never carry genes in these models
# (value kept from the original analysis) -- confirm.
GENELESS_BASELINE = 22
models = kbase_api.list_objects(mdlws, object_type="KBaseFBA.FBAModel", include_metadata=True)
for item in models:
    # Element 1 of the listing minus its last four characters is the key
    # shared with filter_hash and genome_hash.
    genomeid = item[1][0:-4]
    if genomeid not in filter_hash:
        continue
    if genomeid not in genome_hash:
        # A filtered genome without a genome_hash entry used to raise KeyError
        # and abort the whole scan; report it and move on instead.
        print("Skipping " + genomeid + ": no entry in genome_hash")
        continue
    if "reactions" in genome_hash[genomeid]:
        # Already analysed; the "reactions" key is written only on completion.
        continue
    print(genomeid)
    model = kbase_api.get_from_ws(item[1],mdlws)
    pkgmgr = MSPackageManager.get_pkg_mgr(model)
    pkgmgr.getpkg("KBaseMediaPkg").build_package(None)
    fva = flux_variability_analysis(model,model.reactions,fraction_of_optimum=0.1)
    gfcount = -GENELESS_BASELINE
    blocked = 0
    for reaction in model.reactions:
        if len(reaction.genes) == 0:
            gfcount += 1
        # Blocked: the FVA range of the reaction is exactly [0, 0].
        if fva["maximum"][reaction.id] == 0 and fva["minimum"][reaction.id] == 0:
            blocked += 1
    stats = genome_hash[genomeid]
    stats["reactions"] = len(model.reactions)
    stats["mdlgenes"] = len(model.genes)
    stats["gfreactions"] = gfcount
    stats["blocked"] = blocked
    # Rewritten after every model, so the file always holds the results so far.
    with open('problem_hash.json', 'w') as outfile:
        json.dump(genome_hash, outfile)

In [2]:
# Build the lookup of reaction ids that must NOT be counted as gapfilled:
#  * every template reaction typed "universal" or "spontaneous"
#    (template id with "0" appended), and
#  * the hard-coded reactions in `auxotrans` (id with "_c0" appended).
template = kbase_api.get_from_ws('GramPosModelTemplateV4', 12998)
universals = {
    reaction.id + "0": 1
    for reaction in template.reactions
    if reaction.type in ("universal", "spontaneous")
}
auxotrans = [
    "rxn00068", "rxn09693", "rxn05663", "rxn05301", "rxn05306", "rxn05669",
    "rxn05244", "rxn05243", "rxn05496", "rxn05217", "rxn05508", "rxn05307",
    "rxn05300", "rxn05582", "rxn09672", "rxn09696", "rxn09690", "rxn05297",
    "rxn09678", "rxn05305", "rxn05299", "rxn05303", "rxn05638", "rxn05687",
    "rxn05652", "rxn12666", "rxn08192", "rxn10147", "rxn05310", "rxn05645",
    "rxn05308", "rxn05255", "rxn09657", "rxn05148",
]
universals.update((rxn + "_c0", 1) for rxn in auxotrans)
# Load the cached per-genome results. The context manager closes the file
# handle as soon as the JSON has been parsed (the bare open() leaked it).
with open('auxo_hash.json') as f:
    genome_hash = json.load(f)
# Workspace id from which the FBA models are listed and fetched below.
mdlws = 114104#Minimal media workspace
#Creating auxotrophy media
# Each inner list is one supplement set of compound ids. Every set is later
# merged with the base glucose medium and tested separately, and its
# comma-joined ids become the key/column name for that test.
# The first six entries are multi-compound groups (abbreviations in the
# trailing comments are the author's); the remaining entries are single
# compounds, some of which also appear in the groups.
compound_sets = [
    ["cpd00065","cpd00069","cpd00066","cpd00393"],#AAA+folate
    ["cpd00118","cpd00264"],#put,sper
    ["cpd00028","cpd00557","cpd00635"],#heme,cbl
    ["cpd00033","cpd00054"],#gly,ser
    ["cpd00039","cpd00161"],#lys,thre
    ["cpd00107","cpd00156","cpd00322"],#leu,val,isoleu
    ["cpd00065"],
    ["cpd00069"],
    ["cpd00066"],
    ["cpd00156"],
    ["cpd00322"],
    ["cpd00107"],
    ["cpd00132"],
    ["cpd00054"],
    ["cpd00161"],
    ["cpd00033"],
    ["cpd00060"],
    ["cpd00084"],
    ["cpd00039"],
    ["cpd00119"],
    ["cpd00051"],
    ["cpd00129"],
    ["cpd00118"],
    ["cpd00264"],
    ["cpd00028"],
    ["cpd00557"],
    ["cpd00635"],
    ["cpd00218"],
    ["cpd00220"],
    ["cpd00644"],
    ["cpd00393"],
    ["cpd00305"],
    ["cpd00104"],
    ["cpd00215"]
]
# Base medium fetched from the "KBaseMedia" workspace, plus a combined medium
# holding every compound that occurs in any supplement set (each with value 1)
# merged with that base medium.
gmmedia = kbase_api.get_from_ws("Carbon-D-Glucose","KBaseMedia")
input_dictionary = {cpd: 1 for cpdset in compound_sets for cpd in cpdset}
auxomedia = MSMedia.from_dict(input_dictionary)
auxomedia.merge(gmmedia)
#Iterating over models and checking each for auxotrophy analysis
#
# For every model that has an entry in genome_hash and has not been analysed
# yet ("reactions" key absent):
#   1. force biomass flux (bio1 lower bound 0.1) and minimise the summed
#      forward+reverse flux through gene-less, non-universal reactions;
#   2. solve on the base medium and count how many of those reactions carry
#      no flux ("Base useless gapfill");
#   3. repeat on each supplemented medium and store the change in that count
#      under genome_hash[genomeid]["auxo"][<comma-joined compound ids>];
#   4. checkpoint genome_hash to auxo_hash.json.

# |flux| below this threshold is treated as "reaction carries no flux".
ZERO_FLUX_TOLERANCE = 0.00000001


def count_inactive(reactions, fluxes):
    """Return how many of `reactions` have |flux| below ZERO_FLUX_TOLERANCE.

    `fluxes` is indexed by reaction id (here: `solution.fluxes`).
    """
    return sum(1 for rxn in reactions if abs(fluxes[rxn.id]) < ZERO_FLUX_TOLERANCE)


# The supplemented media depend only on compound_sets and gmmedia, so build
# them once instead of once per model.
supplemented_media = []
for cpdset in compound_sets:
    newmedia = MSMedia.from_dict({cpd: 1 for cpd in cpdset})
    newmedia.merge(gmmedia)
    supplemented_media.append((",".join(cpdset), newmedia))

models = kbase_api.list_objects(mdlws, object_type="KBaseFBA.FBAModel", include_metadata=True)
for item in models:
    # Element 1 of the listing minus its last four characters is the
    # genome_hash key.
    genomeid = item[1][0:-4]
    if genomeid not in genome_hash or "reactions" in genome_hash[genomeid]:
        continue
    print(genomeid)
    model = kbase_api.get_from_ws(item[1],mdlws)
    mdlutl = MSModelUtil(model)
    mdlutl.add_missing_exchanges(auxomedia)
    results = genome_hash[genomeid]
    model.reactions.bio1.lower_bound = 0.1
    model.objective = model.problem.Objective(
        Zero,
        direction="min")
    # Candidate gapfilled reactions: no associated genes and not whitelisted.
    gfreactions = [
        reaction for reaction in model.reactions
        if len(reaction.genes) == 0 and reaction.id not in universals
    ]
    obj_coef = dict()
    for reaction in gfreactions:
        obj_coef[reaction.reverse_variable] = 1
        obj_coef[reaction.forward_variable] = 1
    results["Gapfilled reactions"] = len(gfreactions)
    model.objective.set_linear_coefficients(obj_coef)

    # Baseline solve on the unsupplemented medium.
    mdlutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(gmmedia)
    solution = model.optimize()
    if solution.status != "optimal":
        # Fluxes of a non-optimal solve are not meaningful; storing counts
        # derived from them would silently corrupt the table. The model is
        # left without a "reactions" key so it is retried on the next run.
        print("WARNING: " + genomeid + " base solve returned status '"
              + str(solution.status) + "'; skipping model")
        continue
    basegf = count_inactive(gfreactions, solution.fluxes)
    results["Base useless gapfill"] = basegf

    results["auxo"] = {}
    for label, newmedia in supplemented_media:
        mdlutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(newmedia)
        solution = model.optimize()
        if solution.status != "optimal":
            # Leave this entry out; the export cell writes "" for
            # missing supplement-set keys.
            print("WARNING: " + genomeid + " solve with " + label
                  + " returned status '" + str(solution.status) + "'; entry skipped")
            continue
        # Additional reactions that became unnecessary with this supplement.
        results["auxo"][label] = count_inactive(gfreactions, solution.fluxes) - basegf

    results["reactions"] = len(model.reactions)
    results["mdlgenes"] = len(model.genes)
    # Rewritten after every model, so the file always holds the results so far.
    with open('auxo_hash.json', 'w') as outfile:
        json.dump(genome_hash, outfile)

NameError: name 'kbase_api' is not defined

In [3]:
# Flatten the per-genome auxotrophy results into one table (one row per genome
# from genome_list.json that has a genome_hash entry under "<id>.RAST"),
# write it to AuxotrophyData.csv and render it inline.
# Any value missing for a genome is exported as an empty string.
set_labels = [",".join(cpdset) for cpdset in compound_sets]
auxotrophy_data = {"GenomeID":[],"Reactions":[],"MdlGenes":[],"Useless gapfill":[],"Original gapfill":[]}
for cpdstring in set_labels:
    auxotrophy_data[cpdstring] = []

with open('genome_list.json') as json_file:
    items = json.load(json_file)

# (output column, genome_hash key) pairs for the scalar summary fields.
summary_fields = (
    ("Original gapfill", "Gapfilled reactions"),
    ("Useless gapfill", "Base useless gapfill"),
    ("Reactions", "reactions"),
    ("MdlGenes", "mdlgenes"),
)

for item in items:
    item = item+".RAST"
    if item not in genome_hash:
        continue
    data = genome_hash[item]
    auxotrophy_data["GenomeID"].append(item)
    for column, key in summary_fields:
        auxotrophy_data[column].append(data.get(key, ""))
    # One column per supplement set; absent "auxo" behaves like an empty dict.
    auxo_results = data.get("auxo", {})
    for cpdstring in set_labels:
        auxotrophy_data[cpdstring].append(auxo_results.get(cpdstring, ""))

df = pd.DataFrame(auxotrophy_data)
df.to_csv("AuxotrophyData.csv")
HTML(df.to_html(render_links=True, escape=False))

NameError: name 'compound_sets' is not defined

In [4]:
#Printing dataframe
# Builds one row per genome from genome_list.json that has a genome_hash entry
# under "<id>.RAST", writes the table to AuxotrophyDashboard.csv and renders
# it inline. Genomes without an entry are printed and skipped.
data = {"GenomeID":[],"Taxonomy":[],"Species":[],"Genes":[],"MdlGenes":[],"MdlReactions":[],
        "GFReactions":[],"Blocked":[],"OldReactions":[],"OldGenes":[],"OldGFReactions":[]}

with open('genome_list.json') as json_file:
    items = json.load(json_file)

# (output column, genome_hash key) pairs; each group is gated on one marker key.
model_columns = (("MdlGenes", "mdlgenes"), ("MdlReactions", "reactions"),
                 ("GFReactions", "gfreactions"), ("Blocked", "blocked"))
old_columns = (("OldReactions", "old_reactions"), ("OldGenes", "old_mdlgenes"),
               ("OldGFReactions", "old_gfreactions"))

for item in items:
    item = item+".RAST"
    if item not in genome_hash:
        print(item)
        continue
    gdata = genome_hash[item]
    data["GenomeID"].append(item)
    data["Genes"].append(gdata["genes"])
    data["Taxonomy"].append(gdata["taxonomy"])
    data["Species"].append(gdata["species"])
    # Entries written by the auxotrophy scan carry "mdlgenes"/"reactions" but
    # no "gfreactions"/"blocked"; fall back to 0 for any missing key instead of
    # raising KeyError halfway through a row.
    has_model = "mdlgenes" in gdata
    for column, key in model_columns:
        data[column].append(gdata.get(key, 0) if has_model else 0)
    has_old = "old_mdlgenes" in gdata
    for column, key in old_columns:
        data[column].append(gdata.get(key, 0) if has_old else 0)
df = pd.DataFrame(data)
df.to_csv("AuxotrophyDashboard.csv")
HTML(df.to_html(render_links=True, escape=False))

NameError: name 'genome_hash' is not defined

In [5]:
#Printing dataframe
# Builds one row per genome from genome_list.json that has a genome_hash entry
# under "<id>.RAST", writes the table to AuxotrophyDashboard.csv and renders
# it inline. Genomes without an entry are printed and skipped.
data = {"GenomeID":[],"Taxonomy":[],"Species":[],"Genes":[],"MdlGenes":[],"MdlReactions":[],
        "GFReactions":[],"Blocked":[],"OldReactions":[],"OldGenes":[],"OldGFReactions":[]}

with open('genome_list.json') as json_file:
    items = json.load(json_file)

# (output column, genome_hash key) pairs; each group is gated on one marker key.
model_columns = (("MdlGenes", "mdlgenes"), ("MdlReactions", "reactions"),
                 ("GFReactions", "gfreactions"), ("Blocked", "blocked"))
old_columns = (("OldReactions", "old_reactions"), ("OldGenes", "old_mdlgenes"),
               ("OldGFReactions", "old_gfreactions"))

for item in items:
    item = item+".RAST"
    if item not in genome_hash:
        print(item)
        continue
    gdata = genome_hash[item]
    data["GenomeID"].append(item)
    data["Genes"].append(gdata["genes"])
    data["Taxonomy"].append(gdata["taxonomy"])
    data["Species"].append(gdata["species"])
    # Entries written by the auxotrophy scan carry "mdlgenes"/"reactions" but
    # no "gfreactions"/"blocked"; fall back to 0 for any missing key instead of
    # raising KeyError halfway through a row.
    has_model = "mdlgenes" in gdata
    for column, key in model_columns:
        data[column].append(gdata.get(key, 0) if has_model else 0)
    has_old = "old_mdlgenes" in gdata
    for column, key in old_columns:
        data[column].append(gdata.get(key, 0) if has_old else 0)
df = pd.DataFrame(data)
df.to_csv("AuxotrophyDashboard.csv")
HTML(df.to_html(render_links=True, escape=False))

NameError: name 'genome_hash' is not defined

In [6]:
#Printing dataframe
# Builds one row per genome from genome_list.json that has a genome_hash entry
# under "<id>.RAST", writes the table to AuxotrophyDashboard.csv and renders
# it inline. Genomes without an entry are printed and skipped.
data = {"GenomeID":[],"Taxonomy":[],"Species":[],"Genes":[],"MdlGenes":[],"MdlReactions":[],
        "GFReactions":[],"Blocked":[],"OldReactions":[],"OldGenes":[],"OldGFReactions":[]}

with open('genome_list.json') as json_file:
    items = json.load(json_file)

# (output column, genome_hash key) pairs; each group is gated on one marker key.
model_columns = (("MdlGenes", "mdlgenes"), ("MdlReactions", "reactions"),
                 ("GFReactions", "gfreactions"), ("Blocked", "blocked"))
old_columns = (("OldReactions", "old_reactions"), ("OldGenes", "old_mdlgenes"),
               ("OldGFReactions", "old_gfreactions"))

for item in items:
    item = item+".RAST"
    if item not in genome_hash:
        print(item)
        continue
    gdata = genome_hash[item]
    data["GenomeID"].append(item)
    data["Genes"].append(gdata["genes"])
    data["Taxonomy"].append(gdata["taxonomy"])
    data["Species"].append(gdata["species"])
    # Entries written by the auxotrophy scan carry "mdlgenes"/"reactions" but
    # no "gfreactions"/"blocked"; fall back to 0 for any missing key instead of
    # raising KeyError halfway through a row.
    has_model = "mdlgenes" in gdata
    for column, key in model_columns:
        data[column].append(gdata.get(key, 0) if has_model else 0)
    has_old = "old_mdlgenes" in gdata
    for column, key in old_columns:
        data[column].append(gdata.get(key, 0) if has_old else 0)
df = pd.DataFrame(data)
df.to_csv("AuxotrophyDashboard.csv")
HTML(df.to_html(render_links=True, escape=False))

NameError: name 'genome_hash' is not defined

In [7]:
#Printing dataframe
# Same table as the full dashboard, restricted to the genomes present in
# filter_hash; written to ProblemAuxoDashboard.csv and rendered inline.
# Genomes without a genome_hash entry are printed and skipped.
data = {"GenomeID":[],"Taxonomy":[],"Species":[],"Genes":[],"MdlGenes":[],"MdlReactions":[],
        "GFReactions":[],"Blocked":[],"OldReactions":[],"OldGenes":[],"OldGFReactions":[]}

with open('genome_list.json') as json_file:
    items = json.load(json_file)

# (output column, genome_hash key) pairs; each group is gated on one marker key.
model_columns = (("MdlGenes", "mdlgenes"), ("MdlReactions", "reactions"),
                 ("GFReactions", "gfreactions"), ("Blocked", "blocked"))
old_columns = (("OldReactions", "old_reactions"), ("OldGenes", "old_mdlgenes"),
               ("OldGFReactions", "old_gfreactions"))

for item in items:
    item = item+".RAST"
    if item not in genome_hash:
        # Previously a filtered genome missing from genome_hash was printed and
        # then still indexed, raising KeyError; it is now reported and skipped.
        print(item)
        continue
    if item not in filter_hash:
        continue
    gdata = genome_hash[item]
    data["GenomeID"].append(item)
    data["Genes"].append(gdata["genes"])
    data["Taxonomy"].append(gdata["taxonomy"])
    data["Species"].append(gdata["species"])
    # Fall back to 0 for any statistic the entry does not carry instead of
    # raising KeyError halfway through a row.
    has_model = "mdlgenes" in gdata
    for column, key in model_columns:
        data[column].append(gdata.get(key, 0) if has_model else 0)
    has_old = "old_mdlgenes" in gdata
    for column, key in old_columns:
        data[column].append(gdata.get(key, 0) if has_old else 0)
df = pd.DataFrame(data)
df.to_csv("ProblemAuxoDashboard.csv")
HTML(df.to_html(render_links=True, escape=False))

NameError: name 'genome_hash' is not defined

In [None]:
genome_list = 