# Configure KBase Jupyter Dev Environment
<sub><sup>(contact chenry@anl.gov with questions)</sup></sub>

In [8]:
import platform
print("python version " + platform.python_version())
import sys
import json
from json import dump
import os
import pandas as pd
from pandas import DataFrame, read_csv, concat, set_option
from os.path import exists
from pathlib import Path
import logging
import shutil
from configparser import ConfigParser

# Load the per-user KBase dev configuration and prepend the code paths it lists to sys.path.
config = ConfigParser()
config_file = str(Path.home()) + '/.kbase/config'
shared_config_file = "/scratch/shared/code/sharedconfig.cfg"
if not exists(config_file):
    if exists(shared_config_file):
        # Seed the user config from the shared copy. The ~/.kbase directory may not
        # exist yet on a first run, and copyfile does not create parent directories.
        Path(config_file).parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(shared_config_file, config_file)
    else:
        print("You must create a config file in ~/.kbase/config before running this notebook. See instructions: https://docs.google.com/document/d/1fQ6iS_uaaZKbjWtw1MgzqilklttIibNO9XIIJWgxWKo/edit")
        sys.exit(1)
config.read(config_file)
# "syspaths" is a ";"-separated list; entries that do not start with "/" are
# taken relative to the optional "codebase" setting.
paths = config.get("DevEnv","syspaths").split(";")
codebase = config.get("DevEnv","codebase",fallback="")
for i,filepath in enumerate(paths):
    if filepath[0:1] != "/":
        paths[i] = codebase+"/"+filepath
# Put the configured paths first; drop older copies of the same entries so that
# re-running this cell does not keep growing sys.path.
sys.path = paths + [entry for entry in sys.path if entry not in paths]

# Project-local imports. NOTE(review): these presumably resolve only because the
# configured code paths were prepended to sys.path earlier in this cell — confirm.
from chenry_utility_module.kbdevutils import KBDevUtils
# Dev-utility handle constructed with the name "ModelSEED2"; it is the source of
# the reconstruction helper and annotation client created below.
kbdevutil = KBDevUtils("ModelSEED2")
from modelseedpy import AnnotationOntology, MSPackageManager, MSModelUtil, MSBuilder, MSATPCorrection, MSGapfill, MSModelReport
from modelseedpy.core.mstemplate import MSTemplateBuilder
# Module-level logger, set to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Helper used throughout the notebook to fetch media, models and templates by reference.
msrecon = kbdevutil.msseedrecon()
# Annotation client; used later to fetch annotation ontology events for a genome.
annoapi = kbdevutil.anno_client(native_python_api=True)

# Media objects fetched once up front, by reference string, for use in later cells.
com_media = msrecon.get_media("KBaseMedia/Complete")
gmm_media = msrecon.get_media("KBaseMedia/Carbon-D-Glucose")
auxo_media = msrecon.get_media("94026/Auxotrophy_media")

python version 3.9.13
Output files printed to:/Users/chenry/workspace/Notebooks//ModelSEED2//sessions/default/output when using KBDevUtils.output_dir


In [9]:
# --- Annotations: the heavily annotated ecoli genome ---
genome_ref = "77537/Eco_RAST_Prokka_BlastKOALA_PTools_DeepEC_DeepGO"
# TODO: add the link to the AnnotationOntology code in github here.
annotation_events = annoapi.get_annotation_ontology_events({"input_ref": genome_ref})
annoont = AnnotationOntology(annotation_events, genome_ref)
# To get the reaction gene hash, call the get_reaction_gene_hash function of annoont.

# --- Model: the ecoli model, with the Carbon-L-Phenylalanine media applied ---
model = msrecon.get_model("151253/GCF_000005845.2.RAST.NewGMM.mdl")
media = msrecon.get_media("KBaseMedia/Carbon-L-Phenylalanine")
media_pkg = model.pkgmgr.getpkg("KBaseMediaPkg")
media_pkg.build_package(media)

# --- Transcriptome: tab-separated expression table ---
expression = pd.read_csv("ExpressionData.tsv", sep="\t")

# --- Gapfiller: built on the model's own template, with empty lists for the
# remaining two positional arguments and for the blacklist ---
gapfill_templates = [msrecon.get_template(model.model.template_ref)]
msgapfill = MSGapfill(
    model,
    gapfill_templates,
    [],
    [],
    blacklist=[],
    default_target="bio1",
    minimum_obj=0.01,
)

# --- Score computation from the expression data ---
msgapfill.compute_reaction_weights_from_expression_data(expression, annoont)
# TODO: compute_reaction_weights_from_expression_data needs to include the following code:
#   msgapfill.reaction_scores = computed_weights
#   msgapfill.gfpkgmgr.getpkg("GapfillingPkg").compute_gapfilling_penalties(reaction_scores=msgapfill.reaction_scores)
#   msgapfill.gfpkgmgr.getpkg("GapfillingPkg").build_gapfilling_objective_function()

# --- Gapfilling run (kept as the last expression so the result is displayed) ---
# Goal: find a reaction that must be gapfilled and that has multiple gene
# candidates, only one of which has an expression score.
msgapfill.run_multi_gapfill([media], target="bio1")


1706216668.597505 INFO: get_annotation_ontology_events:{
    "input_ref": "77537/Eco_RAST_Prokka_BlastKOALA_PTools_DeepEC_DeepGO"
}


1706216696.5901818 INFO: metabolites 1517
1706216704.327056 INFO: reactions 1650
1706216710.906146 INFO: Default biomass: [bio1]


Initial solution: {'reversed': {}, 'new': {'rxn01315_c0': '>', 'rxn00495_c0': '>'}, 'media': <cobrakbase.core.kbasebiochem.media.Media object at 0x7fb2c2adab20>, 'target': 'bio1', 'minobjective': 0.01, 'binary_check': False}


1706216805.9388428 INFO: rxn01315_c0> not needed:0.7151331156379079
1706216805.953702 INFO: Carbon-L-Phenylalanine/bio1:rxn00495_c0> needed:0.0 with min obj:0.01


Unneeded: [['rxn01315_c0', '>', 'new', 100]]
Growth: 0.7151331156379119 Carbon-L-Phenylalanine
Adding gapfilling {'growth': 0.7151331156379119, 'media': <cobrakbase.core.kbasebiochem.media.Media object at 0x7fb2c2adab20>, 'target': 'bio1', 'minobjective': 0.01, 'binary_check': False, 'new': {'rxn00495_c0': '>'}, 'reversed': {}}


{<cobrakbase.core.kbasebiochem.media.Media at 0x7fb2c2adab20>: {'growth': 0.7151331156379119,
  'media': <cobrakbase.core.kbasebiochem.media.Media at 0x7fb2c2adab20>,
  'target': 'bio1',
  'minobjective': 0.01,
  'binary_check': False,
  'new': {'rxn00495_c0': '>'},
  'reversed': {}}}

# Code cell to use to run pipeline on all of Aimee's genomes

In [None]:
#Run full pipeline on https://narrative.kbase.us/narrative/167956
