# Configure KBase Jupyter Dev Environment

In [1]:
# Execute the shared setup script inside this kernel so the names it defines
# become available to the cells below.
# NOTE(review): `logging` and `util` are used in this notebook but never imported
# here; presumably agoracommutil.py provides both - confirm.
%run agoracommutil.py
# Notebook-level logger; INFO is the minimum level it will handle.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

python version 3.9.13
KBBaseModules 0.0.1


1713067476.7560909 INFO: Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
1713067476.756782 INFO: NumExpr defaulting to 8 threads.


modelseedpy 0.3.3
cobrakbase 0.3.1
Output files printed to:/Users/chenry/code/notebooks/MicrobiomeNotebooks/WesternDietMicrobiomes/nboutput when using KBDevUtils.output_dir
ModelSEED: /Users/chenry/code//kb_sdk/run_local/workdir/tmp/


# Loading ASV abundances for all samples

In [31]:
# Raw (un-normalised) ASV abundances, keyed by sample and then by ASV label.
# NOTE(review): the "Sample1" dictionary below is replaced by the "Sample2"
# assignment that follows it, so only Sample2 reaches the rest of the notebook.
# Confirm whether both samples were meant to be kept.
asv_abundances = {"Sample1":{"ASV00001;seqs=15073372;samples=2336":0.5,"ASV00002;seqs=7303020;samples=1403":0.5}}
asv_abundances = {
    "Sample2": {
        "ASV00002;seqs=7303020;samples=1403": 0.8333333333333334,
        "ASV00003;seqs=7068340;samples=1916": 11.333333333333334,
        "ASV00004;seqs=6473178;samples=1764": 593.3333333333333,
        "ASV00005;seqs=5940024;samples=3018": 8.027777777777779,
        "ASV00006;seqs=5698377;samples=2075": 100.88888888888889,
    }
}

# Rescale every sample in place so that its abundances sum to 1.
for sample, sample_abundances in asv_abundances.items():
    total_abundance = sum(sample_abundances.values())
    for asv in sample_abundances:
        sample_abundances[asv] = sample_abundances[asv]/total_abundance

# Persist the normalised table so later cells can reload it.
util.kbdevutil.save("asv_abundances", asv_abundances)

# Loading metabolomics data

In [32]:
import pandas as pd

# Read the metabolomics table and reshape it into
# {<"day_diet diff" value>: {<column name>: <cell value>, ...}}.
df = pd.read_csv('metabolomics differences.csv')
key_column = "day_diet diff"
value_columns = [col for col in df if col != key_column]

metabolomics_data = {}
for index, row in df.iterrows():
    # A later row with the same key replaces the earlier entry.
    metabolomics_data[row[key_column]] = {
        col: row[col] for col in value_columns if col in row
    }

# Persist the nested dictionary so later cells can reload it.
util.kbdevutil.save("metabolomics_data", metabolomics_data)

# Merging models to produce community models for all samples

In [9]:
# Build one merged "community" model per sample from the per-ASV SBML models,
# add a community biomass reaction weighted by ASV abundance, and save the
# bookkeeping dictionaries used by the later constraint cells.
import cobra
from cobra import Model, Reaction, Metabolite
asv_abundances = util.kbdevutil.load("asv_abundances")
# Per-sample bookkeeping, all keyed by sample name:
#   constraints          - left as an empty dict per sample here; filled by later cells
#   biomass_compounds    - ASV short name -> id of its biomass metabolite
#   abundance_by_name    - ASV short name -> abundance
#   species_reactions    - ASV short name -> ids of its non-exchange reactions
#   newmodels            - the merged cobra Model
#   biomass_exchange_ids - ids of the per-ASV biomass exchange reactions
constraints = {}
biomass_compounds = {}
abundance_by_name = {}
species_reactions = {}
newmodels = {}
biomass_exchange_ids = {}
for sample, abundances in asv_abundances.items():
    print("Processing sample: "+sample)
    newmodels[sample] = Model(sample,sample)
    constraints[sample] = {}
    species_reactions[sample] = {}
    biomass_compounds[sample] = {}
    abundance_by_name[sample] = {}
    biomass_exchange_ids[sample] = []
    for asv, abundance in abundances.items():
        print("Processing ASV: "+asv+" with abundance: "+str(abundance)+" in sample "+sample)
        # The ASV label is ";"-separated; its first field is used as the short name.
        name_array = asv.split(";")
        species_reactions[sample][name_array[0]] = []
        abundance_by_name[sample][name_array[0]] = abundance
        # The model file name is the full ASV label, not the short name.
        model = cobra.io.read_sbml_model("models/"+asv+'.xml')
        # Solve the individual model on its biomass exchange and print the result.
        model.objective = "EX_biomass(e)"
        solution = model.optimize()
        print(solution)
        new_metabolites = []
        new_reactions = []
        for met in model.metabolites:
            # Metabolites outside compartment "e" get the ASV short name appended,
            # so each ASV keeps its own copy; "e" metabolites keep their id.
            if met.compartment != "e":
                met.id += "_"+str(name_array[0])
            # Only metabolites whose id is not yet in the merged model are collected.
            if met.id not in newmodels[sample].metabolites:
                new_metabolites.append(met)
                # Any newly collected metabolite with "biomass" in its id is recorded
                # as this ASV's biomass compound (the last match wins).
                # NOTE(review): a biomass metabolite in compartment "e" would keep a
                # shared id and only be recorded for the first ASV - confirm the
                # biomass metabolite is non-extracellular in these models.
                if "biomass" in met.id:
                    biomass_compounds[sample][name_array[0]] = met.id
        for rxn in model.reactions:
            if rxn.id[0:3] == "EX_":
                # Exchange reactions keep their id unless it contains "biomass";
                # biomass exchanges are suffixed per ASV and remembered.
                if "biomass" in rxn.id:
                    rxn.id += "_"+str(name_array[0])
                    if rxn.id not in biomass_exchange_ids[sample]:
                         biomass_exchange_ids[sample].append(rxn.id)
            else:
                # Every non-exchange reaction is suffixed with the ASV short name and
                # listed as belonging to that ASV.
                rxn.id += "_"+str(name_array[0])
                species_reactions[sample][name_array[0]].append(rxn.id)
            # Only reactions whose id is not yet in the merged model are collected.
            if rxn.id not in newmodels[sample].reactions:
                new_reactions.append(rxn)
        # Adding new reactions and compounds to base model
        newmodels[sample].add_reactions(new_reactions)
        newmodels[sample].add_metabolites(new_metabolites)
    # Create community biomass
    comm_biomass = Metabolite("community_biomass", None, "Community biomass", 0, "c")
    # bio1 produces 1 community_biomass and consumes each ASV's biomass compound
    # with a coefficient equal to that ASV's abundance.
    metabolites = {comm_biomass: 1}
    # EX_community_biomass consumes community_biomass.
    drainmet = {comm_biomass: -1}
    for asv in biomass_compounds[sample]:
        met = newmodels[sample].metabolites.get_by_id(biomass_compounds[sample][asv])
        metabolites[met] = -1*abundance_by_name[sample][asv]
    comm_biorxn = Reaction(id="bio1", name="bio1", lower_bound=0, upper_bound=100)
    comm_biorxn.add_metabolites(metabolites)
    comm_drain = Reaction(id="EX_community_biomass", name="EX_community_biomass", lower_bound=0, upper_bound=100)
    comm_drain.add_metabolites(drainmet)
    newmodels[sample].add_reactions([comm_biorxn,comm_drain])
    # Close every per-ASV biomass exchange (both bounds set to 0).
    for bioex_id in biomass_exchange_ids[sample]:
        rxn = newmodels[sample].reactions.get_by_id(bioex_id)
        rxn.lower_bound = 0
        rxn.upper_bound = 0
    # Open one biomass exchange at a time (bounds 0..100), optimise it as the
    # objective, print the objective value, then close it again.
    for bioex_id in biomass_exchange_ids[sample]:
        rxn = newmodels[sample].reactions.get_by_id(bioex_id)
        rxn.lower_bound = 0
        rxn.upper_bound = 100
        newmodels[sample].objective = bioex_id
        solution = newmodels[sample].optimize()
        print(bioex_id,solution.objective_value)
        rxn.lower_bound = 0
        rxn.upper_bound = 0
    # Final objective is the community biomass reaction; the model is written to
    # JSON before this last optimisation, whose result is only stored in `solution`.
    newmodels[sample].objective = "bio1"
    cobra.io.save_json_model(newmodels[sample], 'nboutput/'+sample+'.json')
    solution = newmodels[sample].optimize()
# Persist the bookkeeping dictionaries for the later cells.
util.kbdevutil.save("constraints", constraints)
util.kbdevutil.save("biomass_compounds", biomass_compounds)
util.kbdevutil.save("abundance_by_name", abundance_by_name)
util.kbdevutil.save("species_reactions", species_reactions)
util.kbdevutil.save("biomass_exchange_ids", biomass_exchange_ids)

Processing sample: Sample2
Processing ASV: ASV00002;seqs=7303020;samples=1403 with abundance: 0.0011664528169835532 in sample Sample2
<Solution 151.387 at 0x7fa1139ebe50>
Processing ASV: ASV00003;seqs=7068340;samples=1916 with abundance: 0.015863758310976323 in sample Sample2
<Solution 280.109 at 0x7fa11404d130>
Processing ASV: ASV00004;seqs=6473178;samples=1764 with abundance: 0.8305144056922897 in sample Sample2
<Solution 57.494 at 0x7fa0f1683d00>
Processing ASV: ASV00005;seqs=5940024;samples=3018 with abundance: 0.01123682880360823 in sample Sample2
<Solution 150.594 at 0x7fa114969430>
Processing ASV: ASV00006;seqs=5698377;samples=2075 with abundance: 0.14121855437614214 in sample Sample2
<Solution 149.949 at 0x7fa0d37a4f70>
EX_biomass(e)_ASV00002 100.0
EX_biomass(e)_ASV00003 100.0
EX_biomass(e)_ASV00004 59.690226007154585
EX_biomass(e)_ASV00005 100.0
EX_biomass(e)_ASV00006 100.0


# Add elemental uptake constraint

In [14]:
from optlang.symbolics import Zero, add

# Upper limit on the weighted exchange flux for each element, keyed by element symbol.
element_parameters = {"C":100}
if "constraints" not in globals():
    constraints = util.kbdevutil.load("constraints")

for sample in asv_abundances:
    community = newmodels[sample]
    sample_constraints = constraints[sample]

    # Remove constraints left over from a previous run of this cell.
    for stale in sample_constraints.get("elements", {}).values():
        community.remove_cons_vars(stale)
    sample_constraints["elements"] = {}

    for element, value in element_parameters.items():
        # Map each exchange direction variable to the number of atoms of `element`
        # it moves per unit flux.
        coef = {}
        for rxn in community.reactions:
            if rxn.id[0:3] != "EX_":
                continue
            total = sum(
                metabolite.elements[element] * stoich
                for metabolite, stoich in rxn.metabolites.items()
                if element in metabolite.elements
            )
            if total < 0:
                # Net consumption in the forward direction: weight the reverse variable.
                coef[rxn.reverse_variable] = -1*total
            elif total > 0:
                coef[rxn.forward_variable] = total

        # The constraint is created empty, registered with the model, and only then
        # given its coefficients.
        uptake_limit = community.problem.Constraint(Zero, lb=None, ub=value, name=element+"_element_uptake")
        sample_constraints["elements"][element] = uptake_limit
        community.add_cons_vars(uptake_limit)
        community.solver.update()
        if coef:
            uptake_limit.set_linear_coefficients(coef)

Deleting existing constraint


# Adding comm-kinetics constraint

In [15]:
# For every ASV, bound the summed forward+reverse flux of its reactions by
# kinetic_coef * abundance * (net flux of "bio1").
kinetic_coef = 400
if "constraints" not in globals():
    constraints = util.kbdevutil.load("constraints")
abundance_by_name = util.kbdevutil.load("abundance_by_name")
species_reactions = util.kbdevutil.load("species_reactions")

for sample in asv_abundances:
    community = newmodels[sample]

    # Removing constraints if they're there
    for stale in constraints[sample].get("kinetic", {}).values():
        community.remove_cons_vars(stale)
    constraints[sample]["kinetic"] = {}

    for asv, abundance in abundance_by_name[sample].items():
        biorxn = community.reactions.get_by_id("bio1")
        # Community biomass terms, on the same side as the species fluxes so the
        # whole expression is constrained to be <= 0.
        coef = {
            biorxn.forward_variable: -1*kinetic_coef*abundance,
            biorxn.reverse_variable: kinetic_coef*abundance,
        }
        # Each species reaction contributes both of its direction variables with weight 1.
        for rxn_id in species_reactions[sample][asv]:
            rxn = community.reactions.get_by_id(rxn_id)
            for variable in (rxn.forward_variable, rxn.reverse_variable):
                coef[variable] = 1

        # Register the empty constraint first; coefficients are attached afterwards.
        kinetic_limit = community.problem.Constraint(Zero, lb=None, ub=0, name=asv+"_kinetic")
        constraints[sample]["kinetic"][asv] = kinetic_limit
        community.add_cons_vars(kinetic_limit)
        community.solver.update()
        if coef:
            kinetic_limit.set_linear_coefficients(coef)
        

Deleting existing constraints
Deleting existing constraints
Deleting existing constraints
Deleting existing constraints
Deleting existing constraints


# Constraining model to produce biomass near max value

In [19]:
# Force each community model to keep its current objective at or above a
# fraction of the optimum found right now.
# NOTE(review): `Zero` comes from the optlang import in the elemental-uptake cell,
# and the result depends on whichever objective is active when this cell runs.
fraction_optimal_growth = 0.8
if "constraints" not in globals():
    constraints = util.kbdevutil.load("constraints")
for sample in asv_abundances:
    # Drop the constraint from a previous run first, so the optimum is computed
    # without it.
    if "objconst" in constraints[sample]:
        newmodels[sample].remove_cons_vars(constraints[sample]["objconst"])
    obj_value = newmodels[sample].slim_optimize()
    # Copy the linear coefficients of the active objective.
    coef = newmodels[sample].solver.objective.get_linear_coefficients(
        newmodels[sample].solver.objective.variables
    )
    # Use the configured fraction; it was previously a hard-coded 0.8, so editing
    # fraction_optimal_growth had no effect.
    constraints[sample]["objconst"] = newmodels[sample].problem.Constraint(
        Zero, lb=fraction_optimal_growth*obj_value, ub=None, name="objconst"
    )
    newmodels[sample].add_cons_vars(constraints[sample]["objconst"])
    newmodels[sample].solver.update()
    constraints[sample]["objconst"].set_linear_coefficients(coef)

# Minimizing probabilities and maximizing metabolomics agreement

In [33]:
# Replace each community model's objective with a minimisation that
#   * rewards exchange flux in the direction of the metabolomics sign and
#     heavily penalises the opposite direction,
#   * lightly penalises exchanges without metabolomics data,
#   * penalises reactions carrying a "probability" note by (1 - probability**prob_exp),
#     floored at min_prob.
prob_exp = 1
min_prob = 0.05
ex_weight = 1
media_coef = 100
# Penalty on flux that runs against the measured direction.
opposing_flux_penalty = 1000
metabolomics_data = util.kbdevutil.load("metabolomics_data")
# Metabolite ids matched to an exchange reaction (shared across all samples).
found = {}
# sample -> metabolite id -> [exchange reaction id, measured value]
metabolomics_exchanges = {}
for sample in asv_abundances:
    metabolomics_exchanges[sample] = {}
    coef = {}
    row = metabolomics_data["RC-ABX_-1.5"]
    for rxn in newmodels[sample].reactions:
        if rxn.id[0:3] == "EX_":
            # Drops the 3-character "EX_" prefix and the last 3 characters of the id.
            # NOTE(review): ids that do not end in a 3-character suffix are
            # truncated (the output shows "biomass(e)_ASV00") and always land in the
            # NO_DATA branch - confirm this is acceptable.
            met_id = rxn.id[3:-3]
            if met_id in row:
                found[met_id] = row[met_id]
                if row[met_id] > 0:
                    print("FORWARD",met_id)
                    # Stored under the sample; this was previously written to the
                    # top level of metabolomics_exchanges, mixing metabolite ids
                    # with sample names.
                    metabolomics_exchanges[sample][met_id] = [rxn.id,row[met_id]]
                    coef[rxn.forward_variable] = -1*media_coef
                    coef[rxn.reverse_variable] = opposing_flux_penalty
                elif row[met_id] < 0:
                    metabolomics_exchanges[sample][met_id] = [rxn.id,row[met_id]]
                    print("REVERSE",met_id)
                    coef[rxn.reverse_variable] = -1*media_coef
                    coef[rxn.forward_variable] = opposing_flux_penalty
                # A value that is neither > 0 nor < 0 gives the exchange no coefficient.
            else:
                print("NO_DATA",met_id)
                coef[rxn.forward_variable] = ex_weight
                coef[rxn.reverse_variable] = ex_weight
        elif "probability" in rxn.notes:
            # Same weight on both directions.
            weight = max(
                min_prob, (1 - float(rxn.notes["probability"]) ** prob_exp)
            )
            coef[rxn.forward_variable] = weight
            coef[rxn.reverse_variable] = weight
    newmodels[sample].objective = newmodels[sample].problem.Objective(Zero, direction="min")
    newmodels[sample].objective.set_linear_coefficients(coef)
    # Report measured metabolites that matched no exchange reaction.
    for item in row:
        if item not in found:
            print("NOT_FOUND",item)

NO_DATA 12dgr180
NO_DATA 15dap
NO_DATA 2obut
NO_DATA 34dhphe
NO_DATA 4abut
NO_DATA 4ahmmp
NO_DATA 7a_czp
NO_DATA C02528
NO_DATA acald
REVERSE acgam
NO_DATA actn_R
NO_DATA adn
NO_DATA adocbl
NO_DATA ala_D
REVERSE ala_L
NO_DATA alaasp
NO_DATA alagln
NO_DATA alaglu
NO_DATA alagly
NO_DATA alahis
NO_DATA alaleu
NO_DATA alathr
NO_DATA anzp
REVERSE arab_L
NO_DATA arabttr
NO_DATA arbt
NO_DATA arg_L
NO_DATA arsenb
NO_DATA biomass(e)_ASV00
NO_DATA btn
NO_DATA ca2
NO_DATA cd2
NO_DATA cgly
NO_DATA chlphncl
NO_DATA chol
REVERSE cholate
NO_DATA chols
NO_DATA cl
NO_DATA co2
NO_DATA cobalt2
NO_DATA crn
NO_DATA ctbt
NO_DATA cu2
NO_DATA czp
NO_DATA dfdcytd
NO_DATA dfduri
NO_DATA dgchol
NO_DATA dhna
NO_DATA dopa
REVERSE drib
NO_DATA etoh
NO_DATA fe2
NO_DATA fe3
NO_DATA fecrm
NO_DATA fol
NO_DATA for
FORWARD fru
REVERSE gal
NO_DATA gam
NO_DATA gbbtn
NO_DATA gchola
REVERSE glc_D
NO_DATA gln_L
REVERSE glu_L
REVERSE gly
NO_DATA glyasn
NO_DATA glyasp
NO_DATA glyb
NO_DATA glyc
NO_DATA glycys
NO_DATA glygln
NO_D

# Test code

In [26]:
# Dump the solver's text form of one community model to disk, then solve the
# model and print the solution summary.
sample = "Sample2"
lp_text = str(newmodels[sample].solver)
with open("nboutput/test.lp", "w") as out:
    out.write(lp_text)
solution = newmodels[sample].optimize()
print(solution)

<Solution 3424.335 at 0x7fa0d35a23a0>
