In [1]:
from __future__ import print_function

import cobra
import cobra.test
# import mackinac
import numpy as np
import csv
import glob
import pickle
import pandas as pd
import time
import sys
from copy import deepcopy
from collections import defaultdict
from cobra.flux_analysis import gapfill
from cobra.flux_analysis import pfba
from cobra.flux_analysis.parsimonious import add_pfba
from cobra.flux_analysis import sample
from cobra.core.solution import get_solution

# Set default logger to python logger to avoid warnings given when adding reactions and/or metabolites
# because "cobra.core.model" doesn't innately have a logger.
import logging
logging.basicConfig()
logger = logging.getLogger('logger')

In [2]:
def set_media(model, media, universal, verbose=False):
    """Restrict uptake in ``model`` to the compounds listed in ``media``.

    Every reaction whose id starts with ``EX_`` and ends with ``_e`` first gets
    its lower bound set to 0. Then, for each media entry, the reaction
    ``EX_<compound id>_e`` gets a lower bound of -1000. If the model has no
    such reaction, a new exchange reaction with bounds (-1000, 1000) is built
    from the matching ``<compound id>_e`` metabolite of ``universal`` and added
    to the model. ``model`` is modified in place.

    Parameters
    ----------
    model : cobra.Model
        Model whose exchange bounds are rewritten.
    media : iterable of [name, compound_id]
        Only index 1 (the compound id, without compartment suffix) is read.
    universal : cobra.Model
        Source of metabolites for exchange reactions that must be created.
    verbose : bool, optional
        When True, print a line for every exchange reaction that is created.

    Returns
    -------
    None

    Notes
    -----
    A compound that is missing from both the model's exchanges and
    ``universal.metabolites`` makes ``get_by_id`` fail (a KeyError in cobra's
    DictList); that error is not caught here.
    """
    # Close uptake on every exchange reaction and remember all reaction ids.
    reaction_ids = set()
    for rxn in model.reactions:
        reaction_ids.add(rxn.id)
        if rxn.id.startswith('EX_') and rxn.id.endswith('_e'):
            rxn.lower_bound = 0.0

    added_any = False
    for metabolite in media:
        met = metabolite[1]+'_e'
        exchange_id = 'EX_'+met
        if exchange_id in reaction_ids:
            model.reactions.get_by_id(exchange_id).lower_bound = -1000.
            continue

        # Create exchange reaction and add to model
        if verbose:
            print("added exchange rxn for " + met)
        new_exchange = cobra.Reaction(exchange_id)
        new_exchange.name = met + ' exchange'
        # Work on a copy so the universal model keeps ownership of its own
        # metabolite object.
        met_obj = universal.metabolites.get_by_id(met).copy()
        new_exchange.add_metabolites({met_obj:-1})
        new_exchange.bounds = (-1000., 1000.)
        model.add_reactions([new_exchange])
        # Keep the id set current so a compound listed twice in `media`
        # is not added a second time.
        reaction_ids.add(exchange_id)
        added_any = True

    if added_any:
        model.repair()

In [None]:
# Load the genome IDs: one ID per line, surrounding whitespace stripped
with open('../Data/20_species_1023_genomes.csv') as csvfile:
    genome_ids_list = [line.strip() for line in csvfile]
len(genome_ids_list)

In [None]:
# Initialize functions
# Initialize global variables
# Set media cpds
# Open model
# Add water transporter and fix name
# loop: Change media
    # turn off all exchanges
    # Turn on correct exchanges, add if missing
    
    # Loop: Set demand reaction
        # Check for production
        # Gapfill with probanno
        # Determine the reactions that were added to gapfill and identify the likelihoods
        # Optimize and find solution
        # Average likelihoods to create metric for media condition and specific demand reaction
        # save information in data structure


In [None]:
# Gap-fill the model for each amino acid in `aas` and score the flux-carrying
# reactions by their average likelihood.
# NOTE: `bsm`, `M9_sources` and `aas` are defined in the media / amino-acid
# cells further down the notebook; those cells must be run before this one.
t = time.time()
counter = 0

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# pickle can execute arbitrary code while loading: only open trusted,
# locally generated likelihood files. The context manager closes the handle.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)

# Ensure free water exchange
water_transport = model.reactions.get_by_id('rxn05319_c')
water_transport.name = "Water transport"
water_transport.bounds = (-1000., 1000.)

# Create specific Media List
media_list = bsm + M9_sources
set_media(model, media_list, universal, verbose=False)

# Run through each amino acid to check for production
aa_like = {}
sys.stdout.write('Starting Loop')
for aa_list in aas:
    sys.stdout.write('\n'+ str(counter))
    aa = aa_list[1]+'_c'
    # Everything changed inside this context (demand reaction, objective and
    # gap-filled reactions) is rolled back on exit, so every amino acid is
    # evaluated against the same base model.
    with model:
        # Add Demand Reaction for metabolite
        metabolite = model.metabolites.get_by_id(aa)
        demand = model.add_boundary(metabolite, type='demand')
        model.objective = demand
        # Gapfill
        sys.stdout.write('...gapfilling...')
        gaps_to_fill = gapfill(model, universal, demand_reactions=False) # Update to probannopy gapfill function; use Gurobi
        # Fill the gaps
        for gap in gaps_to_fill:
            model.add_reactions(gap)
        # Optimize with full pathway
        sys.stdout.write('optimizing...')
        solution = model.optimize()
    # Find reactions that carry flux and their likelihood score
    df = solution.fluxes.to_frame()
    active = df.loc[(abs(df['fluxes'])) > 0.1]
    like_list = []
    for rxn in list(active.index):
        if rxn.startswith('rxn'):
            try:
                like_list.append(likelihoods[rxn])
            except Exception:
                # No likelihood available for this reaction; leave it out.
                pass
    # An empty list has no mean; record NaN explicitly instead of letting
    # numpy emit a warning.
    avg_like = np.mean(like_list) if like_list else np.nan
    sys.stdout.write('Average Likelihood of: ' + aa_list[1] + ' is ' + str(avg_like))
    aa_like[aa_list[1]] = avg_like
    counter += 1

elapsed = time.time() - t
print("Time to complete:" + str(elapsed/60) + "mins")

In [None]:
aa_like

In [None]:
# Report any demand (DM...) or exchange (EX...) reaction found in the
# universal reaction bag; nothing is printed when there are none.
prefix_messages = (
    ('DM', 'DM in reaction bag'),
    ('EX', 'EX in reaction bag'),
)
for rxn in universal.reactions:
    for prefix, message in prefix_messages:
        if rxn.id.startswith(prefix):
            print(message)
            break

In [None]:
gaps_to_fill

In [None]:
print(likelihoods['rxn00898_c'])
print(likelihoods['rxn02186_c'])

In [None]:
# Average likelihood over every 'rxn...' reaction in the model.
# Reactions whose likelihood lookup fails are collected in `passed_rxns`.
all_likes = []
passed_rxns = []
for rxn in model.reactions:
    if not rxn.id.startswith('rxn'):
        continue
    try:
        all_likes.append(likelihoods[rxn.id])
    except Exception:
        # Catch lookup failures only; a bare `except` would also swallow
        # KeyboardInterrupt.
        passed_rxns.append(rxn.id)

counter = len(all_likes)          # reactions with a likelihood
pass_counter = len(passed_rxns)   # reactions without one
global_avg = np.mean(all_likes) if all_likes else np.nan

print(global_avg)
print(counter)
print(pass_counter)

In [None]:
passed_rxns

In [None]:
# Print the name of each reaction of interest, in the listed order
for rxn_id in (
    'rxn02374_c', 'rxn05319_c', 'rxn04457_c', 'rxn02916_c', 'rxn03012_c',
    'rxn10571_c', 'rxn04132_c', 'rxn05195_c', 'rxn05468_c', 'rxn04133_c',
    'rxn05467_c', 'rxn12215_c', 'rxn05522_c', 'rxn08688_c', 'rxn05238_c',
    'rxn02976_c', 'rxn08764_c', 'rxn13022_c',
):
    print(model.reactions.get_by_id(rxn_id).name)

In [None]:
# Print the names for a subset of the reactions of interest.
# Disabled ids: rxn05319_c, rxn10571_c, rxn05195_c, rxn05467_c, rxn12215_c,
#               rxn05522_c, rxn08688_c, rxn08764_c, rxn13022_c
for rxn_id in (
    'rxn02374_c', 'rxn04457_c', 'rxn02916_c', 'rxn03012_c', 'rxn04132_c',
    'rxn05468_c', 'rxn04133_c', 'rxn05238_c', 'rxn02976_c',
):
    print(model.reactions.get_by_id(rxn_id).name)

In [None]:
model.reactions.get_by_id('rxn02374_c')

In [None]:
model.reactions.get_by_id('rxn04457_c')

In [None]:
model.reactions.get_by_id('rxn02916_c')

In [None]:
model.reactions.get_by_id('rxn03012_c')

In [None]:
model.reactions.get_by_id('rxn04132_c')

In [None]:
model.reactions.get_by_id('rxn05468_c')

In [None]:
model.reactions.get_by_id('rxn04133_c')

In [None]:
model.reactions.get_by_id('rxn05238_c')

In [None]:
model.reactions.get_by_id('rxn02976_c')

In [None]:
# turn off O2 exchange
model.reactions.get_by_id('EX_cpd00007_e').lower_bound = 0.

In [3]:
# Basal Synthetic Media (BSM) as [display name, compound id] pairs
bsm = [
    ["H+", "cpd00067"],
    ["H2O", "cpd00001"],
    ["CO2", "cpd00011"],
    ["O2", "cpd00007"],
    ["N2", "cpd00528"],
    # ["H2", "cpd11640"],  # only with no O2

    # Ions
    ["K+", "cpd00205"],
    ["Na+", "cpd00971"],
    ["Mg", "cpd00254"],
    ["Mn2+", "cpd00030"],
    ["Fe2+", "cpd10515"],  # iron ion in heme
    ["Ca2+", "cpd00063"],  # calcium pantothenate: cpd19112

    # B vitamins and related compounds
    ["Vitamin B12r", "cpd00423"],  # C62H91CoN13O14P; cobalamin: cpd03424, cpd00730; not present in any exchange reactions
    ["Cobinamide", "cpd03422"],  # EXs; related to cobalamin (B12), added to ensure cells have access to B12
    ["BIOT", "cpd00104"],  # biotin (B7), C10H15N2O3S
    ["PAN", "cpd00644"],  # pantothenate (B5), C9H16NO5
    ["Folate", "cpd00393"],  # B9, C19H17N7O6
    ["Niacin", "cpd00218"],  # B3, C6H4NO2
    ["Pyridoxal", "cpd00215"],  # B6, C8H9NO3
    ["Riboflavin", "cpd00220"],  # B2, C17H19N4O6
    ["thiamin", "cpd00305"],  # B1, C12H17N4OS

    # ["Phosphate", "cpd00009"],  # HO4P; in M9 defaults

    ["Thioglycolate", "cpd01415"],  # C2H3O2S; not present in any exchange reactions
    # ["Sulfate", "cpd00048"],  # O4S; in M9 defaults

    ["Acetate", "cpd00029"],  # C2H3O2; not present in any exchange reactions
    ["Citrate", "cpd00137"],  # C6H5O7; consider removing
    # ["Polysorbate 60", "cpd24450"],  # C35H68O10; almost tween 80; not present in any reactions
    # ["Ethyl acetate", "cpd00633"],  # C4H8O2; not present in any exchange reactions, only present in one reaction at all

    ["ABEE", "cpd00443"],  # aminobenzoate, C7H6NO2; not present in any exchange reactions
]

# Ions from M9 media that could potentially be added to BSM
M9_ions = [
    ["Cl-", "cpd00099"],
    ["Co2+", "cpd00149"],
    ["Cu2+", "cpd00058"],
    ["Fe3", "cpd10516"],
    ["Ni2+", "cpd00244"],
    ["Selenate", "cpd03396"],
    ["Selenite", "cpd03387"],
    ["Zn2+", "cpd00034"],
    # Left out: ["Sodium molybdate", "cpd11145"] -- this doesn't connect to anything
]

# Environmental Metabolites with Exchange reactions
[
#     ['CO2','cpd00011'], #EXs : 
#     ['Ca2+','cpd00063'], #EXs : 
#     ['Cd2+','cpd01012'], #EXs : Removed because toxic
#     ['chromate','cpd11595'], #EXs : Removed because toxic
#     ['Cl-','cpd00099'], #EXs : 
#     ['Co2+','cpd00149'], #EXs : In M9
#     ['Cu2+','cpd00058'], #EXs : In M9
#     ['Fe2+','cpd10515'], #EXs : 
#     ['H+','cpd00067'], #EXs : 
#     ['H2','cpd11640'], #EXs : 
#     ['H2O','cpd00001'], #EXs : 
#     ['Hg2+','cpd00531'], #EXs : Removed because toxic
#     ['K+','cpd00205'], #EXs : 
#     ['Mg','cpd00254'], #EXs : 
#     ['Mn2+','cpd00030'], #EXs : 
#     ['Na+','cpd00971'], #EXs : 
#     ['Ni2+','cpd00244'], #EXs : In M9
#     ['O2','cpd00007'], #EXs : 
#     ['Pb','cpd04097'], #EXs : Removed because toxic
#     ['Zn2+','cpd00034'], #EXs : In M9
#     ['fe3','cpd10516'] #EXs : In M9
]

# M9 Base : https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4932939/
# [
#     ['Ca2+','cpd00063'],
#     ['Cl-','cpd00099'],
#     ['CO2','cpd00011'],
#     ['Co2+','cpd00149'],
#     ['Cu2+','cpd00058'],
#     ['Fe2+','cpd10515'],
#     ['Fe3','cpd10516'],
#     ['H+','cpd00067'],
#     ['H2O','cpd00001'],
#     ['K+','cpd00205'],
#     ['Mg','cpd00254'],
#     ['Mn2+','cpd00030'],
#     ['Sodium molybdate','cpd11145'],
#     ['Na+','cpd00971'],
#     ['Ni2+','cpd00244'],
#     ['Selenate','cpd03396'],
#     ['Selenite','cpd03387'],
#     ['Zn2+','cpd00034']
# ]

# Default M9 sources of carbon, nitrogen, phosphorus and sulfur
M9_sources = [
    ["D-Glucose", "cpd00027"],
    ["NH3", "cpd00013"],  # this is actually NH4 (ammonium)
    ["Phosphate", "cpd00009"],
    ["Sulfate", "cpd00048"],
]

# Vitamins: only menaquinone is active in this list
vit_k = [
    ["Menaquinone 7", "cpd11606"],  # EXs; vitamin K2; add when there is no O2
]
# Disabled entries (all tagged EXs), kept for reference:
#   BIOT cpd00104 (Biotin) | Cobinamide cpd03422 (related to cobalamin, B12)
#   Folate cpd00393 | Niacin cpd00218 | PAN cpd00644 (Pantothenate)
#   Pyridoxal cpd00215 | Riboflavin cpd00220 | Thiamin cpd00305

# For aerobic simulations, O2 was added with a lower bound of −20 and to 0 for anaerobic simulations.

# DNA/RNA related metabolites that are currently enabled
rna_bases = [
    ["Adenosine", "cpd00182"],  # EXs; in BSM (as adenine)
    ["Cytosine", "cpd00307"],  # EXs
    ["Guanosine", "cpd00311"],  # EXs; in BSM (as Guanine)
    ["Thymidine", "cpd00184"],  # EXs; in BSM
    ["Uridine", "cpd00249"],  # EXs; in BSM (as uracil)
]
# Disabled entries (all tagged EXs), kept for reference:
#   35ccmp cpd00696 | AMP cpd00018 | Adenosine 3-5-bisphosphate cpd00045
#   Deoxyadenosine cpd00438 | Deoxycytidine cpd00654
#   Deoxyguanosine cpd00277 (in BSM) | Deoxyinosine cpd03279
#   Deoxyuridine cpd00412 | GMP cpd00126 | GTP cpd00038 | Inosine cpd00246
#   HYXN cpd00226 (Hypoxanthine) | Nicotinamide ribonucleotide cpd00355
#   TTP cpd00357 (Deoxythymidine triphosphate)
#   Thyminose cpd01242 (deoxyribose) | Uracil cpd00092
#   XAN cpd00309 (Xanthine) | Xanthosine cpd01217
#   dATP cpd00115 | dGTP cpd00241 | dTMP cpd00298

# Check to see if these metabolites are used in pathways? Should I add some of these to media? 
# Yes for ATP, and GTP. (TTP, CTP as well?)

In [None]:
bsm + M9_sources + rna_bases

In [None]:
# Alternative nutrient sources as [display name, compound id] pairs.
# Each list is bound to a name so it can be combined with the other media
# lists and passed to set_media(); previously the literals were evaluated
# and thrown away.

# Carbon Sources from all Exchanges and additional interesting sources
carbon_sources = [
    ['4-Hydroxybenzoate','cpd00136'], #EXs : found in coconuts
    ['2-keto-3-deoxygluconate','cpd00176'], #EXs : degraded pectin product
    ['Amylotriose','cpd01262'], #EXs : 
    ['CELB','cpd00158'], #EXs : Cellobiose
    ['D-Fructose','cpd00082'], #EXs : 
    ['D-Glucose','cpd00027'], #EXs : 
    ['D-Mannitol','cpd00314'], #EXs : sweetener that is poorly absorbed in the gut
    ['D-Mannose','cpd00138'], #EXs : related to mucin
    ['Ribose','cpd00105'], #EXs : 
    ['Dextrin','cpd11594'], #EXs : 
    ['Dulcose','cpd01171'], #EXs : Galactitol
    ['GLCN','cpd00222'], #EXs : Gluconate 
    ['GLUM','cpd00276'], #EXs : Glucosamine
    ['Galactose','cpd00108'], #EXs : 
    ['L-Arabinose','cpd00224'], #EXs : 
    ['L-Inositol','cpd00121'], #EXs : 
    ['L-Lactate','cpd00159'], #EXs : 
    ['L-Malate','cpd00130'], #EXs : 
    ['Glycerol','cpd00100'], #EXs : 
    ['LACT','cpd00208'], #EXs : lactose
    ['Maltohexaose','cpd01329'], #EXs : 
    ['Maltose','cpd00179'], #EXs : 
    ['Melibiose','cpd03198'], #EXs :
    ['Palmitate','cpd00214'], #EXs : 
    ['Propionate','cpd00141'], #EXs : 
    ['Salicin','cpd01030'], #EXs : 
    ['Sorbitol','cpd00588'], #EXs : 
    ['Stachyose','cpd01133'], #EXs : 
    ['Succinate','cpd00036'], #EXs : 
    ['Sucrose','cpd00076'], #EXs : 
    ['TRHL','cpd00794'], #EXs : Trehalose
    ['Ursin','cpd03696'], #EXs : Arbutin
    ['Xylose','cpd00154'], #EXs : 
    ['hexadecenoate','cpd15237'] #EXs : 
]

# Nitrogen Sources
nitrogen_sources = [
#     ['NH3','cpd00013'], #EXs : 
    ['Allantoin','cpd01092'], #EXs : degradation product of purines
    ['BET','cpd00540'], #EXs : Betaine
    ['Choline','cpd00098'], #EXs : Found in milk
    ['GABA','cpd00281'], #EXs : Could also be a carbon source
    ['Nitrate','cpd00209'], #EXs : 
    ['Nitrite','cpd00075'], #EXs : 
    ['Spermidine','cpd00264'], #EXs : 
    ['Urea','cpd00073'], #EXs : 
    ['crotonobetaine','cpd08305'] #EXs : 
]

# Sulfur Sources
sulfur_sources = [
    ['H2S2O3','cpd00268'], #EXs : Thiosulfate
    ['Isethionate','cpd03048'], #EXs : C2H5O4S
#     ['Sulfate','cpd00048'], #EXs : O4S
    ['Sulfite','cpd00081'], #EXs : HO3S
    ['Sulfoacetate','cpd09878'], #EXs : C2H2O5S
    ['ethanesulfonate','cpd11579'], #EXs : C2H5O3S
    ['methanesulfonate','cpd08023'] #EXs : CH3O3S
]

# Phosphorus Sources
phosphorus_sources = [
    ['Phosphate','cpd00009'] #EX :
]

# Keep the cell output unchanged: the last list is still displayed.
phosphorus_sources

In [4]:
# Amino acids as [display name, compound id] pairs (all tagged EXs).
# Trailing numbers are the running count noted for glycine and the L-forms;
# no entry is numbered 6.
aas = [
    # D-amino acids
    ["D-Alanine", "cpd00117"],
    ["D-Glutamate", "cpd00186"],
    ["D-Methionine", "cpd00637"],
    ["D-Serine", "cpd00550"],
    # Glycine and L-amino acids
    ["Glycine", "cpd00033"],  # 1
    ["L-Alanine", "cpd00035"],  # 2
    ["L-Arginine", "cpd00051"],  # 3
    ["L-Asparagine", "cpd00132"],  # 4
    ["L-Aspartate", "cpd00041"],  # 5

    ["L-Cysteine", "cpd00084"],  # 7
    ["L-Glutamate", "cpd00023"],  # 8
    ["L-Glutamine", "cpd00053"],  # 9
    ["L-Histidine", "cpd00119"],  # 10
    ["L-Isoleucine", "cpd00322"],  # 11
    ["L-Leucine", "cpd00107"],  # 12
    ["L-Lysine", "cpd00039"],  # 13
    ["L-Methionine", "cpd00060"],  # 14
    ["L-Phenylalanine", "cpd00066"],  # 15
    ["L-Proline", "cpd00129"],  # 16
    ["L-Serine", "cpd00054"],  # 17
    ["L-Threonine", "cpd00161"],  # 18
    ["L-Tryptophan", "cpd00065"],  # 19
    ["L-Tyrosine", "cpd00069"],  # 20
    ["L-Valine", "cpd00156"],  # 21
]
# Explore leave one out with core amino acids. 

# Dipeptides and other amino-acid related metabolites (all tagged EXs)
aa_related = [
    ["2-Oxoglutarate", "cpd00024"],
    ["Ala-Gln", "cpd11587"],
    ["Ala-His", "cpd11584"],
    ["Ala-Leu", "cpd11583"],
    ["ala-L-asp-L", "cpd11593"],
    ["ala-L-glu-L", "cpd11586"],
    ["ala-L-Thr-L", "cpd11582"],
    ["Aminoethanol", "cpd00162"],  # Ethanolamine
    ["Carnitine", "cpd00266"],
    ["Chorismate", "cpd00216"],
    ["L-Cysteate", "cpd00395"],
    ["Cys-Gly", "cpd01017"],
    ["Gly-Cys", "cpd15603"],
    ["Gly-Gln", "cpd11580"],
    ["Gly-Leu", "cpd15604"],
    ["Gly-Met", "cpd11591"],
    ["Gly-Phe", "cpd15605"],
    ["Gly-Tyr", "cpd15606"],
    ["gly-asn-L", "cpd11581"],
    ["gly-asp-L", "cpd11589"],
    ["gly-glu-L", "cpd11592"],
    ["gly-pro-L", "cpd11588"],
    ["L-Methionine S-oxide", "cpd01914"],
    ["L-alanylglycine", "cpd11585"],
    ["L-methionine R-oxide", "cpd11576"],
    ["met-L-ala-L", "cpd11590"],
    ["S-Adenosyl-L-methionine", "cpd00017"],
    ["S-Methyl-L-methionine", "cpd02027"],
    ["S-Ribosylhomocysteine", "cpd02227"],
    ["N-Acetyl-D-glucosamine", "cpd00122"],
    ["N-Acetyl-D-mannosamine", "cpd00492"],
    ["Ornithine", "cpd00064"],
    ["Putrescine", "cpd00118"],
    ["Taurine", "cpd00210"],
    ["meso-2,6-Diaminopimelate", "cpd00516"],  # related to lysine
]

In [None]:
# Production
# H2O2 -- cpd00025
# Acetate -- cpd00029
# Butyrate -- cpd00211
# isobutyrate -- cpd01711
# GABA -- cpd00281
# ethanol -- cpd00363
# Propionate -- cpd00141
# formate -- cpd00047
# Valerate -- cpd00597
# Isovaleric acid -- cpd05178 (wrong eqn)
# sulforaphane -- 
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5541232/
# thiamin -- cpd00305
# Pyridoxal phosphate (B6) -- cpd00016
# BIOT (biotin, B7) -- cpd00104
# (CH3)3NO (TMAO) -- cpd00811
# Indole-3-(carb)aldehyde -- cpd05401
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4337795/
# Acetaldehyde -- cpd00071
# Deoxycholate -- cpd02733
# Chorismate -- cpd00216
# Hexanoate -- cpd01113
# 

# Consumption
# Galactose -- cpd00108; cpd01112
# L-galactose -- cpd01257
# lactose -- cpd00208
# beta-lactose -- cpd01354
# sucrose -- cpd00076
# trehalose (TRHL) -- cpd00794
# maltose -- cpd00179
# D-Mannose -- cpd00138
# D-Fructose -- cpd00082
# Inulin -- cpd27312
# ethanol -- cpd00363
# Carnitine -- cpd00266
# Citrate -- cpd00137
# GLUM (D-glucosamine) -- cpd00276
# 


In [None]:
# Using pFBA
# Gap-fill and score a single amino acid (D-Alanine, cpd00117_c).
# NOTE: `bsm` and `M9_sources` come from the media-definition cell, which
# must be run before this one.

t = time.time()
counter = 0

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# pickle can execute arbitrary code while loading: only open trusted,
# locally generated likelihood files. The context manager closes the handle.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)

# Ensure free water exchange
water_transport = model.reactions.get_by_id('rxn05319_c')
water_transport.name = "Water transport"
water_transport.bounds = (-1000., 1000.)

# Create specific Media List
media_list = bsm + M9_sources
set_media(model, media_list, universal, verbose=False)

# The loop over `aas` is disabled here; only one amino acid is processed.
aa_like = {}
sys.stdout.write('Starting Loop')

sys.stdout.write('\n'+ str(counter))
aa = 'cpd00117_c'
# Add Demand Reaction for metabolite
metabolite = model.metabolites.get_by_id(aa)
demand = model.add_boundary(metabolite, type='demand')
model.objective = demand
# Gapfill
sys.stdout.write('...gapfilling...')
gaps_to_fill = gapfill(model, universal, demand_reactions=False) # Update to probannopy gapfill function; use Gurobi
# Fill the gaps
for gap in gaps_to_fill:
    model.add_reactions(gap)
# Optimize with full pathway
sys.stdout.write('optimizing...')
solution = pfba(model, objective = demand)
# Find reactions that carry flux
df = solution.fluxes.to_frame()
active = df.loc[(abs(df['fluxes'])) > 0.1]
# TODO: add demand reactions for any metabolites produced, but not consumed

# Acquire likelihood scores for reactions that carry flux
like_list = []
rxns_w_likes = []
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        try:
            like_list.append(likelihoods[rxn])
        except Exception:
            # No likelihood available for this reaction; leave it out.
            continue
        rxns_w_likes.append(rxn)
avg_like = np.mean(like_list) if like_list else np.nan
# Report and store under `aa`: `aa_list` only exists when the loop over
# `aas` is active, so using it here would pick up a stale value from an
# earlier cell (or fail on a fresh kernel).
sys.stdout.write('Average Likelihood of: ' + aa + ' is ' + str(avg_like))
aa_like[aa] = avg_like
# Remove demand and every gap-filling solution that was added
model.remove_reactions([demand])
for gap in gaps_to_fill:
    model.remove_reactions(gap)

# TODO: remove demands added to drain extra metabolites produced

counter += 1

elapsed = time.time() - t
print("\nTime to complete:" + str(elapsed/60) + "mins")
print('\n')
print(gaps_to_fill)
print(rxns_w_likes)
print(like_list)

In [None]:
print(gaps_to_fill)
print(rxns_w_likes)
print(like_list)
# print(likelihoods['rxn00904_c'])

In [None]:
# Collect the likelihood of every flux-carrying 'rxn...' reaction in `active`;
# reactions whose likelihood lookup fails are left out of both lists.
like_list = []
rxns_w_likes = []
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        try:
            like_list.append(likelihoods[rxn])
        except Exception:
            # Catch lookup failures only; a bare `except` would also
            # swallow KeyboardInterrupt.
            continue
        rxns_w_likes.append(rxn)

In [None]:
# Ids of all '_c' metabolites (other than the target `aa`) that take part in
# a flux-carrying 'rxn...' reaction; each such reaction id is printed.
all_mets = set()
for rxn in active.index:
    if not rxn.startswith('rxn'):
        continue
    print(rxn)
    rxn_obj = model.reactions.get_by_id(rxn)
    for met in rxn_obj.metabolites:
        if met.id != aa and met.id.endswith('_c'):
            all_mets.add(met.id)
all_mets

In [None]:
active

In [None]:
metabolite = model.metabolites.get_by_id(aa)
demand = model.add_boundary(metabolite, type='demand')
model.objective = demand

In [None]:
model.reactions.get_by_id('DM_cpd00117_c')

In [None]:
for met in all_mets:
    metabolite = model.metabolites.get_by_id(met)
    model.add_boundary(metabolite, type='demand')

In [None]:
len(model.boundary)

In [None]:
# Re-run pFBA on the current model and rescore the flux-carrying reactions
solution = pfba(model, objective = demand)
# Find reactions that carry flux
df = solution.fluxes.to_frame()
active = df.loc[(abs(df['fluxes'])) > 0.1]

# Acquire likelihood scores for reactions that carry flux
like_list = []
rxns_w_likes = []
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        try:
            like_list.append(likelihoods[rxn])
        except Exception:
            # No likelihood available for this reaction; leave it out.
            continue
        rxns_w_likes.append(rxn)
avg_like = np.mean(like_list) if like_list else np.nan
sys.stdout.write('Average Likelihood of: ' + aa + ' is ' + str(avg_like))
# Store under `aa`, the metabolite actually optimised; `aa_list` is a stale
# loop variable from an earlier cell and may name a different amino acid.
aa_like[aa] = avg_like

active

In [None]:
print(rxns_w_likes)
print(like_list)

In [None]:
model.reactions.get_by_id('DM_cpd00067_c')

In [None]:
model.boundary

In [None]:
# Remove the demand reaction 'DM_<metabolite id>' for every id in `all_mets`;
# ids without such a reaction are reported and skipped.
for met in all_mets:
    try:
        extra_demand = model.reactions.get_by_id('DM_'+met)
    except KeyError:
        # get_by_id found no reaction with this id
        print('skipped')
        print(met)
        continue
    model.remove_reactions([extra_demand])

In [None]:
len(model.boundary)

In [None]:
model.remove_reactions([model.reactions.get_by_id('DM_cpd00102_c')])

In [None]:
[model.reactions.get_by_id('DM_cpd00102_c')]

In [None]:
# Collect the likelihood of every flux-carrying 'rxn...' reaction in `active`;
# reactions whose likelihood lookup fails are left out of both lists.
like_list = []
rxns_w_likes = []
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        try:
            like_list.append(likelihoods[rxn])
        except Exception:
            # Catch lookup failures only; a bare `except` would also
            # swallow KeyboardInterrupt.
            continue
        rxns_w_likes.append(rxn)

In [None]:
print(likelihoods['rxn10481_c'])

In [None]:
model.remove_reactions(gaps_to_fill[0])

In [None]:
# model.solver = 'gurobi'

In [None]:
# Using pFBA with new media components (RNA bases + thymidine...)
# NOTE: `bsm`, `M9_sources` and `rna_bases` come from the media-definition
# cell, which must be run before this one.

t = time.time()
counter = 0

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# pickle can execute arbitrary code while loading: only open trusted,
# locally generated likelihood files. The context manager closes the handle.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)

# Ensure free water exchange
water_transport = model.reactions.get_by_id('rxn05319_c')
water_transport.name = "Water transport"
water_transport.bounds = (-1000., 1000.)

# Create specific Media List
media_list = bsm + M9_sources + rna_bases
set_media(model, media_list, universal, verbose=False)

# The loop over `aas` is disabled here; only one amino acid is processed.
aa_like = {}
sys.stdout.write('Starting Loop')

sys.stdout.write('\n'+ str(counter))
aa = 'cpd00117_c'
# Add Demand Reaction for metabolite
metabolite = model.metabolites.get_by_id(aa)
demand = model.add_boundary(metabolite, type='demand')
model.objective = demand
# Gapfill
sys.stdout.write('...gapfilling...')
gaps_to_fill = gapfill(model, universal, demand_reactions=False) # Update to probannopy gapfill function; use Gurobi
# Fill the gaps
for gap in gaps_to_fill:
    model.add_reactions(gap)
# Optimize with full pathway
sys.stdout.write('optimizing...')
solution = pfba(model, objective = demand)
# Find reactions that carry flux
df = solution.fluxes.to_frame()
active = df.loc[(abs(df['fluxes'])) > 0.1]

# Add demand reactions for all '_c' metabolites of the active reactions.
# They are not used for optimization, simply there to stop blocked reactions.
all_mets = set()
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        rxn_obj = model.reactions.get_by_id(rxn)
        for met in rxn_obj.metabolites:
            if met.id.endswith('_c') and met.id != aa:
                all_mets.add(met.id)

for met in all_mets:
    model.add_boundary(model.metabolites.get_by_id(met), type='demand')

# Optimize again with new model with additional demands for active metabolites
sys.stdout.write('optimizing_2...')
solution = pfba(model, objective = demand)
# Find reactions that carry flux
df = solution.fluxes.to_frame()
active = df.loc[(abs(df['fluxes'])) > 0.1]

# Acquire likelihood scores for reactions that carry flux
like_list = []
rxns_w_likes = []
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        try:
            like_list.append(likelihoods[rxn])
        except Exception:
            # No likelihood available for this reaction; leave it out.
            continue
        rxns_w_likes.append(rxn)
avg_like = np.mean(like_list) if like_list else np.nan
# TODO: change back to aa_list[1] once the loop over `aas` is re-enabled
sys.stdout.write('Average Likelihood of: ' + aa + ' is ' + str(avg_like))
aa_like[aa] = avg_like

# Remove aa demand, every gap-filling solution, and all other demands added
# to allow for free flowing flux
model.remove_reactions([demand])
for gap in gaps_to_fill:
    model.remove_reactions(gap)

for met in all_mets:
    try:
        extra_demand = model.reactions.get_by_id('DM_'+met)
    except KeyError:
        # get_by_id found no reaction with this id
        print('skipped')
        print(met)
        continue
    model.remove_reactions([extra_demand])

counter += 1

elapsed = time.time() - t
print("\nTime to complete:" + str(elapsed/60) + "mins")
print('\n')
print(gaps_to_fill)
print(rxns_w_likes)
print(like_list)

In [None]:
active

In [None]:
# Using pFBA with new media components (RNA bases + thymidine...)
# Remove reaction likelihoods of zero from model
# NOTE: `bsm`, `M9_sources` and `rna_bases` come from the media-definition
# cell, which must be run before this one.

t = time.time()
counter = 0

sys.stdout.write('Loading in models...')

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# pickle can execute arbitrary code while loading: only open trusted,
# locally generated likelihood files. The context manager closes the handle.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)

sys.stdout.write('Adding Water...')

# Ensure free water exchange
water_transport = model.reactions.get_by_id('rxn05319_c')
water_transport.name = "Water transport"
water_transport.bounds = (-1000., 1000.)

sys.stdout.write('Adjusting models...')

# Remove all reactions with zero likelihood and insert them into universal with GPRs
rxn_ids = [reaction.id for reaction in model.reactions]
rxn_id_zero_like = []
for rxn in rxn_ids:
    if rxn.startswith('rxn'):
        try:
            if likelihoods[rxn] == 0.0:
                rxn_id_zero_like.append(rxn)
        except Exception:
            # No likelihood available for this reaction; keep it in the model.
            pass

rxn_objs = [model.reactions.get_by_id(rxn) for rxn in rxn_id_zero_like]

# Move the model's versions of these reactions into the universal bag,
# replacing any reaction with the same id that is already there.
model.remove_reactions(rxn_objs)
universal.remove_reactions(rxn_id_zero_like)
universal.add_reactions(rxn_objs)

# Create specific Media List
media_list = bsm + M9_sources + rna_bases
set_media(model, media_list, universal, verbose=False)

# The loop over `aas` is disabled here; only one amino acid is processed.
aa_like = {}
sys.stdout.write('Starting Loop...')

sys.stdout.write('\n'+ str(counter))
aa = 'cpd00117_c'
# Add Demand Reaction for metabolite
metabolite = model.metabolites.get_by_id(aa)
demand = model.add_boundary(metabolite, type='demand')
model.objective = demand

# Gapfill
sys.stdout.write('Gapfilling...')
gaps_to_fill = gapfill(model, universal, demand_reactions=False) # Update to probannopy gapfill function; use Gurobi
# Fill the gaps
for gap in gaps_to_fill:
    model.add_reactions(gap)
# Optimize with full pathway
sys.stdout.write('Optimizing...')
solution = pfba(model, objective = demand)
# Find reactions that carry flux
df = solution.fluxes.to_frame()
active = df.loc[(abs(df['fluxes'])) > 0.1]

# Add demand reactions for all '_c' metabolites of the active reactions.
# They are not used for optimization, simply there to stop blocked reactions.
all_mets = set()
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        rxn_obj = model.reactions.get_by_id(rxn)
        for met in rxn_obj.metabolites:
            if met.id.endswith('_c') and met.id != aa:
                all_mets.add(met.id)

for met in all_mets:
    model.add_boundary(model.metabolites.get_by_id(met), type='demand')

# Optimize again with new model with additional demands for active metabolites
sys.stdout.write('Optimizing Again...')
solution = pfba(model, objective = demand)
# Find reactions that carry flux
df = solution.fluxes.to_frame()
active = df.loc[(abs(df['fluxes'])) > 0.1]

# Acquire likelihood scores for reactions that carry flux
like_list = []
rxns_w_likes = []
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        try:
            like_list.append(likelihoods[rxn])
        except Exception:
            # No likelihood available for this reaction; leave it out.
            continue
        rxns_w_likes.append(rxn)
avg_like = np.mean(like_list) if like_list else np.nan
# TODO: change back to aa_list[1] once the loop over `aas` is re-enabled
sys.stdout.write('Average Likelihood of: ' + aa + ' is ' + str(avg_like))
aa_like[aa] = avg_like

# Remove aa demand, every gap-filling solution, and all other demands added
# to allow for free flowing flux
model.remove_reactions([demand])
for gap in gaps_to_fill:
    model.remove_reactions(gap)

for met in all_mets:
    try:
        extra_demand = model.reactions.get_by_id('DM_'+met)
    except KeyError:
        # get_by_id found no reaction with this id
        print('skipped')
        print(met)
        continue
    model.remove_reactions([extra_demand])

counter += 1

elapsed = time.time() - t
print("\nTime to complete:" + str(elapsed/60) + "mins")
print('\n')
print(gaps_to_fill)
print(rxns_w_likes)
print(like_list)

In [None]:
# Move forward even though copper transporters are back in the list; probannopy might fix this issue.
active

In [None]:
# Using pFBA with new media components (RNA bases + thymidine...)
# Workflow of this cell, for one genome:
#   1. Load the universal model, the draft genome model and its reaction likelihoods.
#   2. Move model reactions with likelihood < 0.1 out of the model and into the universal model.
#   3. Copy GPRs from the likelihood data onto universal reactions that have none.
#   4. Add a demand to the model for every '_c' metabolite of the universal model so that
#      reactions are not blocked.
#   5. For the first ten entries of `aas`: gapfill towards the product demand, run pFBA and
#      record the mean likelihood of the flux-carrying 'rxn*' reactions.
# Depends on `bsm`, `M9_sources`, `rna_bases`, `aas` and `set_media` defined in earlier cells.

t = time.time()
counter = 0

sys.stdout.write('Loading in models...')

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# NOTE: unpickling executes arbitrary code; only load .probs files produced by this project.
# The file is opened in binary mode and closed deterministically.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)

sys.stdout.write('Adding Water...')

# Ensure free diffusion of water
model.reactions.get_by_id('rxn05319_c').name = "Water transport"
model.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

sys.stdout.write('Adjusting models...')

# Remove all reactions with less than 0.1 likelihood and insert them into universal with GPRs
rxn_ids = [reaction.id for reaction in model.reactions]
rxn_id_zero_like = []
for rxn in rxn_ids:
    if not rxn.startswith('rxn'):
        continue
    try:
        if likelihoods[rxn] < 0.1:
            rxn_id_zero_like.append(rxn)
    except Exception:
        # Best effort: reactions without a likelihood value are left in the model.
        pass

rxn_objs = [model.reactions.get_by_id(rxn) for rxn in rxn_id_zero_like]

model.remove_reactions(rxn_objs)
universal.remove_reactions(rxn_id_zero_like)
universal.add_reactions(rxn_objs)

# Add GPRs from likelihood dict to universal reactions that are still missing them
rxn_ids = [reaction.id for reaction in universal.reactions]
rxns_to_fix = []
for rxn in rxn_ids:
    if not rxn.startswith('rxn'):
        continue
    try:  # This catches all of the reactions that have no likelihood value.
        if likelihoods[rxn] > 0.0:
            universal_rxn = universal.reactions.get_by_id(rxn)
            if universal_rxn.gene_reaction_rule == '':
                if likelihoods.data[rxn]['gpr'] != '':
                    rxns_to_fix.append(rxn)
                    universal_rxn.gene_reaction_rule = likelihoods.data[rxn]['gpr']
    except Exception:
        pass

# Add a demand to the *model* for every cytosolic ('_c') metabolite of the universal model.
# The loop below looks these up again as 'DM_<metabolite id>'.
all_mets = []
for met in universal.metabolites:
    if met.id.endswith('_c'):
        model.add_boundary(met, type='demand')

total_dataset_dict = {}
carb_idx = 0
nit_idx = 0
product_idx = 0
carbon = 'D-Glucose'
nitrogen = 'NH3'

### Double For loop to set media

# Create specific Media List
media_list = bsm + M9_sources + rna_bases # + nitrogen + carbon
set_media(model, media_list, universal, verbose=False)

# Run through each amino acid to check for production
aa_like = {}
sys.stdout.write('Starting Loop...')
for aa_list in aas[0:10]:

    sys.stdout.write('\n'+ 'Loop' + str(counter) + ' ')
    aa = aa_list[1]+'_c'
    product_idx += 1 #Keep track to which product is being maximized
    product = aa_list[0]

    # Use the pre-built demand reaction of the metabolite of interest as the objective
    demand = model.reactions.get_by_id('DM_'+ aa)
    model.objective = demand

    # Gapfill (plain cobra gapfill; the probanno variant is not used here)
    sys.stdout.write('Gapfilling...')
    gaps_to_fill = gapfill(model, universal, demand_reactions=False)
    # Fill the gaps: every returned solution is added to the model
    for gap in gaps_to_fill:
        model.add_reactions(gap)

    # Optimize with full pathway
    sys.stdout.write('Optimizing...')
    solution = pfba(model, objective = demand)
    # Find reactions that carry flux
    df = solution.fluxes.to_frame()
    active = df.loc[(abs(df['fluxes'])) > 0.1]

    # Acquire likelihood scores for reactions that carry flux
    flux_rxns = []
    like_list = []
    for rxn in list(active.index):
        if not rxn.startswith('rxn'):
            continue
        try:
            rxn_like = likelihoods[str(rxn)]
        except Exception:
            # Reactions without a likelihood value do not contribute to the average.
            continue
        flux_rxns.append([str(rxn), rxn_like])
        like_list.append(rxn_like)
    # np.mean([]) returns nan but emits a RuntimeWarning; produce the nan directly instead.
    avg_like = np.mean(like_list) if like_list else float('nan')
    sys.stdout.write('Ave likelihood of: ' + aa_list[1] + ' is ' + str(avg_like))

    # Remove aa demand and every gap solution that was added above, so the next
    # iteration starts from the same model (previously only gaps_to_fill[0] was removed).
    model.remove_reactions([demand])
    for gap in gaps_to_fill:
        model.remove_reactions(gap)

    counter += 1

    report_dict = {}

    report_dict['Model_ID'] = genome_id
    report_dict['Carbon'] = carbon
    report_dict['Nitrogen'] = nitrogen
    report_dict['objective'] = product
    report_dict['avg_path_like'] = avg_like
    report_dict['gaps_filled'] = gaps_to_fill[0]
    report_dict['reactions_w_flux'] = flux_rxns
    report_dict['active_rxns'] = active

    report_dict_ID = genome_id + ':' + str(carb_idx) + '.' + str(nit_idx) + '.' + str(product_idx)
    total_dataset_dict[report_dict_ID] = report_dict

    elapsed = time.time() - t
    sys.stdout.write('Run time: ' + str(elapsed/60) + " [mins]")

file_name = "../metabolic_output/%s.data" % (genome_id)
# Close the output file explicitly so the pickle is fully flushed to disk.
with open(file_name, "wb") as out_file:
    pickle.dump(total_dataset_dict, out_file)

elapsed = time.time() - t
print("\nTime to complete: " + str(elapsed/60) + " [mins]")
# print('\n')
# print(gaps_to_fill)
# # print(rxns_w_likes)
# # print(like_list)
# print(flux_rxns)

In [None]:
# Manually assemble a single report entry from the variables left by the most recent run
# (`genome_id`, `carbon`, `nitrogen`, `aa`, `avg_like`, `gaps_to_fill`, `flux_rxns`, `active`)
# and store it in a fresh `total_dataset_dict` (this overwrites any existing dict of that name).
report_dict = {}

report_dict['Model_ID'] = genome_id
# Carbon/Nitrogen were previously assigned each other's value.
report_dict['Carbon'] = carbon
report_dict['Nitrogen'] = nitrogen
report_dict['objective'] = aa
report_dict['avg_path_like'] = avg_like
report_dict['gaps_filled'] = gaps_to_fill[0]
report_dict['reactions_w_flux'] = flux_rxns
report_dict['active_rxns'] = active

total_dataset_dict = {}

# Indices forming the '<carbon>.<nitrogen>.<product>' part of the report key
i = 1
j = 1
k = 1

total_dataset_dict[genome_id + ':' + str(i) + '.' + str(j) + '.' + str(k)] = report_dict

In [None]:
active

In [None]:
# for met in all_mets:
#     try:
#         model.remove_reactions([model.reactions.get_by_id('DM_'+met)])
#     except:
#         print('skipped')
#         print(met)

# Spontaneous diffusion based reactions
# diff_rxns = findDiffusionRxns(universal)

# Add dummy GPRs to rest of the reactions that should have genes so pFBA treats them all the same.
# rxns_with_tempGPR = []
# for rxn in universal.reactions:
#     if rxn.id.startswith('rxn') and rxn.id not in diff_rxns and rxn.gene_reaction_rule == '':
#         universal.reactions.get_by_id(rxn.id).gene_reaction_rule = 'Temp_GPR'
#         rxns_with_tempGPR.append(rxn.id)

In [None]:
# Very fast and efficient gap filling function
def iterative_pFBA_GapFill(model, bag, tasks=None, task_lb=0.01, obj=None, iters=100, add_exchanges=True, extracellular='Extracellular'):
    '''
    Gapfill a model by sampling pFBA-constrained flux distributions of a
    universal reaction bag that has the model's reactions merged into it.

    Every bag reaction that is not already in the model and carries positive
    flux (> 1e-6) in at least one sampled distribution is added to a copy of
    the model. Neither `model` nor `bag` is modified: changes to `bag` are
    made inside its context manager and the result is built on a deep copy.

    Parameters
    ----------
    model : cobra.Model
        Model to be gapfilled
    bag : cobra.Model
        Reaction bag to gapfill from
    tasks : list or None
        List of reaction IDs (strings) of metabolic tasks
        to set a minimum lower bound for.
    task_lb : float
        Lower bound for objective and any metabolic tasks
    obj : string or None
        Reaction ID for objective function in model to be gapfilled.
        When None, it is derived from the model with `get_objective`.
    iters : int
        Number of flux distributions to sample. Unique reactions from each
        distribution are saved and the union is added simultaneously to the model.
    add_exchanges : bool
        Currently unused: the exchange-extension step below is commented out.
    extracellular : string
        Label for extracellular compartment of model. Currently unused for
        the same reason.

    Returns
    -------
    tuple
        (new_model, new_rxn_ids): the gapfilled copy of `model` and the set of
        reaction IDs that were added to it.
    '''
    start_time = time.time()

    # Save some basic network info for downstream membership testing
    orig_rxns = list(deepcopy(model.reactions))
    orig_rxn_ids = set([str(x.id) for x in model.reactions])
    orig_cpd_ids = set([str(y.id) for y in model.metabolites])
    univ_rxn_ids = set([str(z.id) for z in bag.reactions])

    # Find overlap in model and reaction bag
    overlap_rxn_ids = univ_rxn_ids.intersection(orig_rxn_ids)

    # Get model objective reaction ID
    if obj is None:
        obj = get_objective(model)

    # Modify universal reaction bag
    new_rxn_ids = set()
    with bag as universal:

        # Remove overlapping reactions from universal bag
        for rxn in overlap_rxn_ids:
            universal.reactions.get_by_id(rxn).remove_from_model()

        # Add pFBA to universal model and add model reactions
        add_pfba(universal)
        universal.add_reactions(orig_rxns)
        universal.reactions.get_by_id(obj).lower_bound = task_lb

        # Set metabolic tasks that must carry flux in gapfilled solution
        if tasks is not None:
            for task in tasks:
                universal.reactions.get_by_id(task).lower_bound = task_lb

        # Optimize and run flux sampling
        universal = deepcopy(universal) # reset solver
        universal.optimize()
        print('Sampling ' + str(iters) + ' flux distributions...')
        flux_samples = sample(universal, iters)

        # Assess the sampled flux distributions: keep every non-model reaction
        # that exceeds the flux threshold in at least one sample.
        # NOTE(review): only positive flux counts here; a reversible reaction that
        # only ever carries negative flux is ignored - confirm this is intended.
        carries_flux = (flux_samples > 1e-6).any(axis=0)
        sampled_ids = set(carries_flux.index[carries_flux.values])
        new_rxn_ids |= sampled_ids.difference(orig_rxn_ids)

    # Get reactions and metabolites to be added to the model
    new_rxns = deepcopy([bag.reactions.get_by_id(rxn) for rxn in new_rxn_ids])
    new_cpd_ids = set()
    for rxn in new_rxns:
        new_cpd_ids |= set([str(x.id) for x in list(rxn.metabolites)])
    new_cpd_ids = new_cpd_ids.difference(orig_cpd_ids)
    # Look the new compounds up among the bag's metabolites (not its reactions).
    new_cpds = deepcopy([bag.metabolites.get_by_id(cpd) for cpd in new_cpd_ids])

    # Copy model and gapfill
    new_model = deepcopy(model)
    new_model.add_metabolites(new_cpds)
    new_model.add_reactions(new_rxns)

#     # Identify extracellular metabolites with no exchanges
#     if add_exchanges == True:
#         new_exchanges = extend_exchanges(new_model, new_cpd_ids, extracellular)
#         if len(new_exchanges) > 0: 
#             print('Identified and filled ' + str(len(new_exchanges)) + ' missing exchange reactions.')
#             new_rxn_ids |= new_exchanges

#     duration = int(round(time.time() - start_time))
#     print('Took ' + str(duration) + ' seconds to gapfill ' + str(len(new_rxn_ids)) + ' reactions and ' + str(len(new_cpd_ids)) + ' metabolites.') 

    return new_model, new_rxn_ids


# # Adds missing exchanges for extracellular metabolites
# def extend_exchanges(model, cpd_ids, ex):
    
#     model_exchanges = set(find_boundary_types(model, 'exchange', external_compartment=ex))
#     new_ex_ids = set()
    
#     for cpd in cpd_ids:
#         cpd = model.metabolites.get_by_id(cpd)
#         if cpd.compartment != extracellular:
#             continue
#         else:
#             if bool(set(cpd.reactions) & model_exchanges) == False:
#                 try:
#                     new_id = 'EX_' + cpd.id
#                     model.add_boundary(cpd, type='exchange', reaction_id=new_id, lb=-1000.0, ub=1000.0)
#                     new_ex_ids |= set([new_id])
#                 except ValueError:
#                     pass
                
#     return new_ex_ids


# Returns the reaction ID of the objective reaction
def get_objective(model):
    """Return the reaction ID found in the model's objective expression.

    The objective expression is converted to a string and split on whitespace.
    If the first term mentions 'reverse', the third token is used instead of
    the first; the ID is whatever follows the last '*' of the chosen token.

    Raises
    ------
    IndexError
        If the objective has no variables.
    """
    objective = model.objective
    if not list(objective.variables):
        raise IndexError('Model has no objective set.')

    tokens = str(objective.expression).split()
    # Skip past the leading reverse-variable term when it comes first
    term_index = 2 if 'reverse' in tokens[0] else 0
    return tokens[term_index].split('*')[-1]

In [None]:
# Using pFBA with new media components (RNA bases + thymidine...)
# Use thresholded likelihoods to penalize reactions with no likelihood using pFBA gapfill.
# Workflow of this cell, for one genome:
#   1. Load the universal model, the draft genome model and its reaction likelihoods.
#   2. Replace universal reactions that also exist in the model by copies of the model's reactions.
#   3. Add a demand to the universal model for every '_c' metabolite so reactions are not blocked.
#   4. Copy GPRs from the likelihood data onto universal reactions that have none.
#   5. Take reactions with likelihood >= 0.1 out of the universal model, add the pFBA
#      objective, then put them back so that their flux is not part of the minimized sum.
#   6. For the first ten entries of `aas`: force flux through the product demand, optimize,
#      and record the mean likelihood of the flux-carrying 'rxn*' reactions.
# Depends on `bsm`, `M9_sources`, `rna_bases`, `aas` and `set_media` defined in earlier cells.

t = time.time()
counter = 0

sys.stdout.write('Loading in models...')

universal_orig = cobra.io.load_json_model("../Data/GramPosUni.json")
# `deepcopy` is imported directly in the notebook's first cell; `copy.deepcopy` relied on a
# `copy` module that is not imported before this cell.
universal = deepcopy(universal_orig)
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# NOTE: unpickling executes arbitrary code; only load .probs files produced by this project.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)

sys.stdout.write('Adding Water...')

# Ensure free diffusion of water
model.reactions.get_by_id('rxn05319_c').name = "Water transport"
model.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

sys.stdout.write('Removing Model Rxns...')
# Add all model reactions to Universal model, replacing the universal versions of them
model_reactions = deepcopy(model.reactions)
# Build the id set once instead of once per model reaction
universal_rxn_ids = set(reaction.id for reaction in universal.reactions)
model_reactions_to_remove = [rxn for rxn in model.reactions if rxn.id in universal_rxn_ids]
universal.remove_reactions([reaction.id for reaction in model_reactions_to_remove])
universal.add_reactions(model_reactions)

sys.stdout.write('Add Demands...')
# Add demand for all metabolites in Universal model to stop blocked reactions
all_mets = []
for met in universal.metabolites:
    if met.id.endswith('_c'):
        universal.add_boundary(met, type='demand')

sys.stdout.write('Add Missing GPRs...')
# Add GPRs from likelihood dict to universal reactions that are still missing them
rxn_ids = [reaction.id for reaction in universal.reactions]
rxns_to_fix = []
for rxn in rxn_ids:
    if not rxn.startswith('rxn'):
        continue
    try:  # This catches all of the reactions that have no likelihood value.
        if likelihoods[rxn] > 0.0:
            universal_rxn = universal.reactions.get_by_id(rxn)
            if universal_rxn.gene_reaction_rule == '':
                if likelihoods.data[rxn]['gpr'] != '':
                    rxns_to_fix.append(rxn)
                    universal_rxn.gene_reaction_rule = likelihoods.data[rxn]['gpr']
    except Exception:
        pass

sys.stdout.write('Remove High Likelihood rxns...')
# Collect reactions with likelihood >= 0.1 and take them out of universal
rxns_w_like = []
for rxn in universal.reactions:
    if not rxn.id.startswith('rxn'):
        continue
    try:
        if likelihoods[rxn.id] >= 0.1:
            rxns_w_like.append(rxn)
    except Exception:
        # Reactions without a likelihood value stay in universal and get penalized by pFBA.
        pass

universal.remove_reactions(rxns_w_like)

sys.stdout.write('Add pFBA...')
# Add pFBA to universal model
add_pfba(universal)

# Add likely reactions back into Universal so their flux is not minimized
universal.add_reactions(rxns_w_like)

total_dataset_dict = {}
carb_idx = 0
nit_idx = 0
product_idx = 0
carbon = 'D-Glucose'
nitrogen = 'NH3'

### Double For-loop to set media

# Create specific Media List
media_list = bsm + M9_sources + rna_bases # + nitrogen + carbon
set_media(universal, media_list, universal_orig, verbose=False)

# Minimum flux forced through the product demand in each iteration
task_lb = 100

# Run through each amino acid to check for production
aa_like = {}
sys.stdout.write('Starting Loop...')
for aa_list in aas[0:10]:

    sys.stdout.write('\n'+ 'Loop' + str(counter) + ' ')
    aa = aa_list[1]+'_c'
    product = aa_list[0]

    obj = 'DM_'+ aa
    universal.reactions.get_by_id(obj).lower_bound = task_lb

    # Optimize with the pFBA objective while production of the product is forced
    sys.stdout.write('Optimizing...')
    try:
        solution = universal.optimize()
    finally:
        # Reset Objective lower bound for next loop, even if the solve raised
        universal.reactions.get_by_id(obj).lower_bound = 0.0

    sys.stdout.write('Constructing Dict...')
    # Find reactions that carry flux
    df = solution.fluxes.to_frame()
    active = df.loc[(abs(df['fluxes'])) > 0.1]

    # Acquire likelihood scores for reactions that carry flux
    flux_rxns = []
    like_list = []
    for rxn in list(active.index):
        if not rxn.startswith('rxn'):
            continue
        try:
            rxn_like = likelihoods[str(rxn)]
        except Exception:
            # Reactions without a likelihood value do not contribute to the average.
            continue
        flux_rxns.append([str(rxn), rxn_like])
        like_list.append(rxn_like)
    # np.mean([]) returns nan but emits a RuntimeWarning; produce the nan directly instead.
    avg_like = np.mean(like_list) if like_list else float('nan')
    sys.stdout.write('Ave likelihood of: ' + aa + ' is ' + str(avg_like))

    counter += 1

    report_dict = {}

    report_dict['Model_ID'] = genome_id
    report_dict['Carbon'] = carbon
    report_dict['Nitrogen'] = nitrogen
    report_dict['objective'] = product
    report_dict['avg_path_like'] = avg_like
    report_dict['reactions_w_flux'] = flux_rxns
    report_dict['active_rxns'] = active

    report_dict_ID = genome_id + ':' + str(carb_idx) + '.' + str(nit_idx) + '.' + str(product_idx)
    total_dataset_dict[report_dict_ID] = report_dict
    product_idx += 1 #Keep track to which product is being maximized

    elapsed = time.time() - t
    sys.stdout.write('Run time: ' + str(elapsed/60) + " [mins]")

file_name = "../metabolic_output/%s.data" % (genome_id)
# Close the output file explicitly so the pickle is fully flushed to disk.
with open(file_name, "wb") as out_file:
    pickle.dump(total_dataset_dict, out_file)

elapsed = time.time() - t
print("\nTime to complete: " + str(elapsed/60) + " [mins]")

In [None]:
total_dataset_dict

In [None]:
rxns_w_like

In [None]:
universal.objective.expression

In [20]:
import re
import requests
from cobra.core.model import copy

def probabilistic_gapfill(model, universal_model, reaction_probabilities, clean_exchange_rxns=True, default_penalties=None, dm_rxns=False, ex_rxns=False, **solver_parameters):
    """
    Gapfill a model using probabilistic weights.

    Both models are copied first, so the caller's objects are not modified.
    Universal reactions that already exist in the model get penalty 0 and are
    removed from the universal copy; universal reactions with a known
    probability get penalty ``max(0, 1 - probability)``, scaled by any penalty
    already registered for that reaction ID.

    :param model: cobra Model object, the model to be gapfilled
    :param universal_model: cobra Model object representing the database of reactions to choose from
    :param reaction_probabilities: reaction_probabilities dictionary (must support ``in`` and ``[]`` by reaction ID)
    :param clean_exchange_rxns: if True the model is copied via ``clean_exchange_reactions``,
        otherwise via ``model.copy()``
    :param default_penalties: optional dict of base penalties; it is copied, never modified.
        Defaults to ``{'Universal': 1, 'Exchange': 100, 'Demand': 1, 'Reverse': 75}``
    :param dm_rxns: forwarded to ``cobra.flux_analysis.gapfill`` as ``demand_reactions``
    :param ex_rxns: forwarded to ``cobra.flux_analysis.gapfill`` as ``exchange_reactions``
    :param solver_parameters: extra keyword arguments forwarded to ``cobra.flux_analysis.gapfill``
    :return: the result of ``cobra.flux_analysis.gapfill``
    """
    universal_model = universal_model.copy()
    model = clean_exchange_reactions(model) if clean_exchange_rxns else model.copy()
    if default_penalties is None:
        default_penalties = {'Universal': 1, 'Exchange': 100, 'Demand': 1, 'Reverse': 75}
    # Work on a copy so per-reaction entries do not leak into the caller's dict
    penalties = dict(default_penalties)
    reactions_to_remove = []
    for r in universal_model.reactions:
        if model.reactions.has_id(r.id):
            reactions_to_remove.append(r)
            penalties[r.id] = 0  # In the model
        elif r.id in reaction_probabilities:
            penalties[r.id] = max(0, 1 - reaction_probabilities[r.id]) * penalties.get(r.id, 1)
    universal_model.remove_reactions(reactions_to_remove)
    return cobra.flux_analysis.gapfill(model, universal_model, penalties=penalties, demand_reactions=dm_rxns, exchange_reactions=ex_rxns, **solver_parameters)


def clean_exchange_reactions(model, regex='.*_e([0-9]*)$'):
    """Return a copy of `model` without the metabolites whose ID matches `regex`.

    The input model is left untouched; matching metabolites are removed from
    the copy through their own `remove_from_model` method.
    """
    cleaned = model.copy()
    id_pattern = re.compile(regex)
    # Collect first, then remove, so the metabolite list is not changed while scanning it
    doomed = []
    for metabolite in cleaned.metabolites:
        if id_pattern.match(metabolite.id):
            doomed.append(metabolite)
    for metabolite in doomed:
        metabolite.remove_from_model()
    return cleaned



In [None]:
# Using pFBA with new media components (RNA bases + thymidine...)
# Use pFBA answer + 0 likelihood reactions from reconstructed model + higher likelihood
# reactions to probanno gapfill.
# This cell only prepares and saves the three models used by the gapfilling cell:
#   universal.json : universal model with the model's reactions merged in, GPRs filled in from
#                    the likelihood data and a demand for every '_c' metabolite
#   model.json     : genome model without its 'rxn*' reactions of likelihood <= 0.1
#   bag.json       : copy of universal reduced to those removed low-likelihood model reactions
#                    plus non-model 'rxn*' reactions of likelihood >= 0.1

t = time.time()
counter = 0

sys.stdout.write('Loading in models...')

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# NOTE: unpickling executes arbitrary code; only load .probs files produced by this project.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)

sys.stdout.write('Adding Water...')

# Ensure free diffusion of water
model.reactions.get_by_id('rxn05319_c').name = "Water transport"
model.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

sys.stdout.write('Set-up Universal...')

### Set up Universal
# Add all model reactions to Universal model, replacing the universal versions of them.
# `deepcopy` is imported directly in the notebook's first cell; the `copy` name bound by
# `from cobra.core.model import copy` is not a reliable source of `deepcopy`.
model_reactions = deepcopy(model.reactions)
# Build the id set once instead of once per model reaction
universal_rxn_ids = set(reaction.id for reaction in universal.reactions)
model_reactions_to_remove = [rxn for rxn in model.reactions if rxn.id in universal_rxn_ids]
universal.remove_reactions([reaction.id for reaction in model_reactions_to_remove])
universal.add_reactions(model_reactions)

# Add GPRs from likelihood dict to universal reactions that are still missing them
rxn_ids = [reaction.id for reaction in universal.reactions]
rxns_to_fix = []
for rxn in rxn_ids:
    if not rxn.startswith('rxn'):
        continue
    try:  # This catches all of the reactions that have no likelihood value.
        if likelihoods[rxn] > 0.0:
            universal_rxn = universal.reactions.get_by_id(rxn)
            if universal_rxn.gene_reaction_rule == '':
                if likelihoods.data[rxn]['gpr'] != '':
                    rxns_to_fix.append(rxn)
                    universal_rxn.gene_reaction_rule = likelihoods.data[rxn]['gpr']
    except Exception:
        pass

# Add demand for all metabolites in Universal model to stop blocked reactions
all_mets = []
for met in universal.metabolites:
    if met.id.endswith('_c'):
        universal.add_boundary(met, type='demand')

### Set Up Model: remove low likelihood reactions
sys.stdout.write('Set-up Model...')
low_like_model = []
for rxn in model.reactions:
    if not rxn.id.startswith('rxn'):
        continue
    try:
        if likelihoods[rxn.id] <= 0.1:
            low_like_model.append(rxn.id)
    except Exception:
        # Reactions without a likelihood value are kept in the model.
        pass
model_rxns_to_remove = [model.reactions.get_by_id(rxn) for rxn in low_like_model]
model.remove_reactions(model_rxns_to_remove)

### Set Up Bag: Leave only low-likelihood-model reactions, and high-likelihood-non-model reactions for now
sys.stdout.write('Set-up Bag...')

# Make deepcopy of universal for bag to process later
bag = deepcopy(universal)

# Find reaction IDs for the high-likelihood reactions only in the universal model.
# The model's ids are collected once (after the removal above) rather than per universal reaction.
model_rxn_ids = set(reaction.id for reaction in model.reactions)
high_like_non_model = []
for rxn in universal.reactions:
    if not rxn.id.startswith('rxn') or rxn.id in model_rxn_ids:
        continue
    try:
        if likelihoods[rxn.id] >= 0.1:
            high_like_non_model.append(rxn.id)
    except Exception:
        pass

uni_rxn_ids = [reaction.id for reaction in universal.reactions]
rxns_to_remove = set(uni_rxn_ids).difference(set(high_like_non_model).union(set(low_like_model)))

sys.stdout.write('Trimming Bag...')
bag.remove_reactions([bag.reactions.get_by_id(rxn) for rxn in rxns_to_remove])

# Save Models
sys.stdout.write('Saving...')
cobra.io.save_json_model(universal, "universal.json")
cobra.io.save_json_model(model, "model.json")
cobra.io.save_json_model(bag, "bag.json")

elapsed = time.time() - t
print("\nTime to complete: " + str(elapsed/60) + " [mins]")

In [None]:
# Gapfill the saved genome model towards each of the first ten products in `aas`:
#   1. FBA on the universal model to find reactions able to carry flux to the product demand.
#   2. Temporarily add those reactions to the bag.
#   3. Gapfill the model from the bag with likelihood-derived penalties.
#   4. Run pFBA on the filled model, record the mean likelihood of the flux-carrying 'rxn*'
#      reactions, then undo the changes to model and bag.
# Reads universal.json / model.json / bag.json from the working directory.
# Depends on `bsm`, `M9_sources`, `rna_bases`, `aas` and `set_media` defined in earlier cells.
t = time.time()

sys.stdout.write('Loading Models...')
universal = cobra.io.load_json_model('universal.json')
model = cobra.io.load_json_model('model.json')
bag = cobra.io.load_json_model('bag.json')
genome_id = '220668.9'
# NOTE: unpickling executes arbitrary code; only load .probs files produced by this project.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)

counter = 0

total_dataset_dict = {}
carb_idx = 0
nit_idx = 0
product_idx = 0
carbon = 'D-Glucose'
nitrogen = 'NH3'

### Double For-loop to set media

# Create specific Media List
media_list = bsm + M9_sources + rna_bases # + nitrogen + carbon
set_media(model, media_list, universal, verbose=False)
set_media(universal, media_list, universal, verbose=False)

# Run through each amino acid to check for production
aa_like = {}
sys.stdout.write('Starting Loop...')
for aa_list in aas[0:10]:

    sys.stdout.write('\n'+ 'Loop' + str(counter) + ' ')
    aa = aa_list[1]+'_c'
    product = aa_list[0]

    obj = 'DM_'+ aa
    universal.objective = universal.reactions.get_by_id(obj)

    # Optimize universal with FBA to get possible reactions
    sys.stdout.write('Optimizing...')
    solution = universal.optimize()
    sys.stdout.write(str(round(universal.slim_optimize())) + '...')
    df = solution.fluxes.to_frame()
    active = df.loc[(abs(df['fluxes'])) > 0.1]

    # Add solution space to bag: copies of the active universal reactions the bag lacks
    sys.stdout.write('Add to bag...')
    rxns_to_add_to_bag = [universal.reactions.get_by_id(rxn)
                          for rxn in active.index
                          if not bag.reactions.has_id(rxn)]
    rxns_to_add_to_bag_copy = deepcopy(rxns_to_add_to_bag)

    bag.add_reactions(rxns_to_add_to_bag_copy)

    # len() returns an int and must be converted before string concatenation
    sys.stdout.write('Bag size'+ str(len(bag.reactions)) +'...')

    # Add Demand Reaction for metabolite of interest and set to be objective
    sys.stdout.write('Add demand to model...')
    if not model.metabolites.has_id(aa):
        # Metabolite is missing from the model: copy it over from the universal model
        metabolite = deepcopy(universal.metabolites.get_by_id(aa))
        model.add_metabolites([metabolite])
    else:
        metabolite = model.metabolites.get_by_id(aa)
    demand = model.add_boundary(metabolite, type='demand')
    model.objective = demand

    # Gapfill with likelihood-based penalties (same weighting as probabilistic_gapfill,
    # except that bag reactions without a likelihood get an explicit penalty of 1)
    sys.stdout.write('Gapfilling...')

    # Removals made inside the context are undone on exit, so the bag keeps its
    # reactions for the next product.
    with bag:
        default_penalties = {'Universal': 1, 'Exchange': 100, 'Demand': 1, 'Reverse': 75}
        penalties = default_penalties
        reactions_to_remove = []
        for r in bag.reactions:
            if model.reactions.has_id(r.id):
                reactions_to_remove.append(r)
                penalties[r.id] = 0  # In the model
            elif r.id in likelihoods:
                penalties[r.id] = max(0, 1 - likelihoods[r.id]) * penalties.get(r.id, 1)
            else:
                penalties[r.id] = 1
        bag.remove_reactions(reactions_to_remove)
        gaps_to_fill = gapfill(model, bag, penalties=penalties, demand_reactions=False)

    # Fill the gaps: every returned solution is added to the model
    for gap in gaps_to_fill:
        model.add_reactions(gap)

    # Optimize with filled pathway
    sys.stdout.write('pFBA...')
    solution = pfba(model, objective = demand)
    sys.stdout.write(str(round(model.slim_optimize())) + '...')

    sys.stdout.write('Constructing Dict...')
    # Find reactions that carry flux
    df = solution.fluxes.to_frame()
    active = df.loc[(abs(df['fluxes'])) > 0.1]

    # Acquire likelihood scores for reactions that carry flux
    flux_rxns = []
    like_list = []
    for rxn in list(active.index):
        if not rxn.startswith('rxn'):
            continue
        try:
            rxn_like = likelihoods[str(rxn)]
        except Exception:
            # Reactions without a likelihood value do not contribute to the average.
            continue
        flux_rxns.append([str(rxn), rxn_like])
        like_list.append(rxn_like)
    # np.mean([]) returns nan but emits a RuntimeWarning; produce the nan directly instead.
    avg_like = np.mean(like_list) if like_list else float('nan')
    sys.stdout.write('Ave likelihood of: ' + aa + ' is ' + str(avg_like))

    counter += 1

    report_dict = {}

    report_dict['Model_ID'] = genome_id
    report_dict['Carbon'] = carbon
    report_dict['Nitrogen'] = nitrogen
    report_dict['objective'] = product
    report_dict['avg_path_like'] = avg_like
    report_dict['gaps_filled'] = gaps_to_fill[0]
    report_dict['reactions_w_flux'] = flux_rxns
    report_dict['active_rxns'] = active

    report_dict_ID = genome_id + ':' + str(carb_idx) + '.' + str(nit_idx) + '.' + str(product_idx)
    total_dataset_dict[report_dict_ID] = report_dict
    product_idx += 1 #Keep track to which product is being maximized

    # Remove reactions to reset: the demand, every gap solution added above
    # (previously only gaps_to_fill[0]) and the reactions temporarily put in the bag
    sys.stdout.write('Resetting...')
    model.remove_reactions([demand])
    for gap in gaps_to_fill:
        model.remove_reactions(gap)
    bag.remove_reactions(rxns_to_add_to_bag_copy)

    elapsed = time.time() - t
    sys.stdout.write('Run time: ' + str(elapsed/60) + " [mins]")

file_name = "../metabolic_output/%s.data" % (genome_id)
# Close the output file explicitly so the pickle is fully flushed to disk.
with open(file_name, "wb") as out_file:
    pickle.dump(total_dataset_dict, out_file)

elapsed = time.time() - t
print("\nTime to complete: " + str(elapsed/60) + " [mins]")

Loading Models...Starting Loop...
Loop0 Optimizing...1000.0...Add to bag...Add demand to model...Gapfilling...pFBA...1000.0...Constructing Dict...Ave likelihood of: cpd00117_c is 0.4601774975211276Resetting...Run time: 0.43443018198 [mins]
Loop1 Optimizing...1000.0...Add to bag...Add demand to model...Gapfilling...