In [62]:
from __future__ import print_function

import cobra
import cobra.test
# import mackinac
import numpy as np
import csv
import glob
import pickle
import pandas as pd
import time
import sys
from collections import defaultdict
from cobra.flux_analysis import gapfill
from cobra.flux_analysis import pfba

# Set default logger to python logger to avoid warnings given when adding reactions and/or metaboites 
# because "cobra.core.model" doesn't innately have a logger.
import logging
logging.basicConfig()
logger = logging.getLogger('logger')

In [2]:
def set_media(model, media, universal, verbose=False):

    # Find and close all exchange reactions in the model
    model_rxns = [rxn.id for rxn in model.reactions]
    for rxn in model_rxns:
        if rxn.startswith('EX_') and rxn.endswith('_e'):
            model.reactions.get_by_id(rxn).lower_bound = 0.0

    # Check for existence of exchange reactions for the media metabolites in the model
    for metabolite in media:
        met = metabolite[1]+'_e'
        if 'EX_'+met in model_rxns:
            model.reactions.get_by_id('EX_'+met).lower_bound = -1000.
        else:
            # Create exchange reaction and add to model
            if verbose:
                print("added exchange rxn for " + met)
            new_exchange = cobra.Reaction('EX_'+met)
            new_exchange.name = met + ' exchange'
            met_obj = universal.metabolites.get_by_id(met)
            new_exchange.add_metabolites({met_obj:-1})
            new_exchange.lower_bound = -1000.
            new_exchange.upper_bound = 1000.
            model.add_reaction(new_exchange)
            model.repair()

In [None]:
# Read in list of genome IDs
with open('../Data/20_species_1023_genomes.csv') as csvfile:
    genome_ids_list = []
    for line in csvfile:
        genome_ids_list.append(line.strip())
len(genome_ids_list)

In [None]:
# Initialize functions
# Initialize global variables
# Set media cpds
# Open model
# Add water transporter and fix name
# loop: Change media
    # turn off all exchanges
    # Turn on correct exchanges, add if missing
    
    # Loop: Set demand reaction
        # Check for production
        # Gapfill with probanno
        # Determine the reactions that were added to gapfill and identify the likelihoods
        # Optimize and find solution
        # Average likelihoods to create metric for media condition and specific demand reaction
        # save information in data structure


In [18]:
t = time.time()
counter = 0

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
likelihoods = pickle.load(open('../likelihoods/'+ genome_id +'.probs'))

# Ensure free water exhange
model.reactions.get_by_id('rxn05319_c').name = "Water transport"
model.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

# Create specific Media List
media_list = bsm + M9_sources
set_media(model, media_list, universal, verbose=False)

# Run through each amino acid to check for production
aa_like = {}
sys.stdout.write('Starting Loop')
for aa_list in aas:
    sys.stdout.write('\n'+ str(counter))
    aa = aa_list[1]+'_c'
    # Add Demand Reaction for metabolite
    metabolite = model.metabolites.get_by_id(aa)
    demand = model.add_boundary(metabolite, type='demand')
    model.objective = demand
    # Gapfill
    sys.stdout.write('...gapfilling...')
    gaps_to_fill = gapfill(model, universal, demand_reactions=False) # Update to probannopy gapfill function; use Gurobi
    # Fill the gaps
    rxns_to_add = []
    for gap in gaps_to_fill:
        model.add_reactions(gap) ### I NEED TO REMOVE THESE REACTIONS
    # Optimize with full pathway
    sys.stdout.write('optimizing...')
    solution = model.optimize()
    # Find reactions that carry flux and their likelihood score
    df = solution.fluxes.to_frame()
    active = df.loc[(abs(df['fluxes'])) > 0.1]
    like_list = []
    for rxn in list(active.index):
        if rxn.startswith('rxn'):
            try:
                like_list.append(likelihoods[rxn])
            except:
                pass
    avg_like = np.mean(like_list)
    sys.stdout.write('Average Likelihood of: ' + aa_list[1] + ' is ' + str(avg_like))
    aa_like[aa_list[1]] = avg_like
    model.remove_reactions([demand])
    # ADD REMOVAL OF GAPS
    counter += 1

elapsed = time.time() - t
print("Time to complete:" + str(elapsed/60) + "mins")

Starting Loop
0...gapfilling...optimizing...Average Likelihood of: cpd00117 is 0.32239418829432986
1...gapfilling...optimizing...Average Likelihood of: cpd00186 is 0.40811404675637747
2...gapfilling...optimizing...Average Likelihood of: cpd00637 is 0.3539055920939708
3...gapfilling...optimizing...Average Likelihood of: cpd00550 is 0.3876222618910973
4...gapfilling...optimizing...Average Likelihood of: cpd00033 is 0.25864420133992655
5...gapfilling...optimizing...Average Likelihood of: cpd00035 is 0.2617434799782508
6...gapfilling...optimizing...Average Likelihood of: cpd00051 is 0.30615169412786564
7...gapfilling...optimizing...Average Likelihood of: cpd00132 is 0.37908217357377966
8...gapfilling...optimizing...Average Likelihood of: cpd00041 is 0.3919271330103651
9...gapfilling...

No handlers could be found for logger "cobra.core.model"


optimizing...Average Likelihood of: cpd00084 is 0.40759115411391483
10...gapfilling...optimizing...Average Likelihood of: cpd00023 is 0.3770658617850227
11...gapfilling...optimizing...Average Likelihood of: cpd00053 is 0.24679149221825047
12...gapfilling...optimizing...Average Likelihood of: cpd00119 is 0.34746531593017904
13...gapfilling...optimizing...Average Likelihood of: cpd00322 is 0.3048953761712908
14...gapfilling...optimizing...Average Likelihood of: cpd00107 is 0.35867506074632505
15...gapfilling...optimizing...Average Likelihood of: cpd00039 is 0.38470369962458417
16...gapfilling...optimizing...Average Likelihood of: cpd00060 is 0.3625340412746876
17...gapfilling...optimizing...Average Likelihood of: cpd00066 is 0.37888939227146257
18...gapfilling...optimizing...Average Likelihood of: cpd00129 is 0.3553058212741118
19...gapfilling...optimizing...Average Likelihood of: cpd00054 is 0.2855063409092786
20...gapfilling...optimizing...Average Likelihood of: cpd00161 is 0.396418896

In [23]:
aa_like

{'cpd00023': 0.3770658617850227,
 'cpd00033': 0.25864420133992655,
 'cpd00035': 0.2617434799782508,
 'cpd00039': 0.38470369962458417,
 'cpd00041': 0.3919271330103651,
 'cpd00051': 0.30615169412786564,
 'cpd00053': 0.24679149221825047,
 'cpd00054': 0.2855063409092786,
 'cpd00060': 0.3625340412746876,
 'cpd00065': 0.38664620586239773,
 'cpd00066': 0.37888939227146257,
 'cpd00069': 0.3976185204163214,
 'cpd00084': 0.40759115411391483,
 'cpd00107': 0.35867506074632505,
 'cpd00117': 0.32239418829432986,
 'cpd00119': 0.34746531593017904,
 'cpd00129': 0.3553058212741118,
 'cpd00132': 0.37908217357377966,
 'cpd00156': 0.317618970729169,
 'cpd00161': 0.396418896014934,
 'cpd00186': 0.40811404675637747,
 'cpd00322': 0.3048953761712908,
 'cpd00550': 0.3876222618910973,
 'cpd00637': 0.3539055920939708}

In [22]:
# There are no demand or exchange reactions in the universal reaction bag
for rxn in universal.reactions:
    if rxn.id.startswith('DM'):
        print('DM in reaction bag')
    elif rxn.id.startswith('EX'):
        print('EX in reaction bag')

In [25]:
gaps_to_fill

[[<Reaction rxn00898_c at 0x7eff87546350>,
  <Reaction rxn02186_c at 0x7eff87546410>]]

In [28]:
print(likelihoods['rxn00898_c'])
print(likelihoods['rxn02186_c'])

0.0
0.0


In [33]:
all_likes = []
passed_rxns = []
counter = 0
pass_counter = 0
for rxn in model.reactions:
    if rxn.id.startswith('rxn'):
        try:
            all_likes.append(likelihoods[rxn.id])
            counter += 1
        except:
            passed_rxns.append(rxn.id)
            pass_counter += 1
            pass
global_avg = np.mean(all_likes)

print(global_avg)
print(counter)
print(pass_counter)

0.2733090489303243
975
18


In [34]:
passed_rxns

['rxn02374_c',
 'rxn05319_c',
 'rxn04457_c',
 'rxn02916_c',
 'rxn03012_c',
 'rxn10571_c',
 'rxn04132_c',
 'rxn05195_c',
 'rxn05468_c',
 'rxn04133_c',
 'rxn05467_c',
 u'rxn12215_c',
 u'rxn05522_c',
 u'rxn08688_c',
 u'rxn05238_c',
 u'rxn02976_c',
 u'rxn08764_c',
 u'rxn13022_c']

In [43]:
print(model.reactions.get_by_id('rxn02374_c').name)
print(model.reactions.get_by_id('rxn05319_c').name)
print(model.reactions.get_by_id('rxn04457_c').name)
print(model.reactions.get_by_id('rxn02916_c').name)
print(model.reactions.get_by_id('rxn03012_c').name)
print(model.reactions.get_by_id('rxn10571_c').name)
print(model.reactions.get_by_id('rxn04132_c').name)
print(model.reactions.get_by_id('rxn05195_c').name)
print(model.reactions.get_by_id('rxn05468_c').name)
print(model.reactions.get_by_id('rxn04133_c').name)
print(model.reactions.get_by_id('rxn05467_c').name)
print(model.reactions.get_by_id('rxn12215_c').name)
print(model.reactions.get_by_id('rxn05522_c').name)
print(model.reactions.get_by_id('rxn08688_c').name)
print(model.reactions.get_by_id('rxn05238_c').name)
print(model.reactions.get_by_id('rxn02976_c').name)
print(model.reactions.get_by_id('rxn08764_c').name)
print(model.reactions.get_by_id('rxn13022_c').name)

R03314
Water transport
R06605
R04175
R04336
Mg2+-importing ATPase
R06063
ATP phosphohydrolase (ferric-ion-transporting)
TRANS-RXNAVI-26568.ce
R06064
CO2 transporter via diffusion
5-methyltetrahydropteroyltri-l-glutamate synthesis
citrate transport in via Co complex
Hydrogen sulfide oxidation
TRANS-RXNBWI-115353.ce
R04269
ketol-acid reductoisomerase (2-Acetolactate)
3-isopropylmalate dehydrogenase


In [44]:
print(model.reactions.get_by_id('rxn02374_c').name)
# print(model.reactions.get_by_id('rxn05319_c').name)
print(model.reactions.get_by_id('rxn04457_c').name)
print(model.reactions.get_by_id('rxn02916_c').name)
print(model.reactions.get_by_id('rxn03012_c').name)
# print(model.reactions.get_by_id('rxn10571_c').name)
print(model.reactions.get_by_id('rxn04132_c').name)
# print(model.reactions.get_by_id('rxn05195_c').name)
print(model.reactions.get_by_id('rxn05468_c').name)
print(model.reactions.get_by_id('rxn04133_c').name)
# print(model.reactions.get_by_id('rxn05467_c').name)
# print(model.reactions.get_by_id('rxn12215_c').name)
# print(model.reactions.get_by_id('rxn05522_c').name)
# print(model.reactions.get_by_id('rxn08688_c').name)
print(model.reactions.get_by_id('rxn05238_c').name)
print(model.reactions.get_by_id('rxn02976_c').name)
# print(model.reactions.get_by_id('rxn08764_c').name)
# print(model.reactions.get_by_id('rxn13022_c').name)

R03314
R06605
R04175
R04336
R06063
TRANS-RXNAVI-26568.ce
R06064
TRANS-RXNBWI-115353.ce
R04269


In [58]:
model.reactions.get_by_id('rxn02374_c')

0,1
Reaction identifier,rxn02374_c
Name,R03314
Memory address,0x07effa8ae0810
Stoichiometry,cpd00858_c --> cpd00001_c + cpd00067_c + cpd02431_c  L-Glutamate5-semialdehyde --> H2O + H+ + 1-Pyrroline-5-carboxylate
GPR,
Lower bound,0.0
Upper bound,1000.0


In [57]:
model.reactions.get_by_id('rxn04457_c')

0,1
Reaction identifier,rxn04457_c
Name,R06605
Memory address,0x07effa4ffb350
Stoichiometry,"cpd09027_c --> cpd00011_c + cpd01567_c  5-Hydroxy-2-oxo-4-ureido-2,5-dihydro-1H-imidazole-5-carboxylate --> CO2 + (R)-Allantoin"
GPR,
Lower bound,0.0
Upper bound,1000.0


In [47]:
model.reactions.get_by_id('rxn02916_c')

0,1
Reaction identifier,rxn02916_c
Name,R04175
Memory address,0x07effa913bd50
Stoichiometry,cpd02074_c --> cpd00001_c + cpd00067_c + cpd00922_c  2-Oxo-6-aminocaproate --> H2O + H+ + delta1-Piperideine-2-carboxylate
GPR,
Lower bound,0.0
Upper bound,1000.0


In [48]:
model.reactions.get_by_id('rxn03012_c')

0,1
Reaction identifier,rxn03012_c
Name,R04336
Memory address,0x07effa94f68d0
Stoichiometry,cpd02414_c --> cpd00001_c + cpd00067_c + cpd02465_c  L-2-Amino-6-oxopimelate --> H2O + H+ + tetrahydrodipicolinate
GPR,
Lower bound,0.0
Upper bound,1000.0


In [49]:
model.reactions.get_by_id('rxn04132_c')

0,1
Reaction identifier,rxn04132_c
Name,R06063
Memory address,0x07effa8c2a8d0
Stoichiometry,cpd00001_c + cpd08625_c --> cpd00011_c + cpd00067_c + cpd01567_c  H2O + 5-Hydroxyisourate --> CO2 + H+ + (R)-Allantoin
GPR,
Lower bound,0.0
Upper bound,1000.0


In [50]:
model.reactions.get_by_id('rxn05468_c')

0,1
Reaction identifier,rxn05468_c
Name,TRANS-RXNAVI-26568.ce
Memory address,0x07effab166150
Stoichiometry,cpd00007_e <=> cpd00007_c  O2 <=> O2
GPR,
Lower bound,-1000.0
Upper bound,1000.0


In [51]:
model.reactions.get_by_id('rxn04133_c')

0,1
Reaction identifier,rxn04133_c
Name,R06064
Memory address,0x07effaabbc5d0
Stoichiometry,cpd00001_c + cpd08625_c --> cpd00011_c + cpd00067_c + cpd01092_c  H2O + 5-Hydroxyisourate --> CO2 + H+ + Allantoin
GPR,
Lower bound,0.0
Upper bound,1000.0


In [52]:
model.reactions.get_by_id('rxn05238_c')

0,1
Reaction identifier,rxn05238_c
Name,TRANS-RXNBWI-115353.ce
Memory address,0x07effa33fb090
Stoichiometry,cpd00048_e <=> cpd00048_c  Sulfate <=> Sulfate
GPR,
Lower bound,-1000.0
Upper bound,1000.0


In [53]:
model.reactions.get_by_id('rxn02976_c')

0,1
Reaction identifier,rxn02976_c
Name,R04269
Memory address,0x07eff9658de90
Stoichiometry,cpd00024_c + cpd02273_c <=> cpd00023_c + cpd03591_c  2-Oxoglutarate + L-threo-3-Methylaspartate <=> L-Glutamate + Methyloxaloacetate
GPR,
Lower bound,-1000.0
Upper bound,1000.0


In [None]:
# turn off O2 exchange
model.reactions.get_by_id('EX_cpd00007_e').lower_bound = 0.

In [8]:
# Basal Synthetic Media
bsm = [
    ['H+','cpd00067'],
    ['H2O','cpd00001'],
    ['CO2','cpd00011'],
    ['O2','cpd00007'],
    ['N2','cpd00528'], 
#     ['H2','cpd11640'], # Only with no O2
    
    ['K+','cpd00205'],
    ['Na+','cpd00971'],
    ['Mg','cpd00254'],
    ['Mn2+','cpd00030'],
    ['Fe2+','cpd10515'], # Iron ion in heme
    ['Ca2+','cpd00063'], # Calcium pantothenate;cpd19112
    
    ['Vitamin B12r','cpd00423'], # C62H91CoN13O14P : cobalamin;cpd03424;cpd00730 : not present in any exchange reactions
    ['Cobinamide','cpd03422'], #EXs : related to cobalamin (B12) Added to ensure cells have access to B12
    ['BIOT','cpd00104'], # C10H15N2O3S : biotin B7
    ['PAN','cpd00644'], # C9H16NO5 : Pantothenate B5
    ['Folate','cpd00393'], # C19H17N7O6 : B9
    ['Niacin','cpd00218'], # C6H4NO2 : B3
    ['Pyridoxal','cpd00215'], # C8H9NO3 : B6
    ['Riboflavin','cpd00220'], # C17H19N4O6 : B2
    ['thiamin','cpd00305'], # C12H17N4OS : B1
    
#     ['Phosphate','cpd00009'], # HO4P : In M9 Defaults
    
    ['Thioglycolate','cpd01415'], # C2H3O2S : not present in any exchange reactions
#     ['Sulfate','cpd00048'], # O4S : In M9 Defaults
    
    ['Acetate','cpd00029'], # C2H3O2 : not present in any exchange reactions
    ['Citrate','cpd00137'], # C6H5O7 : Consider removing. 
#     ['Polysorbate 60','cpd24450'], # C35H68O10 : Almost tween 80 : not present in any reactions
#     ['Ethyl acetate','cpd00633'], # C4H8O2 : not present in any exchange reactions, only present in one reaction at all
    
    ['ABEE','cpd00443'] # C7H6NO2 : aminobenzoate : not present in any exchange reactions
]

# Potentially add to BSM (from M9 media)
M9_ions = [
    ['Cl-','cpd00099'],
    ['Co2+','cpd00149'],
    ['Cu2+','cpd00058'],
    ['Fe3','cpd10516'],
#     ['Sodium molybdate','cpd11145'], # This doesn't connect to anything
    ['Ni2+','cpd00244'],
    ['Selenate','cpd03396'],
    ['Selenite','cpd03387'],
    ['Zn2+','cpd00034']
]

# Enviromental Metabolites with Exchange reactions
[
#     ['CO2','cpd00011'], #EXs : 
#     ['Ca2+','cpd00063'], #EXs : 
#     ['Cd2+','cpd01012'], #EXs : Removed because toxic
#     ['chromate','cpd11595'], #EXs : Removed because toxic
#     ['Cl-','cpd00099'], #EXs : 
#     ['Co2+','cpd00149'], #EXs : In M9
#     ['Cu2+','cpd00058'], #EXs : In M9
#     ['Fe2+','cpd10515'], #EXs : 
#     ['H+','cpd00067'], #EXs : 
#     ['H2','cpd11640'], #EXs : 
#     ['H2O','cpd00001'], #EXs : 
#     ['Hg2+','cpd00531'], #EXs : Removed because toxic
#     ['K+','cpd00205'], #EXs : 
#     ['Mg','cpd00254'], #EXs : 
#     ['Mn2+','cpd00030'], #EXs : 
#     ['Na+','cpd00971'], #EXs : 
#     ['Ni2+','cpd00244'], #EXs : In M9
#     ['O2','cpd00007'], #EXs : 
#     ['Pb','cpd04097'], #EXs : Removed because toxic
#     ['Zn2+','cpd00034'], #EXs : In M9
#     ['fe3','cpd10516'] #EXs : In M9
]

# M9 Base : https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4932939/
# [
#     ['Ca2+','cpd00063'],
#     ['Cl-','cpd00099'],
#     ['CO2','cpd00011'],
#     ['Co2+','cpd00149'],
#     ['Cu2+','cpd00058'],
#     ['Fe2+','cpd10515'],
#     ['Fe3','cpd10516'],
#     ['H+','cpd00067'],
#     ['H2O','cpd00001'],
#     ['K+','cpd00205'],
#     ['Mg','cpd00254'],
#     ['Mn2+','cpd00030'],
#     ['Sodium molybdate','cpd11145'],
#     ['Na+','cpd00971'],
#     ['Ni2+','cpd00244'],
#     ['Selenate','cpd03396'],
#     ['Selenite','cpd03387'],
#     ['Zn2+','cpd00034']
# ]

# M9 default carbon, nitrogen, phosphorous, and sulfur sources
M9_sources = [
    ['D-Glucose','cpd00027'],
    ['NH3','cpd00013'], # this is actually NH4 : ammonium
    ['Phosphate','cpd00009'],
    ['Sulfate','cpd00048']
]

# Vitamins
vit_k = [
#     ['BIOT','cpd00104'], #EXs : Biotin
#     ['Cobinamide','cpd03422'], #EXs : related to cobalamin (B12)
#     ['Folate','cpd00393'], #EXs : 
    ['Menaquinone 7','cpd11606'], #EXs : Vitamine K2 : Add when there is no O2
#     ['Niacin','cpd00218'], #EXs : 
#     ['PAN','cpd00644'], #EXs : Pantothenate
#     ['Pyridoxal','cpd00215'], #EXs : 
#     ['Riboflavin','cpd00220'], #EXs : 
#     ['Thiamin','cpd00305'] #EXs : 
]

# For aerobic simulations, O2 was added with a lower bound of −20 and to 0 for anaerobic simulations.

In [61]:
bsm + M9_sources

[['H+', 'cpd00067'],
 ['H2O', 'cpd00001'],
 ['CO2', 'cpd00011'],
 ['O2', 'cpd00007'],
 ['N2', 'cpd00528'],
 ['K+', 'cpd00205'],
 ['Na+', 'cpd00971'],
 ['Mg', 'cpd00254'],
 ['Mn2+', 'cpd00030'],
 ['Fe2+', 'cpd10515'],
 ['Ca2+', 'cpd00063'],
 ['Vitamin B12r', 'cpd00423'],
 ['Cobinamide', 'cpd03422'],
 ['BIOT', 'cpd00104'],
 ['PAN', 'cpd00644'],
 ['Folate', 'cpd00393'],
 ['Niacin', 'cpd00218'],
 ['Pyridoxal', 'cpd00215'],
 ['Riboflavin', 'cpd00220'],
 ['thiamin', 'cpd00305'],
 ['Thioglycolate', 'cpd01415'],
 ['Acetate', 'cpd00029'],
 ['Citrate', 'cpd00137'],
 ['ABEE', 'cpd00443'],
 ['D-Glucose', 'cpd00027'],
 ['NH3', 'cpd00013'],
 ['Phosphate', 'cpd00009'],
 ['Sulfate', 'cpd00048']]

In [None]:
# Carbon Sources from all Exchanges and additional interesting sources
[
    ['4-Hydroxybenzoate','cpd00136'], #EXs : found in coconuts
    ['2-keto-3-deoxygluconate','cpd00176'], #EXs : degraded pectin product
    ['Amylotriose','cpd01262'], #EXs : 
    ['CELB','cpd00158'], #EXs : Cellobiose
    ['D-Fructose','cpd00082'], #EXs : 
    ['D-Glucose','cpd00027'], #EXs : 
    ['D-Mannitol','cpd00314'], #EXs : sweetener the is poorly absorbed in the gut
    ['D-Mannose','cpd00138'], #EXs : related to mucin
    ['Ribose','cpd00105'], #EXs : 
    ['Dextrin','cpd11594'], #EXs : 
    ['Dulcose','cpd01171'], #EXs : Galactitol
    ['GLCN','cpd00222'], #EXs : Gluconate 
    ['GLUM','cpd00276'], #EXs : Glucosamine
    ['Galactose','cpd00108'], #EXs : 
    ['L-Arabinose','cpd00224'], #EXs : 
    ['L-Inositol','cpd00121'], #EXs : 
    ['L-Lactate','cpd00159'], #EXs : 
    ['L-Malate','cpd00130'], #EXs : 
    ['Glycerol','cpd00100'], #EXs : 
    ['LACT','cpd00208'], #EXs : lactose
    ['Maltohexaose','cpd01329'], #EXs : 
    ['Maltose','cpd00179'], #EXs : 
    ['Melibiose','cpd03198'], #EXs : 
    ['Palmitate','cpd00214'], #EXs : 
    ['Propionate','cpd00141'], #EXs : 
    ['Salicin','cpd01030'], #EXs : 
    ['Sorbitol','cpd00588'], #EXs : 
    ['Stachyose','cpd01133'], #EXs : 
    ['Succinate','cpd00036'], #EXs : 
    ['Sucrose','cpd00076'], #EXs : 
    ['TRHL','cpd00794'], #EXs : Trehalose
    ['Ursin','cpd03696'], #EXs : Arbutin
    ['Xylose','cpd00154'], #EXs : 
    ['hexadecenoate','cpd15237'] #EXs : 
]

# Nitrogen Sources
[
#     ['NH3','cpd00013'], #EXs : 
    ['Allantoin','cpd01092'], #EXs : degradation product of purines
    ['BET','cpd00540'], #EXs : Betaine
    ['Choline','cpd00098'], #EXs : Found in milk
    ['GABA','cpd00281'], #EXs : Could also be a carbon source
    ['Nitrate','cpd00209'], #EXs : 
    ['Nitrite','cpd00075'], #EXs : 
    ['Spermidine','cpd00264'], #EXs : 
    ['Urea','cpd00073'], #EXs : 
    ['crotonobetaine','cpd08305'] #EXs : 
]

# Sulfur Sources
[
    ['H2S2O3','cpd00268'], #EXs : Thiosulfate
    ['Isethionate','cpd03048'], #EXs : C2H5O4S
#     ['Sulfate','cpd00048'], #EXs : O4S
    ['Sulfite','cpd00081'], #EXs : HO3S
    ['Sulfoacetate','cpd09878'], #EXs : C2H2O5S
    ['ethanesulfonate','cpd11579'], #EXs : C2H5O3S
    ['methanesulfonate','cpd08023'] #EXs : CH3O3S
]

# Phosphorus Sources
[
    ['Phosphate','cpd00009'] #EX :
]

In [7]:
# Amino Acid related metabolites
aas = [
    ['D-Alanine','cpd00117'], #EXs : 
    ['D-Glutamate','cpd00186'], #EXs : 
    ['D-Methionine','cpd00637'], #EXs : 
    ['D-Serine','cpd00550'], #EXs : 
    ['Glycine','cpd00033'], #EXs : 1
    ['L-Alanine','cpd00035'], #EXs : 2
    ['L-Arginine','cpd00051'], #EXs : 3
    ['L-Asparagine','cpd00132'], #EXs : 4
    ['L-Aspartate','cpd00041'], #EXs : 5

    ['L-Cysteine','cpd00084'], #EXs : 7
    ['L-Glutamate','cpd00023'], #EXs : 8
    ['L-Glutamine','cpd00053'], #EXs : 9
    ['L-Histidine','cpd00119'], #EXs : 10
    ['L-Isoleucine','cpd00322'], #EXs : 11
    ['L-Leucine','cpd00107'], #EXs : 12
    ['L-Lysine','cpd00039'], #EXs : 13
    ['L-Methionine','cpd00060'], #EXs : 14
    ['L-Phenylalanine','cpd00066'], #EXs : 15
    ['L-Proline','cpd00129'], #EXs : 16
    ['L-Serine','cpd00054'], #EXs : 17
    ['L-Threonine','cpd00161'], #EXs : 18
    ['L-Tryptophan','cpd00065'], #EXs : 19
    ['L-Tyrosine','cpd00069'], #EXs : 20
    ['L-Valine','cpd00156'] #EXs : 21
]
# Explore leave one out with core amino acids. 

# Dimers, and other amino acid related mets
aa_related = [
    ['2-Oxoglutarate','cpd00024'], #EXs : 
    ['Ala-Gln','cpd11587'], #EXs : 
    ['Ala-His','cpd11584'], #EXs : 
    ['Ala-Leu','cpd11583'], #EXs : 
    ['ala-L-asp-L','cpd11593'], #EXs : 
    ['ala-L-glu-L','cpd11586'], #EXs : 
    ['ala-L-Thr-L','cpd11582'], #EXs : 
    ['Aminoethanol','cpd00162'], #EXs : Ethanolamine
    ['Carnitine','cpd00266'], #EXs : 
    ['Chorismate','cpd00216'], #EXs : 
    ['L-Cysteate','cpd00395'], #EXs : 
    ['Cys-Gly','cpd01017'], #EXs : 
    ['Gly-Cys','cpd15603'], #EXs : 
    ['Gly-Gln','cpd11580'], #EXs : 
    ['Gly-Leu','cpd15604'], #EXs : 
    ['Gly-Met','cpd11591'], #EXs : 
    ['Gly-Phe','cpd15605'], #EXs : 
    ['Gly-Tyr','cpd15606'], #EXs : 
    ['gly-asn-L','cpd11581'], #EXs : 
    ['gly-asp-L','cpd11589'], #EXs : 
    ['gly-glu-L','cpd11592'], #EXs : 
    ['gly-pro-L','cpd11588'], #EXs : 
    ['L-Methionine S-oxide','cpd01914'], #EXs :
    ['L-alanylglycine','cpd11585'], #EXs : 
    ['L-methionine R-oxide','cpd11576'], #EXs : 
    ['met-L-ala-L','cpd11590'], #EXs :
    ['S-Adenosyl-L-methionine','cpd00017'], #EXs : 
    ['S-Methyl-L-methionine','cpd02027'], #EXs : 
    ['S-Ribosylhomocysteine','cpd02227'], #EXs : 
    ['N-Acetyl-D-glucosamine','cpd00122'], #EXs : 
    ['N-Acetyl-D-mannosamine','cpd00492'], #EXs : 
    ['Ornithine','cpd00064'], #EXs : 
    ['Putrescine','cpd00118'], #EXs : 
    ['Taurine','cpd00210'], #EXs : 
    ['meso-2,6-Diaminopimelate','cpd00516'] #EXs : related to lysine
]

In [None]:
# DNA/RNA related metabolites
[
    ['35ccmp','cpd00696'], #EXs : 
    ['AMP','cpd00018'], #EXs : 
    ['Adenosine','cpd00182'], #EXs : 
    ['Adenosine 3-5-bisphosphate','cpd00045'], #EXs : 
    ['Cytosine','cpd00307'], #EXs : 
    ['Deoxyadenosine','cpd00438'], #EXs : 
    ['Deoxycytidine','cpd00654'], #EXs : 
    ['Deoxyguanosine','cpd00277'], #EXs : 
    ['Deoxyinosine','cpd03279'], #EXs : 
    ['Deoxyuridine','cpd00412'], #EXs : 
    ['GMP','cpd00126'], #EXs : 
    ['GTP','cpd00038'], #EXs : 
    ['Guanosine','cpd00311'], #EXs : 
    ['Inosine','cpd00246'], #EXs : 
    ['HYXN','cpd00226'], #EXs : Hypoxanthine
    ['Nicotinamide ribonucleotide','cpd00355'], #EXs : 
    ['TTP','cpd00357'], #EXs : Deoxythymidine triphosphate
    ['Thymidine','cpd00184'], #EXs : 
    ['Thyminose','cpd01242'], #EXs : deoxyribose
    ['Uracil','cpd00092'], #EXs : 
    ['Uridine','cpd00249'], #EXs : 
    ['XAN','cpd00309'], #EXs : Xanthine
    ['Xanthosine','cpd01217'], #EXs : 
    ['dATP','cpd00115'], #EXs : 
    ['dGTP','cpd00241'], #EXs : 
    ['dTMP','cpd00298'] #EXs : 
]

# Check to see if these metabolites are used in pathways? Should I add some of these to media? 

In [None]:
# Production
# H2O2 -- cpd00025
# Acetate -- cpd00029
# Butyrate -- cpd00211
# isobutyrate -- cpd01711
# GABA -- cpd00281
# ethanol -- cpd00363
# Propionate -- cpd00141
# formate -- cpd00047
# Valerate -- cpd00597
# Isovaleric acid -- cpd05178 (wrong eqn)
# sulforaphane -- 
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5541232/
# thiamin -- cpd00305
# Pyridoxal phosphate (B6) -- cpd00016
# BIOT (biotin, B7) -- cpd00104
# (CH3)3NO (TMAO) -- cpd00811
# Indole-3-(carb)aldehyde -- cpd05401
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4337795/
# Acetaldehyde -- cpd00071
# Deoxycholate -- cpd02733
# Chorismate -- cpd00216
# Hexanoate -- cpd01113
# 

# Consumption
# Galactose -- cpd00108; cpd01112
# L-galactose -- cpd01257
# lactose -- cpd00208
# beta-lactose -- cpd01354
# sucrose -- cpd00076
# trehalose (TRHL) -- cpd00794
# maltose -- cpd00179
# D-Mannose -- cpd00138
# D-Fructose -- cpd00082
# Inulin -- cpd27312
# ethanol -- cpd00363
# Carnitine -- cpd00266
# Citrate -- cpd00137
# GLUM (D-glucosamine) -- cpd00276
# 


In [92]:
# Using pFBA

t = time.time()
counter = 0

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
likelihoods = pickle.load(open('../likelihoods/'+ genome_id +'.probs'))

# Ensure free water exhange
model.reactions.get_by_id('rxn05319_c').name = "Water transport"
model.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

# Create specific Media List
media_list = bsm + M9_sources
set_media(model, media_list, universal, verbose=False)

# Run through each amino acid to check for production
aa_like = {}
sys.stdout.write('Starting Loop')
# for aa_list in aas[0:1]:

sys.stdout.write('\n'+ str(counter))
# aa = aa_list[1]+'_c'
aa = 'cpd00186_c'
# Add Demand Reaction for metabolite
metabolite = model.metabolites.get_by_id(aa)
demand = model.add_boundary(metabolite, type='demand')
model.objective = demand
# Gapfill
sys.stdout.write('...gapfilling...')
gaps_to_fill = gapfill(model, universal, demand_reactions=False) # Update to probannopy gapfill function; use Gurobi
# Fill the gaps
rxns_to_add = []
for gap in gaps_to_fill:
    model.add_reactions(gap) ### I NEED TO REMOVE THESE REACTIONS
# Optimize with full pathway
sys.stdout.write('optimizing...')
solution = pfba(model, objective = demand)
# Find reactions that carry flux and their likelihood score
df = solution.fluxes.to_frame()
active = df.loc[(abs(df['fluxes'])) > 0.1]
like_list = []
rxns_w_likes = []
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        try:
            like_list.append(likelihoods[rxn])
            rxns_w_likes.append
        except:
            pass
avg_like = np.mean(like_list)
sys.stdout.write('Average Likelihood of: ' + aa_list[1] + ' is ' + str(avg_like))
aa_like[aa_list[1]] = avg_like
# Remove demand and filled gaps
model.remove_reactions([demand])
model.remove_reactions(gaps_to_fill[0]) # This might not work when multiple reactions are added... 
counter += 1

elapsed = time.time() - t
print("\nTime to complete:" + str(elapsed/60) + "mins")
print('\n')
print(gaps_to_fill)
print(like_list)

Starting Loop
0...gapfilling...optimizing...Average Likelihood of: cpd00156 is 0.4529859296342513
Time to complete:24.4370933652mins




NameError: name 'gaps_to_add' is not defined

In [None]:
print(gaps_to_fill)
print(rxns_w_likes)
print(like_list)
# print(likelihoods['rxn00904_c'])

In [None]:
active

In [91]:
model.reactions.get_by_id('rxn05528_c').check_mass_balance()

{}

In [87]:
like_list = []
rxns_w_likes = []
for rxn in list(active.index):
    if rxn.startswith('rxn'):
        try:
            like_list.append(likelihoods[rxn])
            rxns_w_likes.append(rxn)
        except:
            pass

In [84]:
print(likelihoods['rxn10481_c'])

0.00167266590947


In [69]:
model.remove_reactions(gaps_to_fill[0])

In [None]:
# model.solver = 'gurobi'