In [1]:
from __future__ import print_function

import cobra
import cobra.test
# import mackinac
import numpy as np
import csv
import glob
import pickle
import pandas as pd
import time
import sys
from copy import deepcopy
from collections import defaultdict
# from cobra.flux_analysis import gapfill
from gapfill2 import *
from cobra.flux_analysis import pfba
from cobra.flux_analysis.parsimonious import add_pfba
from cobra.flux_analysis import sample
from cobra.core.solution import get_solution


# Set default logger to python logger to avoid warnings given when adding reactions and/or metaboites 
# because "cobra.core.model" doesn't innately have a logger.
import logging
logging.basicConfig()
logger = logging.getLogger('logger')

In [2]:
def set_media(model, media, universal, verbose=False):

    # Find and close all exchange reactions in the model
    model_rxns = [rxn.id for rxn in model.reactions]
    for rxn in model_rxns:
        if rxn.startswith('EX_') and rxn.endswith('_e'):
            model.reactions.get_by_id(rxn).lower_bound = 0.0

    # Check for existence of exchange reactions for the media metabolites in the model
    for metabolite in media:
        met = metabolite[1]+'_e'
        if 'EX_'+met in model_rxns:
            model.reactions.get_by_id('EX_'+met).lower_bound = -1000.
        else:
            # Create exchange reaction and add to model
            if verbose:
                print("added exchange rxn for " + met)
            new_exchange = cobra.Reaction('EX_'+met)
            new_exchange.name = met + ' exchange'
            met_obj = universal.metabolites.get_by_id(met)
            new_exchange.add_metabolites({met_obj:-1})
            new_exchange.lower_bound = -1000.
            new_exchange.upper_bound = 1000.
            model.add_reaction(new_exchange)
            model.repair()

In [3]:
# Basal Synthetic Media
bsm = [
    ['H+','cpd00067'],
    ['H2O','cpd00001'],
    ['CO2','cpd00011'],
    ['O2','cpd00007'],
    ['N2','cpd00528'], 
#     ['H2','cpd11640'], # Only with no O2
    
    ['K+','cpd00205'],
    ['Na+','cpd00971'],
    ['Mg','cpd00254'],
    ['Mn2+','cpd00030'],
    ['Fe2+','cpd10515'], # Iron ion in heme
    ['Ca2+','cpd00063'], # Calcium pantothenate;cpd19112
    
    ['Vitamin B12r','cpd00423'], # C62H91CoN13O14P : cobalamin;cpd03424;cpd00730 : not present in any exchange reactions
    ['Cobinamide','cpd03422'], #EXs : related to cobalamin (B12) Added to ensure cells have access to B12
    ['BIOT','cpd00104'], # C10H15N2O3S : biotin B7
    ['PAN','cpd00644'], # C9H16NO5 : Pantothenate B5
    ['Folate','cpd00393'], # C19H17N7O6 : B9
    ['Niacin','cpd00218'], # C6H4NO2 : B3
    ['Pyridoxal','cpd00215'], # C8H9NO3 : B6
    ['Riboflavin','cpd00220'], # C17H19N4O6 : B2
    ['thiamin','cpd00305'], # C12H17N4OS : B1
    
#     ['Phosphate','cpd00009'], # HO4P : In M9 Defaults
    
    ['Thioglycolate','cpd01415'], # C2H3O2S : not present in any exchange reactions
#     ['Sulfate','cpd00048'], # O4S : In M9 Defaults
    
    ['Acetate','cpd00029'], # C2H3O2 : not present in any exchange reactions
    ['Citrate','cpd00137'], # C6H5O7 : Consider removing. 
#     ['Polysorbate 60','cpd24450'], # C35H68O10 : Almost tween 80 : not present in any reactions
#     ['Ethyl acetate','cpd00633'], # C4H8O2 : not present in any exchange reactions, only present in one reaction at all
    
    ['ABEE','cpd00443'] # C7H6NO2 : aminobenzoate : not present in any exchange reactions
]

# Potentially add to BSM (from M9 media)
M9_ions = [
    ['Cl-','cpd00099'],
    ['Co2+','cpd00149'],
    ['Cu2+','cpd00058'],
    ['Fe3','cpd10516'],
#     ['Sodium molybdate','cpd11145'], # This doesn't connect to anything
    ['Ni2+','cpd00244'],
    ['Selenate','cpd03396'],
    ['Selenite','cpd03387'],
    ['Zn2+','cpd00034']
]

# Enviromental Metabolites with Exchange reactions
[
#     ['CO2','cpd00011'], #EXs : 
#     ['Ca2+','cpd00063'], #EXs : 
#     ['Cd2+','cpd01012'], #EXs : Removed because toxic
#     ['chromate','cpd11595'], #EXs : Removed because toxic
#     ['Cl-','cpd00099'], #EXs : 
#     ['Co2+','cpd00149'], #EXs : In M9
#     ['Cu2+','cpd00058'], #EXs : In M9
#     ['Fe2+','cpd10515'], #EXs : 
#     ['H+','cpd00067'], #EXs : 
#     ['H2','cpd11640'], #EXs : 
#     ['H2O','cpd00001'], #EXs : 
#     ['Hg2+','cpd00531'], #EXs : Removed because toxic
#     ['K+','cpd00205'], #EXs : 
#     ['Mg','cpd00254'], #EXs : 
#     ['Mn2+','cpd00030'], #EXs : 
#     ['Na+','cpd00971'], #EXs : 
#     ['Ni2+','cpd00244'], #EXs : In M9
#     ['O2','cpd00007'], #EXs : 
#     ['Pb','cpd04097'], #EXs : Removed because toxic
#     ['Zn2+','cpd00034'], #EXs : In M9
#     ['fe3','cpd10516'] #EXs : In M9
]

# M9 Base : https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4932939/
# [
#     ['Ca2+','cpd00063'],
#     ['Cl-','cpd00099'],
#     ['CO2','cpd00011'],
#     ['Co2+','cpd00149'],
#     ['Cu2+','cpd00058'],
#     ['Fe2+','cpd10515'],
#     ['Fe3','cpd10516'],
#     ['H+','cpd00067'],
#     ['H2O','cpd00001'],
#     ['K+','cpd00205'],
#     ['Mg','cpd00254'],
#     ['Mn2+','cpd00030'],
#     ['Sodium molybdate','cpd11145'],
#     ['Na+','cpd00971'],
#     ['Ni2+','cpd00244'],
#     ['Selenate','cpd03396'],
#     ['Selenite','cpd03387'],
#     ['Zn2+','cpd00034']
# ]

# M9 default carbon, nitrogen, phosphorous, and sulfur sources
M9_sources = [
    ['D-Glucose','cpd00027'],
    ['NH3','cpd00013'], # this is actually NH4 : ammonium
    ['Phosphate','cpd00009'],
    ['Sulfate','cpd00048']
]

# Vitamins
vit_k = [
#     ['BIOT','cpd00104'], #EXs : Biotin
#     ['Cobinamide','cpd03422'], #EXs : related to cobalamin (B12)
#     ['Folate','cpd00393'], #EXs : 
    ['Menaquinone 7','cpd11606'], #EXs : Vitamine K2 : Add when there is no O2
#     ['Niacin','cpd00218'], #EXs : 
#     ['PAN','cpd00644'], #EXs : Pantothenate
#     ['Pyridoxal','cpd00215'], #EXs : 
#     ['Riboflavin','cpd00220'], #EXs : 
#     ['Thiamin','cpd00305'] #EXs : 
]

# For aerobic simulations, O2 was added with a lower bound of −20 and to 0 for anaerobic simulations.

# DNA/RNA related metabolites
rna_bases = [
#     ['35ccmp','cpd00696'], #EXs : 
#     ['AMP','cpd00018'], #EXs : 
    ['Adenosine','cpd00182'], #EXs : In BSM (as adenine)
#     ['Adenosine 3-5-bisphosphate','cpd00045'], #EXs : 
    ['Cytosine','cpd00307'], #EXs : 
#     ['Deoxyadenosine','cpd00438'], #EXs : 
#     ['Deoxycytidine','cpd00654'], #EXs : 
#     ['Deoxyguanosine','cpd00277'], #EXs : In BSM
#     ['Deoxyinosine','cpd03279'], #EXs : 
#     ['Deoxyuridine','cpd00412'], #EXs : 
#     ['GMP','cpd00126'], #EXs : 
#     ['GTP','cpd00038'], #EXs : 
    ['Guanosine','cpd00311'], #EXs : In BSM (as Guanine)
#     ['Inosine','cpd00246'], #EXs : 
#     ['HYXN','cpd00226'], #EXs : Hypoxanthine
#     ['Nicotinamide ribonucleotide','cpd00355'], #EXs : 
#     ['TTP','cpd00357'], #EXs : Deoxythymidine triphosphate
    ['Thymidine','cpd00184'], #EXs : In BSM
#     ['Thyminose','cpd01242'], #EXs : deoxyribose
#     ['Uracil','cpd00092'], #EXs : 
    ['Uridine','cpd00249'], #EXs : In BSM (as uracil)
#     ['XAN','cpd00309'], #EXs : Xanthine
#     ['Xanthosine','cpd01217'], #EXs : 
#     ['dATP','cpd00115'], #EXs : 
#     ['dGTP','cpd00241'], #EXs : 
#     ['dTMP','cpd00298'] #EXs : 
]

# Check to see if these metabolites are used in pathways? Should I add some of these to media? 
# Yes for ATP, and GTP. (TTP, CTP as well?)

In [4]:
# Amino Acid related metabolites
aas = [
    ['D-Alanine','cpd00117'], #EXs : 
    ['D-Glutamate','cpd00186'], #EXs : 
    ['D-Methionine','cpd00637'], #EXs : 
    ['D-Serine','cpd00550'], #EXs : 
    ['Glycine','cpd00033'], #EXs : 1
    ['L-Alanine','cpd00035'], #EXs : 2
    ['L-Arginine','cpd00051'], #EXs : 3
    ['L-Asparagine','cpd00132'], #EXs : 4
    ['L-Aspartate','cpd00041'], #EXs : 5

    ['L-Cysteine','cpd00084'], #EXs : 7
    ['L-Glutamate','cpd00023'], #EXs : 8
    ['L-Glutamine','cpd00053'], #EXs : 9
    ['L-Histidine','cpd00119'], #EXs : 10
    ['L-Isoleucine','cpd00322'], #EXs : 11
    ['L-Leucine','cpd00107'], #EXs : 12
    ['L-Lysine','cpd00039'], #EXs : 13
    ['L-Methionine','cpd00060'], #EXs : 14
    ['L-Phenylalanine','cpd00066'], #EXs : 15
    ['L-Proline','cpd00129'], #EXs : 16
    ['L-Serine','cpd00054'], #EXs : 17
    ['L-Threonine','cpd00161'], #EXs : 18
    ['L-Tryptophan','cpd00065'], #EXs : 19
    ['L-Tyrosine','cpd00069'], #EXs : 20
    ['L-Valine','cpd00156'] #EXs : 21
]
# Explore leave one out with core amino acids. 

# Dimers, and other amino acid related mets
aa_related = [
    ['2-Oxoglutarate','cpd00024'], #EXs : 
    ['Ala-Gln','cpd11587'], #EXs : 
    ['Ala-His','cpd11584'], #EXs : 
    ['Ala-Leu','cpd11583'], #EXs : 
    ['ala-L-asp-L','cpd11593'], #EXs : 
    ['ala-L-glu-L','cpd11586'], #EXs : 
    ['ala-L-Thr-L','cpd11582'], #EXs : 
    ['Aminoethanol','cpd00162'], #EXs : Ethanolamine
    ['Carnitine','cpd00266'], #EXs : 
    ['Chorismate','cpd00216'], #EXs : 
    ['L-Cysteate','cpd00395'], #EXs : 
    ['Cys-Gly','cpd01017'], #EXs : 
    ['Gly-Cys','cpd15603'], #EXs : 
    ['Gly-Gln','cpd11580'], #EXs : 
    ['Gly-Leu','cpd15604'], #EXs : 
    ['Gly-Met','cpd11591'], #EXs : 
    ['Gly-Phe','cpd15605'], #EXs : 
    ['Gly-Tyr','cpd15606'], #EXs : 
    ['gly-asn-L','cpd11581'], #EXs : 
    ['gly-asp-L','cpd11589'], #EXs : 
    ['gly-glu-L','cpd11592'], #EXs : 
    ['gly-pro-L','cpd11588'], #EXs : 
    ['L-Methionine S-oxide','cpd01914'], #EXs :
    ['L-alanylglycine','cpd11585'], #EXs : 
    ['L-methionine R-oxide','cpd11576'], #EXs : 
    ['met-L-ala-L','cpd11590'], #EXs :
    ['S-Adenosyl-L-methionine','cpd00017'], #EXs : 
    ['S-Methyl-L-methionine','cpd02027'], #EXs : 
    ['S-Ribosylhomocysteine','cpd02227'], #EXs : 
    ['N-Acetyl-D-glucosamine','cpd00122'], #EXs : 
    ['N-Acetyl-D-mannosamine','cpd00492'], #EXs : 
    ['Ornithine','cpd00064'], #EXs : 
    ['Putrescine','cpd00118'], #EXs : 
    ['Taurine','cpd00210'], #EXs : 
    ['meso-2,6-Diaminopimelate','cpd00516'] #EXs : related to lysine
]

In [5]:
#!/usr/bin/python
'''
Gapfilling function that utilizes pFBA and flux sampling to find most
parsimonious additional reactions to achieve minimum flux through the objective
Author: Matthew Jenior
'''
import pandas
import math
import copy
import time
import random

# Using Cobrapy 0.13.0
import cobra
import cobra.test
from cobra.flux_analysis.sampling import OptGPSampler
from cobra.manipulation.delete import *
from cobra.flux_analysis.parsimonious import add_pfba
from cobra.medium import find_boundary_types

# pFBA gapfiller
def pfba_gapfill(model, reaction_bag, likelihoods, obj=None, obj_lb=10., obj_constraint=False,
                 iters=1, tasks=None, task_lb=0.05, 
                 add_exchanges=True, extracellular='e'):
    '''
    Function that utilizes iterations of pFBA solution with a universal reaction bag 
    in order to gapfill a model.
    
    Parameters
    ----------
    model : cobra.Model
        Model to be gapfilled
    reaction_bag : cobra.Model
        Reaction bag reference to use during gapfilling
    obj : string
        Reaction ID for objective function in model to be gapfilled.
    obj_lb : float
        Lower bound for objective function
    obj_constraint : bool
        Sets objective as contstraint which must be maximized
    tasks : list or None
        List of reactions IDs (strings) of metabolic tasks 
        to set a minimum lower bound for
    task_lb : float
        Lower bound for any metabolic tasks
    iters : int
        Number of gapfilling rounds. Unique reactions from each round are 
        saved and the union is added simulatneously to the model
    add_exchanges : bool
        Identifies extracellular metabolites added during gapfilling that
        are not associated with exchange reactions and creates them
    extracellular : string
        Label for extracellular compartment of model
    '''
    start_time = time.time()
    
    # Save some basic network info for downstream membership testing
    orig_rxn_ids = set([str(x.id) for x in model.reactions])
    orig_cpd_ids = set([str(y.id) for y in model.metabolites])
    univ_rxn_ids = set([str(z.id) for z in reaction_bag.reactions])
    
    # Find overlap in model and reaction bag
    overlap_rxn_ids = univ_rxn_ids.intersection(orig_rxn_ids)
    
    # Get model objective reaction ID
    if obj == None:
        obj = get_objective(model)
    else:
        obj = obj
    
    # Modify universal reaction bag
    new_rxn_ids = set()
    print('Creating universal model...')
    with reaction_bag as universal:

        # Remove overlapping reactions from universal bag, and reset objective if needed
        for rxn in overlap_rxn_ids: 
            universal.reactions.get_by_id(rxn).remove_from_model()
        
        # Set objective in universal if told by user
        # Made constraint as fraction of minimum in next step
        if obj_constraint == True:
            universal.add_reactions([model.reactions.get_by_id(obj)])
            universal.objective = obj
            orig_rxn_ids.remove(obj)
            orig_rxns = []
            for rxn in orig_rxn_ids: 
                orig_rxns.append(copy.deepcopy(model.reactions.get_by_id(rxn)))
        else:
            orig_rxns = list(copy.deepcopy(model.reactions))
            
        # Add pFBA to universal model and add model reactions
        print('Add pFBA...')
        add_pfba_likely(universal, likelihoods)
        
#         updated_universal = copy.deepcopy(universal)
#         universal = copy.deepcopy(universal) # reset solver
        universal.add_reactions(orig_rxns)
        
        # If previous objective not set as constraint, set minimum lower bound
        if obj_constraint == False: 
            universal.reactions.get_by_id(obj).lower_bound = obj_lb
    
        # Set metabolic tasks that must carry flux in gapfilled solution
        if tasks != None:
            for task in tasks:                    
                universal.reactions.get_by_id(task).lower_bound = task_lb
                
        # Run FBA and save solution
        print('Optimizing model with combined reactions...')
        solution = universal.optimize()

        if iters > 1:
            print('Generating flux sampling object...')
            optgp_object = OptGPSampler(universal, processes=4)
        
            # Assess the sampled flux distributions
            print('Sampling ' + str(iters) + ' flux distributions...')
            flux_samples = optgp_object.sample(iters)
            rxns = list(flux_samples.columns)
            for distribution in flux_samples.iterrows():
                for flux in range(0, len(list(distribution[1]))):
                    if abs(list(distribution[1])[flux]) > 1e-6:
                        new_rxn_ids |= set([rxns[flux]]).difference(orig_rxn_ids)
        else:
            rxns = list(solution.fluxes.index)
            fluxes = list(solution.fluxes)
            for flux in range(0, len(fluxes)):
                if abs(fluxes[flux]) > 1e-6:
                    new_rxn_ids |= set([rxns[flux]])
    
    # Screen new reaction IDs
    if obj in new_rxn_ids: new_rxn_ids.remove(obj)
    for rxn in orig_rxn_ids:
        try:
            new_rxn_ids.remove(rxn)
        except:
            continue
    
    # Get reactions and metabolites to be added to the model
    print('Retrieving reactions and metabolites needed for gapfilling...')
    new_rxns = copy.deepcopy([reaction_bag.reactions.get_by_id(rxn) for rxn in new_rxn_ids])
    new_cpd_ids = set()
    for rxn in new_rxns: new_cpd_ids |= set([str(x.id) for x in list(rxn.metabolites)])
    new_cpd_ids = new_cpd_ids.difference(orig_cpd_ids)
    new_cpds = copy.deepcopy([reaction_bag.metabolites.get_by_id(cpd) for cpd in new_cpd_ids])
    
    # Copy model and gapfill 
    print('Gapfilling model...')
    new_model = copy.deepcopy(model)
    new_model.add_metabolites(new_cpds)
    new_model.add_reactions(new_rxns)
    
    # Identify extracellular metabolites with no exchanges
    if add_exchanges == True:
        new_exchanges = extend_exchanges(new_model, new_cpd_ids, extracellular)
        if len(new_exchanges) > 0: new_rxn_ids |= new_exchanges
    
    duration = int(round(time.time() - start_time))
    print('Took ' + str(duration) + ' seconds to gapfill ' + str(len(new_rxn_ids)) + \
          ' reactions and ' + str(len(new_cpd_ids)) + ' metabolites.') 
    
    new_obj_val = new_model.slim_optimize()
    if new_obj_val > 1e-6:
        print('Gapfilled model objective now carries flux (' + str(new_obj_val) + ').')
    else:
        print('Gapfilled model objective still does not carry flux.')
    
    return {'NewModel':new_model, 'gaps':new_rxn_ids, 'mets':new_cpd_ids}


# Adds missing exchanges for extracellulart metbaolites
def extend_exchanges(model, cpd_ids, ex):
    
    model_exchanges = set(find_boundary_types(model, 'exchange', external_compartment=ex))
    new_ex_ids = set()
    
    for cpd in cpd_ids:
        cpd = model.metabolites.get_by_id(cpd)
        if str(cpd.compartment) != ex:
            continue
        else:
            if bool(set(cpd.reactions) & model_exchanges) == False:
                try:
                    new_id = 'EX_' + cpd.id
                    model.add_boundary(cpd, type='exchange', reaction_id=new_id, lb=-1000.0, ub=1000.0)
                    new_ex_ids |= set([new_id])
                except ValueError:
                    pass

    return new_ex_ids


# Returns the reaction ID of the objective reaction
def get_objective(model):
    
    if len(list(model.objective.variables)) == 0:
        raise IndexError('Model has no objective set.')
    
    expression = str(model.objective.expression).split()
    if 'reverse' in expression[0]:
        obj_id = expression[2].split('*')[-1]
    else:
        obj_id = expression[0].split('*')[-1]
            
    return obj_id

from __future__ import absolute_import

import logging
from warnings import warn
from itertools import chain

from optlang.symbolics import Zero

from cobra.util import solver as sutil
from cobra.core.solution import get_solution

LOGGER = logging.getLogger(__name__)

def add_pfba_likely(model, likelihoods, objective=None, fraction_of_optimum=1.0):
    """Add pFBA objective

    Add objective to minimize the summed flux of all reactions to the
    current objective.

    See Also
    -------
    pfba

    Parameters
    ----------
    model : cobra.Model
        The model to add the objective to
    objective :
        An objective to set in combination with the pFBA objective.
    fraction_of_optimum : float
        Fraction of optimum which must be maintained. The original objective
        reaction is constrained to be greater than maximal_value *
        fraction_of_optimum.
    """
    if objective is not None:
        model.objective = objective
    if model.solver.objective.name == '_pfba_objective':
        raise ValueError('The model already has a pFBA objective.')
    sutil.fix_objective_as_constraint(model, fraction=fraction_of_optimum)
    reaction_variables = ((rxn.forward_variable, rxn.reverse_variable)
                          for rxn in model.reactions)
    variables = chain(*reaction_variables)
    dict1 = {}
    fail_report = []
    model_reactions = [rxn.id.split('_')[0] for rxn in model.reactions if rxn.id.startswith('rxn')]
    for v in variables:
#         print(set([str(v.name.split('_')[0])]))
#         print(model_reactions[0])
        if set([str(v.name.split('_')[0])]).issubset(set(model_reactions)) and str(v.name.split('_')[0]).startswith('rxn'):
            rxn_id = (v.name.split('_')[0] + '_c')
#             print(rxn_id)
#         for rxn in model.reactions:
#             if v.name.startswith(rxn.id):
            try:
                dict1[v] = max(0.0, 1.0 - likelihoods[rxn_id])
#                 print('likelihood added')
            except:
                try:
                    dict1[v] = 1.0
                except:
                    print('FAILED')
                    pass
                pass
        elif str(v.name.split('_')[0]).startswith('DM'):
            dict1[v] = 1.0
        else:
            fail_report.append(1)

    model.objective = model.problem.Objective(
        Zero, direction='min', sloppy=True, name="_pfba_objective")
    model.objective.set_linear_coefficients(dict1)
#     return dict1

In [6]:
# genome_id = '220668.9'
# model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# likelihoods = pickle.load(open('../likelihoods/'+ genome_id +'.probs'))

# dict_out = add_pfba_likely(model, likelihoods)

# dict_out


In [7]:
# Using pFBA with new media components (RNA bases + thymidine...) 
# Remove reaction likelihoods of zero from model
# Add demands for all metabolites in model to avoid any reactions being blocked
# Use thresholded likelihoods to penalize reactions with no likelihood using pFBA gapfill
# Use pFBA answer + 0 likelihood reactions from reconstructed model + higher likelihood reactions to probanno gapfill

# Weighted pFBA gapfill solution

t = time.time()
# counter = 0

sys.stdout.write('Loading in models...')

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
likelihoods = pickle.load(open('../likelihoods/'+ genome_id +'.probs'))

sys.stdout.write('Adding Water...')

# Ensure free diffusion of water
model.reactions.get_by_id('rxn05319_c').name = "Water transport"
model.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

sys.stdout.write('Set-up Universal...')

# Add demand for all metabolites in Universal model to stop blocked reactions
all_mets = []
for met in universal.metabolites:
    if (met.id.endswith('_c')):
        universal.add_boundary(met, type='demand')

### Set Up Model: remove low likelihood reactions
sys.stdout.write('Set-up Model...')
low_like_model = []
for rxn in model.reactions:
    if rxn.id.startswith('rxn'):
        try:
            if likelihoods[rxn.id] <= 0.1:
                low_like_model.append(rxn.id)
        except:
            pass
model_rxns_to_remove = [model.reactions.get_by_id(rxn) for rxn in low_like_model]
model.remove_reactions(model_rxns_to_remove)

# metabolite = model.metabolites.get_by_id('cpd00065_c')
# demand = model.add_boundary(metabolite, type='demand')
# model.objective = demand

# Create specific Media List
media_list = bsm + M9_sources + rna_bases # + nitrogen + carbon
set_media(model, media_list, universal, verbose=False)
set_media(universal, media_list, universal, verbose=False)

print(str((time.time() - t)/60) + 'mins to complete')


Loading in models...Adding Water...Set-up Universal...Set-up Model...0.535631914934mins to complete


In [8]:
# universal = cobra.io.load_json_model("../Data/GramPosUni.json")
# [rxn.id for rxn in universal.reactions if not str(rxn.id).startswith('rxn')]
universal
# 6884 mets; 8657 rxns;


0,1
Name,GramPositive.modeltemplate
Memory address,0x07f4d0d37fd50
Number of metabolites,6884
Number of reactions,15041
Objective expression,0
Compartments,"c, e"


In [9]:
# global_time = time.time()

# new_gapfill_data = pfba_gapfill(model, universal, likelihoods, obj=None, obj_lb=10., obj_constraint=False, iters=1, add_exchanges=False)

# print(str((time.time() - global_time)/60) + 'mins to complete')

In [None]:
# Run through each amino acid to check for production
global_time = time.time()
aa_like = {}
counter = 0
sys.stdout.write('Starting Loop...')

total_dataset_dict = {}
carb_idx = 0
nit_idx = 0
product_idx = 0
carbon = 'D-Glucose'
nitrogen = 'NH3'

for aa_list in aas:
    
    sys.stdout.write('\n'+ 'Loop' + str(counter) + ' ')
    aa = aa_list[1]+'_c'
    product = aa_list[0]
    
    with model as temp_model:
        metabolite = temp_model.metabolites.get_by_id(aa)
        demand = temp_model.add_boundary(metabolite, type='demand')
        temp_model.objective = demand

        sys.stdout.write('Gapfilling...')
        dont_continue = 0
        new_gapfill_data = pfba_gapfill(temp_model, universal, likelihoods, obj=None, obj_lb=10., obj_constraint=False, iters=1, add_exchanges=False)

#         try:
#             new_gapfill_data = pfba_gapfill(temp_model, universal, likelihoods, obj=None, obj_lb=10., obj_constraint=False, iters=1, add_exchanges=False)
#         except:
#             try:
#                 new_gapfill_data = pfba_gapfill(temp_model, universal, likelihoods, obj=None, obj_lb=10., obj_constraint=False, iters=1, add_exchanges=False)
#             except:
#                 dont_continue = 1
#                 pass

        gaps_to_fill = new_gapfill_data['gaps']
        new_model = new_gapfill_data['NewModel']
        mets_added = new_gapfill_data['mets']

        if dont_continue == 0:
            # Optimize with filled pathway
            sys.stdout.write('pFBA...')
            solution = pfba(new_model, objective = demand)
            sys.stdout.write(str(round(model.slim_optimize())) + '...')

            sys.stdout.write('Constructing Dict...')
            # Find reactions that carry flux
            df = solution.fluxes.to_frame()
            active = df.loc[(abs(df['fluxes'])) > 0.1]
            
            demand_list = []
            for rxn_id in active.index:
                if rxn_id.startswith('DM') and rxn_id != demand.id:
                    demand_list.append(rxn_id)
                    print('demands added')

            # Acquire likelihood scores for reactions that carry flux
            flux_rxns = []
            like_list = []
            gap_flux_rxns = []
            gap_like_list = []
            path_flux_rxns = []
            path_like_list = []
            for rxn in list(active.index):
                if rxn in gaps_to_fill and rxn.startswith('rxn'):
                    try:
                        gap_flux_rxns.append([str(rxn),likelihoods[str(rxn)]])
                        gap_like_list.append(likelihoods[str(rxn)])
#                         print('success')
                    except:
                        pass
                if rxn not in gaps_to_fill and rxn.startswith('rxn'):
                    try:
                        path_flux_rxns.append([str(rxn),likelihoods[str(rxn)]])
                        path_like_list.append(likelihoods[str(rxn)])
#                         print('success2')
                    except:
                        pass
                if rxn.startswith('rxn'):
                    try:
                        flux_rxns.append([str(rxn),likelihoods[str(rxn)]])
                        like_list.append(likelihoods[str(rxn)])
                    except:
                        pass
            avg_like = np.mean(like_list)
            gap_avg_like = np.mean(gap_like_list)
            path_avg_like = np.mean(path_like_list)
            sys.stdout.write('Ave likelihood of: ' + aa + ' is ' + str(avg_like))

            counter += 1

            report_dict = {}

            report_dict['Model_ID'] = genome_id
            report_dict['Carbon'] = carbon
            report_dict['Nitrogen'] = nitrogen
            report_dict['objective'] = product
            report_dict['opt_before'] = model.slim_optimize()
            report_dict['opt_after'] = new_model.slim_optimize()
            report_dict['avg_path_like'] = avg_like
            report_dict['gap_avg_like'] = gap_avg_like
            report_dict['path_avg_like'] = path_avg_like
            report_dict['gaps_filled'] = gaps_to_fill
            report_dict['mets_added'] = mets_added
            report_dict['reactions_w_flux'] = flux_rxns
            report_dict['gaps_w_flux'] = gap_flux_rxns
            report_dict['path_w_flux'] = path_flux_rxns
            report_dict['active_rxns'] = active
            report_dict['demands'] = demand_list

            report_dict_ID = genome_id + ':' + str(carb_idx) + '.' + str(nit_idx) + '.' + str(product_idx)
            total_dataset_dict[report_dict_ID] = report_dict
            product_idx += 1 #Keep track to which product is being maximized

            elapsed = time.time() - t
            sys.stdout.write('Run time: ' + str(elapsed/60) + " [mins]")

        elif dont_continue == 1:
            sys.stdout.write('Failed to gapfill...')
            counter += 1

            report_dict = {}

            report_dict['Model_ID'] = genome_id
            report_dict['Carbon'] = carbon
            report_dict['Nitrogen'] = nitrogen
            report_dict['objective'] = product
            report_dict['opt_before'] = "Failed to gapfill"
            report_dict['opt_after'] = "Failed to gapfill"
            report_dict['avg_path_like'] = "Failed to gapfill"
            report_dict['gap_avg_like'] = "Failed to gapfill"
            report_dict['path_avg_like'] = "Failed to gapfill"
            report_dict['gaps_filled'] = "Failed to gapfill"
            report_dict['mets_added'] = "Failed to gapfill"
            report_dict['reactions_w_flux'] = "Failed to gapfill"
            report_dict['gaps_w_flux'] = "Failed to gapfill"
            report_dict['path_w_flux'] = "Failed to gapfill"
            report_dict['active_rxns'] = "Failed to gapfill"
            report_dict['demands'] = "Failed to gapfill"

            report_dict_ID = genome_id + ':' + str(carb_idx) + '.' + str(nit_idx) + '.' + str(product_idx)
            total_dataset_dict[report_dict_ID] = report_dict
            product_idx += 1 #Keep track to which product is being maximized

            elapsed = time.time() - t
            sys.stdout.write('Run time: ' + str(elapsed/60) + " [mins]")

file_name = "../metabolic_output/%s.data" % (genome_id)
pickle.dump(total_dataset_dict, open(file_name, "wb"))

# elapsed = time.time() - t
# print("\nTime to complete: " + str(elapsed/60) + " [mins]")
print(str((time.time() - global_time)/60) + 'mins to complete')


Starting Loop...
Loop0 Gapfilling...Creating universal model...
Add pFBA...
Optimizing model with combined reactions...
Retrieving reactions and metabolites needed for gapfilling...
Gapfilling model...
Took 55 seconds to gapfill 2 reactions and 0 metabolites.
Gapfilled model objective now carries flux (1000.0).
pFBA...0.0...Constructing Dict...Ave likelihood of: cpd00117_c is 0.5173929430770764Run time: 1.46228676637 [mins]
Loop1 Gapfilling...Creating universal model...
Add pFBA...


In [None]:
total_dataset_dict

In [None]:
hours = 34*9*49/60
hours

In [None]:
#     ['D-Alanine','cpd00117'], #EXs : 
#     ['D-Glutamate','cpd00186'], #EXs : 
#     ['D-Methionine','cpd00637'], #EXs : 
#     ['D-Serine','cpd00550'], #EXs : 
#     ['Glycine','cpd00033'], #EXs : 1
#     ['L-Alanine','cpd00035'], #EXs : 2
#     ['L-Arginine','cpd00051'], #EXs : 3
#     ['L-Asparagine','cpd00132'], #EXs : 4
#     ['L-Aspartate','cpd00041'], #EXs : 5

In [None]:
# print(model.slim_optimize())
# solution = model.optimize()
# df = solution.fluxes.to_frame()
# active = df.loc[(abs(df['fluxes'])) > 0.1]

# active


In [None]:
# # print(new_model.slim_optimize())
# # solution = new_model.optimize()
# solution = pfba(new_model, objective = demand)
# df = solution.fluxes.to_frame()
# active = df.loc[(abs(df['fluxes'])) > 0.1]
# print(str(len(active.index)))
# active


In [None]:
# for rxn in active.index:
#     try:
#         if rxn.startswith('rxn'):
#             print(rxn)
#             print(likelihoods[rxn])
#     except:
#         pass


In [None]:
# Acquire likelihood scores for reactions that carry flux
# aa = ' the amino acid'
# flux_rxns = []
# like_list = []
# for rxn in list(active.index):
#     if rxn.startswith('rxn'):
#         try:
#             flux_rxns.append([str(rxn),likelihoods[str(rxn)]])
#             like_list.append(likelihoods[str(rxn)])
#         except:
#             pass
# avg_like = np.mean(like_list)
# sys.stdout.write('Ave likelihood of: ' + aa + ' is ' + str(avg_like))
