In [1]:
from __future__ import print_function
from __future__ import absolute_import

import cobra
import cobra.test
import numpy as np
import csv
import glob
import pickle
import pandas as pd
import math
import copy
import time
import random
import time
import sys

from copy import deepcopy
from collections import defaultdict
from cobra.flux_analysis import sample
from cobra.core.solution import get_solution
from cobra.flux_analysis.sampling import OptGPSampler
from cobra.manipulation.delete import *
from cobra.medium import find_boundary_types
from cobra.flux_analysis import pfba

from warnings import warn
from itertools import chain
from optlang.symbolics import Zero
from cobra.util import solver as sutil
from cobra.core.solution import get_solution

import logging
LOGGER = logging.getLogger(__name__)

In [2]:
# Functions
def set_media(model, media, universal, verbose=False):
    """Restrict the uptake of ``model`` to the compounds listed in ``media``.

    Every reaction whose id starts with ``EX_`` and ends with ``_e`` first
    gets a lower bound of 0. The exchange of each media compound then gets a
    lower bound of -1000. When the model has no exchange for a media
    compound, one is created (bounds -1000/1000) from a copy of the matching
    ``<compound>_e`` metabolite of ``universal``.

    Parameters
    ----------
    model : cobra.Model
        Model to modify in place.
    media : iterable of sequences
        Only element 1 of each entry (the compound id without compartment
        suffix) is read; ``'_e'`` is appended to it.
    universal : cobra.Model
        Source of metabolites for exchanges that have to be created.
    verbose : bool
        Print a line for every exchange reaction that is created.

    Raises
    ------
    KeyError
        If an exchange must be created and ``universal`` has no metabolite
        with the id ``<compound>_e``.
    """
    # A set gives O(1) membership tests and is kept up to date below, so a
    # compound listed twice in ``media`` is only ever added once.
    known_rxn_ids = set(rxn.id for rxn in model.reactions)

    # Find and close all exchange reactions in the model
    for rxn_id in known_rxn_ids:
        if rxn_id.startswith('EX_') and rxn_id.endswith('_e'):
            model.reactions.get_by_id(rxn_id).lower_bound = 0.0

    # Open (or create) the exchange reaction of every media metabolite
    added_exchange = False
    for metabolite in media:
        met = metabolite[1] + '_e'
        exchange_id = 'EX_' + met
        if exchange_id in known_rxn_ids:
            model.reactions.get_by_id(exchange_id).lower_bound = -1000.
            continue

        # Create exchange reaction and add to model
        if verbose:
            print("added exchange rxn for " + met)
        new_exchange = cobra.Reaction(exchange_id)
        new_exchange.name = met + ' exchange'
        # Work on a copy: handing the universal model's own metabolite object
        # to a reaction of another model would leave it shared by both.
        met_obj = copy.deepcopy(universal.metabolites.get_by_id(met))
        new_exchange.add_metabolites({met_obj: -1})
        new_exchange.lower_bound = -1000.
        new_exchange.upper_bound = 1000.
        model.add_reactions([new_exchange])
        known_rxn_ids.add(exchange_id)
        added_exchange = True

    # Rebuild the model indices once, after all additions
    if added_exchange:
        model.repair()
            
def pfba_gapfill(model, reaction_bag, likelihoods, obj=None, obj_lb=10., obj_constraint=False,
                 iters=1, tasks=None, task_lb=0.05,
                 add_exchanges=True, extracellular='e'):
    """Gapfill ``model`` with the universal reactions that carry flux.

    Inside a temporary context on ``reaction_bag`` the likelihood-weighted
    pFBA objective is installed (``add_pfba_likely``), copies of the model's
    reactions are added, the objective reaction is forced to carry at least
    ``obj_lb`` flux (unless ``obj_constraint`` is truthy) and the problem is
    optimized. Every reaction with ``|flux| > 1e-6`` that is neither the
    objective nor already in ``model`` is copied, together with its missing
    metabolites, into a deep copy of ``model``.

    Parameters
    ----------
    model : cobra.Model
        Draft model to gapfill; it is not modified.
    reaction_bag : cobra.Model
        Universal model used as a context manager and as reaction source.
    likelihoods : mapping
        Passed through to ``add_pfba_likely``.
    obj : str, optional
        Id of the objective reaction. When ``None`` it is obtained from
        ``get_objective(model)``.
    obj_lb : float
        Lower bound imposed on the objective reaction.
    obj_constraint : bool
        When truthy, the lower bound of the objective reaction is left alone.
    iters, tasks, task_lb, add_exchanges, extracellular
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        ``'NewModel'`` (gapfilled copy of ``model``), ``'gaps'`` (set of
        added reaction ids) and ``'mets'`` (set of added metabolite ids).
    """
    start_time = time.time()
    # Save some basic network info for downstream membership testing
    orig_rxn_ids = set(str(rxn.id) for rxn in model.reactions)
    orig_cpd_ids = set(str(cpd.id) for cpd in model.metabolites)

    # Get model objective reaction ID
    if obj is None:
        # NOTE(review): get_objective is not defined in the visible part of
        # this notebook - confirm it is available before relying on obj=None.
        obj = get_objective(model)

    # Modify universal reaction bag; the context undoes the changes on exit
    with reaction_bag as universal:

        # Copies of the model reactions, so the model itself stays untouched
        orig_rxns = list(copy.deepcopy(model.reactions))

        # Add pFBA to universal model and add model reactions
        add_pfba_likely(universal, likelihoods)
        universal.add_reactions(orig_rxns)

        # If previous objective not set as constraint, set minimum lower bound
        if not obj_constraint:
            universal.reactions.get_by_id(obj).lower_bound = obj_lb

        # Run FBA and save solution
        solution = universal.optimize()

        # Identify which reactions carry flux in solution
        fluxes = solution.fluxes
        active_rxn_ids = set(fluxes[fluxes.abs() > 1e-6].index)

    # Screen new reaction IDs: drop the objective and everything the model
    # already contains
    new_rxn_ids = active_rxn_ids - orig_rxn_ids
    new_rxn_ids.discard(obj)

    # Get reactions and metabolites to be added to the model
    new_rxns = copy.deepcopy([reaction_bag.reactions.get_by_id(rxn_id) for rxn_id in new_rxn_ids])
    new_cpd_ids = set()
    for rxn in new_rxns:
        new_cpd_ids.update(str(cpd.id) for cpd in rxn.metabolites)
    new_cpd_ids -= orig_cpd_ids
    new_cpds = copy.deepcopy([reaction_bag.metabolites.get_by_id(cpd_id) for cpd_id in new_cpd_ids])

    # Copy model and gapfill
    new_model = copy.deepcopy(model)
    new_model.add_metabolites(new_cpds)
    new_model.add_reactions(new_rxns)

    duration = int(round(time.time() - start_time))
    print('Took ' + str(duration) + ' seconds to gapfill ' + str(len(new_rxn_ids)) +
          ' reactions and ' + str(len(new_cpd_ids)) + ' metabolites.')

    return {'NewModel': new_model, 'gaps': new_rxn_ids, 'mets': new_cpd_ids}

def add_pfba_likely(model, likelihoods, objective=None, fraction_of_optimum=1.0):
    """Install a likelihood-weighted pFBA objective on ``model``.

    The current objective is fixed as a constraint with
    ``sutil.fix_objective_as_constraint`` and replaced by a minimization
    objective named ``_pfba_objective`` whose linear coefficients are:

    * ``max(0, 1 - likelihood)`` for the forward and reverse variable of each
      reaction whose id starts with ``rxn``; the likelihood is looked up as
      ``<first '_'-separated part of the variable name>_c`` and a missing or
      unusable entry gives a coefficient of 1.0;
    * 1.0 for variables whose name starts with ``DM``;
    * nothing (variable left out of the objective) for all other variables.

    Parameters
    ----------
    model : cobra.Model
        Model to modify in place.
    likelihoods : mapping
        Reaction id (``rxn..._c``) -> likelihood.
    objective : optional
        When given, assigned to ``model.objective`` before anything else.
    fraction_of_optimum : float
        Passed as ``fraction`` to ``sutil.fix_objective_as_constraint``.

    Raises
    ------
    ValueError
        If the model's solver objective is already named ``_pfba_objective``.
    """
    if objective is not None:
        model.objective = objective
    if model.solver.objective.name == '_pfba_objective':
        raise ValueError('The model already has a pFBA objective.')
    sutil.fix_objective_as_constraint(model, fraction=fraction_of_optimum)

    # Id prefixes (text before the first '_') of all 'rxn...' reactions; built
    # once instead of once per variable.
    seed_prefixes = set(rxn.id.split('_')[0] for rxn in model.reactions
                        if rxn.id.startswith('rxn'))
    variables = chain.from_iterable((rxn.forward_variable, rxn.reverse_variable)
                                    for rxn in model.reactions)

    coefficients = {}
    for variable in variables:
        prefix = str(variable.name.split('_')[0])
        if prefix in seed_prefixes:
            rxn_id = prefix + '_c'
            try:
                coefficients[variable] = max(0.0, 1.0 - likelihoods[rxn_id])
            except (LookupError, TypeError):
                # No (usable) likelihood for this reaction: full penalty
                coefficients[variable] = 1.0
        elif prefix.startswith('DM'):
            coefficients[variable] = 1.0

    model.objective = model.problem.Objective(Zero, direction='min', sloppy=True, name="_pfba_objective")
    model.objective.set_linear_coefficients(coefficients)

In [None]:
# Likelihood of every model reaction that has an entry in `likelihoods`;
# reactions without an entry are skipped.
# NOTE: `model` and `likelihoods` are created by the model-loading cell
# further down the notebook, so that cell has to be run first.
rxns_w_likelihood = [
    likelihoods[rxn.id] for rxn in model.reactions if rxn.id in likelihoods
]

In [3]:
# Media component tables.
# Every entry is [display name, compound id]. set_media() reads only the
# compound id (element 1) and appends '_e' to build the metabolite and
# exchange reaction ids.

# Base medium: ions, gases, vitamins and a few organic acids
bsm = [
    ['H+','cpd00067'],
    ['H2O','cpd00001'],
    ['CO2','cpd00011'],
    ['O2','cpd00007'],
    ['N2','cpd00528'], 
#     ['H2','cpd11640'], # Only with no O2
    
    ['K+','cpd00205'],
    ['Na+','cpd00971'],
    ['Mg','cpd00254'],
    ['Mn2+','cpd00030'],
    ['Fe2+','cpd10515'], # Iron ion in heme
    ['Ca2+','cpd00063'], # Calcium pantothenate;cpd19112
    
    ['Vitamin B12r','cpd00423'], # C62H91CoN13O14P : cobalamin;cpd03424;cpd00730 : not present in any exchange reactions
    ['Cobinamide','cpd03422'], #EXs : related to cobalamin (B12). Added to ensure cells have access to B12
    ['BIOT','cpd00104'], # C10H15N2O3S : biotin B7
    ['PAN','cpd00644'], # C9H16NO5 : Pantothenate B5
    ['Folate','cpd00393'], # C19H17N7O6 : B9
    ['Niacin','cpd00218'], # C6H4NO2 : B3
    ['Pyridoxal','cpd00215'], # C8H9NO3 : B6
    ['Riboflavin','cpd00220'], # C17H19N4O6 : B2
    ['thiamin','cpd00305'], # C12H17N4OS : B1
    
    ['Thioglycolate','cpd01415'], # C2H3O2S : not present in any exchange reactions
    ['Acetate','cpd00029'], # C2H3O2 : not present in any exchange reactions
    ['Citrate','cpd00137'], # C6H5O7 : Consider removing.
    ['ABEE','cpd00443'] # C7H6NO2 : aminobenzoate : not present in any exchange reactions
]

# M9 default carbon, nitrogen, phosphorus, and sulfur sources
M9_sources = [
    ['D-Glucose','cpd00027'],
    ['NH3','cpd00013'], # this is actually NH4 : ammonium
    ['Phosphate','cpd00009'],
    ['Sulfate','cpd00048']
]

# Vitamins
vit_k = [
    ['Menaquinone 7','cpd11606'], #EXs : Vitamin K2 : Add when there is no O2
]

# For aerobic simulations, O2 was added with a lower bound of −20; for anaerobic simulations the lower bound was set to 0.

# DNA/RNA related metabolites
rna_bases = [
    ['Adenosine','cpd00182'], #EXs : In BSM (as adenine)
    ['Cytosine','cpd00307'], #EXs : 
    ['Guanosine','cpd00311'], #EXs : In BSM (as Guanine)
    ['Thymidine','cpd00184'], #EXs : In BSM
    ['Uridine','cpd00249'], #EXs : In BSM (as uracil)
]

# Candidate target products.
# NOTE(review): the D-Lactate entry has an empty compound id and must be
# completed before this list is used; passed to set_media() as-is it would
# resolve to the ids '_e' / 'EX__e'.
products = [
    ['L-Lactate','cpd00159'],
    ['D-Lactate',''] ### ADD CPD ID
]

In [7]:
t = time.time()

sys.stdout.write('Loading in models...')

universal = cobra.io.load_json_model("../Data/GramPosUni.json")
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# SECURITY: unpickling executes arbitrary code - only load .probs files from
# a trusted source. The file is opened in binary mode and closed again as
# soon as it has been read.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as likelihood_file:
    likelihoods = pickle.load(likelihood_file)

sys.stdout.write('Adding Water...')

# Ensure free diffusion of water
model.reactions.get_by_id('rxn05319_c').name = "Water transport"
model.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

sys.stdout.write('Set-up Universal...')

# Add a demand for every cytosolic ('_c') metabolite of the universal model
# to stop blocked reactions
all_mets = []  # not used in this cell; kept in case other cells reference it
for met in universal.metabolites:
    if (met.id.endswith('_c')):
        universal.add_boundary(met, type='demand')

### Set Up Model: remove low likelihood reactions
sys.stdout.write('Set-up Model...')
low_like_model = []
for rxn in model.reactions:
    if rxn.id.startswith('rxn'):
        # Reactions without a likelihood entry are kept in the model
        likelihood = likelihoods.get(rxn.id)
        if likelihood is not None and likelihood <= 0.1:
            low_like_model.append(rxn.id)
model_rxns_to_remove = [model.reactions.get_by_id(rxn) for rxn in low_like_model]
model.remove_reactions(model_rxns_to_remove)

# Remove model reactions from universal
orig_rxn_ids = set([str(x.id) for x in model.reactions])
univ_rxn_ids = set([str(z.id) for z in universal.reactions])
overlap_rxn_ids = univ_rxn_ids.intersection(orig_rxn_ids)
sys.stdout.write('removing reactions from universal...')
for rxn in overlap_rxn_ids: 
    universal.reactions.get_by_id(rxn).remove_from_model()

Loading in models...Adding Water...Set-up Universal...Set-up Model...removing reactions from universal...

In [9]:
# Build the growth medium from the base medium, the M9 C/N/P/S sources and
# the nucleoside list, apply it to the model, then display the resulting
# medium as the cell output.
media_list = list(chain(bsm, M9_sources, rna_bases))
set_media(model, media_list, universal, verbose=False)
model.medium

{'EX_cpd00001_e': 1000.0,
 'EX_cpd00007_e': 1000.0,
 'EX_cpd00009_e': 1000.0,
 'EX_cpd00011_e': 1000.0,
 'EX_cpd00013_e': 1000.0,
 'EX_cpd00027_e': 1000.0,
 'EX_cpd00029_e': 1000.0,
 'EX_cpd00030_e': 1000.0,
 'EX_cpd00048_e': 1000.0,
 'EX_cpd00063_e': 1000.0,
 'EX_cpd00067_e': 1000.0,
 'EX_cpd00104_e': 1000.0,
 'EX_cpd00137_e': 1000.0,
 'EX_cpd00182_e': 1000.0,
 'EX_cpd00184_e': 1000.0,
 'EX_cpd00205_e': 1000.0,
 'EX_cpd00215_e': 1000.0,
 'EX_cpd00218_e': 1000.0,
 'EX_cpd00220_e': 1000.0,
 'EX_cpd00249_e': 1000.0,
 'EX_cpd00254_e': 1000.0,
 'EX_cpd00305_e': 1000.0,
 'EX_cpd00307_e': 1000.0,
 'EX_cpd00311_e': 1000.0,
 'EX_cpd00393_e': 1000.0,
 'EX_cpd00423_e': 1000.0,
 'EX_cpd00443_e': 1000.0,
 'EX_cpd00528_e': 1000.0,
 'EX_cpd00644_e': 1000.0,
 'EX_cpd00971_e': 1000.0,
 'EX_cpd01415_e': 1000.0,
 'EX_cpd03422_e': 1000.0,
 'EX_cpd10515_e': 1000.0}

In [None]:
# Remove ModelSEED annotated reactions containing oxygen: deleting the
# cytosolic and extracellular O2 metabolites destructively also deletes
# every universal reaction they take part in.
for oxygen_id in ('cpd00007_c', 'cpd00007_e'):
    universal.metabolites.get_by_id(oxygen_id).remove_from_model(destructive=True)
# TODO: cpd00532 as well
# TODO: also remove O2 containing reactions from model

In [None]:
# Re-apply the medium so this cell does not depend on what earlier cells left
# behind in the model bounds
media_list = bsm + M9_sources + rna_bases
set_media(model, media_list, universal, verbose=False)

# NOTE(review): placeholder - replace with the full id (including the
# compartment suffix) of the metabolite to produce before running this cell.
product = 'cpd#####' # lactate

# Both contexts roll back every change made to `model` and `universal` below
with model as temp_model, universal as temp_universal:

    # Make sure the product metabolite exists in the model, copying it from
    # the universal model when it is missing
    if not temp_model.metabolites.has_id(product):
        temp_model.add_metabolites([copy.deepcopy(temp_universal.metabolites.get_by_id(product))])
    metabolite = temp_model.metabolites.get_by_id(product)

    # Produce the metabolite through a demand reaction used as objective
    demand = temp_model.add_boundary(metabolite, type='demand')
    temp_model.objective = demand

    # The model now carries this demand; drop the universal's reaction with
    # the same id
    temp_universal.reactions.get_by_id(demand.id).remove_from_model()

    gapfill_kwargs = dict(obj=demand.id, obj_lb=10., obj_constraint=False,
                          iters=1, add_exchanges=False)
    try:
        new_gapfill_data = pfba_gapfill(temp_model, temp_universal, likelihoods, **gapfill_kwargs)
    except Exception:
        # Retry once before giving up
        sys.stdout.write('Restart Gapfilling...')
        try:
            new_gapfill_data = pfba_gapfill(temp_model, temp_universal, likelihoods, **gapfill_kwargs)
        except Exception:
            sys.stdout.write('Failed Gapfilling...')
            dont_continue = 1
            # Re-raise: continuing would hit an undefined (or stale)
            # `new_gapfill_data` a few lines below
            raise

    gaps_to_fill = new_gapfill_data['gaps']
    new_model = new_gapfill_data['NewModel']
    mets_added = new_gapfill_data['mets']

# Optimize with filled pathway
sys.stdout.write('pFBA...')
# `demand` belonged to the rolled-back context, so the objective is passed by
# id and resolved inside new_model
solution = pfba(new_model, objective=demand.id)
sys.stdout.write(str(round(new_model.slim_optimize())) + '...')

# Find reactions that carry flux
df = solution.fluxes.to_frame()
active = df.loc[(abs(df['fluxes'])) > 0.1]

# What are the likelihoods of the reactions that have flux?
