In [2]:
from __future__ import print_function

import cobra
import cobra.test
# import mackinac
import numpy as np
import csv
import glob
import pickle
import pandas as pd
import time
import sys
from collections import defaultdict
from cobra.flux_analysis import gapfill
from cobra.flux_analysis import pfba

# Set default logger to python logger to avoid warnings given when adding reactions and/or metabolites
# because "cobra.core.model" doesn't innately have a logger.
import logging
logging.basicConfig()
logger = logging.getLogger('logger')

In [None]:
# Load the draft model, its reaction likelihoods and the universal reaction bag.
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# Pickles are binary data: open in 'rb' mode and close the handle deterministically.
# NOTE: pickle.load can execute arbitrary code; only load .probs files from a trusted source.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)
universal = cobra.io.load_json_model("../Data/GramPosUni.json")

# Ensure free water exchange
model.reactions.get_by_id('rxn05319_c').name = "Water transport"
model.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

In [None]:
# Collect likelihood statistics for every 'rxn'-prefixed reaction in the draft model.
all_likes = []                 # every likelihood value that could be looked up
passed_rxns = []               # reaction IDs whose lookup/processing failed
zero_rxn_likelihood = []       # IDs with likelihood below the 0.01 cut-off
non_zero_rxn_likelihood = []   # IDs at or above the cut-off
counter = 0
pass_counter = 0
for rxn in model.reactions:
    if rxn.id.startswith('rxn'):
        try:
            likelihood = likelihoods[rxn.id]
            all_likes.append(likelihood)
            if likelihood < 0.01:
                zero_rxn_likelihood.append(rxn.id)
                print(rxn.name)
            elif likelihood > 0.0:
                non_zero_rxn_likelihood.append(rxn.id)
            counter += 1
        except Exception:
            # Best effort: reactions without a usable likelihood entry are only tallied.
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
            passed_rxns.append(rxn.id)
            pass_counter += 1
global_avg = np.mean(all_likes)

print(global_avg)
print(counter)
print(pass_counter)

In [None]:
# Number of model reaction IDs collected in non_zero_rxn_likelihood.
len(non_zero_rxn_likelihood)

In [None]:
# How many reactions have a probability score?

universal_non_zero = []   # universal 'rxn' IDs with likelihood > 0
missing = []              # universal 'rxn' IDs whose likelihood lookup failed
for rxn in universal.reactions:
    if rxn.id.startswith('rxn'):
        try:
            if likelihoods[rxn.id] > 0.0:
                universal_non_zero.append(rxn.id)
        except Exception:
            # No usable likelihood entry for this reaction. Catch `Exception` rather than
            # everything so that KeyboardInterrupt / SystemExit are not swallowed.
            missing.append(rxn.id)
print(len(universal_non_zero))
print(len(missing))

In [None]:
# What is the avg likelihood of all of these?
all_likelihoods = []
high_likelihoods = []
rxn_ids = [reaction.id for reaction in model.reactions]
model_rxn_id_set = set(rxn_ids)  # constant-time membership test inside the loop

for rxn in universal_non_zero:
    likelihood = likelihoods[rxn]
    all_likelihoods.append(likelihood)
    # Candidate additions: likelihood above 0.6 and not already in the model.
    if likelihood > 0.6 and rxn not in model_rxn_id_set:
        high_likelihoods.append(rxn)
avg_like = np.mean(all_likelihoods)
# A bare `avg_like` in the middle of a cell is never displayed, so print it explicitly.
print(avg_like)
print(len(high_likelihoods))

In [None]:
# Display the reaction IDs collected in high_likelihoods.
high_likelihoods

In [None]:
# Distribution of the non-zero universal reaction likelihoods.
import matplotlib.pyplot as plt
plt.hist(all_likelihoods, bins=100)
plt.title("Histogram with 100 bins")
plt.xlabel("Reaction likelihood")
plt.ylabel("Number of reactions")
# Render the figure explicitly; ending the cell with the bare module name `plt`
# only echoes the module repr.
plt.show()

In [None]:
# Average adjustments by adding high-likelihood reactions to model
rxn_ids = [reaction.id for reaction in model.reactions]
lowest_like = 0.6
model_rxn_id_set = set(rxn_ids)  # constant-time membership test

# Universal reactions above the threshold that the model does not contain yet.
high_likelihoods = [
    rxn for rxn in universal_non_zero
    if likelihoods[rxn] > lowest_like and rxn not in model_rxn_id_set
]

# Mean likelihood of the model as it is now.
missing = []
old_model = []
for rxn in rxn_ids:
    if rxn.startswith('rxn'):
        try:
            old_model.append(likelihoods[rxn])
        except Exception:
            # No likelihood available for this reaction; remember it and move on.
            missing.append(rxn)
avg_like = np.mean(old_model)
print(avg_like)

# Mean likelihood if the high-likelihood candidates were added to the model.
new_model = [likelihoods[rxn] for rxn in high_likelihoods]
for rxn in rxn_ids:
    try:
        new_model.append(likelihoods[rxn])
    except Exception:
        # Reactions without a likelihood do not contribute to the average.
        pass
avg_like = np.mean(new_model)
print(avg_like)

In [None]:
# Print selected fields of the likelihood record stored for rxn00786_c.
for field in ('gpr', 'probability', 'complexes'):
    print(likelihoods.data['rxn00786_c'][field])

In [None]:
# Display the whole entry stored in likelihoods.data for rxn00783_c.
likelihoods.data['rxn00783_c']

In [None]:
# Display the gene_reaction_rule that `model` holds for rxn00786_c.
model.reactions.get_by_id('rxn00786_c').gene_reaction_rule

In [None]:
# Are there reactions that have a zero likelihood in the model that
# are not in the universal reaction bag?
rxn_ids = [reaction.id for reaction in model.reactions]
missing = []            # model reactions without a usable likelihood entry
old_model = []          # likelihoods of the model reactions that have one
rxn_id_zero_like = []   # model reactions whose likelihood is exactly 0.0
for rxn in rxn_ids:
    if rxn.startswith('rxn'):
        try:
            likelihood = likelihoods[rxn]
            old_model.append(likelihood)
            if likelihood == 0.0:
                rxn_id_zero_like.append(rxn)
        except Exception:
            missing.append(rxn)
# A bare expression in the middle of a cell is not displayed, so print the count.
print(len(rxn_id_zero_like))

# Build the universal ID set once instead of rebuilding a list for every reaction.
universal_rxn_id_set = set(reaction.id for reaction in universal.reactions)
missing_rxns = [rxn for rxn in rxn_id_zero_like if rxn not in universal_rxn_id_set]
len(missing_rxns)

# No

In [None]:
# Fetch the model's Reaction object for every zero-likelihood reaction ID.
rxn_objs = [model.reactions.get_by_id(zero_id) for zero_id in rxn_id_zero_like]

# overlap_rxns = [rxn.id for rxn in model.reactions if rxn.id in [x.id for x in universal.reactions]]
# Display the first of them as it appears in the model.
model.reactions.get_by_id(rxn_id_zero_like[0])

In [None]:
# Reload the universal model from disk and display its copy of the first
# zero-likelihood reaction. Expectation noted for this check: it shouldn't have a GPR.
universal = cobra.io.load_json_model("../Data/GramPosUni.json")
universal.reactions.get_by_id(rxn_id_zero_like[0])

In [None]:
# Shouldn't be present
universal.remove_reactions(rxn_id_zero_like)
# Looking up a removed ID raises KeyError, which is the expected outcome here.
# Catch it so the check reports its result instead of aborting a "Run All".
try:
    leftover_rxn = universal.reactions.get_by_id(rxn_id_zero_like[0])
except KeyError:
    leftover_rxn = None
    print(rxn_id_zero_like[0] + ' is no longer in universal (expected)')
# Displays the reaction only if it unexpectedly survived the removal.
leftover_rxn

In [None]:
# Add the Reaction objects taken from `model` to the universal model, then display the
# first one again.
# Should have same gpr as reaction object from model above (it does)
universal.add_reactions(rxn_objs)
universal.reactions.get_by_id(rxn_id_zero_like[0])

In [None]:
# Final version to implement in other script

# rxn_ids = [reaction.id for reaction in model.reactions]
# rxn_id_zero_like = []
# for rxn in rxn_ids:
#     if rxn.startswith('rxn'):
#         try:
#             if likelihoods[rxn] == 0.0:
#                 rxn_id_zero_like.append(rxn)
#         except:
#             pass
# rxn_objs = []
# for rxn in rxn_id_zero_like:
#     rxn_objs.append(model.reactions.get_by_id(rxn))
# universal.remove_reactions(rxn_id_zero_like)
# universal.add_reactions(rxn_objs)
# model.remove_reactions(rxn_id_zero_like)

In [None]:
# How many reactions in the universal model have a non-zero likelihood and a
# non-empty GPR in the likelihood data?
rxn_ids = [reaction.id for reaction in universal.reactions]
rxns_of_interest = []
for rxn in rxn_ids:
    if rxn.startswith('rxn'):
        try:
            # Alternative filter that was tried on the model instead:
            #     model.reactions.get_by_id(rxn).gene_reaction_rule == ''
            if likelihoods[rxn] > 0.0 and likelihoods.data[rxn]['gpr'] != '':
                rxns_of_interest.append(rxn)
        except Exception:
            # Reactions without a likelihood entry are skipped on purpose.
            # `Exception` keeps KeyboardInterrupt / SystemExit from being swallowed.
            pass

len(set(rxns_of_interest))

# n = 803
        
# print(likelihoods.data[rxn_ids[n]]['gpr'])
# print(likelihoods.data[rxn_ids[n]]['gpr'] != '')

# print(model.reactions.get_by_id(rxn_ids[n]).gene_reaction_rule)
# print(model.reactions.get_by_id(rxn_ids[n]).gene_reaction_rule == '')


In [None]:
# Count the distinct 'rxn'-prefixed reaction IDs present in the model.
rxn_ids = [reaction.id for reaction in model.reactions]
rxn_total = [rxn_id for rxn_id in rxn_ids if rxn_id.startswith('rxn')]

len(set(rxn_total))

In [None]:
# Reactions of interest (likelihood-supported) that the model does not contain yet.
c = set(rxns_of_interest) - set(rxn_total)

len(c)

In [None]:
# IDs of every metabolite in the model, and how many there are.
met_ids = [metabolite.id for metabolite in model.metabolites]

len(met_ids)

In [None]:
# Universal metabolite IDs ending in '_c', leaving out the single ID held in `aa`.
aa = 'cpd00117_c'

all_mets = [
    met.id
    for met in universal.metabolites
    if met.id.endswith('_c') and met.id != aa
]
len(all_mets)

In [None]:
# Display the full list of metabolite IDs collected in all_mets.
all_mets

In [None]:
# `met_ids_uni` was never defined anywhere in the notebook, so this cell raised
# NameError on a fresh kernel. Presumably it is the universal model's metabolite
# IDs (by analogy with `met_ids` for the model) — TODO confirm.
met_ids_uni = [met.id for met in universal.metabolites]

# Model metabolites that do not occur in the universal model.
c = set(met_ids).difference(set(met_ids_uni))

len(c)

In [22]:
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# Pickles are binary data: open in 'rb' mode and close the handle deterministically.
# NOTE: pickle.load can execute arbitrary code; only load .probs files from a trusted source.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)
universal = cobra.io.load_json_model("../Data/GramPosUni.json")
# How many reactions have gprs in the universal model?
# (Only the first reaction's GPR is printed here as a spot check.)

for rxn in universal.reactions[0:1]:
    print(rxn.gene_reaction_rule)





In [20]:
# Print the gene_reaction_rule of the first reaction in the universal model.
print(universal.reactions[0].gene_reaction_rule)




In [99]:
# Identify transport reactions (for any number compartments)
def findDiffusionRxns(model):
    """Return the IDs of reactions that move a single compound unchanged.

    A reaction is reported when it involves exactly two metabolites and the
    set of reactant base IDs (the part of the metabolite ID before the first
    underscore) equals the set of product base IDs.

    Raises:
        Exception: if the model defines exactly one compartment.
    """
    if len(set(model.compartments)) == 1:
        raise Exception('Model only has one compartment!')

    def _base_ids(metabolites):
        # Strip everything from the first underscore on, e.g. the compartment suffix.
        return set(met.id.split('_')[0] for met in metabolites)

    return [
        reaction.id
        for reaction in model.reactions
        if len(reaction.metabolites) == 2
        and _base_ids(reaction.reactants) == _base_ids(reaction.products)
    ]

In [110]:
# Remove all reactions with zero likelihood and insert them into universal with GPRs
# Also add GPRs for reactions with non-zero likelihood to Universal model

genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# Pickles are binary data: open in 'rb' mode and close the handle deterministically.
# NOTE: pickle.load can execute arbitrary code; only load .probs files from a trusted source.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)
universal = cobra.io.load_json_model("../Data/GramPosUni.json")

# Ensure free water exchange
model.reactions.get_by_id('rxn05319_c').name = "Water transport"
model.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

# Remove 0-likelihood reactions from model and move to universal
rxn_ids = [reaction.id for reaction in model.reactions]
rxn_id_zero_like = []
for rxn in rxn_ids:
    if rxn.startswith('rxn'):
        try:
            if likelihoods[rxn] == 0.0:
                rxn_id_zero_like.append(rxn)
        except Exception:
            # Reactions without a likelihood entry stay in the model.
            # `Exception` keeps KeyboardInterrupt / SystemExit from being swallowed.
            pass

# Grab the Reaction objects before they are removed from the model, so the
# model's versions of these reactions are what ends up in universal.
rxn_objs = [model.reactions.get_by_id(rxn) for rxn in rxn_id_zero_like]

model.remove_reactions(rxn_objs)
universal.remove_reactions(rxn_id_zero_like)
universal.add_reactions(rxn_objs)

# Add GPRs from likelihood dict to universal reactions that are still missing them
rxn_ids = [reaction.id for reaction in universal.reactions]
rxns_to_fix = []
for rxn in rxn_ids:
    if rxn.startswith('rxn'):
        try: # This catches all of the reactions that have no likelihood value.
            if likelihoods[rxn] > 0.0:
                universal_rxn = universal.reactions.get_by_id(rxn)
                if universal_rxn.gene_reaction_rule == '':
                    likelihood_gpr = likelihoods.data[rxn]['gpr']
                    if likelihood_gpr != '':
                        rxns_to_fix.append(rxn)
                        universal_rxn.gene_reaction_rule = likelihood_gpr
        except Exception:
            pass

# Spontaneous diffusion based reactions
diff_rxns = findDiffusionRxns(universal)
diff_rxn_id_set = set(diff_rxns)  # constant-time membership test in the loop below

# Add dummy GPRs to rest of the reactions that should have genes so pFBA treats them all the same.
rxns_with_tempGPR = []
for rxn in universal.reactions:
    if rxn.id.startswith('rxn') and rxn.id not in diff_rxn_id_set and rxn.gene_reaction_rule == '':
        # `rxn` already is the universal reaction object; no second lookup by ID needed.
        rxn.gene_reaction_rule = 'Temp_GPR'
        rxns_with_tempGPR.append(rxn.id)


0,1
Reaction identifier,rxn00594_c
Name,"Anthranilate,NADH:oxygen oxidoreductase (1,2-hydroxylating, deaminating, decarboxylating)"
Memory address,0x07fd348d85e90
Stoichiometry,cpd00004_c + cpd00007_c + 3.0 cpd00067_c + cpd00093_c <=> cpd00003_c + cpd00011_c + cpd00013_c + cpd00077_c  NADH + O2 + 3.0 H+ + Anthranilate <=> NAD + CO2 + NH3 + Catechol
GPR,Temp_GPR
Lower bound,-1000.0
Upper bound,1000.0


In [47]:
# How many reactions in the model don't have a reaction likelihood at all?
genome_id = '220668.9'
model = cobra.io.read_sbml_model('../gap_models/'+ genome_id +'.xml')
# Pickles are binary data: open in 'rb' mode and close the handle deterministically.
# NOTE: pickle.load can execute arbitrary code; only load .probs files from a trusted source.
with open('../likelihoods/'+ genome_id +'.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)

rxn_ids = [reaction.id for reaction in model.reactions]
rxn_id_no_like = []
for rxn in rxn_ids:
    if rxn.startswith('rxn'):
        try:
            likelihoods[rxn]
        except Exception:
            # The lookup failing is what marks a reaction as having no likelihood.
            # `Exception` keeps KeyboardInterrupt / SystemExit from being swallowed.
            rxn_id_no_like.append(rxn)
print(len(rxn_id_no_like))

11


In [48]:
# Display the IDs of the model reactions for which the likelihood lookup failed.
rxn_id_no_like

['rxn02374_c',
 'rxn05319_c',
 'rxn04457_c',
 'rxn02916_c',
 'rxn03012_c',
 'rxn10571_c',
 'rxn04132_c',
 'rxn05195_c',
 'rxn05468_c',
 'rxn04133_c',
 'rxn05467_c']

In [71]:
# Find diffusion reactions in universal model
# (reloads the universal model from disk; `transporters` ends up holding the
# reaction IDs returned by findDiffusionRxns)
universal = cobra.io.load_json_model("../Data/GramPosUni.json")

transporters = findDiffusionRxns(universal)

In [98]:
# Display one reaction from the `transporters` list as a spot check.
# NOTE(review): index 218 looks like an arbitrary pick — confirm it carries no special meaning.
universal.reactions.get_by_id(transporters[218])

0,1
Reaction identifier,rxn11326_c
Name,4-aminobutyrate transport via diffusion (extracellular to periplasm)
Memory address,0x07fd3536b2a90
Stoichiometry,cpd00281_c <=> cpd00281_e  GABA <=> GABA
GPR,
Lower bound,-1000.0
Upper bound,1000.0


In [None]:
# Find transporters that don't use ATP
# NOTE(review): findTransports is defined in a later cell of this notebook, so that
# cell has to be run before this one.
universal = cobra.io.load_json_model("../Data/GramPosUni.json")
transporters = findTransports(universal)

# Transport reactions with cpd00002_c among their metabolites are the ones to drop.
rxns_to_remove = [
    rxn_id
    for rxn_id in transporters
    if any(met.id == 'cpd00002_c'
           for met in universal.reactions.get_by_id(str(rxn_id)).metabolites)
]
nonATP_transporters = set(transporters) - set(rxns_to_remove)
len(nonATP_transporters)

In [None]:
def quantumGapFill(model, universal=None, objective='bio1', lb=0.05):
    """Gap-fill `model` with the universal reactions that carry flux in one optimization.

    Both arguments are modified in place: `universal` has its overlapping reactions
    replaced by the model's versions and receives a new objective, and `model`
    gains the newly selected reactions.

    Args:
        model: model to gap-fill.
        universal: universal reaction bag; required despite the ``None`` default.
        objective: currently unused. The reaction with ID 'biomass' in `universal`
            is always used as the objective.
            NOTE(review): confirm whether this parameter was meant to select it.
        lb: lower bound enforced on the 'biomass' reaction before optimizing.

    Returns:
        The same `model` object, with the added reactions.

    Raises:
        ValueError: if `universal` is not provided.
    """
    if universal is None:
        # Fail early with a clear message instead of an AttributeError further down.
        raise ValueError('quantumGapFill requires a universal reaction bag (universal=...)')

    # Grab overlapping reaction IDs from original model and replace in universal bag
    # Resets flux penalties for those reactions
    # (the ID set is built once rather than once per model reaction)
    universal_ids = set(rxn.id for rxn in universal.reactions)
    overlap_rxns = [rxn.id for rxn in model.reactions if rxn.id in universal_ids]
    universal.remove_reactions(overlap_rxns)
    universal.add_reactions(list(model.reactions))

    # Set new objective and lower bound
    biomass_rxn = universal.reactions.get_by_id('biomass')
    biomass_rxn.lower_bound = lb
    universal.objective = biomass_rxn

    # Optimize and retrieve new reactions
    solution = universal.optimize()
    flux_frame = solution.fluxes.to_frame()
    active = flux_frame.loc[(abs(flux_frame['fluxes'])) > 1e-6]
    active_ids = [str(x) for x in list(active.index)]
    model_ids = set(rxn.id for rxn in model.reactions)
    rxns_to_be_added = [
        universal.reactions.get_by_id(rxn_id)
        for rxn_id in active_ids
        if rxn_id not in model_ids
    ]

    # Add new reactions to original model
    model.add_reactions(rxns_to_be_added)

    return model

# Identify transport reactions (for any number compartments)
def findTransports(model):
    """Return the IDs of reactions classified as transporters.

    A reaction is skipped when its reactants and products occupy the same set
    of compartments while their base IDs (the part of the metabolite ID before
    the first underscore) differ. Any other reaction is reported if both its
    reactant side and its product side touch at least one of the model's
    compartments.

    Raises:
        Exception: if the model defines exactly one compartment.
    """
    model_compartments = set(model.compartments)
    if len(model_compartments) == 1:
        raise Exception('Model only has one compartment!')

    found = []
    for reaction in model.reactions:
        substrates = reaction.reactants
        products = reaction.products

        substrate_comps = set(met.compartment for met in substrates)
        product_comps = set(met.compartment for met in products)
        substrate_bases = set(met.id.split('_')[0] for met in substrates)
        product_bases = set(met.id.split('_')[0] for met in products)

        # Same compartments but different compounds: a conversion, not a transport.
        if substrate_comps == product_comps and substrate_bases != product_bases:
            continue

        # Both sides must involve a compartment known to the model
        # (a side with no metabolites therefore never qualifies).
        if (model_compartments & substrate_comps) and (model_compartments & product_comps):
            found.append(reaction.id)

    return found

def missingRxns(model, extracellular=['e','Extracellular']):
    """Report extracellular metabolites that lack a transport or an exchange reaction.

    A metabolite is examined when its compartment is listed in `extracellular`
    and the last underscore-separated segment of its ID is 'e'.

    Args:
        model: model providing `metabolites`, `exchanges` and whatever
            findTransports reads from it.
        extracellular: compartment identifiers treated as extracellular
            (only read, never modified).

    Returns:
        Tuple ``(missing_transports, missing_exchanges)`` of metabolite ID lists.
    """
    transporters = set(findTransports(model))
    exchanges = set(x.id for x in model.exchanges)

    missing_exchanges = []
    missing_transports = []

    for metabolite in model.metabolites:
        if metabolite.compartment not in extracellular:
            continue
        # Use the LAST underscore-separated segment as the compartment suffix:
        # `split('_')[1]` raised IndexError for IDs without an underscore and read the
        # wrong segment for IDs that contain more than one.
        if metabolite.id.rsplit('_', 1)[-1] != 'e':
            continue

        curr_rxns = set(x.id for x in metabolite.reactions)

        if not (curr_rxns & transporters):
            missing_transports.append(metabolite.id)
        if not (curr_rxns & exchanges):
            missing_exchanges.append(metabolite.id)

    if len(missing_transports) != 0:
        print(str(len(missing_transports)) + ' extracellular metabolites are missing transport reactions')
    if len(missing_exchanges) != 0:
        print(str(len(missing_exchanges)) + ' extracellular metabolites are missing exchange reactions')

    return missing_transports, missing_exchanges