In [71]:
import cobra
# Record the cobrapy version in the notebook output. The parenthesized form
# prints the same text under Python 2 and Python 3.
print(cobra.__version__)
from cobra.flux_analysis.gapfilling import GapFiller
import glob
import time
import pickle
import copy

0.13.0


In [18]:
# Identify gapfilled reactions
def findGapFilled(model, exclude_ids=('bio1',)):
    """Return the ids of reactions that carry no gene association.

    A reaction is reported when all of the following hold:

    * its id is not listed in ``exclude_ids``,
    * it has no genes (``reaction.genes`` is empty),
    * it is not contained in ``model.boundary``.

    Parameters
    ----------
    model : object
        Must expose ``reactions`` (an iterable of objects with ``id`` and
        ``genes`` attributes) and ``boundary`` (a container of reactions).
    exclude_ids : iterable of str, optional
        Reaction ids that are never reported. Defaults to ``('bio1',)``,
        the id this function has always skipped. Pass an iterable of ids,
        not a bare string.

    Returns
    -------
    list of str
        Ids of the matching reactions, in ``model.reactions`` order.
    """
    excluded = set(exclude_ids)
    # Read the attribute once instead of once per gene-less reaction; it may
    # be a computed property, in which case repeated access is wasted work.
    boundary = model.boundary

    gapfilled = []
    for reaction in model.reactions:
        if reaction.id in excluded:
            continue
        if len(list(reaction.genes)) != 0:
            continue
        if reaction not in boundary:
            gapfilled.append(reaction.id)

    return gapfilled

# Remove gapfilled rxns from the PATRIC model
def pruneGaps(model, orphans, gap_object):
    """Return a deep copy of ``model`` without its gapfilled orphan reactions.

    A reaction is removed from the copy when its id
    (1) appears in the ``'reactions'`` entry of any element of ``gap_object``,
    (2) is listed in ``orphans``, and
    (3) exists in the model.

    Parameters
    ----------
    model : object
        Model to prune; it is deep-copied and never modified. The copy must
        offer ``reactions.get_by_id(id)`` and ``remove_reactions(list)``.
    orphans : iterable of str
        Reaction ids eligible for removal (e.g. output of ``findGapFilled``).
    gap_object : iterable of mapping
        Each element is indexed with ``'reactions'``; iterating that value
        must yield reaction ids.

    Returns
    -------
    object
        The pruned deep copy of ``model``.
    """
    model_temp = copy.deepcopy(model)

    # Union of every reaction id mentioned by any gapfilling record.
    gaps_total = set()
    for gap_entry in gap_object:
        gaps_total.update(gap_entry['reactions'])

    # Set intersection replaces the per-element list scan; sorting only makes
    # the removal order reproducible between runs.
    candidates = sorted(gaps_total & set(orphans))

    remove = []
    for rxn_id in candidates:
        try:
            remove.append(model_temp.reactions.get_by_id(rxn_id).id)
        except KeyError:
            # Id recorded by gapfilling but absent from this model: skip it.
            # Any other error is a real problem and is allowed to propagate.
            continue

    model_temp.remove_reactions(remove)

    return model_temp

In [28]:
# Load the model bound to `universal`; later cells read its reaction ids and
# pass it to gapfilling as the pool of candidate reactions.
universal = cobra.io.load_json_model("../Data/GramPosUni.json")

In [3]:
## Remove all gapfilled reactions from models and save new models
# Genome ids are the file names with the directory prefix and ".xml" removed.
model_paths = glob.glob('../models/*.xml')
genome_ids = [x.replace("../models/","").replace(".xml","") for x in model_paths]
# Genomes that already have a pruned model on disk are skipped, so the cell
# can be re-run to resume an interrupted batch.
model_paths = glob.glob('../gap_models/*.xml')
gap_models_ids = [x.replace("../gap_models/","").replace(".xml","") for x in model_paths]
already_pruned = set(gap_models_ids)

# Ids of genomes whose pruning raised an error.
failed_index = []
for genome_id in genome_ids:
    if genome_id in already_pruned:
        continue
    try:
        file_name = "../models/%s.xml" % (genome_id)
        model = cobra.io.read_sbml_model(file_name)
        orphans = findGapFilled(model)
        pickle_path = "../gapfilled/%s.gf" % (genome_id)
        # NOTE: unpickling can execute arbitrary code; only load .gf files
        # that come from a trusted source.
        with open(pickle_path, "rb") as gap_file:
            gap_object = pickle.load(gap_file)
        pruned_model = pruneGaps(model, orphans, gap_object)
        write_file_name = "../gap_models/%s.xml" % (genome_id)
        cobra.io.write_sbml_model(pruned_model, write_file_name)
    except Exception as error:
        # Keep the batch going, but record and report the failure instead of
        # hiding it. KeyboardInterrupt is not caught, so the run can be stopped.
        failed_index.append(genome_id)
        print("Failed to prune %s: %r" % (genome_id, error))

In [38]:
# Optimize whatever `model` currently refers to in the kernel and print the result.
# NOTE(review): `model` is not loaded in this cell; which genome it is depends
# on the cell that was executed before this one.
solution = model.optimize()
print(solution)

<Solution 58.405 at 0x7f8028b38650>


In [39]:
# Same check for `pruned_model` (a pruneGaps result left by an earlier cell).
solution = pruned_model.optimize()
print(solution)

<Solution 0.000 at 0x7f802a1c5550>


In [53]:
# Test Gapfill using just reactions removed
genome_id = '220668.9'

# Original model and its gene-less, non-boundary reactions.
file_name = "../models/%s.xml" % (genome_id)
model = cobra.io.read_sbml_model(file_name)
orphans = findGapFilled(model)

# Gapfilling record for this genome.
# NOTE: unpickling can execute arbitrary code; only load trusted .gf files.
pickle_path = "../gapfilled/%s.gf" % (genome_id)
with open(pickle_path, "rb") as gap_file:
    gap_object = pickle.load(gap_file)
pruned_model = pruneGaps(model, orphans, gap_object)

# Pruned model as previously written to disk, for comparison with the
# in-memory pruned copy.
loaded_model_path = "../gap_models/%s.xml" % (genome_id)
loaded_model = cobra.io.read_sbml_model(loaded_model_path)

# Print the optimization result of: original, pruned in memory, pruned from disk.
for candidate in (model, pruned_model, loaded_model):
    solution = candidate.optimize()
    print(solution)

Reactions removed: 109
<Solution 68.440 at 0x7fd6ecbd1710>
<Solution 0.000 at 0x7fd6f3a75ed0>
<Solution 0.000 at 0x7fd6f363ee10>


In [23]:
# Number of reaction ids in `orphans` (the findGapFilled result for `model`).
print(len(orphans))

120


In [54]:
# Union of every reaction id mentioned by any gapfilling record.
gaps_total = set()
for gap_entry in gap_object:
    gaps_total.update(gap_entry['reactions'])

print(len(gaps_total))

# Keep only the ids that actually exist in `model`.
model_gaps = []
for rxn_id in gaps_total:
    try:
        model_gaps.append(model.reactions.get_by_id(rxn_id).id)
    except KeyError:
        # Id recorded by gapfilling but absent from this model: skip it.
        # Any other error is unexpected and is allowed to surface.
        continue

# Of those, the ones that are also orphans are the reactions to remove.
orphan_ids = set(orphans)
remove = [rxn_id for rxn_id in model_gaps if rxn_id in orphan_ids]

print('Reactions removed: ' + str(len(remove)))

113
Reactions removed: 109


In [55]:
# Ids found in exactly one of the two collections. `remove` was filtered
# against `orphans`, so these are the orphans that were not selected for removal.
set(remove) ^ set(orphans)

{'rxn02374_c',
 'rxn02916_c',
 'rxn03012_c',
 'rxn04132_c',
 'rxn04133_c',
 'rxn04457_c',
 'rxn05195_c',
 'rxn05319_c',
 'rxn05467_c',
 'rxn05468_c',
 'rxn10571_c'}

In [56]:
# Are all of the removed reactions in the universal model?
# Ids of every reaction in the universal model.
uni_rxns = {rxn.id for rxn in universal.reactions}
# True when each id in `remove` is also an id in the universal model.
set(remove) <= uni_rxns

True

In [57]:
# Create new mini-universal with just the removed reactions to test to be sure that gapfill is working
# Move every reaction listed in `remove` out of `model` and into a fresh model
# that starts out empty.
mini_uni = cobra.Model("mini_universal_reactions")
for i in remove:
    reaction = model.reactions.get_by_id(i)
    # Copy first, then remove: mini_uni receives the copy, `model` loses the original.
    mini_uni.add_reaction(reaction.copy())
    model.remove_reactions([reaction])

# NOTE(review): `model` is modified in place, so running this cell a second
# time looks up ids that have already been removed.
# Objective value of the stripped model (last expression, so the cell displays it).
model.optimize().objective_value

0.0

In [58]:
# Display the summary of `model` as it currently stands in the kernel.
model

0,1
Name,220668.9
Memory address,0x07fd6ed8a9890
Number of metabolites,1211
Number of reactions,1064
Objective expression,-1.0*bio1_reverse_b18f7 + 1.0*bio1
Compartments,"Cytosol, Extracellular"


In [72]:
# Gapfill `model` using mini_uni (the reactions taken out of it earlier) as
# the only pool of candidate reactions.
gapfiller = GapFiller(
    model,
    mini_uni,
    demand_reactions=False,
    integer_threshold=1e-9,
)
solution = gapfiller.fill(iterations=1)
# One iteration was requested; list the reaction ids of that first result.
first_fill = solution[0]
for added_rxn in first_fill:
    print(added_rxn.id)

rxn10254_c
rxn02269_c
rxn09142_c
rxn10293_c
rxn10273_c
rxn00851_c
rxn05376_c
rxn05168_c
rxn08333_c
rxn09135_c
rxn10338_c
rxn05386_c
rxn05365_c
rxn12225_c
rxn10275_c
rxn05380_c
rxn03538_c
rxn10473_c
rxn10181_c
rxn02897_c
rxn02286_c
rxn05029_c
rxn10161_c
rxn05144_c
rxn10336_c
rxn05397_c
rxn05394_c
rxn10233_c
rxn10094_c
rxn02011_c
rxn05360_c
rxn05406_c
rxn02303_c
rxn10227_c
rxn02476_c
rxn05372_c
rxn10310_c
rxn10205_c
rxn05393_c
rxn05389_c
rxn05175_c
rxn03395_c
rxn05385_c
rxn03537_c
rxn10226_c
rxn05364_c
rxn10309_c
rxn03393_c
rxn10283_c
rxn05179_c
rxn08040_c
rxn00693_c
rxn13661_c
rxn10337_c
rxn05377_c
rxn12224_c
rxn05381_c
rxn05368_c
rxn05398_c
rxn05267_c
rxn10274_c
rxn10215_c
rxn10266_c
rxn10292_c
rxn02056_c
rxn02304_c
rxn10214_c
rxn11946_c
rxn10197_c
rxn00178_c
rxn05373_c
rxn02287_c
rxn02774_c
rxn05390_c
rxn10232_c
rxn04139_c
rxn10180_c
rxn00961_c
rxn06022_c
rxn05148_c
rxn05402_c
rxn10265_c
rxn05405_c
rxn05361_c
rxn12008_c
rxn10206_c
rxn05401_c
rxn05255_c
rxn03150_c
rxn03164_c
rxn03397_c

In [None]:
t = time.time()
# model = cobra.io.read_sbml_model('../models/1051650.8.xml')
# Changes made inside the `with` block are temporary edits to `model`.
with model as model_x:
    # Strip every gene-less, non-boundary reaction in a single call.
    gaps = findGapFilled(model_x)
    model_x.remove_reactions(gaps)
#     print(model_x.optimize().objective_value)
    # Uses the GapFiller class imported at the top of the notebook; the
    # `gapfill` helper is never imported here and would raise NameError on a
    # fresh kernel.
    solution = GapFiller(model_x, universal, demand_reactions=False).fill(iterations=1)
    for reaction in solution[0]:
        print(reaction.id)

elapsed = time.time() - t
# A single formatted string prints the same under Python 2 and Python 3.
print("Time to complete: %s mins" % (elapsed/60))

133 reactions not associated with genes


In [None]:
# Display the value currently bound to `solution` (the most recent gapfilling or optimization result).
solution