In [33]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import glob
import time
import sys
import cobra

In [4]:
# Create TSV for mean/median genome likelihood values
#
# For every genome ID: load its gap-filled SBML model and its pickled reaction
# likelihoods, remove 'rxn*' reactions whose likelihood is at or below
# LIKELIHOOD_CUTOFF, then write the mean and median likelihood of the reactions
# that remain (only those that have a likelihood entry) as one TSV row.
t = time.time()

# genome_ids = glob.glob('../metabolic_output/*.data')
# genome_ids = [x.replace("../metabolic_output/","").replace(".data","") for x in genome_ids]
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
with open("../Data/Filtered_lacto_genome_ids.data", "rb") as ids_file:
    genome_ids = pickle.load(ids_file)

LIKELIHOOD_CUTOFF = 0.1  # reactions with likelihood <= this value are removed
output_path = '../Data/genome_likelihoods.tsv'

# The context manager guarantees the TSV is flushed and closed, even if the
# (long-running) loop is interrupted or raises.
with open(output_path, 'w') as output_file:
    header = 'genomes\t' + 'mean\t' + 'median\n'
    output_file.write(header)

    for genome_id in genome_ids:
        try:
            model = cobra.io.read_sbml_model('../gap_models/' + genome_id + '.xml')
            # Pickle files must be opened in binary mode; 'with' closes the handle.
            with open('../likelihoods/' + genome_id + '.probs', 'rb') as probs_file:
                likelihoods = pickle.load(probs_file)

            water_transport = model.reactions.get_by_id('rxn05319_c')
            water_transport.name = "Water transport"
            water_transport.bounds = (-1000., 1000.)

            ### Set Up Model: remove low likelihood reactions
            # Reactions without a likelihood entry are kept, as before.
            model_rxns_to_remove = [
                rxn for rxn in model.reactions
                if rxn.id.startswith('rxn')
                and rxn.id in likelihoods
                and likelihoods[rxn.id] <= LIKELIHOOD_CUTOFF
            ]
            model.remove_reactions(model_rxns_to_remove)

            # Generate list of likelihoods for all reactions left in the model
            model_rxn_likes = [
                likelihoods[rxn.id] for rxn in model.reactions if rxn.id in likelihoods
            ]

            # Build the whole row before writing so a failure cannot leave a
            # half-written line in the TSV.
            row = '\t'.join([
                genome_id,
                str(np.mean(model_rxn_likes)),
                str(np.median(model_rxn_likes)),
            ]) + '\n'
        except Exception as err:
            # Best effort: skip genomes that cannot be processed, but say so
            # instead of dropping them silently. KeyboardInterrupt is no longer
            # swallowed, so the loop can be stopped.
            print('Skipping genome ' + str(genome_id) + ': ' + repr(err))
            continue

        # Write info to tsv file
        output_file.write(row)

print(str(round(time.time() - t)) + 'seconds to complete')

2059.0seconds to complete


In [5]:
# Create TSV for genome size
#
# "Size" here is the number of reactions left in each genome's gap-filled model
# after removing 'rxn*' reactions whose likelihood is at or below
# LIKELIHOOD_CUTOFF. The third column is a constant '0' placeholder.
t = time.time()

# genome_ids = glob.glob('../metabolic_output/*.data')
# genome_ids = [x.replace("../metabolic_output/","").replace(".data","") for x in genome_ids]
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
with open("../Data/Filtered_lacto_genome_ids.data", "rb") as ids_file:
    genome_ids = pickle.load(ids_file)

LIKELIHOOD_CUTOFF = 0.1  # reactions with likelihood <= this value are removed
output_path = '../Data/genome_sizes.tsv'

# The context manager guarantees the TSV is flushed and closed, even if the
# (long-running) loop is interrupted or raises.
with open(output_path, 'w') as output_file:
    header = 'genomes\t' + 'size\t' + 'placeholder\n'
    output_file.write(header)

    for genome_id in genome_ids:
        try:
            model = cobra.io.read_sbml_model('../gap_models/' + genome_id + '.xml')
            # Pickle files must be opened in binary mode; 'with' closes the handle.
            with open('../likelihoods/' + genome_id + '.probs', 'rb') as probs_file:
                likelihoods = pickle.load(probs_file)

            water_transport = model.reactions.get_by_id('rxn05319_c')
            water_transport.name = "Water transport"
            water_transport.bounds = (-1000., 1000.)

            ### Set Up Model: remove low likelihood reactions
            # Reactions without a likelihood entry are kept, as before.
            model_rxns_to_remove = [
                rxn for rxn in model.reactions
                if rxn.id.startswith('rxn')
                and rxn.id in likelihoods
                and likelihoods[rxn.id] <= LIKELIHOOD_CUTOFF
            ]
            model.remove_reactions(model_rxns_to_remove)

            # Build the whole row before writing so a failure cannot leave a
            # half-written line in the TSV.
            row = '\t'.join([genome_id, str(len(model.reactions)), '0']) + '\n'
        except Exception as err:
            # Best effort: skip genomes that cannot be processed, but say so
            # instead of dropping them silently. KeyboardInterrupt is no longer
            # swallowed, so the loop can be stopped.
            print('Skipping genome ' + str(genome_id) + ': ' + repr(err))
            continue

        # Write info to tsv file
        output_file.write(row)

print(str(round(time.time() - t)) + 'seconds to complete')

2080.0seconds to complete


In [32]:
# Stats by reaction
#
# Writes one row per genome and one column per reaction. The column order is
# taken from the likelihood dictionary of the first genome ID and reused for
# every row. The final column is a constant '0' placeholder.
t = time.time()
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
with open("../Data/Filtered_lacto_genome_ids.data", "rb") as ids_file:
    genome_ids = pickle.load(ids_file)

output_path = '../Data/reaction_likelihoods_all_genomes.tsv'

# NOTE(review): the cells above read from '../likelihoods/' while this one reads
# from '../likelihoods_py3/' - confirm which directory is intended.
with open('../likelihoods_py3/' + genome_ids[0] + '.probs', 'rb') as probs_file:
    likelihoods = pickle.load(probs_file)
# Snapshot the keys as a list: dict views cannot be indexed on Python 3, and a
# fixed list keeps the column order identical for the header and every row.
rxn_list = list(likelihoods.keys())

# The context manager guarantees the TSV is flushed and closed.
with open(output_path, 'w') as output_file:
    header = '\t'.join(['genome_id'] + [str(rxn) for rxn in rxn_list] + ['placeholder']) + '\n'
    output_file.write(header)

    for genome_id in genome_ids:
        # This genome has no reaction likelihoods file, so it is skipped.
        if genome_id == '1590.482':
            continue
        with open('../likelihoods_py3/' + genome_id + '.probs', 'rb') as probs_file:
            likelihoods = pickle.load(probs_file)
        # A reaction missing from this genome's dictionary raises KeyError,
        # exactly as the direct indexing did before.
        values = [str(likelihoods[rxn]) for rxn in rxn_list]
        output_file.write('\t'.join([genome_id] + values + ['0']) + '\n')

print(str(round(time.time() - t)) + 'seconds to complete')

21.0seconds to complete


In [30]:
# one genome does not have reaction likelihoods
# Sanity check: try to load the likelihoods of every other genome and print the
# ID of any genome whose file cannot be opened or unpickled.
# Relies on `genome_ids` having been loaded by an earlier cell.
for genome_id in genome_ids:
    if genome_id == '1590.482':
        continue
    try:
        # Binary mode is required for pickle; 'with' closes each handle.
        with open('../likelihoods_py3/' + genome_id + '.probs', 'rb') as probs_file:
            likelihoods = pickle.load(probs_file)
    except Exception:
        # 'Exception' rather than a bare except, so Ctrl-C still stops the loop.
        print(genome_id)

In [None]:
# Load the universal reaction model from JSON (presumably the Gram-positive
# universal model, going by the file name - confirm). Later cells use
# `universal` to look up reaction names by reaction ID.
universal = cobra.io.load_json_model("../Data/GramPosUni.json")

In [50]:
# Look into reactions with greatest median values to find examples to use
#
# Builds a list of [reaction_id, median_likelihood (string), reaction_name]
# from the medians CSV. Reaction IDs that are not in the `universal` model
# (e.g. the header and placeholder rows) are printed instead of being added.
# Relies on `universal` having been loaded by an earlier cell.

# read file
reaction_medians_w_names = []
with open('../Data/reaction_medians.csv') as csvfile:
    for line in csvfile:
        fields = line.strip().split(',')
        if len(fields) < 3:
            # Blank or truncated line: nothing to look up.
            continue
        rxn_id = fields[1].strip('"')
        like = fields[2]
        try:
            rxn_name = universal.reactions.get_by_id(rxn_id).name
        except KeyError:
            # get_by_id raises KeyError for IDs the universal model lacks.
            print(rxn_id)
            continue
        reaction_medians_w_names.append([rxn_id, like, rxn_name])
reaction_medians_w_names


rxn_id
placeholder


[['rxn10336_c', '0.995880714273', u'stearoyl-cardiolipin synthase'],
 ['rxn10337_c', '0.995880714273', u'isoheptadecanoyl-cardiolipin synthase'],
 ['rxn10338_c',
  '0.995880714273',
  u'anteisoheptadecanoyl-cardiolipin synthase'],
 ['rxn10335_c', '0.995880714273', u'myristoyl-cardiolipin synthase'],
 ['rxn10342_c', '0.995880714273', u'isohexadecanoyl-cardiolipin synthase'],
 ['rxn10340_c', '0.995880714273', u'isopentadecanoyl-cardiolipin synthase'],
 ['rxn10334_c', '0.995880714273', u'palmitoyl-cardiolipin synthase'],
 ['rxn10339_c', '0.995880714273', u'isotetradecanoyl-cardiolipin synthase'],
 ['rxn10341_c',
  '0.995880714273',
  u'anteisopentadecanoyl-cardiolipin synthase'],
 ['rxn10113_c',
  '0.993026664715',
  u'cytochrome oxidase bo3 (ubiquinol-8: 2.5 protons)'],
 ['rxn10806_c',
  '0.993026664715',
  u'cytochrome oxidase bd (menaquinol-8: 2 protons) (periplasm)'],
 ['rxn05581_c', '0.9677412644275', u'RXN0-1683.ce.maizeexp.GLYCEROL_GLYCEROL'],
 ['rxn03135_c', '0.9670273094435', u'R

In [44]:
# Spot-check: show the name of a single reaction in the universal model.
universal.reactions.get_by_id('rxn10338_c').name

u'anteisoheptadecanoyl-cardiolipin synthase'