In [2]:
from __future__ import print_function

import cobra
import cobra.test
import mackinac
import numpy as np
import csv
import glob
import pickle
import pandas as pd

In [3]:
# Authenticate against PATRIC for this account; the password is requested
# interactively at run time rather than being stored in the notebook.
# NOTE(review): the account name is hard-coded and the cell output echoes the
# account e-mail address -- consider reading the user name from configuration
# and clearing this output before sharing the notebook.
mackinac.get_token('tjmoutinho')

patric password: ········


u'tjmoutinho@patricbrc.org'

In [6]:
# Load every PATRIC genome id (one id per line of the CSV) and work out which
# genomes already have an SBML model saved in ../models
with open('../Data/genome_ids_all.csv') as csvfile:
    genome_ids_list = [line.strip() for line in csvfile]

# Reduce each "../models/<genome_id>.xml" path to its bare genome id
model_paths = glob.glob('../models/*.xml')
models = []
for path in model_paths:
    models.append(path.replace("../models/","").replace(".xml",""))

In [3]:
# Number of genome ids currently held in genome_ids_list
len(genome_ids_list)

1507

In [4]:
# Check for duplicates: evaluates to True only when every id in `models` is
# unique (converting to a set would drop any repeated id and shorten it)
len(models) == len(set(models))

True

In [None]:
# Download additional models and convert to cobra model, then save to sbml
#
# Every genome that has no model yet is attempted up to MAX_MODEL_ATTEMPTS
# times (presumably because the remote reconstruction calls can fail
# transiently -- TODO confirm).  After the cell finishes:
#   failed_genome_index  -- genome ids that never succeeded (each listed once)
#   model_errors         -- genome id -> repr() of the last error seen for it
#   models               -- extended with every genome id that was built
MAX_MODEL_ATTEMPTS = 10

# Track the outstanding work in its own list.  Removing entries from
# genome_ids_list while looping over it skips the element after each removal,
# and shrinking it would also hide the freshly built genomes from the later
# cells that iterate genome_ids_list.
pending_genome_ids = [genome_id for genome_id in genome_ids_list if genome_id not in models]
failed_genome_index = list(pending_genome_ids)
model_errors = {}

for attempt in range(MAX_MODEL_ATTEMPTS):
    if not pending_genome_ids:
        break
    still_pending = []
    for genome_id in pending_genome_ids:
        if genome_id in models:
            # Already built earlier in this pass (duplicate id in the input list)
            continue
        model_file_name = "../models/%s.xml" % (genome_id)
        try:
            mackinac.create_patric_model(genome_id, genome_id)
            model = mackinac.create_cobra_model_from_patric_model(genome_id)
            cobra.io.write_sbml_model(model, model_file_name)
        except Exception as err:
            # Best effort: remember the failure and retry on the next pass.
            # Exception (not a bare except) so Ctrl-C can still stop the loop.
            model_errors[genome_id] = repr(err)
            still_pending.append(genome_id)
        else:
            # Mark as done so a re-run of this cell does not rebuild it
            models.append(genome_id)
            model_errors.pop(genome_id, None)
    pending_genome_ids = still_pending
    # Keep the failure list current even if the loop is interrupted part-way
    failed_genome_index = list(pending_genome_ids)

In [None]:
# Download protein FASTA files
#
# A genome is skipped when ../fastas/<genome_id>.faa already exists, so the
# cell can be re-run to fetch only the genomes that are still missing.

# Genome ids that already have a FASTA file, derived from the file names
fasta_file_paths = glob.glob('../fastas/*.faa')
fasta_genome_ids = [x.replace("../fastas/","").replace(".faa","") for x in fasta_file_paths]

# Genome ids whose feature download or FASTA export raised an error
failed_genome_fasta_index = []
for genome_id in genome_ids_list:
    if genome_id in fasta_genome_ids:
        continue
    file_name = "../fastas/%s.faa" % (genome_id)
    try:
        feature_list = mackinac.get_genome_features(genome_id, annotation='PATRIC')
        mackinac.features_to_protein_fasta_file(feature_list, file_name)
    except Exception:
        # Best effort: record the genome and carry on with the next one.
        # Exception (not a bare except) so Ctrl-C can still stop this loop.
        failed_genome_fasta_index.append(genome_id)

In [None]:
# Download features files
#
# Each genome's PATRIC feature list is pickled to ../features/<genome_id>.feats.
# Genomes that already have a .feats file are skipped, so the cell can be
# re-run to fetch only what is still missing.

# Genome ids that already have a features file, derived from the file names
feature_file_paths = glob.glob('../features/*.feats')
feature_genome_ids = [x.replace("../features/","").replace(".feats","") for x in feature_file_paths]

# Genome ids whose feature download or pickling raised an error
missing_feat_files = []
for genome_id in genome_ids_list:
    if genome_id in feature_genome_ids:
        continue
    file_name = "../features/%s.feats" % (genome_id)
    try:
        feature_list = mackinac.get_genome_features(genome_id, annotation='PATRIC')
        # The with-block closes the file even if pickling fails; the previous
        # inline open() left the handle for the garbage collector to close.
        with open(file_name, "wb") as feature_file:
            pickle.dump(feature_list, feature_file)
    except Exception:
        # Best effort: record the genome and carry on with the next one.
        # Exception (not a bare except) so Ctrl-C can still stop this loop.
        missing_feat_files.append(genome_id)

In [None]:
# Download Gapfilled Reaction Objects
#
# Each genome's gap-fill solutions are pickled to ../gapfilled/<genome_id>.gf.
# Genomes that already have a .gf file are skipped, so the cell can be re-run
# to fetch only what is still missing.

# Genome ids that already have a gap-fill file, derived from the file names
gf_file_paths = glob.glob('../gapfilled/*.gf')
existing_gf_ids = [x.replace("../gapfilled/","").replace(".gf","") for x in gf_file_paths]

# Genome ids whose gap-fill retrieval or pickling raised an error
failed_genome_gf_index = []
for genome_id in genome_ids_list:
    if genome_id in existing_gf_ids:
        continue
    file_name = "../gapfilled/%s.gf" % (genome_id)
    try:
        gf_solutions = mackinac.get_patric_gapfill_solutions(genome_id)
        # The with-block closes the file even if pickling fails; the previous
        # inline open() left the handle for the garbage collector to close.
        with open(file_name, "wb") as gf_file:
            pickle.dump(gf_solutions, gf_file)
    except Exception:
        # Best effort: record the genome and carry on with the next one.
        # Exception (not a bare except) so Ctrl-C can still stop this loop.
        failed_genome_gf_index.append(genome_id)

In [None]:
# Genome ids recorded as failed by the gap-fill download loop
failed_genome_gf_index

In [2]:
# Print the contents of the public templates workspace folder to find the
# paths of the available model templates
mackinac.list_workspace_objects('/chenry/public/modelsupport/templates', print_output=True)

Contents of /chenry/public/modelsupport/templates:
-rr chenry    	  27225828	2016-06-23T15:25:46Z	modeltemplate	/chenry/public/modelsupport/templates/Core.modeltemplate
-rr chenry    	  19776723	2016-06-27T21:31:36Z	modeltemplate	/chenry/public/modelsupport/templates/newplant.modeltemplate
-rr chenry    	  73533708	2016-09-08T07:17:41Z	modeltemplate	/chenry/public/modelsupport/templates/FullBiomass.modeltemplate
-rr chenry    	  26559014	2016-11-16T06:52:49Z	modeltemplate	/chenry/public/modelsupport/templates/GramNegative.modeltemplate
-rr chenry    	  26564644	2016-11-16T06:55:11Z	modeltemplate	/chenry/public/modelsupport/templates/GramPositive.modeltemplate
-rr seaver    	  20808794	2017-04-14T19:34:42Z	modeltemplate	/chenry/public/modelsupport/templates/plant.modeltemplate
-rr chenry    	  26564644	2017-08-10T15:30:06Z	modeltemplate	/chenry/public/modelsupport/templates/GramPosModelTemplate
-rr chenry    	  26559014	2017-08-10T15:30:25Z	modeltemplate	/chenry/public/modelsupport/temp

In [6]:
# Build a universal model from the Gram-positive template.
# NOTE(review): `universal` is a generic name for what later cells treat as the
# Gram-positive model (cf. `neg` and `core`) -- keep that in mind when reading on.
universal = mackinac.create_universal_model('/chenry/public/modelsupport/templates/GramPosModelTemplate')

In [7]:
# Display the summary of the Gram-positive universal model
universal

0,1
Name,GramPositive.modeltemplate
Memory address,0x07f2fcc7c0110
Number of metabolites,6884
Number of reactions,8657
Objective expression,0
Compartments,"c, e"


In [79]:
# Save the Gram-positive universal model to disk as JSON
cobra.io.save_json_model(universal, "../Data/GramPosUni.json")

In [72]:
# Build a universal model from the Gram-negative template.
# NOTE(review): the workspace listing shown earlier is cut off before this
# path appears -- confirm the template name against the full listing.
neg = mackinac.create_universal_model('/chenry/public/modelsupport/templates/GramNegModelTemplate')

In [73]:
# Display the summary of the Gram-negative universal model
neg

0,1
Name,GramNegative.modeltemplate
Memory address,0x07f2fcba4c1d0
Number of metabolites,6884
Number of reactions,8657
Objective expression,0
Compartments,"c, e"


In [8]:
# Build a universal model from the Core template
core = mackinac.create_universal_model('/chenry/public/modelsupport/templates/CoreModelTemplate')

In [56]:
# Display the summary of the Core universal model
core

0,1
Name,Core
Memory address,0x07f2fcc344d90
Number of metabolites,6896
Number of reactions,8671
Objective expression,0
Compartments,"c, e"


In [64]:
# Keep a handle on the Core model's reaction list for interactive inspection
# (for example, str(rxns[0].id) gives the id of the first reaction)
rxns = core.reactions

In [61]:
# Reaction ids (as plain str) of the Core template and of the Gram-positive template
core_ids_set = {str(reaction.id) for reaction in core.reactions}
pos_ids_set = {str(reaction.id) for reaction in universal.reactions}

# Symmetric difference: the ids that occur in exactly one of the two templates
diff_ids = core_ids_set.symmetric_difference(pos_ids_set)
diff_ids

{'rxn14412_c',
 'rxn14414_c',
 'rxn14416_c',
 'rxn14418_c',
 'rxn14419_c',
 'rxn14420_c',
 'rxn14421_c',
 'rxn14422_c',
 'rxn14423_c',
 'rxn14424_c',
 'rxn14425_c',
 'rxn14426_c',
 'rxn14427_c',
 'rxn14428_c'}