# Copying all AGORA genomes into a KBase narrative

In [None]:
# Copy every genome named in genomelist.txt from the source workspace into the
# target workspace, skipping names that are already present in the target.
ws_client = util.ws_client()
workspace_id = 19217          # source workspace that is listed and copied from
target_workspace_id = 178418  # destination workspace that receives the copies
# A page shorter than this is treated as the final page.
# NOTE(review): confirm this threshold against the service's actual page limit.
page_size = 5000
full_output = {}              # object name (item[1]) -> full object-info entry
start_after = None
while True:
    params = {
        "ids": [workspace_id],
        "includeMetadata": 1,
        "type": "KBaseGenomes.Genome",
    }
    if start_after:
        params["startafter"] = start_after
    output = ws_client.list_objects(params)
    if not output:
        # Nothing (more) to list; indexing output[-1] below would raise
        # IndexError on an empty page.
        break
    # Resume the next page after the last object returned ("<wsid>/<item[0]>").
    start_after = str(workspace_id) + "/" + str(output[-1][0])
    for item in output:
        full_output[item[1]] = item
    print(len(output), "/", len(full_output), start_after)
    if len(output) < page_size:
        break

with open('genomelist.txt', 'r') as file:
    genomelist = [line.strip() for line in file]

# Names already present in the target workspace, so they are not copied twice.
# NOTE(review): this listing is a single unpaginated call — confirm the target
# workspace cannot hold more objects than one call returns.
output = ws_client.list_objects({"ids": [target_workspace_id]})
model_hash = {item[1]: item for item in output}

for item in genomelist:
    if item not in full_output:
        print("Missing", item)
    elif item not in model_hash:
        print(item)
        ws_client.copy_object({
            "to": {"wsid": target_workspace_id, "name": item},
            "from": {"wsid": workspace_id, "name": item},
        })

# compute FVA for all individual AGORA2 models that load (~6/7 of the models)

In [None]:
from cobra.flux_analysis.variability import flux_variability_analysis
from cobra.io import read_sbml_model
from glob import glob
from os import path

def remove_groups(model_path):
    """Run flux variability analysis on one SBML model and save it as CSV.

    Despite its name, this function removes nothing: it loads the model at
    ``model_path``, runs ``flux_variability_analysis`` on it and writes the
    result to ``AGORA2_fva/<model.id>_fva.csv``.

    Args:
        model_path: Path of the SBML file handed to ``read_sbml_model``.

    Returns:
        None. The only outputs are the CSV file and, on failure, a printed
        line naming the model that could not be processed.
    """
    try:
        model = read_sbml_model(model_path)
        fva_csv = flux_variability_analysis(model)
        fva_csv.to_csv(f"AGORA2_fva/{model.id}_fva.csv")
    except Exception as error:
        # Best effort: one failing model must not abort the whole batch.
        # Catching Exception (not a bare except) keeps KeyboardInterrupt and
        # SystemExit working, and the reason for the failure is reported.
        print("\t\t", model_path, repr(error))
    

from datetime import datetime
from multiprocess import Pool

# Only process models whose FVA table is not on disk yet. The expected file
# name is "M_<file stem>_fva.csv".
# NOTE(review): this assumes each model's id is "M_" + its file stem, because
# remove_groups saves to "<model.id>_fva.csv" — confirm for the AGORA2 files.
pending_model_paths = [
    model_path
    for model_path in glob("AGORA2/*.xml")
    if not path.exists(f"AGORA2_fva/M_{path.split(model_path)[1].replace('.xml', '')}_fva.csv")
]

# The context manager shuts the worker processes down once map() has returned,
# instead of leaving six idle workers alive for the rest of the session.
with Pool(6) as pool:
    pool.map(remove_groups, pending_model_paths)

In [None]:
from cobra.flux_analysis.variability import flux_variability_analysis
from cobra.io import read_sbml_model
from glob import glob
from os import path

# Serial variant of the FVA computation: one model at a time, skipping models
# whose result file already exists.
for model_path in glob("AGORA2/*.xml"):
    # Results are saved below as "<model.id>_fva.csv". The previous check
    # looked for "<file name>.xml_fva.csv", which is never written, so no
    # model was ever skipped. Use the same "M_<file stem>" convention as the
    # parallel cell.
    # NOTE(review): assumes model.id == "M_" + file stem — confirm.
    model_stem = path.split(model_path)[1].replace(".xml", "")
    if path.exists(path.join("AGORA2_fva", f"M_{model_stem}_fva.csv")):
        continue
    # Progress indicator, overwritten in place via the carriage return.
    print(f"{model_path}\t\t\t\t\t", end="\r")
    try:
        model = read_sbml_model(model_path)
        fva_csv = flux_variability_analysis(model)
        fva_csv.to_csv(f"AGORA2_fva/{model.id}_fva.csv")
    except Exception as e:
        # Best effort: report the failing model and why, then carry on.
        print("\t\t", model_path, repr(e))


# parse metabolite production/consumption from the FVA data

# create a superset AGORA2 model

In [None]:
from cobra.io import read_sbml_model
from cobra import Model
from glob import glob

# Collect the reactions and metabolites of every AGORA2 model and load them
# into a single cobra Model.
reactions, metabolites = [], []   # objects that are added to the combined model
modelRxns, modelMets = [], []     # every reaction / metabolite that was renamed
# NOTE(review): nothing is ever added to rxnIDs or metIDs, so both membership
# tests below always see an empty set and no entry is de-duplicated. Left
# unchanged because the intended interaction with the "_<i>" suffixing is not
# clear from this cell — confirm the intent before populating these sets.
rxnIDs, metIDs = set(), set()
# The loop variable is named model_path so it does not shadow os.path, which
# other cells import as `path`.
for i, model_path in enumerate(glob("../../AGORA2_models/*.xml")):
    model = read_sbml_model(model_path)
    for rxn in model.reactions:
        # The membership test runs on the original id, before the suffix.
        if rxn.id not in rxnIDs:
            reactions.append(rxn)
        rxn.id += f"_{i}"   # suffix with the index of the source model
        modelRxns.append(rxn)
    for met in model.metabolites:
        if met.id in metIDs:
            continue
        metabolites.append(met)
        met.id += f"_{i}"   # suffix with the index of the source model
        modelMets.append(met)
# cobra's Model takes the identifier as its first parameter (id_or_model);
# there is no `id` keyword, so the id is passed positionally.
agora2Model = Model("agora2Model", name="agora2_Model")
agora2Model.add_reactions(reactions)
agora2Model.add_metabolites(metabolites)

# assess the conversion of all IDs

In [None]:
from json import load

# Load the three JSON tables used for ID conversion. Each file is opened in a
# `with` block so its handle is closed as soon as the content is parsed.
with open("../../CommScores/commscores/data/BiGG_to_MSID.json", 'r') as json_file:
    baseConvert = load(json_file)
with open("../../CommScores/commscores/data/compoundNames.json", 'r') as json_file:
    cpdNames = load(json_file)
with open("../../metabolomics data/BiGG_metabolite_mapping.json", 'r') as json_file:
    addedVals = load(json_file)

# NOTE(review): update() without arguments leaves baseConvert unchanged.
# Presumably addedVals was meant to be merged in, i.e.
# baseConvert.update(addedVals) — confirm that both files share the same
# key/value layout before enabling that.
baseConvert.update()

In [2]:
from collections import Counter
from json import load

# Load the parsed ModelSEED Database cross-references and compound names, plus
# the BiGG -> ModelSEED ID table that the metabolite ids are checked against.
# NOTE(review): the first two paths are absolute and user-specific while the
# third is relative — confirm they point into the same checkout.
with open("/Users/afreiburger/Documents/CommScores/commscores/data/compound_Xrefs.json", "r") as cpdXRefs:
    compounds_cross_references = load(cpdXRefs)
with open("/Users/afreiburger/Documents/CommScores/commscores/data/compoundNames.json", "r") as cpdNames:
    compoundNames = load(cpdNames)
# Opened in a `with` block so the handle is closed after parsing.
with open("../../CommScores/commscores/data/BiGG_to_MSID.json", 'r') as biggToMsid:
    baseConvert = load(biggToMsid)

from cobra.io import read_sbml_model
from glob import glob

# Metabolite id -> metabolite name for every id that has no entry in
# baseConvert, gathered over all AGORA2 models.
missedMatches = {}
# The loop variable is named model_path so it does not shadow os.path, which
# other cells import as `path`.
for model_path in glob("../../AGORA2_models/*.xml"):
    model = read_sbml_model(model_path)
    for met in model.metabolites:
        if met.id not in baseConvert:
            missedMatches[met.id] = met.name

# NOTE(review): the values of missedMatches are names, not counts, so this
# Counter tallies nothing; it only wraps the id -> name mapping for printing.
# If a per-id tally across models was intended, it has to be accumulated in
# the loop above — confirm the intent.
print(Counter(missedMatches))