# TODO - assemble a representative pangenome model for all models that map to a 16S sequence

In [1]:
# determine the frequency of reactions in all models that are mapped to a single 16S genome
# filter reactions that are below a frequency threshold

In [1]:
from json import load
from pandas import read_excel

spreadsheet = read_excel(r"MicrobeID_to_GCF_mapping_AGORA2.xlsx", "Sheet1")
model_gcf_mapping = dict(zip(spreadsheet["GCF ID"].to_numpy(), spreadsheet["MicrobeID"].to_numpy()))
print(model_gcf_mapping)

with open("AGORA2 to ASV Exact Matches/AGORA2_6_14_23_16S_ASV_to_Genome.json", "r") as jsonIn:
    asv_to_genomes = load(jsonIn)
    
unique_asv_mappings = {k: list(set(v)) for k,v in asv_to_genomes.items()}
print(unique_asv_mappings)

{'GCF_000160075.2': 'Abiotrophia_defectiva_ATCC_49176', 'GCF_000376245.1': 'Acaricomes_phytoseiuli_DSM_14247', 'GCF_000018105.1': 'Acaryochloris_marina_MBIC11017', 'GCF_900103835.1': 'Acetanaerobacterium_elongatum_CGMCC_1_5012', 'GCF_900248245.1': 'Acetatifactor_muris_GP69', 'GCF_000179595.2': 'Acetivibrio_cellulolyticus_CD2', 'GCF_001461035.1': 'Acetivibrio_ethanolgignens_ACET_33324', 'GCF_001766835.1': 'Acetobacterium_wieringae_DSM_1911', 'GCF_000219125.1': 'Acetonema_longum_DSM_6540', 'GCF_000219745.1': 'Achromobacter_insuavis_AXX_A', 'GCF_000165835.1': 'Achromobacter_xylosoxidans_A8', 'GCF_001598595.1': 'Achromobacter_xylosoxidans_NBRC_15126', 'GCF_001457475.1': 'Achromobacter_xylosoxidans_NCTC10807', 'GCF_900103005.1': 'Acidaminobacter_hydrogenoformans_DSM_2784', 'GCF_000025305.1': 'Acidaminococcus_fermentans_DSM_20731', 'GCF_000230275.1': 'Acidaminococcus_intestini_RyC_MR95', 'GCF_900095825.1': 'Acidaminococcus_massiliensis_Marseille_P2828', 'GCF_000468835.1': 'Acidaminococcus_sp

In [2]:
from cobra.io import read_sbml_model
from json import dump

broken_models = []
for asv, gcfs in unique_asv_mappings.items():
    reaction_counts_per_asv = {}
    print("\n", asv)
    for gcf in gcfs:
        try:
            model = read_sbml_model("/vol/ml/AGORA2/" + f"{model_gcf_mapping[gcf]}.xml")
            for rxn in model.reactions:
                if rxn.id in reaction_counts_per_asv:  reaction_counts_per_asv[rxn.id] += 1
                else: reaction_counts_per_asv[rxn.id] = 1
            print(model.id)
        except Exception as e:
            print(gcf, model_gcf_mapping[gcf]) #, e)
            broken_models.append(model_gcf_mapping[gcf])
    if reaction_counts_per_asv == {}:  continue
    reaction_counts_per_asv = {rxnID:(count/len(gcfs)) for rxnID,count in reaction_counts_per_asv.items()}
    reaction_counts_per_asv["numGCFs"] = len(gcfs)
    with open(f"reaction_counts_per_asv/{asv}.json", "w") as jsonOut:
        dump(reaction_counts_per_asv, jsonOut, indent=3)


 ASV00001;seqs=10697401;samples=1978
M_Blautia_coccoides_YL58
M_Blautia_sp_YL58

 ASV00001;seqs=15073372;samples=2336
M_Blautia_coccoides_YL58
M_Blautia_sp_YL58

 ASV00002;seqs=7303020;samples=1403
M_Enterococcus_faecium_E1626
M_Enterococcus_faecium_E1861
M_Enterococcus_faecium_UAA1023
M_Enterococcus_faecium_1141733
M_Enterococcus_faecium_UAA723
M_Enterococcus_faecium_UAA715
M_Enterococcus_faecium_7330884_2
M_Enterococcus_faecium_H17575
M_Enterococcus_faecium_VAN_335
M_Enterococcus_faecium_UAA1280
M_Enterococcus_faecium_9730357_1
M_Enterococcus_faecium_E1185
M_Enterococcus_hirae_ATCC_9790
M_Enterococcus_mundtii_QU_25
M_Enterococcus_faecium_U0317
M_Enterococcus_faecium_9830091_5
M_Enterococcus_faecium_1231408
M_Enterococcus_faecium_UAA909
M_Enterococcus_faecium_UAA945
M_Enterococcus_faecium_UAA1484
M_Enterococcus_faecium_TX1330
M_Enterococcus_faecium_E1133
M_Enterococcus_faecium_9731352_4
M_Enterococcus_faecium_E1574
M_Enterococcus_faecium_E0680
M_Enterococcus_faecium_UAA407
GCF_000395

In [4]:
from cobra.io import write_sbml_model
from cobra import Model, Reaction
from glob import glob
from json import load
import re

threshold = .5

for asv, gcfs in unique_asv_mappings.items():
    try:
        with open(f"reaction_counts_per_asv/{asv}.json", "r") as jsonIn:  asv_mappings = load(jsonIn)
    except FileNotFoundError as e:
        print(e) ; continue
    # asv = re.search(r"(?<=asv\/)(.+)(?=\.json)", asv).group()
    asv_name = asv.split(";")[0]
    reactions = [rxnID for rxnID, proportion in asv_mappings.items() if proportion > threshold and rxnID != "numGCFs"]
    megaModel = Model(asv_name, f"MegaModel for {asv_name} from {asv_mappings['numGCFs']} GCFs")
    captured_reactions, captured_rxnIDs = [], set()
    print("\n", asv)
    for gcf in gcfs:
        try:
            model = read_sbml_model("/vol/ml/AGORA2/" + f"{model_gcf_mapping[gcf]}.xml")
            print(model.id)
        except Exception as e:
            print(gcf, model_gcf_mapping[gcf], e)
            broken_models.append(model_gcf_mapping[gcf])
            continue
        captured_reactions.extend([rxn for rxn in model.reactions if rxn.id in reactions and rxn.id not in captured_rxnIDs])
        captured_rxnIDs.update([rxn.id for rxn in model.reactions])
    megaModel.add_reactions(captured_reactions)
    for rxn in megaModel.reactions:
        print(rxn.reaction)
        break
    print(len(megaModel.reactions), len(reactions))
    write_sbml_model(megaModel, f"reaction_counts_per_asv/{asv}.xml")


 ASV00001;seqs=10697401;samples=1978
M_Blautia_coccoides_YL58
M_Blautia_sp_YL58
23dhmp[c] + nadp[c] <=> 3h3mop[c] + h[c] + nadph[c]
1119 1119

 ASV00001;seqs=15073372;samples=2336
M_Blautia_coccoides_YL58
M_Blautia_sp_YL58
23dhmp[c] + nadp[c] <=> 3h3mop[c] + h[c] + nadph[c]
1119 1119

 ASV00002;seqs=7303020;samples=1403
M_Enterococcus_faecium_E1626
M_Enterococcus_faecium_E1861
M_Enterococcus_faecium_UAA1023
M_Enterococcus_faecium_1141733
M_Enterococcus_faecium_UAA723
M_Enterococcus_faecium_UAA715
M_Enterococcus_faecium_7330884_2
M_Enterococcus_faecium_H17575
M_Enterococcus_faecium_VAN_335
M_Enterococcus_faecium_UAA1280
M_Enterococcus_faecium_9730357_1
M_Enterococcus_faecium_E1185
M_Enterococcus_hirae_ATCC_9790
M_Enterococcus_mundtii_QU_25
M_Enterococcus_faecium_U0317
M_Enterococcus_faecium_9830091_5
M_Enterococcus_faecium_1231408
M_Enterococcus_faecium_UAA909
M_Enterococcus_faecium_UAA945
M_Enterococcus_faecium_UAA1484
M_Enterococcus_faecium_TX1330
M_Enterococcus_faecium_E1133
M_Enter