In [36]:
import sys

if ".." not in sys.path:
    sys.path.append("..")

In [38]:
import pandas as pd
import numpy as np
from cobra.io import read_sbml_model

GENES1 = "../model_building/genes/Ruegeria-pomeroyi-DSS-3-representative-genome-gene-to-reaction-mapping.tsv"
GENES2 = "../model_building/genes/Ruegeria-pomeroyi-DSS-3-gene-to-reaction-mapping.tsv"
MODEL_FILE = "../model/Rpom_05.xml"

# Load gene to reaction mapping
gene_to_reaction_1 = pd.read_csv(GENES1, sep="\t")
gene_to_reaction_2 = pd.read_csv(GENES2, sep="\t")

# Load model
model = read_sbml_model(MODEL_FILE)

In [40]:
# Some genes catalyze multiple reactions. Split the Reaction column into lists, broken by " // "
gene_to_reaction_1["Reaction"] = gene_to_reaction_1["Reaction"].str.split(" // ")
gene_to_reaction_2["Reaction"] = gene_to_reaction_2["Reaction"].str.split(" // ")


gene_to_reaction_2.head()

Unnamed: 0,Gene Name,Accession-1,Left-End-Position,Right-End-Position,Product,Reaction,Left,EC-Number,Right,ARALIP,KEGG reaction,METANETX,RHEA,Common-Name
0,G1RHL-3273,,4572154,4572399,G1RHL-3273-MONOMER,,,,,,,,,
1,G1RHL-378,,557329,558357,G1RHL-378-MONOMER,,,,,,,,,
2,G1RHL-3020,,4215298,4216131,G1RHL-3020-MONOMER,,,,,,,,,
3,SPO1136,SPO1136,1191288,1192661,SPO1136-MONOMER,[R101-RXN],CPD-470 // 2-KETOGLUTARATE,EC-2.6.1.76,L-ASPARTATE-SEMIALDEHYDE // GLT,,,,,
4,G1RHL-1195,,1693553,1694386,G1RHL-1195-MONOMER,,,,,,,,,


In [46]:
from collections import Counter

for i, mapping in enumerate([gene_to_reaction_1, gene_to_reaction_2]):
    print(f"Gene to reaction mapping {i + 1} =================================================")
        
    # For each reaction in the model, check if its stem is in the gene to reaction mapping
    has_match = []
    has_no_match = []
    for reaction in model.reactions:
        reaction_stem = reaction.annotation.get("stem", reaction.id)
        
        in_mapping = any(reaction_stem in reactions
                for reactions in mapping["Reaction"]
                if isinstance(reactions, list))
        
        if in_mapping:
            has_match.append(reaction)
        else:
            has_no_match.append(reaction)

    # Count how many reactions are in the mapping
    print(len(has_match), "reactions are in the gene to reaction mapping")
    print(len(has_no_match), "reactions are not in the gene to reaction mapping")

    # Of the model reactions that are not in the model, count how many have genes in the model
    # and how many do not
    has_genes = []
    for reaction in has_no_match:
        has_genes.append(len(reaction.genes) > 0)

    print(f"\t- of missing reactions, {Counter(has_genes)[True]} have genes in the model and {Counter(has_genes)[False]} do not")
        

774 reactions are in the gene to reaction mapping
1201 reactions are not in the gene to reaction mapping
	 - of missing reactions, 806 have genes in the model and 395 do not
887 reactions are in the gene to reaction mapping
1088 reactions are not in the gene to reaction mapping
	 - of missing reactions, 696 have genes in the model and 392 do not


In [55]:
# Create a Venn diagram of the genes that are in one mapping or the other

# For each reaction in the model, check if its stem is in the gene to reaction mapping
reactions_in_mapping_1 = set()
reactions_in_mapping_2 = set()
for i, mapping in enumerate([gene_to_reaction_1, gene_to_reaction_2]):
    has_match = []
    has_no_match = []
    for reaction in model.reactions:
        reaction_stem = reaction.annotation.get("stem", reaction.id)
        
        in_mapping = any(reaction_stem in reactions
                for reactions in mapping["Reaction"]
                if isinstance(reactions, list))
        
        if in_mapping:
            [reactions_in_mapping_1, reactions_in_mapping_2][i].add(reaction)

unique_to_1 = reactions_in_mapping_1 - reactions_in_mapping_2
unique_to_2 = reactions_in_mapping_2 - reactions_in_mapping_1
shared = reactions_in_mapping_1 & reactions_in_mapping_2

print("1 | 2")
print(f"({len(unique_to_1)}({len(shared)}){len(unique_to_2)})")


1 | 2
(131(643)244)


In [71]:
# Create a combined mapping by taking the union of the two mappings
combined_mapping = gene_to_reaction_1[["Gene Name", "Reaction"]].merge(gene_to_reaction_2[["Gene Name", "Reaction"]], how="outer", on="Gene Name", suffixes=("_1", "_2"))

# Combine the reactions
reactions_combined = []
for reactions_1, reactions_2 in zip(combined_mapping["Reaction_1"], combined_mapping["Reaction_2"]):
    if isinstance(reactions_1, list) and isinstance(reactions_2, list):
        reactions_combined.append(list(set(reactions_1 + reactions_2)))
    elif isinstance(reactions_1, list):
        reactions_combined.append(reactions_1)
    elif isinstance(reactions_2, list):
        reactions_combined.append(reactions_2)
    else:
        reactions_combined.append([])

combined_mapping["Reactions combined"] = reactions_combined
combined_mapping

Unnamed: 0,Gene Name,Reaction_1,Reaction_2,Reactions combined
0,G1RHL-1,,,[]
1,G1RHL-10,,,[]
2,G1RHL-100,,,[]
3,G1RHL-1000,,,[]
4,G1RHL-1001,,,[]
...,...,...,...,...
8805,SPO_SP23SB,,,[]
8806,SPO_SP23SC,,,[]
8807,SPO_SP5SD,,,[]
8808,SPO_SP5SE,,,[]


In [72]:
# For each reaction in the model, check if its stem is in the gene to reaction mapping
has_match = []
has_no_match = []
for reaction in model.reactions:
    reaction_stem = reaction.annotation.get("stem", reaction.id)

    in_mapping = any(reaction_stem in reactions
            for reactions in combined_mapping["Reactions combined"]
            if isinstance(reactions, list))

    if in_mapping:
        has_match.append(reaction)
    else:
        has_no_match.append(reaction)

# Count how many reactions are in the mapping
print(len(has_match), "reactions are in the gene to reaction mapping")
print(len(has_no_match), "reactions are not in the gene to reaction mapping")


1018 reactions are in the gene to reaction mapping
957 reactions are not in the gene to reaction mapping


In [81]:
# Of the reactions that are not in the mapping, count how many have non-zero flux on glucose

with model:
    # Set maintenance and glucose uptake
    model.reactions.get_by_id("EX_glc").lower_bound = -10
    model.reactions.get_by_id("ATPM").bounds = 25, 25

    sol = model.optimize()
    print(sol.objective_value)

    missing_reaction_fluxes = {}
    for reaction in has_no_match:
        if len(reaction.genes) > 0:
            missing_reaction_fluxes[reaction.id] = sol.fluxes[reaction.id]
    
print(f"Of the {len(has_no_match)} reactions that are not in the gene to reaction mapping\n" \
      "and have some associated genes, "\
      f"{len([flux for flux in missing_reaction_fluxes.values() if abs(flux) > 0])} have non-zero flux on glucose")
print()

# Print reactions with non-zero flux
for reaction, flux in missing_reaction_fluxes.items():
    if abs(flux) > 0:
        print(reaction, flux)

0.7189837855935002
Of the 957 reactions that are not in the gene to reaction mapping
and have some associated genes, 25 have non-zero flux on glucose

1.5.5.1-RXN-ETF-Reduced/UBIQUINONE-10//ETF-Oxidized/CPD-9958/PROTON.56. 0.9240385495426946
3-HYDROXBUTYRYL-COA-DEHYDRATASE-RXN -0.6694789062692265
ADCLY-RXN 0.0004810001263046868
ERYTH4PDEHYDROG-RXN 0.0001603333754349021
HOMOCYSMET-RXN 0.09197986189117927
ORNDECARBOX-RXN 0.30742947831654077
RXN-11667 1.4853621635005583
RXN-14014-DELTA1-PIPERIDEINE-2-6-DICARBOXYLATE/NAD/WATER//CPD-14443/NADH/PROTON.70. 0.20554771567874808
RXN-16025 0.1081552923113625
RXN-16032-PALMITYL-COA/CPD0-2113//CPD-17273/CO-A.39. -0.1081552923113625
RXN-17018 0.04588123103871193
RXN-7968-SHIKIMATE/NAD//3-DEHYDRO-SHIKIMATE/NADH/PROTON.47. 7.269141271967499
RXN-8960 0.8158832572313321
RXN0-5107 -0.01881292966346336
RXN0-6705 0.04588123103871193
SUCCINYLDIAMINOPIMTRANS-RXN 0.20554771567874813
SUCCSEMIALDDEHYDROG-RXN 0.3074294783165407
TRANS-RXN-141A 1.7974595125685766e