## Clean up model and add SBO and ECO Terms

### Import and methods

In [1]:
import cobra
import pandas as pd
import libsbml

reader = libsbml.SBMLReader()
writer = libsbml.SBMLWriter()

model = cobra.io.read_sbml_model("2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.xml")

In [2]:
model.reactions.ATPM.annotation.update({"sbo": "SBO:0000176"})

### SBO Terms

#### Reactions

In [3]:
for r in model.reactions:
    if "sbo" not in r.annotation:
        if r.id in model.exchanges or r.id.startswith("EX_"):
            r.annotation.update({"sbo": "SBO:0000627"})   # add exchange reactions
        elif not r.reactants:
            r.annotation.update({"sbo": "SBO:0000632"})   # add sink reactions
        elif not r.products:
            r.annotation.update({"sbo": "SBO:0000628"})   # add demand reactions
        else:
            bigg_ids = [m.id[:-2] for m in r.metabolites]
            if len(bigg_ids) > len(set(bigg_ids)):
                r.annotation.update({"sbo": "SBO:0000185"})   # add transport reactions
            else:
                r.annotation.update({"sbo": "SBO:0000176"})   # add metabolic reactions

#### Metabolites

In [4]:
for m in model.metabolites:
    if "sbo" not in m.annotation:
        m.annotation.update({"sbo": "SBO:0000247"})   # add metabolites

#### Genes

In [5]:
for g in model.genes:
    if "sbo" not in g.annotation:
        g.annotation.update({"sbo": "SBO:0000243"})   # add genes

In [6]:
cobra.io.write_sbml_model(model, "2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.xml")

In [7]:
import memote
# Make memote report
result = memote.test_model(model, results=True)  #, skip=["test_find_metabolites_not_produced_with_open_bounds"])
report = memote.snapshot_report(result[1], config=None, html=True)
with open("2.2/memote-reports/report.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.html", "w") as handle:
    handle.write(report)

platform linux -- Python 3.8.8, pytest-4.6.11, py-1.10.0, pluggy-0.13.1
rootdir: /home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests
plugins: anyio-3.3.0
collected 145 items

../../../../../../../home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests/test_annotation.py ...FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF.F.F......FF....FFFF.F.FF.FFF..                             [ 44%]
../../../../../../../home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests/test_basic.py ......F........F.FFFFFF                                                                            [ 60%]
../../../../../../../home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests/test_biomass.py .F.....F.F                                                                                       [ 67%]
../../../../../../../home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests/test_consistency.py .ssssssssssssssssFFFFFFFFFF                                   

### ECO-Terms

In [8]:
def add_eco_annotation(model, lnk, s_id, element):  
    c = libsbml.CVTerm()
    c.setQualifierType(libsbml.BIOLOGICAL_QUALIFIER)
    c.setBiologicalQualifierType(libsbml.BQB_IS_DESCRIBED_BY)
    c.addResource(lnk)

    if element == "species":
        list_cv = [model.getSpecies(s_id).getCVTerm(i) for i in range(model.getSpecies(s_id).getNumCVTerms())]
        if not c in list_cv:     # eliminate duplicates
            model.getSpecies(s_id).addCVTerm(c)
            
    elif element == "reaction":
        list_cv = [model.getReaction(s_id).getCVTerm(i) for i in range(model.getReaction(s_id).getNumCVTerms())]
        if not c in list_cv:
            model.getReaction(s_id).addCVTerm(c)
            
    elif element == "gene":
        list_cv = [model.getPlugin("fbc").getGeneProduct(s_id).getCVTerm(i) for i in range(model.getPlugin("fbc").getGeneProduct(s_id).getNumCVTerms())]
        if not c in list_cv:
            model.getPlugin("fbc").getGeneProduct(s_id).addCVTerm(c)
    
    return model

In [9]:
doc = reader.readSBML("2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.xml")
model = doc.getModel()

In [10]:
manually_curated = pd.read_csv("2.2/tables/manually_curated_all.tsv", sep="\t",header=None)
manually_curated = list(manually_curated[0])

In [11]:
blast_evidence = ["THRD_L_1","BZDIOLDH"]

In [12]:
model_draft = cobra.io.read_sbml_model("2.2/finegoldia_magna_ATCC_29328_2.2.xml")
print(len(model_draft.reactions))

1166


In [13]:
model_bgf = cobra.io.read_sbml_model("2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.xml")
model_gf = cobra.io.read_sbml_model("2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.xml")

reac_ids_bgf = [r.id for r in model_bgf.reactions]
reac_ids_gf = [r.id for r in model_gf.reactions]
added_automated_gf = list(set(reac_ids_gf) - set(reac_ids_bgf))
print(added_automated_gf)

['NNDPR', 'DAPDC', 'KAS13', 'KARA2', 'DHAD2', 'KAS8', 'KAS7', 'ASPO2', 'ACHBS', 'BZ12DOX', 'PPND']


In [14]:
from tqdm import tqdm
for i in tqdm(range(model.getNumSpecies())):
    s_id = model.getSpecies(i).getId()
    if s_id in manually_curated:
        lnk = "https://identifiers.org/ECO:0007759"
    elif s_id in blast_evidence:
        lnk = "https://identifiers.org/ECO:0000031"
    elif s_id in model_draft.metabolites:
        lnk = "https://identifiers.org/ECO:0007482" # biological system reconstruction evidence based on homology evidence used in automatic assertion
    elif s_id in added_automated_gf:
        lnk = "https://identifiers.org/ECO:0000363" # computational inference used in automatic assertion
    else:
        lnk = "https://identifiers.org/ECO:0007636"
    model = add_eco_annotation(model, lnk, s_id, "species")
    
for i in tqdm(range(model.getNumReactions())):
    s_id = model.getReaction(i).getId()
    if s_id in manually_curated:
        lnk = "https://identifiers.org/ECO:0007759"
    elif s_id in blast_evidence:
        lnk = "https://identifiers.org/ECO:0000031"
    elif s_id in model_draft.reactions:
        lnk = "https://identifiers.org/ECO:0007482"
    elif s_id in added_automated_gf:
        lnk = "https://identifiers.org/ECO:0000363"
    else:
        lnk = "https://identifiers.org/ECO:0007636"
    model = add_eco_annotation(model, lnk, s_id, "reaction")
        
for i in tqdm(range(model.getPlugin("fbc").getNumGeneProducts())):
    s_id = model.getPlugin("fbc").getGeneProduct(i).getId()
    if s_id in manually_curated:
        lnk = "https://identifiers.org/ECO:0007759"
    elif s_id in blast_evidence:
        lnk = "https://identifiers.org/ECO:0000031"
    elif s_id in model_draft.genes:
        lnk = "https://identifiers.org/ECO:0007482"
    elif s_id in added_automated_gf:
        lnk = "https://identifiers.org/ECO:0000363"
    else:
        lnk = "https://identifiers.org/ECO:0007636"
    model = add_eco_annotation(model, lnk, s_id, "gene")

100%|██████████| 968/968 [00:00<00:00, 17702.42it/s]
100%|██████████| 1317/1317 [00:00<00:00, 13802.40it/s]
100%|██████████| 601/601 [00:00<00:00, 14282.59it/s]


In [15]:
# Saving new model
doc.setModel(model)
writer.writeSBML(doc, "2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.eco.xml")

True

In [16]:
model = cobra.io.read_sbml_model("2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.eco.xml")
# Make memote report
result = memote.test_model(model, results=True)  #, skip=["test_find_metabolites_not_produced_with_open_bounds"])
report = memote.snapshot_report(result[1], config=None, html=True)
with open("2.2/memote-reports/report.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.eco.html", "w") as handle:
    handle.write(report)

platform linux -- Python 3.8.8, pytest-4.6.11, py-1.10.0, pluggy-0.13.1
rootdir: /home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests
plugins: anyio-3.3.0
collected 145 items

../../../../../../../home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests/test_annotation.py ...FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF.F.F......FF....FFFF.F.FF.FFF..                             [ 44%]
../../../../../../../home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests/test_basic.py ......F........F.FFFFFF                                                                            [ 60%]
../../../../../../../home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests/test_biomass.py .F.....F.F                                                                                       [ 67%]
../../../../../../../home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests/test_consistency.py .ssssssssssssssssFFFFFFFFFF                                   

### Clean Up annotations via own python code:

In [17]:
# Model polishing via ModelPolisher
# cd ModelPolisher
# systemctl start docker
# sudo docker-compose run -v /mnt/Data/Uni/7.Semester/Nextcloud_SysBio_Linux/Josua_Carl_BT/finegoldia_magna_py/2.2:/models/ polisher java -jar /ModelPolisher-2.1-beta.jar --input=/models/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.eco.xml --output=/models/mp-output/finegoldia_magna_ATCC_29328_2.2.mp2.xml  --annotate-with-bigg=true --add-adb-annotations=true --output-combine=true

In [18]:
!python annotate_links_from_mp.py "2.2/mp-output2/model.xml" "2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.eco.xml" "2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.eco.mp2.xml" "finegoldia_magna" "2.2"

100%|████████████████████████████████████| 14684/14684 [00:25<00:00, 569.91it/s]


In [19]:
!python annotate_reactions.py "2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.eco.mp2.xml" "2.2/finegoldia_magna_ATCC_29328_2.2.fo.ch.mp.mcb.lt.re.ar.gpr.pw.gf1.gfmm.gf2.gfco3.circ.mcb2.sbo.eco.mp2.re.xml" "2.2/tables/reactions_not_in_bigg_2.tsv" "2.2/memote-reports/annotated_genes_2.html"

100%|██████████████████████████████████████| 1317/1317 [00:03<00:00, 425.95it/s]
100%|███████████████████████████████████████| 1317/1317 [04:44<00:00,  4.62it/s]
platform linux -- Python 3.8.8, pytest-4.6.11, py-1.10.0, pluggy-0.13.1
rootdir: /home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests
plugins: anyio-3.3.0
collected 145 items                                                            [0m[1m

../../../../../../../home/salatan/anaconda3/lib/python3.8/site-packages/memote/suite/tests/test_annotation.py [32m.[0m[36m [  0%]
[0m[32m.[0m[32m.[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[32m.[0m[31mF[0m[32m.[0m[31mF[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[31mF[0