# Add cross-references to other databases

In [1]:
from pathlib import Path

import cobra
import pandas as pd
from cobra.io import write_sbml_model, read_sbml_model
from cobramod.core.crossreferences import add_crossreferences

### Add cross-references with CobraMod

In [2]:
model = read_sbml_model("../models/RcH10_v3.xml")

# The previous version of this cell passed one user's absolute home-directory
# path (ending in ".../notebooks/") as `directory`, which fails on any other
# machine. The current working directory is used instead; this assumes the
# notebook is run from its own folder, which the relative "../models/..."
# paths in this notebook already depend on.
# NOTE(review): `directory` is presumably where CobraMod keeps the database
# files it retrieves — confirm against the CobraMod documentation.
crossref_dir = Path.cwd()

add_crossreferences(object=model, directory=crossref_dir)

write_sbml_model(model, "../models/RcH10_v4.xml")

100%|██████████| 2590/2590 [02:11<00:00, 19.71it/s] 


### Annotation of genes

In [3]:
model = read_sbml_model("../models/RcH10_v4.xml")

# Columns of the UniProt proteome table that this cell reads.
REFSEQ_COLUMN = "Cross-reference (RefSeq)"
ENTRY_COLUMN = "Entry"
GENE_NAMES_COLUMN = "Gene names"

# Rows without a RefSeq cross-reference cannot be matched to a model gene.
uniprot = pd.read_csv("../input/uniprot-proteome_UP000001349.tab", sep="\t")
uniprot = uniprot.dropna(subset=[REFSEQ_COLUMN])

for gene in model.genes:
    # The gene id without its last two characters is the search key
    # (presumably this strips a one-digit version suffix — TODO confirm).
    accession = gene.id[:-2]

    # regex=False: match the key literally, so that characters such as "."
    # in an id are not interpreted as regular-expression wildcards.
    matches = uniprot[uniprot[REFSEQ_COLUMN].str.contains(accession, regex=False)]
    if matches.empty:
        continue

    # Only the first matching proteome row is used.
    hit = matches.iloc[0]
    annotation = gene.annotation

    # The RefSeq cell is a ";"-separated list of versioned accessions. Pick the
    # accession that actually matches this gene and drop its ".<version>"
    # suffix, instead of slicing a fixed three characters off the whole cell
    # (which breaks for several accessions or versions with two digits).
    refseq_entries = (entry.strip() for entry in hit[REFSEQ_COLUMN].split(";"))
    refseq_entry = next((entry for entry in refseq_entries if accession in entry), None)
    if refseq_entry:
        refseq_id = refseq_entry.rsplit(".", 1)[0]
        if refseq_id:
            annotation["refseq"] = refseq_id

    uniprot_id = hit[ENTRY_COLUMN]
    if pd.notna(uniprot_id) and uniprot_id:
        annotation["uniprot"] = uniprot_id

    # The last whitespace-separated token of "Gene names" is used as the KEGG
    # gene id (presumably the locus tag — TODO confirm). The cell may be empty
    # (NaN), in which case no KEGG annotation is written.
    gene_names = hit[GENE_NAMES_COLUMN]
    name_tokens = gene_names.split() if isinstance(gene_names, str) else []
    if name_tokens:
        annotation["kegg.genes"] = "cce:" + name_tokens[-1]

    gene.annotation = annotation

write_sbml_model(model, "../models/RcH10_v5.xml")

https://identifiers.org/inchikey/ does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
https://identifiers.org/inchi/ does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
https://identifiers.org/inchikey/ does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
https://identifiers.org/inchi/ does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
https://identifiers.org/inchikey/ does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
https://identifiers.org/inchi/ does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
https://identifiers.org/inchikey/ does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
https://identifiers.org/inchi/ does 

### Fixing URIs in metadata

In [4]:
# Shell escape: runs `source activate rcell_rec` (presumably activating a conda
# environment of that name — confirm) and then ../scripts/fix_annotation.py.
# The two commands are joined with `;`, so the script is started even if the
# activation step fails.
! source activate rcell_rec;python "../scripts/fix_annotation.py"

loading models...
fixing metabolites...
checking URIs for metabolites...
 Having problems with following URIs:
	 Timeout for https://identifiers.org/inchi:InChI=1S/C5H14N2/c6-4-2-1-3-5-7/h1-7H2/p+2. Retrying...
	 Removed problematic URIs for M_15dap_c
	 Timeout for https://identifiers.org/inchi:InChI=1S/C5H14N2/c6-4-2-1-3-5-7/h1-7H2/p+2. Retrying...
	 Removed problematic URIs for M_15dap_e
	 Timeout for https://identifiers.org/inchi:InChI=1S/C5H14N2/c6-4-2-1-3-5-7/h1-7H2/p+2. Retrying...
	 Removed problematic URIs for M_15dap_p
	 Removed problematic URIs for M_1agpe120_p
	 Removed problematic URIs for M_1agpe160_p
	 Removed problematic URIs for M_1agpe180_p
	 Timeout for https://identifiers.org/inchi:InChI=1S/C22H45O9P/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-22(26)29-17-21(25)19-31-32(27,28)30-18-20(24)16-23/h20-21,23-25H,2-19H2,1H3,(H,27,28)/p-1/t20-,21+/m0/s1. Retrying...
	 Failed to connect to https://identifiers.org/inchi:InChI=1S/C22H45O9P/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-22(26)2

In [5]:
# Read ../models/RcH10_final.xml and save it again as ../models/iIB727.xml.
# NOTE(review): RcH10_final.xml is presumably produced by
# ../scripts/fix_annotation.py — confirm.
FINAL_MODEL_PATH = "../models/RcH10_final.xml"
IIB727_MODEL_PATH = "../models/iIB727.xml"

model_cobra = cobra.io.read_sbml_model(FINAL_MODEL_PATH)
cobra.io.write_sbml_model(model_cobra, IIB727_MODEL_PATH)

### Add genes for the iFS431 model ('other_models/h10-C_cellulolyticium.xml' -> 'iFS431_genes.xml')

In [6]:
# Shell escape: runs `source activate rcell_rec` (presumably activating a conda
# environment of that name — confirm) and then ../scripts/iFS431_add_genes.py.
# The two commands are joined with `;`, so the script is started even if the
# activation step fails.
! source activate rcell_rec;python "../scripts/iFS431_add_genes.py"

Load model '../models/other_models/h10-C_cellulolyticium.xml'
Load model data
Add GPRs to model...
Save model as '../models/other_models/iFS431_genes.xml'


In [8]:
# Load the iFS431 model file with gene associations through COBRApy's SBML reader.
IFS431_GENES_PATH = "../models/other_models/iFS431_genes.xml"
model_cobra = cobra.io.read_sbml_model(IFS431_GENES_PATH)

Model does not contain SBML fbc package information.
'' is not a valid SBML 'SId'.
Encoding LOWER_BOUND and UPPER_BOUND in KineticLaw is discouraged, use fbc:fluxBounds instead: <Reaction BIOMASS>
Encoding LOWER_BOUND and UPPER_BOUND in KineticLaw is discouraged, use fbc:fluxBounds instead: <Reaction a_39>
Encoding LOWER_BOUND and UPPER_BOUND in KineticLaw is discouraged, use fbc:fluxBounds instead: <Reaction a_40>
Encoding LOWER_BOUND and UPPER_BOUND in KineticLaw is discouraged, use fbc:fluxBounds instead: <Reaction a_41>
Encoding LOWER_BOUND and UPPER_BOUND in KineticLaw is discouraged, use fbc:fluxBounds instead: <Reaction a_42>
Encoding LOWER_BOUND and UPPER_BOUND in KineticLaw is discouraged, use fbc:fluxBounds instead: <Reaction a_43>
Encoding LOWER_BOUND and UPPER_BOUND in KineticLaw is discouraged, use fbc:fluxBounds instead: <Reaction a_44>
Encoding LOWER_BOUND and UPPER_BOUND in KineticLaw is discouraged, use fbc:fluxBounds instead: <Reaction a_6>
Encoding LOWER_BOUND and UP

In [11]:
# Write the model currently held in `model_cobra` through COBRApy's SBML
# writer under a new file name.
IFS431_GENES_REWRITTEN_PATH = "../models/other_models/iFS431_genes2.xml"
cobra.io.write_sbml_model(model_cobra, IFS431_GENES_REWRITTEN_PATH)