In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON, CSV
import IPython
import subprocess
import time

In [2]:
# SPARQL query endpoints of the local Fuseki server, one per dataset.
# Every endpoint follows the same pattern: http://localhost:3030/<dataset>/query
(
    endpointURL_reactome_v87,
    endpointURL_reactome_pc,
    endpointURL_panther_pc,
    endpointURL_pathbank_pc,
    endpointURL_humancyc_pc,
    endpointURL_kegg_pc,
    endpointURL_pid_pc,
    endpointURL_inoh_pc,
    endpointURL_netpath_pc,
) = (
    "http://localhost:3030/{}/query".format(datasetName)
    for datasetName in (
        "reactome_v87",
        "reactome_pc",
        "panther_pc",
        "pathbank_pc",
        "humancyc_pc",
        "kegg_pc",
        "pid_pc",
        "inoh_pc",
        "netpath_pc",
    )
)

# RDF serialization name (not referenced by the cells visible in this notebook).
rdfFormat = "turtle"

In [3]:
# Namespace of the BioPAX level 3 ontology (bound to the bp3: prefix below).
biopaxURI = "http://www.biopax.org/release/biopax-level3.owl#"

# Reactome release number; it is interpolated into the reactome: namespace.
reactomeVersion = 87

# SPARQL prologue that is prepended to every query sent to the endpoints.
prefixes = f"""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>

PREFIX chebi: <http://purl.obolibrary.org/obo/chebi/>
PREFIX chebidb: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX chebirel: <http://purl.obolibrary.org/obo/CHEBI#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

PREFIX bp3: <{biopaxURI}>

# Homo_sapiens-20170221.owl
#PREFIX reactome: <http://www.reactome.org/biopax/59/48887#> 
#
# Homo_sapiens-20210608.owl
#PREFIX reactome: <http://www.reactome.org/biopax/77/48887#>
#
# Homo_sapiens-20220614.owl
#PREFIX reactome: <http://www.reactome.org/biopax/81/48887#>
#
# Homo_sapiens-20221130.owl
#PREFIX reactome: <http://www.reactome.org/biopax/83/48887#>

PREFIX reactome: <http://www.reactome.org/biopax/{reactomeVersion}/48887#>
"""

In [4]:
def displaySparqlResults(results):
    '''
    Displays as HTML the result of a SPARQLWrapper query in a Jupyter notebook.

    The table has one header cell per query variable and one row per solution.
    Variables that are unbound in a solution are rendered as a non-breaking
    space. Variable names and values are HTML-escaped so that characters such
    as '<' or '&' are shown literally instead of being interpreted as markup.
    An empty result set yields a table with the header row only.

        Parameters:
            results (dictionary): the result of a call to SPARQLWrapper.query().convert()
                with the JSON return format, i.e. a dict providing
                results['head']['vars'] and results['results']['bindings']
    '''
    # Local import: keeps the function self-contained without touching the import cell.
    import html

    variableNames = results['head']['vars']

    headerCells = ''.join('<th>{}</th>'.format(html.escape(vName)) for vName in variableNames)
    tableRows = ['<tr>{}</tr>'.format(headerCells)]

    for row in results["results"]["bindings"]:
        # An unbound variable is simply absent from the binding dict.
        cellValues = [html.escape(row[vName]['value']) if vName in row else "&nbsp;" for vName in variableNames]
        tableRows.append('<tr>{}</tr>'.format(''.join('<td>{}</td>'.format(value) for value in cellValues)))

    tableCode = '<table>{}</table>'.format(''.join(tableRows))
    IPython.display.display(IPython.display.HTML(tableCode))

## Mappings to UniProt

In [5]:
# First SPARQL query using the string "UniProt" to map to UniProt resources
#
# Returns one (?metric, ?count) row per metric, ordered by metric name:
#   - "Protein": distinct instances of bp3:Protein or of any of its subclasses
#   - "ProteinReference": distinct instances of bp3:ProteinReference
#   - "Protein with no PR": proteins with no bp3:entityReference to a bp3:ProteinReference
#     (the inner OPTIONAL keeps the row, with a count of 0, when there is none)
#   - "Uniprot mappings": direct instances of bp3:Protein (no subclass traversal) whose
#     ProteinReference has a bp3:UnificationXref with bp3:db "UniProt"
#   - "Protein with no mapping": proteins that have an entityReference without such a xref
#   - "Unique Uniprot": distinct bp3:id values over all "UniProt" UnificationXrefs
# Unlike query2, the whole UNION is wrapped in an outer OPTIONAL block.
query1 = """
SELECT ?metric ?count
# Number of instances of Protein
WHERE {
OPTIONAL {
    {
        SELECT ?metric (COUNT(DISTINCT ?protein) AS ?count)
        WHERE {
            BIND("Protein" AS ?metric)
            ?protein rdf:type/(rdfs:subClassOf*) bp3:Protein .
        }
        GROUP BY ?metric
    }
    UNION
    {
        SELECT ?metric (COUNT(DISTINCT ?protein_ref) AS ?count)
        WHERE {
            BIND("ProteinReference" AS ?metric)
            ?protein_ref rdf:type bp3:ProteinReference .
        }
        GROUP BY ?metric
    }
    UNION
    {
        SELECT ?metric (COUNT(DISTINCT ?protein_without_ref) AS ?count)
        WHERE {
            BIND("Protein with no PR" AS ?metric)
            OPTIONAL {
            ?protein_without_ref rdf:type/(rdfs:subClassOf*) bp3:Protein .
            FILTER NOT EXISTS {
                ?protein_without_ref bp3:entityReference ?ref .
                ?ref rdf:type bp3:ProteinReference .
            }
        }
        }
        GROUP BY ?metric
    }
    UNION
    {
        SELECT ?metric (COUNT(DISTINCT ?protein_that_map_to_uniprot) AS ?count)
        WHERE {
            BIND("Uniprot mappings" AS ?metric)
            ?protein_that_map_to_uniprot rdf:type bp3:Protein .
            ?protein_that_map_to_uniprot bp3:entityReference ?protein_ref .
            ?protein_ref rdf:type bp3:ProteinReference .
            ?protein_ref bp3:xref ?protein_ref_xref .
            ?protein_ref_xref rdf:type bp3:UnificationXref .
            ?protein_ref_xref bp3:db "UniProt" .
            ?protein_ref_xref bp3:id ?up_id .
        }
        GROUP BY ?metric
    }
    UNION 
    {
        SELECT ?metric (COUNT(DISTINCT ?protein_no_map_to_uniprot) AS ?count)
        WHERE {
            BIND("Protein with no mapping" AS ?metric)
            ?protein_no_map_to_uniprot rdf:type/(rdfs:subClassOf*) bp3:Protein .
            ?protein_no_map_to_uniprot bp3:entityReference ?protein_ref .
            FILTER NOT EXISTS {
                ?protein_ref bp3:xref ?xref .
                ?xref rdf:type bp3:UnificationXref .
                ?xref bp3:db "UniProt" .
                ?xref bp3:id ?id .
            }
        }
        GROUP BY ?metric
    }
    UNION 
    {
        SELECT ?metric (COUNT(DISTINCT ?up_id) AS ?count)
        WHERE {
            BIND("Unique Uniprot" AS ?metric)
            ?unif_xref rdf:type bp3:UnificationXref .
            ?unif_xref bp3:db "UniProt" .
            ?unif_xref bp3:id ?up_id .
        }
        GROUP BY ?metric
    }
   }
}
ORDER BY ?metric

"""

# Second SPARQL query using the string "uniprot knowledgebase" to map to UniProt resources
#
# Computes the same six metrics as query1 ("Protein", "ProteinReference",
# "Protein with no PR", "Uniprot mappings", "Protein with no mapping",
# "Unique Uniprot"), returned as (?metric, ?count) rows ordered by metric name.
# Differences with query1:
#   - UnificationXrefs are matched on bp3:db "uniprot knowledgebase" instead of "UniProt"
#   - the UNION of sub-queries is not wrapped in an outer OPTIONAL block
query2 = """
SELECT ?metric ?count
# Number of instances of Protein
WHERE {
    {
        SELECT ?metric (COUNT(DISTINCT ?protein) AS ?count)
        WHERE {
            BIND("Protein" AS ?metric)
            ?protein rdf:type/(rdfs:subClassOf*) bp3:Protein .
        }
        GROUP BY ?metric
    }
    UNION
    {
        SELECT ?metric (COUNT(DISTINCT ?protein_ref) AS ?count)
        WHERE {
            BIND("ProteinReference" AS ?metric)
            ?protein_ref rdf:type bp3:ProteinReference .
        }
        GROUP BY ?metric
    }
    UNION
    {
        SELECT ?metric (COUNT(DISTINCT ?protein_without_ref) AS ?count)
        WHERE {
      BIND("Protein with no PR" AS ?metric)
      OPTIONAL {
            ?protein_without_ref rdf:type/(rdfs:subClassOf*) bp3:Protein .
            FILTER NOT EXISTS {
                ?protein_without_ref bp3:entityReference ?ref .
                ?ref rdf:type bp3:ProteinReference .
            }
        }
    }
        GROUP BY ?metric
    }
    UNION
    {
        SELECT ?metric (COUNT(DISTINCT ?protein_that_map_to_uniprot) AS ?count)
        WHERE {
            BIND("Uniprot mappings" AS ?metric)
            ?protein_that_map_to_uniprot rdf:type bp3:Protein .
            ?protein_that_map_to_uniprot bp3:entityReference ?protein_ref .
            ?protein_ref rdf:type bp3:ProteinReference .
            ?protein_ref bp3:xref ?protein_ref_xref .
            ?protein_ref_xref rdf:type bp3:UnificationXref .
            ?protein_ref_xref bp3:db "uniprot knowledgebase" .
            ?protein_ref_xref bp3:id ?up_id .
        }
        GROUP BY ?metric
    }
    UNION 
    {
        SELECT ?metric (COUNT(DISTINCT ?protein_no_map_to_uniprot) AS ?count)
        WHERE {
            BIND("Protein with no mapping" AS ?metric)
            ?protein_no_map_to_uniprot rdf:type/(rdfs:subClassOf*) bp3:Protein .
            ?protein_no_map_to_uniprot bp3:entityReference ?protein_ref .
            FILTER NOT EXISTS {
                ?protein_ref bp3:xref ?xref .
                ?xref rdf:type bp3:UnificationXref .
                ?xref bp3:db "uniprot knowledgebase" .
                ?xref bp3:id ?id .
            }
        }
        GROUP BY ?metric
    }
    UNION 
    {
        SELECT ?metric (COUNT(DISTINCT ?up_id) AS ?count)
        WHERE {
            BIND("Unique Uniprot" AS ?metric)
            ?unif_xref rdf:type bp3:UnificationXref .
            ?unif_xref bp3:db "uniprot knowledgebase" .
            ?unif_xref bp3:id ?up_id .
        }
        GROUP BY ?metric
    }
}
ORDER BY ?metric

"""

**TODO:** before running the cells below, update the paths to the Fuseki server executable, the BioPAX files, and the BioPAX ontology in each command that launches a SPARQL endpoint.

#### Query on Reactome BioPAX standalone export version 87

In [6]:
# TODO: change path
# Launch a local Fuseki server that loads the Reactome v87 BioPAX export and the
# BioPAX level 3 ontology, and serves them under the /reactome_v87 dataset.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/ReactomeBioPAX/_00_Reactome_Data_v87/Homo_sapiens.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/reactome_v87']
process = subprocess.Popen(command)
try:
    # fixed delay giving the server time to load the files before it is queried
    time.sleep(60)

    # execute SPARQL query (query1 matches xrefs whose db is "UniProt")
    sparql = SPARQLWrapper(endpointURL_reactome_v87)
    sparql.setQuery(prefixes+query1)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export to CSV (the query is sent a second time with the CSV return format)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/reactome_v87_mappings_uniprot.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even if the query or the export failed, so that the server
    # does not keep running and block the next cell from starting its own
    process.kill()
    process.wait()  # reap the child process
    time.sleep(60)  # original safety delay before the next server is started

19:11:03 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/ReactomeBioPAX/_00_Reactome_Data_v87/Homo_sapiens.owl
19:11:04 WARN  riot            :: [line: 67280, col: 52] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
19:11:24 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl
19:11:24 INFO  Server          :: Running in read-only mode for /reactome_v87
19:11:24 INFO  Server          :: Apache Jena Fuseki 4.9.0
19:11:24 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
19:11:24 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run
19:11:24 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run/shiro.ini
19:11:24 INFO  Server   

metric,count
Protein,31649
Protein with no PR,3193
Protein with no mapping,743
ProteinReference,11808
Uniprot mappings,27713
Unique Uniprot,11421


19:12:04 INFO  Fuseki          :: [2] GET http://localhost:3030/reactome_v87/query?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0A%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0A%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0A%0A%23+Homo_sapiens-20170221.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/59/48887%23%3E+%0A%23%0A%23+Homo_sapiens-20210608.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.rea

#### Query on BioPAX export of Reactome from PathwayCommons

In [7]:
# TODO: change path
# Launch a local Fuseki server that loads the PathwayCommons BioPAX export of
# Reactome and the BioPAX level 3 ontology, served under the /reactome_pc dataset.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.reactome.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/reactome_pc']
process = subprocess.Popen(command)
try:
    # fixed delay giving the server time to load the files before it is queried
    time.sleep(60)

    # execute SPARQL query (query2 matches xrefs whose db is "uniprot knowledgebase")
    sparql = SPARQLWrapper(endpointURL_reactome_pc)
    sparql.setQuery(prefixes+query2)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export to CSV (the query is sent a second time with the CSV return format)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/reactome_pc_mappings_uniprot.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even if the query or the export failed, so that the server
    # does not keep running and block the next cell from starting its own
    process.kill()
    process.wait()  # reap the child process
    time.sleep(60)  # original safety delay before the next server is started

19:13:05 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.reactome.BIOPAX.owl
19:13:06 WARN  riot            :: [line: 100212, col: 81] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
19:13:25 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl
19:13:25 INFO  Server          :: Running in read-only mode for /reactome_pc
19:13:26 INFO  Server          :: Apache Jena Fuseki 4.9.0
19:13:26 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
19:13:26 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run
19:13:26 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run/shiro.ini
19:13:26 INFO  Server

metric,count
Protein,26582
Protein with no PR,0
Protein with no mapping,2866
ProteinReference,12836
Uniprot mappings,23716
Unique Uniprot,10973


19:14:06 INFO  Fuseki          :: [2] GET http://localhost:3030/reactome_pc/query?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0A%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0A%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0A%0A%23+Homo_sapiens-20170221.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/59/48887%23%3E+%0A%23%0A%23+Homo_sapiens-20210608.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reac

#### Query on BioPAX export of Panther from PathwayCommons

In [8]:
# TODO: change path
# Launch a local Fuseki server that loads the PathwayCommons BioPAX export of
# Panther and the BioPAX level 3 ontology, served under the /panther_pc dataset.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.panther.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/panther_pc']
process = subprocess.Popen(command)
try:
    # fixed delay giving the server time to load the files before it is queried
    time.sleep(60)

    # execute SPARQL query (query2 matches xrefs whose db is "uniprot knowledgebase")
    sparql = SPARQLWrapper(endpointURL_panther_pc)
    sparql.setQuery(prefixes+query2)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export to CSV (the query is sent a second time with the CSV return format)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/panther_pc_mappings_uniprot.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even if the query or the export failed, so that the server
    # does not keep running and block the next cell from starting its own
    process.kill()
    process.wait()  # reap the child process
    time.sleep(60)  # original safety delay before the next server is started

19:15:07 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.panther.BIOPAX.owl
19:15:08 WARN  riot            :: [line: 80968, col: 73] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
19:15:10 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl
19:15:10 INFO  Server          :: Running in read-only mode for /panther_pc
19:15:10 INFO  Server          :: Apache Jena Fuseki 4.9.0
19:15:10 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
19:15:10 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run
19:15:10 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run/shiro.ini
19:15:10 INFO  Server   

metric,count
Protein,5342
Protein with no PR,0
Protein with no mapping,3014
ProteinReference,3878
Uniprot mappings,2328
Unique Uniprot,2171


19:16:07 INFO  Fuseki          :: [2] GET http://localhost:3030/panther_pc/query?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0A%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0A%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0A%0A%23+Homo_sapiens-20170221.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/59/48887%23%3E+%0A%23%0A%23+Homo_sapiens-20210608.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.react

#### Query on BioPAX export of PathBank from PathwayCommons

In [9]:
# TODO: change path
# Launch a local Fuseki server that loads the PathwayCommons BioPAX export of
# PathBank and the BioPAX level 3 ontology, served under the /pathbank_pc dataset.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.pathbank.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/pathbank_pc']
process = subprocess.Popen(command)
try:
    # fixed delay giving the server time to load the files before it is queried
    time.sleep(60)

    # execute SPARQL query (query2 matches xrefs whose db is "uniprot knowledgebase")
    sparql = SPARQLWrapper(endpointURL_pathbank_pc)
    sparql.setQuery(prefixes+query2)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export to CSV (the query is sent a second time with the CSV return format)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/pathbank_pc_mappings_uniprot.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even if the query or the export failed, so that the server
    # does not keep running and block the next cell from starting its own
    process.kill()
    process.wait()  # reap the child process
    time.sleep(60)  # original safety delay before the next server is started

19:17:08 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.pathbank.BIOPAX.owl
19:17:09 WARN  riot            :: [line: 89570, col: 87] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
19:17:11 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl
19:17:11 INFO  Server          :: Running in read-only mode for /pathbank_pc
19:17:11 INFO  Server          :: Apache Jena Fuseki 4.9.0
19:17:11 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
19:17:11 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run
19:17:11 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run/shiro.ini
19:17:11 INFO  Server 

metric,count
Protein,2471
Protein with no PR,0
Protein with no mapping,71
ProteinReference,1606
Uniprot mappings,2400
Unique Uniprot,1555


19:18:08 INFO  Fuseki          :: [2] GET http://localhost:3030/pathbank_pc/query?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0A%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0A%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0A%0A%23+Homo_sapiens-20170221.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/59/48887%23%3E+%0A%23%0A%23+Homo_sapiens-20210608.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reac

#### Query on BioPAX export of HumanCyc from PathwayCommons

In [10]:
# TODO: change path
# Launch a local Fuseki server that loads the PathwayCommons BioPAX export of
# HumanCyc and the BioPAX level 3 ontology, served under the /humancyc_pc dataset.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.humancyc.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/humancyc_pc']
process = subprocess.Popen(command)
try:
    # fixed delay giving the server time to load the files before it is queried
    time.sleep(60)

    # execute SPARQL query (query2 matches xrefs whose db is "uniprot knowledgebase")
    sparql = SPARQLWrapper(endpointURL_humancyc_pc)
    sparql.setQuery(prefixes+query2)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export to CSV (the query is sent a second time with the CSV return format)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/humancyc_pc_mappings_uniprot.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even if the query or the export failed, so that the server
    # does not keep running and block the next cell from starting its own
    process.kill()
    process.wait()  # reap the child process
    time.sleep(60)  # original safety delay before the next server is started

19:19:09 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.humancyc.BIOPAX.owl
19:19:10 WARN  riot            :: [line: 108021, col: 73] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
19:19:13 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl
19:19:13 INFO  Server          :: Running in read-only mode for /humancyc_pc
19:19:13 INFO  Server          :: Apache Jena Fuseki 4.9.0
19:19:13 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
19:19:13 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run
19:19:13 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run/shiro.ini
19:19:13 INFO  Server

metric,count
Protein,2652
Protein with no PR,0
Protein with no mapping,387
ProteinReference,2568
Uniprot mappings,2265
Unique Uniprot,2213


19:20:09 INFO  Fuseki          :: [2] GET http://localhost:3030/humancyc_pc/query?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0A%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0A%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0A%0A%23+Homo_sapiens-20170221.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/59/48887%23%3E+%0A%23%0A%23+Homo_sapiens-20210608.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reac

#### Query on BioPAX export of KEGG from PathwayCommons

In [11]:
# TODO: change path
# Launch a local Fuseki server that loads the PathwayCommons BioPAX export of
# KEGG and the BioPAX level 3 ontology, served under the /kegg_pc dataset.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.kegg.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/kegg_pc']
process = subprocess.Popen(command)
try:
    # fixed delay giving the server time to load the files before it is queried
    time.sleep(60)

    # execute SPARQL query (query2 matches xrefs whose db is "uniprot knowledgebase")
    sparql = SPARQLWrapper(endpointURL_kegg_pc)
    sparql.setQuery(prefixes+query2)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export to CSV (the query is sent a second time with the CSV return format)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/kegg_pc_mappings_uniprot.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even if the query or the export failed, so that the server
    # does not keep running and block the next cell from starting its own
    process.kill()
    process.wait()  # reap the child process
    time.sleep(60)  # original safety delay before the next server is started

19:21:09 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.kegg.BIOPAX.owl
19:21:11 WARN  riot            :: [line: 96269, col: 75] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
19:21:12 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl
19:21:12 INFO  Server          :: Running in read-only mode for /kegg_pc
19:21:12 INFO  Server          :: Apache Jena Fuseki 4.9.0
19:21:12 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
19:21:12 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run
19:21:12 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run/shiro.ini
19:21:13 INFO  Server         

metric,count
Protein,1872
Protein with no PR,0
Protein with no mapping,927
ProteinReference,1704
Uniprot mappings,945
Unique Uniprot,795


19:22:09 INFO  Fuseki          :: [2] GET http://localhost:3030/kegg_pc/query?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0A%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0A%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0A%0A%23+Homo_sapiens-20170221.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/59/48887%23%3E+%0A%23%0A%23+Homo_sapiens-20210608.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome

#### Query on BioPAX export of PID from PathwayCommons

In [12]:
# TODO: change path
# Launch a local Fuseki server that loads the PathwayCommons BioPAX export of
# PID and the BioPAX level 3 ontology, served under the /pid_pc dataset.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.pid.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/pid_pc']
process = subprocess.Popen(command)
try:
    # fixed delay giving the server time to load the files before it is queried
    time.sleep(60)

    # execute SPARQL query (query2 matches xrefs whose db is "uniprot knowledgebase")
    sparql = SPARQLWrapper(endpointURL_pid_pc)
    sparql.setQuery(prefixes+query2)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export to CSV (the query is sent a second time with the CSV return format)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/pid_pc_mappings_uniprot.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even if the query or the export failed, so that the server
    # does not keep running and block the next cell from starting its own
    process.kill()
    process.wait()  # reap the child process
    time.sleep(60)  # original safety delay before the next server is started

19:23:10 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.pid.BIOPAX.owl
19:23:11 WARN  riot            :: [line: 77671, col: 87] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
19:23:16 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl
19:23:16 INFO  Server          :: Running in read-only mode for /pid_pc
19:23:16 INFO  Server          :: Apache Jena Fuseki 4.9.0
19:23:16 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
19:23:16 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run
19:23:16 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run/shiro.ini
19:23:16 INFO  Server          :

metric,count
Protein,6584
Protein with no PR,0
Protein with no mapping,175
ProteinReference,2735
Uniprot mappings,6409
Unique Uniprot,2620


19:24:10 INFO  Fuseki          :: [2] GET http://localhost:3030/pid_pc/query?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0A%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0A%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0A%0A%23+Homo_sapiens-20170221.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/59/48887%23%3E+%0A%23%0A%23+Homo_sapiens-20210608.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.

#### Query on BioPAX export of INOH from PathwayCommons

In [13]:
# TODO: change path
# Launch a local Fuseki server that loads the PathwayCommons BioPAX export of
# INOH and the BioPAX level 3 ontology, served under the /inoh_pc dataset.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.inoh.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/inoh_pc']
process = subprocess.Popen(command)
try:
    # fixed delay giving the server time to load the files before it is queried
    time.sleep(60)

    # execute SPARQL query (query2 matches xrefs whose db is "uniprot knowledgebase")
    sparql = SPARQLWrapper(endpointURL_inoh_pc)
    sparql.setQuery(prefixes+query2)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export to CSV (the query is sent a second time with the CSV return format)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/inoh_pc_mappings_uniprot.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even if the query or the export failed, so that the server
    # does not keep running and block the next cell from starting its own
    process.kill()
    process.wait()  # reap the child process
    time.sleep(60)  # original safety delay before the next server is started

19:25:11 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.inoh.BIOPAX.owl
19:25:12 WARN  riot            :: [line: 100079, col: 94] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
19:25:19 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl
19:25:19 INFO  Server          :: Running in read-only mode for /inoh_pc
19:25:19 INFO  Server          :: Apache Jena Fuseki 4.9.0
19:25:19 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
19:25:19 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run
19:25:19 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run/shiro.ini
19:25:19 INFO  Server        

metric,count
Protein,12804
Protein with no PR,0
Protein with no mapping,9938
ProteinReference,2296
Uniprot mappings,2866
Unique Uniprot,1642


19:26:11 INFO  Fuseki          :: [2] GET http://localhost:3030/inoh_pc/query?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0A%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0A%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0A%0A%23+Homo_sapiens-20170221.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/59/48887%23%3E+%0A%23%0A%23+Homo_sapiens-20210608.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome

#### Query on BioPAX export of NetPath from PathwayCommons

In [14]:
# TODO: change path
# Launch a local Fuseki server that loads the PathwayCommons BioPAX export of
# NetPath and the BioPAX level 3 ontology, served under the /netpath_pc dataset.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.netpath.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/netpath_pc']
process = subprocess.Popen(command)
try:
    # fixed delay giving the server time to load the files before it is queried
    time.sleep(60)

    # execute SPARQL query (query2 matches xrefs whose db is "uniprot knowledgebase")
    sparql = SPARQLWrapper(endpointURL_netpath_pc)
    sparql.setQuery(prefixes+query2)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export to CSV (the query is sent a second time with the CSV return format)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/netpath_pc_mappings_uniprot.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even if the query or the export failed, so that the server
    # does not keep running and block the next cell from starting its own
    process.kill()
    process.wait()  # reap the child process
    time.sleep(60)  # original safety delay before the next server is started

19:27:12 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.netpath.BIOPAX.owl
19:27:13 WARN  riot            :: [line: 83771, col: 87] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
19:27:14 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl
19:27:14 INFO  Server          :: Running in read-only mode for /netpath_pc
19:27:14 INFO  Server          :: Apache Jena Fuseki 4.9.0
19:27:15 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
19:27:15 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run
19:27:15 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2024/BioPAX_Review/BioPAXReview2024Codes/Figure4-5/Scripts/run/shiro.ini
19:27:15 INFO  Server   

metric,count
Protein,4861
Protein with no PR,0
Protein with no mapping,39
ProteinReference,1507
Uniprot mappings,4822
Unique Uniprot,1485


19:28:12 INFO  Fuseki          :: [2] GET http://localhost:3030/netpath_pc/query?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0A%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0A%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0A%0A%23+Homo_sapiens-20170221.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/59/48887%23%3E+%0A%23%0A%23+Homo_sapiens-20210608.owl%0A%23PREFIX+reactome%3A+%3Chttp%3A//www.react