# Querying of BioPAX exports of pathway databases: Get the number of instances of main BioPAX classes

#### Import libraries

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON, CSV
import IPython
import subprocess
import time

#### Define the URLs of the local SPARQL endpoints

In [None]:
# Every endpoint shares the same host, port and "/query" suffix; only the dataset
# name in the middle of the URL differs.
_queryURLTemplate = "http://localhost:3030/{}/query"

# Reactome standalone export (version 65)
endpointURL_reactome_v65 = _queryURLTemplate.format("reactome_v65")
# Exports named with the "_pc" suffix
endpointURL_reactome_pc = _queryURLTemplate.format("reactome_pc")
endpointURL_panther_pc = _queryURLTemplate.format("panther_pc")
endpointURL_pathbank_pc = _queryURLTemplate.format("pathbank_pc")
endpointURL_humancyc_pc = _queryURLTemplate.format("humancyc_pc")
endpointURL_kegg_pc = _queryURLTemplate.format("kegg_pc")
endpointURL_pid_pc = _queryURLTemplate.format("pid_pc")
endpointURL_inoh_pc = _queryURLTemplate.format("inoh_pc")
endpointURL_netpath_pc = _queryURLTemplate.format("netpath_pc")

# Name of an RDF serialization format
rdfFormat = "turtle"

#### Define RDF prefixes for SPARQL queries

In [None]:
# Reactome release number; it fills the single `{}` placeholder of the
# `reactome:` PREFIX declaration at the end of the string below.
reactomeVersion = 65 
# SPARQL prologue that the query cells prepend to the query text (`prefixes+query`).
# Lines starting with '#' inside the string are SPARQL comments: they are part of
# the string and are sent to the endpoint together with the query.
prefixes = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>

PREFIX chebi: <http://purl.obolibrary.org/obo/chebi/>
PREFIX chebidb: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX chebirel: <http://purl.obolibrary.org/obo/CHEBI#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

PREFIX bp3: <http://www.biopax.org/release/biopax-level3.owl#>

# Homo_sapiens-20170221.owl
#PREFIX reactome: <http://www.reactome.org/biopax/59/48887#> 
#
# Homo_sapiens-20210608.owl
#PREFIX reactome: <http://www.reactome.org/biopax/77/48887#>
#
# Homo_sapiens-20220614.owl
#PREFIX reactome: <http://www.reactome.org/biopax/81/48887#>
#
# Homo_sapiens-20221130.owl
#PREFIX reactome: <http://www.reactome.org/biopax/83/48887#>

PREFIX reactome: <http://www.reactome.org/biopax/{}/48887#>
""".format(reactomeVersion)

# Namespace IRI of the BioPAX level 3 ontology (the same IRI as the `bp3:` prefix above).
biopaxURI = "http://www.biopax.org/release/biopax-level3.owl#"

#### Function to display the results of SPARQL queries

In [None]:
def displaySparqlResults(results):
    '''
    Displays as HTML the result of a SPARQLWrapper query in a Jupyter notebook.

    The table has one column per variable listed in results['head']['vars'] and
    one row per entry of results['results']['bindings']. A variable that is not
    bound in a row is rendered as a non-breaking space. Variable names and values
    are HTML-escaped, so characters such as '<' or '&' are displayed literally
    instead of being interpreted as markup.

        Parameters:
            results (dictionary): the result of a call to SPARQLWrapper.query().convert()
                (JSON return format, i.e. a dictionary with 'head' and 'results' keys)
    '''
    # Local import so that this cell does not depend on another cell for `html`
    # (standard library).
    from html import escape

    variableNames = results['head']['vars']

    # Header row: one <th> per variable name.
    headerCells = ''.join('<th>{}</th>'.format(escape(vName)) for vName in variableNames)

    # Body: one <tr> per binding, one <td> per variable, in header order.
    bodyRows = []
    for row in results['results']['bindings']:
        cells = [escape(row[vName]['value']) if vName in row else '&nbsp;' for vName in variableNames]
        bodyRows.append('<tr>{}</tr>'.format(''.join('<td>{}</td>'.format(cell) for cell in cells)))

    tableCode = '<table><tr>{}</tr>{}</table>'.format(headerCells, ''.join(bodyRows))
    IPython.display.display(IPython.display.HTML(tableCode))

## Count the number of instances of BioPAX classes in BioPAX exports of pathway databases

TODO: before running the cells below, update the paths to the Fuseki server, to the BioPAX files and to the BioPAX ontology in the commands that launch the SPARQL endpoints.

#### Define SPARQL query

In [None]:
# SPARQL query that counts, for each BioPAX class listed in the VALUES clause, the
# distinct resources whose rdf:type is that class or reaches it through a chain of
# rdfs:subClassOf links.
# The pattern is wrapped in OPTIONAL so that a class without any instance still
# appears in the result (with a count of 0) instead of being dropped.
# The text has no PREFIX declarations of its own: it is meant to be appended to
# `prefixes`, which declares bp3:, rdf: and rdfs:.
query = """
SELECT ?class_of_interest (COUNT(DISTINCT ?instances) AS ?nb_instances) 
WHERE {  
  # BioPAX classe for which we want to count the number of instances
  VALUES ?class_of_interest { bp3:SmallMolecule bp3:Interaction bp3:Pathway bp3:PathwayStep bp3:BiochemicalReaction bp3:Protein bp3:Dna bp3:Rna }
  OPTIONAL {
  ?instances rdf:type/(rdfs:subClassOf*) ?class_of_interest .
  }
}
GROUP BY ?class_of_interest
"""

#### Query on Reactome BioPAX standalone export version 65

In [None]:
# Start a Fuseki server over the Reactome v65 standalone export and the BioPAX
# ontology, run the class-count query against it, display and export the result,
# then stop the server.
# The ontology file is presumably loaded so that the rdfs:subClassOf* step of the
# query can reach subclasses -- confirm.
# TODO: change path
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/ReactomeBioPAX/_00_Reactome_Data_v65/Homo_sapiens.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/reactome_v65']
process = subprocess.Popen(command)
try:
    # fixed delay to give the server time to start before it is queried
    time.sleep(60)

    # execute SPARQL query
    sparql = SPARQLWrapper(endpointURL_reactome_v65)
    sparql.setQuery(prefixes+query)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export results to CSV (the converted CSV result is written as-is in binary mode)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/reactome_v65_counts_bp_classes.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even when a step above raised: otherwise the server would keep
    # running in the background after the cell has failed
    process.kill()
    # collect the exit status so the killed child does not linger as a zombie
    process.wait()
# pause before the next cell launches another server
time.sleep(60)

#### Query on BioPAX export of Reactome from PathwayCommons

In [None]:
# Start a Fuseki server over the PathwayCommons export of Reactome and the BioPAX
# ontology, run the class-count query against it, display and export the result,
# then stop the server.
# The ontology file is presumably loaded so that the rdfs:subClassOf* step of the
# query can reach subclasses -- confirm.
# TODO: change path
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.reactome.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/reactome_pc']
process = subprocess.Popen(command)
try:
    # fixed delay to give the server time to start before it is queried
    time.sleep(60)

    # execute SPARQL query
    sparql = SPARQLWrapper(endpointURL_reactome_pc)
    sparql.setQuery(prefixes+query)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export results to CSV (the converted CSV result is written as-is in binary mode)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/reactome_pc_counts_bp_classes.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even when a step above raised: otherwise the server would keep
    # running in the background after the cell has failed
    process.kill()
    # collect the exit status so the killed child does not linger as a zombie
    process.wait()
# pause before the next cell launches another server
time.sleep(60)

#### Query on BioPAX export of Panther from PathwayCommons

In [None]:
# Start a Fuseki server over the PathwayCommons export of Panther and the BioPAX
# ontology, run the class-count query against it, display and export the result,
# then stop the server.
# The ontology file is presumably loaded so that the rdfs:subClassOf* step of the
# query can reach subclasses -- confirm.
# TODO: change path
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.panther.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/panther_pc']
process = subprocess.Popen(command)
try:
    # fixed delay to give the server time to start before it is queried
    time.sleep(60)

    # execute SPARQL query
    sparql = SPARQLWrapper(endpointURL_panther_pc)
    sparql.setQuery(prefixes+query)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export results to CSV (the converted CSV result is written as-is in binary mode)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/panther_pc_counts_bp_classes.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even when a step above raised: otherwise the server would keep
    # running in the background after the cell has failed
    process.kill()
    # collect the exit status so the killed child does not linger as a zombie
    process.wait()
# pause before the next cell launches another server
time.sleep(60)

#### Query on BioPAX export of PathBank from PathwayCommons

In [None]:
# Start a Fuseki server over the PathwayCommons export of PathBank and the BioPAX
# ontology, run the class-count query against it, display and export the result,
# then stop the server.
# The ontology file is presumably loaded so that the rdfs:subClassOf* step of the
# query can reach subclasses -- confirm.
# TODO: change path
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.pathbank.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/pathbank_pc']
process = subprocess.Popen(command)
try:
    # fixed delay to give the server time to start before it is queried
    time.sleep(60)

    # execute SPARQL query
    sparql = SPARQLWrapper(endpointURL_pathbank_pc)
    sparql.setQuery(prefixes+query)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export results to CSV (the converted CSV result is written as-is in binary mode)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/pathbank_pc_counts_bp_classes.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even when a step above raised: otherwise the server would keep
    # running in the background after the cell has failed
    process.kill()
    # collect the exit status so the killed child does not linger as a zombie
    process.wait()
# pause before the next cell launches another server
time.sleep(60)

#### Query on BioPAX export of HumanCyc from PathwayCommons

In [None]:
# Start a Fuseki server over the PathwayCommons export of HumanCyc and the BioPAX
# ontology, run the class-count query against it, display and export the result,
# then stop the server.
# The ontology file is presumably loaded so that the rdfs:subClassOf* step of the
# query can reach subclasses -- confirm.
# TODO: change path
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.humancyc.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/humancyc_pc']
process = subprocess.Popen(command)
try:
    # fixed delay to give the server time to start before it is queried
    time.sleep(60)

    # execute SPARQL query
    sparql = SPARQLWrapper(endpointURL_humancyc_pc)
    sparql.setQuery(prefixes+query)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export results to CSV (the converted CSV result is written as-is in binary mode)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/humancyc_pc_counts_bp_classes.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even when a step above raised: otherwise the server would keep
    # running in the background after the cell has failed
    process.kill()
    # collect the exit status so the killed child does not linger as a zombie
    process.wait()
# pause before the next cell launches another server
time.sleep(60)

#### Query on BioPAX export of KEGG from PathwayCommons

In [None]:
# Start a Fuseki server over the PathwayCommons export of KEGG and the BioPAX
# ontology, run the class-count query against it, display and export the result,
# then stop the server.
# The ontology file is presumably loaded so that the rdfs:subClassOf* step of the
# query can reach subclasses -- confirm.
# TODO: change path
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.kegg.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/kegg_pc']
process = subprocess.Popen(command)
try:
    # fixed delay to give the server time to start before it is queried
    time.sleep(60)

    # execute SPARQL query
    sparql = SPARQLWrapper(endpointURL_kegg_pc)
    sparql.setQuery(prefixes+query)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export results to CSV (the converted CSV result is written as-is in binary mode)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/kegg_pc_counts_bp_classes.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even when a step above raised: otherwise the server would keep
    # running in the background after the cell has failed
    process.kill()
    # collect the exit status so the killed child does not linger as a zombie
    process.wait()
# pause before the next cell launches another server
time.sleep(60)

#### Query on BioPAX export of PID from PathwayCommons

In [None]:
# Start a Fuseki server over the PathwayCommons export of PID and the BioPAX
# ontology, run the class-count query against it, display and export the result,
# then stop the server.
# The ontology file is presumably loaded so that the rdfs:subClassOf* step of the
# query can reach subclasses -- confirm.
# TODO: change path
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.pid.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/pid_pc']
process = subprocess.Popen(command)
try:
    # fixed delay to give the server time to start before it is queried
    time.sleep(60)

    # execute SPARQL query
    sparql = SPARQLWrapper(endpointURL_pid_pc)
    sparql.setQuery(prefixes+query)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export results to CSV (the converted CSV result is written as-is in binary mode)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/pid_pc_counts_bp_classes.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even when a step above raised: otherwise the server would keep
    # running in the background after the cell has failed
    process.kill()
    # collect the exit status so the killed child does not linger as a zombie
    process.wait()
# pause before the next cell launches another server
time.sleep(60)

#### Query on BioPAX export of INOH from PathwayCommons

In [None]:
# Start a Fuseki server over the PathwayCommons export of INOH and the BioPAX
# ontology, run the class-count query against it, display and export the result,
# then stop the server.
# The ontology file is presumably loaded so that the rdfs:subClassOf* step of the
# query can reach subclasses -- confirm.
# TODO: change path
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.inoh.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/inoh_pc']
process = subprocess.Popen(command)
try:
    # fixed delay to give the server time to start before it is queried
    time.sleep(60)

    # execute SPARQL query
    sparql = SPARQLWrapper(endpointURL_inoh_pc)
    sparql.setQuery(prefixes+query)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export results to CSV (the converted CSV result is written as-is in binary mode)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/inoh_pc_counts_bp_classes.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even when a step above raised: otherwise the server would keep
    # running in the background after the cell has failed
    process.kill()
    # collect the exit status so the killed child does not linger as a zombie
    process.wait()
# pause before the next cell launches another server
time.sleep(60)

#### Query on BioPAX export of NetPath from PathwayCommons

In [None]:
# Start a Fuseki server over the PathwayCommons export of NetPath and the BioPAX
# ontology, run the class-count query against it, display and export the result,
# then stop the server.
# The ontology file is presumably loaded so that the rdfs:subClassOf* step of the
# query can reach subclasses -- confirm.
# TODO: change path
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/Compare_BioPAX_Files/PathwayCommonsFiles/PathwayCommons12.netpath.BIOPAX.owl',
    '--file', '/home/cbeust/Projects/2024/BioPAX_Review/BioPAX_Ontology/biopax-level3.owl',
    '/netpath_pc']
process = subprocess.Popen(command)
try:
    # fixed delay to give the server time to start before it is queried
    time.sleep(60)

    # execute SPARQL query
    sparql = SPARQLWrapper(endpointURL_netpath_pc)
    sparql.setQuery(prefixes+query)

    # display results
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    displaySparqlResults(results)

    # export results to CSV (the converted CSV result is written as-is in binary mode)
    sparql.setReturnFormat(CSV)
    results = sparql.query().convert()
    with open("../Results/netpath_pc_counts_bp_classes.csv", "wb") as f:
        f.write(results)
finally:
    # end process, even when a step above raised: otherwise the server would keep
    # running in the background after the cell has failed
    process.kill()
    # collect the exit status so the killed child does not linger as a zombie
    process.wait()
# pause before the next cell launches another server
time.sleep(60)