## BioPAX abstraction: SIF view

> This notebook contains SPARQL queries that abstract a BioPAX graph into SIF binary interactions.

> The abstraction rules used here are from PathwayCommons: http://www.pathwaycommons.org/pc2/formats#sif_relations 

> These rules are used in the ChiBE software to perform BioPAX to SIF translation.

#### Import libraries and launch SPARQL endpoint

In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON, CSV, N3, XML, TURTLE
import subprocess
import time
import os
from requests.utils import requote_uri
from urllib.parse import quote
import re
import rdflib
import pandas as pd

In [2]:
# URL of the SPARQL endpoint queried by every rule cell below; the "/reactome"
# path matches the dataset name given to fuseki-server when it is launched.
endpoint_reactome = "http://localhost:3030/reactome"
# NOTE(review): not referenced anywhere else in the visible notebook.
rdfFormat = "turtle"
# Input files are resolved relative to the directory the notebook is run from.
current_directory = os.getcwd()
BioPAX_Ontology_file_path = os.path.join(current_directory, '../..', 'ReactomeData', 'biopax-level3.owl')
ReactomeBioPAX_file_path = os.path.join(current_directory, '../..', 'ReactomeData', 'Homo_sapiens_v92.owl')

In [3]:
def extract_prefix_mappings(prefixes_string):
    """
    Build a namespace-URI -> prefix lookup from SPARQL PREFIX declarations.

    Parameters:
    prefixes_string (str): Text holding ``PREFIX name: <uri>`` declarations

    Returns:
    dict: Namespace URI as key, declared prefix name as value
    """
    declaration = re.compile(r'PREFIX\s+(\w+):\s*<([^>]+)>', re.IGNORECASE)
    uri_to_prefix = {}
    for match in declaration.finditer(prefixes_string):
        name, namespace = match.groups()
        # A namespace declared more than once keeps the last prefix seen.
        uri_to_prefix[namespace] = name
    return uri_to_prefix

def convert_to_prefixed_uri(uri_string, prefix_mappings):
    """
    Convert a full URI to prefixed format.

    When several namespaces are a prefix of the URI, the longest one wins so
    that the most specific prefix is used (e.g. ``http://a/b#X`` becomes
    ``b:X`` rather than ``a:b#X`` when both ``http://a/`` and ``http://a/b#``
    are declared).

    Parameters:
    uri_string (str): Full URI string
    prefix_mappings (dict): Mapping of namespace URIs to prefixes

    Returns:
    str: URI in prefixed format (e.g., 'reactome:Protein'), or the original
         string unchanged if no declared namespace matches
    """
    best_base = max(
        (base for base in prefix_mappings if uri_string.startswith(base)),
        key=len,
        default=None,
    )
    if best_base is None:
        return uri_string  # Return original if no prefix matches
    return f"{prefix_mappings[best_base]}:{uri_string[len(best_base):]}"

def save_for_cytoscape(sparql, prefixes_string, output_file, format='csv', separator=','):
    """
    Save SPARQL CONSTRUCT results in a format compatible with Cytoscape,
    using prefix notation for URIs.

    Parameters:
    sparql (SPARQLWrapper): Configured SPARQLWrapper instance with query
    prefixes_string (str): String containing PREFIX declarations
    output_file (str): Path to save the output file
    format (str): Output format; 'csv' writes with ``separator``, any other
                  value (e.g. 'tsv') writes tab-separated
    separator (str): Column separator used when ``format`` is 'csv'

    Returns:
    pandas.DataFrame: One row per triple with columns 'Source',
                      'Interaction' and 'Target' (present even when the
                      query returns no triple)
    """
    columns = ['Source', 'Interaction', 'Target']

    # Extract prefix mappings
    prefix_mappings = extract_prefix_mappings(prefixes_string)

    # Get the results as an RDF graph serialized in Turtle
    sparql.setReturnFormat(TURTLE)
    results = sparql.queryAndConvert()
    if isinstance(results, bytes):
        results = results.decode('utf-8')

    # Load the serialized triples into an RDFlib graph
    g = rdflib.Graph()
    g.parse(data=results, format='turtle')

    # One dictionary per triple, each term abbreviated with its prefix
    triples_data = [
        dict(zip(columns, (convert_to_prefixed_uri(str(term), prefix_mappings) for term in triple)))
        for triple in g
    ]

    # Passing the columns explicitly keeps them on an empty result, so that
    # downstream column access does not fail.
    df = pd.DataFrame(triples_data, columns=columns)

    # Honour the requested separator for CSV; everything else is tab-separated
    sep = separator if format == 'csv' else '\t'
    df.to_csv(output_file, index=False, sep=sep)

    print(f"Saved {len(triples_data)} interactions to {output_file}")
    return df

def preview_network_data(df, n=5):
    """
    Print a short summary of a network table before loading it in Cytoscape.

    Parameters:
    df (pandas.DataFrame): Table with 'Source', 'Interaction' and 'Target' columns
    n (int): How many leading rows to display
    """
    total = len(df)
    print("\nPreview of network data ({} total interactions):".format(total))
    print("\nFirst {} interactions:".format(n))
    print(df.head(n))

    # Nodes are every distinct value seen in either endpoint column
    nodes = set(df['Source'].unique())
    nodes.update(df['Target'].unique())

    print("\nNetwork statistics:")
    print("Number of unique nodes: {}".format(len(nodes)))
    print("Number of interactions: {}".format(total))
    print("Unique interaction types:")
    interaction_types = sorted(df['Interaction'].unique())
    for kind in interaction_types:
        print("  - {}".format(kind))

In [None]:
# Reactome release number; it is part of the namespace of every Reactome
# BioPAX resource, so the `reactome:` prefix below is derived from it.
reactomeVersion = 92
# PREFIX header prepended to every SPARQL query and also parsed by
# save_for_cytoscape to abbreviate URIs in the exported tables.
# The `{}` placeholder is filled with reactomeVersion: a prefix that does not
# match the loaded release leaves Reactome URIs unabbreviated in the output.
prefixes = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX chebi: <http://purl.obolibrary.org/obo/chebi/>
PREFIX chebidb: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX chebirel: <http://purl.obolibrary.org/obo/CHEBI#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX bp3: <http://www.biopax.org/release/biopax-level3.owl#>
PREFIX reactome: <http://www.reactome.org/biopax/{}/48887#>
PREFIX abstraction:<http://abstraction/#>
""".format(reactomeVersion)

# Namespace of the BioPAX level 3 ontology (same URI as the bp3: prefix).
biopaxURI = "http://www.biopax.org/release/biopax-level3.owl#"

In [5]:
# Command line starting a Fuseki server that loads the Reactome export and the
# BioPAX ontology and serves them as the /reactome dataset.
# NOTE(review): the executable path is absolute and machine-specific.
command = [
    '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server',
    '--file', ReactomeBioPAX_file_path,
    '--file', BioPAX_Ontology_file_path,
    '/reactome']

# Start the server as a child process; the handle is kept so that the last
# cell can stop it.
process = subprocess.Popen(command)
# Fixed pause before the first query; no readiness check is performed.
time.sleep(60)

11:45:17 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2025/BioPAXAbstraction/01_SIFAbstraction/Scripts/../../ReactomeData/Homo_sapiens_v92.owl
11:45:19 WARN  riot            :: [line: 63261, col: 51] {W137} Input is large. Switching off checking for illegal reuse of rdf:ID's.
11:45:39 INFO  Server          :: Dataset: in-memory: load file: /home/cbeust/Projects/2025/BioPAXAbstraction/01_SIFAbstraction/Scripts/../../ReactomeData/biopax-level3.owl
11:45:39 INFO  Server          :: Running in read-only mode for /reactome
11:45:40 INFO  Server          :: Apache Jena Fuseki 4.9.0
11:45:40 INFO  Config          :: FUSEKI_HOME=/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0
11:45:40 INFO  Config          :: FUSEKI_BASE=/home/cbeust/Projects/2025/BioPAXAbstraction/01_SIFAbstraction/Scripts/run
11:45:40 INFO  Config          :: Shiro file: file:///home/cbeust/Projects/2025/BioPAXAbstraction/01_SIFAbstraction/Scripts/run/shiro.ini
11:45:40 INFO  Serve

## Translation of SIF abstraction rules in SPARQL queries

#### 1 - Controls State Change Of
First protein controls a reaction that changes the state of the second protein

Reaction test Reactome : R-HSA-202437, Phosphorylation of CARMA1

https://reactome.org/content/detail/R-HSA-202437

In [None]:
# Rule 1 - Controls State Change Of. The timer covers query, export and preview.
start_time = time.time()
# The edge goes from each controller *protein* (?enzymeProt, reached through
# complex components / generic members) to a left-side protein that shares its
# entityReference with a right-side protein carrying an EntityFeature.
# The subject is ?enzymeProt, as in the other "Controls ..." rules, so that a
# controlling complex is expanded to its proteins instead of being emitted
# itself.
query_controls_state_change_of = """
CONSTRUCT {
  ?enzymeProt abstraction:ControlsStateChangeOf ?protLeft
}
WHERE {
  ?reaction bp3:xref [
    bp3:db "Reactome" ;
    bp3:id ?reactionID
  ] .
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:BiochemicalReaction .

  ?catalysis bp3:controlled ?reaction .
  ?catalysis (rdf:type/rdfs:subClassOf*) bp3:Control .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme ((bp3:component|bp3:memberPhysicalEntity)*) ?enzymeProt .
  ?enzymeProt rdf:type bp3:Protein .

  ?reaction bp3:left/((bp3:component|bp3:memberPhysicalEntity)*) ?protLeft .
  ?protLeft rdf:type bp3:Protein .
  ?protLeft bp3:entityReference ?protRef .

  ?reaction bp3:right/((bp3:component|bp3:memberPhysicalEntity)*) ?protRight .
  ?protRight rdf:type bp3:Protein .
  ?protRight bp3:entityReference ?protRef .

  # feature
  ?protRight bp3:feature ?feature .
  ?feature (rdf:type/rdfs:subClassOf*) bp3:EntityFeature .
}
"""


# execute SPARQL query, export the triples as CSV and print a summary
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_state_change_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/01-ControlsStateChangeOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

NameError: name 'time' is not defined

#### 2 - Controls Transport Of

First protein controls a reaction that changes the cellular location of the second protein

Reaction test Reactome : R-HSA-168166, NF-kappa-B complex is transported from cytosol to nucleus 

In [7]:
# Rule 2 - Controls Transport Of. The timer covers query, export and preview.
start_time = time.time()
# The edge goes from each controller protein to a left-side protein that
# shares its entityReference with a right-side protein whose cellularLocation
# vocabulary differs.
query_transport_of = """
CONSTRUCT {
  ?enzymeProt abstraction:ControlsTransportOf ?protLeft
}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:BiochemicalReaction .

  ?reaction bp3:left/((bp3:component|bp3:memberPhysicalEntity)*) ?protLeft .
  ?protLeft rdf:type bp3:Protein .
  ?protLeft bp3:entityReference ?protRef .
  
  ?reaction bp3:right/((bp3:component|bp3:memberPhysicalEntity)*) ?protRight .
  ?protRight rdf:type bp3:Protein .
  ?protRight bp3:entityReference ?protRef .

  # Define cellular locations for Protein 1 and Protein 2
  ?protLeft bp3:cellularLocation ?cellularLocVocab1 .
  ?cellularLocVocab1 bp3:term ?location1 .
  ?protRight bp3:cellularLocation ?cellularLocVocab2 .
  ?cellularLocVocab2 bp3:term ?location2 .
  
  ?catalysis bp3:controlled ?reaction .
  ?catalysis (rdf:type/rdfs:subClassOf*) bp3:Control .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme ((bp3:component|bp3:memberPhysicalEntity)*) ?enzymeProt .
  ?enzymeProt rdf:type bp3:Protein .

  # Ensure cellular locations of protein 1 and protein 2 are different + ensure that they have the same reference
  FILTER (?cellularLocVocab1 != ?cellularLocVocab2)
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_transport_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/02-ControlsTransportOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

14:20:00 INFO  Fuseki          :: [11] GET http://localhost:3030/reactome?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0APREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/91/48887%23%3E%0APREFIX+abstraction%3A%3Chttp%3A//abstraction/%23%3E%0A%0ACONSTRUCT+%7B%0A++%3FenzymeProt+abstraction%3AControlsTransportOf+%3FprotLef

Saved 32639 interactions to ../Results/ReactomeHomoSapiensv92/02-ControlsTransportOf.csv

Preview of network data (32639 total interactions):

First 5 interactions:
                                              Source  \
0  http://www.reactome.org/biopax/92/48887#Protei...   
1  http://www.reactome.org/biopax/92/48887#Protei...   
2  http://www.reactome.org/biopax/92/48887#Protei...   
3  http://www.reactome.org/biopax/92/48887#Protei...   
4  http://www.reactome.org/biopax/92/48887#Protei...   

                       Interaction  \
0  abstraction:ControlsTransportOf   
1  abstraction:ControlsTransportOf   
2  abstraction:ControlsTransportOf   
3  abstraction:ControlsTransportOf   
4  abstraction:ControlsTransportOf   

                                              Target  
0  http://www.reactome.org/biopax/92/48887#Protei...  
1  http://www.reactome.org/biopax/92/48887#Protei...  
2  http://www.reactome.org/biopax/92/48887#Protei...  
3  http://www.reactome.org/biopax/92/48887#Protei

14:43:59 INFO  Fuseki          :: [12] POST http://localhost:3030/reactome/query
14:43:59 INFO  Fuseki          :: [12] Query = PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX dc: <http://purl.org/dc/elements/1.1/> PREFIX dcterms: <http://purl.org/dc/terms/> PREFIX chebi: <http://purl.obolibrary.org/obo/chebi/> PREFIX chebidb: <http://purl.obolibrary.org/obo/CHEBI_> PREFIX chebirel: <http://purl.obolibrary.org/obo/CHEBI#> PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#> PREFIX bp3: <http://www.biopax.org/release/biopax-level3.owl#> PREFIX reactome: <http://www.reactome.org/biopax/91/48887#> PREFIX abstraction:<http://abstraction/#>   CONSTRUCT { 	?enzymeProt abstraction:controlsPhosphorylationOf ?protLeft 	} WHERE {   VALUES ?reactionID {"R-HSA-388831"}   ?reaction bp3:xref [     bp3:db "Reactome" ;     bp3:id 

#### 3 - Controls Phosphorylation Of

First protein controls a reaction that changes the phosphorylation status of the second protein

Reaction test Reactome : R-HSA-388831, Phosphorylation of CD28

In [None]:
# Rule 3 - Controls Phosphorylation Of. The timer covers query, export and preview.
start_time = time.time()
# The edge goes from each controller protein to a left-side protein whose
# entityReference equals that of a right-side protein carrying a
# ModificationFeature whose modification term contains "phospho"
# (case-insensitive).
query_controls_phosphorylation_of = """
CONSTRUCT {
	?enzymeProt abstraction:ControlsPhosphorylationOf ?protLeft
	}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:BiochemicalReaction .
  
  ?catalysis bp3:controlled ?reaction .
  ?catalysis (rdf:type/rdfs:subClassOf*) bp3:Control .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme ((bp3:component|bp3:memberPhysicalEntity)*) ?enzymeProt .
  ?enzymeProt rdf:type bp3:Protein .
  
  ?reaction bp3:left/((bp3:component|bp3:memberPhysicalEntity)*) ?protLeft .
  ?protLeft rdf:type bp3:Protein .
  ?protLeft bp3:entityReference ?protLeftRef .
  
  ?reaction bp3:right/((bp3:component|bp3:memberPhysicalEntity)*) ?protRight .
  ?protRight rdf:type bp3:Protein .
  ?protRight bp3:entityReference ?protRightRef .

  # Complete phosphorylation check
  ?protRight bp3:feature ?feature .
  ?feature rdf:type bp3:ModificationFeature .
  ?feature bp3:modificationType ?modificationType .
  ?modificationType rdf:type bp3:SequenceModificationVocabulary .
  ?modificationType bp3:term ?modificationTerm .
  FILTER(CONTAINS(LCASE(?modificationTerm), "phospho"))
 
  # Ensure left and right proteins are the same (unphosphorylated vs phosphorylated)
  FILTER(?protLeftRef = ?protRightRef)
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_phosphorylation_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/03-ControlsPhosphorylationOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

15:27:38 INFO  Fuseki          :: [12] GET http://localhost:3030/reactome?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0APREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/91/48887%23%3E%0APREFIX+abstraction%3A%3Chttp%3A//abstraction/%23%3E%0A%0ACONSTRUCT+%7B%0A%09%3FenzymeProt+abstraction%3AControlsPhosphorylationOf+%3F

Saved 75621 interactions to ../Results/ReactomeHomoSapiensv92/03-ControlsPhosphorylationOf.csv

Preview of network data (75621 total interactions):

First 5 interactions:
                                              Source  \
0  http://www.reactome.org/biopax/92/48887#Protei...   
1  http://www.reactome.org/biopax/92/48887#Protei...   
2  http://www.reactome.org/biopax/92/48887#Protei...   
3  http://www.reactome.org/biopax/92/48887#Protei...   
4  http://www.reactome.org/biopax/92/48887#Protei...   

                             Interaction  \
0  abstraction:ControlsPhosphorylationOf   
1  abstraction:ControlsPhosphorylationOf   
2  abstraction:ControlsPhosphorylationOf   
3  abstraction:ControlsPhosphorylationOf   
4  abstraction:ControlsPhosphorylationOf   

                                              Target  
0  http://www.reactome.org/biopax/92/48887#Protei...  
1  http://www.reactome.org/biopax/92/48887#Protei...  
2  http://www.reactome.org/biopax/92/48887#Protei...  
3  http

15:37:16 INFO  Fuseki          :: [19] 200 OK (24.543 s)
15:39:39 INFO  Fuseki          :: [20] POST http://localhost:3030/reactome/sparql
15:39:39 INFO  Fuseki          :: [20] Query = PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX dc: <http://purl.org/dc/elements/1.1/> PREFIX dcterms: <http://purl.org/dc/terms/> PREFIX chebi: <http://purl.obolibrary.org/obo/chebi/> PREFIX chebidb: <http://purl.obolibrary.org/obo/CHEBI_> PREFIX chebirel: <http://purl.obolibrary.org/obo/CHEBI#> PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#> PREFIX bp3: <http://www.biopax.org/release/biopax-level3.owl#> PREFIX reactome: <http://www.reactome.org/biopax/91/48887#> PREFIX abstraction:<http://abstraction/#>   SELECT DISTINCT ?enzymeProt ?protLeft WHERE {   VALUES ?reactionID {"R-HSA-168166"}   ?reaction bp3:xref [     bp3:db "Rea

#### 4 - Controls Expression Of
First protein controls a conversion or a template reaction that changes expression of the second protein

Reaction test Reactome : R-HSA-6790038, Expression of HSP90B1

In [None]:
# Rule 4 - Controls Expression Of. The timer covers query, export and preview.
start_time = time.time()
# The edge goes from each protein controlling a TemplateReaction (through a
# TemplateReactionRegulation) to each protein found in the reaction products.
query_controls_expression_of = """ 
CONSTRUCT {
  ?enzymeProt abstraction:ControlsExpressionOf ?productProt
}
WHERE {
  # core template reaction pattern
  ?tr rdf:type bp3:TemplateReaction .
  ?tr bp3:product ?product .
  ?product ((bp3:component|bp3:memberPhysicalEntity)*) ?productProt .
  ?productProt rdf:type bp3:Protein .
  
  ?catalysis bp3:controlled ?tr .
  ?catalysis rdf:type bp3:TemplateReactionRegulation .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme ((bp3:component|bp3:memberPhysicalEntity)*) ?enzymeProt .
  ?enzymeProt rdf:type bp3:Protein .
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_expression_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/04-ControlsExpressionOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

Saved 1 interactions to ../Results/ReactomeHomoSapiensv92/04-ControlsExpressionOf.csv

Preview of network data (1 total interactions):

First 5 interactions:
                                              Source  \
0  http://www.reactome.org/biopax/92/48887#Protei...   

                        Interaction  \
0  abstraction:ControlsExpressionOf   

                                              Target  
0  http://www.reactome.org/biopax/92/48887#Protei...  

Network statistics:
Number of unique nodes: 2
Number of interactions: 1
Unique interaction types:
  - abstraction:ControlsExpressionOf
--- 0.02582406997680664 seconds ---


15:40:20 INFO  Fuseki          :: [21] GET http://localhost:3030/reactome?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0APREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/91/48887%23%3E%0APREFIX+abstraction%3A%3Chttp%3A//abstraction/%23%3E%0A+%0ACONSTRUCT+%7B%0A++%3FenzymeProt+abstraction%3AControlsExpressionOf+%3Fprodu

15:44:09 INFO  Fuseki          :: [22] POST http://localhost:3030/reactome/sparql
15:44:09 INFO  Fuseki          :: [22] Query = PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX dc: <http://purl.org/dc/elements/1.1/> PREFIX dcterms: <http://purl.org/dc/terms/> PREFIX chebi: <http://purl.obolibrary.org/obo/chebi/> PREFIX chebidb: <http://purl.obolibrary.org/obo/CHEBI_> PREFIX chebirel: <http://purl.obolibrary.org/obo/CHEBI#> PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#> PREFIX bp3: <http://www.biopax.org/release/biopax-level3.owl#> PREFIX reactome: <http://www.reactome.org/biopax/91/48887#> PREFIX abstraction:<http://abstraction/#>   CONSTRUCT {    ?enzymeProt1 abstraction:CatalysisPrecedes ?enzymeProt2 } WHERE {   VALUES ?reaction1ID {"R-HSA-266046"}   ?reaction1 bp3:xref [     bp3:db "Reactome" ;     bp3:id 

#### 5 - Catalysis Precedes
Equivalent of sequential_catalysis from Paxtools SIF rules
First protein controls a conversion whose output molecule is input to another conversion controlled by a second protein 

Reaction test Reactome : R-HSA-2142688, Synthesis of 5-eicosatetranoic acids

In [6]:
# Rule 5 - Catalysis Precedes. The timer covers query, export and preview.
start_time = time.time()
# The edge goes from a protein controlling ?reaction1 to a protein controlling
# ?reaction2, where a SmallMolecule on the right side of ?reaction1 is also on
# the left side of ?reaction2. The final FILTER compares the controllers
# (?enzymeA / ?enzymeB), not the proteins placed in the edge.
query_catalysis_precedes = """ 
CONSTRUCT {
   ?enzymeProtA abstraction:CatalysisPrecedes ?enzymeProtB
}
WHERE {
  ?reaction1 rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  ?reaction1 bp3:xref [
    bp3:db "Reactome" ;
    bp3:id ?reaction1ID
  ] .
  
  ?catalysis1 bp3:controlled ?reaction1 .
  ?catalysis1 (rdf:type/rdfs:subClassOf*) bp3:Control .
  ?catalysis1 bp3:controller ?enzymeA .
  ?enzymeA ((bp3:component|bp3:memberPhysicalEntity)*) ?enzymeProtA .
  ?enzymeProtA rdf:type bp3:Protein .
  
  ?reaction1 bp3:right/((bp3:component|bp3:memberPhysicalEntity)*) ?connectingMolecule .
  ?reaction2 bp3:left/((bp3:component|bp3:memberPhysicalEntity)*) ?connectingMolecule .
  ?connectingMolecule rdf:type bp3:SmallMolecule .
  
  ?reaction2 rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  ?reaction2 bp3:xref [
    bp3:db "Reactome" ;
    bp3:id ?reaction2ID
  ] .
  
  ?catalysis2 bp3:controlled ?reaction2 .
  ?catalysis2 (rdf:type/rdfs:subClassOf*) bp3:Control .
  ?catalysis2 bp3:controller ?enzymeB .
  ?enzymeB ((bp3:component|bp3:memberPhysicalEntity)*) ?enzymeProtB .
  ?enzymeProtB rdf:type bp3:Protein .

  FILTER (?enzymeA != ?enzymeB)
}

"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_catalysis_precedes)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/05-CatalysisPrecedes.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

11:46:26 INFO  Fuseki          :: [5] GET http://localhost:3030/reactome?query=%0APREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0APREFIX+rdfs%3A%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A//www.w3.org/2002/07/owl%23%3E%0APREFIX+xsd%3A+%3Chttp%3A//www.w3.org/2001/XMLSchema%23%3E%0APREFIX+dc%3A+%3Chttp%3A//purl.org/dc/elements/1.1/%3E%0APREFIX+dcterms%3A+%3Chttp%3A//purl.org/dc/terms/%3E%0APREFIX+chebi%3A+%3Chttp%3A//purl.obolibrary.org/obo/chebi/%3E%0APREFIX+chebidb%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI_%3E%0APREFIX+chebirel%3A+%3Chttp%3A//purl.obolibrary.org/obo/CHEBI%23%3E%0APREFIX+oboInOwl%3A+%3Chttp%3A//www.geneontology.org/formats/oboInOwl%23%3E%0APREFIX+bp3%3A+%3Chttp%3A//www.biopax.org/release/biopax-level3.owl%23%3E%0APREFIX+reactome%3A+%3Chttp%3A//www.reactome.org/biopax/91/48887%23%3E%0APREFIX+abstraction%3A%3Chttp%3A//abstraction/%23%3E%0A+%0ACONSTRUCT+%7B%0A+++%3FenzymeProtA+abstraction%3ACatalysisPrecedes+%3FenzymeP

KeyboardInterrupt: 

#### 6 - In Complex With
Equivalent of component from Paxtools SIF rules
Proteins are members of the same complex

Reaction test Reactome : R-HSA-2187368,  STUB1 (CHIP) ubiquitinates SMAD3 

In [None]:
# Rule 6 - In Complex With. The timer covers query, export and preview.
start_time = time.time()
# Case 1 pairs two distinct proteins reachable from the same complex (each
# unordered pair once, thanks to the STR() ordering filter).
# Case 2 emits a self-pair for a protein reachable from a complex component
# whose stoichiometric coefficient is greater than 1.
# NOTE(review): in case 1 the first FILTER is always satisfied because the
# ordering filter already forces ?prot1 != ?prot2 - confirm the intent.
query_in_complex_with = """
CONSTRUCT {
  ?prot1 abstraction:InComplexWith ?prot2
}
WHERE {
  # Case 1: Complex with several different proteins
  {
    ?complex rdf:type/rdfs:subClassOf* bp3:Complex .
    ?complex ((bp3:component|bp3:memberPhysicalEntity)*) ?prot1 .
    ?complex ((bp3:component|bp3:memberPhysicalEntity)*) ?prot2 .
    ?prot1 rdf:type bp3:Protein .
    ?prot2 rdf:type bp3:Protein .

    # Avoid pairing proteins from the same collection
    FILTER (?prot1 != ?prot2 || NOT EXISTS { ?prot1 bp3:memberPhysicalEntity+ ?x . ?prot2 bp3:memberPhysicalEntity+ ?y })

    # Avoid self-pairing and duplicate pairs
    FILTER (STR(?prot1) < STR(?prot2))
  }

  UNION

  # Case 2: Same protein with stoichiometry > 1
  {
    ?complex rdf:type/rdfs:subClassOf* bp3:Complex .
    # Check for stoichiometry > 1
    ?complex bp3:componentStoichiometry ?stoich .
    ?stoich bp3:physicalEntity ?entity1 .
    ?stoich bp3:stoichiometricCoefficient ?coeff .
    FILTER (?coeff > 1)

    ?entity1 ((bp3:component|bp3:memberPhysicalEntity)*) ?prot1 .
    ?prot1 rdf:type bp3:Protein .

    # For self-pairing, use the same entity
    BIND (?prot1 AS ?prot2)
  }
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
# The output goes to the same ReactomeHomoSapiensv92 directory as every other rule.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_in_complex_with)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/06-InComplexWith.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 7 - Interacts With 
Proteins are participants of the same MolecularInteraction

Problem : no instance of bp3:MolecularInteraction in Reactome

In [None]:
# Rule 7 - Interacts With. The timer covers query, export and preview.
start_time = time.time()
# The edge links two distinct proteins reachable from participants of the same
# MolecularInteraction (each unordered pair once, thanks to the STR() filter).
query_interacts_with = """
CONSTRUCT {
  ?Prot1 abstraction:InteractsWith ?Prot2
}
WHERE {
  # Core molecular interaction pattern
  ?MolecularInteraction (rdf:type/rdfs:subClassOf*) bp3:MolecularInteraction .
  ?MolecularInteraction bp3:participant ?Participant1 .
  ?MolecularInteraction bp3:participant ?Participant2 .

  ?Participant1 ((bp3:component|bp3:memberPhysicalEntity)*) ?Prot1 .
  ?Prot1 rdf:type bp3:Protein .
  ?Participant2 ((bp3:component|bp3:memberPhysicalEntity)*) ?Prot2 .
  ?Prot2 rdf:type bp3:Protein .

  FILTER (STR(?Prot1) < STR(?Prot2))
}
"""

# Run *this* rule's query (not the In Complex With one), export the triples as
# CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_interacts_with)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/07-InteractsWith.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 8 - Neighbor Of 
Proteins are participants or controllers of the same interaction

Reaction test Reactome : R-HSA-170844, Latent TGF-beta-1 is cleaved by FURIN

In [None]:
# Rule 8 - Neighbor Of. The timer covers query, export and preview.
start_time = time.time()
# The edge links two distinct proteins that are each reachable from the
# controllers, left side or right side of the same BiochemicalReaction.
# NOTE(review): the VALUES clause restricts the query to the single test
# reaction R-HSA-170844, whereas the other rule queries in this notebook have
# no such restriction - confirm whether this is intended.
# The OPTIONAL label / ID variables are not used in the CONSTRUCT template.
query_neighbor_of = """ 
CONSTRUCT {
   ?protein1 abstraction:NeighborOf ?protein2
}
WHERE {
  VALUES ?reactionID {"R-HSA-170844"}
  
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:BiochemicalReaction .
  ?reaction bp3:displayName ?reactionLabel .
  ?reaction bp3:xref [
    bp3:db "Reactome" ;
    bp3:id ?reactionID
  ] .
  
  ?reaction ((^bp3:controlled/bp3:controller)|bp3:left|bp3:right)/((bp3:component|bp3:memberPhysicalEntity)*) ?protein1 .
  ?protein1 rdf:type bp3:Protein .
  ?reaction ((^bp3:controlled/bp3:controller)|bp3:left|bp3:right)/((bp3:component|bp3:memberPhysicalEntity)*) ?protein2 .
  ?protein2 rdf:type bp3:Protein .
  FILTER (STR(?protein1) < STR(?protein2))
  
  OPTIONAL { ?protein1 bp3:displayName ?protein1Label . }
  OPTIONAL {
    ?protein1 bp3:xref [
      bp3:db "Reactome" ;
      bp3:id ?protein1ID
    ] .
  }
  
  OPTIONAL { ?protein2 bp3:displayName ?protein2Label . }
  OPTIONAL {
    ?protein2 bp3:xref [
      bp3:db "Reactome" ;
      bp3:id ?protein2ID
    ] .
  }
  
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_neighbor_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/08-NeighborOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 9 - Consumption controlled by
The small molecule is consumed by a reaction that is controlled by a protein 

Reaction test Reactome : R-HSA-2142688, Synthesis of 5-eicosatetranoic acids

In [None]:
# Rule 9 - Consumption Controlled By. The timer covers query, export and preview.
start_time = time.time()
# The edge goes from a SmallMolecule that is a direct left participant of a
# BiochemicalReaction to each protein reachable from a controller of that
# reaction. The pattern also requires a SmallMolecule on the right side, even
# though ?product is not part of the CONSTRUCT template.
query_consumption_controled_by = """  
CONSTRUCT {
  ?reactant abstraction:ConsumptionControlledBy ?enzymeProt
}
WHERE {
  # Core reaction pattern
  ?reaction rdf:type bp3:BiochemicalReaction .
  ?reactant rdf:type bp3:SmallMolecule .
  ?product rdf:type bp3:SmallMolecule .
 
  ?reaction bp3:left ?reactant .
  ?reaction bp3:right ?product .
  ?catalysis bp3:controlled ?reaction .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme ((bp3:component|bp3:memberPhysicalEntity)*) ?enzymeProt .
  ?enzymeProt rdf:type bp3:Protein .
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_consumption_controled_by)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/09-ConsumptionControledBy.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 10 - Controls Production 
The protein controls a reaction of which the small molecule is an output

Reaction test Reactome : R-HSA-2142688, Synthesis of 5-eicosatetranoic acids

In [None]:
# Rule 10 - Controls Production Of. The timer covers query, export and preview.
start_time = time.time()
# The edge goes from each protein reachable from the controller of a Catalysis
# to a SmallMolecule that is a direct right participant of the catalysed
# BiochemicalReaction. The pattern also requires a SmallMolecule on the left
# side, even though ?reactant is not part of the CONSTRUCT template.
query_controls_production = """ 
CONSTRUCT {
  ?enzymeProt abstraction:ControlsProductionOf ?product
}
WHERE {
  # Core reaction pattern
  ?reaction rdf:type bp3:BiochemicalReaction .
 
  ?reaction bp3:left ?reactant .
  ?reaction bp3:right ?product .
  ?reactant rdf:type bp3:SmallMolecule .
  ?product rdf:type bp3:SmallMolecule .
  
  ?catalysis bp3:controlled ?reaction .
  ?catalysis rdf:type bp3:Catalysis .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme ((bp3:component|bp3:memberPhysicalEntity)*) ?enzymeProt .
  ?enzymeProt rdf:type bp3:Protein .
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_production)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/10-ControlsProduction.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 11 - Controls Transport Of Chemical
The protein controls a reaction that changes the cellular location of the small molecule

Reaction test : R-HSA-70635, Urea Cycle

In [None]:
# Rule 11 - Controls Transport Of Chemical. The timer covers query, export and preview.
start_time = time.time()
# The edge goes from each protein reachable from the controller of a Catalysis
# to a left-side SmallMolecule that also appears on the right side of the
# catalysed reaction (same entityReference) in a different cellular location.
# Both constraints are required for the match to be a transport: without them
# any reaction with a small molecule on each side would qualify.
query_controls_transport_of_chemical = """
CONSTRUCT {
  ?enzymeProt abstraction:ControlsTransportOfChemical ?SmallMolecule1
}
WHERE {
  # Core reaction pattern
  ?SmallMolecule1 rdf:type bp3:SmallMolecule ;
                  bp3:cellularLocation ?cellularLocVocab1 .
  ?SmallMolecule2 rdf:type bp3:SmallMolecule ;
                  bp3:cellularLocation ?cellularLocVocab2 .

  # Same chemical on both sides: both participants share one entity reference
  ?SmallMolecule1 bp3:entityReference ?SmallMoleculeRef .
  ?SmallMolecule2 bp3:entityReference ?SmallMoleculeRef .
  ?cellularLocVocab1 bp3:term ?location1 .
  ?cellularLocVocab2 bp3:term ?location2 .

  ?reaction rdf:type bp3:BiochemicalReaction ;
            bp3:left ?SmallMolecule1 ;
            bp3:right ?SmallMolecule2 .

  ?catalysis bp3:controlled ?reaction .
  ?catalysis rdf:type bp3:Catalysis .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme ((bp3:component|bp3:memberPhysicalEntity)*) ?enzymeProt .
  ?enzymeProt rdf:type bp3:Protein .

  # Ensure the cellular location actually changes
  FILTER (?cellularLocVocab1 != ?cellularLocVocab2)
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_transport_of_chemical)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/11-ControlsTransportOfChemical.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 12 - Chemical Affects
A small molecule has an effect on the protein state

Reaction test Reactome : R-HSA-202437,  Phosphorylation of CARMA1 

In [None]:
# Rule 12 - Chemical Affects. The timer covers query, export and preview.
start_time = time.time()
# The edge goes from each SmallMolecule reachable from the controller of a
# Catalysis to a left-side protein whose entityReference equals that of a
# right-side protein carrying an EntityFeature.
query_chemical_affects = """  
CONSTRUCT {
  ?catalyzerChemical abstraction:ChemicalAffects ?protLeft
}
WHERE {
  # Core reaction pattern
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:BiochemicalReaction .
  ?reaction bp3:left/((bp3:component|bp3:memberPhysicalEntity)*) ?protLeft .
  ?protLeft rdf:type bp3:Protein .
  ?protLeft bp3:entityReference ?leftProteinRef .
  ?reaction bp3:right/((bp3:component|bp3:memberPhysicalEntity)*) ?protRight .
  ?protRight rdf:type bp3:Protein .
  ?protRight bp3:entityReference ?rightProteinRef . 
  
  ?catalysis bp3:controlled ?reaction .
  ?catalysis rdf:type bp3:Catalysis .
  ?catalysis bp3:controller ?catalyzer .
  ?catalyzer ((bp3:component|bp3:memberPhysicalEntity)*) ?catalyzerChemical .
  ?catalyzerChemical rdf:type bp3:SmallMolecule .

  # Complete modification check
  ?protRight bp3:feature ?feature .
  ?feature (rdf:type/rdfs:subClassOf*) bp3:EntityFeature .
 
  # Ensure left and right proteins are the same
  FILTER(?leftProteinRef = ?rightProteinRef)
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_chemical_affects)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/12-ChemicalAffects.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 13 - Reacts With
Small molecules are input to a biochemical reaction 

Reaction test Reactome : R-HSA-2142688, Synthesis of 5-eicosatetranoic acids

In [None]:
# Rule 13 - Reacts With. The timer covers query, export and preview.
start_time = time.time()
# The edge links two distinct SmallMolecules that are both direct left
# participants of the same BiochemicalReaction (each unordered pair once,
# thanks to the STR() ordering filter).
# The predicate is spelled ReactsWith to follow the PascalCase naming used by
# every other abstraction: predicate in this notebook.
query_reacts_with = """
CONSTRUCT {
  ?smallMolecule1 abstraction:ReactsWith ?smallMolecule2 .
}
WHERE {
  ?smallMolecule1 rdf:type bp3:SmallMolecule .
  ?smallMolecule2 rdf:type bp3:SmallMolecule .

  ?reaction bp3:left ?smallMolecule1 .
  ?reaction bp3:left ?smallMolecule2 .
  ?reaction rdf:type bp3:BiochemicalReaction .

  FILTER (STR(?smallMolecule1) < STR(?smallMolecule2))
}
"""

# Run the query on the endpoint, export the triples as CSV and print a summary.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_reacts_with)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiensv92/13-ReactsWith.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

In [7]:
# end process: ask the Fuseki server to stop and wait until it has exited,
# instead of killing it and sleeping for a fixed time. wait() also reaps the
# child so no zombie process is left behind.
process.terminate()
try:
    process.wait(timeout=60)
except subprocess.TimeoutExpired:
    # The server did not stop within 60 s: force it and collect its exit status.
    process.kill()
    process.wait()