## BioPAX abstraction: SIF view

> This code contains SPARQL queries that abstract a BioPAX graph into SIF binary interactions.

> The abstraction rules used here are from PathwayCommons: http://www.pathwaycommons.org/pc2/formats#sif_relations 

> These rules are used in the ChiBE software to perform BioPAX to SIF translation.

#### Import libraries and launch SPARQL endpoint

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON, CSV, N3, XML, TURTLE
import subprocess
import time
import os
from requests.utils import requote_uri
from urllib.parse import quote
import re
import rdflib
import pandas as pd

In [None]:
# SPARQL endpoint that every rule below sends its query to.
endpoint_reactome = "http://localhost:3030/reactome"
rdfFormat = "turtle"

# Input files are located relative to the directory the notebook is run from.
current_directory = os.getcwd()
data_directory = os.path.join(current_directory, '../', 'Data')
BioPAX_Ontology_file_path = os.path.join(data_directory, 'biopax_ontology/biopax-level3.owl')
ReactomeBioPAX_file_path = os.path.join(data_directory, 'reactome/Homo_sapiens_v94.owl')

In [None]:
def extract_prefix_mappings(prefixes_string):
    """
    Build a namespace-URI -> prefix lookup from SPARQL PREFIX declarations.

    Parameters:
    prefixes_string (str): Text containing zero or more ``PREFIX name: <uri>`` declarations

    Returns:
    dict: Namespace URI mapped to its prefix name; when the same URI is
          declared several times, the last declaration wins
    """
    declaration = re.compile(r'PREFIX\s+(\w+):\s*<([^>]+)>', re.IGNORECASE)

    mappings = {}
    for match in declaration.finditer(prefixes_string):
        name, namespace = match.groups()
        mappings[namespace] = name
    return mappings

def convert_to_prefixed_uri(uri_string, prefix_mappings):
    """
    Convert a full URI to prefixed format.

    When several namespace URIs are a prefix of ``uri_string`` (for example
    ``http://a/`` and ``http://a/b/``), the longest one is used so that the
    most specific prefix is chosen regardless of declaration order.

    Parameters:
    uri_string (str): Full URI string
    prefix_mappings (dict): Mapping of namespace URIs to prefixes

    Returns:
    str: URI in prefixed format (e.g., 'reactome:Protein'), or the
         unchanged input when no namespace matches
    """
    best_base = max(
        (base for base in prefix_mappings if uri_string.startswith(base)),
        key=len,
        default=None,
    )
    if best_base is None:
        return uri_string  # Return original if no prefix matches
    return f"{prefix_mappings[best_base]}:{uri_string[len(best_base):]}"

def save_for_cytoscape(sparql, prefixes_string, output_file, format='csv', separator=','):
    """
    Save SPARQL CONSTRUCT results in a format compatible with Cytoscape,
    using prefix notation for URIs.

    The returned DataFrame always has the columns 'Source', 'Interaction'
    and 'Target', even when the query produces no triple, so that callers
    can index those columns unconditionally.

    Parameters:
    sparql (SPARQLWrapper): Configured SPARQLWrapper instance with query
    prefixes_string (str): String containing PREFIX declarations
    output_file (str): Path to save the output file
    format (str): Output format ('csv' or 'tsv')
    separator (str): Column separator. The default ',' is replaced by a tab
                     when format is not 'csv', so format='tsv' works without
                     passing a separator; any other value is used as given.

    Returns:
    pandas.DataFrame: One row per constructed triple
    """
    columns = ['Source', 'Interaction', 'Target']

    # Extract prefix mappings
    prefix_mappings = extract_prefix_mappings(prefixes_string)

    # An untouched default separator follows the requested format
    if format != 'csv' and separator == ',':
        separator = '\t'

    # Get the results as an RDF graph
    sparql.setReturnFormat(TURTLE)
    results = sparql.queryAndConvert()
    if isinstance(results, bytes):
        results = results.decode('utf-8')

    # Create an RDFlib graph
    g = rdflib.Graph()
    g.parse(data=results, format='turtle')

    # Convert triples to a list of dictionaries with prefixed URIs
    triples_data = [
        {
            column: convert_to_prefixed_uri(str(term), prefix_mappings)
            for column, term in zip(columns, triple)
        }
        for triple in g
    ]

    # Explicit columns keep the frame well-formed when there are no triples
    df = pd.DataFrame(triples_data, columns=columns)

    # Save to file
    df.to_csv(output_file, index=False, sep=separator)

    print(f"Saved {len(triples_data)} interactions to {output_file}")
    return df

def preview_network_data(df, n=5):
    """
    Preview the network data before importing into Cytoscape.

    Prints the first rows followed by basic statistics (unique nodes,
    number of interactions, interaction types). When the DataFrame has no
    rows, a short notice is printed instead of the statistics, because an
    empty query result may not carry the 'Source'/'Target' columns at all.

    Parameters:
    df (pandas.DataFrame): DataFrame containing the network data
    n (int): Number of rows to preview
    """
    print(f"\nPreview of network data ({len(df)} total interactions):")
    if df.empty:
        print("No interactions to preview.")
        return

    print(f"\nFirst {n} interactions:")
    print(df.head(n))

    # Print some basic network statistics
    unique_nodes = set(df['Source'].unique()) | set(df['Target'].unique())
    print("\nNetwork statistics:")
    print(f"Number of unique nodes: {len(unique_nodes)}")
    print(f"Number of interactions: {len(df)}")
    print("Unique interaction types:")
    for interaction in sorted(df['Interaction'].unique()):
        print(f"  - {interaction}")

In [None]:
# Namespace of the BioPAX level 3 ontology (same URI as the bp3: prefix below).
biopaxURI = "http://www.biopax.org/release/biopax-level3.owl#"

# Reactome release number.
reactomeVersion = 94

# PREFIX header prepended to every query; it is also parsed to shorten URIs in the exported files.
# NOTE(review): the reactome: namespace hard-codes release 40 while reactomeVersion is 94 -
# confirm it matches the base URI of the loaded export, otherwise Reactome URIs are
# written to the CSV files unshortened.
prefixes = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX chebi: <http://purl.obolibrary.org/obo/chebi/>
PREFIX chebidb: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX chebirel: <http://purl.obolibrary.org/obo/CHEBI#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX bp3: <http://www.biopax.org/release/biopax-level3.owl#>
PREFIX reactome: <http://www.reactome.org/biopax/40/48887#>
PREFIX abstraction:<http://abstraction/#>
"""

In [None]:
# Launch a local Fuseki server on the Reactome export and the BioPAX ontology;
# '/reactome' is the service name that endpoint_reactome points to.
fuseki_server = '/home/cbeust/Softwares/JenaFuseki/apache-jena-fuseki-4.9.0/fuseki-server'
command = [fuseki_server]
for rdf_file in (ReactomeBioPAX_file_path, BioPAX_Ontology_file_path):
    command += ['--file', rdf_file]
command.append('/reactome')

process = subprocess.Popen(command)
# Fixed pause before the first query is sent.
# NOTE(review): presumably this leaves time for the server to load both files - confirm 60 s is enough.
time.sleep(60)

## Translation of SIF abstraction rules in SPARQL queries

#### 1 - Controls State Change Of
First protein controls a reaction that changes the state of the second protein

In [None]:
# Rule 1 - ControlsStateChangeOf.
# The pattern selects a Conversion (or subclass instance) that is the target of
# a Control whose controller is a Protein, and that has a Protein on its left
# side and a Protein on its right side sharing one entityReference; the
# right-side Protein must carry at least one EntityFeature. One triple is built
# per (controller reference, participant reference) pair, and pairs whose two
# references are identical are filtered out.
start_time = time.time()
query_controls_state_change_of = """ 
CONSTRUCT {
  ?enzymeRef abstraction:ControlsStateChangeOf ?protRef
}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  
  ?catalysis bp3:controlled ?reaction .
  ?catalysis (rdf:type/rdfs:subClassOf*) bp3:Control .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme rdf:type bp3:Protein .
  ?enzyme bp3:entityReference ?enzymeRef .
  ?enzymeRef rdf:type/(rdfs:subClassOf*) bp3:EntityReference .
  
  ?reaction bp3:left ?protLeft .
  ?protLeft rdf:type bp3:Protein .
  ?protLeft bp3:entityReference ?protRef .
  
  ?reaction bp3:right ?protRight .
  ?protRight rdf:type bp3:Protein .
  ?protRight bp3:entityReference ?protRef .

  ?protRef rdf:type/(rdfs:subClassOf*) bp3:EntityReference .

  ?protRight bp3:feature ?feature .
  ?feature rdf:type/(rdfs:subClassOf*) bp3:EntityFeature .
  FILTER (?enzymeRef != ?protRef)
}
"""

# Send the query (prefix header + pattern) to the Reactome endpoint, export the
# constructed edges to CSV, print a summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_state_change_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/01-ControlsStateChangeOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 2 - Controls Transport Of

First protein controls a reaction that changes the cellular location of the second protein

In [None]:
# Rule 2 - ControlsTransportOf.
# The pattern selects a Conversion with a Protein on the left and a Protein on
# the right that share one ProteinReference but have different cellularLocation
# values, controlled by a Control whose controller is a Protein. One triple is
# built per (controller reference, transported protein reference) pair; pairs
# whose two references are identical are filtered out.
start_time = time.time()
query_transport_of = """
CONSTRUCT {
  ?enzymeRef abstraction:ControlsTransportOf ?protRef
}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:Conversion .

  ?reaction bp3:left ?protLeft .
  ?protLeft rdf:type bp3:Protein .
  ?protLeft bp3:entityReference ?protRef .
  
  ?reaction bp3:right ?protRight .
  ?protRight rdf:type bp3:Protein .
  ?protRight bp3:entityReference ?protRef .

  ?protRef rdf:type bp3:ProteinReference . 

  # Define cellular locations for Protein 1 and Protein 2
  ?protLeft bp3:cellularLocation ?cellularLocVocab1 .
  ?protRight bp3:cellularLocation ?cellularLocVocab2 .
  
  ?catalysis bp3:controlled ?reaction .
  ?catalysis (rdf:type/rdfs:subClassOf*) bp3:Control .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme rdf:type bp3:Protein .
  ?enzyme bp3:entityReference ?enzymeRef .
  ?enzymeRef rdf:type bp3:ProteinReference .

  # Ensure cellular locations of protein 1 and protein 2 are different + ensure that they have the same reference
  FILTER (?cellularLocVocab1 != ?cellularLocVocab2)
  FILTER (?enzymeRef != ?protRef)
}

"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_transport_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/02-ControlsTransportOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 3 - Controls Phosphorylation Of

First protein controls a reaction that changes the phosphorylation status of the second protein

Reaction test Reactome : R-HSA-388831, Phosphorylation of CD28

In [None]:
# Rule 3 - ControlsPhosphorylationOf.
# Same controller/left/right structure as rule 1, but the right-side Protein
# must carry a ModificationFeature whose SequenceModificationVocabulary term
# contains "phospho" (compared in lower case). There is no filter excluding
# identical controller and participant references, so self-edges are kept.
start_time = time.time()
query_controls_phosphorylation_of = """
CONSTRUCT {
	?enzymeRef abstraction:ControlsPhosphorylationOf ?protRef
	}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  
  ?catalysis bp3:controlled ?reaction .
  ?catalysis rdf:type/(rdfs:subClassOf*) bp3:Control .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme rdf:type bp3:Protein .
  ?enzyme bp3:entityReference ?enzymeRef .
  ?enzymeRef rdf:type/(rdfs:subClassOf*) bp3:EntityReference .
  
  ?reaction bp3:left ?protLeft .
  ?protLeft rdf:type bp3:Protein .
  ?protLeft bp3:entityReference ?protRef .
  
  ?reaction bp3:right ?protRight .
  ?protRight rdf:type bp3:Protein .
  ?protRight bp3:entityReference ?protRef .

  ?protRef rdf:type/(rdfs:subClassOf*) bp3:EntityReference .

  # Complete phosphorylation check
  ?protRight bp3:feature ?feature .
  ?feature rdf:type bp3:ModificationFeature .
  ?feature bp3:modificationType ?modificationType .
  ?modificationType rdf:type bp3:SequenceModificationVocabulary .
  ?modificationType bp3:term ?modificationTerm .
  FILTER(CONTAINS(LCASE(?modificationTerm), "phospho"))
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_phosphorylation_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/03-ControlsPhosphorylationOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 4 - Controls Expression Of
First protein controls a conversion or a template reaction that changes expression of the second protein

In [None]:
# Rule 4 - ControlsExpressionOf.
# Two alternative shapes are accepted:
#   - a TemplateReaction whose product is a Protein, controlled by a
#     TemplateReactionRegulation;
#   - a Conversion with a Protein on its right side, controlled by a Control.
# The alternatives are combined with UNION. Two consecutive OPTIONAL groups
# would not express "either": both groups bind ?reaction, ?product and
# ?catalysis, so as soon as the first group matches, the second can only
# extend solutions that already satisfy the first and contributes nothing new.
# The shared tail resolves the product and the Protein controller to their
# ProteinReference.
start_time = time.time()
query_controls_expression_of = """
CONSTRUCT {
  ?enzymeRef abstraction:ControlsExpressionOf ?productRef
}
WHERE {
  {
    ?reaction rdf:type/(rdfs:subClassOf*) bp3:TemplateReaction .
    ?reaction bp3:product ?product .
    ?product rdf:type bp3:Protein .
    ?catalysis bp3:controlled ?reaction .
    ?catalysis rdf:type bp3:TemplateReactionRegulation .
  }
  UNION
  {
    ?reaction rdf:type/(rdfs:subClassOf*) bp3:Conversion .
    ?reaction bp3:right ?product .
    ?product rdf:type bp3:Protein .
    ?catalysis bp3:controlled ?reaction .
    ?catalysis rdf:type/(rdfs:subClassOf*) bp3:Control .
  }

  ?product bp3:entityReference ?productRef .
  ?productRef rdf:type bp3:ProteinReference .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme rdf:type bp3:Protein .
  ?enzyme bp3:entityReference ?enzymeRef .
  ?enzymeRef rdf:type bp3:ProteinReference .
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_expression_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/04-ControlsExpressionOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 5 - Catalysis Precedes
Equivalent of sequential_catalysis from Paxtools SIF rules
First protein controls a conversion whose output molecule is input to another reaction controlled by a second protein

Reaction test Reactome : R-HSA-2142688, Synthesis of 5-eicosatetraenoic acids

In [None]:
# Rule 5 - CatalysisPrecedes.
# The pattern selects two Conversions linked by a SmallMolecule that is on the
# right side of the first and on the left side of the second, each controlled
# by a Control whose controller is a Protein. One triple is built from the
# first controller's reference to the second controller's reference; pairs
# whose two references are identical are filtered out.
start_time = time.time()
query_catalysis_precedes = """ 
CONSTRUCT {
   ?enzymeRefA abstraction:CatalysisPrecedes ?enzymeRefB
}
WHERE {
  ?reaction1 rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  
  ?catalysis1 bp3:controlled ?reaction1 .
  ?catalysis1 (rdf:type/rdfs:subClassOf*) bp3:Control .
  ?catalysis1 bp3:controller ?enzymeA .
  ?enzymeA rdf:type bp3:Protein .
  ?enzymeA bp3:entityReference ?enzymeRefA .
  ?enzymeRefA rdf:type/(rdfs:subClassOf*) bp3:EntityReference .
  
  ?reaction1 bp3:right ?connectingMolecule .
  ?reaction2 bp3:left ?connectingMolecule .
  ?connectingMolecule rdf:type bp3:SmallMolecule .
  
  ?reaction2 rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  
  ?catalysis2 bp3:controlled ?reaction2 .
  ?catalysis2 (rdf:type/rdfs:subClassOf*) bp3:Control .
  ?catalysis2 bp3:controller ?enzymeB .
  ?enzymeB rdf:type bp3:Protein .
  ?enzymeB bp3:entityReference ?enzymeRefB .
  ?enzymeRefB rdf:type/(rdfs:subClassOf*) bp3:EntityReference .

  FILTER (?enzymeRefA != ?enzymeRefB)
}

"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_catalysis_precedes)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/05-CatalysisPrecedes.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 6 - In Complex With
Equivalent component of Paxtools SIF rules
Proteins are members of the same complex

Reaction test Reactome : R-HSA-2187368,  STUB1 (CHIP) ubiquitinates SMAD3 

In [None]:
# Rule 6 - InComplexWith.
# Case 1 pairs two distinct Protein entities reachable from the same Complex
# through any number of bp3:component steps (nested complexes included); the
# STR() ordering drops self-pairs and keeps one direction per entity pair.
# Case 2 produces a self-pair for a Protein that is present in several copies:
# the entity attached to a stoichiometry with coefficient > 1 is followed
# through zero or more bp3:component steps, so that it matches both when the
# entity is the Protein itself (zero steps) and when it is a sub-complex
# containing the Protein. With a single mandatory bp3:component step, a Protein
# that is itself the stoichiometry entity could never match, because the
# pattern requires the entity to have components.
# The shared tail resolves both proteins to their ProteinReference.
start_time = time.time()
query_in_complex_with = """
CONSTRUCT {
  ?prot1Ref abstraction:InComplexWith ?prot2Ref
}
WHERE {
  # Case 1: Complex with several different proteins
  {
    ?complex rdf:type/(rdfs:subClassOf*) bp3:Complex .
    ?complex bp3:component* ?prot1 .
    ?complex bp3:component* ?prot2 .
    ?prot1 rdf:type bp3:Protein .
    ?prot2 rdf:type bp3:Protein .

    # Avoid self-pairing and duplicate pairs
    FILTER (STR(?prot1) < STR(?prot2))
  }

  UNION

  # Case 2: Same protein with stoichiometry > 1
  {
    ?complex rdf:type/(rdfs:subClassOf*) bp3:Complex .
    # Check for stoichiometry > 1
    ?complex bp3:componentStoichiometry ?stoich .
    ?stoich bp3:physicalEntity ?entity1 .
    ?stoich bp3:stoichiometricCoefficient ?coeff .
    FILTER (?coeff > 1)

    # The entity itself (zero steps) or any protein nested inside it
    ?entity1 bp3:component* ?prot1 .
    ?prot1 rdf:type bp3:Protein .

    # For self-pairing, use the same entity
    BIND (?prot1 AS ?prot2)
  }

  ?prot1 bp3:entityReference ?prot1Ref .
  ?prot2 bp3:entityReference ?prot2Ref .
  ?prot1Ref rdf:type bp3:ProteinReference .
  ?prot2Ref rdf:type bp3:ProteinReference .
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_in_complex_with)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/06-InComplexWith.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 7 - Interacts With 
Proteins are participants of the same MolecularInteraction

Problem : no instance of bp3:MolecularInteraction in Reactome

In [None]:
# Rule 7 - InteractsWith.
# The pattern selects two participants of the same MolecularInteraction. Both
# participants are typed with the single ?participantType variable, so a pair
# is either two Proteins or two SmallMolecules. The STR() ordering on the
# entity references keeps one direction per pair and drops pairs whose two
# references are identical.
start_time = time.time()
query_interacts_with = """  
CONSTRUCT {
  ?participant1Ref abstraction:InteractsWith ?participant2Ref
}
WHERE {
  VALUES ?participantType { bp3:Protein bp3:SmallMolecule }
  # Core molecular interaction pattern
  ?MolecularInteraction rdf:type/(rdfs:subClassOf*) bp3:MolecularInteraction .
  ?MolecularInteraction bp3:participant ?participant1 .
  ?participant1 rdf:type ?participantType .
  ?MolecularInteraction bp3:participant ?participant2 .
  ?participant2 rdf:type ?participantType . 

  ?participant1 bp3:entityReference ?participant1Ref .
  ?participant1Ref rdf:type/(rdfs:subClassOf*) bp3:EntityReference .
  ?participant2 bp3:entityReference ?participant2Ref .
  ?participant2Ref rdf:type/(rdfs:subClassOf*) bp3:EntityReference .
  
  FILTER (STR(?participant1Ref) < STR(?participant2Ref))
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_interacts_with)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/07-InteractsWith.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 8 - Neighbor Of 
Proteins are participants or controllers of the same interaction

Reaction test Reactome : R-HSA-170844, Latent TGF-beta-1 is cleaved by FURIN

In [None]:
# Rule 8 - NeighborOf.
# The pattern selects two Proteins attached to the same Interaction, each
# reached either as the controller of something that controls the interaction
# (inverse bp3:controlled followed by bp3:controller) or through bp3:left /
# bp3:right. The STR() ordering on the ProteinReferences keeps one direction
# per pair and drops pairs whose two references are identical.
start_time = time.time()

query_neighbor_of = """ 
CONSTRUCT {
   ?prot1Ref abstraction:NeighborOf ?prot2Ref
}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:Interaction .
  
  ?reaction ((^bp3:controlled/bp3:controller)|bp3:left|bp3:right) ?prot1 .
  ?prot1 rdf:type bp3:Protein .
  ?prot1 bp3:entityReference ?prot1Ref .
  ?prot1Ref rdf:type bp3:ProteinReference .
  ?reaction ((^bp3:controlled/bp3:controller)|bp3:left|bp3:right) ?prot2 .
  ?prot2 rdf:type bp3:Protein .
  ?prot2 bp3:entityReference ?prot2Ref .
  ?prot2Ref rdf:type bp3:ProteinReference .
  FILTER (STR(?prot1Ref) < STR(?prot2Ref))
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_neighbor_of)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/08-NeighborOf.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 9 - Consumption Controlled By
The small molecule is consumed by a reaction that is controlled by a protein

Reaction test Reactome : R-HSA-2142688, Synthesis of 5-eicosatetraenoic acids

In [None]:
# Rule 9 - ConsumptionControlledBy.
# The pattern selects a Conversion with a SmallMolecule on its left side and a
# SmallMolecule on its right side, controlled by something whose controller is
# a Protein (the controlling node itself is not type-constrained here). One
# triple is built from the left-side SmallMoleculeReference to the controller's
# ProteinReference.
start_time = time.time()
query_consumption_controled_by = """  
CONSTRUCT {
  ?reactantRef abstraction:ConsumptionControlledBy ?enzymeRef
}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  ?reaction bp3:left ?reactant .
  ?reactant rdf:type bp3:SmallMolecule .
  ?reactant bp3:entityReference ?reactantRef .
  ?reactantRef rdf:type bp3:SmallMoleculeReference .
  ?reaction bp3:right ?product .
  ?product rdf:type bp3:SmallMolecule .
  ?catalysis bp3:controlled ?reaction .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme rdf:type bp3:Protein .
  ?enzyme bp3:entityReference ?enzymeRef .
  ?enzymeRef rdf:type bp3:ProteinReference .
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_consumption_controled_by)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/09-ConsumptionControledBy.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 10 - Controls Production Of
The protein controls a reaction of which the small molecule is an output

Reaction test Reactome : R-HSA-2142688, Synthesis of 5-eicosatetraenoic acids

In [None]:
# Rule 10 - ControlsProductionOf.
# The pattern selects a Conversion with a SmallMolecule on its left side and a
# SmallMolecule on its right side, controlled by something whose controller is
# a Protein (the controlling node itself is not type-constrained here). One
# triple is built from the controller's ProteinReference to the right-side
# SmallMoleculeReference.
start_time = time.time()
query_controls_production = """ 
CONSTRUCT {
  ?enzymeRef abstraction:ControlsProductionOf ?productRef
}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  ?reaction bp3:left ?reactant .
  ?reactant rdf:type bp3:SmallMolecule .
  ?reaction bp3:right ?product .
  ?product rdf:type bp3:SmallMolecule .
  ?product bp3:entityReference ?productRef .
  ?productRef rdf:type bp3:SmallMoleculeReference .
  ?catalysis bp3:controlled ?reaction .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme rdf:type bp3:Protein .
  ?enzyme bp3:entityReference ?enzymeRef .
  ?enzymeRef rdf:type bp3:ProteinReference .
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_production)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/10-ControlsProduction.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 11 - Controls Transport Of Chemical
The protein controls a reaction that changes the cellular location of the small molecule

Reaction test : R-HSA-70635, Urea Cycle

In [None]:
# Rule 11 - ControlsTransportOfChemical.
# The pattern selects a Conversion with a SmallMolecule on the left and a
# SmallMolecule on the right that share one SmallMoleculeReference but whose
# cellularLocation vocabularies have different terms, controlled by a Control
# whose controller is a Protein. One triple is built from the controller's
# ProteinReference to the shared SmallMoleculeReference.
start_time = time.time()
query_controls_transport_of_chemical = """ 
CONSTRUCT {
  ?enzymeRef abstraction:ControlsTransportOfChemical ?SmallMoleculeRef
}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  ?reaction bp3:left ?SmallMolecule1 .
  ?SmallMolecule1 rdf:type bp3:SmallMolecule .
  ?SmallMolecule1 bp3:cellularLocation ?cellularLocVocab1 .
  ?cellularLocVocab1 bp3:term ?location1 .
  ?SmallMolecule1 bp3:entityReference ?SmallMoleculeRef .
  
  ?reaction bp3:right ?SmallMolecule2 . 
  ?SmallMolecule2 rdf:type bp3:SmallMolecule .
  ?SmallMolecule2  bp3:cellularLocation ?cellularLocVocab2 .
  ?cellularLocVocab2 bp3:term ?location2 .
  ?SmallMolecule2 bp3:entityReference ?SmallMoleculeRef .

  ?SmallMoleculeRef rdf:type bp3:SmallMoleculeReference .

  ?catalysis bp3:controlled ?reaction .
  ?catalysis rdf:type/(rdfs:subClassOf*) bp3:Control .
  ?catalysis bp3:controller ?enzyme .
  ?enzyme rdf:type bp3:Protein .
  ?enzyme bp3:entityReference ?enzymeRef .
  ?enzymeRef rdf:type bp3:ProteinReference .

  FILTER(?location1 != ?location2)
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_controls_transport_of_chemical)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/11-ControlsTransportOfChemical.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 12 - Chemical Affects
A small molecule has an effect on the protein state

Reaction test Reactome : R-HSA-202437,  Phosphorylation of CARMA1 

In [None]:
# Rule 12 - ChemicalAffects.
# Two alternative shapes are accepted:
#   - a Complex that has both a SmallMolecule component and a Protein
#     component carrying at least one EntityFeature;
#   - a Conversion with a Protein on the left and a Protein on the right that
#     share one ProteinReference, the right-side one carrying an EntityFeature,
#     controlled by a Control whose controller is a SmallMolecule.
# The alternatives are combined with UNION and each branch binds ?protein
# itself. Two consecutive OPTIONAL groups would not express "either": both
# groups bind ?feature, ?chemicalCatalyzer and ?chemicalCatalyzerRef, so once
# the first group matches, the second can only extend solutions that already
# agree on those variables and its independent results are lost.
# The shared tail resolves the affected protein to its ProteinReference.
start_time = time.time()
query_chemical_affects = """
CONSTRUCT {
  ?chemicalCatalyzerRef abstraction:ChemicalAffects ?proteinRef .
}
WHERE {
  {
    ?complex rdf:type bp3:Complex .
    ?complex bp3:component ?proteinComponent .
    ?proteinComponent rdf:type bp3:Protein .
    # complete modification check
    ?proteinComponent bp3:feature ?feature .
    ?feature rdf:type/(rdfs:subClassOf*) bp3:EntityFeature .
    ?complex bp3:component ?chemicalCatalyzer .
    ?chemicalCatalyzer rdf:type bp3:SmallMolecule .
    ?chemicalCatalyzer bp3:entityReference ?chemicalCatalyzerRef .
    ?chemicalCatalyzerRef rdf:type bp3:SmallMoleculeReference .

    BIND(?proteinComponent AS ?protein)
  }
  UNION
  {
    ?reaction rdf:type/(rdfs:subClassOf*) bp3:Conversion .
    ?reaction bp3:left ?protLeft .
    ?protLeft rdf:type bp3:Protein .
    ?protLeft bp3:entityReference ?protRef .

    ?reaction bp3:right ?protRight .
    ?protRight rdf:type bp3:Protein .
    ?protRight bp3:entityReference ?protRef .

    ?protRef rdf:type bp3:ProteinReference .

    ?catalysis bp3:controlled ?reaction .
    ?catalysis rdf:type/(rdfs:subClassOf*) bp3:Control .
    ?catalysis bp3:controller ?chemicalCatalyzer .
    ?chemicalCatalyzer rdf:type bp3:SmallMolecule .
    ?chemicalCatalyzer bp3:entityReference ?chemicalCatalyzerRef .
    ?chemicalCatalyzerRef rdf:type bp3:SmallMoleculeReference .

    # complete modification check
    ?protRight bp3:feature ?feature .
    ?feature rdf:type/(rdfs:subClassOf*) bp3:EntityFeature .

    BIND(?protLeft AS ?protein)
  }

  ?protein bp3:entityReference ?proteinRef .
  ?proteinRef rdf:type bp3:ProteinReference .
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_chemical_affects)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/12-ChemicalAffects.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 13 - Reacts With
Small molecules are input to a biochemical reaction 

Reaction test Reactome : R-HSA-2142688, Synthesis of 5-eicosatetraenoic acids

In [None]:
# Rule 13 - ReactsWith.
# The pattern selects two SmallMolecules that are both on the left side of the
# same BiochemicalReaction. The STR() ordering on the SmallMoleculeReferences
# keeps one direction per pair and drops pairs whose two references are
# identical.
# The relation is spelled abstraction:ReactsWith so that it follows the
# PascalCase naming used by every other relation exported by this notebook.
start_time = time.time()
query_reacts_with = """
CONSTRUCT {
  ?smallMolecule1Ref abstraction:ReactsWith ?smallMolecule2Ref .
}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:BiochemicalReaction .
  ?reaction bp3:left ?smallMolecule1 .
  ?smallMolecule1 rdf:type bp3:SmallMolecule .
  ?smallMolecule1 bp3:entityReference ?smallMolecule1Ref .
  ?smallMolecule1Ref rdf:type bp3:SmallMoleculeReference .

  ?reaction bp3:left ?smallMolecule2 .
  ?smallMolecule2 rdf:type bp3:SmallMolecule .
  ?smallMolecule2 bp3:entityReference ?smallMolecule2Ref .
  ?smallMolecule2Ref rdf:type bp3:SmallMoleculeReference .

  FILTER (STR(?smallMolecule1Ref) < STR(?smallMolecule2Ref))
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_reacts_with)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/13-ReactsWith.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

#### 14 - Used To Produce
A reaction consumes a small molecule to produce another small molecule

In [None]:
# Rule 14 - UsedToProduce.
# The pattern selects a Conversion with a SmallMolecule on its left side and a
# SmallMolecule on its right side, each resolved to a SmallMoleculeReference.
# One triple is built from the consumed reference to the produced reference;
# pairs whose two references are identical are filtered out.
# SPARQL variable names are case-sensitive: the type constraint on the product
# reference must use ?smallMolecule2Ref. Written with a lowercase "m" it is a
# separate, unconnected variable, which leaves the product reference unchecked
# and joins every solution with every SmallMoleculeReference in the dataset.
start_time = time.time()
query_used_to_produce = """
CONSTRUCT {
  ?smallMolecule1Ref abstraction:UsedToProduce ?smallMolecule2Ref .
}
WHERE {
  ?reaction rdf:type/(rdfs:subClassOf*) bp3:Conversion .
  ?reaction bp3:left ?smallMolecule1 .
  ?reaction bp3:right ?smallMolecule2 .
  ?smallMolecule1 rdf:type bp3:SmallMolecule .
  ?smallMolecule1 bp3:entityReference ?smallMolecule1Ref .
  ?smallMolecule1Ref rdf:type bp3:SmallMoleculeReference .
  ?smallMolecule2 rdf:type bp3:SmallMolecule .
  ?smallMolecule2 bp3:entityReference ?smallMolecule2Ref .
  ?smallMolecule2Ref rdf:type bp3:SmallMoleculeReference .

  FILTER (?smallMolecule1Ref != ?smallMolecule2Ref)
}
"""

# Send the query to the Reactome endpoint, export the edges to CSV, print a
# summary and report the elapsed time.
sparql = SPARQLWrapper(endpoint_reactome)
sparql.setQuery(prefixes+query_used_to_produce)
df = save_for_cytoscape(sparql, prefixes, "../Results/ReactomeHomoSapiens94/14-UsedToProduce.csv", format='csv')
preview_network_data(df)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# End the Fuseki server process started earlier in the notebook.
# Ask it to stop and wait for it to exit, so the child process is reaped
# before the notebook continues; a fixed sleep after kill() neither reaps the
# child nor confirms that it has stopped. If it has not exited after 60
# seconds, it is killed and then reaped.
process.terminate()
try:
    process.wait(timeout=60)
except subprocess.TimeoutExpired:
    process.kill()
    process.wait()