# SHACL Advanced features and constraints explorer

This notebook is a tool to explore the SHACL advanced features and constraints. It is based on the [SHACL Playground](https://shacl.org/playground/) and the [SHACL documentation](https://www.w3.org/TR/shacl/).

For pyshacl, see the [pyshacl documentation](https://pyshacl.readthedocs.io/en/latest/) and the [pyshacl AF issue](https://github.com/RDFLib/pySHACL/issues/189).

In [5]:
import rdflib

import matplotlib
import matplotlib.pyplot as plt

from pyshacl import Validator
from rdflib import Graph
# inline matplotlib plotting in jupyter notebook

%matplotlib inline

## SHACL Advanced Features (AF) with pyshacl

In [None]:


g = Graph()

smts = """
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix C: <http://example.org/> .
@prefix ex: <http://example.org/> .

C:A rdfs:subClassOf C:B . 
ex:something a C:A .
"""

g.parse(data=smts)
v = Validator(g,
      inference='rdfs',
      advanced=True)

v.run()
print(v.target_graph.serialize(format='ttl'))

In [None]:
# target graph method

v = Validator(g, shacl_graph=myshapes, options={"advanced": True, "inference": "rdfs"})
conforms, report_graph, report_text = v.run()
expanded_graph = v.target_graph #<-- This gets the expanded data graph

In [None]:
# inplace modifier


v = Validator(g, shacl_graph=myshapes, options={"advanced": True, "inplace": True, "inference": "rdfs"})
conforms, report_graph, report_text = v.run()
g #<-- g is expanded inplace

In [None]:
g = rdflib.Graph()

test_ontology_filename = "shacl_sparql_test_data.ttl"

# g.parse(".\\Ontology_tests\\data_for_query3.ttl", format="turtle")
g.parse(test_ontology_filename, format="turtle")

# with open(".\\Ontology_tests\\knownQuery8.ttl", "r") as file:
#     known_query1 = file.read()

g

In [14]:
data = """
@prefix cml: <http://www.xml-cml.org/schema/cml2/core#> .
@prefix ro: <http://www.example.org/reaction-ontology#> .
@prefix ex: <http://www.example.org/chemicals#> .

# Define reactants and products with their chemical structures
ex:Reactant1 cml:chemicalStructure "SMILES:CCO" .  # Ethanol
ex:Reactant2 cml:chemicalStructure "SMILES:O=C=O" .  # Carbon dioxide
ex:Reactant3 cml:chemicalStructure "SMILES:CC(=O)O" .  # Acetic acid
ex:Reactant4 cml:chemicalStructure "SMILES:H2O" .  # Water
ex:Reactant5 cml:chemicalStructure "SMILES:O2" .  # Oxygen

ex:Product1 cml:chemicalStructure "SMILES:CCOCC(=O)O" .  # Ethyl acetate
ex:Product2 cml:chemicalStructure "SMILES:CCOCC" .  # Diethyl ether
ex:Product3 cml:chemicalStructure "SMILES:COO" .  # Formate
ex:Product4 cml:chemicalStructure "SMILES:CCOCO" .  # Ethylene glycol

# Idealized Reaction 1: Ethanol + Carbon dioxide -> Ethyl acetate
ex:IdealReaction1 a ro:Reaction ;
                  ro:type "idealized" ;
                  ro:hasReactant ex:Reactant1 , ex:Reactant2 ;
                  ro:hasProduct ex:Product1 .

# Idealized Reaction 2: Acetic acid -> Diethyl ether + Formate
ex:IdealReaction2 a ro:Reaction ;
                  ro:type "idealized" ;
                  ro:hasReactant ex:Reactant3 ;
                  ro:hasProduct ex:Product2 , ex:Product3 .

# Idealized Reaction 3: Ethanol -> Ethylene glycol
ex:IdealReaction3 a ro:Reaction ;
                  ro:type "idealized" ;
                  ro:hasReactant ex:Reactant1 ;
                  ro:hasProduct ex:Product4 .

# Experimental Reaction 1: Matches Idealized Reaction 1 exactly
ex:ExperimentReaction1 a ro:Reaction ;
                       ro:type "experimental" ;
                       ro:hasReactant ex:Reactant1 , ex:Reactant2 ;
                       ro:hasProduct ex:Product1 .

# Experimental Reaction 2: Matches Idealized Reaction 2 with an additional reactant (Water)
ex:ExperimentReaction2 a ro:Reaction ;
                       ro:type "experimental" ;
                       ro:hasReactant ex:Reactant3 , ex:Reactant4 ;  # Additional reactant (Water)
                       ro:hasProduct ex:Product2 , ex:Product3 .

# Experimental Reaction 3: More reactants but matches Idealized Reaction 1 in relevant reactants and products
ex:ExperimentReaction3 a ro:Reaction ;
                       ro:type "experimental" ;
                       ro:hasReactant ex:Reactant1 , ex:Reactant2 , ex:Reactant4 ;  # Additional reactant (Water)
                       ro:hasProduct ex:Product1 .

# Experimental Reaction 4: Correct reactants for Idealized Reaction 1 but incorrect product
ex:ExperimentReaction4 a ro:Reaction ;
                       ro:type "experimental" ;
                       ro:hasReactant ex:Reactant1 , ex:Reactant2 ;
                       ro:hasProduct ex:Product2 .  # Incorrect product

# Experimental Reaction 5: Matches Idealized Reaction 3 with an additional reactant (Oxygen)
ex:ExperimentReaction5 a ro:Reaction ;
                       ro:type "experimental" ;
                       ro:hasReactant ex:Reactant1 , ex:Reactant5 ;  # Additional reactant (Oxygen)
                       ro:hasProduct ex:Product4 .

# Experimental Reaction 6: Non-matching reaction with entirely different products
ex:ExperimentReaction6 a ro:Reaction ;
                       ro:type "experimental" ;
                       ro:hasReactant ex:Reactant2 , ex:Reactant4 ;  # Different set of reactants
                       ro:hasProduct ex:Product3 .  # Different product
"""



In [15]:
g = rdflib.Graph()
g.parse(data=data, format="turtle")

<Graph identifier=N0af77fc8e04448a69d910d5e82b3eb2b (<class 'rdflib.graph.Graph'>)>

In [16]:
knows_query = """
PREFIX cml: <http://www.xml-cml.org/schema/cml2/core#>
PREFIX ro: <http://www.example.org/reaction-ontology#>

INSERT {?experimentReaction a ?idealReaction}
WHERE {
  # Idealized Reaction
  ?idealReaction a ro:Reaction ;
                 ro:type "idealized" .

  {
    SELECT  ?idealReaction  (GROUP_CONCAT(DISTINCT ?idealReactantStructure ; separator=",") AS ?idealReactantsList) 
                            (GROUP_CONCAT(DISTINCT ?idealProductStructure ; separator=",") AS ?idealProductsList)
    WHERE {
      ?idealReaction ro:hasReactant ?idealReactant .
      ?idealReactant cml:chemicalStructure ?idealReactantStructure .
      
      ?idealReaction ro:hasProduct ?idealProduct .
      ?idealProduct cml:chemicalStructure ?idealProductStructure .
    }
    GROUP BY ?idealReaction
  }

  # Experimental Reaction
  ?experimentReaction a ro:Reaction ;
                      ro:type "experimental" .

  {
    SELECT ?experimentReaction (GROUP_CONCAT(DISTINCT ?experimentReactantStructure ; separator=",") AS ?experimentReactantsList) (GROUP_CONCAT(DISTINCT ?experimentProductStructure ; separator=",") AS ?experimentProductsList)
    WHERE {
      ?experimentReaction ro:hasReactant ?experimentReactant .
      ?experimentReactant cml:chemicalStructure ?experimentReactantStructure .
      
      ?experimentReaction ro:hasProduct ?experimentProduct .
      ?experimentProduct cml:chemicalStructure ?experimentProductStructure .
    }
    GROUP BY ?experimentReaction
  }

  # Compare the product lists exactly
  FILTER (STR(?idealProductsList) = STR(?experimentProductsList))
  
  # Ensure all ideal reactants are in the experimental reactants list
  FILTER NOT EXISTS {
    # Iterate over each ideal reactant
    ?idealReaction ro:hasReactant ?idealReactant .
    ?idealReactant cml:chemicalStructure ?idealReactantStructure .
    
    # Check if this ideal reactant is missing in the experimental reaction's reactants
    FILTER NOT EXISTS {
      ?experimentReaction ro:hasReactant ?experimentReactant .
      ?experimentReactant cml:chemicalStructure ?idealReactantStructure .
    }
  }
}
"""

In [19]:
knows_query = """
PREFIX cml: <http://www.xml-cml.org/schema/cml2/core#>
PREFIX ro: <http://www.example.org/reaction-ontology#>

SELECT  ?idealReaction  (GROUP_CONCAT(DISTINCT ?idealReactantStructure ; separator=",") AS ?idealReactantsList) 
                            (GROUP_CONCAT(DISTINCT ?idealProductStructure ; separator=",") AS ?idealProductsList)
    WHERE {
      ?idealReaction ro:hasReactant ?idealReactant .
      ?idealReactant cml:chemicalStructure ?idealReactantStructure .
      
      ?idealReaction ro:hasProduct ?idealProduct .
      ?idealProduct cml:chemicalStructure ?idealProductStructure .
    }
    GROUP BY ?idealReaction
"""


In [23]:
## SPARQL Query

for r in g.query(knows_query):
    print("Ideal Reaction: ", r["idealReaction"])
    print("Ideal Reactant List: ",r["idealReactantsList"])
    print("Product List:", r["idealProductsList"])

Ideal Reaction:  http://www.example.org/chemicals#IdealReaction1
Ideal Reactant List:  SMILES:CCO,SMILES:O=C=O
Product List: SMILES:CCOCC(=O)O
Ideal Reaction:  http://www.example.org/chemicals#ExperimentReaction1
Ideal Reactant List:  SMILES:CCO,SMILES:O=C=O
Product List: SMILES:CCOCC(=O)O
Ideal Reaction:  http://www.example.org/chemicals#ExperimentReaction3
Ideal Reactant List:  SMILES:CCO,SMILES:O=C=O,SMILES:H2O
Product List: SMILES:CCOCC(=O)O
Ideal Reaction:  http://www.example.org/chemicals#IdealReaction2
Ideal Reactant List:  SMILES:CC(=O)O
Product List: SMILES:CCOCC,SMILES:COO
Ideal Reaction:  http://www.example.org/chemicals#ExperimentReaction2
Ideal Reactant List:  SMILES:CC(=O)O,SMILES:H2O
Product List: SMILES:CCOCC,SMILES:COO
Ideal Reaction:  http://www.example.org/chemicals#ExperimentReaction4
Ideal Reactant List:  SMILES:CCO,SMILES:O=C=O
Product List: SMILES:CCOCC
Ideal Reaction:  http://www.example.org/chemicals#ExperimentReaction6
Ideal Reactant List:  SMILES:O=C=O,SMILES

In [17]:
qres = g.update(knows_query)
# for row in qres:
#     print(f"{row}")



In [18]:
graph_ttl = g.serialize(format='turtle')

print(graph_ttl)


@prefix cml: <http://www.xml-cml.org/schema/cml2/core#> .
@prefix ex: <http://www.example.org/chemicals#> .
@prefix ro: <http://www.example.org/reaction-ontology#> .

ex:ExperimentReaction1 a ex:IdealReaction1,
        ro:Reaction ;
    ro:hasProduct ex:Product1 ;
    ro:hasReactant ex:Reactant1,
        ex:Reactant2 ;
    ro:type "experimental" .

ex:ExperimentReaction2 a ro:Reaction ;
    ro:hasProduct ex:Product2,
        ex:Product3 ;
    ro:hasReactant ex:Reactant3,
        ex:Reactant4 ;
    ro:type "experimental" .

ex:ExperimentReaction3 a ex:IdealReaction1,
        ro:Reaction ;
    ro:hasProduct ex:Product1 ;
    ro:hasReactant ex:Reactant1,
        ex:Reactant2,
        ex:Reactant4 ;
    ro:type "experimental" .

ex:ExperimentReaction4 a ro:Reaction ;
    ro:hasProduct ex:Product2 ;
    ro:hasReactant ex:Reactant1,
        ex:Reactant2 ;
    ro:type "experimental" .

ex:ExperimentReaction5 a ex:IdealReaction3,
        ro:Reaction ;
    ro:hasProduct ex:Product4 ;
    ro:has

In [None]:
# saving results to file
result_output_filename = 'generated_outputs/result_1.ttl'
g.serialize(destination=result_output_filename, format='turtle')

In [None]:
# open SHACL shape file

shacl_shape_filename = "shacl_sparql_test_shape.ttl"

with open(shacl_shape_filename, "r") as file:
    known_query1 = file.read()


qres = g.query(known_query1)

for row1 in qres:
    print(f"Experimental Reaction: {row1.experimentalReaction}, Ideal Reaction: {row1.idealReaction}")
