# SHACL Advanced features and constraints explorer

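This notebook is a tool for exploring SHACL constraints and the SHACL Advanced Features (SHACL-AF). It is based on the [SHACL Playground](https://shacl.org/playground/) and the [SHACL documentation](https://www.w3.org/TR/shacl/); the advanced features themselves are described in the [SHACL Advanced Features note](https://www.w3.org/TR/shacl-af/).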

For pyshacl, see the [pyshacl documentation](https://pyshacl.readthedocs.io/en/latest/) and the [pyshacl AF issue](https://github.com/RDFLib/pySHACL/issues/189).

In [14]:
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import rdflib
from pyshacl import Validator
from rdflib import Graph
# inline matplotlib plotting in jupyter notebook

%matplotlib inline

## SHACL Advanced Features (AF) with pyshacl

In [None]:


# Minimal demo: load a two-triple data graph, run pyshacl over it with RDFS
# inference and SHACL Advanced Features enabled, then print the graph the
# validator actually worked on.
g = Graph()

smts = """
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix C: <http://example.org/> .
@prefix ex: <http://example.org/> .

C:A rdfs:subClassOf C:B . 
ex:something a C:A .
"""

# State the syntax explicitly rather than relying on rdflib's format guessing.
g.parse(data=smts, format="turtle")

# Settings are passed through the `options` dict, matching the other Validator
# calls in this notebook. Passing `inference=` / `advanced=` as bare keyword
# arguments does not configure the validator.
v = Validator(g, options={"advanced": True, "inference": "rdfs"})

v.run()
print(v.target_graph.serialize(format='ttl'))

In [None]:
# Target-graph method: run the validator, then read the expanded data graph
# back from `v.target_graph`.
# NOTE(review): `myshapes` is not defined in any cell shown in this notebook;
# it must be bound to a shapes graph before this cell is run.
v = Validator(
    g,
    shacl_graph=myshapes,
    options=dict(advanced=True, inference="rdfs"),
)
conforms, report_graph, report_text = v.run()
expanded_graph = v.target_graph  # the expanded data graph

In [None]:
# In-place method: same validation as the target-graph variant, but with the
# "inplace" option switched on so the expansion is applied to `g` itself.
# NOTE(review): `myshapes` is not defined in any cell shown in this notebook;
# it must be bound to a shapes graph before this cell is run.
v = Validator(
    g,
    shacl_graph=myshapes,
    options=dict(advanced=True, inplace=True, inference="rdfs"),
)
conforms, report_graph, report_text = v.run()
g  # cell output: the graph that was expanded in place

In [None]:
# Rebind `g` to a fresh graph and fill it from the Turtle test-data file.
test_ontology_filename = "shacl_sparql_test_data.ttl"

g = rdflib.Graph()
g.parse(source=test_ontology_filename, format="turtle")

g  # cell output: the loaded graph object

In [8]:
# SPARQL UPDATE that links experimental reactions to matching idealized ones.
#
# For every pair (?idealReaction, ?experimentReaction) it inserts the triple
# `?experimentReaction a ?idealReaction` when all of the following hold:
#   * both are typed ro:Reaction, with ro:type "idealized" and "experimental"
#     respectively;
#   * the comma-joined lists of distinct product structures are equal as
#     strings;
#   * there is no ideal reactant structure that is missing from the
#     experimental reaction's reactant structures (nested FILTER NOT EXISTS).
#
# The two sub-SELECTs also build reactant lists (?idealReactantsList,
# ?experimentReactantsList), but the outer pattern only compares the product
# lists; reactants are checked through the NOT EXISTS block instead.
#
# NOTE(review): SPARQL 1.1 does not specify the order in which GROUP_CONCAT
# joins its values, so the product-list string comparison may depend on the
# engine's iteration order when a reaction has several products - confirm.
knows_query = """
PREFIX cml: <http://www.xml-cml.org/schema/cml2/core#>
PREFIX ro: <http://www.example.org/reaction-ontology#>

INSERT {?experimentReaction a ?idealReaction}
WHERE {
  # Idealized Reaction
  ?idealReaction a ro:Reaction ;
                 ro:type "idealized" .

  {
    SELECT  ?idealReaction  (GROUP_CONCAT(DISTINCT ?idealReactantStructure ; separator=",") AS ?idealReactantsList) 
                            (GROUP_CONCAT(DISTINCT ?idealProductStructure ; separator=",") AS ?idealProductsList)
    WHERE {
      ?idealReaction ro:hasReactant ?idealReactant .
      ?idealReactant cml:chemicalStructure ?idealReactantStructure .
      
      ?idealReaction ro:hasProduct ?idealProduct .
      ?idealProduct cml:chemicalStructure ?idealProductStructure .
    }
    GROUP BY ?idealReaction
  }

  # Experimental Reaction
  ?experimentReaction a ro:Reaction ;
                      ro:type "experimental" .

  {
    SELECT ?experimentReaction (GROUP_CONCAT(DISTINCT ?experimentReactantStructure ; separator=",") AS ?experimentReactantsList) (GROUP_CONCAT(DISTINCT ?experimentProductStructure ; separator=",") AS ?experimentProductsList)
    WHERE {
      ?experimentReaction ro:hasReactant ?experimentReactant .
      ?experimentReactant cml:chemicalStructure ?experimentReactantStructure .
      
      ?experimentReaction ro:hasProduct ?experimentProduct .
      ?experimentProduct cml:chemicalStructure ?experimentProductStructure .
    }
    GROUP BY ?experimentReaction
  }

  # Compare the product lists exactly
  FILTER (STR(?idealProductsList) = STR(?experimentProductsList))
  
  # Ensure all ideal reactants are in the experimental reactants list
  FILTER NOT EXISTS {
    # Iterate over each ideal reactant
    ?idealReaction ro:hasReactant ?idealReactant .
    ?idealReactant cml:chemicalStructure ?idealReactantStructure .
    
    # Check if this ideal reactant is missing in the experimental reaction's reactants
    FILTER NOT EXISTS {
      ?experimentReaction ro:hasReactant ?experimentReactant .
      ?experimentReactant cml:chemicalStructure ?idealReactantStructure .
    }
  }
}
"""

In [9]:
# Apply the SPARQL INSERT to `g`. Graph.update() changes the graph in place and
# returns None, so there is no result set to iterate over; report how many
# triples the update added instead.
triples_before = len(g)
g.update(knows_query)
print(f"Triples added by the update: {len(g) - triples_before}")



In [None]:
# Render the current contents of `g` as Turtle and print the text.
graph_ttl = g.serialize(format="turtle")
print(graph_ttl)

In [None]:
# Save the graph to a Turtle file.
result_output_filename = 'generated_outputs/result_1.ttl'

# Create the output directory first: serialising to a path whose parent
# directory does not exist fails on a fresh checkout.
Path(result_output_filename).parent.mkdir(parents=True, exist_ok=True)

g.serialize(destination=result_output_filename, format='turtle')

In [None]:
# Read the shape file and run its contents against `g` as a SPARQL query.
# NOTE(review): the file's text is handed straight to g.query(), so it must
# contain a SPARQL SELECT that exposes ?experimentalReaction and ?idealReaction
# (the attributes read in the loop below) - confirm against the file.

shacl_shape_filename = "shacl_sparql_test_shape.ttl"

# Explicit encoding so the read does not depend on the platform default.
with open(shacl_shape_filename, "r", encoding="utf-8") as file:
    known_query1 = file.read()


qres = g.query(known_query1)

# One line per result row.
for row1 in qres:
    print(f"Experimental Reaction: {row1.experimentalReaction}, Ideal Reaction: {row1.idealReaction}")
