# SHACL tutorial

This tutorial is based on Holger Knublauch's YouTube webinar [SHACL Tutorial](https://www.youtube.com/watch?v=ccs-KhnWR1U).
    
For SKOS, see also the [SKOS Tutorial](https://www.youtube.com/watch?v=3Q4v6vzj3Qk) by [DataFlair](https://www.youtube.com/channel/UCs6nmQZQ6pO1qG0e2fV6G7g),
the [SKOS Primer](https://www.w3.org/TR/skos-primer/), and the [SKOS Reference](https://www.w3.org/TR/skos-reference/).

In [None]:
! pip install pydotplus
! pip install graphviz

In [5]:
import rdflib

import matplotlib
import matplotlib.pyplot as plt

from pyshacl import Validator
from rdflib import Graph
# inline matplotlib plotting in jupyter notebook

import io
import pydotplus
from IPython.display import display, Image


# dot graph visualization

from rdflib.tools.rdf2dot import rdf2dot

# register rdf2dot plugin with rdflib

rdflib.plugin.register('dot', rdflib.tools.rdf2dot, 'RDF2DOT', 'RDF2DOT')

%matplotlib inline

In [2]:
# Shapes graph in Turtle: a small demo ontology whose classes double as SHACL node shapes.
#
# Points that are easy to get wrong:
# - City and Country are typed rdfs:Class in addition to owl:Class. SHACL only gives a
#   node shape an implicit class target when the shape is (a subclass of) rdfs:Class
#   in the shapes graph; with owl:Class alone no instance would ever be validated.
# - Regular expressions are declared with sh:pattern (anchored here so that the whole
#   value has to match).
# - shdemo:capital accepts either a City resource or a plain string via sh:or.
#   Declaring sh:class and sh:datatype side by side is a conjunction that no value
#   can satisfy.
# - sh:inversePath has to name the property (shdemo:capital), not the property shape.

simple_skos_ttl_graph = """
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix ex: <http://example.org/> .
@prefix shdemo: <http://w3id.org/oso/shacl-skos-demo-ontology/> .

<http://w3id.org/oso/shacl-skos-demo-ontology> rdf:type owl:Ontology .


# City --------------------------------------------------------------

shdemo:City
    a owl:Class, rdfs:Class, sh:NodeShape ;
    rdfs:label "City"@en ;
    rdfs:subClassOf skos:Concept ;
    sh:property shdemo:City-Population ;
    sh:property shdemo:City-capitalOf ;
    .

# property shapes for City
shdemo:City-Population
    a sh:PropertyShape ;
    sh:path shdemo:population ;
    sh:datatype xsd:integer ;
    sh:minCount 1 ;
    sh:name "population"@en ;
    .

# Capital of Country (inverse of shdemo:capital)
shdemo:City-capitalOf
    a sh:PropertyShape ;
    sh:path [
        sh:inversePath shdemo:capital ;
        ] ;
    sh:class shdemo:Country ;
    sh:name "capital of"@en ;
    .


# Country -----------------------------------------------------------

shdemo:Country
    a owl:Class, rdfs:Class, sh:NodeShape ;
    rdfs:label "Country"@en ;
    rdfs:subClassOf skos:Concept ;
    sh:property shdemo:Country-isoCode ;
    sh:property shdemo:Country-capital ;
    .

# property shapes for Country
# ISO Code
shdemo:Country-isoCode
    a sh:PropertyShape ;
    sh:path shdemo:isoCode ;
    sh:datatype xsd:string ;
    sh:minLength 2 ;
    sh:maxLength 2 ;
    sh:pattern "^[A-Z]{2}$" ;
    sh:name "ISO Code"@en ;
    .

# Capital: either a City resource or the name of the city as a string
shdemo:Country-capital
    a sh:PropertyShape ;
    sh:path shdemo:capital ;
    sh:or (
        [ sh:class shdemo:City ]
        [ sh:datatype xsd:string ]
    ) ;
    sh:maxCount 1 ;
    sh:name "Capital"@en ;
    .

"""


In [6]:
# See https://stackoverflow.com/questions/39274216/visualize-an-rdflib-graph-in-python

def visualize(g):
    """Render an rdflib graph as a PNG image in the notebook output.

    The graph is written as Graphviz DOT source with ``rdf2dot``, turned into
    PNG bytes with pydotplus and shown with ``IPython.display``.

    Parameters
    ----------
    g : rdflib.Graph
        The graph to draw.

    Returns
    -------
    None
        The image is displayed as a side effect.
    """
    dot_source = io.StringIO()
    # The former call passed opts={display}, a set holding the display function,
    # which is not a meaningful options value; rdf2dot is called with its defaults.
    rdf2dot(g, dot_source)
    dot_graph = pydotplus.graph_from_dot_data(dot_source.getvalue())
    png = dot_graph.create_png()
    display(Image(png))

In [None]:
# Parse the Turtle source into an rdflib graph and draw it.
# (The former mid-cell g.serialize(...) call was dropped: its result was discarded.)
g = Graph()
g.parse(data=simple_skos_ttl_graph, format="turtle")

visualize(g)

In [None]:
# Show the parsed graph re-serialized as Turtle.

g = Graph()
g.parse(data=simple_skos_ttl_graph, format="turtle")

turtle_text = g.serialize(format="turtle")
if isinstance(turtle_text, bytes):
    # older rdflib releases return bytes instead of str
    turtle_text = turtle_text.decode("utf-8")
# print() keeps the line breaks readable; a bare expression would show an escaped repr
print(turtle_text)

# To export Graphviz DOT source, write it with rdf2dot (this notebook registers no
# "dot" serializer, so g.serialize(format='dot') is not available):
# with open("simple_skos_graph.dot", "w") as f:
#     rdf2dot(g, f)


In [None]:
# Load the Turtle source into a dedicated graph that is later passed to the
# validator as its SHACL shapes graph.
shape_graph = rdflib.Graph()
shape_graph.parse(format="turtle", data=simple_skos_ttl_graph)

In [10]:
# Create a dataset that is meant to fulfil the SHACL constraints by instantiating
# the classes and properties defined in the ontology.
# The extax: namespace ends with "/" so that extax:Germany expands to
# <http://example.org/example-taxonomy/Germany> rather than "...example-taxonomyGermany".

valid_dataset = """
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix shdemo: <http://w3id.org/oso/shacl-skos-demo-ontology/> .

@prefix extax: <http://example.org/example-taxonomy/> .


extax:Germany a shdemo:Country ;
    shdemo:isoCode "DE" ;
    shdemo:capital "Berlin" ;
    shdemo:population 3769495 ;
    .

extax:Berlin a shdemo:City ;
    shdemo:population 3769495 ;
    .

extax:France a shdemo:Country ;
    shdemo:isoCode "FR" ;
    shdemo:capital "Paris" ;
    shdemo:population 2140526 ;
    .

extax:Paris a shdemo:City ;
    shdemo:population 2140526 ;
    .

"""

In [11]:
# Create a dataset that is meant to violate the SHACL constraints.
# The intended violations are marked with Turtle comments inside the data.
# The extax: namespace ends with "/" so that extax:Germany expands to
# <http://example.org/example-taxonomy/Germany> rather than "...example-taxonomyGermany".

invalid_dataset = """
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix shdemo: <http://w3id.org/oso/shacl-skos-demo-ontology/> .

@prefix extax: <http://example.org/example-taxonomy/> .

# intended violation: ISO code has three characters (the shape asks for exactly two)
extax:Germany a shdemo:Country ;
    shdemo:isoCode "DE3" ;
    shdemo:capital "Berlin" ;
    shdemo:population 0 ;
    .

extax:Berlin a shdemo:City ;
    shdemo:population 3769495 ;
    .

# intended violation: ISO code is lower case (the shape asks for upper-case letters)
extax:France a shdemo:Country ;
    shdemo:isoCode "fr" ;
    shdemo:capital "Paris" ;
    shdemo:population 2140526 ;
    .

extax:Paris a shdemo:City ;
    shdemo:population 2140526 ;
    .

# intended violations: three-character ISO code and two capitals (at most one allowed)
extax:Italy a shdemo:Country ;
    shdemo:isoCode "ITA" ;
    shdemo:capital "Rome" ;
    shdemo:capital "Paris" ;
    shdemo:population 2872800 ;
    .

# note: the shapes put no range constraint on population, so 0 is not reported
extax:Rome a shdemo:City ;
    shdemo:population 0 ;
    .

"""


In [None]:
# Parse the conforming example data into its own rdflib graph (the data graph
# for the first validation run).
valid_graph = rdflib.Graph()
valid_graph.parse(format="turtle", data=valid_dataset)


In [None]:
# Validate the valid dataset with pyshacl.
# Validator reads its settings from the `options` dict; settings passed as plain
# keyword arguments to the constructor (as `allow_warnings=True` was before) are
# not applied.
v = Validator(
    valid_graph,
    shacl_graph=shape_graph,
    options={"allow_warnings": True},
)

conforms, results_graph, results_text = v.run()

# print the report as text (readable line breaks); the cell value is the verdict
print(results_text)
conforms

In [None]:
# Parse the deliberately non-conforming example data into its own rdflib graph
# (the data graph for the second validation run).
invalid_graph = rdflib.Graph()
invalid_graph.parse(format="turtle", data=invalid_dataset)

In [None]:
# Validate the invalid dataset with pyshacl.
# Validator reads its settings from the `options` dict; settings passed as plain
# keyword arguments to the constructor (as `inference` and `abort_on_error` were
# before) are not applied. "abort_on_first" is the current name of the
# "stop at the first violation" switch; False keeps collecting all violations.
v = Validator(
    invalid_graph,
    shacl_graph=shape_graph,
    options={"inference": "rdfs", "abort_on_first": False},
)

conforms, results_graph, results_text = v.run()

# print the report as text; the former tuple ended in print(...)'s None return value
print(results_text)
conforms

## Further applications of the SHACL ontology

In addition to validating RDF data, as demonstrated above, SHACL can also be used for:

- automatic data generation
- data curation (suggestion of missing data or corrections)
- data transformation


## Property Groups

## Orders