<a href="https://colab.research.google.com/github/BowieSteutel/acc-nlp-firecodes/blob/main/1D_JSON_to_SHACL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



# **Module 1D - SHACL Alignment**



*For demonstration, this module uses the small subset (3 regulations) and custom simple regulations as regulatory information, and the correct use case model (processed by module 2) as building information.*

---
# **Prepare libraries**

In [15]:
# load standard libraries
import json # for the regulatory information

In [16]:
# pySHACL (for SHACL demonstration)
!pip install pyshacl --quiet
import pyshacl

# RDFLib (for SHACL demonstration)
!pip install rdflib --quiet
import rdflib
from rdflib import Graph, Literal, URIRef, Namespace
from rdflib import RDF, RDFS, XSD, SH, OWL

# **Prepare inputs & outputs**

In [17]:
# @title Change root directory (update after downloading)

root_directory = "/content/drive/MyDrive/FINAL_CODE_THESIS" #  @param {"type":"string", "placeholder":""}
import sys
from pathlib import Path
if 'google.colab' in sys.modules:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
    %cd {root_directory}

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/FINAL_CODE_THESIS


In [18]:
# @title Define filepaths
# path_hierarchy = "output/BBL_hier_elements.csv" # @param {type:"string", "placeholder":"path to hierarchy, relative to file_path (.csv)"}
path_subset = "output/BBL_subset.csv" # @param {type:"string", "placeholder":"path to subset, relative to file_path (.csv)"}
use_case_correct = "output/use_case_correct.ttl" # @param {type:"string", placeholder:"(.ttl)"}
reg_json_path = "output/regulatory_information_subset_small.json" # @param {type:"string", placeholder:"(.json)"}
ont_path = "input/custom_ontology.ttl" # @param {type:"string"}
output_shape_path = "output/shapes.ttl" # @param {type:"string", placeholder:"(.ttl)"}


---
## **Parse files**

In [19]:
# Load JSON file as dictionary
with open(reg_json_path) as json_file:
    reg_json = json.load(json_file)
reg_json

[{'INFO': {'reg_CURIE': 'bbl:C4_S4_2_P4_2_8_A4_50_SUB1',
   'label': 'Article 4.50(1)',
   'seeAlso': 'https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.8_Artikel4.50_Lid1',
   'text_en': 'A closed space is in a fire compartment.',
   'text_original': 'Een besloten ruimte ligt in een brandcompartiment.'},
  'TARGETS': {'element': 'bot:Space',
   'pset': 'pset:Other',
   'property': 'props:Isenclosed'},
  'CONSTRAINTS': {'containment_path': 'ex:locatedInCompartment',
   'containment_class': 'ex:FireCompartment'}},
 {'INFO': {'reg_CURIE': 'bbl:C4_S4_2_P4_2_8_A4_50_SUB3',
   'label': 'Article 4.50(3)',
   'seeAlso': 'https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.8_Artikel4.50_Lid3',
   'text_en': 'A road tunnel tube with a length of more than 250 m is in a fire compartment.',
   'text_original': 'Een wegtunnelbuis met een lengte van meer dan 250 m ligt in een brandcompartiment.'},
  'TARGETS': {'element': 'ifc:Ifc

In [20]:
# Parse data graphs (for demonstration)
data_graph_correct = Graph()
data_graph_correct.parse(use_case_correct, format="turtle")
print(data_graph_correct.serialize(format="turtle")[:1000])

@prefix beo: <https://w3id.org/beo#> .
@prefix bot: <https://w3id.org/bot#> .
@prefix ex: <https://example.org/ns#> .
@prefix ifc: <https://w3id.org/ifc/IFC4X3_ADD2#> .
@prefix inst: <https://linkedbuildingdata.net/ifc/resources20250531_181504/> .
@prefix props: <https://w3id.org/props#> .
@prefix pset: <https://example.org/pset#> .
@prefix qudt: <http://qudt.org/schema/shacl/qudt/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix unit: <https://qudt.org/vocab/unit/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

inst:FlowTerminal_33464 a bot:Element,
        ifc:IfcFlowTerminal ;
    rdfs:label "Plumbing_Fixtures_WC:540x360x410mm:367489"^^xsd:string ;
    ex:hasMaterial inst:Material_26888,
        inst:Material_33448 ;
    pset:Constraints [ rdfs:label "Constraints"^^xsd:string ;
            props:DefaultElevation [ qudt:NumericValue 0e+00 ;
                    qudt:hasUnit unit:M ] ;
            props:ElevationFromLevel [ qudt:NumericValue 0e+00 ;
           

In [21]:
# Parse ontology graph
ont_graph = Graph()
ont_graph.parse(ont_path, format="turtle")
print(ont_graph.serialize(format="turtle"))

@prefix bot: <https://w3id.org/bot#> .
@prefix ex: <https://example.org/ns#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

ex:ProtectedSubFireCompartment a owl:Class ;
    rdfs:subClassOf ex:SubFireCompartment .

ex:adjacentCompartment a owl:ObjectProperty,
        owl:SymmetricProperty ;
    rdfs:comment "Only fire compartments of the same type can be considered adjacent" ;
    rdfs:domain ex:FireCompartment ;
    rdfs:range ex:FireCompartment ;
    owl:propertyDisjointWith ex:locatedInCompartment,
        ex:partOfCompartment .

ex:hasMaterial a owl:ObjectProperty ;
    rdfs:domain bot:Element ;
    rdfs:range ex:Material .

ex:locatedInCompartment a owl:ObjectProperty,
        owl:TransitiveProperty ;
    rdfs:comment "Subject cannot be subclass of object" ;
    rdfs:domain bot:Zone ;
    rdfs:range ex:FireCompartment ;
    owl:propertyDisjointWith ex:partOfCompartment .

ex:Material a owl:Class ;
    owl:disjointWith bot:

---
# **Prepare example demonstration**

(using the data graph from module 2 and the SHACL report reading function from module 3)

In [22]:
def read_SHACL_report(data_graph, shapes, ontology=None):
    # Prepare default namespaces + custom namespaces only used for the shapes graph
    l_default_namespaces = [
        f"@prefix rdf: <{RDF}> .",
        f"@prefix rdfs: <{RDFS}> .",
        f"@prefix xsd: <{XSD}> .",
        f"@prefix sh: <{SH}> .",
        f"@prefix owl: <{OWL}> .",
        "@prefix bbl: <https://example.org/BBL#> .",
    ]

    # add default namespaces to mapping dictionary
    default_namespace_map = {}
    for i in l_default_namespaces:
        default_namespace_map[i.split(":")[0].split("@prefix")[1].lstrip(" ")] = i.split("<")[1].split(">")[0]

    # find namespaces in data graph
    l_namespaces = []
    for i in data_graph.serialize(format="turtle").format("UTF-8").split('\n'):
        # check if current line contains a prefix
        if "@prefix" in i:
            l_namespaces.append(i)

    # add data graph namespaces to mapping dictionary
    namespace_map = {}
    for i in l_namespaces:
        namespace_map[i.split(":")[0].split("@prefix")[1].lstrip(" ")] = i.split("<")[1].split(">")[0]

    # check if namespaces from the data graph clash with the default namespaces (which should not happen)
    common_namespaces = set(default_namespace_map.keys()) & set(namespace_map.keys())  # Find overlapping prefixes
    mismatched_namespaces = [URL for URL in common_namespaces if default_namespace_map[URL] != namespace_map[URL]]  # Find mismatches

    if mismatched_namespaces:  #stops at error
        print("NAMESPACE CLASHES FOUND")
        for ns in mismatched_namespaces:
            print(f"@prefix {ns}: change {default_namespace_map[ns]} in data graph to {namespace_map[ns]}")
        return False
    else: # if no clashes are found, add merge the namespace dictionaries and continue
        l_namespaces = l_default_namespaces + l_namespaces
        namespace_map = {**default_namespace_map, **namespace_map}

    shapes_graph = '\n'.join(l_namespaces)+'\n\n'
    if type(shapes) == list: #for multiple shape inputs
        shapes_graph += '\n'.join(shapes)
    else: # for a single shape inpout
        shapes_graph += shapes

    # validate data graph with SHACL shapes
    if ontology: # if an ontology is given for inference
        conforms, report_graph, report_text = pyshacl.validate(
            data_graph,
            shacl_graph=shapes_graph,
            data_graph_format="ttl",
            shacl_graph_format="ttl",
            ont_graph=ontology,
            ont_graph_format="ttl",
            do_owl_imports=False, # allow for owl:imports in shapes graph or ontology graph
            inference="rdfs", # One of "rdfs", "owlrl", "both", "none", or None
            abort_on_first=False,
            allow_infos=False,
            allow_warnings=False,
            meta_shacl=False,
            advanced=True,
            js=False,
            debug=False,
            serialize_report_graph="ttl"
            )
    else: # if no ontology is given for inference
        conforms, report_graph, report_text = pyshacl.validate(
            data_graph,
            shacl_graph=shapes_graph,
            data_graph_format="ttl",
            shacl_graph_format="ttl",
            inference="none",
            abort_on_first=False,
            allow_infos=False,
            allow_warnings=False,
            meta_shacl=False,
            advanced=True,
            js=False,
            debug=False,
            serialize_report_graph="ttl"
            )


    # Parse report graph
    g = Graph()
    g.parse(report_graph, format='turtle')

    # Directly save SHACL report (optional)
    # g.serialize(destination=path+"output/compliance_report.ttl")

    # Make sure shapes_graph is still an RDF Graph
    if isinstance(shapes_graph, str):
        shapes_graph = Graph().parse(data=shapes_graph, format="turtle")

    # Enrich SHACL report with rdfs:label, rdfs:comment, rdfs:seeAlso
    for result in g.subjects(RDF.type, SH.ValidationResult):
        source_shape = g.value(result, SH.sourceShape)
        if source_shape:
            # Fetch metadata from the SHACL shapes graph
            for label in shapes_graph.objects(source_shape, RDFS.label):
                g.add((result, RDFS.label, label))
            for comment in shapes_graph.objects(source_shape, RDFS.comment):
                g.add((result, RDFS.comment, comment))
            for seeAlso in shapes_graph.objects(source_shape, RDFS.seeAlso):
                g.add((result, RDFS.seeAlso, seeAlso))

    # Directly save the enriched SHACL report (optional)
    # g.serialize(destination=path+"output/enriched_compliance_report.ttl")

    # Functions to convert the SHACL report into a dictionary
    def replace_with_prefix(uri, namespace_map):
        for prefix, full_uri in namespace_map.items():
            if uri.startswith(full_uri):
                return uri.replace(full_uri, f"{prefix}:")
        return uri  # Return the URI as is if no prefix matches

    def convert_result(r, g, namespace_map):
        if not g.value(r, SH.focusNode):
            return None
        #print(g.value(r, SH.focusNode))
        converted_results = g.value(r, SH.focusNode)
        converted_results = {
            "focusNode": str(g.value(r, SH.focusNode)),
            "value": str(g.value(r, SH.value)),
            "resultMessage": str(g.value(r, SH.resultMessage)),
            "resultSeverity": str(g.value(r, SH.resultSeverity)),
            "sourceConstraintComponent": str(g.value(r, SH.sourceConstraintComponent)),
            "sourceShape": str(g.value(r, SH.sourceShape)),
            #"group": str(g.value(r, SH.group)),
            # "label": str(g.value(r, RDFS.label)),
            # "comment": str(g.value(r, RDFS.comment)),
            # "seeAlso": str(g.value(r, RDFS.seeAlso)),
            # "name": str(g.value(r, SH.name)),
            # "path": str(g.value(r, SH.path)),
        }

        # Add RDFS info, if present
        if g.value(r, RDFS.label):
            converted_results["label"] = str(g.value(r, RDFS.label))
        if g.value(r, RDFS.comment):
            converted_results["comment"] = str(g.value(r, RDFS.comment))
        if g.value(r, RDFS.seeAlso):
            converted_results["seeAlso"] = str(g.value(r, RDFS.seeAlso))

        # replace URIs with namespaces, if present
        for i in converted_results:
            if converted_results[i] is not None:
                converted_results[i] = replace_with_prefix(converted_results[i], namespace_map)
        return converted_results



    # Iterate through the validation results in the graph
    validation_results = []
    result_dict = {}
    l_results = [] # to avoid duplicates
    for result in g.subjects(RDF.type, SH.ValidationResult):
        # filter out constraints from blank nodes (which would otherwise appear twice)
        if g.value(result, SH.sourceConstraintComponent) not in ["sh:OrConstraintComponent", "sh:AndConstraintComponent", "sh:NotConstraintComponent", "sh:XoneConstraintComponent"]:
            details = g.value(result, SH.detail)
            if details and details not in l_results:
                l_results.append(result)
                l_results.append(details)
                result_dict = convert_result(result, g, namespace_map)
                result_dict["details"] = convert_result(details, g, namespace_map)
                validation_results.append(result_dict)

            elif result not in l_results: # avoids duplicates
                l_results.append(result)
                result_dict = convert_result(result, g, namespace_map)
                validation_results.append(result_dict)
            result_dict = {}

    # function for splitting printed results after a certain amount of characters
    def wrap_result(text, n=130):
        words = text.split()
        result = []
        current_line = ""

        for word in words:
            if len(current_line) + len(word) + 1 <= n:
                current_line += (" " + word) if current_line else word
            else:
                result.append(current_line)
                current_line = word

        if current_line:
            result.append(current_line)

        return '\n'.join(result)

    # If no issues are found, return empty
    if len(validation_results) == 0:
        print("Model complies with regulations")
        return None, None
    # Return as textual report
    for result in validation_results:
            # get article name or source shape name
            try:
                label = result['label'] + " - "
            except:
                label = ""

            # try to find reference
            try:
                details = result['details']
                print(f'{result["resultSeverity"][3:].title()} for {result["focusNode"]}')# Violation details:")
                # If rdfs:comment is included, print this before the message
                try:
                    print(f'{label}"{wrap_result(result["comment"])}"')
                    print(f'{result["resultSeverity"][3:].title()} : {wrap_result(result["resultMessage"])}')
                except:
                    print(f'{label}{wrap_result(result["resultMessage"])}')

                # Print result message, with directly referenced values if present
                if details['value'] and wrap_result(details['resultMessage']).lower().startswith('value'):
                    print(f"{details['value']} {str(wrap_result(details['resultMessage']))[6:]}")
                else:
                    print(wrap_result(details['resultMessage']))


            except:
                # None
                print(f'{result["resultSeverity"][3:].title()} for {result["focusNode"]}')# Violation details:")
                # Get result message, with directly referenced values if present
                if result['value'] and wrap_result(result['resultMessage']).lower().startswith('value'):
                    resultmessage = f"{result['value']} {str(wrap_result(result['resultMessage']))[6:]}"
                else:
                    resultmessage = wrap_result(result['resultMessage'])

                # If rdfs:comment is included, print this before the message
                try:
                    print(f'{label}"{wrap_result(result["comment"])}"')
                    print(f'{result["resultSeverity"][3:].title()} : {resultmessage}')
                except:
                    print(f'{label}{resultmessage}')

            # Write reference, if defined
            try:
                print(f"(see also: {result['seeAlso']})")
                print()
            except:
                print()


    return g, validation_results #return both the TTL (standard) and dictionary (simplified and ready for further processing) version of the report


---
# **SHACL template examples**

Example of template possibilities included in the Proof of Concept (not an exhaustive list)

## Basic target & constraint examples

Basic regulation SHACL shape structure:

```
{reg_CURIE}
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <{URL}#{FRAGMENT}> ;
    rdfs:comment "{REGULATION_TEXT}"@en ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                {TARGET}
            }
        """ ;
    ] ;
    sh:property {reg_CURIE}_PROP1 ;
    sh:property {reg_CURIE}_PROP2 ;
    (...)

{reg_CURIE}_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <{URL}#{FRAGMENT}> ;
    rdfs:comment "{REGULATION_TEXT}"@en ;
    {CONSTRAINTS}

{reg_CURIE}_PROP2
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <{URL}#{FRAGMENT}> ;
    rdfs:comment "{REGULATION_TEXT}"@en ;
    {CONSTRAINTS}

(...)
```



### Subject class check

**Subject class target template:**
```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a {SUBJECT_CLASS} .
            }
        """ ;
    ] ;
```


**Subject class constraint template**
```
{reg_CURIE}_PROP1
    a sh:PropertyShape ;
    sh:path rdf:type ;
    sh:hasValue {SUBJECT_CLASS}
    ] .
```



**Example**

Here is a SHACL shape checking whether inst:Space_930 is a wall (which it isn't):


In [23]:
shape = '''

bbl:SHAPE
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "A space is located in a sub-fire compartment"@en ;
    sh:targetNode inst:Space_930 ;
    sh:property bbl:SHAPE_PROP1 .

bbl:SHAPE_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "A space is located in a sub-fire compartment"@en ;
    sh:path rdf:type ;
    sh:hasValue bot:Wall .
'''

report_graph, report_dict = read_SHACL_report(data_graph_correct, shape, ont_graph)

Violation for inst:Space_930
Article X.XX/X - "A space is located in a sub-fire compartment"
Violation : Node inst:Space_930->rdf:type does not contain a value in the set: ['bot:Wall']
(see also: http://example.org)



### Object class check (existential)

(e.g. for topology)

**Object class target template**

```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a {SUJECT_CLASS} .
                ?this {PATH} ?X .
                ?X a {OBJECT_CLASS} .
            }
            GROUPBY ?this
            HAVING (COUNT(?X) >= {CARDINALITY_MIN})
            HAVING (COUNT(?X) <= {CARDINALITY_MAX})
        """ ;
    ] ;

```


**Object class constraint template**

```
{reg_CURIE}_PROP1
    a sh:PropertyShape ;
    sh:path {PATH} ;
    sh:class {OBJECT_CLASS} ;
    sh:minCount {CARDINALITY_MIN} ;
    sh:maxCount {CARDINALITY_MAX} ;
```

**Constraint example**

In [24]:
shape = '''

bbl:SHAPE
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "A space is located in a sub-fire compartment"@en ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a bot:Space .
            }
        """ ;
    ] ;
    sh:property bbl:SHAPE_PROP1 .

bbl:SHAPE_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "A space is located in a sub-fire compartment"@en ;
    sh:path ex:locatedInCompartment ;
    sh:class ex:SubFireCompartment ;
    sh:minCount 1 .
'''

report_graph, report_dict = read_SHACL_report(data_graph_correct, shape, ont_graph)

Model complies with regulations


### Direct object property check (incomplete)

(e.g. for topology)

WIP

### Object class nested check (incomplete)

(e.g. for materials)

Nested check based on an object of a specific class related to the subject:

**Object property target template**

```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a {SUJECT_CLASS} .
                ?this {PATH} ?X .
                ?X a {OBJECT_CLASS} .
                ?X ...
            }
            GROUPBY ?this
            HAVING (COUNT(?X) >= {CARDINALITY_MIN})
            HAVING (COUNT(?X) <= {CARDINALITY_MAX})
        """ ;
    ] ;

```


**Object class constraint template**

```
{reg_CURIE}_PROP1
    a sh:PropertyShape ;
    sh:path {PATH} ;
    sh:class {OBJECT_CLASS} ;
    sh:node [
        sh:property [
          ...
    ] ;
```

**Constraint example**

In [25]:
shape = '''

bbl:SHAPE
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "A space is located in a sub-fire compartment" ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a bot:Space .
            }
        """ ;
    ] ;
    sh:property bbl:SHAPE_PROP1 .

bbl:SHAPE_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "A space is located in a sub-fire compartment" ;
    sh:path ex:locatedInCompartment ;
    sh:class ex:SubFireCompartment ;
    sh:minCount 1 .
'''

report_graph, report_dict = read_SHACL_report(data_graph_correct, shape, ont_graph)

Model complies with regulations


### Quantity check (exact)

**Quantity target template**

Units will not used in SPARQL queries since they might result in false positives if the units are incorrect

---

**CHECK IF THERE IS ALSO A UNIT PRESENT. IF SO, THE VALUE IS IN A BLANK NODE:**

```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a {OBJECT_CLASS} .
                ?this {PSET} ?pset .
                ?pset {PROPERTY} ?prop .
                ?prop qudt:NumericValue ?numvalue .
                FILTER (?numvalue {COMPARISON} {VALUE})
            }
            GROUPBY ?this
            HAVING (COUNT(?prop) = 1)
        """ ;
    ] ;

```

**ELSE:**


```
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a {OBJECT_CLASS} .
                ?this {PSET} ?pset .
                ?pset {PROPERTY} ?prop .
                FILTER (?prop {COMPARISON} {VALUE})
            }
            GROUPBY ?this
            HAVING (COUNT(?prop) = 1)
        """ ;
    ] ;

```


**Quantity target example**

In [26]:
shape = '''
bbl:SHAPE
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls with a length of at least 11 m should have a maximum area of 30 m." ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a beo:Wall .
                ?this pset:Dimensions ?pset .
                ?pset props:Length ?prop .
                ?prop qudt:NumericValue ?numvalue .
                FILTER (?numvalue >= 11)
            }
        """ ;
    ] ;
    sh:property bbl:SHAPE_PROP1 ;
    sh:property bbl:SHAPE_PROP2 .


bbl:SHAPE_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls with a length of at least 11 m should have a maximum area of 30 m." ;
    sh:path ( pset:Dimensions props:Area qudt:NumericValue ) ;
    sh:maxInclusive 30 ;
    sh:minCount 1 .

bbl:SHAPE_PROP2
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls with a length of at least 11 m should have a maximum area of 30 m." ;
    sh:path ( pset:Dimensions props:Area qudt:hasUnit ) ;
    sh:hasValue unit:M2 ;
    sh:minCount 1 .
'''

report_graph, report_dict = read_SHACL_report(data_graph_correct, shape, ont_graph)

Violation for inst:Wall_1514
Article X.XX/X - "Walls with a length of at least 11 m should have a maximum area of 30 m."
Violation : 34.6783 is not <= Literal("30", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Wall_1546
Article X.XX/X - "Walls with a length of at least 11 m should have a maximum area of 30 m."
Violation : 36.3073 is not <= Literal("30", datatype=xsd:integer)
(see also: http://example.org)



Example of false positive where no violations are shown due to different units used:

In [27]:
shape = '''
bbl:SHAPE
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls with a length of at least 11 m should have a maximum area of 30 m."@en ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a beo:Wall .
                ?this pset:Dimensions ?pset .
                ?pset props:Length ?prop .
                ?prop qudt:NumericValue ?numvalue .
                ?prop qudt:hasUnit ?unit .
                FILTER (?numvalue >= 11)
                FILTER (?unit = unit:MilliM)
            }
        """ ;
    ] ;
    sh:property bbl:SHAPE_PROP1 ;
    sh:property bbl:SHAPE_PROP2 .


bbl:SHAPE_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls with a length of at least 11 m should have a maximum area of 30 m."@en ;
    sh:path ( pset:Dimensions props:Area qudt:NumericValue ) ;
    sh:maxInclusive 30 ;
    sh:minCount 1 ;
    sh:maxCount 1 .

bbl:SHAPE_PROP2
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls with a length of at least 11 m should have a maximum area of 30 m."@en ;
    sh:path ( pset:Dimensions props:Length qudt:hasUnit ) ;
    sh:hasValue unit:M ;
    sh:minCount 1 ;
    sh:maxCount 1 .
'''

report_graph, report_dict = read_SHACL_report(data_graph_correct, shape, ont_graph)

Model complies with regulations


NOTE: false positives are still not completely eliminated but they will occur less using the first method

**Quantity constraint template**

---

**CHECK IF THERE IS ALSO A UNIT PRESENT. IF SO, THE VALUE IS IN A BLANK NODE:**
```
{reg_CURIE}_PROP1
    a sh:PropertyShape ;
    sh:path ( {PSET} {PROPERTY} qudt:NumericValue ) ;
    {COMPARISON} {VALUE} ;
    sh:minCount 1 ;
    sh:maxCount 1 .

{reg_CURIE}_PROP2
    a sh:PropertyShape ;
    sh:path ( {PSET} {PROPERTY} qudt:hasUnit ) ;
    sh:hasValue unit:{UNIT} ;
    sh:minCount 1 ;
    sh:maxCount 1 .
'''
```

**FOR UNITLESS VALUES:**
```
{reg_CURIE}_PROP1
    a sh:PropertyShape ;
    sh:path ( {PSET} {PROPERTY} ) ;
    {COMPARISON} {VALUE} ;
    sh:minCount 1 ;
    sh:maxCount 1 .
'''
```

**Example**

In [28]:
shape = '''
bbl:SHAPE
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls should have a length of at least 4 m."@en ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a beo:Wall .
            }
        """ ;
    ] ;
    sh:property bbl:SHAPE_PROP1 ;
    sh:property bbl:SHAPE_PROP2 .


bbl:SHAPE_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls should have a length of at least 4 m."@en ;
    sh:path ( pset:Dimensions props:Length qudt:NumericValue ) ;
    sh:minInclusive 4 ;
    sh:minCount 1 ;
    sh:maxCount 1 .

bbl:SHAPE_PROP2
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls should have a length of at least 4 m."@en ;
    sh:path ( pset:Dimensions props:Length qudt:hasUnit ) ;
    sh:hasValue unit:M ;
    sh:minCount 1 ;
    sh:maxCount 1 .
'''

report_graph, report_dict = read_SHACL_report(data_graph_correct, shape, ont_graph)

Violation for inst:Wall_10934
Article X.XX/X - "Walls should have a length of at least 4 m."
Violation : 3.61 is not >= Literal("4", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Wall_1786
Article X.XX/X - "Walls should have a length of at least 4 m."
Violation : 3.61 is not >= Literal("4", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Wall_10902
Article X.XX/X - "Walls should have a length of at least 4 m."
Violation : 3.61 is not >= Literal("4", datatype=xsd:integer)
(see also: http://example.org)



Alternatively, `sh:qualifiedValueShape` could be used:

```
bbl:SHAPE_PROP2
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls should have a length of at least 4 m."@en ;
    sh:path ( pset:Dimensions props:Length qudt:hasUnit ) ;
    sh:hasValue unit:M ;
    sh:minCount 1 ;
    sh:maxCount 1 .
```

### Quality check (single value)

**Quality target template**

```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a {OBJECT_CLASS} .
                ?this {PSET} ?pset .
                ?pset {PROPERTY} ?prop .
                FILTER (?prop = {VALUE})
            }
            GROUPBY ?this
            HAVING (COUNT(?2) >= {CARDINALITY_MIN})
            HAVING (COUNT(?2) <= {CARDINALITY_MAX})
        """ ;
    ] ;

```


**Quality constraint Template**

```
{reg_CURIE}_PROP1
    a sh:PropertyShape ;
    sh:path ( {PSET} {PROPERTY} qudt:NumericValue ) ;
    sh:hasValue {VALUE} ;
    sh:minCount {CARDINALITY_MIN} ;
    sh:maxCount {CARDINALITY_MAX} ;

```


**Constraint example**

In [29]:
shape = '''
bbl:SHAPE
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls should have a length of at least 6 m."@en ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a beo:Wall .
            }
        """ ;
    ] ;
    sh:property bbl:SHAPE_PROP1 ;
    sh:property bbl:SHAPE_PROP2 .


bbl:SHAPE_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls should have a length of at least 6 m."@en ;
    sh:path ( pset:Dimensions props:Length qudt:NumericValue ) ;
    sh:minInclusive 6 ;
    sh:minCount 1 .

bbl:SHAPE_PROP2
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "Walls should have a length of at least 6 m."@en ;
    sh:path ( pset:Dimensions props:Length qudt:hasUnit ) ;
    sh:hasValue unit:M ;
    sh:minCount 1 .
'''

report_graph, report_dict = read_SHACL_report(data_graph_correct, shape)

Violation for inst:Wall_1786
Article X.XX/X - "Walls should have a length of at least 6 m."
Violation : 3.61 is not >= Literal("6", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Wall_1727
Article X.XX/X - "Walls should have a length of at least 6 m."
Violation : 4.99 is not >= Literal("6", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Wall_10934
Article X.XX/X - "Walls should have a length of at least 6 m."
Violation : 3.61 is not >= Literal("6", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Wall_10902
Article X.XX/X - "Walls should have a length of at least 6 m."
Violation : 3.61 is not >= Literal("6", datatype=xsd:integer)
(see also: http://example.org)



### Quality check (list)

using `FILTER ( IN ( X, Y, Z) )` for targets and `sh:in ( X Y Z )` for constraints

*sh:in specifies exact literals!*

### Material check

**Subject class target template:**
```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a {OBJECT_CLASS}
                ?this ex:hasMaterial ?material .
                ?material {MATERIAL_PSET} ?mat_pset .
                ?pset {MATERIAL_PROPERTY} ?mat_prop .
                FILTER (?cat = "{MATERIAL}"^^xsd:string)
            }
        """ ;
    ] ;
```


**Subject class constraint template**
```
{reg_CURIE}_PROP1
    a sh:PropertyShape ;
    sh:path ( ex:hasMaterial {MATERIAL_PSET} {MATERIAL_PROPERTY} ) ;
    sh:hasValue "{MATERIAL}"^^xsd:string ;
    sh:minCount 1 ;
    ] .
```



**Example**

Here is a SHACL shape checking whether doors are made of steel (which they are not), and one checking whether they are made of wood (which they are):


In [30]:
shape = '''
bbl:SHAPE
  a sh:NodeShape ;
  sh:targetClass beo:Door ;
  sh:property [
    sh:path (ex:hasMaterial pset:Identity props:Category ) ;
    sh:hasValue "Glass"^^xsd:string ;
    sh:MinCount 1 ;
    ] .
'''
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape, ont_graph)

shape = '''
bbl:SHAPE
  a sh:NodeShape ;
  sh:targetClass beo:Door ;
  sh:property [
    sh:path (ex:hasMaterial pset:Identity props:Category ) ;
    sh:hasValue "Wood"^^xsd:string ;
    sh:MinCount 1 ;
    ] .
'''
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape, ont_graph)

Violation for inst:Door_9127
Node inst:Door_9127->( ex:hasMaterial pset:Identity props:Category ) does not contain a value in the set: ['Literal("Glass",
datatype=xsd:string)']

Violation for inst:Door_39685
Node inst:Door_39685->( ex:hasMaterial pset:Identity props:Category ) does not contain a value in the set: ['Literal("Glass",
datatype=xsd:string)']

Violation for inst:Door_25587
Node inst:Door_25587->( ex:hasMaterial pset:Identity props:Category ) does not contain a value in the set: ['Literal("Glass",
datatype=xsd:string)']

Violation for inst:Door_18246
Node inst:Door_18246->( ex:hasMaterial pset:Identity props:Category ) does not contain a value in the set: ['Literal("Glass",
datatype=xsd:string)']

Model complies with regulations


### Datatype check

**Datatype target template**

```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                (...)
                FILTER ( datatype(?X) = {datatype} )
                FILTER ( !isLiteral(?X) ) # for typeless values
            }
        """ ;
    ] ;
```

**Datatype constraint template**

```
{reg_CURIE}_PROP1
    a sh:PropertyShape ;
      sh:path {PATH} ;
      sh:dataType {datatype} ;
      sh:nodeKind sh:Literal ; #for literals
```

### Cardinality check

**Target cardinality template**


Cardinality not used in SPARQL queries since this might result in false positives if the data graph is not well-formed

```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                (...)
            }
            GROUPBY ?this
            HAVING (COUNT(?X) >= {CARDINALITY_MIN})
            HAVING (COUNT(?X) <= {CARDINALITY_MAX})
        """ ;
    ] ;

```


**Constraint cardinality Template**

```
    sh:path {PATH}
    sh:minCount {CARDINALITY_MIN} ;
    sh:maxCount {CARDINALITY_MAX} ;

```


## Clause logic templates (targets)

### Applicability (UNION)


**UNION=AND/OR (inclusive) for target classes, and UNION=OR (inclusive) for other parts of target!**

Requires an extra nested shape to keep information in report, with target in parent shape


```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
            a sh:SPARQLTarget ;
            sh:select """
                SELECT DISTINCT ?this WHERE {
                    { ?this a {CLASS_1} . }
                    UNION
                    { ?this a {CLASS_2} . }
                    UNION
                    { ?this a {CLASS_3} . }
                }
            """ ;
        ] ;
```

**EXAMPLE**

In [31]:
shape_UNION = '''
bbl:UNION
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "All windows and walls should have an area of at least 3 m2"@en ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                { ?this a beo:Wall . }
                UNION { ?this a beo:Window . }
            }
        """ ;
    ] ;
    sh:property bbl:UNION_PROP1 ;
    sh:property bbl:UNION_PROP2 .

bbl:UNION_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "All windows and walls should have an area of at least 3 m2"@en ;
    sh:path ( pset:Dimensions props:Area qudt:NumericValue ) ;
    sh:minInclusive 3 ;
    sh:minCount 1 .

bbl:UNION_PROP2
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "All windows and walls should have an area of at least 3 m2"@en ;
    sh:path ( pset:Dimensions props:Area qudt:hasUnit ) ;
    sh:hasValue unit:M2 ;
    sh:minCount 1 .
'''

report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_UNION, ont_graph)

Violation for inst:Window_10033
Article X.XX/X - "All windows and walls should have an area of at least 3 m2"
Violation : 1.995144 is not >= Literal("3", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Window_10101
Article X.XX/X - "All windows and walls should have an area of at least 3 m2"
Violation : 1.995144 is not >= Literal("3", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Window_10125
Article X.XX/X - "All windows and walls should have an area of at least 3 m2"
Violation : 1.995144 is not >= Literal("3", datatype=xsd:integer)
(see also: http://example.org)



### Exception (MINUS)

Same as above, but with MINUS instead of UNION:
```
{reg_CURIE}
    a sh:NodeShape ;
    sh:target [
            a sh:SPARQLTarget ;
            sh:select """
                SELECT DISTINCT ?this WHERE {
                    { ?this a {CLASS_1} . }
                    MINUS
                    { ?this a {CLASS_2} . }
                    MINUS
                    { ?this a {CLASS_3} . }
                }
            """ ;
        ] ;
```

In [32]:
shape_MINUS = '''

bbl:MINUS
    a sh:NodeShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "All elements except for walls, doors, plates and windows should always be internal."@en ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                { ?this a bot:Element . }
                MINUS
                { ?this a beo:Wall . }
                MINUS
                { ?this a beo:Door . }
                MINUS
                { ?this a beo:Plate . }
                MINUS
                { ?this a beo:Window . }
            }
        """ ;
    ] ;
    sh:property bbl:MINUS_PROP1 .

bbl:MINUS_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX/X" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "All elements except for walls, doors, plates and windows should always be internal."@en ;
    sh:path ( pset:Other props:Isexternal ) ;
    sh:hasValue false ;
    sh:minCount 1 .

'''

report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_MINUS, ont_graph)

Violation for inst:Furniture_45580
Article X.XX/X - "All elements except for walls, doors, plates and windows should always be internal."
Violation : Less than 1 values on inst:Furniture_45580->( pset:Other props:Isexternal )
(see also: http://example.org)

Violation for inst:Slab_1153
Article X.XX/X - "All elements except for walls, doors, plates and windows should always be internal."
Violation : Less than 1 values on inst:Slab_1153->( pset:Other props:Isexternal )
(see also: http://example.org)

Violation for inst:Furniture_29891
Article X.XX/X - "All elements except for walls, doors, plates and windows should always be internal."
Violation : Less than 1 values on inst:Furniture_29891->( pset:Other props:Isexternal )
(see also: http://example.org)

Violation for inst:Furniture_26390
Article X.XX/X - "All elements except for walls, doors, plates and windows should always be internal."
Violation : Node inst:Furniture_26390->( pset:Other props:Isexternal ) does not contain a value in t

**EXAMPLE:** All check length of all elements except for doors and windows

### INTERSECT?

Left out, since sometimes "and" is in sentence, but "or" is meant.

**Alternatively: use INTERSECT for every "and" in target except when referring to object class?**

What to do for a case like:

"A wall with a minimum height of 2m and a minimum length of 5m ..."

## Conditional clause templates


Conditional clauses:

1.  **(EL)IF → THEN:** current target (MINUS previous targets) → constraints
1.  **ELSE: (not implemented yet)** all base class targets MINUS all other targets → constraints

ELSE only applies when there is  no explicit target for a conditional clause in JSON file?


Negative conditions, such as "Unless (target), (constraints)", are converted as: base target MINUS target (= exception) → constraints

### IF-THEN

This can be done with the regular target+contraint shape:

In [33]:
shape_IFTHEN = '''
bbl:SHAPE_IFTHEN
    a sh:NodeShape ;
    rdfs:label "Article X.XX" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "If a wall is external, it should have a length of at least 12 m."@en ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                ?this a beo:Wall .
                ?this pset:Common ?pset .
                ?pset props:Isexternal ?prop.
                FILTER (?prop = True)
            }
        """ ;
    ] ;
    sh:property bbl:SHAPE_IFTHEN_PROP1 .

bbl:SHAPE_IFTHEN_PROP1
    rdfs:label "Article X.XX" ;
    rdfs:seeAlso <http://example.org> ;
    rdfs:comment "If a wall is external, it should have a length of at least 12 m."@en ;
    a sh:PropertyShape ;
    sh:path ( pset:Dimensions props:Length qudt:NumericValue ) ;
    sh:minInclusive 12 ;
    sh:minCount 1 .


'''
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_IFTHEN, ont_graph)

Violation for inst:Wall_1402
Article X.XX - "If a wall is external, it should have a length of at least 12 m."
Violation : 10.0 is not >= Literal("12", datatype=xsd:integer)
(see also: http://example.org)



Also not implemented: nested targets.

For example:

"... applies to:

a. (target)

b. (target), if (condition)"



### IF-THEN-ELIF-THEN

Even when else is not present, it should be included as an empty node in `sh:or` to not return false negatives?

In [34]:
shape_IFTHENELSE = '''
bbl:SHAPE_IFTHENELIF
    a sh:NodeShape ;
    rdfs:label "Article X.XX" ;
    rdfs:comment "If a wall is external, it should have a length of at least 12 m. Else, if a wall has an area of at least 14m, it should have a height of at least 5m."@en ;
    rdfs:seeAlso <http://example.org> ;
    sh:and (
      [
          sh:target [
              a sh:SPARQLTarget ;
              sh:select """
                  SELECT DISTINCT ?this WHERE {
                      ?this a beo:Wall .
                      ?this pset:Common ?1 .
                      FILTER (?2 = True)
                      ?1 props:Isexternal ?2.
                  }
              """ ;
          ] ;
          sh:property bbl:SHAPE_IFTHENELIF_PROP1 ;
      ] [
          sh:target [
              a sh:SPARQLTarget ;
              sh:select """
                  SELECT DISTINCT ?this WHERE {
                      { ?this a beo:Wall .
                      ?this pset:Dimensions ?pset .
                      ?pset props:Area ?prop.
                      ?prop qudt:NumericValue ?numvalue.
                      FILTER (?numvalue >= 14) }
                      MINUS
                      { ?this a beo:Wall .
                      ?this pset:Common ?pset .
                      ?pset props:Isexternal ?prop .
                      FILTER (?prop = True) }
                  }
              """ ;
          ] ;
          sh:property bbl:SHAPE_IFTHENELIF_PROP2 ;
      ]
    ) .

bbl:SHAPE_IFTHENELIF_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX" ;
    rdfs:comment ">>If a wall is external, it should have a length of at least 12 m.<< Else, if a wall has an area of at least 14m, it should have a height of at least 5m."@en ;
    rdfs:seeAlso <http://example.org> ;
    sh:path ( pset:Dimensions props:Length qudt:NumericValue ) ;
    sh:minInclusive 12 ;
    sh:minCount 1 .

bbl:SHAPE_IFTHENELIF_PROP2
    a sh:PropertyShape ;
    rdfs:label "Article X.XX" ;
    rdfs:comment "If a wall is external, it should have a length of at least 12 m. >>Else, if a wall has an area of at least 14m, it should have a height of at least 5m.<<"@en ;
    rdfs:seeAlso <http://example.org> ;
    sh:path ( pset:Dimensions props:Length qudt:NumericValue ) ;
    sh:minInclusive 5 ;
    sh:minCount 1 .

'''
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_IFTHENELSE, ont_graph)

Violation for inst:Wall_1402
Article X.XX - ">>If a wall is external, it should have a length of at least 12 m.<< Else, if a wall has an area of at least 14m, it should have a
height of at least 5m."
Violation : 10.0 is not >= Literal("12", datatype=xsd:integer)
(see also: http://example.org)



### IF-THEN-ELSE

*not implemented yet*

This can be done with an `or` statement
This code assumes that the target class(es) remains the same, which should be mentioned both in the main shape and the blank nodes

```
sh:and (
    [
      sh:target [
          a sh:SPARQLTarget ;
          sh:select """
              SELECT DISTINCT ?this WHERE {
                    (TARGET)
          """ ;
      ] ;
      (
    ] [

    ] )

```

"it" in this case refers to "a wall", and should first be replaced with coreference resolution

In [35]:
shape_IFTHENELSE = '''
bbl:SHAPE_IFTHENELSE
    a sh:NodeShape ;
    rdfs:label "Article X.XX" ;
    rdfs:comment "If a wall is external, it should have a length of at least 12 m. Else, it should have a length of at least 4 m."@en ;
    rdfs:seeAlso <http://example.org> ;
    sh:and (
      [
          sh:target [
              a sh:SPARQLTarget ;
              sh:select """
                  SELECT DISTINCT ?this WHERE {
                      ?this a beo:Wall .
                      ?this pset:Common ?1 .
                      FILTER (?2 = True)
                      ?1 props:Isexternal ?2.
                  }
              """ ;
          ] ;
          sh:property bbl:SHAPE_IFTHENELSE_PROP1 ;
      ] [
          sh:target [
              a sh:SPARQLTarget ;
              sh:select """
                  SELECT DISTINCT ?this WHERE {
                      { ?this a beo:Wall . }
                      MINUS
                      {?this a beo:Wall .
                      ?this pset:Common ?1 .
                      ?1 props:Isexternal ?2.
                      FILTER (?2 = True) }
                  }
              """ ;
          ] ;
          sh:property bbl:SHAPE_IFTHENELSE_PROP2 ;
      ]
    ) .

bbl:SHAPE_IFTHENELSE_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.XX" ;
    rdfs:comment "If a wall is external, it should have a length of at least 12 m. Else, it should have a length of at least 4 m."@en ;
    rdfs:seeAlso <http://example.org> ;
    sh:path ( pset:Dimensions props:Length qudt:NumericValue ) ;
    sh:minInclusive 12 ;
    sh:minCount 1 .

bbl:SHAPE_IFTHENELSE_PROP2
    a sh:PropertyShape ;
    rdfs:label "Article X.XX" ;
    rdfs:comment "If a wall is external, it should have a length of at least 12 m. Else, it should have a length of at least 4 m."@en ;
    rdfs:seeAlso <http://example.org> ;
    sh:path ( pset:Dimensions props:Length qudt:NumericValue ) ;
    sh:minInclusive 5 ;
    sh:minCount 1 .

'''
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_IFTHENELSE, ont_graph)

Violation for inst:Wall_10902
Article X.XX - "If a wall is external, it should have a length of at least 12 m. Else, it should have a length of at least 4 m."
Violation : 3.61 is not >= Literal("5", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Wall_1402
Article X.XX - "If a wall is external, it should have a length of at least 12 m. Else, it should have a length of at least 4 m."
Violation : 10.0 is not >= Literal("12", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Wall_10934
Article X.XX - "If a wall is external, it should have a length of at least 12 m. Else, it should have a length of at least 4 m."
Violation : 3.61 is not >= Literal("5", datatype=xsd:integer)
(see also: http://example.org)

Violation for inst:Wall_1727
Article X.XX - "If a wall is external, it should have a length of at least 12 m. Else, it should have a length of at least 4 m."
Violation : 4.99 is not >= Literal("5", datatype=xsd:integer)
(see also: http://exa

---
# **Regulatory information conversion functions** (from JSON to SHACL)



1. Choose template based on conditional structure (IF-THEN, IF-THEN-ELSE, IF-THEN-ELIF-THEN, etc)
1. Write targets
1. Write constraints


## Target


In [36]:
# function for writing triples in the SPARQL query
def writeSPARQL_triples(target, output, tab, subject, material_id=None, props_id=None):
    sep = '\n'+tab
    l_triples = [] #list of current statements
    # write triples in target dictionary

    for key, value in target.items():
        # check for subject (instance) classes
        if key == 'element':
            l_triples.append(f'  {subject} a {value} . ')

        # check class of custom triples (e.g. 'spatial_path' + 'spatial_class' )
        # THIS WILL REQUIRE CUSTOM METHODS TO STORING THE VAR ID IF YOU WERE TO REUSE THEM!
        if key.endswith('_path') and target.get(key.replace('_path', '_class')):
            path_id = key.replace('_path', '')
            l_triples.append(f'  ?this {value} ?{path_id} . ')
            l_triples.append(f'  ?{path_id} a {target[key.replace("_path", "_class")]} . ')
            #CARDINALITY?

        # check class of custom triples with reverse paths (e.g. 'spatial_invpath' + 'spatial_class' )
        # THIS WILL REQUIRE CUSTOM METHODS TO STORING THE VAR ID IF YOU WERE TO REUSE THEM!
        if key.endswith('_invpath') and target.get(key.replace('_invpath', '_class')):
            path_id = key.replace('_invpath', '')
            l_triples.append(f'  ?{path_id} {value} ?this . ')
            l_triples.append(f'  ?{path_id} a {target[key.replace("_invpath", "_class")]} . ')

        # check value of custom triples
        if key.endswith('_predicate') and target.get(key.replace('_predicate', '_value')):
            value_id = key.replace('_predicate', '')
            l_triples.append(f'  ?this {value} ?{value_id} . ')
            l_triples.append(f'  FILTER ( ?{value_id} = {target[key.replace("_predicate", "_value")]} ) ')

        # check for materials (INCOMPLETE)
        elif key == 'material_category':
            l_triples.append(f'  {subject} ex:hasMaterial ?material . ')
            # check if material is in pset
            if target.get('material_pset'):
                l_triples.append(f'  ?material {target.get("material_pset")} ?pset . ')
                l_triples.append(f'  ?pset {target.get("material_property")} ?mat_prop . ')
            else:
                l_triples.append(f'  ?material {target.get("material_property")} ?mat_prop . ')
            l_triples.append(f'  FILTER ( ?mat_prop = "{value}"^^xsd:string ) ')


        # check for properties
        elif key == 'property':
            if target.get('pset'):
                l_triples.append(f'  {subject} {target.get("pset")} ?pset . ')
                l_triples.append(f'  ?pset {value} ?prop . ')
            else:
                pset_id = None
                l_triples.append(f'  {subject} {value} ?prop . ')

        # check for exact qualities
        elif key == 'value':
            # determine the datatype of the value
            if type(value) == str:
                value = f'"{value}"'
            else:
                value = value
            l_triples.append(f'  FILTER ( ?prop = {value}) ')

        # check for pattern-based qualities
        elif key == 'value_pattern':
            l_triples.append(f'  FILTER ( !regex(str(?prop), {value}, "i") ) ' )

        # check for quantities
        elif key == 'quantity':
            # if there is a also a unit stated, the value should be in a blank node:
            if target.get('unit'):
                l_triples.append(f'  ?prop qudt:NumericValue ?num. ')
                # check for comparison
                if target.get('comparison'):
                    l_triples.append(f'  FILTER ( ?num {target["comparison"]} {value} ) ')
                #otherwise, default to '='
                else:
                    l_triples.append(f'  FILTER ( ?num = {value} ) ')
                # check for datatype of properties
                if key.endswith('datatype'):
                    l_triples.append(f'  FILTER ( datatype( ?num = {value} ) ) ')
            # otherwise, the value is immediately checkable:
            else:
                # check for comparison
                if target.get('comparison'):
                    l_triples.append(f'  FILTER ( ?prop {target["comparison"]} {value} ) ')
                #otherwise, default to '='
                else:
                    l_triples.append(f'  FILTER ( ?prop = {value} ) ')
                # check for datatype of properties
                if key.endswith('datatype'):
                    l_triples.append(f'  FILTER ( datatype( ?prop = {value} ) ) ')

        # check for classifications
        elif key == 'classification':
            if target.get('compliant_classes'):
                l_triples.append(f'''FILTER ( ?prop IN ({', '.join('"'+x+'"' for x in target["compliant_classes"])}) )''')
            else:
                l_triples.append(f'  FILTER ( ?prop = {value} ) ')

    # join strings
    sep = '\n'+tab
    output += sep.join(l_triples)

    # check if current string is multiline
    if len(l_triples) > 1:
        multi = True

    global counter
    # check for logical operators
    for key, value in target.items():
        if key.startswith('UNION'):
            # before = counter
            output = writeSPARQL_JOIN("UNION", value, output, tab+'    ', subject, material_id, props_id)
            # counter = before #reset counter
        elif key.startswith('INTERSECT'):
            # before = counter
            output = writeSPARQL_JOIN("INTERSECT", value, output, tab+'    ', subject, material_id, props_id)
            # counter = before #reset counter
        elif key.startswith('MINUS'):
            # before = counter
            output = writeSPARQL_MINUS(value, output, tab+'    ', subject, material_id, props_id)
            # counter = before #reset counter

    return output

# function for SPARQL MINUS booleans
def writeSPARQL_MINUS(target, output, tab, subject, material_id=None, props_id=None):
    multi = False
    # put item in list if not already in list
    if type(target) != list:
        target = [target]
    # write MINUS lines (or not if only one item)
    output += f'\n{tab}MINUS\n{tab}{{'
    output += f'}}\n{tab}MINUS\n{tab}{{'.join([writeSPARQL_triples(subtarget, "", tab, subject, material_id, props_id) for subtarget in target])
    # if bracket expression is multiline, write closing curly bracket on next line. Else, write at the end of current line.
    if multi:
        output += f'}} \n{tab[:-1]}'
    else:
        output += f'}} '
    return output

# function for SPARQL other booleans
def writeSPARQL_JOIN(operator, target, output, tab, subject, material_id=None, props_id=None):
    multi = False
    # put item in list if not already in list
    if type(target) != list:
        target = [target]
    # write join lines (or not if only one item)
    output += f'\n{tab}{{'
    output += f'}}\n{tab}{operator}\n{tab}{{'.join([writeSPARQL_triples(subtarget, "", tab, subject, material_id, props_id) for subtarget in target])
    # # if expression is multiline, write closing curly bracket on next line. Else, write at the end of current line.
    if multi:
        output += f'}} \n{tab[:-1]}'
    else:
        output += f'}} '
    return output

# simplified function for converting the target dictionary to SHACL+SPARQL
def writeTarget_simple(target, tab='    '):
    # global cardinality # if working with cardinality, this is neededd
    template_target_start = f'''sh:target [
    a sh:SPARQLTarget ;
    sh:select """
        SELECT DISTINCT ?this WHERE {{'''
    output = ''
    template_target_end = f'''
    """ ;
]'''

    # write start of query
    output +=  '\n'.join(tab+x for x in template_target_start.split('\n'))

    # write content of query
    output += '\n            '+tab+'{'
    output += writeSPARQL_triples(target, "", tab+'            ', '?this')
    output += '\n            '+tab+'}'

    # write end of query
    output += f'\n{tab}'+'        }'
    output +=  '\n'.join(tab+x for x in template_target_end.split('\n'))
    return output


### Validation

In [37]:
# simple target
target_test_1 = {
    'TARGETS': {'element': 'beo:Door'}}

# multiple targets, with quantity
target_test_2 = {
    'TARGETS': {
        'UNION_1': [
            {'element': 'beo:Door', 'pset': 'pset:Dimensions', 'property': 'props:Length', 'quantity': 10, 'unit': 'unit:M'},
            {'element': 'beo:Window'}]}}

# multiple targets and exceptions
target_test_3 = {
    'TARGETS': {
        'UNION_1': [
            {'element': 'beo:Door', 'pset': 'pset:Dimensions', 'property': 'props:Length'},
            {'element': 'beo:Window'}],
        'MINUS_2': [
            {'element': 'beo:Wall'},
            {'element': 'beo:CurtainWall'}]}}

# logical operators
target_test_4 = {
    'TARGETS': {
        'UNION_1': [
            {'element': 'beo:Door',
                'UNION_2':[
                    {'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '>=', 'quantity': 20, 'unit': 'unit:M', 'cardinality': {'min': 1, 'max': 1}},
                    {'pset': 'pset:Dimensions', 'property': 'props:Width', 'quantity': 10, 'unit': 'unit:M', 'cardinality': {'min': 1, 'max': 1}}]},
            {'element': 'beo:Window'}],
        'MINUS_3': [
            {'element': 'beo:Wall'},
            {'element': 'beo:CurtainWall',
                'UNION_4':[
                    {'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '>=', 'quantity': 30, 'unit': 'unit:M'},
                    {'pset': 'pset:Dimensions', 'property': 'props:Width', 'quantity': 50, 'unit': 'unit:M'}]}]}}

# nested logical operators
target_test_5 = {
    'TARGETS': {
        'UNION_1': [
            {'element': 'beo:Door',
                'INTERSECT_2': [
                    {'pset': 'pset:Dimensions', 'property': 'props:Length',
                        'UNION_3':[
                            {'comparison': '>=', 'quantity': 5, 'unit': 'unit:M'},
                            {'comparison': '<=', 'quantity': 20, 'unit': 'unit:M'}]},
                    {'pset': 'pset:Dimensions', 'property': 'props:Height', 'comparison': '>', 'quantity': 3, 'unit': 'unit:M'}]},
            {'element': 'beo:Window', 'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '>=', 'quantity': 20, 'unit': 'unit:M'}],
        'MINUS_4': [
            {'element': 'beo:Wall'},
            {'element': 'beo:CurtainWall'}]}}

# testing for "A wall with a length or width of least 5 m ..."
target_test_6 = {
    'TARGETS': {
        'element': 'beo:Wall',
        'INTERSECT_1':[
            {'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '<', 'quantity': 5},
            {'pset': 'pset:Dimensions', 'property': 'props:Width', 'comparison': '<', 'quantity': 5}]}}

# BAD EXAMPLE: alternative to 6 with simplified dictionary, will not return a complete query with the current function
target_test_7 = {
    'TARGETS': {
        'element': 'beo:Wall',
        'INTERSECT_1':[
            {'pset': 'pset:Dimensions', 'property': 'props:Length'},
            {'pset': 'pset:Dimensions', 'property': 'props:Width'}],
        'comparison': '<', 'quantity': 5}}


# testing for "A space located in a fire compartment ..."
target_test_8 = {
    'TARGETS': {'element': 'bot:Space', 'spatial_path': 'ex:locatedInCompartment', 'spatial_class': 'ex:FireCompartment'}}



i=0
for t in [target_test_1,
          target_test_2,
          target_test_3,
          target_test_4,
          target_test_5,
          target_test_6,
          target_test_7,
          target_test_8,
          ]:
          i+=1
          print(f"target_test_{i}")
          output = writeTarget_simple(t['TARGETS'])
          print(output)
          print()

target_test_1
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                {  ?this a beo:Door . 
                }
            }    
        """ ;
    ]

target_test_2
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                {
                    {  ?this a beo:Door . 
                      ?this pset:Dimensions ?pset . 
                      ?pset props:Length ?prop . 
                      ?prop qudt:NumericValue ?num. 
                      FILTER ( ?num = 10 ) }
                    UNION
                    {  ?this a beo:Window . } 
                }
            }    
        """ ;
    ]

target_test_3
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                {
                    {  ?this a beo:Door . 
                      ?this pset:Dimensions ?pset . 
                      ?pset prop

## Constraints

In [38]:
# function for cardinality check
def checkCardinality(prop, l_properties, cardinality="cardinality"):
    # check if there is a custom cardinality
    if prop.get(cardinality):
        if prop.get(cardinality).get('min'):
            l_properties.append(f'sh:minCount {prop.get(cardinality).get("min")}')
        # else: # default min cardinality
        #     l_properties.append(f'sh:minCount 1')
        if prop.get(cardinality).get('max'):
            l_properties.append(f'sh:maxCount {prop.get(cardinality).get("max")}')
    else: # default cardinality
        l_properties.append(f'sh:minCount 1')
        # l_properties[-1] += [f'sh:maxCount 1 ']
    return l_properties

# function for finding info about the regulation
def writeInfo(reg_info, tab):
    global source_lang # find source language
    label = f'{tab}rdfs:label "{reg_info["label"]}" ;\n' if reg_info.get('label') else ""
    comment_en = f'{tab}rdfs:comment """{reg_info["text_en"]}"""@en ;\n' if reg_info.get('text_en') else ""
    comment_source = f'{tab}rdfs:comment """{reg_info["text_original"]}"""@{source_lang} ;\n' if reg_info.get('text_original') else ""
    seeAlso = f'{tab}rdfs:seeAlso <{reg_info["seeAlso"]}> ;\n' if reg_info.get('seeAlso') else ""
    return label+comment_en+comment_source+seeAlso

# function for writing a property shape
def writePropertyNode(reg_info, l_properties):
  global prop_shapes # for storing property shapes
  global propshape_id # for storing property id in name
  sep = ' ;\n' # for separation between lines
  reg_CURIE = f'{reg_info["reg_CURIE"]}_PROP{str(propshape_id)}' # property shape CURIE
  # write property shapes
  prop_shapes[reg_CURIE] = f'''
{reg_CURIE}
    a sh:PropertyShape ;
{writeInfo(reg_info, tab='    ')}{sep.join(['    '+x for x in l_properties])} .
'''
  propshape_id += 1 # increase prop id counter

# function for converting dictionary entries to SHACL properties
def writeConstraints(reg_info, prop, tab='    '):
    global propshape_id # for storing property shape ids
    dict_comparisons = { # for converting comparisons to SHACL predicates
        "=": "sh:hasValue",
        ">": "sh:minExclusive",
        ">=": "sh:minInclusive",
        "<": "sh:maxExclusive",
        "<=": "sh:maxInclusive",
    }
    sep = ' ;\n'+tab # for separation between lines
    output = "" # for the main SHACL shape code
    reg_code = reg_info.get('reg_CURIE') # for SHACL shape identifiers
    for key, value in prop.items():
        # # check for boolean logic and add to property group accordingly
        if key.startswith('AND'):
            output += ''.join([writeConstraints(reg_info, p, tab) for p in value])
        if key.startswith('NOT'):
            output += sep+f'sh:not [\n{tab}    '
            output += ''.join([writeConstraints(reg_info, p, tab+'    ') for p in value]).lstrip(sep)
            output += f' ;\n{tab}]'
        if key.startswith('OR'):
            output += sep+'sh:or ('
            output += f'\n{tab}    ['
            output += f' ;\n{tab}    ] ['.join(['\n        '+tab+writeConstraints(reg_info, p, tab+'        ').lstrip(sep) for p in value])
            output += f' ;\n{tab}    ]\n{tab})'


        # check for subject (instance) classes
        if key == 'element':
            l_properties = ['sh:path rdf:type', f'sh:hasValue {value}']
            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info, l_properties)


        # check direct object classes
        elif '_path' in key and prop.get(key.replace('_path', '_class')):
            l_properties = [f'sh:path {prop.get(key)}', f'sh:class {prop.get(key.replace("_path", "_class"))}']
            # check for cardinality
            # l_properties = checkCardinality(prop, l_properties, key.replace('_path', '_cardinality'))
            l_properties = checkCardinality(prop, l_properties, 'cardinality')
            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info, l_properties)

        # check direct inverse object classes
        elif '_invpath' in key and prop.get(key.replace('_invpath', '_class')):
            l_properties = [f'sh:path [ sh:inversePath {prop.get(key)} ]', f'sh:class {prop.get(key.replace("_invpath", "_class"))}']
            # check for cardinality
            # l_properties = checkCardinality(prop, l_properties, key.replace('_path', '_cardinality'))
            l_properties = checkCardinality(prop, l_properties, 'cardinality')
            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info, l_properties)

        # check direct object values
        elif '_predicate' in key and prop.get(key.replace('_predicate', '_value')):
            l_properties = [f'sh:path {prop.get(key)}', f'sh:hasValue {prop.get(key.replace("_path", "_value"))}']
            # check for cardinality
            # l_properties = checkCardinality(prop, l_properties, key.replace('_path', '_cardinality'))
            l_properties = checkCardinality(prop, l_properties, 'cardinality')
            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info, l_properties)

        # quality constraints
        elif key == 'material_category':
            # check whether material property is in a property set
            if prop.get('material_pset'):
                l_properties = [f'sh:path ( ex:hasMaterial {prop.get("material_pset")} {prop.get("material_property")} )']
            else:
                l_properties = [f'sh:path ( ex:hasMaterial {prop.get("material_property")} )']
            l_properties.append(f'sh:hasValue "{value}"^^xsd:string')
            # check for cardinality
            # l_properties = checkCardinality(prop, l_properties, 'material_cardinality')
            l_properties = checkCardinality(prop, l_properties, 'cardinality')
            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info, l_properties)

        # exact quality constraints
        elif key == 'value' and prop.get('property'):
            # check whether property is in a property set
            if prop.get('pset'):
                l_properties = [f'sh:path ( {prop.get("pset")} {prop.get("property")} )']
            else:
                l_properties = [f'sh:path {prop.get("property")}']
            # determine the datatype of the value
            if type(value) == str:
                value = f'"{value}"'
            else:
                value = value
            l_properties.append(f'sh:hasValue {value}')
            # # optional datatype check (make a separate node for this?)
            # if prop.get('datatype'):
            #     l_properties.append(f'sh:datatype {prop.get("datatype")}')
            # check for cardinality
            # l_properties = checkCardinality(prop, l_properties, 'quality_cardinality')
            l_properties = checkCardinality(prop, l_properties, 'cardinality')
            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info, l_properties)

        # pattern-based quality constraints
        elif key == 'value_pattern' and prop.get('property'):
            # check whether property is in a property set
            if prop.get('pset'):
                l_properties = [f'sh:path ( {prop.get("pset")} {prop.get("property")} )']
            else:
                l_properties = [f'sh:path {prop.get("property")}']
            l_properties.append(f'sh:pattern "{value}"')
            # # optional datatype check (make a separate node for this?)
            # if prop.get('datatype'):
            #     l_properties.append(f'sh:datatype {prop.get("datatype")}')
            # check for cardinality
            # l_properties = checkCardinality(prop, l_properties, 'quality_cardinality')
            l_properties = checkCardinality(prop, l_properties, 'cardinality')
            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info, l_properties)

        # check for classifications
        elif key == 'classification' and prop.get('property'):
            if prop.get('pset'):
                l_properties = [f'sh:path ( {prop.get("pset")} {prop.get("property")} )']
            else:
                l_properties = [f'sh:path {prop.get("property")}']
            # check if classification has a list of compliant classes
            if prop.get('compliant_classes'):
                l_properties.append(f'''sh:in ( {' '.join('"'+x+'"^^xsd:string' for x in prop["compliant_classes"])} )''')
                # l_properties.append(f'''sh:in ( {' '.join('"'+x+'"' for x in prop["compliant_classes"])} )''')
            # if not, look for exact value (MIGHT NOT GIVE DESIRED RESULTS YET)
            else:
                l_properties.append(f'sh:hasValue "{value}"')

            # check for cardinality
            l_properties = checkCardinality(prop, l_properties, 'cardinality')

            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info, l_properties)


        # quantity constraints
        elif key == 'quantity' and {prop.get('property')}:
            # check whether property is in a property set.
            if prop.get('pset'):
                # if there is also a unit stated, the value should be in a blank node:
                if prop.get('unit'):
                    l_properties = [f'sh:path ( {prop.get("pset")} {prop.get("property")} qudt:NumericValue )']
                else:
                    l_properties = [f'sh:path ( {prop.get("pset")} {prop.get("property")} )']
            else:
                # if there is a unit stated, the value should be in a blank node:
                if prop.get('unit'):
                    l_properties = [f'sh:path {prop.get("property")}']
                else:
                    l_properties = [f'sh:path ( {prop.get("property")} qudt:NumericValue )']

            # check for comparison
            if prop.get('comparison') and prop.get('comparison') in dict_comparisons.keys():
                l_properties.append(f'{dict_comparisons[prop.get("comparison")]} {value}')
            #otherwise, default to '='
            else:
                l_properties.append(f'sh:hasValue {value}')
            # check for cardinality for base
            # l_properties = checkCardinality(prop, l_properties, 'quantity_cardinality')
            l_properties = checkCardinality(prop, l_properties, 'cardinality')

            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info,  l_properties)

            # check again for units, and store the unit check in a separate property shape
            if prop.get('unit'):
                # include cardinality with quantity check (always exactly 1 numeric value per quantity)
                # add new property shape for unit check
                l_properties = [l_properties[0].replace('qudt:NumericValue', 'qudt:hasUnit')]
                l_properties.append(f'qudt:hasUnit {prop.get("unit")}')
                # include cardinality check (always exactly 1 unit per quantity)
                l_properties.append(f'sh:minCount 1')
                l_properties.append(f'sh:maxCount 1')

            # store output, property shape and and increase counter
            output += sep+f'sh:property {reg_code}_PROP{str(propshape_id)}'
            writePropertyNode(reg_info, l_properties)

    return output

#### validation

In [39]:
# single property + comparison + cardinality
constraint_test_1 = {
    'INFO': {'reg_CURIE': 'bbl:XXXX_SUB1'},
    'CONSTRAINTS': {'spatial_path': 'ex:locatedInCompartment', 'spatial_class': 'ex:FireCompartment'}}


# single property + comparison + cardinality
constraint_test_2 = {
    'INFO': {'reg_CURIE': 'bbl:XXXX_SUB2'},
    'CONSTRAINTS': {'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '>=', 'quantity': 20, 'unit': 'unit:M', 'cardinality': {'min': 1, 'max': 1}},}

# multiple properties (AND)
constraint_test_3 = {
    'INFO': {'reg_CURIE': 'bbl:XXXX_SUB3'},
    'CONSTRAINTS': {
        'AND_1': [
            {'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '>=', 'quantity': 20, 'unit': 'unit:M', 'cardinality': {'min': 1, 'max': 1}},
            {'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '<=', 'quantity': 40, 'unit': 'unit:M', 'cardinality': {'min': 1, 'max': 1}},
            {'pset': 'pset:Other', 'property': 'props:FireClass', 'value': 'C'}]}}

# property OR propery
constraint_test_4 = {
    'INFO': {'reg_CURIE': 'bbl:XXXX_SUB4'},
    'CONSTRAINTS':
        {'OR_1':[
          {'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '>=', 'quantity': 20, 'unit': 'unit:M', 'cardinality': {'min': 1, 'max': 1}},
          {'pset': 'pset:Dimensions', 'property': 'props:Width', 'quantity': 10, 'unit': 'unit:M', 'quantitycardinality': {'min': 1, 'max': 1}},
          {'pset': 'pset:Other', 'property': 'props:FireClass', 'value': 'C'}]}}

prop_shapes = {}
propshape_id = 1
for c in [constraint_test_1, constraint_test_2, constraint_test_3, constraint_test_4]:
    propshape_id = 1 # reset property shape id for each regulation
    tab = ''
    prop_shapes = {}
    output = writeConstraints(c['INFO'], c['CONSTRAINTS'])
    print(output)
    print()
    for node in prop_shapes.values():
        print(node)
        print()

 ;
    sh:property bbl:XXXX_SUB1_PROP1


bbl:XXXX_SUB1_PROP1
    a sh:PropertyShape ;
    sh:path ex:locatedInCompartment ;
    sh:class ex:FireCompartment ;
    sh:minCount 1 .


 ;
    sh:property bbl:XXXX_SUB2_PROP1 ;
    sh:property bbl:XXXX_SUB2_PROP2


bbl:XXXX_SUB2_PROP1
    a sh:PropertyShape ;
    sh:path ( pset:Dimensions props:Length qudt:NumericValue ) ;
    sh:minInclusive 20 ;
    sh:minCount 1 ;
    sh:maxCount 1 .



bbl:XXXX_SUB2_PROP2
    a sh:PropertyShape ;
    sh:path ( pset:Dimensions props:Length qudt:hasUnit ) ;
    qudt:hasUnit unit:M ;
    sh:minCount 1 ;
    sh:maxCount 1 .


 ;
    sh:property bbl:XXXX_SUB3_PROP1 ;
    sh:property bbl:XXXX_SUB3_PROP2 ;
    sh:property bbl:XXXX_SUB3_PROP3 ;
    sh:property bbl:XXXX_SUB3_PROP4 ;
    sh:property bbl:XXXX_SUB3_PROP5


bbl:XXXX_SUB3_PROP1
    a sh:PropertyShape ;
    sh:path ( pset:Dimensions props:Length qudt:NumericValue ) ;
    sh:minInclusive 20 ;
    sh:minCount 1 ;
    sh:maxCount 1 .



bbl:XXXX_SUB3_PROP2

## Full shapes graph generation (including conditional structures)

In [40]:
# simplified function for converting the target dictionary to SHACL+SPARQL, also implementing conditional structures
def writeTarget(target, previous_targets, tab='    '):
    # global cardinality # if working with cardinality, this is needed
    # define target template:
    template_target_start = f'''sh:target [
    a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {{'''
    template_target_end = f'''
        """ ;
]'''

    # write start of query
    output = '\n'.join(tab+str(line) for line in template_target_start.split('\n'))

    # write content of query
    current_target = '\n            '+tab+'{'
    current_target += writeSPARQL_triples(target, "", tab+'            ', '?this')
    current_target += '\n            '+tab+'}'

    output += current_target

    # if there are multiple conditional statements, subtract previous targets
    if len(previous_targets) > 0:
        # join previous targets into one shape
        for p_t in previous_targets:
            output += '\n'+tab+'            MINUS\n'+tab+'            {'
            output += '\n'.join(tab+str(line) for line in p_t.split('\n'))
            output += '\n'+tab+'            }'

    # add target to previous targets
    previous_targets.append(current_target)
    # write end of query
    output += '\n'+tab+'        }'
    output += '\n'.join(tab+str(line) for line in template_target_end.split('\n'))
    return output

source_lang = "nl"
# create global variables
def convertRegulation(reg):
    # reset variables
    global previous_targets
    global prop_shapes
    global propshape_id
    previous_targets = [] # reset previous targets
    prop_shapes = {} # reset property shape list
    propshape_id = 1 # reset property shape id for each regulation
    tab = '    ' # reset tab
    shape = '' # prepare shape string

    # find source language
    global source_lang
    if not source_lang:
        print("source language missing, assuming english as source language")
        # source_lang = "en"

    if not reg.get('INFO'):
        print("missing meta information for:", reg)
        return
    else:
        reg_info = reg['INFO']

    # define SHACL shape template:
    template_shape_start = f'''
{reg_info.get('reg_CURIE')}
    a sh:NodeShape ;
{writeInfo(reg_info, tab='    ')}'''
    # write start of shape
    shape = '\n'.join(line for line in template_shape_start.split('\n'))

    # find conditional clause structures
    if reg.get('CONDITION_1'):
        # find conditions
        conditions = sorted(x for x in reg if x.startswith('CONDITION_'))
        shape += '    sh:and (\n        '
        for c in conditions:
            shape += '[\n    '
            if reg[c].get('TARGETS') and reg[c].get('CONSTRAINTS'):
                # write target
                shape += '\n    '.join('    '+str(line) for line in writeTarget(reg[c]['TARGETS'], previous_targets, tab = '    ').split('\n'))
                # WHEN ALSO IMPLEMENTING ELSE STATEMENTS, FIND BASE TARGETS FROM PREVIOUS TARGETS AS WELL!!

                # try to find if the conditional statement has a custom text. If not, use default text
                cond_info = reg_info
                if reg[c].get('TEXT'):
                    cond_info['text_en'] = reg[c]['TEXT']
                # write constraints
                shape += '\n    '.join('    '+str(line) for line in writeConstraints(cond_info, reg[c]['CONSTRAINTS'], tab = '    ').split('\n'))
            else: # error message if no target and/or constraint is found (needs to be changed if ELSE structures are implemented)
                print("could not find targets & constraints for", reg['INFO'].get('reg_CURIE'))
                return
            shape += ' ;\n'+tab+'    ] '


        shape += '\n'+tab+')' # end OR operator
        shape += ' . \n' # end main SHACL shape
        shape += ''.join([x for x in prop_shapes.values()]) # write property shapes

    # Else, find non-conditional clauses
    elif reg.get('TARGETS') and reg.get('CONSTRAINTS'):
        # write target
        shape += '\n'.join(tab+str(line) for line in writeTarget(reg['TARGETS'], []).split('\n'))
        # write constraints
        shape += '\n'.join(tab+str(line) for line in writeConstraints(reg_info, reg['CONSTRAINTS']).split('\n'))

        shape += ' . \n' # end main SHACL shape
        shape += ''.join(prop_shapes.values()) # write property shapes

    # For unrecognized structures, return error message
    else:
        print("could not convert the following regulation due to incorrect formation of dictionary target & constraint:", reg['INFO'].get('reg_CURIE'))
        return

    # remove global variables used in function only
    del prop_shapes
    del previous_targets
    return shape+'\n' # extra line break between regulations

#### Validation

In [41]:
# simple target & constraint
full_test_1 = {
    'INFO': {
        'reg_CURIE': 'bbl:XXXX_1',
        'label': 'Article X.1',
        'seeAlso': 'http://example.com',
        'text_en' : 'An external wall has a length of at least 12 m.',
        'text_original' : 'Een muur heeft een lengte van minstens 12 m.'},
    'TARGETS': {
        'UNION_1': [
            {'element': 'beo:Wall', 'pset': 'pset_other', 'property': 'props:IsExternal', 'value': True}]},
    'CONSTRAINTS': {
        'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '>=', 'quantity': 12, 'unit': 'unit:M'}}

# including OR in target & constraint.
full_test_2 = {
    'INFO': {
        'reg_CURIE': 'bbl:XXXX_2',
        'label': 'Article X.2',
        'seeAlso': 'http://example.com',
        'text_en' : 'Window and doors have a height or width of at least 1 m.'},
    'TARGETS'  : {
        'UNION_1': [
            {'element': 'beo:Window'},
            {'element': 'beo:Door'}]},
    'CONSTRAINTS': {
        'OR_2': [
            {'pset': 'pset:Dimensions', 'property': 'props:Height', 'comparison': '>=', 'quantity': 1, 'unit': 'unit:M'},
            {'pset': 'pset:Dimensions', 'property': 'props:Width', 'comparison': '>=', 'quantity': 1, 'unit': 'unit:M'}]}}

# exception
full_test_3 = {
    'INFO': {
        'reg_CURIE': 'bbl:XXXX_3',
        'label': 'Article X.3',
        'seeAlso': 'http://example.com',
        'text_en' : "Unless the area is less than 1 m2, an external window has a height of at least 2 m"},
    'TARGETS'  : {
        'UNION_1': [
            {'element': 'beo:Window', 'pset': 'pset:Other', 'property': 'props:IsExternal', 'value': True}],
        'MINUS_1': [
            {'element': 'beo:Window', 'pset': 'pset:Dimensions', 'property': 'props:Area', 'comparison': '<', 'quantity': 1, 'unit': 'unit:M2'}]},
    'CONSTRAINTS': {
        'OR_1': [
            {'pset': 'pset:Dimensions', 'property': 'props:Height', 'comparison': '>=', 'quantity': 1, 'unit': 'unit:M'},
            {'pset': 'pset:Dimensions', 'property': 'props:Width', 'comparison': '>=', 'quantity': 1, 'unit': 'unit:M'}]}}

# conditional statements
full_test_4 = {
    'INFO': {
        'reg_CURIE': 'bbl:XXXX_4',
        'label': 'Article X.4',
        'seeAlso': 'http://example.com',
        'text_en' : 'If a wall is external, it should have a length of at least 12 m. Otherwise, a wall should have a length of at least 4m.',
    },
    'CONDITION_1': { # IF ... THEN
        'TEXT' : '>>If a wall is external, it should have a length of at least 12 m.<< Otherwise, a wall should have a length of at least 4m.',
        'TARGETS'  : {'element': 'beo:Wall', 'pset': 'pset_other', 'property': 'props:IsExternal', 'value': True},
        'CONSTRAINTS': {'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '>=', 'quantity': 12, 'unit': 'unit:M'},
    },
    'CONDITION_2': { # ELIF ... THEN
        'TEXT' : 'If a wall is external, it should have a length of at least 12 m. >>Otherwise, a wall should have a length of at least 4m.<<',
        'TARGETS'  : {'element': 'beo:Wall'},
        'CONSTRAINTS': {'pset': 'pset:Dimensions', 'property': 'props:Length', 'comparison': '>=', 'quantity': 4, 'unit': 'unit:M'},
    },
}


for reg in [full_test_1, full_test_2, full_test_3, full_test_4]:
    print(convertRegulation(reg))



bbl:XXXX_1
    a sh:NodeShape ;
    rdfs:label "Article X.1" ;
    rdfs:comment """An external wall has a length of at least 12 m."""@en ;
    rdfs:comment """Een muur heeft een lengte van minstens 12 m."""@nl ;
    rdfs:seeAlso <http://example.com> ;
        sh:target [
            a sh:SPARQLTarget ;
                sh:select """
                    SELECT DISTINCT ?this WHERE {
                    {
                        {  ?this a beo:Wall . 
                          ?this pset_other ?pset . 
                          ?pset props:IsExternal ?prop . 
                          FILTER ( ?prop = True) } 
                    }
                }    
                """ ;
        ]     ;
        sh:property bbl:XXXX_1_PROP1 ;
        sh:property bbl:XXXX_1_PROP2 . 

bbl:XXXX_1_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article X.1" ;
    rdfs:comment """An external wall has a length of at least 12 m."""@en ;
    rdfs:comment """Een muur heeft een lengte van minstens 12 m."""@nl ;


## References




### Internal references (cross-references)

target = class
rest van target als node? werkt dit?

cross-references storen,
buiten SHACL om linken


Possible types:

*   informative (give example)
*   definition (give example)
*   exemptions/exceptions (give example)
*   additions (give example)
*   normative (give example)




### External references

Will not be included in the SHACL shapes for the Proof of Concept.



---
# **Convert regulations from JSON to SHACL**

In [42]:
shapes_graph = ""
for reg in reg_json:
    shapes_graph += convertRegulation(reg)

print(shapes_graph)

# Save shapes graph as TTL
with open(output_shape_path, "w", encoding="utf-8") as file:
    file.write(shapes_graph)


bbl:C4_S4_2_P4_2_8_A4_50_SUB1
    a sh:NodeShape ;
    rdfs:label "Article 4.50(1)" ;
    rdfs:comment """A closed space is in a fire compartment."""@en ;
    rdfs:comment """Een besloten ruimte ligt in een brandcompartiment."""@nl ;
    rdfs:seeAlso <https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.8_Artikel4.50_Lid1> ;
        sh:target [
            a sh:SPARQLTarget ;
                sh:select """
                    SELECT DISTINCT ?this WHERE {
                    {  ?this a bot:Space . 
                      ?this pset:Other ?pset . 
                      ?pset props:Isenclosed ?prop . 
                    }
                }    
                """ ;
        ]     ;
        sh:property bbl:C4_S4_2_P4_2_8_A4_50_SUB1_PROP1 . 

bbl:C4_S4_2_P4_2_8_A4_50_SUB1_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article 4.50(1)" ;
    rdfs:comment """A closed space is in a fire compartment."""@en ;
    rdfs:comment """Een besloten ruimte ligt in een br

---
# Manual regulatory document validation

Regulations that aren't yet extractable, but are possible to transform with the current module

In [43]:
manual_json_1 = {
    'INFO': {
        'reg_CURIE': 'bbl:C4_S4_2_P4_2_8_A4_51_SUB3',
        'label': 'Article 4.51/3',
        'seeAlso': 'https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.8_Artikel4.51_Lid3',
        'text_en' : 'A fire compartment extends over no more than one building plot.',
        'text_original' : 'Een brandcompartiment strekt zich uit over niet meer dan een bouwwerkperceel.',
    },
    'EXTERNAL_REFERENCES': [
        {'label': 'NEN 6075'}
    ],
    'TARGETS'  : {'element': 'ex:FireCompartment'},
    'CONSTRAINTS': {'spatial_invpath': 'ex:locatedInCompartment', 'spatial_class' : 'bot:Site', 'cardinality': {'max': 1}},
}

print(convertRegulation(manual_json_1))


bbl:C4_S4_2_P4_2_8_A4_51_SUB3
    a sh:NodeShape ;
    rdfs:label "Article 4.51/3" ;
    rdfs:comment """A fire compartment extends over no more than one building plot."""@en ;
    rdfs:comment """Een brandcompartiment strekt zich uit over niet meer dan een bouwwerkperceel."""@nl ;
    rdfs:seeAlso <https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.8_Artikel4.51_Lid3> ;
        sh:target [
            a sh:SPARQLTarget ;
                sh:select """
                    SELECT DISTINCT ?this WHERE {
                    {  ?this a ex:FireCompartment . 
                    }
                }    
                """ ;
        ]     ;
        sh:property bbl:C4_S4_2_P4_2_8_A4_51_SUB3_PROP1 . 

bbl:C4_S4_2_P4_2_8_A4_51_SUB3_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article 4.51/3" ;
    rdfs:comment """A fire compartment extends over no more than one building plot."""@en ;
    rdfs:comment """Een brandcompartiment strekt zich uit over niet meer dan

In [44]:
manual_json_2 = {
    'INFO': {
        'reg_CURIE': 'bbl:C4_S4_2_P4_2_8_A4_51_SUB3',
        'label': 'Article 4.51/3',
        'seeAlso': 'https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.8_Artikel4.51_Lid3',
        'text_en' : 'A fire compartment extends over no more than one building plot.',
        'text_original' : 'Een brandcompartiment strekt zich uit over niet meer dan een bouwwerkperceel.',
    },
    'EXTERNAL_REFERENCES': [
        {'label': 'NEN 6075'}
    ],
    'TARGETS'  : {'element': 'ex:FireCompartment'},
    'CONSTRAINTS': {'spatial_invpath': 'ex:locatedInCompartment', 'spatial_class' : 'bot:Site', 'cardinality': {'max': 1}},
}

print(convertRegulation(manual_json_2))


bbl:C4_S4_2_P4_2_8_A4_51_SUB3
    a sh:NodeShape ;
    rdfs:label "Article 4.51/3" ;
    rdfs:comment """A fire compartment extends over no more than one building plot."""@en ;
    rdfs:comment """Een brandcompartiment strekt zich uit over niet meer dan een bouwwerkperceel."""@nl ;
    rdfs:seeAlso <https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.8_Artikel4.51_Lid3> ;
        sh:target [
            a sh:SPARQLTarget ;
                sh:select """
                    SELECT DISTINCT ?this WHERE {
                    {  ?this a ex:FireCompartment . 
                    }
                }    
                """ ;
        ]     ;
        sh:property bbl:C4_S4_2_P4_2_8_A4_51_SUB3_PROP1 . 

bbl:C4_S4_2_P4_2_8_A4_51_SUB3_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article 4.51/3" ;
    rdfs:comment """A fire compartment extends over no more than one building plot."""@en ;
    rdfs:comment """Een brandcompartiment strekt zich uit over niet meer dan