<a href="https://colab.research.google.com/github/BowieSteutel/acc-nlp-firecodes/blob/main/3_Compliance_Checking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



# **Module 3 - Compliance Checking**



# **Prepare libraries**

In [42]:
# Import standard libraries
import time # for measuring required time for functions
import json # for exporting the compliance reports

In [43]:
# pySHACL (for SHACL reasoning)
!pip install pyshacl --quiet
#!pip install --upgrade pyshacl
import pyshacl

In [44]:
# RDFLib (for ontologies & namespaces)
!pip install rdflib --quiet
import rdflib

from rdflib import Graph, Literal, URIRef, Namespace
from rdflib import RDF, RDFS, XSD, SH, OWL

---
# **Prepare inputs**

In [45]:
# @title Change root directory (update after downloading)

# Working directory for this notebook; the relative "input/..." and "output/..." paths
# defined in the next cell are resolved against it.
root_directory = "/content/drive/MyDrive/FINAL_CODE_THESIS" #  @param {"type":"string", "placeholder":""}
import sys
from pathlib import Path
# Google Drive is only mounted (and the working directory only changed) when the notebook runs in Colab;
# in any other environment this cell leaves the working directory untouched.
if 'google.colab' in sys.modules:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
    # IPython magic: change the working directory to root_directory
    %cd {root_directory}

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/FINAL_CODE_THESIS


In [46]:
# @title Define filepaths
# Data graphs (Turtle files, paths relative to the current working directory)
# NOTE(review): the "*_materialized" files presumably hold the same models with inferred triples
# already added (see the "Test inference" section) - confirm against the module that writes them.
use_case_correct = "output/use_case_correct.ttl" # @param {type:"string", placeholder:"(ttl)"}
use_case_correct_materialized = "output/use_case_correct_materialized.ttl" # @param {type:"string", placeholder:"(ttl)"}
use_case_incorrect = "output/use_case_incorrect.ttl" # @param {type:"string", placeholder:"(ttl)"}
use_case_incorrect_materialized = "output/use_case_incorrect_materialized.ttl" # @param {type:"string", placeholder:"(ttl)"}

# Shapes graph (read as plain text further below; read_SHACL_report prepends the prefix declarations)
shapes_path = "output/shapes.ttl" # @param {type:"string", placeholder:"(ttl)"}

# Ontologies (optional, for real-time inference)
ont_path_custom = "input/custom_ontology.ttl" # @param {type:"string", placeholder:"(ttl)"}
ont_path_alignment = "input/ontology_alignment.ttl" # @param {type:"string", placeholder:"(ttl)"}

---
# **Load graphs**

## Parse data graphs

In [47]:
# Parse the "correct" use case model from Turtle and preview the first 1000 characters of it
data_graph_correct = Graph().parse(use_case_correct, format="turtle")
print(data_graph_correct.serialize(format="turtle")[:1000])

@prefix beo: <https://w3id.org/beo#> .
@prefix bot: <https://w3id.org/bot#> .
@prefix ex: <https://example.org/ns#> .
@prefix ifc: <https://w3id.org/ifc/IFC4X3_ADD2#> .
@prefix inst: <https://linkedbuildingdata.net/ifc/resources20250531_181504/> .
@prefix props: <https://w3id.org/props#> .
@prefix pset: <https://example.org/pset#> .
@prefix qudt: <http://qudt.org/schema/shacl/qudt/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix unit: <https://qudt.org/vocab/unit/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

inst:FlowTerminal_33464 a bot:Element,
        ifc:IfcFlowTerminal ;
    rdfs:label "Plumbing_Fixtures_WC:540x360x410mm:367489"^^xsd:string ;
    ex:hasMaterial inst:Material_26888,
        inst:Material_33448 ;
    pset:Constraints [ rdfs:label "Constraints"^^xsd:string ;
            props:DefaultElevation [ qudt:NumericValue 0e+00 ;
                    qudt:hasUnit unit:M ] ;
            props:ElevationFromLevel [ qudt:NumericValue 0e+00 ;
           

In [48]:
# Parse the "incorrect" use case model from Turtle and preview the first 1000 characters of it
data_graph_incorrect = Graph().parse(use_case_incorrect, format="turtle")
print(data_graph_incorrect.serialize(format="turtle")[:1000])

@prefix beo: <https://w3id.org/beo#> .
@prefix bot: <https://w3id.org/bot#> .
@prefix ex: <https://example.org/ns#> .
@prefix ifc: <https://w3id.org/ifc/IFC4X3_ADD2#> .
@prefix inst: <https://linkedbuildingdata.net/ifc/resources20250531_181534/> .
@prefix props: <https://w3id.org/props#> .
@prefix pset: <https://example.org/pset#> .
@prefix qudt: <http://qudt.org/schema/shacl/qudt/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix unit: <https://qudt.org/vocab/unit/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

inst:FlowTerminal_33481 a bot:Element,
        ifc:IfcFlowTerminal ;
    rdfs:label "Plumbing_Fixtures_WC:540x360x410mm:367489"^^xsd:string ;
    ex:hasMaterial inst:Material_26905,
        inst:Material_33465 ;
    pset:Constraints [ rdfs:label "Constraints"^^xsd:string ;
            props:DefaultElevation [ qudt:NumericValue 0e+00 ;
                    qudt:hasUnit unit:M ] ;
            props:ElevationFromLevel [ qudt:NumericValue 0e+00 ;
           

In [49]:
# Parse the materialized "correct" model; only its triple count is shown
data_graph_correct_materialized = Graph().parse(use_case_correct_materialized, format="turtle")
print(len(data_graph_correct_materialized))

12226


In [50]:
# Parse the materialized "incorrect" model; only its triple count is shown
data_graph_incorrect_materialized = Graph().parse(use_case_incorrect_materialized, format="turtle")
print(len(data_graph_incorrect_materialized))

12218


## Parse shapes graph

In [51]:
# Keep the shapes graph as plain text: read_SHACL_report prepends the prefix declarations itself
with open(shapes_path, encoding="utf-8") as shapes_file:
    shapes_graph = shapes_file.read()

# Display the shapes that were loaded
print(shapes_graph)


bbl:C4_S4_2_P4_2_8_A4_50_SUB1
    a sh:NodeShape ;
    rdfs:label "Article 4.50(1)" ;
    rdfs:comment """A closed space is in a fire compartment."""@en ;
    rdfs:comment """Een besloten ruimte ligt in een brandcompartiment."""@nl ;
    rdfs:seeAlso <https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.8_Artikel4.50_Lid1> ;
        sh:target [
            a sh:SPARQLTarget ;
                sh:select """
                    SELECT DISTINCT ?this WHERE {
                    {  ?this a bot:Space . 
                      ?this pset:Other ?pset . 
                      ?pset props:Isenclosed ?prop . 
                    }
                }    
                """ ;
        ]     ;
        sh:property bbl:C4_S4_2_P4_2_8_A4_50_SUB1_PROP1 . 

bbl:C4_S4_2_P4_2_8_A4_50_SUB1_PROP1
    a sh:PropertyShape ;
    rdfs:label "Article 4.50(1)" ;
    rdfs:comment """A closed space is in a fire compartment."""@en ;
    rdfs:comment """Een besloten ruimte ligt in een br

## Parse ontology graph

In [52]:
# Load the custom ontology (Turtle) and display it
ont_graph_custom = Graph().parse(ont_path_custom, format="turtle")
print(ont_graph_custom.serialize(format="turtle"))

@prefix bot: <https://w3id.org/bot#> .
@prefix ex: <https://example.org/ns#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

ex:ProtectedSubFireCompartment a owl:Class ;
    rdfs:subClassOf ex:SubFireCompartment .

ex:adjacentCompartment a owl:ObjectProperty,
        owl:SymmetricProperty ;
    rdfs:comment "Only fire compartments of the same type can be considered adjacent" ;
    rdfs:domain ex:FireCompartment ;
    rdfs:range ex:FireCompartment ;
    owl:propertyDisjointWith ex:locatedInCompartment,
        ex:partOfCompartment .

ex:hasMaterial a owl:ObjectProperty ;
    rdfs:domain bot:Element ;
    rdfs:range ex:Material .

ex:locatedInCompartment a owl:ObjectProperty,
        owl:TransitiveProperty ;
    rdfs:comment "Subject cannot be subclass of object" ;
    rdfs:domain bot:Zone ;
    rdfs:range ex:FireCompartment ;
    owl:propertyDisjointWith ex:partOfCompartment .

ex:Material a owl:Class ;
    owl:disjointWith bot:

In [53]:
# Load the alignment ontology (Turtle) and display it
ont_graph_alignment = Graph().parse(ont_path_alignment, format="turtle")
print(ont_graph_alignment.serialize(format="turtle"))

@prefix beo: <https://w3id.org/beo#> .
@prefix bot: <https://w3id.org/bot#> .
@prefix ex: <https://example.org/ns#> .
@prefix ifc: <https://w3id.org/ifc/IFC4X3_ADD2#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

ex: a owl:Ontology ;
    owl:imports <https://cramonell.github.io/beo/actual/ontology.ttl>,
        <https://cramonell.github.io/ifc/ifcowl/IFC4X3_ADD2/actual/ontology.ttl>,
        <https://w3c-lbd-cg.github.io/bot/bot.ttl> .

ifc:IfcBuilding rdfs:subClassOf bot:Building .

ifc:IfcBuildingElement rdfs:subClassOf beo:BuiltElement .

ifc:IfcBuildingStorey rdfs:subClassOf bot:Storey .

ifc:IfcCurtainWall rdfs:subClassOf beo:CurtainWall .

ifc:IfcDoor rdfs:subClassOf beo:Door .

ifc:IfcElement rdfs:subClassOf bot:Element .

ifc:IfcMaterial rdfs:subClassOf ex:Material .

ifc:IfcPlate rdfs:subClassOf beo:Plate .

ifc:IfcRoof rdfs:subClassOf beo:Roof .

ifc:IfcSite rdfs:subClassOf bot:Site .

ifc:IfcSlab rdfs:subClassOf be

---
# **Prepare compliance report generation**

In [54]:
def _parse_prefix_line(line):
    """Return ``(prefix, namespace)`` for a Turtle ``@prefix`` line, or ``None`` for any other line."""
    stripped = line.strip()
    if not stripped.startswith("@prefix"):
        return None
    prefix, colon, remainder = stripped[len("@prefix"):].partition(":")
    if not colon or "<" not in remainder or ">" not in remainder:
        return None
    return prefix.strip(), remainder.split("<", 1)[1].split(">", 1)[0]


def _shorten_uri(text, namespace_pairs):
    """Replace a leading namespace URI in ``text`` by ``prefix:``; return ``text`` unchanged otherwise.

    ``namespace_pairs`` is a sequence of ``(prefix, namespace)`` tuples that is expected to be
    ordered from the longest to the shortest namespace, so nested namespaces resolve to the
    most specific prefix.
    """
    for prefix, namespace in namespace_pairs:
        # An empty namespace would match every string, so it is skipped.
        if namespace and text.startswith(namespace):
            return f"{prefix}:{text[len(namespace):]}"
    return text


def _result_to_dict(node, report, namespace_pairs):
    """Convert one ``sh:ValidationResult`` node of ``report`` into a plain dictionary.

    Returns ``None`` when the node has no ``sh:focusNode``. Properties that are absent from the
    report are stored as ``None`` (not as the string ``"None"``), so they export as JSON ``null``.
    """
    if report.value(node, SH.focusNode) is None:
        return None

    mandatory_fields = (
        ("focusNode", SH.focusNode),
        ("value", SH.value),
        ("resultMessage", SH.resultMessage),
        ("resultSeverity", SH.resultSeverity),
        ("sourceConstraintComponent", SH.sourceConstraintComponent),
        ("sourceShape", SH.sourceShape),
    )
    converted = {}
    for key, predicate in mandatory_fields:
        term = report.value(node, predicate)
        converted[key] = None if term is None else _shorten_uri(str(term), namespace_pairs)

    # Metadata copied from the shapes graph; the key is only present when a value exists.
    for key, predicate in (("label", RDFS.label), ("comment", RDFS.comment), ("seeAlso", RDFS.seeAlso)):
        term = report.value(node, predicate)
        if term:
            converted[key] = _shorten_uri(str(term), namespace_pairs)
    return converted


def _wrap_text(text, width=130):
    """Collapse whitespace in ``text`` and wrap it at word boundaries to lines of at most ``width`` characters."""
    import textwrap  # local import keeps this cell self-contained

    normalized = " ".join((text or "").split())
    return textwrap.fill(normalized, width=width, break_long_words=False, break_on_hyphens=False)


def _message_with_value(value, wrapped_message):
    """Substitute the leading word "Value" of a result message by the actual value, when one is known."""
    if value and wrapped_message.lower().startswith("value"):
        return f"{value} {wrapped_message[6:]}"  # 6 == len("Value ")
    return wrapped_message


def _print_validation_result(entry):
    """Print one converted validation result as a short human-readable paragraph."""
    severity_term = entry.get("resultSeverity")
    # "sh:Violation" -> "Violation"; sh:Violation is the SHACL default severity, used here as fallback.
    severity = severity_term.split(":", 1)[-1].title() if severity_term else "Violation"
    label = f"{entry['label']} - " if "label" in entry else ""
    details = entry.get("details")

    message = _wrap_text(entry.get("resultMessage"))
    if details is None:
        # Without a nested detail the result's own value is shown in its message.
        message = _message_with_value(entry.get("value"), message)

    print(f"{severity} for {entry['focusNode']}")
    if "comment" in entry:
        # The rdfs:comment (regulation text) precedes the technical message.
        print(f'{label}"{_wrap_text(entry["comment"])}"')
        print(f"{severity} : {message}")
    else:
        print(f"{label}{message}")

    if details is not None:
        print(_message_with_value(details.get("value"), _wrap_text(details.get("resultMessage"))))

    if "seeAlso" in entry:
        print(f"(see also: {entry['seeAlso']})")
    print()


def read_SHACL_report(data_graph, shapes, ontology=None, inference='rdfs'):
    """Validate ``data_graph`` against SHACL ``shapes`` with pySHACL and print a readable report.

    The shapes are given without prefix declarations: a header with the default prefixes
    (rdf, rdfs, xsd, sh, owl, bbl) and all prefixes used by the serialized data graph is
    prepended before validation. Every validation result is enriched with the ``rdfs:label``,
    ``rdfs:comment`` and ``rdfs:seeAlso`` of its source shape.

    Parameters
    ----------
    data_graph : rdflib.Graph
        The model to validate.
    shapes : str or list of str
        Turtle text of one shape, or several shape texts that are joined with newlines.
    ontology : optional
        Ontology passed to pySHACL as ``ont_graph``. When truthy, validation runs with
        ``inference="owlrl"`` and ``do_owl_imports=True``; otherwise no inference is applied.
    inference : str, optional
        Currently not forwarded to pySHACL (the inference mode is derived from ``ontology``,
        see above). Kept so that existing calls keep working.

    Returns
    -------
    tuple or bool
        ``(report, results)`` where ``report`` is the enriched report as an ``rdflib.Graph`` and
        ``results`` is a list with one dictionary per top-level validation result (a nested
        ``sh:detail`` result is stored under the key ``"details"``). ``False`` is returned when a
        prefix of the data graph is bound to a different namespace than the default prefix of
        the same name.
    """
    # Default namespaces + custom namespaces that are only used by the shapes graph
    default_namespaces = {
        "rdf": str(RDF),
        "rdfs": str(RDFS),
        "xsd": str(XSD),
        "sh": str(SH),
        "owl": str(OWL),
        "bbl": "https://example.org/BBL#",
    }
    default_prefix_lines = [f"@prefix {prefix}: <{namespace}> ." for prefix, namespace in default_namespaces.items()]

    # Collect the prefix declarations of the data graph from its Turtle serialization.
    turtle_text = data_graph.serialize(format="turtle")
    if isinstance(turtle_text, bytes):  # older rdflib versions serialize to bytes
        turtle_text = turtle_text.decode("utf-8")

    data_prefix_lines = []
    data_namespaces = {}
    for line in turtle_text.split('\n'):
        parsed = _parse_prefix_line(line)
        if parsed is not None:
            data_prefix_lines.append(line)
            data_namespaces[parsed[0]] = parsed[1]

    # A prefix that exists in both maps must point to the same namespace.
    clashing_prefixes = sorted(
        prefix
        for prefix in default_namespaces.keys() & data_namespaces.keys()
        if default_namespaces[prefix] != data_namespaces[prefix]
    )
    if clashing_prefixes:
        print("NAMESPACE CLASHES FOUND")
        for prefix in clashing_prefixes:
            print(f"@prefix {prefix}: change {data_namespaces[prefix]} in data graph to {default_namespaces[prefix]}")
        return False

    # Longest namespace first (ties keep the default prefixes in front) for prefix replacement.
    namespace_pairs = sorted(
        {**default_namespaces, **data_namespaces}.items(),
        key=lambda pair: len(pair[1]),
        reverse=True,
    )

    # Build the complete shapes document: prefix header followed by the shape(s).
    shapes_body = '\n'.join(shapes) if isinstance(shapes, (list, tuple)) else shapes
    shapes_text = '\n'.join(default_prefix_lines + data_prefix_lines) + '\n\n' + shapes_body

    # Validate data graph with SHACL shapes
    validate_options = dict(
        shacl_graph=shapes_text,
        data_graph_format="ttl",
        shacl_graph_format="ttl",
        inference="none",
        abort_on_first=False,
        allow_infos=False,
        allow_warnings=False,
        meta_shacl=False,
        advanced=True,  # SPARQL target is advanced
        js=False,
        debug=False,
        serialize_report_graph="ttl",
    )
    if ontology:  # an ontology is given for inference
        validate_options.update(
            ont_graph=ontology,
            ont_graph_format="ttl",
            do_owl_imports=True,  # allow for owl:imports in shapes graph or ontology graph
            inference="owlrl",  # One of "rdfs", "owlrl", "both", "none", or None
        )
    _conforms, serialized_report, _report_text = pyshacl.validate(data_graph, **validate_options)

    # Parse the serialized report; ``data=`` accepts both bytes and str.
    report = Graph()
    report.parse(data=serialized_report, format='turtle')

    # Directly save SHACL report (optional)
    # report.serialize(destination="output/compliance_report.ttl")

    # Enrich SHACL report with rdfs:label, rdfs:comment, rdfs:seeAlso of the source shape
    parsed_shapes = Graph().parse(data=shapes_text, format="turtle")
    result_nodes = list(report.subjects(RDF.type, SH.ValidationResult))
    for node in result_nodes:
        source_shape = report.value(node, SH.sourceShape)
        if source_shape is None:
            continue
        for predicate in (RDFS.label, RDFS.comment, RDFS.seeAlso):
            for annotation in parsed_shapes.objects(source_shape, predicate):
                report.add((node, predicate, annotation))

    # Directly save the enriched SHACL report (optional)
    # report.serialize(destination="output/enriched_compliance_report.ttl")

    # Convert the report into a list of dictionaries. Results that are the sh:detail of another
    # result are only reported nested under their parent, independent of iteration order.
    # NOTE(review): the previous version also tried to skip results of the sh:or / sh:and /
    # sh:not / sh:xone constraint components, but compared URIRefs with prefixed strings, so
    # nothing was ever skipped. That (non-)behaviour is kept; confirm whether such results
    # should really be dropped before adding a filter.
    detail_nodes = set(report.objects(None, SH.detail))
    validation_results = []
    for node in result_nodes:
        if node in detail_nodes:
            continue
        entry = _result_to_dict(node, report, namespace_pairs)
        if entry is None:
            continue
        detail = report.value(node, SH.detail)
        if detail is not None:
            detail_entry = _result_to_dict(detail, report, namespace_pairs)
            if detail_entry is not None:
                entry["details"] = detail_entry
        validation_results.append(entry)

    # Textual report
    if not validation_results:
        print("Model complies with regulations")
    for entry in validation_results:
        _print_validation_result(entry)

    # Return both the TTL (standard) and dictionary (simplified, ready for further processing) report
    return report, validation_results


---
# **Subset validation**

Test SHACL shapes converted from the regulations on the correct and incorrect use case models

In [55]:
# Validate the materialized "correct" model against all regulation shapes
report_graph_correct, report_dict_correct = read_SHACL_report(
    data_graph=data_graph_correct_materialized,
    shapes=shapes_graph,
)

Model complies with regulations


In [56]:
# Write the report of the "correct" model: the graph via rdflib, the dictionary list as JSON
report_graph_correct.serialize(destination="output/compliance_report_correct.ttl")
with open("output/compliance_report_correct.json", mode="w") as report_file:
    report_file.write(json.dumps(report_dict_correct))

In [57]:
# Validate the materialized "incorrect" model against all regulation shapes
report_graph_incorrect, report_dict_incorrect = read_SHACL_report(
    data_graph=data_graph_incorrect_materialized,
    shapes=shapes_graph,
)

Violation for inst:Space_902
Article 4.50(1) - "A closed space is in a fire compartment."
Violation : Less than 1 values on inst:Space_902->ex:locatedInCompartment
(see also: https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.8_Artikel4.50_Lid1)

Violation for inst:Door_25604
Article 4.92(2) - "Contrary to the first sub-article, a door, a window, a frame or a structural component to be equated with that is a fire class D,
determined according to NEN-EN 13501-1."
Violation : E Literal("E", datatype=xsd:string) not in list ['Literal("A1", datatype=xsd:string)', 'Literal("B", datatype=xsd:string)',
'Literal("D", datatype=xsd:string)', 'Literal("A2", datatype=xsd:string)', 'Literal("C", datatype=xsd:string)']
(see also: https://wetten.overheid.nl/BWBR0041297/2024-08-01#Hoofdstuk4_Afdeling4.2_Paragraaf4.2.14_Artikel4.92_Lid2)

Violation for inst:Door_39702
Article 4.92(2) - "Contrary to the first sub-article, a door, a window, a frame or a structural compo

In [58]:
# Write the report of the "incorrect" model: the graph via rdflib, the dictionary list as JSON
report_graph_incorrect.serialize(destination="output/compliance_report_incorrect.ttl")
with open("output/compliance_report_incorrect.json", mode="w") as report_file:
    report_file.write(json.dumps(report_dict_incorrect))

---
# **Method validation**

## Test inference



The following shapes will return the correct instances if one or more of the following is true:

* Classes are assigned explicitly in the data graph
* Classes are inferred via full materialization of the data graph before validation
* Classes are inferred by pySHACL using real-time inference based on imported ontologies

## Test 1

The shape below returns all compartments when inference is applied, since every compartment is then (transitively) an instance of the class ex:FireCompartment.

In [59]:
# Test shape (Turtle without prefix declarations; read_SHACL_report prepends them).
# It targets every node typed ex:FireCompartment and requires at least two rdfs:label values.
# NOTE(review): sh:minCount 2 is presumably chosen so that every targeted node is reported,
# which makes the set of targeted nodes visible in the validation output - confirm.
shape_inferencetest1 = '''
ex:inferencetest1
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                { ?this a ex:FireCompartment . }
            }
        """ ;
    ] ;
    sh:property [
        sh:path rdfs:label ;
        sh:minCount 2 ;
    ] .
'''

**No inference**

Without using ontologies for inference, only instances that are explicitly typed with the requested class are returned:

In [60]:
# Test 1, no inference: time a single validation of the plain data graph
started_at = time.time()
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_inferencetest1)
print(f"--- {time.time() - started_at} seconds ---")

Violation for inst:Compartment_1012
Less than 2 values on inst:Compartment_1012->rdfs:label

--- 0.6247546672821045 seconds ---


**Materialization**

This method uses a materialized data graph to find inferred data

In [61]:
# Test 1, materialization: time a single validation of the materialized data graph
started_at = time.time()
report_graph, report_dict = read_SHACL_report(data_graph_correct_materialized, shape_inferencetest1)
print(f"--- {time.time() - started_at} seconds ---")

Violation for inst:Compartment_1012
Less than 2 values on inst:Compartment_1012->rdfs:label

Violation for inst:Compartment_1093
Less than 2 values on inst:Compartment_1093->rdfs:label

Violation for inst:Compartment_1040
Less than 2 values on inst:Compartment_1040->rdfs:label

Violation for inst:Compartment_1067
Less than 2 values on inst:Compartment_1067->rdfs:label

Violation for inst:Compartment_1121
Less than 2 values on inst:Compartment_1121->rdfs:label

--- 1.1152112483978271 seconds ---


**Real-time inference**

This requires no materialization of data graphs, which makes the input more flexible but can be a lot slower for individual queries

In [62]:
# Test 1, real-time inference with both ontologies
# (read_SHACL_report runs pySHACL with inference="owlrl" whenever an ontology is passed)
started_at = time.time()
combined_ontology = ont_graph_custom+ont_graph_alignment
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_inferencetest1,  combined_ontology)
print(f"--- {time.time() - started_at} seconds ---")

Violation for inst:Compartment_1012
Less than 2 values on inst:Compartment_1012->rdfs:label

Violation for inst:Compartment_1121
Less than 2 values on inst:Compartment_1121->rdfs:label

Violation for inst:Compartment_1040
Less than 2 values on inst:Compartment_1040->rdfs:label

Violation for inst:Compartment_1067
Less than 2 values on inst:Compartment_1067->rdfs:label

Violation for inst:Compartment_1093
Less than 2 values on inst:Compartment_1093->rdfs:label

--- 188.84583449363708 seconds ---


**Real-time inference (without imports)**

The method below skips the alignment module, which is faster but only works for this specific shape because the required inference is part of the custom ontology (instead of external ontologies)

In [63]:
# Test 1, real-time inference with the custom ontology only (no alignment ontology)
started_at = time.time()
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_inferencetest1, ont_graph_custom)
print(f"--- {time.time() - started_at} seconds ---")

Violation for inst:Compartment_1121
Less than 2 values on inst:Compartment_1121->rdfs:label

Violation for inst:Compartment_1067
Less than 2 values on inst:Compartment_1067->rdfs:label

Violation for inst:Compartment_1040
Less than 2 values on inst:Compartment_1040->rdfs:label

Violation for inst:Compartment_1012
Less than 2 values on inst:Compartment_1012->rdfs:label

Violation for inst:Compartment_1093
Less than 2 values on inst:Compartment_1093->rdfs:label

--- 5.821429491043091 seconds ---


## Test 2

The following shape should return instances with beo: and ifc: types.

In [64]:
# Test shape (Turtle without prefix declarations; read_SHACL_report prepends them).
# It targets every node typed bot:Element and requires at least two rdfs:label values.
# NOTE(review): sh:minCount 2 is presumably chosen so that every targeted node is reported,
# which makes the set of targeted nodes visible in the validation output - confirm.
shape_inferencetest2 = '''
ex:inferencetest2
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                { ?this a bot:Element . }
            }
        """ ;
    ] ;
    sh:property [
        sh:path rdfs:label ;
        sh:minCount 2 ;
    ] .
'''

 **No inference**
 Since bot:Element classes are explicitly stated when the converter in module 2 is used, this shape does not require inference or materialization for correct results. If another converter is used, inference might be required to return elements correctly.



In [65]:
# Test 2, no inference: time a single validation of the plain data graph
started_at = time.time()
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_inferencetest2)
print(f"--- {time.time() - started_at} seconds ---")

Violation for inst:FlowTerminal_45085
Less than 2 values on inst:FlowTerminal_45085->rdfs:label

Violation for inst:Furniture_29916
Less than 2 values on inst:Furniture_29916->rdfs:label

Violation for inst:Wall_10934
Less than 2 values on inst:Wall_10934->rdfs:label

Violation for inst:OpeningElement_57190
Less than 2 values on inst:OpeningElement_57190->rdfs:label

Violation for inst:Furniture_45580
Less than 2 values on inst:Furniture_45580->rdfs:label

Violation for inst:CurtainWall_10627
Less than 2 values on inst:CurtainWall_10627->rdfs:label

Violation for inst:Furniture_26485
Less than 2 values on inst:Furniture_26485->rdfs:label

Violation for inst:Furniture_26912
Less than 2 values on inst:Furniture_26912->rdfs:label

Violation for inst:OpeningElement_57113
Less than 2 values on inst:OpeningElement_57113->rdfs:label

Violation for inst:Wall_1546
Less than 2 values on inst:Wall_1546->rdfs:label

Violation for inst:Furniture_27025
Less than 2 values on inst:Furniture_27025->rdf

**Materialized**

In [66]:
# Test 2, materialization: time a single validation of the materialized data graph
started_at = time.time()
report_graph, report_dict = read_SHACL_report(data_graph_correct_materialized, shape_inferencetest2)
print(f"--- {time.time() - started_at} seconds ---")

Violation for inst:Furniture_45652
Less than 2 values on inst:Furniture_45652->rdfs:label

Violation for inst:OpeningElement_57190
Less than 2 values on inst:OpeningElement_57190->rdfs:label

Violation for inst:Wall_1786
Less than 2 values on inst:Wall_1786->rdfs:label

Violation for inst:Furniture_45604
Less than 2 values on inst:Furniture_45604->rdfs:label

Violation for inst:Plate_10709
Less than 2 values on inst:Plate_10709->rdfs:label

Violation for inst:Furniture_45628
Less than 2 values on inst:Furniture_45628->rdfs:label

Violation for inst:Door_39685
Less than 2 values on inst:Door_39685->rdfs:label

Violation for inst:Furniture_29916
Less than 2 values on inst:Furniture_29916->rdfs:label

Violation for inst:Slab_10558
Less than 2 values on inst:Slab_10558->rdfs:label

Violation for inst:Door_9127
Less than 2 values on inst:Door_9127->rdfs:label

Violation for inst:Wall_1654
Less than 2 values on inst:Wall_1654->rdfs:label

Violation for inst:Door_25587
Less than 2 values on i

**Real-time inference**

In [67]:
# Test 2, real-time inference with both ontologies
# (read_SHACL_report runs pySHACL with inference="owlrl" whenever an ontology is passed)
started_at = time.time()
combined_ontology = ont_graph_custom+ont_graph_alignment
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_inferencetest2,  combined_ontology)
print(f"--- {time.time() - started_at} seconds ---")

Violation for inst:Furniture_29866
Less than 2 values on inst:Furniture_29866->rdfs:label

Violation for inst:OpeningElement_57190
Less than 2 values on inst:OpeningElement_57190->rdfs:label

Violation for inst:OpeningElement_57160
Less than 2 values on inst:OpeningElement_57160->rdfs:label

Violation for inst:Furniture_45628
Less than 2 values on inst:Furniture_45628->rdfs:label

Violation for inst:Wall_45518
Less than 2 values on inst:Wall_45518->rdfs:label

Violation for inst:Furniture_26953
Less than 2 values on inst:Furniture_26953->rdfs:label

Violation for inst:Furniture_45652
Less than 2 values on inst:Furniture_45652->rdfs:label

Violation for inst:Wall_26977
Less than 2 values on inst:Wall_26977->rdfs:label

Violation for inst:Furniture_27001
Less than 2 values on inst:Furniture_27001->rdfs:label

Violation for inst:Window_10515
Less than 2 values on inst:Window_10515->rdfs:label

Violation for inst:Furniture_29941
Less than 2 values on inst:Furniture_29941->rdfs:label

Viola

## Test 3

The following shape should return all spatial elements except for compartments and escape routes (i.e. site, building, storey and space), since they are instances of bot:Zone, as can be inferred from BOT.

In [68]:
# Test shape (Turtle without prefix declarations; read_SHACL_report prepends them).
# It targets every node typed bot:Zone that is not also typed bot:Element (SPARQL MINUS)
# and requires at least two rdfs:label values.
# NOTE(review): sh:minCount 2 is presumably chosen so that every targeted node is reported,
# which makes the set of targeted nodes visible in the validation output - confirm.
shape_inferencetest3 = '''
ex:inferencetest3
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:select """
            SELECT DISTINCT ?this WHERE {
                { ?this a bot:Zone . }
                MINUS
                { ?this a bot:Element . }
            }
        """ ;
    ] ;
    sh:property [
        sh:path rdfs:label ;
        sh:minCount 2 ;
    ] .
'''

**No inference**

Without using ontologies for inference, only instances that are explicitly typed with the requested class are returned; the use case graph contains no such instances:

In [69]:
# Test 3, no inference: time a single validation of the plain data graph
started_at = time.time()
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_inferencetest3)
print(f"--- {time.time() - started_at} seconds ---")

Model complies with regulations
--- 0.3596956729888916 seconds ---


**Materialization**

This method uses a materialized data graph to find inferred data

In [70]:
# Test 3, materialization: time a single validation of the materialized data graph
started_at = time.time()
report_graph, report_dict = read_SHACL_report(data_graph_correct_materialized, shape_inferencetest3)
print(f"--- {time.time() - started_at} seconds ---")

Violation for inst:Space_327
Less than 2 values on inst:Space_327->rdfs:label

Violation for inst:Space_903
Less than 2 values on inst:Space_903->rdfs:label

Violation for inst:Space_781
Less than 2 values on inst:Space_781->rdfs:label

Violation for inst:Space_145
Less than 2 values on inst:Space_145->rdfs:label

Violation for inst:Space_506
Less than 2 values on inst:Space_506->rdfs:label

--- 0.6160399913787842 seconds ---


**Real-time inference**

This requires no materialization of data graphs, which makes the input more flexible but it can be a lot slower for individual queries

In [71]:
# Test 3, real-time inference with both ontologies
# (read_SHACL_report runs pySHACL with inference="owlrl" whenever an ontology is passed)
started_at = time.time()
combined_ontology = ont_graph_custom+ont_graph_alignment
report_graph, report_dict = read_SHACL_report(data_graph_correct, shape_inferencetest3, combined_ontology)
print(f"--- {time.time() - started_at} seconds ---")

Violation for inst:Space_781
Less than 2 values on inst:Space_781->rdfs:label

Violation for inst:Storey_113
Less than 2 values on inst:Storey_113->rdfs:label

Violation for inst:Space_327
Less than 2 values on inst:Space_327->rdfs:label

Violation for inst:Site_120
Less than 2 values on inst:Site_120->rdfs:label

Violation for inst:Space_903
Less than 2 values on inst:Space_903->rdfs:label

Violation for inst:Space_145
Less than 2 values on inst:Space_145->rdfs:label

Violation for inst:Space_506
Less than 2 values on inst:Space_506->rdfs:label

Violation for inst:Building_110
Less than 2 values on inst:Building_110->rdfs:label

Violation for inst:Storey_117
Less than 2 values on inst:Storey_117->rdfs:label

--- 205.0161325931549 seconds ---


## Comparison of approaches using the subset shapes

*(commented out since this takes a long time!)*

In [72]:
# Benchmark harness comparing the compliance-checking approaches per shape.
# The whole cell is intentionally commented out because a full run takes a long
# time; uncomment it to reproduce the measurements.
#
# time_validation() calls read_SHACL_report() n times with stdout redirected into
# the StringIO buffer `f` (so the reports are not printed) and returns the list of
# elapsed wall-clock times in seconds.
# Each loop below splits `shapes_graph` on triple newlines, times every resulting
# shape against the correct and the incorrect use case, and appends
# [first line of the shape, method, use case, mean runtime] to l_comparison.
# Note: the "no inference" and "materialized" loops pass n=2, while the real-time
# inference loops rely on the default n=1, so those entries are single measurements.
# The final loop prints the mean runtime per method.

# import contextlib
# import io

# f = io.StringIO()
# def time_validation(data_graph, shapes_graph, ont_graph=None, inference=None, n=1):
#     l_times = []
#     while len(l_times) < n: # repeat the measurement n times in order to get a more reliable sample size
#         start_time = time.time()
#         with contextlib.redirect_stdout(f):
#             result = read_SHACL_report(data_graph, shapes_graph, ont_graph, inference)
#             l_times.append(time.time() - start_time)
#     return l_times

# l_comparison = []
# for shape in shapes_graph.split('\n\n\n'):
#     l_times = time_validation(data_graph_correct, shape, n=2)
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'no inference', 'correct use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
#     l_times = time_validation(data_graph_incorrect, shape, n=2)
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'no inference', 'incorrect use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
# for shape in shapes_graph.split('\n\n\n'):
#     l_times = time_validation(data_graph_correct_materialized, shape, n=2)
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'materialized', 'correct use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
#     l_times = time_validation(data_graph_incorrect_materialized, shape, n=2)
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'materialized', 'incorrect use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
# for shape in shapes_graph.split('\n\n\n'):
#     l_times = time_validation(data_graph_correct, shape, ont_graph_custom)
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'RDFS inference (internal only)', 'correct use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
#     l_times = time_validation(data_graph_incorrect, shape, ont_graph_custom)
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'RDFS inference (internal only)', 'incorrect use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
# for shape in shapes_graph.split('\n\n\n'):
#     l_times = time_validation(data_graph_correct, shape, ont_graph_custom, inference='owlrl')
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'OWL-RL inference (internal only)', 'correct use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
#     l_times = time_validation(data_graph_incorrect, shape, ont_graph_custom, inference='owlrl')
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'OWL-RL inference (internal only)', 'incorrect use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
# for shape in shapes_graph.split('\n\n\n'):
#     l_times = time_validation(data_graph_correct, shape, ont_graph_custom+ont_graph_alignment)
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'RDFS inference (internal+external)', 'correct use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
#     l_times = time_validation(data_graph_incorrect, shape, ont_graph_custom+ont_graph_alignment)
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'RDFS inference (internal+external)', 'incorrect use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
# for shape in shapes_graph.split('\n\n\n'):
#     l_times = time_validation(data_graph_correct, shape, ont_graph_custom+ont_graph_alignment, inference='owlrl')
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'OWL-RL inference (internal+external)', 'correct use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
#     l_times = time_validation(data_graph_incorrect, shape, ont_graph_custom+ont_graph_alignment, inference='owlrl')
#     l_comparison.append([shape.lstrip('\n').split('\n')[0], 'OWL-RL inference (internal+external)', 'incorrect use case', sum(l_times)/len(l_times)])
#     print(l_comparison[-1])
#
# # get all methods from list
# print("RESULTS:")
# cc_methods = set(x[1] for x in l_comparison)
# for m in cc_methods:
#   measurements = [x[-1] for x in l_comparison if x[1] == m]
#   print(m, sum(measurements)/len(measurements))

**RESULTS:**


| Method | Average runtime |
|:-------|:---------:|
| no inference | 0.59s |
| materialized | 1.35s |
| RDFS inference (internal only) | 6.12s |
| OWL-RL inference (internal only) | 6.54s |
| RDFS inference (internal+external)|  190s |
| OWL-RL inference (internal+external)|  202s |

*Materialization of the data graph using internal+external ontologies took only approximately 7 seconds, which means that materializing first is a lot faster than real-time inference, even when only a single SHACL shape is validated.*

*Also, performing real-time inference using OWL-RL instead of RDFS can take a lot longer, while materialization using OWL-RL instead of RDFS does not require extra time.*