# 1) Setup

## Imports

In [12]:
# ----------------------------------------
# Imports
# ----------------------------------------
import os

from rdflib import Namespace, SH, RDF, BNode
from pyshacl import validate

from tasty import constants as tc
from tasty import graphs as tg

## Inputs
Define the key variables and input information here

***Items to Change***
- `SHAPE`: this is the name of the SHACL equipment shape against which you would like to validate your sample equipment in the instance data
- `SAMPLE`: this is the name of the sample equipment in your instance data
- `input_namespace_uri`: this is the namespace uri used for your sample equipment in the instance data
- `data_graph_filename`: this is the filename/filepath of the instance data for the data graph
- `shapes_graph_filename`: this it the filename/filepath of the SHACL shapes data for the shape graph 
***Remaining Items*** </br>
These items should be okay as is, but can be changed if need be. If you are printing out results, <u>*make sure that the output directory exists in your local file structure*</u>.
- `output_directory`: this is the directory where output files will be printed to below
- `tasty_main_directory`: this is the absolute path of the main tasty directory. It should just be the parent directory of the current working directory.

In [13]:
# ----------------------------------------
# User Defined Variables
# ----------------------------------------

SHAPE = 'NREL-VAV-Test-Shape'
SAMPLE = 'NREL-Equip-Test'
input_namespace_uri = 'urn:sample/'

data_graph_filename = 'examples/example_data/input/example_data.ttl'
shapes_graph_filename = 'examples/example_data/input/example_shapes.ttl'

output_directory = os.path.join(os.path.abspath(''), 'example_data/output')
tasty_main_directory = os.path.join(os.path.abspath(''), '../')
# print(tasty_main_directory)

# 2) Main Code

## Definitions
This defines additional variables and helper functions to be used below

In [14]:
# ----------------------------------------
# Variables and Constants
# ----------------------------------------

NAMESPACE = Namespace(input_namespace_uri)
shape_name = tc.PH_SHAPES_NREL[SHAPE]
target_node = NAMESPACE[SAMPLE]

# ----------------------------------------
# Helper Function Definitions
# ----------------------------------------

def get_data_graph():
    n = tg.get_versioned_graph(tc.HAYSTACK, tc.V3_9_10)
    f = os.path.join(tasty_main_directory, data_graph_filename)
    n.parse(f, format='turtle')
    return n


def get_shapes_graph():
    g = tg.get_versioned_graph(tc.HAYSTACK, tc.V3_9_10)
    f = os.path.join(tasty_main_directory, shapes_graph_filename)
    g.parse(f, format='turtle')
    return g


def print_graph_to_file(g, filename):
    output_filename = os.path.join(output_directory, filename + ".ttl")
    g.serialize(output_filename, format='turtle')


def print_graph(g):
    print(g.serialize(format='turtle').decode('utf-8'))

## Generate Graphs

### Create Data, Shapes, and Ontology Graphs 
Create the data and shapes graph using the helper functions defined above. The data and shapes graphs are generated using rdflib's `parse` function to import the graphs defined in `data_graph_filename` and the `shapes_graph_filename` respectively. The ontology graph is generated by the `load_ontology` method from tasty's `graphs` module (imported as `tg`).

In [15]:
# ----------------------------------------
# Generate Graphs
# ----------------------------------------

# Data Graph
data_graph = get_data_graph()
print("...loaded data graph")

# Shapes Graph
shapes_graph = get_shapes_graph()
print("...loaded shapes graph")

# Ontology Graph
ont_graph = tg.load_ontology(tc.HAYSTACK, tc.V3_9_10)
print("...loaded ontology graph")


...loaded data graph
...loaded shapes graph
...loaded ontology graph


### Add Sample Equipment as Target Node

First we add a triple to the shapes graph:
- The **subject** is the SHACL equipment shape
- The **predicate** is `sh:targetNode`
- The **object** is the sample equipment

This indicates that the sample shape should conform to the overall SHACL equipment shape 

In [16]:
# add Instance Equipment as target node to SHACL Equipment Shape
shapes_graph.add((shape_name, SH.targetNode, target_node))
print(f"\tadded '{target_node}' as target node to {shape_name}")

	added 'urn:sample/NREL-Equip-Test' as target node to https://project-haystack.org/datashapes/nrel#NREL-VAV-Test-Shape


Next we iterate over all *nodes* of the SHACL equipment shape using rdflidb's `triples()` function which supports basic triple pattern matching ([see documentation here](https://rdflib.readthedocs.io/en/stable/intro_to_graphs.html)). For each triple with a subject of the SHACL equipment shape and predicate of `sh:node`, we take the object (i.e. all of the functional group shapes which constitute the equipment shape) and add the sample equipment as a target node to these shapes. This is done so that the validation results will identify specific points that fail to validate, rather than simply functional group shapes.</br>
So for each *node* (functional group shape) add a triple to the shapes graph:
- The **subject** is the *node* (functional group shape)
- The **predicate** is `sh:targetNode`
- The **object** is the sample equipment

Ultimately, this means we are indicating that the sample equipment should conform to each of these functional group shapes independently. Note that this is acceptable currently because there is no `maxCount` on the functional group shape's `equipRef` path. 

In [17]:
# add Instance Equipment as target node to SHACL Functional Groups Shapes
for s1, p1, o1 in shapes_graph.triples((shape_name, SH.node, None)):
    shapes_graph.add((o1, SH.targetNode, target_node))
    print(f"\tadded '{target_node}' as target node to {o1}")

	added 'urn:sample/NREL-Equip-Test' as target node to https://project-haystack.org/datashapes/nrel#TestFunctionalShape1
	added 'urn:sample/NREL-Equip-Test' as target node to https://project-haystack.org/datashapes/nrel#TestFunctionalShape2


# 3) Validation

## PySHACL Validation

In [18]:
# ----------------------------------------
# Run pySCHACL Validation
# ----------------------------------------
result = validate(data_graph, shacl_graph=shapes_graph, ont_graph=ont_graph)
conforms, results_graph, results = result

print(f"Conforms: {conforms}")

Conforms: False


In [19]:
print_graph(results_graph)

@prefix nrel: <https://project-haystack.org/datashapes/nrel#> .
@prefix phIoT: <https://project-haystack.org/def/phIoT/3.9.10#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a sh:ValidationReport ;
    sh:conforms false ;
    sh:result [ a sh:ValidationResult ;
            sh:focusNode <urn:sample/NREL-Equip-Test> ;
            sh:resultMessage "Value does not conform to every Shape in ('nrel:TestFunctionalShape1', 'nrel:TestFunctionalShape1')" ;
            sh:resultSeverity sh:Violation ;
            sh:sourceConstraintComponent sh:NodeConstraintComponent ;
            sh:sourceShape nrel:NREL-VAV-Test-Shape ;
            sh:value <urn:sample/NREL-Equip-Test> ],
        [ a sh:ValidationResult ;
            sh:focusNode <urn:sample/NREL-Equip-Test> ;
            sh:resultPath [ sh:inversePath phIoT:equipRef ] ;
            sh:resultSeverity sh:Violation ;
            sh:sourceConstraintComponent sh:QualifiedMinCountConstraintComp

## Determine Missing Points
This implements a rudimentary logic for finding the missing points (simple shapes) from the pySHACL results graph. The process is as follows:
1. Find each "validation result" which represents one SHACL constraint that was not met. This is done by iterating through all the triples in the graph and finding the triple with a `rdf:type` of `sh:ValidationResult`. The subject of this match will be the URI of the "validation result" node.
2. For each of these "validation results" look at the `sh:sourceShape`
3. If it is a BNode (as opposed to a URI) then we assume this refers to one of the constraints on the functional group SHACL shape (and therefore one of the "simple shapes") and it will have a `sh:qualifiedValueShape` which should be a URI of one of the simple shapes.
4. Add this shape to the list of missing points

*Note: this logic likely needs to be refined*

In [20]:
missing_points = []

# Find the Validation Results
for subject, predicate, object in results_graph.triples((None, RDF.type, SH.ValidationResult)):
#     print(f"Subject:{subject}\tPredicate:{predicate}\tObject:{object}")

    # check if Validation result points to a BNode
    for node in results_graph.objects(subject=subject, predicate=SH.sourceShape):
#         print(f"\tNode:{node}\t\tIs BNode:{isinstance(node, BNode)}")

        if isinstance(node, BNode):
            point = results_graph.value(subject=node, predicate=SH.qualifiedValueShape)
            missing_points.append(point)

if len(missing_points) <= 0:
    print("No Points Missing")
else:
    print("Missing Points:")
    for point in missing_points:
        for subject, predicate, object in shapes_graph.triples((point, SH.ClassConstraintComponent, None)):
            print(f"Subject:{subject}\tPredicate:{predicate}\tObject:{object}")
#         print(f"\t{point}")
            

Missing Points:


## Print pySHACL Graphs and Results to File (Optional) 

In [21]:
# ----------------------------------------
# Print Output Files
# ----------------------------------------
# Print Results to File
fn = os.path.join(output_directory, "results.txt")
f = open(fn, "w")
f.write(results)
f.close()
print("...printed results")

# Print Graphs to File(s)
print_graph_to_file(data_graph, "data_graph")
print("...printed data graph")
print_graph_to_file(shapes_graph, "shapes_graph")
print("...printed shapes graph")
print_graph_to_file(results_graph, "results_graph")
print("...printed results graph")

...printed results
...printed data graph
...printed shapes graph
...printed results graph


## 3b) Brick Validation (Optional)
Brickscehma uses pyshacl for validation, so it gives us the same result. In this case, we just passed in the shapes graph directly, so this is not actually testing conformance against an actual brick model or using the brick schema in any significant way.

In [22]:
# ----------------------------------------
# Run BrickSchema Validation
# ----------------------------------------

from brickschema import Graph

# Set Up Graphs
dg = Graph()
df = os.path.join(tasty_main_directory, data_graph_filename)
dg.load_file(df)

sg = Graph()
sf = os.path.join(tasty_main_directory, shapes_graph_filename)
sg.load_file(sf)

valid, _, report = dg.validate(shape_graphs=[sg])
print(f"Brick Validation - Conforms: {valid}")
if not valid:
    print(report)

Brick Validation - Conforms: False
Validation Report
Conforms: False
Results (1):
Constraint Violation in NodeConstraintComponent (http://www.w3.org/ns/shacl#NodeConstraintComponent):
	Severity: sh:Violation
	Source Shape: nrel:NREL-VAV-Test-Shape
	Focus Node: sample:NREL-Equip-Test
	Value Node: sample:NREL-Equip-Test
	Message: Value does not conform to every Shape in ('nrel:TestFunctionalShape1', 'nrel:TestFunctionalShape1')

