# Sample Skyspark VAV Validation

# 1) Setup

## Imports

In [2]:
# ----------------------------------------
# Imports
# ----------------------------------------
import os

from rdflib import Namespace, SH, RDF, BNode
from pyshacl import validate

from tasty import constants as tc
from tasty import graphs as tg

## Inputs
Define the key variables and input information here

***Items to Change***
- `SHAPE`: this is the name of the SHACL equipment shape against which you would like to validate your sample equipment in the instance data
- `SAMPLE`: this is the name of the sample equipment in your instance data
- `input_namespace_uri`: this is the namespace uri used for your sample equipment in the instance data
- `data_graph_filename`: this is the filename/filepath of the instance data for the data graph
- `shapes_graph_filename`: this it the filename/filepath of the SHACL shapes data for the shape graph 
***Remaining Items*** </br>
These items should be okay as is, but can be changed if need be. If you are printing out results, <u>*make sure that the output directory exists in your local file structure*</u>.
- `output_directory`: this is the directory where output files will be printed to below
- `tasty_main_directory`: this is the absolute path of the main tasty directory. It should just be the parent directory of the current working directory.

In [3]:
# ----------------------------------------
# User Defined Variables
# ----------------------------------------

SHAPE = 'NREL-VAV-SD-Cooling-Only-Shape'
SAMPLE = '214466de-7abb28a7'
input_namespace_uri = 'urn:/_#'

raw_data_graph_filename = 'tests/files/data/sample_skyspark_vav_raw.ttl'
data_graph_filename = 'tests/files/data/sample_skyspark_vav_clean.ttl'
shapes_graph_filename = 'tasty/generated_shapes/haystack_all.ttl'

output_directory = os.path.join(os.path.abspath(''), 'example_data/output')
tasty_main_directory = os.path.join(os.path.abspath(''), '../')
# print(tasty_main_directory)

### Pre-Process Raw Input File

In [4]:
import re

# read in the file
f1 = os.path.join(tasty_main_directory, raw_data_graph_filename)
with open(f1, 'r') as raw_file:
    filedata = raw_file.read()

# remove date-time fields in the middle of the definition
filedata = re.sub('\n.*\^{2}xsd:dateTime.*;', '', filedata)
# remove date-time fields at the end of the definition
filedata = re.sub(';\n.*\^{2}xsd:dateTime.*.', '.', filedata)

# add urn namespace to graph
filedata = re.sub('@prefix', '@prefix _: <urn:/_#> .\n@prefix', filedata, count = 1)

# delete the project haystack namespaces
filedata = re.sub('/3.9.9', '/3.9.10', filedata)

print(filedata)
f2 = os.path.join(tasty_main_directory, data_graph_filename)
with open(f2, 'w') as clean_file:
    clean_file.write(filedata)


@prefix _: <urn:/_#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix core: <https://skyfoundry.com/def/core/3.0.27#> .
@prefix conn: <https://skyfoundry.com/def/conn/3.0.27#> .
@prefix his: <https://skyfoundry.com/def/his/3.0.27.1#> .
@prefix ph: <https://project-haystack.org/def/ph/3.9.10#> .
@prefix bacnet: <https://skyfoundry.com/def/bacnet/3.0.27#> .
@prefix phScience: <https://project-haystack.org/def/phScience/3.9.10#> .
@prefix phIoT: <https://project-haystack.org/def/phIoT/3.9.10#> .
@prefix point: <https://skyfoundry.com/def/point/3.0.27#> .
@prefix sql: <https://skyfoundry.com/def/sql/3.0.27#> .

ph:hasTag a owl:ObjectProperty ;
    rdfs:range ph:marker .

_:211c90ab-701fa511 a bacnet:bacnetPoint ;
    ph:hasTag phScience:air,
        bacnet:bacnetPoint,
        phIoT:his,
        phIoT:point,
       

# 2) Main Code

## Definitions
This defines additional variables and helper functions to be used below

In [5]:
# ----------------------------------------
# Variables and Constants
# ----------------------------------------

NAMESPACE = Namespace(input_namespace_uri)
shape_name = tc.PH_SHAPES_NREL[SHAPE]
target_node = NAMESPACE[SAMPLE]

POINT = Namespace("https://skyfoundry.com/def/point/3.0.27#")
BACNET = Namespace("https://skyfoundry.com/def/bacnet/3.0.27#")

# valid_tag_nodes = [tc.PH_3_9_9["zone"], "air", "temp", "sensor", "sp", "cmd", "discharge", "damper", "humidity", "co2", "occupied",
#                            "occupancyIndicator", "cooling", "heating", "effective", "occ", "unocc", "standby", "operating", "mode", "request"
#                            "leaving", "entering", "flow", "min", "max", "pressure", ]

invalid_tags = [tc.PHIOT_3_9_10["point"], tc.PHIOT_3_9_10["his"], POINT["hisCollectCov"], tc.PHIOT_3_9_10["cur"], BACNET["bacnetPoint"]]

# ----------------------------------------
# Helper Function Definitions
# ----------------------------------------

def get_data_graph():
    n = tg.get_versioned_graph(tc.HAYSTACK, tc.V3_9_10)
    f = os.path.join(tasty_main_directory, data_graph_filename)
    n.parse(f, format='turtle')
    return n


def get_shapes_graph():
    g = tg.get_versioned_graph(tc.HAYSTACK, tc.V3_9_10)
    f = os.path.join(tasty_main_directory, shapes_graph_filename)
    g.parse(f, format='turtle')
    return g


def print_graph_to_file(g, filename):
    output_filename = os.path.join(output_directory, filename + ".ttl")
    g.serialize(output_filename, format='turtle')


def print_graph(g):
    print(g.serialize(format='turtle').decode('utf-8'))

## Generate Graphs

### Create Data, Shapes, and Ontology Graphs 
Create the data and shapes graph using the helper functions defined above. The data and shapes graphs are generated using rdflib's `parse` function to import the graphs defined in `data_graph_filename` and the `shapes_graph_filename` respectively. The ontology graph is generated by the `load_ontology` method from tasty's `graphs` module (imported as `tg`).

In [6]:
# ----------------------------------------
# Generate Graphs
# ----------------------------------------

# Data Graph
data_graph = get_data_graph()
print("...loaded data graph")

# Shapes Graph
shapes_graph = get_shapes_graph()
print("...loaded shapes graph")

# Ontology Graph
ont_graph = tg.load_ontology(tc.HAYSTACK, tc.V3_9_10)
print("...loaded ontology graph")


...loaded data graph
...loaded shapes graph
...loaded ontology graph


### Post Process of Data Graph

Notes: 
1) need to fix prefixes from 9 to 10
2) need to delete time-date fields
3) need to get proper namespace

#### a) Bind Default Namespace to Graph
The default namespace generated by skyspark appears to be an underscore "\_". There is no prefix namespace associated with this on the output .ttl graph from skyspark, so there is no official URI. For the purpose of this excercise, we will use "urn:\/\_#", but we may wish to revisit this. The nasmespace is defined above under the "Inputs" section. 

In [9]:
# data_graph.bind("_", NAMESPACE)

# # fix project-haystack namespaces
# data_graph.bind("ph", tc.PH_3_9_10, replace = True)
# data_graph.bind("phScience", tc.PHSCIENCE_3_9_10, replace = True)
# data_graph.bind("phIoT", tc.PHIOT_3_9_10, replace = True)

#### a) Keep Only Valid Tags in Data Graph

In [7]:
# keep only valid tags
for s1, p1, o1 in data_graph.triples((None, tc.PHIOT_3_9_10["equipRef"], target_node)):
    print(s1)
    for s, p, o in data_graph.triples((s1, tc.PH_3_9_10["hasTag"], None)):
        if o in invalid_tags:
            data_graph.remove((s, p, o))

urn:/_#211c90ab-701fa511
urn:/_#211c90b6-0da18cd7
urn:/_#211c90b6-52117d4d
urn:/_#22f5d821-8e310c3e
urn:/_#211c90b7-f2430aa1


#### b) Add First Class Point Type

In [8]:
from tasty import point_mapper as pm

# load the point tree
pt = pm.PointTree('schemas/haystack/defs_3_9_10.ttl', 'point')
root = pt.get_root()
print(len(root.children))

# Start by getting all backnet points
for s, p, o in data_graph.triples((None, RDF.type, BACNET['bacnetPoint'])):
    print(f"Point: {s}")
    print(f"Tags: ")
    
    # get the tags for this point
    tags = []
    for s1,p1,o1 in data_graph.triples((s, tc.PH_3_9_10["hasTag"], None)):
        tag = o1[o1.find('#')+1:]
        print(f"\t{tag}")
        tags.append(tag)
    
    # now determine first class point type
    fc_point = pt.determine_first_class_point_type(root, tags)
    print(f"\tFirst Class Entity Type: {fc_point.type}\n")
    
    # add first class point type as class to the point
    data_graph.add((s, RDF.type,tc.PHIOT_3_9_10[fc_point.type]))
    # remove the tags associated with first class point
    for tag in fc_point.tags:
        # using all three namespaces because i do not know which is correct
        # TODO: develop method for determining proper namespace
        data_graph.remove((s, tc.PH_3_9_10["hasTag"], tc.PHIOT_3_9_10[tag]))
        data_graph.remove((s, tc.PH_3_9_10["hasTag"], tc.PHSCIENCE_3_9_10[tag]))
        data_graph.remove((s, tc.PH_3_9_10["hasTag"], tc.PH_3_9_10[tag]))
    
    

8
Point: urn:/_#211c90b6-52117d4d
Tags: 
	temp
	zone
	air
	sp
	First Class Entity Type: air-temp-sp

Point: urn:/_#211c90ab-701fa511
Tags: 
	temp
	zone
	sensor
	air
	First Class Entity Type: air-temp-sensor

Point: urn:/_#211c90b7-f2430aa1
Tags: 
	sensor
	unocc
	First Class Entity Type: point

Point: urn:/_#211c90b6-0da18cd7
Tags: 
	discharge
	cmd
	air
	damper
	First Class Entity Type: point



In [9]:
print_graph(data_graph)

@prefix bacnet: <https://skyfoundry.com/def/bacnet/3.0.27#> .
@prefix conn: <https://skyfoundry.com/def/conn/3.0.27#> .
@prefix core: <https://skyfoundry.com/def/core/3.0.27#> .
@prefix his: <https://skyfoundry.com/def/his/3.0.27.1#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix p_: <urn:/_#> .
@prefix ph: <https://project-haystack.org/def/ph/3.9.10#> .
@prefix phIoT: <https://project-haystack.org/def/phIoT/3.9.10#> .
@prefix phScience: <https://project-haystack.org/def/phScience/3.9.10#> .
@prefix point: <https://skyfoundry.com/def/point/3.0.27#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sql: <https://skyfoundry.com/def/sql/3.0.27#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ph:hasTag a owl:ObjectProperty ;
    rdfs:range ph:marker .

p_:211c90ab-701fa511 a phIoT:air-temp-sensor,
        bacnet:bacnetPoint ;
    rdfs:label "S&TF UFVAV-3 Zone Temp" ;
    ph:hasTag phIoT:zone ;
    ph:kind "Number" ;
    ph:tz "Denver" ;
    ph:unit "°F" ;


### Add Sample Equipment as Target Node

First we add a triple to the shapes graph:
- The **subject** is the SHACL equipment shape
- The **predicate** is `sh:targetNode`
- The **object** is the sample equipment

This indicates that the sample shape should conform to the overall SHACL equipment shape 

In [10]:
# add Instance Equipment as target node to SHACL Equipment Shape
shapes_graph.add((shape_name, SH.targetNode, target_node))
print(f"\tadded '{target_node}' as target node to {shape_name}")

	added 'urn:/_#214466de-7abb28a7' as target node to https://project-haystack.org/datashapes/nrel#NREL-VAV-SD-Cooling-Only-Shape


Next we iterate over all *nodes* of the SHACL equipment shape using rdflidb's `triples()` function which supports basic triple pattern matching ([see documentation here](https://rdflib.readthedocs.io/en/stable/intro_to_graphs.html)). For each triple with a subject of the SHACL equipment shape and predicate of `sh:node`, we take the object (i.e. all of the functional group shapes which constitute the equipment shape) and add the sample equipment as a target node to these shapes. This is done so that the validation results will identify specific points that fail to validate, rather than simply functional group shapes.</br>
So for each *node* (functional group shape) add a triple to the shapes graph:
- The **subject** is the *node* (functional group shape)
- The **predicate** is `sh:targetNode`
- The **object** is the sample equipment

Ultimately, this means we are indicating that the sample equipment should conform to each of these functional group shapes independently. Note that this is acceptable currently because there is no `maxCount` on the functional group shape's `equipRef` path. 

In [11]:
# add Instance Equipment as target node to SHACL Functional Groups Shapes
for s1, p1, o1 in shapes_graph.triples((shape_name, SH.node, None)):
    shapes_graph.add((o1, SH.targetNode, target_node))
    print(f"\tadded '{target_node}' as target node to {o1}")

	added 'urn:/_#214466de-7abb28a7' as target node to https://project-haystack.org/datashapes/nrel#ZoneModeControlShape
	added 'urn:/_#214466de-7abb28a7' as target node to https://project-haystack.org/datashapes/nrel#ZoneHeatingSetpointsShape
	added 'urn:/_#214466de-7abb28a7' as target node to https://project-haystack.org/datashapes/nrel#ZoneCoolingSetpointsShape
	added 'urn:/_#214466de-7abb28a7' as target node to https://project-haystack.org/datashapes/nrel#AirTemperatureControlShape
	added 'urn:/_#214466de-7abb28a7' as target node to https://project-haystack.org/datashapes/nrel#ZoneOccupancySensorsShape
	added 'urn:/_#214466de-7abb28a7' as target node to https://project-haystack.org/datashapes/nrel#ZoneThermalComfortSensorsShape
	added 'urn:/_#214466de-7abb28a7' as target node to https://project-haystack.org/datashapes/nrel#VAVAirFlowControlShape
	added 'urn:/_#214466de-7abb28a7' as target node to https://project-haystack.org/datashapes/nrel#ZoneDemandControlledVentilationShape


# 3) Validation

## PySHACL Validation

In [12]:
# ----------------------------------------
# Run pySCHACL Validation
# ----------------------------------------
result = validate(data_graph, shacl_graph=shapes_graph, ont_graph=ont_graph)
conforms, results_graph, results = result

print(f"Conforms: {conforms}")

Conforms: False


In [13]:
print_graph(results_graph)

@prefix nrel: <https://project-haystack.org/datashapes/nrel#> .
@prefix phIoT: <https://project-haystack.org/def/phIoT/3.9.10#> .
@prefix phShapes: <https://project-haystack.org/datashapes/core#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a sh:ValidationReport ;
    sh:conforms false ;
    sh:result [ a sh:ValidationResult ;
            sh:focusNode <urn:/_#214466de-7abb28a7> ;
            sh:resultPath [ sh:inversePath phIoT:equipRef ] ;
            sh:resultSeverity sh:Violation ;
            sh:sourceConstraintComponent sh:QualifiedMinCountConstraintComponent ;
            sh:sourceShape [ sh:path [ sh:inversePath phIoT:equipRef ] ;
                    sh:qualifiedMaxCount 1 ;
                    sh:qualifiedMinCount 1 ;
                    sh:qualifiedValueShape phShapes:zone-air-co2-sp-shape ;
                    sh:qualifiedValueShapesDisjoint true ] ],
        [ a sh:ValidationResult ;
            sh:focusNode <urn:/_#214

## Determine Missing Points
This implements a rudimentary logic for finding the missing points (simple shapes) from the pySHACL results graph. The process is as follows:
1. Find each "validation result" which represents one SHACL constraint that was not met. This is done by iterating through all the triples in the graph and finding the triple with a `rdf:type` of `sh:ValidationResult`. The subject of this match will be the URI of the "validation result" node.
2. For each of these "validation results" look at the `sh:sourceShape`
3. If it is a BNode (as opposed to a URI) then we assume this refers to one of the constraints on the functional group SHACL shape (and therefore one of the "simple shapes") and it will have a `sh:qualifiedValueShape` which should be a URI of one of the simple shapes.
4. Add this shape to the list of missing points

*Note: this logic likely needs to be refined*

In [14]:
missing_points = []

# Find the Validation Results
for subject, predicate, object in results_graph.triples((None, RDF.type, SH.ValidationResult)):
#     print(f"Subject:{subject}\tPredicate:{predicate}\tObject:{object}")

    # check if Validation result points to a BNode
    for node in results_graph.objects(subject=subject, predicate=SH.sourceShape):
#         print(f"\tNode:{node}\t\tIs BNode:{isinstance(node, BNode)}")

        if isinstance(node, BNode):
            point = results_graph.value(subject=node, predicate=SH.qualifiedValueShape)
            missing_points.append(point)

if len(missing_points) <= 0:
    print("No Points Missing")
else:
    print(f"{len(missing_points)} Missing Points:")
    for point in missing_points:
#         for subject, predicate, object in shapes_graph.triples((point, SH.class, None)):
#             print(f"Subject:{subject}\tPredicate:{predicate}\tObject:{object}")
        print(f"\t{point}")
            

26 Missing Points:
	https://project-haystack.org/datashapes/core#zone-air-co2-sensor-shape
	https://project-haystack.org/datashapes/core#zone-air-co2-sp-shape
	https://project-haystack.org/datashapes/nrel#MinimumHeatingDischargeAirFlowSetpointShape
	https://project-haystack.org/datashapes/nrel#MaximumHeatingDischargeAirFlowSetpointShape
	https://project-haystack.org/datashapes/nrel#DischargeAirFlowShape
	https://project-haystack.org/datashapes/nrel#DischargeAirFlowSetpointShape
	https://project-haystack.org/datashapes/nrel#DischargeAirDamperFeedbackShape
	https://project-haystack.org/datashapes/nrel#MinimumCoolingDischargeAirFlowSetpointShape
	https://project-haystack.org/datashapes/nrel#MaximumCoolingDischargeAirFlowSetpointShape
	https://project-haystack.org/datashapes/nrel#ZoneRelativeHumidityShape
	https://project-haystack.org/datashapes/nrel#ZoneTemperatureCoolingStandbySetpointShape
	https://project-haystack.org/datashapes/nrel#ZoneTemperatureCoolingEffectiveSetpointShape
	https:

## Print pySHACL Graphs and Results to File (Optional) 

In [24]:
# ----------------------------------------
# Print Output Files
# ----------------------------------------
# Print Results to File
fn = os.path.join(output_directory, "results.txt")
f = open(fn, "w")
f.write(results)
f.close()
print("...printed results")

# Print Graphs to File(s)
print_graph_to_file(data_graph, "data_graph")
print("...printed data graph")
print_graph_to_file(shapes_graph, "shapes_graph")
print("...printed shapes graph")
print_graph_to_file(results_graph, "results_graph")
print("...printed results graph")

...printed results
...printed data graph
...printed shapes graph
...printed results graph


## 3b) Brick Validation (Optional)
Brickscehma uses pyshacl for validation, so it gives us the same result. In this case, we just passed in the shapes graph directly, so this is not actually testing conformance against an actual brick model or using the brick schema in any significant way.

In [25]:
# ----------------------------------------
# Run BrickSchema Validation
# ----------------------------------------

from brickschema import Graph

# Set Up Graphs
dg = Graph()
df = os.path.join(tasty_main_directory, data_graph_filename)
dg.load_file(df)

sg = Graph()
sf = os.path.join(tasty_main_directory, shapes_graph_filename)
sg.load_file(sf)

valid, _, report = dg.validate(shape_graphs=[sg])
print(f"Brick Validation - Conforms: {valid}")
if not valid:
    print(report)

Brick Validation - Conforms: True
