# SPARQL Queries for Classes and Categories (DOP-59)
## Load data
The following loads the TWONTO data from the turtle file and the required libraies for data processing. 

In [9]:
from rdflib import Graph
import pandas as pd
g = Graph()
g.parse(r'../../GitHub/TWONTO/OWL/TWONTO.ttl')


<Graph identifier=Nbef9f97ed3e742149fce0989522310c4 (<class 'rdflib.graph.Graph'>)>

## Class Query
The following covers the code to query the turtle file for the class data. The scope of the assets are all the descendents of 'discrete asset' and 'system' with the following exclusions:
1. Assets with the 'tw:not_relevant_to_asset_or_maintenance_management' property 'true'
2. Assets with the 'tw:exclude_from_asset_class_list ' property 'true'
3. Assets with the 'owl:deprecated' property 'true'   

The output should have the following formatting:   

1. Class name
2. Synonyms (comma seperated in multiple)
3. Class definition
4. Class IRI
5. Parent IRI 

In [10]:
class_q ='''
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>  # Ensure SKOS is defined
PREFIX tw: <http://www.toronto.ca/TWONTO#>

SELECT 
    (STR(?label) AS ?Class_name)
    (GROUP_CONCAT (DISTINCT(STR(COALESCE(?synonym, ""))); SEPARATOR=", ") AS ?Class_synonym)  # If no synonym, return an empty string
    (STR(COALESCE(?def, "")) AS ?Class_definition)   # If no definition, return an empty string
    (STR(?sub) AS ?Class_IRI)
    (STR(?parent) AS ?Parent_IRI)  
WHERE {     
    ?sub rdfs:label ?label ;
        rdfs:subClassOf ?parent .
    ?discrete_asset rdfs:label 'discrete asset' .
    ?system rdfs:label 'system' .
    
    OPTIONAL { ?sub skos:altLabel ?synonym }  # Synonym is optional
    OPTIONAL { ?sub rdfs:isDefinedBy ?def }   # Definition is optional
    
    FILTER NOT EXISTS { ?sub owl:deprecated "true"^^xsd:boolean }
    FILTER NOT EXISTS { ?sub tw:exclude_from_asset_class_list "true"^^xsd:boolean }
    FILTER NOT EXISTS { ?sub tw:not_relevant_to_asset_or_maintenance_management "true"^^xsd:boolean }

    # Select only descendants of "Discrete Asset" and "System"
    ?sub rdfs:subClassOf+ ?ancestor .
    FILTER (?ancestor IN (?discrete_asset, ?system))
}
GROUP BY ?label ?def ?sub ?parent
'''
Class_name, Class_synonym, Class_definition, Class_IRI, Parent_IRI= [],[],[],[],[]

for x in g.query(class_q):
    Class_name.append(x['Class_name'].value)
    Class_synonym.append(x['Class_synonym'].value)
    Class_definition.append(x['Class_definition'].value)
    Class_IRI.append(x['Class_IRI'].value)
    Parent_IRI.append(x['Parent_IRI'].value)

Classes = pd.DataFrame({'Class_name':Class_name,
'Class_synonym':Class_synonym,
'Class_definition':Class_definition,
'Class_IRI':Class_IRI,
'Parent_IRI':Parent_IRI})
Classes

Unnamed: 0,Class_name,Class_synonym,Class_definition,Class_IRI,Parent_IRI
0,access controlled entry point,,,http://www.toronto.ca/TWONTO#mazos-pikaz-gapob...,http://www.toronto.ca/TWONTO#00017
1,access controlled vehicle gate system,,,http://www.toronto.ca/TWONTO#sufov-kojip-romig...,http://www.toronto.ca/TWONTO#mazos-pikaz-gapob...
2,distributed function system,,a system in a primary function is performed by...,http://www.toronto.ca/TWONTO#00016,http://www.toronto.ca/TWONTO#00012
3,localized function system,hierarchal system,a system in which the assets playing the same ...,http://www.toronto.ca/TWONTO#00017,http://www.toronto.ca/TWONTO#00012
4,system functional block,,A system part of a bigger localized function s...,http://www.toronto.ca/TWONTO#00018,http://www.toronto.ca/TWONTO#00017
...,...,...,...,...,...
381,inverter,,,http://www.toronto.ca/TWONTO#01919,http://www.toronto.ca/TWONTO#00298
382,process,,,http://www.toronto.ca/TWONTO#00491,http://www.toronto.ca/TWONTO#00720
383,facility,,,http://www.toronto.ca/TWONTO#00330,http://www.toronto.ca/TWONTO#00720
384,facility role,,,http://www.toronto.ca/TWONTO#00330,http://www.toronto.ca/TWONTO#00720


## Category Query
The following covers the code to query the turtle file for the category data. The scope of the assets are all the descendents of 'member of a Toronto Water Broader Asset Category (MBC)' with the following exclusions:
1. Asset with the 'tw:is_category_class' property 'true'
1. Assets with the 'tw:not_relevant_to_asset_or_maintenance_management' property 'true'
2. Assets with the 'tw:exclude_from_asset_class_list ' property 'true'
3. Assets with the 'owl:deprecated' property 'true'   

The output should have the following formatting:   
   
1. Class name
2. Synonyms (comma seperated in multiple)
3. Class definition
4. Class IRI
5. Parent IRI 

In [11]:
cat_q ='''
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>  # Ensure SKOS is defined
PREFIX tw: <http://www.toronto.ca/TWONTO#>

SELECT 
    (STR(?label) AS ?Cat_name)
    (GROUP_CONCAT (DISTINCT(STR(COALESCE(?synonym, ""))); SEPARATOR=", ") AS ?Cat_synonym)  # If no synonym, return an empty string
    (STR(COALESCE(?def, "")) AS ?Cat_definition)   # If no definition, return an empty string
    (STR(?sub) AS ?Cat_IRI)
    (STR(?parent) AS ?Parent_IRI)  
WHERE {     
    ?sub rdfs:label ?label ;
        rdfs:subClassOf ?parent .

    ?MBC rdfs:label 'member of a Toronto Water Broader Asset Category (MBC)' .
    ?sub rdfs:subClassOf+ ?MBC .
    ?sub tw:is_category_class "true"^^xsd:boolean .

    OPTIONAL { ?sub skos:altLabel ?synonym }  # Synonym is optional
    OPTIONAL { ?sub rdfs:isDefinedBy ?def }   # Definition is optional
    
    FILTER NOT EXISTS { ?sub owl:deprecated "true"^^xsd:boolean }
    FILTER NOT EXISTS { ?sub tw:exclude_from_asset_class_list "true"^^xsd:boolean }
    FILTER NOT EXISTS { ?sub tw:not_relevant_to_asset_or_maintenance_management "true"^^xsd:boolean }
    
}
GROUP BY ?label ?def ?sub ?parent
'''
Cat_name, Cat_synonym, Cat_definition, Cat_IRI, Parent_IRI= [],[],[],[],[]

for x in g.query(cat_q):
    Cat_name.append(x['Cat_name'].value)
    Cat_synonym.append(x['Cat_synonym'].value)
    Cat_definition.append(x['Cat_definition'].value)
    Cat_IRI.append(x['Cat_IRI'].value)
    Parent_IRI.append(x['Parent_IRI'].value)

Cats = pd.DataFrame({'Cat_name':Cat_name,
'Cat_synonym':Cat_synonym,
'Cat_definition':Cat_definition,
'Cat_IRI':Cat_IRI,
'Parent_IRI':Parent_IRI})
Cats

Unnamed: 0,Cat_name,Cat_synonym,Cat_definition,Cat_IRI,Parent_IRI
0,HVAC system component (MBC),,,http://www.toronto.ca/TWONTO#00108,http://www.toronto.ca/TWONTO#00408
1,boiler fitting,,"a valve, gauge, regulating or controlling devi...",http://www.toronto.ca/TWONTO#00184,http://www.toronto.ca/TWONTO#00358
2,chemical or concentration analyzer (MBC),,,http://www.toronto.ca/TWONTO#00208,http://www.toronto.ca/TWONTO#00520
3,construction or earthmoving machine (MBC),skid steer,,http://www.toronto.ca/TWONTO#00238,http://www.toronto.ca/TWONTO#00747
4,electrical distribution system component (MBC),,,http://www.toronto.ca/TWONTO#00298,http://www.toronto.ca/TWONTO#00408
5,fall protection system component (MBC),,,http://www.toronto.ca/TWONTO#00339,http://www.toronto.ca/TWONTO#00657
6,first aid equipment or component (MBC),,,http://www.toronto.ca/TWONTO#00354,http://www.toronto.ca/TWONTO#00657
7,pipe fitting (MBC),,,http://www.toronto.ca/TWONTO#00358,http://www.toronto.ca/TWONTO#00573
8,instrument air or pneumatic system component (...,,,http://www.toronto.ca/TWONTO#00444,http://www.toronto.ca/TWONTO#00803
9,landscape feature or piece of green infrastruc...,,,http://www.toronto.ca/TWONTO#00459,http://www.toronto.ca/TWONTO#00522
