In [59]:
import pandas as pd

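## Load SPARQL data

The two queries below each return pairs of a TWONTO resource (`?SPARQL`) and the Avantis class it is linked to (`?Avantis`): the first through `tw:is_equivalent_to_Avantis_class`, the second through `tw:is_superclass_of_avantis_class`.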

```sparql
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX tw: <http://www.toronto.ca/TWONTO#>
SELECT ?SPARQL ?Avantis
WHERE { ?SPARQL tw:is_equivalent_to_Avantis_class ?Avantis }
```

```sparql
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX tw: <http://www.toronto.ca/TWONTO#>
SELECT ?SPARQL ?Avantis
	WHERE { ?SPARQL tw:is_superclass_of_avantis_class ?Avantis }
```

In [60]:
def load_mapping_frame(workbook, key_column):
    """Read one mapping workbook and return it with one key per row.

    Parameters
    ----------
    workbook : str
        Path of the Excel file to read. The column names are taken from the
        second row of the sheet (``header=1``).
    key_column : str
        Column whose cells may hold several newline-separated keys.

    Returns
    -------
    pandas.DataFrame
        The sheet with ``key_column`` exploded to one key per row, the
        ``_x000D_`` marker removed from every string cell, surrounding
        whitespace stripped, and rows with a missing or empty key dropped.
    """
    frame = pd.read_excel(workbook, header=1)
    # A single cell can list several keys, one per line: give each its own row.
    frame[key_column] = frame[key_column].str.split("\n")
    frame = frame.explode(key_column).reset_index(drop=True)
    # "_x000D_" is the escaped carriage return Excel leaves in multi-line cells.
    frame = frame.map(
        lambda cell: cell.replace("_x000D_", "").strip() if isinstance(cell, str) else cell
    )
    # Blank cells explode to NaN and a trailing newline explodes to "".
    # Either would become a dict key that matches missing/empty values later on,
    # so they are removed here.
    keys = frame[key_column]
    return frame[keys.notna() & (keys != "")].reset_index(drop=True)


def load_lookup(workbook, key_column, value_column="TWONTO"):
    """Return ``{key: value}`` built from one mapping workbook.

    When the same key appears on several rows, the last row wins.
    """
    frame = load_mapping_frame(workbook, key_column)
    return dict(zip(frame[key_column], frame[value_column]))


Class = load_lookup("SPARQL_class.xlsx", "Avantis_Class")
Super_Class = load_lookup("SPARQL_superclass.xlsx", "Super_Class")
Tag = load_lookup("SPARQL_tag.xlsx", "Tag")
Super_Tag = load_lookup("SPARQL_supertag.xlsx", "Super_Tag")
Category = load_lookup("SPARQL_category.xlsx", "Category")

# This cell used to leave `df` bound to the last workbook it read; keep that
# binding in case another cell still refers to it.
df = load_mapping_frame("SPARQL_supercategory.xlsx", "Super_Category")
Super_Category = dict(zip(df['Super_Category'], df['TWONTO']))

Tag

{'HU': 'http://www.toronto.ca/TWONTO#humidier',
 'CHL': 'http://www.toronto.ca/TWONTO#chlorinator_system',
 'OZ': 'http://www.toronto.ca/TWONTO#ozone_generator',
 'GB': 'http://www.toronto.ca/TWONTO#gearbox',
 'ALR': 'http://www.toronto.ca/TWONTO#alarm_device',
 'CLR': 'http://www.toronto.ca/TWONTO#clarifier',
 'AE': 'http://www.toronto.ca/TWONTO#instrument_element',
 'FM': 'http://www.toronto.ca/TWONTO#pressurized_sewer_segment',
 'G': 'http://www.toronto.ca/TWONTO#channel_gate',
 'PDP': 'http://www.toronto.ca/TWONTO#600V_insulated_case_electrical_cabinet',
 'SCAM': 'http://www.toronto.ca/TWONTO#security_camera',
 'AMP': 'http://www.toronto.ca/TWONTO#public_annoucement_speaker',
 'PA': 'http://www.toronto.ca/TWONTO#public_annoucement_speaker',
 'EV': 'http://www.toronto.ca/TWONTO#AC_evaporator_unit',
 'D': 'http://www.toronto.ca/TWONTO#dehumidifier',
 'CU': 'http://www.toronto.ca/TWONTO#ac_condenser_unit',
 'PDM': 'http://www.toronto.ca/TWONTO#pulsation_dampener',
 'CRN': 'http://www.

In [61]:
import pyodbc
import os
from sqlalchemy.engine import URL
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Use seaborn's default theme for every figure drawn after this point.
sns.set_theme()

# ODBC connection string: DSN "Avantis6-P", with the user name and password
# read from the Avantis_User / Avantis_Pass environment variables.
connect = f"DSN=Avantis6-P;UID={os.environ['Avantis_User']};PWD={os.environ['Avantis_Pass']}"

# Hand the raw ODBC string to SQLAlchemy through the "odbc_connect" query
# parameter of a mssql+pyodbc URL, then build the engine from that URL.
engine = create_engine(
    connection_url := URL.create("mssql+pyodbc", query={"odbc_connect": connect})
)


In [77]:
# One row per MAINTENT record, with CATVAL and ENTCLASS attached through FULL JOINs.
# The result columns are Id, Class and Category.
SQL1 = "SELECT [id] as [Id], [contname] as [Class] ,[entclsid] as [Category]\
    FROM ([AvantisP].[mc].[MAINTENT] Full Join [AvantisP].[mc].CATVAL ON MAINTENT.cat1_oi = CATVAL.cvoi) \
    Full Join [AvantisP].[mc].[ENTCLASS] on MAINTENT.entclsref_oi = ENTCLASS.entcloi"

df1 = pd.read_sql(SQL1,engine)

# Keep only rows that have an Id. `.notna()` is used instead of comparing the
# raw array with None, and `.copy()` makes the result an independent frame so
# the column added below is not written into a slice of the query result.
# The original index labels are kept (no reset).
df1 = df1.loc[df1['Id'].notna()].copy()

# Tag = the run of letters that sits between two hyphens and is followed by
# digits, e.g. "TAB-DIG-LSL-8087" -> "LSL". Ids without that pattern get NaN.
# expand=False returns a Series rather than a one-column DataFrame.
df1['Tag'] = df1['Id'].str.extract(r"-([a-zA-Z]+)-\d+", expand=False)
df1

Unnamed: 0,Id,Class,Category,Tag
0,ZDATA PILOT,,,
1,FHO WORK AREA 1,,,
2,FHO WORK AREA 2,,,
3,FHO-AUX,,,
4,FHO-GRK,,,
...,...,...,...,...
115653,TAB-DIG-LSL-8087,"Switch,Level-Low",Sensor,LSL
115654,TAB-DIG-P-1373A,Pump,Pump,P
115655,TAB-DIG-CP-1373,Control Panel,Control Panel,CP
115656,TAB-DIG-CP-1374A,Control Panel,Control Panel,CP


In [78]:
# Output column -> (lookup dict, df1 column whose values are looked up in it).
# The order here is the column order of the resulting frame.
match_sources = {
    'Class_Match': (Class, 'Class'),
    'Super_Class_Match': (Super_Class, 'Class'),
    'Tag_Match': (Tag, 'Tag'),
    'Super_Tag_Match': (Super_Tag, 'Tag'),
    'Category_Match': (Category, 'Category'),
    'Super_Category_Match': (Super_Category, 'Category'),
}

# Start from the Id column (which also carries df1's index), then add one
# match column per entry above; values with no entry in a lookup become "".
entityMatch = pd.DataFrame({'Id': df1['Id']}).assign(**{
    match_column: [lookup.get(value, "") for value in df1[source_column]]
    for match_column, (lookup, source_column) in match_sources.items()
})
entityMatch

Unnamed: 0,Id,Class_Match,Super_Class_Match,Tag_Match,Super_Tag_Match,Category_Match,Super_Category_Match
0,ZDATA PILOT,,,,,,
1,FHO WORK AREA 1,,,,,,
2,FHO WORK AREA 2,,,,,,
3,FHO-AUX,,,,,,
4,FHO-GRK,,,,,,
...,...,...,...,...,...,...,...
115653,TAB-DIG-LSL-8087,,,,http://www.toronto.ca/TWONTO#level_switch,http://www.toronto.ca/TWONTO#instrument_element,
115654,TAB-DIG-P-1373A,http://www.toronto.ca/TWONTO#pump_-_without_drive,,http://www.toronto.ca/TWONTO#pump_-_without_drive,,http://www.toronto.ca/TWONTO#pump_-_without_drive,
115655,TAB-DIG-CP-1373,http://www.toronto.ca/TWONTO#process_control_p...,,http://www.toronto.ca/TWONTO#process_control_p...,,http://www.toronto.ca/TWONTO#process_control_p...,
115656,TAB-DIG-CP-1374A,http://www.toronto.ca/TWONTO#process_control_p...,,http://www.toronto.ca/TWONTO#process_control_p...,,http://www.toronto.ca/TWONTO#process_control_p...,
