In [1]:
#!pip install rdflib
#!pip install pydot2
#!pip install pydotplus

In [2]:
import rdflib
from rdflib.namespace import RDFS
from rdflib import URIRef, BNode, Literal
import re
from collections import defaultdict
import numpy as np
import pandas as pd
import json

In [3]:
def ns(url):
  url = url.replace("http://buildsys.org/ontologies/brick#", "")
  url = url.replace("http://buildsys.org/ontologies/brickFrame#", "")
  return url

BRICKF = rdflib.Namespace('http://buildsys.org/ontologies/brickFrame#')
BRICK  = rdflib.Namespace('http://buildsys.org/ontologies/brick#')
TAG    = rdflib.Namespace('http://buildsys.org/ontologies/brickTag#')
TAGSET = rdflib.Namespace('http://buildsys.org/ontologies/brickTagSet#')

g = rdflib.Graph()
g.bind('bf', BRICKF)
g.bind('tag', TAG)
g.bind('ts', TAGSET)
g.bind(':', BRICK)
result = g.parse('BrickFrame.ttl', format='n3')
print(len(g))

145


### Load Tag and TagSets from Definition

In [4]:
#r = requests.get('docs.google.com/feeds/download/spreadsheets/Export?key=1QTSu0UxJ7UqRvgTW2P1Q4qudoBbvPqXpEhYiulyjcro&exportFormat=xlsx')
#r.status_code

In [5]:
dfTags=pd.read_excel('src/Schema Engineering.xlsx',"Tags")
schemaTags=set(pd.unique(dfTags.Tag.dropna().ravel()))
len(schemaTags)
dfTags.head()

Unnamed: 0,Dimension,Tag,Definition,Related Terminologies,hasSynonym,Author,Reviewed,Review Count,Comments
0,Equipment>HVAC,HX,Heat Exchanger,,,,,,
1,Equipment>Fire Safety System,FCP,Fire Control Panel,,,,,,
2,Equipment>Water System,CWS,Chilled Water System,,,,,,
3,Equipment>Water System,DHWS,Domestic Hot Water System,,,,,,
4,Equipment>Water System,HWS,Hot Water System,,,,,,


In [6]:
dfTagSets=pd.read_excel('src/Schema Engineering.xlsx',"TagSets")
schemaTagSets=set()
for ts in pd.unique(dfTagSets.TagSet.dropna().ravel()):
  schemaTagSets.add(ts.replace(' ','_'))
for ts in pd.unique(dfTagSets.hasSynonym.dropna().ravel()):
  for ts2 in ts.split(","):
   schemaTagSets.add(ts2.replace(' ','_'))
for row in pd.unique(dfTagSets.usesEquipment.dropna().ravel()):
  for ts in row.split(';'):
    schemaTagSets.add(ts.replace(' ','_'))
for row in pd.unique(dfTagSets.isPartOf.dropna().ravel()):
  for ts in row.split(';'):
    schemaTagSets.add(ts.replace(' ','_'))
len(schemaTagSets)
dfTagSets.head()

Unnamed: 0,Dimension,TagSet,usesEquipment,isPartOf,hasSynonym,Definition,hasUnit,hasAbbreviation,can be named? (can be instantiated?),can be unnamed? (cannot be instantiated?),Author,Reviewed,Review Count,Discussion,usesPoint,usesLocation,usesMeasurement
0,Equipment,Elevator,,,,,,,,,,,,,,,
1,Equipment,Energy Storage,,,,,,,,,,,,,,,
2,Equipment,Fire Safety System,,,,,,,,,,,,,,,
3,Equipment,Heating Ventilation Air Conditioning System,,,HVAC,,,,,,,,,,,,
4,Equipment,Lighting System,,,,,,,,,,,,,,,


In [7]:
schemaUsedTags=set()
schemaTagSetTags={}
for ts in schemaTagSets:
  schemaUsedTags.update(ts.split('_'))
  schemaTagSetTags[ts]=set(ts.split('_'))
schemaMissingTags=schemaUsedTags - schemaTags
print("Missing Tags:" + str(len(schemaMissingTags)))
print(schemaMissingTags)

Missing Tags:1
{''}


In [8]:
def IndivName(name):
  return re.sub(r'\s','_',re.sub(r'[^\d\w\s]', '', name))

In [9]:
def getStr(s):
  if type(s)=='str' or type(s)=='unicode':
    return s
  else:
    return ''

### Write Tags

write header

In [10]:
nsTag   = ":"
fo = open('BrickTag.ttl', 'w')
fo.write("""@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .\n""")
fo.write("@prefix bf:  <http://buildsys.org/ontologies/BrickFrame#> .\n")
fo.write("@prefix : <http://buildsys.org/ontologies/BrickTag#> .\n\n")
fo.write("<http://buildsys.org/ontologies/BrickTag>  a owl:Ontology ;\n")
fo.write("\towl:imports <http://buildsys.org/ontologies/BrickFrame> ;\n")
fo.write('\trdfs:comment "Domain Tag Definition"@en .\n\n')

44

Write Tag Hierachy

In [11]:
brickTags={}
for hir in pd.unique(dfTags.Dimension.dropna().ravel()):
  tags=hir.split('>')
  atags=""
  for i in range(len(tags)):
    tag=tags[i]
    otags=atags
    atags=(atags+"_"+tag).strip("_")
    if atags not in brickTags:
      indivLocName=nsTag+IndivName(atags)
      if i>0:
        fo.write("\n "+indivLocName+"  rdfs:subClassOf   "+brickTags[otags]+";")
      else:
        fo.write("\n "+indivLocName+"  rdfs:subClassOf   bf:Tag;")
      fo.write('\n\t\t\t rdf:type   owl:Class ;')
      fo.write('\n\t\t\t bf:isHierarchical  "";')
      fo.write('\n\t\t\t skos:definition ""@en ;\n')
      fo.write('\n\t\t\t rdfs:label "'+tag+'"@en .\n')
      brickTags[atags]=indivLocName;
      parent=tag;

Add tag leaves

In [12]:
# create location individuals
for idx in dfTags.index:
  #parent=brickTags[str(dfTags.loc[idx, "Dimension"]).split('>')[-1]]
  parent=brickTags[str(dfTags.loc[idx, "Dimension"]).replace('>','_')]
  indivLocName=nsTag + IndivName(str(dfTags.loc[idx, "Tag"]))
  fo.write("\n "+indivLocName+"  rdfs:subClassOf   "+parent+";")
  fo.write('\n\t\t\t rdf:type   owl:Class ;')
  fo.write('\n\t\t\t skos:definition "'+getStr(dfTags.loc[idx, "Definition"])+'"@en ;\n')
  fo.write('\n\t\t\t rdfs:label "'+str(dfTags.loc[idx, "Tag"])+'"@en .\n')
  brickTags[tag]=indivLocName;
  #print(parent,indivLocName)

In [13]:
fo.close()

In [14]:
# format
g = rdflib.Graph()
result = g.parse('BrickTag.ttl', format='n3')
g.serialize(destination='BrickTag.ttl', format='turtle')

### Write TagSets

write headers

In [15]:
nsTag= "tag:" # "ts:"
nsTagSet= ":" # "ts:"
fo = open('Brick.ttl', 'w')
fo.write("""@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .\n""")
fo.write("@prefix bf:  <http://buildsys.org/ontologies/BrickFrame#> .\n")
fo.write("@prefix tag: <http://buildsys.org/ontologies/BrickTag#> .\n")
fo.write("@prefix :    <http://buildsys.org/ontologies/Brick#> .\n\n")
fo.write("<http://buildsys.org/ontologies/Brick>  a owl:Ontology ;\n")
fo.write("\towl:imports <http://buildsys.org/ontologies/BrickFrame> ;\n")
fo.write("\towl:imports <http://buildsys.org/ontologies/BrickTag> ;\n")
fo.write('\trdfs:comment "Domain TagSet Definition"@en .\n\n')

47

Write TagSet Hierachy

In [16]:
brickTagSets={}
for hir in pd.unique(dfTagSets.Dimension.dropna().ravel()):
  tags=hir.split('>')
  atags=""
  for i in range(len(tags)):
    tag=tags[i]
    otags=atags
    #Joern's definition of atags (i.e. class names)
    #atags=(atags+"_"+tag).strip('_')
    #Bharath's definition of class names (atags)
    atags=tag.strip('_')
    if atags not in brickTagSets:
      indivLocName=nsTagSet + IndivName(atags)
      if i>0:
        fo.write("\n "+indivLocName+"  rdfs:subClassOf   "+brickTagSets[otags]+";")
      else:
        fo.write("\n "+indivLocName+"  rdfs:subClassOf   bf:TagSet;")
      fo.write('\n\t\t\t rdf:type   owl:Class ;')
      #fo.write('\n\t\t\t bf:isHierarchical  "";')
      fo.write('\n\t\t\t skos:definition ""@en ;\n')
      fo.write('\n\t\t\t rdfs:label "'+tag+'"@en .\n')
      brickTagSets[atags]=indivLocName;

Add TagSets leaves

In [17]:
# create location individuals
for idx in dfTagSets.index:
  tagsets=set([str(dfTagSets.loc[idx, "TagSet"])]) | set(str(dfTagSets.loc[idx, "hasSynonym"]).split(","))
  ots=None
  for tagset in tagsets:
    if tagset!="nan":
        #tagset=str(dfTagSets.loc[idx, "TagSet"])
      #Bharath's definition of parent
      parent= brickTagSets[str(dfTagSets.loc[idx, "Dimension"]).split('>')[-1]]
      #Joern's definition of parent
      #parent= brickTagSets[str(dfTagSets.loc[idx, "Dimension"]).replace('>',"_")]
      indivLocName=nsTagSet + IndivName(tagset)
      fo.write("\n "+indivLocName+"  rdfs:subClassOf   "+parent+";")
      fo.write('\n\t\t\t rdf:type   owl:Class ;')
      for tag in tagset.split():
         #fo.write('\n\t\t\t rdfs:subClassOf [ a owl:Restriction ; owl:onProperty :hasTag ; owl:someValuesFrom tag:'+tag+' ];')
        fo.write('\n\t\t\t bf:usesTag tag:'+tag+';')
      if ots:
        fo.write('\n\t\t\t owl:equivalentClass '+ots+' ;')
      fo.write('\n\t\t\t skos:definition "'+getStr(dfTagSets.loc[idx, "Definition"])+'"@en ;\n')
      fo.write('\n\t\t\t rdfs:label "'+str(dfTagSets.loc[idx, "TagSet"])+'"@en .\n')
      brickTagSets[tagset]=indivLocName;
      ots=indivLocName;
      #print(parent,indivLocName)

In [18]:
fo.close()

### New Structure

Classify Tags by Type

In [19]:
#Classify tags by dimension from "Tags" sheet in Schema Engineering
dfMeasTags  = dfTags.loc[dfTags.Dimension == "MeasurementProperty>PhysicalProperties"]
dfMeasPropTags = dfTags.loc[dfTags.Dimension.str.startswith("MeasurementProperty")]
dfLocTags   = dfTags.loc[dfTags.Dimension == "Location"]
dfPointTags = dfTags.loc[dfTags.Dimension == "Point"]
dfEquipTags = dfTags.loc[dfTags.Dimension.str.startswith("Equipment")]

#Identify dimensions based on tags
def getPointDim(istr):
  for pt in dfPointTags.Tag:
    if pt in str(istr): return pt
  return "UndefinedPoint"

def getMeasDim(istr):
  for pt in dfMeasTags.Tag:
    if pt in str(istr): return pt
  return "UndefinedMeasurement"

def getLocDim(istr):
  for pt in dfLocTags.Tag:
    if pt in str(istr): return pt
  return "UndefinedLocation"

def getEquipDim(istr):
  for pt in dfEquipTags.Tag:
    if pt in str(istr): return pt
  return "UndefinedEquipment"

def filterMeas(istr):
  wstr=' '+str(istr)+' '
  for pt in dfPointTags.Tag:
    if pt in wstr: wstr=wstr.replace(' '+pt+' ', ' ')
  for pt in dfLocTags.Tag:
    if pt in wstr: wstr=wstr.replace(' '+pt+' ', ' ')
  for pt in dfEquipTags.Tag:
    if pt in wstr: wstr=wstr.replace(' '+pt+' ', ' ')
  return wstr.strip()

def getLastDim(istr):
  if ">"   not in istr:
    return "UndefinedEquipment"
  else:
    sstr=istr.split(">")
    return sstr[-1] # IndivName(sstr[-1])

In [20]:
removeSynonyms=False
synonyms={}
for r in dfTagSets.index:
  if str(dfTagSets.hasSynonym[r])!="nan":
    syn=dfTagSets.hasSynonym[r].split(";")
    for s in syn:
      synonyms[s]=dfTagSets.TagSet[r]

In [21]:
dfTagSets["Measurement2"]=dfTagSets.TagSet
dfMM=dfTagSets.Measurement2.ravel()
for r in range(len(dfMM)):
  dfM=" "+str(dfMM[r])+" "
  if str(dfTagSets.usesLocation[r])!="nan":
    for loc in str(dfTagSets.usesLocation[r]).split(";"):
      dfM=dfM.replace(" "+loc.strip()+" ", " ")
  if str(dfTagSets.usesEquipment[r])!="nan":
    for eq in str(dfTagSets.usesEquipment[r]).split(";"):
      dfM=dfM.replace(" "+eq.strip()+" ", " ")
      if removeSynonyms and eq.strip() in synonyms:
        for s in synonyms[eq.strip()].split(' '):
          dfM=dfM.replace(" "+s+" ", " ")
  if str(dfTagSets.usesPoint[r])!="nan":
    dfM=dfM.replace(" "+str(dfTagSets.usesPoint[r])+" ", " ")
  dfMM[r]=dfM.strip()
dfTagSets["Measurement2"]=dfMM

In [22]:
#Classify each tagset as a dimension based on their tags
dfTagSets['Point']=dfTagSets.TagSet.apply(getPointDim)
dfTagSets['MeasurementProperty']=dfTagSets.TagSet.apply(getMeasDim)
dfTagSets['Location']=dfTagSets.TagSet.apply(getLocDim)
dfTagSets['Parent']=dfTagSets.Dimension.apply(getLastDim)
dfTagSets['Measurement']=dfTagSets.TagSet.apply(filterMeas)

In [23]:
# Classify TagSets
dfTagSetsEquipment=dfTagSets.loc[dfTagSets.Dimension.str.startswith("Equipment")]
dfTagSetsLocation=dfTagSets.loc[dfTagSets.Dimension.str.startswith("Location")]
dfTagSetsEqLoc=dfTagSets.loc[dfTagSets.Dimension.str.startswith("Equipment") | dfTagSets.Dimension.str.startswith("Location")]
dfTagSetsPoints=dfTagSets.loc[dfTagSets.Dimension.str.startswith("Point")]
dfTagSetsMeas=dfTagSets.loc[dfTagSets.Dimension.str.startswith("MeasurementProperty")]

In [24]:
dfTagSetsPoints.loc[:,('Measurement','Measurement2')]

Unnamed: 0,Measurement,Measurement2
164,Flow Loss,Air Flow Loss
165,Change,Change Filter
166,CO2 High,CO2 High
167,Communication Loss,Communication Loss
168,Overload,Compressor Overload
169,Leak,Condensate Leak
170,Leak,Condensate Leak
171,Deionised,Deionised Water
172,Smoke Detected,Discharge Air Smoke Detected
173,Temperature High,Discharge Air Temperature High Limit


In [25]:
#pd.unique(dfPointTagSets['Measurement2'].ravel())

In [26]:
# Missing tags
for dim in ["Location", "Equipment", "Point", "MeasurementProperty"]:
  usedTags=set([t for i in dfTagSets.loc[dfTagSets.Dimension.str.startswith(dim)].TagSet.str.split(' ') for t in i])
  defTags =set(dfTags.loc[dfTags.Dimension.str.startswith(dim)].Tag)
  #print("Missing Tags for "+dim+"\n"+str(usedTags-defTags)+"\n")

In [27]:
defMeasTagSets=set(dfTagSetsMeas.TagSet)
usedMeasTagSets=set(dfTagSetsPoints.Measurement)
#print("Used TagSets for Measurement\n"+str(defMeasTagSets-(defMeasTagSets-usedMeasTagSets))+"\n")
#print("Unused TagSets for Measurement\n"+str(defMeasTagSets-usedMeasTagSets)+"\n")
#print("Missing TagSets for Measurement\n"+str(usedMeasTagSets-defMeasTagSets)+"\n")
#usedMeasTagSets=set(dfTagSetsPoints.Measurement2)
#print("Used TagSets for Measurement\n"+str(defMeasTagSets-(defMeasTagSets-usedMeasTagSets))+"\n")
#print("Unused TagSets for Measurement2\n"+str(defMeasTagSets-usedMeasTagSets)+"\n")
#print("Missing TagSets for Measurement2\n"+str(usedMeasTagSets-defMeasTagSets)+"\n")

In [28]:
createEquipmentTagSets=True
setEquivalent=False
usedMeasOnly=True
writeUsedBy=True

In [29]:
tagsetsPoints={}
for r in dfTagSetsPoints.index:
  if createEquipmentTagSets and dfTagSetsPoints.usesEquipment[r]!='' and str(dfTagSetsPoints.usesEquipment[r])!="nan":
    equip=[s.strip() for s in dfTagSetsPoints.usesEquipment[r].split(';')]
    equip.append("")
  else: equip=[""]
  tagsets=set([dfTagSetsPoints.TagSet[r]])  | set(str(dfTagSetsPoints.hasSynonym[r]).split(",")) - set(['', 'nan'])
  for tagset in tagsets:
   for eq in equip:
    if eq!="" and eq not in tagset:
      ts=eq.replace(" ", "_") + " " + tagset
    else:
      ts=tagset
    if ts not in tagsetsPoints:
      tagsetsPoints[ts]={
        'TagSet':ts,
        'Tags':set(ts.split(' ')),
        'usesPoint':dfTagSetsPoints.usesPoint[r],
        'usesLocation':dfTagSetsPoints.usesLocation[r],
        'usesEquipment':dfTagSetsPoints.usesEquipment[r],
        'usesMeasurement':dfTagSetsPoints.usesMeasurement[r],
        'description':dfTagSetsPoints.Definition[r],
        'dimension':dfTagSetsPoints.Dimension[r],
        'synonyms':tagsets-set([ts]),
        'parent':dfTagSetsPoints.Parent[r],
        'parents':set([dfTagSetsPoints.Parent[r].replace('_',' ')]),
        'allparents':set([dfTagSetsPoints.Parent[r].replace('_',' ')]),
        'measurement':dfTagSetsPoints.Measurement[r],
        'measurement2':dfTagSetsPoints.Measurement2[r]
      }
      if createEquipmentTagSets: 
         tagsetsPoints[ts]['usesEquipment']=eq
      if eq!="" and len(tagsets-set([ts]))>0:  
        tagsetsPoints[ts]['synonyms']= [(eq.replace(" ", "_") + " " + ts2) for ts2 in (tagsets-set([ts])) if eq not in ts2]
      dfM=" "+tagset+" "
      if str(dfTagSets.usesLocation[r])!="nan":
        for loc in str(dfTagSets.usesLocation[r]).split(";"):
          dfM=dfM.replace(" "+loc.strip()+" ", " ")
      if str(dfTagSets.usesEquipment[r])!="nan":
        for eq in str(dfTagSets.usesEquipment[r]).split(";"):
          dfM=dfM.replace(" "+eq.strip()+" ", " ")
          dfM=dfM.replace(" "+eq.replace(" ", "_").strip()+" ", " ")
          if removeSynonyms and eq.strip() in synonyms:
            for s in synonyms[eq.strip()].split(' '):
              dfM=dfM.replace(" "+s+" ", " ")
      if str(dfTagSets.usesPoint[r])!="nan":
        dfM=dfM.replace(" "+str(dfTagSets.usesPoint[r])+" ", " ")
      tagsetsPoints[ts]['measurement2']=dfM.strip()
if '' in tagsetsPoints: del tagsetsPoints['']

In [30]:
# determine parent concepts
for tsA in tagsetsPoints:
  for tsB in tagsetsPoints:
    if tagsetsPoints[tsB]['Tags'] < tagsetsPoints[tsA]['Tags']:
      tagsetsPoints[tsA]['allparents'].add(tsB)
      tagsetsPoints[tsA]['parents'].add(tsB)

In [31]:
# minimize parent concepts
for tsA in tagsetsPoints:
  rmOldParent=set()
  for tsB in tagsetsPoints[tsA]['parents']:
    for tsC in tagsetsPoints[tsA]['parents']:
       if set(tsB.split(' ')) > set(tsC.split(' ')): # if direct parent
        rmOldParent.add(tsC)
  for tsC in rmOldParent:
    tagsetsPoints[tsA]['parents'].remove(tsC)
    #print("Remove "+tsC)    

In [32]:
tagsetsMeas={}
if not usedMeasOnly:
 for r in dfTagSetsMeas.index:
  tagsetsMeas[dfTagSetsMeas.TagSet[r]]={
    'TagSet':dfTagSetsMeas.TagSet[r],
    'Tags':set(dfTagSetsMeas.TagSet[r].split(' ')),
    'dimension':dfTagSetsMeas.Dimension[r],
    'measdim':getMeasDim(dfTagSetsMeas.TagSet[r]),
    'description':dfTagSetsMeas.Definition[r],
    'parent':dfTagSetsMeas.Parent[r],
    'parents':set([getMeasDim(dfTagSetsMeas.TagSet[r])]),
    'allparents':set([getMeasDim(dfTagSetsMeas.TagSet[r])])
  }
# add missing
meas=[tagsetsPoints[ts]['measurement2'] for ts in tagsetsPoints]
for ts in pd.unique(meas):
  tagsetsMeas[ts]={
    'TagSet':ts,
    'Tags':set(ts.split(' ')),
    'dimension':'MeasurementProperty',
    'measdim':getMeasDim(ts),
    'description':'',
    'parent':'',
    'parents':set([getMeasDim(ts)]),
    'allparents':set([getMeasDim(ts)])
  }
for ts in set(dfMeasTags.Tag)|set(['UndefinedMeasurement']):
  tagsetsMeas[ts]={
    'TagSet':ts,
    'Tags':set([ts]),
    'dimension':'MeasurementProperty',
    'measdim':'MeasurementProperty',
    'description':'',
    'parent':'',
    'parents':set(['MeasurementProperty']),
    'allparents':set(['MeasurementProperty'])
  }
    
if '' in tagsetsMeas: del tagsetsMeas['']

In [33]:
# determine parent concepts
for tsA in tagsetsMeas:
  for tsB in tagsetsMeas:
    if tagsetsMeas[tsB]['Tags'] < tagsetsMeas[tsA]['Tags']:
      tagsetsMeas[tsA]['allparents'].add(tsB)
      tagsetsMeas[tsA]['parents'].add(tsB)

In [34]:
# minimize parent concepts
for tsA in tagsetsMeas:
  rmOldParent=set()
  for tsB in tagsetsMeas[tsA]['parents']:
    for tsC in tagsetsMeas[tsA]['parents']:
      #if tagsetsMeas[tsB]['Tags'] > tagsetsMeas[tsC]['Tags']: # if direct parent
      if set(tsB.split(' ')) > set(tsC.split(' ')): # if direct parent
        rmOldParent.add(tsC)
  for tsC in rmOldParent:
    tagsetsMeas[tsA]['parents'].remove(tsC)
    #print("Remove "+tsC)  

In [35]:
nsTag= "tag:" # "ts:"
nsTagSet= ":" # "ts:"
fo = open('Brick.ttl', 'w')
fo.write("""@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .\n""")
fo.write("@prefix bf:  <http://buildsys.org/ontologies/BrickFrame#> .\n")
fo.write("@prefix tag: <http://buildsys.org/ontologies/BrickTag#> .\n")
fo.write("@prefix :    <http://buildsys.org/ontologies/Brick#> .\n\n")
fo.write("<http://buildsys.org/ontologies/Brick>  a owl:Ontology ;\n")
fo.write("\towl:imports <http://buildsys.org/ontologies/BrickFrame> ;\n")
fo.write("\towl:imports <http://buildsys.org/ontologies/BrickTag> ;\n")
fo.write('\trdfs:comment "Domain TagSet Definition"@en .\n\n')

47

In [36]:
brickTagSets={}
for hir in pd.unique(dfTagSets.Dimension.dropna().ravel()):
  tags=hir.split('>')
  atags=""
  for i in range(len(tags)):
    tag=tags[i]
    otags=atags
    #Joern's definition of atags (i.e. class names)
    #atags=(atags+"_"+tag).strip('_')
    #Bharath's definition of class names (atags)
    atags=tag.strip('_')
    if atags not in brickTagSets:
      indivLocName=nsTagSet + IndivName(atags)
      if i>0:
        fo.write("\n "+indivLocName+"  rdfs:subClassOf   "+brickTagSets[otags]+";")
      else:
        fo.write("\n "+indivLocName+"  rdfs:subClassOf   bf:TagSet;")
      fo.write('\n\t\t\t rdf:type   owl:Class ;')
      #fo.write('\n\t\t\t bf:isHierarchical  "";')
      fo.write('\n\t\t\t rdfs:label "'+tag+'"@en .\n')
      brickTagSets[atags]=indivLocName;

Add TagSets leaves

In [37]:
# create location individuals
for idx in dfTagSetsEqLoc.index:
  tagsets=set([str(dfTagSetsEqLoc.loc[idx, "TagSet"])]) | set(str(dfTagSetsEqLoc.loc[idx, "hasSynonym"]).split(","))
  ots=None
  for tagset in tagsets:
    if tagset!="nan":
      parent= brickTagSets[str(dfTagSetsEqLoc.loc[idx, "Dimension"]).split('>')[-1]]
      indivLocName=nsTagSet + IndivName(tagset)
      fo.write("\n "+indivLocName+"  rdfs:subClassOf   "+parent+";")
      fo.write('\n\t\t\t rdf:type   owl:Class ;')
      if ots:
        fo.write('\n\t\t\t owl:equivalentClass '+ots+';')
      fo.write('\n\t\t\t rdfs:label "'+str(dfTagSetsEqLoc.loc[idx, "TagSet"])+'"@en.\n')
      for tag in tagset.split():
        fo.write('\n '+indivLocName+'  bf:usesTag tag:'+tag+'.')
        if writeUsedBy: fo.write('\n tag:'+tag+'  bf:usedBy '+indivLocName+'.')
      brickTagSets[tagset]=indivLocName;
      ots=indivLocName;

In [38]:
# write measurement tagsets
for tsA in tagsetsMeas:
  ts=tagsetsMeas[tsA]
  indivLocName=nsTagSet + IndivName(ts['TagSet'])
  supClass=""#"bf:TagSet"# + nsTagSet + IndivName(ts['measdim'])
  for par in ts['parents']:
    supClass = supClass + ", " + nsTagSet + IndivName(par)
  fo.write("\n "+indivLocName+"  rdfs:subClassOf   "+supClass.strip(',').strip()+";")
  fo.write('\n\t\t\t rdf:type   owl:Class ;')
  if ts['description']!='' and str(ts['description'])!="nan":
    #fo.write('\n\t\t\t rdfs:description "'+ts['description']+'"@en;')
    fo.write('\n\t\t\t skos:definition "'+ts['description']+'"@en ;\n')
  fo.write('\n\t\t\t rdfs:label "'+ts['TagSet']+'"@en .\n')
  for tag in ts['Tags']:
    fo.write('\n '+indivLocName+'  bf:usesTag tag:'+tag+'.')
    if writeUsedBy: fo.write('\n tag:'+tag+'  bf:usedBy '+indivLocName+'.')

In [39]:
#Write point tagsets
for tsA in tagsetsPoints:
  ts=tagsetsPoints[tsA]
  if not ts['parents']:
    print(ts)
    continue;
  tagsets=set([ts['TagSet']])#  | set(str(ts["synonyms"]).split(",")) - set(['', 'nan'])
  ots=None
  for tagset in tagsets:
    tagset=tagset.strip()
    indivLocName=nsTagSet + IndivName(tagset)
    brickTagSets[tagset]=indivLocName;
    supClass= "" #"bf:TagSet, "
    for par in ts['parents']:
      supClass = supClass + ", " + nsTagSet + IndivName(par)
    fo.write("\n "+indivLocName+"  rdfs:subClassOf   "+supClass.strip(',').strip()+";")
    fo.write('\n\t\t\t rdf:type   owl:Class ;')
    if ts['synonyms']!='' and str(ts['synonyms'])!="nan":
      for syn in ts['synonyms']:
        if setEquivalent:
          fo.write('\n\t\t\t owl:equivalentClass '+nsTagSet + IndivName(syn)+';')
        else:
          fo.write('\n\t\t\t bf:equivalentTagSet '+nsTagSet + IndivName(syn)+';')
    ots=indivLocName;
    if ts['description']!='' and str(ts['description'])!="nan":
      #fo.write('\n\t\t\t rdfs:description "'+ts['description'].replace('"',"'")+'"@en;')
      fo.write('\n\t\t\t skos:definition "'+ts['description'].replace('"',"'")+'"@en;')
    fo.write('\n\t\t\t rdfs:label "'+ts['TagSet']+'"@en .\n')
    for tag in ts['Tags']:
      fo.write('\n '+indivLocName+'  bf:usesTag tag:'+tag+'.')
      if writeUsedBy: fo.write('\n tag:'+tag+'  bf:usedBy '+indivLocName+'.')
    if ts['usesLocation']!='' and str(ts['usesLocation'])!="nan":
      for loc in ts['usesLocation'].split(';'):
        fo.write('\n '+indivLocName+' bf:usesLocation :'+IndivName(loc.strip())+'.')
        if writeUsedBy: fo.write('\n :'+IndivName(loc.strip())+'  bf:usedBy '+indivLocName+'.')
    if ts['usesEquipment']!='' and str(ts['usesEquipment'])!="nan":
      for eq in ts['usesEquipment'].split(';'):
        fo.write('\n '+indivLocName+' bf:usesEquipment :'+IndivName(eq.strip())+'.')
        if writeUsedBy: fo.write('\n :'+IndivName(eq.strip())+'  bf:usedBy '+indivLocName+'.')
    if ts['usesPoint']!='' and str(ts['usesPoint'])!="nan":
      fo.write('\n '+indivLocName+' bf:usesPoint :'+IndivName(ts['usesPoint'])+'.')
      if writeUsedBy: fo.write('\n :'+IndivName(ts['usesPoint'])+'  bf:usedBy '+indivLocName+'.')
    if ts['measurement2']!='' and str(ts['measurement2'])!="nan":
      fo.write('\n '+indivLocName+' bf:usesMeasurement :'+IndivName(ts['measurement2'])+'.')
      if writeUsedBy: fo.write('\n :'+IndivName(ts['measurement2'])+'  bf:usedBy '+indivLocName+'.')
    ots=indivLocName;

In [40]:
fo.write('\n')
fo.close()

In [41]:
# format
g = rdflib.Graph()
result = g.parse('Brick.ttl', format='n3')

In [42]:
g.serialize(destination='Brick.ttl', format='turtle')

In [43]:
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ bf:TagSet . }""")
brickTagSets=set()
brickTagSetTags={}
for row in qres:
  ts=ns(row['ts'])
  brickTagSets.add(ts)
  brickTagSetTags[ts]=set(ts.split('_'))
len(brickTagSets)

2056

In [44]:
ex=pd.ExcelWriter('src/Schema EngineeringMod.xlsx')
dfTagSetsEquipment.to_excel(ex,"Equipment",index=False)
dfTagSetsLocation.to_excel(ex,"Location",index=False)
dfTagSetsPoints.to_excel(ex,"Point",index=False)
ex.save()