# Creates Brick

This notebook generates the Brick ontology files (`../dist/Brick.ttl` and `../dist/BrickTag.ttl`) from the `Tags.csv` and `TagSets.csv` definition files.

In [None]:
#!pip install rdflib
#!pip install pydot2
#!pip install pydotplus

In [None]:
import rdflib
from rdflib.namespace import RDFS
from rdflib import URIRef, BNode, Literal
import re
from collections import defaultdict
import numpy as np
import pandas as pd
import json

In [None]:
def ns(url):
  """Strip the Brick / BrickFrame namespace prefix from a URI string.

  Args:
    url: a string (or str subclass such as an rdflib URIRef).

  Returns:
    The input with every occurrence of a known namespace prefix removed;
    inputs that carry none of the prefixes are returned unchanged.
  """
  # The ontology files written by this notebook declare the namespaces with a
  # capital "B" (".../Brick#", ".../BrickFrame#"). The lowercase spellings are
  # kept as well so callers that relied on them keep working.
  for prefix in (
      "http://buildsys.org/ontologies/brick#",
      "http://buildsys.org/ontologies/brickFrame#",
      "http://buildsys.org/ontologies/Brick#",
      "http://buildsys.org/ontologies/BrickFrame#",
  ):
    url = url.replace(prefix, "")
  return url

In [None]:
def IndivName(name):
  """Turn a free-text label into an identifier-safe local name.

  Characters that are neither word characters, digits nor whitespace are
  dropped first; every remaining whitespace character is then replaced by a
  single underscore (so runs of whitespace yield runs of underscores).
  """
  withoutPunctuation = re.sub(r'[^\d\w\s]', '', name)
  return re.sub(r'\s', '_', withoutPunctuation)

In [None]:
def getStr(s):
  """Return ``s`` if it is a string, otherwise the empty string.

  Used to turn non-text cells (for example the float NaN that pandas uses for
  an empty CSV cell) into '' before they are concatenated into Turtle output.

  Args:
    s: any value.

  Returns:
    ``s`` unchanged when it is a ``str`` instance, else ''.
  """
  # The previous check compared type(s) with the *string* 'str', which is
  # never equal to a type object, so every input was mapped to ''.
  if isinstance(s, str):
    return s
  return ''

### Load Tag and TagSets from Definition

In [None]:
#r = requests.get('docs.google.com/feeds/download/spreadsheets/Export?key=1QTSu0UxJ7UqRvgTW2P1Q4qudoBbvPqXpEhYiulyjcro&exportFormat=xlsx')
#r.status_code

In [None]:
# Tag definitions; alternative source kept for reference:
#dfTags=pd.read_excel('Schema Engineering.xlsx',"Tags")
dfTags = pd.read_csv('Tags.csv')
# Distinct, non-null tag names declared in the 'Tag' column.
schemaTags = set(dfTags['Tag'].dropna().unique())
len(schemaTags)
dfTags.head()

In [None]:
dfTagSets = pd.read_csv('TagSets.csv')

# Collect every tagset name mentioned anywhere in the sheet, with spaces
# replaced by underscores.
schemaTagSets = set()
for name in dfTagSets['TagSet'].dropna().unique():
  schemaTagSets.add(name.replace(' ', '_'))

# Multi-valued columns and the separator each one is split on here.
# NOTE(review): hasSynonym is split on "," in this cell but on ";" where the
# `synonyms` dict is built - confirm which separator the CSV really uses.
for column, separator in (
    (dfTagSets['hasSynonym'], ","),
    (dfTagSets['usesEquipment'], ';'),
    (dfTagSets['isPartOf'], ';'),
):
  for cell in column.dropna().unique():
    schemaTagSets.update(part.replace(' ', '_') for part in cell.split(separator))

len(schemaTagSets)
dfTagSets.head()

In [None]:
# Split every tagset name into its underscore-separated tags and report the
# tags that are used by some tagset but not declared in the Tags sheet.
schemaUsedTags=set()
schemaTagSetTags={}
for ts in schemaTagSets:
  tokens=set(ts.split('_'))
  schemaUsedTags.update(tokens)
  schemaTagSetTags[ts]=tokens
# Drop the empty token produced by leading/trailing/double underscores.
# (set('') is the empty set, so the previous `- set('')` removed nothing.)
schemaMissingTags=(schemaUsedTags - schemaTags) - {''}
print("Missing Tags:" + str(len(schemaMissingTags)))
print(schemaMissingTags)

### Create Brick

Classify Tags by Type

In [None]:
# Initialise the Brick ontology output file.
# nsBrickTagSet is the prefix put in front of every TagSet class name written
# to this file; ":" is bound below to the Brick# namespace.
nsBrickTagSet= ":" # "ts:"
# The handle stays open across the following cells and is closed in the cell
# that runs after the point tagsets have been written.
foBrick = open('../dist/Brick.ttl', 'w')
# Prefix declarations (owl, rdf, rdfs, skos, then bf and the default prefix).
foBrick.write("""@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .\n""")
foBrick.write("@prefix bf:  <http://buildsys.org/ontologies/BrickFrame#> .\n")
foBrick.write("@prefix :    <http://buildsys.org/ontologies/Brick#> .\n\n")
# Ontology header: declares the ontology, imports BrickFrame, adds a comment.
foBrick.write("<http://buildsys.org/ontologies/Brick>  a owl:Ontology ;\n")
foBrick.write("\towl:imports <http://buildsys.org/ontologies/BrickFrame> ;\n")
foBrick.write('\trdfs:comment "Domain TagSet Definition"@en .\n\n')

In [None]:
# Initialise the BrickTag ontology output file.
# nsTagTag prefixes Tag class names (":" is bound below to BrickTag#);
# nsTagTagSet prefixes TagSet names referenced from this file ("brick:" is
# bound below to Brick#).
nsTagTag   = ":"
nsTagTagSet= "brick:" # "ts:"
# The handle stays open across the following cells; bf:usesTag triples and
# the tag classes are appended to it until it is closed near the end.
foTag = open('../dist/BrickTag.ttl', 'w')
# Prefix declarations (owl, rdf, rdfs, skos, then bf, brick and the default).
foTag.write("""@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .\n""")
foTag.write("@prefix bf:    <http://buildsys.org/ontologies/BrickFrame#> .\n")
foTag.write("@prefix brick: <http://buildsys.org/ontologies/Brick#> .\n\n")
foTag.write("@prefix :      <http://buildsys.org/ontologies/BrickTag#> .\n\n")
# Ontology header: imports both BrickFrame and Brick.
foTag.write("<http://buildsys.org/ontologies/BrickTag>  a owl:Ontology ;\n")
foTag.write("\towl:imports <http://buildsys.org/ontologies/BrickFrame> ;\n")
foTag.write("\towl:imports <http://buildsys.org/ontologies/Brick> ;\n")
foTag.write('\trdfs:comment "Domain Tag Definition"@en .\n\n')

In [None]:
# Partition the Tags sheet by its 'Dimension' column. Measurement tags need an
# exact match on the physical-properties path; the other frames match either
# the exact dimension or any dimension path starting with the given prefix.
tagDimension = dfTags["Dimension"]
dfMeasTags = dfTags[tagDimension == "MeasurementProperty>PhysicalProperties"]
dfMeasPropTags = dfTags[tagDimension.str.startswith("MeasurementProperty")]
dfLocTags = dfTags[tagDimension == "Location"]
dfPointTags = dfTags[tagDimension == "Point"]
dfEquipTags = dfTags[tagDimension.str.startswith("Equipment")]

#Identify dimensions based on tags
def getPointDim(istr):
  """Return the first tag of ``dfPointTags.Tag`` that occurs in ``str(istr)``.

  Matching is a plain substring test, tried in the order of the Tag column.
  "UndefinedPoint" is returned when no tag matches.
  """
  text = str(istr)
  return next((tag for tag in dfPointTags.Tag if tag in text), "UndefinedPoint")

def getMeasDim(istr):
  """Return the first tag of ``dfMeasTags.Tag`` that occurs in ``str(istr)``.

  Matching is a plain substring test, tried in the order of the Tag column.
  "UndefinedMeasurement" is returned when no tag matches.
  """
  text = str(istr)
  return next((tag for tag in dfMeasTags.Tag if tag in text), "UndefinedMeasurement")

def getLocDim(istr):
  """Return the first tag of ``dfLocTags.Tag`` that occurs in ``str(istr)``.

  Matching is a plain substring test, tried in the order of the Tag column.
  "UndefinedLocation" is returned when no tag matches.
  """
  text = str(istr)
  return next((tag for tag in dfLocTags.Tag if tag in text), "UndefinedLocation")

def getEquipDim(istr):
  """Return the first tag of ``dfEquipTags.Tag`` that occurs in ``str(istr)``.

  Matching is a plain substring test, tried in the order of the Tag column.
  "UndefinedEquipment" is returned when no tag matches.
  """
  text = str(istr)
  return next((tag for tag in dfEquipTags.Tag if tag in text), "UndefinedEquipment")

def filterMeas(istr):
  """Remove point, location and equipment tags from a tagset string.

  The input is padded with one space on each side so that tags are removed
  only when they appear as whole space-delimited words. Tags are processed in
  the order point, location, equipment; the result is stripped of the padding.
  """
  padded = ' ' + str(istr) + ' '
  for tagFrame in (dfPointTags, dfLocTags, dfEquipTags):
    for tag in tagFrame.Tag:
      if tag in padded:
        padded = padded.replace(' ' + tag + ' ', ' ')
  return padded.strip()

def getLastDim(istr):
  """Return the last segment of a '>'-separated dimension path.

  A path without any '>' (a top-level dimension) yields the fixed string
  "UndefinedEquipment".
  """
  if ">" in istr:
    return istr.split(">")[-1] # IndivName(...) intentionally not applied
  return "UndefinedEquipment"

In [None]:
# When True, the tokens of an equipment's canonical tagset are also stripped
# while deriving 'measurement2' values.
removeSynonyms = False

# Map every synonym to the canonical TagSet of the row that declares it.
# NOTE(review): hasSynonym is split on ";" here but on "," in other cells -
# confirm which separator the CSV really uses.
synonyms = {}
for canonical, synonymCell in zip(dfTagSets.TagSet, dfTagSets.hasSynonym):
  if str(synonymCell) == "nan":
    continue
  synonyms.update((alias, canonical) for alias in synonymCell.split(";"))

In [None]:
# Derive the 'Measurement2' column: the TagSet with its location, equipment
# and point words removed. Each TagSet is padded with spaces so that only whole
# space-delimited words are removed.
#
# The values are collected in a plain list and assigned once. The previous
# version wrote into the array returned by Series.ravel(), which is deprecated
# and is a read-only view when pandas Copy-on-Write is enabled.
dfMM=[]
for tagset, locCell, eqCell, pointCell in zip(
    dfTagSets.TagSet, dfTagSets.usesLocation, dfTagSets.usesEquipment, dfTagSets.usesPoint):
  dfM=" "+str(tagset)+" "
  if str(locCell)!="nan":
    for loc in str(locCell).split(";"):
      dfM=dfM.replace(" "+loc.strip()+" ", " ")
  if str(eqCell)!="nan":
    for eq in str(eqCell).split(";"):
      dfM=dfM.replace(" "+eq.strip()+" ", " ")
      # Optionally also drop the words of the equipment's canonical tagset.
      if removeSynonyms and eq.strip() in synonyms:
        for s in synonyms[eq.strip()].split(' '):
          dfM=dfM.replace(" "+s+" ", " ")
  if str(pointCell)!="nan":
    dfM=dfM.replace(" "+str(pointCell)+" ", " ")
  dfMM.append(dfM.strip())
dfTagSets["Measurement2"]=dfMM

In [None]:
# Add one derived column per dimension to dfTagSets.
# Each entry is (new column, source column, classifier applied element-wise).
for newColumn, sourceColumn, classify in (
    ('Point', 'TagSet', getPointDim),
    ('MeasurementProperty', 'TagSet', getMeasDim),
    ('Location', 'TagSet', getLocDim),
    ('Parent', 'Dimension', getLastDim),
    ('Measurement', 'TagSet', filterMeas),
):
  dfTagSets[newColumn] = dfTagSets[sourceColumn].apply(classify)

In [None]:
# Split the TagSets sheet by the leading component of its 'Dimension' path.
tagSetDimension = dfTagSets["Dimension"].str
isEquipment = tagSetDimension.startswith("Equipment")
isLocation = tagSetDimension.startswith("Location")

dfTagSetsEquipment = dfTagSets.loc[isEquipment]
dfTagSetsLocation = dfTagSets.loc[isLocation]
dfTagSetsEqLoc = dfTagSets.loc[isEquipment | isLocation]
dfTagSetsPoints = dfTagSets.loc[tagSetDimension.startswith("Point")]
dfTagSetsMeas = dfTagSets.loc[tagSetDimension.startswith("MeasurementProperty")]

In [None]:
#dfTagSetsPoints.loc[:,('Measurement','Measurement2')]

In [None]:
#pd.unique(dfPointTagSets['Measurement2'].ravel())

In [None]:
# Per dimension: tags used by the tagsets vs. tags declared in the Tags sheet.
# The comparison itself is only printed when the last line is re-enabled.
for dim in ["Location", "Equipment", "Point", "MeasurementProperty"]:
  dimTagSetNames = dfTagSets.loc[dfTagSets.Dimension.str.startswith(dim)].TagSet
  usedTags = {token for tokens in dimTagSetNames.str.split(' ') for token in tokens}
  defTags = set(dfTags.loc[dfTags.Dimension.str.startswith(dim)].Tag)
  #print("Missing Tags for "+dim+"\n"+str(usedTags-defTags)+"\n")

In [None]:
# Measurement tagsets declared in the sheet vs. measurement strings derived
# from the point tagsets; the diagnostics below are disabled by default.
defMeasTagSets = set(dfTagSetsMeas["TagSet"])
usedMeasTagSets = set(dfTagSetsPoints["Measurement"])
#print("Used TagSets for Measurement\n"+str(defMeasTagSets-(defMeasTagSets-usedMeasTagSets))+"\n")
#print("Unused TagSets for Measurement\n"+str(defMeasTagSets-usedMeasTagSets)+"\n")
#print("Missing TagSets for Measurement\n"+str(usedMeasTagSets-defMeasTagSets)+"\n")
#usedMeasTagSets=set(dfTagSetsPoints.Measurement2)
#print("Used TagSets for Measurement\n"+str(defMeasTagSets-(defMeasTagSets-usedMeasTagSets))+"\n")
#print("Unused TagSets for Measurement2\n"+str(defMeasTagSets-usedMeasTagSets)+"\n")
#print("Missing TagSets for Measurement2\n"+str(usedMeasTagSets-defMeasTagSets)+"\n")

In [None]:
# Generation switches read by the cells below.
# createEquipmentTagSets: also create one point tagset per equipment listed in
# usesEquipment, named "<Equipment_Name> <tagset>".
createEquipmentTagSets=True
# setEquivalent: write point synonyms as owl:equivalentClass instead of
# bf:equivalentTagSet.
setEquivalent=False
# usedMeasOnly: when False, measurement tagsets declared in the sheet are
# loaded in addition to the ones derived from the point tagsets.
usedMeasOnly=True
# writeUsedBy: additionally write the inverse bf:usedBy / bf:usedByPoint triples.
writeUsedBy=False

In [None]:
# Build tagsetsPoints: one dict per point tagset name (canonical names,
# synonyms and, optionally, equipment-prefixed variants), keyed by that name.
tagsetsPoints={}
for r in dfTagSetsPoints.index:
  # equip: the equipments this row applies to, plus "" for the un-prefixed
  # variant. Without createEquipmentTagSets only "" is used.
  if createEquipmentTagSets and dfTagSetsPoints.usesEquipment[r]!='' and str(dfTagSetsPoints.usesEquipment[r])!="nan":
    equip=[s.strip() for s in dfTagSetsPoints.usesEquipment[r].split(';')]
    equip.append("")
  else: equip=[""]
  # NOTE(review): `-` binds tighter than `|`, so '' and 'nan' are removed from
  # the synonym set only, not from the TagSet itself. Synonyms are split on ","
  # and not stripped - confirm against the CSV format.
  tagsets=set([dfTagSetsPoints.TagSet[r]])  | set(str(dfTagSetsPoints.hasSynonym[r]).split(",")) - set(['', 'nan'])
  for tagset in tagsets:
   for eq in equip:
    # Prefix the equipment name (spaces -> underscores) unless the tagset
    # string already contains the equipment text.
    if eq!="" and eq not in tagset:
      ts=eq.replace(" ", "_") + " " + tagset
    else:
      ts=tagset
    # The first occurrence of a name wins; later duplicates are ignored.
    if ts not in tagsetsPoints:
      tagsetsPoints[ts]={
        'TagSet':ts,
        'Tags':set(ts.split(' ')),
        'usesPoint':dfTagSetsPoints.usesPoint[r],
        'usesLocation':dfTagSetsPoints.usesLocation[r],
        'usesEquipment':dfTagSetsPoints.usesEquipment[r],
        'usesMeasurement':dfTagSetsPoints.usesMeasurement[r],
        'description':dfTagSetsPoints.Definition[r],
        'dimension':dfTagSetsPoints.Dimension[r],
        'synonyms':tagsets-set([ts]),
        'parent':dfTagSetsPoints.Parent[r],
        'parents':set([dfTagSetsPoints.Parent[r].replace('_',' ')]),
        'allparents':set([dfTagSetsPoints.Parent[r].replace('_',' ')]),
        'measurement':dfTagSetsPoints.Measurement[r],
        # placeholder: 'measurement2' is recomputed and overwritten below
        'measurement2':dfTagSetsPoints.Measurement2[r]
      }
      # Narrow usesEquipment to the single equipment of this variant
      # ("" for the un-prefixed one).
      if createEquipmentTagSets: 
         tagsetsPoints[ts]['usesEquipment']=eq
      # For prefixed variants the synonyms are prefixed as well; note that
      # 'synonyms' becomes a list here while it is a set otherwise.
      if eq!="" and len(tagsets-set([ts]))>0:  
        tagsetsPoints[ts]['synonyms']= [(eq.replace(" ", "_") + " " + ts2) for ts2 in (tagsets-set([ts])) if eq not in ts2]
      # Recompute the measurement part of this tagset by removing location,
      # equipment and point words. The columns are read from dfTagSets with
      # the same label r (dfTagSetsPoints is a .loc selection of dfTagSets).
      dfM=" "+tagset+" "
      if str(dfTagSets.usesLocation[r])!="nan":
        for loc in str(dfTagSets.usesLocation[r]).split(";"):
          dfM=dfM.replace(" "+loc.strip()+" ", " ")
      if str(dfTagSets.usesEquipment[r])!="nan":
        # NOTE: this loop rebinds `eq`, the variable of the enclosing
        # `for eq in equip` loop. The outer loop takes its next value from its
        # own iterator and `eq` is not read again in this iteration.
        for eq in str(dfTagSets.usesEquipment[r]).split(";"):
          dfM=dfM.replace(" "+eq.strip()+" ", " ")
          dfM=dfM.replace(" "+eq.replace(" ", "_").strip()+" ", " ")
          if removeSynonyms and eq.strip() in synonyms:
            for s in synonyms[eq.strip()].split(' '):
              dfM=dfM.replace(" "+s+" ", " ")
      if str(dfTagSets.usesPoint[r])!="nan":
        dfM=dfM.replace(" "+str(dfTagSets.usesPoint[r])+" ", " ")
      tagsetsPoints[ts]['measurement2']=dfM.strip()
# Drop the entry created for an empty tagset name, if any.
if '' in tagsetsPoints: del tagsetsPoints['']

In [None]:
# Determine parent concepts: a point tagset is a parent of another one when
# its tag set is a proper subset of the other's tag set.
for childEntry in tagsetsPoints.values():
  for candidateName, candidateEntry in tagsetsPoints.items():
    if candidateEntry['Tags'] < childEntry['Tags']:
      childEntry['allparents'].add(candidateName)
      childEntry['parents'].add(candidateName)

In [None]:
# Minimize parent concepts: drop every parent whose words are a proper subset
# of another parent's words, leaving only the most specific ones in 'parents'.
for pointEntry in tagsetsPoints.values():
  directParents = pointEntry['parents']
  shadowed = {
      ancestor
      for closer in directParents
      for ancestor in directParents
      if set(closer.split(' ')) > set(ancestor.split(' '))
  }
  directParents.difference_update(shadowed)

In [None]:
# Build tagsetsMeas: one dict per measurement tagset, keyed by its name.
tagsetsMeas={}
# 1) Optionally load the measurement tagsets declared in the sheet.
if not usedMeasOnly:
  for r in dfTagSetsMeas.index:
    name=dfTagSetsMeas.TagSet[r]
    measDim=getMeasDim(name)
    tagsetsMeas[name]={
      'TagSet':name,
      'Tags':set(name.split(' ')),
      'dimension':dfTagSetsMeas.Dimension[r],
      'measdim':measDim,
      'description':dfTagSetsMeas.Definition[r],
      'reference':dfTagSetsMeas.Reference[r],
      'parent':dfTagSetsMeas.Parent[r],
      'parents':set([measDim]),
      'allparents':set([measDim])
    }
# 2) Add the measurements used by point tagsets that are still missing.
#    Entries loaded from the sheet are kept, so their description and
#    reference are no longer overwritten by an empty stub.
#    dict.fromkeys de-duplicates while keeping first-seen order.
meas=[tagsetsPoints[ts]['measurement2'] for ts in tagsetsPoints]
for ts in dict.fromkeys(meas):
  if ts in tagsetsMeas:
    continue
  measDim=getMeasDim(ts)
  tagsetsMeas[ts]={
    'TagSet':ts,
    'Tags':set(ts.split(' ')),
    'dimension':'MeasurementProperty',
    'measdim':measDim,
    'description':'',
    'parent':'',
    'parents':set([measDim]),
    'allparents':set([measDim])
  }
# 3) Root entries: every measurement tag plus 'UndefinedMeasurement', placed
#    directly under MeasurementProperty. This step replaces any same-named
#    entry created above, as before.
for ts in set(dfMeasTags.Tag)|set(['UndefinedMeasurement']):
  tagsetsMeas[ts]={
    'TagSet':ts,
    'Tags':set([ts]),
    'dimension':'MeasurementProperty',
    'measdim':'MeasurementProperty',
    'description':'',
    'parent':'',
    'parents':set(['MeasurementProperty']),
    'allparents':set(['MeasurementProperty'])
  }

# Drop the entry created for an empty measurement string, if any.
if '' in tagsetsMeas: del tagsetsMeas['']

In [None]:
# Determine parent concepts: a measurement tagset is a parent of another one
# when its tag set is a proper subset of the other's tag set.
for childEntry in tagsetsMeas.values():
  for candidateName, candidateEntry in tagsetsMeas.items():
    if candidateEntry['Tags'] < childEntry['Tags']:
      childEntry['allparents'].add(candidateName)
      childEntry['parents'].add(candidateName)

In [None]:
# Minimize parent concepts: drop every parent whose words are a proper subset
# of another parent's words. The comparison uses the words of the parent
# *names* (split on spaces), not the stored 'Tags' sets.
for measEntry in tagsetsMeas.values():
  directParents = measEntry['parents']
  shadowed = {
      ancestor
      for closer in directParents
      for ancestor in directParents
      if set(closer.split(' ')) > set(ancestor.split(' '))
  }
  directParents.difference_update(shadowed)

In [None]:
# Write the TagSet class hierarchy given by the '>'-separated Dimension paths.
# brickTagSets maps a class key to the prefixed class name that was written.
# Class keys are the bare level names (not the joined path); a level that was
# already written is skipped.
brickTagSets={}
for dimensionPath in dfTagSets.Dimension.dropna().unique():
  parentKey=""
  for depth, levelName in enumerate(dimensionPath.split('>')):
    classKey=levelName.strip('_')
    if classKey not in brickTagSets:
      className=nsBrickTagSet + IndivName(classKey)
      # Top-level dimensions hang directly below bf:TagSet.
      superClass=brickTagSets[parentKey] if depth>0 else "bf:TagSet"
      foBrick.write("\n "+className+"  rdfs:subClassOf   "+superClass+";")
      foBrick.write('\n\t\t\t rdf:type   owl:Class ;')
      foBrick.write('\n\t\t\t rdfs:label "'+levelName+'"@en .\n')
      brickTagSets[classKey]=className
    parentKey=classKey

Add TagSets leaves

In [None]:
# Write one owl:Class per equipment/location tagset and per synonym of it,
# and the corresponding bf:usesTag triples to the BrickTag file.
for idx in dfTagSetsEqLoc.index:
  # Canonical name plus its synonyms. A missing hasSynonym cell becomes the
  # string "nan", which is skipped below.
  # NOTE(review): synonyms are split on "," and not stripped, and iterating a
  # set means the order (and so which class carries owl:equivalentClass) is
  # not fixed - confirm this is acceptable.
  tagsets=set([str(dfTagSetsEqLoc.loc[idx, "TagSet"])]) | set(str(dfTagSetsEqLoc.loc[idx, "hasSynonym"]).split(","))
  # ots: name of the class written previously for this row, if any.
  ots=None
  for tagset in tagsets:
    if tagset!="nan":
      # Parent class = last level of the row's Dimension path.
      parent= brickTagSets[str(dfTagSetsEqLoc.loc[idx, "Dimension"]).split('>')[-1]]
      indivLocName=nsBrickTagSet + IndivName(tagset)
      foBrick.write("\n "+indivLocName+"  rdfs:subClassOf   "+parent+";")
      foBrick.write('\n\t\t\t rdf:type   owl:Class ;')
      # Chain each further name of this row to the one written before it.
      if ots:
        foBrick.write('\n\t\t\t owl:equivalentClass '+ots+';')
      # The label is always the canonical TagSet, also for synonym classes.
      foBrick.write('\n\t\t\t rdfs:label "'+str(dfTagSetsEqLoc.loc[idx, "TagSet"])+'"@en.\n')
      for tag in tagset.split(): # write to BrickTag
        foTag.write('\n '+nsTagTagSet + IndivName(tagset)+'  bf:usesTag :'+tag+'.')
        if writeUsedBy: 
          foTag.write('\n :'+tag+'  bf:usedBy '+nsTagTagSet + IndivName(tagset)+'.')
      brickTagSets[tagset]=indivLocName;
      ots=indivLocName;

In [None]:
# Write one owl:Class per measurement tagset to Brick.ttl and the matching
# bf:usesTag triples to BrickTag.ttl.
for tsA in tagsetsMeas:
  ts=tagsetsMeas[tsA]
  indivLocName=nsBrickTagSet + IndivName(ts['TagSet'])
  # Comma-separated list of the direct parent classes.
  supClass=", ".join(nsBrickTagSet + IndivName(par) for par in ts['parents'])
  foBrick.write("\n "+indivLocName+"  rdfs:subClassOf   "+supClass+";")
  foBrick.write('\n\t\t\t rdf:type   owl:Class ;')
  if ts['description']!='' and str(ts['description'])!="nan":
    # Double quotes would terminate the Turtle literal early; replace them
    # with single quotes, as the point-tagset writer does for its definitions.
    definition=str(ts['description']).replace('"',"'")
    foBrick.write('\n\t\t\t skos:definition "'+definition+'"@en ;\n')
  foBrick.write('\n\t\t\t rdfs:label "'+ts['TagSet']+'"@en .\n')
  for tag in ts['Tags']: # write to BrickTag
    foTag.write('\n '+nsTagTagSet + IndivName(ts['TagSet'])+'  bf:usesTag :'+tag+'.')
    if writeUsedBy:
      foTag.write('\n :'+tag+'  bf:usedBy '+nsTagTagSet + IndivName(ts['TagSet'])+'.')

In [None]:
# Write one owl:Class per point tagset to Brick.ttl, followed by its
# bf:usesLocation / bf:usesEquipment / bf:usesPoint / bf:usesMeasurement
# triples, and the bf:usesTag triples to BrickTag.ttl.
for tsA in tagsetsPoints:
  ts=tagsetsPoints[tsA]
  # Entries without any parent are printed and skipped.
  if not ts['parents']:
    print(ts)
    continue;
  # Only the tagset's own name is written here (the synonym expansion is
  # disabled), so the loop below runs once per entry.
  tagsets=set([ts['TagSet']])#  | set(str(ts["synonyms"]).split(",")) - set(['', 'nan'])
  ots=None
  for tagset in tagsets:
    tagset=tagset.strip()
    indivLocName=nsBrickTagSet + IndivName(tagset)
    brickTagSets[tagset]=indivLocName;
    # Build the comma-separated list of direct parent classes; the leading
    # ", " is stripped before writing.
    supClass= "" #"bf:TagSet, "
    for par in ts['parents']:
      supClass = supClass + ", " + nsBrickTagSet + IndivName(par)
    foBrick.write("\n "+indivLocName+"  rdfs:subClassOf   "+supClass.strip(',').strip()+";")
    foBrick.write('\n\t\t\t rdf:type   owl:Class ;')
    # Synonyms are linked with owl:equivalentClass or bf:equivalentTagSet,
    # depending on setEquivalent.
    if ts['synonyms']!='' and str(ts['synonyms'])!="nan":
      for syn in ts['synonyms']:
        if setEquivalent:
          foBrick.write('\n\t\t\t owl:equivalentClass '+nsBrickTagSet + IndivName(syn)+';')
        else:
          foBrick.write('\n\t\t\t bf:equivalentTagSet '+nsBrickTagSet + IndivName(syn)+';')
    # ots is assigned here and at the end of the loop body but not read in
    # this cell.
    ots=indivLocName;
    # Definition text: double quotes are replaced by single quotes and the
    # result is passed through getStr before being written.
    if 'description' in ts and ts['description']!='' and str(ts['description'])!="nan":
      #foBrick.write('\n\t\t\t rdfs:description "'+getStr(ts['description'].replace('"',"'"))+'"@en;')
      foBrick.write('\n\t\t\t skos:definition "'+getStr(ts['description'].replace('"',"'"))+'"@en;')
    # NOTE(review): the cell that builds tagsetsPoints does not set a
    # 'reference' key, so this branch looks unreachable - confirm.
    if 'reference' in ts and ts['reference']!='' and str(ts['reference'])!="nan":
      foBrick.write('\n\t\t\t rdfs:isDefinedBy "'+getStr(ts['reference'].replace('"',"'")) + '"@en ;\n')
    foBrick.write('\n\t\t\t rdfs:label "'+ts['TagSet']+'"@en .\n')
    for tag in ts['Tags']: # write to BrickTag
      foTag.write('\n '+nsTagTagSet + IndivName(tagset)+'  bf:usesTag :'+tag+'.')
      if writeUsedBy: 
        foTag.write('\n :'+tag+'  bf:usedBy '+nsTagTagSet + IndivName(tagset)+'.')
    # Relations to other tagsets; multi-valued cells are split on ';'.
    if ts['usesLocation']!='' and str(ts['usesLocation'])!="nan":
      for loc in ts['usesLocation'].split(';'):
        foBrick.write('\n '+indivLocName+' bf:usesLocation :'+IndivName(loc.strip())+'.')
        if writeUsedBy: 
          foBrick.write('\n :'+IndivName(loc.strip())+'  bf:usedByPoint '+indivLocName+'.')
    if ts['usesEquipment']!='' and str(ts['usesEquipment'])!="nan":
      for eq in ts['usesEquipment'].split(';'):
        foBrick.write('\n '+indivLocName+' bf:usesEquipment :'+IndivName(eq.strip())+'.')
        if writeUsedBy: 
          foBrick.write('\n :'+IndivName(eq.strip())+'  bf:usedByPoint '+indivLocName+'.')
    if ts['usesPoint']!='' and str(ts['usesPoint'])!="nan":
      foBrick.write('\n '+indivLocName+' bf:usesPoint :'+IndivName(ts['usesPoint'])+'.')
      if writeUsedBy: 
        foBrick.write('\n :'+IndivName(ts['usesPoint'])+'  bf:usedByPoint '+indivLocName+'.')
    if ts['measurement2']!='' and str(ts['measurement2'])!="nan":
      foBrick.write('\n '+indivLocName+' bf:usesMeasurement :'+IndivName(ts['measurement2'])+'.')
      if writeUsedBy: 
        foBrick.write('\n :'+IndivName(ts['measurement2'])+'  bf:usedByPoint '+indivLocName+'.')
    ots=indivLocName;

In [None]:
# Terminate the last line and close Brick.ttl so it can be re-read below.
foBrick.write('\n')
foBrick.close()

In [None]:
# Load the hand-written Brick.ttl into an rdflib graph (parsed with rdflib's
# 'n3' parser); a syntax error in the generated file surfaces here.
g = rdflib.Graph()
result = g.parse('../dist/Brick.ttl', format='n3')

In [None]:
# Rewrite Brick.ttl in place using rdflib's Turtle serializer, which replaces
# the hand-written layout with rdflib's formatting.
g.serialize(destination='../dist/Brick.ttl', format='turtle')

In [None]:
# Collect every class that is a (transitive) subclass of bf:TagSet in the
# parsed graph, and split each name on '_' into its tags.
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ bf:TagSet . }""")
brickTagSetTags = {}
for row in qres:
  localName = ns(row['ts'])
  brickTagSetTags[localName] = set(localName.split('_'))
# From here on brickTagSets is a set of names (it was a dict while writing).
brickTagSets = set(brickTagSetTags)
len(brickTagSets)

### Write Tags

Write Tag Hierarchy

In [None]:
# Write the Tag class hierarchy given by the '>'-separated Dimension paths of
# the Tags sheet. brickTags maps a path key to the prefixed class name.
brickTags={}
for hir in pd.unique(dfTags.Dimension.dropna().ravel()):
  tags=hir.split('>')
  atags=""
  for i in range(len(tags)):
    tag=tags[i]
    # otags: key of the parent level; atags: key of this level, i.e. all
    # levels so far joined with "_".
    otags=atags
    atags=(atags+"_"+tag).strip("_")
    if atags not in brickTags:
      indivLocName=nsTagTag+IndivName(atags)
      # Top-level dimensions hang directly below bf:Tag.
      if i>0:
        foTag.write("\n "+indivLocName+"  rdfs:subClassOf   "+brickTags[otags]+";")
        #foTag.write("\n "+indivLocName+"  rdfs:subClassOf   bf:Tag;")
      else:
        foTag.write("\n "+indivLocName+"  rdfs:subClassOf   bf:Tag;")
      foTag.write('\n\t\t\t rdf:type   owl:Class ;')
      foTag.write('\n\t\t\t bf:isHierarchical  "";')
      foTag.write('\n\t\t\t skos:definition ""@en ;\n')
      foTag.write('\n\t\t\t rdfs:label "'+tag+'"@en .\n')
      brickTags[atags]=indivLocName;
      # `parent` is assigned but not read in this cell.
      parent=tag;

Add tag leaves

In [None]:
# Write one owl:Class per tag of the Tags sheet, directly below bf:Tag.
for idx in dfTags.index:
  tagName=str(dfTags.loc[idx, "Tag"])
  # Look up the class written for the tag's dimension path. The value is only
  # needed by the disabled subClassOf variant below, but the lookup still
  # raises KeyError for a dimension that the hierarchy cell did not write.
  #parent=brickTags[str(dfTags.loc[idx, "Dimension"]).split('>')[-1]]
  parent=brickTags[str(dfTags.loc[idx, "Dimension"]).replace('>','_')]
  indivLocName=nsTagTag + IndivName(tagName)
  # Double quotes would terminate the Turtle literal early; replace them with
  # single quotes as the point-tagset writer does.
  definition=getStr(dfTags.loc[idx, "Definition"]).replace('"',"'")
  #foTag.write("\n "+indivLocName+"  rdfs:subClassOf   "+parent+";")
  foTag.write("\n "+indivLocName+"  rdfs:subClassOf   bf:Tag;")
  foTag.write('\n\t\t\t rdf:type   owl:Class ;')
  foTag.write('\n\t\t\t skos:definition "'+definition+'"@en ;\n')
  foTag.write('\n\t\t\t rdfs:label "'+tagName+'"@en .\n')
  # Register under this row's tag. The previous code used `tag`, a loop
  # variable left over from an earlier cell, so every row overwrote the same
  # unrelated key.
  brickTags[tagName]=indivLocName
  #print(parent,indivLocName)

In [None]:
# Close BrickTag.ttl so it can be re-read and reformatted below.
foTag.close()

In [None]:
# Load the hand-written BrickTag.ttl into a fresh rdflib graph (parsed with
# rdflib's 'n3' parser); a syntax error in the generated file surfaces here.
g = rdflib.Graph()
result = g.parse('../dist/BrickTag.ttl', format='n3')

In [None]:
# Rewrite BrickTag.ttl in place using rdflib's Turtle serializer.
g.serialize(destination='../dist/BrickTag.ttl', format='turtle')