# Notebook to transform SKOS Vocabulary to Datahub Business Glossary yaml format.
Requires: rdflib, PyYAML (imported as `yaml`)

This example is self-contained: it loads a Turtle file from the project. You can just as easily run the same query against a SPARQL endpoint.

In [21]:
import rdflib
import yaml

In [37]:
# Load the SKOS vocabulary from the Turtle file that ships with the project.
g = rdflib.Graph()
g.parse("./ontologies/areaaldata_begrippen.ttl", format="turtle")

# One result row per (concept, definition, prefLabel, exactMatch) group.
# Column order matters to the cells below:
#   0 ?concept, 1 ?def, 2 ?prefLabel, 3 ?match, 4 ?contains, 5 ?inherits
#
# - The skos: prefix is declared explicitly so the query does not depend on
#   the prefix bindings that happen to be present in the parsed file.
# - The two OPTIONAL broader/narrower patterns are independent, so a concept
#   with several broader AND several narrower concepts yields their cross
#   product; DISTINCT keeps each URI once in the concatenated value.
query = """
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?concept ?def ?prefLabel ?match
       (group_concat(DISTINCT ?narrower; SEPARATOR="|") as ?contains)
       (group_concat(DISTINCT ?broader; SEPARATOR="|") as ?inherits)
WHERE {
    ?concept a skos:Concept .
    ?concept skos:definition ?def .
    ?concept skos:prefLabel ?prefLabel .
    optional { ?concept skos:exactMatch ?match .}
    optional { ?concept skos:broader ?broader .}
    optional { ?narrower skos:broader ?concept .}
}
group by ?concept ?prefLabel ?def ?match
order by ?concept
"""

result = g.query(query)
    

In [38]:
# Quick visual check of the relations found per concept. Each row unpacks in
# SELECT order: concept, def, prefLabel, match, contains, inherits.
for concept, _definition, _label, _match, contains, inherits in result:
    print(concept, 'contains: ' + contains, 'inherits: ' + inherits)

http://otl.noord-holland.nl/id/areaaldata/concept/adres contains:  inherits: 
http://otl.noord-holland.nl/id/areaaldata/concept/bak contains:  inherits: 
http://otl.noord-holland.nl/id/areaaldata/concept/begroeidTerreindeel contains: http://otl.noord-holland.nl/id/areaaldata/concept/begroeidTerreindeelBerm|http://otl.noord-holland.nl/id/areaaldata/concept/begroeidTerreindeelPlantvak inherits: 
http://otl.noord-holland.nl/id/areaaldata/concept/begroeidTerreindeelBerm contains:  inherits: http://otl.noord-holland.nl/id/areaaldata/concept/begroeidTerreindeel
http://otl.noord-holland.nl/id/areaaldata/concept/begroeidTerreindeelBermKr contains:  inherits: 
http://otl.noord-holland.nl/id/areaaldata/concept/begroeidTerreindeelKruin contains:  inherits: 
http://otl.noord-holland.nl/id/areaaldata/concept/begroeidTerreindeelPlKr contains:  inherits: 
http://otl.noord-holland.nl/id/areaaldata/concept/begroeidTerreindeelPlantvak contains:  inherits: http://otl.noord-holland.nl/id/areaaldata/concep

In [39]:
def _term_id(uri):
    """Return the glossary id for a concept URI: 'ad42_' + last path segment."""
    return 'ad42_' + str(uri).split('/')[-1]


def _term_ids(concatenated):
    """Turn a '|'-joined string of concept URIs into a list of glossary ids.

    Values without a '/' (e.g. the empty string produced when a concept has
    no related concepts) are skipped. Duplicates are removed while keeping
    first-seen order.
    """
    if concatenated is None:
        return []
    uris = [part for part in str(concatenated).split('|') if '/' in part]
    return list(dict.fromkeys(_term_id(uri) for uri in uris))


# Build one glossary term dict per query row. Rows unpack in SELECT order:
# concept, def, prefLabel, match, contains, inherits.
termlist = []

for concept, definition, pref_label, match, contains, inherits in result:
    custom_properties = {}
    if match is not None:
        custom_properties['skos_exact_match'] = str(match)
    custom_properties['source_url'] = str(concept)

    # Plain str() everywhere: rdflib terms are str subclasses and should not
    # leak into the structure that is later serialised to YAML.
    term = {
        'name': str(pref_label),
        'id': _term_id(concept),
        'description': str(definition),
        'source_url': str(concept),
        'custom_properties': custom_properties,
    }

    # ?inherits holds the broader concepts and ?contains the narrower ones;
    # the previous version had these two columns swapped.
    inherits_ids = _term_ids(inherits)
    if inherits_ids:
        term['inherits'] = inherits_ids

    contains_ids = _term_ids(contains)
    if contains_ids:
        term['contains'] = contains_ids

    termlist.append(term)

In [40]:
# Top-level glossary document: a single node holding every converted term.
base = {
    'version': 1,
    'source': 'Datahub',
    'owners': {'users': ['id/di']},
    'url': 'https://provincienh.github.io/OTL/otl-doc/',
    'nodes': [{
        'name': 'Areaaldata begrippenkader',
        'id': 'ad42_Areaaldata_4_2_begrippenkader',
        'description': 'SKOS begrippenkader behorende bij het Areaaldata model van Provincie Noord-Holland',
        'terms': termlist,
    }],
}

# The context manager guarantees the file is flushed and closed. UTF-8 is set
# explicitly because allow_unicode=True writes non-ASCII characters verbatim.
# sort_keys=False keeps the key order defined above.
with open('ad_glossary.yaml', 'w', encoding='utf-8') as ff:
    yaml.dump(base, ff, sort_keys=False, allow_unicode=True)