# Notebook to transform SKOS Vocabulary to Datahub Business Glossary yaml format.
Requires: rdflib, PyYAML (imported as `yaml`)

This example is self-contained: it loads a Turtle file from the project. You can just as easily run the same query against a SPARQL endpoint.

In [1]:
import rdflib
import yaml

In [2]:
# Load the SKOS vocabulary from the Turtle file shipped with the project.
g = rdflib.Graph()
g.parse("./ontologies/areaaldata_begrippen.ttl")

# Select every skos:Concept that has a definition and a preferred label,
# together with its optional exactMatch, broader and narrower concepts.
# Each OPTIONAL that matches more than once multiplies the result rows, so a
# single concept can be returned several times (once per combination).
query = """
SELECT ?concept ?def ?prefLabel ?match ?broader ?narrower
WHERE {
    ?concept a skos:Concept .
    ?concept skos:definition ?def .
    ?concept skos:prefLabel ?prefLabel .
    optional { ?concept skos:exactMatch ?match .}
    optional { ?concept skos:broader ?broader .}
    optional { ?narrower skos:broader ?concept .}
}"""

result = g.query(query)


def _term_ref(uri):
    """Return the glossary reference for a concept URI.

    The reference is the glossary node name followed by the last
    '/'-separated segment of the URI.
    """
    # NOTE(review): the term itself is named after its prefLabel while this
    # reference uses the URI's last segment -- confirm that the two coincide
    # in the vocabulary, otherwise the reference will not resolve.
    return 'Areaaldata begrippenkader.' + str(uri).rsplit('/', 1)[-1]


# Build exactly one glossary term per concept. Rows that repeat a concept only
# contribute additional (unique) 'inherits' / 'contains' references instead of
# producing duplicate terms.
# NOTE(review): when a concept has several prefLabels, definitions or exact
# matches, the values from the first row returned for that concept are kept.
terms_by_concept = {}

for concept, definition, pref_label, match, broader, narrower in result:
    concept_url = str(concept)
    term = terms_by_concept.get(concept_url)

    if term is None:
        custom_properties = {}
        if match is not None:
            custom_properties['skos_exact_match'] = str(match)
        custom_properties['source_url'] = concept_url

        term = {
            'name': str(pref_label),
            'description': str(definition),
            'source_url': concept_url,
            'custom_properties': custom_properties,
        }
        terms_by_concept[concept_url] = term

    # broader concept -> 'inherits', narrower concept -> 'contains'
    for key, related in (('inherits', broader), ('contains', narrower)):
        if related is None:
            continue
        ref = _term_ref(related)
        refs = term.setdefault(key, [])
        if ref not in refs:
            refs.append(ref)

# Terms in first-seen order, ready to be embedded in the glossary document.
termlist = list(terms_by_concept.values())
    

In [3]:
# Top-level glossary document: a single node holding all terms collected from
# the SKOS vocabulary.
base = {
    'version': 1,
    'source': 'Datahub',
    'owners': {'users': ['id/di']},
    'url': 'https://provincienh.github.io/OTL/otl-doc/',
    'nodes': [{
        'name': 'Areaaldata begrippenkader',
        'description': 'SKOS begrippenkader behorende bij het Areaaldata model van Provincie Noord-Holland',
        'terms': termlist,
    }],
}

# To preview the YAML instead of writing it to disk:
#   print(yaml.dump(base, sort_keys=False))

# Write as UTF-8 explicitly: allow_unicode=True emits non-ASCII characters
# unescaped, so relying on the platform default encoding could fail or produce
# a mis-encoded file. The context manager flushes and closes the file.
with open('ad_glossary.yaml', 'w', encoding='utf-8') as ff:
    yaml.dump(base, ff, sort_keys=False, allow_unicode=True)