In [92]:
#!/usr/bin/python3

from xml.dom import minidom
from CLARIAH_CMDI.xml2dict.processor import CMDI # load, xmldom2dict
import json
from SchemaLOD import Schema
from rdflib import Graph, URIRef, Literal, BNode, plugin, Namespace
from rdflib.serializer import Serializer
from config import cmdifile, ROOT, DATAVERSE_ID, API_TOKEN
from Semaf import Semaf
from jGraph import jGraph
import sys
import requests
from datetime import datetime

# Helper object used below to load the CMDI record; `sm.json` is read in a later cell.
sm = Semaf()
# NOTE(review): this hard-coded absolute path replaces the `cmdifile` value imported
# from `config` above, so the notebook only runs where this path exists.
cmdifile = '/data/CLARIAH_CMDI/cmdi/easy-dataset:35803_easy-file:4297789_IPNV_208_publicinfo.cmdi'
# Crosswalks CSV; in the visible cells it is only referenced by a commented-out
# `load_crosswalks` call.
cwfile = "test-cmdi-crosswalks.csv"
# Load the CMDI file. NOTE(review): the name `s` is re-bound by later cells
# (a Schema object, then a loop variable), so this return value is not used afterwards.
s = sm.loadcmdi(cmdifile)

In [93]:
class GraphBuilder():
    """Turn a nested dict/list structure (a parsed CMDI record) into an rdflib graph.

    The walk is done by `rotate` (dicts and scalar leaves) and `rotatelist`
    (lists of dicts). Every element name is turned into a predicate/resource URI
    by `SetRef`, which uses the loaded crosswalk mappings when one exists and
    falls back to ``RootRef + name`` otherwise.

    Attributes written while walking:
        g           -- the rdflib Graph receiving all statements
        dictcontent -- flat list of dict records, one per visited leaf/list entry
        crosswalks  -- reverse lookup: reference URL -> original element name
        locator     -- reference URL / URIRef -> node that carries its statements
        namespaces  -- parent reference URL -> lower-cased child name (used as prefix)
    """

    # SKOS core and SKOS-XL namespace URIs used for the bookkeeping statements.
    SKOS_URI = 'http://www.w3.org/2004/02/skos/core#'
    SKOSXL_URI = 'http://www.w3.org/2008/05/skos-xl#'

    def __init__(self, thisobject=None, RootRef=None, crosswolksfile=None, thisformat='json', debug=False):
        """Prepare an empty graph and parse the input document.

        Parameters:
            thisobject     -- JSON text (str/bytes) to parse, an already parsed
                              dict/list, or None for an empty document
            RootRef        -- base URL prepended to element names without a mapping
            crosswolksfile -- optional path of a crosswalks CSV; loaded when given
            thisformat     -- accepted for interface compatibility; not used here
            debug          -- accepted for interface compatibility; not used here
        """
        self.stats = {}
        self.json = {}
        # json.loads(None) raises TypeError, so the default argument needs its own branch.
        if thisobject is None:
            self.context = {}
        elif isinstance(thisobject, (str, bytes, bytearray)):
            self.context = json.loads(thisobject)
        else:
            self.context = thisobject
        self.RootRef = RootRef
        self.dictcontent = []
        self.mappings = {}
        self.locator = {}
        self.namespaces = {}
        self.EnrichFlag = False
        self.crosswalks = {}
        self.cmdiloc = {}
        # Default Graph
        self.g = Graph()
        self.level = 0
        # Defined here as well as in rotate() so rotatelist() works when called on its own.
        self.skos = Namespace(self.SKOS_URI)
        self.skosxl = Namespace(self.SKOSXL_URI)
        if crosswolksfile:
            self.load_crosswalks(crosswolksfile)

    @staticmethod
    def _xpath(parent, key):
        """Join parent path and key with '/'; a falsy parent yields the key itself."""
        if parent:
            return "%s/%s" % (parent, key)
        return key

    def SetRef(self, value):
        """Return the reference URL for an element name and record the reverse lookup.

        A name present in `self.mappings` resolves to its mapped URL; any other
        value resolves to ``RootRef`` followed by the value.
        """
        if value in self.mappings:
            RefURL = self.mappings[value]
        else:
            RefURL = "%s%s" % (self.RootRef, value)
        self.crosswalks[RefURL] = value
        return RefURL

    def setNamespaces(self):
        """Bind the fixed prefixes plus one prefix per collected namespace entry."""
        root = self.RootRef
        # NOTE(review): when RootRef already ends with '/', the 'cmdidoc' and
        # 'keywords' namespaces contain a double slash -- kept as in the original.
        fixed_prefixes = (
            ('cmdi', "%s" % root),
            ('cmdidoc', "%s/#" % root),
            ('keywords', "%s/Keyword#" % root),
            ('citation', "https://dataverse.org/schema/citation"),
            ('schema', "https://dataverse.org/schema/"),
            ('dcterms', "http://purl.org/dc/terms/"),
        )
        for prefix, uri in fixed_prefixes:
            self.g.bind(prefix, Namespace(uri))

        for nsname, prefix in self.namespaces.items():
            self.g.bind(prefix, "%s/" % nsname)

    def load_crosswalks(self, crossfile):
        """Read ``name,url`` lines from a UTF-8 CSV file into `self.mappings`.

        Only the first two comma-separated fields of a line are used. Surrounding
        whitespace (including the line ending) is stripped, and blank lines or
        lines without a second field are skipped instead of raising IndexError.

        Returns the updated mappings dict.
        """
        with open(crossfile, encoding='utf-8') as fh:
            for line in fh:
                fields = [part.strip() for part in line.split(',')]
                if len(fields) < 2 or not fields[0]:
                    continue
                self.mappings[fields[0]] = fields[1]
        return self.mappings

    def rotatelist(self, thislist, previous_element, xpathroot, DEBUG=None):
        """Add statements for a list of dict entries found under `previous_element`.

        Parameters:
            thislist         -- list to walk; entries that are not non-empty dicts are skipped
            previous_element -- name of the element holding the list (parent key)
            xpathroot        -- path string stored with every generated record/statement
            DEBUG            -- when truthy, print the collected statements per key

        Each dict entry gets one blank node. String values become literals on that
        node; list values get one extra blank node per item. The entry node is
        finally attached to the parent reference using the predicate of the
        entry's last key.
        """
        self.level = 0
        for keyID, key in enumerate(thislist or []):
            # An empty dict has no key to build the linking statement from.
            if type(key) is not dict or not key:
                continue
            complexstatements = {}
            staIDlocal = BNode()
            root = self.SetRef(previous_element)
            predicate = None
            for k, v in key.items():
                kref = self.SetRef(k)
                predicate = URIRef(kref)
                self.dictcontent.append({"list": root, "xpath": xpathroot, kref: v, 'type': type(v), 'sort': keyID })
                if type(v) is str:
                    complexstatements[predicate] = v
                    self.g.add((staIDlocal, predicate, Literal(v)))
                elif type(v) is list:
                    complexarray = []
                    for item in v:
                        self.level = self.level + 1
                        complexarray.append({ kref: item, URIRef("%s#Vocabulary" % kref) : "url" })

                        # Create and add a new statement in the graph
                        staIDar = BNode()
                        self.g.add((staIDar, predicate, Literal(item)))
                        self.g.add((staIDar, self.skosxl['hiddenLabel'], Literal("%s/%s" % (xpathroot, k))))
                        self.g.add((staIDar, self.skos['broader'], URIRef(root)))
                        self.g.add((staIDar, self.skos['prefLabel'], Literal(k)))
                        self.g.add((staIDar, self.skos['note'], Literal(self.level)))

                        if self.EnrichFlag:
                            self.g.add((staIDar, URIRef("%s#Vocabulary" % kref), Literal('vocabulary name')))
                            self.g.add((staIDar, URIRef("%s#VocabularyURL" % kref), Literal("http link to concept URI for %s" % item)))
                        # Add statements from array
                        self.g.add((staIDlocal, predicate, staIDar))
                    complexstatements[predicate] = complexarray
                if DEBUG:
                    print(complexstatements)
            # Attach the entry node to its parent (predicate of the last key, as before).
            self.g.add((URIRef(root), predicate, staIDlocal))
        return

    def rotate(self, thisdict, previous_element, DEBUG=None):
        """Recursively walk a dict and add its content to the graph.

        Parameters:
            thisdict         -- dict to walk (a list is delegated to `rotatelist`)
            previous_element -- path of the parent element, or a falsy value at the top
            DEBUG            -- when truthy, print intermediate state; passed down the recursion

        Returns `self.dictcontent`, the flat list of records collected so far.
        """
        self.cmdiloc = {}

        # self.skos must be the SKOS core namespace: binding it to SKOS-XL made
        # rotatelist() emit skos-xl#broader/prefLabel/note, unlike this method.
        # NOTE(review): several predicates below pair a term with the other
        # vocabulary (e.g. skosxl['hiddenLabel'], skos['literalForm']); they are
        # left unchanged because existing queries may match on them.
        skos = self.skos = Namespace(self.SKOS_URI)
        skosxl = self.skosxl = Namespace(self.SKOSXL_URI)
        self.g.bind('skos', skos)
        self.g.bind('skosxl', skosxl)

        if thisdict is None:
            return self.dictcontent

        if isinstance(thisdict, list):
            # This branch used undefined names `k`/`v`; a list is now handed to
            # rotatelist() and the parent reference keeps its prefLabel statement.
            root = self.SetRef(previous_element)
            self.rotatelist(thisdict, previous_element, previous_element or "", DEBUG)
            self.g.add((URIRef(root), skos['prefLabel'], Literal(root)))
            self.setNamespaces()
            return self.dictcontent

        for k, v in thisdict.items():
            xpathroot = self._xpath(previous_element, k)

            if isinstance(v, dict):
                self.namespaces[self.SetRef(previous_element)] = k.lower()
                self.rotate(v, xpathroot, DEBUG)
                root = self.SetRef(previous_element)
                # Nested elements are described on the root reference itself.
                staID = URIRef(self.RootRef)
                self.g.add((staID, URIRef(root), URIRef(self.SetRef(k))))
                self.g.add((staID, skos['hiddenLabel'], Literal(xpathroot)))
                self.g.add((staID, skos['altLabel'], Literal(k)))
                self.locator[root] = staID
                continue

            if isinstance(v, list):
                if DEBUG:
                    print(k)
                self.rotatelist(v, k, xpathroot, DEBUG)
                continue

            # Scalar leaf
            root = self.SetRef(previous_element)
            if DEBUG:
                print(self.cmdiloc)

            kref = self.SetRef(k)
            # Remember the latest leaf unless the same reference is already held.
            if kref not in self.cmdiloc:
                self.cmdiloc = { kref: v }
            self.dictcontent.append({"parent": root, "xpath": xpathroot, kref: v, 'type': type(v) })

            # Add statement
            staID = BNode()
            keynode = URIRef(kref)
            self.locator[keynode] = staID
            self.g.add((URIRef(root), skosxl['hiddenLabel'], Literal(previous_element)))
            self.g.add((URIRef(root), skos['note'], Literal('parent statement')))

            self.g.add((staID, skos['literalForm'], Literal(k)))
            self.g.add((staID, skosxl['hiddenLabel'], Literal("%s" % (xpathroot))))
            self.g.add((staID, keynode, Literal(v)))
            self.g.add((staID, skos['note'], Literal('compound statement')))
            self.g.add((staID, skos['broader'], URIRef(root)))

            self.g.add((keynode, skosxl['LabelRelation'], staID))
            self.g.add((keynode, skosxl['hiddenLabel'], Literal(xpathroot)))
            self.g.add((keynode, skosxl['literalForm'], Literal(k)))
            self.g.add((keynode, skos['note'], Literal('compound statements')))

        self.setNamespaces()
        return self.dictcontent


In [94]:
# Build the graph from the JSON text held by `sm`; element names without a
# crosswalk mapping are resolved against the CBS schema URL.
cmdigraph = GraphBuilder(sm.json, "https://dataverse.org/schema/cbs/")
#cmdigraph.load_crosswalks(cwfile)
# Walk the parsed document from the top (False = no parent element); `items`
# receives the flat record list returned by rotate().
items = cmdigraph.rotate(cmdigraph.context, False)
# Last expression of the cell: display the parsed input structure.
cmdigraph.context

{'#document': {'CMD': {'Header': {'MdSelfLink': None},
   'Resources': {'ResourceProxyList': {},
    'JournalFileProxyList': None,
    'ResourceRelationList': None},
   'Components': {'OralHistoryInterviewDANS': {'ID': 'IPNV_208',
     'InterviewGeneral': {'NumberOfSpeakers': '2',
      'CreationDate': '2007-11-07',
      'PublicationDate': '2013-03-01',
      'Duration': '02:10:00',
      'Owner': 'Veterans Institute, Doorn, The Netherlands',
      'Genre': 'interview',
      'Modality': {'Modality': 'Spoken'},
      'Multilinguality': {'Multilinguality': 'Monolingual'},
      'Access': {'Availability': 'All data including audio is accessible for authorised researchers.',
       'DistributionMedium': 'Distribution medium will be decided in consultation with Access Contact',
       'CatalogueLink': 'urn:nbn:nl:ui:13-gep-1cc',
       'Contact': {'Address': 'P.O. Box 93067, 2509 AB Den Haag, The Netherlands',
        'Email': 'info@dans.knaw.nl',
        'Organisation': 'Data Archiving a

In [95]:
# Write the graph next to the tests. The file keeps its .nt name, so it is
# serialized as N-Triples ('nt'); the previous 'n3' format did not match the
# extension. N-Triples stays readable by N3/Turtle parsers as well.
# NOTE(review): the ./tests directory must already exist.
outputfile = './tests/cmdi_test'
cmdigraph.g.serialize(format='nt', destination="%s.nt" % outputfile)

<Graph identifier=Nbc5bdee79409465a890d5dd3c8ade239 (<class 'rdflib.graph.Graph'>)>

In [96]:
# Wrap the generated graph in the project's Schema helper.
# NOTE(review): this re-binds `s`, which held the loadcmdi() result from the first cell.
s = Schema(cmdigraph.g)
# Example field URI; in the visible cells it is not read again.
field = 'https://dataverse.org/schema/citation/keyword#Term'
# NOTE(review): the commented-out call below uses `pred`, which is only assigned in the next cell.
#triples = s.Relations(pred, NESTED=True, relation='#hiddenLabel')
#triples

In [97]:
# Path label of the element under inspection; it is only used by the
# commented-out filters below. Earlier alternatives were overwritten before
# use and have been dropped.
pred = '#document/CMD/Components/OralHistoryInterviewDANS/InterviewGeneral/Access/Contact'
x = cmdigraph.g
#x = cmdigraph.g.triples((Literal(pred), None, None))
#x = cmdigraph.g.triples((None, cmdigraph.skosxl['hiddenLabel'], None)) #Literal('Actor')))

# Print every triple, followed by all triples that share its subject.
# The loop uses its own variable names so the Schema object `s` created in the
# previous cell is not overwritten. Graph.triples() returns a generator, which
# is always truthy, so no emptiness check is needed before iterating it.
for subj, prd, obj in x:
    print("\n")
    print("%s %s %s" % (subj, prd, obj))
    for s1, p1, o1 in cmdigraph.g.triples((subj, None, None)):
        print("\t%s %s %s" % (s1, p1, o1))



N402ad6f40e354e559abf9c5a6ce402bd http://www.w3.org/2008/05/skos-xl#hiddenLabel #document/CMD/Components/OralHistoryInterviewDANS/InterviewAudio/AudioFormat
	N402ad6f40e354e559abf9c5a6ce402bd http://www.w3.org/2004/02/skos/core#literalForm AudioFormat
	N402ad6f40e354e559abf9c5a6ce402bd http://www.w3.org/2008/05/skos-xl#hiddenLabel #document/CMD/Components/OralHistoryInterviewDANS/InterviewAudio/AudioFormat
	N402ad6f40e354e559abf9c5a6ce402bd https://dataverse.org/schema/cbs/AudioFormat WAV
	N402ad6f40e354e559abf9c5a6ce402bd http://www.w3.org/2004/02/skos/core#note compound statement
	N402ad6f40e354e559abf9c5a6ce402bd http://www.w3.org/2004/02/skos/core#broader https://dataverse.org/schema/cbs/#document/CMD/Components/OralHistoryInterviewDANS/InterviewAudio


Ndf2631dc6de64965b84b5ee06040cf6c https://dataverse.org/schema/cbs/Keyword N00cd4379a2ce4334b82019eec7101d9e
	Ndf2631dc6de64965b84b5ee06040cf6c https://dataverse.org/schema/cbs/TimeInterval 00:00:00-00:10:00
	Ndf2631dc6de64965b84b