In [22]:
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS, SKOS, XSD, OWL, PROV
from wikidataintegrator import wdi_core
import uuid
import urllib.parse

In [75]:
# Graph that accumulates the RDF rendering of the Wikidata item.
rdf_item = Graph()

# --- General-purpose vocabularies -------------------------------------------
ontolex = Namespace("http://www.w3.org/ns/lemon/ontolex#")
dct = Namespace("http://purl.org/dc/terms/")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
schema = Namespace("http://schema.org/")
cc = Namespace("http://creativecommons.org/ns#")
geo = Namespace("http://www.opengis.net/ont/geosparql#")
prov = Namespace("http://www.w3.org/ns/prov#")

# --- Wikibase ontology and Wikidata entity/data namespaces ------------------
wikibase = Namespace("http://wikiba.se/ontology#")
wd = Namespace("http://www.wikidata.org/entity/")
data = Namespace("https://www.wikidata.org/wiki/Special:EntityData/")
s = Namespace("http://www.wikidata.org/entity/statement/")
ref = Namespace("http://www.wikidata.org/reference/")
v = Namespace("http://www.wikidata.org/value/")

# --- Property namespaces: direct, statement, qualifier, reference -----------
wdt = Namespace("http://www.wikidata.org/prop/direct/")
wdtn = Namespace("http://www.wikidata.org/prop/direct-normalized/")
p = Namespace("http://www.wikidata.org/prop/")
ps = Namespace("http://www.wikidata.org/prop/statement/")
psv = Namespace("http://www.wikidata.org/prop/statement/value/")
psn = Namespace("http://www.wikidata.org/prop/statement/value-normalized/")
pq = Namespace("http://www.wikidata.org/prop/qualifier/")
pqv = Namespace("http://www.wikidata.org/prop/qualifier/value/")
pqn = Namespace("http://www.wikidata.org/prop/qualifier/value-normalized/")
pr = Namespace("http://www.wikidata.org/prop/reference/")
prv = Namespace("http://www.wikidata.org/prop/reference/value/")
prn = Namespace("http://www.wikidata.org/prop/reference/value-normalized/")
wdno = Namespace("http://www.wikidata.org/prop/novalue/")

# --- Prefixes registered on the output graph --------------------------------
# Only this subset of the namespaces above is bound on rdf_item.
rdf_item.namespace_manager.bind("wikibase", wikibase)
rdf_item.namespace_manager.bind("schema", schema)
rdf_item.namespace_manager.bind("prov", prov)
rdf_item.namespace_manager.bind("wd", wd)
rdf_item.namespace_manager.bind("s", s)
rdf_item.namespace_manager.bind("ref", ref)
rdf_item.namespace_manager.bind("wdt", wdt)
rdf_item.namespace_manager.bind("p", p)
rdf_item.namespace_manager.bind("ps", ps)
rdf_item.namespace_manager.bind("pq", pq)
rdf_item.namespace_manager.bind("pr", pr)
                    


In [76]:
# Entity to convert; the commented-out id is an alternative subject kept for quick switching.
# qid = "Q35869"
qid = "Q38"
# Load the entity through WikidataIntegrator and keep its JSON representation.
# The cells below read the 'claims', 'sitelinks', 'labels', 'descriptions'
# and 'aliases' keys of json_item.
item = wdi_core.WDItemEngine(wd_item_id=qid)
json_item = item.get_wd_json_representation()

In [77]:
# Type the entity node itself as a wikibase:Item.
rdf_item.add((wd[qid], RDF.type, wikibase.Item)) 

In [78]:
# Maps a Wikibase snak datatype name (as found in the entity JSON under
# mainsnak["datatype"]) to the IRI used as the object of wikibase:propertyType.
# Values are bare IRIs (no surrounding angle brackets) because they are passed
# straight to URIRef().
property_type = {'commonsMedia': 'http://wikiba.se/ontology#CommonsMedia',
                'external-id': 'http://wikiba.se/ontology#ExternalId',
                'geo-shape': 'http://wikiba.se/ontology#GeoShape',
                'globe-coordinate': 'http://wikiba.se/ontology#GlobeCoordinate',
                'math': 'http://wikiba.se/ontology#Math',
                'monolingualtext': 'http://wikiba.se/ontology#Monolingualtext',
                'quantity': 'http://wikiba.se/ontology#Quantity',
                'string': 'http://wikiba.se/ontology#String',
                'tabular-data': 'http://wikiba.se/ontology#TabularData',
                'time': 'http://wikiba.se/ontology#Time',
                'edtf': 'http://wikiba.se/ontology#Edtf',
                'url': 'http://wikiba.se/ontology#Url',
                'wikibase-item': 'http://wikiba.se/ontology#WikibaseItem',
                'wikibase-property': 'http://wikiba.se/ontology#WikibaseProperty'}

In [79]:
def makeWdt(claim, preferredSet, value):
    """Add the truthy (wdt:) triple for a claim when the claim has the best rank.

    The best rank is "preferred" when preferredSet is truthy, otherwise "normal".
    Claims with any other rank add nothing.

    Reads the notebook globals rdf_item, wd, wdt, qid and pid, so it must be
    called while pid names the property currently being processed.

    claim        -- claim dict; only claim["rank"] is read
    preferredSet -- whether any claim of the current property is preferred
    value        -- RDF node used as the object of the wdt: triple
    """
    best_rank = "preferred" if preferredSet else "normal"
    if claim["rank"] == best_rank:
        rdf_item.add((wd[qid], wdt[pid], value))

In [28]:
def _snak_to_node(snak):
    """Convert one snak (main snak, qualifier snak or reference snak) to an RDF node.

    Dispatches on snak["datatype"] so that main snaks, qualifiers and references
    are all rendered by the same rules.

    Returns None when the snak has no datavalue (snaktype "novalue" or
    "somevalue") or when its datatype is not handled here; callers skip such
    snaks instead of emitting a stale or wrong value.
    """
    if snak.get("snaktype", "value") != "value" or "datavalue" not in snak:
        return None
    datatype = snak.get("datatype")
    value = snak["datavalue"]["value"]

    if datatype == "commonsMedia":
        return URIRef("http://commons.wikimedia.org/wiki/Special:FilePath/" + value.replace(" ", "_"))
    if datatype in ("string", "external-id"):
        return Literal(value)
    if datatype == "wikibase-item":
        return wd[value["id"]]
    if datatype == "monolingualtext":
        return Literal(value["text"], lang=value["language"])
    if datatype == "geo-shape":
        return URIRef("http://commons.wikimedia.org/data/main/" + value)
    if datatype == "globe-coordinate":
        # WKT separates the coordinates of a point with a space: "Point(lon lat)".
        # altitude, precision and globe are not used.
        return Literal("Point(" + str(value["longitude"]) + " " + str(value["latitude"]) + ")",
                       datatype=geo.wktLiteral)
        ## TODO Normalized values with units
    if datatype == "quantity":
        # The unit is not used yet.
        ## TODO Normalized values with units
        return Literal(value["amount"], datatype=XSD.decimal)
    if datatype == "time":
        ## TODO normalize
        return Literal(value["time"], datatype=XSD.dateTime)
    if datatype == "url":
        return URIRef(value)
    # math, tabular-data and any other datatype are not converted.
    return None


# Rank name in the JSON -> rank node in the Wikibase ontology.
rank_nodes = {"normal": wikibase.NormalRank,
              "preferred": wikibase.PreferredRank,
              "deprecated": wikibase.DeprecatedRank}

# NOTE: the loop variable must stay named `pid` because makeWdt() reads it as a global.
for pid in json_item['claims'].keys():
    claims = json_item['claims'][pid]

    ## Properties and their derivatives
    rdf_item.add((wd[pid], RDF.type, wikibase.Property))
    rdf_item.add((wd[pid], wikibase.directClaim, wdt[pid]))
    rdf_item.add((wd[pid], wikibase.claim, p[pid]))
    rdf_item.add((wd[pid], wikibase.statementProperty, ps[pid]))
    rdf_item.add((wd[pid], wikibase.statementValue, psv[pid]))
    rdf_item.add((wd[pid], wikibase.qualifier, pq[pid]))
    rdf_item.add((wd[pid], wikibase.qualifierValue, pqv[pid]))
    rdf_item.add((wd[pid], wikibase.reference, pr[pid]))
    rdf_item.add((wd[pid], wikibase.referenceValue, prv[pid]))
    rdf_item.add((wd[pid], wikibase.novalue, wdno[pid]))
    # Datatypes missing from property_type are skipped rather than aborting the whole run.
    property_type_uri = property_type.get(claims[0]["mainsnak"]["datatype"])
    if property_type_uri is not None:
        rdf_item.add((wd[pid], wikibase.propertyType, URIRef(property_type_uri)))

    # Whether any statement of this property is preferred; this is the same for
    # every claim of the property, so it is computed once per property.
    preferredSet = any(other["rank"] == "preferred" for other in claims)

    ## Statements
    for claim in claims:
        # Statement node IRIs use '-' where the JSON statement id has '$'.
        statement_uri = s[claim["id"].replace("$", "-")]
        rdf_item.add((wd[qid], p[pid], statement_uri))
        rdf_item.add((statement_uri, RDF.type, wikibase.Statement))

        # rank
        rank_node = rank_nodes.get(claim["rank"])
        if rank_node is not None:
            rdf_item.add((statement_uri, wikibase.rank, rank_node))

        # Best rank: preferred statements when any exist, otherwise normal ones.
        # Deprecated statements are never best rank.
        if claim["rank"] == ("preferred" if preferredSet else "normal"):
            rdf_item.add((statement_uri, RDF.type, wikibase.BestRank))

        # values
        if claim["mainsnak"]["snaktype"] == "novalue":
            rdf_item.add((statement_uri, RDF.type, wdno[pid]))
        else:
            # TODO: "somevalue" snaks (unknown value) are skipped for now.
            value_node = _snak_to_node(claim["mainsnak"])
            if value_node is not None:
                rdf_item.add((statement_uri, ps[pid], value_node))
                ## wdt: truthy statements are emitted for preferred-rank claims, or for
                ##      normal-rank claims when the property has no preferred claim.
                makeWdt(claim, preferredSet, value_node)

        # qualifiers (the key is absent from the JSON when a claim has none)
        for qualifier, qualifier_snaks in claim.get("qualifiers", {}).items():
            for qualifier_prop in qualifier_snaks:
                qualifier_node = _snak_to_node(qualifier_prop)
                if qualifier_node is not None:
                    rdf_item.add((statement_uri, pq[qualifier], qualifier_node))

        # references (the key is absent from the JSON when a claim has none)
        for reference in claim.get("references", []):
            reference_uri = ref[reference["hash"]]
            rdf_item.add((reference_uri, RDF.type, wikibase.Reference))
            rdf_item.add((statement_uri, PROV.wasDerivedFrom, reference_uri))

            for ref_prop, ref_snaks in reference["snaks"].items():
                for ref_prop_statement in ref_snaks:
                    reference_node = _snak_to_node(ref_prop_statement)
                    if reference_node is not None:
                        # Reference values hang off the reference node via pr:<property>.
                        rdf_item.add((reference_uri, pr[ref_prop], reference_node))

    

In [84]:
# sitelinks
# Each sitelink becomes a schema:Article about the item, tagged with its
# language and linked to the wiki it belongs to.
# NOTE(review): assumes every sitelink entry carries a "url" key — confirm that
# the JSON returned by WikidataIntegrator includes sitelink URLs.
for sitelink in json_item['sitelinks'].keys():
    sitelink_url = json_item['sitelinks'][sitelink]["url"]
    wiki = URIRef(sitelink_url)

    # Site root, e.g. "https://it.wikipedia.org/". Splitting the URL on "wiki"
    # would cut inside the host name ("https://it.").
    url_parts = urllib.parse.urlsplit(sitelink_url)
    partof = URIRef(url_parts.scheme + "://" + url_parts.netloc + "/")

    # Site ids look like "itwiki" or "itwikiquote": the language code is the part
    # before the first "wiki". Underscores in site ids become hyphens.
    # NOTE(review): special sites such as "commonswiki" are not mapped to a real
    # language code here — verify against the reference RDF.
    language = sitelink.split("wiki")[0].replace("_", "-")

    rdf_item.add((wiki, RDF.type, schema.Article))
    rdf_item.add((wiki, schema.about, wd[qid]))
    rdf_item.add((wiki, schema.inLanguage, Literal(language)))
    rdf_item.add((wiki, schema.isPartOf, partof))

NameError: name 'partof' is not defined

In [33]:
# Heading
# Labels, descriptions and aliases of the item, one language-tagged literal each.
for language in json_item["labels"].keys():
    rdf_item.add((wd[qid], RDFS.label, Literal(json_item["labels"][language]["value"], lang=language)))

for language in json_item["descriptions"].keys():
    rdf_item.add((wd[qid], schema.description, Literal(json_item["descriptions"][language]["value"], lang=language)))

for language in json_item["aliases"].keys():
    # Each alias entry is a {"language": ..., "value": ...} dict, like labels and
    # descriptions; only its "value" text belongs in the literal.
    for alias in json_item["aliases"][language]:
        rdf_item.add((wd[qid], SKOS.altLabel, Literal(alias["value"], lang=language)))
                          
                         

In [83]:
# Show the generated graph as Turtle.
# Graph.serialize() returns bytes in older rdflib releases and str in newer
# ones, so decode only when bytes come back.
turtle_output = rdf_item.serialize(format="turtle")
if isinstance(turtle_output, bytes):
    turtle_output = turtle_output.decode("utf-8")
print(turtle_output)

@prefix wd: <http://www.wikidata.org/entity/> .
@prefix wikibase: <http://wikiba.se/ontology#> .

wd:Q38 a wikibase:Item .




In [55]:
# Reference graph: the Turtle that Wikidata itself publishes for the same
# entity, used below to diff against the locally generated rdf_item.
compareRDF = Graph()
compareRDF.parse("http://www.wikidata.org/entity/{}.ttl".format(qid))

<Graph identifier=N4fc1fe0c63214ad7a45ee9685b34b52b (<class 'rdflib.graph.Graph'>)>

In [65]:
# Triples present in Wikidata's own RDF but missing from the generated graph.
diffRdf = compareRDF - rdf_item

# Bind the same prefixes as on rdf_item so both serializations read alike.
# Every prefix, including "ps", is bound on diffRdf itself.
for diff_prefix, diff_namespace in (
    ("wikibase", wikibase),
    ("schema", schema),
    ("prov", prov),
    ("wd", wd),
    ("s", s),
    ("ref", ref),
    ("wdt", wdt),
    ("p", p),
    ("ps", ps),
    ("pq", pq),
    ("pr", pr),
):
    diffRdf.namespace_manager.bind(diff_prefix, diff_namespace)

In [66]:
# Write both graphs as Turtle for side-by-side inspection:
#   /tmp/1.ttl -> triples only in the reference graph (diffRdf)
#   /tmp/2.ttl -> the locally generated graph (rdf_item)
diffRdf.serialize(destination="/tmp/1.ttl", format="turtle")
rdf_item.serialize(destination="/tmp/2.ttl", format="turtle")