Named graph programs

In [15]:
pip install rdflib

Note: you may need to restart the kernel to use updated packages.


In [16]:
from rdflib import URIRef, BNode, Literal, Namespace, Graph
from rdflib.namespace import FOAF, DCTERMS, XSD, RDF, RDFS, SDO
import pprint

In [17]:
# Location of the knowledge graph file. To process another knowledge graph,
# change this single constant to that file's location and name.
KG_PATH = "D:\\Downloads\\AnnaTest.ttl"

# Create the graph and parse the knowledge graph file into it; the functions
# in the cells below receive this graph as their `g` argument.
g = Graph()
g.parse(KG_PATH)

# To inspect the loaded statements, run: for stmt in g: pprint.pprint(stmt)

<Graph identifier=N2fd5bc6b48c140e081804bbbed5885c8 (<class 'rdflib.graph.Graph'>)>

In [18]:
# returns all proxies for a provided person
def linkedProxies(ProvidedPerson, g):
    """Query `g` for every subject linked to `ProvidedPerson` as a person proxy.

    Args:
        ProvidedPerson: IRI of the provided person; it is interpolated between
            angle brackets into the SPARQL text.
        g: object exposing ``query(sparql_text)`` (the notebook passes the
            parsed graph).

    Returns:
        Whatever ``g.query`` returns for the query; each row binds ``?s`` to
        one proxy.
    """
    sparql_template = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX ns2: <http://data.biographynet.nl/rdf/>
    PREFIX idm: <http://www.intavia.eu/idm-core/>
    select distinct ?s
    where { 
        ?s idm:person_proxy_for <%s> .
    }
    """
    return g.query(sparql_template % ProvidedPerson)

In [19]:
# returns biodes source for a proxy
def findSource(Proxy, g):
    """Look up the ``ore:proxyIn`` value of `Proxy` in `g`.

    Args:
        Proxy: IRI of the proxy; it is interpolated between angle brackets
            into the SPARQL text.
        g: object exposing ``query(sparql_text)``.

    Returns:
        The ``biodes`` binding of the first result row, or ``None`` when the
        query yields no rows. Any further rows are ignored.
    """
    sparql_template = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX ns2: <http://data.biographynet.nl/rdf/>
    PREFIX idm: <http://www.intavia.eu/idm-core/>
    PREFIX ore: <http://www.openarchives.org/ore/terms/>
    select ?biodes
    where { 
        <%s> ore:proxyIn ?biodes .
    }
    """
    rows = g.query(sparql_template % Proxy)
    # Only the first row matters; fall back to None when there is none.
    return next((row.biodes for row in rows), None)

In [20]:
#returns all triples connected to proxy to depth of 2
def allTriples(Proxy, g):
    """Collect the triples reachable from `Proxy` within two hops.

    The result holds every triple of `g` whose subject is `Proxy`, plus every
    triple whose subject is the object of one of those triples.

    Args:
        Proxy: node whose description is extracted.
        g: graph to read from; it is not modified.

    Returns:
        A new ``Graph`` containing the collected triples.
    """
    new_graph = Graph()
    # Nodes whose outgoing triples are already copied. Proxy itself is covered
    # by the loop below, so a triple pointing back at it needs no second scan.
    expanded = {Proxy}
    for triple in g.triples((Proxy, None, None)):
        new_graph.add(triple)
        neighbour = triple[2]  # the object of the triple, i.e. the second hop
        if neighbour in expanded:
            continue
        expanded.add(neighbour)
        new_graph += g.triples((neighbour, None, None))
    return new_graph

In [21]:
#for triples with proxy as subject, we can now use the actual person
def replaceProxyByPerson(triple, Person, Proxy):
    """Return `triple` with its subject swapped to `Person` when it equals `Proxy`.

    Only the subject position is examined; predicate and object are passed
    through unchanged.

    Args:
        triple: a 3-item ``(subject, predicate, object)`` sequence.
        Person: replacement subject.
        Proxy: subject value to be replaced.

    Returns:
        A ``(subject, predicate, object)`` tuple.
    """
    subject, predicate, obj = triple
    return (Person if subject == Proxy else subject), predicate, obj

In [40]:
def create_filename(proxy, output_dir='D:\\'):
    """Build the output file name for the named graph of `proxy`.

    The name is ``<output_dir><proxy id>_named_graph.trig``. The proxy id is
    the fifth ``/``-separated piece of the proxy IRI (for an IRI of the form
    ``scheme://host/dir/id`` that is ``id``). IRIs with fewer pieces fall back
    to their last non-empty piece instead of failing with an IndexError.

    Args:
        proxy: proxy IRI; converted with ``str()`` before splitting.
        output_dir: directory prefix for the file. A ``/`` is appended when it
            is non-empty and does not already end in ``/`` or ``\\``. The
            default keeps the original ``D:\\`` location.

    Returns:
        The file name as a string.

    Raises:
        ValueError: if `proxy` contains no non-empty piece to name the file after.
    """
    pieces = str(proxy).split('/')
    if len(pieces) > 4:
        proxy_id = pieces[4]
    else:
        # Shorter IRI than expected: use its last non-empty piece.
        non_empty = [piece for piece in pieces if piece]
        if not non_empty:
            raise ValueError('cannot derive a file name from proxy %r' % (proxy,))
        proxy_id = non_empty[-1]

    if output_dir and not output_dir.endswith(('/', '\\')):
        output_dir += '/'
    return output_dir + proxy_id + '_named_graph.trig'

In [46]:
# for one provided person, write all rdf statements into a named graph
def PersonToNG(ProvidedPerson, g):
    """Write one TriG file per proxy that is linked to `ProvidedPerson`.

    For each proxy returned by ``linkedProxies``, the triples gathered by
    ``allTriples`` are copied into a fresh graph, with the proxy replaced by
    `ProvidedPerson` wherever it is the subject. That graph is serialized to
    the path given by ``create_filename``.

    Args:
        ProvidedPerson: the provided person whose proxies are exported.
        g: the knowledge graph to read from.

    Returns:
        None; the result is the files written to disk.
    """
    for binding in linkedProxies(ProvidedPerson, g):
        Proxy = binding['s']
        # Looked up for the provenance statement sketched below; not used yet.
        Source = findSource(Proxy, g)
        #addTripleToMainGraph(g ex:saidBy Source)
        named_graph = Graph()
        for triple in allTriples(Proxy, g):
            named_graph.add(replaceProxyByPerson(triple, ProvidedPerson, Proxy))
        named_graph.serialize(destination=create_filename(Proxy), format='trig')

In [47]:
#make a query to find all the persons in the knowledge graph
def find_persons(g):
    """Query `g` for every resource typed as ``idm:Provided_Person``.

    Args:
        g: object exposing ``query(sparql_text)``.

    Returns:
        Whatever ``g.query`` returns for the query; each row binds ``?person``.
    """
    person_query = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX ns2: <http://data.biographynet.nl/rdf/>
    PREFIX idm: <http://www.intavia.eu/idm-core/>
    select distinct ?person
    where { 
        ?person rdf:type idm:Provided_Person .
    }
    """
    return g.query(person_query)

In [48]:
# Export the named-graph files for every provided person found in the graph.
Persons = find_persons(g)
for Person in (row['person'] for row in Persons):
    PersonToNG(Person, g)