# Requirements

1. Input folder
2. HypythesisInstances.ttl
3. PaperInstances.ttl
4. ProvenanceInstances.ttl

In [21]:
import sys

from rdflib import Graph, ConjunctiveGraph, Literal, BNode, Namespace, RDF, URIRef
from rdflib.namespace import DC, FOAF
import pprint

input_directory = "Input/"
hypothesis_instances = "HypothesisInstances.ttl"
paper_instances = "PaperInstances.ttl"
provenance_instances = "ProvenanceInstances.ttl"

hyp_inst_g = Graph()
hyp_inst_g.parse(input_directory+hypothesis_instances, format='turtle')

pap_inst_g = Graph()
pap_inst_g.parse(input_directory+paper_instances, format='turtle')

pro_inst_g = Graph()
pro_inst_g.parse(input_directory+provenance_instances, format='turtle')

merged_graph = hyp_inst_g + pro_inst_g + pap_inst_g


In [22]:
for stmt in merged_graph:
    pprint.pprint(stmt)

(rdflib.term.URIRef('http://example.org/hypothesis_ontology/12c8ef5614d9a35bed3ff95a3b1dc971842514fe#Provenance'),
 rdflib.term.URIRef('http://schema.org/url'),
 rdflib.term.URIRef('https://doi.org/10.1101/299776'))
(rdflib.term.URIRef('http://example.org/hypothesis_ontology/PMC5817272#Hypothesis'),
 rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
 rdflib.term.URIRef('http://example.org/hypothesis_ontology/Hypothesis'))
(rdflib.term.URIRef('http://example.org/hypothesis_ontology/PMC4586545#Provenance'),
 rdflib.term.URIRef('http://purl.org/dc/terms/title'),
 rdflib.term.Literal('Seronegative Celiac Disease and Immunoglobulin Deficiency: Where to Look in the Submerged Iceberg?'))
(rdflib.term.URIRef('http://example.org/hypothesis_ontology/PMC6693190#Hypothesis'),
 rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
 rdflib.term.URIRef('http://example.org/hypothesis_ontology/Hypothesis'))
(rdflib.term.URIRef('http://ns.inria.fr/covid19/e197a8c2fc

In [23]:
f = open("hypothesis_graph.nt", "w")
f.write(merged_graph.serialize(format='nt').decode())
f.close()

In [24]:
# You can also parse directly from a string
g = Graph()
g.parse("hypothesis_graph.nt", format="nt")

bibo_uri = URIRef("http://purl.org/ontology/bibo/")
bibo_namespace = Namespace(bibo_uri)
g.bind("bibo", bibo_namespace)

dct_uri = URIRef("http://purl.org/dc/terms/")
dct_namespace = Namespace(dct_uri)
g.bind("dct", dct_namespace)

fabio_uri = URIRef("http://purl.org/spar/fabio/")
fabio_namespace = Namespace(fabio_uri)
g.bind("fabio", fabio_namespace)

foaf_uri = URIRef("http://xmlns.com/foaf/0.1/")
foaf_namespace = Namespace(foaf_uri)
g.bind("foaf", foaf_namespace)

oa_uri = URIRef("http://www.w3.org/ns/oa#")
oa_namespace = Namespace(oa_uri)
g.bind("oa", oa_namespace)

schema_uri = URIRef("http://schema.org/")
schema_namespace = Namespace(schema_uri)
g.bind("schema", schema_namespace)

hyp_uri = URIRef("http://example.org/hypothesis_ontology/")
hyp_namespace = Namespace(hyp_uri)
g.bind("hyp", hyp_namespace)

print(g.serialize(format='turtle').decode())

@prefix bibo: <http://purl.org/ontology/bibo/> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix fabio: <http://purl.org/spar/fabio/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix hyp: <http://example.org/hypothesis_ontology/> .
@prefix oa: <http://www.w3.org/ns/oa#> .
@prefix schema: <http://schema.org/> .

<http://example.org/hypothesis_ontology/00340eea543336d54adda18236424de6a5e91c9d#Hypothesis> a hyp:Hypothesis ;
    hyp:isHypothesisOf <http://ns.inria.fr/covid19/00340eea543336d54adda18236424de6a5e91c9d> ;
    oa:hasSource <http://ns.inria.fr/covid19/00340eea543336d54adda18236424de6a5e91c9d#abstract> .

<http://example.org/hypothesis_ontology/00340eea543336d54adda18236424de6a5e91c9d#Provenance> a hyp:Provenance ;
    hyp:isProvenanceOf <http://ns.inria.fr/covid19/00340eea543336d54adda18236424de6a5e91c9d> ;
    dct:title "Regaining perspective on SARS-CoV-2 molecular tracing and its implications" ;
    bibo:doi "10.1101/2020.03.16.20034470" ;
    schema:url <https://doi

In [26]:
f = open("../Output-Graphs/hypothesis_graph.ttl", "w")
f.write(g.serialize(format='turtle').decode())
f.close()

## Saving RDF graphs

We use the function Graph.serialize(format)

In [8]:
g = Graph()
g.parse("demo.nt", format='n3')
# for a,b,c in g:
#     print(a,b,c)

print(g.serialize(format='nt').decode()) # 'html', 'hturtle', 'mdata', 'microdata', 'n3', 'nquads', 'nt', 'rdfa', 'rdfa1.0', 'rdfa1.1', 'trix', 'turtle', 'xml'

<http://bigasterisk.com/foaf.rdf#drewp> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .
<http://bigasterisk.com/foaf.rdf#drewp> <http://example.com/says> "Hello world" .




##  Merging graphs

Merging graphs can be done via sequential parsings or by the overloaded operator +

**Note:** Set-theoretic graph semantics apply

In [9]:
graph = Graph()

# Sequential parsings merge *new* triples

graph.parse("demo.nt", format='nt')
graph.parse("demo.xml", format='xml')

print("Graph has {} triples".format(len(graph)))

Graph has 2 triples


In [10]:
g1 = Graph()
g1.parse("demo.nt", format='nt')
print("g1 has {} triples".format(len(g1)))

g2 = Graph()
g2.parse("demo.xml", format='xml')
print("g2 has {} triples".format(len(g2)))

graph = g1 + g2
print("g1 + g2 has {} triples".format(len(graph)))

g1 has 2 triples
g2 has 2 triples
g1 + g2 has 2 triples


In [11]:
# Now, if we merge graphs with different contents

tim_g = Graph()
tim_g.parse("http://www.w3.org/People/Berners-Lee/card")
print("Tim graph has {} triples".format(len(tim_g)))

g3 = g1 + tim_g
print("g3 has {} triples".format(len(g3)))

Tim graph has 86 triples
g3 has 88 triples



## Creating RDF triples

Triples are added to the graph with the function Graph.add()

The parameter is a triple given in a Python **tuple** (subject, predicate, object)

Notice the namespace convenience syntax!

In [12]:
g = Graph()

# Create an identifier to use as the subject for Donna.
donna = BNode()
# donna = URIRef("http://example.org/donna")

# Add triples using store's add method.
g.add( (donna, RDF.type, FOAF.Person) )
g.add( (donna, FOAF.nick, Literal("donna", lang="foo")) )
g.add( (donna, FOAF.name, Literal("Donna Fales")) )
g.add( (donna, FOAF.mbox, URIRef("mailto:donna@example.org")) )

for i in range(100):
    g.add((donna, FOAF.hadAge, Literal(i)))

print(len(g))

for s in g:
    pprint.pprint(s)
    print()


104
(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'),
 rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'),
 rdflib.term.Literal('79', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))

(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'),
 rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'),
 rdflib.term.Literal('39', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))

(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'),
 rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'),
 rdflib.term.Literal('66', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))

(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'),
 rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'),
 rdflib.term.Literal('84', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))

(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'),
 rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'),
 rdflib.term.Liter

In [13]:
for stmt in g:
    print(stmt)

(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'), rdflib.term.Literal('79', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'), rdflib.term.Literal('39', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'), rdflib.term.Literal('66', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'), rdflib.term.Literal('84', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
(rdflib.term.BNode('Nd18bb9b74e0e40cc865d314fd4514240'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/hadAge'), rdflib.term.Literal('6', datatype=r

In [14]:
print(FOAF.Person)

http://xmlns.com/foaf/0.1/Person


In [16]:
print(FOAF.imadethisup)


http://xmlns.com/foaf/0.1/imadethisup


## Navigating graphs

rdflib uses iterators to navigate Graphs. The methods for navigating subjects, predicates and objects are Graph.subjects, Graph.predicates, Graph.objects

In [17]:
g = Graph()
g.parse("http://www.w3.org/People/Berners-Lee/card")

# Iterate over triples in store and print them out.
print("--- printing raw triples ---")
for s, p, o in g:
    print(p)

--- printing raw triples ---
http://xmlns.com/foaf/0.1/nick
http://purl.org/dc/elements/1.1/title
http://www.w3.org/2006/vcard/ns#region
http://www.w3.org/2003/01/geo/wgs84_pos#lat
http://www.w3.org/2000/10/swap/pim/contact#preferredURI
http://www.w3.org/ns/auth/cert#exponent
http://xmlns.com/foaf/0.1/account
http://www.w3.org/ns/solid/terms#profileBackgroundColor
http://www.w3.org/ns/solid/terms#editableProfile
http://www.w3.org/2006/vcard/ns#postal-code
http://xmlns.com/foaf/0.1/member
http://www.w3.org/2000/10/swap/pim/contact#street
http://xmlns.com/foaf/0.1/maker
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/2000/01/rdf-schema#label
http://xmlns.com/foaf/0.1/primaryTopic
http://www.w3.org/2003/01/geo/wgs84_pos#long
http://purl.org/dc/terms/title
http://www.w3.org/2000/01/rdf-schema#label
http://xmlns.com/foaf/0.1/maker
http://purl.org/dc/elements/1.1/title
http://creativecommons.org/ns#license
http://www.w3.org/ns/pim/space#storage
http://xmlns.com/foaf/0.1/acc

In [18]:
# Printing subjects, predicates and objects out of the tuple omits Python datatypes
print("--- printing raw triples ---")
for s, p, o in g:
    print(s, p, o)

--- printing raw triples ---
https://www.w3.org/People/Berners-Lee/card#i http://xmlns.com/foaf/0.1/nick TimBL
http://www.w3.org/DesignIssues/Overview.html http://purl.org/dc/elements/1.1/title Design Issues for the World Wide Web
N0a65e5e17a4a4d60802c311d46d066af http://www.w3.org/2006/vcard/ns#region MA
N737bbabff0e54ce9b0b5becd6adfebb6 http://www.w3.org/2003/01/geo/wgs84_pos#lat 42.361860
https://www.w3.org/People/Berners-Lee/card#i http://www.w3.org/2000/10/swap/pim/contact#preferredURI https://www.w3.org/People/Berners-Lee/card#i
N1912af6a0f4a4e96b76ac93ef02121e6 http://www.w3.org/ns/auth/cert#exponent 65537
https://www.w3.org/People/Berners-Lee/card#i http://xmlns.com/foaf/0.1/account http://www.reddit.com/user/timbl/
https://www.w3.org/People/Berners-Lee/card#i http://www.w3.org/ns/solid/terms#profileBackgroundColor #ffffff
https://www.w3.org/People/Berners-Lee/card#i http://www.w3.org/ns/solid/terms#editableProfile https://timbl.com/timbl/Public/friends.ttl
N0a65e5e17a4a4d60802

In [19]:
print("PRINTING SUBJECTS")
for s in g.subjects():
    print(s)

PRINTING SUBJECTS
https://www.w3.org/People/Berners-Lee/card#i
http://www.w3.org/DesignIssues/Overview.html
N0a65e5e17a4a4d60802c311d46d066af
N737bbabff0e54ce9b0b5becd6adfebb6
https://www.w3.org/People/Berners-Lee/card#i
N1912af6a0f4a4e96b76ac93ef02121e6
https://www.w3.org/People/Berners-Lee/card#i
https://www.w3.org/People/Berners-Lee/card#i
https://www.w3.org/People/Berners-Lee/card#i
N0a65e5e17a4a4d60802c311d46d066af
http://www.w3.org/data#W3C
N1d69280ae86649c89fb536889eb30b73
http://dig.csail.mit.edu/breadcrumbs/blog/4
N0a65e5e17a4a4d60802c311d46d066af
http://www.ecs.soton.ac.uk/~dt2/dlstuff/www2006_data#panel-panelk01
http://www.w3.org/People/Berners-Lee/card
N737bbabff0e54ce9b0b5becd6adfebb6
http://www.w3.org/2011/Talks/0331-hyderabad-tbl/data#talk
https://www.w3.org/People/Berners-Lee/card#i
http://www.w3.org/People/Berners-Lee/card
http://www.w3.org/People/Berners-Lee/card
https://timbl.com/timbl/Public/friends.ttl
https://www.w3.org/People/Berners-Lee/card#i
https://www.w3.org

In [20]:
print("PRINTING PREDICATES")
for p in g.predicates():
    if len(p) > 13:
        print(p)


PRINTING PREDICATES
http://xmlns.com/foaf/0.1/nick
http://purl.org/dc/elements/1.1/title
http://www.w3.org/2006/vcard/ns#region
http://www.w3.org/2003/01/geo/wgs84_pos#lat
http://www.w3.org/2000/10/swap/pim/contact#preferredURI
http://www.w3.org/ns/auth/cert#exponent
http://xmlns.com/foaf/0.1/account
http://www.w3.org/ns/solid/terms#profileBackgroundColor
http://www.w3.org/ns/solid/terms#editableProfile
http://www.w3.org/2006/vcard/ns#postal-code
http://xmlns.com/foaf/0.1/member
http://www.w3.org/2000/10/swap/pim/contact#street
http://xmlns.com/foaf/0.1/maker
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/2000/01/rdf-schema#label
http://xmlns.com/foaf/0.1/primaryTopic
http://www.w3.org/2003/01/geo/wgs84_pos#long
http://purl.org/dc/terms/title
http://www.w3.org/2000/01/rdf-schema#label
http://xmlns.com/foaf/0.1/maker
http://purl.org/dc/elements/1.1/title
http://creativecommons.org/ns#license
http://www.w3.org/ns/pim/space#storage
http://xmlns.com/foaf/0.1/account
http

In [21]:
print("PRINTING OBJECTS")
for o in g.objects():
    print(o)

PRINTING OBJECTS
TimBL
Design Issues for the World Wide Web
MA
42.361860
https://www.w3.org/People/Berners-Lee/card#i
65537
http://www.reddit.com/user/timbl/
#ffffff
https://timbl.com/timbl/Public/friends.ttl
02139
https://www.w3.org/People/Berners-Lee/card#i
32 Vassar Street
https://www.w3.org/People/Berners-Lee/card#i
http://www.w3.org/2006/vcard/ns#Work
The Next Wave of the Web (Plenary Panel)
https://www.w3.org/People/Berners-Lee/card#i
-71.091840
Designing the Web for an Open Society
Tim Berners-Lee
https://www.w3.org/People/Berners-Lee/card#i
Tim Berners-Lee's FOAF file
http://creativecommons.org/licenses/by-nc/3.0/
https://timbl.com/timbl/Public/
http://en.wikipedia.org/wiki/User:Timbl
Identity, Reference and the Web workshop 2006
MIT CSAIL Building 32
https://timbl.solid.community/
https://www.w3.org/
Timothy Berners-Lee
https://www.w3.org/People/Berners-Lee/card#i
https://www.w3.org/People/Berners-Lee/card#amy
https://www.w3.org/People/Berners-Lee/card#i
http://creativecommons

We can also filter the subjects, predicates and objects we want to retrieve, and match their values like in a database "join" operation

In [22]:
g = Graph()

# Create an identifier to use as the subject for Donna.
donna = URIRef('urn:donna')
ila = URIRef('urn:ila')
# Add triples using store's add method.
g.add( (donna, RDF.type, FOAF.Person) )
g.add( (ila, RDF.type, FOAF.Person) )
g.add( (ila, RDF.type, FOAF.Teacher) )
g.add( (donna, FOAF.nick, Literal("donna", lang="foo")) )
g.add( (donna, FOAF.name, Literal("Donna Fales")) )
g.add( (donna, FOAF.mbox, URIRef("mailto:donna@example.org")) )
g.add( (ila, FOAF.mbox, URIRef("mailto:ila@example.org")) )

# For each foaf:Person in the store print out its mbox property.
# print("--- printing mboxes ---")
# for person in g.subjects(RDF.type, FOAF.Person):
#     for mbox in g.objects(person, FOAF.mbox):
#         print(mbox)

# You can reuse matches of subjects to filter further e.g. objects
for entity in g.subjects(RDF.type, None):
    print(entity)
    for objects in g.objects(entity, RDF.type):
        print(objects)

urn:ila
http://xmlns.com/foaf/0.1/Teacher
http://xmlns.com/foaf/0.1/Person
urn:ila
http://xmlns.com/foaf/0.1/Teacher
http://xmlns.com/foaf/0.1/Person
urn:donna
http://xmlns.com/foaf/0.1/Person


### Basic triple matching (almost querying!)

We use method Graph.triples and a Python tuple that acts as a mask for specifying our criteria

In [23]:
g = Graph()
g.parse("http://www.w3.org/People/Berners-Lee/card")

tim = URIRef("https://www.w3.org/People/Berners-Lee/card#i")

if ( tim, RDF.type, FOAF.Person ) in g:
   print("This graph knows that Tim is a person!")

if ( tim, None, None ) in g:
    print("This graph contains triples about Tim!")

This graph knows that Tim is a person!
This graph contains triples about Tim!


In [24]:
for s,p,o in g.triples( (None, None, None) ):
    print(s,p,o)

N3ff07323bac645a6871eedd290d5b770 http://www.w3.org/2000/10/swap/pim/contact#postalCode 02139
http://dig.csail.mit.edu/data#DIG http://xmlns.com/foaf/0.1/member https://www.w3.org/People/Berners-Lee/card#i
https://www.w3.org/People/Berners-Lee/card#i http://www.w3.org/2000/10/swap/pim/contact#publicHomePage http://www.w3.org/People/Berners-Lee/
https://www.w3.org/People/Berners-Lee/card#i http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://xmlns.com/foaf/0.1/Person
https://timbl.com/timbl/Public/friends.ttl http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://xmlns.com/foaf/0.1/PersonalProfileDocument
N3ff07323bac645a6871eedd290d5b770 http://www.w3.org/2000/10/swap/pim/contact#street2 MIT CSAIL Building 32
https://www.w3.org/People/Berners-Lee/card#i http://www.w3.org/ns/solid/terms#profileHighlightColor #00467E
https://www.w3.org/People/Berners-Lee/card#i http://www.w3.org/2000/10/swap/pim/contact#preferredURI https://www.w3.org/People/Berners-Lee/card#i
Nf963e132ac39416ea77fb587

In [25]:
for s,p,o in g.triples( (tim, RDF.type, None) ):
    print(s,p,o)

https://www.w3.org/People/Berners-Lee/card#i http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2000/10/swap/pim/contact#Male
https://www.w3.org/People/Berners-Lee/card#i http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://xmlns.com/foaf/0.1/Person


## Namespaces and bindings

In [26]:
mid_uri = URIRef("http://purl.org/midi-ld/midi#")
mid = Namespace(mid_uri)

print(mid['hello'])  # as item - for things that are not valid python identifiers
print(mid.hello)     # as attribute

http://purl.org/midi-ld/midi#hello
http://purl.org/midi-ld/midi#hello


In [27]:
g = Graph()

# Create an identifier to use as the subject for Donna.
donna = BNode()

# Add triples using store's add method.
g.add( (donna, RDF.type, FOAF.Person) )
g.add( (donna, FOAF.nick, Literal("donna", lang="foo")) )
g.add( (donna, FOAF.name, Literal("Donna Fales")) )
g.add( (donna, FOAF.mbox, URIRef("mailto:donna@example.org")) )

print(g.serialize(format='turtle').decode())

@prefix ns1: <http://xmlns.com/foaf/0.1/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a ns1:Person ;
    ns1:mbox <mailto:donna@example.org> ;
    ns1:name "Donna Fales" ;
    ns1:nick "donna"@foo .




In [28]:
foaf_uri = URIRef("http://xmlns.com/foaf/0.1/")
foaf_namespace = Namespace(foaf_uri)

g = Graph()

# Bind a few prefix, namespace pairs for more readable output
g.bind("foaf", foaf_namespace)

# Create an identifier to use as the subject for Donna.
donna = BNode()

# Add triples using store's add method.
g.add( (donna, RDF.type, FOAF.Person) )
g.add( (donna, FOAF.nick, Literal("donna", lang="foo")) )
g.add( (donna, FOAF.name, Literal("Donna Fales")) )
g.add( (donna, FOAF.mbox, URIRef("mailto:donna@example.org")) )

print(g.serialize(format='turtle').decode())

@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a foaf:Person ;
    foaf:mbox <mailto:donna@example.org> ;
    foaf:name "Donna Fales" ;
    foaf:nick "donna"@foo .


