Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 1003 #1005

Merged
merged 8 commits into from
Apr 17, 2020
25 changes: 17 additions & 8 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,9 @@ class Graph(Node):
For more on named graphs, see: http://www.w3.org/2004/03/trix/
"""

def __init__(self, store="default", identifier=None, namespace_manager=None):
def __init__(self, store="default", identifier=None, namespace_manager=None, base=None):
super(Graph, self).__init__()
self.base = base
self.__identifier = identifier or BNode()

if not isinstance(self.__identifier, Node):
Expand Down Expand Up @@ -952,6 +953,11 @@ def serialize(
Format support can be extended with plugins,
but "xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig" and "nquads" are built in.
"""

# if base is not given as attribute use the base set for the graph
if base is None:
base = self.base

serializer = plugin.get(format, Serializer)(self)
if destination is None:
stream = BytesIO()
Expand Down Expand Up @@ -1336,14 +1342,16 @@ class ConjunctiveGraph(Graph):
All queries are carried out against the union of all graphs.
"""

def __init__(self, store="default", identifier=None):
def __init__(self, store="default", identifier=None, default_graph_base=None):
super(ConjunctiveGraph, self).__init__(store, identifier=identifier)
assert self.store.context_aware, (
"ConjunctiveGraph must be backed by" " a context aware store."
)
self.context_aware = True
self.default_union = True # Conjunctive!
self.default_context = Graph(store=self.store, identifier=identifier or BNode())
self.default_context = Graph(
store=self.store, identifier=identifier or BNode(), base=default_graph_base
)

def __str__(self):
pattern = (
Expand Down Expand Up @@ -1483,12 +1491,12 @@ def contexts(self, triple=None):
else:
yield self.get_context(context)

def get_context(self, identifier, quoted=False):
def get_context(self, identifier, quoted=False, base=None):
"""Return a context graph for the given identifier

identifier must be a URIRef or BNode.
"""
return Graph(store=self.store, identifier=identifier, namespace_manager=self)
return Graph(store=self.store, identifier=identifier, namespace_manager=self, base=base)

def remove_context(self, context):
"""Removes the given context from the graph"""
Expand Down Expand Up @@ -1651,13 +1659,13 @@ class Dataset(ConjunctiveGraph):
.. versionadded:: 4.0
"""

def __init__(self, store="default", default_union=False):
def __init__(self, store="default", default_union=False, default_graph_base=None):
super(Dataset, self).__init__(store=store, identifier=None)

if not self.store.graph_aware:
raise Exception("DataSet must be backed by a graph-aware store!")
self.default_context = Graph(
store=self.store, identifier=DATASET_DEFAULT_GRAPH_ID
store=self.store, identifier=DATASET_DEFAULT_GRAPH_ID, base=default_graph_base
)

self.default_union = default_union
Expand All @@ -1668,7 +1676,7 @@ def __str__(self):
)
return pattern % self.store.__class__.__name__

def graph(self, identifier=None):
def graph(self, identifier=None, base=None):
if identifier is None:
from rdflib.term import rdflib_skolem_genid

Expand All @@ -1678,6 +1686,7 @@ def graph(self, identifier=None):
identifier = BNode().skolemize()

g = self._graph(identifier)
g.base = base

self.store.add_graph(g)
return g
Expand Down
16 changes: 14 additions & 2 deletions rdflib/plugins/serializers/rdfxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ def __bindings(self):
yield prefix, namespace

def serialize(self, stream, base=None, encoding=None, **args):
self.base = base
# if base is given here, use that, if not and a base is set for the graph use that
if base is not None:
self.base = base
elif self.store.base is not None:
self.base = self.store.base
self.__stream = stream
self.__serialized = {}
encoding = self.encoding
Expand All @@ -62,6 +66,8 @@ def serialize(self, stream, base=None, encoding=None, **args):
# If provided, write xml:base attribute for the RDF
if "xml_base" in args:
write(' xml:base="%s"\n' % args['xml_base'])
elif self.base:
write(' xml:base="%s"\n' % self.base)
# TODO:
# assert(
# namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
Expand Down Expand Up @@ -163,7 +169,11 @@ def __init__(self, store, max_depth=3):
def serialize(self, stream, base=None, encoding=None, **args):
self.__serialized = {}
store = self.store
self.base = base
# if base is given here, use that, if not and a base is set for the graph use that
if base is not None:
self.base = base
elif store.base is not None:
self.base = store.base
self.max_depth = args.get("max_depth", 3)
assert self.max_depth > 0, "max_depth must be greater than 0"

Expand All @@ -184,6 +194,8 @@ def serialize(self, stream, base=None, encoding=None, **args):

if "xml_base" in args:
writer.attribute(XMLBASE, args["xml_base"])
elif self.base:
writer.attribute(XMLBASE, self.base)

writer.namespaces(namespaces.items())

Expand Down
8 changes: 6 additions & 2 deletions rdflib/plugins/serializers/trig.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def preprocess(self):
for triple in context:
self.preprocessTriple(triple)

self._contexts[context]=(self.orderSubjects(), self._subjects, self._references)
self._contexts[context] = (self.orderSubjects(), self._subjects, self._references)

def reset(self):
super(TrigSerializer, self).reset()
Expand All @@ -49,7 +49,11 @@ def serialize(self, stream, base=None, encoding=None,
spacious=None, **args):
self.reset()
self.stream = stream
self.base = base
# if base is given here, use that, if not and a base is set for the graph use that
if base is not None:
self.base = base
elif self.store.base is not None:
self.base = self.store.base

if spacious is not None:
self._spacious = spacious
Expand Down
7 changes: 7 additions & 0 deletions rdflib/plugins/serializers/trix.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ def serialize(self, stream, base=None, encoding=None, **args):
self.writer = XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS})

self.writer.push(TRIXNS[u"TriX"])
# if base is given here, use that, if not and a base is set for the graph use that
if base is None and self.store.base is not None:
base = self.store.base
if base is not None:
self.writer.attribute("http://www.w3.org/XML/1998/namespacebase", base)
self.writer.namespaces()

if isinstance(self.store, ConjunctiveGraph):
Expand All @@ -44,6 +49,8 @@ def serialize(self, stream, base=None, encoding=None, **args):

def _writeGraph(self, graph):
self.writer.push(TRIXNS[u"graph"])
if graph.base:
self.writer.attribute("http://www.w3.org/XML/1998/namespacebase", graph.base)
if isinstance(graph.identifier, URIRef):
self.writer.element(
TRIXNS[u"uri"], content=text_type(graph.identifier))
Expand Down
11 changes: 10 additions & 1 deletion rdflib/plugins/serializers/turtle.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,11 @@ def serialize(self, stream, base=None, encoding=None,
spacious=None, **args):
self.reset()
self.stream = stream
self.base = base
# if base is given here, use that, if not and a base is set for the graph use that
if base is not None:
self.base = base
white-gecko marked this conversation as resolved.
Show resolved Hide resolved
elif self.store.base is not None:
self.base = self.store.base

if spacious is not None:
self._spacious = spacious
Expand All @@ -246,6 +250,8 @@ def serialize(self, stream, base=None, encoding=None,
self.endDocument()
stream.write(b("\n"))

self.base = None

def preprocessTriple(self, triple):
super(TurtleSerializer, self).preprocessTriple(triple)
for i, node in enumerate(triple):
Expand Down Expand Up @@ -291,6 +297,9 @@ def getQName(self, uri, gen_prefix=True):
def startDocument(self):
self._started = True
ns_list = sorted(self.namespaces.items())

if self.base:
self.write(self.indent() + '@base <%s> .\n' % self.base)
for prefix, uri in ns_list:
self.write(self.indent() + '@prefix %s: <%s> .\n' % (prefix, uri))
if ns_list and self._spacious:
Expand Down
117 changes: 117 additions & 0 deletions test/test_issue1003.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
from rdflib import Graph, Dataset, Literal, Namespace, RDF, URIRef
from rdflib.namespace import SKOS, DCTERMS

"""
Testing scenarios:
1. no base set
2. base set at graph creation
3. base set at serialization
4. base set at both graph creation & serialization, serialization overrides
5. multiple serialization side effect checking
6. checking results for RDF/XML
7. checking results for N3
8. checking results for TriX & TriG
"""

# Shared fixtures used by the scenarios below.
# Two distinct base IRIs, so a test can tell which one reached the output.
base_one, base_two = Namespace("http://one.org/"), Namespace("http://two.org/")
title, description = (
    Literal("Title", lang="en"),
    Literal("Test Description", lang="en"),
)
creator = URIRef("https://creator.com")
cs = URIRef("")  # empty IRI reference used as the subject of every triple

# Template graph: each scenario copies these triples into its own graph.
g = Graph()
g.add((cs, RDF.type, SKOS.ConceptScheme))
g.add((cs, DCTERMS.creator, creator))
g.add((cs, DCTERMS.source, URIRef("nick")))
g.bind("dct", DCTERMS)
g.bind("skos", SKOS)


# 1. no base set for graph, no base set for serialization
g1 = Graph()
g1 += g
# @base should not be in output; serialize the graph built for this scenario
# (g1), not the shared template graph
assert "@base" not in g1.serialize(format='turtle').decode("utf-8")


# 2. graph constructed with base one; serialize() called without a base
g2 = Graph(base=base_one)
g2 += g
# the graph-level base (one) must be emitted as the @base directive
turtle_g2 = g2.serialize(format="turtle").decode("utf-8")
assert "@base <http://one.org/> ." in turtle_g2


# 3. graph constructed without a base; base two passed to serialize()
g3 = Graph()
g3 += g
# the serialization-level base (two) must be emitted as the @base directive
turtle_g3 = g3.serialize(format="turtle", base=base_two).decode("utf-8")
assert "@base <http://two.org/> ." in turtle_g3


# 4. base one set for graph, base two set for serialization, serialization (two) overrides
g4 = Graph(base=base_one)
g4 += g
# @base should be in output, from serialization (two)
assert "@base <http://two.org/> ." in g4.serialize(format='turtle', base=base_two).decode("utf-8")
# just checking that the graph setting (one) hasn't snuck through
assert "@base <http://one.org/> ." not in g4.serialize(format='turtle', base=base_two).decode("utf-8")


# 5. a base passed to one serialize() call must not leak into the next call
g5 = Graph()
g5 += g
# first call: @base comes from the serialization argument (two)
turtle_with_base = g5.serialize(format="turtle", base=base_two).decode("utf-8")
assert "@base <http://two.org/> ." in turtle_with_base

# second call: checking for side effects of the first one - no base is given
# now and none is set on the graph, so @base should not be in output
turtle_without_base = g5.serialize(format="turtle").decode("utf-8")
assert "@base" not in turtle_without_base


# 6. checking results for RDF/XML
g6 = Graph()
g6 += g
g6.bind("dct", DCTERMS)
g6.bind("skos", SKOS)
# no base anywhere: the xml:base attribute must be absent
# (RDF/XML never writes "@xml:base", so checking for that string proves nothing)
assert "xml:base" not in g6.serialize(format='xml').decode("utf-8")
# base given at serialization only
assert 'xml:base="http://one.org/"' in g6.serialize(format='xml', base=base_one).decode("utf-8")
g6.base = base_two
# base set on the graph is used when serialize() gets none ...
assert 'xml:base="http://two.org/"' in g6.serialize(format='xml').decode("utf-8")
# ... and is overridden by a base passed to serialize()
assert 'xml:base="http://one.org/"' in g6.serialize(format='xml', base=base_one).decode("utf-8")

# 7. checking results for N3
g7 = Graph()
g7 += g
g7.bind("dct", DCTERMS)
g7.bind("skos", SKOS)
# no base anywhere: the N3 output must not contain an @base directive
# (this section is about N3, so serialize as n3 rather than xml)
assert "@base" not in g7.serialize(format='n3').decode("utf-8")
# base given at serialization only
assert "@base <http://one.org/> ." in g7.serialize(format='n3', base=base_one).decode("utf-8")
g7.base = base_two
# base set on the graph is used when serialize() gets none ...
assert "@base <http://two.org/> ." in g7.serialize(format='n3').decode("utf-8")
# ... and is overridden by a base passed to serialize()
assert "@base <http://one.org/> ." in g7.serialize(format='n3', base=base_one).decode("utf-8")

# 8. checking results for TriX & TriG
# TriX can carry an xml:base on each <graph> element as well as on the
# enclosing <TriX> element; the TriG output is expected to contain only the
# single @base passed to serialize().
base_three = Namespace("http://three.org/")
ds1 = Dataset()
ds1.bind("dct", DCTERMS)
ds1.bind("skos", SKOS)
g8 = ds1.graph(URIRef('http://g8.com/'), base=base_one)  # base set at creation
g9 = ds1.graph(URIRef('http://g9.com/'))
g8 += g
g9 += g
g9.base = base_two  # base set after creation
ds1.base = base_three

# The base passed to serialize() (two) wins over the dataset base (three) on
# the root element, while each graph keeps its own base.
trix = ds1.serialize(format="trix", base=Namespace("http://two.org/")).decode("utf-8")
for expected in (
    '<graph xml:base="http://one.org/">',
    '<graph xml:base="http://two.org/">',
    '<TriX xml:base="http://two.org/"',
):
    assert expected in trix

trig = ds1.serialize(format="trig", base=Namespace("http://two.org/")).decode("utf-8")
for unexpected in ('@base <http://one.org/> .', '@base <http://three.org/> .'):
    assert unexpected not in trig
assert '@base <http://two.org/> .' in trig