From bd797ac225e0a96e0e84e4d5166612565e550ebf Mon Sep 17 00:00:00 2001 From: Matt Goldberg <59745812+mgberg@users.noreply.github.com> Date: Mon, 31 Jul 2023 19:32:06 -0400 Subject: [PATCH] feat: Don't generate prefixes for unknown URIs When serializing RDF graphs, URIs with unknown prefixes were assigned a generated namespace prefix such as `ns1:`. While this produces smaller files, the output is less readable. This change removes this automatic assignment of namespace prefixes. This is somewhat of an aesthetic choice; eventually we should offer more flexibility in this regard so that users can exercise more control over how URIs in unknown namespaces are handled. With this change, users can still manually create namespace prefixes for URIs in unknown namespaces, whereas previously there was no way to avoid the undesired behaviour, so this seems like the better default. --- rdflib/plugins/serializers/trig.py | 6 ++++-- test/test_trig.py | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rdflib/plugins/serializers/trig.py b/rdflib/plugins/serializers/trig.py index 18bee3f21..6f2aa50a8 100644 --- a/rdflib/plugins/serializers/trig.py +++ b/rdflib/plugins/serializers/trig.py @@ -40,7 +40,8 @@ def preprocess(self) -> None: if len(context) == 0: continue self.store = context - self.getQName(context.identifier) + # Don't generate a new prefix for a graph URI if one already exists + self.getQName(context.identifier, False) self._subjects = {} for triple in context: @@ -97,7 +98,8 @@ def serialize( if isinstance(store.identifier, BNode): iri = store.identifier.n3() else: - iri = self.getQName(store.identifier) + # Show the full graph URI if a prefix for it doesn't already exist + iri = self.getQName(store.identifier, False) if iri is None: # type error: "IdentifiedNode" has no attribute "n3" iri = store.identifier.n3() # type: ignore[attr-defined] diff --git a/test/test_trig.py b/test/test_trig.py index de5c2108f..1c158fa86 100644 --- 
a/test/test_trig.py +++ b/test/test_trig.py @@ -60,15 +60,15 @@ def test_remember_namespace(): # prefix for the graph but later serialize() calls would work. first_out = g.serialize(format="trig", encoding="latin-1") second_out = g.serialize(format="trig", encoding="latin-1") - assert b"@prefix ns1: ." in second_out - assert b"@prefix ns1: ." in first_out + assert b"@prefix ns1: ." not in second_out + assert b"@prefix ns1: ." not in first_out def test_graph_qname_syntax(): g = rdflib.ConjunctiveGraph() g.add(TRIPLE + (rdflib.URIRef("http://example.com/graph1"),)) out = g.serialize(format="trig", encoding="latin-1") - assert b"ns1:graph1 {" in out + assert b"ns1:graph1 {" not in out def test_graph_uri_syntax(): @@ -178,9 +178,9 @@ def test_prefixes(): cg.parse(data=data, format="trig") data = cg.serialize(format="trig", encoding="latin-1") - assert "ns2: ".encode("latin-1") not in data, data - assert "ns2:document1".encode("latin-1") in data, data + assert "ns2:document1".encode("latin-1") not in data, data def test_issue_2154():