From 5ee8bd796049a6c940ba26709b8d91b95686f9ab Mon Sep 17 00:00:00 2001 From: Nicholas Car Date: Tue, 25 Jul 2023 22:55:41 +0200 Subject: [PATCH] Longturtle improvements (#2500) * small updates to the longturtle serializer and an improved test for it * add docco --------- Co-authored-by: Iwan Aucamp Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/plugin_serializers.rst | 11 +- rdflib/plugins/serializers/longturtle.py | 43 +- .../test_serializer_longturtle.py | 424 ++++++++++-------- 3 files changed, 268 insertions(+), 210 deletions(-) diff --git a/docs/plugin_serializers.rst b/docs/plugin_serializers.rst index ac836ee73..39d00df7f 100644 --- a/docs/plugin_serializers.rst +++ b/docs/plugin_serializers.rst @@ -32,7 +32,7 @@ xml :class:`~rdflib.plugins.serializers.rdfxml.XMLSerializer` JSON-LD ------- -JSON-LD - 'json-ld' - has been incorprated in rdflib since v6.0.0. +JSON-LD - 'json-ld' - has been incorporated into RDFLib since v6.0.0. HexTuples --------- @@ -43,3 +43,12 @@ Hextuple) will be an empty string. For context-aware (multi-graph) serialization, the 'graph' field of the default graph will be an empty string and the values for other graphs will be Blank Node IDs or IRIs. + +Longturtle +---------- +Longturtle is just the turtle format with newlines preferred over compactness - multiple nodes on the same line +to enhance the format's text file version control (think Git) friendliness - and more modern forms of prefix markers - +PREFIX instead of @prefix - to make it as similar to SPARQL as possible. + +Longturtle is Turtle 1.1 compliant and will work wherever ordinary turtle works, however some very old parsers don't +understand PREFIX, only @prefix... diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py index ac2febdcf..e886574f3 100644 --- a/rdflib/plugins/serializers/longturtle.py +++ b/rdflib/plugins/serializers/longturtle.py @@ -13,7 +13,7 @@ on the start of the next line * uses default encoding (encode()) is used instead of "latin-1" -- Nicholas Car, 2021 +- Nicholas Car, 2023 """ from rdflib.exceptions import Error @@ -101,7 +101,6 @@ def serialize(self, stream, base=None, encoding=None, spacious=None, **args): self.write("\n") self.endDocument() - self.write("\n") self.base = None @@ -168,21 +167,20 @@ def s_default(self, subject): self.path(subject, SUBJECT) self.write("\n" + self.indent()) self.predicateList(subject) - self.write(" ;\n.") + self.write("\n.") return True def s_squared(self, subject): if (self._references[subject] > 0) or not isinstance(subject, BNode): return False self.write("\n" + self.indent() + "[]") - self.predicateList(subject) + self.predicateList(subject, newline=False) self.write(" ;\n.") return True def path(self, node, position, newline=False): if not ( - self.p_squared(node, position, newline) - or self.p_default(node, position, newline) + self.p_squared(node, position) or self.p_default(node, position, newline) ): raise Error("Cannot serialize node '%s'" % (node,)) @@ -207,7 +205,11 @@ def label(self, node, position): return self.getQName(node, position == VERB) or node.n3() - def p_squared(self, node, position, newline=False): + def p_squared( + self, + node, + position, + ): if ( not isinstance(node, BNode) or node in self._serialized @@ -216,23 +218,19 @@ def p_squared(self, node, position, newline=False): ): return False - if not newline: - self.write(" ") - if self.isValidList(node): # this is a list self.depth += 2 - self.write("(\n") - self.depth -= 1 + self.write(" (\n") + self.depth -= 2 self.doList(node) - self.depth -= 1 - self.write("\n" + self.indent(1) + ")") + self.write("\n" + self.indent() + ")") else: + # this is a Blank Node self.subjectDone(node) - self.depth += 2 - self.write("[\n") - self.depth -= 1 - self.predicateList(node, newline=False) + self.write("\n" + self.indent(1) + "[\n") + self.depth += 1 + self.predicateList(node) self.depth -= 1 self.write("\n" + self.indent(1) + "]") @@ -279,6 +277,7 @@ def predicateList(self, subject, newline=False): self.write(" ;\n" + self.indent(1)) self.verb(predicate, newline=True) self.objectList(properties[predicate]) + self.write(" ;") def verb(self, node, newline=False): self.path(node, VERB, newline) @@ -291,11 +290,13 @@ def objectList(self, objects): self.depth += depthmod first_nl = False if count > 1: - self.write("\n" + self.indent(1)) + if not isinstance(objects[0], BNode): + self.write("\n" + self.indent(1)) first_nl = True self.path(objects[0], OBJECT, newline=first_nl) for obj in objects[1:]: - self.write(" ,\n") - self.write(self.indent(1)) + self.write(" ,") + if not isinstance(obj, BNode): + self.write("\n" + self.indent(1)) self.path(obj, OBJECT, newline=True) self.depth -= depthmod diff --git a/test/test_serializers/test_serializer_longturtle.py b/test/test_serializers/test_serializer_longturtle.py index cc184787a..847d506ab 100644 --- a/test/test_serializers/test_serializer_longturtle.py +++ b/test/test_serializers/test_serializer_longturtle.py @@ -1,207 +1,255 @@ -# tests for the longturtle Serializer +import difflib +from textwrap import dedent -from rdflib import Graph +from rdflib import Graph, Namespace +from rdflib.namespace import GEO, SDO def test_longturtle(): - g = Graph() + """Compares the output of a longturtle graph serialization to a fixed, hand-typed, target + to test most of the longtertle differences to regular turtle - g.parse( + Includes basic triples, Blank Nodes - 2-levels deep - Collections and so on""" + # load graph with data + g = Graph().parse( data=""" - @prefix ex: . - @prefix ex2: . - @prefix rdf: . - @prefix xsd: . + { + "@context": { + "cn": "https://linked.data.gov.au/def/cn/", + "sdo": "https://schema.org/", + "Organization": "sdo:Organization", + "Person": "sdo:Person", + "Place": "sdo:Place", + "PostalAddress": "sdo:PostalAddress", + "address": "sdo:address", + "addressLocality": "sdo:addressLocality", + "addressRegion": "sdo:addressRegion", + "postalCode": "sdo:postalCode", + "addressCountry": "sdo:addressCountry", + "streetAddress": "sdo:streetAddress", + "age": "sdo:age", + "alternateName": "sdo:alternateName", + "geo": "sdo:geo", + "hasPart": "sdo:hasPart", + "identifier": "sdo:identifier", + "location": "sdo:location", + "name": "sdo:name", + "polygon": "sdo:polygon", + "value": "http://www.w3.org/1999/02/22-rdf-syntax-ns#value", + "wktLiteral": "http://www.opengis.net/ont/geosparql#wktLiteral", + "worksFor": "sdo:worksFor" + }, + "@graph": [ + { + "@id": "https://kurrawong.ai", + "@type": "Organization", + "location": { + "@id": "https://kurrawong.ai/hq" + } + }, + { + "@id": "https://kurrawong.ai/hq", + "@type": "Place", + "address": { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab8" + }, + "geo": { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab13" + }, + "name": "KurrawongAI HQ" + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab8", + "@type": "PostalAddress", + "addressCountry": { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab12" + }, + "addressLocality": "Shorncliffe", + "addressRegion": "QLD", + "postalCode": 4017, + "streetAddress": { + "@list": [ + 72, + "Yundah", + "Street" + ] + } + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab12", + "identifier": "au", + "name": "Australia" + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab13", + "polygon": { + "@type": "wktLiteral", + "@value": "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))" + } + }, + { + "@id": "http://example.com/nicholas", + "@type": "Person", + "age": 41, + "alternateName": [ + "Nick Car", + "N.J. Car", + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab1" + } + ], + "name": { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab2" + }, + "worksFor": { + "@id": "https://kurrawong.ai" + } + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab1", + "name": "Dr N.J. Car" + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab2", + "@type": "cn:CompoundName", + "hasPart": [{ + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab3" + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab4" + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab5" + } + ] + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab3", + "@type": "cn:CompoundName", + "value": "Nicholas" + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab4", + "@type": "cn:CompoundName", + "value": "John" + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab5", + "@type": "cn:CompoundName", + "hasPart": [{ + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab6" + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab7" + } + ] + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab6", + "@type": "cn:CompoundName", + "value": "Car" + }, + { + "@id": "_:n6924e85bfee648a4a45bac9f4ab9909ab7", + "@type": "cn:CompoundName", + "value": "Maxov" + } + ] + } + """, + format="application/ld+json", + ) + + # declare a few namespaces for Turtle + g.bind("ex", Namespace("http://example.com/")) + g.bind("geo", GEO) + g.bind("cn", Namespace("https://linked.data.gov.au/def/cn/")) + g.bind("sdo", SDO) + + # run the long turtle serializer + output = g.serialize(format="longturtle") + + # fix the target + target = dedent( + """ PREFIX cn: + PREFIX ex: + PREFIX geo: + PREFIX rdf: + PREFIX sdo: + PREFIX xsd: - - a ex:Thing , ex:OtherThing ; - ex:name "Thing", "Other Thing"@en , "もの"@ja , "rzecz"@pl ; - ex:singleValueProp "propval" ; - ex:multiValueProp "propval 1" ; - ex:multiValueProp "propval 2" ; - ex:multiValueProp "propval 3" ; - ex:multiValueProp "propval 4" ; - ex:bnObj [ - ex:singleValueProp "propval" ; - ex:multiValueProp "propval 1" ; - ex:multiValueProp "propval 2" ; - ex:bnObj [ - ex:singleValueProp "propval" ; - ex:multiValueProp "propval 1" ; - ex:multiValueProp "propval 2" ; - ex:bnObj [ - ex:singleValueProp "propval" ; - ex:multiValueProp "propval 1" ; - ex:multiValueProp "propval 2" ; + ex:nicholas + a sdo:Person ; + sdo:age 41 ; + sdo:alternateName + [ + sdo:name "Dr N.J. Car" ; + ] , + "N.J. Car" , + "Nick Car" ; + sdo:name + [ + a cn:CompoundName ; + sdo:hasPart + [ + a cn:CompoundName ; + rdf:value "Nicholas" ; ] , [ - ex:singleValueProp "propval" ; - ex:multiValueProp "propval 1" ; - ex:multiValueProp "propval 2" ; + a cn:CompoundName ; + rdf:value "John" ; ] , [ - ex:singleValueProp "propval" ; - ex:multiValueProp "propval 1" ; - ex:multiValueProp "propval 2" ; + a cn:CompoundName ; + sdo:hasPart + [ + a cn:CompoundName ; + rdf:value "Car" ; + ] , + [ + a cn:CompoundName ; + rdf:value "Maxov" ; + ] ; ] ; - ] ; ] ; - . + sdo:worksFor ; + . - ex:b - rdf:type ex:Thing ; - ex:name "B" ; - ex2:name "B" . + + a sdo:Organization ; + sdo:location ; + . - ex:c - rdf:type ex:Thing ; - ex:name "C" ; - ex:lst2 ( - ex:one - ex:two - ex:three - ) ; - ex:lst ( - ex:one - ex:two - ex:three - ) , - ( - ex:four - ex:fize - ex:six - ) ; - ex:bnObj [ - ex:lst ( - ex:one - ex:two - ex:three - ) , - ( - ex:four - ex:fize - ex:six + + a sdo:Place ; + sdo:address + [ + a sdo:PostalAddress ; + sdo:addressCountry + [ + sdo:identifier "au" ; + sdo:name "Australia" ; + ] ; + sdo:addressLocality "Shorncliffe" ; + sdo:addressRegion "QLD" ; + sdo:postalCode 4017 ; + sdo:streetAddress ( + 72 + "Yundah" + "Street" ) ; - ] . - """, - format="turtle", - ) - s = g.serialize(format="longturtle") - lines = s.split("\n") - - assert "ex:b" in lines - assert " a ex:Thing ;" in lines - assert ( - """ ex2:name "B" ; -.""" - in s - ) - assert ( - """ ( - ex:one - ex:two - ex:three - ) ,""" - in s + ] ; + sdo:geo + [ + sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ; + ] ; + sdo:name "KurrawongAI HQ" ; + . + """ ) - assert ' ex:singleValueProp "propval" ;' in lines - - expected_s = """PREFIX ex: -PREFIX ex2: -PREFIX rdf: - -ex:b - a ex:Thing ; - ex:name "B" ; - ex2:name "B" ; -. - -ex:c - a ex:Thing ; - ex:bnObj [ - ex:lst - ( - ex:one - ex:two - ex:three - ) , - ( - ex:four - ex:fize - ex:six - ) - ] ; - ex:lst - ( - ex:four - ex:fize - ex:six - ) , - ( - ex:one - ex:two - ex:three - ) ; - ex:lst2 ( - ex:one - ex:two - ex:three - ) ; - ex:name "C" ; -. - - - a - ex:OtherThing , - ex:Thing ; - ex:bnObj [ - ex:bnObj [ - ex:bnObj - [ - ex:multiValueProp - "propval 1" , - "propval 2" ; - ex:singleValueProp "propval" - ] , - [ - ex:multiValueProp - "propval 1" , - "propval 2" ; - ex:singleValueProp "propval" - ] , - [ - ex:multiValueProp - "propval 1" , - "propval 2" ; - ex:singleValueProp "propval" - ] ; - ex:multiValueProp - "propval 1" , - "propval 2" ; - ex:singleValueProp "propval" - ] ; - ex:multiValueProp - "propval 1" , - "propval 2" ; - ex:singleValueProp "propval" - ] ; - ex:multiValueProp - "propval 1" , - "propval 2" , - "propval 3" , - "propval 4" ; - ex:name - "Thing" , - "Other Thing"@en , - "もの"@ja , - "rzecz"@pl ; - ex:singleValueProp "propval" ; -. - -""" - assert s == expected_s + # compare output to target + # - any differences will produce output + diff = "\n".join(list(difflib.unified_diff(target.split("\n"), output.split("\n")))) - # re-parse test - g2 = Graph().parse(data=s) # turtle - assert len(g2) == len(g) + assert not diff, diff