Merge branch 'master' into sparql-speedup

RDFLib · Dec 28, 2020 · 07bc739 · 07bc739
2 parents 5574acc + 6c294aa
commit 07bc739
Show file tree

Hide file tree

Showing 39 changed files with 276 additions and 159 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,4 +1,8 @@
 # http://travis-ci.org/#!/RDFLib/rdflib
+os: linux
+arch:
+ - amd64
+ - ppc64le
 language: python
 branches:
   only:

diff --git a/README.md b/README.md
@@ -101,7 +101,7 @@ g.bind("xsd", XSD)
 ```
 This will allow the n-triples triple above to be serialised like this:
  ```python
-print(g.serialize(format="turtle").decode("utf-8"))
+print(g.serialize(format="turtle"))
 ```
 
 With these results:

diff --git a/docs/conf.py b/docs/conf.py
@@ -102,7 +102,7 @@ def find_version(filename):
 exclude_trees = ["_build", "draft"]
 
 # The reST default role (used for this markup: `text`) to use for all documents.
-default_role = 'py:obj'
+default_role = "py:obj"
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
 add_function_parentheses = True

diff --git a/docs/intro_to_creating_rdf.rst b/docs/intro_to_creating_rdf.rst
@@ -12,7 +12,7 @@ represented by the classes :class:`~rdflib.term.URIRef`, :class:`~rdflib.term.BN
 ``URIRefs`` and ``BNodes`` can both be thought of as resources, such as a person, a company, a website, etc.
 
 * A ``BNode`` is a node where the exact URI is not known.
-* A ``URIRef`` is a node where the exact URI is knonw. ``URIRef``\s are also used to represent the properties/predicates in the RDF graph.
+* A ``URIRef`` is a node where the exact URI is known. ``URIRef``\s are also used to represent the properties/predicates in the RDF graph.
 * ``Literals`` represent attribute values, such as a name, a date, a number, etc. The most common literal values are XML data types, e.g. string, int...
 
 

diff --git a/docs/sphinx-requirements.txt b/docs/sphinx-requirements.txt
@@ -1,3 +1,3 @@
-sphinx==3.2.1
+sphinx==3.4.0
 sphinxcontrib-apidoc
 git+https://github.com/gniezen/n3pygments.git
diff --git a/rdflib/container.py b/rdflib/container.py
@@ -14,7 +14,7 @@ class Container(object):
         >>> from rdflib import Graph, BNode, Literal, Bag
         >>> g = Graph()
         >>> b = Bag(g, BNode(), [Literal("One"), Literal("Two"), Literal("Three")])
-        >>> print(g.serialize(format="turtle").decode())
+        >>> print(g.serialize(format="turtle"))
         @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
         <BLANKLINE>
         [] a rdf:Bag ;
@@ -30,7 +30,7 @@ class Container(object):
 
         >>> # add a new item
         >>> b.append(Literal("Hello"))
-        >>> print(g.serialize(format="turtle").decode())
+        >>> print(g.serialize(format="turtle"))
         @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
         <BLANKLINE>
         [] a rdf:Bag ;

diff --git a/rdflib/graph.py b/rdflib/graph.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Union
 import logging
 from warnings import warn
 import random
@@ -957,12 +957,17 @@ def absolutize(self, uri, defrag=1):
         return self.namespace_manager.absolutize(uri, defrag)
 
     def serialize(
-        self, destination=None, format="xml", base=None, encoding=None, **args
-    ) -> Optional[bytes]:
+        self, destination=None, format="turtle", base=None, encoding=None, **args
+    ) -> Optional[Union[bytes, str]]:
         """Serialize the Graph to destination
 
         If destination is None serialize method returns the serialization as
-        bytes. Format defaults to xml (AKA rdf/xml).
+        bytes or string.
+
+        If encoding is None and destination is None, returns a string.
+        If encoding is set and destination is None, returns bytes.
+
+        Format defaults to turtle.
 
         Format support can be extended with plugins,
         but "xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig" and "nquads" are built in.
@@ -975,8 +980,12 @@ def serialize(
         serializer = plugin.get(format, Serializer)(self)
         if destination is None:
             stream = BytesIO()
-            serializer.serialize(stream, base=base, encoding=encoding, **args)
-            return stream.getvalue()
+            if encoding is None:
+                serializer.serialize(stream, base=base, encoding="utf-8", **args)
+                return stream.getvalue().decode("utf-8")
+            else:
+                serializer.serialize(stream, base=base, encoding=encoding, **args)
+                return stream.getvalue()
         if hasattr(destination, "write"):
             stream = destination
             serializer.serialize(stream, base=base, encoding=encoding, **args)
@@ -999,6 +1008,13 @@ def serialize(
                 shutil.copy(name, dest)
                 os.remove(name)
 
+    def print(self, format="turtle", encoding="utf-8", out=None):
+        print(
+            self.serialize(None, format=format, encoding=encoding).decode(encoding),
+            file=out,
+            flush=True,
+        )
+
     def parse(
         self,
         source=None,
@@ -1130,13 +1146,13 @@ def load(self, source, publicID=None, format="xml"):
     def query(
         self,
         query_object,
-        processor="sparql",
-        result="sparql",
+        processor: str = "sparql",
+        result: str = "sparql",
         initNs=None,
         initBindings=None,
-        use_store_provided=True,
+        use_store_provided: bool = True,
         **kwargs
-    ):
+    ) -> query.Result:
         """
         Query this graph.
 
@@ -1147,7 +1163,7 @@ def query(
         if none are given, the namespaces from the graph's namespace manager
         are used.
 
-        :returntype: rdflib.query.QueryResult
+        :returntype: rdflib.query.Result
 
         """
 

diff --git a/rdflib/plugins/sparql/evaluate.py b/rdflib/plugins/sparql/evaluate.py
@@ -187,6 +187,7 @@ def evalGraph(ctx, part):
 
     ctx = ctx.clone()
     graph = ctx[part.term]
+    prev_graph = ctx.graph
     if graph is None:
 
         for graph in ctx.dataset.contexts():
@@ -199,11 +200,13 @@ def evalGraph(ctx, part):
             c = c.push()
             graphSolution = [{part.term: graph.identifier}]
             for x in _join(evalPart(c, part.p), graphSolution):
+                x.ctx.graph = prev_graph
                 yield x
 
     else:
         c = ctx.pushGraph(ctx.dataset.get_context(graph))
         for x in evalPart(c, part.p):
+            x.ctx.graph = prev_graph
             yield x
 
 

diff --git a/rdflib/plugins/sparql/operators.py b/rdflib/plugins/sparql/operators.py
@@ -492,7 +492,7 @@ def Builtin_TIMEZONE(e, ctx):
     if not dt.tzinfo:
         raise SPARQLError("datatime has no timezone: %r" % dt)
 
-    delta = dt.tzinfo.utcoffset(ctx.now)
+    delta = dt.utcoffset()
 
     d = delta.days
     s = delta.seconds
@@ -1119,6 +1119,8 @@ def calculateFinalDateTime(obj1, dt1, obj2, dt2, operation):
 
 def EBV(rt):
     """
+    Effective Boolean Value (EBV)
+
     * If the argument is a typed literal with a datatype of xsd:boolean,
       the EBV is the value of that argument.
     * If the argument is a plain literal or a typed literal with a

diff --git a/rdflib/plugins/sparql/parserutils.py b/rdflib/plugins/sparql/parserutils.py
@@ -173,7 +173,7 @@ def get(self, a, variables=False, errors=False):
     def __getattr__(self, a):
         # Hack hack: OrderedDict relies on this
         if a in ("_OrderedDict__root", "_OrderedDict__end"):
-            raise AttributeError
+            raise AttributeError()
         try:
             return self[a]
         except KeyError:

diff --git a/rdflib/plugins/sparql/sparql.py b/rdflib/plugins/sparql/sparql.py
@@ -165,7 +165,7 @@ def __getitem__(self, key):
         if not isinstance(key, Node):
             key = Variable(key)
 
-        if not type(key) in (BNode, Variable):
+        if not isinstance(key, (BNode, Variable)):
             return key
 
         if key not in self._d:
@@ -178,22 +178,20 @@ def project(self, vars):
 
     def merge(self, other):
         res = FrozenBindings(self.ctx, itertools.chain(self.items(), other.items()))
-
         return res
 
-    def _now(self):
+    @property
+    def now(self):
         return self.ctx.now
 
-    def _bnodes(self):
+    @property
+    def bnodes(self):
         return self.ctx.bnodes
 
-    def _prologue(self):
+    @property
+    def prologue(self):
         return self.ctx.prologue
 
-    prologue = property(_prologue)
-    bnodes = property(_bnodes)
-    now = property(_now)
-
     def forget(self, before, _except=None):
         """
         return a frozen dict only of bindings made in self
@@ -224,7 +222,6 @@ def remember(self, these):
 
 
 class QueryContext(object):
-
     """
     Query context - passed along when evaluating the query
     """
@@ -246,10 +243,16 @@ def __init__(self, graph=None, bindings=None, initBindings=None):
             self.graph = graph
 
         self.prologue = None
-        self.now = datetime.datetime.now(isodate.tzinfo.UTC)
+        self._now = None
 
         self.bnodes = collections.defaultdict(BNode)
 
+    @property
+    def now(self) -> datetime.datetime:
+        if self._now is None:
+            self._now = datetime.datetime.now(isodate.tzinfo.UTC)
+        return self._now
+
     def clone(self, bindings=None):
         r = QueryContext(
             self._dataset if self._dataset is not None else self.graph,
@@ -261,7 +264,9 @@ def clone(self, bindings=None):
         r.bnodes = self.bnodes
         return r
 
-    def _get_dataset(self):
+    @property
+    def dataset(self):
+        """current dataset"""
         if self._dataset is None:
             raise Exception(
                 "You performed a query operation requiring "
@@ -270,8 +275,6 @@ def _get_dataset(self):
             )
         return self._dataset
 
-    dataset = property(_get_dataset, doc="current dataset")
-
     def load(self, source, default=False, **kwargs):
         def _load(graph, source):
             try:
@@ -307,7 +310,7 @@ def _load(graph, source):
 
     def __getitem__(self, key):
         # in SPARQL BNodes are just labels
-        if not type(key) in (BNode, Variable):
+        if not isinstance(key, (BNode, Variable)):
             return key
         try:
             return self.bindings[key]
@@ -349,11 +352,6 @@ def push(self):
     def clean(self):
         return self.clone([])
 
-    # def pop(self):
-    #     self.bindings = self.bindings.outer
-    #     if self.bindings is None:
-    #         raise Exception("We've bottomed out of the bindings stack!")
-
     def thaw(self, frozenbindings):
         """
         Create a new read/write query context from the given solution
@@ -363,8 +361,7 @@ def thaw(self, frozenbindings):
         return c
 
 
-class Prologue(object):
-
+class Prologue:
     """
     A class for holding prefixing bindings and base URI information
     """
@@ -403,7 +400,7 @@ def absolutize(self, iri):
         return iri
 
 
-class Query(object):
+class Query:
     """
     A parsed and translated query
     """

diff --git a/rdflib/plugins/stores/sleepycat.py b/rdflib/plugins/stores/sleepycat.py
@@ -12,16 +12,11 @@ def bb(u):
 
 
 try:
-    from bsddb import db
+    from bsddb3 import db
 
     has_bsddb = True
 except ImportError:
-    try:
-        from bsddb3 import db
-
-        has_bsddb = True
-    except ImportError:
-        has_bsddb = False
+    has_bsddb = False
 
 
 if has_bsddb:

diff --git a/rdflib/plugins/stores/sparqlconnector.py b/rdflib/plugins/stores/sparqlconnector.py
@@ -66,16 +66,16 @@ def method(self):
 
     @method.setter
     def method(self, method):
-        if method not in ("GET", "POST"):
-            raise SPARQLConnectorException('Method must be "GET" or "POST"')
+        if method not in ("GET", "POST", "POST_FORM"):
+            raise SPARQLConnectorException('Method must be "GET", "POST", or "POST_FORM"')
 
         self._method = method
 
     def query(self, query, default_graph: str = None, named_graph: str = None):
         if not self.query_endpoint:
             raise SPARQLConnectorException("Query endpoint not set!")
 
-        params = {"query": query}
+        params = {}
         # this test ensures we don't have a useless (BNode) default graph URI, which calls to Graph().query() will add
         if default_graph is not None and type(default_graph) != BNode:
             params["default-graph-uri"] = default_graph
@@ -91,6 +91,7 @@ def query(self, query, default_graph: str = None, named_graph: str = None):
         args["headers"].update(headers)
 
         if self.method == "GET":
+            params["query"] = query
             args["params"].update(params)
             qsa = "?" + urlencode(args["params"])
             try:
@@ -99,8 +100,16 @@ def query(self, query, default_graph: str = None, named_graph: str = None):
                 raise ValueError("You did something wrong formulating either the URI or your SPARQL query")
         elif self.method == "POST":
             args["headers"].update({"Content-Type": "application/sparql-query"})
+            qsa = "?" + urlencode(params)
+            try:
+                res = urlopen(Request(self.query_endpoint + qsa, data=query.encode(), headers=args["headers"]))
+            except HTTPError as e:
+                return e.code, str(e), None
+        elif self.method == "POST_FORM":
+            params["query"] = query
+            args["params"].update(params)
             try:
-                res = urlopen(Request(self.query_endpoint, data=query.encode(), headers=args["headers"]))
+                res = urlopen(Request(self.query_endpoint, data=urlencode(args["params"]).encode(), headers=args["headers"]))
             except HTTPError as e:
                 return e.code, str(e), None
         else:

diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py
@@ -101,8 +101,6 @@ def __init__(
         auth=None,
         **sparqlconnector_kwargs
     ):
-        """
-        """
         super(SPARQLStore, self).__init__(
             query_endpoint=query_endpoint, returnFormat=returnFormat, auth=auth, **sparqlconnector_kwargs
         )
@@ -374,7 +372,7 @@ def remove_graph(self, graph):
         raise TypeError("The SPARQL store is read only")
 
     def _is_contextual(self, graph):
-        """ Returns `True` if the "GRAPH" keyword must appear
+        """Returns `True` if the "GRAPH" keyword must appear
         in the final SPARQL query sent to the endpoint.
         """
         if (not self.context_aware) or (graph is None):