Skip to content

Commit

Permalink
Merge branch 'master' into sparql-speedup
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholascar committed Dec 28, 2020
2 parents 5574acc + 6c294aa commit 07bc739
Show file tree
Hide file tree
Showing 39 changed files with 276 additions and 159 deletions.
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# http://travis-ci.org/#!/RDFLib/rdflib
os: linux
arch:
- amd64
- ppc64le
language: python
branches:
only:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ g.bind("xsd", XSD)
```
This will allow the n-triples triple above to be serialised like this:
```python
print(g.serialize(format="turtle").decode("utf-8"))
print(g.serialize(format="turtle"))
```

With these results:
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def find_version(filename):
exclude_trees = ["_build", "draft"]

# The reST default role (used for this markup: `text`) to use for all documents.
default_role = 'py:obj'
default_role = "py:obj"

# If true, '()' will be appended to :func: etc. cross-reference text.
add_function_parentheses = True
Expand Down
2 changes: 1 addition & 1 deletion docs/intro_to_creating_rdf.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ represented by the classes :class:`~rdflib.term.URIRef`, :class:`~rdflib.term.BN
``URIRefs`` and ``BNodes`` can both be thought of as resources, such as a person, a company, a website, etc.

* A ``BNode`` is a node where the exact URI is not known.
* A ``URIRef`` is a node where the exact URI is knonw. ``URIRef``\s are also used to represent the properties/predicates in the RDF graph.
* A ``URIRef`` is a node where the exact URI is known. ``URIRef``\s are also used to represent the properties/predicates in the RDF graph.
* ``Literals`` represent attribute values, such as a name, a date, a number, etc. The most common literal values are XML data types, e.g. string, int...


Expand Down
2 changes: 1 addition & 1 deletion docs/sphinx-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
sphinx==3.2.1
sphinx==3.4.0
sphinxcontrib-apidoc
git+https://github.com/gniezen/n3pygments.git
4 changes: 2 additions & 2 deletions rdflib/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Container(object):
>>> from rdflib import Graph, BNode, Literal, Bag
>>> g = Graph()
>>> b = Bag(g, BNode(), [Literal("One"), Literal("Two"), Literal("Three")])
>>> print(g.serialize(format="turtle").decode())
>>> print(g.serialize(format="turtle"))
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
<BLANKLINE>
[] a rdf:Bag ;
Expand All @@ -30,7 +30,7 @@ class Container(object):
>>> # add a new item
>>> b.append(Literal("Hello"))
>>> print(g.serialize(format="turtle").decode())
>>> print(g.serialize(format="turtle"))
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
<BLANKLINE>
[] a rdf:Bag ;
Expand Down
38 changes: 27 additions & 11 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import Optional, Union
import logging
from warnings import warn
import random
Expand Down Expand Up @@ -957,12 +957,17 @@ def absolutize(self, uri, defrag=1):
return self.namespace_manager.absolutize(uri, defrag)

def serialize(
self, destination=None, format="xml", base=None, encoding=None, **args
) -> Optional[bytes]:
self, destination=None, format="turtle", base=None, encoding=None, **args
) -> Optional[Union[bytes, str]]:
"""Serialize the Graph to destination
If destination is None serialize method returns the serialization as
bytes. Format defaults to xml (AKA rdf/xml).
bytes or string.
If encoding is None and destination is None, returns a string
If encoding is set, and Destination is None, returns bytes
Format defaults to turtle.
Format support can be extended with plugins,
but "xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig" and "nquads" are built in.
Expand All @@ -975,8 +980,12 @@ def serialize(
serializer = plugin.get(format, Serializer)(self)
if destination is None:
stream = BytesIO()
serializer.serialize(stream, base=base, encoding=encoding, **args)
return stream.getvalue()
if encoding is None:
serializer.serialize(stream, base=base, encoding="utf-8", **args)
return stream.getvalue().decode("utf-8")
else:
serializer.serialize(stream, base=base, encoding=encoding, **args)
return stream.getvalue()
if hasattr(destination, "write"):
stream = destination
serializer.serialize(stream, base=base, encoding=encoding, **args)
Expand All @@ -999,6 +1008,13 @@ def serialize(
shutil.copy(name, dest)
os.remove(name)

def print(self, format="turtle", encoding="utf-8", out=None):
    """Serialize this graph and print the result.

    :param format: serializer name handed to ``serialize`` (defaults to
        ``"turtle"``).
    :param encoding: encoding handed to ``serialize`` and used to decode the
        bytes it returns. With ``None``, ``serialize`` already returns a
        string, which is printed as-is.
    :param out: file-like object passed to the builtin ``print`` as ``file``;
        ``None`` lets the builtin pick its default stream.
    """
    # Imported explicitly because this method shares its name with the builtin.
    import builtins

    serialized = self.serialize(None, format=format, encoding=encoding)
    # serialize() yields bytes only when an encoding was requested; calling
    # .decode() on the str returned for encoding=None would raise.
    if isinstance(serialized, bytes):
        serialized = serialized.decode(encoding)
    builtins.print(serialized, file=out, flush=True)

def parse(
self,
source=None,
Expand Down Expand Up @@ -1130,13 +1146,13 @@ def load(self, source, publicID=None, format="xml"):
def query(
self,
query_object,
processor="sparql",
result="sparql",
processor: str = "sparql",
result: str = "sparql",
initNs=None,
initBindings=None,
use_store_provided=True,
use_store_provided: bool = True,
**kwargs
):
) -> query.Result:
"""
Query this graph.
Expand All @@ -1147,7 +1163,7 @@ def query(
if none are given, the namespaces from the graph's namespace manager
are used.
:returntype: rdflib.query.QueryResult
:returntype: rdflib.query.Result
"""

Expand Down
3 changes: 3 additions & 0 deletions rdflib/plugins/sparql/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def evalGraph(ctx, part):

ctx = ctx.clone()
graph = ctx[part.term]
prev_graph = ctx.graph
if graph is None:

for graph in ctx.dataset.contexts():
Expand All @@ -199,11 +200,13 @@ def evalGraph(ctx, part):
c = c.push()
graphSolution = [{part.term: graph.identifier}]
for x in _join(evalPart(c, part.p), graphSolution):
x.ctx.graph = prev_graph
yield x

else:
c = ctx.pushGraph(ctx.dataset.get_context(graph))
for x in evalPart(c, part.p):
x.ctx.graph = prev_graph
yield x


Expand Down
4 changes: 3 additions & 1 deletion rdflib/plugins/sparql/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def Builtin_TIMEZONE(e, ctx):
if not dt.tzinfo:
raise SPARQLError("datatime has no timezone: %r" % dt)

delta = dt.tzinfo.utcoffset(ctx.now)
delta = dt.utcoffset()

d = delta.days
s = delta.seconds
Expand Down Expand Up @@ -1119,6 +1119,8 @@ def calculateFinalDateTime(obj1, dt1, obj2, dt2, operation):

def EBV(rt):
"""
Effective Boolean Value (EBV)
* If the argument is a typed literal with a datatype of xsd:boolean,
the EBV is the value of that argument.
* If the argument is a plain literal or a typed literal with a
Expand Down
2 changes: 1 addition & 1 deletion rdflib/plugins/sparql/parserutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def get(self, a, variables=False, errors=False):
def __getattr__(self, a):
# Hack hack: OrderedDict relies on this
if a in ("_OrderedDict__root", "_OrderedDict__end"):
raise AttributeError
raise AttributeError()
try:
return self[a]
except KeyError:
Expand Down
43 changes: 20 additions & 23 deletions rdflib/plugins/sparql/sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def __getitem__(self, key):
if not isinstance(key, Node):
key = Variable(key)

if not type(key) in (BNode, Variable):
if not isinstance(key, (BNode, Variable)):
return key

if key not in self._d:
Expand All @@ -178,22 +178,20 @@ def project(self, vars):

def merge(self, other):
    """Return a new ``FrozenBindings`` over this object's items followed by ``other``'s.

    The new object is created with this object's ``ctx``.
    """
    combined_items = itertools.chain(self.items(), other.items())
    return FrozenBindings(self.ctx, combined_items)

def _now(self):
@property
def now(self):
return self.ctx.now

def _bnodes(self):
@property
def bnodes(self):
return self.ctx.bnodes

def _prologue(self):
@property
def prologue(self):
return self.ctx.prologue

prologue = property(_prologue)
bnodes = property(_bnodes)
now = property(_now)

def forget(self, before, _except=None):
"""
return a frozen dict only of bindings made in self
Expand Down Expand Up @@ -224,7 +222,6 @@ def remember(self, these):


class QueryContext(object):

"""
Query context - passed along when evaluating the query
"""
Expand All @@ -246,10 +243,16 @@ def __init__(self, graph=None, bindings=None, initBindings=None):
self.graph = graph

self.prologue = None
self.now = datetime.datetime.now(isodate.tzinfo.UTC)
self._now = None

self.bnodes = collections.defaultdict(BNode)

# Timestamp for this query context, created on demand: ``_now`` is set to
# None in ``__init__`` and filled with the current UTC time here, so the
# clock is only read if the value is actually requested.
# NOTE(review): indentation was lost in this rendering; the final ``return``
# is presumably at method level (not inside the ``if``) -- confirm.
@property
def now(self) -> datetime.datetime:
if self._now is None:
self._now = datetime.datetime.now(isodate.tzinfo.UTC)
return self._now

def clone(self, bindings=None):
r = QueryContext(
self._dataset if self._dataset is not None else self.graph,
Expand All @@ -261,7 +264,9 @@ def clone(self, bindings=None):
r.bnodes = self.bnodes
return r

def _get_dataset(self):
@property
def dataset(self):
""""current dataset"""
if self._dataset is None:
raise Exception(
"You performed a query operation requiring "
Expand All @@ -270,8 +275,6 @@ def _get_dataset(self):
)
return self._dataset

dataset = property(_get_dataset, doc="current dataset")

def load(self, source, default=False, **kwargs):
def _load(graph, source):
try:
Expand Down Expand Up @@ -307,7 +310,7 @@ def _load(graph, source):

def __getitem__(self, key):
# in SPARQL BNodes are just labels
if not type(key) in (BNode, Variable):
if not isinstance(key, (BNode, Variable)):
return key
try:
return self.bindings[key]
Expand Down Expand Up @@ -349,11 +352,6 @@ def push(self):
def clean(self):
    """Return a clone of this context created with an empty bindings list."""
    no_bindings = []
    return self.clone(no_bindings)

# def pop(self):
# self.bindings = self.bindings.outer
# if self.bindings is None:
# raise Exception("We've bottomed out of the bindings stack!")

def thaw(self, frozenbindings):
"""
Create a new read/write query context from the given solution
Expand All @@ -363,8 +361,7 @@ def thaw(self, frozenbindings):
return c


class Prologue(object):

class Prologue:
"""
A class for holding prefixing bindings and base URI information
"""
Expand Down Expand Up @@ -403,7 +400,7 @@ def absolutize(self, iri):
return iri


class Query(object):
class Query:
"""
A parsed and translated query
"""
Expand Down
9 changes: 2 additions & 7 deletions rdflib/plugins/stores/sleepycat.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,11 @@ def bb(u):


try:
from bsddb import db
from bsddb3 import db

has_bsddb = True
except ImportError:
try:
from bsddb3 import db

has_bsddb = True
except ImportError:
has_bsddb = False
has_bsddb = False


if has_bsddb:
Expand Down
17 changes: 13 additions & 4 deletions rdflib/plugins/stores/sparqlconnector.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,16 @@ def method(self):

@method.setter
def method(self, method):
if method not in ("GET", "POST"):
raise SPARQLConnectorException('Method must be "GET" or "POST"')
if method not in ("GET", "POST", "POST_FORM"):
raise SPARQLConnectorException('Method must be "GET", "POST", or "POST_FORM"')

self._method = method

def query(self, query, default_graph: str = None, named_graph: str = None):
if not self.query_endpoint:
raise SPARQLConnectorException("Query endpoint not set!")

params = {"query": query}
params = {}
# this test ensures we don't have a useless (BNode) default graph URI, which calls to Graph().query() will add
if default_graph is not None and type(default_graph) != BNode:
params["default-graph-uri"] = default_graph
Expand All @@ -91,6 +91,7 @@ def query(self, query, default_graph: str = None, named_graph: str = None):
args["headers"].update(headers)

if self.method == "GET":
params["query"] = query
args["params"].update(params)
qsa = "?" + urlencode(args["params"])
try:
Expand All @@ -99,8 +100,16 @@ def query(self, query, default_graph: str = None, named_graph: str = None):
raise ValueError("You did something wrong formulating either the URI or your SPARQL query")
elif self.method == "POST":
args["headers"].update({"Content-Type": "application/sparql-query"})
qsa = "?" + urlencode(params)
try:
res = urlopen(Request(self.query_endpoint + qsa, data=query.encode(), headers=args["headers"]))
except HTTPError as e:
return e.code, str(e), None
elif self.method == "POST_FORM":
params["query"] = query
args["params"].update(params)
try:
res = urlopen(Request(self.query_endpoint, data=query.encode(), headers=args["headers"]))
res = urlopen(Request(self.query_endpoint, data=urlencode(args["params"]).encode(), headers=args["headers"]))
except HTTPError as e:
return e.code, str(e), None
else:
Expand Down
4 changes: 1 addition & 3 deletions rdflib/plugins/stores/sparqlstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,6 @@ def __init__(
auth=None,
**sparqlconnector_kwargs
):
"""
"""
super(SPARQLStore, self).__init__(
query_endpoint=query_endpoint, returnFormat=returnFormat, auth=auth, **sparqlconnector_kwargs
)
Expand Down Expand Up @@ -374,7 +372,7 @@ def remove_graph(self, graph):
raise TypeError("The SPARQL store is read only")

def _is_contextual(self, graph):
""" Returns `True` if the "GRAPH" keyword must appear
"""Returns `True` if the "GRAPH" keyword must appear
in the final SPARQL query sent to the endpoint.
"""
if (not self.context_aware) or (graph is None):
Expand Down

0 comments on commit 07bc739

Please sign in to comment.