Skip to content

Commit

Permalink
Merge branch 'master' into improve_graph_parse
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholascar committed Aug 27, 2020
2 parents 3e42f5e + 9429538 commit 3afffcd
Show file tree
Hide file tree
Showing 13 changed files with 294 additions and 186 deletions.
2 changes: 1 addition & 1 deletion docs/plugin_parsers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ mdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser`
microdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser`
n3 :class:`~rdflib.plugins.parsers.notation3.N3Parser`
nquads :class:`~rdflib.plugins.parsers.nquads.NQuadsParser`
nt :class:`~rdflib.plugins.parsers.nt.NTParser`
nt :class:`~rdflib.plugins.parsers.ntriples.NTParser`
rdfa :class:`~rdflib.plugins.parsers.structureddata.RDFaParser`
rdfa1.0 :class:`~rdflib.plugins.parsers.structureddata.RDFa10Parser`
rdfa1.1 :class:`~rdflib.plugins.parsers.structureddata.RDFaParser`
Expand Down
2 changes: 1 addition & 1 deletion docs/sphinx-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
sphinx==3.2.0
sphinx==3.2.1
sphinxcontrib-apidoc
git+https://github.com/gniezen/n3pygments.git
6 changes: 5 additions & 1 deletion rdflib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,11 @@
try:
import __main__

if not hasattr(__main__, "__file__") and sys.stdout is not None and sys.stderr.isatty():
if (
not hasattr(__main__, "__file__")
and sys.stdout is not None
and sys.stderr.isatty()
):
# show log messages in interactive mode
_interactive_mode = True
logger.setLevel(logging.INFO)
Expand Down
2 changes: 1 addition & 1 deletion rdflib/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def _refine(self, coloring, sequence):
coloring.extend(colors)
try:
si = sequence.index(c)
sequence = sequence[:si] + colors + sequence[si + 1:]
sequence = sequence[:si] + colors + sequence[si + 1 :]
except ValueError:
sequence = colors[1:] + sequence
combined_colors = []
Expand Down
12 changes: 9 additions & 3 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,13 +779,17 @@ def preferredLabel(
# setup the language filtering
if lang is not None:
if lang == "": # we only want not language-tagged literals

def langfilter(l_):
return l_.language is None

else:

def langfilter(l_):
return l_.language == lang

else: # we don't care about language tags

def langfilter(l_):
return True

Expand Down Expand Up @@ -1079,9 +1083,11 @@ def parse(
format = source.content_type
could_not_guess_format = False
if format is None:
if (hasattr(source, "file")
and getattr(source.file, "name", None)
and isinstance(source.file.name, str)):
if (
hasattr(source, "file")
and getattr(source.file, "name", None)
and isinstance(source.file.name, str)
):
format = rdflib.util.guess_format(source.file.name)
if format is None:
format = "turtle"
Expand Down
104 changes: 85 additions & 19 deletions rdflib/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,11 @@
want to do so through the Graph class parse method.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import codecs
import os
import sys

from io import BytesIO

from io import BytesIO, TextIOBase, TextIOWrapper, StringIO, BufferedIOBase

from urllib.request import pathname2url
from urllib.request import Request
Expand All @@ -41,13 +37,46 @@


class Parser(object):
    """Minimal parser stub whose methods do nothing.

    Both ``__init__`` and ``parse`` are no-ops here; presumably concrete
    parsers subclass this and override ``parse`` (verify against the
    plugin implementations).
    """

    # Empty slots: instances carry no per-instance ``__dict__``.
    __slots__ = set()

    def __init__(self):
        """Create the parser; there is no state to initialise."""
        return None

    def parse(self, source, sink):
        """Accept ``source`` and ``sink`` and do nothing; returns ``None``."""
        return None


class BytesIOWrapper(BufferedIOBase):
    """Read-only binary stream over a ``str``, encoded lazily on first read.

    The wrapped text is not encoded until ``read`` or ``read1`` is first
    called; the encoded bytes are then served from an in-memory ``BytesIO``.

    :param wrapped: the text to expose as bytes.
    :param encoding: codec name used to encode ``wrapped`` (default UTF-8).
    """

    __slots__ = ("wrapped", "encoded", "encoding")

    def __init__(self, wrapped: str, encoding="utf-8"):
        super(BytesIOWrapper, self).__init__()
        self.wrapped = wrapped
        self.encoding = encoding
        # Populated with a BytesIO by the first read()/read1() call.
        self.encoded = None

    def _encoded_stream(self):
        """Return the backing ``BytesIO``, encoding ``wrapped`` on first use."""
        if self.encoded is None:
            # The codec encoder returns (bytes, length_consumed); only the
            # bytes may be handed to BytesIO, never the whole tuple.
            data, _ = codecs.getencoder(self.encoding)(self.wrapped)
            self.encoded = BytesIO(data)
        return self.encoded

    def read(self, *args, **kwargs):
        """Read encoded bytes; arguments are forwarded to ``BytesIO.read``."""
        return self._encoded_stream().read(*args, **kwargs)

    def read1(self, *args, **kwargs):
        """Read encoded bytes; arguments are forwarded to ``BytesIO.read1``."""
        return self._encoded_stream().read1(*args, **kwargs)

    def readinto(self, *args, **kwargs):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def readinto1(self, *args, **kwargs):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def write(self, *args, **kwargs):
        """Not supported (the stream is read-only); always raises
        ``NotImplementedError``."""
        raise NotImplementedError()


class InputSource(xmlreader.InputSource, object):
"""
TODO:
Expand All @@ -59,23 +88,39 @@ def __init__(self, system_id=None):
self.auto_close = False # see Graph.parse(), true if opened by us

def close(self):
c = self.getCharacterStream()
if c and hasattr(c, "close"):
try:
c.close()
except Exception:
pass
f = self.getByteStream()
if f and hasattr(f, "close"):
f.close()
try:
f.close()
except Exception:
pass


class StringInputSource(InputSource):
"""
TODO:
Constructs an RDFLib Parser InputSource from a Python String or Bytes
"""

def __init__(self, value, system_id=None):
def __init__(self, value, encoding="utf-8", system_id=None):
super(StringInputSource, self).__init__(system_id)
stream = BytesIO(value)
self.setByteStream(stream)
# TODO:
# encoding = value.encoding
# self.setEncoding(encoding)
if isinstance(value, str):
stream = StringIO(value)
self.setCharacterStream(stream)
self.setEncoding(encoding)
b_stream = BytesIOWrapper(value, encoding)
self.setByteStream(b_stream)
else:
stream = BytesIO(value)
self.setByteStream(stream)
c_stream = TextIOWrapper(stream, encoding)
self.setCharacterStream(c_stream)
self.setEncoding(c_stream.encoding)


headers = {
Expand Down Expand Up @@ -134,8 +179,18 @@ def __init__(self, file):
system_id = URIRef(urljoin("file:", pathname2url(file.name)), base=base)
super(FileInputSource, self).__init__(system_id)
self.file = file
self.setByteStream(file)
# TODO: self.setEncoding(encoding)
if isinstance(file, TextIOBase): # Python3 unicode fp
self.setCharacterStream(file)
self.setEncoding(file.encoding)
try:
b = file.buffer
self.setByteStream(b)
except (AttributeError, LookupError):
self.setByteStream(file)
else:
self.setByteStream(file)
# We cannot set characterStream here because
# we do not know the Raw Bytes File encoding.

def __repr__(self):
return repr(self.file)
Expand Down Expand Up @@ -171,10 +226,21 @@ def create_input_source(
else:
if isinstance(source, str):
location = source
elif isinstance(source, bytes):
data = source
elif hasattr(source, "read") and not isinstance(source, Namespace):
f = source
input_source = InputSource()
input_source.setByteStream(f)
if hasattr(source, "encoding"):
input_source.setCharacterStream(source)
input_source.setEncoding(source.encoding)
try:
b = file.buffer
input_source.setByteStream(b)
except (AttributeError, LookupError):
input_source.setByteStream(source)
else:
input_source.setByteStream(f)
if f is sys.stdin:
input_source.setSystemId("file:///dev/stdin")
elif hasattr(f, "name"):
Expand Down Expand Up @@ -206,8 +272,8 @@ def create_input_source(
input_source = FileInputSource(file)

if data is not None:
if isinstance(data, str):
data = data.encode("utf-8")
if not isinstance(data, (str, bytes, bytearray)):
raise RuntimeError("parse data can only str, or bytes.")
input_source = StringInputSource(data)
auto_close = True

Expand Down
10 changes: 5 additions & 5 deletions rdflib/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
entry_points = {
'rdf.plugins.parser': [
'nt = rdf.plugins.parsers.nt:NTParser',
'nt = rdf.plugins.parsers.ntriples:NTParser',
],
'rdf.plugins.serializer': [
'nt = rdf.plugins.serializers.NTSerializer:NTSerializer',
Expand Down Expand Up @@ -185,10 +185,10 @@ def plugins(name=None, kind=None):
register("text/turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser")
register("turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser")
register("ttl", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser")
register("application/n-triples", Parser, "rdflib.plugins.parsers.nt", "NTParser")
register("ntriples", Parser, "rdflib.plugins.parsers.nt", "NTParser")
register("nt", Parser, "rdflib.plugins.parsers.nt", "NTParser")
register("nt11", Parser, "rdflib.plugins.parsers.nt", "NTParser")
register("application/n-triples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser")
register("ntriples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser")
register("nt", Parser, "rdflib.plugins.parsers.ntriples", "NTParser")
register("nt11", Parser, "rdflib.plugins.parsers.ntriples", "NTParser")
register("application/n-quads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser")
register("nquads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser")
register("application/trix", Parser, "rdflib.plugins.parsers.trix", "TriXParser")
Expand Down

0 comments on commit 3afffcd

Please sign in to comment.