Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: small InputSource related issues #2255

Merged
merged 1 commit into from
Mar 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 17 additions & 1 deletion rdflib/parser.py
Expand Up @@ -363,6 +363,10 @@ def create_input_source(
input_source = None

if source is not None:
if TYPE_CHECKING:
assert file is None
assert data is None
assert location is None
if isinstance(source, InputSource):
input_source = source
else:
Expand All @@ -379,7 +383,7 @@ def create_input_source(
input_source.setCharacterStream(source)
input_source.setEncoding(source.encoding)
try:
b = file.buffer # type: ignore[union-attr]
b = source.buffer # type: ignore[union-attr]
input_source.setByteStream(b)
except (AttributeError, LookupError):
input_source.setByteStream(source)
Expand All @@ -399,6 +403,10 @@ def create_input_source(
auto_close = False # make sure we close all file handles we open

if location is not None:
if TYPE_CHECKING:
assert file is None
assert data is None
assert source is None
(
absolute_location,
auto_close,
Expand All @@ -412,9 +420,17 @@ def create_input_source(
)

if file is not None:
if TYPE_CHECKING:
assert location is None
assert data is None
assert source is None
input_source = FileInputSource(file)

if data is not None:
if TYPE_CHECKING:
assert location is None
assert file is None
assert source is None
if isinstance(data, dict):
input_source = PythonInputSource(data)
auto_close = True
Expand Down
37 changes: 19 additions & 18 deletions rdflib/plugins/parsers/hext.py
Expand Up @@ -7,10 +7,11 @@

import json
import warnings
from typing import TYPE_CHECKING, Any, List, Optional, Union
from io import TextIOWrapper
from typing import Any, BinaryIO, List, Optional, TextIO, Union

from rdflib.graph import ConjunctiveGraph, Graph
from rdflib.parser import FileInputSource, InputSource, Parser
from rdflib.parser import InputSource, Parser
from rdflib.term import BNode, Literal, URIRef

__all__ = ["HextuplesParser"]
Expand Down Expand Up @@ -92,19 +93,19 @@ def parse(self, source: InputSource, graph: Graph, **kwargs: Any) -> None: # ty
cg = ConjunctiveGraph(store=graph.store, identifier=graph.identifier)
cg.default_context = graph

# handle different source types - only file and string (data) for now
if hasattr(source, "file"):
if TYPE_CHECKING:
assert isinstance(source, FileInputSource)
# type error: Item "TextIOBase" of "Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase]" has no attribute "name"
# type error: Item "RawIOBase" of "Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase]" has no attribute "name"
# type error: Item "BufferedIOBase" of "Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase]" has no attribute "name"
with open(source.file.name, encoding="utf-8") as fp: # type: ignore[union-attr]
for l in fp: # noqa: E741
self._parse_hextuple(cg, self._load_json_line(l))
elif hasattr(source, "_InputSource__bytefile"):
if hasattr(source._InputSource__bytefile, "wrapped"):
for (
l # noqa: E741
) in source._InputSource__bytefile.wrapped.strip().splitlines():
self._parse_hextuple(cg, self._load_json_line(l))
text_stream: Optional[TextIO] = source.getCharacterStream()
if text_stream is None:
binary_stream: Optional[BinaryIO] = source.getByteStream()
if binary_stream is None:
raise ValueError(
f"Source does not have a character stream or a byte stream and cannot be used {type(source)}"
)
text_stream = TextIOWrapper(binary_stream, encoding="utf-8")

for line in text_stream:
if len(line) == 0 or line.isspace():
# Skipping empty lines because this is what was being done before for the first and last lines, albeit in a rather indirect way.
# The result is that we accept input that would otherwise be invalid.
# Possibly we should just let this result in an error.
continue
self._parse_hextuple(cg, self._load_json_line(line))
3 changes: 2 additions & 1 deletion rdflib/util.py
Expand Up @@ -518,6 +518,7 @@ def _iri2uri(iri: str) -> str:
>>> _iri2uri("https://dbpedia.org/resource/Almería")
'https://dbpedia.org/resource/Almer%C3%ADa'
"""
# https://datatracker.ietf.org/doc/html/rfc3305

(scheme, netloc, path, query, fragment) = urlsplit(iri)

Expand All @@ -526,7 +527,7 @@ def _iri2uri(iri: str) -> str:
return iri

scheme = quote(scheme)
netloc = quote(netloc.encode("idna").decode("utf-8"))
netloc = netloc.encode("idna").decode("utf-8")
path = quote(path)
query = quote(query)
fragment = quote(fragment)
Expand Down
22 changes: 18 additions & 4 deletions test/conftest.py
Expand Up @@ -2,6 +2,8 @@

pytest.register_assert_rewrite("test.utils")

from test.utils.http import ctx_http_server # noqa: E402
from test.utils.httpfileserver import HTTPFileServer # noqa: E402
from typing import Generator # noqa: E402

from rdflib import Graph
Expand All @@ -16,20 +18,32 @@
# readability.


@pytest.fixture(scope="session")
def http_file_server() -> Generator[HTTPFileServer, None, None]:
    """
    Yield a session-scoped HTTP file server bound to the loopback address.

    The server is handed to ``ctx_http_server`` and the value produced by
    that context manager is what the fixture yields.
    """
    # NOTE(review): port 0 presumably asks the OS for any free port - confirm
    # against HTTPFileServer.
    bind_address = ("127.0.0.1", 0)
    with ctx_http_server(HTTPFileServer(bind_address)) as running_server:
        yield running_server


@pytest.fixture(scope="session")
def rdfs_graph() -> Graph:
    """
    Return the result of parsing the RDFS Turtle file from the test data
    directory into a fresh graph; the fixture is evaluated once per session.
    """
    rdfs_path = TEST_DATA_DIR / "defined_namespaces/rdfs.ttl"
    graph = Graph()
    return graph.parse(rdfs_path, format="turtle")


@pytest.fixture(scope="session")
def session_httpmock() -> Generator[ServedBaseHTTPServerMock, None, None]:
def _session_function_httpmock() -> Generator[ServedBaseHTTPServerMock, None, None]:
"""
This fixture is session scoped, but it is reset for each function in
:func:`function_httpmock`. This should not be used directly.
"""
with ServedBaseHTTPServerMock() as httpmock:
yield httpmock


@pytest.fixture(scope="function")
def function_httpmock(
session_httpmock: ServedBaseHTTPServerMock,
_session_function_httpmock: ServedBaseHTTPServerMock,
) -> Generator[ServedBaseHTTPServerMock, None, None]:
session_httpmock.reset()
yield session_httpmock
_session_function_httpmock.reset()
yield _session_function_httpmock
6 changes: 6 additions & 0 deletions test/data/fetcher.py
Expand Up @@ -268,6 +268,12 @@ def _member_io(
remote=Request("https://www.w3.org/2009/sparql/docs/tests/test-update.n3"),
local_path=(DATA_PATH / "defined_namespaces/ut.n3"),
),
FileResource(
remote=Request(
"https://github.com/web-platform-tests/wpt/raw/9d13065419df90d2ad71f3c6b78cc12e7800dae4/html/syntax/parsing/html5lib_tests1.html"
),
local_path=(DATA_PATH / "html5lib_tests1.html"),
),
]


Expand Down
28 changes: 28 additions & 0 deletions test/data/html5lib_tests1.html

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions test/data/variants/diverse_triples.xml
@@ -0,0 +1,20 @@
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:eghttp="http://example.com/"
xmlns:egurn="urn:example:"
xmlns:egschema="example:"
xmlns:xsd="http://www.w3.org/2001/XMLSchema#" >
<rdf:Description rdf:about="example:object">
<eghttp:predicate>XSD string</eghttp:predicate>
</rdf:Description>
<rdf:Description rdf:about="http://example.com/subject">
<eghttp:predicate xml:lang="jpx">日本語の表記体系</eghttp:predicate>
</rdf:Description>
<rdf:Description rdf:about="urn:example:subject">
<egschema:predicate rdf:resource="example:subject"/>
</rdf:Description>
<rdf:Description rdf:about="example:subject">
<egschema:predicate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">12</egschema:predicate>
<egschema:predicate rdf:resource="example:object"/>
</rdf:Description>
</rdf:RDF>
6 changes: 6 additions & 0 deletions test/data/variants/simple_triple.jsonld
@@ -0,0 +1,6 @@
{
"@id": "http://example.org/subject",
"http://example.org/predicate": {
"@id": "http://example.org/object"
}
}
2 changes: 2 additions & 0 deletions test/data/variants/simple_triple.ttl
@@ -0,0 +1,2 @@
<http://example.org/subject>
<http://example.org/predicate> <http://example.org/object> .
7 changes: 7 additions & 0 deletions test/data/variants/simple_triple.xml
@@ -0,0 +1,7 @@
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:j.0="http://example.org/" >
<rdf:Description rdf:about="http://example.org/subject">
<j.0:predicate rdf:resource="http://example.org/object"/>
</rdf:Description>
</rdf:RDF>
4 changes: 4 additions & 0 deletions test/jsonld/__init__.py
@@ -1,6 +1,10 @@
from typing import List

from rdflib import parser, plugin, serializer

assert plugin
assert serializer
assert parser
import json

__all__: List[str] = []
13 changes: 11 additions & 2 deletions test/test_graph/test_graph.py
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
from test.data import TEST_DATA_DIR, bob, cheese, hates, likes, michel, pizza, tarek
from test.utils import GraphHelper, get_unique_plugin_names
from test.utils.httpfileserver import HTTPFileServer, ProtoFileResource
from typing import Callable, Optional, Set
from urllib.error import HTTPError, URLError

Expand Down Expand Up @@ -272,7 +273,9 @@ def test_graph_intersection(make_graph: GraphFactory):
assert (michel, likes, cheese) in g1


def test_guess_format_for_parse(make_graph: GraphFactory):
def test_guess_format_for_parse(
make_graph: GraphFactory, http_file_server: HTTPFileServer
):
graph = make_graph()

# files
Expand Down Expand Up @@ -329,10 +332,16 @@ def test_guess_format_for_parse(make_graph: GraphFactory):
graph.parse(data=rdf, format="xml")

# URI
file_info = http_file_server.add_file_with_caching(
ProtoFileResource(
(("Content-Type", "text/html; charset=UTF-8"),),
TEST_DATA_DIR / "html5lib_tests1.html",
),
)

# only getting HTML
with pytest.raises(PluginException):
graph.parse(location="https://www.google.com")
graph.parse(location=file_info.request_url)

try:
graph.parse(location="http://www.w3.org/ns/adms.ttl")
Expand Down
6 changes: 3 additions & 3 deletions test/test_graph/test_graph_http.py
Expand Up @@ -3,11 +3,11 @@
from test.data import TEST_DATA_DIR
from test.utils import GraphHelper
from test.utils.graph import cached_graph
from test.utils.http import ctx_http_handler
from test.utils.httpservermock import (
MethodName,
MockHTTPResponse,
ServedBaseHTTPServerMock,
ctx_http_server,
)
from urllib.error import HTTPError

Expand Down Expand Up @@ -106,7 +106,7 @@ def test_content_negotiation(self) -> None:
expected.add((EG.a, EG.b, EG.c))
expected_triples = GraphHelper.triple_set(expected)

with ctx_http_server(ContentNegotiationHandler) as server:
with ctx_http_handler(ContentNegotiationHandler) as server:
(host, port) = server.server_address
if isinstance(host, (bytes, bytearray)):
host = host.decode("utf-8")
Expand All @@ -121,7 +121,7 @@ def test_content_negotiation_no_format(self) -> None:
expected.add((EG.a, EG.b, EG.c))
expected_triples = GraphHelper.triple_set(expected)

with ctx_http_server(ContentNegotiationHandler) as server:
with ctx_http_handler(ContentNegotiationHandler) as server:
(host, port) = server.server_address
if isinstance(host, (bytes, bytearray)):
host = host.decode("utf-8")
Expand Down
11 changes: 8 additions & 3 deletions test/test_graph/test_variants.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import json
import logging
import os
Expand Down Expand Up @@ -69,6 +71,11 @@ def check(
}
assert set(self.has_subject_iris) == subjects_iris

@classmethod
def from_path(cls, path: Path) -> GraphAsserts:
    """
    Build an instance from a JSON file.

    :param path: Path of a JSON file whose top-level object holds the
        keyword arguments passed to the class constructor.
    :return: ``cls(**<decoded JSON object>)``.
    """
    # JSON interchange text is UTF-8 (RFC 8259), so decode it explicitly
    # instead of relying on the platform's locale encoding.
    with path.open("r", encoding="utf-8") as f:
        return cls(**json.load(f))


@dataclass(order=True)
class GraphVariants:
Expand Down Expand Up @@ -122,9 +129,7 @@ def for_files(
else:
graph_variant = graph_varaint_dict[file_key]
if variant_key.endswith("-asserts.json"):
graph_variant.asserts = GraphAsserts(
**json.loads(file_path.read_text())
)
graph_variant.asserts = GraphAsserts.from_path(file_path)
else:
graph_variant.variants[variant_key] = file_path
return graph_varaint_dict
Expand Down
Empty file added test/test_misc/__init__.py
Empty file.
15 changes: 0 additions & 15 deletions test/test_misc/test_create_input_source.py

This file was deleted.