feat: add parser type hints (#2232)
Add type hints to:
- `rdflib/parser.py`
- `rdflib/plugins/parsers/*.py`
- some JSON-LD utils
- `rdflib/exceptions.py`.

This is mainly because the work I'm doing to fix #1844 touches some of
this parser code, and the type hints are useful for avoiding mistakes.

No runtime changes are included in this PR.
aucampia committed Mar 5, 2023
1 parent d2c9edc commit 123edf5
Showing 14 changed files with 698 additions and 360 deletions.
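
The hunks below only make annotations explicit; to show what they buy in practice, here is a minimal sketch (not part of the commit) of type-checked parser usage. It uses the Turtle parser plugin (rdflib.plugins.parsers.notation3.TurtleParser) as a stand-in for any Parser subclass; the example data and the commented-out erroneous call are illustrative only.

    from rdflib.graph import Graph
    from rdflib.parser import create_input_source
    from rdflib.plugins.parsers.notation3 import TurtleParser

    graph = Graph()
    source = create_input_source(
        data="<http://example.com/s> a <http://example.com/T> .",
        format="turtle",
    )

    # Matches the annotated Parser.parse(source: InputSource, sink: Graph) -> None
    TurtleParser().parse(source, graph)
    assert len(graph) == 1

    # With the hints in place, a checker such as mypy can flag passing a plain
    # str where an InputSource is expected, e.g.:
    # TurtleParser().parse("example.ttl", graph)  # incompatible argument type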
1 change: 1 addition & 0 deletions docs/conf.py
@@ -279,6 +279,7 @@ def find_version(filename):
nitpicky = False

nitpick_ignore = [
("py:class", "urllib.response.addinfourl"),
("py:data", "typing.Literal"),
("py:class", "typing.IO[bytes]"),
("py:class", "http.client.HTTPMessage"),
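
For context, nitpick_ignore is only consulted when Sphinx runs in nitpicky mode (nitpicky = True, or sphinx-build -n); the new entry presumably silences the unresolvable py:class reference to urllib.response.addinfourl that the parser type hints below introduce. A hypothetical conf.py excerpt:

    # Hypothetical excerpt: with nitpicky mode on, every cross-reference Sphinx
    # cannot resolve becomes a warning unless its (role, target) pair is listed.
    nitpicky = True  # the project keeps this False and opts in per build with -n
    nitpick_ignore = [
        ("py:class", "urllib.response.addinfourl"),  # referenced by the new hints
    ]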
5 changes: 4 additions & 1 deletion pyproject.toml
@@ -105,7 +105,10 @@ pep8-naming = ["-N802", "-N806", "-N815"]
pycodestyle = ["-E402"]
[tool.flakeheaven.exceptions."test/utils/namespace/_*"]
pep8-naming = ["-N815"]

[tool.flakeheaven.exceptions."rdflib/plugins/parsers/rdfxml.py"]
pep8-naming = ["-N802"]
[tool.flakeheaven.exceptions."rdflib/plugins/parsers/trix.py"]
pep8-naming = ["-N802"]

[tool.black]
required-version = "22.12.0"
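
The two new flakeheaven exceptions waive pep8-naming's N802 ("function name should be lowercase") for the SAX-based parsers, whose handler callbacks must use the mixedCase names fixed by the xml.sax API. A hypothetical handler (not from the diff) showing the kind of method that would otherwise be flagged:

    import xml.sax.handler

    class ExampleHandler(xml.sax.handler.ContentHandler):
        # The callback names are dictated by xml.sax, so N802 fires unless waived.
        def startElementNS(self, name, qname, attrs):
            pass

        def endElementNS(self, name, qname):
            pass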
14 changes: 9 additions & 5 deletions rdflib/exceptions.py
@@ -5,32 +5,36 @@
__all__ = [
"Error",
"ParserError",
"UniquenessError",
]


from typing import Any, Optional


class Error(Exception):
"""Base class for rdflib exceptions."""

def __init__(self, msg=None):
def __init__(self, msg: Optional[str] = None):
Exception.__init__(self, msg)
self.msg = msg


class ParserError(Error):
"""RDF Parser error."""

def __init__(self, msg):
def __init__(self, msg: str):
Error.__init__(self, msg)
self.msg = msg
self.msg: str = msg

def __str__(self):
def __str__(self) -> str:
return self.msg


class UniquenessError(Error):
"""A uniqueness assumption was made in the context, and that is not true"""

def __init__(self, values):
def __init__(self, values: Any):
Error.__init__(
self,
"\
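
A small usage sketch (not in the diff) of how the annotated exceptions read to a type checker: ParserError.msg is a plain str, while the base Error accepts an optional message.

    from rdflib.exceptions import Error, ParserError

    try:
        raise ParserError("unexpected token at line 3")
    except ParserError as e:
        message: str = e.msg  # annotated as str, no Optional narrowing needed
        print(str(e))         # __str__() -> str returns the same message

    base = Error()            # msg: Optional[str] defaults to None
    assert base.msg is None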
47 changes: 25 additions & 22 deletions rdflib/parser.py
@@ -9,6 +9,7 @@
want to do so through the Graph class parse method.
"""
from __future__ import annotations

import codecs
import os
@@ -37,9 +38,10 @@
from rdflib.term import URIRef

if TYPE_CHECKING:
from http.client import HTTPMessage, HTTPResponse
from email.message import Message
from urllib.response import addinfourl

from rdflib import Graph
from rdflib.graph import Graph

__all__ = [
"Parser",
@@ -57,7 +59,7 @@ class Parser(object):
def __init__(self):
pass

def parse(self, source: "InputSource", sink: "Graph"):
def parse(self, source: "InputSource", sink: "Graph") -> None:
pass


@@ -92,7 +94,7 @@ def write(self, *args, **kwargs):
raise NotImplementedError()


class InputSource(xmlreader.InputSource, object):
class InputSource(xmlreader.InputSource):
"""
TODO:
"""
@@ -102,7 +104,7 @@ def __init__(self, system_id: Optional[str] = None):
self.content_type: Optional[str] = None
self.auto_close = False # see Graph.parse(), true if opened by us

def close(self):
def close(self) -> None:
c = self.getCharacterStream()
if c and hasattr(c, "close"):
try:
@@ -133,26 +135,26 @@ class PythonInputSource(InputSource):
True
"""

def __init__(self, data, system_id=None):
def __init__(self, data: Any, system_id: Optional[str] = None):
self.content_type = None
self.auto_close = False # see Graph.parse(), true if opened by us
self.public_id = None
self.system_id = system_id
self.public_id: Optional[str] = None
self.system_id: Optional[str] = system_id
self.data = data

def getPublicId(self): # noqa: N802
def getPublicId(self) -> Optional[str]: # noqa: N802
return self.public_id

def setPublicId(self, public_id): # noqa: N802
def setPublicId(self, public_id: Optional[str]) -> None: # noqa: N802
self.public_id = public_id

def getSystemId(self): # noqa: N802
def getSystemId(self) -> Optional[str]: # noqa: N802
return self.system_id

def setSystemId(self, system_id): # noqa: N802
def setSystemId(self, system_id: Optional[str]) -> None: # noqa: N802
self.system_id = system_id

def close(self):
def close(self) -> None:
self.data = None


@@ -197,16 +199,16 @@ class URLInputSource(InputSource):
links: List[str]

@classmethod
def getallmatchingheaders(cls, message: "HTTPMessage", name):
def getallmatchingheaders(cls, message: "Message", name) -> List[str]:
# This is reimplemented here, because the method
# getallmatchingheaders from HTTPMessage is broken since Python 3.0
name = name.lower()
return [val for key, val in message.items() if key.lower() == name]

@classmethod
def get_links(cls, response: "HTTPResponse"):
def get_links(cls, response: addinfourl) -> List[str]:
linkslines = cls.getallmatchingheaders(response.headers, "Link")
retarray = []
retarray: List[str] = []
for linksline in linkslines:
links = [linkstr.strip() for linkstr in linksline.split(",")]
for link in links:
@@ -279,7 +281,7 @@ def _urlopen(req: Request) -> Any:
else:
raise

response: HTTPResponse = _urlopen(req)
response: addinfourl = _urlopen(req)
self.url = response.geturl() # in case redirections took place
self.links = self.get_links(response)
if format in ("json-ld", "application/ld+json"):
@@ -300,8 +302,9 @@ def _urlopen(req: Request) -> Any:
# TODO: self.setEncoding(encoding)
self.response_info = response.info() # a mimetools.Message instance

def __repr__(self):
return self.url
def __repr__(self) -> str:
# type error: Incompatible return value type (got "Optional[str]", expected "str")
return self.url # type: ignore[return-value]


class FileInputSource(InputSource):
@@ -325,7 +328,7 @@ def __init__(
# We cannot set characterStream here because
# we do not know the Raw Bytes File encoding.

def __repr__(self):
def __repr__(self) -> str:
return repr(self.file)


@@ -336,8 +339,8 @@ def create_input_source(
publicID: Optional[str] = None, # noqa: N803
location: Optional[str] = None,
file: Optional[Union[BinaryIO, TextIO]] = None,
data: Union[str, bytes, dict] = None,
format: str = None,
data: Optional[Union[str, bytes, dict]] = None,
format: Optional[str] = None,
) -> InputSource:
"""
Return an appropriate InputSource instance for the given
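
A minimal sketch (not from the commit) of the module's main entry point, create_input_source(), whose data and format parameters are now explicitly Optional and which is annotated to return an InputSource; the example data is illustrative only.

    from typing import Optional

    from rdflib.parser import InputSource, PythonInputSource, create_input_source

    src: InputSource = create_input_source(
        data="<http://example.com/s> <http://example.com/p> <http://example.com/o> .",
        # format is Optional and may simply be left out
    )
    content_type: Optional[str] = src.content_type  # typically None for in-memory data
    src.close()  # close() is annotated to return None

    # PythonInputSource's accessors now return Optional[str] as well:
    py_src = PythonInputSource({"@id": "http://example.com/x"})
    system_id: Optional[str] = py_src.getSystemId()  # None unless one was supplied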
31 changes: 22 additions & 9 deletions rdflib/plugins/parsers/hext.py
@@ -3,12 +3,15 @@
(ndjson) files, into Conjunctive. The store that backs the graph *must* be able to
handle contexts, i.e. multiple graphs.
"""
from __future__ import annotations

import json
import warnings
from typing import List, Union
from typing import TYPE_CHECKING, Any, List, Optional, Union

from rdflib import BNode, ConjunctiveGraph, Literal, URIRef
from rdflib.parser import Parser
from rdflib.graph import ConjunctiveGraph, Graph
from rdflib.parser import FileInputSource, InputSource, Parser
from rdflib.term import BNode, Literal, URIRef

__all__ = ["HextuplesParser"]

@@ -22,7 +25,7 @@ class HextuplesParser(Parser):
def __init__(self):
pass

def _load_json_line(self, line: str):
def _load_json_line(self, line: str) -> List[Optional[Any]]:
# this complex handing is because the 'value' component is
# allowed to be "" but not None
# all other "" values are treated as None
@@ -32,7 +35,9 @@ def _load_json_line(self, line: str):
ret2[2] = ""
return ret2

def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]):
def _parse_hextuple(
self, cg: ConjunctiveGraph, tup: List[Union[str, None]]
) -> None:
# all values check
# subject, predicate, value, datatype cannot be None
# language and graph may be None
@@ -71,7 +76,8 @@ def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]):
else:
cg.add((s, p, o))

def parse(self, source, graph, **kwargs):
# type error: Signature of "parse" incompatible with supertype "Parser"
def parse(self, source: InputSource, graph: Graph, **kwargs: Any) -> None: # type: ignore[override]
if kwargs.get("encoding") not in [None, "utf-8"]:
warnings.warn(
f"Hextuples files are always utf-8 encoded, "
@@ -88,10 +94,17 @@ def parse(self, source, graph, **kwargs):

# handle different source types - only file and string (data) for now
if hasattr(source, "file"):
with open(source.file.name, encoding="utf-8") as fp:
for l in fp:
if TYPE_CHECKING:
assert isinstance(source, FileInputSource)
# type error: Item "TextIOBase" of "Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase]" has no attribute "name"
# type error: Item "RawIOBase" of "Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase]" has no attribute "name"
# type error: Item "BufferedIOBase" of "Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase]" has no attribute "name"
with open(source.file.name, encoding="utf-8") as fp: # type: ignore[union-attr]
for l in fp: # noqa: E741
self._parse_hextuple(cg, self._load_json_line(l))
elif hasattr(source, "_InputSource__bytefile"):
if hasattr(source._InputSource__bytefile, "wrapped"):
for l in source._InputSource__bytefile.wrapped.strip().splitlines():
for (
l # noqa: E741
) in source._InputSource__bytefile.wrapped.strip().splitlines():
self._parse_hextuple(cg, self._load_json_line(l))
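
The typed parse() override above is normally reached through Graph.parse. A minimal sketch, assuming the plugin is registered under the format name "hext" and that each Hextuples (ndjson) line is a JSON array of subject, predicate, value, datatype, language and graph; the example line is illustrative only.

    from rdflib import ConjunctiveGraph

    hext_line = (
        '["http://example.com/s", "http://example.com/p", "chat", '
        '"http://www.w3.org/2001/XMLSchema#string", "", ""]'
    )

    cg = ConjunctiveGraph()  # the backing store must be able to handle contexts
    cg.parse(data=hext_line, format="hext")  # dispatches to HextuplesParser.parse()
    assert len(cg) == 1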
