feat: add parser type hints (#2232)
Add type hints to:
- `rdflib/parser.py`
- `rdflib/plugins/parsers/*.py`
- some JSON-LD utils
- `rdflib/exceptions.py`.

This is mainly because the work I'm doing to fix #1844 touches some of
this parser code, and the type hints are useful for avoiding mistakes.

No runtime changes are included in this PR.
aucampia committed Mar 5, 2023
1 parent d2c9edc commit 123edf5
Showing 14 changed files with 698 additions and 360 deletions.
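
The hunks below only make annotations explicit; to show what they buy in practice, here is a minimal sketch (not part of the commit) of type-checked parser usage. It uses the Turtle parser plugin (rdflib.plugins.parsers.notation3.TurtleParser) as a stand-in for any Parser subclass; the example data and the commented-out erroneous call are illustrative only.

    from rdflib.graph import Graph
    from rdflib.parser import create_input_source
    from rdflib.plugins.parsers.notation3 import TurtleParser

    graph = Graph()
    source = create_input_source(
        data="<http://example.com/s> a <http://example.com/T> .",
        format="turtle",
    )

    # Matches the annotated Parser.parse(source: InputSource, sink: Graph) -> None
    TurtleParser().parse(source, graph)
    assert len(graph) == 1

    # With the hints in place, a checker such as mypy can flag passing a plain
    # str where an InputSource is expected, e.g.:
    # TurtleParser().parse("example.ttl", graph)  # incompatible argument type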
1 change: 1 addition & 0 deletions docs/conf.py
@@ -279,6 +279,7 @@ def find_version(filename):
nitpicky = False

nitpick_ignore = [
("py:class", "urllib.response.addinfourl"),
("py:data", "typing.Literal"),
("py:class", "typing.IO[bytes]"),
("py:class", "http.client.HTTPMessage"),
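
For context, nitpick_ignore is only consulted when Sphinx runs in nitpicky mode (nitpicky = True, or sphinx-build -n); the new entry presumably silences the unresolvable py:class reference to urllib.response.addinfourl that the parser type hints below introduce. A hypothetical conf.py excerpt:

    # Hypothetical excerpt: with nitpicky mode on, every cross-reference Sphinx
    # cannot resolve becomes a warning unless its (role, target) pair is listed.
    nitpicky = True  # the project keeps this False and opts in per build with -n
    nitpick_ignore = [
        ("py:class", "urllib.response.addinfourl"),  # referenced by the new hints
    ]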
5 changes: 4 additions & 1 deletion pyproject.toml
@@ -105,7 +105,10 @@ pep8-naming = ["-N802", "-N806", "-N815"]
pycodestyle = ["-E402"]
[tool.flakeheaven.exceptions."test/utils/namespace/_*"]
pep8-naming = ["-N815"]

[tool.flakeheaven.exceptions."rdflib/plugins/parsers/rdfxml.py"]
pep8-naming = ["-N802"]
[tool.flakeheaven.exceptions."rdflib/plugins/parsers/trix.py"]
pep8-naming = ["-N802"]

[tool.black]
required-version = "22.12.0"
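
The two new flakeheaven exceptions waive pep8-naming's N802 ("function name should be lowercase") for the SAX-based parsers, whose handler callbacks must use the mixedCase names fixed by the xml.sax API. A hypothetical handler (not from the diff) showing the kind of method that would otherwise be flagged:

    import xml.sax.handler

    class ExampleHandler(xml.sax.handler.ContentHandler):
        # The callback names are dictated by xml.sax, so N802 fires unless waived.
        def startElementNS(self, name, qname, attrs):
            pass

        def endElementNS(self, name, qname):
            pass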
14 changes: 9 additions & 5 deletions rdflib/exceptions.py
@@ -5,32 +5,36 @@
__all__ = [
"Error",
"ParserError",
"UniquenessError",
]


from typing import Any, Optional


class Error(Exception):
"""Base class for rdflib exceptions."""

def __init__(self, msg=None):
def __init__(self, msg: Optional[str] = None):
Exception.__init__(self, msg)
self.msg = msg


class ParserError(Error):
"""RDF Parser error."""

def __init__(self, msg):
def __init__(self, msg: str):
Error.__init__(self, msg)
self.msg = msg
self.msg: str = msg

def __str__(self):
def __str__(self) -> str:
return self.msg


class UniquenessError(Error):
"""A uniqueness assumption was made in the context, and that is not true"""

def __init__(self, values):
def __init__(self, values: Any):
Error.__init__(
self,
"\
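
A small usage sketch (not in the diff) of how the annotated exceptions read to a type checker: ParserError.msg is a plain str, while the base Error accepts an optional message.

    from rdflib.exceptions import Error, ParserError

    try:
        raise ParserError("unexpected token at line 3")
    except ParserError as e:
        message: str = e.msg  # annotated as str, no Optional narrowing needed
        print(str(e))         # __str__() -> str returns the same message

    base = Error()            # msg: Optional[str] defaults to None
    assert base.msg is None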
47 changes: 25 additions & 22 deletions rdflib/parser.py
@@ -9,6 +9,7 @@
want to do so through the Graph class parse method.
"""
from __future__ import annotations

import codecs
import os
@@ -37,9 +38,10 @@
from rdflib.term import URIRef

if TYPE_CHECKING:
from http.client import HTTPMessage, HTTPResponse
from email.message import Message
from urllib.response import addinfourl

from rdflib import Graph
from rdflib.graph import Graph

__all__ = [
"Parser",
@@ -57,7 +59,7 @@ class Parser(object):
def __init__(self):
pass

def parse(self, source: "InputSource", sink: "Graph"):
def parse(self, source: "InputSource", sink: "Graph") -> None:
pass


@@ -92,7 +94,7 @@ def write(self, *args, **kwargs):
raise NotImplementedError()


class InputSource(xmlreader.InputSource, object):
class InputSource(xmlreader.InputSource):
"""
TODO:
"""
@@ -102,7 +104,7 @@ def __init__(self, system_id: Optional[str] = None):
self.content_type: Optional[str] = None
self.auto_close = False # see Graph.parse(), true if opened by us

def close(self):
def close(self) -> None:
c = self.getCharacterStream()
if c and hasattr(c, "close"):
try:
@@ -133,26 +135,26 @@ class PythonInputSource(InputSource):
True
"""

def __init__(self, data, system_id=None):
def __init__(self, data: Any, system_id: Optional[str] = None):
self.content_type = None
self.auto_close = False # see Graph.parse(), true if opened by us
self.public_id = None
self.system_id = system_id
self.public_id: Optional[str] = None
self.system_id: Optional[str] = system_id
self.data = data

def getPublicId(self): # noqa: N802
def getPublicId(self) -> Optional[str]: # noqa: N802
return self.public_id

def setPublicId(self, public_id): # noqa: N802
def setPublicId(self, public_id: Optional[str]) -> None: # noqa: N802
self.public_id = public_id

def getSystemId(self): # noqa: N802
def getSystemId(self) -> Optional[str]: # noqa: N802
return self.system_id

def setSystemId(self, system_id): # noqa: N802
def setSystemId(self, system_id: Optional[str]) -> None: # noqa: N802
self.system_id = system_id

def close(self):
def close(self) -> None:
self.data = None


@@ -197,16 +199,16 @@ class URLInputSource(InputSource):
links: List[str]

@classmethod
def getallmatchingheaders(cls, message: "HTTPMessage", name):
def getallmatchingheaders(cls, message: "Message", name) -> List[str]:
# This is reimplemented here, because the method
# getallmatchingheaders from HTTPMessage is broken since Python 3.0
name = name.lower()
return [val for key, val in message.items() if key.lower() == name]

@classmethod
def get_links(cls, response: "HTTPResponse"):
def get_links(cls, response: addinfourl) -> List[str]:
linkslines = cls.getallmatchingheaders(response.headers, "Link")
retarray = []
retarray: List[str] = []
for linksline in linkslines:
links = [linkstr.strip() for linkstr in linksline.split(",")]
for link in links:
@@ -279,7 +281,7 @@ def _urlopen(req: Request) -> Any:
else:
raise

response: HTTPResponse = _urlopen(req)
response: addinfourl = _urlopen(req)
self.url = response.geturl() # in case redirections took place
self.links = self.get_links(response)
if format in ("json-ld", "application/ld+json"):
@@ -300,8 +302,9 @@ def _urlopen(req: Request) -> Any:
# TODO: self.setEncoding(encoding)
self.response_info = response.info() # a mimetools.Message instance

def __repr__(self):
return self.url
def __repr__(self) -> str:
# type error: Incompatible return value type (got "Optional[str]", expected "str")
return self.url # type: ignore[return-value]


class FileInputSource(InputSource):
@@ -325,7 +328,7 @@ def __init__(
# We cannot set characterStream here because
# we do not know the Raw Bytes File encoding.

def __repr__(self):
def __repr__(self) -> str:
return repr(self.file)


@@ -336,8 +339,8 @@ def create_input_source(
publicID: Optional[str] = None, # noqa: N803
location: Optional[str] = None,
file: Optional[Union[BinaryIO, TextIO]] = None,
data: Union[str, bytes, dict] = None,
format: str = None,
data: Optional[Union[str, bytes, dict]] = None,
format: Optional[str] = None,
) -> InputSource:
"""
Return an appropriate InputSource instance for the given
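
A minimal sketch (not from the commit) of the module's main entry point, create_input_source(), whose data and format parameters are now explicitly Optional and which is annotated to return an InputSource; the example data is illustrative only.

    from typing import Optional

    from rdflib.parser import InputSource, PythonInputSource, create_input_source

    src: InputSource = create_input_source(
        data="<http://example.com/s> <http://example.com/p> <http://example.com/o> .",
        # format is Optional and may simply be left out
    )
    content_type: Optional[str] = src.content_type  # typically None for in-memory data
    src.close()  # close() is annotated to return None

    # PythonInputSource's accessors now return Optional[str] as well:
    py_src = PythonInputSource({"@id": "http://example.com/x"})
    system_id: Optional[str] = py_src.getSystemId()  # None unless one was supplied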
31 changes: 22 additions & 9 deletions rdflib/plugins/parsers/hext.py
@@ -3,12 +3,15 @@
(ndjson) files, into Conjunctive. The store that backs the graph *must* be able to
handle contexts, i.e. multiple graphs.
"""
from __future__ import annotations

import json
import warnings
from typing import List, Union
from typing import TYPE_CHECKING, Any, List, Optional, Union

from rdflib import BNode, ConjunctiveGraph, Literal, URIRef
from rdflib.parser import Parser
from rdflib.graph import ConjunctiveGraph, Graph
from rdflib.parser import FileInputSource, InputSource, Parser
from rdflib.term import BNode, Literal, URIRef

__all__ = ["HextuplesParser"]

@@ -22,7 +25,7 @@ class HextuplesParser(Parser):
def __init__(self):
pass

def _load_json_line(self, line: str):
def _load_json_line(self, line: str) -> List[Optional[Any]]:
# this complex handing is because the 'value' component is
# allowed to be "" but not None
# all other "" values are treated as None
@@ -32,7 +35,9 @@ def _load_json_line(self, line: str):
ret2[2] = ""
return ret2

def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]):
def _parse_hextuple(
self, cg: ConjunctiveGraph, tup: List[Union[str, None]]
) -> None:
# all values check
# subject, predicate, value, datatype cannot be None
# language and graph may be None
@@ -71,7 +76,8 @@ def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]):
else:
cg.add((s, p, o))

def parse(self, source, graph, **kwargs):
# type error: Signature of "parse" incompatible with supertype "Parser"
def parse(self, source: InputSource, graph: Graph, **kwargs: Any) -> None: # type: ignore[override]
if kwargs.get("encoding") not in [None, "utf-8"]:
warnings.warn(
f"Hextuples files are always utf-8 encoded, "
@@ -88,10 +94,17 @@ def parse(self, source, graph, **kwargs):

# handle different source types - only file and string (data) for now
if hasattr(source, "file"):
with open(source.file.name, encoding="utf-8") as fp:
for l in fp:
if TYPE_CHECKING:
assert isinstance(source, FileInputSource)
# type error: Item "TextIOBase" of "Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase]" has no attribute "name"
# type error: Item "RawIOBase" of "Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase]" has no attribute "name"
# type error: Item "BufferedIOBase" of "Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase]" has no attribute "name"
with open(source.file.name, encoding="utf-8") as fp: # type: ignore[union-attr]
for l in fp: # noqa: E741
self._parse_hextuple(cg, self._load_json_line(l))
elif hasattr(source, "_InputSource__bytefile"):
if hasattr(source._InputSource__bytefile, "wrapped"):
for l in source._InputSource__bytefile.wrapped.strip().splitlines():
for (
l # noqa: E741
) in source._InputSource__bytefile.wrapped.strip().splitlines():
self._parse_hextuple(cg, self._load_json_line(l))
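
The typed parse() override above is normally reached through Graph.parse. A minimal sketch, assuming the plugin is registered under the format name "hext" and that each Hextuples (ndjson) line is a JSON array of subject, predicate, value, datatype, language and graph; the example line is illustrative only.

    from rdflib import ConjunctiveGraph

    hext_line = (
        '["http://example.com/s", "http://example.com/p", "chat", '
        '"http://www.w3.org/2001/XMLSchema#string", "", ""]'
    )

    cg = ConjunctiveGraph()  # the backing store must be able to handle contexts
    cg.parse(data=hext_line, format="hext")  # dispatches to HextuplesParser.parse()
    assert len(cg) == 1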
