From cc490513f8f17a7adc0bcbab2e0e86f37e832300 Mon Sep 17 00:00:00 2001
From: Andrew Baumann <Andrew.Baumann@microsoft.com>
Date: Thu, 2 Sep 2021 17:04:35 -0700
Subject: [PATCH] * expand and improve annotations in cmap,
 encryption/decompression and fonts * disallow untyped calls; this way, we
 have a core set of typed code that can grow over time (just not for
 ccitt, because there's a ton of work lurking there) * expand "type: ignore"
 comments to suppress a specific error code

---
 mypy.ini                |   4 +-
 pdfminer/arcfour.py     |   7 +-
 pdfminer/ascii85.py     |   6 +-
 pdfminer/ccitt.py       |   7 +-
 pdfminer/cmapdb.py      |  90 ++++++++++---------
 pdfminer/encodingdb.py  |   6 +-
 pdfminer/image.py       |   2 +-
 pdfminer/layout.py      |  11 +--
 pdfminer/lzw.py         |  25 +++---
 pdfminer/pdfdocument.py | 194 +++++++++++++++++++++++-----------------
 pdfminer/pdffont.py     |  55 +++++++-----
 pdfminer/pdfinterp.py   |  33 ++++---
 pdfminer/pdfpage.py     |   6 +-
 pdfminer/pdfparser.py   |   9 +-
 pdfminer/pdftypes.py    |  84 ++++++++++-------
 pdfminer/psparser.py    |   2 +-
 pdfminer/runlength.py   |   2 +-
 pdfminer/utils.py       |   2 +-
 18 files changed, 313 insertions(+), 232 deletions(-)

diff --git a/mypy.ini b/mypy.ini
index 8953a1a3..72aacb79 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -2,7 +2,7 @@
 warn_unused_configs = True
 disallow_any_generics = True
 disallow_subclassing_any = True
-#disallow_untyped_calls = True
+disallow_untyped_calls = True
 #disallow_untyped_defs = True
 disallow_incomplete_defs = True
 #check_untyped_defs = True
@@ -17,3 +17,5 @@ strict_equality = True
 [mypy-cryptography.hazmat.*]
 ignore_missing_imports = True
 
+[mypy-pdfminer.ccitt]
+disallow_untyped_calls = False
diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py
index e40b0804..5967a1af 100644
--- a/pdfminer/arcfour.py
+++ b/pdfminer/arcfour.py
@@ -5,9 +5,12 @@
 """
 
 
+from typing import Sequence
+
+
 class Arcfour:
 
-    def __init__(self, key):
+    def __init__(self, key: Sequence[int]):
         # because Py3 range is not indexable
         s = [i for i in range(256)]
         j = 0
@@ -19,7 +22,7 @@ def __init__(self, key):
         (self.i, self.j) = (0, 0)
         return
 
-    def process(self, data):
+    def process(self, data: bytes) -> bytes:
         (i, j) = (self.i, self.j)
         s = self.s
         r = b''
diff --git a/pdfminer/ascii85.py b/pdfminer/ascii85.py
index cde3f908..7c7c757f 100644
--- a/pdfminer/ascii85.py
+++ b/pdfminer/ascii85.py
@@ -9,7 +9,7 @@
 
 
 # ascii85decode(data)
-def ascii85decode(data):
+def ascii85decode(data: bytes) -> bytes:
     """
     In ASCII85 encoding, every four bytes are encoded with five ASCII
     letters, using 85 different types of characters (as 256**4 < 85**5).
@@ -47,7 +47,7 @@ def ascii85decode(data):
 trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
 
 
-def asciihexdecode(data):
+def asciihexdecode(data: bytes) -> bytes:
     """
     ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
     For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
@@ -57,7 +57,7 @@ def asciihexdecode(data):
     the EOD marker after reading an odd number of hexadecimal digits, it
     will behave as if a 0 followed the last digit.
     """
-    def decode(x):
+    def decode(x: bytes) -> bytes:
         i = int(x, 16)
         return bytes((i,))
 
diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py
index 8ae123b5..e45e8252 100644
--- a/pdfminer/ccitt.py
+++ b/pdfminer/ccitt.py
@@ -13,6 +13,7 @@
 
 import sys
 import array
+from typing import Any, Dict
 
 
 def get_bytes(data):
@@ -541,7 +542,7 @@ def output_line(self, y, bits):
         return
 
 
-def ccittfaxdecode(data, params):
+def ccittfaxdecode(data: bytes, params: Dict[str, Any]) -> bytes:
     K = params.get('K')
     cols = params.get('Columns')
     bytealign = params.get('EncodedByteAlign')
@@ -551,7 +552,7 @@ def ccittfaxdecode(data, params):
     else:
         raise ValueError(K)
     parser.feedbytes(data)
-    return parser.close()
+    return parser.close()  # type: ignore[no-any-return]
 
 
 # test
@@ -562,7 +563,7 @@ def main(argv):
 
     class Parser(CCITTG4Parser):
         def __init__(self, width, bytealign=False):
-            import pygame  # type: ignore
+            import pygame  # type: ignore[import]
             CCITTG4Parser.__init__(self, width, bytealign=bytealign)
             self.img = pygame.Surface((self.width, 1000))
             return
diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py
index e99ceb93..232d9a6a 100644
--- a/pdfminer/cmapdb.py
+++ b/pdfminer/cmapdb.py
@@ -16,10 +16,12 @@
 import pickle as pickle
 import struct
 import logging
-from typing import Any, Dict, List
+from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, Optional,
+                    TextIO, Tuple, Union)
 from .psparser import PSStackParser
 from .psparser import PSSyntaxError
 from .psparser import PSEOF
+from .psparser import PSKeyword
 from .psparser import PSLiteral
 from .psparser import literal_name
 from .psparser import KWD
@@ -39,44 +41,48 @@ class CMapBase:
 
     debug = 0
 
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Union[str, int]):
         self.attrs = kwargs.copy()
         return
 
-    def is_vertical(self):
+    def is_vertical(self) -> bool:
         return self.attrs.get('WMode', 0) != 0
 
-    def set_attr(self, k, v):
+    def set_attr(self, k: str, v: Any) -> None:
         self.attrs[k] = v
         return
 
-    def add_code2cid(self, code, cid):
+    def add_code2cid(self, code: str, cid: int) -> None:
         return
 
-    def add_cid2unichr(self, cid, code):
+    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
+                       ) -> None:
         return
 
-    def use_cmap(self, cmap):
+    def use_cmap(self, cmap: "CMapBase") -> None:
         return
 
+    def decode(self, code: bytes) -> Iterable[int]:
+        raise NotImplementedError
+
 
 class CMap(CMapBase):
 
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Union[str, int]):
         CMapBase.__init__(self, **kwargs)
-        self.code2cid = {}
+        self.code2cid: Dict[int, Any] = {}
         return
 
     def __repr__(self):
         return '<CMap: %s>' % self.attrs.get('CMapName')
 
-    def use_cmap(self, cmap):
+    def use_cmap(self, cmap: CMapBase) -> None:
         assert isinstance(cmap, CMap), str(type(cmap))
 
-        def copy(dst, src):
+        def copy(dst: Dict[Any, Any], src: Dict[Any, Any]) -> None:
             for (k, v) in src.items():
                 if isinstance(v, dict):
-                    d = {}
+                    d: Dict[Any, Any] = {}
                     dst[k] = d
                     copy(d, v)
                 else:
@@ -84,7 +90,7 @@ def copy(dst, src):
         copy(self.code2cid, cmap.code2cid)
         return
 
-    def decode(self, code):
+    def decode(self, code: bytes) -> Iterator[int]:
         log.debug('decode: %r, %r', self, code)
         d = self.code2cid
         for i in iter(code):
@@ -97,7 +103,9 @@ def decode(self, code):
                 d = self.code2cid
         return
 
-    def dump(self, out=sys.stdout, code2cid=None, code=None):
+    def dump(self, out: TextIO = sys.stdout,
+             code2cid: Optional[Dict[int, Any]] = None,
+             code: Tuple[int, ...] = ()) -> None:
         if code2cid is None:
             code2cid = self.code2cid
             code = ()
@@ -112,7 +120,7 @@ def dump(self, out=sys.stdout, code2cid=None, code=None):
 
 class IdentityCMap(CMapBase):
 
-    def decode(self, code):
+    def decode(self, code: bytes) -> Tuple[int, ...]:
         n = len(code)//2
         if n:
             return struct.unpack('>%dH' % n, code)
@@ -122,7 +130,7 @@ def decode(self, code):
 
 class IdentityCMapByte(IdentityCMap):
 
-    def decode(self, code):
+    def decode(self, code: bytes) -> Tuple[int, ...]:
         n = len(code)
         if n:
             return struct.unpack('>%dB' % n, code)
@@ -132,19 +140,19 @@ def decode(self, code):
 
 class UnicodeMap(CMapBase):
 
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Union[str, int]):
         CMapBase.__init__(self, **kwargs)
-        self.cid2unichr = {}
+        self.cid2unichr: Dict[int, str] = {}
         return
 
     def __repr__(self):
         return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
 
-    def get_unichr(self, cid):
+    def get_unichr(self, cid: int) -> str:
         log.debug('get_unichr: %r, %r', self, cid)
         return self.cid2unichr[cid]
 
-    def dump(self, out=sys.stdout):
+    def dump(self, out: TextIO = sys.stdout) -> None:
         for (k, v) in sorted(self.cid2unichr.items()):
             out.write('cid %d = unicode %r\n' % (k, v))
         return
@@ -152,29 +160,31 @@ def dump(self, out=sys.stdout):
 
 class FileCMap(CMap):
 
-    def add_code2cid(self, code, cid):
+    def add_code2cid(self, code: str, cid: int) -> None:
         assert isinstance(code, str) and isinstance(cid, int),\
             str((type(code), type(cid)))
         d = self.code2cid
         for c in code[:-1]:
-            c = ord(c)
-            if c in d:
-                d = d[c]
+            ci = ord(c)
+            if ci in d:
+                d = d[ci]
             else:
-                t = {}
-                d[c] = t
+                t: Dict[int, Any] = {}
+                d[ci] = t
                 d = t
-        c = ord(code[-1])
-        d[c] = cid
+        ci = ord(code[-1])
+        d[ci] = cid
         return
 
 
 class FileUnicodeMap(UnicodeMap):
 
-    def add_cid2unichr(self, cid, code):
+    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
+                       ) -> None:
         assert isinstance(cid, int), str(type(cid))
         if isinstance(code, PSLiteral):
             # Interpret as an Adobe glyph name.
+            assert isinstance(code.name, str)
             self.cid2unichr[cid] = name2unicode(code.name)
         elif isinstance(code, bytes):
             # Interpret as UTF-16BE.
@@ -188,8 +198,8 @@ def add_cid2unichr(self, cid, code):
 
 class PyCMap(CMap):
 
-    def __init__(self, name, module):
-        CMap.__init__(self, CMapName=name)
+    def __init__(self, name: str, module: Any):
+        super().__init__(CMapName=name)
         self.code2cid = module.CODE2CID
         if module.IS_VERTICAL:
             self.attrs['WMode'] = 1
@@ -198,8 +208,8 @@ def __init__(self, name, module):
 
 class PyUnicodeMap(UnicodeMap):
 
-    def __init__(self, name, module, vertical):
-        UnicodeMap.__init__(self, CMapName=name)
+    def __init__(self, name: str, module: Any, vertical: bool):
+        super().__init__(CMapName=name)
         if vertical:
             self.cid2unichr = module.CID2UNICHR_V
             self.attrs['WMode'] = 1
@@ -264,17 +274,16 @@ def get_unicode_map(cls, name: str, vertical: bool = False) -> UnicodeMap:
         return cls._umap_cache[name][vertical]
 
 
-# int here means that we're not extending PSStackParser with additional types.
-class CMapParser(PSStackParser[int]):
+class CMapParser(PSStackParser[PSKeyword]):
 
-    def __init__(self, cmap, fp):
+    def __init__(self, cmap: CMapBase, fp: BinaryIO):
         PSStackParser.__init__(self, fp)
         self.cmap = cmap
         # some ToUnicode maps don't have "begincmap" keyword.
         self._in_cmap = True
         return
 
-    def run(self):
+    def run(self) -> None:
         try:
             self.nextobject()
         except PSEOF:
@@ -298,7 +307,7 @@ def run(self):
     KEYWORD_BEGINNOTDEFRANGE = KWD(b'beginnotdefrange')
     KEYWORD_ENDNOTDEFRANGE = KWD(b'endnotdefrange')
 
-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
         if token is self.KEYWORD_BEGINCMAP:
             self._in_cmap = True
             self.popall()
@@ -382,6 +391,7 @@ def do_keyword(self, pos, token):
                     for i in range(e1-s1+1):
                         self.cmap.add_cid2unichr(s1+i, code[i])
                 else:
+                    assert isinstance(code, bytes)
                     var = code[-4:]
                     base = nunpack(var)
                     prefix = code[:-4]
@@ -412,7 +422,7 @@ def do_keyword(self, pos, token):
         return
 
 
-def main(argv):
+def main(argv: List[str]) -> None:
     args = argv[1:]
     for fname in args:
         fp = open(fname, 'rb')
@@ -424,4 +434,4 @@ def main(argv):
 
 
 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    main(sys.argv)
diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py
index 58998a90..da51f702 100644
--- a/pdfminer/encodingdb.py
+++ b/pdfminer/encodingdb.py
@@ -10,7 +10,7 @@
 log = logging.getLogger(__name__)
 
 
-def name2unicode(name):
+def name2unicode(name: str) -> str:
     """Converts Adobe glyph names to Unicode numbers.
 
     In contrast to the specification, this raises a KeyError instead of return
@@ -32,7 +32,7 @@ def name2unicode(name):
 
     else:
         if name in glyphname2unicode:
-            return glyphname2unicode.get(name)
+            return glyphname2unicode[name]
 
         elif name.startswith('uni'):
             name_without_uni = name.strip('uni')
@@ -59,7 +59,7 @@ def name2unicode(name):
                    'it does not match specification' % name)
 
 
-def raise_key_error_for_invalid_unicode(unicode_digit):
+def raise_key_error_for_invalid_unicode(unicode_digit: int) -> None:
     """Unicode values should not be in the range D800 through DFFF because
     that is used for surrogate pairs in UTF-16
 
diff --git a/pdfminer/image.py b/pdfminer/image.py
index e825e83e..b0cc0171 100644
--- a/pdfminer/image.py
+++ b/pdfminer/image.py
@@ -81,7 +81,7 @@ def export_image(self, image):
         if ext == '.jpg':
             raw_data = image.stream.get_rawdata()
             if LITERAL_DEVICE_CMYK in image.colorspace:
-                from PIL import Image  # type: ignore
+                from PIL import Image  # type: ignore[import]
                 from PIL import ImageChops
                 ifp = BytesIO(raw_data)
                 i = Image.open(ifp)
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index f1c5652e..8bc51a45 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -298,8 +298,8 @@ class LTChar(LTComponent, LTText):
 
     def __init__(self, matrix: Matrix, font: PDFFont, fontsize: float,
                  scaling: float, rise: float, text: str, textwidth: float,
-                 textdisp: Point, ncs: PDFColorSpace,
-                 graphicstate: PDFGraphicState):
+                 textdisp: Union[float, Tuple[Optional[float], float]],
+                 ncs: PDFColorSpace, graphicstate: PDFGraphicState):
         LTText.__init__(self)
         self._text = text
         self.matrix = matrix
@@ -310,6 +310,7 @@ def __init__(self, matrix: Matrix, font: PDFFont, fontsize: float,
         # compute the boundary rectangle.
         if font.is_vertical():
             # vertical
+            assert isinstance(textdisp, tuple)
             (vx, vy) = textdisp
             if vx is None:
                 vx = fontsize * 0.5
@@ -385,7 +386,7 @@ def analyze(self, laparams: LAParams) -> None:
 
 
 class LTExpandableContainer(LTContainer[LTItemT]):
-    def __init__(self):
+    def __init__(self) -> None:
         LTContainer.__init__(self, (+INF, +INF, -INF, -INF))
         return
 
@@ -399,7 +400,7 @@ def add(self, obj: LTComponent) -> None:  # type: ignore[override]
 
 
 class LTTextContainer(LTExpandableContainer[LTItemT], LTText):
-    def __init__(self):
+    def __init__(self) -> None:
         LTText.__init__(self)
         LTExpandableContainer.__init__(self)
         return
@@ -569,7 +570,7 @@ class LTTextBox(LTTextContainer[LTTextLine]):
     of LTTextLine objects.
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         LTTextContainer.__init__(self)
         self.index: int = -1
         return
diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py
index f0ed8a87..4e5e6df9 100644
--- a/pdfminer/lzw.py
+++ b/pdfminer/lzw.py
@@ -1,5 +1,6 @@
 from io import BytesIO
 import logging
+from typing import BinaryIO, Iterator, List, Optional, cast
 
 
 logger = logging.getLogger(__name__)
@@ -11,16 +12,17 @@ class CorruptDataError(Exception):
 
 class LZWDecoder:
 
-    def __init__(self, fp):
+    def __init__(self, fp: BinaryIO):
         self.fp = fp
         self.buff = 0
         self.bpos = 8
         self.nbits = 9
-        self.table = None
-        self.prevbuf = None
+        # NB: self.table stores None only in indices 256 and 257
+        self.table: Optional[List[Optional[bytes]]] = None
+        self.prevbuf: Optional[bytes] = None
         return
 
-    def readbits(self, bits):
+    def readbits(self, bits: int) -> int:
         v = 0
         while 1:
             # the number of remaining bits we can get from the current buffer.
@@ -45,7 +47,7 @@ def readbits(self, bits):
                 self.bpos = 0
         return v
 
-    def feed(self, code):
+    def feed(self, code: int) -> bytes:
         x = b''
         if code == 256:
             self.table = [bytes((c,)) for c in range(256)]  # 0-255
@@ -56,14 +58,16 @@ def feed(self, code):
         elif code == 257:
             pass
         elif not self.prevbuf:
-            x = self.prevbuf = self.table[code]
+            assert self.table is not None
+            x = self.prevbuf = cast(bytes, self.table[code])  # assume not None
         else:
+            assert self.table is not None
             if code < len(self.table):
-                x = self.table[code]
+                x = cast(bytes, self.table[code])  # assume not None
                 self.table.append(self.prevbuf+x[:1])
             elif code == len(self.table):
                 self.table.append(self.prevbuf+self.prevbuf[:1])
-                x = self.table[code]
+                x = cast(bytes, self.table[code])
             else:
                 raise CorruptDataError
             table_length = len(self.table)
@@ -76,7 +80,7 @@ def feed(self, code):
             self.prevbuf = x
         return x
 
-    def run(self):
+    def run(self) -> Iterator[bytes]:
         while 1:
             try:
                 code = self.readbits(self.nbits)
@@ -88,12 +92,13 @@ def run(self):
                 # just ignore corrupt data and stop yielding there
                 break
             yield x
+            assert self.table is not None
             logger.debug('nbits=%d, code=%d, output=%r, table=%r'
                          % (self.nbits, code, x, self.table[258:]))
         return
 
 
-def lzwdecode(data):
+def lzwdecode(data: bytes) -> bytes:
     fp = BytesIO(data)
     s = LZWDecoder(fp).run()
     return b''.join(s)
diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py
index f0d0f4f3..2512b932 100644
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@@ -2,17 +2,18 @@
 import re
 import struct
 from hashlib import sha256, md5
-from typing import Iterable
+from typing import (Any, Dict, Iterable, Iterator, KeysView, List, Optional,
+                    Sequence, Tuple, Type, cast)
 
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
 
 from . import settings
 from .arcfour import Arcfour
-from .pdfparser import PDFSyntaxError, PDFStreamParser
-from .pdftypes import PDFException, uint_value, PDFTypeError, PDFStream, \
+from .pdfparser import PDFSyntaxError, PDFParser, PDFStreamParser
+from .pdftypes import DecipherCallable, PDFException, PDFTypeError, PDFStream,\
     PDFObjectNotFound, decipher_all, int_value, str_value, list_value, \
-    dict_value, stream_value
+    uint_value, dict_value, stream_value
 from .psparser import PSEOF, literal_name, LIT, KWD
 from .utils import choplist, nunpack, decode_text
 
@@ -66,31 +67,33 @@ def __init__(self, *args):
 
 
 class PDFBaseXRef:
-
-    def get_trailer(self):
+    def get_trailer(self) -> Dict[Any, Any]:
         raise NotImplementedError
 
-    def get_objids(self):
+    def get_objids(self) -> Iterable[int]:
         return []
 
     # Must return
     #     (strmid, index, genno)
     #  or (None, pos, genno)
-    def get_pos(self, objid):
+    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
         raise KeyError(objid)
 
+    def load(self, parser: PDFParser) -> None:
+        raise NotImplementedError
+
 
 class PDFXRef(PDFBaseXRef):
 
-    def __init__(self):
-        self.offsets = {}
-        self.trailer = {}
+    def __init__(self) -> None:
+        self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {}
+        self.trailer: Dict[Any, Any] = {}
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFXRef: offsets=%r>' % (self.offsets.keys())
 
-    def load(self, parser):
+    def load(self, parser: PDFParser) -> None:
         while True:
             try:
                 (pos, line) = parser.nextline()
@@ -124,15 +127,15 @@ def load(self, parser):
                     error_msg = 'Invalid XRef format: {!r}, line={!r}'\
                         .format(parser, line)
                     raise PDFNoValidXRef(error_msg)
-                (pos, genno, use) = f
-                if use != b'n':
+                (pos_b, genno_b, use_b) = f
+                if use_b != b'n':
                     continue
-                self.offsets[objid] = (None, int(pos), int(genno))
+                self.offsets[objid] = (None, int(pos_b), int(genno_b))
         log.info('xref objects: %r', self.offsets)
         self.load_trailer(parser)
         return
 
-    def load_trailer(self, parser):
+    def load_trailer(self, parser: PDFParser) -> None:
         try:
             (_, kwd) = parser.nexttoken()
             assert kwd is KWD(b'trailer'), str(kwd)
@@ -146,13 +149,13 @@ def load_trailer(self, parser):
         log.debug('trailer=%r', self.trailer)
         return
 
-    def get_trailer(self):
+    def get_trailer(self) -> Dict[Any, Any]:
         return self.trailer
 
-    def get_objids(self):
+    def get_objids(self) -> KeysView[int]:
         return self.offsets.keys()
 
-    def get_pos(self, objid):
+    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
         try:
             return self.offsets[objid]
         except KeyError:
@@ -166,25 +169,25 @@ def __repr__(self):
 
     PDFOBJ_CUE = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
 
-    def load(self, parser):
+    def load(self, parser: PDFParser) -> None:
         parser.seek(0)
         while 1:
             try:
-                (pos, line) = parser.nextline()
+                (pos, line_bytes) = parser.nextline()
             except PSEOF:
                 break
-            if line.startswith(b'trailer'):
+            if line_bytes.startswith(b'trailer'):
                 parser.seek(pos)
                 self.load_trailer(parser)
                 log.info('trailer: %r', self.trailer)
                 break
-            line = line.decode('latin-1')  # default pdf encoding
+            line = line_bytes.decode('latin-1')  # default pdf encoding
             m = self.PDFOBJ_CUE.match(line)
             if not m:
                 continue
-            (objid, genno) = m.groups()
-            objid = int(objid)
-            genno = int(genno)
+            (objid_s, genno_s) = m.groups()
+            objid = int(objid_s)
+            genno = int(genno_s)
             self.offsets[objid] = (None, pos, genno)
             # expand ObjStm.
             parser.seek(pos)
@@ -199,7 +202,7 @@ def load(self, parser):
                         raise PDFSyntaxError('N is not defined: %r' % stream)
                     n = 0
                 parser1 = PDFStreamParser(stream.get_data())
-                objs = []
+                objs: List[Any] = []
                 try:
                     while 1:
                         (_, obj) = parser1.nextobject()
@@ -215,17 +218,19 @@ def load(self, parser):
 
 class PDFXRefStream(PDFBaseXRef):
 
-    def __init__(self):
-        self.data = None
-        self.entlen = None
-        self.fl1 = self.fl2 = self.fl3 = None
-        self.ranges = []
+    def __init__(self) -> None:
+        self.data: Optional[bytes] = None
+        self.entlen: Optional[int] = None
+        self.fl1: Optional[int] = None
+        self.fl2: Optional[int] = None
+        self.fl3: Optional[int] = None
+        self.ranges: List[Tuple[Any, Any]] = []
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFXRefStream: ranges=%r>' % (self.ranges)
 
-    def load(self, parser):
+    def load(self, parser: PDFParser) -> None:
         (_, objid) = parser.nexttoken()  # ignored
         (_, genno) = parser.nexttoken()  # ignored
         (_, kwd) = parser.nexttoken()
@@ -237,8 +242,10 @@ def load(self, parser):
         index_array = stream.get('Index', (0, size))
         if len(index_array) % 2 != 0:
             raise PDFSyntaxError('Invalid index number')
-        self.ranges.extend(choplist(2, index_array))
+        self.ranges.extend(cast(Tuple[Any, Any], choplist(2, index_array)))
         (self.fl1, self.fl2, self.fl3) = stream['W']
+        assert (self.fl1 is not None and self.fl2 is not None
+                and self.fl3 is not None)
         self.data = stream.get_data()
         self.entlen = self.fl1+self.fl2+self.fl3
         self.trailer = stream.attrs
@@ -250,9 +257,11 @@ def load(self, parser):
     def get_trailer(self):
         return self.trailer
 
-    def get_objids(self):
+    def get_objids(self) -> Iterator[int]:
         for (start, nobjs) in self.ranges:
             for i in range(nobjs):
+                assert self.entlen is not None
+                assert self.data is not None
                 offset = self.entlen * i
                 ent = self.data[offset:offset+self.entlen]
                 f1 = nunpack(ent[:self.fl1], 1)
@@ -260,7 +269,7 @@ def get_objids(self):
                     yield start+i
         return
 
-    def get_pos(self, objid):
+    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
         index = 0
         for (start, nobjs) in self.ranges:
             if start <= objid and objid < start+nobjs:
@@ -270,6 +279,10 @@ def get_pos(self, objid):
                 index += nobjs
         else:
             raise KeyError(objid)
+        assert self.entlen is not None
+        assert self.data is not None
+        assert (self.fl1 is not None and self.fl2 is not None
+                and self.fl3 is not None)
         offset = self.entlen * index
         ent = self.data[offset:offset+self.entlen]
         f1 = nunpack(ent[:self.fl1], 1)
@@ -288,16 +301,17 @@ class PDFStandardSecurityHandler:
 
     PASSWORD_PADDING = (b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08'
                         b'..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz')
-    supported_revisions: Iterable[int] = (2, 3)
+    supported_revisions: Tuple[int, ...] = (2, 3)
 
-    def __init__(self, docid, param, password=''):
+    def __init__(self, docid: Sequence[bytes], param: Dict[str, Any],
+                 password: str = ''):
         self.docid = docid
         self.param = param
         self.password = password
         self.init()
         return
 
-    def init(self):
+    def init(self) -> None:
         self.init_params()
         if self.r not in self.supported_revisions:
             error_msg = 'Unsupported revision: param=%r' % self.param
@@ -305,7 +319,7 @@ def init(self):
         self.init_key()
         return
 
-    def init_params(self):
+    def init_params(self) -> None:
         self.v = int_value(self.param.get('V', 0))
         self.r = int_value(self.param['R'])
         self.p = uint_value(self.param['P'], 32)
@@ -314,22 +328,22 @@ def init_params(self):
         self.length = int_value(self.param.get('Length', 40))
         return
 
-    def init_key(self):
+    def init_key(self) -> None:
         self.key = self.authenticate(self.password)
         if self.key is None:
             raise PDFPasswordIncorrect
         return
 
-    def is_printable(self):
+    def is_printable(self) -> bool:
         return bool(self.p & 4)
 
-    def is_modifiable(self):
+    def is_modifiable(self) -> bool:
         return bool(self.p & 8)
 
-    def is_extractable(self):
+    def is_extractable(self) -> bool:
         return bool(self.p & 16)
 
-    def compute_u(self, key):
+    def compute_u(self, key: bytes) -> bytes:
         if self.r == 2:
             # Algorithm 3.4
             return Arcfour(key).encrypt(self.PASSWORD_PADDING)  # 2
@@ -344,7 +358,7 @@ def compute_u(self, key):
             result += result  # 6
             return result
 
-    def compute_encryption_key(self, password):
+    def compute_encryption_key(self, password: bytes) -> bytes:
         # Algorithm 3.2
         password = (password + self.PASSWORD_PADDING)[:32]  # 1
         hash = md5(password)  # 2
@@ -353,7 +367,7 @@ def compute_encryption_key(self, password):
         hash.update(struct.pack('<L', self.p))  # 4
         hash.update(self.docid[0])  # 5
         if self.r >= 4:
-            if not self.encrypt_metadata:
+            if not cast(PDFStandardSecurityHandlerV4, self).encrypt_metadata:
                 hash.update(b'\xff\xff\xff\xff')
         result = hash.digest()
         n = 5
@@ -363,28 +377,28 @@ def compute_encryption_key(self, password):
                 result = md5(result[:n]).digest()
         return result[:n]
 
-    def authenticate(self, password):
-        password = password.encode("latin1")
-        key = self.authenticate_user_password(password)
+    def authenticate(self, password: str) -> Optional[bytes]:
+        password_bytes = password.encode("latin1")
+        key = self.authenticate_user_password(password_bytes)
         if key is None:
-            key = self.authenticate_owner_password(password)
+            key = self.authenticate_owner_password(password_bytes)
         return key
 
-    def authenticate_user_password(self, password):
+    def authenticate_user_password(self, password: bytes) -> Optional[bytes]:
         key = self.compute_encryption_key(password)
         if self.verify_encryption_key(key):
             return key
         else:
             return None
 
-    def verify_encryption_key(self, key):
+    def verify_encryption_key(self, key: bytes) -> bool:
         # Algorithm 3.6
         u = self.compute_u(key)
         if self.r == 2:
             return u == self.u
         return u[:16] == self.u[:16]
 
-    def authenticate_owner_password(self, password):
+    def authenticate_owner_password(self, password: bytes) -> Optional[bytes]:
         # Algorithm 3.7
         password = (password + self.PASSWORD_PADDING)[:32]
         hash = md5(password)
@@ -404,12 +418,14 @@ def authenticate_owner_password(self, password):
                 user_password = Arcfour(k).decrypt(user_password)
         return self.authenticate_user_password(user_password)
 
-    def decrypt(self, objid, genno, data, attrs=None):
+    def decrypt(self, objid: int, genno: int, data: bytes,
+                attrs: Optional[Dict[str, Any]] = None) -> bytes:
         return self.decrypt_rc4(objid, genno, data)
 
-    def decrypt_rc4(self, objid, genno, data):
+    def decrypt_rc4(self, objid: int, genno: int, data: bytes) -> bytes:
+        assert self.key is not None
         key = self.key + struct.pack('<L', objid)[:3] \
-              + struct.pack('<L', genno)[:2]
+            + struct.pack('<L', genno)[:2]
         hash = md5(key)
         key = hash.digest()[:min(len(key), 16)]
         return Arcfour(key).decrypt(data)
@@ -545,24 +561,25 @@ class PDFDocument:
 
     """
 
-    security_handler_registry = {
+    security_handler_registry: Dict[int, Type[PDFStandardSecurityHandler]] = {
         1: PDFStandardSecurityHandler,
         2: PDFStandardSecurityHandler,
         4: PDFStandardSecurityHandlerV4,
         5: PDFStandardSecurityHandlerV5,
     }
 
-    def __init__(self, parser, password='', caching=True, fallback=True):
+    def __init__(self, parser: PDFParser, password: str = '',
+                 caching: bool = True, fallback: bool = True):
         "Set the document to use a given PDFParser object."
         self.caching = caching
-        self.xrefs = []
+        self.xrefs: List[PDFBaseXRef] = []
         self.info = []
-        self.catalog = None
-        self.encryption = None
-        self.decipher = None
+        self.catalog: Dict[str, Any] = {}
+        self.encryption: Optional[Tuple[Any, Any]] = None
+        self.decipher: Optional[DecipherCallable] = None
         self._parser = None
-        self._cached_objs = {}
-        self._parsed_objs = {}
+        self._cached_objs: Dict[int, Tuple[Any, int]] = {}
+        self._parsed_objs: Dict[int, Tuple[List[Any], Any]] = {}
         self._parser = parser
         self._parser.set_document(self)
         self.is_printable = self.is_modifiable = self.is_extractable = True
@@ -575,9 +592,9 @@ def __init__(self, parser, password='', caching=True, fallback=True):
             pass  # fallback = True
         if fallback:
             parser.fallback = True
-            xref = PDFXRefFallback()
-            xref.load(parser)
-            self.xrefs.append(xref)
+            newxref = PDFXRefFallback()
+            newxref.load(parser)
+            self.xrefs.append(newxref)
         for xref in self.xrefs:
             trailer = xref.get_trailer()
             if not trailer:
@@ -611,7 +628,8 @@ def __init__(self, parser, password='', caching=True, fallback=True):
 
     # _initialize_password(password=b'')
     #   Perform the initialization with a given password.
-    def _initialize_password(self, password=''):
+    def _initialize_password(self, password: str = '') -> None:
+        assert self.encryption is not None
         (docid, param) = self.encryption
         if literal_name(param.get('Filter')) != 'Standard':
             raise PDFEncryptionError('Unknown filter: param=%r' % param)
@@ -624,15 +642,17 @@ def _initialize_password(self, password=''):
         self.is_printable = handler.is_printable()
         self.is_modifiable = handler.is_modifiable()
         self.is_extractable = handler.is_extractable()
+        assert self._parser is not None
         self._parser.fallback = False  # need to read streams with exact length
         return
 
-    def _getobj_objstm(self, stream, index, objid):
+    def _getobj_objstm(self, stream: PDFStream, index: int, objid: int) -> Any:
         if stream.objid in self._parsed_objs:
             (objs, n) = self._parsed_objs[stream.objid]
         else:
             (objs, n) = self._get_objects(stream)
             if self.caching:
+                assert stream.objid is not None
                 self._parsed_objs[stream.objid] = (objs, n)
         i = n*2+index
         try:
@@ -641,7 +661,7 @@ def _getobj_objstm(self, stream, index, objid):
             raise PDFSyntaxError('index too big: %r' % index)
         return obj
 
-    def _get_objects(self, stream):
+    def _get_objects(self, stream: PDFStream) -> Tuple[List[Any], Any]:
         if stream.get('Type') is not LITERAL_OBJSTM:
             if settings.STRICT:
                 raise PDFSyntaxError('Not a stream object: %r' % stream)
@@ -662,7 +682,8 @@ def _get_objects(self, stream):
             pass
         return (objs, n)
 
-    def _getobj_parse(self, pos, objid):
+    def _getobj_parse(self, pos: int, objid: Any) -> Any:
+        assert self._parser is not None
         self._parser.seek(pos)
         (_, objid1) = self._parser.nexttoken()  # objid
         (_, genno) = self._parser.nexttoken()  # genno
@@ -690,7 +711,7 @@ def _getobj_parse(self, pos, objid):
         return obj
 
     # can raise PDFObjectNotFound
-    def getobj(self, objid):
+    def getobj(self, objid: int) -> Any:
         """Get object from PDF
 
         :raises PDFException if PDFDocument is not initialized
@@ -729,11 +750,14 @@ def getobj(self, objid):
                 self._cached_objs[objid] = (obj, genno)
         return obj
 
-    def get_outlines(self):
+    OutlineType = Tuple[Any, Any, Any, Any, Any]
+
+    def get_outlines(self) -> Iterator[OutlineType]:
         if 'Outlines' not in self.catalog:
             raise PDFNoOutlines
 
-        def search(entry, level):
+        def search(entry: Any, level: int
+                   ) -> Iterator[PDFDocument.OutlineType]:
             entry = dict_value(entry)
             if 'Title' in entry:
                 if 'A' in entry or 'Dest' in entry:
@@ -749,7 +773,7 @@ def search(entry, level):
             return
         return search(self.catalog['Outlines'], 0)
 
-    def lookup_name(self, cat, key):
+    def lookup_name(self, cat: str, key: str) -> Any:
         try:
             names = dict_value(self.catalog['Names'])
         except (PDFTypeError, KeyError):
@@ -757,14 +781,14 @@ def lookup_name(self, cat, key):
         # may raise KeyError
         d0 = dict_value(names[cat])
 
-        def lookup(d):
+        def lookup(d: Dict[str, Any]) -> Any:
             if 'Limits' in d:
                 (k1, k2) = list_value(d['Limits'])
                 if key < k1 or k2 < key:
                     return None
             if 'Names' in d:
                 objs = list_value(d['Names'])
-                names = dict(choplist(2, objs))
+                names = dict(cast(Tuple[Any, Any], choplist(2, objs)))
                 return names[key]
             if 'Kids' in d:
                 for c in list_value(d['Kids']):
@@ -774,7 +798,7 @@ def lookup(d):
             raise KeyError((cat, key))
         return lookup(d0)
 
-    def get_dest(self, name):
+    def get_dest(self, name: str) -> Any:
         try:
             # PDF-1.2 or later
             obj = self.lookup_name('Dests', name)
@@ -789,7 +813,7 @@ def get_dest(self, name):
         return obj
 
     # find_xref
-    def find_xref(self, parser):
+    def find_xref(self, parser: PDFParser) -> int:
         """Internal function used to locate the first XRef."""
         # search the last xref table by scanning the file backwards.
         prev = None
@@ -803,10 +827,12 @@ def find_xref(self, parser):
         else:
             raise PDFNoValidXRef('Unexpected EOF')
         log.info('xref found: pos=%r', prev)
+        assert prev is not None
         return int(prev)
 
     # read xref table
-    def read_xref_from(self, parser, start, xrefs):
+    def read_xref_from(self, parser: PDFParser, start: int,
+                       xrefs: List[PDFBaseXRef]) -> None:
         """Reads XRefs from the given location."""
         parser.seek(start)
         parser.reset()
@@ -819,7 +845,7 @@ def read_xref_from(self, parser, start, xrefs):
             # XRefStream: PDF-1.5
             parser.seek(pos)
             parser.reset()
-            xref = PDFXRefStream()
+            xref: PDFBaseXRef = PDFXRefStream()
             xref.load(parser)
         else:
             if token is parser.KEYWORD_XREF:
diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py
index 19bcca0b..df1634f9 100644
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@@ -2,9 +2,11 @@
 import struct
 import sys
 from io import BytesIO
+from typing import Any, Dict, Iterable, Optional, Tuple, Union
 
 from . import settings
 from .cmapdb import CMap
+from .cmapdb import CMapBase
 from .cmapdb import CMapDB
 from .cmapdb import CMapParser
 from .cmapdb import FileUnicodeMap
@@ -25,7 +27,7 @@
 from .psparser import PSLiteral
 from .psparser import PSStackParser
 from .psparser import literal_name
-from .utils import apply_matrix_norm
+from .utils import Rect, apply_matrix_norm
 from .utils import choplist
 from .utils import isnumber
 from .utils import nunpack
@@ -484,9 +486,10 @@ class PDFUnicodeNotDefined(PDFFontError):
 
 class PDFFont:
 
-    def __init__(self, descriptor, widths, default_width=None):
+    def __init__(self, descriptor: Dict[str, Any], widths: Dict[int, float],
+                 default_width: Optional[float] = None):
         self.descriptor = descriptor
-        self.widths = resolve_all(widths)
+        self.widths: Dict[int, float] = resolve_all(widths)
         self.fontname = resolve1(descriptor.get('FontName', 'unknown'))
         if isinstance(self.fontname, PSLiteral):
             self.fontname = literal_name(self.fontname)
@@ -499,8 +502,8 @@ def __init__(self, descriptor, widths, default_width=None):
         else:
             self.default_width = default_width
         self.leading = num_value(descriptor.get('Leading', 0))
-        self.bbox = list_value(resolve_all(descriptor.get('FontBBox',
-                                                          (0, 0, 0, 0))))
+        self.bbox: Rect = list_value(  # type: ignore[assignment]
+            resolve_all(descriptor.get('FontBBox', (0, 0, 0, 0))))
         self.hscale = self.vscale = .001
 
         # PDF RM 9.8.1 specifies /Descent should always be a negative number.
@@ -514,48 +517,52 @@ def __init__(self, descriptor, widths, default_width=None):
     def __repr__(self):
         return '<PDFFont>'
 
-    def is_vertical(self):
+    def is_vertical(self) -> bool:
         return False
 
-    def is_multibyte(self):
+    def is_multibyte(self) -> bool:
         return False
 
-    def decode(self, bytes):
+    def decode(self, bytes: bytes) -> Iterable[int]:
         return bytearray(bytes)  # map(ord, bytes)
 
-    def get_ascent(self):
+    def get_ascent(self) -> float:
         """Ascent above the baseline, in text space units"""
         return self.ascent * self.vscale
 
-    def get_descent(self):
+    def get_descent(self) -> float:
         """Descent below the baseline, in text space units; always negative"""
         return self.descent * self.vscale
 
-    def get_width(self):
+    def get_width(self) -> float:
         w = self.bbox[2]-self.bbox[0]
         if w == 0:
             w = -self.default_width
         return w * self.hscale
 
-    def get_height(self):
+    def get_height(self) -> float:
         h = self.bbox[3]-self.bbox[1]
         if h == 0:
             h = self.ascent - self.descent
         return h * self.vscale
 
-    def char_width(self, cid):
+    def char_width(self, cid: int) -> float:
         try:
             return self.widths[cid] * self.hscale
         except KeyError:
             try:
-                return self.widths[self.to_unichr(cid)] * self.hscale
+                # Type confusion: this appears to be a relic from Python 2
+                return (self.widths[self.to_unichr(cid)]  # type: ignore[index]
+                        * self.hscale)
             except (KeyError, PDFUnicodeNotDefined):
                 return self.default_width * self.hscale
 
-    def char_disp(self, cid):
+    def char_disp(self, cid: int
+                  ) -> Union[float, Tuple[Optional[float], float]]:
+        "Returns an integer for horizontal fonts, a tuple for vertical fonts."
         return 0
 
-    def string_width(self, s):
+    def string_width(self, s: bytes) -> float:
         return sum(self.char_width(cid) for cid in self.decode(s))
 
     def to_unichr(self, cid: int) -> str:
@@ -673,7 +680,7 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
         cid_ordering = resolve1(
             self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1")
         self.cidcoding = '{}-{}'.format(cid_registry, cid_ordering)
-        self.cmap = self.get_cmap_from_spec(spec, strict)
+        self.cmap: CMapBase = self.get_cmap_from_spec(spec, strict)
 
         try:
             descriptor = dict_value(spec['FontDescriptor'])
@@ -723,7 +730,8 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
         PDFFont.__init__(self, descriptor, widths, default_width=default_width)
         return
 
-    def get_cmap_from_spec(self, spec, strict):
+    def get_cmap_from_spec(self, spec: Dict[str, Any], strict: bool
+                           ) -> CMapBase:
         """Get cmap from font specification
 
         For certain PDFs, Encoding Type isn't mentioned as an attribute of
@@ -742,7 +750,7 @@ def get_cmap_from_spec(self, spec, strict):
             return CMap()
 
     @staticmethod
-    def _get_cmap_name(spec, strict):
+    def _get_cmap_name(spec: Dict[str, Any], strict: bool) -> str:
         """Get cmap name from font specification"""
         cmap_name = 'unknown'  # default value
 
@@ -756,15 +764,14 @@ def _get_cmap_name(spec, strict):
             if strict:
                 raise PDFFontError('Encoding is unspecified')
 
-        if type(cmap_name) is PDFStream:
+        if type(cmap_name) is PDFStream:  # type: ignore[comparison-overlap]
             if 'CMapName' in cmap_name:
                 cmap_name = cmap_name.get('CMapName').name
             else:
                 if strict:
                     raise PDFFontError('CMapName unspecified for encoding')
 
-        cmap_name = IDENTITY_ENCODER.get(cmap_name, cmap_name)
-        return cmap_name
+        return IDENTITY_ENCODER.get(cmap_name, cmap_name)
 
     def __repr__(self):
         return '<PDFCIDFont: basefont={!r}, cidcoding={!r}>'\
@@ -776,7 +783,7 @@ def is_vertical(self):
     def is_multibyte(self):
         return True
 
-    def decode(self, bytes):
+    def decode(self, bytes: bytes) -> Iterable[int]:
         return self.cmap.decode(bytes)
 
     def char_disp(self, cid):
@@ -802,4 +809,4 @@ def main(argv):
 
 
 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 6ce48530..92baf0b6 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -33,7 +33,7 @@
 from .pdffont import PDFCIDFont
 from .pdfcolor import PDFColorSpace
 from .pdfcolor import PREDEFINED_COLORSPACE
-from .utils import Matrix, Point, PathSegment
+from .utils import Matrix, Point, PathSegment, Rect
 from .utils import choplist
 from .utils import mult_matrix
 from .utils import MATRIX_IDENTITY
@@ -61,7 +61,7 @@ class PDFTextState:
     matrix: Matrix
     linematrix: Point
 
-    def __init__(self):
+    def __init__(self) -> None:
         self.font: Optional[PDFFont] = None
         self.fontsize: float = 0
         self.charspace: float = 0
@@ -111,7 +111,7 @@ def reset(self) -> None:
 
 class PDFGraphicState:
 
-    def __init__(self):
+    def __init__(self) -> None:
         self.linewidth: float = 0
         self.linecap = None
         self.linejoin = None
@@ -197,16 +197,19 @@ def get_font(self, objid: Any, spec: Mapping[str, Any]) -> PDFFont:
                 subtype = 'Type1'
             if subtype in ('Type1', 'MMType1'):
                 # Type1 Font
-                font = PDFType1Font(self, spec)
+                font = PDFType1Font(
+                    self, spec)  # type: ignore[no-untyped-call]
             elif subtype == 'TrueType':
                 # TrueType Font
-                font = PDFTrueTypeFont(self, spec)
+                font = PDFTrueTypeFont(
+                    self, spec)  # type: ignore[no-untyped-call]
             elif subtype == 'Type3':
                 # Type3 Font
-                font = PDFType3Font(self, spec)
+                font = PDFType3Font(
+                    self, spec)  # type: ignore[no-untyped-call]
             elif subtype in ('CIDFontType0', 'CIDFontType2'):
                 # CID Font
-                font = PDFCIDFont(self, spec)
+                font = PDFCIDFont(self, spec)  # type: ignore[no-untyped-call]
             elif subtype == 'Type0':
                 # Type0 Font
                 dfonts = list_value(spec['DescendantFonts'])
@@ -219,7 +222,8 @@ def get_font(self, objid: Any, spec: Mapping[str, Any]) -> PDFFont:
             else:
                 if settings.STRICT:
                     raise PDFFontError('Invalid Font spec: %r' % spec)
-                font = PDFType1Font(self, spec)  # this is so wrong!
+                font = PDFType1Font(  # this is so wrong!
+                    self, spec)  # type: ignore[no-untyped-call]
             if objid and self.caching:
                 self._cached_fonts[objid] = font
         return font
@@ -227,13 +231,13 @@ def get_font(self, objid: Any, spec: Mapping[str, Any]) -> PDFFont:
 
 class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
 
-    def __init__(self, streams: List[Any]):
+    def __init__(self, streams: Sequence[Any]):
         self.streams = streams
         self.istream = 0
         # PSStackParser.__init__(fp=None) is safe only because we've overloaded
         # all the methods that would attempt to access self.fp without first
         # calling self.fillfp().
-        PSStackParser.__init__(self, None)  # type: ignore
+        PSStackParser.__init__(self, None)  # type: ignore[arg-type]
         return
 
     def fillfp(self) -> None:
@@ -260,7 +264,7 @@ def fillbuf(self) -> None:
             self.buf = self.fp.read(self.BUFSIZ)
             if self.buf:
                 break
-            self.fp = None  # type: ignore
+            self.fp = None  # type: ignore[assignment]
         self.charpos = 0
         return
 
@@ -887,8 +891,9 @@ def do_Do(self, xobjid: Any) -> None:
         subtype = xobj.get('Subtype')
         if subtype is LITERAL_FORM and 'BBox' in xobj:
             interpreter = self.dup()
-            bbox = list_value(xobj['BBox'])
-            matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
+            bbox: Rect = list_value(xobj['BBox'])  # type: ignore[assignment]
+            matrix: Matrix = list_value(xobj.get(
+                'Matrix', MATRIX_IDENTITY))  # type: ignore[assignment]
             # According to PDF reference 1.7 section 4.9.1, XObjects in
             # earlier PDFs (prior to v1.2) use the page's Resources entry
             # instead of having their own Resources entry.
@@ -939,7 +944,7 @@ def render_contents(self, resources: Any, streams: Sequence[Any],
         self.execute(list_value(streams))
         return
 
-    def execute(self, streams: List[Any]) -> None:
+    def execute(self, streams: Sequence[Any]) -> None:
         try:
             parser = PDFContentParser(streams)
         except PSEOF:
diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py
index bfcd013b..168e112f 100644
--- a/pdfminer/pdfpage.py
+++ b/pdfminer/pdfpage.py
@@ -1,5 +1,6 @@
 import logging
-from typing import Any, BinaryIO, Container, Iterator, List, Optional
+from typing import (Any, BinaryIO, Container, Dict, Iterator, List, Optional,
+                    Tuple)
 import warnings
 from . import settings
 from .psparser import LIT
@@ -78,7 +79,8 @@ def __repr__(self) -> str:
 
     @classmethod
     def create_pages(cls, document: PDFDocument) -> Iterator["PDFPage"]:
-        def search(obj, parent):
+        def search(obj: Any, parent: Dict[str, Any]
+                   ) -> Iterator[Tuple[int, Dict[Any, Any]]]:
             if isinstance(obj, int):
                 objid = obj
                 tree = dict_value(document.getobj(objid)).copy()
diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py
index b585ef05..f7cfeb17 100644
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@@ -77,7 +77,9 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None:
             if len(self.curstack) >= 2:
                 try:
                     ((_, objid), (_, genno)) = self.pop(2)
-                    (objid, genno) = (int(objid), int(genno))  # type: ignore
+                    (objid, genno) = (
+                        int(objid), int(genno))  # type: ignore[arg-type]
+                    assert self.doc is not None
                     obj = PDFObjRef(self.doc, objid, genno)
                     self.push((pos, obj))
                 except PSSyntaxError:
@@ -89,7 +91,7 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None:
             objlen = 0
             if not self.fallback:
                 try:
-                    objlen = int_value(dic['Length'])  # type: ignore
+                    objlen = int_value(dic['Length'])
                 except KeyError:
                     if settings.STRICT:
                         raise PDFSyntaxError('/Length is undefined: %r' % dic)
@@ -159,7 +161,8 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None:
             # reference to indirect object
             try:
                 ((_, objid), (_, genno)) = self.pop(2)
-                (objid, genno) = (int(objid), int(genno))  # type: ignore
+                (objid, genno) = (
+                    int(objid), int(genno))  # type: ignore[arg-type]
                 obj = PDFObjRef(self.doc, objid, genno)
                 self.push((pos, obj))
             except PSSyntaxError:
diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py
index 14c729b8..09adfa02 100644
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@@ -1,5 +1,7 @@
 import zlib
 import logging
+from typing import (TYPE_CHECKING, Any, Dict, Iterable, Optional, Protocol,
+                    Union, List, Tuple, cast)
 from .lzw import lzwdecode
 from .ascii85 import ascii85decode
 from .ascii85 import asciihexdecode
@@ -10,7 +12,9 @@
 from .psparser import LIT
 from . import settings
 from .utils import apply_png_predictor
-from .utils import isnumber
+
+if TYPE_CHECKING:
+    from .pdfdocument import PDFDocument
 
 
 log = logging.getLogger(__name__)
@@ -28,6 +32,12 @@
 LITERALS_JBIG2_DECODE = (LIT('JBIG2Decode'),)
 
 
+class DecipherCallable(Protocol):
+    def __call__(self, objid: int, genno: int, data: bytes,
+                 attrs: Optional[Dict[str, Any]] = None) -> bytes:
+        raise NotImplementedError
+
+
 class PDFObject(PSObject):
     pass
 
@@ -54,7 +64,7 @@ class PDFNotImplementedError(PDFException):
 
 class PDFObjRef(PDFObject):
 
-    def __init__(self, doc, objid, _):
+    def __init__(self, doc: Optional["PDFDocument"], objid: int, _: Any):
         if objid == 0:
             if settings.STRICT:
                 raise PDFValueError('PDF object id cannot be 0.')
@@ -62,17 +72,18 @@ def __init__(self, doc, objid, _):
         self.objid = objid
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFObjRef:%d>' % (self.objid)
 
-    def resolve(self, default=None):
+    def resolve(self, default: Any = None) -> Any:
+        assert self.doc is not None
         try:
             return self.doc.getobj(self.objid)
         except PDFObjectNotFound:
             return default
 
 
-def resolve1(x, default=None):
+def resolve1(x: Any, default: Any = None) -> Any:
     """Resolves an object.
 
     If this is an array or dictionary, it may still contains
@@ -83,7 +94,7 @@ def resolve1(x, default=None):
     return x
 
 
-def resolve_all(x, default=None):
+def resolve_all(x: Any, default: Any = None) -> Any:
     """Recursively resolves the given object and all the internals.
 
     Make sure there is no indirect reference within the nested object.
@@ -99,7 +110,8 @@ def resolve_all(x, default=None):
     return x
 
 
-def decipher_all(decipher, objid, genno, x):
+def decipher_all(decipher: DecipherCallable, objid: int, genno: int, x: Any
+                 ) -> Any:
     """Recursively deciphers the given object.
     """
     if isinstance(x, bytes):
@@ -112,7 +124,7 @@ def decipher_all(decipher, objid, genno, x):
     return x
 
 
-def int_value(x):
+def int_value(x: Any) -> int:
     x = resolve1(x)
     if not isinstance(x, int):
         if settings.STRICT:
@@ -121,7 +133,7 @@ def int_value(x):
     return x
 
 
-def float_value(x):
+def float_value(x: Any) -> float:
     x = resolve1(x)
     if not isinstance(x, float):
         if settings.STRICT:
@@ -130,34 +142,34 @@ def float_value(x):
     return x
 
 
-def num_value(x):
+def num_value(x: Any) -> float:
     x = resolve1(x)
-    if not isnumber(x):
+    if not isinstance(x, (int, float)):  # == utils.isnumber(x)
         if settings.STRICT:
             raise PDFTypeError('Int or Float required: %r' % x)
         return 0
     return x
 
 
-def uint_value(x, n_bits):
+def uint_value(x: Any, n_bits: int) -> int:
     """Resolve number and interpret it as a two's-complement unsigned number"""
-    x = int_value(x)
-    if x > 0:
-        return x
+    xi = int_value(x)
+    if xi >= 0:  # zero is already a valid unsigned value
+        return xi
     else:
-        return x + 2**n_bits
+        return xi + cast(int, 2**n_bits)
 
 
-def str_value(x):
+def str_value(x: Any) -> bytes:
     x = resolve1(x)
     if not isinstance(x, bytes):
         if settings.STRICT:
             raise PDFTypeError('String required: %r' % x)
-        return ''
+        return b''
     return x
 
 
-def list_value(x):
+def list_value(x: Any) -> Union[List[Any], Tuple[Any, ...]]:
     x = resolve1(x)
     if not isinstance(x, (list, tuple)):
         if settings.STRICT:
@@ -166,7 +178,7 @@ def list_value(x):
     return x
 
 
-def dict_value(x):
+def dict_value(x: Any) -> Dict[Any, Any]:
     x = resolve1(x)
     if not isinstance(x, dict):
         if settings.STRICT:
@@ -176,7 +188,7 @@ def dict_value(x):
     return x
 
 
-def stream_value(x):
+def stream_value(x: Any) -> "PDFStream":
     x = resolve1(x)
     if not isinstance(x, PDFStream):
         if settings.STRICT:
@@ -187,22 +199,23 @@ def stream_value(x):
 
 class PDFStream(PDFObject):
 
-    def __init__(self, attrs, rawdata, decipher=None):
+    def __init__(self, attrs: Dict[str, Any], rawdata: bytes, decipher:
+                 Optional[DecipherCallable] = None) -> None:
         assert isinstance(attrs, dict), str(type(attrs))
         self.attrs = attrs
-        self.rawdata = rawdata
+        self.rawdata: Optional[bytes] = rawdata  # reset to None by decode()
         self.decipher = decipher
-        self.data = None
-        self.objid = None
-        self.genno = None
+        self.data: Optional[bytes] = None  # None until decode() has run
+        self.objid: Optional[int] = None  # assigned by set_objid()
+        self.genno: Optional[int] = None  # assigned by set_objid()
         return
 
-    def set_objid(self, objid, genno):
+    def set_objid(self, objid: int, genno: int) -> None:
         self.objid = objid
         self.genno = genno
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         if self.data is None:
             assert self.rawdata is not None
             return '<PDFStream(%r): raw=%d, %r>' % \
@@ -218,16 +231,16 @@ def __contains__(self, name):
     def __getitem__(self, name):
         return self.attrs[name]
 
-    def get(self, name, default=None):
+    def get(self, name: str, default: Any = None) -> Any:
         return self.attrs.get(name, default)
 
-    def get_any(self, names, default=None):
+    def get_any(self, names: Iterable[str], default: Any = None) -> Any:
         for name in names:
             if name in self.attrs:
                 return self.attrs[name]
         return default
 
-    def get_filters(self):
+    def get_filters(self) -> List[Tuple[Any, Any]]:
         filters = self.get_any(('F', 'Filter'))
         params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if not filters:
@@ -248,12 +261,14 @@ def get_filters(self):
         # return list solves https://github.com/pdfminer/pdfminer.six/issues/15
         return list(zip(_filters, params))
 
-    def decode(self):
+    def decode(self) -> None:
         assert self.data is None \
                and self.rawdata is not None, str((self.data, self.rawdata))
         data = self.rawdata
         if self.decipher:
             # Handle encryption
+            assert self.objid is not None
+            assert self.genno is not None
             data = self.decipher(self.objid, self.genno, data, self.attrs)
         filters = self.get_filters()
         if not filters:
@@ -314,10 +329,11 @@ def decode(self):
         self.rawdata = None
         return
 
-    def get_data(self):
+    def get_data(self) -> bytes:
         if self.data is None:
             self.decode()
+            assert self.data is not None
         return self.data
 
-    def get_rawdata(self):
+    def get_rawdata(self) -> Optional[bytes]:
         return self.rawdata
diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py
index 3ac72e57..81192425 100644
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@@ -103,7 +103,7 @@ def intern(self, name: PSLiteral.NameType) -> _SymbolT:
         else:
             # Type confusion issue: PSKeyword always takes bytes as name
             #                       PSLiteral uses either str or bytes
-            lit = self.klass(name)  # type: ignore
+            lit = self.klass(name)  # type: ignore[arg-type]
             self.dict[name] = lit
         return lit
 
diff --git a/pdfminer/runlength.py b/pdfminer/runlength.py
index f8ea228d..b79e18e6 100644
--- a/pdfminer/runlength.py
+++ b/pdfminer/runlength.py
@@ -6,7 +6,7 @@
 #
 
 
-def rldecode(data):
+def rldecode(data: bytes) -> bytes:
     """
     RunLength decoder (Adobe version) implementation based on PDF Reference
     version 1.4 section 3.3.4:
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
index edf5c6ee..c3d229c2 100644
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@@ -83,7 +83,7 @@ def compatible_encode_method(bytesorstring: Union[bytes, str],
     return bytesorstring.decode(encoding, erraction)
 
 
-def paeth_predictor(left, above, upper_left):
+def paeth_predictor(left: int, above: int, upper_left: int) -> int:
     # From http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html
     # Initial estimate
     p = left + above - upper_left