From 0e6871c16abb29df2868ab145b4ce451b4b6c777 Mon Sep 17 00:00:00 2001
From: Andrew Baumann <Andrew.Baumann@microsoft.com>
Date: Fri, 20 Aug 2021 16:54:46 -0700
Subject: [PATCH] general progress on annotations: finish utils; annotate
 more of pdfinterp, pdfdevice; document reason for # type: ignore comments;
 fix cyclic imports; satisfy flake8

---
 pdfminer/converter.py | 28 ++++++++++------
 pdfminer/layout.py    | 45 +++++++++++++++++--------
 pdfminer/pdfcolor.py  |  5 +--
 pdfminer/pdfdevice.py | 76 ++++++++++++++++++++++++++++---------------
 pdfminer/pdfinterp.py | 70 ++++++++++++++++++++++++---------------
 pdfminer/pdfpage.py   | 11 ++++---
 pdfminer/pdfparser.py |  3 +-
 pdfminer/psparser.py  |  4 +--
 pdfminer/utils.py     | 55 ++++++++++++++++++-------------
 9 files changed, 187 insertions(+), 110 deletions(-)

diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index f5b44716..ec1735bf 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -1,17 +1,18 @@
 import io
 import logging
-from pdfminer.pdftypes import PDFStream
+from pdfminer.pdfcolor import PDFColorSpace
 from typing import List
-from pdfminer.pdfpage import PDFPage
 import re
 import sys
 
 from . import utils
-from .layout import LTChar, LTLayoutContainer
+from .layout import LAParams
+from .layout import LTChar
 from .layout import LTContainer
 from .layout import LTCurve
 from .layout import LTFigure
 from .layout import LTImage
+from .layout import LTLayoutContainer
 from .layout import LTLine
 from .layout import LTPage
 from .layout import LTRect
@@ -21,9 +22,13 @@
 from .layout import LTTextGroup
 from .layout import LTTextLine
 from .pdfdevice import PDFTextDevice
-from .pdffont import PDFFont, PDFUnicodeNotDefined
-from .pdfinterp import PDFResourceManager
-from .utils import Matrix, Rect, apply_matrix_pt
+from .pdffont import PDFFont
+from .pdffont import PDFUnicodeNotDefined
+from .pdfinterp import PDFGraphicState, PDFResourceManager
+from .pdfpage import PDFPage
+from .pdftypes import PDFStream
+from .utils import Matrix, Rect
+from .utils import apply_matrix_pt
 from .utils import bbox2str
 from .utils import enc
 from .utils import mult_matrix
@@ -35,7 +40,8 @@ class PDFLayoutAnalyzer(PDFTextDevice):
     cur_item: LTLayoutContainer
     ctm: Matrix
 
-    def __init__(self, rsrcmgr: PDFResourceManager, pageno=1, laparams=None):
+    def __init__(self, rsrcmgr: PDFResourceManager, pageno: int = 1,
+                 laparams: LAParams = None):
         PDFTextDevice.__init__(self, rsrcmgr)
         self.pageno = pageno
         self.laparams = laparams
@@ -79,7 +85,8 @@ def render_image(self, name: str, stream: PDFStream) -> None:
         self.cur_item.add(item)
         return
 
-    def paint_path(self, gstate, stroke, fill, evenodd, path):
+    def paint_path(self, gstate: PDFGraphicState, stroke, fill, evenodd, path
+                   ) -> None:
         """Paint paths described in section 4.4 of the PDF reference manual"""
         shape = ''.join(x[0] for x in path)
 
@@ -130,8 +137,9 @@ def paint_path(self, gstate, stroke, fill, evenodd, path):
                                 gstate.scolor, gstate.ncolor)
                 self.cur_item.add(curve)
 
-    def render_char(self, matrix: Matrix, font: PDFFont, fontsize, scaling, rise, cid, ncs,
-                    graphicstate):
+    def render_char(self, matrix: Matrix, font: PDFFont, fontsize: float,
+                    scaling: float, rise: float, cid: int, ncs: PDFColorSpace,
+                    graphicstate: PDFGraphicState) -> float:
         try:
             text = font.to_unichr(cid)
             assert isinstance(text, str), str(type(text))
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index 490a8400..634ba43a 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -1,9 +1,11 @@
 import heapq
 import logging
-from pdfminer.pdftypes import PDFStream
-from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, TypeVar
+from typing import (Any, Dict, Generic, Iterable, Iterator, List, Optional,
+                    Sequence, Set, Tuple, TypeVar, cast)
 
-from .utils import INF, Matrix, Rect
+from .utils import INF
+from .utils import Matrix
+from .utils import Rect
 from .utils import Plane
 from .utils import apply_matrix_pt
 from .utils import bbox2str
@@ -11,6 +13,10 @@
 from .utils import get_bound
 from .utils import matrix2str
 from .utils import uniq
+from .pdfcolor import PDFColorSpace
+from .pdftypes import PDFStream
+from .pdfinterp import PDFGraphicState
+from .pdffont import PDFFont
 
 logger = logging.getLogger(__name__)
 
@@ -282,8 +288,9 @@ def get_text(self) -> str:
 class LTChar(LTComponent, LTText):
     """Actual letter in the text as a Unicode string."""
 
-    def __init__(self, matrix: Matrix, font, fontsize, scaling, rise,
-                 text: str, textwidth, textdisp, ncs, graphicstate):
+    def __init__(self, matrix: Matrix, font: PDFFont, fontsize, scaling, rise,
+                 text: str, textwidth, textdisp, ncs: PDFColorSpace,
+                 graphicstate: PDFGraphicState):
         LTText.__init__(self)
         self._text = text
         self.matrix = matrix
@@ -335,8 +342,10 @@ def is_compatible(self, obj: Any) -> bool:
         """Returns True if two characters can coexist in the same line."""
         return True
 
+
 LTContainerElement = TypeVar('LTContainerElement', LTItem, LTComponent)
 
+
 class LTContainer(LTComponent, Generic[LTContainerElement]):
     """Object that can be extended and analyzed"""
 
@@ -430,7 +439,8 @@ def add(self, obj: LTComponent) -> None:
         LTTextLine.add(self, obj)
         return
 
-    def find_neighbors(self, plane: Plane, ratio: float) -> List["LTTextLineHorizontal"]:
+    def find_neighbors(self, plane: Plane, ratio: float
+                       ) -> List["LTTextLineHorizontal"]:
         """
         Finds neighboring LTTextLineHorizontals in the plane.
 
@@ -486,7 +496,8 @@ def add(self, obj: LTComponent) -> None:
         LTTextLine.add(self, obj)
         return
 
-    def find_neighbors(self, plane: Plane, ratio: float) -> List["LTTextLineVertical"]:
+    def find_neighbors(self, plane: Plane, ratio: float
+                       ) -> List["LTTextLineVertical"]:
         """
         Finds neighboring LTTextLineVerticals in the plane.
 
@@ -600,7 +611,8 @@ def __init__(self, bbox: Rect):
         return
 
     # group_objects: group text object to textlines.
-    def group_objects(self, laparams: LAParams, objs: Iterable[LTComponent]) -> Iterator[LTTextLine]:
+    def group_objects(self, laparams: LAParams, objs: Iterable[LTComponent]
+                      ) -> Iterator[LTTextLine]:
         obj0 = None
         line = None
         for obj1 in objs:
@@ -670,11 +682,13 @@ def group_objects(self, laparams: LAParams, objs: Iterable[LTComponent]) -> Iter
             obj0 = obj1
         if line is None:
             line = LTTextLineHorizontal(laparams.word_margin)
-            line.add(obj0)  # type: ignore
+            assert obj0 is not None
+            line.add(obj0)
         yield line
         return
 
-    def group_textlines(self, laparams: LAParams, lines: Iterable[LTTextLine]) -> Iterator[LTTextBox]:
+    def group_textlines(self, laparams: LAParams, lines: Iterable[LTTextLine]
+                        ) -> Iterator[LTTextBox]:
         """Group neighboring lines to textboxes"""
         plane = Plane(self.bbox)
         plane.extend(lines)
@@ -705,7 +719,8 @@ def group_textlines(self, laparams: LAParams, lines: Iterable[LTTextLine]) -> It
                 yield box
         return
 
-    def group_textboxes(self, laparams: LAParams, boxes: Sequence[LTTextBox]) -> List[LTTextGroup]:
+    def group_textboxes(self, laparams: LAParams, boxes: Sequence[LTTextBox]
+                        ) -> List[LTTextGroup]:
         """Group textboxes hierarchically.
 
         Get pair-wise distances, via dist func defined below, and then merge
@@ -724,7 +739,7 @@ def group_textboxes(self, laparams: LAParams, boxes: Sequence[LTTextBox]) -> Lis
         :return: a list that has only one element, the final top level group.
         """
 
-        def dist(obj1:LTComponent, obj2:LTComponent) -> float:
+        def dist(obj1: LTComponent, obj2: LTComponent) -> float:
             """A distance function between two TextBoxes.
 
             Consider the bounding rectangle for obj1 and obj2.
@@ -752,7 +767,8 @@ def isany(obj1: LTComponent, obj2: LTComponent) -> Set[LTComponent]:
             objs = set(plane.find((x0, y0, x1, y1)))
             return objs.difference((obj1, obj2))
 
-        dists: List[Tuple[bool, float, int, int, LTTextContainer, LTTextContainer]] = []
+        dists: List[Tuple[bool, float, int, int, LTTextContainer,
+                          LTTextContainer]] = []
         for i in range(len(boxes)):
             box1 = boxes[i]
             for j in range(i+1, len(boxes)):
@@ -817,7 +833,8 @@ def getkey(box):
                 group.analyze(laparams)
                 assigner.run(group)
             textboxes.sort(key=lambda box: box.index)
-        self._objs = textboxes + otherobjs + empties
+        self._objs = (cast(List[LTComponent], textboxes) + otherobjs
+                      + cast(List[LTComponent], empties))
         return
 
 
diff --git a/pdfminer/pdfcolor.py b/pdfminer/pdfcolor.py
index ff28d54e..f6aa442d 100644
--- a/pdfminer/pdfcolor.py
+++ b/pdfminer/pdfcolor.py
@@ -1,4 +1,5 @@
 import collections
+from typing import Dict
 from .psparser import LIT
 
 
@@ -9,7 +10,7 @@
 
 class PDFColorSpace:
 
-    def __init__(self, name, ncomponents):
+    def __init__(self, name: str, ncomponents: int):
         self.name = name
         self.ncomponents = ncomponents
         return
@@ -19,7 +20,7 @@ def __repr__(self):
                (self.name, self.ncomponents)
 
 
-PREDEFINED_COLORSPACE = collections.OrderedDict()
+PREDEFINED_COLORSPACE: Dict[str, PDFColorSpace] = collections.OrderedDict()
 
 for (name, n) in [
     ('DeviceGray', 1),  # default value first
diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py
index 20e62efa..e800d555 100644
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@@ -1,18 +1,24 @@
-from pdfminer.pdftypes import PDFStream
-from typing import List, Any, Optional
+from typing import (Any, IO, Iterable, List, Optional, Sequence, Tuple,
+                    TYPE_CHECKING)
 from . import utils
-from .utils import Matrix, Rect
-from .psparser import PSObject
+from .utils import Matrix, Point, Rect
+from .pdfcolor import PDFColorSpace
+from .pdffont import PDFFont
 from .pdffont import PDFUnicodeNotDefined
 from .pdfpage import PDFPage
-from .pdfinterp import PDFPageInterpreter, PDFResourceManager
+from .pdftypes import PDFStream
+
+if TYPE_CHECKING:
+    from .pdfinterp import PDFGraphicState
+    from .pdfinterp import PDFResourceManager
+    from .pdfinterp import PDFTextState
 
 
 class PDFDevice:
     """Translate the output of PDFPageInterpreter to the output that is needed
     """
 
-    def __init__(self, rsrcmgr: PDFResourceManager):
+    def __init__(self, rsrcmgr: "PDFResourceManager"):
         self.rsrcmgr = rsrcmgr
         self.ctm: Optional[Matrix] = None
         return
@@ -26,7 +32,7 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
 
-    def close(self):
+    def close(self) -> None:
         return
 
     def set_ctm(self, ctm: Matrix) -> None:
@@ -54,19 +60,26 @@ def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
     def end_figure(self, name: str) -> None:
         return
 
-    def paint_path(self, graphicstate, stroke, fill, evenodd, path) -> None:
+    def paint_path(self, graphicstate: "PDFGraphicState", stroke: bool,
+                   fill: bool, evenodd: bool,
+                   path: Sequence[Tuple[str, float, float]]) -> None:
         return
 
     def render_image(self, name: str, stream: PDFStream) -> None:
         return
 
-    def render_string(self, textstate, seq, ncs, graphicstate) -> None:
+    def render_string(self, textstate: "PDFTextState", seq: Iterable,
+                      ncs: PDFColorSpace, graphicstate: "PDFGraphicState"
+                      ) -> None:
         return
 
 
 class PDFTextDevice(PDFDevice):
 
-    def render_string(self, textstate, seq, ncs, graphicstate):
+    def render_string(self, textstate: "PDFTextState", seq: Iterable,
+                      ncs: PDFColorSpace, graphicstate: "PDFGraphicState"
+                      ) -> None:
+        assert self.ctm is not None
         matrix = utils.mult_matrix(textstate.matrix, self.ctm)
         font = textstate.font
         fontsize = textstate.fontsize
@@ -74,6 +87,7 @@ def render_string(self, textstate, seq, ncs, graphicstate):
         charspace = textstate.charspace * scaling
         wordspace = textstate.wordspace * scaling
         rise = textstate.rise
+        assert font is not None
         if font.is_multibyte():
             wordspace = 0
         dxscale = .001 * fontsize * scaling
@@ -89,9 +103,12 @@ def render_string(self, textstate, seq, ncs, graphicstate):
                 graphicstate)
         return
 
-    def render_string_horizontal(self, seq, matrix, pos,
-                                 font, fontsize, scaling, charspace, wordspace,
-                                 rise, dxscale, ncs, graphicstate):
+    def render_string_horizontal(self, seq: Iterable, matrix: Matrix,
+                                 pos: Point, font: PDFFont, fontsize: float,
+                                 scaling: float, charspace: float,
+                                 wordspace: float, rise: float, dxscale: float,
+                                 ncs: PDFColorSpace,
+                                 graphicstate: "PDFGraphicState") -> Point:
         (x, y) = pos
         needcharspace = False
         for obj in seq:
@@ -110,9 +127,11 @@ def render_string_horizontal(self, seq, matrix, pos,
                     needcharspace = True
         return (x, y)
 
-    def render_string_vertical(self, seq, matrix, pos,
-                               font, fontsize, scaling, charspace, wordspace,
-                               rise, dxscale, ncs, graphicstate):
+    def render_string_vertical(self, seq: Iterable, matrix: Matrix, pos: Point,
+                               font: PDFFont, fontsize: float, scaling: float,
+                               charspace: float, wordspace: float, rise: float,
+                               dxscale: float, ncs: PDFColorSpace,
+                               graphicstate: "PDFGraphicState") -> Point:
         (x, y) = pos
         needcharspace = False
         for obj in seq:
@@ -131,23 +150,28 @@ def render_string_vertical(self, seq, matrix, pos,
                     needcharspace = True
         return (x, y)
 
-    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
-                    graphicstate):
+    def render_char(self, matrix: Matrix, font: PDFFont, fontsize: float,
+                    scaling: float, rise: float, cid: int, ncs: PDFColorSpace,
+                    graphicstate: "PDFGraphicState") -> float:
         return 0
 
 
 class TagExtractor(PDFDevice):
 
-    def __init__(self, rsrcmgr: PDFResourceManager, outfp, codec='utf-8'):
+    def __init__(self, rsrcmgr: "PDFResourceManager", outfp: IO,
+                 codec: str = 'utf-8'):
         PDFDevice.__init__(self, rsrcmgr)
         self.outfp = outfp
         self.codec = codec
         self.pageno = 0
-        self._stack: List[PSObject] = []
+        self._stack: List[Any] = []
         return
 
-    def render_string(self, textstate, seq, ncs, graphicstate):
+    def render_string(self, textstate: "PDFTextState", seq: Iterable,
+                      ncs: PDFColorSpace, graphicstate: "PDFGraphicState"
+                      ) -> None:
         font = textstate.font
+        assert font is not None
         text = ''
         for obj in seq:
             if isinstance(obj, str):
@@ -165,18 +189,18 @@ def render_string(self, textstate, seq, ncs, graphicstate):
         self.outfp.write(utils.enc(text))
         return
 
-    def begin_page(self, page, ctm):
+    def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
         output = '<page id="%s" bbox="%s" rotate="%d">' %\
                  (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
         self.outfp.write(utils.make_compat_bytes(output))
         return
 
-    def end_page(self, page):
+    def end_page(self, page: PDFPage) -> None:
         self.outfp.write(utils.make_compat_bytes('</page>\n'))
         self.pageno += 1
         return
 
-    def begin_tag(self, tag: Any, props=None):
+    def begin_tag(self, tag: Any, props=None) -> None:
         s = ''
         if isinstance(props, dict):
             s = ''.join(' {}="{}"'.format(utils.enc(k), utils.enc(str(v)))
@@ -186,14 +210,14 @@ def begin_tag(self, tag: Any, props=None):
         self._stack.append(tag)
         return
 
-    def end_tag(self):
+    def end_tag(self) -> None:
         assert self._stack, str(self.pageno)
         tag = self._stack.pop(-1)
         out_s = '</%s>' % utils.enc(tag.name)
         self.outfp.write(utils.make_compat_bytes(out_s))
         return
 
-    def do_tag(self, tag: Any, props=None):
+    def do_tag(self, tag: Any, props=None) -> None:
         self.begin_tag(tag, props)
         self._stack.pop(-1)
         return
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 078914bf..f4e7cb20 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -1,11 +1,11 @@
 import re
 import logging
-from typing import Any, Dict, List, Sequence, Tuple
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
 from io import BytesIO
 from .cmapdb import CMapDB
 from .cmapdb import CMap
 from .cmapdb import CMapBase
-from .psparser import PSLiteral, PSParserToken
+from .psparser import PSParserToken
 from .psparser import PSTypeError
 from .psparser import PSEOF
 from .psparser import PSKeyword
@@ -32,7 +32,8 @@
 from .pdffont import PDFCIDFont
 from .pdfcolor import PDFColorSpace
 from .pdfcolor import PREDEFINED_COLORSPACE
-from .utils import Matrix, choplist
+from .utils import Matrix, Point
+from .utils import choplist
 from .utils import mult_matrix
 from .utils import MATRIX_IDENTITY
 
@@ -56,16 +57,18 @@ class PDFInterpreterError(PDFException):
 
 
 class PDFTextState:
+    matrix: Matrix
+    linematrix: Point
 
     def __init__(self):
-        self.font = None
-        self.fontsize = 0
-        self.charspace = 0
-        self.wordspace = 0
-        self.scaling = 100
-        self.leading = 0
-        self.render = 0
-        self.rise = 0
+        self.font: Optional[PDFFont] = None
+        self.fontsize: float = 0
+        self.charspace: float = 0
+        self.wordspace: float = 0
+        self.scaling: float = 100
+        self.leading: float = 0
+        self.render: float = 0
+        self.rise: float = 0
         self.reset()
         # self.matrix is set
         # self.linematrix is set
@@ -102,7 +105,7 @@ def reset(self) -> None:
 class PDFGraphicState:
 
     def __init__(self):
-        self.linewidth = 0
+        self.linewidth: int = 0
         self.linecap = None
         self.linejoin = None
         self.miterlimit = None
@@ -220,6 +223,9 @@ class PDFContentParser(PSStackParser):
     def __init__(self, streams: List[Any]):
         self.streams = streams
         self.istream = 0
+        # PSStackParser.__init__(fp=None) is safe only because we've overridden
+        # all the methods that would attempt to access self.fp without first
+        # calling self.fillfp().
         PSStackParser.__init__(self, None)  # type: ignore
         return
 
@@ -251,7 +257,8 @@ def fillbuf(self) -> None:
         self.charpos = 0
         return
 
-    def get_inline_data(self, pos: int, target: bytes = b'EI') -> Tuple[int, bytes]:
+    def get_inline_data(self, pos: int, target: bytes = b'EI'
+                        ) -> Tuple[int, bytes]:
         self.seek(pos)
         i = 0
         data = b''
@@ -324,19 +331,19 @@ def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice):
         self.device = device
         return
 
-    def dup(self):
+    def dup(self) -> "PDFPageInterpreter":
         return self.__class__(self.rsrcmgr, self.device)
 
-    def init_resources(self, resources):
+    def init_resources(self, resources) -> None:
         """Prepare the fonts and XObjects listed in the Resource attribute."""
         self.resources = resources
         self.fontmap = {}
         self.xobjmap = {}
-        self.csmap = PREDEFINED_COLORSPACE.copy()
+        self.csmap: Dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy()
         if not resources:
             return
 
-        def get_colorspace(spec):
+        def get_colorspace(spec) -> Optional[PDFColorSpace]:
             if isinstance(spec, list):
                 name = literal_name(spec[0])
             else:
@@ -360,7 +367,9 @@ def get_colorspace(spec):
                     self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
             elif k == 'ColorSpace':
                 for (csid, spec) in dict_value(v).items():
-                    self.csmap[csid] = get_colorspace(resolve1(spec))
+                    colorspace = get_colorspace(resolve1(spec))
+                    if colorspace is not None:
+                        self.csmap[csid] = colorspace
             elif k == 'ProcSet':
                 self.rsrcmgr.get_procset(list_value(v))
             elif k == 'XObject':
@@ -370,7 +379,8 @@ def get_colorspace(spec):
 
     def init_state(self, ctm: Matrix) -> None:
         """Initialize the text and graphic states for rendering a page."""
-        self.gstack: List[Tuple[Matrix, PDFTextState, PDFGraphicState]] = []  # stack for graphical states.
+        # gstack: stack for graphical states.
+        self.gstack: List[Tuple[Matrix, PDFTextState, PDFGraphicState]] = []
         self.ctm = ctm
         self.device.set_ctm(self.ctm)
         self.textstate = PDFTextState()
@@ -379,7 +389,8 @@ def init_state(self, ctm: Matrix) -> None:
         # argstack: stack for command arguments.
         self.argstack: List[Any] = []
         # set some global states.
-        self.scs = self.ncs = None
+        self.scs: Optional[PDFColorSpace] = None
+        self.ncs: Optional[PDFColorSpace] = None
         if self.csmap:
             self.scs = self.ncs = next(iter(self.csmap.values()))
         return
@@ -395,26 +406,29 @@ def pop(self, n: int) -> Any:
         self.argstack = self.argstack[:-n]
         return x
 
-    def get_current_state(self) -> Tuple[Matrix, PDFTextState, PDFGraphicState]:
+    def get_current_state(self) -> Tuple[Matrix, PDFTextState,
+                                         PDFGraphicState]:
         return (self.ctm, self.textstate.copy(), self.graphicstate.copy())
 
-    def set_current_state(self, state: Tuple[Matrix, PDFTextState, PDFGraphicState]) -> None:
+    def set_current_state(self, state: Tuple[Matrix, PDFTextState,
+                                             PDFGraphicState]) -> None:
         (self.ctm, self.textstate, self.graphicstate) = state
         self.device.set_ctm(self.ctm)
         return
 
-    def do_q(self):
+    def do_q(self) -> None:
         """Save graphics state"""
         self.gstack.append(self.get_current_state())
         return
 
-    def do_Q(self):
+    def do_Q(self) -> None:
         """Restore graphics state"""
         if self.gstack:
             self.set_current_state(self.gstack.pop())
         return
 
-    def do_cm(self, a1, b1, c1, d1, e1, f1):
+    def do_cm(self, a1: float, b1: float, c1: float, d1: float, e1: float,
+              f1: float) -> None:
         """Concatenate matrix to current transformation matrix"""
         self.ctm = mult_matrix((a1, b1, c1, d1, e1, f1), self.ctm)
         self.device.set_ctm(self.ctm)
@@ -799,12 +813,13 @@ def do_T_a(self):
         self.textstate.linematrix = (0, 0)
         return
 
-    def do_TJ(self, seq):
+    def do_TJ(self, seq: Iterable):
         """Show text, allowing individual glyph positioning"""
         if self.textstate.font is None:
             if settings.STRICT:
                 raise PDFInterpreterError('No font specified!')
             return
+        assert self.ncs is not None
         self.device.render_string(self.textstate, seq, self.ncs,
                                   self.graphicstate.copy())
         return
@@ -902,7 +917,8 @@ def process_page(self, page: PDFPage) -> None:
         self.device.end_page(page)
         return
 
-    def render_contents(self, resources, streams, ctm: Matrix = MATRIX_IDENTITY):
+    def render_contents(self, resources, streams: Sequence,
+                        ctm: Matrix = MATRIX_IDENTITY) -> None:
         """Render the content streams.
 
         This method may be called recursively.
diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py
index ae6d3399..b5b89a53 100644
--- a/pdfminer/pdfpage.py
+++ b/pdfminer/pdfpage.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Iterator
+from typing import Any, BinaryIO, Container, Iterator, List, Optional
 import warnings
 from . import settings
 from .psparser import LIT
@@ -67,7 +67,7 @@ def __init__(self, doc: PDFDocument, pageid: Any, attrs):
             contents = []
         if not isinstance(contents, list):
             contents = [contents]
-        self.contents = contents
+        self.contents: List = contents
         return
 
     def __repr__(self) -> str:
@@ -120,9 +120,10 @@ def search(obj, parent):
         return
 
     @classmethod
-    def get_pages(cls, fp,
-                  pagenos=None, maxpages: int = 0, password='',
-                  caching=True, check_extractable=False) -> Iterator["PDFPage"]:
+    def get_pages(cls, fp: BinaryIO,
+                  pagenos: Optional[Container[int]] = None, maxpages: int = 0,
+                  password: str = '', caching: bool = True,
+                  check_extractable: bool = False) -> Iterator["PDFPage"]:
         # Create a PDF parser object associated with the file object.
         parser = PDFParser(fp)
         # Create a PDF document object that stores the document structure.
diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py
index ee64c2ee..18c04272 100644
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@@ -1,5 +1,6 @@
 import logging
 from io import BytesIO
+from typing import BinaryIO
 from .psparser import PSStackParser
 from .psparser import PSSyntaxError
 from .psparser import PSEOF
@@ -35,7 +36,7 @@ class PDFParser(PSStackParser):
 
     """
 
-    def __init__(self, fp):
+    def __init__(self, fp: BinaryIO):
         PSStackParser.__init__(self, fp)
         self.doc = None
         self.fallback = False
diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py
index d419329d..caed19df 100644
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@@ -120,7 +120,7 @@ def intern(self, name: PSLiteral.NameType) -> _SymbolT:
 KEYWORD_DICT_END = KWD(b'>>')
 
 
-def literal_name(x: PSLiteral) -> PSLiteral.NameType:
+def literal_name(x: Any) -> Any:
     if not isinstance(x, PSLiteral):
         if settings.STRICT:
             raise PSTypeError('Literal required: {!r}'.format(x))
@@ -136,7 +136,7 @@ def literal_name(x: PSLiteral) -> PSLiteral.NameType:
     return name
 
 
-def keyword_name(x: PSKeyword) -> str:
+def keyword_name(x: Any) -> Any:
     if not isinstance(x, PSKeyword):
         if settings.STRICT:
             raise PSTypeError('Keyword required: %r' % x)
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
index d33493d4..807ce11b 100644
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@@ -4,9 +4,12 @@
 import io
 import pathlib
 import struct
-from typing import Any, AnyStr, Dict, Iterable, Iterator, List, Set, Tuple
+from typing import (Any, Callable, Dict, Iterable, Iterator, List, Optional,
+                    Set, Tuple, TypeVar, Union, TYPE_CHECKING)
 from html import escape
-from .layout import LTComponent
+
+if TYPE_CHECKING:
+    from .layout import LTComponent
 
 import chardet  # For str encoding detection
 
@@ -42,13 +45,13 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         return False
 
 
-def make_compat_bytes(in_str):
+def make_compat_bytes(in_str: str) -> bytes:
     "Converts to bytes, encoding to unicode."
     assert isinstance(in_str, str), str(type(in_str))
     return in_str.encode()
 
 
-def make_compat_str(in_str):
+def make_compat_str(in_str: Union[bytes, str]) -> str:
     """Converts to string, guessing encoding."""
     assert isinstance(in_str, (bytes, str)), str(type(in_str))
     if isinstance(in_str, bytes):
@@ -57,7 +60,7 @@ def make_compat_str(in_str):
     return in_str
 
 
-def shorten_str(s, size):
+def shorten_str(s: str, size: int) -> str:
     if size < 7:
         return s[:size]
     if len(s) > size:
@@ -67,8 +70,8 @@ def shorten_str(s, size):
         return s
 
 
-def compatible_encode_method(bytesorstring, encoding='utf-8',
-                             erraction='ignore'):
+def compatible_encode_method(bytesorstring: Union[bytes, str],
+                             encoding='utf-8', erraction='ignore') -> str:
     """When Py2 str.encode is called, it often means bytes.encode in Py3.
 
      This does either.
@@ -79,7 +82,8 @@ def compatible_encode_method(bytesorstring, encoding='utf-8',
     return bytesorstring.decode(encoding, erraction)
 
 
-def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
+def apply_png_predictor(pred: Any, colors: int, columns: int,
+                        bitspercomponent: int, data: bytes) -> bytes:
     if bitspercomponent != 8:
         # unsupported
         raise ValueError("Unsupported `bitspercomponent': %d" %
@@ -164,7 +168,10 @@ def isnumber(x: Any) -> bool:
     return isinstance(x, (int, float))
 
 
-def uniq(objs: Iterable[Any]) -> Iterator[Any]:
+_T = TypeVar('_T')
+
+
+def uniq(objs: Iterable[_T]) -> Iterator[_T]:
     """Eliminates duplicated elements."""
     done = set()
     for obj in objs:
@@ -175,7 +182,8 @@ def uniq(objs: Iterable[Any]) -> Iterator[Any]:
     return
 
 
-def fsplit(pred, objs):
+def fsplit(pred: Callable[[_T], bool], objs: Iterable[_T]
+           ) -> Tuple[List[_T], List[_T]]:
     """Split a list into two classes according to the predicate."""
     t = []
     f = []
@@ -187,7 +195,7 @@ def fsplit(pred, objs):
     return t, f
 
 
-def drange(v0, v1, d):
+def drange(v0: float, v1: float, d: int) -> range:
     """Returns a discrete range."""
     return range(int(v0) // d, int(v1 + d) // d)
 
@@ -204,7 +212,8 @@ def get_bound(pts: Iterable[Point]) -> Rect:
     return x0, y0, x1, y1
 
 
-def pick(seq, func, maxobj=None):
+def pick(seq: Iterable[_T], func: Callable[[_T], float],
+         maxobj: Optional[_T] = None) -> Optional[_T]:
     """Picks the object obj where func(obj) has the highest value."""
     maxscore = None
     for obj in seq:
@@ -214,7 +223,7 @@ def pick(seq, func, maxobj=None):
     return maxobj
 
 
-def choplist(n, seq):
+def choplist(n: int, seq: Iterable[_T]) -> Iterator[Tuple[_T, ...]]:
     """Groups every n elements of the list."""
     r = []
     for x in seq:
@@ -288,7 +297,7 @@ def decode_text(s: bytes) -> str:
         return ''.join(PDFDocEncoding[c] for c in s)
 
 
-def enc(x):
+def enc(x: str) -> str:
     """Encodes a string for SGML/XML/HTML"""
     if isinstance(x, bytes):
         return ''
@@ -306,7 +315,7 @@ def matrix2str(m: Matrix) -> str:
         .format(a, b, c, d, e, f)
 
 
-def vecBetweenBoxes(obj1: LTComponent, obj2: LTComponent) -> Point:
+def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
     """A distance function between two TextBoxes.
 
     Consider the bounding rectangle for obj1 and obj2.
@@ -341,9 +350,9 @@ class Plane:
     """
 
     def __init__(self, bbox: Rect, gridsize: int = 50):
-        self._seq: List[LTComponent] = []  # preserve the object order.
-        self._objs: Set[LTComponent] = set()
-        self._grid: Dict[Point, List[LTComponent]] = {}
+        self._seq: List["LTComponent"] = []  # preserve the object order.
+        self._objs: Set["LTComponent"] = set()
+        self._grid: Dict[Point, List["LTComponent"]] = {}
         self.gridsize = gridsize
         (self.x0, self.y0, self.x1, self.y1) = bbox
 
@@ -371,15 +380,15 @@ def _getrange(self, bbox: Rect) -> Iterator[Point]:
             for grid_x in drange(x0, x1, self.gridsize):
                 yield (grid_x, grid_y)
 
-    def extend(self, objs: Iterable[LTComponent]) -> None:
+    def extend(self, objs: Iterable["LTComponent"]) -> None:
         for obj in objs:
             self.add(obj)
 
-    def add(self, obj: LTComponent) -> None:
+    def add(self, obj: "LTComponent") -> None:
         """place an object."""
         for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
             if k not in self._grid:
-                r: List[LTComponent] = []
+                r: List["LTComponent"] = []
                 self._grid[k] = r
             else:
                 r = self._grid[k]
@@ -387,7 +396,7 @@ def add(self, obj: LTComponent) -> None:
         self._seq.append(obj)
         self._objs.add(obj)
 
-    def remove(self, obj: LTComponent) -> None:
+    def remove(self, obj: "LTComponent") -> None:
         """displace an object."""
         for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
             try:
@@ -396,7 +405,7 @@ def remove(self, obj: LTComponent) -> None:
                 pass
         self._objs.remove(obj)
 
-    def find(self, bbox: Rect) -> Iterator[LTComponent]:
+    def find(self, bbox: Rect) -> Iterator["LTComponent"]:
         """finds objects that are in a certain area."""
         (x0, y0, x1, y1) = bbox
         done = set()