finish annotating, and disallow_untyped_defs for pdfminer.* _except_ ccitt and jbig2
0xabu · Sep 4, 2021 · 96b2043 · 96b2043
1 parent 0ab5863
commit 96b2043
Show file tree

Hide file tree

Showing 11 changed files with 141 additions and 103 deletions.
diff --git a/mypy.ini b/mypy.ini
@@ -3,9 +3,7 @@ warn_unused_configs = True
 disallow_any_generics = True
 disallow_subclassing_any = True
 disallow_untyped_calls = True
-#disallow_untyped_defs = True
 disallow_incomplete_defs = True
-#check_untyped_defs = True
 disallow_untyped_decorators = True
 no_implicit_optional = True
 warn_redundant_casts = True
@@ -14,8 +12,15 @@ warn_return_any = True
 no_implicit_reexport = True
 strict_equality = True
 
+[mypy-pdfminer.*]
+disallow_untyped_defs = True
+
 [mypy-pdfminer.ccitt]
 disallow_untyped_calls = False
+disallow_untyped_defs = False
+
+[mypy-pdfminer.jbig2]
+disallow_untyped_defs = False
 
 [mypy-cryptography.hazmat.*]
 ignore_missing_imports = True

diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py
@@ -73,7 +73,7 @@ def __init__(self, **kwargs: Union[str, int]):
         self.code2cid: Dict[int, Any] = {}
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<CMap: %s>' % self.attrs.get('CMapName')
 
     def use_cmap(self, cmap: CMapBase) -> None:
@@ -145,7 +145,7 @@ def __init__(self, **kwargs: Union[str, int]):
         self.cid2unichr: Dict[int, str] = {}
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
 
     def get_unichr(self, cid: int) -> str:

diff --git a/pdfminer/converter.py b/pdfminer/converter.py
@@ -2,11 +2,11 @@
 import logging
 from pdfminer.pdfcolor import PDFColorSpace
 from typing import (Any, BinaryIO, Dict, Generic, List, Optional, Sequence,
-                    TextIO, Tuple, TypeVar, cast)
+                    TextIO, Tuple, TypeVar, Union, cast)
 import re
 
 from . import utils
-from .layout import LAParams
+from .layout import LAParams, LTComponent, TextGroupElement
 from .layout import LTChar
 from .layout import LTContainer
 from .layout import LTCurve
@@ -230,15 +230,16 @@ def __init__(self,
         self.imagewriter = imagewriter
         return
 
-    def write_text(self, text):
+    def write_text(self, text: str) -> None:
         text = utils.compatible_encode_method(text, self.codec, 'ignore')
         if self.outfp_binary:
-            text = text.encode()
-        self.outfp.write(text)
+            cast(BinaryIO, self.outfp).write(text.encode())
+        else:
+            cast(TextIO, self.outfp).write(text)
         return
 
-    def receive_layout(self, ltpage):
-        def render(item):
+    def receive_layout(self, ltpage: LTPage) -> None:
+        def render(item: LTItem) -> None:
             if isinstance(item, LTContainer):
                 for child in item:
                     render(child)
@@ -258,13 +259,14 @@ def render(item):
     # Some dummy functions to save memory/CPU when all that is wanted
     # is text.  This stops all the image and drawing output from being
     # recorded and taking up RAM.
-    def render_image(self, name, stream):
+    def render_image(self, name: str, stream: PDFStream) -> None:
         if self.imagewriter is None:
             return
         PDFConverter.render_image(self, name, stream)
         return
 
-    def paint_path(self, gstate, stroke, fill, evenodd, path):
+    def paint_path(self, gstate: PDFGraphicState, stroke: bool, fill: bool,
+                   evenodd: bool, path: Sequence[PathSegment]) -> None:
         return
 
 
@@ -321,9 +323,9 @@ def __init__(self,
         if debug:
             self.rect_colors.update(self.RECT_COLORS)
             self.text_colors.update(self.TEXT_COLORS)
-        self._yoffset = self.pagemargin
+        self._yoffset: float = self.pagemargin
         self._font: Optional[Tuple[str, float]] = None
-        self._fontstack: List[Tuple[str, float]] = []
+        self._fontstack: List[Optional[Tuple[str, float]]] = []
         self.write_header()
         return
 
@@ -358,24 +360,27 @@ def write_text(self, text: str) -> None:
         self.write(enc(text))
         return
 
-    def place_rect(self, color, borderwidth, x, y, w, h):
-        color = self.rect_colors.get(color)
-        if color is not None:
+    def place_rect(self, color: str, borderwidth: int, x: float, y: float,
+                   w: float, h: float) -> None:
+        color2 = self.rect_colors.get(color)
+        if color2 is not None:
             s = '<span style="position:absolute; border: %s %dpx solid; ' \
                 'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' % \
-                (color, borderwidth, x * self.scale,
+                (color2, borderwidth, x * self.scale,
                  (self._yoffset - y) * self.scale, w * self.scale,
                  h * self.scale)
             self.write(
                 s)
         return
 
-    def place_border(self, color, borderwidth, item):
+    def place_border(self, color: str, borderwidth: int, item: LTComponent
+                     ) -> None:
         self.place_rect(color, borderwidth, item.x0, item.y1, item.width,
                         item.height)
         return
 
-    def place_image(self, item, borderwidth, x, y, w, h):
+    def place_image(self, item: LTImage, borderwidth: int, x: float, y: float,
+                    w: float, h: float) -> None:
         if self.imagewriter is not None:
             name = self.imagewriter.export_image(item)
             s = '<img src="%s" border="%d" style="position:absolute; ' \
@@ -386,19 +391,21 @@ def place_image(self, item, borderwidth, x, y, w, h):
             self.write(s)
         return
 
-    def place_text(self, color, text, x, y, size):
-        color = self.text_colors.get(color)
-        if color is not None:
+    def place_text(self, color: str, text: str, x: float, y: float, size: float
+                   ) -> None:
+        color2 = self.text_colors.get(color)
+        if color2 is not None:
             s = '<span style="position:absolute; color:%s; left:%dpx; ' \
                 'top:%dpx; font-size:%dpx;">' % \
-                (color, x * self.scale, (self._yoffset - y) * self.scale,
+                (color2, x * self.scale, (self._yoffset - y) * self.scale,
                  size * self.scale * self.fontscale)
             self.write(s)
             self.write_text(text)
             self.write('</span>\n')
         return
 
-    def begin_div(self, color, borderwidth, x, y, w, h, writing_mode=False):
+    def begin_div(self, color: str, borderwidth: int, x: float, y: float,
+                  w: float, h: float, writing_mode: str = 'False') -> None:
         self._fontstack.append(self._font)
         self._font = None
         s = '<div style="position:absolute; border: %s %dpx solid; ' \
@@ -434,15 +441,16 @@ def put_newline(self) -> None:
         self.write('<br>')
         return
 
-    def receive_layout(self, ltpage):
-        def show_group(item):
+    def receive_layout(self, ltpage: LTPage) -> None:
+        def show_group(item: Union[LTTextGroup, TextGroupElement]) -> None:
             if isinstance(item, LTTextGroup):
                 self.place_border('textgroup', 1, item)
                 for child in item:
                     show_group(child)
             return
 
-        def render(item):
+        def render(item: LTItem) -> None:
+            child: LTItem
             if isinstance(item, LTPage):
                 self._yoffset += item.y1
                 self.place_border('page', 1, item)
@@ -573,6 +581,7 @@ def show_group(item: LTItem) -> None:
             return
 
         def render(item: LTItem) -> None:
+            child: LTItem
             if isinstance(item, LTPage):
                 s = '<page id="%s" bbox="%s" rotate="%d">\n' % \
                     (item.pageid, bbox2str(item.bbox), item.rotate)
@@ -631,8 +640,7 @@ def render(item: LTItem) -> None:
                 self.write('<text>%s</text>\n' % item.get_text())
             elif isinstance(item, LTImage):
                 if self.imagewriter is not None:
-                    name = (self.imagewriter.  # type: ignore[no-untyped-call]
-                            export_image(item))
+                    name = self.imagewriter.export_image(item)
                     self.write('<image src="%s" width="%d" height="%d" />\n' %
                                (enc(name), item.width, item.height))
                 else:

diff --git a/pdfminer/image.py b/pdfminer/image.py
@@ -2,20 +2,22 @@
 import os.path
 import struct
 from io import BytesIO
+from typing import BinaryIO, Tuple
 
 from .jbig2 import JBIG2StreamReader, JBIG2StreamWriter
+from .layout import LTImage
 from .pdfcolor import LITERAL_DEVICE_CMYK
 from .pdfcolor import LITERAL_DEVICE_GRAY
 from .pdfcolor import LITERAL_DEVICE_RGB
 from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE
 
 
-def align32(x):
+def align32(x: int) -> int:
     return ((x+3)//4)*4
 
 
 class BMPWriter:
-    def __init__(self, fp, bits, width, height):
+    def __init__(self, fp: BinaryIO, bits: int, width: int, height: int):
         self.fp = fp
         self.bits = bits
         self.width = width
@@ -51,7 +53,7 @@ def __init__(self, fp, bits, width, height):
         self.pos1 = self.pos0 + self.datasize
         return
 
-    def write_line(self, y, data):
+    def write_line(self, y: int, data: bytes) -> None:
         self.fp.seek(self.pos1 - (y+1)*self.linesize)
         self.fp.write(data)
         return
@@ -69,7 +71,7 @@ def __init__(self, outdir: str):
             os.makedirs(self.outdir)
         return
 
-    def export_image(self, image):
+    def export_image(self, image: LTImage) -> str:
         (width, height) = image.srcsize
 
         is_jbig2 = self.is_jbig2_image(image)
@@ -80,6 +82,7 @@ def export_image(self, image):
         fp = open(path, 'wb')
         if ext == '.jpg':
             raw_data = image.stream.get_rawdata()
+            assert raw_data is not None
             if LITERAL_DEVICE_CMYK in image.colorspace:
                 from PIL import Image  # type: ignore[import]
                 from PIL import ImageChops
@@ -94,11 +97,12 @@ def export_image(self, image):
             input_stream = BytesIO()
             input_stream.write(image.stream.get_data())
             input_stream.seek(0)
-            reader = JBIG2StreamReader(input_stream)
-            segments = reader.get_segments()
+            reader = \
+                JBIG2StreamReader(input_stream)  # type:ignore[no-untyped-call]
+            segments = reader.get_segments()  # type: ignore[no-untyped-call]
 
-            writer = JBIG2StreamWriter(fp)
-            writer.write_file(segments)
+            writer = JBIG2StreamWriter(fp)  # type: ignore[no-untyped-call]
+            writer.write_file(segments)  # type: ignore[no-untyped-call]
         elif image.bits == 1:
             bmp = BMPWriter(fp, 1, width, height)
             data = image.stream.get_data()
@@ -128,7 +132,7 @@ def export_image(self, image):
         return name
 
     @staticmethod
-    def is_jbig2_image(image):
+    def is_jbig2_image(image: LTImage) -> bool:
         filters = image.stream.get_filters()
         is_jbig2 = False
         for filter_name, params in filters:
@@ -138,7 +142,8 @@ def is_jbig2_image(image):
         return is_jbig2
 
     @staticmethod
-    def _get_image_extension(image, width, height, is_jbig2):
+    def _get_image_extension(image: LTImage, width: int, height: int,
+                             is_jbig2: bool) -> str:
         filters = image.stream.get_filters()
         if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
             ext = '.jpg'
@@ -154,7 +159,8 @@ def _get_image_extension(image, width, height, is_jbig2):
         return ext
 
     @staticmethod
-    def _create_unique_image_name(dirname, image_name, ext):
+    def _create_unique_image_name(dirname: str, image_name: str, ext: str
+                                  ) -> Tuple[str, str]:
         name = image_name + ext
         path = os.path.join(dirname, name)
         img_index = 0