Skip to content

Commit

Permalink
finish annotating, and disallow_untyped_defs for pdfminer.* _except_ ccitt and jbig2
Browse files Browse the repository at this point in the history
  • Loading branch information
0xabu committed Sep 4, 2021
1 parent 0ab5863 commit 96b2043
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 103 deletions.
9 changes: 7 additions & 2 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ warn_unused_configs = True
disallow_any_generics = True
disallow_subclassing_any = True
disallow_untyped_calls = True
#disallow_untyped_defs = True
disallow_incomplete_defs = True
#check_untyped_defs = True
disallow_untyped_decorators = True
no_implicit_optional = True
warn_redundant_casts = True
Expand All @@ -14,8 +12,15 @@ warn_return_any = True
no_implicit_reexport = True
strict_equality = True

[mypy-pdfminer.*]
disallow_untyped_defs = True

[mypy-pdfminer.ccitt]
disallow_untyped_calls = False
disallow_untyped_defs = False

[mypy-pdfminer.jbig2]
disallow_untyped_defs = False

[mypy-cryptography.hazmat.*]
ignore_missing_imports = True
Expand Down
4 changes: 2 additions & 2 deletions pdfminer/cmapdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def __init__(self, **kwargs: Union[str, int]):
self.code2cid: Dict[int, Any] = {}
return

def __repr__(self):
def __repr__(self) -> str:
return '<CMap: %s>' % self.attrs.get('CMapName')

def use_cmap(self, cmap: CMapBase) -> None:
Expand Down Expand Up @@ -145,7 +145,7 @@ def __init__(self, **kwargs: Union[str, int]):
self.cid2unichr: Dict[int, str] = {}
return

def __repr__(self):
def __repr__(self) -> str:
return '<UnicodeMap: %s>' % self.attrs.get('CMapName')

def get_unichr(self, cid: int) -> str:
Expand Down
62 changes: 35 additions & 27 deletions pdfminer/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
import logging
from pdfminer.pdfcolor import PDFColorSpace
from typing import (Any, BinaryIO, Dict, Generic, List, Optional, Sequence,
TextIO, Tuple, TypeVar, cast)
TextIO, Tuple, TypeVar, Union, cast)
import re

from . import utils
from .layout import LAParams
from .layout import LAParams, LTComponent, TextGroupElement
from .layout import LTChar
from .layout import LTContainer
from .layout import LTCurve
Expand Down Expand Up @@ -230,15 +230,16 @@ def __init__(self,
self.imagewriter = imagewriter
return

def write_text(self, text):
def write_text(self, text: str) -> None:
text = utils.compatible_encode_method(text, self.codec, 'ignore')
if self.outfp_binary:
text = text.encode()
self.outfp.write(text)
cast(BinaryIO, self.outfp).write(text.encode())
else:
cast(TextIO, self.outfp).write(text)
return

def receive_layout(self, ltpage):
def render(item):
def receive_layout(self, ltpage: LTPage) -> None:
def render(item: LTItem) -> None:
if isinstance(item, LTContainer):
for child in item:
render(child)
Expand All @@ -258,13 +259,14 @@ def render(item):
# Some dummy functions to save memory/CPU when all that is wanted
# is text. This stops all the image and drawing output from being
# recorded and taking up RAM.
def render_image(self, name, stream):
def render_image(self, name: str, stream: PDFStream) -> None:
if self.imagewriter is None:
return
PDFConverter.render_image(self, name, stream)
return

def paint_path(self, gstate, stroke, fill, evenodd, path):
def paint_path(self, gstate: PDFGraphicState, stroke: bool, fill: bool,
evenodd: bool, path: Sequence[PathSegment]) -> None:
return


Expand Down Expand Up @@ -321,9 +323,9 @@ def __init__(self,
if debug:
self.rect_colors.update(self.RECT_COLORS)
self.text_colors.update(self.TEXT_COLORS)
self._yoffset = self.pagemargin
self._yoffset: float = self.pagemargin
self._font: Optional[Tuple[str, float]] = None
self._fontstack: List[Tuple[str, float]] = []
self._fontstack: List[Optional[Tuple[str, float]]] = []
self.write_header()
return

Expand Down Expand Up @@ -358,24 +360,27 @@ def write_text(self, text: str) -> None:
self.write(enc(text))
return

def place_rect(self, color, borderwidth, x, y, w, h):
color = self.rect_colors.get(color)
if color is not None:
def place_rect(self, color: str, borderwidth: int, x: float, y: float,
w: float, h: float) -> None:
color2 = self.rect_colors.get(color)
if color2 is not None:
s = '<span style="position:absolute; border: %s %dpx solid; ' \
'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' % \
(color, borderwidth, x * self.scale,
(color2, borderwidth, x * self.scale,
(self._yoffset - y) * self.scale, w * self.scale,
h * self.scale)
self.write(
s)
return

def place_border(self, color, borderwidth, item):
def place_border(self, color: str, borderwidth: int, item: LTComponent
) -> None:
self.place_rect(color, borderwidth, item.x0, item.y1, item.width,
item.height)
return

def place_image(self, item, borderwidth, x, y, w, h):
def place_image(self, item: LTImage, borderwidth: int, x: float, y: float,
w: float, h: float) -> None:
if self.imagewriter is not None:
name = self.imagewriter.export_image(item)
s = '<img src="%s" border="%d" style="position:absolute; ' \
Expand All @@ -386,19 +391,21 @@ def place_image(self, item, borderwidth, x, y, w, h):
self.write(s)
return

def place_text(self, color, text, x, y, size):
color = self.text_colors.get(color)
if color is not None:
def place_text(self, color: str, text: str, x: float, y: float, size: float
) -> None:
color2 = self.text_colors.get(color)
if color2 is not None:
s = '<span style="position:absolute; color:%s; left:%dpx; ' \
'top:%dpx; font-size:%dpx;">' % \
(color, x * self.scale, (self._yoffset - y) * self.scale,
(color2, x * self.scale, (self._yoffset - y) * self.scale,
size * self.scale * self.fontscale)
self.write(s)
self.write_text(text)
self.write('</span>\n')
return

def begin_div(self, color, borderwidth, x, y, w, h, writing_mode=False):
def begin_div(self, color: str, borderwidth: int, x: float, y: float,
w: float, h: float, writing_mode: str = 'False') -> None:
self._fontstack.append(self._font)
self._font = None
s = '<div style="position:absolute; border: %s %dpx solid; ' \
Expand Down Expand Up @@ -434,15 +441,16 @@ def put_newline(self) -> None:
self.write('<br>')
return

def receive_layout(self, ltpage):
def show_group(item):
def receive_layout(self, ltpage: LTPage) -> None:
def show_group(item: Union[LTTextGroup, TextGroupElement]) -> None:
if isinstance(item, LTTextGroup):
self.place_border('textgroup', 1, item)
for child in item:
show_group(child)
return

def render(item):
def render(item: LTItem) -> None:
child: LTItem
if isinstance(item, LTPage):
self._yoffset += item.y1
self.place_border('page', 1, item)
Expand Down Expand Up @@ -573,6 +581,7 @@ def show_group(item: LTItem) -> None:
return

def render(item: LTItem) -> None:
child: LTItem
if isinstance(item, LTPage):
s = '<page id="%s" bbox="%s" rotate="%d">\n' % \
(item.pageid, bbox2str(item.bbox), item.rotate)
Expand Down Expand Up @@ -631,8 +640,7 @@ def render(item: LTItem) -> None:
self.write('<text>%s</text>\n' % item.get_text())
elif isinstance(item, LTImage):
if self.imagewriter is not None:
name = (self.imagewriter. # type: ignore[no-untyped-call]
export_image(item))
name = self.imagewriter.export_image(item)
self.write('<image src="%s" width="%d" height="%d" />\n' %
(enc(name), item.width, item.height))
else:
Expand Down
28 changes: 17 additions & 11 deletions pdfminer/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,22 @@
import os.path
import struct
from io import BytesIO
from typing import BinaryIO, Tuple

from .jbig2 import JBIG2StreamReader, JBIG2StreamWriter
from .layout import LTImage
from .pdfcolor import LITERAL_DEVICE_CMYK
from .pdfcolor import LITERAL_DEVICE_GRAY
from .pdfcolor import LITERAL_DEVICE_RGB
from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE


def align32(x):
def align32(x: int) -> int:
return ((x+3)//4)*4


class BMPWriter:
def __init__(self, fp, bits, width, height):
def __init__(self, fp: BinaryIO, bits: int, width: int, height: int):
self.fp = fp
self.bits = bits
self.width = width
Expand Down Expand Up @@ -51,7 +53,7 @@ def __init__(self, fp, bits, width, height):
self.pos1 = self.pos0 + self.datasize
return

def write_line(self, y, data):
def write_line(self, y: int, data: bytes) -> None:
self.fp.seek(self.pos1 - (y+1)*self.linesize)
self.fp.write(data)
return
Expand All @@ -69,7 +71,7 @@ def __init__(self, outdir: str):
os.makedirs(self.outdir)
return

def export_image(self, image):
def export_image(self, image: LTImage) -> str:
(width, height) = image.srcsize

is_jbig2 = self.is_jbig2_image(image)
Expand All @@ -80,6 +82,7 @@ def export_image(self, image):
fp = open(path, 'wb')
if ext == '.jpg':
raw_data = image.stream.get_rawdata()
assert raw_data is not None
if LITERAL_DEVICE_CMYK in image.colorspace:
from PIL import Image # type: ignore[import]
from PIL import ImageChops
Expand All @@ -94,11 +97,12 @@ def export_image(self, image):
input_stream = BytesIO()
input_stream.write(image.stream.get_data())
input_stream.seek(0)
reader = JBIG2StreamReader(input_stream)
segments = reader.get_segments()
reader = \
JBIG2StreamReader(input_stream) # type:ignore[no-untyped-call]
segments = reader.get_segments() # type: ignore[no-untyped-call]

writer = JBIG2StreamWriter(fp)
writer.write_file(segments)
writer = JBIG2StreamWriter(fp) # type: ignore[no-untyped-call]
writer.write_file(segments) # type: ignore[no-untyped-call]
elif image.bits == 1:
bmp = BMPWriter(fp, 1, width, height)
data = image.stream.get_data()
Expand Down Expand Up @@ -128,7 +132,7 @@ def export_image(self, image):
return name

@staticmethod
def is_jbig2_image(image):
def is_jbig2_image(image: LTImage) -> bool:
filters = image.stream.get_filters()
is_jbig2 = False
for filter_name, params in filters:
Expand All @@ -138,7 +142,8 @@ def is_jbig2_image(image):
return is_jbig2

@staticmethod
def _get_image_extension(image, width, height, is_jbig2):
def _get_image_extension(image: LTImage, width: int, height: int,
is_jbig2: bool) -> str:
filters = image.stream.get_filters()
if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
ext = '.jpg'
Expand All @@ -154,7 +159,8 @@ def _get_image_extension(image, width, height, is_jbig2):
return ext

@staticmethod
def _create_unique_image_name(dirname, image_name, ext):
def _create_unique_image_name(dirname: str, image_name: str, ext: str
) -> Tuple[str, str]:
name = image_name + ext
path = os.path.join(dirname, name)
img_index = 0
Expand Down
Loading

0 comments on commit 96b2043

Please sign in to comment.