Skip to content

Commit

Permalink
annotate a couple more scripts; document sketchy code
Browse files Browse the repository at this point in the history
  • Loading branch information
0xabu committed Sep 4, 2021
1 parent 291981f commit 4b689f1
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 24 deletions.
3 changes: 0 additions & 3 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ disallow_untyped_calls = False
[mypy-cryptography.hazmat.*]
ignore_missing_imports = True

[mypy-hotshot.*]
ignore_missing_imports = True

[mypy-nose.*]
ignore_missing_imports = True

Expand Down
15 changes: 8 additions & 7 deletions tools/pdfdiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import io
import logging
import sys
from typing import Any, Iterable, List, Optional

import pdfminer.settings
from pdfminer import high_level, layout
Expand All @@ -16,7 +17,7 @@
logging.basicConfig()


def compare(file1, file2, **kwargs):
def compare(file1: str, file2: str, **kwargs: Any) -> Iterable[str]:
# If any LAParams group arguments were passed,
# create an LAParams object and
# populate with given args. Otherwise, set it to None.
Expand All @@ -26,7 +27,7 @@ def compare(file1, file2, **kwargs):
"char_margin", "line_margin", "boxes_flow"):
paramv = kwargs.get(param, None)
if paramv is not None:
laparams[param] = paramv
setattr(laparams, param, paramv)
kwargs['laparams'] = laparams

s1 = io.StringIO()
Expand All @@ -40,20 +41,20 @@ def compare(file1, file2, **kwargs):
import difflib
s1.seek(0)
s2.seek(0)
s1, s2 = s1.readlines(), s2.readlines()
s1_lines, s2_lines = s1.readlines(), s2.readlines()

import os.path
try:
extension = os.path.splitext(kwargs['outfile'])[1][1:4]
if extension.lower() == 'htm':
return difflib.HtmlDiff().make_file(s1, s2)
return difflib.HtmlDiff().make_file(s1_lines, s2_lines)
except KeyError:
pass
return difflib.unified_diff(s1, s2, n=kwargs['context_lines'])
return difflib.unified_diff(s1_lines, s2_lines, n=kwargs['context_lines'])


# main
def main(args=None):
def main(args: Optional[List[str]] = None) -> int:
import argparse
P = argparse.ArgumentParser(description=__doc__)
P.add_argument("file1", type=str, default=None, help="File 1 to compare.")
Expand Down Expand Up @@ -142,4 +143,4 @@ def main(args=None):


if __name__ == '__main__':
sys.exit(main()) # type: ignore[no-untyped-call]
sys.exit(main())
13 changes: 8 additions & 5 deletions tools/pdfstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import sys
import os
import collections
from typing import Any, Counter, Iterator, List

from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
Expand All @@ -19,18 +20,18 @@
_, SCRIPT = os.path.split(__file__)


def msg(*args, **kwargs):
def msg(*args: Any, **kwargs: Any) -> None:
print(' '.join(map(str, args)), **kwargs) # noqa E999


def flat_iter(obj):
def flat_iter(obj: Any) -> Iterator[Any]:
yield obj
if isinstance(obj, LTContainer):
for ob in obj:
yield from flat_iter(ob)


def main(args):
def main(args: List[str]) -> int:
msg(SCRIPT, args)

if len(args) != 1:
Expand All @@ -40,7 +41,7 @@ def main(args):

infilename, = args

lt_types = collections.Counter()
lt_types: Counter[str] = collections.Counter()

with open(infilename, 'rb') as pdf_file:

Expand Down Expand Up @@ -77,6 +78,8 @@ def main(args):
msg('page_count', page_count)
msg('lt_types:', ' '.join('{}:{}'.format(*tc) for tc in lt_types.items()))

return 0


if __name__ == '__main__':
sys.exit(main(sys.argv[1:])) # type: ignore[no-untyped-call]
sys.exit(main(sys.argv[1:]))
24 changes: 15 additions & 9 deletions tools/prof.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#!/usr/bin/env python3
import sys
from typing import List


def prof_main(argv):
import hotshot.stats
def prof_main(argv: List[str]) -> int:
import hotshot.stats # type: ignore[import]

def usage():
def usage() -> int:
print('usage: %s module.function [args ...]' % argv[0])
return 100
args = argv[1:]
Expand All @@ -15,20 +16,25 @@ def usage():
prof = name+'.prof'
i = name.rindex('.')
(modname, funcname) = (name[:i], name[i+1:])
module = __import__(modname, fromlist=1)

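# Type error: fromlist expects a sequence of strings, not an int. Presumably
# the intent is to retrieve the named (sub)module rather than its top-level
# package: per the __import__ documentation, "when a non-empty fromlist
# argument is given, the module named by name is returned". Any truthy value
# happens to trigger that behaviour here.
module = __import__(modname, fromlist=1) # type: ignore[arg-type]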

func = getattr(module, funcname)
if args:
args.insert(0, argv[0])
prof = hotshot.Profile(prof)
prof.runcall(lambda: func(args))
prof.close()
profile = hotshot.Profile(prof)
profile.runcall(lambda: func(args))
profile.close()
else:
stats = hotshot.stats.load(prof)
stats.strip_dirs()
stats.sort_stats('time', 'calls')
stats.print_stats(1000)
return
return 0


if __name__ == '__main__':
sys.exit(prof_main(sys.argv)) # type: ignore[no-untyped-call]
sys.exit(prof_main(sys.argv))

0 comments on commit 4b689f1

Please sign in to comment.