Skip to content

Commit

Permalink
Ghostscript 9.21 seems to have a regression related to Unicode metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
James R. Barlow committed Mar 24, 2017
1 parent 8ddbe81 commit 199de96
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 3 deletions.
2 changes: 1 addition & 1 deletion RELEASE_NOTES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ OCRmyPDF uses `semantic versioning <http://semver.org/>`_.
v4.5.2
======

- Fix issue #147, unpaper loses DPI information, which affects PDF rendering with ``--pdf-renderer tess4``
- Fix issue #147. ``--pdf-renderer tess4 --clean`` will produce an oversized page containing the original image in the bottom left corner, due to loss of DPI information.
- Make "using Tesseract 4.0" warning less ominous
- Set up machinery for homebrew OCRmyPDF tap

Expand Down
22 changes: 21 additions & 1 deletion ocrmypdf/exec/ghostscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,32 @@
# © 2015 James R. Barlow: github.com/jbarlow83

from tempfile import NamedTemporaryFile
from subprocess import Popen, PIPE, STDOUT, check_call
from subprocess import Popen, PIPE, STDOUT, check_call, CalledProcessError, \
check_output
from shutil import copy
from functools import lru_cache
from . import get_program
from ..pdfa import SRGB_ICC_PROFILE


@lru_cache(maxsize=1)
def version():
    """Return the version string reported by ``gs --version``.

    The Ghostscript executable is located with ``get_program('gs')`` and
    its combined stdout/stderr output is returned with surrounding
    whitespace stripped.  Successful results are memoized by ``lru_cache``,
    so the subprocess is launched at most once per process.

    Raises:
        MissingDependencyError: if the executable cannot be launched
            (``FileNotFoundError``) or exits with a non-zero status
            (``CalledProcessError``).  The original exception is chained
            as ``__cause__``.
    """
    # Imported locally because the module-level imports visible for this
    # file do not bring ``sys`` into scope.
    import sys

    args_gs = [
        get_program('gs'),
        '--version'
    ]
    try:
        output = check_output(
            args_gs, close_fds=True, universal_newlines=True,
            stderr=STDOUT)
    except (CalledProcessError, FileNotFoundError) as e:
        # A missing binary surfaces as FileNotFoundError, not
        # CalledProcessError, so both must be handled for the message
        # below to be shown in the case it describes.
        print("Could not find Ghostscript executable on system PATH.",
              file=sys.stderr)
        # NOTE(review): MissingDependencyError is not among the imports
        # visible in this module -- confirm it is imported elsewhere.
        raise MissingDependencyError from e

    return output.strip()


def rasterize_pdf(input_file, output_file, xres, yres, raster_device, log,
pageno=1):
with NamedTemporaryFile(delete=True) as tmp:
Expand Down
7 changes: 6 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ocrmypdf.exceptions import ExitCode
from ocrmypdf import leptonica
from ocrmypdf.pdfa import file_claims_pdfa
from ocrmypdf.exec import ghostscript


check_ocrmypdf = pytest.helpers.check_ocrmypdf
Expand Down Expand Up @@ -172,6 +173,10 @@ def test_preserve_metadata(spoof_tesseract_noop, output_type,
@pytest.mark.skipif(
pytest.helpers.is_macos() and pytest.helpers.running_in_travis(),
reason="save Travis the trouble of installing poppler")
@pytest.mark.xfail(
ghostscript.version() == '9.21',
reason="gs 9.21 has a regression that affects this"
)
@pytest.mark.parametrize("output_type", [
'pdfa', 'pdf'
])
Expand All @@ -191,7 +196,7 @@ def test_override_metadata(spoof_tesseract_noop, output_type, resources,
'--output-type', output_type,
env=spoof_tesseract_noop)

assert p.returncode == ExitCode.ok
assert p.returncode == ExitCode.ok, err

pdf = str(outpdf)

Expand Down

0 comments on commit 199de96

Please sign in to comment.