Skip to content

Commit

Permalink
Use Latin-1-decoded strings instead of bytestrings in pdfrw streams
Browse files (browse the repository at this point in the history)
Fix #558.
  • Loading branch information
liZe committed Jan 30, 2018
1 parent b24704f commit a5bbc22
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
8 changes: 5 additions & 3 deletions weasyprint/pdf.py
@@ -89,13 +89,15 @@ def _create_compressed_file_object(source):

pdf_file_object = PdfDict(
Type=PdfName('EmbeddedFile'), Filter=PdfName('FlateDecode'))
- pdf_file_object.stream = b''
+
+ # pdfrw needs Latin-1-decoded unicode strings in object.stream
+ pdf_file_object.stream = ''
size = 0
for data in iter(lambda: source.read(4096), b''):
size += len(data)
md5.update(data)
- pdf_file_object.stream += compress.compress(data)
- pdf_file_object.stream += compress.flush(zlib.Z_FINISH)
+ pdf_file_object.stream += compress.compress(data).decode('latin-1')
+ pdf_file_object.stream += compress.flush(zlib.Z_FINISH).decode('latin-1')
pdf_file_object.Params = PdfDict(
CheckSum=PdfString('<{}>'.format(md5.hexdigest())), Size=size)
return pdf_file_object
Expand Down
13 changes: 13 additions & 0 deletions weasyprint/tests/test_pdf.py
@@ -15,6 +15,7 @@
import hashlib
import io
import os
import zlib

import cairocffi
import pytest
@@ -427,27 +428,39 @@ def test_embedded_files():
pdf = PdfReader(fdata=pdf_bytes)
embedded = pdf.Root.Names.EmbeddedFiles.Names

assert zlib.decompress(
embedded[1].EF.F.stream.encode('latin-1')) == b'hi there'
assert embedded[1].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'hi there').hexdigest()))
assert embedded[1].F.decode() == ''
assert embedded[1].UF.decode() == 'attachment.bin'
assert embedded[1].Desc.decode() == 'some file attachment äöü'

assert zlib.decompress(
embedded[3].EF.F.stream.encode('latin-1')) == b'12345678'
assert embedded[3].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(adata).hexdigest()))
assert embedded[3].UF.decode() == os.path.basename(absolute_tmp_file)

assert zlib.decompress(
embedded[5].EF.F.stream.encode('latin-1')) == b'abcdefgh'
assert embedded[5].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(rdata).hexdigest()))
assert embedded[5].UF.decode() == os.path.basename(relative_tmp_file)

assert zlib.decompress(
embedded[7].EF.F.stream.encode('latin-1')) == b'oob attachment'
assert embedded[7].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'oob attachment').hexdigest()))
assert embedded[7].Desc.decode() == 'Hello'

assert zlib.decompress(
embedded[9].EF.F.stream.encode('latin-1')) == b'raw URL'
assert embedded[9].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'raw URL').hexdigest()))

assert zlib.decompress(
embedded[11].EF.F.stream.encode('latin-1')) == b'file like obj'
assert embedded[11].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'file like obj').hexdigest()))

Expand Down

0 comments on commit a5bbc22

Please sign in to comment.