Permalink
Browse files

Test the PDF parser (but not writer)

  • Loading branch information...
1 parent b1b5d85 commit 1b3f7d478fd62ad23a07b43313ea5690414dfe81 @SimonSapin SimonSapin committed May 20, 2012
Showing with 29 additions and 25 deletions.
  1. +4 −25 weasyprint/pdf.py
  2. +25 −0 weasyprint/tests/test_pdf.py
View
@@ -34,7 +34,6 @@
import os
import re
import string
-import itertools
from . import VERSION_STRING
from .compat import xrange, iteritems
@@ -80,7 +79,7 @@ def __repr__(self):
_re_cache = {}
- def _get_value(self, key, value_re):
+ def get_value(self, key, value_re):
regex = self._re_cache.get((key, value_re))
if not regex:
regex = re.compile(pdf_format('/{0} {1}', key, value_re))
@@ -93,7 +92,7 @@ def get_type(self):
"""
# No end delimiter, + defaults to greedy
- return self._get_value('Type', '/(\w+)').decode('ascii')
+ return self.get_value('Type', '/(\w+)').decode('ascii')
def get_indirect_dict(self, key, pdf_file):
"""Read the value for `key` and follow the reference, assuming
@@ -102,7 +101,7 @@ def get_indirect_dict(self, key, pdf_file):
:return: a new PDFDictionary instance.
"""
- object_number = int(self._get_value(key, '(\d+) 0 R'))
+ object_number = int(self.get_value(key, '(\d+) 0 R'))
return type(self)(object_number, pdf_file.read_object(object_number))
def get_indirect_dict_array(self, key, pdf_file):
@@ -112,7 +111,7 @@ def get_indirect_dict_array(self, key, pdf_file):
:return: a list of new PDFDictionary instance.
"""
- parts = self._get_value(key, '\[([^\]]+)\]').split(b' 0 R')
+ parts = self.get_value(key, '\[(.+?)\]').split(b' 0 R')
# The array looks like this: ' <a> 0 R <b> 0 R <c> 0 R '
# so `parts` ends up like this: [' <a>', ' <b>', ' <c>', ' '],
# with the trailing whitespace as the last element of the list.
@@ -344,23 +343,3 @@ def add_pdf_metadata(fileobj, links, destinations, bookmarks):
'{0} 0 R'.format(n) for n in annotations)))
pdf.finish()
-
-
-def test():
- import cairo
- import io
- fileobj = io.BytesIO()
- surface = cairo.PDFSurface(fileobj, 100, 100)
-# for i in xrange(20):
-# surface.show_page()
- surface.finish()
- add_pdf_metadata(fileobj)
- print(fileobj.getvalue().decode('latin1'))
-
-# pdf = PDFFile(fileobj)
-# print(pdf.page_tree)
-# print(len(pdf.pages))
-
-
-if __name__ == '__main__':
- test()
@@ -12,8 +12,32 @@
from __future__ import division, unicode_literals
+import io
+
+import cairo
+
from .. import HTML
from ..document import PDFDocument
+from .. import pdf
+from .testing_utils import assert_no_logs
+
+
+@assert_no_logs
+def test_pdf_parser():
+ fileobj = io.BytesIO()
+ surface = cairo.PDFSurface(fileobj, 1, 1)
+ for width, height in [
+ (100, 100),
+ (200, 10),
+ (3.14, 987654321)
+ ]:
+ surface.set_size(width, height)
+ surface.show_page()
+ surface.finish()
+
+ sizes = [page.get_value('MediaBox', '\[(.+?)\]').strip()
+ for page in pdf.PDFFile(fileobj).pages]
+ assert sizes == [b'0 0 100 100', b'0 0 200 10', b'0 0 3.14 987654321']
def get_bookmarks(html):
@@ -25,6 +49,7 @@ def get_bookmarks(html):
return root, bookmarks
+@assert_no_logs
def test_bookmarks():
"""Test the structure of the document bookmarks.

0 comments on commit 1b3f7d4

Please sign in to comment.