Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Add metadata in the low-level API.

  • Loading branch information...
commit 6e63903a13a2f420f69db37e99f1df9ac4714c13 1 parent ce484dd
@SimonSapin SimonSapin authored
View
1  docs/conf.py
@@ -36,6 +36,7 @@ def __call__(self, *args, **kwargs):
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx']
+autodoc_member_order = 'bysource'
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
View
8 docs/using.rst
@@ -71,7 +71,6 @@ High-level API
.. autoclass:: HTML(input, **kwargs)
:members:
- :member-order: bysource
.. autoclass:: CSS(input, **kwargs)
@@ -87,13 +86,12 @@ pages, each page separately, or even use any type of cairo surface for output
other than PDF or PNG.
.. automethod:: HTML.render
+
.. module:: weasyprint.document
-.. autoclass:: Document
+.. autoclass:: Page()
:members:
- :member-order: bysource
-.. autoclass:: Page
+.. autoclass:: Document()
:members:
- :member-order: bysource
.. currentmodule:: weasyprint
View
3  weasyprint/__init__.py
@@ -147,8 +147,7 @@ def write_pdf(self, target=None, stylesheets=None):
If :obj:`target` is :obj:`None`, a PDF byte string.
"""
- return self.render(
- stylesheets, resolution=72, enable_hinting=False).write_pdf(target)
+ return self.render(stylesheets, resolution=72).write_pdf(target)
def write_png(self, target=None, stylesheets=None, resolution=96):
"""Render the document to a single PNG image.
View
172 weasyprint/document.py
@@ -20,25 +20,94 @@
from . import CSS
from . import images
+from .logger import LOGGER
from .css import get_all_computed_styles
+from .formatting_structure import boxes
from .formatting_structure.build import build_formatting_structure
from .layout import layout_document
from .draw import draw_page, stacked
from .pdf import write_pdf_metadata
-from .compat import izip
+from .compat import izip, iteritems
from .urls import FILESYSTEM_ENCODING
+class _TaggedTuple(tuple):
+ """A tuple with a :attr:`sourceline` attribute,
+ the line number in the HTML source for whatever the tuple represents.
+
+ """
+
+
+def _get_metadata(box, bookmarks, links, anchors, matrix):
+ bookmark_label = box.bookmark_label
+ bookmark_level = box.bookmark_level
+ link = box.style.link
+ anchor_name = box.style.anchor
+ has_bookmark = bookmark_label and bookmark_level
+ # 'link' is inherited but redundant on text boxes
+ has_link = link and not isinstance(box, boxes.TextBox)
+ # In case of duplicate IDs, only the first is an anchor.
+ has_anchor = anchor_name and anchor_name not in anchors
+
+ if has_bookmark or has_link or has_anchor:
+ pos_x, pos_y, width, height = box.hit_area()
+ pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
+ width, height = matrix.transform_distance(width, height)
+ if has_bookmark:
+ bookmarks.append((bookmark_level, bookmark_label, (pos_x, pos_y)))
+ if has_link:
+ link_type, target = link
+ link = _TaggedTuple(
+ (link_type, target, (pos_x, pos_y, width, height)))
+ link.sourceline = box.sourceline
+ links.append(link)
+ if has_anchor:
+ anchors[anchor_name] = pos_x, pos_y
+
+ for child in box.all_children():
+ _get_metadata(child, bookmarks, links, anchors, matrix)
+
+
class Page(object):
- """Represents a single rendered page."""
+ """Represents a single rendered page.
+
+ Should be obtained from :attr:`Document.pages` but not
+ instantiated directly.
+
+ """
def __init__(self, page, enable_hinting=False, resolution=96):
- self._page_box = page
- self._enable_hinting = enable_hinting
- self._dppx = resolution / 96
+ dppx = resolution / 96
+
#: The page width, including margins, in cairo user units.
- self.width = page.margin_width() * self._dppx
+ self.width = page.margin_width() * dppx
+
#: The page height, including margins, in cairo user units.
- self.height = page.margin_height() * self._dppx
+ self.height = page.margin_height() * dppx
+
+ #: A list of ``(bookmark_level, bookmark_label, point)``.
+ #: A point is ``(x, y)`` in cairo units from the top-left of the page.
+ self.bookmarks = []
+
+ #: A list of ``(link_type, target, rectangle)``.
+ #: A rectangle is ``(x, y, width, height)``, in cairo units
+ #: from the top-left of the page.
+ #: The link type is one of two strings:
+ #:
+ #: * ``'external'``: :obj:`target` is an absolute URL
+ #: * ``'internal'``: :obj:`target` is an anchor name (see
+ #: :attr:`Page.anchors` and :meth:`Document.all_anchors`).
+ #: An anchor might be defined in another page, or not at all.
+ self.links = []
+
+ #: A dict mapping anchor names to points (``(x, y)`` in cairo units
+ #: from the top-left of the page.)
+ self.anchors = {}
+
+ _get_metadata(page, self.bookmarks, self.links, self.anchors,
+ cairo.Matrix(xx=dppx, yy=dppx))
+ self._page_box = page
+ self._enable_hinting = enable_hinting
+ self._dppx = dppx
def paint(self, cairo_context, left_x=0, top_y=0, clip=False):
"""Paint the surface in cairo, on any type of surface.
@@ -83,6 +152,13 @@ def paint(self, cairo_context, left_x=0, top_y=0, clip=False):
class Document(object):
+ """A rendered document, with access to individual pages
+ ready to be painted on any cairo surfaces.
+
+ Should be obtained from :meth:`HTML.render() <weasyprint.HTML.render>`
+ but not instantiated directly.
+
+ """
@classmethod
def render(cls, html, stylesheets, resolution, enable_hinting):
style_for = get_all_computed_styles(html, user_stylesheets=[
@@ -107,6 +183,86 @@ def copy(self, pages='all'):
pages = self.pages
return type(self)(pages)
+ def resolve_links(self):
+ """Resolve internal hyperlinks.
+
+ Links to a missing anchor are removed with a warning.
+ If multiple anchors have the same name, the first is used.
+
+ :returns:
+ A generator yielding lists (one per page) like :attr:`Page.links`,
+ except that :obj:`target` for internal hyperlinks is
+ ``(page_number, x, y)`` instead of an anchor name.
+ The page number is an index (0-based) in the :attr:`pages` list,
+ ``x, y`` are in cairo units from the top-left of the page.
+
+ """
+ anchors = {}
+ for i, page in enumerate(self.pages):
+ for anchor_name, (point_x, point_y) in iteritems(page.anchors):
+ anchors.setdefault(anchor_name, (i, point_x, point_y))
+ for page in self.pages:
+ page_links = []
+ for link in page.links:
+ link_type, anchor_name, rectangle = link
+ if link_type == 'internal':
+ target = anchors.get(anchor_name)
+ if target is None:
+ LOGGER.warn(
+ 'No anchor #%s for internal URI reference '
+ 'at line %s' % (anchor_name, link.sourceline))
+ else:
+ page_links.append((link_type, target, rectangle))
+ else:
+ # External link
+ page_links.append(link)
+ yield page_links
+
+ def make_bookmark_tree(self):
+ """Make a tree of all bookmarks in the document.
+
+ :return: a list of bookmark subtrees.
+ A subtree is ``(label, target, children)``. :obj:`label` is
+ a string, :obj:`target` is ``(page_number, x, y)`` like in
+ :meth:`resolve_links`, and :obj:`children` is itself a (recursive)
+ list of subtrees.
+
+ """
+ root = []
+ # At one point in the document, for each "output" depth, how much
+ # to add to get the source level (CSS values of bookmark-level).
+ # Eg. with <h1> then <h3>, skipped_levels == [0, 1]
+ # 1 means that <h3> has depth 3 - 1 = 2 in the output.
+ skipped_levels = []
+ last_by_depth = [root]
+ previous_level = 0
+ for page_number, page in enumerate(self.pages):
+ for level, label, (point_x, point_y) in page.bookmarks:
+ if level > previous_level:
+ # Example: if the previous bookmark is a <h2>, the next
+ # depth "should" be for <h3>. If now we get a <h6> we’re
+ # skipping three levels: append 6 - 2 - 1 = 3
+ skipped_levels.append(level - previous_level - 1)
+ else:
+ temp = level
+ while temp < previous_level:
+ temp += 1 + skipped_levels.pop()
+ if temp > previous_level:
+ # We removed too many "skips", add some back:
+ skipped_levels.append(temp - previous_level - 1)
+
+ previous_level = level
+ depth = level - sum(skipped_levels)
+ assert depth == len(skipped_levels)
+ assert depth >= 1
+
+ children = []
+ subtree = label, (page_number, point_x, point_y), children
+ last_by_depth[depth - 1].append(subtree)
+ del last_by_depth[depth:]
+ last_by_depth.append(children)
+ return root
+
def write_pdf(self, target=None):
"""Paint pages; write PDF bytes to ``target``, or return them
if ``target`` is ``None``.
@@ -131,7 +287,7 @@ def write_pdf(self, target=None):
surface.show_page()
surface.finish()
- write_pdf_metadata(self.pages, file_obj)
+ write_pdf_metadata(self, file_obj)
if target is None:
return file_obj.getvalue()
View
198 weasyprint/pdf.py
@@ -295,165 +295,99 @@ def _start_writing(self):
return fileobj.tell(), fileobj.write
-def process_bookmarks(raw_bookmarks):
- """Transform a list of bookmarks as found in the document
- to a data structure ready for PDF.
+def flatten_bookmarks(bookmarks, depth=1):
+ for label, target, children in bookmarks:
+ yield label, target, depth
+ for result in flatten_bookmarks(children, depth + 1):
+ yield result
- """
- root = {'Count': 0}
- bookmark_list = []
- # At one point in the document, for each "output" level (ie. depth in the
- # PDF outline tree), how much to add to get the source level (CSS values
- # of bookmark-level).
- # Eg. with <h1> then <h3>, level_shifts == [0, 1]
- # 1 means that <h3> has depth 3 - 1 = 2 in the output.
- level_shifts = []
- last_by_level = [root]
- indices_by_level = [0]
-
- for i, (level, label, destination) in enumerate(raw_bookmarks, start=1):
- # Calculate the real level of the bookmark
- previous_level = len(last_by_level) - 1 + sum(level_shifts)
- if level > previous_level:
- level_shifts.append(level - previous_level - 1)
- else:
- temp_level = level
- while temp_level < previous_level:
- temp_level += 1 + level_shifts.pop()
- if temp_level > previous_level:
- # The last pop’d value was too big
- level_shifts.append(temp_level - previous_level - 1)
+def prepare_metadata(document, bookmark_root_id):
+ """Change metadata into data structures closer to the PDF objects.
- # Resolve level inconsistencies
- level -= sum(level_shifts)
+ In particular, convert from cairo units (origin at the top-left corner)
+ to PDF units (origin at the bottom-left corner.)
+ """
+ # X and width unchanged; Y’ = page_height - Y; height’ = -height
+ page_heights = [page.height for page in document.pages]
+ links = []
+ for page_number, page_links in enumerate(document.resolve_links()):
+ new_page_links = []
+ for link_type, target, rectangle in page_links:
+ if link_type == 'internal':
+ target_page, target_x, target_y = target
+ target = (target_page, target_x,
+ page_heights[target_page] - target_y)
+ # x, y, w, h => x0, y0, x1, y1
+ rect_x, rect_y, width, height = rectangle
+ pdf_y1 = page_heights[page_number] - rect_y
+ rectangle = rect_x, pdf_y1, rect_x + width, pdf_y1 - height
+ new_page_links.append((link_type, target, rectangle))
+ links.append(new_page_links)
+
+ bookmark_root = {'Count': 0}
+ bookmark_list = []
+ last_id_by_depth = [bookmark_root_id]
+ last_by_depth = [bookmark_root]
+ for bookmark_id, (label, target, depth) in enumerate(
+ flatten_bookmarks(document.make_bookmark_tree()),
+ bookmark_root_id + 1):
+ target_page, target_x, target_y = target
+ target = target_page, target_x, page_heights[target_page] - target_y
bookmark = {
'Count': 0, 'First': None, 'Last': None, 'Prev': None,
- 'Next': None, 'Parent': indices_by_level[level - 1],
- 'label': label, 'destination': destination}
+ 'Next': None, 'Parent': last_id_by_depth[depth - 1],
+ 'label': label, 'target': target}
- if level > len(last_by_level) - 1:
- last_by_level[level - 1]['First'] = i
+ if depth > len(last_by_depth) - 1:
+ last_by_depth[depth - 1]['First'] = bookmark_id
else:
- # The bookmark is sibling of indices_by_level[level]
- bookmark['Prev'] = indices_by_level[level]
- last_by_level[level]['Next'] = i
+ # The bookmark is sibling of last_id_by_depth[depth]
+ bookmark['Prev'] = last_id_by_depth[depth]
+ last_by_depth[depth]['Next'] = bookmark_id
- # Remove the bookmarks with a level higher than the current one
- del last_by_level[level:]
- del indices_by_level[level:]
+ # Remove the bookmarks with a depth higher than the current one
+ del last_by_depth[depth:]
+ del last_id_by_depth[depth:]
- for count_level in range(level):
- last_by_level[count_level]['Count'] += 1
- last_by_level[level - 1]['Last'] = i
+ for i in range(depth):
+ last_by_depth[i]['Count'] += 1
+ last_by_depth[depth - 1]['Last'] = bookmark_id
- last_by_level.append(bookmark)
- indices_by_level.append(i)
+ last_by_depth.append(bookmark)
+ last_id_by_depth.append(bookmark_id)
bookmark_list.append(bookmark)
-
- return root, bookmark_list
-
-
-def gather_metadata(pages):
- """Traverse the layout tree (boxes) to find all metadata."""
- def walk(box):
- # "Border area. That's the area that hit-testing is done on."
- # http://lists.w3.org/Archives/Public/www-style/2012Jun/0318.html
- if box.bookmark_label and box.bookmark_level:
- pos_x, pos_y, _, _ = box.hit_area()
- pos_x, pos_y = point_to_pdf(pos_x, pos_y)
- bookmarks.append((
- box.bookmark_level,
- box.bookmark_label,
- (page_index, pos_x, pos_y)))
-
- # 'link' is inherited but redundant on text boxes
- if box.style.link and not isinstance(box, boxes.TextBox):
- pos_x, pos_y, width, height = box.hit_area()
- pos_x, pos_y = point_to_pdf(pos_x, pos_y)
- width, height = distance_to_pdf(width, height)
- page_links.append(
- (box, (pos_x, pos_y, pos_x + width, pos_y + height)))
-
- if box.style.anchor and box.style.anchor not in anchors:
- pos_x, pos_y, _, _ = box.hit_area()
- pos_x, pos_y = point_to_pdf(pos_x, pos_y)
- anchors[box.style.anchor] = (page_index, pos_x, pos_y)
-
- if isinstance(box, boxes.ParentBox):
- for child in box.children:
- walk(child)
-
- bookmarks = []
- links_by_page = []
- anchors = {}
- for page_index, page in enumerate(pages):
- # Internal WeasyPrint coordinates are pixels right and down from
- # the top-left corner.
- # PDF coordinates are points right and up from the bottom-left corner.
- # page.height is already in points.
- matrix = cairo.Matrix(
- PX_TO_PT, 0, 0, -PX_TO_PT, 0, page.height * PX_TO_PT / page._dppx)
- point_to_pdf = matrix.transform_point
- distance_to_pdf = matrix.transform_distance
- page_links = []
- walk(page._page_box)
- links_by_page.append(page_links)
-
- # A list (by page) of lists of either:
- # ('external', uri, rectangle) or
- # ('internal', (page_index, target_x, target_y), rectangle)
- resolved_links_by_page = []
- for page_links in links_by_page:
- resolved_page_links = []
- for box, rectangle in page_links:
- type_, href = box.style.link
- if type_ == 'internal':
- target = anchors.get(href)
- if target is None:
- LOGGER.warn(
- 'No anchor #%s for internal URI reference at line %s'
- % (href, box.sourceline))
- else:
- resolved_page_links.append((type_, target, rectangle))
- else:
- # external link:
- resolved_page_links.append((type_, href, rectangle))
- resolved_links_by_page.append(resolved_page_links)
-
- return process_bookmarks(bookmarks), resolved_links_by_page
+ return bookmark_root, bookmark_list, links
-def write_pdf_metadata(pages, fileobj):
- bookmarks, links = gather_metadata(pages)
-
+def write_pdf_metadata(document, fileobj):
+ """Append to a seekable file-like object to add PDF metadata."""
pdf = PDFFile(fileobj)
- pdf.overwrite_object(pdf.info.object_number, pdf_format(
- '<< /Producer {producer!P} >>',
- producer=VERSION_STRING))
+ bookmark_root_id = pdf.next_object_number()
+ bookmark_root, bookmarks, links = prepare_metadata(
+ document, bookmark_root_id)
- root, bookmarks = bookmarks
if bookmarks:
- bookmark_root = pdf.next_object_number()
pdf.write_new_object(pdf_format(
'<< /Type /Outlines /Count {0} /First {1} 0 R /Last {2} 0 R\n>>',
- root['Count'],
- root['First'] + bookmark_root,
- root['Last'] + bookmark_root))
+ bookmark_root['Count'],
+ bookmark_root['First'] + bookmark_root_id,
+ bookmark_root['Last'] + bookmark_root_id))
pdf.extend_dict(pdf.catalog, pdf_format(
- '/Outlines {0} 0 R /PageMode /UseOutlines', bookmark_root))
+ '/Outlines {0} 0 R /PageMode /UseOutlines', bookmark_root_id))
for bookmark in bookmarks:
content = [pdf_format('<< /Title {0!P}\n', bookmark['label'])]
content.append(pdf_format(
'/A << /Type /Action /S /GoTo /D [{0} /XYZ {1:f} {2:f} 0] >>',
- *bookmark['destination']))
+ *bookmark['target']))
if bookmark['Count']:
content.append(pdf_format('/Count {0}\n', bookmark['Count']))
for key in ['Parent', 'Prev', 'Next', 'First', 'Last']:
if bookmark[key]:
content.append(pdf_format(
- '/{0} {1} 0 R\n', key, bookmark[key] + bookmark_root))
+ '/{0} {1} 0 R\n', key,
+ bookmark[key] + bookmark_root_id))
content.append(b'>>')
pdf.write_new_object(b''.join(content))
@@ -481,4 +415,8 @@ def write_pdf_metadata(pages, fileobj):
'/Annots [{0}]', ' '.join(
'{0} 0 R'.format(n) for n in annotations)))
+ pdf.overwrite_object(pdf.info.object_number, pdf_format(
+ '<< /Producer {producer!P} >>',
+ producer=VERSION_STRING))
+
pdf.finish()
View
231 weasyprint/tests/test_api.py
@@ -520,6 +520,237 @@ def png_size(png_bytes):
assert png_size(document.copy([page_2]).write_png()) == (6, 4)
+@assert_no_logs
+def test_bookmarks():
+ def assert_bookmarks(html, expected_by_page, expected_tree):
+ document = TestHTML(string=html).render()
+ assert [p.bookmarks for p in document.pages] == expected_by_page
+ assert document.make_bookmark_tree() == expected_tree
+ assert_bookmarks('''
+ <style>* { height: 10px }</style>
+ <h1>a</h1>
+ <h4 style="page-break-after: always">b</h4>
+ <h3 style="position: relative; top: 2px; left: 3px">c</h3>
+ <h2>d</h2>
+ <h1>e</h1>
+ ''', [
+ [(1, 'a', (0, 0)), (4, 'b', (0, 10))],
+ [(3, 'c', (3, 2)), (2, 'd', (0, 10)), (1, 'e', (0, 20))],
+ ], [
+ ('a', (0, 0, 0), [
+ ('b', (0, 0, 10), []),
+ ('c', (1, 3, 2), []),
+ ('d', (1, 0, 10), [])]),
+ ('e', (1, 0, 20), []),
+ ])
+ assert_bookmarks('''
+ <style>
+ * { height: 90px; margin: 0 0 10px 0 }
+ </style>
+ <h1>Title 1</h1>
+ <h1>Title 2</h1>
+ <h2 style="position: relative; left: 20px">Title 3</h2>
+ <h2>Title 4</h2>
+ <h3>Title 5</h3>
+ <span style="display: block; page-break-before: always"></span>
+ <h2>Title 6</h2>
+ <h1>Title 7</h1>
+ <h2>Title 8</h2>
+ <h3>Title 9</h3>
+ <h1>Title 10</h1>
+ <h2>Title 11</h2>
+ ''', [
+ [
+ (1, 'Title 1', (0, 0)),
+ (1, 'Title 2', (0, 100)),
+ (2, 'Title 3', (20, 200)),
+ (2, 'Title 4', (0, 300)),
+ (3, 'Title 5', (0, 400))
+ ], [
+ (2, 'Title 6', (0, 100)),
+ (1, 'Title 7', (0, 200)),
+ (2, 'Title 8', (0, 300)),
+ (3, 'Title 9', (0, 400)),
+ (1, 'Title 10', (0, 500)),
+ (2, 'Title 11', (0, 600))
+ ],
+ ], [
+ ('Title 1', (0, 0, 0), []),
+ ('Title 2', (0, 0, 100), [
+ ('Title 3', (0, 20, 200), []),
+ ('Title 4', (0, 0, 300), [
+ ('Title 5', (0, 0, 400), [])]),
+ ('Title 6', (1, 0, 100), [])]),
+ ('Title 7', (1, 0, 200), [
+ ('Title 8', (1, 0, 300), [
+ ('Title 9', (1, 0, 400), [])])]),
+ ('Title 10', (1, 0, 500), [
+ ('Title 11', (1, 0, 600), [])]),
+ ])
+ assert_bookmarks('''
+ <style>* { height: 10px }</style>
+ <h2>A</h2> <p>depth 1</p>
+ <h4>B</h4> <p>depth 2</p>
+ <h2>C</h2> <p>depth 1</p>
+ <h3>D</h3> <p>depth 2</p>
+ <h4>E</h4> <p>depth 3</p>
+ ''', [[
+ (2, 'A', (0, 0)),
+ (4, 'B', (0, 20)),
+ (2, 'C', (0, 40)),
+ (3, 'D', (0, 60)),
+ (4, 'E', (0, 80)),
+ ]], [
+ ('A', (0, 0, 0), [
+ ('B', (0, 0, 20), [])]),
+ ('C', (0, 0, 40), [
+ ('D', (0, 0, 60), [
+ ('E', (0, 0, 80), [])])]),
+ ])
+ assert_bookmarks('''
+ <style>* { height: 10px; font-size: 0 }</style>
+ <h2>A</h2> <p>h2 depth 1</p>
+ <h4>B</h4> <p>h4 depth 2</p>
+ <h3>C</h3> <p>h3 depth 2</p>
+ <h5>D</h5> <p>h5 depth 3</p>
+ <h1>E</h1> <p>h1 depth 1</p>
+ <h2>F</h2> <p>h2 depth 2</p>
+ <h2>G</h2> <p>h2 depth 2</p>
+ <h4>H</h4> <p>h4 depth 3</p>
+ <h1>I</h1> <p>h1 depth 1</p>
+ ''', [[
+ (2, 'A', (0, 0)),
+ (4, 'B', (0, 20)),
+ (3, 'C', (0, 40)),
+ (5, 'D', (0, 60)),
+ (1, 'E', (0, 70)),
+ (2, 'F', (0, 90)),
+ (2, 'G', (0, 110)),
+ (4, 'H', (0, 130)),
+ (1, 'I', (0, 150)),
+ ]], [
+ ('A', (0, 0, 0), [
+ ('B', (0, 0, 20), []),
+ ('C', (0, 0, 40), [
+ ('D', (0, 0, 60), [])])]),
+ ('E', (0, 0, 70), [
+ ('F', (0, 0, 90), []),
+ ('G', (0, 0, 110), [
+ ('H', (0, 0, 130), [])])]),
+ ('I', (0, 0, 150), []),
+ ])
+
+
+
+@assert_no_logs
+def test_links():
+ def assert_links(html, expected_links_by_page, expected_anchors_by_page,
+ expected_resolved_links,
+ base_url=resource_filename('<inline HTML>'), warnings=()):
+ with capture_logs() as logs:
+ document = TestHTML(string=html, base_url=base_url).render()
+ resolved_links = list(document.resolve_links())
+ assert len(logs) == len(warnings)
+ for message, expected in zip(logs, warnings):
+ assert expected in message
+ assert [p.links for p in document.pages] == expected_links_by_page
+ assert [p.anchors for p in document.pages] == expected_anchors_by_page
+ assert resolved_links == expected_resolved_links
+
+ assert_links('''
+ <style>
+ body { font-size: 10px; line-height: 2; width: 200px }
+ p { height: 90px; margin: 0 0 10px 0 }
+ img { width: 30px; vertical-align: top }
+ </style>
+ <p><a href="http://weasyprint.org"><img src=pattern.png></a></p>
+ <p style="padding: 0 10px"><a
+ href="#lipsum"><img style="border: solid 1px"
+ src=pattern.png></a></p>
+ <p id=hello>Hello, World</p>
+ <p id=lipsum>
+ <a style="display: block; page-break-before: always; height: 30px"
+ href="#hel%6Co"></a>
+ </p>
+ ''', [
+ [
+ ('external', 'http://weasyprint.org', (0, 0, 30, 20)),
+ ('external', 'http://weasyprint.org', (0, 0, 30, 30)),
+ ('internal', 'lipsum', (10, 100, 32, 20)),
+ ('internal', 'lipsum', (10, 100, 32, 32))
+ ],
+ [('internal', 'hello', (0, 0, 200, 30))],
+ ], [
+ {'hello': (0, 200)},
+ {'lipsum': (0, 0)}
+ ], [
+ [
+ ('external', 'http://weasyprint.org', (0, 0, 30, 20)),
+ ('external', 'http://weasyprint.org', (0, 0, 30, 30)),
+ ('internal', (1, 0, 0), (10, 100, 32, 20)),
+ ('internal', (1, 0, 0), (10, 100, 32, 32))
+ ],
+ [('internal', (0, 0, 200), (0, 0, 200, 30))],
+ ])
+
+ assert_links('''
+ <body style="width: 200px">
+ <a href="../lipsum" style="display: block; margin: 10px 5px">
+ ''', [[
+ ('external', 'http://weasyprint.org/foo/lipsum', (5, 10, 190, 0)),
+ ]], [{}], [[
+ ('external', 'http://weasyprint.org/foo/lipsum', (5, 10, 190, 0)),
+ ]],
+ base_url='http://weasyprint.org/foo/bar/')
+
+ # Relative URI reference without a base URI: not allowed
+ assert_links('<a href="../lipsum">',
+ [[]], [{}], [[]], base_url=None, warnings=[
+ 'WARNING: Relative URI reference without a base URI'])
+ assert_links('<div style="-weasy-link: url(../lipsum)">',
+ [[]], [{}], [[]], base_url=None, warnings=[
+ "WARNING: Ignored `-weasy-link: url(../lipsum)` at 1:1, "
+ "Relative URI reference without a base URI: '../lipsum'."])
+
+ # Internal URI reference without a base URI: OK
+ assert_links('''
+ <body style="width: 200px">
+ <a href="#lipsum" id="lipsum" style="display: block; margin: 10px 5px">
+ ''', [[
+ ('internal', 'lipsum', (5, 10, 190, 0)),
+ ]], [
+ {'lipsum': (5, 10)}
+ ], [[
+ ('internal', (0, 5, 10), (5, 10, 190, 0)),
+ ]], base_url=None)
+
+ assert_links('''
+ <body style="width: 200px">
+ <div style="-weasy-link: url(#lipsum); margin: 10px 5px" id="lipsum">
+ ''', [[
+ ('internal', 'lipsum', (5, 10, 190, 0)),
+ ]], [
+ {'lipsum': (5, 10)}
+ ], [[
+ ('internal', (0, 5, 10), (5, 10, 190, 0)),
+ ]], base_url=None)
+
+ assert_links('''
+ <style> a { display: block; height: 15px } </style>
+ <body style="width: 200px">
+ <a href="#lipsum"></a>
+ <a href="#missing" id="lipsum"></a>
+ ''', [[
+ ('internal', 'lipsum', (0, 0, 200, 15)),
+ ('internal', 'missing', (0, 15, 200, 15)),
+ ]], [
+ {'lipsum': (0, 15)}
+ ], [[
+ ('internal', (0, 0, 15), (0, 0, 200, 15)),
+ ]], base_url=None, warnings=[
+ 'WARNING: No anchor #missing for internal URI reference'])
+
+
def wsgi_client(path_info, qs_args=None):
start_response_calls = []
def start_response(status, headers):
View
42 weasyprint/tests/test_pdf.py
@@ -3,7 +3,7 @@
weasyprint.tests.test_metadata
------------------------------
- Test metadata of the document (bookmarks, links and destinations).
+ Test metadata of the document (bookmarks and hyperlinks).
:copyright: Copyright 2011-2012 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
@@ -41,27 +41,29 @@ def test_pdf_parser():
def get_metadata(html, base_url=resource_filename('<inline HTML>')):
- return pdf.gather_metadata(TestHTML(string=html, base_url=base_url).render(
- resolution=72, stylesheets=[
- CSS(string='@page { size: 500pt 1000pt; margin: 50pt }')]).pages)
+ return pdf.prepare_metadata(
+ TestHTML(string=html, base_url=base_url).render(
+ resolution=72, stylesheets=[
+ CSS(string='@page { size: 500pt 1000pt; margin: 50pt }')]),
+ bookmark_root_id=0)
def get_bookmarks(html, structure_only=False):
- (root, bookmarks), _links = get_metadata(html)
+ root, bookmarks, _links = get_metadata(html)
for bookmark in bookmarks:
if structure_only:
- bookmark.pop('destination')
+ bookmark.pop('target')
bookmark.pop('label')
else:
# Eliminate errors of floating point arithmetic
# (eg. 499.99999999999994 instead of 500)
- p, x, y = bookmark['destination']
- bookmark['destination'] = p, round(x, 6), round(y, 6)
+ p, x, y = bookmark['target']
+ bookmark['target'] = p, round(x, 6), round(y, 6)
return root, bookmarks
def get_links(html, **kwargs):
- _bookmarks, links = get_metadata(html, **kwargs)
+ _root, _bookmarks, links = get_metadata(html, **kwargs)
return [
[
(
@@ -122,27 +124,27 @@ def test_bookmarks():
assert root == dict(Count=11, First=1, Last=10)
assert bookmarks == [
dict(Count=0, First=None, Last=None, Next=2, Parent=0, Prev=None,
- label='Title 1', destination=(0, 50, 950)),
+ label='Title 1', target=(0, 50, 950)),
dict(Count=4, First=3, Last=6, Next=7, Parent=0, Prev=1,
- label='Title 2', destination=(0, 50, 850)),
+ label='Title 2', target=(0, 50, 850)),
dict(Count=0, First=None, Last=None, Next=4, Parent=2, Prev=None,
- label='Title 3', destination=(0, 70, 750)),
+ label='Title 3', target=(0, 70, 750)),
dict(Count=1, First=5, Last=5, Next=6, Parent=2, Prev=3,
- label='Title 4', destination=(0, 50, 650)),
+ label='Title 4', target=(0, 50, 650)),
dict(Count=0, First=None, Last=None, Next=None, Parent=4, Prev=None,
- label='Title 5', destination=(0, 50, 550)),
+ label='Title 5', target=(0, 50, 550)),
dict(Count=0, First=None, Last=None, Next=None, Parent=2, Prev=4,
- label='Title 6', destination=(1, 50, 850)),
+ label='Title 6', target=(1, 50, 850)),
dict(Count=2, First=8, Last=8, Next=10, Parent=0, Prev=2,
- label='Title 7', destination=(1, 50, 750)),
+ label='Title 7', target=(1, 50, 750)),
dict(Count=1, First=9, Last=9, Next=None, Parent=7, Prev=None,
- label='Title 8', destination=(1, 50, 650)),
+ label='Title 8', target=(1, 50, 650)),
dict(Count=0, First=None, Last=None, Next=None, Parent=8, Prev=None,
- label='Title 9', destination=(1, 50, 550)),
+ label='Title 9', target=(1, 50, 550)),
dict(Count=1, First=11, Last=11, Next=None, Parent=0, Prev=7,
- label='Title 10', destination=(1, 50, 450)),
+ label='Title 10', target=(1, 50, 450)),
dict(Count=0, First=None, Last=None, Next=None, Parent=10, Prev=None,
- label='Title 11', destination=(1, 50, 350))]
+ label='Title 11', target=(1, 50, 350))]
root, bookmarks = get_bookmarks('''
<h2>1</h2> level 1
Please sign in to comment.
Something went wrong with that request. Please try again.