Permalink
Browse files

Add metadata in the low-level API.

  • Loading branch information...
1 parent ce484dd commit 6e63903a13a2f420f69db37e99f1df9ac4714c13 @SimonSapin SimonSapin committed Oct 4, 2012
Showing with 490 additions and 165 deletions.
  1. +1 −0 docs/conf.py
  2. +3 −5 docs/using.rst
  3. +1 −2 weasyprint/__init__.py
  4. +164 −8 weasyprint/document.py
  5. +68 −130 weasyprint/pdf.py
  6. +231 −0 weasyprint/tests/test_api.py
  7. +22 −20 weasyprint/tests/test_pdf.py
View
@@ -36,6 +36,7 @@ def __call__(self, *args, **kwargs):
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx']
+autodoc_member_order = 'bysource'
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
View
@@ -71,7 +71,6 @@ High-level API
.. autoclass:: HTML(input, **kwargs)
:members:
- :member-order: bysource
.. autoclass:: CSS(input, **kwargs)
@@ -87,13 +86,12 @@ pages, each page separately, or even use any type of cairo surface for output
other than PDF or PNG.
.. automethod:: HTML.render
+
.. module:: weasyprint.document
-.. autoclass:: Document
+.. autoclass:: Page()
:members:
- :member-order: bysource
-.. autoclass:: Page
+.. autoclass:: Document()
:members:
- :member-order: bysource
.. currentmodule:: weasyprint
View
@@ -147,8 +147,7 @@ def write_pdf(self, target=None, stylesheets=None):
If :obj:`target` is :obj:`None`, a PDF byte string.
"""
- return self.render(
- stylesheets, resolution=72, enable_hinting=False).write_pdf(target)
+ return self.render(stylesheets, resolution=72).write_pdf(target)
def write_png(self, target=None, stylesheets=None, resolution=96):
"""Render the document to a single PNG image.
View
@@ -20,25 +20,94 @@
from . import CSS
from . import images
+from .logger import LOGGER
from .css import get_all_computed_styles
+from .formatting_structure import boxes
from .formatting_structure.build import build_formatting_structure
from .layout import layout_document
from .draw import draw_page, stacked
from .pdf import write_pdf_metadata
-from .compat import izip
+from .compat import izip, iteritems
from .urls import FILESYSTEM_ENCODING
+class _TaggedTuple(tuple):
+ """A tuple with a :attr:`sourceline` attribute,
+ the line number in the HTML source for whatever the tuple represents.
+
+ """
+
+
+def _get_metadata(box, bookmarks, links, anchors, matrix):
+ bookmark_label = box.bookmark_label
+ bookmark_level = box.bookmark_level
+ link = box.style.link
+ anchor_name = box.style.anchor
+ has_bookmark = bookmark_label and bookmark_level
+ # 'link' is inherited but redundant on text boxes
+ has_link = link and not isinstance(box, boxes.TextBox)
+ # In case of duplicate IDs, only the first is an anchor.
+ has_anchor = anchor_name and anchor_name not in anchors
+
+ if has_bookmark or has_link or has_anchor:
+ pos_x, pos_y, width, height = box.hit_area()
+ pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
+ width, height = matrix.transform_distance(width, height)
+ if has_bookmark:
+ bookmarks.append((bookmark_level, bookmark_label, (pos_x, pos_y)))
+ if has_link:
+ link_type, target = link
+ link = _TaggedTuple(
+ (link_type, target, (pos_x, pos_y, width, height)))
+ link.sourceline = box.sourceline
+ links.append(link)
+ if has_anchor:
+ anchors[anchor_name] = pos_x, pos_y
+
+ for child in box.all_children():
+ _get_metadata(child, bookmarks, links, anchors, matrix)
+
+
class Page(object):
- """Represents a single rendered page."""
+ """Represents a single rendered page.
+
+ Should be obtained from :attr:`Document.pages` but not
+ instantiated directly.
+
+ """
def __init__(self, page, enable_hinting=False, resolution=96):
- self._page_box = page
- self._enable_hinting = enable_hinting
- self._dppx = resolution / 96
+ dppx = resolution / 96
+
#: The page width, including margins, in cairo user units.
- self.width = page.margin_width() * self._dppx
+ self.width = page.margin_width() * dppx
+
#: The page height, including margins, in cairo user units.
- self.height = page.margin_height() * self._dppx
+ self.height = page.margin_height() * dppx
+
+ #: A list of ``(bookmark_level, bookmark_label, point)``.
+ #: A point is ``(x, y)`` in cairo units from the top-left of the page.
+ self.bookmarks = []
+
+ #: A list of ``(link_type, target, rectangle)``.
+ #: A rectangle is ``(x, y, width, height)``, in cairo units
+ #: from the top-left of the page.
+ #: The link type is one of two strings:
+ #:
+ #: * ``'external'``: :obj:`target` is an absolute URL
+ #: * ``'internal'``: :obj:`target` is an anchor name (see
+ #: :attr:`Page.anchors` and :meth:`Document.all_anchors`).
+ #: An anchor might be defined in another page, or not at all.
+ self.links = []
+
+ #: A dict mapping anchor names to points (``(x, y)`` in cairo units
+ #: from the top-left of the page.)
+ self.anchors = {}
+
+ _get_metadata(page, self.bookmarks, self.links, self.anchors,
+ cairo.Matrix(xx=dppx, yy=dppx))
+ self._page_box = page
+ self._enable_hinting = enable_hinting
+ self._dppx = dppx
def paint(self, cairo_context, left_x=0, top_y=0, clip=False):
"""Paint the surface in cairo, on any type of surface.
@@ -83,6 +152,13 @@ def paint(self, cairo_context, left_x=0, top_y=0, clip=False):
class Document(object):
+ """A rendered document, with access to individual pages
+ ready to be painted on any cairo surfaces.
+
+ Should be obtained from :meth:`HTML.render() <weasyprint.HTML.render>`
+ but not instantiated directly.
+
+ """
@classmethod
def render(cls, html, stylesheets, resolution, enable_hinting):
style_for = get_all_computed_styles(html, user_stylesheets=[
@@ -107,6 +183,86 @@ def copy(self, pages='all'):
pages = self.pages
return type(self)(pages)
+ def resolve_links(self):
+ """Resolve internal hyperlinks.
+
+ Links to a missing anchor are removed with a warning.
+ If multiple anchors have the same name, the first is used.
+
+ :returns:
+ A generator yielding lists (one per page) like :attr:`Page.links`,
+ except that :obj:`target` for internal hyperlinks is
+ ``(page_number, x, y)`` instead of an anchor name.
+ The page number is an index (0-based) in the :attr:`pages` list,
+ ``x, y`` are in cairo units from the top-left of the page.
+
+ """
+ anchors = {}
+ for i, page in enumerate(self.pages):
+ for anchor_name, (point_x, point_y) in iteritems(page.anchors):
+ anchors.setdefault(anchor_name, (i, point_x, point_y))
+ for page in self.pages:
+ page_links = []
+ for link in page.links:
+ link_type, anchor_name, rectangle = link
+ if link_type == 'internal':
+ target = anchors.get(anchor_name)
+ if target is None:
+ LOGGER.warn(
+ 'No anchor #%s for internal URI reference '
+ 'at line %s' % (anchor_name, link.sourceline))
+ else:
+ page_links.append((link_type, target, rectangle))
+ else:
+ # External link
+ page_links.append(link)
+ yield page_links
+
+ def make_bookmark_tree(self):
+ """Make a tree of all bookmarks in the document.
+
+ :return: a list of bookmark subtrees.
+ A subtree is ``(label, target, children)``. :obj:`label` is
+ a string, :obj:`target` is ``(page_number, x, y)`` like in
+ :meth:`resolve_links`, and :obj:`children` is itself a (recursive)
+ list of subtrees.
+
+ """
+ root = []
+ # At one point in the document, for each "output" depth, how much
+ # to add to get the source level (CSS values of bookmark-level).
+ # E.g. with <h1> then <h3>, skipped_levels == [0, 1]
+ # 1 means that <h3> has depth 3 - 1 = 2 in the output.
+ skipped_levels = []
+ last_by_depth = [root]
+ previous_level = 0
+ for page_number, page in enumerate(self.pages):
+ for level, label, (point_x, point_y) in page.bookmarks:
+ if level > previous_level:
+ # Example: if the previous bookmark is a <h2>, the next
+ # depth "should" be for <h3>. If now we get a <h6> we’re
+ # skipping three levels: append 6 - 2 - 1 = 3
+ skipped_levels.append(level - previous_level - 1)
+ else:
+ temp = level
+ while temp < previous_level:
+ temp += 1 + skipped_levels.pop()
+ if temp > previous_level:
+ # We removed too many "skips", add some back:
+ skipped_levels.append(temp - previous_level - 1)
+
+ previous_level = level
+ depth = level - sum(skipped_levels)
+ assert depth == len(skipped_levels)
+ assert depth >= 1
+
+ children = []
+ subtree = label, (page_number, point_x, point_y), children
+ last_by_depth[depth - 1].append(subtree)
+ del last_by_depth[depth:]
+ last_by_depth.append(children)
+ return root
+
def write_pdf(self, target=None):
"""Paint pages; write PDF bytes to ``target``, or return them
if ``target`` is ``None``.
@@ -131,7 +287,7 @@ def write_pdf(self, target=None):
surface.show_page()
surface.finish()
- write_pdf_metadata(self.pages, file_obj)
+ write_pdf_metadata(self, file_obj)
if target is None:
return file_obj.getvalue()
Oops, something went wrong.

0 comments on commit 6e63903

Please sign in to comment.