Better handling of text overflow in FPDF.write() & FPDF.write_html() - fix py-pdf#847 (py-pdf#850)
Tolker-KU · Jul 24, 2023 · 750af06 · 750af06
1 parent 199d419
commit 750af06
Show file tree

Hide file tree

Showing 14 changed files with 162 additions and 80 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -27,6 +27,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
 ### Fixed
 - [`FPDF.table()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.table): the `colspan` setting has been fixed - [documentation](https://pyfpdf.github.io/fpdf2/Tables.html#column-span)
 - [`FPDF.image()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.image): allowing images path starting with `data` to be passed as input
+- text overflow is better handled by `FPDF.write()` & `FPDF.write_html()` - _cf._ [issue #847](https://github.com/PyFPDF/fpdf2/issues/847)
 - the initial text color is preserved when using `FPDF.write_html()` - _cf._ [issue #846](https://github.com/PyFPDF/fpdf2/issues/846)
 ### Deprecated
 - the `center` optional parameter of [`FPDF.cell()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.cell) is **no more** deprecated, as it allows for horizontal positioning, which is different from text alignment control with `align="C"`

diff --git a/fpdf/fonts.py b/fpdf/fonts.py
@@ -61,6 +61,9 @@ def __init__(self, fpdf, fontkey, style):
         self.fontkey = fontkey
         self.emphasis = TextEmphasis.coerce(style)
 
+    def __repr__(self):
+        """Return a short debug representation showing only `i` and `fontkey`."""
+        return f"CoreFont(i={self.i}, fontkey={self.fontkey})"
+
 
 class TTFFont:
     __slots__ = (

diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py
@@ -3605,7 +3605,7 @@ def write(
         # first line from current x position to right margin
         first_width = self.w - self.x - self.r_margin
         txt_line = multi_line_break.get_line_of_given_width(
-            first_width - 2 * self.c_margin, wordsplit=False
+            first_width - 2 * self.c_margin,
         )
         # remaining lines fill between margins
         full_width = self.w - self.l_margin - self.r_margin

diff --git a/fpdf/line_break.py b/fpdf/line_break.py
@@ -38,12 +38,10 @@ def __init__(
         self.link = link
 
     def __repr__(self):
-        gstate = self.graphics_state.copy()
-        if "current_font" in gstate:
-            del gstate["current_font"]  # TMI
+        # The graphics state is printed unfiltered: a "current_font" entry is no longer stripped.
         return (
             f"Fragment(characters={self.characters},"
-            f" graphics_state={gstate}, k={self.k}, link={self.link})"
+            f" graphics_state={self.graphics_state},"
+            f" k={self.k}, link={self.link})"
         )
 
     @property
@@ -394,18 +392,14 @@ def __init__(
         self.idx_last_forced_break = None
 
     # pylint: disable=too-many-return-statements
-    def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True):
+    def get_line_of_given_width(self, maximum_width: float):
         first_char = True  # "Tw" ignores the first character in a text object.
         idx_last_forced_break = self.idx_last_forced_break
         self.idx_last_forced_break = None
 
         if self.fragment_index == len(self.styled_text_fragments):
             return None
 
-        last_fragment_index = self.fragment_index
-        last_character_index = self.character_index
-        line_full = False
-
         current_line = CurrentLine(print_sh=self.print_sh)
         while self.fragment_index < len(self.styled_text_fragments):
             current_fragment = self.styled_text_fragments[self.fragment_index]
@@ -442,9 +436,6 @@ def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True):
                     ) = current_line.automatic_break(self.justify)
                     self.character_index += 1
                     return line
-                if not wordsplit:
-                    line_full = True
-                    break
                 if idx_last_forced_break == self.character_index:
                     raise FPDFException(
                         "Not enough horizontal space to render a single character"
@@ -464,12 +455,6 @@ def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True):
 
             self.character_index += 1
 
-        if line_full and not wordsplit:
-            # roll back and return empty line to trigger continuation
-            # on the next line.
-            self.fragment_index = last_fragment_index
-            self.character_index = last_character_index
-            return CurrentLine().manual_break(self.justify)
         if current_line.width:
             return current_line.manual_break()
         return None
diff --git a/fpdf/svg.py b/fpdf/svg.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import math, re, warnings
 
 from fontTools.svgLib.path import parse_path
@@ -16,6 +17,7 @@
 from .drawing import (
     color_from_hex_string,
     color_from_rgb_string,
+    ClippingPath,
     GraphicsContext,
     GraphicsStyle,
     PaintedPath,
@@ -323,15 +325,16 @@ class ShapeBuilder:
     """A namespace within which methods for converting basic shapes can be looked up."""
 
     @staticmethod
-    def new_path(tag):
+    def new_path(tag, path=None):
         """Create a new path with the appropriate styles."""
-        path = PaintedPath()
+        # `path` is optional so callers using the previous `new_path(tag)`
+        # signature keep working; when a path object is supplied it is styled
+        # in place instead of allocating a fresh PaintedPath.
+        if path is None:
+            path = PaintedPath()
         apply_styles(path, tag)
 
         return path
 
     @classmethod
-    def rect(cls, tag):
+    def rect(cls, tag, path=None):
         """Convert an SVG <rect> into a PDF path."""
         # svg rect is wound clockwise
         if "x" in tag.attrib:
@@ -374,14 +377,16 @@ def rect(cls, tag):
             raise ValueError(f"bad rect {tag}")
 
         if (width == 0) or (height == 0):
-            return PaintedPath()
+            if path is None:
+                return PaintedPath()
+            return path
 
         if rx > (width / 2):
             rx = width / 2
         if ry > (height / 2):
             ry = height / 2
 
-        path = cls.new_path(tag)
+        path = cls.new_path(tag, path)
 
         path.rectangle(x, y, width, height, rx, ry)
         return path
@@ -649,6 +654,8 @@ def from_file(cls, filename, *args, encoding="utf-8", **kwargs):
 
     def __init__(self, svg_text):
         self.cross_references = {}
+        self.clip_paths = {}
+        self.clipped_elements = defaultdict(list)
 
         # disabling bandit rule as we use defusedxml:
         svg_tree = parse_xml_str(svg_text)  # nosec B314
@@ -659,6 +666,12 @@ def __init__(self, svg_text):
         self.extract_shape_info(svg_tree)
         self.convert_graphics(svg_tree)
 
+        # Attach every parsed clipping path to the graphics contexts that
+        # were registered under the same "clip-path" reference.
+        for url, elements in self.clipped_elements.items():
+            for elem in elements:
+                assert type(elem) is GraphicsContext
+                elem.clipping_path = self.clip_paths[url]
+
     @force_nodocument
     def extract_shape_info(self, root_tag):
         """Collect shape info from the given SVG."""
@@ -814,7 +827,7 @@ def transform_to_rect_viewport(
                 )
 
         self.base_group.transform = transform
 
         return vp_width / scale, vp_height / scale, self.base_group
 
     def draw_to_page(self, pdf, x=None, y=None, debug_stream=None):
@@ -853,8 +866,25 @@ def handle_defs(self, defs):
                 self.build_group(child)
             if child.tag in xmlns_lookup("svg", "path"):
                 self.build_path(child)
+            if child.tag in xmlns_lookup("svg", "clipPath"):
+                self.handle_clip_path(child)
             # We could/should also support <defs> that are rect, circle, ellipse, line, polyline, polygon...
 
+    @force_nodocument
+    def handle_clip_path(self, clip_path):
+        """
+        Parse the single child of a <clipPath> element and store the resulting
+        clipping path in `self.clip_paths`, keyed by the `url(#<id>)` string
+        built from the element's `id` attribute.
+
+        Raises:
+            ValueError: if the <clipPath> does not contain exactly one child,
+                or if that child is neither a <path> nor a known basic shape.
+        """
+        if len(clip_path) != 1:
+            raise ValueError(
+                f"<clipPath> must contain exactly one child element, got {len(clip_path)}"
+            )
+
+        child = clip_path[0]
+
+        if child.tag in xmlns_lookup("svg", "path"):
+            item = self.build_path(child, pdf_path=ClippingPath())
+        elif child.tag in shape_tags:
+            # NOTE(review): only ShapeBuilder.rect is shown accepting a target
+            # path argument - confirm the other shape builders accept one too.
+            item = getattr(ShapeBuilder, shape_tags[child.tag])(child, ClippingPath())
+        else:
+            # Without this branch `item` would be unbound on the line below.
+            raise ValueError(f"unsupported <clipPath> child: {child.tag}")
+
+        self.clip_paths["url(#" + clip_path.attrib["id"] + ")"] = item
+
     # this assumes xrefs only reference already-defined ids.
     # I don't know if this is required by the SVG spec.
     @force_nodocument
@@ -898,14 +928,23 @@ def build_group(self, group, pdf_group=None):
         for child in group:
             if child.tag in xmlns_lookup("svg", "defs"):
                 self.handle_defs(child)
+                continue
             if child.tag in xmlns_lookup("svg", "g"):
-                pdf_group.add_item(self.build_group(child))
-            if child.tag in xmlns_lookup("svg", "path"):
-                pdf_group.add_item(self.build_path(child))
+                item = self.build_group(child)
+            elif child.tag in xmlns_lookup("svg", "path"):
+                item = self.build_path(child)
             elif child.tag in shape_tags:
-                pdf_group.add_item(getattr(ShapeBuilder, shape_tags[child.tag])(child))
-            if child.tag in xmlns_lookup("svg", "use"):
-                pdf_group.add_item(self.build_xref(child))
+                item = getattr(ShapeBuilder, shape_tags[child.tag])(child)
+            elif child.tag in xmlns_lookup("svg", "use"):
+                item = self.build_xref(child)
+            else:
+                # Unsupported tag: skip it, as the code did before this change.
+                # Without this branch `item` would be unbound (first child) or
+                # still hold the previous child's item, which would be added twice.
+                continue
+
+            if "clip-path" in child.attrib:
+                # Wrap the item in its own context and register that context
+                # under the referenced clip-path value.
+                sub_context = GraphicsContext()
+                sub_context.add_item(item)
+                self.clipped_elements[child.attrib["clip-path"]].append(sub_context)
+                item = sub_context
+
+            pdf_group.add_item(item, _copy=False)
 
         try:
             self.cross_references["#" + group.attrib["id"]] = pdf_group
@@ -915,9 +954,11 @@ def build_group(self, group, pdf_group=None):
         return pdf_group
 
     @force_nodocument
-    def build_path(self, path):
+    def build_path(self, path, pdf_path=None):
         """Convert an SVG <path> tag into a PDF path object."""
-        pdf_path = PaintedPath()
+        if pdf_path is None:
+            pdf_path = PaintedPath()
+
         apply_styles(pdf_path, path)
 
         svg_path = path.attrib.get("d", None)

diff --git a/test/svg/generated_pdf/clippath_rect.pdf b/test/svg/generated_pdf/clippath_rect.pdf
diff --git a/test/svg/svg_sources/clippath_rect.svg b/test/svg/svg_sources/clippath_rect.svg
diff --git a/test/svg/test_svg.py b/test/svg/test_svg.py
@@ -279,3 +279,19 @@ def test_svg_conversion_priority_styles(self, tmp_path):
         svg.draw_to_page(pdf)
 
         assert_pdf_equal(pdf, GENERATED_PDF_DIR / f"{svg_file.stem}.pdf", tmp_path)
+
+    def test_svg_clippath_rect(self, tmp_path):
+        """Render clippath_rect.svg and compare the result with the reference PDF."""
+        svg_file = parameters.svgfile("clippath_rect.svg")
+
+        svg = fpdf.svg.SVGObject.from_file(svg_file)
+
+        pdf = fpdf.FPDF(unit="pt", format=(svg.width, svg.height))
+        pdf.set_margin(0)
+        pdf.allow_images_transparency = False
+        pdf.add_page()
+
+        svg.draw_to_page(pdf)
+
+        # `generate=True` was dropped so this call matches the sibling tests.
+        # NOTE(review): per the fpdf2 development guide that flag (re)writes the
+        # reference PDF instead of comparing against it - confirm.
+        assert_pdf_equal(pdf, GENERATED_PDF_DIR / f"{svg_file.stem}.pdf", tmp_path)
diff --git a/test/text/test_line_break.py b/test/text/test_line_break.py
@@ -1129,3 +1129,13 @@ def test_trim_trailing_spaces():
     cl.fragments = [frag]
     res = cl.trim_trailing_spaces()
     assert res is None
+
+
+def test_line_break_no_initial_newline():  # issue-847
+    """A run of 50 "X" without any space must still yield a first line with fragments."""
+    char_widths = {"normal": {"X": 4.7}}
+    fragment = FxFragment(char_widths, "X" * 50, _gs_normal, 1)
+    line_breaker = MultiLineBreak([fragment])
+    first_line = line_breaker.get_line_of_given_width(188)
+    assert first_line.fragments
diff --git a/test/text/test_unbreakable.py b/test/text/test_unbreakable.py
@@ -167,12 +167,12 @@ def test_multi_cell_table_unbreakable_with_split_only(tmp_path):  # issue 359
 
     pdf.ln()
 
-    with pdf.unbreakable() as doc:
-        for _ in range(4):
-            for row in data:
-                max_no_of_lines_in_cell = 1
-                for cell in row:
-                    with pytest.warns(DeprecationWarning, match=expected_warn):
+    with pytest.warns(DeprecationWarning, match=expected_warn):
+        with pdf.unbreakable() as doc:
+            for _ in range(4):
+                for row in data:
+                    max_no_of_lines_in_cell = 1
+                    for cell in row:
                         result = doc.multi_cell(
                             cell_width,
                             l_height,
@@ -184,39 +184,39 @@ def test_multi_cell_table_unbreakable_with_split_only(tmp_path):  # issue 359
                             max_line_height=l_height,
                             split_only=True,
                         )
-                    no_of_lines_in_cell = len(result)
-                    if no_of_lines_in_cell > max_no_of_lines_in_cell:
-                        max_no_of_lines_in_cell = no_of_lines_in_cell
-                no_of_lines_list.append(max_no_of_lines_in_cell)
-
-            for j, row in enumerate(data):
-                cell_height = no_of_lines_list[j] * l_height
-                for cell in row:
-                    if j == 0:
-                        doc.multi_cell(
-                            cell_width,
-                            cell_height,
-                            "**" + cell + "**",
-                            border=1,
-                            fill=False,
-                            align="L",
-                            new_x="RIGHT",
-                            new_y="TOP",
-                            max_line_height=l_height,
-                            markdown=False,
-                        )
-                    else:
-                        doc.multi_cell(
-                            cell_width,
-                            cell_height,
-                            cell,
-                            border=1,
-                            align="L",
-                            new_x="RIGHT",
-                            new_y="TOP",
-                            max_line_height=l_height,
-                        )
-                doc.ln(cell_height)
+                        no_of_lines_in_cell = len(result)
+                        if no_of_lines_in_cell > max_no_of_lines_in_cell:
+                            max_no_of_lines_in_cell = no_of_lines_in_cell
+                    no_of_lines_list.append(max_no_of_lines_in_cell)
+
+                for j, row in enumerate(data):
+                    cell_height = no_of_lines_list[j] * l_height
+                    for cell in row:
+                        if j == 0:
+                            doc.multi_cell(
+                                cell_width,
+                                cell_height,
+                                "**" + cell + "**",
+                                border=1,
+                                fill=False,
+                                align="L",
+                                new_x="RIGHT",
+                                new_y="TOP",
+                                max_line_height=l_height,
+                                markdown=False,
+                            )
+                        else:
+                            doc.multi_cell(
+                                cell_width,
+                                cell_height,
+                                cell,
+                                border=1,
+                                align="L",
+                                new_x="RIGHT",
+                                new_y="TOP",
+                                max_line_height=l_height,
+                            )
+                    doc.ln(cell_height)
 
     assert_pdf_equal(
         pdf, HERE / "multi_cell_table_unbreakable_with_split_only.pdf", tmp_path

diff --git a/test/text/test_varied_fragments.py b/test/text/test_varied_fragments.py
@@ -29,7 +29,7 @@ def write_fragments(self, frags, align=Align.L):
         # first line from current x position to right margin
         first_width = self.w - self.x - self.r_margin
         text_line = multi_line_break.get_line_of_given_width(
-            first_width - 2 * self.c_margin, wordsplit=False
+            first_width - 2 * self.c_margin
         )
         # remaining lines fill between margins
         full_width = self.w - self.l_margin - self.r_margin