In xhtml.separate_images be a bit less defensive: overly complex cases…

… are still ignored, but when the `<p>` contains only non-textual elements such as `<br>` or blanks, these elements are simply ignored. See #MOD-864
IMIO · Jun 15, 2021 · 80bfe5f · 80bfe5f
1 parent 22c787b
commit 80bfe5f
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 4 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -4,8 +4,10 @@ Changelog
 0.44 (unreleased)
 -----------------
 
-- Nothing changed yet.
-
+- In `xhtml.separate_images` be a bit less defensive: overly complex cases
+  are still ignored, but when the `<p>` contains only non-textual elements
+  such as `<br>` or blanks, these elements are simply ignored.
+  [gbastien]
 
 0.43 (2021-05-31)
 -----------------

diff --git a/src/imio/helpers/tests/test_xhtml.py b/src/imio/helpers/tests/test_xhtml.py
@@ -810,3 +810,21 @@ def test_separate_images(self):
                          '<p><img src="http://plone/nohost/image6.png"></p>'
                          '<table><tr><td><p><img src="http://plone/nohost/image7.png">'
                          '<img src="http://plone/nohost/image8.png"></p></td></tr></table>')
+        # <br> are ignored
+        text = '<p><img src="http://plone/nohost/image1.png"><br><br />' \
+            '<img src="http://plone/nohost/image2.png"></p>'
+        result = separate_images(text)
+        self.assertEqual(result, '<p><img src="http://plone/nohost/image1.png"><br><br></p>'
+                         '<p><img src="http://plone/nohost/image2.png"></p>')
+        # blanks are ignored
+        text = '<p><img src="http://plone/nohost/image1.png">&nbsp; &nbsp;' \
+            '<img src="http://plone/nohost/image2.png"></p>'
+        result = separate_images(text)
+        self.assertEqual(result, '<p><img src="http://plone/nohost/image1.png">\xc2\xa0 \xc2\xa0</p>'
+                         '<p><img src="http://plone/nohost/image2.png"></p>')
+        # blanks and <br> are ignored as well
+        text = '<p><img src="http://plone/nohost/image1.png">&nbsp; &nbsp;<br>' \
+            '<img src="http://plone/nohost/image2.png"></p>'
+        result = separate_images(text)
+        self.assertEqual(result, '<p><img src="http://plone/nohost/image1.png">\xc2\xa0 \xc2\xa0<br></p>'
+                         '<p><img src="http://plone/nohost/image2.png"></p>')
diff --git a/src/imio/helpers/xhtml.py b/src/imio/helpers/xhtml.py
@@ -553,8 +553,12 @@ def separate_images(xhtmlContent, pretty_print=False):
         # only manage <p>/<div> containing several <img>, nothing else
         imgs = elt.xpath('.//img')
         len_imgs = len(imgs)
-        contained_tags = elt.getchildren()
-        if len_imgs > 1 and len_imgs == len(contained_tags) and not elt.text_content():
+        # <p> may not contain anything else than <img> or <br>
+        contained_tags = [child for child in elt.getchildren()
+                          if child.tag not in ('br', )]
+        # contained text, if <p> contains <img> and text, we can not separate it
+        text = elt.text_content().strip()
+        if len_imgs > 1 and len_imgs == len(contained_tags) and not text:
             changed = True
             for img_index, img in enumerate(imgs[1:]):
                 new_elt = lxml.html.Element(elt.tag)