From 40799173c12fa243a60a2b6a977ea1fe503e982f Mon Sep 17 00:00:00 2001 From: Gauthier Bastien Date: Thu, 27 May 2021 09:59:19 +0200 Subject: [PATCH 1/3] Added `xhtml.imagesToData` that turns the src of images used in a xhtml content from an `http` or equivalent URL to a data base64 value. See #MOD-858 --- CHANGES.rst | 3 + src/imio/helpers/cache.py | 2 +- src/imio/helpers/emailer.py | 5 +- src/imio/helpers/patches.py | 1 + src/imio/helpers/security.py | 2 +- src/imio/helpers/tests/test_content.py | 2 +- src/imio/helpers/tests/test_xhtml.py | 37 ++++++++ src/imio/helpers/xhtml.py | 113 ++++++++++++++++++------- 8 files changed, 128 insertions(+), 37 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d5b76df..563b162 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,9 @@ Changelog - Lowercased email address after validation. [sgeulette] +- Added `xhtml.imagesToData` that turns the src of images used in a xhtml + content from an `http` or equivalent URL to a data base64 value. + [gbastien] 0.42 (2021-04-30) ----------------- diff --git a/src/imio/helpers/cache.py b/src/imio/helpers/cache.py index 8bb130f..a42b4a5 100644 --- a/src/imio/helpers/cache.py +++ b/src/imio/helpers/cache.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from datetime import datetime +from persistent.mapping import PersistentMapping from plone import api from plone.i18n.normalizer import IIDNormalizer from plone.memoize import ram @@ -9,7 +10,6 @@ from zope.component import getUtility from zope.component import queryUtility from zope.schema.interfaces import IVocabularyFactory -from persistent.mapping import PersistentMapping import logging diff --git a/src/imio/helpers/emailer.py b/src/imio/helpers/emailer.py index 1bdb4e8..3e0451b 100644 --- a/src/imio/helpers/emailer.py +++ b/src/imio/helpers/emailer.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- +from email import encoders from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.utils import parseaddr -from email import encoders from imio.helpers import _ from imio.helpers.content import safe_encode from plone import api @@ -12,13 +12,14 @@ from Products.CMFDefault.utils import checkEmailAddress from Products.CMFPlone.utils import safe_unicode from smtplib import SMTPException -from zope.component import getMultiAdapter from zope import schema +from zope.component import getMultiAdapter import csv import logging import socket + logger = logging.getLogger("imio.helpers") EMAIL_CHARSET = 'utf-8' diff --git a/src/imio/helpers/patches.py b/src/imio/helpers/patches.py index 1e4af2c..2576af5 100644 --- a/src/imio/helpers/patches.py +++ b/src/imio/helpers/patches.py @@ -2,6 +2,7 @@ import pkg_resources + try: pkg_resources.get_distribution("collective.solr") except pkg_resources.DistributionNotFound: diff --git a/src/imio/helpers/security.py b/src/imio/helpers/security.py index abec7e0..f1bfe24 100644 --- a/src/imio/helpers/security.py +++ b/src/imio/helpers/security.py @@ -10,12 +10,12 @@ from random import sample from random import seed from time import time +from zope.component import getMultiAdapter import logging import os import string -from zope.component import getMultiAdapter logger = logging.getLogger("imio.helpers") diff --git a/src/imio/helpers/tests/test_content.py b/src/imio/helpers/tests/test_content.py index ecf75d4..17955d5 100644 --- a/src/imio/helpers/tests/test_content.py +++ b/src/imio/helpers/tests/test_content.py @@ -14,8 +14,8 @@ from imio.helpers.content import get_state_infos from imio.helpers.content import get_vocab from imio.helpers.content import normalize_name -from imio.helpers.content import object_values from imio.helpers.content import object_ids +from imio.helpers.content import object_values from imio.helpers.content import restore_link_integrity_checks from imio.helpers.content import richtextval from imio.helpers.content import safe_delattr diff --git a/src/imio/helpers/tests/test_xhtml.py b/src/imio/helpers/tests/test_xhtml.py index fef5b42..c3203db 100644 --- a/src/imio/helpers/tests/test_xhtml.py +++ b/src/imio/helpers/tests/test_xhtml.py @@ -2,6 +2,7 @@ from imio.helpers.testing import IntegrationTestCase from imio.helpers.xhtml import addClassToContent from imio.helpers.xhtml import addClassToLastChildren +from imio.helpers.xhtml import imagesToData from imio.helpers.xhtml import imagesToPath from imio.helpers.xhtml import markEmptyTags from imio.helpers.xhtml import object_link @@ -16,6 +17,7 @@ import urllib + picsum_image1_url = 'https://i.picsum.photos/id/10/200/300.jpg?hmac=94QiqvBcKJMHpneU69KYg2pky8aZ6iBzKrAuhSUBB9s' picsum_image2_url = 'https://i.picsum.photos/id/1082/200/200.jpg?hmac=3usO1ziO7kCseIG52ruhRigxyk39W_L9eECWe1Hs6fY' @@ -32,6 +34,20 @@ "OXORpldeTR7KdNjU1QVosfhpsfhpt/cwHP6JojjAmhovhRofL5/KTY7NdGBPhToBdjz+L9gTfb28/QDsHx4Ge5rVu2+zHWNDQ3InVk+YbvWe" \ "+HEAkp6v/6cnemATSvoCmtFvb0Kvy47BR72AfwDXsx4tZedcTQAAAABJRU5ErkJggg==" +base64_gif_img_data = "" \ + "Pg5O6gqe2gqOq4v+igqt9tetxSYNs7Tdo5TNc5TNUbMdUbMNQRKNIKIdIJINIGHdEGHtEDGtACFdAAFtAAFNAAEs8AFAAAAAAAA" \ + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" \ + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" \ + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" \ + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" \ + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" \ + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAACIAIP3sAAoACBL2OAAAAPjHWRQAABUKiAAAABL2FAAAAhL+dPkBhPm4MP///xL2YPZNegA" \ + "AAAAQAAAABgAAAAAAABL2wAAAAAAARRL25PEPfxQAAPEQAAAAAAAAA/3r+AAAAAAAGAAAABL2uAAAQgAAABL2pAAAAAAAAAAAAAA" \ + "ADAAAAgABAfDTAAAAmAAAAAAAAAAAABL27Mku0AAQAAAAAAAAAEc1MUaDsAAAGBL3FPELyQAAAAABEgAAAwAAAQAAAxL22AAAmB" \ + "L+dPO4dPPKQP///0c70EUoOQAAmMku0AAQABL3TAAAAEaDsEc1MUaDsAAABkT98AABEhL3wAAALAAAQAAAQBL3kQAACSH5BAEAA" \ + "AAALAAAAAAKAAoAQAhGAAEIHEhw4IIIFCAsKCBwQAQQFyKCeKDAIEKFDAE4hBiRA0WCAwwUBLCAA4cMICg0YIjAQoaIFzJMSCCw" \ + "5EkOKjM2FDkwIAA7" + class TestXHTMLModule(IntegrationTestCase): """ @@ -456,6 +472,27 @@ def test_imagesToPath(self): text = 'Image' self.assertEqual(imagesToPath(doc2, text), text) + def test_imagesToData(self): + """ + Test that images src contained in a XHTML content are correctly changed to + the a data base64 value. + Method is based on same as imagesToPath so we do not redo + tests that are already done in test_imagesToPath. + """ + # create a document and an image + docId = self.portal.invokeFactory('Document', id='doc', title='Document') + doc = getattr(self.portal, docId) + file_path = path.join(path.dirname(__file__), 'dot.gif') + data = open(file_path, 'r') + img = self.portal.invokeFactory('Image', id='img', title='Image', file=data.read()) + img = getattr(self.portal, img) + # has a blob + self.assertEqual(img.get_size(), 873) + text = '

Image end of text.

'.format(self.portal_url) + self.assertEqual( + imagesToData(doc, text), + '

Image end of text.

'.format(base64_gif_img_data)) + def test_storeExternalImagesLocally(self): """ Test that images src contained in a XHTML that reference external images is diff --git a/src/imio/helpers/xhtml.py b/src/imio/helpers/xhtml.py index cf32baa..5d23905 100644 --- a/src/imio/helpers/xhtml.py +++ b/src/imio/helpers/xhtml.py @@ -9,6 +9,7 @@ from zExceptions import NotFound from zope.container.interfaces import INameChooser +import base64 import cgi import logging import lxml.html @@ -254,7 +255,48 @@ def removeCssClasses(xhtmlContent, for x in tree.iterchildren()]) -def imagesToPath(context, xhtmlContent, pretty_print=False): +def _img_from_src(context, img, portal, portal_url): + """ """ + # check if it is a local or an external image + img_src = img.attrib.get('src', None) + # wrong without src or external image + if not img_src or (img_src.startswith('http') and not img_src.startswith(portal_url)): + return + # here, we have an image contained in the portal + # either absolute path (http://...) or relative (../images/myimage.png) + imageObj = None + # absolute path + if img_src.startswith(portal_url): + img_src = img_src.replace(portal_url, '') + try: + # get the image but remove leading '/' + imageObj = portal.unrestrictedTraverse(img_src[1:]) + except (KeyError, AttributeError, NotFound): + return + # relative path + else: + try: + imageObj = context.unrestrictedTraverse(img_src) + # in case we have a wrong resolveuid/unknown_uid, it raises NotFound + except (KeyError, AttributeError, NotFound): + return + + # maybe we have a ImageScale instead of the real Image object? + if isinstance(imageObj, ImageScale): + imageObj = imageObj.aq_inner.aq_parent + return imageObj + + +def _get_image_blob(imageObj): + """Be defensinve in case this is a wrong with a src + to someting else than an image... """ + blob = None + if hasattr(aq_base(imageObj), 'getBlobWrapper') and imageObj.get_size(): + blob = imageObj.getBlobWrapper() + return blob + + +def _transform_images(context, xhtmlContent, pretty_print=False, transform_type="path"): '''Turn source contained in given p_xhtmlContent to a FileSystem absolute path to the .blob binary stored on the server. This is usefull when generating documents with XHTML containing images that are private, LibreOffice is not able to access these @@ -284,39 +326,23 @@ def imagesToPath(context, xhtmlContent, pretty_print=False): portal = api.portal.get() portal_url = portal.absolute_url() for img in imgs: - # check if it is a local or an external image - img_src = img.attrib.get('src', None) - # wrong without src or external image - if not img_src or (img_src.startswith('http') and not img_src.startswith(portal_url)): + imageObj = _img_from_src(context, img, portal, portal_url) + if imageObj is None: continue - # here, we have an image contained in the portal - # either absolute path (http://...) or relative (../images/myimage.png) - imageObj = None - # absolute path - if img_src.startswith(portal_url): - img_src = img_src.replace(portal_url, '') - try: - # get the image but remove leading '/' - imageObj = portal.unrestrictedTraverse(img_src[1:]) - except (KeyError, AttributeError, NotFound): - continue - # relative path - else: - try: - imageObj = context.unrestrictedTraverse(img_src) - # in case we have a wrong resolveuid/unknown_uid, it raises NotFound - except (KeyError, AttributeError, NotFound): - continue - # maybe we have a ImageScale instead of the real Image object? - if isinstance(imageObj, ImageScale): - imageObj = imageObj.aq_inner.aq_parent + + blob = _get_image_blob(imageObj) + # change img src only if a blob was found blob_path = None - # be defensinve in case this is a wrong with a src to someting else than an image... - if hasattr(aq_base(imageObj), 'getBlobWrapper') and imageObj.get_size(): - blob_path = imageObj.getBlobWrapper().blob._p_blob_committed - # change img src only if a blob_path was found - if blob_path: - img.attrib['src'] = blob_path + if blob: + if transform_type == "path": + blob_path = blob.blob._p_blob_committed + if blob_path: + img.attrib['src'] = blob_path + elif transform_type == "data": + blob_path = blob.blob._p_blob_committed + if blob_path and blob.content_type.startswith('image/'): + img.attrib['src'] = "data:{0};base64,{1}".format( + blob.content_type, base64.b64encode(blob.data)) # use encoding to 'ascii' so HTML entities are translated to something readable return ''.join([lxml.html.tostring(x, @@ -325,6 +351,29 @@ def imagesToPath(context, xhtmlContent, pretty_print=False): method='html') for x in tree.iterchildren()]) +def imagesToPath(context, xhtmlContent, pretty_print=False): + '''Turn source contained in given p_xhtmlContent to a FileSystem absolute path + to the .blob binary stored on the server. This is usefull when generating documents + with XHTML containing images that are private, LibreOffice is not able to access these + images using the HTTP request. + becomes + , + external images are left unchanged. + The image_scale is not kept, so : + becomes + .''' + + return _transform_images(context, xhtmlContent, pretty_print, transform_type="path") + + +def imagesToData(context, xhtmlContent, pretty_print=False): + '''Turn source contained in given p_xhtmlContent to a data:image/png;base64... value. + External images are left unchanged. + The image_scale is not kept, so we get the full image.''' + + return _transform_images(context, xhtmlContent, pretty_print, transform_type="data") + + def storeImagesLocally(context, xhtmlContent, imagePortalType='Image', From fef2ae83a52bf05f76c2b57c9d48d2ef166bd143 Mon Sep 17 00:00:00 2001 From: Gauthier Bastien Date: Thu, 27 May 2021 16:03:49 +0200 Subject: [PATCH 2/3] Completed test_separate_images to test when text contains no image at all --- src/imio/helpers/tests/test_xhtml.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/imio/helpers/tests/test_xhtml.py b/src/imio/helpers/tests/test_xhtml.py index c3203db..3c2e70d 100644 --- a/src/imio/helpers/tests/test_xhtml.py +++ b/src/imio/helpers/tests/test_xhtml.py @@ -731,6 +731,10 @@ def test_object_link(self): u'') def test_separate_images(self): + # no image, content is returned as is + text = '

My text.

My text.

My text.

' + result = separate_images(text) + self.assertEqual(text, result) # one image, nothing changed text = '

' result = separate_images(text) From ae72c5a20ff3ebfd3f896d2246f9e7aba8db5d5a Mon Sep 17 00:00:00 2001 From: Gauthier Bastien Date: Thu, 27 May 2021 16:08:30 +0200 Subject: [PATCH 3/3] Added basic test_setup_logger --- src/imio/helpers/tests/test_security.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/imio/helpers/tests/test_security.py b/src/imio/helpers/tests/test_security.py index fef12c6..4492fff 100644 --- a/src/imio/helpers/tests/test_security.py +++ b/src/imio/helpers/tests/test_security.py @@ -5,6 +5,7 @@ from imio.helpers.security import get_environment from imio.helpers.security import get_user_from_criteria from imio.helpers.security import is_develop_environment +from imio.helpers.security import setup_logger from imio.helpers.testing import IntegrationTestCase from plone import api @@ -58,3 +59,7 @@ def test_get_user_from_criteria(self): self.assertEqual(len(get_user_from_criteria(self.portal, fullname='Stéph')), 1) self.assertEqual(len(get_user_from_criteria(self.portal, email='.be')), 2) self.assertEqual(len(get_user_from_criteria(self.portal, fullname='Smith')), 2) + + def test_setup_logger(self): + # just call it to check that it is not broken + self.assertIsNone(setup_logger())