Don’t duplicate images drawn with multiple dpi ratios

When the same image is displayed twice, with two different sizes, we now only store the image once, with the maximum size required. Smaller images are then drawn with a better resolution than expected, but the PDF is obviously smaller than it would be if the low-quality version were stored in addition to the high-quality one. Fix #1877.
Kozea · May 15, 2023 · 33892cd · 33892cd
1 parent be72f5c
commit 33892cd
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 22 deletions.
diff --git a/weasyprint/images.py b/weasyprint/images.py
@@ -93,7 +93,7 @@ def draw(self, stream, concrete_width, concrete_height, image_rendering):
         if self.width <= 0 or self.height <= 0:
             return
 
-        interpolate = 'true' if image_rendering == 'auto' else 'false'
+        interpolate = image_rendering == 'auto'
         ratio = 1
         if self._dpi:
             pt_to_in = 4 / 3 / 96
@@ -115,7 +115,20 @@ def cache_image_data(self, data, filename=None, alpha=False):
             key = f'{self.id}{int(alpha)}{self._dpi or ""}'
             return LazyImage(self._cache, key, data)
 
-    def get_x_object(self, width, height, interpolate):
+    def get_x_object(self, interpolate, dpi_ratio):
+        if dpi_ratio == 1:
+            width, height = self.width, self.height
+        else:
+            thumbnail = Image.open(io.BytesIO(self.image_data.data))
+            width = max(1, int(round(self.width * dpi_ratio)))
+            height = max(1, int(round(self.height * dpi_ratio)))
+            thumbnail.thumbnail((width, height))
+            image_file = io.BytesIO()
+            thumbnail.save(
+                image_file, format=thumbnail.format, optimize=self.optimize)
+            width, height = thumbnail.width, thumbnail.height
+            self.image_data = self.cache_image_data(image_file.getvalue())
+
         if self.mode in ('RGB', 'RGBA'):
             color_space = '/DeviceRGB'
         elif self.mode in ('L', 'LA'):
@@ -133,7 +146,7 @@ def get_x_object(self, width, height, interpolate):
             'Height': height,
             'ColorSpace': color_space,
             'BitsPerComponent': 8,
-            'Interpolate': interpolate,
+            'Interpolate': 'true' if interpolate else 'false',
         })
 
         if self.format == 'JPEG':
@@ -175,7 +188,7 @@ def get_x_object(self, width, height, interpolate):
                 'Height': height,
                 'ColorSpace': '/DeviceGray',
                 'BitsPerComponent': 8,
-                'Interpolate': interpolate,
+                'Interpolate': 'true' if interpolate else 'false',
             })
         else:
             png_data = self._get_png_data(

diff --git a/weasyprint/pdf/__init__.py b/weasyprint/pdf/__init__.py
@@ -62,12 +62,19 @@ def _use_references(pdf, resources, images):
     for key, x_object in resources.get('XObject', {}).items():
         # Images
         if x_object is None:
-            x_object = images[key]
-            if x_object.number is not None:
+            image_data = images[key]
+            x_object = image_data['x_object']
+
+            if x_object is not None:
                 # Image already added to PDF
                 resources['XObject'][key] = x_object.reference
                 continue
 
+            image = image_data['image']
+            dpi_ratio = max(image_data['dpi_ratios'])
+            x_object = image.get_x_object(image_data['interpolate'], dpi_ratio)
+            image_data['x_object'] = x_object
+
         pdf.add_object(x_object)
         resources['XObject'][key] = x_object.reference
 

diff --git a/weasyprint/pdf/stream.py b/weasyprint/pdf/stream.py
@@ -8,7 +8,6 @@
 from fontTools import subset
 from fontTools.ttLib import TTFont, TTLibError, ttFont
 from fontTools.varLib.mutator import instantiateVariableFont
-from PIL import Image
 
 from ..logger import LOGGER
 from ..matrix import Matrix
@@ -363,26 +362,19 @@ def add_group(self, x, y, width, height):
         return group
 
     def add_image(self, image, interpolate, ratio):
-        width, height = image.width, image.height
-        image_name = f'i{image.id}{width}{height}{interpolate}{ratio}'
+        image_name = f'i{image.id}{int(interpolate)}'
         self._x_objects[image_name] = None  # Set by write_pdf
         if image_name in self._images:
             # Reuse image already stored in document
+            self._images[image_name]['dpi_ratios'].add(ratio)
             return image_name
 
-        if ratio != 1:
-            thumbnail = Image.open(io.BytesIO(image.image_data.data))
-            width = int(round(image.width * ratio))
-            height = int(round(image.height * ratio))
-            thumbnail.thumbnail((max(1, width), max(1, height)))
-            image_file = io.BytesIO()
-            thumbnail.save(
-                image_file, format=thumbnail.format, optimize=image.optimize)
-            width, height = thumbnail.width, thumbnail.height
-            image.image_data = image.cache_image_data(image_file.getvalue())
-
-        x_object = image.get_x_object(width, height, interpolate)
-        self._images[image_name] = x_object
+        self._images[image_name] = {
+            'image': image,
+            'interpolate': interpolate,
+            'dpi_ratios': {ratio},
+            'x_object': None,  # Set by write_pdf
+        }
         return image_name
 
     def add_pattern(self, x, y, width, height, repeat_width, repeat_height,