Skip to content
Branch: master
Find file Copy path
Find file Copy path
typo aa07152 Aug 14, 2019
1 contributor

Users who have contributed to this file

123 lines (86 sloc) 3.24 KB
Idea of using BeautifulSoup from
import json
from io import BytesIO
import os
import tempfile
from bs4 import BeautifulSoup
from PIL import Image
import requests
from pelican import signals
class ImageCache():
    """On-disk cache of image dimensions, keyed by image URL.

    Dimensions are kept in ``self.cache`` and persisted as JSON in
    ``CACHE_FOLDER_NAME/CACHE_FILE_NAME`` inside the parent directory of
    the ``PATH`` setting of the object passed to ``load``/``save``.
    """

    CACHE_FOLDER_NAME = "img_cache"
    CACHE_FILE_NAME = "cache.json"
    # TODO: Move to settings, but what default value to provide if not present?
    # NOTE(review): the original value of this constant is not visible in the
    # reviewed text; 800 is a placeholder -- confirm before merging.
    MAX_WIDTH = 800
    # Seconds to wait for the image server; without a timeout a stalled
    # server would block the caller indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Start with an empty cache that has nothing to persist."""
        self.cache = {}
        self.dirty = False

    def load(self, instance):
        """Replace the in-memory cache with the JSON file, if it exists.

        ``instance`` must expose ``settings["PATH"]``.
        """
        cache_file = self._get_cache_file_path(instance)
        if os.path.exists(cache_file):
            with open(cache_file, "r", encoding="utf-8") as file_handle:
                self.cache = json.load(file_handle)

    def save(self, instance):
        """Write the cache to disk, but only if it changed since last save.

        ``instance`` must expose ``settings["PATH"]``.
        """
        if not self.dirty:
            return
        cache_file = self._get_cache_file_path(instance)
        with open(cache_file, "w", encoding="utf-8") as file_handle:
            json.dump(self.cache, file_handle)
        self.dirty = False

    def get_image_width_and_height(self, image_path):
        """Return ``(width, height)`` for the image at URL ``image_path``.

        The image is downloaded only on a cache miss; its real size is
        stored in the cache. The returned size is capped at ``MAX_WIDTH``
        with the height scaled to keep the aspect ratio.
        """
        if image_path in self.cache:
            # Entries loaded from JSON are lists; unpacking handles both
            # lists and the tuples stored during this run.
            width, height = self.cache[image_path]
        else:
            print("Caching '{}'".format(image_path))
            response = requests.get(image_path, timeout=self.REQUEST_TIMEOUT)
            with Image.open(BytesIO(response.content)) as image:
                width, height = image.size
            self.cache[image_path] = (width, height)
            self.dirty = True

        if width > self.MAX_WIDTH:
            # recalculate proportions and restrain image size
            original_proportion = width / height
            width = self.MAX_WIDTH
            height = int(width / original_proportion)
        return (width, height)

    @classmethod
    def _get_cache_file_path(cls, instance):
        """Return the cache file path, creating the cache folder if needed."""
        base_path = os.path.dirname(instance.settings["PATH"])
        cache_path = os.path.join(base_path, cls.CACHE_FOLDER_NAME)
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)
            print("Created image cache folder '{}'".format(cache_path))
        return os.path.join(cache_path, cls.CACHE_FILE_NAME)
def content_object_init(instance):
    """Normalise every ``<img>`` tag in ``instance._content``.

    Each image with a non-inline ``src`` gets ``width``/``height`` (existing
    values are kept), ``loading="lazy"``, an empty ``alt`` when the alt text
    merely repeats the URL, and has its ``mce_src`` attribute removed.
    Objects without content are left untouched.
    """
    if instance._content is None:
        return

    # Load previously measured dimensions so images are not re-downloaded.
    image_cache = ImageCache()
    image_cache.load(instance)

    soup = BeautifulSoup(instance._content, "html.parser")
    for img in soup(["img"]):
        src = img.get("src")
        # Text or similar containing a `<img>` tag
        if not src:
            continue
        # Don't touch base64 encoded images
        if src.startswith("data:image"):
            continue

        # Always define width & height
        width, height = image_cache.get_image_width_and_height(src)
        img["width"] = img.get("width", width)
        img["height"] = img.get("height", height)

        # Chrome's native lazy load
        img["loading"] = "lazy"

        # Good practice
        if img.get("alt", "") == src:
            img["alt"] = ""

        # Old MCE cleanup
        if img.get("mce_src"):
            del img["mce_src"]

    instance._content = soup.decode()
    # Persist any newly measured dimensions (a no-op when nothing changed).
    image_cache.save(instance)
def register():
You can’t perform that action at this time.