In [22]:
import mimetypes
import pathlib
import shutil
import tempfile
from PIL import Image, ImageOps


In [23]:
# Directory layout, anchored on the notebook's working directory:
#   <repo>/notebooks   (current working directory)
#   <repo>/data/{inputs,ready,outputs}
NOTEBOOKS_DIR = pathlib.Path(".").resolve()
REPO_DIR = NOTEBOOKS_DIR.parent
DATA_DIR = REPO_DIR.joinpath("data")
INPUTS_DIR = DATA_DIR.joinpath("inputs")    # raw files to process
READY_DIR = DATA_DIR.joinpath("ready")      # optimised images
OUTPUTS_DIR = DATA_DIR.joinpath("outputs")  # final archives

In [24]:
def perform_is_image(path, require_can_open=True):
    """
    Decide whether ``path`` looks like an image file.

    The check is best-effort and never raises for ordinary failures: any
    problem while guessing the type or opening the file yields ``False``.

    Args:
        path: File path (str or path-like) to inspect.
        require_can_open: When True, the file must also be openable by
            Pillow; when False only the guessed MIME type is consulted.

    Returns:
        bool: True if the guessed MIME type starts with "image" and, if
        requested, Pillow can open the file.
    """
    try:
        mime_type, _ = mimetypes.guess_type(path)
    except Exception:
        mime_type = None

    # guess_type yields None for unknown or missing extensions, so guard
    # before calling a string method on the result.
    if not mime_type or not mime_type.startswith("image"):
        return False

    if require_can_open:
        try:
            # The context manager releases the file handle immediately;
            # only the ability to open the file matters here.
            with Image.open(path):
                pass
        except Exception:
            return False
    return True

In [25]:
def clear_and_optimise_image(image_path, output_path, max_size=(1920, 1920)):
    """
    Write a size-optimised copy of an image, re-encoded from its pixel data.

    The image is rotated according to its EXIF orientation, shrunk to fit
    inside ``max_size`` (aspect ratio preserved, never enlarged) and saved as
    PNG when it carries transparency, otherwise as JPEG. No EXIF data is
    handed to the encoder when saving.

    Args:
        image_path: Path (str or Path) of the image to read.
        output_path: Desired destination; its suffix is replaced with
            ``.png`` or ``.jpg`` to match the chosen format.
        max_size: ``(width, height)`` bounding box in pixels.

    Returns:
        pathlib.Path: The path the image was actually written to.
    """
    image_path = pathlib.Path(image_path)
    output_path = pathlib.Path(output_path)

    # Modes kept as-is for JPEG output; anything else is converted to RGB
    # before saving.
    jpeg_modes = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')

    # The context manager guarantees the source file handle is released,
    # even if processing or saving fails.
    with Image.open(image_path) as source:
        # Alpha-carrying modes, or a palette with a transparency entry
        has_transparency = (
            source.mode in ('RGBA', 'LA', 'PA') or
            (source.mode == 'P' and 'transparency' in source.info)
        )

        # Auto-rotate based on EXIF
        image = ImageOps.exif_transpose(source)

        # Convert the mode BEFORE resizing: Pillow falls back to NEAREST
        # resampling for palette images, so resizing first would silently
        # ignore the LANCZOS filter requested below.
        if has_transparency:
            if image.mode != 'RGBA':
                image = image.convert('RGBA')
            best_format = 'PNG'
        else:
            if image.mode not in jpeg_modes:
                image = image.convert('RGB')
            best_format = 'JPEG'

        # Resize if larger than max_size while maintaining aspect ratio
        if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
            image.thumbnail(max_size, Image.Resampling.LANCZOS)

        # Save with optimized settings
        save_kwargs = {}
        if best_format == 'JPEG':
            save_kwargs.update({
                'quality': 85,
                'optimize': True,
                'progressive': True
            })
            output_path = output_path.with_suffix('.jpg')
        else:
            save_kwargs.update({
                'optimize': True,
                'compress_level': 6
            })
            output_path = output_path.with_suffix('.png')
        print(f'Saving {output_path}')
        image.save(output_path, format=best_format, **save_kwargs)
    return output_path

In [26]:
# Create the destination folder up front so the cell also works on a fresh
# checkout where data/ready does not exist yet.
READY_DIR.mkdir(exist_ok=True, parents=True)

# Optimise every image found in the inputs folder and remember where each
# result was written (the suffix may differ from the input's).
image_file_paths = []
for file_path in INPUTS_DIR.glob("*"):
    is_image = perform_is_image(file_path)
    if not is_image:
        # Non-images and unreadable files are skipped
        continue
    start_output_path = READY_DIR / file_path.name
    final_output_path = clear_and_optimise_image(file_path, start_output_path)
    image_file_paths.append(final_output_path)

Saving /root/source/imagenation-api/data/ready/2.jpg
Saving /root/source/imagenation-api/data/ready/4.jpg
Saving /root/source/imagenation-api/data/ready/5.jpg
Saving /root/source/imagenation-api/data/ready/3.jpg
Saving /root/source/imagenation-api/data/ready/6.jpg
Saving /root/source/imagenation-api/data/ready/10.jpg
Saving /root/source/imagenation-api/data/ready/8.jpg
Saving /root/source/imagenation-api/data/ready/9.jpg
Saving /root/source/imagenation-api/data/ready/1.jpg
Saving /root/source/imagenation-api/data/ready/7.jpg


In [27]:
# Destination of the final archive; make sure its folder exists before zipping.
zip_out_path = OUTPUTS_DIR.joinpath("images-optimized.zip")
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)

In [28]:
# Stage the optimised images in a scratch directory, then zip that
# directory so the archive contains the files at its top level.
with tempfile.TemporaryDirectory() as tmp_dir:
    for image_file_path in image_file_paths:
        shutil.copy(image_file_path, tmp_dir)

    # make_archive appends ".zip" itself, hence the suffix-less base name
    shutil.make_archive(
        zip_out_path.with_suffix(""),
        format="zip",
        root_dir=tmp_dir,
    )
    print(f"Zipped images to {zip_out_path}")

Zipped images to /root/source/imagenation-api/data/outputs/images-optimized.zip
