# Image Pixel Downsizer Tool

* Author: docai-incubator@google.com

## Disclaimer

This tool is not supported by the Google engineering team or product team. It is provided and supported on a best-effort basis by the **DocAI Incubator Team**. No guarantees of performance are implied.

# Objective
This tool scales down images that exceed 40 million pixels. Document AI processors enforce a maximum image resolution of 40 million pixels, as specified in the content limits at https://cloud.google.com/document-ai/quotas#content_limits. Reducing an image to below 40 million pixels makes it compatible with all Document AI processors and suitable for processing with the Document AI parser.

# Prerequisites
* Vertex AI Notebook
* GCS Folder Path

# Step-by-Step Procedure

## 1. Import Modules/Packages

In [None]:
import io
from typing import Union

from google.cloud import storage
from PIL import Image

## 2. Input Details

In [None]:
# GCS URI of the folder that holds the source images (.jpg, .jpeg, .png).
INPUT_FOLDER = "gs://bucket_name/testing_2024/input"  # Input Images Folder path
# GCS URI of the folder where the processed images are written.
OUTPUT_FOLDER = "gs://bucket_name/testing_2024/output"  # Output Image Folder path

## 3. Run Below Code-Cells

In [None]:
def resize_image(
    image: Image.Image, image_name: str, max_megapixels: int = 40
) -> Union[Image.Image, None]:
    """Downscale an image that exceeds the megapixel limit, keeping its aspect ratio.

    Args:
        image (Image.Image): Image data as a Pillow image object.
        image_name (str): Name (e.g. GCS object name) used only in log output.
        max_megapixels (int, optional): Maximum allowed size in megapixels
            (1 megapixel = 1,000,000 pixels). Defaults to 40.

    Returns:
        Union[Image.Image, None]: A resized copy whose pixel count does not
        exceed the limit, or None when the image is already within the limit.

    Raises:
        ValueError: If ``max_megapixels`` is not positive.
    """
    if max_megapixels <= 0:
        raise ValueError("max_megapixels must be a positive number")

    width, height = image.size
    # 1 megapixel = 1,000,000 pixels
    megapixels = (width * height) / 1e6
    if megapixels <= max_megapixels:
        print(f"{image_name} is {megapixels:.3f} megapixels.\n\tSkipped.")
        return None

    print(f"{image_name} is {megapixels:.3f} megapixels.\n\tDownsizing...")
    max_pixels = int(max_megapixels * 1e6)
    aspect_ratio = width / height
    # Solve new_width * new_height == max_pixels with new_width / new_height
    # equal to the aspect ratio; flooring keeps the product within the limit.
    # Each side is clamped to at least 1 px because extreme aspect ratios
    # would otherwise floor the short side to 0, which Pillow cannot resize to.
    new_width = max(1, int((max_megapixels * 1e6 * aspect_ratio) ** 0.5))
    new_height = max(1, int((max_megapixels * 1e6 / aspect_ratio) ** 0.5))
    # The clamp above can push the product back over the limit; if so, shrink
    # the longer side until the image fits again.
    if new_width * new_height > max_pixels:
        if new_width > new_height:
            new_width = max(1, max_pixels // new_height)
        else:
            new_height = max(1, max_pixels // new_width)
    return image.resize((new_width, new_height))


def _split_gcs_uri(gcs_uri: str) -> tuple:
    """Split ``gs://bucket/some/folder`` into ``("bucket", "some/folder/")``."""
    # Drop trailing slashes first so "gs://b/dir/" and "gs://b/dir" give the
    # same prefix instead of "dir//".
    parts = gcs_uri.rstrip("/").split("/")
    if len(parts) < 3 or not parts[2]:
        raise ValueError(f"Not a valid GCS folder URI: {gcs_uri!r}")
    folder = "/".join(parts[3:])
    # A bucket-root URI maps to an empty prefix (not "/"), which matches
    # every object in the bucket.
    return parts[2], (folder + "/" if folder else "")


def process_images(input_folder: str, output_folder: str) -> None:
    """
    Iterate over the images in the input folder, downsize the ones that
    exceed the pixel limit, and store every image in the output folder.

    Only objects whose names end in .jpg, .jpeg or .png (case-insensitive)
    are processed. Images already within the limit are copied unchanged.

    Args:
        input_folder (str): GCS input uri which holds image files
        output_folder (str): GCS uri to store resized images

    Raises:
        ValueError: If either uri does not contain a bucket name.
    """

    client = storage.Client()
    input_bucket_name, input_prefix = _split_gcs_uri(input_folder)
    output_bucket_name, output_prefix = _split_gcs_uri(output_folder)

    input_bucket = client.get_bucket(input_bucket_name)
    # The output folder may live in a different bucket than the input folder.
    if output_bucket_name == input_bucket_name:
        output_bucket = input_bucket
    else:
        output_bucket = client.get_bucket(output_bucket_name)

    for blob in input_bucket.list_blobs(prefix=input_prefix):
        name_lower = blob.name.lower()
        if name_lower.endswith(".png"):
            content_type = "image/png"
        elif name_lower.endswith((".jpg", ".jpeg")):
            content_type = "image/jpeg"
        else:
            # Not a supported image type.
            continue

        blob_data = blob.download_as_bytes()
        # NOTE(review): Pillow's decompression-bomb guard
        # (Image.MAX_IMAGE_PIXELS) may reject very large inputs when they are
        # opened - confirm the limit suits the images this tool must handle.
        with Image.open(io.BytesIO(blob_data)) as image:
            resized_image = resize_image(image, blob.name)
            if resized_image is not None:
                buffer = io.BytesIO()
                # Re-encode in the source file's own format.
                resized_image.save(buffer, format=image.format)
                payload = buffer.getvalue()
            else:
                # Already within the limit: copy the original bytes as-is.
                payload = blob_data

        # Swap only the leading folder prefix; str.replace would also rewrite
        # any later occurrence of the same text inside the object name.
        output_blob_name = output_prefix + blob.name[len(input_prefix):]
        output_blob = output_bucket.blob(output_blob_name)
        # Saving file to GCS
        output_blob.upload_from_string(payload, content_type=content_type)


# Process every supported image under INPUT_FOLDER and write the results to OUTPUT_FOLDER.
process_images(INPUT_FOLDER, OUTPUT_FOLDER)

## 4. Output Details

Refer to the image sizes below, before and after downsizing.

<b> Without Downsizing</b>  
<img src="./images/input_sample.png" width=600 height=600 alt="Without_downsizing">

<b>After Resizing</b>  
<img src="./images/output_sample.png" width=600 height=600 alt="after_resizing">