In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import os
from numpy.typing import NDArray
from PIL import Image



In [11]:
def get_image(file_path: Path) -> "Image.Image | None":
    """Open a PNG file as a PIL image.

    Args:
        file_path: Location of the candidate file (``str`` or ``Path``).

    Returns:
        The opened image, or ``None`` when ``file_path`` is not an existing
        file whose name ends in ``.png`` (case-insensitive). RGBA images are
        converted to RGB; every other mode is returned unchanged. The caller
        is responsible for closing the returned image.
    """
    # Decide from the argument itself rather than from any notebook-level
    # variable, so the helper gives the same answer wherever it is called.
    # os.fspath lets both str and Path arguments through.
    is_png = os.path.isfile(file_path) and os.fspath(file_path).lower().endswith(".png")
    if not is_png:
        print("returned none")
        return None

    source = Image.open(file_path)
    if source.mode != "RGBA":
        return source
    try:
        # convert() yields a separate image object, so the file-backed source
        # can be released as soon as the conversion is done.
        return source.convert("RGB")
    finally:
        source.close()

def save_webp_img(img:Image,name:str,quality: int) -> None:
    """Write ``img`` as ``<name>.webp`` inside ``destinationFolder``.

    Args:
        img: Image object to encode; its ``save`` method is used.
        name: File name without extension.
        quality: Value forwarded to the encoder as the ``quality`` option.
    """
    target_path = os.path.join(destinationFolder, f"{name}.webp")
    img.save(target_path, format="WEBP", quality=quality)

def get_image_size(folder:Path,base_name:str,format:str) -> float:
    """Return the on-disk size of ``<folder>/<base_name>.<format>`` in kB.

    Args:
        folder: Directory that contains the file.
        base_name: File name without extension.
        format: File extension without the leading dot (e.g. ``"png"``).

    Returns:
        File size in kilobytes (1 kB = 1000 bytes), rounded to two decimals.

    Raises:
        OSError: If the file does not exist or cannot be inspected.
    """
    image_path = os.path.join(folder, f"{base_name}.{format}")
    size_kb = os.path.getsize(image_path) / 1000
    return np.around(size_kb, decimals=2)

def remove_image(folder:Path,base_name:str,format:str):
    """Delete the file ``<folder>/<base_name>.<format>`` from disk."""
    os.remove(os.path.join(folder, f"{base_name}.{format}"))

In [12]:
sourceFolder = "cropped_images"
destinationFolder = "compressed_draft"

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(destinationFolder, exist_ok=True)

# Keep only PNG entries (stray files or folders such as .ipynb_checkpoints
# would otherwise break the numeric sort) and order them by the integer value
# of the file stem, so "10.png" comes after "9.png".
sorted_sourcefolder_walk = sorted(
    (entry for entry in os.listdir(sourceFolder) if entry.lower().endswith(".png")),
    key=lambda entry: int(os.path.splitext(entry)[0]),
)

In [13]:
# Target size on disk for each WebP file, in kB.
desired_webp_size_kb = 50

# A result counts as a hit when its size is within this many kB of the target.
tolerated_file_size_deviation = 2
# Quality used for the first encoding attempt, and how far it moves per retry.
base_webp_quality = 60
webp_quality_step = 3

# Maps an image's base name to the WebP quality that was accepted for it.
webp_qualities_dict: dict[str, int] = {}
# Range of the WebP quality setting.
webp_max_quality = 100
webp_min_quality = 0



# For every source PNG, search for a WebP quality whose file size lands within
# tolerated_file_size_deviation kB of desired_webp_size_kb. Images that hit the
# target keep their .webp file and get an entry in webp_qualities_dict; images
# that cannot hit it have their .webp file deleted again.
for file_name in sorted_sourcefolder_walk:
    file_path = os.path.join(sourceFolder, file_name)
    image = get_image(file_path)
    if image is None:
        # Not an openable PNG, so there is nothing to compress.
        continue
    base_name = os.path.splitext(file_name)[0]

    # try/finally closes the source image on every exit path: skip, success,
    # discard, or an unexpected error.
    try:
        png_size = get_image_size(sourceFolder, base_name, 'png')
        # Skip sources that are too small: a PNG below 3.5x the target is
        # assumed unable to reach the target size even at the highest quality.
        if png_size < 3.5 * desired_webp_size_kb:
            continue

        # First attempt at the base quality.
        new_webp_quality = base_webp_quality
        save_webp_img(img=image, name=base_name, quality=new_webp_quality)
        webp_size = get_image_size(destinationFolder, base_name, 'webp')

        while True:
            if abs(webp_size - desired_webp_size_kb) < tolerated_file_size_deviation:
                webp_qualities_dict[base_name] = new_webp_quality
                print(f"{base_name} was below threshold")
                break

            # Remember the size that just failed the tolerance check so the
            # next attempt can be compared against a real measurement.
            file_size_one_step_before = webp_size

            # File too small -> raise quality; file too large -> lower it.
            increasing = webp_size < desired_webp_size_kb
            if increasing:
                new_webp_quality += webp_quality_step
            else:
                new_webp_quality -= webp_quality_step
            save_webp_img(img=image, name=base_name, quality=new_webp_quality)
            webp_size = get_image_size(destinationFolder, base_name, 'webp')

            previous_miss = abs(file_size_one_step_before - desired_webp_size_kb)
            current_miss = abs(webp_size - desired_webp_size_kb)
            if previous_miss <= current_miss:
                # The step did not get closer. The previous size was already
                # outside the tolerance, so this one is too: the target cannot
                # be reached with this step size and the file is discarded.
                remove_image(destinationFolder, base_name, 'webp')
                break

            # Stop before another step could leave the valid quality range.
            if increasing:
                at_limit = new_webp_quality > webp_max_quality - webp_quality_step
            else:
                at_limit = new_webp_quality <= webp_min_quality + webp_quality_step
            if at_limit:
                if current_miss < tolerated_file_size_deviation:
                    webp_qualities_dict[base_name] = new_webp_quality
                    if increasing:
                        print(f"{base_name} used highest quality !!!")
                    else:
                        print(f"{base_name} used lowest quality !!!")
                else:
                    remove_image(destinationFolder, base_name, 'webp')
                break
    finally:
        image.close()


1 was below threshold
3 was below threshold
4 was below threshold
5 was below threshold
6 was below threshold
8 was below threshold
10 was below threshold
11 was below threshold
13 was below threshold
15 was below threshold
16 was below threshold
17 was below threshold
18 was below threshold
19 was below threshold
20 was below threshold
21 was below threshold
25 was below threshold
27 was below threshold
28 was below threshold
30 was below threshold
31 was below threshold
33 was below threshold
34 was below threshold
37 was below threshold
38 was below threshold
39 was below threshold
41 was below threshold
43 was below threshold
45 was below threshold
46 was below threshold
47 was below threshold
49 was below threshold
51 was below threshold
53 was below threshold
55 was below threshold
56 was below threshold
57 used highest quality !!!
59 was below threshold
60 was below threshold
61 was below threshold
66 was below threshold
68 was below threshold
71 was below threshold
72 was below

In [14]:
# Show the WebP quality recorded for each image that was kept.
webp_qualities_dict.values()

dict_values([50, 41, 86, 56, 56, 68, 50, 44, 83, 29, 47, 71, 56, 35, 86, 53, 53, 74, 83, 44, 77, 74, 32, 71, 32, 62, 80, 95, 38, 83, 77, 29, 41, 32, 80, 29, 98, 74, 86, 35, 32, 62, 68, 77, 62, 77, 38, 41, 56, 56, 32, 35, 68, 92, 80, 44, 83, 38, 59, 44, 59, 65, 89, 38, 26, 74, 41, 86, 74, 86, 44, 86, 38, 80, 65, 47, 47, 71, 74, 59, 50, 83, 83, 38, 65, 47, 80, 77, 35, 47, 44, 53, 83, 89, 35, 32, 68, 62, 35, 68, 62, 35, 53, 80, 83, 86, 77, 29, 53, 32, 35, 71, 71, 47, 62, 86, 86, 68, 50, 89, 74, 53, 62, 68, 80, 50, 41, 32, 56, 29, 50, 53, 77, 77, 74, 77, 80, 77, 41, 74, 80, 32, 68, 68, 44, 74, 26, 71, 77, 65, 50, 74, 65, 62, 50, 89, 77, 89, 59, 50, 77, 68, 35, 56, 80, 71, 83, 59, 35, 86, 77, 83, 74, 47, 62, 65, 41, 59, 38, 32, 59, 74, 56, 65, 29, 62, 77, 38, 56, 50, 59, 35, 77, 80, 65, 53, 71, 35, 80, 59, 74, 65, 35, 62, 41, 68, 44, 53, 80, 35, 50, 89, 32, 38, 53, 47, 41, 53, 77, 59, 35, 50, 50, 68, 86, 74, 41, 62, 83, 83, 47, 74, 68, 77, 47, 38, 68, 38, 44, 83, 65, 38, 59, 53, 32, 59, 44,