In [32]:
import os
import logging
from tqdm import tqdm
from PIL import Image

In [33]:
# Logging settings: show INFO and above, each record prefixed with its
# timestamp and severity level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

In [34]:
def check_is_dir(path):
    """Confirm that ``path`` refers to an existing directory.

    Args:
        path: File-system path to validate.

    Returns:
        bool: Always ``True`` when the path is a directory.

    Raises:
        ValueError: If ``path`` is not a directory.
    """
    if os.path.isdir(path):
        return True
    raise ValueError(f"Provided path: {path} is not a directory.")

In [35]:
def filter_images(list_of_files):
    """Return the file names that carry a supported image extension.

    The extension match is case-insensitive, so ``photo.JPG`` is kept just
    like ``photo.jpg``. Input order is preserved and the names are returned
    unchanged (not lower-cased).

    Args:
        list_of_files: Iterable of file names (or paths) given as strings.

    Returns:
        list[str]: Entries ending in ``.jpg``, ``.jpeg``, ``.png`` or ``.webp``.
    """
    # str.endswith accepts a tuple of suffixes, so no inner loop is needed.
    valid_extensions = ('.jpg', '.jpeg', '.png', '.webp')
    return [file for file in list_of_files if file.lower().endswith(valid_extensions)]

In [36]:
def load_image(image_path):
    """Open an image file and return its contents as an RGB image.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The image converted to ``'RGB'`` mode.
    """
    # The context manager closes the underlying file as soon as the
    # conversion is done. Per the Pillow docs, convert() returns a new image,
    # so the result stays usable after the source has been closed.
    with Image.open(image_path) as source:
        return source.convert('RGB')

In [37]:
def read_images_from_dir(dir_path):
    """Load every supported image located directly inside ``dir_path``.

    Entries are selected with ``filter_images`` and opened with
    ``load_image``. Sub-directories are not searched. Images are returned in
    sorted file-name order so repeated runs give the same sequence.

    Args:
        dir_path: Directory containing the image files.

    Returns:
        list: One loaded image per matching file.

    Raises:
        ValueError: If ``dir_path`` is not a directory (raised by
            ``check_is_dir``).
    """
    check_is_dir(dir_path)
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    files = sorted(os.listdir(dir_path))
    candidate_paths = [os.path.join(dir_path, file) for file in filter_images(files)]
    # A directory named like "x.jpg" passes the extension filter but cannot
    # be opened as an image, so keep regular files only.
    image_paths = [path for path in candidate_paths if os.path.isfile(path)]
    images = [load_image(image_path) for image_path in tqdm(image_paths)]
    # Lazy %-formatting: same message text, built only if the record is emitted.
    logging.info("Load %d images from %s", len(images), dir_path)
    return images

In [38]:
# Read every supported image from the bird folder into memory.
loaded_images = read_images_from_dir("raw_images/bird")

100%|██████████| 229/229 [00:02<00:00, 83.12it/s] 
2024-06-10 14:51:29,499 - INFO - Load 229 images from raw_images/bird


In [39]:
def max_resolution_rescale(image, max_width, max_height):
    """Shrink ``image`` so it fits inside ``max_width`` x ``max_height``.

    The aspect ratio is preserved. An image that already fits is returned
    unchanged (the same object); images are never enlarged.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize`` method, such as a PIL image.
        max_width: Largest allowed width in pixels.
        max_height: Largest allowed height in pixels.

    Returns:
        The original image if it fits, otherwise a resized image produced
        with LANCZOS resampling.
    """
    width, height = image.size
    if width > max_width or height > max_height:
        # Use the tighter of the two constraints so both limits are respected.
        ratio = min(max_width / width, max_height / height)
        # Truncation can produce 0 for very elongated images (e.g. 4096x1),
        # which resize() rejects, so keep every dimension at 1 pixel or more.
        new_width = max(1, int(width * ratio))
        new_height = max(1, int(height * ratio))
        image = image.resize((new_width, new_height), Image.LANCZOS)
    return image

In [40]:
# Upper bound, in pixels, applied to both width and height.
max_size = 1024

# Downscale anything larger than max_size x max_size; smaller images pass
# through untouched.
resized_images = [
    max_resolution_rescale(image, max_size, max_size)
    for image in tqdm(loaded_images)
]

100%|██████████| 229/229 [00:02<00:00, 78.33it/s]


In [41]:
def min_resolution_filter(image, min_width, min_height):
    """Tell whether ``image`` meets a minimum resolution.

    Args:
        image: Object exposing ``size`` as ``(width, height)``.
        min_width: Smallest acceptable width.
        min_height: Smallest acceptable height.

    Returns:
        bool: ``True`` only when both dimensions reach their minimum.
    """
    img_w, img_h = image.size
    wide_enough = img_w >= min_width
    tall_enough = img_h >= min_height
    return wide_enough and tall_enough

In [42]:
# Smallest acceptable width and height, in pixels.
min_size = 224

# Keep only images whose width and height are both at least `min_size`.
# The predicate must be `min_resolution_filter`; `max_resolution_rescale`
# returns an image (always truthy), so it would never discard anything.
# The result is named `filtered_images` so it does not overwrite the
# `filter_images` helper function, which would break re-running the loader.
filtered_images = [
    image
    for image in tqdm(resized_images)
    if min_resolution_filter(image, min_size, min_size)
]

100%|██████████| 229/229 [00:00<00:00, 233.97it/s]
