In [1]:
import os
import shutil
from typing import Optional

from PIL import Image
from keras.preprocessing.image import img_to_array

In [2]:
# Root of the dataset on the local machine. Every folder below is derived from it,
# so relocating the data only requires editing this single line.
DATA_ROOT = 'D:\\KPI\\Bachelor_thesis\\code\\data'

# Folders holding the filtered images, one per dataset split.
TEST_FOLDER_PATH = os.path.join(DATA_ROOT, 'images', 'test')
TRAIN_FOLDER_PATH = os.path.join(DATA_ROOT, 'images', 'train')
VAL_FOLDER_PATH = os.path.join(DATA_ROOT, 'images', 'val')

# Folders holding the unfiltered source images, one per dataset split.
RAW_TEST_FOLDER_PATH = os.path.join(DATA_ROOT, 'raw_images', 'test')
RAW_TRAIN_FOLDER_PATH = os.path.join(DATA_ROOT, 'raw_images', 'train')
RAW_VAL_FOLDER_PATH = os.path.join(DATA_ROOT, 'raw_images', 'val')

In [4]:
def filter_and_copy_images(
    source_path: str,
    target_path: Optional[str] = None,
    *,
    channels: int = 3,
    min_width: int = 500,
    max_width: int = 800,
    min_height: int = 300,
    max_height: int = 800,
) -> int:
    """
    Count the images directly inside ``source_path`` that match the filter and optionally copy them.

    Only regular files at the top level of ``source_path`` are inspected; subdirectories are skipped.
    An image matches when it has exactly ``channels`` bands and its width and height fall inside
    the inclusive ranges given below. Only the image header is read, so pixel data is never decoded.

    Args:
        source_path (str): The path to the source directory containing images.
        target_path (Optional[str]): The path to the target directory where matching images are copied.
                                     It is created if missing. If None, nothing is copied and only
                                     the count is returned.
        channels (int): Required number of bands (3 for RGB).
        min_width (int): Smallest accepted width in pixels (inclusive).
        max_width (int): Largest accepted width in pixels (inclusive).
        min_height (int): Smallest accepted height in pixels (inclusive).
        max_height (int): Largest accepted height in pixels (inclusive).

    Returns:
        int: The number of matching images, including those that already existed in ``target_path``
             and were therefore not copied again.

    Raises:
        OSError: If a file in ``source_path`` cannot be opened as an image
                 (propagated from ``Image.open``), or if a directory cannot be listed or created.
    """
    if target_path is not None:
        # exist_ok removes the check-then-create race and keeps re-runs idempotent.
        os.makedirs(target_path, exist_ok=True)

    counter = 0

    for filename in os.listdir(source_path):
        file_path = os.path.join(source_path, filename)

        # os.listdir also returns subdirectories, which Image.open cannot read.
        if not os.path.isfile(file_path):
            continue

        # Image.open is lazy: mode and size come from the header, so there is no need
        # to decode the pixels into an array just to learn the channel count.
        with Image.open(file_path) as image:
            band_count = len(image.getbands())
            width, height = image.size

        matches = (
            band_count == channels
            and min_width <= width <= max_width
            and min_height <= height <= max_height
        )
        if not matches:
            continue

        if target_path is not None:
            target_file_path = os.path.join(target_path, filename)
            # Skip files copied by a previous run instead of overwriting them.
            if not os.path.exists(target_file_path):
                shutil.copy(file_path, target_file_path)
        counter += 1

    return counter


In [11]:
# Filter the raw test split first, then the raw validation split, reporting each count.
test_image_count = filter_and_copy_images(RAW_TEST_FOLDER_PATH, TEST_FOLDER_PATH)
print(f"Total filtered images of test folder  = {test_image_count}")

val_image_count = filter_and_copy_images(RAW_VAL_FOLDER_PATH, VAL_FOLDER_PATH)
print(f"Total filtered images of val folder   = {val_image_count}")

Total filtered images of test folder  = 66232


In [6]:
def filter_and_copy_images(
    source_path: str,
    target_path: Optional[str] = None,
    *,
    channels: int = 3,
    min_width: int = 500,
    max_width: int = 800,
    min_height: int = 300,
    max_height: int = 800,
) -> int:
    """
    Count the images under ``source_path`` (searched recursively) that match the filter
    and optionally copy them into a single flat target directory.

    An image matches when it has exactly ``channels`` bands and its width and height fall inside
    the inclusive ranges given below. Only the image header is read, so pixel data is never decoded.

    Args:
        source_path (str):           The path to the source directory containing images.
        target_path (Optional[str]): The path to the target directory where matching images are copied.
                                     It is created if missing. If None, nothing is copied and only
                                     the count is returned.
        channels (int):              Required number of bands (3 for RGB).
        min_width (int):             Smallest accepted width in pixels (inclusive).
        max_width (int):             Largest accepted width in pixels (inclusive).
        min_height (int):            Smallest accepted height in pixels (inclusive).
        max_height (int):            Largest accepted height in pixels (inclusive).

    Returns:
        int: The number of matching images. Files are copied under their base name only, so a
             match whose name already exists in ``target_path`` (from an earlier run or from
             another subdirectory) is counted but not copied.

    Raises:
        OSError: If a file under ``source_path`` cannot be opened as an image
                 (propagated from ``Image.open``), or if the target directory cannot be created.
    """
    if target_path is not None:
        # exist_ok removes the check-then-create race and keeps re-runs idempotent.
        os.makedirs(target_path, exist_ok=True)

    counter = 0

    for root, _dirs, files in os.walk(source_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Image.open is lazy: mode and size come from the header, so there is no need
            # to decode the pixels into an array just to learn the channel count.
            with Image.open(file_path) as image:
                band_count = len(image.getbands())
                width, height = image.size

            matches = (
                band_count == channels
                and min_width <= width <= max_width
                and min_height <= height <= max_height
            )
            if not matches:
                continue

            if target_path is not None:
                # The directory structure is flattened: only the base name is kept.
                target_file_path = os.path.join(target_path, filename)
                # Never overwrite an existing file with the same name.
                if not os.path.exists(target_file_path):
                    shutil.copy(file_path, target_file_path)
            counter += 1

    return counter


In [7]:
# Filter the raw training split and report how many images matched.
train_image_count = filter_and_copy_images(RAW_TRAIN_FOLDER_PATH, TRAIN_FOLDER_PATH)
print(f"Total filtered images of train folder = {train_image_count}")


Total filtered images of train folder = 445929
