In [1]:
import os
from PIL import Image, UnidentifiedImageError
import numpy as np

In [2]:

# Root of the dataset tree that the scan walks recursively.
# NOTE: machine-specific absolute path; adjust it when running elsewhere.
ROOT_DATA_DIR = "/media/tairo/Storages/AIProject/AboutModel/TraAI/Data"
# File suffixes treated as images. Keep them lowercase: file names are
# lowercased before the suffix comparison. '.tif' is listed next to '.tiff'
# so that TIFF files using the short suffix are not silently skipped.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tif', '.tiff')
# NumPy dtypes considered acceptable for a decoded image; only uint8 for now.
ACCEPTABLE_DTYPES = [np.dtype('uint8')]


In [3]:
def check_all_image_dtypes(start_directory):
    """
    Recursively scan every image under ``start_directory`` and check the
    NumPy dtype each one decodes to.

    Files are selected by suffix (``IMAGE_EXTENSIONS``, compared against the
    lowercased file name). Each selected file is opened with Pillow and turned
    into a NumPy array. A WARNING is printed when the array's dtype is not in
    ``ACCEPTABLE_DTYPES``; files that cannot be opened or decoded are reported
    as ERRORs. A summary is printed at the end.

    The dtype is taken from the image *as decoded*. Only palette ('P') and
    grey+alpha ('LA') images are expanded to RGBA first. Other modes are NOT
    converted to RGB before the check: such a conversion would turn integer,
    float or 16-bit images into uint8 and hide exactly the files this scan is
    meant to find.

    Boolean arrays coming from 1-bit (mode '1') images are treated as
    acceptable.

    Args:
        start_directory: Root directory to walk.

    Returns:
        None. All findings are reported through ``print``.
    """
    print(f"Starting image NumPy dtype scan in: {start_directory}")
    print(f"Acceptable NumPy dtypes: {[str(dt) for dt in ACCEPTABLE_DTYPES]}")  # strings for readability
    print("-" * 60)

    # os.walk() silently yields nothing for a missing path, which would
    # otherwise end in a misleading "all images are fine" summary.
    if not os.path.isdir(start_directory):
        print(f"  ERROR: Not an existing directory: {start_directory}")
        print("-" * 60)
        return

    images_scanned_count = 0
    problematic_images_count = 0

    for dirpath, _, filenames in os.walk(start_directory):
        for filename in filenames:
            if not filename.lower().endswith(IMAGE_EXTENSIONS):
                continue

            image_path = os.path.join(dirpath, filename)
            images_scanned_count += 1
            try:
                with Image.open(image_path) as img:
                    mode = img.mode
                    if mode in ('P', 'LA'):
                        # Palette / grey+alpha images are expanded to RGBA so
                        # the array holds pixel values rather than palette
                        # indices.
                        img_array = np.array(img.convert('RGBA'))
                    else:
                        # Keep the native pixel format so that non-8-bit
                        # images surface with their real dtype.
                        img_array = np.array(img)
                    dtype = img_array.dtype

                    if dtype in ACCEPTABLE_DTYPES:
                        continue
                    if mode == '1' and dtype == np.dtype('bool'):
                        # 1-bit images decode to bool; treated as acceptable.
                        continue

                    print(f"  WARNING: Image '{image_path}' (Mode: {mode}) -> NumPy dtype: {dtype}.")
                    problematic_images_count += 1

            except UnidentifiedImageError:
                print(f"  ERROR: Cannot identify image (corrupted or unsupported): {image_path}")
                problematic_images_count += 1
            except Exception as e:
                # Deliberately broad: one unreadable file must not abort the scan.
                print(f"  ERROR: Could not process {image_path}: {e}")
                problematic_images_count += 1

    print("\n" + "-" * 60)
    print("Image NumPy dtype scan complete.")
    print(f"Total images scanned: {images_scanned_count}")
    if images_scanned_count == 0:
        # Do not claim success when nothing was actually checked.
        print("No image files were found; nothing was checked.")
    elif problematic_images_count > 0:
        print(f"Number of images with non-acceptable NumPy dtypes or errors: {problematic_images_count}")
        print("Please review the WARNING/ERROR messages above.")
    else:
        print("All scanned images, when converted to NumPy arrays, appear to have acceptable dtypes (e.g., uint8).")
    print("-" * 60)

# Run the scan over the configured dataset root when executed as the main module.
if __name__ == "__main__":
    check_all_image_dtypes(start_directory=ROOT_DATA_DIR)

Starting image NumPy dtype scan in: /media/tairo/Storages/AIProject/AboutModel/TraAI/Data
Acceptable NumPy dtypes: ['uint8']
------------------------------------------------------------

------------------------------------------------------------
Image NumPy dtype scan complete.
Total images scanned: 9193
All scanned images, when converted to NumPy arrays, appear to have acceptable dtypes (e.g., uint8).
------------------------------------------------------------
