# Notebook cell In [3]:
import os
from PIL import Image
import numpy as np
# Removed 'from collections import Counter' as it wasn't used

def get_color_percentage(image_path, target_colors):
    """
    Calculate the total percentage of pixels matching any of the target colors.

    The image is converted to RGB before comparison, so matching is done on
    exact (R, G, B) values only.

    Args:
        image_path (str): Path to the image file
        target_colors (list): List of hex color strings to match ('#RRGGBB';
            the leading '#' is optional). Entries that are not exactly six
            hexadecimal digits are reported with a warning and ignored.

    Returns:
        float: Percentage of pixels matching any target color (0-100).
            Returns 0 when no valid target color was given or the image has
            no pixels, and -1 when the image could not be read or processed.
    """
    hex_digits = frozenset("0123456789abcdefABCDEF")

    try:
        # Pillow opens files lazily and may keep the handle open (notably for
        # multi-frame formats). Closing it here matters because the caller may
        # delete the file right after this call, which fails on Windows while
        # a handle is still open.
        with Image.open(image_path) as img:
            rgb_img = img if img.mode == 'RGB' else img.convert('RGB')
            # np.array copies the pixel data, so it stays valid after close.
            img_array = np.array(rgb_img)

        # Expect (height, width, 3); anything else cannot be color-checked.
        if img_array.ndim != 3 or img_array.shape[2] != 3:
            print(f"Warning: Unexpected image format or shape for {image_path}. Shape: {img_array.shape}. Skipping color check.")
            return -1  # Indicate an issue rather than 0%

        pixels = img_array.reshape(-1, 3)

        # Convert target hex colors to RGB tuples (set removes duplicates).
        target_rgb_set = set()
        for hex_color in target_colors:
            hex_color = hex_color.lstrip('#')
            if len(hex_color) != 6:
                print(f"Warning: Invalid hex color format '{hex_color}' provided.")
                continue
            # int(x, 16) alone would accept signs/whitespace such as '-f' or
            # '+f', so validate the characters explicitly.
            if not hex_digits.issuperset(hex_color):
                print(f"Warning: Invalid hex color '{hex_color}' provided.")
                continue
            target_rgb_set.add(
                tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4))
            )

        if not target_rgb_set:
            print(f"Warning: No valid target colors specified for {image_path}")
            return 0  # No colors to match, so 0%

        total_pixels = len(pixels)
        if total_pixels == 0:
            return 0  # Avoid division by zero for empty images

        # Pack each RGB triple into one 24-bit integer so that membership can
        # be tested with a single vectorized np.isin call instead of a
        # per-pixel Python loop.
        packed_pixels = (
            (pixels[:, 0].astype(np.uint32) << 16)
            | (pixels[:, 1].astype(np.uint32) << 8)
            | pixels[:, 2].astype(np.uint32)
        )
        packed_targets = np.array(
            [(r << 16) | (g << 8) | b for r, g, b in target_rgb_set],
            dtype=np.uint32,
        )
        matching_pixels = int(np.count_nonzero(np.isin(packed_pixels, packed_targets)))

        return (matching_pixels / total_pixels) * 100

    except FileNotFoundError:
        print(f"Error: File not found {image_path}")
        return -1  # Indicate specific error
    except Exception as e:
        # Best-effort boundary: any other failure (unreadable image, bad
        # argument types, ...) is reported and signalled with -1.
        print(f"Error processing {image_path}: {e}")
        return -1  # Indicate general error

def delete_images_by_color_dominance(directory, deletion_rules=None):
    """
    Delete images within a specific directory based on color dominance rules.

    Only regular files directly inside ``directory`` whose name ends with a
    common image extension are considered; subdirectories are not traversed.
    Rules are evaluated in order for each file. The first rule whose colors
    cover more than its threshold triggers deletion, and a processing error
    (signalled by ``get_color_percentage`` returning -1) skips the file.
    A summary is printed at the end.

    Args:
        directory (str): Directory whose images should be checked.
        deletion_rules (list | None): List of ``(colors, threshold)`` pairs,
            where ``colors`` is a list of '#RRGGBB' strings and ``threshold``
            is a percentage (0-100). Defaults to deleting images in which
            pure white ('#FFFFFF') covers more than 4%.

    Returns:
        None
    """
    if deletion_rules is None:
        # Default rule: delete if pure white ('#FFFFFF') is more than 4%.
        deletion_rules = [(['#FFFFFF'], 4)]

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tif', '.tiff')

    deleted_count = 0
    kept_count = 0
    error_count = 0

    print(f"Processing directory: {directory}")

    if not os.path.isdir(directory):
        print(f"Error: Directory not found: {directory}")
        return  # Stop processing this directory

    # Sorted so the log output is deterministic across platforms.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(image_extensions):
            continue

        filepath = os.path.join(directory, filename)

        # Ensure it's a file, not a subdirectory that happens to be named
        # like an image.
        if not os.path.isfile(filepath):
            continue

        # One of 'kept', 'deleted' or 'error'; a file is kept unless a rule
        # says otherwise.
        outcome = 'kept'

        for colors, threshold in deletion_rules:
            percentage = get_color_percentage(filepath, colors)

            if percentage == -1:  # Processing error signal
                print(f"Skipping deletion check for {filename} due to processing error.")
                outcome = 'error'
                break  # Do not evaluate further rules for an unreadable file

            if percentage > threshold:
                print(f"  - Deleting {filename}: Color(s) {colors} cover {percentage:.2f}% (> {threshold}%)")
                try:
                    os.remove(filepath)
                except OSError as e:
                    # A failed deletion is counted as an error, not as kept.
                    print(f"  - Failed to delete {filename}: {e}")
                    outcome = 'error'
                else:
                    outcome = 'deleted'
                break  # The file was handled either way; stop checking rules

        if outcome == 'deleted':
            deleted_count += 1
        elif outcome == 'error':
            error_count += 1
        else:
            kept_count += 1

    print(f"Finished processing {directory}")
    print(f"  Deleted: {deleted_count}")
    print(f"  Kept: {kept_count}")
    if error_count > 0:
        print(f"  Errors/Skipped: {error_count}")
    print("-" * 20)  # Separator


# --- Main Execution ---
# Root folder; each immediate subdirectory is processed as one image set.
zdjecia_base_dir = r"C:\Users\karol\Downloads\dane\niezabudowane\zdjecia"

print(f"Starting processing in base directory: {zdjecia_base_dir}")
print("=" * 40)

if os.path.isdir(zdjecia_base_dir):
    # Walk the immediate children of the base directory; plain files at this
    # level are ignored and only subdirectories are processed.
    for item_name in os.listdir(zdjecia_base_dir):
        item_path = os.path.join(zdjecia_base_dir, item_name)

        if not os.path.isdir(item_path):
            continue

        delete_images_by_color_dominance(item_path)

    print("=" * 40)
    print("Overall processing finished.")
else:
    # Nothing to do when the base directory is missing.
    print(f"Error: Base directory '{zdjecia_base_dir}' not found. Exiting.")


# Captured cell output:
#
# Starting processing in base directory: C:\Users\karol\Downloads\dane\niezabudowane\zdjecia
# Processing directory: C:\Users\karol\Downloads\dane\niezabudowane\zdjecia\gdansk
#   - Deleting cell_10.png: Color(s) ['#FFFFFF'] cover 19.94% (> 4%)
#   - Deleting cell_11.png: Color(s) ['#FFFFFF'] cover 29.40% (> 4%)
#   - Deleting cell_12.png: Color(s) ['#FFFFFF'] cover 29.40% (> 4%)
#   - Deleting cell_13.png: Color(s) ['#FFFFFF'] cover 29.40% (> 4%)
#   - Deleting cell_14.png: Color(s) ['#FFFFFF'] cover 13.52% (> 4%)
#   - Deleting cell_8.png: Color(s) ['#FFFFFF'] cover 6.60% (> 4%)
#   - Deleting cell_9.png: Color(s) ['#FFFFFF'] cover 6.60% (> 4%)
# Finished processing C:\Users\karol\Downloads\dane\niezabudowane\zdjecia\gdansk
#   Deleted: 7
#   Kept: 17
# --------------------
# Processing directory: C:\Users\karol\Downloads\dane\niezabudowane\zdjecia\warszawa
# Finished processing C:\Users\karol\Downloads\dane\niezabudowane\zdjecia\warszawa
#   Deleted: 0
#   Kept: 26
# --------------------
# Overall processing finished.
