In [None]:
from PIL import Image, ImageFile
import os

In [None]:
# Let Pillow load image files whose data is cut short instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# File extensions (lower case, leading dot included) that process_images()
# keeps; files with any other extension are deleted.
allowed_extensions = [
    '.jpeg',
    '.jpg',
    '.png',
    '.bmp',
]

In [None]:
def is_valid_image(file_path):
    """Return True if Pillow can open ``file_path`` and its ``verify()`` check passes.

    Args:
        file_path: Path of the file to check.

    Returns:
        True when ``Image.open`` and ``Image.verify`` both complete without
        raising; False when either raises any ordinary exception (missing
        file, unrecognised format, broken data, ...).
    """
    try:
        with Image.open(file_path) as img:
            img.verify()
    except Exception:
        # Deliberately broad: Pillow signals bad files with several unrelated
        # exception types. It is *not* a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate instead of being
        # reported as "corrupt image" (which would make the caller delete it).
        return False
    return True

In [None]:
def get_file_extension(file_path):
    """Return the lower-cased extension of ``file_path``, leading dot included.

    The result is an empty string when the last path component has no
    extension.
    """
    _stem, extension = os.path.splitext(file_path)
    return extension.lower()

In [None]:
def _remove_if_present(path):
    """Delete ``path``; a missing file is fine, other failures are reported, not raised."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"Could not remove {path}: {str(e)}")


def _png_destination(image_path):
    """Return the ``.png`` path to write for ``image_path`` without clobbering other files.

    The preferred name is the source name with its extension replaced by
    ``.png``. If that name is already taken by a *different* file, a numeric
    suffix (``_1``, ``_2``, ...) is appended until a free name is found.
    """
    stem = os.path.splitext(image_path)[0]
    candidate = stem + '.png'
    counter = 1
    # samefile (not string equality) so that "x.PNG" vs "x.png" on a
    # case-insensitive filesystem is recognised as the source itself.
    while os.path.exists(candidate) and not os.path.samefile(candidate, image_path):
        candidate = f"{stem}_{counter}.png"
        counter += 1
    return candidate


def _convert_to_png(image_path):
    """Re-encode one already-verified image as an RGB PNG next to the original.

    The PNG is written to a temporary file first and moved into place only
    once complete, so a failed write never leaves a partial PNG behind.
    If the image cannot be decoded it is deleted; if decoding worked but
    writing failed, the source file is kept.
    """
    new_path = _png_destination(image_path)
    # ".tmp" is not an allowed extension, so a leftover from an interrupted
    # run is cleaned up by the extension check on the next run.
    tmp_path = new_path + '.tmp'
    decoded = False
    try:
        with Image.open(image_path) as img:
            # Convert to RGB if it's not
            if img.mode != 'RGB':
                img = img.convert('RGB')
            # Force the full decode now: any failure after this point is a
            # write problem, not a problem with the image itself.
            img.load()
            decoded = True
            img.save(tmp_path, "PNG")

        # Source file is closed here, so replacing it in place is safe.
        os.replace(tmp_path, new_path)

        # Remove the original only if it is really a different file.
        if os.path.exists(image_path) and not os.path.samefile(image_path, new_path):
            os.remove(image_path)
            print(f"Converted {image_path} to PNG.")
    except Exception as e:
        print(f"Error processing {image_path}: {str(e)}")
        _remove_if_present(tmp_path)
        if not decoded:
            # Undecodable despite passing verify(): drop it from the dataset.
            _remove_if_present(image_path)


def process_images():
    """Clean each class folder under ``data_dir`` and normalise its images to RGB PNG.

    ``data_dir`` is read as a module-level name and must be defined before
    this function is called. Every sub-directory of ``data_dir`` is treated
    as one image class. For each regular file directly inside a class folder:

    * files whose extension is not in ``allowed_extensions`` are deleted;
    * files rejected by ``is_valid_image`` are deleted;
    * the rest are re-encoded as RGB PNG, and the original is deleted when
      the PNG is a different file.

    Entries that are not regular files (e.g. nested directories) are skipped.
    Progress and problems are reported with ``print``.
    """
    for image_class in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, image_class)
        if not os.path.isdir(class_dir):
            continue

        for image_name in os.listdir(class_dir):
            image_path = os.path.join(class_dir, image_name)

            # os.remove() cannot delete directories, and the listing is a
            # snapshot, so an entry may also have disappeared meanwhile.
            if not os.path.isfile(image_path):
                continue

            # Check file extension
            if get_file_extension(image_path) not in allowed_extensions:
                print(f"Removed {image_path} due to invalid extension.")
                os.remove(image_path)
                continue

            # Verify image integrity
            if not is_valid_image(image_path):
                print(f"Removed {image_path} due to file corruption or invalid format.")
                os.remove(image_path)
                continue

            _convert_to_png(image_path)

In [None]:
# Run the clean-up / PNG-conversion pass, then report once it has returned.
process_images()
print("Image preprocessing completed.")