In [1]:
from PIL import Image, ImageFile
import os
import concurrent.futures
# Work from the project root so the relative dataset path defined below resolves.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
os.chdir('/tf-acno-projects/image-classification/')

In [2]:
# Dataset root, relative to the current working directory; its immediate
# sub-directories are scanned for image files.
data_dir = 'faces_data/'
# Extensions accepted by process_image (compared against the lower-cased file
# extension); files with any other extension are deleted.
allowed_extensions = ['.jpeg', '.jpg', '.png', '.bmp']
# Pillow setting that lets truncated image files load instead of raising.
# NOTE(review): set in the notebook process only — confirm the pool workers
# inherit it (depends on the multiprocessing start method).
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [3]:
def is_valid_image(file_path):
    """Report whether Pillow can open and verify the file at ``file_path``.

    Returns:
        True when ``Image.open`` and ``verify()`` both complete without
        raising; False when either raises ``OSError`` or ``SyntaxError``.
        Any other exception type propagates to the caller.
    """
    try:
        with Image.open(file_path) as candidate:
            candidate.verify()
    except (OSError, SyntaxError):  # IOError is an alias of OSError on Python 3
        return False
    else:
        return True

In [4]:
def get_file_extension(file_path):
    """Return the last extension of ``file_path`` in lower case, dot included.

    An empty string is returned when the path has no extension.
    """
    _stem, extension = os.path.splitext(file_path)
    return extension.lower()

In [5]:
# Function to process a single image
def process_image(image_path):
    """Validate one image file and normalize it in place to a 128x128 RGB PNG.

    The file is deleted when its extension is not in ``allowed_extensions``,
    when ``is_valid_image`` rejects it, or when opening, converting, resizing
    or saving raises. Otherwise the file is overwritten at the same path.

    NOTE: the output is always PNG-encoded but keeps the original file name,
    so a ``.jpg``/``.bmp`` file ends up holding PNG data.

    Args:
        image_path: Path of the image file to process.

    Returns:
        None. All effects are on the filesystem and stdout.
    """
    # Check file extension
    if get_file_extension(image_path) not in allowed_extensions:
        print(f"Removed {image_path} due to invalid extension.")
        os.remove(image_path)
        return

    # Verify image integrity
    if not is_valid_image(image_path):
        print(f"Removed {image_path} due to file corruption or invalid format.")
        os.remove(image_path)
        return

    try:
        # verify() leaves the image object unusable, so the file is opened again here.
        with Image.open(image_path) as img:
            # Convert every non-RGB mode, not only P/L/RGBA. Modes PNG cannot
            # store (e.g. CMYK) previously failed in save() and the valid image
            # was deleted by the handler below; modes PNG can store (e.g. LA, 1)
            # were written unconverted, leaving non-RGB files in the dataset.
            if img.mode != 'RGB':
                img = img.convert('RGB')
                print(f"Converting {os.path.splitext(image_path)[0]} to RGB")
            img = img.resize((128, 128))
            # Save image without ICC profile to avoid iCCP warnings
            img.save(image_path, format="PNG", icc_profile=None)

    except Exception as e:
        # Anything that cannot be loaded, converted or written is dropped from the dataset.
        print(f"Error processing {image_path}: {str(e)}")
        os.remove(image_path)

In [6]:
# Function to process images in parallel
def process_images_in_parallel():
    """Run ``process_image`` on every file one level below ``data_dir``.

    Each immediate sub-directory of ``data_dir`` is treated as a class folder
    and its regular files are handed to a ``ProcessPoolExecutor``. Nested
    sub-directories are skipped rather than being passed to ``process_image``.

    An exception raised inside a worker is printed together with the path it
    belongs to; it does not stop the remaining files from being processed.

    Returns:
        None.
    """
    image_paths = []
    # Walk through all class directories and collect all image paths
    for image_class in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, image_class)
        if not os.path.isdir(class_dir):
            continue

        for image_name in os.listdir(class_dir):
            image_path = os.path.join(class_dir, image_name)
            # Regular files only: process_image() calls os.remove() on entries
            # with an unexpected extension, which raises for a directory.
            if os.path.isfile(image_path):
                image_paths.append(image_path)

    # Process images in parallel
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # Keep the future -> path mapping so a failure can be attributed.
        # The lazy iterator returned by executor.map() was never consumed,
        # so worker exceptions were silently discarded.
        pending = {executor.submit(process_image, path): path for path in image_paths}
        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                print(f"Failed to process {pending[future]}: {error!r}")

In [7]:
# Run the cleanup over the whole dataset.
# Destructive: files under data_dir are deleted or overwritten in place, so
# keep a copy of the raw data if it may be needed again.
process_images_in_parallel()
print("Image preprocessing completed.")

Removed faces_data/angry/angry faces - Google Search - 12_8_2024 11-58-33 PM due to invalid extension.
Removed faces_data/shocked/ai-img-extender.svg due to invalid extension.
Removed faces_data/shocked/image (447).svg due to invalid extension.Removed faces_data/shocked/WUNw-C6ySPWdjCJm3sEnXw.webp due to invalid extension.
Removed faces_data/shocked/privacy_checkup_icon.svg due to invalid extension.
Removed faces_data/shocked/image (266).svg due to invalid extension.
Removed faces_data/shocked/image (487).svg due to invalid extension.Removed faces_data/shocked/3p0pNFujIw2ea48dnt5KmDn05W8mTHuRvTgMeFHfVnbRBoWjjtkUKxa1QyAa.webp due to invalid extension.
Removed faces_data/shocked/image (214).svg due to invalid extension.

Removed faces_data/shocked/image (603).svg due to invalid extension.Removed faces_data/shocked/plus.svg due to invalid extension.


Converting faces_data/shocked/p5lVJAicHuI6Ra6jtpYimNt53JZQNCcN06a-Q4fUaNVFo3cjVisZMY_UwBTg to RGB
Removed faces_data/shocked/image (56).svg





Converting faces_data/shocked/DAo-8saTV7FUPPTcvF0ct4XMbh2h1DeauNlipLG5rcggaWBdwh8PL_ZrFOGY to RGBConverting faces_data/shocked/chat-icon-256x224-rhtlxzj6 to RGB

Converting faces_data/shocked/drooling-face-with-heart-eyes-emoji-256x246-k3i3yvm1 to RGBRemoved faces_data/shocked/group-4449.png=c-s40 due to invalid extension.Removed faces_data/shocked/laugh-face-emoji-for-a-chat-vector-44530637.webp due to invalid extension.Converting faces_data/shocked/6nGdwtbmSCuuGF5fSCqvv0f-GOsp927ZXRFxC1NNEqlH-EwAGEqlHXN2rcar (1) to RGB



Removed faces_data/shocked/fa9e0e90d1e7ec399dad9f3257a9bb63.svg due to invalid extension.Removed faces_data/shocked/image (39).svg due to invalid extension.Removed faces_data/shocked/plstujaWSQKM4Szih_FyJA.webp due to invalid extension.
Converting faces_data/shocked/AqBNM_Xi-raRYPTac9ym_mBnCKXULqn7Pgw0UNavMe-0_Qs-A2_y9vSNQGQy to RGB

Removed faces_data/shocked/image (18).svg due to invalid extension.

Removed faces_data/shocked/glue-google-solid-logo (1).svg due t



Removed faces_data/shocked/image (483).svg due to invalid extension.
Converting faces_data/shocked/arrow-from-right-icon-256x211-scc4qc7d to RGBRemoved faces_data/shocked/san-francisco-map-light.webp due to invalid extension.Removed faces_data/shocked/image (306).svg due to invalid extension.



Removed faces_data/shocked/ZlNWCYHqQ7SUAESPy72ikg.webp due to invalid extension.Removed faces_data/shocked/image (219).svg due to invalid extension.


Removed faces_data/shocked/image (145).svg due to invalid extension.Removed faces_data/shocked/image (562).svg due to invalid extension.

Removed faces_data/shocked/image (68).svg due to invalid extension.Removed faces_data/shocked/shocked-face-2b369c1c_0_l.webp due to invalid extension.





Converting faces_data/shocked/shocked-icon-256x256-801ofht1 to RGBRemoved faces_data/shocked/AuE_7DAkQeKp4z1PR3gBUg.webp due to invalid extension.





Converting faces_data/shocked/3xWy7lTFLEzfX5UeLUd3iLKF_oMwOVb4gKlb__yEcimkl1lBhU0n6u3B34zG to RGBConverting faces_data/shocked/message-icon-256x254-fbmki9s3 to RGBRemoved faces_data/shocked/tos_main_illustration.svg due to invalid extension.


Removed faces_data/shocked/image (38).svg due to invalid extension.Converting faces_data/shocked/SP5F8XlkxjIfM3uEu47BolKEBwkqWrOfyvwywHut48p0AZgedzyhcoevSaJC (1) to RGBConverting faces_data/shocked/painfully-shocked-face-emoji-144x144-1e0muhux to RGB
Removed faces_data/shocked/4f19891c43001db11efc8048f9bc7cdb.svg due to invalid extension.

Removed faces_data/shocked/stock-photo-photographer-AchillesReel-27671953_36-png-pagesp.png due to file corruption or invalid format.

Removed faces_data/shocked/image (274).svg due to invalid extension.Converting faces_data/shocked/u2XGSr0jis3w5sLeuh8UMqGHgtdqPVPi77xYhPJdMO9C41wYUue3EKPJvwp- (1) to RGBRemoved faces_data/shocked/image (294).svg due to invalid extension.

Converting faces_data/shocked/googlelo



Converting faces_data/shocked/videogame-icon-256x215-dmjvd9sq to RGB
Removed faces_data/shocked/image (60).svg due to invalid extension.

Removed faces_data/shocked/image (36).svg due to invalid extension.Removed faces_data/shocked/postmark.png due to file corruption or invalid format.

Removed faces_data/shocked/DQ9VVZbqR_muPB8wRZWS5Q.webp due to invalid extension.Converting faces_data/shocked/s0RYbwmQqRo2R4S0cKGT5Z7pSaSyG-NbI2SH62esgSZ_axFPPs18v0LRqtaC to RGB
Converting faces_data/shocked/header-link-ai-img-extender to RGBRemoved faces_data/shocked/lazy-bg.png due to file corruption or invalid format.
Removed faces_data/shocked/JDFOyo903E9WGstK0YhI2ZFOKR3h4qDxBngX5M8XJVBZFKzOBoxLmk3OVlgN.webp due to invalid extension.Removed faces_data/shocked/image (138).svg due to invalid extension.
Removed faces_data/shocked/image (564).svg due to invalid extension.

Removed faces_data/shocked/sUcH5IIITlexVKpsDv4Xbw.webp due to invalid extension.
Removed faces_data/shocked/f53a065feaef6edf1c0da97b



Removed faces_data/shocked/fkErRzP8RKe6_rNPqY6vAA.webp due to invalid extension.Removed faces_data/shocked/image (158).svg due to invalid extension.
Converting faces_data/shocked/camera-slr-icon-255x256-vzoys3mm to RGB
Converting faces_data/shocked/Ucxl6g9AKLX3XmK7an_99LzivIJsXn5cvQdIMM_g4nNFZdULnGa4TH45WVlF (1) to RGB
Converting faces_data/shocked/QsFLnA2p7QlFCy4Rk-TH0XoaWFilCOzzt8FPO58nI_FXh5wQkjWEMpBKMkJx (1) to RGBRemoved faces_data/shocked/image (92).svg due to invalid extension.Removed faces_data/shocked/image (249).svg due to invalid extension.
Removed faces_data/shocked/V_9Ty3lJtalC4u9PFIdOo1IQ53n8-mNT4Xmd5Cf67erShp64sYCjzmYr_Tbo.webp due to invalid extension.



Removed faces_data/shocked/image (162).svg due to invalid extension.
Removed faces_data/shocked/image (351).svg due to invalid extension.Removed faces_data/shocked/image (357).svg due to invalid extension.

Converting faces_data/shocked/app-img_goal-seek-for-sheets to RGBRemoved faces_data/shocked/360_F_182877688_pIetT



Removed faces_data/shocked/image (332).svg due to invalid extension.
Removed faces_data/shocked/image (621).svg due to invalid extension.Removed faces_data/shocked/image (153).svg due to invalid extension.Converting faces_data/shocked/friendlystock-logo-sm to RGB



Converting faces_data/shocked/cloud-phone-icon-256x256-q23ym8tf to RGBRemoved faces_data/shocked/btn-down-shadow.svg due to invalid extension.
Converting faces_data/shocked/header-link-ai-writer to RGBRemoved faces_data/shocked/image (275).svg due to invalid extension.

Removed faces_data/shocked/image (271).svg due to invalid extension.

Removed faces_data/shocked/image (267).svg due to invalid extension.Removed faces_data/shocked/smileysad-emoticon-yellow-face-with-emotions-vector-15022732.webp due to invalid extension.

Removed faces_data/shocked/image (264).svg due to invalid extension.Removed faces_data/shocked/image (590).svg due to invalid extension.Converting faces_data/shocked/7BR1vvvQSnzGibLkyR89xVw9SKyQQsfQkQq9Jc



Converting faces_data/shocked/DaaQa-Y-b3_IAhu6SBFb2vRl8PFR5iuCLwLszc16_OTlLrEFvFF9P4CS0ui- to RGB
Converting faces_data/shocked/tags-icon-256x203-kgl24oux to RGB
Converting faces_data/shocked/inbox-fill-icon-256x161-ht8k77v3 to RGB
Converting faces_data/shocked/6cr6PdE9s0J1ovFNm38uf-dwcOP--68QMWey603BCUah-QcO0gL0TvyqmTBY (1) to RGB
Removed faces_data/shocked/6Ij3BrGOfLbkwxdB_SSX8gEvOx68QQC9IPzhmZLddxbklr1yzblPj8MYEf2X.webp due to invalid extension.Removed faces_data/shocked/emoji-raises-his-hands-and-asks-a-question-vector-33360101.webp due to invalid extension.


Removed faces_data/shocked/image (394).svg due to invalid extension.Removed faces_data/shocked/respect_others_illustration.svg due to invalid extension.

Removed faces_data/shocked/san-francisco-map-dark.webp due to invalid extension.Removed faces_data/shocked/Gif-240-banner-thing-1.gif due to invalid extension.
Removed faces_data/shocked/39b031d352a2e1586cf50ac7f2bbc18b.svg due to invalid extension.

Converting faces_data/sh