In [1]:
import os
import shutil
from PIL import Image
from collections import defaultdict

In [2]:
def handle_invalid_action_images(dataset_path, expected_size=(12, 20)):
    """Move unreadable or wrongly sized images out of a dataset tree.

    Walks ``dataset_path`` recursively. A file is kept only if Pillow can
    open and verify it, convert it to RGB, and its size equals
    ``expected_size``. Every other file is moved (not deleted) to
    ``<dataset_path>/../handle_action_images_bin/<dataset name>/<relative folder>/``
    so it can be inspected later.

    Progress is printed per moved file. At the end a summary is printed with
    the number of moved files per immediate parent folder, restricted to the
    folders named 'forehand', 'backhand', 'other' and 'serve'.

    Args:
        dataset_path: Root directory of the dataset. A trailing path
            separator is tolerated.
        expected_size: Size every valid image must have, compared against
            Pillow's ``Image.size`` (``(width, height)``). Defaults to
            ``(12, 20)``.

    Returns:
        None.
    """
    expected_size = tuple(expected_size)

    # Store bad images for later analysis
    deleted_folder = os.path.join(dataset_path, '..', 'handle_action_images_bin')
    print(deleted_folder)
    os.makedirs(deleted_folder, exist_ok=True)

    # basename() of a path ending in a separator is '', which would drop the
    # per-dataset level from the bin layout; abspath() strips the separator
    # (and resolves '.'/'..') before the name is taken.
    dataset_name = os.path.basename(os.path.abspath(dataset_path))

    deleted_strokes_counts = defaultdict(int)
    for foldername, _, filenames in os.walk(dataset_path):
        for filename in filenames:
            file_path = os.path.join(foldername, filename)
            try:
                with Image.open(file_path) as img:
                    img.verify()
                # Per the Pillow docs the image must be reopened after
                # verify() before it can be used again.
                with Image.open(file_path) as img:
                    # The conversion needs the pixel data, so files that pass
                    # verify() but cannot be decoded are rejected here too.
                    rgb_img = img.convert("RGB")
                    if rgb_img.size != expected_size:
                        raise ValueError(
                            f"Invalid image size: {rgb_img.size}. Expected size is {expected_size}."
                        )
            except (IOError, SyntaxError, ValueError, AttributeError) as e:
                print(f"Invalid image found: {file_path}. Reason: {e}. Moving to ../handle_action_images_bin.")

                # Mirror the file's location inside the dataset under the bin.
                relative_path = os.path.relpath(foldername, dataset_path)
                target_folder = os.path.join(deleted_folder, dataset_name, relative_path)
                os.makedirs(target_folder, exist_ok=True)

                shutil.move(file_path, os.path.join(target_folder, filename))
                print(f"Moved {filename} to {target_folder}")

                # The immediate parent folder name is used as the stroke label.
                stroke_type = os.path.basename(foldername)
                deleted_strokes_counts[stroke_type] += 1

    print('-' * 50)
    print("Finished processing and cleaning the dataset.")

    for stroke_type, count in deleted_strokes_counts.items():
        if stroke_type in ('forehand', 'backhand', 'other', 'serve'):
            print(f"{stroke_type}: {count} images deleted")

In [3]:
# Root of the dataset to clean; the relative path is resolved against the
# kernel's current working directory.
dataset_path = '../../data/datasets/action_images_dataset_original_v1.0'

# Moves invalid files into the sibling `handle_action_images_bin` folder.
handle_invalid_action_images(dataset_path=dataset_path)

../../data/datasets/action_images_dataset_original_v1.0/../handle_action_images_bin
--------------------------------------------------
Finished processing and cleaning the dataset.


In [4]:
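# Summary printed by a previous run of handle_invalid_action_images, kept for
# reference ("deleted" here means moved to handle_action_images_bin):
# --------------------------------------------------
# Finished processing and cleaning the dataset.
# forehand: 76 images deleted
# other: 247 images deleted
# serve: 151 images deleted
# backhand: 65 images deleted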