In [None]:
import os
from PIL import Image

# Directories holding the images and their label files. A label belongs to an
# image when both share the same file-name stem (e.g. foo.jpg <-> foo.txt).
img_dir = 'train_images_noise/'
label_dir = 'train_labels_noise/'

# First, remove corrupted image files and their corresponding labels

for img_file in os.listdir(img_dir):
    img_path = os.path.join(img_dir, img_file)

    # Only regular files can be verified and deleted with os.remove();
    # leave any sub-directory untouched instead of crashing on it.
    if not os.path.isfile(img_path):
        continue

    try:
        # The context manager closes the file handle as soon as verification
        # finishes (or fails), so no handle is leaked and the file is no
        # longer open when it gets deleted below (deleting an open file
        # fails on Windows).
        with Image.open(img_path) as img:
            img.verify()  # raises if the file is not a valid image
    except (OSError, SyntaxError):
        print('Bad file:', img_file)
        os.remove(img_path)

        # Remove the matching label as well, if there is one. The stem is
        # derived with os.path.splitext so it agrees with the orphan check
        # performed later in this script.
        label_file = os.path.join(
            label_dir, os.path.splitext(img_file)[0] + '.txt')
        if os.path.exists(label_file):
            os.remove(label_file)

print('Finished removing corrupted files.')

# Then, remove orphaned image or label files


def _names_by_stem(directory):
    """Map each file-name stem in *directory* to the file names carrying it.

    Only regular files are considered. Keeping the real file names (rather
    than just the stems) lets callers delete a file under its actual
    extension instead of guessing one.
    """
    names = {}
    for entry in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, entry)):
            names.setdefault(os.path.splitext(entry)[0], []).append(entry)
    return names


_img_names = _names_by_stem(img_dir)
_label_names = _names_by_stem(label_dir)

# Sets of file names without extensions, used to pair images with labels
img_files = set(_img_names)
label_files = set(_label_names)

# Orphaned image files (no label shares their stem). Every file with that
# stem is removed under its real name, whatever its extension is.
for img_file in sorted(img_files - label_files):
    for orphan_name in _img_names[img_file]:
        print(f"Deleting orphaned image file: {orphan_name}")
        os.remove(os.path.join(img_dir, orphan_name))

# Orphaned label files (no image shares their stem)
for label_file in sorted(label_files - img_files):
    for orphan_name in _label_names[label_file]:
        print(f"Deleting orphaned label file: {orphan_name}")
        os.remove(os.path.join(label_dir, orphan_name))

print('Finished removing orphaned files.')
