In [2]:
import os
import shutil
import xml.etree.ElementTree as ET

# Input locations (annotations and images of the validation split) and the
# root folder that receives the filtered copy.
vallabelspath = "F:/ImagenetILSVRC/Annotations/DET/val"
valimagespath = "F:/ImagenetILSVRC/Data/DET/val"
val_output_path = "F:/ImagenetILSVRC/FilteredVal"

# Allowed classes: annotation object name -> name of the output sub-folder.
# Only annotations mentioning one of these keys are kept.
labels = dict(
    n02084071="Dog",
    n02274259="Butterfly",
    n03337140="FilingCabinet",
    n02815834="Beaker",
    n03642806="Laptop",
    n03958227="PlasticBag",
    n07697537="Hotdog",
    n07753592="Banana",
    n02206856="Bee",
    n04557648="WaterBottle",
)

# Make sure the output root is present before anything is written below it.
os.makedirs(val_output_path, exist_ok=True)

# Walk every annotation file and, when it names at least one allowed class,
# copy the image/annotation pair into that class's folder under the output root.
# sorted() keeps the processing (and log) order reproducible, because
# os.listdir() returns entries in arbitrary order.
for annotation_file in sorted(os.listdir(vallabelspath)):
    if not annotation_file.endswith(".xml"):
        continue  # Skip non-XML files

    annotation_path = os.path.join(vallabelspath, annotation_file)

    # One corrupt annotation should not abort the whole filtering run.
    try:
        root = ET.parse(annotation_path).getroot()
    except ET.ParseError as err:
        print(f"Warning: Could not parse {annotation_path}: {err}")
        continue

    # An annotation may list several objects; keep the ones in our label set.
    matching_objects = [
        name.text for name in root.findall("./object/name") if name.text in labels
    ]
    if not matching_objects:
        continue  # Nothing of interest in this image

    # Swap only the trailing extension; str.replace() would also rewrite any
    # ".xml" that happens to appear earlier in the file name.
    image_filename = annotation_file[: -len(".xml")] + ".JPEG"
    image_path = os.path.join(valimagespath, image_filename)

    # Without the image the annotation is useless, so skip the pair entirely
    # instead of leaving an orphan XML (and possibly an empty class folder).
    if not os.path.exists(image_path):
        print(f"Warning: Image not found {image_path}")
        continue

    # The first matching object decides which class folder the pair goes to.
    label_folder_path = os.path.join(val_output_path, labels[matching_objects[0]])
    os.makedirs(label_folder_path, exist_ok=True)

    # Copy the image, then its annotation, side by side.
    shutil.copy(image_path, os.path.join(label_folder_path, image_filename))
    print(f"Copied: {image_filename} ({', '.join(matching_objects)})")

    shutil.copy(annotation_path, os.path.join(label_folder_path, annotation_file))
    print(f"Copied: {annotation_file} (XML)")

print("Validation dataset filtering complete!")


Copied: ILSVRC2012_val_00000022.JPEG (n02084071, n02084071, n02084071, n02084071)
Copied: ILSVRC2012_val_00000022.xml (XML)
Copied: ILSVRC2012_val_00000031.JPEG (n02274259)
Copied: ILSVRC2012_val_00000031.xml (XML)
Copied: ILSVRC2012_val_00000057.JPEG (n02084071, n02084071)
Copied: ILSVRC2012_val_00000057.xml (XML)
Copied: ILSVRC2012_val_00000077.JPEG (n02084071)
Copied: ILSVRC2012_val_00000077.xml (XML)
Copied: ILSVRC2012_val_00000115.JPEG (n02084071)
Copied: ILSVRC2012_val_00000115.xml (XML)
Copied: ILSVRC2012_val_00000132.JPEG (n03337140)
Copied: ILSVRC2012_val_00000132.xml (XML)
Copied: ILSVRC2012_val_00000134.JPEG (n02084071, n02084071)
Copied: ILSVRC2012_val_00000134.xml (XML)
Copied: ILSVRC2012_val_00000140.JPEG (n02084071)
Copied: ILSVRC2012_val_00000140.xml (XML)
Copied: ILSVRC2012_val_00000141.JPEG (n02084071)
Copied: ILSVRC2012_val_00000141.xml (XML)
Copied: ILSVRC2012_val_00000147.JPEG (n02084071)
Copied: ILSVRC2012_val_00000147.xml (XML)
Copied: ILSVRC2012_val_00000151.JPE