In [3]:
# For each file in every subfolder of datasets, remove the substring "_annotated" from its name
import os

def rename_files_in_subfolders(base_dir, substring_to_remove):
    """Strip ``substring_to_remove`` from every file name under ``base_dir``.

    The directory tree is walked recursively with ``os.walk``. Only file names
    are changed; directory names are left alone. Every occurrence of the
    substring inside a file name is removed.

    A file is left untouched (and a "Skipped" line is printed) when the
    cleaned name would be empty, or when an entry with the cleaned name
    already exists in the same directory, so an existing file is never
    overwritten.

    Args:
        base_dir: Root directory whose files are renamed.
        substring_to_remove: Text to delete from each file name. An empty
            string makes the call a no-op.

    Returns:
        None. Progress is reported with ``print``.
    """
    # An empty substring is "in" every name and would report bogus renames.
    if not substring_to_remove:
        return

    # Walk through each folder and subfolder in the base directory
    for root, _dirs, files in os.walk(base_dir):
        for file in files:
            if substring_to_remove not in file:
                continue

            # Build the old and new file paths
            old_file_path = os.path.join(root, file)
            new_file_name = file.replace(substring_to_remove, "")
            new_file_path = os.path.join(root, new_file_name)

            # A name made up only of the substring has nothing left to rename to.
            if not new_file_name:
                print(f"Skipped: {old_file_path} (name would be empty)")
                continue

            # os.rename silently replaces an existing target on POSIX, which
            # would destroy the file that already carries the cleaned name.
            if os.path.lexists(new_file_path):
                print(f"Skipped: {old_file_path} ({new_file_path} already exists)")
                continue

            # Rename the file
            os.rename(old_file_path, new_file_path)
            print(f"Renamed: {old_file_path} to {new_file_path}")

# Example usage: strip the "_annotated" marker from every file name under the datasets tree.
base_directory = 'datasets'  # Root folder that is walked recursively; change this to your path
substring = '_annotated'  # Text removed from each matching file name

rename_files_in_subfolders(base_directory, substring)

Renamed: datasets/winti/annotated_images/frame000508_annotated.png to datasets/winti/annotated_images/frame000508.png
Renamed: datasets/winti/annotated_images/frame000458_annotated.png to datasets/winti/annotated_images/frame000458.png
Renamed: datasets/winti/annotated_images/frame000190_annotated.png to datasets/winti/annotated_images/frame000190.png
Renamed: datasets/winti/annotated_images/frame000371_annotated.png to datasets/winti/annotated_images/frame000371.png
Renamed: datasets/winti/annotated_images/frame000221_annotated.png to datasets/winti/annotated_images/frame000221.png
Renamed: datasets/winti/annotated_images/frame000187_annotated.png to datasets/winti/annotated_images/frame000187.png
Renamed: datasets/winti/annotated_images/frame000613_annotated.png to datasets/winti/annotated_images/frame000613.png
Renamed: datasets/winti/annotated_images/frame000743_annotated.png to datasets/winti/annotated_images/frame000743.png
Renamed: datasets/winti/annotated_images/frame000366_ann

In [5]:
import os

def _remove_unannotated(folder, keep_names, kind):
    """Delete regular files in ``folder`` whose stem is not in ``keep_names``.

    ``kind`` is the word used in the progress message ("image" or "label").
    """
    for entry in os.listdir(folder):
        file_path = os.path.join(folder, entry)
        # os.remove cannot delete directories (it raises), so anything that is
        # not a regular file is left in place instead of aborting the clean-up.
        if not os.path.isfile(file_path):
            continue
        if os.path.splitext(entry)[0] not in keep_names:
            os.remove(file_path)
            print(f"Removed {kind}: {file_path}")


def clean_images_and_labels(base_dir):
    """Delete images and labels that have no counterpart in ``annotated_images``.

    Every direct subfolder of ``base_dir`` is treated as one dataset that is
    expected to contain three directories: ``annotated_images``, ``images``
    and ``labels``. The stems (file names without extension) of the ``.png``
    files in ``annotated_images`` form the keep-list; any regular file in
    ``images`` or ``labels`` whose stem is not on that list is removed from
    disk. Sub-directories inside ``images``/``labels`` are left untouched.

    Datasets where one of the three directories is missing (or is not a
    directory) are skipped with a message.

    Caution: this is destructive. A dataset whose ``annotated_images`` holds
    no ``.png`` file yields an empty keep-list, so every regular file in its
    ``images`` and ``labels`` directories is removed.

    Args:
        base_dir: Directory whose direct subfolders are the datasets to clean.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Walk through each subfolder of datasets
    for subfolder in os.listdir(base_dir):
        subfolder_path = os.path.join(base_dir, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        # Define paths for annotated, images, and labels
        annotated_dir = os.path.join(subfolder_path, 'annotated_images')
        images_dir = os.path.join(subfolder_path, 'images')
        labels_dir = os.path.join(subfolder_path, 'labels')

        # All three must be real directories; a plain file with one of these
        # names would otherwise make os.listdir fail further down.
        if not (os.path.isdir(annotated_dir) and os.path.isdir(images_dir) and os.path.isdir(labels_dir)):
            print(f"Skipping {subfolder}, missing folders.")
            continue

        # Get a set of image names in the annotated folder (without the .png extension)
        annotated_files = {os.path.splitext(f)[0] for f in os.listdir(annotated_dir) if f.endswith('.png')}

        # Remove files from the images and labels folders that are not in annotated
        _remove_unannotated(images_dir, annotated_files, 'image')
        _remove_unannotated(labels_dir, annotated_files, 'label')

# Example usage: prune the images/ and labels/ folders of every dataset so they
# only keep files whose names match a .png in annotated_images/.
# NOTE: this call deletes files from disk.
base_directory = 'datasets'  # Path to your datasets folder
clean_images_and_labels(base_directory)

Removed image: datasets/winti/images/frame000815.png
Removed image: datasets/winti/images/frame000801.png
Removed image: datasets/winti/images/frame001289.png
Removed image: datasets/winti/images/frame000829.png
Removed image: datasets/winti/images/frame001276.png
Removed image: datasets/winti/images/frame001262.png
Removed image: datasets/winti/images/frame001074.png
Removed image: datasets/winti/images/frame000578.png
Removed image: datasets/winti/images/frame001100.png
Removed image: datasets/winti/images/frame001114.png
Removed image: datasets/winti/images/frame000544.png
Removed image: datasets/winti/images/frame001128.png
Removed image: datasets/winti/images/frame000550.png
Removed image: datasets/winti/images/frame000961.png
Removed image: datasets/winti/images/frame000785.png
Removed image: datasets/winti/images/frame000791.png
Removed image: datasets/winti/images/frame000949.png
Removed image: datasets/winti/images/frame001302.png
Removed image: datasets/winti/images/frame0014

In [8]:
import os

def check_consistency_in_folders(base_dir):
    """Report, per dataset, whether its three folders hold the same file stems.

    Each direct subfolder of ``base_dir`` is inspected. The stems (names
    without extension) of the ``.png`` files in ``annotated_images`` and
    ``images`` and of the ``.txt`` files in ``labels`` are collected into
    sets and compared. A one-line confirmation is printed when the three
    sets are equal; otherwise all three sets are printed. Subfolders lacking
    any of the three paths are reported and skipped.

    Args:
        base_dir: Directory whose direct subfolders are the datasets to check.

    Returns:
        None. Results are reported with ``print``.
    """
    def stems_with_suffix(folder, suffix):
        # Names in `folder` that end with `suffix`, with the extension stripped.
        return {
            os.path.splitext(entry)[0]
            for entry in os.listdir(folder)
            if entry.endswith(suffix)
        }

    for subfolder in os.listdir(base_dir):
        dataset_root = os.path.join(base_dir, subfolder)
        # Plain files sitting next to the dataset folders are ignored.
        if not os.path.isdir(dataset_root):
            continue

        annotated_path = os.path.join(dataset_root, 'annotated_images')
        images_path = os.path.join(dataset_root, 'images')
        labels_path = os.path.join(dataset_root, 'labels')

        # A dataset is only checked when all three paths are present.
        required_paths = (annotated_path, images_path, labels_path)
        if not all(os.path.exists(path) for path in required_paths):
            print(f"Skipping {subfolder}, missing folders.")
            continue

        annotated_stems = stems_with_suffix(annotated_path, '.png')
        image_stems = stems_with_suffix(images_path, '.png')
        label_stems = stems_with_suffix(labels_path, '.txt')

        names_match = annotated_stems == image_stems and image_stems == label_stems
        if not names_match:
            # Dump all three sets so the mismatch can be inspected.
            print(f"Inconsistency found in {subfolder}:")
            print(f"Annotated: {annotated_stems}")
            print(f"Images: {image_stems}")
            print(f"Labels: {label_stems}")
            print("")
        else:
            print(f"All files are consistent in {subfolder}.")

# Example usage: verify that annotated_images/, images/ and labels/ of every
# dataset contain the same set of file names (extensions ignored).
base_directory = 'datasets'  # Path to your datasets folder
check_consistency_in_folders(base_directory)

All files are consistent in winti.
All files are consistent in kaggle.
All files are consistent in my_vid.
