In [1]:
# Renumbers image/JSON pairs stored in nested folders.
#     Pairs are renamed to sequential numbers (image_<n>) that keep counting across all subfolders.
from pathlib import Path
import os

In [2]:
# --- CONFIGURATION ---
# Root folder whose immediate subfolders contain the image/JSON pairs.
# Set the PEXELS_DATASET_ROOT environment variable to point the notebook at a
# different location without editing this cell; when it is unset, the original
# hard-coded path below is used.
root_directory = Path(
    os.environ.get(
        "PEXELS_DATASET_ROOT",
        "/Users/natalyagrokh/AI/ml_expressions/img_datasets/pexels_dataset_archive/pexels_scraped copy",
    )
)

In [3]:
print("Scanning directories...")

# Image extensions to collect; compared case-insensitively so that files such
# as "image_7.JPG" are not silently skipped.
IMAGE_SUFFIXES = {".jpeg", ".jpg", ".png"}


def get_image_number(file_path):
    """Return the integer in the second '_'-separated token of the file stem.

    Assumes names of the form "image_123". Files whose stem has no second
    token, or whose second token is not an integer, get ``float('inf')`` so
    that they sort after every correctly named file.
    """
    try:
        return int(file_path.stem.split('_')[1])
    except (ValueError, IndexError):
        return float('inf')


# Get all subdirectories and sort them alphabetically for consistent ordering
sub_dirs = sorted(d for d in root_directory.iterdir() if d.is_dir())

all_file_pairs = []
for sub_dir in sub_dirs:
    # Collect every image file in this folder in a single directory pass.
    images_in_dir = [
        entry for entry in sub_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in IMAGE_SUFFIXES
    ]

    # Sort numerically; the file name breaks ties so the resulting order (and
    # therefore the final numbering) does not depend on directory listing order.
    image_files = sorted(images_in_dir, key=lambda p: (get_image_number(p), p.name))

    # A JSON file can belong to only one image. Without this guard, two images
    # sharing a stem (e.g. image_1.jpg and image_1.png) would both be paired
    # with image_1.json and the second pair could never be renamed.
    claimed_json = set()
    for image_path in image_files:
        json_path = image_path.with_suffix('.json')
        if json_path in claimed_json:
            print(
                f"Skipping '{image_path.name}' in '{sub_dir.name}': "
                f"'{json_path.name}' is already paired with another image."
            )
            continue
        if json_path.exists():
            claimed_json.add(json_path)
            all_file_pairs.append((image_path, json_path))

if not all_file_pairs:
    print("No image/JSON pairs found to renumber.")
else:
    print(f"Found {len(all_file_pairs)} image/JSON pairs to be renumbered.")

Scanning directories...
Found 23995 image/JSON pairs to be renumbered.


In [4]:
# Zero-padding width used when the first file does not carry a usable number.
DEFAULT_PADDING_WIDTH = 4

# Reuse the digit count of the first file's number (e.g. "image_0042" -> 4) so
# the new names follow the existing convention. The second '_'-separated token
# must be made of ASCII digits; otherwise (empty pair list, no underscore, or
# a non-numeric token such as "photo_final") the default width is used instead
# of the length of an unrelated word.
first_stem_parts = all_file_pairs[0][0].stem.split('_') if all_file_pairs else []
number_part = first_stem_parts[1] if len(first_stem_parts) > 1 else ""

if number_part.isascii() and number_part.isdigit():
    padding_width = len(number_part)
else:
    padding_width = DEFAULT_PADDING_WIDTH

print(f"Detected filename padding of {padding_width} digits (e.g., image_{1:0{padding_width}d}).")

Detected filename padding of 1 digits (e.g., image_1).


In [5]:
print("--- Starting Renaming Process ---")

# Prefix of the short-lived staging names used while files are being moved.
TEMP_PREFIX = "__renumber_tmp__"


def _move_pair(src_image, src_json, dst_image, dst_json):
    """Rename an image/JSON pair together without overwriting anything.

    Raises FileExistsError if either destination already exists; on POSIX,
    Path.rename would otherwise silently replace the existing file. If the
    image rename fails after the JSON has been moved, the JSON is moved back
    so the pair is never left half-renamed, and the error is re-raised.
    """
    for dst in (dst_image, dst_json):
        if dst.exists():
            raise FileExistsError(f"target '{dst.name}' already exists")
    # Rename JSON first, then the image
    src_json.rename(dst_json)
    try:
        src_image.rename(dst_image)
    except OSError:
        dst_json.rename(src_json)
        raise


# Step 1: work out every rename up front. The counter runs across all folders,
# so a new name can easily equal the current name of a file that has not been
# processed yet in the same folder.
counter = 1
rename_plan = []
for image_path, json_path in all_file_pairs:
    new_base_name = f"image_{counter:0{padding_width}d}"

    new_image_path = image_path.with_stem(new_base_name)
    new_json_path = json_path.with_stem(new_base_name)

    if image_path != new_image_path:
        rename_plan.append((image_path, json_path, new_image_path, new_json_path))
    counter += 1

# Step 2: move every pair that needs renaming to a unique temporary name. Once
# this is done, no planned source file still occupies a name that another pair
# is about to receive.
staged = []
for slot, (image_path, json_path, new_image_path, new_json_path) in enumerate(rename_plan):
    tmp_image_path = image_path.with_name(f"{TEMP_PREFIX}{slot}{image_path.suffix}")
    tmp_json_path = json_path.with_name(f"{TEMP_PREFIX}{slot}{json_path.suffix}")
    try:
        _move_pair(image_path, json_path, tmp_image_path, tmp_json_path)
    except OSError as e:
        print(f"Error renaming '{image_path.name}': {e}")
        continue
    staged.append(
        (image_path, json_path, tmp_image_path, tmp_json_path, new_image_path, new_json_path)
    )

# Step 3: move the staged pairs to their final names. A target can still be
# taken by a file that is not part of the plan (e.g. an image without a JSON);
# in that case the pair is reported and put back under its original name.
renamed_count = 0
for image_path, json_path, tmp_image_path, tmp_json_path, new_image_path, new_json_path in staged:
    try:
        _move_pair(tmp_image_path, tmp_json_path, new_image_path, new_json_path)
        # print(f"Renamed pair for '{new_image_path.stem}' in '{image_path.parent.name}'") # Uncomment for verbose output
        renamed_count += 1
    except OSError as e:
        print(f"Error renaming '{image_path.name}': {e}")
        try:
            _move_pair(tmp_image_path, tmp_json_path, image_path, json_path)
        except OSError as restore_error:
            print(
                f"Could not restore '{image_path.name}'; files may remain as "
                f"'{tmp_image_path.name}' / '{tmp_json_path.name}': {restore_error}"
            )

print("\n--- Process Complete ---")
print(f"Successfully renamed {renamed_count} image/JSON pairs.")

--- Starting Renaming Process ---

--- Process Complete ---
Successfully renamed 16058 image/JSON pairs.
