In [18]:
import os
import re

def sanitize_filename(filename):
    """
    Convert a string into a valid filename by:
    1. Removing or replacing invalid characters
    2. Truncating length to avoid too long filenames
    3. Ensuring the filename is unique and valid
    """
    # Remove or replace invalid filename characters
    filename = re.sub(r'[<>:"/\\|?*]', '', filename)
    # Replace spaces and multiple underscores with single underscore
    filename = re.sub(r'\s+', '_', filename)
    # Remove dots from the end of the filename
    filename = filename.rstrip('.')
    # Truncate to reasonable length (max 100 chars)
    filename = filename[:100]
    return filename.lower()

def rename_images(prompt_file, image_dir):
    """
    Rename images in image_dir based on prompts in prompt_file.

    Images (files ending in .png, .jpg or .jpeg) are sorted by name and
    paired positionally with the lines of ``prompt_file``: the first image
    gets the first prompt, and so on. Each image is renamed to the sanitized
    prompt, keeping its original extension. Images paired with an empty
    prompt are left untouched.

    If a target name was already produced by an earlier prompt, or already
    exists in ``image_dir``, a numeric suffix (``_1``, ``_2``, ...) is
    appended so that no existing file is overwritten.

    Args:
        prompt_file: Path to a text file with one prompt per line.
        image_dir: Directory containing the images to rename.

    Progress and errors are reported via ``print``; a failed rename does not
    stop the remaining files from being processed.
    """
    # Read prompts
    with open(prompt_file, 'r') as f:
        prompts = [line.strip() for line in f]

    # Get list of image files
    image_files = sorted(
        name for name in os.listdir(image_dir)
        if name.endswith(('.png', '.jpg', '.jpeg'))
    )

    # Number of times each sanitized base name has been handed out so far
    filename_count = {}

    print(f"Found {len(image_files)} images and {len(prompts)} prompts")
    if len(image_files) != len(prompts):
        # zip() below stops at the shorter list; make that visible
        print(
            "Warning: image and prompt counts differ; only the first "
            f"{min(len(image_files), len(prompts))} pairs will be processed"
        )

    # Rename files
    for image_file, prompt in zip(image_files, prompts):
        if not prompt:  # Skip empty prompts
            continue

        # Create new filename from prompt, keeping the image's own extension
        base_filename = sanitize_filename(prompt)
        extension = os.path.splitext(image_file)[1] or '.png'

        # Pick the first candidate name that neither repeats an earlier
        # prompt nor collides with a file already present in the directory
        while True:
            if base_filename in filename_count:
                filename_count[base_filename] += 1
                new_filename = (
                    f"{base_filename}_{filename_count[base_filename]}{extension}"
                )
            else:
                filename_count[base_filename] = 0
                new_filename = f"{base_filename}{extension}"
            new_path = os.path.join(image_dir, new_filename)
            if new_filename == image_file or not os.path.exists(new_path):
                break

        if new_filename == image_file:
            print(f"Skipped: {image_file} already has the target name")
            continue

        old_path = os.path.join(image_dir, image_file)

        # Rename file
        try:
            os.rename(old_path, new_path)
        except OSError as e:
            print(f"Error renaming {image_file}: {str(e)}")
        else:
            print(f"Renamed: {image_file} -> {new_filename}")

# Example usage: rename the generated images in `image_dir` using the
# prompts listed (one per line) in `prompt_file`.
image_dir = "/shared/shashmi/dawoud_dataset/sd3.5_generated_examples_hard_medium/sd3_hard_examples"
prompt_file = "/shared/shashmi/sd3.5_generated_images/sd3_hard_examples_prompts.txt"

rename_images(prompt_file=prompt_file, image_dir=image_dir)

Found 225 images and 225 prompts
Renamed: 000000.png -> a_pink_toilet_and_a_pink_bath_tub_in_the_bathroom.png
Renamed: 000001.png -> a_busy_chinese_market_crowded_at_night.png
Renamed: 000002.png -> a_bathroom_with_a_stained_glass_and_stone_wall_and.png
Renamed: 000003.png -> a_bathtub_with_candles_lit_up_around_it_and_a_stoo.png
Renamed: 000004.png -> a_bench_overlooks_a_lush_green_valley_below.png
Renamed: 000005.png -> a_bench_that_is_beautifully_shaded_by_a_tree.png
Renamed: 000006.png -> a_big_city_street_at_night_with_the_ghost_of_a_tax.png
Renamed: 000007.png -> a_bird_flying_through_the_air_while_flapping_it_s.png
Renamed: 000008.png -> a_bird_is_sitting_on_a_berry_bush.png
Renamed: 000009.png -> a_bird_that_is_sitting_on_top_of_a_branch.png
Renamed: 000010.png -> a_bird_that_is_sitting_on_top_of_a_tree.png
Renamed: 000011.png -> a_black_and_white_photograph_of_a_plane_with_many.png
Renamed: 000012.png -> a_black_lab_waits_at_a_picnic_table_for_it_s_maste.png
Renamed: 000013.pn