In [12]:
import os
import pandas as pd
import re

In [2]:
# Root folder of the dataset; it is scanned recursively for PNG files.
path = "data/synthbuster"

In [7]:
def find_all_images(root_folder):
    """
    Recursively collect every PNG file located under a directory.

    The extension check is case-insensitive, so '.PNG' and '.Png' files are
    included. Paths are returned in the order os.walk yields them.

    Args:
        root_folder (str): Directory at which the recursive search starts.
    Returns:
        List[str]: One path per PNG file, each built by joining the directory
            being visited with the file name.
    """
    return [
        os.path.join(current_dir, entry)
        for current_dir, _subdirs, entries in os.walk(root_folder)
        for entry in entries
        if entry.lower().endswith('.png')
    ]

In [31]:
file_paths = find_all_images(path)

# Fail early with a readable message instead of a bare IndexError on the
# file_paths[0] lookup below when the dataset folder is missing or empty.
assert file_paths, f"No PNG images found under {path!r}"

print(f"Found {len(file_paths)} images in {path}")
print("First path: ", file_paths[0])

Found 9000 images in data/synthbuster
First path:  data/synthbuster/dalle3/r1b106abdt.png


In [32]:
def filter_images_by_prompt(csv_path, words):
    """
    Returns a list of image names where all the given words appear in the prompt.

    Matching is a case-insensitive substring test, so 'building' also matches
    'buildings'. Rows whose prompt is missing are treated as having an empty
    prompt instead of raising an error.

    Args:
        csv_path (str): Path to the CSV file. It must contain the columns
            'Prompt' and 'image name (matching Raise-1k)'.
        words (list of str or str): Words to search for in the prompt. A single
            string is treated as one word rather than as a sequence of letters.

    Returns:
        List[str]: Image names where all words are present in the prompt.

    Raises:
        KeyError: If one of the required columns is absent from the CSV.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(words, str):
        words = [words]
    lowered_words = [w.lower() for w in words]

    df = pd.read_csv(csv_path)

    def prompt_contains_all(prompt):
        # Empty CSV fields are read as NaN (a float); `word in nan` would raise
        # TypeError, so anything that is not text counts as an empty prompt.
        text = prompt.lower() if isinstance(prompt, str) else ''
        return all(word in text for word in lowered_words)

    # astype(bool) keeps the mask a valid boolean indexer even for an empty frame.
    mask = df['Prompt'].map(prompt_contains_all).astype(bool)
    return df.loc[mask, 'image name (matching Raise-1k)'].tolist()

In [35]:
csv_path = "data/synthbuster/prompts.csv"
image_names = filter_images_by_prompt(csv_path, ['building'])
# Guard the image_names[0] lookup below: an empty match list should fail with
# a message that explains what went wrong rather than a bare IndexError.
assert image_names, f"No prompt in {csv_path!r} contains the requested words"
print(len(image_names))
print("First image name: ", image_names[0])

190
First image name:  r0b3979d0t


In [36]:
def filter_paths_by_image_stems(file_paths, image_names):
    """
    Keep only the paths whose file name, stripped of its extension, is one of
    the requested image names.

    Args:
        file_paths (list of str): Candidate image file paths.
        image_names (list of str): Image names (without extensions) to keep.

    Returns:
        List[str]: The matching paths, in the same order as in file_paths.
    """
    # Set membership keeps the per-path lookup cheap for long name lists.
    wanted_stems = set(image_names)
    selected = []
    for candidate in file_paths:
        filename = os.path.basename(candidate)
        stem, _extension = os.path.splitext(filename)
        if stem in wanted_stems:
            selected.append(candidate)
    return selected


In [41]:
filtered_paths = filter_paths_by_image_stems(file_paths, image_names)
# Guard the filtered_paths[0] lookup below so an empty selection fails with a
# readable message rather than a bare IndexError.
assert filtered_paths, "None of the discovered image files matches the selected image names"
print("First image name: ", filtered_paths[0])
print("Len of filtered paths: ", len(filtered_paths))


First image name:  data/synthbuster/dalle3/r1ee6f90et.png
Len of filtered paths:  1710


In [54]:
def make_name_folder_filenames(file_paths):
    """
    Pair every path with a new file name that embeds its parent folder.

    The new name has the form 'name_folder.ext'; for example
    'data/synthbuster/dalle3/r1ee6f90et.png' becomes 'r1ee6f90et_dalle3.png'.

    Args:
        file_paths (list of str): Original file paths.

    Returns:
        List[Tuple[str, str]]: One (new_name, original_path) tuple per input
            path, in input order.
    """
    def _rename(original):
        # os.path.split yields (directory, file name) in a single call.
        parent_dir, filename = os.path.split(original)
        stem, extension = os.path.splitext(filename)
        return f"{stem}_{os.path.basename(parent_dir)}{extension}", original

    return [_rename(original) for original in file_paths]

In [55]:
filtered_tuples = make_name_folder_filenames(filtered_paths)
# These two prints produce the output recorded for this cell (the first
# (new_name, path) tuple and the tuple count).
print("First new name: ", filtered_tuples[0])
print(len(filtered_tuples))

First new name:  ('r1ee6f90et_dalle3.png', 'data/synthbuster/dalle3/r1ee6f90et.png')
1710


In [56]:
# Folder that receives the renamed copies. It sits inside the dataset root
# ("data/synthbuster"), so a later recursive scan of that root also walks it.
destination_folder = "data/synthbuster/building"

In [57]:
import shutil

def copy_tuples_to_destination(tuples_list, destination_folder):
    """
    Copy each source file into one folder, giving it its new file name.

    The destination folder is created when it does not exist yet. Files are
    copied with shutil.copy2.

    Args:
        tuples_list (list of tuples): (new_name, original_path) pairs.
        destination_folder (str): Folder that receives the copies.
    """
    os.makedirs(destination_folder, exist_ok=True)
    for pair in tuples_list:
        target_name, source_path = pair
        shutil.copy2(source_path, os.path.join(destination_folder, target_name))


In [58]:
# Copy every selected image into destination_folder under its new file name.
copy_tuples_to_destination(filtered_tuples, destination_folder)

In [62]:
import os
import shutil
from PIL import Image
from IPython.display import display, clear_output
import ipywidgets as widgets

def image_browser_move(folder, dest_folder):
    """
    Show the images of a folder one at a time with "Keep (Move)" and "Skip" buttons.

    "Keep (Move)" moves the displayed image into dest_folder and shows the next
    one; "Skip" leaves the image where it is and shows the next one. Once the
    last image has been handled both buttons are disabled.

    Args:
        folder (str): Folder whose images are reviewed. Only files directly in
            this folder with a .png, .jpg, .jpeg, .bmp or .gif extension
            (case-insensitive) are listed, in sorted order.
        dest_folder (str): Folder that receives kept images; created if missing.
    """
    os.makedirs(dest_folder, exist_ok=True)
    image_files = sorted(
        f for f in os.listdir(folder)
        if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif'))
    )
    index = 0  # position in image_files of the image currently displayed

    output = widgets.Output()
    keep_button = widgets.Button(description="Keep (Move)", button_style='success')
    skip_button = widgets.Button(description="Skip", button_style='warning')

    def finish(message, status=None):
        # Disable the buttons so a later click cannot index past the end of
        # image_files (previously an IndexError inside the callback).
        keep_button.disabled = True
        skip_button.disabled = True
        output.clear_output(wait=True)
        with output:
            print(message)
            if status:
                print(status)

    def show_image(status=None):
        if index >= len(image_files):
            finish("No more images.", status)
            return
        current = image_files[index]
        output.clear_output(wait=True)
        with output:
            # Close the file as soon as it is rendered: an open handle leaks
            # and can block the later move of this file on some platforms.
            with Image.open(os.path.join(folder, current)) as img:
                display(img)
            print(f"{index+1}/{len(image_files)}: {current}")
            # The result of the previous action is shown inside the output
            # widget so it is visible next to the image.
            if status:
                print(status)

    def on_keep(b):
        if index >= len(image_files):
            return
        current = image_files[index]
        try:
            shutil.move(os.path.join(folder, current), os.path.join(dest_folder, current))
            status = f"Moved: {current} to {dest_folder}"
        except Exception as e:
            # Best effort: report the failure and continue with the next image.
            status = f"Error moving {current}: {e}"
        # Dropping the entry makes the next image slide into the same index.
        del image_files[index]
        if index >= len(image_files):
            finish("Finished.", status)
        else:
            show_image(status)

    def on_skip(b):
        nonlocal index
        if index >= len(image_files):
            return
        index += 1
        if index >= len(image_files):
            finish("Finished.")
        else:
            show_image()

    keep_button.on_click(on_keep)
    skip_button.on_click(on_skip)

    show_image()
    display(widgets.HBox([keep_button, skip_button]), output)

# Usage:
# image_browser_move('data/synthbuster/building', 'data/synthbuster/keep')


In [63]:
# Folder whose images are reviewed one by one.
folder = "data/synthbuster/building"
# Images accepted with the "Keep (Move)" button are moved here.
dest_folder = "data/synthbuster/keep"

In [None]:
# Launch the interactive review widget (built on ipywidgets buttons).
image_browser_move(folder, dest_folder)

HBox(children=(Button(button_style='success', description='Keep (Move)', style=ButtonStyle()), Button(button_s…

Output()