# Image Renaming Notebook

This notebook standardizes filenames in the `data/raw/` directory.

**Naming format:** `{folder_name}_{sequence_number}.jpg`

Example: `alu-bottle_001.jpg`, `alu-bottle_002.jpg`, etc.


In [None]:
import os
import shutil
from pathlib import Path

In [2]:
# Configuration
RAW_DIR = Path("data/raw")

# Supported image extensions
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".tiff"}

print(f"Raw directory: {RAW_DIR.absolute()}")
print(f"Directory exists: {RAW_DIR.exists()}")

Raw directory: c:\Users\thatt\Documents\Coding Project\Python Projects\PyCode-Arsenal-EDU\projects\KB-img-pro\data\raw
Directory exists: True


In [3]:
def get_image_files(folder_path: Path) -> list:
    """
    Get all image files in a folder, sorted alphabetically.

    Args:
        folder_path: Path to the folder containing images

    Returns:
        List of Path objects for each image file
    """
    image_files = []
    for file_path in folder_path.iterdir():
        if file_path.is_file() and file_path.suffix.lower() in IMAGE_EXTENSIONS:
            image_files.append(file_path)

    # Sort files to ensure consistent ordering
    return sorted(image_files)

In [4]:
def rename_images_in_folder(folder_path: Path, dry_run: bool = True) -> dict:
    """
    Rename all images in a folder to the format: {folder_name}_{sequence}.jpg

    Args:
        folder_path: Path to subfolder in raw directory
        dry_run: If True, only print what would happen without renaming

    Returns:
        Dictionary with stats: {'renamed': int, 'errors': list}
    """
    folder_name = folder_path.name
    image_files = get_image_files(folder_path)

    stats = {"renamed": 0, "skipped": 0, "errors": []}

    if not image_files:
        print(f"  No images found in '{folder_name}'")
        return stats

    # Determine padding width based on number of files
    padding_width = max(3, len(str(len(image_files))))

    print(f"  Processing {len(image_files)} images in '{folder_name}'...")

    # First pass: Rename to temporary names to avoid conflicts
    temp_mapping = []
    for idx, file_path in enumerate(image_files, start=1):
        sequence_number = str(idx).zfill(padding_width)
        new_name = f"{folder_name}_{sequence_number}.jpg"
        new_path = folder_path / new_name
        temp_path = folder_path / f"__temp_{idx}_{file_path.name}"

        temp_mapping.append((file_path, temp_path, new_path, new_name))

    if dry_run:
        print(f"  [DRY RUN] Would rename {len(temp_mapping)} files:")
        for old_path, _, new_path, new_name in temp_mapping[:5]:
            print(f"    {old_path.name} -> {new_name}")
        if len(temp_mapping) > 5:
            print(f"    ... and {len(temp_mapping) - 5} more")
        stats["renamed"] = len(temp_mapping)
    else:
        # Step 1: Rename to temporary names
        for old_path, temp_path, _, _ in temp_mapping:
            try:
                shutil.move(str(old_path), str(temp_path))
            except Exception as e:
                stats["errors"].append(f"Error moving {old_path} to temp: {e}")

        # Step 2: Rename from temporary to final names
        for _, temp_path, new_path, new_name in temp_mapping:
            try:
                shutil.move(str(temp_path), str(new_path))
                stats["renamed"] += 1
            except Exception as e:
                stats["errors"].append(f"Error renaming to {new_name}: {e}")

    return stats

In [5]:
def rename_all_images(raw_dir: Path, dry_run: bool = True) -> dict:
    """
    Rename images in all subfolders of the raw directory.

    Args:
        raw_dir: Path to the raw data directory
        dry_run: If True, only show what would happen

    Returns:
        Summary statistics dictionary
    """
    if not raw_dir.exists():
        raise FileNotFoundError(f"Raw directory not found: {raw_dir}")

    subfolders = sorted([f for f in raw_dir.iterdir() if f.is_dir()])

    print(f"{'=' * 60}")
    print(f"Image Renaming {'(DRY RUN)' if dry_run else '(ACTUAL RUN)'}")
    print(f"{'=' * 60}")
    print(f"Found {len(subfolders)} subfolders in '{raw_dir}'\n")

    total_stats = {"total_renamed": 0, "total_errors": []}

    for folder in subfolders:
        stats = rename_images_in_folder(folder, dry_run=dry_run)
        total_stats["total_renamed"] += stats["renamed"]
        total_stats["total_errors"].extend(stats["errors"])
        print()

    print(f"{'=' * 60}")
    print(f"SUMMARY")
    print(f"{'=' * 60}")
    print(
        f"Total images {'would be' if dry_run else ''} renamed: {total_stats['total_renamed']}"
    )

    if total_stats["total_errors"]:
        print(f"\nErrors encountered:")
        for error in total_stats["total_errors"]:
            print(f"  - {error}")
    else:
        print("No errors encountered.")

    return total_stats

## Step 1: Dry Run (Preview Changes)

First, let's see what changes would be made without actually renaming anything.


In [9]:
# DRY RUN - Preview what will be renamed
rename_all_images(RAW_DIR, dry_run=True)

Image Renaming (DRY RUN)
Found 6 subfolders in 'data\raw'

  Processing 33 images in 'alu-bottle'...
  [DRY RUN] Would rename 33 files:
    1xAk6OpNOGwu.jpg -> alu-bottle_001.jpg
    4292Y6pgnMFi.jpg -> alu-bottle_002.jpg
    48BdobIo4EqM.jpg -> alu-bottle_003.jpg
    5fbWoReJi3SF.jpg -> alu-bottle_004.jpg
    5nvSxWaucCys.jpg -> alu-bottle_005.jpg
    ... and 28 more

  Processing 1 images in 'bg'...
  [DRY RUN] Would rename 1 files:
    a7WHkJlQhK4E.jpg -> bg_001.jpg

  Processing 59 images in 'drink'...
  [DRY RUN] Would rename 59 files:
    0MtkjSJQdIJj.jpg -> drink_001.jpg
    2cDjy4qc19rb.jpg -> drink_002.jpg
    2EorjdTkMMAM.jpg -> drink_003.jpg
    6hebzh4fC7g1.jpg -> drink_004.jpg
    7CLtNaxF74js.jpg -> drink_005.jpg
    ... and 54 more

  Processing 171 images in 'pet'...
  [DRY RUN] Would rename 171 files:
    0cvEnBrx1tRT.jpg -> pet_001.jpg
    0iBk05MUrkuD.jpg -> pet_002.jpg
    0j3uxSQYeSRo.jpg -> pet_003.jpg
    0UJCO7lmg4NG.jpg -> pet_004.jpg
    17YesWb4Vucm.jpg -> pe

{'total_renamed': 295, 'total_errors': []}

## Step 2: Execute Renaming

⚠️ **WARNING:** Running the cell below will permanently rename all image files. Make sure to backup your data first!

Uncomment and run when ready.


In [10]:
# ACTUAL RUN - Uncomment the line below to execute renaming
rename_all_images(RAW_DIR, dry_run=False)

Image Renaming (ACTUAL RUN)
Found 6 subfolders in 'data\raw'

  Processing 33 images in 'alu-bottle'...

  Processing 1 images in 'bg'...

  Processing 59 images in 'drink'...

  Processing 171 images in 'pet'...

  Processing 24 images in 'taobin-pa'...

  Processing 7 images in 'yakuu'...

SUMMARY
Total images  renamed: 295
No errors encountered.


{'total_renamed': 295, 'total_errors': []}

## Verification

After renaming, verify the results:


In [8]:
# Verify renamed files
for folder in sorted(RAW_DIR.iterdir()):
    if folder.is_dir():
        files = list(folder.iterdir())
        print(f"\n{folder.name}/")
        for f in sorted(files)[:5]:
            print(f"  {f.name}")
        if len(files) > 5:
            print(f"  ... and {len(files) - 5} more files")


alu-bottle/
  1xAk6OpNOGwu.jpg
  4292Y6pgnMFi.jpg
  48BdobIo4EqM.jpg
  5fbWoReJi3SF.jpg
  5nvSxWaucCys.jpg
  ... and 28 more files

bg/
  a7WHkJlQhK4E.jpg

drink/
  0MtkjSJQdIJj.jpg
  2cDjy4qc19rb.jpg
  2EorjdTkMMAM.jpg
  6hebzh4fC7g1.jpg
  7CLtNaxF74js.jpg
  ... and 54 more files

pet/
  0cvEnBrx1tRT.jpg
  0iBk05MUrkuD.jpg
  0j3uxSQYeSRo.jpg
  0UJCO7lmg4NG.jpg
  17YesWb4Vucm.jpg
  ... and 166 more files

taobin-pa/
  0N8sR37nR1lK.jpg
  0uODp9TE1DN9.jpg
  1HRbaEmGpl3Q.jpg
  1MltUBzKSq3V.jpg
  5Um2H07Lzayp.jpg
  ... and 19 more files

yakuu/
  4Kgo9M9MJh3a.jpg
  5omfGCaUmN2H.jpg
  arDqft4xC1iS.jpg
  Ce8RVX9apOAc.jpg
  hRXX4ARXGS36.jpg
  ... and 2 more files
