In [6]:
import os
import hashlib
import pandas as pd

# ================= ADD PATH HERE =================
# Folder whose image files are hashed, de-duplicated and renamed in place.
# NOTE: this is a machine-specific absolute path; change it before running
# the cell on another computer.
IMAGE_DIR = r"C:\Users\maila\Desktop\Defect_Detection\Ok copy"
# First number used for the sequential output file names.
START_INDEX = 1
# =================================================

# File-name suffixes treated as images. They are compared against the
# lower-cased file name, so upper-case extensions match as well.
VALID_EXTENSIONS = (".jpg", ".jpeg", ".png")

def compute_hash(file_path):
    """Return the SHA-256 hex digest of the file stored at ``file_path``.

    The file is opened in binary mode and consumed in 8192-byte pieces, so
    the whole file never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(8192)
            if block == b"":
                # read() returns an empty bytes object at end of file.
                break
            digest.update(block)
    return digest.hexdigest()

# Collect and sort image files.
# Only regular files are considered, so a sub-folder whose name happens to end
# in an image extension can never be picked up and renamed.
image_files = sorted(
    f for f in os.listdir(IMAGE_DIR)
    if f.lower().endswith(VALID_EXTENSIONS)
    and os.path.isfile(os.path.join(IMAGE_DIR, f))
)

if not image_files:
    raise ValueError("No images found in the folder.")

seen_hashes = set()
unique_files = []
duplicate_files = []

# STEP 0: Detect duplicates.
# Files with the same SHA-256 digest are treated as the same image. The first
# one in sorted order is kept; the others are only reported and are left
# untouched on disk.
for fname in image_files:
    file_hash = compute_hash(os.path.join(IMAGE_DIR, fname))

    if file_hash in seen_hashes:
        duplicate_files.append(fname)
    else:
        seen_hashes.add(file_hash)
        unique_files.append(fname)

print(f"Unique images   : {len(unique_files)}")
print(f"Duplicate images: {len(duplicate_files)}")

# STEP 1: Build the complete rename plan before touching the disk.
# The original extension is kept (".jpeg" is normalised to ".jpg", which is
# the same format) so that a PNG is never given a misleading ".jpg" suffix.
mapping = []
for counter, fname in enumerate(unique_files, start=START_INDEX):
    extension = os.path.splitext(fname)[1].lower()
    if extension == ".jpeg":
        extension = ".jpg"
    mapping.append({
        "old_name": fname,
        "temp_name": f"__temp__{counter}.tmp",
        "new_name": f"{counter}{extension}",
    })

# Everything that stays in the folder under its current name (skipped
# duplicates, non-image files, left-over temp files) must not clash with a
# temp or final name. os.rename would raise half-way through on Windows and
# silently overwrite the existing file on POSIX. normcase makes the comparison
# case-insensitive on Windows.
staying = (
    {os.path.normcase(name) for name in os.listdir(IMAGE_DIR)}
    - {os.path.normcase(name) for name in unique_files}
)
conflicts = sorted({
    name
    for item in mapping
    for name in (item["temp_name"], item["new_name"])
    if os.path.normcase(name) in staying
})
if conflicts:
    raise FileExistsError(
        f"{len(conflicts)} target name(s) already exist in the folder and "
        f"would be overwritten, e.g. {conflicts[:5]}. Nothing was renamed."
    )

# STEP 2: Rename UNIQUE images to temporary names.
# Going through temp names first guarantees that a final name such as "3.jpg"
# is never still occupied by an image that has not been moved yet.
for item in mapping:
    os.rename(
        os.path.join(IMAGE_DIR, item["old_name"]),
        os.path.join(IMAGE_DIR, item["temp_name"]),
    )

# STEP 3: Rename temp files to final sequential names
final_mapping = []

for item in mapping:
    os.rename(
        os.path.join(IMAGE_DIR, item["temp_name"]),
        os.path.join(IMAGE_DIR, item["new_name"]),
    )

    final_mapping.append({
        "old_name": item["old_name"],
        "new_name": item["new_name"]
    })

# Save mappings (written to the current working directory).
pd.DataFrame(final_mapping).to_csv("image_rename_mapping_ok.csv", index=False)
pd.DataFrame({"duplicate_file": duplicate_files}).to_csv(
    "duplicate_images_skipped.csv", index=False
)

print("\nRenaming complete.")
print(f"Images renamed        : {len(final_mapping)}")
print(f"Duplicates skipped    : {len(duplicate_files)}")
print("Mapping saved as      : image_rename_mapping_ok.csv")
print("Duplicates list saved : duplicate_images_skipped.csv")


Unique images   : 1002
Duplicate images: 10

Renaming complete.
Images renamed        : 1002
Duplicates skipped    : 10
Mapping saved as      : image_rename_mapping_ok.csv
Duplicates list saved : duplicate_images_skipped.csv


In [7]:
import os
import hashlib
import pandas as pd

# ================= ADD PATH HERE =================
# Folder whose image files are hashed, de-duplicated and renamed in place.
# NOTE: this is a machine-specific absolute path; change it before running
# the cell on another computer.
IMAGE_DIR = r"C:\Users\maila\Desktop\Defect_Detection\Not_OK copy"
# First number used for the sequential output file names.
# NOTE(review): presumably 2000 was chosen so these numbers do not overlap
# with the ones given to the "Ok copy" folder - confirm.
START_INDEX = 2000
# =================================================

# File-name suffixes treated as images. They are compared against the
# lower-cased file name, so upper-case extensions match as well.
VALID_EXTENSIONS = (".jpg", ".jpeg", ".png")

def compute_hash(file_path):
    """Compute the SHA-256 checksum of a file and return it as a hex string.

    Reads the file as raw bytes, 8192 bytes at a time, feeding each piece to
    the hash object until the file is exhausted.
    """
    hasher = hashlib.sha256()
    with open(file_path, "rb") as handle:
        data = handle.read(8192)
        while data:
            hasher.update(data)
            data = handle.read(8192)
    return hasher.hexdigest()

# Collect and sort image files.
# Only regular files are considered, so a sub-folder whose name happens to end
# in an image extension can never be picked up and renamed.
image_files = sorted(
    f for f in os.listdir(IMAGE_DIR)
    if f.lower().endswith(VALID_EXTENSIONS)
    and os.path.isfile(os.path.join(IMAGE_DIR, f))
)

if not image_files:
    raise ValueError("No images found in the folder.")

seen_hashes = set()
unique_files = []
duplicate_files = []

# STEP 0: Detect duplicates.
# Files with the same SHA-256 digest are treated as the same image. The first
# one in sorted order is kept; the others are only reported and are left
# untouched on disk.
for fname in image_files:
    file_hash = compute_hash(os.path.join(IMAGE_DIR, fname))

    if file_hash in seen_hashes:
        duplicate_files.append(fname)
    else:
        seen_hashes.add(file_hash)
        unique_files.append(fname)

print(f"Unique images   : {len(unique_files)}")
print(f"Duplicate images: {len(duplicate_files)}")

# STEP 1: Build the complete rename plan before touching the disk.
# The original extension is kept (".jpeg" is normalised to ".jpg", which is
# the same format) so that a PNG is never given a misleading ".jpg" suffix.
mapping = []
for counter, fname in enumerate(unique_files, start=START_INDEX):
    extension = os.path.splitext(fname)[1].lower()
    if extension == ".jpeg":
        extension = ".jpg"
    mapping.append({
        "old_name": fname,
        "temp_name": f"__temp__{counter}.tmp",
        "new_name": f"{counter}{extension}",
    })

# Everything that stays in the folder under its current name (skipped
# duplicates, non-image files, left-over temp files) must not clash with a
# temp or final name. os.rename would raise half-way through on Windows and
# silently overwrite the existing file on POSIX. normcase makes the comparison
# case-insensitive on Windows.
staying = (
    {os.path.normcase(name) for name in os.listdir(IMAGE_DIR)}
    - {os.path.normcase(name) for name in unique_files}
)
conflicts = sorted({
    name
    for item in mapping
    for name in (item["temp_name"], item["new_name"])
    if os.path.normcase(name) in staying
})
if conflicts:
    raise FileExistsError(
        f"{len(conflicts)} target name(s) already exist in the folder and "
        f"would be overwritten, e.g. {conflicts[:5]}. Nothing was renamed."
    )

# STEP 2: Rename UNIQUE images to temporary names.
# Going through temp names first guarantees that a final name such as
# "2003.jpg" is never still occupied by an image that has not been moved yet.
for item in mapping:
    os.rename(
        os.path.join(IMAGE_DIR, item["old_name"]),
        os.path.join(IMAGE_DIR, item["temp_name"]),
    )

# STEP 3: Rename temp files to final sequential names
final_mapping = []

for item in mapping:
    os.rename(
        os.path.join(IMAGE_DIR, item["temp_name"]),
        os.path.join(IMAGE_DIR, item["new_name"]),
    )

    final_mapping.append({
        "old_name": item["old_name"],
        "new_name": item["new_name"]
    })

# Save mappings (written to the current working directory).
# The duplicates list gets its own "_Not_ok" file name, like the mapping file,
# so it does not overwrite the duplicates list written for another folder.
pd.DataFrame(final_mapping).to_csv("image_rename_mapping_Not_ok.csv", index=False)
pd.DataFrame({"duplicate_file": duplicate_files}).to_csv(
    "duplicate_images_skipped_Not_ok.csv", index=False
)

print("\nRenaming complete.")
print(f"Images renamed        : {len(final_mapping)}")
print(f"Duplicates skipped    : {len(duplicate_files)}")
print("Mapping saved as      : image_rename_mapping_Not_ok.csv")
print("Duplicates list saved : duplicate_images_skipped_Not_ok.csv")


Unique images   : 4699
Duplicate images: 9

Renaming complete.
Images renamed        : 4699
Duplicates skipped    : 9
Mapping saved as      : image_rename_mapping_Not_ok.csv
Duplicates list saved : duplicate_images_skipped.csv
