In [1]:
import cv2
import os
from glob import glob

def crop_from_labels(
    image_dir: str,
    label_dir: str,
    output_dir: str
):
    """Crop every labelled box out of its image and save each crop as a JPEG.

    For each ``*.txt`` file in ``label_dir`` the image with the same base name
    and a ``.jpg`` extension is loaded from ``image_dir``. Each non-blank label
    line is expected to hold five whitespace-separated numbers,
    ``class x_center y_center width height``, with the four box values
    normalized to the image size (YOLO-style). The box is converted to pixel
    coordinates, clamped to the image, and the crop is written to
    ``output_dir`` as ``<base>_cls<class>_<line index>.jpg``.

    Inputs that cannot produce a crop are reported with ``print`` and skipped
    instead of aborting the whole run:

    * the image is missing or unreadable,
    * a label line is not exactly five numeric fields,
    * the box lies entirely outside the image or has zero area.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        label_dir: Directory containing the ``.txt`` label files.
        output_dir: Directory that receives the crops; created if needed.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so the processing order (and any log output) is reproducible.
    for lbl_path in sorted(glob(os.path.join(label_dir, "*.txt"))):
        base = os.path.splitext(os.path.basename(lbl_path))[0]
        img_path = os.path.join(image_dir, base + ".jpg")
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            print(f"Skipping {lbl_path}: cannot read image {img_path}")
            continue
        h, w = img.shape[:2]

        with open(lbl_path) as f:
            # `i` counts raw lines (blank ones included) so the numbering of
            # output files matches the line position inside the label file.
            for i, line in enumerate(f):
                fields = line.split()
                if not fields:
                    continue
                try:
                    cls_id, x_ctr, y_ctr, w_rel, h_rel = map(float, fields)
                except ValueError:
                    # Wrong number of fields or a non-numeric token.
                    print(f"Skipping malformed label at {lbl_path}:{i + 1}")
                    continue

                # convert normalized to pixels
                bw, bh = w_rel * w, h_rel * h
                bx, by = x_ctr * w, y_ctr * h

                # Clamp to the image: a negative start index would otherwise
                # be interpreted by NumPy as "count from the end".
                x1 = min(max(int(bx - bw/2), 0), w)
                y1 = min(max(int(by - bh/2), 0), h)
                x2 = min(max(int(bx + bw/2), 0), w)
                y2 = min(max(int(by + bh/2), 0), h)

                if x2 <= x1 or y2 <= y1:
                    print(f"Skipping empty box at {lbl_path}:{i + 1}")
                    continue

                crop = img[y1:y2, x1:x2]
                out_name = f"{base}_cls{int(cls_id)}_{i}.jpg"
                out_path = os.path.join(output_dir, out_name)
                if not cv2.imwrite(out_path, crop):
                    print(f"Failed to write {out_path}")

if __name__ == "__main__":
    # Crop the training split of the dataset into a flat output folder.
    # Arguments are, in order: image_dir, label_dir, output_dir.
    crop_from_labels(
        "cccd_id.v3i.yolov8/train/images",
        "cccd_id.v3i.yolov8/train/labels",
        "cropped_id_train_1",
    )


In [2]:
import os
import re

def clean_filenames(folder: str):
    """Strip the ``_jpg.rf.<hex>`` / ``_png.rf.<hex>`` suffix from file names.

    Every regular file directly inside ``folder`` whose base name (the name
    without its final extension) contains such a suffix is renamed with the
    suffix removed. Sub-directories are left alone, and files whose name does
    not change are not touched.

    If the cleaned name is already taken, the file is left under its original
    name and a warning is printed. ``os.rename`` would otherwise silently
    replace the existing file on POSIX and raise on Windows.

    Args:
        folder: Directory whose files should be renamed in place.
    """
    # matches the unwanted hash‐style suffix
    pattern = re.compile(r'_(?:jpg|png)\.rf\.[0-9a-fA-F]+')

    for fname in os.listdir(folder):
        full_path = os.path.join(folder, fname)
        if not os.path.isfile(full_path):
            continue

        base, ext = os.path.splitext(fname)
        # remove the _jpg.rf.<hash> or _png.rf.<hash> part
        new_base = pattern.sub('', base)

        # nothing to strip: leave the file as it is
        if new_base == base:
            continue

        new_name = new_base + ext
        new_path = os.path.join(folder, new_name)

        # never clobber a file that already owns the cleaned name
        if os.path.exists(new_path):
            print(f"⚠️  {new_name} already exists, skipping rename of {fname}")
            continue

        print(f"Renaming:\n  {fname}\n→ {new_name}\n")
        os.rename(full_path, new_path)

if __name__ == "__main__":
    # Clean up the names of the crops written to the output folder.
    clean_filenames(folder="cropped_id_train_1")


Renaming:
  img1357_jpg.rf.6f3505524a484948e971ab021e874b96_cls0_0.jpg
→ img1357_cls0_0.jpg

Renaming:
  img1488_jpg.rf.5ef434631313b73c3f5292ce136534f2_cls0_0.jpg
→ img1488_cls0_0.jpg

Renaming:
  img1339_jpg.rf.d959c04539fca4fa511168bf10d856a2_cls0_0.jpg
→ img1339_cls0_0.jpg

Renaming:
  img1275_jpg.rf.8b0f0725bad1401cbc4219d34dfc1092_cls0_0.jpg
→ img1275_cls0_0.jpg

Renaming:
  img1038_jpg.rf.34531171c7e9b2dbe1241509690bbd0f_cls0_0.jpg
→ img1038_cls0_0.jpg

Renaming:
  img1627_jpg.rf.42633b7fa32a824c35877d9f5c8e3a31_cls0_0.jpg
→ img1627_cls0_0.jpg

Renaming:
  img1316_jpg.rf.f9e58d36213333fc16279f09ad994a8c_cls0_0.jpg
→ img1316_cls0_0.jpg

Renaming:
  img1280_jpg.rf.81331000d1b7bc215bbf45a21c430da0_cls0_0.jpg
→ img1280_cls0_0.jpg

Renaming:
  img1303_jpg.rf.13b75e847c9f0228f712c661cb87b33c_cls0_0.jpg
→ img1303_cls0_0.jpg

Renaming:
  img1617_jpg.rf.6c96651ccdec0a4aeb7e26043a1894be_cls0_0.jpg
→ img1617_cls0_0.jpg

Renaming:
  img1564_jpg.rf.8fde99098bc5e959ef0035e4cfeae211_cls0_0.jpg

In [3]:
import os

def simplify_filenames(folder: str):
    """Shorten each file name in ``folder`` to the text before its first ``_``.

    The final extension is kept, so ``img1_cls0_0.jpg`` becomes ``img1.jpg``.
    Only regular files directly inside ``folder`` are considered. A file is
    left untouched when its name would not change, and is skipped with a
    warning when the shortened name already exists.

    Args:
        folder: Directory whose files should be renamed in place.
    """
    for entry in os.listdir(folder):
        src = os.path.join(folder, entry)
        if os.path.isfile(src):
            stem, suffix = os.path.splitext(entry)
            # keep only the leading segment of the stem, up to the first "_"
            short_name = stem.partition("_")[0] + suffix
            dst = os.path.join(folder, short_name)

            if dst == src:
                # the name has no underscore to cut at
                continue

            if not os.path.exists(dst):
                print(f"Renaming: {entry} → {short_name}")
                os.rename(src, dst)
            else:
                # an existing file already owns the short name: do not overwrite
                print(f"⚠️  {short_name} already exists, skipping rename of {entry}")

if __name__ == "__main__":
    # Reduce every crop's name to the part before its first underscore.
    simplify_filenames(folder="cropped_id_train_1")


Renaming: img1589_cls0_0.jpg → img1589.jpg
Renaming: img1514_cls0_0.jpg → img1514.jpg
Renaming: img1276_cls0_0.jpg → img1276.jpg
Renaming: img1362_cls0_0.jpg → img1362.jpg
Renaming: img1437_cls0_0.jpg → img1437.jpg
Renaming: img1300_cls0_0.jpg → img1300.jpg
Renaming: img1337_cls0_0.jpg → img1337.jpg
Renaming: img1295_cls0_0.jpg → img1295.jpg
Renaming: img1381_cls0_0.jpg → img1381.jpg
Renaming: img1349_cls0_0.jpg → img1349.jpg
Renaming: img1634_cls0_0.jpg → img1634.jpg
Renaming: img1449_cls0_0.jpg → img1449.jpg
Renaming: img1603_cls0_0.jpg → img1603.jpg
Renaming: img1075_cls0_0.jpg → img1075.jpg
Renaming: img1625_cls0_0.jpg → img1625.jpg
Renaming: img1284_cls0_0.jpg → img1284.jpg
Renaming: img1326_cls0_0.jpg → img1326.jpg
Renaming: img1311_cls0_0.jpg → img1311.jpg
Renaming: img1550_cls0_0.jpg → img1550.jpg
Renaming: img1567_cls0_0.jpg → img1567.jpg
Renaming: img1426_cls0_0.jpg → img1426.jpg
Renaming: img1344_cls0_0.jpg → img1344.jpg
Renaming: img1532_cls0_0.jpg → img1532.jpg
Renaming: i