In [32]:
import os
import cv2
from tqdm import tqdm
import albumentations as A
# from albumentations.augmentations.dropout import Cutout
from sklearn.model_selection import train_test_split

print("Starting augmented dataset generation from face-annotated crops...")

input_root = "yolo_annotated_images/"
base_output_path = "yolo_annotated_images/yolo_dataset"

output_dirs = {
    "train_img": os.path.join(base_output_path, "images/train"),
    "train_lbl": os.path.join(base_output_path, "labels/train"),
    "val_img": os.path.join(base_output_path, "images/val"),
    "val_lbl": os.path.join(base_output_path, "labels/val")
}
for path in output_dirs.values():
    os.makedirs(path, exist_ok=True)

transform = A.Compose([
    A.Resize(640, 640),
    
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(
        shift_limit=0.1, scale_limit=0.2, rotate_limit=20, p=0.7, border_mode=0
    ),

    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
    A.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.2, p=0.5),
    A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.3),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.3),

    A.MotionBlur(blur_limit=5, p=0.2),
    A.GaussianBlur(blur_limit=3, p=0.2),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),

    
    A.Perspective(scale=(0.05, 0.1), p=0.2)
], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

person_folders = sorted(os.listdir(input_root))
class_map = {name: idx for idx, name in enumerate(person_folders)}

os.makedirs(base_output_path, exist_ok=True)
with open(os.path.join(base_output_path, "classes.txt"), "w") as f:
    for name, idx in class_map.items():
        f.write(f"{idx} {name}\n")

all_image_label_pairs = []
for person in person_folders:
    person_path = os.path.join(input_root, person)
    for img_file in os.listdir(person_path):
        if img_file.lower().endswith((".jpg", ".jpeg", ".png")):
            img_path = os.path.join(person_path, img_file)
            label_path = os.path.splitext(img_path)[0] + ".txt"
            if os.path.exists(label_path):
                all_image_label_pairs.append((img_path, label_path, person))

train_pairs, val_pairs = train_test_split(all_image_label_pairs, test_size=0.2, random_state=42)
all_pairs = {"train": train_pairs, "val": val_pairs}

for split, pairs in all_pairs.items():
    for img_path, label_path, person in tqdm(pairs, desc=f"Processing {split}"):
        class_id = class_map[person]

        img = cv2.imread(img_path)
        if img is None:
            print(f"Cannot read {img_path}")
            continue

        with open(label_path, "r") as f:
            boxes = []
            class_labels = []
            for line in f.readlines():
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                _, x, y, bw, bh = map(float, parts)

                if bw <= 0 or bh <= 0 or x <= 0 or y <= 0:
                    print(f"Skipping invalid bbox in {label_path}: {x}, {y}, {bw}, {bh}")
                    continue

                boxes.append([x, y, bw, bh])
                class_labels.append(class_id)

        for i in range(40):
            base_name = f"{person}_{os.path.splitext(os.path.basename(img_path))[0]}_{i}.jpg"
            out_img_path = os.path.join(output_dirs[f"{split}_img"], base_name)
            out_lbl_path = os.path.join(output_dirs[f"{split}_lbl"], base_name.replace(".jpg", ".txt"))

            if os.path.exists(out_img_path) and os.path.exists(out_lbl_path):
                continue

            try:
                augmented = transform(image=img, bboxes=boxes, class_labels=class_labels)
            except Exception as e:
                print(f"Augmentation failed on {img_path}: {e}")
                continue

            aug_img = augmented["image"]
            aug_boxes = augmented["bboxes"]
            aug_labels = augmented["class_labels"]

            cv2.imwrite(out_img_path, aug_img)

            with open(out_lbl_path, "w") as f:
                for label, bbox in zip(aug_labels, aug_boxes):
                    x, y, bw, bh = bbox
                    f.write(f"{label} {x:.6f} {y:.6f} {bw:.6f} {bh:.6f}\n")

print("All done with augmentation and label transformation!")

Starting augmented dataset generation from face-annotated crops...


  A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
Processing train:   3%|▎         | 12/376 [00:10<05:31,  1.10it/s]

⚠️ Skipping invalid bbox in yolo_annotated_images/Agrim_Verma\Agrim Verma _frame_13.txt: 0.885417, 0.704688, 0.0, 0.0


Processing train:   5%|▍         | 17/376 [00:15<05:21,  1.12it/s]

⚠️ Skipping invalid bbox in yolo_annotated_images/Agrim_Verma\Agrim Verma _frame_2.txt: 0.69375, 0.657813, 0.0, 0.0


Processing train:  12%|█▎        | 47/376 [00:42<04:42,  1.17it/s]

⚠️ Skipping invalid bbox in yolo_annotated_images/Ayush_patel\Ayush Patel_frame_14.txt: 0.697917, 0.696875, 0.0, 0.0


Processing train:  24%|██▍       | 90/376 [01:19<04:07,  1.16it/s]

⚠️ Skipping invalid bbox in yolo_annotated_images/Dhruv_singh\Dhruv Singh_fullRight.txt: 0.647917, 0.654687, 0.0, 0.0


Processing train:  32%|███▏      | 119/376 [01:45<03:45,  1.14it/s]

⚠️ Skipping invalid bbox in yolo_annotated_images/Agrim_Verma\Agrim Verma _frame_26.txt: 0.647917, 0.671094, 0.0, 0.067187


Processing train:  55%|█████▍    | 206/376 [02:59<02:22,  1.20it/s]

⚠️ Skipping invalid bbox in yolo_annotated_images/Aumkumar_Savaliya\Aumkumar Savaliya _center.txt: 0.66875, 0.607812, 0.0, 0.0


Processing train:  88%|████████▊ | 332/376 [04:45<00:36,  1.19it/s]

⚠️ Skipping invalid bbox in yolo_annotated_images/Inian_Aditya_M\Inian Aditya M_halfLeftTop.txt: 0.660417, 0.679688, 0.0, 0.0


Processing train:  98%|█████████▊| 368/376 [05:15<00:06,  1.26it/s]

⚠️ Skipping invalid bbox in yolo_annotated_images/Agrim_Verma\Agrim Verma _frame_1.txt: 0.720833, 0.690625, 0.0, 0.0


Processing train: 100%|██████████| 376/376 [05:22<00:00,  1.17it/s]
Processing val:  32%|███▏      | 30/95 [00:25<00:57,  1.13it/s]

⚠️ Skipping invalid bbox in yolo_annotated_images/Amit_Kumar_Meena\Amit kumar meena _frame_8.txt: 0.841667, 0.704688, 0.0, 0.0


Processing val: 100%|██████████| 95/95 [01:20<00:00,  1.17it/s]

✅ All done with augmentation and label transformation!





In [27]:
import glob

[i[22:] for i in glob.glob("yolo_annotated_images/*")]

['Aabis',
 'Aaditya_Kumawat',
 'Agrim_Verma',
 'Ak',
 'Akshat_Sharma',
 'Amit_Kumar_Meena',
 'Ananta',
 'Aumkumar_Savaliya',
 'Avanish',
 'Ayush_Chandra',
 'Ayush_patel',
 'Dhruv_singh',
 'Gorang_Rathi',
 'Hrishikesh_Giri',
 'Inian_Aditya_M',
 'Kartik_Sharma',
 'Krishang_Goyal',
 'Krish_Bansal',
 'Miling_soni',
 'Param',
 'Yash',
 'yolo_dataset']

In [11]:
!yolo detect train model=yolov8n.pt data=dataset.yaml epochs=50 imgsz=640


^C


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at 'C:\Users\anshg\AppData\Roaming\Ultralytics\settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...
Ultralytics 8.3.111 🚀 Python-3.12.3 torch-2.6.0+cpu CPU (AMD Ryzen 7 6800H with Radeon Graphics)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataset.yaml, epochs=50, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=Fa


  0%|          | 0.00/6.25M [00:00<?, ?B/s]
  2%|▏         | 128k/6.25M [00:00<00:07, 829kB/s]
  4%|▍         | 256k/6.25M [00:00<00:07, 882kB/s]
  6%|▌         | 384k/6.25M [00:00<00:06, 923kB/s]
  8%|▊         | 512k/6.25M [00:00<00:06, 919kB/s]
 10%|█         | 640k/6.25M [00:00<00:06, 952kB/s]
 12%|█▏        | 768k/6.25M [00:00<00:06, 918kB/s]
 14%|█▍        | 896k/6.25M [00:01<00:06, 867kB/s]
 16%|█▌        | 1.00M/6.25M [00:01<00:06, 887kB/s]
 18%|█▊        | 1.12M/6.25M [00:01<00:05, 908kB/s]
 20%|██        | 1.25M/6.25M [00:01<00:05, 907kB/s]
 22%|██▏       | 1.38M/6.25M [00:01<00:05, 960kB/s]
 24%|██▍       | 1.50M/6.25M [00:01<00:04, 1.02MB/s]
 26%|██▌       | 1.62M/6.25M [00:01<00:04, 1.05MB/s]
 28%|██▊       | 1.75M/6.25M [00:01<00:05, 929kB/s] 
 30%|███       | 1.88M/6.25M [00:02<00:04, 923kB/s]
 32%|███▏      | 2.00M/6.25M [00:02<00:04, 929kB/s]
 34%|███▍      | 2.12M/6.25M [00:02<00:04, 931kB/s]
 36%|███▌      | 2.25M/6.25M [00:02<00:04, 990kB/s]
 38%|███▊      | 2.38M/