Kaggle URL: [Yolo11s Augmentation Dataset Creation](https://www.kaggle.com/code/phatle1578/yolo11s-augmentation-dataset-creation)

In [None]:
# ====================================================
# 1. ENVIRONMENT SETUP (Ray + TensorBoard fixes applied)
# ====================================================
# Uninstall any Ray packages first; output is discarded and a failure is ignored (`|| true`).
!pip uninstall -y ray ray[default] ray[tune] >/dev/null 2>&1 || true
# Install the pinned package versions listed below.
!pip -q install ultralytics==8.3.27 opencv-contrib-python==4.10.0.84 tqdm==4.67.1 torch==2.1.2 torchvision==0.16.2 open_clip_torch==2.24.0 timm==0.9.12 # (P) add last library
# Fix for the TensorBoard / protobuf conflict error
!pip install -q "numpy<2.0"
!pip install -q protobuf==3.20.3 tensorboard==2.14.0
!pip install -q filterpy

In [None]:
# ====================================================
# 2. DATA PREPARATION
# ====================================================
import os, re, json, shutil, random, cv2
import numpy as np
from pathlib import Path
from tqdm import tqdm

In [None]:
# Input locations: per-video frame folders and the annotation file.
TRAIN_ROOT = Path("/kaggle/input/train-zaic-dl")
ANN_PATH = Path("/kaggle/input/annotation/annotations.json")
# Everything this notebook writes goes below WORK_DIR.
WORK_DIR = Path("/kaggle/working/zaic_yolo")

# YOLO layout: images/{train,val} mirrored by labels/{train,val}.
YOLO_IMG_DIR_TR = WORK_DIR.joinpath("yolo_dataset/images/train")
YOLO_IMG_DIR_VA = WORK_DIR.joinpath("yolo_dataset/images/val")
YOLO_LBL_DIR_TR = WORK_DIR.joinpath("yolo_dataset/labels/train")
YOLO_LBL_DIR_VA = WORK_DIR.joinpath("yolo_dataset/labels/val")

In [None]:
# Create every YOLO output directory up front (no error if it already exists).
for out_dir in (YOLO_IMG_DIR_TR, YOLO_IMG_DIR_VA, YOLO_LBL_DIR_TR, YOLO_LBL_DIR_VA):
    out_dir.mkdir(parents=True, exist_ok=True)

# Load annotations
with open(ANN_PATH, "r") as fh:
    ann_json = json.load(fh)

# A single record (a dict that carries "video_id") is wrapped in a list so the
# loop below can treat both file layouts the same way.
is_single_record = isinstance(ann_json, dict) and "video_id" in ann_json
ann_list = [ann_json] if is_single_record else ann_json

# Index: video_id -> {frame number -> [[x1, y1, x2, y2], ...]}
video_to_frame_bboxes = {}
for record in ann_list:
    # Every video gets an entry, even when it has no boxes at all.
    per_frame = video_to_frame_bboxes.setdefault(record["video_id"], {})
    for track in record.get("annotations", []):
        for box in track.get("bboxes", []):
            frame_no = int(box["frame"])
            corners = [box["x1"], box["y1"], box["x2"], box["y2"]]
            per_frame.setdefault(frame_no, []).append(corners)

In [None]:
# Utils
# Matches each run of decimal digits.
num_re = re.compile(r"(\d+)")


def extract_frame_index(filename: str):
    """Return the last number found in the file name's stem, or None.

    Only the stem is searched (directory and extension are ignored), so
    "clip2_frame_0012.jpg" yields 12.
    """
    digit_runs = num_re.findall(Path(filename).stem)
    if not digit_runs:
        return None
    return int(digit_runs[-1])

In [None]:
def to_yolo_line(x1,y1,x2,y2, W,H, cls=0):
    """Format one pixel-space corner box as a YOLO label line.

    Args:
        x1, y1, x2, y2: box corners in pixels; the two corners may be given in
            either order.
        W, H: image width and height in pixels.
        cls: class index written as the first field.

    Returns:
        "cls cx cy w h" with centre and size divided by W / H, six decimals each.

    All four coordinates are clamped into [0, W-1] / [0, H-1] and the box is
    at least one pixel wide and high, so the normalised values always stay
    inside [0, 1] even for boxes that lie partly or fully outside the image.
    """
    # Accept corners in any order; otherwise a swapped box collapses to 1 px.
    x1, x2 = min(x1, x2), max(x1, x2)
    y1, y2 = min(y1, y2), max(y1, y2)
    # Clamp both ends: an unclamped x1/y1 past the border would give a centre > 1.
    x1, x2 = min(max(0, x1), W-1), min(max(0, x2), W-1)
    y1, y2 = min(max(0, y1), H-1), min(max(0, y2), H-1)
    w = max(1, x2 - x1); h = max(1, y2 - y1)
    cx = x1 + w/2; cy = y1 + h/2
    return f"{cls} {cx/W:.6f} {cy/H:.6f} {w/W:.6f} {h/H:.6f}"

In [None]:
# Split train/val: shuffle the per-video folders with a fixed seed and keep
# the first 90% for training, the rest for validation.
s_dirs = [entry for entry in TRAIN_ROOT.iterdir() if entry.is_dir()]
s_dirs.sort()
random.seed(42)
random.shuffle(s_dirs)
split = int(0.9 * len(s_dirs))
train_s = {folder.name for folder in s_dirs[:split]}
val_s = {folder.name for folder in s_dirs[split:]}

In [None]:
# Convert frames -> YOLO
def process_split(s_name, img_out_dir, lbl_out_dir):
    """Export the annotated frames of one video as YOLO image/label pairs.

    Looks for JPEGs in TRAIN_ROOT/<s_name>/object_frames, matches them to the
    annotated frame numbers of that video, writes one label file per matched
    frame into lbl_out_dir and copies the image into img_out_dir. Videos
    without an object_frames folder, frames without a matching file and
    unreadable images are skipped silently.
    """
    frames_dir = TRAIN_ROOT / s_name / "object_frames"
    if not frames_dir.exists():
        return

    # Frame number (last number in the file name) -> image path.
    frame_to_file = {}
    for jpg in sorted(frames_dir.glob("*.jpg")):
        frame_no = extract_frame_index(jpg.name)
        if frame_no is not None:
            frame_to_file[frame_no] = jpg

    for fr, bboxes in video_to_frame_bboxes.get(s_name, {}).items():
        src_img = frame_to_file.get(fr)
        if src_img is None:
            continue
        img = cv2.imread(str(src_img))
        if img is None:
            continue
        # Image size is needed to normalise the boxes.
        H, W = img.shape[:2]
        label_lines = [to_yolo_line(*bb, W, H, cls=0) for bb in bboxes]
        lbl_path = lbl_out_dir / (src_img.stem + ".txt")
        lbl_path.write_text("\n".join(label_lines))
        shutil.copy2(src_img, img_out_dir / src_img.name)

In [None]:
# Build a Re-ID dataset: one folder per training video (named by its numeric
# class id) filled with the annotated object crops of that video.
REID_ROOT = WORK_DIR / "reid_dataset"
REID_ROOT.mkdir(parents=True, exist_ok=True)
print(f"üîπ B·∫Øt ƒë·∫ßu t·∫°o Re-ID dataset t·∫°i: {REID_ROOT}")

num_classes_reid = 0
class_map_reid = {}

# Only videos from the training split are used. Iterate in sorted order:
# train_s is a set of strings, its iteration order can change between kernel
# runs, and class ids are handed out in iteration order.
for s_name in tqdm(sorted(train_s), desc="Processing Re-ID dataset"):
    s_path_frames = TRAIN_ROOT / s_name / "object_frames"
    if not s_path_frames.exists():
        continue

    # Map the video name to a numeric label (next free id on first sight).
    label_id = class_map_reid.setdefault(s_name, len(class_map_reid))
    num_classes_reid = len(class_map_reid)

    # Create the output folder for this class.
    class_dir = REID_ROOT / str(label_id)
    class_dir.mkdir(exist_ok=True)

    # Collect the image files, keyed by the frame number in their name.
    frame_to_file = {}
    for f in sorted(s_path_frames.glob("*.jpg")):
        frame_no = extract_frame_index(f.name)
        if frame_no is not None:
            frame_to_file[frame_no] = f

    # Boxes for each annotated frame of this video.
    ann_frames = video_to_frame_bboxes.get(s_name, {})

    img_count = 0
    for fr, bboxes in ann_frames.items():
        if fr not in frame_to_file:
            continue

        src_img_path = frame_to_file[fr]
        img = cv2.imread(str(src_img_path))
        if img is None:
            continue
        H, W = img.shape[:2]

        for i, bb in enumerate(bboxes):
            x1, y1, x2, y2 = map(int, bb)
            # Clamp to the image: a negative index would otherwise be read by
            # NumPy as "counted from the end" and yield a wrong or empty crop.
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(W, x2), min(H, y2)
            if x2 <= x1 or y2 <= y1:
                continue

            out_name = f"{fr:04d}_{i}.jpg"
            cv2.imwrite(str(class_dir / out_name), img[y1:y2, x1:x2])
            img_count += 1

    if img_count == 0:
        print(f"‚ö†Ô∏è Video {s_name} kh√¥ng c√≥ ·∫£nh crop n√†o ƒë∆∞·ª£c l∆∞u.")

print(f"‚úÖ T·∫°o Re-ID dataset ho√†n t·∫•t. T·ªïng c·ªông {num_classes_reid} classes.")

# Persist the number of classes so it can be reused at training time
%store num_classes_reid

In [None]:
# Export each split: (video names, image folder, label folder, progress-bar text).
split_jobs = (
    (train_s, YOLO_IMG_DIR_TR, YOLO_LBL_DIR_TR, "Building YOLO train split"),
    (val_s, YOLO_IMG_DIR_VA, YOLO_LBL_DIR_VA, "Building YOLO val split"),
)
for split_names, split_img_dir, split_lbl_dir, bar_text in split_jobs:
    for s in tqdm(split_names, desc=bar_text):
        process_split(s, split_img_dir, split_lbl_dir)


In [None]:
# 3. ADVANCED AUGMENTATION (BUG FIX)
# ====================================================
def rand_bool(p):
    """Return True with probability p (one draw from the global `random` generator)."""
    return p > random.random()

In [None]:
# === Photometric augmentations ===
def jitter_hsv(img, dh=10, ds=40, dv=30, p=0.7):
    """With probability p, shift hue, saturation and value of a BGR image.

    Each channel gets one random integer offset within +/-dh, +/-ds, +/-dv.
    Hue wraps modulo 180; saturation and value are clipped to [0, 255].
    Returns the input unchanged when the transform is skipped.
    """
    if not rand_bool(p):
        return img
    # Work in int32 so the offsets cannot overflow 8-bit storage.
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int32))
    hue = (hue + random.randint(-dh, dh)) % 180
    sat = np.clip(sat + random.randint(-ds, ds), 0, 255)
    val = np.clip(val + random.randint(-dv, dv), 0, 255)
    channels = [chan.astype(np.uint8) for chan in (hue, sat, val)]
    return cv2.cvtColor(cv2.merge(channels), cv2.COLOR_HSV2BGR)

In [None]:
def brightness_contrast(img, p=0.7, b_lim=0.25, c_lim=0.25):
    """With probability p, apply a random linear contrast/brightness change.

    Args:
        img: image array; the result is always uint8.
        p: probability of applying the transform.
        b_lim: brightness offset is drawn from +/- b_lim * 255.
        c_lim: contrast gain is drawn from 1 +/- c_lim.

    Returns:
        The input unchanged when skipped, otherwise a new uint8 array holding
        round(alpha * img + beta) saturated to [0, 255].
    """
    if not rand_bool(p): return img
    alpha = 1.0 + random.uniform(-c_lim, c_lim)
    beta = 255.0 * random.uniform(-b_lim, b_lim)
    # cv2.convertScaleAbs takes the absolute value of alpha*img + beta, so with a
    # negative beta dark pixels would come out brighter instead of black.
    # Saturate explicitly instead.
    scaled = img.astype(np.float32) * alpha + beta
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)

In [None]:
def adjust_gamma(img, p=0.4, g_range=(0.8,1.3)):
    """With probability p, apply gamma correction with gamma drawn from g_range.

    A 256-entry lookup table of (v / 255) ** (1 / gamma) * 255 is built and
    applied with cv2.LUT. Returns the input unchanged when skipped.
    """
    if not rand_bool(p):
        return img
    exponent = 1.0 / random.uniform(*g_range)
    levels = np.arange(256) / 255.0
    lut = (np.power(levels, exponent) * 255).astype(np.uint8)
    return cv2.LUT(img, lut)

In [None]:
def gaussian_noise(img, p=0.4, var=(5.0, 50.0)):
    """With probability p, add zero-mean Gaussian noise to the image.

    The noise variance is drawn uniformly from `var`; the noisy result is
    clipped to [0, 255] and returned as uint8. Returns the input unchanged
    when skipped.
    """
    if not rand_bool(p):
        return img
    std = np.sqrt(random.uniform(*var))
    perturbation = np.random.normal(0, std, img.shape).astype(np.float32)
    noisy = img.astype(np.float32) + perturbation
    return np.clip(noisy, 0, 255).astype(np.uint8)

In [None]:
def motion_blur(img, p=0.35):
    """With probability p, blur the image along a randomly oriented line.

    A horizontal line kernel of size 5, 7 or 9 is rotated by a random angle in
    [0, 180) degrees, normalised to sum to one and applied with cv2.filter2D.
    Returns the input unchanged when skipped.
    """
    if not rand_bool(p):
        return img
    size = random.choice([5,7,9])
    # Start from a horizontal line through the middle row.
    line = np.zeros((size, size), np.float32)
    line[size // 2, :] = 1.0
    theta = random.uniform(0,180)
    centre = (size/2-0.5, size/2-0.5)
    rotation = cv2.getRotationMatrix2D(centre, theta, 1.0)
    line = cv2.warpAffine(line, rotation, (size, size))
    # Normalise; the epsilon guards against an all-zero kernel.
    line /= (line.sum() + 1e-8)
    return cv2.filter2D(img, -1, line)

In [None]:
def clahe_lab(img, p=0.25):
    """With probability p, apply CLAHE to the L channel of a BGR image.

    The image is converted to LAB, the L channel is equalised with a clip
    limit drawn from [2.0, 4.0] on an 8x8 tile grid, and the result is
    converted back to BGR. Returns the input unchanged when skipped.
    """
    if not rand_bool(p):
        return img
    light, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))
    limit = random.uniform(2.0,4.0)
    equalizer = cv2.createCLAHE(clipLimit=limit, tileGridSize=(8,8))
    merged = cv2.merge([equalizer.apply(light), chan_a, chan_b])
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

In [None]:
# === Geometric augmentations ===
def random_perspective(img, p=0.4):
    """With probability p, warp the image with a random perspective transform.

    Each image corner is moved by up to 10% of the image width / height
    (clipped so it stays inside the frame) and the image is warped onto the
    jittered quadrilateral; uncovered areas are filled by border reflection.
    Pixel positions change, so any annotations of the image no longer line up
    with the result.

    Returns the input unchanged when skipped or when OpenCV rejects the
    transform.
    """
    if not rand_bool(p): return img
    h, w = img.shape[:2]
    src = np.float32([[0,0],[w,0],[w,h],[0,h]])
    # Additive offsets scaled by the image size: a multiplicative jitter can
    # never move a coordinate that is 0, i.e. half of the corner coordinates.
    jitter = np.random.uniform(-0.1, 0.1, (4,2)) * [w, h]
    dst = src + jitter
    dst[:,0] = np.clip(dst[:,0], 0, w)
    dst[:,1] = np.clip(dst[:,1], 0, h)
    # getPerspectiveTransform only accepts float32 points; the float64 array
    # produced above used to raise cv2.error, which the handler below swallowed,
    # so the warp was never applied.
    dst = dst.astype(np.float32)
    try:
        M = cv2.getPerspectiveTransform(src, dst)
        return cv2.warpPerspective(img, M, (w,h), borderMode=cv2.BORDER_REFLECT)
    except cv2.error:
        # Best effort: keep the un-warped image if OpenCV rejects the points.
        return img

In [None]:
def random_crop(img, p=0.4):
    """With probability p, crop a random window and resize it back to the original size.

    The window keeps the aspect ratio and covers 70%-100% of each side; its
    position is chosen uniformly. Returns the input unchanged when skipped.
    """
    if not rand_bool(p):
        return img
    height, width = img.shape[:2]
    ratio = random.uniform(0.7, 1.0)
    crop_h = int(height * ratio)
    crop_w = int(width * ratio)
    top = random.randint(0, height - crop_h)
    left = random.randint(0, width - crop_w)
    window = img[top:top + crop_h, left:left + crop_w]
    return cv2.resize(window, (width, height))

In [None]:
def random_rotate(img, p=0.4):
    """With probability p, rotate the image about its centre by -10 to +10 degrees.

    The output keeps the input size; uncovered corners are filled by border
    reflection. Returns the input unchanged when skipped.
    """
    if not rand_bool(p):
        return img
    height, width = img.shape[:2]
    degrees = random.uniform(-10, 10)
    rotation = cv2.getRotationMatrix2D((width/2, height/2), degrees, 1)
    return cv2.warpAffine(img, rotation, (width, height), borderMode=cv2.BORDER_REFLECT)

In [None]:
# === CutMix-like augmentation ===
def cutmix_pair(img1, img2, p=0.3):
    """With probability p, paste a random rectangle of img2 into a copy of img1.

    Args:
        img1: base image; it is never modified.
        img2: donor image; resized to img1's size when the sizes differ. If it
            is None the base image is returned untouched.
        p: probability of applying the transform.

    Returns:
        img1 itself when skipped, otherwise a new array with the rectangle
        [y1:y2, x1:x2] replaced by the same region of img2. The rectangle
        starts in the top-left quadrant and ends in the bottom-right one.
    """
    if not rand_bool(p): return img1
    if img2 is None: return img1
    h,w = img1.shape[:2]
    # Different sizes would make the slice shapes disagree and raise on assignment.
    if img2.shape[:2] != (h, w):
        img2 = cv2.resize(img2, (w, h))
    x1, y1 = random.randint(0,w//2), random.randint(0,h//2)
    x2, y2 = random.randint(w//2,w), random.randint(h//2,h)
    # Work on a copy: when every earlier transform was skipped, img1 is the
    # caller's original array, and writing into it would corrupt it.
    out = img1.copy()
    out[y1:y2, x1:x2] = img2[y1:y2, x1:x2]
    return out

In [None]:
def advanced_augment(img, pool=None):
    """Run the full augmentation chain on one image and return the result.

    Order: HSV jitter, brightness/contrast, gamma, Gaussian noise, motion
    blur, CLAHE, then crop, rotation and perspective warp. When `pool` is a
    non-empty sequence of images, one of them is picked at random as the donor
    for cutmix_pair. Each step decides on its own whether it fires.
    The crop/rotate/warp/cutmix steps move or replace image content.
    """
    pipeline = (
        jitter_hsv,
        brightness_contrast,
        adjust_gamma,
        gaussian_noise,
        motion_blur,
        clahe_lab,
        random_crop,
        random_rotate,
        random_perspective,
    )
    for step in pipeline:
        img = step(img)
    if not pool:
        return img
    donor = random.choice(pool)
    return cutmix_pair(img, donor)

In [None]:
def augment_dataset(img_dir, lbl_dir, k=2, geometric=False):
    """Write k augmented copies of every labelled image next to the originals.

    For each `<stem>.jpg` in img_dir that has a `<stem>.txt` in lbl_dir, the
    files `<stem>_augx{i}.jpg` / `<stem>_augx{i}.txt` (i = 0..k-1) are created.
    The label text is copied verbatim, so by default only photometric
    transforms are applied: they change pixel values but not object positions.

    Args:
        img_dir: folder holding the images (str or Path).
        lbl_dir: folder holding the label files (str or Path).
        k: number of augmented copies per image.
        geometric: when True, run the full `advanced_augment` chain (crop,
            rotation, perspective, cutmix) instead. The copied labels are NOT
            transformed in that mode, so boxes may no longer match the image.

    Files produced by an earlier run (stem ending in `_augx<n>`) are not used
    as sources again, and images OpenCV cannot read are skipped.
    """
    img_dir, lbl_dir = Path(img_dir), Path(lbl_dir)
    generated = re.compile(r"_augx\d+$")
    # Re-running the cell must not augment its own previous output.
    imgs = sorted(p for p in img_dir.glob("*.jpg") if not generated.search(p.stem))

    pool = []
    if geometric:
        # Donor images for cutmix; unreadable files are dropped.
        candidates = (cv2.imread(str(p)) for p in random.sample(imgs, min(10, len(imgs))))
        pool = [im for im in candidates if im is not None]

    for p in tqdm(imgs, desc=f"Augment++ {img_dir.name}"):
        lbl_path = lbl_dir / f"{p.stem}.txt"
        if not lbl_path.exists(): continue
        img = cv2.imread(str(p))
        if img is None: continue
        lbl_txt = lbl_path.read_text()
        for i in range(k):
            out = advanced_augment(img, pool) if geometric else _photometric_augment(img)
            out_name = f"{p.stem}_augx{i}.jpg"
            cv2.imwrite(str(img_dir / out_name), out)
            (lbl_dir / f"{p.stem}_augx{i}.txt").write_text(lbl_txt)


def _photometric_augment(img):
    """Apply only the pixel-value transforms, which leave object positions untouched."""
    for step in (jitter_hsv, brightness_contrast, adjust_gamma,
                 gaussian_noise, motion_blur, clahe_lab):
        img = step(img)
    return img

In [None]:
# Write k=2 augmented copies of every labelled training image into the
# training image / label folders.
augment_dataset(YOLO_IMG_DIR_TR, YOLO_LBL_DIR_TR, k=2)