In [None]:
import os
import shutil
import random
from pathlib import Path
from PIL import Image
from tqdm import tqdm

In [None]:
# ------------------------------------------
# SETTINGS - edit these before running
# ------------------------------------------
# Dataset root; it must hold the 'annotations' and 'sequences' sub-folders.
visdrone_root = "/content/drive/MyDrive/ZALO_AI/VisDrone2019-SOT-val"

# Destination of the YOLO-format dataset (images/, labels/, data.yaml).
output_dir = "/content/drive/MyDrive/ZALO_AI/VisDrone_val_Yolo"

# Fraction of the sequences assigned to the training split; the remainder
# becomes the validation split (0.85 -> 85% train / 15% val).
train_ratio = 0.85
# ------------------------------------------

def setup_directories(root):
    """Create the YOLO ``images``/``labels`` train and val folders under *root*.

    Args:
        root: ``pathlib.Path`` of the output dataset directory.

    Returns:
        dict mapping ``'train_img'``, ``'val_img'``, ``'train_lbl'`` and
        ``'val_lbl'`` to the corresponding directory paths. Missing parents
        are created and already-existing folders are left untouched.
    """
    layout = (
        ('train_img', 'images', 'train'),
        ('val_img', 'images', 'val'),
        ('train_lbl', 'labels', 'train'),
        ('val_lbl', 'labels', 'val'),
    )
    dirs = {key: root / kind / split for key, kind, split in layout}
    for folder in dirs.values():
        folder.mkdir(parents=True, exist_ok=True)
    return dirs

def convert_to_yolo_bbox(bbox, img_w, img_h):
    """Convert a pixel ``(x_min, y_min, width, height)`` box to YOLO format.

    The box is clipped to the image rectangle first, and the centre and size
    are then derived from the clipped corners. Clamping the centre and the
    size independently is not enough: a box that overhangs the image edge
    would keep its full width while its centre is pinned to the border, so
    the resulting box would still extend outside the ``[0, 1]`` range.

    Args:
        bbox: Four numbers ``x_min, y_min, width, height`` in pixels.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        Tuple ``(x_center, y_center, width, height)`` normalized by the image
        size; every value lies in ``[0, 1]`` and the box lies inside the image.
        A box entirely outside the image (or with non-positive size) collapses
        to zero width and/or height.

    Raises:
        ValueError: If ``bbox`` does not hold exactly four values, or if an
            image dimension is not positive.
    """
    # VisDrone bbox is: x_min, y_min, width, height
    x, y, w, h = bbox

    if img_w <= 0 or img_h <= 0:
        raise ValueError(f"Image dimensions must be positive, got {img_w}x{img_h}")

    # Clip both corners to the image rectangle.
    left = min(max(x, 0.0), img_w)
    top = min(max(y, 0.0), img_h)
    right = min(max(x + w, 0.0), img_w)
    bottom = min(max(y + h, 0.0), img_h)

    # A negative extent (inverted or fully clipped box) degenerates to zero.
    box_w = max(right - left, 0.0)
    box_h = max(bottom - top, 0.0)

    # YOLO format: x_center, y_center, width, height (normalized 0-1)
    return (
        (left + box_w / 2) / img_w,
        (top + box_h / 2) / img_h,
        box_w / img_w,
        box_h / img_h,
    )

def process_sequences(seq_list, source_root, out_root, split_type):
    """Convert VisDrone SOT sequences into YOLO image/label pairs for one split.

    For each annotation file the matching frame folder is looked up under
    ``source_root / "sequences"``. Annotation line *i* is paired with the
    *i*-th frame (frames sorted by path); the frame is copied to
    ``out_root / "images" / split_type`` and a one-line YOLO label (class 0)
    is written to ``out_root / "labels" / split_type``.

    Frames whose annotation line cannot be parsed, or whose image cannot be
    opened, are skipped. Sequences without a frame folder are skipped and
    reported once at the end.

    Args:
        seq_list: Annotation files as ``pathlib.Path`` objects
            (e.g., ``[Path('UAV_0024_s.txt'), ...]``).
        source_root: ``pathlib.Path`` of the dataset root that contains the
            ``sequences`` folder.
        out_root: ``pathlib.Path`` of the YOLO output root.
        split_type: ``'train'`` or ``'val'``.
    """
    image_suffixes = {".jpg", ".jpeg", ".png"}

    # Set destination paths; create them so the function also works when the
    # output tree has not been prepared beforehand.
    dest_img_dir = out_root / 'images' / split_type
    dest_lbl_dir = out_root / 'labels' / split_type
    dest_img_dir.mkdir(parents=True, exist_ok=True)
    dest_lbl_dir.mkdir(parents=True, exist_ok=True)

    sequences_dir = source_root / "sequences"
    skipped_sequences = []

    print(f"\n[INFO] Processing {len(seq_list)} videos for '{split_type}'...")

    for ann_file in tqdm(seq_list):
        # 1. Get Sequence Name (e.g., "UAV_0024_s")
        seq_name = ann_file.stem

        # 2. Find the corresponding Image Folder (sequences/UAV_0024_s)
        seq_folder = sequences_dir / seq_name

        # Fallback: sometimes folders don't have the '_s' suffix. Only the
        # trailing suffix is stripped, never an '_s' inside the name.
        if not seq_folder.is_dir() and seq_name.endswith("_s"):
            seq_folder = sequences_dir / seq_name[:-len("_s")]

        if not seq_folder.is_dir():
            skipped_sequences.append(seq_name)
            continue

        # 3. Get all images (jpg, jpeg, png), matching the extension
        #    case-insensitively so '.JPG' frames are not missed.
        images = sorted(
            p for p in seq_folder.iterdir() if p.suffix.lower() in image_suffixes
        )

        # 4. Parse Annotation File (blank lines are ignored)
        with open(ann_file, 'r') as f:
            lines = [line.strip() for line in f if line.strip()]

        # For SOT there is expected to be one annotation line per frame;
        # report a mismatch instead of silently truncating.
        if len(lines) != len(images):
            tqdm.write(
                f"[WARN] {seq_name}: {len(images)} frames vs {len(lines)} "
                f"annotation lines; pairing the first {min(len(images), len(lines))}"
            )

        # zip() stops at the shorter of the two lists.
        for i, (img_path, line) in enumerate(zip(images, lines)):
            # Parse the line: x,y,w,h[,...] - only the first 4 fields are used
            parts = line.split(',')
            try:
                bbox = [float(value) for value in parts[:4]]
            except ValueError:
                continue  # Bad line
            if len(bbox) != 4:
                continue  # Too few fields to form a box

            # Load image to get dimensions. Every image is read because the
            # dimensions are not assumed to be constant within a sequence.
            try:
                with Image.open(img_path) as img:
                    w_img, h_img = img.size
            except (OSError, ValueError):
                # Unreadable/corrupt image. A bare except here would also
                # swallow KeyboardInterrupt during a long conversion run.
                continue

            # Convert
            yolo_bbox = convert_to_yolo_bbox(bbox, w_img, h_img)

            # 5. Write Output
            # Create unique ID: UAV_0024_s_000001
            frame_id = f"{seq_name}_{i+1:06d}"

            # Copy Image, keeping its real extension so the file name matches
            # the file content (a PNG is not renamed to .jpg).
            shutil.copy2(img_path, dest_img_dir / f"{frame_id}{img_path.suffix.lower()}")

            # Write Label
            with open(dest_lbl_dir / f"{frame_id}.txt", 'w') as f_out:
                # Class 0 for 'drone/target'
                f_out.write(f"0 {yolo_bbox[0]:.6f} {yolo_bbox[1]:.6f} {yolo_bbox[2]:.6f} {yolo_bbox[3]:.6f}\n")

    if skipped_sequences:
        print(
            f"[WARN] Skipped {len(skipped_sequences)} sequence(s) with no image "
            f"folder in 'sequences/': {', '.join(skipped_sequences)}"
        )

if __name__ == "__main__":
    # Driver: validate the input, split the sequences into train/val at the
    # sequence level, convert both splits and write the dataset YAML.
    src_path = Path(visdrone_root)
    out_path = Path(output_dir)

    print("--- Starting Conversion ---")
    print(f"Source: {src_path}")
    print(f"Output: {out_path}")

    if not src_path.exists():
        print("ERROR: Source folder not found!")
        # SystemExit instead of exit(): exit() is an interactive helper
        # injected by the `site` module and is not meant for programs.
        raise SystemExit(1)

    # 1. Gather Sequences from Annotations
    # This finds all files ending in .txt inside annotations/,
    # excluding attribute files just in case.
    ann_dir = src_path / "annotations"
    all_seq_files = sorted(f for f in ann_dir.glob("*.txt") if "attr" not in f.name)

    print(f"Found {len(all_seq_files)} sequences.")

    if not all_seq_files:
        # Stop before creating an empty output tree and YAML.
        print(f"ERROR: No annotation files found in {ann_dir}")
        raise SystemExit(1)

    # 2. Setup Folders (only once the input is known to be usable)
    setup_directories(out_path)

    # 3. Shuffle & Split (fixed seed keeps the split reproducible)
    random.seed(42)
    random.shuffle(all_seq_files)

    split_point = int(len(all_seq_files) * train_ratio)
    train_files = all_seq_files[:split_point]
    val_files = all_seq_files[split_point:]

    if not train_files or not val_files:
        print(
            f"WARNING: split produced {len(train_files)} train and "
            f"{len(val_files)} val sequences; check train_ratio."
        )

    # 4. Process
    process_sequences(train_files, src_path, out_path, 'train')
    process_sequences(val_files, src_path, out_path, 'val')

    # 5. Create YAML
    yaml_content = f"""path: {out_path.absolute()}
train: images/train
val: images/val
names:
  0: object
"""
    with open(out_path / "data.yaml", 'w') as f:
        f.write(yaml_content)

    print("\n--- Conversion Complete ---")
    print(f"Train sequences: {len(train_files)}")
    print(f"Val sequences: {len(val_files)}")
    print(f"YAML saved to: {out_path}/data.yaml")

--- Starting Conversion ---
Source: /content/drive/MyDrive/ZALO_AI/VisDrone2019-SOT-train-1
Output: /content/drive/MyDrive/ZALO_AI/Train-1_Yolo
Found 43 sequences.

[INFO] Processing 36 videos for 'train'...


100%|██████████| 36/36 [37:13<00:00, 62.05s/it]



[INFO] Processing 7 videos for 'val'...


100%|██████████| 7/7 [04:16<00:00, 36.58s/it]


--- Conversion Complete ---
Train sequences: 36
Val sequences: 7
YAML saved to: /content/drive/MyDrive/ZALO_AI/Train-1_Yolo/data.yaml



