In [1]:
import os
import cv2
import json
import shutil
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.notebook import tqdm

# ================= KONFIGURASI =================
# Path input
INPUT_PREPRO_DIR = Path("/kaggle/input/multi-object-detection-tracking-prepro-v2/preprocessed_v2") 
VIDEO_DIR = Path("/kaggle/input/driving-video-with-object-tracking/bdd100k_videos_train_00/bdd100k/videos/train")

# Output dataset untuk YOLO
DATASET_DIR = Path("/kaggle/working/dataset")
IMG_DIR = DATASET_DIR / "images"
LBL_DIR = DATASET_DIR / "labels"

# Mapping Class ID
CLASS_MAP = {
    'car': 0, 'pedestrian': 1, 'truck': 2, 'bus': 3,
    'bicycle': 4, 'other vehicle': 5, 'rider': 6,
    'motorcycle': 7, 'other person': 8, 'train': 9, 'trailer': 10
}

# Konfigurasi Split & Limit
VAL_SPLIT = 0.2 
SEED = 42
LIMIT_VIDEOS = 720  # Sesuai request

# --- KONFIGURASI BARU (SESUAI PILIHAN ANDA) ---
FRAME_STRIDE = 3            # Ambil 1 frame setiap 3 frame
TARGET_SIZE = (960, 540)    # Resize ke 960x540
JPEG_QUALITY = 100          # Kualitas gambar 100%

# ================= FUNGSI BANTUAN =================
def create_yolo_dirs():
    for split in ['train', 'val']:
        (IMG_DIR / split).mkdir(parents=True, exist_ok=True)
        (LBL_DIR / split).mkdir(parents=True, exist_ok=True)

def get_yolo_bbox(row, orig_w, orig_h):
    # PENTING: Koordinat YOLO harus dinormalisasi berdasarkan ukuran ASLI (sebelum resize)
    # karena posisi relatif objek (0.0 - 1.0) tidak berubah meskipun gambar di-resize.
    dw = 1.0 / orig_w
    dh = 1.0 / orig_h
    
    x_center = ((row['x1'] + row['x2']) / 2.0) * dw
    y_center = ((row['y1'] + row['y2']) / 2.0) * dh
    w = (row['x2'] - row['x1']) * dw
    h = (row['y2'] - row['y1']) * dh
    
    return x_center, y_center, w, h

def stratified_sample(all_videos, df_flags, n_samples):
    """
    Mengambil sampel video dengan mempertahankan rasio Siang/Malam.
    """
    print("   Melakukan Stratified Sampling...")
    
    video_conditions = df_flags[df_flags['video'].isin(all_videos)].groupby('video')['is_night'].mean()
    
    day_videos = video_conditions[video_conditions <= 0.5].index.tolist()
    night_videos = video_conditions[video_conditions > 0.5].index.tolist()
    
    total_avail = len(day_videos) + len(night_videos)
    if n_samples >= total_avail:
        return all_videos 
    
    n_day = int(n_samples * (len(day_videos) / total_avail))
    n_night = n_samples - n_day
    
    print(f"   Proporsi Asli: Day {len(day_videos)} | Night {len(night_videos)}")
    print(f"   Target Sample: Day {n_day} | Night {n_night}")
    
    np.random.seed(SEED)
    selected_day = np.random.choice(day_videos, n_day, replace=False).tolist()
    selected_night = np.random.choice(night_videos, n_night, replace=False).tolist()
    
    selected_videos = selected_day + selected_night
    np.random.shuffle(selected_videos) 
    
    return selected_videos

def process_dataset():
    # Cleanup dataset sebelumnya agar bersih
    if DATASET_DIR.exists():
        shutil.rmtree(DATASET_DIR)
        
    print("Loading data...")
    # 1. Load Manifest, Labels & Flags
    manifest_path = INPUT_PREPRO_DIR / "P6" / "train_manifest.json"
    labels_path = INPUT_PREPRO_DIR / "P3" / "labels_clean_dropShort.parquet"
    flags_path = INPUT_PREPRO_DIR / "P5" / "day_night_flags.parquet" 
    
    with open(manifest_path, 'r') as f:
        manifest = json.load(f)
        
    df_labels = pd.read_parquet(labels_path)
    df_flags = pd.read_parquet(flags_path)
    
    # Filter label
    manifest_keys = set(f"{m['video']}_{m['frameIndex']}" for m in manifest)
    df_labels['key'] = df_labels['video'] + "_" + df_labels['frameIndex'].astype(str)
    df_labels = df_labels[df_labels['key'].isin(manifest_keys)].copy()
    
    # 2. Prepare Videos dengan Stratified Sampling
    unique_videos = list(set(m['video'] for m in manifest))
    
    if LIMIT_VIDEOS is not None and LIMIT_VIDEOS < len(unique_videos):
        print(f"‚ö†Ô∏è LIMIT ACTIVE: Mengambil {LIMIT_VIDEOS} video dengan Stratified Sampling (Day/Night).")
        selected_videos = stratified_sample(unique_videos, df_flags, LIMIT_VIDEOS)
    else:
        selected_videos = unique_videos
        np.random.seed(SEED)
        np.random.shuffle(selected_videos)

    # Split Train/Val
    num_val = int(len(selected_videos) * VAL_SPLIT)
    if num_val == 0 and len(selected_videos) > 1: num_val = 1
        
    val_videos = set(selected_videos[:num_val])
    train_videos = set(selected_videos[num_val:])
    
    print(f"Total Videos to Process: {len(selected_videos)}")
    print(f"Train Videos: {len(train_videos)}, Val Videos: {len(val_videos)}")
    
    # Group manifest by video 
    video_groups = {}
    valid_video_set = set(selected_videos)
    
    for m in manifest:
        if m['video'] in valid_video_set:
            if m['video'] not in video_groups:
                video_groups[m['video']] = []
            video_groups[m['video']].append(m['frameIndex'])
        
    create_yolo_dirs()
    
    # 3. Extraction Loop
    print(f"Starting extraction (Stride={FRAME_STRIDE}, Size={TARGET_SIZE}, Quality={JPEG_QUALITY}%)...")
    
    total_imgs = 0
    
    for video_name, frames in tqdm(video_groups.items(), desc="Processing Videos"):
        split = 'val' if video_name in val_videos else 'train'
        
        video_path = VIDEO_DIR / f"{video_name}.mov"
        if not video_path.exists():
            video_path = VIDEO_DIR / video_name 
        if not video_path.exists():
            continue
            
        cap = cv2.VideoCapture(str(video_path))
        if not cap.isOpened(): continue
            
        frames = sorted(frames)
        
        # Gunakan enumerate untuk logika Stride
        for i, frame_idx in enumerate(frames):
            
            # --- LOGIKA 1: FRAME STRIDE ---
            # Jika sisa bagi index dengan FRAME_STRIDE bukan 0, skip frame ini.
            if i % FRAME_STRIDE != 0:
                continue

            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, img = cap.read()
            if not ret: continue
                
            h_orig, w_orig = img.shape[:2]
            
            # --- LOGIKA 2: RESIZE ---
            # Resize gambar ke target size (960, 540)
            img_resized = cv2.resize(img, TARGET_SIZE, interpolation=cv2.INTER_LINEAR)
            
            # Save Image
            file_id = f"{video_name}_{frame_idx:06d}"
            img_out_path = IMG_DIR / split / f"{file_id}.jpg"
            
            # --- LOGIKA 3: JPEG QUALITY ---
            # Simpan dengan kualitas yang ditentukan
            cv2.imwrite(str(img_out_path), img_resized, [int(cv2.IMWRITE_JPEG_QUALITY), JPEG_QUALITY])
            
            total_imgs += 1
            
            # Save Labels
            frame_labels = df_labels[
                (df_labels['video'] == video_name) & 
                (df_labels['frameIndex'] == frame_idx)
            ]
            
            txt_out_path = LBL_DIR / split / f"{file_id}.txt"
            with open(txt_out_path, 'w') as f_txt:
                for _, row in frame_labels.iterrows():
                    cls_name = row['category']
                    if cls_name not in CLASS_MAP: continue
                    cls_id = CLASS_MAP[cls_name]
                    
                    # NOTE: Koordinat bbox dihitung berdasarkan ukuran ASLI (w_orig, h_orig).
                    # Ini benar karena koordinat YOLO adalah relatif (0.0 - 1.0).
                    xc, yc, w, h = get_yolo_bbox(row, w_orig, h_orig)
                    
                    xc = max(0, min(1, xc))
                    yc = max(0, min(1, yc))
                    w = max(0, min(1, w))
                    h = max(0, min(1, h))
                    
                    f_txt.write(f"{cls_id} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}\n")
        
        cap.release()

    print("Dataset generation complete!")
    print(f"Total Images Saved: {total_imgs}")
    print(f"Train images: {len(list((IMG_DIR/'train').glob('*.jpg')))}")
    print(f"Val images:   {len(list((IMG_DIR/'val').glob('*.jpg')))}")

if __name__ == "__main__":
    process_dataset()

Loading data...
‚ö†Ô∏è LIMIT ACTIVE: Mengambil 720 video dengan Stratified Sampling (Day/Night).
   Melakukan Stratified Sampling...
   Proporsi Asli: Day 522 | Night 439
   Target Sample: Day 391 | Night 329
Total Videos to Process: 720
Train Videos: 576, Val Videos: 144
Starting extraction (Stride=3, Size=(960, 540), Quality=100%)...


Processing Videos:   0%|          | 0/720 [00:00<?, ?it/s]

Dataset generation complete!
Total Images Saved: 46524
Train images: 37310
Val images:   9214


In [2]:
import yaml
import os

# ================= KONFIGURASI =================
# Pastikan ini SAMA PERSIS dengan Step 1
CLASS_MAP = {
    'car': 0, 'pedestrian': 1, 'truck': 2, 'bus': 3,
    'bicycle': 4, 'other vehicle': 5, 'rider': 6,
    'motorcycle': 7, 'other person': 8, 'train': 9, 'trailer': 10
}

# Konversi ke format yang diminta YOLO: {0: 'car', 1: 'pedestrian', ...}
# Kita perlu membalik key dan value dari CLASS_MAP
NAMES_MAP = {v: k for k, v in CLASS_MAP.items()}

# Path output file yaml
YAML_OUTPUT_PATH = '/kaggle/working/bdd_mot.yaml'

# ================= PEMBUATAN YAML =================
# Struktur Dictionary untuk YAML
data_yaml = {
    'path': '/kaggle/working/dataset',  # Base path dataset (Absolute path)
    'train': 'images/train',            # Folder training (relative to path)
    'val': 'images/val',                # Folder validasi (relative to path)
    'names': NAMES_MAP                  # Mapping ID ke Nama Kelas
}

# Tulis ke file
with open(YAML_OUTPUT_PATH, 'w') as f:
    yaml.dump(data_yaml, f, sort_keys=False)

print(f"‚úÖ File konfigurasi berhasil dibuat: {YAML_OUTPUT_PATH}")
print("\nIsi file bdd_mot.yaml:")
print("-" * 30)
with open(YAML_OUTPUT_PATH, 'r') as f:
    print(f.read())
print("-" * 30)

‚úÖ File konfigurasi berhasil dibuat: /kaggle/working/bdd_mot.yaml

Isi file bdd_mot.yaml:
------------------------------
path: /kaggle/working/dataset
train: images/train
val: images/val
names:
  0: car
  1: pedestrian
  2: truck
  3: bus
  4: bicycle
  5: other vehicle
  6: rider
  7: motorcycle
  8: other person
  9: train
  10: trailer

------------------------------


In [3]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.4.8-py3-none-any.whl.metadata (38 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.4.8-py3-none-any.whl (1.2 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.2/1.2 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.4.8 ultralytics-thop-2.0.18


In [4]:
from ultralytics import YOLO
import torch
import gc
import os

# ================= KONFIGURASI =================
# Matikan WandB agar tidak perlu login (offline mode)
os.environ['WANDB_DISABLED'] = 'true'

# Bersihkan Memori GPU sebelum mulai
gc.collect()
torch.cuda.empty_cache()

# Cek GPU
print(f"üî• GPU Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"   Device: {torch.cuda.get_device_name(0)}")

# ================= TRAINING LOOP =================
def train_yolo():
    # 1. Load Model
    # Gunakan 'yolov8s.pt' (small) untuk keseimbangan performa/speed yang baik
    print("Loading model...")
    model = YOLO('yolov8s.pt') 

    # 2. Start Training
    print("Starting training (30 Epochs with Imbalance Strategy)...")
    results = model.train(
        data='/kaggle/working/bdd_mot.yaml', # Path ke file yaml dari Step 2
        
        # --- DURASI TRAINING ---
        epochs=30,        # REQUEST: Naik ke 30 epoch
        patience=10,      # Stop jika tidak ada perbaikan dalam 10 epoch (Early Stopping)
        
        # --- INPUT CONFIG ---
        imgsz=640,        # Resolusi input (Standard YOLOv8)
        batch=16,         # Batch size (Jika OOM, turunkan ke 8)
        
        # --- STRATEGI UNTUK KELAS MINORITAS & IMBALANCE ---
        rect=True,           # Wajib untuk dataset mengemudi (aspek rasio lebar)
        cos_lr=True,         # Cosine LR scheduler (Membantu konvergensi di epoch panjang)
        label_smoothing=0.1, # Regularisasi: Mencegah overfitting pada kelas mayoritas (Car)
        close_mosaic=10,     # Matikan augmentasi Mosaic di 10 epoch terakhir (Fokus objek utuh)
        
        # --- SYSTEM CONFIG ---
        workers=4,        
        optimizer='auto', 
        
        # --- LOGGING ---
        project='bdd_mot_project', 
        name='yolov8s_run_30e',   # Ganti nama agar tahu ini run 30 epoch
        exist_ok=True,    
        plots=True,       
        save=True,        
        val=True          
    )
    
    print("‚úÖ Training Selesai!")
    # Kita gunakan properti save_dir agar dinamis (tidak hardcode path)
    print(f"   Best Model Saved at: {results.save_dir}/weights/best.pt")
    return model, results

# Eksekusi
if __name__ == "__main__":
    model, results = train_yolo()

Creating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
üî• GPU Available: True
   Device: Tesla P100-PCIE-16GB
Loading model...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.4.0/yolov8s.pt to 'yolov8s.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 21.5MB 224.8MB/s 0.1s
Starting training (30 Epochs with Imbalance Strategy)...
Ultralytics 8.4.8 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0,

In [5]:
# Install library supervision untuk visualisasi tracking yang keren
!pip install supervision --quiet

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m212.4/212.4 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [6]:
import supervision as sv
from ultralytics import YOLO
import os
import random

# ================= KONFIGURASI =================
# Path Model (Pastikan path ini sesuai dengan output Step 3)
MODEL_PATH = '/kaggle/working/bdd_mot_project/yolov8s_run_test/weights/best.pt'

# Path Video Test
# Kita ambil salah satu video dari folder train asli (karena kita tidak mindahin video ke folder val)
VIDEO_DIR = "/kaggle/input/driving-video-with-object-tracking/bdd100k_videos_train_00/bdd100k/videos/train"

# Pilih video acak untuk tes (atau ganti nama file spesifik jika mau)
# Kita cari video yang ada di list validasi (opsional, acak saja untuk demo)
all_videos = [f for f in os.listdir(VIDEO_DIR) if f.endswith('.mov')]
TEST_VIDEO_NAME = random.choice(all_videos) 
SOURCE_VIDEO_PATH = os.path.join(VIDEO_DIR, TEST_VIDEO_NAME)

# Output Path
OUTPUT_VIDEO_PATH = f"/kaggle/working/output_tracking_{TEST_VIDEO_NAME.replace('.mov', '.mp4')}"

print(f"üé¨ Processing Video: {TEST_VIDEO_NAME}")
print(f"üß† Loading Model: {MODEL_PATH}")

# ================= TRACKING PIPELINE =================
def run_tracking():
    # 1. Load Model
    if not os.path.exists(MODEL_PATH):
        print("‚ùå Error: Model path tidak ditemukan. Pastikan training Step 3 sukses.")
        return

    model = YOLO(MODEL_PATH)

    # 2. Setup Video Info
    video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)
    
    # 3. Setup Annotators (Supervision)
    # TraceAnnotator: Menggambar jejak garis gerakan
    trace_annotator = sv.TraceAnnotator(
        trace_length=60, 
        thickness=2
    )
    # BoxAnnotator: Menggambar kotak
    box_annotator = sv.BoxAnnotator(
        thickness=2
    )
    # LabelAnnotator: Menulis Class Name + Track ID
    label_annotator = sv.LabelAnnotator(
        text_scale=0.5,
        text_thickness=1,
        text_padding=5
    )

    # 4. Open Video Sink (Writer)
    # Kita limit 300 frame (10 detik) saja agar proses cepat untuk demo
    MAX_FRAMES = 300 
    
    with sv.VideoSink(target_path=OUTPUT_VIDEO_PATH, video_info=video_info) as sink:
        
        # Loop Inference menggunakan Generator model.track
        results_generator = model.track(
            source=SOURCE_VIDEO_PATH, 
            persist=True,        # PENTING: Mengaktifkan memori antar-frame untuk tracking
            tracker="bytetrack.yaml", # Default tracker config
            conf=0.3,            # Confidence threshold
            iou=0.5,             # IoU threshold
            stream=True,         # Generator mode (hemat memori)
            verbose=False
        )
        
        print("üöÄ Starting Inference & Rendering...")
        
        for i, result in enumerate(results_generator):
            if i >= MAX_FRAMES: 
                break
                
            # Konversi hasil YOLO ke format Supervision
            detections = sv.Detections.from_ultralytics(result)
            
            # Ambil Track ID (jika ada)
            if result.boxes.id is not None:
                detections.tracker_id = result.boxes.id.cpu().numpy().astype(int)
                
            # Filter class (opsional, misal hanya mobil & orang)
            # detections = detections[detections.class_id != 0] 

            # Construct Labels: "ID: Class Conf"
            labels = []
            for tracker_id, class_id, confidence in zip(detections.tracker_id, detections.class_id, detections.confidence):
                class_name = model.model.names[class_id]
                labels.append(f"#{tracker_id} {class_name} {confidence:.2f}")

            # Annotate Frame
            frame = result.orig_img.copy()
            
            # Gambar Jejak dulu (di bawah kotak)
            annotated_frame = trace_annotator.annotate(
                scene=frame, detections=detections
            )
            # Gambar Kotak
            annotated_frame = box_annotator.annotate(
                scene=annotated_frame, detections=detections
            )
            # Gambar Label
            annotated_frame = label_annotator.annotate(
                scene=annotated_frame, detections=detections, labels=labels
            )

            # Simpan Frame ke Video
            sink.write_frame(annotated_frame)
            
            if i % 50 == 0:
                print(f"   Processed frame {i}/{MAX_FRAMES}...")

    print(f"‚úÖ Selesai! Video tersimpan di: {OUTPUT_VIDEO_PATH}")

if __name__ == "__main__":
    run_tracking()

üé¨ Processing Video: 015fe6c9-48a58255.mov
üß† Loading Model: /kaggle/working/bdd_mot_project/yolov8s_run_test/weights/best.pt
‚ùå Error: Model path tidak ditemukan. Pastikan training Step 3 sukses.


In [7]:
from IPython.display import HTML
from base64 import b64encode

def show_video(video_path):
    if not os.path.exists(video_path):
        print("Video not found.")
        return
        
    mp4 = open(video_path, 'rb').read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f"""
    <video width=640 controls>
          <source src="{data_url}" type="video/mp4">
    </video>
    """)

# Tampilkan video
show_video(OUTPUT_VIDEO_PATH)

Video not found.
