In [None]:
!pip install -U ultralytics
import ultralytics
ultralytics.checks()  # Verify setup

Ultralytics 8.3.228 🚀 Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (NVIDIA L4, 22693MiB)
Setup complete ✅ (12 CPUs, 53.0 GB RAM, 38.2/112.6 GB disk)


In [None]:
import os
import json
from tqdm import tqdm

import json
from collections import defaultdict
from ultralytics import YOLO

# Inference (detection only)

In [None]:
# --- Configuration for the detection-only run -------------------------------

# Minimum confidence for a detection to be kept (forwarded to model.predict as `conf`).
CONFIDENCE_THRESHOLD = 0.25

# Trained checkpoint that run_inference() loads.
MODEL_PATH = "/content/drive/MyDrive/ZALO_AI/1st_version/yolo_dataset/yolo11sZalo/train/weights/best.pt"

# Root of the test set; every sub-folder is looked up for a 'drone_video.mp4'.
TEST_DATA_DIR = "/content/drive/MyDrive/ZALO_AI/public_test/samples"

# JSON file that receives the merged predictions.
# NOTE: the tracker cell further down rebinds OUTPUT_FILE; re-run this cell
# before calling run_inference() again, or it will write to the tracker's file.
OUTPUT_FILE = "yolo11s_zalo.json"

In [None]:
def _collect_video_bboxes(model, video_path, conf):
    """
    Run detection on one video and return its boxes as a flat list of dicts.

    Each dict has the keys "frame", "x1", "y1", "x2", "y2". "frame" is the
    0-based position of the result in the prediction stream; coordinates are
    rounded to the nearest integer.
    """
    # stream=True makes predict() return a generator, so results are consumed
    # frame by frame instead of being accumulated for the whole video.
    frame_results = model.predict(
        video_path,
        stream=True,
        conf=conf,
        verbose=False
    )

    bboxes = []
    for frame_idx, result in enumerate(frame_results):
        # Boxes come as [x1, y1, x2, y2]; a frame without detections gives an
        # empty array and therefore contributes nothing.
        for x1, y1, x2, y2 in result.boxes.xyxy.cpu().numpy():
            bboxes.append({
                "frame": frame_idx,
                "x1": int(round(x1)),
                "y1": int(round(y1)),
                "x2": int(round(x2)),
                "y2": int(round(y2))
            })
    return bboxes


def run_inference(model_path=None, test_data_dir=None, output_file=None, conf=None):
    """
    Run YOLO11 on test videos and write detections to JSON.

    Every sub-folder of the test directory that contains 'drone_video.mp4' is
    processed. The output file holds a list with one object per processed video:
    {"video_id": <folder name>, "detections": [{"bboxes": [...]}]}, where
    "detections" is an empty list when nothing was detected.

    Args:
        model_path: Checkpoint to load. Defaults to the notebook's MODEL_PATH.
        test_data_dir: Folder containing one sub-folder per video. Defaults to
            TEST_DATA_DIR.
        output_file: Destination JSON path. Defaults to OUTPUT_FILE.
        conf: Confidence threshold passed to the model. Defaults to
            CONFIDENCE_THRESHOLD.

    Returns:
        None. Problems (model load failure, missing directory, unreadable video,
        write failure) are reported with print() rather than raised.
    """
    # Fall back to the notebook-level config at call time, so a bare
    # run_inference() behaves exactly as it did before the parameters existed.
    model_path = MODEL_PATH if model_path is None else model_path
    test_data_dir = TEST_DATA_DIR if test_data_dir is None else test_data_dir
    output_file = OUTPUT_FILE if output_file is None else output_file
    conf = CONFIDENCE_THRESHOLD if conf is None else conf

    # load trained model
    try:
        model = YOLO(model_path)
        print(f"Successfully loaded model from {model_path}")
    except Exception as e:
        print(f"Error: Could not load model from {model_path}")
        print(e)
        return

    # list video folders; isdir() also rejects a path that exists but is a file,
    # which os.listdir() would otherwise turn into an uncaught NotADirectoryError
    if not os.path.isdir(test_data_dir):
        print(f"Error: Test data directory not found at: {test_data_dir}")
        return

    video_folders = sorted(
        name for name in os.listdir(test_data_dir)
        if os.path.isdir(os.path.join(test_data_dir, name))
    )

    if not video_folders:
        print(f"Error: No video folders found in {test_data_dir}")
        return

    print(f"Found {len(video_folders)} videos to process...")

    all_predictions = []
    skipped_videos = []  # folders that end up with no entry in the output

    for video_folder_name in tqdm(video_folders, desc="Processing videos"):
        video_path = os.path.join(test_data_dir, video_folder_name, 'drone_video.mp4')

        if not os.path.exists(video_path):
            print(f"Warning: 'drone_video.mp4' not found in {video_folder_name}, skipping.")
            skipped_videos.append(video_folder_name)
            continue

        try:
            video_bboxes = _collect_video_bboxes(model, video_path, conf)
        except Exception as e:
            # Best effort: one unreadable video must not abort the whole run.
            # Its partial detections are discarded and the video is left out.
            print(f"Error while processing video {video_path}: {e}")
            skipped_videos.append(video_folder_name)
            continue

        # Format: detections -> list of {bboxes}; empty list when nothing was found
        detections_list = [{"bboxes": video_bboxes}] if video_bboxes else []

        all_predictions.append({
            "video_id": video_folder_name,
            "detections": detections_list
        })

    if skipped_videos:
        # Individual warnings scroll past between progress-bar updates, so repeat
        # the list once here where it is easy to see.
        print(f"Warning: {len(skipped_videos)} video(s) are missing from the output: "
              f"{', '.join(skipped_videos)}")

    # save merged predictions
    try:
        print(f"\nSaving all {len(all_predictions)} video predictions to {output_file}...")
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_predictions, f, indent=4)
        print("Inference complete.")
    except Exception as e:
        print(f"Error: Could not write output JSON file: {e}")

if __name__ == "__main__":
    run_inference()

Successfully loaded model from /content/drive/MyDrive/ZALO_AI/1st_version/yolo_dataset/yolo11sZalo/train2/weights/best.pt
Found 6 videos to process...


Processing videos: 100%|██████████| 6/6 [09:24<00:00, 94.05s/it]


Saving all 6 video predictions to yolo11s_zalo.json...
Inference complete.





# Inference with tracker

In [None]:
# --- Configuration for the tracker run ---------------------------------------

# Checkpoint that process_dataset() loads.
MODEL_WEIGHTS = "/content/drive/MyDrive/ZALO_AI/1st_version/yolo_dataset/yolo11sZalo/train/weights/best.pt"

# Folder whose sub-folders are each looked up for a "drone_video.mp4".
DATASET_ROOT = "/content/drive/MyDrive/ZALO_AI/public_test/samples"

# Destination JSON for the tracked detections.
# NOTE: this rebinds the notebook-wide OUTPUT_FILE that the detection-only cell
# also reads, so calling run_inference() after this cell writes to this file.
OUTPUT_FILE = "yolo11s_track_zalo.json"

def _track_video(model, video_path, conf, iou):
    """
    Track objects through one video.

    Returns a dict mapping track id -> list of {"frame", "x1", "y1", "x2", "y2"}
    dicts, in the order the track ids were first seen. "frame" is the 0-based
    position of the result in the tracking stream.
    """
    frame_results = model.track(source=video_path, conf=conf, iou=iou, stream=True, verbose=False)

    track_history = defaultdict(list)
    for frame_index, r in enumerate(frame_results):
        boxes = r.boxes
        # Only boxes that carry a track id can be attributed to a track.
        if boxes is None or boxes.id is None:
            continue

        # Round to the nearest pixel before casting. A bare .int() truncates,
        # which disagrees with the int(round(...)) used by run_inference().
        coords = boxes.xyxy.round().int().cpu().tolist()
        track_ids = boxes.id.int().cpu().tolist()

        for track_id, (x1, y1, x2, y2) in zip(track_ids, coords):
            track_history[track_id].append({
                "frame": frame_index,
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2
            })
    return track_history


def process_dataset(dataset_root=None, output_file=None, model_weights=None, conf=0.3, iou=0.5):
    """
    Run the YOLO tracker on every video of the dataset and write one merged JSON.

    The output file holds a list with one object per processed video:
    {"video_id": <folder name>, "detections": [{"bboxes": [...]}, ...]}, with
    one "detections" entry per track id.

    Args:
        dataset_root: Folder containing one sub-folder per video. Defaults to the
            notebook's DATASET_ROOT.
        output_file: Destination JSON path. Defaults to OUTPUT_FILE.
        model_weights: Checkpoint to load. Defaults to MODEL_WEIGHTS.
        conf: Confidence threshold forwarded to model.track.
        iou: IoU threshold forwarded to model.track.

    Returns:
        None. A missing dataset root or a video that fails during tracking is
        reported with print(); a model that cannot be loaded still raises.
    """
    # Fall back to the notebook-level config at call time, so a bare
    # process_dataset() behaves as it did before the parameters existed.
    dataset_root = DATASET_ROOT if dataset_root is None else dataset_root
    output_file = OUTPUT_FILE if output_file is None else output_file
    model_weights = MODEL_WEIGHTS if model_weights is None else model_weights

    # 1) validate the dataset root first: it is cheap, and there is no point in
    #    loading the model if there is nothing to process
    if not os.path.isdir(dataset_root):
        print(f"Error: Dataset root '{dataset_root}' not found.")
        return

    # 2) load model
    model = YOLO(model_weights)

    # 3) find video folders (sorted for consistency); stray files are ignored so
    #    the reported count matches what is actually iterated
    video_folders = sorted(
        name for name in os.listdir(dataset_root)
        if os.path.isdir(os.path.join(dataset_root, name))
    )

    print(f"Found {len(video_folders)} folders. Starting processing...")

    # hold all video outputs
    all_videos_output = []
    skipped_videos = []  # folders whose video failed during tracking

    # 4) iterate through each folder
    for folder_name in tqdm(video_folders):
        video_path = os.path.join(dataset_root, folder_name, "drone_video.mp4")

        # skip if the video file doesn't exist
        if not os.path.exists(video_path):
            continue

        try:
            track_history = _track_video(model, video_path, conf, iou)
        except Exception as e:
            # Best effort, matching run_inference(): one bad video must not
            # throw away the results of every video processed before it.
            print(f"Error while processing video {video_path}: {e}")
            skipped_videos.append(folder_name)
            continue

        # folder name is the video_id (e.g., "drone_video_001")
        all_videos_output.append({
            "video_id": folder_name,
            "detections": [{"bboxes": bboxes} for bboxes in track_history.values()]
        })

    if skipped_videos:
        print(f"Warning: {len(skipped_videos)} video(s) are missing from the output: "
              f"{', '.join(skipped_videos)}")

    # 5) save merged JSON
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_videos_output, f, indent=3)

    print(f"\nProcessing complete! Results saved to {output_file}")

process_dataset()

Found 6 folders. Starting processing...


100%|██████████| 6/6 [21:24<00:00, 214.08s/it]


Processing complete! Results saved to yolo11s_track_zalo.json





# Visualize output

In [None]:
# Visual sanity check: run the tracker on a single training video with
# save=True so that Ultralytics writes the annotated result to disk.
# NOTE(review): this loads the VisDrone checkpoint, not the yolo11sZalo one used
# by the inference cells above - confirm that this is intended.
model = YOLO("/content/drive/MyDrive/ZALO_AI/VisDrone_val_Yolo/yolo11s/train/weights/best.pt")

results_generator = model.track(
    source="/content/drive/MyDrive/ZALO_AI/train/samples/Backpack_0/drone_video.mp4",
    conf=0.3,
    iou=0.5,
    save=True,
    stream=True,
    verbose=False  # otherwise one log line is printed per frame (10k+ for this video)
)

# With stream=True the call above only builds a generator; frames are processed
# (and saved) as it is consumed, so drain it even though the results are unused.
for r in results_generator:
    pass

Streaming output truncated to the last 5000 lines.
video 1/1 (frame 5469/10466) /content/drive/MyDrive/ZALO_AI/train/samples/Backpack_0/drone_video.mp4: 544x960 (no detections), 9.3ms
video 1/1 (frame 5470/10466) /content/drive/MyDrive/ZALO_AI/train/samples/Backpack_0/drone_video.mp4: 544x960 (no detections), 11.4ms
video 1/1 (frame 5471/10466) /content/drive/MyDrive/ZALO_AI/train/samples/Backpack_0/drone_video.mp4: 544x960 (no detections), 9.3ms
video 1/1 (frame 5472/10466) /content/drive/MyDrive/ZALO_AI/train/samples/Backpack_0/drone_video.mp4: 544x960 (no detections), 11.1ms
video 1/1 (frame 5473/10466) /content/drive/MyDrive/ZALO_AI/train/samples/Backpack_0/drone_video.mp4: 544x960 (no detections), 9.3ms
video 1/1 (frame 5474/10466) /content/drive/MyDrive/ZALO_AI/train/samples/Backpack_0/drone_video.mp4: 544x960 (no detections), 11.4ms
video 1/1 (frame 5475/10466) /content/drive/MyDrive/ZALO_AI/train/samples/Backpack_0/drone_video.mp4: 544x960 (no detections), 9.3ms
v