In [1]:
import fiftyone as fo
import fiftyone.zoo as foz
import os

# ==== CONFIGURATION ====
PROJECT_ROOT = os.path.dirname(os.getcwd())  # parent of 'notebooks'
DATASET_NAME = "coco-mini-all"
OUTPUT_DIR = os.path.join(PROJECT_ROOT, "data", "yolo_dataset")
MAX_SAMPLES = 5000  # you can increase later if you have more disk space

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ==== STEP 1: DOWNLOAD SMALL SUBSET FROM COCO ====
print(f"ðŸ“¥ Downloading {MAX_SAMPLES} samples from COCO-2017 (all classes)...")

dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="train",
    label_types=["detections"],
    max_samples=5000,
    dataset_name="coco-mini-all-5000",  # <-- use a new name
    shuffle=True,
)

print(f"âœ… Dataset '{DATASET_NAME}' loaded with {len(dataset)} samples.")

ðŸ“¥ Downloading 5000 samples from COCO-2017 (all classes)...
Downloading split 'train' to '/home/omar/fiftyone/coco-2017/train' if necessary
Found annotations at '/home/omar/fiftyone/coco-2017/raw/instances_train2017.json'
Sufficient images already downloaded
Existing download of split 'train' is sufficient
You are running the oldest supported major version of MongoDB. Please refer to https://deprecation.voxel51.com for deprecation notices. You can suppress this exception by setting your `database_validation` config parameter to `False`. See https://docs.voxel51.com/user_guide/config.html#configuring-a-mongodb-connection for more information
Loading 'coco-2017' split 'train'
 100% |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5000/5000 [29.7s elapsed, 0s remaining, 186.3 samples/s]      
Dataset 'coco-mini-all-5000' created
âœ… Dataset 'coco-mini-all' loaded with 5000 samples.


In [5]:
from glob import glob
import os

IMAGES_DIR = "../data/yolo_dataset/images"
all_images = glob(os.path.join(IMAGES_DIR, "**", "*.jpg"), recursive=True)
print("Total images in dataset:", len(all_images))

Total images in dataset: 5000


In [4]:
# ==== STEP 2: EXPORT TO YOLO FORMAT ====
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("ðŸ“¦ Exporting to YOLO format...")
dataset.export(
    export_dir=OUTPUT_DIR,
    dataset_type=fo.types.YOLOv5Dataset,  # compatible with YOLOv8
)

print("âœ… Export complete!")
print(f"Your dataset is ready at: {os.path.abspath(OUTPUT_DIR)}")

ðŸ“¦ Exporting to YOLO format...
Directory '/home/omar/Desktop/object-detection-video/object_detection_project/data/yolo_dataset' already exists; export will be merged with existing files
 100% |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5000/5000 [15.5s elapsed, 0s remaining, 345.7 samples/s]      
âœ… Export complete!
Your dataset is ready at: /home/omar/Desktop/object-detection-video/object_detection_project/data/yolo_dataset


In [6]:
print("Number of samples loaded in FiftyOne dataset:", len(dataset))

Number of samples loaded in FiftyOne dataset: 5000


In [4]:
# ==== STEP 3: (Optional) Visualize ====
session = fo.launch_app(dataset)
session.wait()  # keep UI open until you close it

Notebook sessions cannot wait
