# Explore a Roboflow Dataset

This notebook demonstrates how to download a dataset from Roboflow, parse its `data.yaml` configuration, count the class distribution of the training split, and visualize sample images with their bounding boxes.

In [None]:
# Setup & Download
# %pip install roboflow  # run once if the package is not installed in this kernel
import os

from roboflow import Roboflow

# Read the API key from the environment so the secret is never saved in the
# notebook source or its outputs.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY")
if not ROBOFLOW_API_KEY:
    raise RuntimeError(
        "Set the ROBOFLOW_API_KEY environment variable to your Roboflow API key "
        "before running this cell."
    )

rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = rf.workspace("lus-gabriel").project("hard-hat-sample-5g5ip")
version = project.version(2)
# Download version 2 of the project in the "yolov5" export format.
dataset = version.download("yolov5")
print(f"Dataset downloaded to: {dataset.location}")

In [None]:
# Parse data.yaml using load_dataset_config
from pathlib import Path

from modern_yolonas.data import load_dataset_config, YOLODetectionDataset

# Build the path from the folder Roboflow reported for the download instead of
# a hard-coded directory name, so this cell keeps working after a fresh
# Restart & Run All regardless of where the dataset was written.
data_yaml = Path(dataset.location) / "data.yaml"
if not data_yaml.is_file():
    raise FileNotFoundError(f"data.yaml not found at {data_yaml}")

cfg = load_dataset_config(str(data_yaml))
print(f"Dataset root:  {cfg.root}")
print(f"Num classes:   {cfg.num_classes}")
print(f"Class names:   {cfg.class_names}")
print(f"Train split:   {cfg.train_split}")
print(f"Val split:     {cfg.val_split}")

In [None]:
# Explore - count class distribution and visualize samples
import cv2
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

# Instantiate one dataset per split, both rooted at the directory from the parsed config.
train_raw = YOLODetectionDataset(root=cfg.root, split=cfg.train_split)
val_raw = YOLODetectionDataset(root=cfg.root, split=cfg.val_split)

print(f"Train images: {len(train_raw)}")
print(f"Val images:   {len(val_raw)}")

# Tally annotations per class id over the whole training split.
# The first element of each target row is read as the class id.
class_counts = Counter()
for sample_idx in range(len(train_raw)):
    _, boxes = train_raw.load_raw(sample_idx)
    class_counts.update(int(row[0]) for row in boxes)

print("\nTraining set class distribution:")
num_names = len(cfg.class_names)
for cls_id in sorted(class_counts):
    # Fall back to a synthetic name when the id has no entry in class_names.
    if cls_id < num_names:
        name = cfg.class_names[cls_id]
    else:
        name = f"class_{cls_id}"
    print(f"  {name}: {class_counts[cls_id]}")

# Show the first few training images with their annotated boxes drawn on top.
NUM_PREVIEW = 4  # maximum number of images to display
# Colors are RGB triples: drawing happens after the BGR -> RGB conversion below.
BOX_COLORS = [(0, 255, 0), (255, 0, 0), (0, 0, 255)]
LABEL_MIN_Y = 12  # keep label text inside the image for boxes touching the top edge

# Never index past the end of a split that has fewer than NUM_PREVIEW images.
num_shown = min(NUM_PREVIEW, len(train_raw))
if num_shown == 0:
    print("No training images to display.")
else:
    # squeeze=False keeps `axes` two-dimensional even when a single image is shown,
    # so iterating over axes[0] works for any num_shown >= 1.
    fig, axes = plt.subplots(1, num_shown, figsize=(5 * num_shown, 5), squeeze=False)
    for ax, idx in zip(axes[0], range(num_shown)):
        img, targets = train_raw.load_raw(idx)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]
        for t in targets:
            # Rows are treated as (class, cx, cy, w, h) with coordinates normalized
            # to [0, 1] -- assumption carried over from the scaling below; confirm
            # against load_raw.
            cls_id, cx, cy, bw, bh = t
            cls_id = int(cls_id)
            x1 = int((cx - bw / 2) * w)
            y1 = int((cy - bh / 2) * h)
            x2 = int((cx + bw / 2) * w)
            y2 = int((cy + bh / 2) * h)
            color = BOX_COLORS[cls_id % len(BOX_COLORS)]
            cv2.rectangle(img_rgb, (x1, y1), (x2, y2), color, 2)
            label = cfg.class_names[cls_id] if cls_id < len(cfg.class_names) else str(cls_id)
            # putText's origin is the text's bottom-left corner; clamp it so the
            # label is not drawn above the top of the image.
            label_y = max(y1 - 5, LABEL_MIN_Y)
            cv2.putText(img_rgb, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
        ax.imshow(img_rgb)
        ax.set_title(f"Image {idx} ({len(targets)} objects)")
        ax.axis("off")
    plt.tight_layout()
    plt.show()