Check contents of yaml file

In [None]:
!cat "/content/drive/MyDrive/Colab Notebooks/dataset/data.yaml"


train: /content/drive/MyDrive/Colab Notebooks/dataset/train/images
val: /content/drive/MyDrive/Colab Notebooks/dataset/val/images
test: /content/drive/MyDrive/Colab Notebooks/dataset/test/images

nc: 7
names: ['book', 'books', 'monitor', 'office-chair','whiteboard','table','tv' ]

rewrite yaml file if necessary depending on your folders/path

In [None]:
with open('/content/drive/MyDrive/Colab Notebooks/dataset/data.yaml', 'w') as f:
    f.write("""train: /content/drive/MyDrive/Colab Notebooks/dataset/train/images
val: /content/drive/MyDrive/Colab Notebooks/dataset/val/images
test: /content/drive/MyDrive/Colab Notebooks/dataset/test/images

nc: 7
names: ['book', 'books', 'monitor', 'office-chair','whiteboard','table','tv' ]
""".strip())


In [None]:
import os, glob, random, shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

If using HEIC images - Converting HEIC images to jpg

In [None]:
# HEIC/HEIF support with robust fallbacks
import sys, subprocess, importlib

def pip_install(pkg):
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pkg])

# Make sure these are available
for pkg in ["pillow-heif", "pyheif", "imageio"]:
    try:
        importlib.import_module(pkg.replace("-", "_"))
    except Exception:
        pip_install(pkg)

# Register HEIF/HEIC opener for PIL
from pillow_heif import register_heif_opener
register_heif_opener()

# Sanity check
import PIL, pillow_heif
print("PIL:", PIL.__version__, "| pillow-heif:", pillow_heif.__version__)


PIL: 11.3.0 | pillow-heif: 1.1.0


In [None]:
import os, shutil, numpy as np
from pathlib import Path
from PIL import Image, ImageOps, UnidentifiedImageError

# Fallback decoders
import importlib
HAS_PYHEIF = importlib.util.find_spec("pyheif") is not None
if HAS_PYHEIF:
    import pyheif
import imageio.v3 as iio

SRC_DIR = "/content/drive/MyDrive/Colab Notebooks/Images"  # <-- your source images
DST_DIR = "/content/drive/MyDrive/Colab Notebooks/Images_Converted"    # <-- new folder to use
os.makedirs(DST_DIR, exist_ok=True)

IMG_OK_EXTS = {".jpg",".jpeg",".png",".webp",".bmp",".tif",".tiff"}
HEIC_EXTS   = {".heic", ".heif"}  # case-insensitive check below

def heic_to_jpg(src_path, dst_path):
    try:
        with Image.open(src_path) as im:
            im = ImageOps.exif_transpose(im).convert("RGB")
            im.save(dst_path, quality=95)
        return True
    except Exception as e_pil:
        pass


cnt_ok = cnt_copy = cnt_fail = 0
for p in Path(SRC_DIR).glob("*"):
    if not p.is_file():
        continue
    suffix = p.suffix.lower()
    base   = p.stem

    if suffix in HEIC_EXTS or suffix.upper() in {".HEIC", ".HEIF"}:
        out = Path(DST_DIR) / f"{base}.jpg"   # keep basename for label pairing
        if heic_to_jpg(str(p), str(out)):
            print("HEIC->JPG:", p.name, "->", out.name)
            cnt_ok += 1
        else:
            cnt_fail += 1
    elif suffix in IMG_OK_EXTS:
        shutil.copy2(str(p), str(Path(DST_DIR) / p.name))
        cnt_copy += 1
    else:
        # Ignore videos or unsupported files; log if you want:
        # print("[skip non-image]", p.name)
        pass

print(f"\nConverted HEIC: {cnt_ok}, Copied others: {cnt_copy}, Failed: {cnt_fail}")
print("Converted images in:", DST_DIR)


HEIC->JPG: IMG_4601.HEIC -> IMG_4601.jpg
HEIC->JPG: IMG_4602.HEIC -> IMG_4602.jpg
HEIC->JPG: IMG_4603.HEIC -> IMG_4603.jpg
HEIC->JPG: IMG_4604.HEIC -> IMG_4604.jpg
HEIC->JPG: IMG_4605.HEIC -> IMG_4605.jpg
HEIC->JPG: IMG_4606.HEIC -> IMG_4606.jpg
HEIC->JPG: IMG_4607.HEIC -> IMG_4607.jpg
HEIC->JPG: IMG_4608.HEIC -> IMG_4608.jpg
HEIC->JPG: IMG_4609.HEIC -> IMG_4609.jpg
HEIC->JPG: IMG_4610.HEIC -> IMG_4610.jpg
HEIC->JPG: IMG_4611.HEIC -> IMG_4611.jpg
HEIC->JPG: IMG_4612.HEIC -> IMG_4612.jpg
HEIC->JPG: IMG_4613.HEIC -> IMG_4613.jpg
HEIC->JPG: IMG_4614.HEIC -> IMG_4614.jpg
HEIC->JPG: IMG_4615.HEIC -> IMG_4615.jpg
HEIC->JPG: IMG_4616.HEIC -> IMG_4616.jpg
HEIC->JPG: IMG_4617.HEIC -> IMG_4617.jpg
HEIC->JPG: IMG_4618.HEIC -> IMG_4618.jpg
HEIC->JPG: IMG_4619.HEIC -> IMG_4619.jpg
HEIC->JPG: IMG_4620.HEIC -> IMG_4620.jpg
HEIC->JPG: IMG_4621.HEIC -> IMG_4621.jpg
HEIC->JPG: IMG_4622.HEIC -> IMG_4622.jpg
HEIC->JPG: IMG_4623.HEIC -> IMG_4623.jpg
HEIC->JPG: IMG_4624.HEIC -> IMG_4624.jpg
HEIC->JPG: IMG_4

Train Test Validation Split for the dataset

In [None]:
IMAGES_DIR = "/content/drive/MyDrive/Colab Notebooks/Images_Converted"       # <— use the converted images
LABELS_DIR = "/content/drive/MyDrive/Colab Notebooks/Labels"
OUTPUT_DIR = "/content/drive/MyDrive/Colab Notebooks/dataset"

In [None]:
# Split ratios
TRAIN_RATIO = 0.7
VAL_RATIO = 0.2
TEST_RATIO = 0.1

def get_image_label_pairs(images_dir, labels_dir):
    """
    Get matching image-label pairs
    """
    image_extensions = ['*.png', '*.heic', '*.jpg', '*.jpeg']
    image_files = []

    for ext in image_extensions:
        image_files.extend(glob.glob(os.path.join(images_dir, ext)))

    pairs = []
    for image_path in image_files:
        # Get filename without extension
        base_name = Path(image_path).stem
        label_path = os.path.join(labels_dir, f"{base_name}.txt")

        if os.path.exists(label_path):
            pairs.append((image_path, label_path))
        else:
            print(f"Warning: No label file found for {image_path}")

    print(f"Found {len(pairs)} image-label pairs")
    return pairs

def create_split_directories(output_dir):
    """
    Create directory structure for train/val/test splits
    """
    splits = ['train', 'val', 'test']
    subdirs = ['images', 'labels']

    for split in splits:
        for subdir in subdirs:
            dir_path = os.path.join(output_dir, split, subdir)
            os.makedirs(dir_path, exist_ok=True)
            print(f"Created directory: {dir_path}")

def split_dataset(pairs, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1, random_seed=42):
    """
    Split image-label pairs into train/val/test
    """
    random.seed(random_seed)
    random.shuffle(pairs)

    total = len(pairs)
    train_end = int(total * train_ratio)
    val_end = train_end + int(total * val_ratio)

    train_pairs = pairs[:train_end]
    val_pairs = pairs[train_end:val_end]
    test_pairs = pairs[val_end:]

    print(f"Split sizes:")
    print(f"  Train: {len(train_pairs)} ({len(train_pairs)/total*100:.1f}%)")
    print(f"  Val:   {len(val_pairs)} ({len(val_pairs)/total*100:.1f}%)")
    print(f"  Test:  {len(test_pairs)} ({len(test_pairs)/total*100:.1f}%)")

    return train_pairs, val_pairs, test_pairs

def copy_files(pairs, split_name, output_dir):
    """
    Copy image and label files to the appropriate split directory
    """
    images_dest = os.path.join(output_dir, split_name, 'images')
    labels_dest = os.path.join(output_dir, split_name, 'labels')

    for image_path, label_path in pairs:
        # Copy image file
        image_filename = os.path.basename(image_path)
        shutil.copy2(image_path, os.path.join(images_dest, image_filename))

        # Copy label file
        label_filename = os.path.basename(label_path)
        shutil.copy2(label_path, os.path.join(labels_dest, label_filename))

    print(f"Copied {len(pairs)} pairs to {split_name} split")

def main():
    """
    Main function to split the dataset
    """
    print("Starting dataset split...")

    # Get image-label pairs
    pairs = get_image_label_pairs(IMAGES_DIR, LABELS_DIR)

    if len(pairs) == 0:
        print("No matching image-label pairs found!")
        return

    # Create output directory structure
    create_split_directories(OUTPUT_DIR)

    # Split the dataset
    train_pairs, val_pairs, test_pairs = split_dataset(
        pairs, TRAIN_RATIO, VAL_RATIO, TEST_RATIO
    )

    # Copy files to respective directories
    copy_files(train_pairs, 'train', OUTPUT_DIR)
    copy_files(val_pairs, 'val', OUTPUT_DIR)
    copy_files(test_pairs, 'test', OUTPUT_DIR)

    print("\nDataset split completed!")
    print(f"Output directory: {OUTPUT_DIR}")
    print("\nDirectory structure:")
    print("dataset/")
    print("├── train/")
    print("│   ├── images/")
    print("│   └── labels/")
    print("├── val/")
    print("│   ├── images/")
    print("│   └── labels/")
    print("└── test/")
    print("    ├── images/")
    print("    └── labels/")

# Run the split
if __name__ == "__main__":
    main()


Starting dataset split...
Found 125 image-label pairs
Created directory: /content/drive/MyDrive/Colab Notebooks/dataset/train/images
Created directory: /content/drive/MyDrive/Colab Notebooks/dataset/train/labels
Created directory: /content/drive/MyDrive/Colab Notebooks/dataset/val/images
Created directory: /content/drive/MyDrive/Colab Notebooks/dataset/val/labels
Created directory: /content/drive/MyDrive/Colab Notebooks/dataset/test/images
Created directory: /content/drive/MyDrive/Colab Notebooks/dataset/test/labels
Split sizes:
  Train: 87 (69.6%)
  Val:   25 (20.0%)
  Test:  13 (10.4%)
Copied 87 pairs to train split
Copied 25 pairs to val split
Copied 13 pairs to test split

Dataset split completed!
Output directory: /content/drive/MyDrive/Colab Notebooks/dataset

Directory structure:
dataset/
├── train/
│   ├── images/
│   └── labels/
├── val/
│   ├── images/
│   └── labels/
└── test/
    ├── images/
    └── labels/


Training the YOLO model

In [None]:
!pip install torch --quiet
!pip install ultralytics --upgrade --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.1 MB[0m [31m43.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from ultralytics import YOLO

# Load a pretrained YOLOv8 model
model = YOLO('/content/yolov8s.pt') #change this based on the model you want to compare

# Train the model
model.train(
    data='/content/drive/MyDrive/Colab Notebooks/dataset/data.yaml',
    epochs=100,
    imgsz=640,
    batch=32,
    cache=True
)


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to '/content/yolov8s.pt': 100% ━━━━━━━━━━━━ 21.5MB 126.6MB/s 0.2s
Ultralytics 8.3.195 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/Colab Notebooks/dataset/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasi

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([2, 3, 4, 5, 6, 7])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7e38144776e0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
     

Alternative retrain same model with more heavy augmentation

In [None]:
from ultralytics import YOLO
model = YOLO('yolov8s.pt')
base_args = dict(
    data='/content/drive/MyDrive/Colab Notebooks/dataset/data.yaml',
    imgsz=768, epochs=250, batch=16,
    lr0=0.003, patience=50, workers=2, cache=True,
    mosaic=0.8, close_mosaic=10, mixup=0.15,
    hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, scale=0.5, shear=2.0, perspective=0.001,
)
res = model.train(**base_args)


Ultralytics 8.3.195 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/Colab Notebooks/dataset/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=250, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=768, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.003, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.15, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=0.8, multi_scale=False, name=train3, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=50, perspec

Performance metrics

In [None]:
from ultralytics import YOLO

model = YOLO('/content/runs/detect/train3/weights/best.pt')  # or last.pt

results = model.val(  # same data & size you trained with (change if needed)
    data='/content/drive/MyDrive/Colab Notebooks/dataset/data.yaml',
    imgsz=640,
    batch=32,
    iou=0.7,         # IoU threshold for mAP
    conf=0.001,      # low conf to compute PR curve properly
    split='val'      # or 'test' if you defined a test split in YAML
)

print({
    "mAP50-95": results.box.map,       # mean AP @ IoU 0.50:0.95
    "mAP50":    results.box.map50,     # AP @ IoU 0.50
    "mAP75":    results.box.map75,     # AP @ IoU 0.75
    "per_class_mAP": results.box.maps, # list per class (same order as your YAML names)
    "speed_ms": results.speed          # dict: preprocess/inference/NMS times
})

Ultralytics 8.3.195 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,128,293 parameters, 0 gradients, 28.5 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.3±0.1 ms, read: 641.1±71.1 MB/s, size: 4396.5 KB)
[K[34m[1mval: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/val/labels.cache... 25 images, 2 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 25/25 18.0Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 1/1 0.3it/s 3.8s
                   all         25         49      0.789      0.756      0.771      0.475
               monitor          5          6      0.827      0.801      0.816      0.523
          office-chair         16         29      0.768      0.552      0.671      0.397
            whiteboard          5          5      0.994          1      0.995      0.573
                 table          6          7      0.471      0.429      0.376    

In [None]:
model.val(plots=True, save_json=True)  # saves confusion_matrix.png, PR curves, etc.
print(model.names)

Ultralytics 8.3.195 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.4±0.1 ms, read: 618.7±81.7 MB/s, size: 4545.1 KB)
[K[34m[1mval: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/val/labels.cache... 25 images, 2 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 25/25 46.2Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 0.4it/s 4.7s
                   all         25         49      0.809      0.701      0.765      0.505
               monitor          5          6      0.901      0.667      0.779      0.572
          office-chair         16         29      0.737      0.552      0.657      0.376
            whiteboard          5          5      0.971          1      0.995       0.63
                 table          6          7      0.446      0.286        0.4      0.251
                    tv          1          2      0.988          1      0.

In [None]:
# Get class names from the model
names = model.model.names  # dict: {class_id: name}

for i, ap in enumerate(results.box.maps):
    print(f"{names[i]}: AP50-95={ap:.3f}")


book: AP50-95=0.475
books: AP50-95=0.475
monitor: AP50-95=0.523
office-chair: AP50-95=0.397
whiteboard: AP50-95=0.573
table: AP50-95=0.183
tv: AP50-95=0.698


# **Copy runs to google drive**

In [None]:
# Copy all runs to Drive
source = '/content/runs'
destination = '/content/drive/MyDrive/Colab Notebooks/AIAND runs/yolov5su'

if os.path.exists(source):
    shutil.copytree(source, destination, dirs_exist_ok=True)
    print(f"Runs copied to {destination}")

Runs copied to /content/drive/MyDrive/Colab Notebooks/AIAND runs/yolov5su


Count of instances per class in the training dataset

In [None]:
import os, glob
from collections import Counter

LABELS_DIR = "/content/drive/MyDrive/Colab Notebooks/dataset/train/labels"
counts = Counter()
for p in glob.glob(os.path.join(LABELS_DIR, "*.txt")):
    with open(p) as f:
        for ln in f:
            if ln.strip():
                cls = int(float(ln.split()[0]))
                counts[cls] += 1
print(counts)


Counter({3: 115, 5: 31, 2: 13, 4: 13, 6: 9, 1: 6, 0: 2})


combining class book and books (remapping all to class 1)

In [None]:
DATASET_DIR = "/content/drive/MyDrive/Colab Notebooks/dataset"

OLD_ID = 0   # book
NEW_ID = 1   # books

SPLITS = ["train", "val", "test"]  # use 'val' (not 'valid')

def count_classes_in_dir(labels_dir):
    c = Counter()
    for p in Path(labels_dir).glob("*.txt"):
        with open(p, "r", encoding="utf-8") as f:
            for ln in f:
                ln = ln.strip()
                if not ln:
                    continue
                parts = ln.split()
                try:
                    cls = int(float(parts[0]))
                    c[cls] += 1
                except Exception:
                    pass
    return c

def remap_file(txt_path, old_id=OLD_ID, new_id=NEW_ID, make_backup=True):
    txt_path = Path(txt_path)
    with open(txt_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    changed = 0
    out_lines = []
    for ln in lines:
        s = ln.strip()
        if not s:
            out_lines.append(ln)
            continue
        parts = s.split()
        try:
            cls = int(float(parts[0]))
        except Exception:
            out_lines.append(ln)  # keep as-is if malformed
            continue

        if cls == old_id:
            parts[0] = str(new_id)
            changed += 1
            new_line = " ".join(parts) + "\n"
            out_lines.append(new_line)
        else:
            out_lines.append(ln)

    if changed > 0:
        if make_backup:
            bak = txt_path.with_suffix(".txt.bak")
            if not bak.exists():
                shutil.copy2(txt_path, bak)
        with open(txt_path, "w", encoding="utf-8") as f:
            f.writelines(out_lines)
    return changed

def remap_split(split):
    labels_dir = Path(DATASET_DIR) / split / "labels"
    if not labels_dir.exists():
        print(f"[skip] {labels_dir} not found")
        return (Counter(), Counter(), 0)

    before = count_classes_in_dir(labels_dir)

    total_changed = 0
    for txt in labels_dir.glob("*.txt"):
        total_changed += remap_file(txt)

    after = count_classes_in_dir(labels_dir)

    # remove Ultralytics cache (forces refresh)
    cache = labels_dir.with_suffix(".cache")  # e.g., .../labels.cache
    if cache.exists():
        try:
            cache.unlink()
            print(f"Deleted cache: {cache}")
        except Exception as e:
            print(f"[warn] Could not delete {cache}: {e}")

    return before, after, total_changed

def main():
    grand_before = Counter()
    grand_after = Counter()
    grand_changed = 0

    for split in SPLITS:
        print(f"\n=== Split: {split} ===")
        b, a, ch = remap_split(split)
        print("Before:", dict(sorted(b.items())))
        print("After :", dict(sorted(a.items())))
        print(f"Changed boxes in {split}: {ch}")
        grand_before += b
        grand_after += a
        grand_changed += ch

    print("\n=== TOTAL ===")
    print("Before:", dict(sorted(grand_before.items())))
    print("After :", dict(sorted(grand_after.items())))
    print(f"Total boxes changed (0 → 1): {grand_changed}")

if __name__ == "__main__":
    main()


=== Split: train ===
Deleted cache: /content/drive/MyDrive/Colab Notebooks/dataset/train/labels.cache
Before: {0: 2, 1: 6, 2: 13, 3: 115, 4: 13, 5: 31, 6: 18, 7: 9}
After : {1: 8, 2: 13, 3: 115, 4: 13, 5: 31, 6: 18, 7: 9}
Changed boxes in train: 2

=== Split: val ===
Deleted cache: /content/drive/MyDrive/Colab Notebooks/dataset/val/labels.cache
Before: {2: 6, 3: 29, 4: 5, 5: 7, 6: 3, 7: 2}
After : {2: 6, 3: 29, 4: 5, 5: 7, 6: 3, 7: 2}
Changed boxes in val: 0

=== Split: test ===
Before: {2: 1, 3: 20, 4: 2, 5: 4, 6: 4}
After : {2: 1, 3: 20, 4: 2, 5: 4, 6: 4}
Changed boxes in test: 0

=== TOTAL ===
Before: {0: 2, 1: 6, 2: 20, 3: 164, 4: 20, 5: 42, 6: 25, 7: 11}
After : {1: 8, 2: 20, 3: 164, 4: 20, 5: 42, 6: 25, 7: 11}
Total boxes changed (0 → 1): 2


Combine roboflow + custom

In [None]:
from google.colab import files
uploaded = files.upload()

Saving Roboflow.zip to Roboflow.zip


In [None]:
!unzip Roboflow.zip -d /content/dataset

Archive:  Roboflow.zip
   creating: /content/dataset/test/
  inflating: /content/dataset/__MACOSX/._test  
   creating: /content/dataset/test/images/
  inflating: /content/dataset/__MACOSX/test/._images  
   creating: /content/dataset/test/labels/
  inflating: /content/dataset/__MACOSX/test/._labels  
  inflating: /content/dataset/test/images/50618483515516235482_mp4-0137_jpg.rf.b0e3b52b65549d0f0ab56c4c41e19704.jpg  
  inflating: /content/dataset/__MACOSX/test/images/._50618483515516235482_mp4-0137_jpg.rf.b0e3b52b65549d0f0ab56c4c41e19704.jpg  
  inflating: /content/dataset/test/images/IMG_6906_jpg.rf.0129d0eee932a15accc63059ec602970.jpg  
  inflating: /content/dataset/__MACOSX/test/images/._IMG_6906_jpg.rf.0129d0eee932a15accc63059ec602970.jpg  
  inflating: /content/dataset/test/images/50618483515516235482_mp4-0048_jpg.rf.a5e8a64645ea0a6ee9a42ce5c6a82fa7.jpg  
  inflating: /content/dataset/__MACOSX/test/images/._50618483515516235482_mp4-0048_jpg.rf.a5e8a64645ea0a6ee9a42ce5c6a82fa7.jpg 

In [None]:
import os, shutil, glob, yaml
from pathlib import Path

# ---------- INPUTS ----------
# Session dataset (note 'valid' instead of 'val'):
sess_root = Path("/content/dataset")
sess_split_map = {"train":"train", "val":"valid", "test":"test"}   # maps canonical -> actual folder name
# Drive dataset:
drv_root  = Path("/content/drive/MyDrive/Colab Notebooks/dataset")
drv_split_map  = {"train":"train", "val":"val", "test":"test"}

# Combined (persistent) output:
out_root = Path("/content/drive/MyDrive/Colab Notebooks/combined_dataset")

# Classes (as you provided)
nc = 7
names = ['book', 'books', 'monitor', 'office-chair', 'whiteboard', 'table', 'tv']


In [None]:
# ---------- HELPERS ----------
def copy_pairwise_yolo(src_img_dir: Path, src_lbl_dir: Path, dst_img_dir: Path, dst_lbl_dir: Path, prefix: str):
    """Copy images and their YOLO labels; rename with a prefix to avoid collisions."""
    dst_img_dir.mkdir(parents=True, exist_ok=True)
    dst_lbl_dir.mkdir(parents=True, exist_ok=True)

    img_exts = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}
    imgs = [p for p in src_img_dir.rglob("*") if p.suffix.lower() in img_exts]

    copied = 0
    for img in imgs:
        stem = img.stem
        lbl = src_lbl_dir / f"{stem}.txt"

        # New names with prefix
        new_img = dst_img_dir / f"{prefix}{stem}{img.suffix.lower()}"
        new_lbl = dst_lbl_dir / f"{prefix}{stem}.txt"

        # Copy image
        shutil.copy2(img, new_img)

        # Copy label if exists, else create empty (optional; comment out to skip)
        if lbl.exists():
            shutil.copy2(lbl, new_lbl)
        else:
            # If your test set has no labels, that's OK; skip creating empty labels if you prefer
            new_lbl.write_text("")
        copied += 1
    return copied

def joinp(*a): return Path(*a)

# ---------- MERGE ----------
splits = ["train","val","test"]

# Build canonical input dirs for SESSION dataset
sess_paths = {
    s: {
        "img": joinp(sess_root, sess_split_map[s], "images"),
        "lbl": joinp(sess_root, sess_split_map[s], "labels"),
    } for s in splits
}

# Build canonical input dirs for DRIVE dataset
drv_paths = {
    s: {
        "img": joinp(drv_root, drv_split_map[s], "images"),
        "lbl": joinp(drv_root, drv_split_map[s], "labels"),
    } for s in splits
}

# Output dirs
out_paths = {
    s: {
        "img": joinp(out_root, s, "images"),
        "lbl": joinp(out_root, s, "labels"),
    } for s in splits
}

# Do the copying
report = {}
for s in splits:
    copied_a = copy_pairwise_yolo(sess_paths[s]["img"], sess_paths[s]["lbl"], out_paths[s]["img"], out_paths[s]["lbl"], prefix="a_")
    copied_b = copy_pairwise_yolo(drv_paths[s]["img"],  drv_paths[s]["lbl"],  out_paths[s]["img"], out_paths[s]["lbl"], prefix="b_")
    report[s] = (copied_a, copied_b)

# ---------- WRITE data.yaml ----------
data_yaml = {
    "path": str(out_root),  # optional; many tools ignore this
    "train": str(out_paths["train"]["img"].parent),  # directory with images/labels subfolders
    "val":   str(out_paths["val"]["img"].parent),
    "test":  str(out_paths["test"]["img"].parent),
    "nc": nc,
    "names": names,
}
with open(out_root / "data.yaml", "w") as f:
    yaml.safe_dump(data_yaml, f, sort_keys=False)

# ---------- QUICK COUNTS ----------
def count_imgs_lbls(root_dir: Path):
    imgs = glob.glob(str(root_dir / "images" / "*"))
    lbls = glob.glob(str(root_dir / "labels" / "*.txt"))
    return len(imgs), len(lbls)

print("=== Merge summary (images copied: session, drive) ===")
for s in splits:
    print(f"{s:5s}: {report[s][0]} from session, {report[s][1]} from drive")
print("\n=== Final counts in combined_dataset ===")
for s in splits:
    cimg, clbl = count_imgs_lbls(out_paths[s]["img"].parent)
    print(f"{s:5s}: images={cimg}, labels={clbl}")

print(f"\nWrote: {out_root/'data.yaml'}")


=== Merge summary (images copied: session, drive) ===
train: 597 from session, 87 from drive
val  : 57 from session, 25 from drive
test : 29 from session, 13 from drive

=== Final counts in combined_dataset ===
train: images=684, labels=684
val  : images=82, labels=82
test : images=42, labels=42

Wrote: /content/drive/MyDrive/Colab Notebooks/combined_dataset/data.yaml


In [None]:
from ultralytics import YOLO

# Load a pretrained YOLOv8 model
model = YOLO('/content/yolov5s.pt') #change this based on the model you want to compare

# Train the model
model.train(
    data=str(out_root/'data.yaml'),
    epochs=100,
    imgsz=640,
    batch=32,
    patience=50,
    cache=True
)


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
PRO TIP 💡 Replace 'model=/content/yolov5s.pt' with new 'model=/content/yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov5su.pt to '/content/yolov5su.pt': 100% ━━━━━━━━━━━━ 17.7MB 128.2MB/s 0.1s
Ultralytics 8.3.199 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosa

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([1, 2, 3, 4, 5, 6])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7a87d82f0fb0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
     

In [None]:
from ultralytics import YOLO

model = YOLO('/content/runs/detect/train/weights/best.pt')  # or last.pt

results = model.val(  # same data & size you trained with (change if needed)
    data=str(out_root/'data.yaml'),
    imgsz=640,
    batch=32,
    iou=0.7,         # IoU threshold for mAP
    conf=0.001,      # low conf to compute PR curve properly
    split='val',      # or 'test' if you defined a test split in YAML
    plots=True,
    save_json=True
)

print({
    "mAP50-95": results.box.map,       # mean AP @ IoU 0.50:0.95
    "mAP50":    results.box.map50,     # AP @ IoU 0.50
    "mAP75":    results.box.map75,     # AP @ IoU 0.75
    "per_class_mAP": results.box.maps, # list per class (same order as your YAML names)
    "speed_ms": results.speed          # dict: preprocess/inference/NMS times
})

Ultralytics 8.3.199 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
YOLOv5s summary (fused): 84 layers, 9,114,245 parameters, 0 gradients, 23.8 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.3±0.1 ms, read: 389.7±342.4 MB/s, size: 1996.2 KB)
[K[34m[1mval: [0mScanning /content/drive/MyDrive/Colab Notebooks/combined_dataset/val/labels.cache... 82 images, 2 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 82/82 126.2Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 3/3 0.5it/s 6.2s
                   all         82        253      0.826      0.823      0.857      0.612
                 books          9        153      0.993      0.912      0.974      0.732
               monitor         46         50      0.918      0.893      0.937      0.895
          office-chair         23         36      0.809      0.707      0.791      0.537
            whiteboard          5          5      0.823          1   

In [None]:
# Get class names from the model
names = model.model.names  # dict: {class_id: name}

for i, ap in enumerate(results.box.maps):
    print(f"{names[i]}: AP50-95={ap:.3f}")


book: AP50-95=0.612
books: AP50-95=0.732
monitor: AP50-95=0.895
office-chair: AP50-95=0.537
whiteboard: AP50-95=0.505
table: AP50-95=0.254
tv: AP50-95=0.748


download the model

In [None]:
!zip -r yolov8n_training.zip /content/runs/detect/train3
files.download('yolov8n_training.zip')

**Testing the model**

In [6]:
from ultralytics import YOLO

# Load trained model
model = YOLO('/content/drive/MyDrive/Colab Notebooks/AIAND runs/detect/train3/weights/best.pt')

# Predict on a single test image
results = model.predict(source='/content/DEAKIN_Library-12-1.jpg', conf=0.1)

# Display the result
results[0].show()


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.



FileNotFoundError: /content/DEAKIN_Library-12-1.jpg does not exist

Test to speech audio output

In [1]:
!pip -q install gTTS ultralytics
!pip -q install gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import numpy as np, time, uuid, os
from gtts import gTTS
import gradio as gr

In [7]:
# Load your trained model
model = YOLO('/content/drive/MyDrive/Colab Notebooks/AIAND runs/detect/train3/weights/best.pt')
names = {int(k): v for k, v in model.names.items()} if hasattr(model, "names") else {}

def gen_tts_file(text: str) -> str:
    path = f"/content/{uuid.uuid4().hex}.mp3"
    gTTS(text=text, lang='en').save(path)
    return path

In [14]:
import time
from collections import deque
import numpy as np
import gradio as gr

# Tunables
CONF_THRESH   = {"office-chair": 0.80, "table": 0.15, "monitor": 0.70, "book": 0.65,"whiteboard": 0.10, "tv":0.10 }
MIN_AREA_FRAC = 0.012
IOU_NMS       = 0.60
PERSIST_N     = 5
PERSIST_M     = 3
COOLDOWN_SEC  = 3.0

# What to say per class

CLASS_PHRASES = {
    "office-chair": "Chair ahead",
    "monitor": "Monitor ahead",
    "book": "Book detected",
    "table": "Table in front",
    "whiteboard": "Whiteboard in front",
    "tv": "TV in front"
}

def init_state():
    return {
        "last_spoken": 0.0,
        "hist": {cls: deque(maxlen=PERSIST_N) for cls in CONF_THRESH.keys()}
    }

def box_area_xyxy(box):
    x1, y1, x2, y2 = [float(v) for v in box]
    return max(0.0, x2 - x1) * max(0.0, y2 - y1)

def detect_and_speak(frame, state):
    # ---- Defensive guards ----
    try:
        if state is None or "hist" not in state:
            state = init_state()
        if frame is None:
            # Return a blank image-shaped array to satisfy outputs
            return np.zeros((480, 640, 3), dtype=np.uint8), None, state

        # Ensure ndarray uint8 HxWx3
        if not isinstance(frame, np.ndarray):
            frame = np.array(frame)
        if frame.ndim == 2:  # grayscale -> RGB
            frame = np.stack([frame]*3, axis=-1)
        if frame.dtype != np.uint8:
            frame = frame.astype(np.uint8)

        H, W = frame.shape[:2]
        frame_area = float(H * W)

        # Run YOLO
        res = model.predict(
            source=frame,
            conf=min(CONF_THRESH.values()),
            iou=IOU_NMS,
            imgsz=640,
            verbose=False
        )[0]

        annotated = res.plot()  # numpy uint8 HxWx3

        # Reset per-frame hits
        frame_hits = {cls: 0 for cls in CONF_THRESH.keys()}

        if getattr(res, "boxes", None) is not None and len(res.boxes) > 0:
            cls_list = res.boxes.cls.tolist()
            conf_list = res.boxes.conf.tolist()
            xyxy_list = res.boxes.xyxy.cpu().numpy()

            for cls_i, conf_i, box in zip(cls_list, conf_list, xyxy_list):
                cls_name = names.get(int(cls_i), str(int(cls_i))).lower()
                if cls_name not in CONF_THRESH:
                    continue
                if conf_i < CONF_THRESH[cls_name]:
                    continue
                if box_area_xyxy(box) < MIN_AREA_FRAC * frame_area:
                    continue
                frame_hits[cls_name] += 1

        # Rolling persistence
        for cls in CONF_THRESH.keys():
            state["hist"][cls].append(1 if frame_hits[cls] > 0 else 0)

        now = time.time()
        audio_path = None

        def persistent(cls):
            window = state["hist"][cls]
            return len(window) == PERSIST_N and sum(window) >= PERSIST_M

        # Speak at most one phrase per cooldown window
        if now - state["last_spoken"] > COOLDOWN_SEC:
            for cls, phrase in CLASS_PHRASES.items():
                if persistent(cls):
                    audio_path = gen_tts_file(phrase)  # must return a real file path
                    state["last_spoken"] = now
                    # Optional: clear all to avoid back-to-back triggers
                    for c in state["hist"]:
                        state["hist"][c].clear()
                    break

        return annotated, audio_path, state

    except Exception as e:
        # Return a black frame + no audio, keep state so UI doesn't crash
        print("detect_and_speak error:", repr(e))
        return np.zeros((480, 640, 3), dtype=np.uint8), None, state


In [15]:
# Gradio UI: webcam
with gr.Blocks() as demo:
    gr.Markdown("## YOLO live camera with TTS")
    cam = gr.Image(sources=["webcam"], streaming=True, label="Webcam")
    out_img = gr.Image(label="Detections (annotated)")
    # 👇 Explicitly make the Audio output accept file paths reliably
    out_audio = gr.Audio(label="TTS", autoplay=True)
    state = gr.State(init_state())

    cam.stream(
        fn=detect_and_speak,
        inputs=[cam, state],
        outputs=[out_img, out_audio, state]
    )

demo.launch(debug=True)  # turn on for stack traces in Colab/console


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://62740aea09135c5072.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://62740aea09135c5072.gradio.live


