In [2]:
import numpy as np
import pandas as pd
import os
for dirname, _, filenames in os.walk("/home/cacc/Repositories/Datasets/VOC2007"):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [5]:
import os
import shutil
import xml.etree.ElementTree as ET
from pathlib import Path
from sklearn.model_selection import train_test_split

# ✅ Path config
ROOT = Path(".")
ANNOTATIONS = [
    "/home/cacc/Repositories/Dataset/VOC2007/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/Annotations",
    "/home/cacc/Repositories/Dataset/VOC2007/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations"
]
IMAGES = [
    "/home/cacc/Repositories/Dataset/VOC2007/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages",
    "/home/cacc/Repositories/Dataset/VOC2007/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages"
]
IMAGESETS_MAIN = "/home/cacc/Repositories/Dataset/VOC2007/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main"

# ✅ Create YOLO directory structure
for d in ['images/train', 'images/val', 'labels/train', 'labels/val']:
    (ROOT / d).mkdir(parents=True, exist_ok=True)

# ✅ VOC class names
VOC_CLASSES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
               'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
               'dog', 'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']
class_to_id = {name: i for i, name in enumerate(VOC_CLASSES)}

# ✅ Parse annotation
def convert_voc_to_yolo(ann_file, out_file, img_w, img_h):
    tree = ET.parse(ann_file)
    root = tree.getroot()

    yolo_labels = []

    for obj in root.findall('object'):
        cls = obj.find('name').text
        if cls not in class_to_id:
            continue
        cls_id = class_to_id[cls]

        xml_box = obj.find('bndbox')
        xmin = int(xml_box.find('xmin').text)
        ymin = int(xml_box.find('ymin').text)
        xmax = int(xml_box.find('xmax').text)
        ymax = int(xml_box.find('ymax').text)

        x_center = (xmin + xmax) / 2.0 / img_w
        y_center = (ymin + ymax) / 2.0 / img_h
        width = (xmax - xmin) / img_w
        height = (ymax - ymin) / img_h

        yolo_labels.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

    with open(out_file, "w") as f:
        f.write("\n".join(yolo_labels))

# ✅ Collect image and annotation pairs
all_images = []
for img_dir, ann_dir in zip(IMAGES, ANNOTATIONS):
    for filename in os.listdir(ann_dir):
        img_id = filename.replace(".xml", "")
        img_path = os.path.join(img_dir, img_id + ".jpg")
        ann_path = os.path.join(ann_dir, filename)

        if os.path.exists(img_path):
            all_images.append((img_path, ann_path))

# ✅ Split into train and val
train_data, val_data = train_test_split(all_images, test_size=0.2, random_state=42)

# ✅ Process function
def process_data(data_split, split_name):
    for img_path, ann_path in data_split:
        img_id = Path(img_path).stem
        out_img = ROOT / f"images/{split_name}/{img_id}.jpg"
        out_label = ROOT / f"labels/{split_name}/{img_id}.txt"

        shutil.copy(img_path, out_img)

        # Get image size
        import cv2
        img = cv2.imread(img_path)
        h, w = img.shape[:2]

        convert_voc_to_yolo(ann_path, out_label, w, h)

process_data(train_data, "train")
process_data(val_data, "val")

In [6]:
yaml_content = """
train: images/train
val: images/val

nc: 20
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
        'dog', 'horse', 'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor']
"""

with open("VOC2007/data.yaml", "w") as f:
    f.write(yaml_content.strip())

In [10]:
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
model.train(
    data=str(ROOT / "data.yaml"),
    epochs=20,
    imgsz=640,
    batch=16,
    name="yolov8n-voc",
    project="yolo_ablation"
)

Ultralytics 8.3.109 🚀 Python-3.10.16 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 7933MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=data.yaml, epochs=20, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=yolo_ablation, name=yolov8n-voc3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_co

100%|██████████| 5.35M/5.35M [00:00<00:00, 10.5MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /home/cacc/Repositories/ML-Notes/ObjectDetection/datasets/labels/train... 7970 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7970/7970 [00:01<00:00, 4286.38it/s]


[34m[1mtrain: [0mNew cache created: /home/cacc/Repositories/ML-Notes/ObjectDetection/datasets/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  data = fetch_version_info()
[34m[1mval: [0mScanning /home/cacc/Repositories/ML-Notes/ObjectDetection/datasets/labels/val... 1993 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1993/1993 [00:01<00:00, 1778.30it/s]

[34m[1mval: [0mNew cache created: /home/cacc/Repositories/ML-Notes/ObjectDetection/datasets/labels/val.cache





Plotting labels to yolo_ablation/yolov8n-voc3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000417, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1myolo_ablation/yolov8n-voc3[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20      2.29G       1.09      2.543      1.289          8        640: 100%|██████████| 499/499 [01:00<00:00,  8.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00,  9.11it/s]


                   all       1993       5970      0.662      0.614      0.647      0.435

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20      2.28G      1.125       1.76      1.319         10        640: 100%|██████████| 499/499 [01:00<00:00,  8.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:08<00:00,  7.38it/s]


                   all       1993       5970      0.671      0.559      0.624      0.411

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20       2.3G      1.148      1.722      1.337         10        640: 100%|██████████| 499/499 [00:58<00:00,  8.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00,  9.60it/s]

                   all       1993       5970      0.635      0.588      0.617      0.406






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      2.32G      1.156      1.638      1.338          5        640: 100%|██████████| 499/499 [00:56<00:00,  8.84it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00,  9.09it/s]


                   all       1993       5970      0.713      0.567      0.638      0.421

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      2.33G      1.135      1.555      1.326         13        640: 100%|██████████| 499/499 [00:59<00:00,  8.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00,  9.72it/s]

                   all       1993       5970      0.699      0.598      0.659      0.443






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20      2.36G      1.118      1.489      1.313         33        640: 100%|██████████| 499/499 [00:54<00:00,  9.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00, 10.35it/s]

                   all       1993       5970      0.694      0.613      0.669      0.445






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      2.38G      1.101       1.43      1.301         10        640: 100%|██████████| 499/499 [00:54<00:00,  9.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00, 10.30it/s]


                   all       1993       5970      0.724       0.61      0.681      0.459

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      2.39G      1.092      1.358      1.289         10        640: 100%|██████████| 499/499 [00:57<00:00,  8.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00,  9.99it/s]


                   all       1993       5970      0.732      0.623      0.705      0.479

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20       2.4G      1.074      1.319      1.275          6        640: 100%|██████████| 499/499 [00:56<00:00,  8.89it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00,  9.32it/s]


                   all       1993       5970      0.739      0.616      0.701      0.485

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      2.42G      1.061      1.286       1.27          3        640: 100%|██████████| 499/499 [00:56<00:00,  8.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:05<00:00, 10.53it/s]


                   all       1993       5970      0.725      0.638      0.703      0.491
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      2.44G      1.061      1.215       1.27          4        640: 100%|██████████| 499/499 [00:55<00:00,  8.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00,  9.82it/s]


                   all       1993       5970      0.724      0.641      0.702      0.487

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      2.46G      1.036      1.139      1.252          4        640: 100%|██████████| 499/499 [00:55<00:00,  9.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00,  9.73it/s]

                   all       1993       5970      0.723      0.646      0.709      0.492






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      2.46G       1.02      1.079      1.237          2        640: 100%|██████████| 499/499 [00:57<00:00,  8.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00, 10.43it/s]

                   all       1993       5970      0.743      0.652      0.724      0.507






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      2.49G     0.9977      1.035      1.226          6        640: 100%|██████████| 499/499 [00:54<00:00,  9.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00, 10.23it/s]

                   all       1993       5970      0.771      0.646      0.736      0.524






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      2.51G     0.9835      1.002      1.213          6        640: 100%|██████████| 499/499 [00:53<00:00,  9.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00, 10.41it/s]

                   all       1993       5970      0.779      0.652      0.742      0.529






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      2.52G      0.968     0.9568      1.197          6        640: 100%|██████████| 499/499 [00:53<00:00,  9.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:05<00:00, 10.55it/s]

                   all       1993       5970      0.802      0.644      0.749      0.538






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      2.53G     0.9518     0.9281      1.187          3        640: 100%|██████████| 499/499 [00:53<00:00,  9.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:05<00:00, 10.57it/s]

                   all       1993       5970      0.786      0.674      0.756      0.543






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      2.56G     0.9301     0.8889       1.17         11        640: 100%|██████████| 499/499 [00:53<00:00,  9.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:05<00:00, 10.81it/s]

                   all       1993       5970      0.773      0.683       0.76      0.551






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      2.57G     0.9139     0.8674      1.159          6        640: 100%|██████████| 499/499 [00:51<00:00,  9.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:05<00:00, 10.80it/s]

                   all       1993       5970      0.772      0.685      0.763      0.552

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



      20/20      2.59G     0.9075     0.8464      1.152          9        640: 100%|██████████| 499/499 [00:51<00:00,  9.60it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:05<00:00, 10.86it/s]

                   all       1993       5970      0.809      0.671      0.766      0.556

20 epochs completed in 0.348 hours.





Optimizer stripped from yolo_ablation/yolov8n-voc3/weights/last.pt, 6.2MB
Optimizer stripped from yolo_ablation/yolov8n-voc3/weights/best.pt, 6.2MB

Validating yolo_ablation/yolov8n-voc3/weights/best.pt...
Ultralytics 8.3.109 🚀 Python-3.10.16 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 7933MiB)
Model summary (fused): 72 layers, 3,009,548 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:06<00:00,  9.93it/s]


                   all       1993       5970      0.812       0.67      0.766      0.556
             aeroplane        101        153      0.838      0.797      0.884      0.606
               bicycle         91        148      0.891      0.696      0.804      0.609
                  bird        150        245      0.799      0.664      0.752      0.532
                  boat         69        184      0.776      0.488       0.62      0.372
                bottle         89        247      0.786      0.401      0.517      0.343
                   bus         79        102      0.772      0.699      0.773      0.668
                   car        296        603      0.867      0.773      0.875       0.65
                   cat        135        152      0.851      0.829      0.885      0.693
                 chair        223        523      0.742      0.486      0.583      0.396
                   cow         50        138       0.86      0.758      0.853      0.616
           diningtabl

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7794b88c5900>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043

In [12]:
import pandas as pd
import matplotlib.pyplot as plt
from ultralytics import YOLO

# Define your model variants
model_variants = ['yolov8n.pt']

# Define the path to your datasets YAML file
data_yaml = "data.yaml" # Change this if needed

results_dict = {}

# Loop over model variants and collect metrics
for variant in model_variants:
    model_path = f"VOC2007/yolo_ablation/{variant.replace('.pt', '')}-voc/weights/best.pt"
    model = YOLO(model_path)

    # Validation
    results = model.val(data=data_yaml, imgsz=640)

    # Model info
    info = model.info(verbose=False)
    
    # Fallback in case it's not a dictionary
    try:
        params = info['params']
        fps = info['speed']['inference']
    except Exception:
        params = None
        fps = None

    results_dict[variant] = {
        "mAP@50": results.box.map50,
        "mAP@50:95": results.box.map,
        "Params": params,
        "FPS": fps
    }

# Create DataFrame
results_df = pd.DataFrame(results_dict).T
results_df.index.name = "Model"
results_df.reset_index(inplace=True)

# Print the results
print("\n📊 YOLOv8 Ablation Study Results:\n")
print(results_df)

# Optional: Clean and convert to numeric
results_df_clean = results_df.copy()
for col in ["mAP@50", "mAP@50:95", "Params", "FPS"]:
    results_df_clean[col] = pd.to_numeric(results_df_clean[col], errors='coerce')

# Plot mAP@50 and FPS side by side
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.bar(results_df_clean["Model"], results_df_clean["mAP@50"], color='skyblue')
plt.title("🔍 mAP@50 Comparison")
plt.ylabel("mAP@50")
plt.grid(True)

plt.subplot(1, 2, 2)
plt.bar(results_df_clean["Model"], results_df_clean["FPS"], color='orange')
plt.title("⚡ Inference Speed (FPS)")
plt.ylabel("FPS")
plt.grid(True)

plt.tight_layout()
plt.show()

Ultralytics 8.3.109 🚀 Python-3.10.16 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 7933MiB)
Model summary (fused): 72 layers, 3,009,548 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /home/cacc/Repositories/ML-Notes/ObjectDetection/datasets/labels/val.cache... 1993 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1993/1993 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 125/125 [00:10<00:00, 11.94it/s]


                   all       1993       5970      0.797      0.677      0.766      0.556
             aeroplane        101        153      0.818      0.793      0.885      0.605
               bicycle         91        148      0.885      0.723      0.803      0.608
                  bird        150        245       0.79      0.673      0.747      0.522
                  boat         69        184      0.742      0.484      0.611      0.372
                bottle         89        247      0.741      0.406      0.516      0.343
                   bus         79        102      0.774      0.704      0.768      0.665
                   car        296        603      0.855      0.781      0.873      0.649
                   cat        135        152      0.828      0.836      0.884      0.692
                 chair        223        523       0.72      0.499      0.585      0.397
                   cow         50        138      0.816      0.783       0.86      0.617
           diningtabl

  plt.tight_layout()


<Figure size 1200x500 with 2 Axes>