## Object Detection Model training using YOLO
References - 
- [Documentation](https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/#13-prepare-dataset-for-yolov5)
- [Testing IoU](https://stackoverflow.com/questions/77565416/how-to-test-iou-score-after-training-a-yolo-model)

In [2]:
# Import necessary libraries
import numpy as np
import os, sys 
import matplotlib.pyplot as plt
from ultralytics import YOLO
import fiftyone as fo 
import fiftyone.zoo as foz
from fiftyone import ViewField as F
import json, shutil 
from collections import defaultdict

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load dataset

# Save to current directory
curr_dir = os.getcwd()
fo.config.dataset_zoo_dir = curr_dir

# Download the data
# By default, the following loads data for detections
dataset = foz.load_zoo_dataset("coco-2017", 
                            splits=['train'],
                            max_samples=2000,
                            shuffle=True,
                            seed=0,
                            label_types=['detection'],
                            only_matching=True,
                            classes=['person'])

Downloading split 'train' to '/Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/coco-2017/train' if necessary
Found annotations at '/Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/coco-2017/raw/instances_train2017.json'
Sufficient images already downloaded
Existing download of split 'train' is sufficient
Loading existing dataset 'coco-2017-train-2000'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [20]:
# Load the downloaded dataset
coco_dataset = fo.Dataset.from_dir(
    dataset_type=fo.types.COCODetectionDataset,
    data_path='coco-2017/train/data',
    labels_path='coco-2017/train/labels.json',
    max_samples=50,
    include_id=True,
)

 100% |███████████████████| 50/50 [667.4ms elapsed, 0s remaining, 74.9 samples/s]      


In [21]:
# The above downloads all classes in COCO
# We filter them to only have people using the following - 
coco_dataset.export(
    labels_path="coco-2017/labels.json",
    dataset_type=fo.types.COCODetectionDataset,
    classes=['person'],
)

Found multiple fields ['detections', 'segmentations'] with compatible type (<class 'fiftyone.core.labels.Detections'>, <class 'fiftyone.core.labels.Polylines'>, <class 'fiftyone.core.labels.Keypoints'>); exporting 'detections'
                                                                                        



 100% |███████████████████| 50/50 [218.9ms elapsed, 0s remaining, 228.4 samples/s]      


In [22]:
# We need to convert the dataset to YOLO format
input_dir = curr_dir + "/coco-2017/"
output_dir = curr_dir + "/yolo/"

images_folder = input_dir + "train/data/"

if not os.path.exists(output_dir):
	os.mkdir(output_dir)

for split in ['train', 'test']:
	shutil.rmtree(output_dir + split, ignore_errors=True)
	os.mkdir(output_dir + split)
	os.mkdir(output_dir + split + '/images')
	os.mkdir(output_dir + split + '/labels')

# Read the annotations
with open(input_dir + 'labels.json', 'r') as f:
		data = json.load(f)

# Count number of annotations
num_data = len(data['images'])
print(f"Total number of images are {num_data}")

# Choose 80-20 split
num_train = np.floor(0.8*num_data)
num_test = np.floor(0.2*num_data)
print(num_train, num_test) 

# Match annotations to images and write in YOLO format
count = 0

id_ann = defaultdict(list)
for ann in data['annotations']:
	id_ann[ann['image_id']].append(ann)

for image in data['images']:
	width = image['width']
	height = image['height']
	filename = image['file_name'].split('.')[0]
	id = image['id']

	# Writing current object and copying image
	if count < num_train:
		split = 'train'
	else:
		split = 'test'
	
	f = open(f'{output_dir}{split}/labels/{filename}.txt', 'w')

	for annotation in id_ann[id]:
		current_category = annotation['category_id']
		x, y, w, h = annotation['bbox']
		
		# Finding midpoints
		x_centre = x + w/2
		y_centre = y + h/2
		
		# Normalization
		x_centre /= width
		y_centre /= height
		w /= width
		h /= height
		
		# Limiting upto fix number of decimal places
		x_centre = format(x_centre, '.6f')
		y_centre = format(y_centre, '.6f')
		w = format(w, '.6f')
		h = format(h, '.6f')

		f.write(f"{current_category} {x_centre} {y_centre} {w} {h}\n")
	f.close()
	shutil.copy(images_folder + image['file_name'], f'{output_dir}{split}/images/{filename}.jpg')	
	count += 1
				
	


Total number of images are 50
40.0 10.0


In [23]:
# Load model 

# Uncomment if training from scratch
# model = YOLO('yolov8n.yaml')

# Uncomment for pretrained model
model = YOLO('yolov5n.pt')

PRO TIP 💡 Replace 'model=yolov5n.pt' with new 'model=yolov5nu.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.



In [34]:
# Train the model
# Device = mps is for Apple Silicon
train_results = model.train(data='config.yaml', batch=16, epochs=10, plots=True, device='mps')

Ultralytics YOLOv8.1.34 🚀 Python-3.8.16 torch-1.13.1 CPU (Apple M1 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov5n.pt, data=config.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=mps, workers=0, project=None, name=train202, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=False, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, li

[34m[1mtrain: [0mScanning /Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/yolo/train/labels.cache... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/yolo/train/labels.cache... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<?, ?it/s]

[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 69 weight(decay=0.0), 76 weight(decay=0.0005), 75 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/opt/homebrew/runs/detect/train202[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



       1/10         0G      3.134      3.403       2.58         60        640: 100%|██████████| 3/3 [00:15<00:00,  5.00s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:07<00:00,  3.94s/it]

                   all         40        205     0.0419     0.0585     0.0148    0.00344






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      2.984      3.166      2.531         47        640: 100%|██████████| 3/3 [00:15<00:00,  5.10s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:07<00:00,  3.94s/it]

                   all         40        205     0.0563     0.0488     0.0108     0.0034






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G      2.829      2.927      2.378         54        640: 100%|██████████| 3/3 [00:14<00:00,  4.80s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:08<00:00,  4.11s/it]

                   all         40        205      0.114     0.0585     0.0204    0.00545






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G      2.727      2.752      2.258         32        640: 100%|██████████| 3/3 [00:18<00:00,  6.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:08<00:00,  4.35s/it]

                   all         40        205     0.0546     0.0878     0.0199    0.00569






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G      2.417      2.703      2.124         21        640: 100%|██████████| 3/3 [00:15<00:00,  5.30s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:08<00:00,  4.00s/it]

                   all         40        205      0.156     0.0719     0.0284    0.00788






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G      2.643      2.631      2.181         44        640: 100%|██████████| 3/3 [00:16<00:00,  5.42s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:07<00:00,  3.87s/it]

                   all         40        205      0.202     0.0341      0.033    0.00708






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G      2.372      2.632      2.091         22        640: 100%|██████████| 3/3 [00:15<00:00,  5.24s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:08<00:00,  4.14s/it]

                   all         40        205      0.255     0.0146     0.0323    0.00576






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G      2.406      2.538      2.109         46        640: 100%|██████████| 3/3 [00:14<00:00,  4.95s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:08<00:00,  4.09s/it]

                   all         40        205      0.218      0.039      0.028    0.00519






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G      2.368      2.449      2.027         61        640: 100%|██████████| 3/3 [00:14<00:00,  4.91s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:08<00:00,  4.00s/it]

                   all         40        205       0.17     0.0488     0.0333    0.00615






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G      2.369      2.554      1.981         32        640: 100%|██████████| 3/3 [00:14<00:00,  4.73s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:08<00:00,  4.01s/it]

                   all         40        205      0.298    0.00976     0.0345    0.00616






10 epochs completed in 0.067 hours.
Optimizer stripped from /opt/homebrew/runs/detect/train202/weights/last.pt, 5.2MB
Optimizer stripped from /opt/homebrew/runs/detect/train202/weights/best.pt, 5.2MB

Validating /opt/homebrew/runs/detect/train202/weights/best.pt...
Ultralytics YOLOv8.1.34 🚀 Python-3.8.16 torch-1.13.1 CPU (Apple M1 Pro)
YOLOv5n summary (fused): 193 layers, 2503139 parameters, 0 gradients, 7.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:07<00:00,  3.58s/it]

                   all         40        205      0.151     0.0703     0.0282    0.00794
Speed: 1.0ms preprocess, 171.1ms inference, 0.0ms loss, 3.4ms postprocess per image





In [39]:
print(train_results)

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x2afe5e940>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.048048,    

In [40]:
val_res = model.val()

Ultralytics YOLOv8.1.34 🚀 Python-3.8.16 torch-1.13.1 CPU (Apple M1 Pro)
YOLOv5n summary (fused): 193 layers, 2503139 parameters, 0 gradients, 7.1 GFLOPs


[34m[1mval: [0mScanning /Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/yolo/train/labels.cache... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:07<00:00,  2.51s/it]

                   all         40        205      0.152      0.078     0.0301     0.0084
Speed: 1.7ms preprocess, 180.0ms inference, 0.0ms loss, 2.7ms postprocess per image





In [29]:
# Test the model 
# Run batched inference on a list of images
results = model(['yolo/train/images/000000012424.jpg', 
                 'yolo/test/images/000000014083.jpg', 
                 'yolo/train/images/000000010232.jpg'])  # return a list of Results objects

# Process results list
for result in results:
    boxes = result.boxes  # Boxes object for bounding box outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs
    result.show()  # display to screen
    result.save(filename='result.jpg')  # save to disk


0: 448x640 4 persons, 40.9ms
1: 448x640 2 persons, 40.9ms
2: 448x640 (no detections), 40.9ms
Speed: 1.0ms preprocess, 40.9ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)


In [46]:
# Test the data with IOU score 
test_images_folder = output_dir + 'test/images/'
test_files = os.listdir(test_images_folder)
test_results = model([test_images_folder + c for c in test_files])

for result in test_results: 
    print(result.boxes)


0: 640x640 (no detections), 63.5ms
1: 640x640 (no detections), 63.5ms
2: 640x640 1 person, 63.5ms
3: 640x640 (no detections), 63.5ms
4: 640x640 (no detections), 63.5ms
5: 640x640 (no detections), 63.5ms
6: 640x640 (no detections), 63.5ms
7: 640x640 (no detections), 63.5ms
8: 640x640 2 persons, 63.5ms
9: 640x640 (no detections), 63.5ms
Speed: 2.8ms preprocess, 63.5ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 640)
ultralytics.engine.results.Boxes object with attributes:

cls: tensor([])
conf: tensor([])
data: tensor([], size=(0, 6))
id: None
is_track: False
orig_shape: (427, 640)
shape: torch.Size([0, 6])
xywh: tensor([], size=(0, 4))
xywhn: tensor([], size=(0, 4))
xyxy: tensor([], size=(0, 4))
xyxyn: tensor([], size=(0, 4))
ultralytics.engine.results.Boxes object with attributes:

cls: tensor([])
conf: tensor([])
data: tensor([], size=(0, 6))
id: None
is_track: False
orig_shape: (428, 640)
shape: torch.Size([0, 6])
xywh: tensor([], size=(0, 4))
xywhn: tensor([], size=