## Object Detection Model training using YOLO
References - 
- [Documentation](https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/#13-prepare-dataset-for-yolov5)
- [Testing IoU](https://stackoverflow.com/questions/77565416/how-to-test-iou-score-after-training-a-yolo-model)
- [IoU calculation](https://stackoverflow.com/questions/25349178/calculating-percentage-of-bounding-box-overlap-for-image-detector-evaluation)
- [Hungarian Algorithm to match Bounding Boxes](https://gist.github.com/AruniRC/c629c2df0e68e23aff7dcaeef87c72d4)

In [1]:
# Import necessary libraries
import numpy as np
import os, sys 
import matplotlib.pyplot as plt
from ultralytics import YOLO
import fiftyone as fo 
import fiftyone.zoo as foz
from fiftyone import ViewField as F
import json, shutil 
from collections import defaultdict
from itertools import product 
from functools import reduce
from scipy.optimize import linear_sum_assignment

# Save to current directory
curr_dir = os.getcwd()

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Parameters
train_test_split = 0.8
num_epochs = 10

In [2]:
# Load dataset
fo.config.dataset_zoo_dir = curr_dir

# Download the data
# By default, the following loads data for detections
dataset = foz.load_zoo_dataset("coco-2017", 
                            splits=['train'],
                            shuffle=True,
                            seed=0,
                            label_types=['detections'],
                            only_matching=True,
                            classes=['person'])

Downloading split 'train' to '/Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/coco-2017/train' if necessary
Found annotations at '/Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/coco-2017/raw/instances_train2017.json'
Sufficient images already downloaded
Existing download of split 'train' is sufficient
Loading existing dataset 'coco-2017-train-5000'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [None]:
# Load the downloaded dataset
coco_dataset = fo.Dataset.from_dir(
    dataset_type=fo.types.COCODetectionDataset,
    data_path='coco-2017/train/data',
    labels_path='coco-2017/train/labels.json',
    max_samples=max_samples,
    include_id=True,
)

In [4]:
# The above downloads all classes in COCO
# We filter them to only have people using the following - 
coco_dataset.export(
    labels_path="coco-2017/labels.json",
    dataset_type=fo.types.COCODetectionDataset,
    classes=['person'],
)

Found multiple fields ['detections', 'segmentations'] with compatible type (<class 'fiftyone.core.labels.Detections'>, <class 'fiftyone.core.labels.Polylines'>, <class 'fiftyone.core.labels.Keypoints'>); exporting 'detections'
   0% |/--------------|   17/5000 [121.4ms elapsed, 35.6s remaining, 140.0 samples/s] 



   1% |\--------------|   51/5000 [325.6ms elapsed, 31.6s remaining, 156.6 samples/s] 



   2% |/--------------|   90/5000 [533.5ms elapsed, 29.1s remaining, 168.7 samples/s] 



   3% ||--------------|  134/5000 [1.1s elapsed, 40.0s remaining, 121.7 samples/s]    



   4% |\--------------|  181/5000 [1.4s elapsed, 37.7s remaining, 124.1 samples/s]    



   4% |/--------------|  217/5000 [1.6s elapsed, 36.0s remaining, 119.7 samples/s]    



   6% ||--------------|  291/5000 [1.9s elapsed, 30.9s remaining, 187.8 samples/s]    



   7% |█|-------------|  361/5000 [2.5s elapsed, 32.0s remaining, 164.2 samples/s]    



  10% |█--------------|  477/5000 [3.1s elapsed, 29.3s remaining, 182.5 samples/s]    



  15% |██-------------|  758/5000 [4.8s elapsed, 26.8s remaining, 163.6 samples/s]    



  16% |██|------------|  796/5000 [5.0s elapsed, 26.3s remaining, 164.8 samples/s]    



 100% |███████████████| 5000/5000 [27.0s elapsed, 0s remaining, 191.8 samples/s]      


In [2]:
# We need to convert the dataset to YOLO format
input_dir = curr_dir + "/coco-2017/"
output_dir = curr_dir + "/yolo/"

images_folder = input_dir + "train/data/"


if not os.path.exists(output_dir):
	os.mkdir(output_dir)

for split in ['train', 'test']:
	shutil.rmtree(output_dir + split, ignore_errors=True)
	os.mkdir(output_dir + split)
	os.mkdir(output_dir + split + '/images')
	os.mkdir(output_dir + split + '/labels')

In [None]:

ground_truths = defaultdict(list)

# Read the annotations
with open(input_dir + 'labels.json', 'r') as f:
		data = json.load(f)

# Count number of annotations
num_data = len(data['images'])
print(f"Total number of images are {num_data}")

# Choose 80-20 split
num_train = np.floor(train_test_split*num_data)
num_test = np.floor((1 - train_test_split)*num_data)

# Match annotations to images and write in YOLO format
count = 0

id_ann = defaultdict(list)
for ann in data['annotations']:
	id_ann[ann['image_id']].append(ann)

for image in data['images']:
	width = image['width']
	height = image['height']
	filename = image['file_name'].split('.')[0]
	id = image['id']

	
	# Writing current object and copying image
	if count < num_train:
		split = 'train'
	else:
		split = 'test'
	
	f = open(f'{output_dir}{split}/labels/{filename}.txt', 'w')

	for annotation in id_ann[id]:
		current_category = annotation['category_id']
		x, y, w, h = annotation['bbox']
		
		# Finding midpoints
		x_centre = x + w/2
		y_centre = y + h/2
		
		# Normalization
		x_centre /= width
		y_centre /= height
		w /= width
		h /= height
		
		# Limiting upto fix number of decimal places
		sx_centre = format(x_centre, '.6f')
		sy_centre = format(y_centre, '.6f')
		sw = format(w, '.6f')
		sh = format(h, '.6f')


		ground_truths[image['file_name']].append([x_centre, y_centre, w, h])
		
		f.write(f"{current_category} {sx_centre} {sy_centre} {sw} {sh}\n")
	f.close()
	shutil.copy(images_folder + image['file_name'], f'{output_dir}{split}/images/{filename}.jpg')	
	count += 1
				
	


In [3]:
# Load model for training
# We imported YOLO from ultralytics
model = YOLO('yolov5nu.pt')

In [None]:
# Train the model
# Device = mps is for Apple Silicon
train_results = model.train(data='config.yaml', batch=16, epochs=num_epochs, plots=True, device='mps')

In [13]:
# Save the model
model.save(filename='trained.pt')

In [4]:
# Load model from trained weights
model.load('trained.pt')

Transferred 427/427 items from pretrained weights


YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C3(
        (cv1): Conv(
          (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_st

In [5]:
def calc_iou(bb1, bb2):
    b1_x1 = bb1[0] - bb1[2]/2
    b1_x2 = bb1[0] + bb1[2]/2
    b1_y1 = bb1[1] - bb1[3]/2
    b1_y2 = bb1[1] + bb1[3]/2

    b2_x1 = bb2[0] - bb2[2]/2
    b2_x2 = bb2[0] + bb2[2]/2
    b2_y1 = bb2[1] - bb2[3]/2
    b2_y2 = bb2[1] + bb2[3]/2

    # determine the coordinates of the intersection rectangle
    x_left = max(b1_x1, b2_x1)
    y_top = max(b1_y1, b2_y1)
    x_right = min(b1_x2, b2_x2)
    y_bottom = min(b1_y2, b2_y2)

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # compute the area of both AABBs
    bb1_area = bb1[2]*bb1[3]
    bb2_area = bb2[2]*bb2[3]

    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = intersection_area / float(bb1_area + bb2_area - intersection_area)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [6]:
# Test the data with IOU score 
test_images_folder = output_dir + 'test/images/'
test_labels_folder = output_dir + 'test/labels'
test_files = os.listdir(test_images_folder)
sum_iou = 0
ious = defaultdict(float)

results_folder = output_dir + 'results/'
if not os.path.exists(results_folder):
	os.mkdir(results_folder)
     
for test_file in test_files:
    res = model.predict(test_images_folder + test_file)
    res[0].save(results_folder + test_file)
    
    gt = ground_truths[test_file]
    preds = res[0].boxes.xywhn.numpy()
    
    if len(gt) == 0 or len(preds) == 0:
         continue
    
    combinations = list(product(gt, preds))
    iou_matrix = np.zeros((len(gt), len(preds)))
    for i in range(len(gt)):
        for j in range(len(preds)):
            iou_matrix[i, j] = calc_iou(gt[i], preds[j])

    # Do the Hungarian matching algorithm
    gt_idx, pred_idx = linear_sum_assignment(1 - iou_matrix)
    assigned_ious = np.sort(iou_matrix[gt_idx, pred_idx])[-len(gt):]
    
    # Compute mean across all instances in the image
    mean_iou = np.mean(assigned_ious)

    assert mean_iou <= 1.0 
    
    sum_iou += mean_iou
    ious[test_file] = (mean_iou, assigned_ious)

In [8]:
test_files = os.listdir(test_images_folder)
print(test_files)

[]


In [7]:
# Calculate the mean across all test cases
num_test = len(test_files)
print(sum_iou/num_test)

ZeroDivisionError: division by zero