## Object Detection Model training using YOLO
References - 
- [Documentation](https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/#13-prepare-dataset-for-yolov5)

In [1]:
# Import necessary libraries
import numpy as np
import os, sys 
import matplotlib.pyplot as plt
from ultralytics import YOLO
import fiftyone as fo 
import fiftyone.zoo as foz
import json, shutil 
from collections import defaultdict

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# Load dataset

# Save to current directory
curr_dir = os.getcwd()
fo.config.dataset_zoo_dir = curr_dir

# Download the data
# By default, the following loads data for detections
dataset = foz.load_zoo_dataset("coco-2017", 
                            splits=['train'],
                            max_samples=2000,
                            shuffle=True,
                            label_type=['detection'],
                            classes=['person'])

Downloading split 'train' to '/Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/coco-2017/train' if necessary
Downloading annotations to '/Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/coco-2017/tmp-download/annotations_trainval2017.zip'
 100% |██████|    1.9Gb/1.9Gb [46.6s elapsed, 0s remaining, 29.4Mb/s]      
Extracting annotations to '/Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/coco-2017/raw/instances_train2017.json'
Downloading 2000 images
  64% |██████████|-----| 1273/2000 [1.4m elapsed, 45.7s remaining, 16.2 images/s]   

In [12]:
# The above downloads all classes in COCO
# We filter them to only have people using the following - 
dataset.export(
    labels_path="coco-2017/labels.json",
    dataset_type=fo.types.COCODetectionDataset,
    classes=['person'],
)

Ignoring unsupported parameter 'max_samples'
 100% |███████████████████| 50/50 [85.2ms elapsed, 0s remaining, 586.9 samples/s] 


In [10]:
# We need to convert the dataset to YOLO format
input_dir = curr_dir + "/coco-2017/"
output_dir = curr_dir + "/yolo/"

images_folder = input_dir + "train/data/"

if not os.path.exists(output_dir):
	os.mkdir(output_dir)

for split in ['train', 'test']:
	shutil.rmtree(output_dir + split, ignore_errors=True)
	os.mkdir(output_dir + split)
	os.mkdir(output_dir + split + '/images')
	os.mkdir(output_dir + split + '/labels')

# Read the annotations
with open(input_dir + 'labels.json', 'r') as f:
		data = json.load(f)

# Count number of annotations
num_data = len(data['images'])
print(f"Total number of images are {num_data}")

# Choose 80-20 split
num_train = np.floor(0.8*num_data)
num_test = np.floor(0.2*num_data)
print(num_train, num_test) 

# Match annotations to images and write in YOLO format
count = 0

id_ann = defaultdict(list)
for ann in data['annotations']:
	id_ann[ann['image_id']].append(ann)

for image in data['images']:
	width = image['width']
	height = image['height']
	filename = image['file_name'].split('.')[0]
	id = image['id']

	# Writing current object and copying image
	if count < num_train:
		split = 'train'
	else:
		split = 'test'
	
	f = open(f'{output_dir}{split}/labels/{filename}.txt', 'w')

	for annotation in id_ann[id]:
		current_category = annotation['category_id']
		x, y, w, h = annotation['bbox']
		
		# Finding midpoints
		x_centre = x + w/2
		y_centre = y + h/2
		
		# Normalization
		x_centre /= width
		y_centre /= height
		w /= width
		h /= height
		
		# Limiting upto fix number of decimal places
		x_centre = format(x_centre, '.6f')
		y_centre = format(y_centre, '.6f')
		w = format(w, '.6f')
		h = format(h, '.6f')

		f.write(f"{current_category} {x_centre} {y_centre} {w} {h}\n")
	f.close()
	shutil.copy(images_folder + image['file_name'], f'{output_dir}{split}/images/{filename}.jpg')	
	count += 1
				
	


Total number of images are 50
40.0 10.0


In [None]:
# Load model 

# Uncomment if training from scratch
# model = YOLO('yolov8n.yaml')

# Uncomment for pretrained model
model = YOLO('yolov8n.pt')

In [None]:
# Train the model
# Device = mps is for Apple Silicon
results = model.train(data='config.yaml', batch=16, epochs=20, plots=False, device='mps')

Ultralytics YOLOv8.1.34 🚀 Python-3.8.16 torch-1.13.1 CPU (Apple M1 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=config.yaml, epochs=2, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=mps, workers=0, project=None, name=train1722, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=False, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, li

[34m[1mtrain: [0mScanning /Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/yolo/train/labels.cache... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /Users/sudhansh/Desktop/Projects/Machine Learning Practice/Object Detection/yolo/train/labels.cache... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<?, ?it/s]

[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/opt/homebrew/runs/detect/train1722[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



        1/2         0G      2.468      2.686      2.309         28        640: 100%|██████████| 3/3 [00:14<00:00,  4.75s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:05<00:00,  2.55s/it]

                   all         40        153      0.188     0.0261     0.0313    0.00899






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/2         0G       2.56      2.693      2.381         21        640: 100%|██████████| 3/3 [00:13<00:00,  4.62s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]

                   all         40        153      0.282     0.0206     0.0451     0.0148






2 epochs completed in 0.011 hours.
Optimizer stripped from /opt/homebrew/runs/detect/train1722/weights/last.pt, 6.2MB
Optimizer stripped from /opt/homebrew/runs/detect/train1722/weights/best.pt, 6.2MB

Validating /opt/homebrew/runs/detect/train1722/weights/best.pt...
Ultralytics YOLOv8.1.34 🚀 Python-3.8.16 torch-1.13.1 CPU (Apple M1 Pro)
Model summary (fused): 168 layers, 3005843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]

                   all         40        153      0.278     0.0202      0.045     0.0148
Speed: 0.9ms preprocess, 113.4ms inference, 0.0ms loss, 2.3ms postprocess per image





In [None]:
print(results)

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x2b4c56310>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.048048,    

In [None]:
# Test the model 
# Run batched inference on a list of images
results = model(['yolo/train/images/000000515123.jpg', 
                 'yolo/test/images/000000381928.jpg', 
                 'yolo/test/images/000000068657.jpg',
                 'yolo/test/images/000000518124.jpg'])  # return a list of Results objects

# Process results list
for result in results:
    boxes = result.boxes  # Boxes object for bounding box outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs
    # result.show()  # display to screen
    result.save(filename='result.jpg')  # save to disk


0: 640x640 1 person, 2 suitcases, 72.9ms
1: 640x640 10 persons, 8 kites, 72.9ms
2: 640x640 1 person, 1 couch, 1 remote, 9 books, 72.9ms
3: 640x640 9 persons, 1 frisbee, 72.9ms
Speed: 2.2ms preprocess, 72.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)


In [None]:
# Test the data with IOU score 
model.test(data='config.yaml')

AttributeError: 'YOLO' object has no attribute 'test'