In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
import pandas as pd
import os
import shutil

##### Copy the images to a separate directory for YOLO training

In [3]:
def yolo_dataset(src_dir, dest_dir):
    """Copy every regular file found directly in ``src_dir`` into ``dest_dir``.

    Args:
        src_dir: Directory holding the source images.
        dest_dir: Directory that receives the copies. It is created
            (including missing parents) when it does not exist yet.

    Returns:
        None. Files are copied under their original names; an existing file
        with the same name in ``dest_dir`` is overwritten.
    """
    # exist_ok avoids the check-then-create race and makes re-running the cell safe.
    os.makedirs(dest_dir, exist_ok=True)

    for image_name in sorted(os.listdir(src_dir)):
        src_path = os.path.join(src_dir, image_name)
        # shutil.copyfile only handles regular files; a nested folder would
        # abort the whole copy, so anything that is not a file is skipped.
        if not os.path.isfile(src_path):
            continue
        shutil.copyfile(src_path, os.path.join(dest_dir, image_name))

##### Convert the given annotations to the YOLO label format

In [4]:
def yolo_labels(images_dir, orig_labels_dir, yolo_labels_dir):
    """Convert pixel-space box annotations into per-image YOLO label files.

    The annotation file is read with ``pd.read_json`` and must provide the
    columns ``image_id``, ``category_id`` and ``bbox``, where ``bbox`` is
    ``[xmin, ymin, width, height]`` in pixels. For every annotated image a
    ``<image stem>.txt`` file is written to ``yolo_labels_dir`` with one line
    per box: ``category_id x_center y_center width height``, all four box
    values normalised by the image size.

    Args:
        images_dir: Directory containing the images the annotations refer to.
        orig_labels_dir: Path of the JSON annotation file (despite the name,
            this is passed straight to ``pd.read_json``).
        yolo_labels_dir: Output directory for the ``.txt`` label files;
            created if missing.

    Raises:
        ValueError: If an annotated image cannot be decoded by OpenCV.
        IndexError: If an ``image_id`` is outside the directory listing.

    Note:
        ``image_id`` is used as a position in the *sorted* listing of
        ``images_dir``. NOTE(review): this assumes the directory holds only
        the images and that their sort order matches the ids - confirm
        against the dataset.
    """
    image_names = sorted(os.listdir(images_dir))
    annotations = pd.read_json(orig_labels_dir)[['image_id', 'category_id', 'bbox']]

    os.makedirs(yolo_labels_dir, exist_ok=True)

    image_sizes = {}       # image name -> (height, width), decoded once per image
    labels_by_image = {}   # image name -> list of YOLO label lines

    for _, row in annotations.iterrows():
        image_id, category_id, bbox = row['image_id'], row['category_id'], row['bbox']

        # Box values are truncated to whole pixels.
        xmin, ymin, box_width, box_height = (int(value) for value in bbox)
        xmax, ymax = xmin + box_width, ymin + box_height

        image_name = image_names[int(image_id)]
        if image_name not in image_sizes:
            image = cv2.imread(os.path.join(images_dir, image_name))
            if image is None:
                raise ValueError(f'Could not read image: {os.path.join(images_dir, image_name)}')
            # OpenCV arrays are (rows, cols, channels), i.e. height comes first.
            image_sizes[image_name] = image.shape[:2]
        image_height, image_width = image_sizes[image_name]

        x_center = (xmin + xmax) / (2 * image_width)
        y_center = (ymin + ymax) / (2 * image_height)
        norm_width = box_width / image_width
        norm_height = box_height / image_height

        labels_by_image.setdefault(image_name, []).append(
            f'{category_id} {x_center:.5f} {y_center:.5f} {norm_width:.5f} {norm_height:.5f}'
        )

    # Write each file once with all of its boxes; opening in 'w' mode per
    # annotation would keep only the last box of a multi-object image.
    for image_name, label_lines in labels_by_image.items():
        label_file_name = os.path.splitext(image_name)[0] + '.txt'
        label_path = os.path.join(yolo_labels_dir, label_file_name)
        with open(label_path, 'w') as file:
            file.write('\n'.join(label_lines) + '\n')

In [4]:
# Copy the training images into the YOLO dataset image folder.
yolo_dataset('../archive/train/train/', './data/images/train/')

In [5]:
# Copy the validation images into the YOLO dataset image folder.
yolo_dataset('../archive/valid/valid/', './data/images/valid/')

In [6]:
# Write YOLO-format label files for the training images.
yolo_labels('../archive/train/train/', '../archive/train_annotations', './data/labels/train/')

In [7]:
# Write YOLO-format label files for the validation images.
yolo_labels('../archive/valid/valid/', '../archive/valid_annotations', './data/labels/valid/')

##### Training

In [8]:
# Create a new YOLO model from scratch, built from the 'yolov8n.yaml'
# architecture definition (its layer summary is printed below).
model = YOLO('yolov8n.yaml')


                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128

In [11]:
# Train the model on the dataset described by 'yolo_config.yaml' for 10 epochs
results = model.train(data='yolo_config.yaml', epochs=10)

Ultralytics YOLOv8.0.136  Python-3.9.16 torch-2.0.1+cpu CPU (Intel Core(TM) i7-6500U 2.50GHz)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.yaml, data=yolo_config.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, 

##### Testing

In [15]:
# Note: these weight files were generated by training the YOLO model in Colab.

# Training on this laptop is impractical: even 10 epochs are very time-consuming.

# When the model is trained in a local environment, the weights are stored in runs/detect/train/weights

test_model = YOLO('./colab_generated_weights/best.pt')

In [13]:
def detection(img_path, model):
    """Run object detection on one image and return its bounding boxes.

    Args:
        img_path: Path of the image file to load with OpenCV.
        model: Object exposing ``predict(source=...)`` whose first result has
            a ``boxes`` collection; each box provides ``xyxy`` and ``cls``
            (an ultralytics ``YOLO`` model in this notebook).

    Returns:
        A list of ``(left, top, right, bottom)`` tuples, one per detected
        box, with the coordinates truncated to ints.

    Raises:
        FileNotFoundError: If the image cannot be read from ``img_path``.
    """
    # Load image
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here rather than handing None to the model as its source.
        raise FileNotFoundError(f'Could not read image: {img_path}')

    # Perform object detection
    results = model.predict(source=img)

    # Extract bounding boxes of the (single) input image
    boxes = results[0].boxes
    print(f'boxes length = {len(boxes)}')

    bounding_boxes = []

    for box in boxes:
        left, top, right, bottom = (int(coord) for coord in box.xyxy[0])
        print(left, top, right, bottom)
        print(f'box = {box}')

        # Class id is only reported; it is not part of the return value.
        cls_id = int(list(box.cls)[0])
        print(f'cls_id = {cls_id}')

        bounding_boxes.append((left, top, right, bottom))

    return bounding_boxes

In [14]:
# Example usage: detect objects in one validation image and show the boxes.
# image_path = '../archive/train/train/image_id_000.jpg'
image_path = '../archive/valid/valid/image_id_071.jpg'

image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')

bounding_boxes = detection(image_path, test_model)

# Draw bounding boxes on the image (green, 2 px, anti-aliased)
color = (0, 255, 0)
thickness = 2
lineType = cv2.LINE_AA
for (left, top, right, bottom) in bounding_boxes:
    cv2.rectangle(image, (left, top), (right, bottom), color, thickness, lineType)

# Render inline instead of cv2.imshow/waitKey: a native window blocks
# "Run All" until a key is pressed and is unavailable on headless kernels.
# OpenCV stores pixels as BGR, matplotlib expects RGB.
fig, ax = plt.subplots()
ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
ax.set_title('Output')
ax.axis('off')
plt.show()


0: 640x640 1 turtle, 417.1ms
Speed: 6.0ms preprocess, 417.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)


boxes length = 1
215 40 560 364
box = ultralytics.engine.results.Boxes object with attributes:

boxes: tensor([[215.1532,  40.3934, 560.2317, 364.9516,   0.9322,   2.0000]])
cls: tensor([2.])
conf: tensor([0.9322])
data: tensor([[215.1532,  40.3934, 560.2317, 364.9516,   0.9322,   2.0000]])
id: None
is_track: False
orig_shape: (640, 640)
shape: torch.Size([1, 6])
xywh: tensor([[387.6924, 202.6725, 345.0785, 324.5582]])
xywhn: tensor([[0.6058, 0.3167, 0.5392, 0.5071]])
xyxy: tensor([[215.1532,  40.3934, 560.2317, 364.9516]])
xyxyn: tensor([[0.3362, 0.0631, 0.8754, 0.5702]])
cls_id = 2
