In [2]:
!pip install ultralytics
!pip install pycocotools
!pip install requests tqdm

Collecting ultralytics
  Downloading ultralytics-8.2.54-py3-none-any.whl (800 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/800.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m378.9/800.1 kB[0m [31m12.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.1/800.1 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.0-py3-none-any.whl (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cache

In [3]:
import os
import requests
from zipfile import ZipFile
from tqdm import tqdm

urls = {
    'train2017': 'http://images.cocodataset.org/zips/train2017.zip',
    'val2017': 'http://images.cocodataset.org/zips/val2017.zip',
    'annotations': 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip'
}
os.makedirs('coco_dataset', exist_ok=True)
os.chdir('coco_dataset')

# Downloads and extracts zip files from the specified url
def download_and_extract(url, dest):
    # Get request from URL
    response = requests.get(url, stream=True)
    # Calculates size of the file
    total_size = int(response.headers.get('content-length', 0))
    block_size = 1024
    t = tqdm(total=total_size, unit='iB', unit_scale=True)
    # Open and save data to file
    with open(dest, 'wb') as file:
        for data in response.iter_content(block_size):
            t.update(len(data))
            file.write(data)
    t.close()
    # Extract zipfile contents
    with ZipFile(dest, 'r') as zip_ref:
        zip_ref.extractall()
    # Clean up the zipfile after contents extracted
    os.remove(dest)

# Populates our local Colab Environment with the COCO Dataset's training,
# validation, and annotation datasets
for name, url in urls.items():
    download_and_extract(url, f"{name}.zip")

100%|██████████| 19.3G/19.3G [20:07<00:00, 16.0MiB/s]
100%|██████████| 816M/816M [01:26<00:00, 9.42MiB/s]
100%|██████████| 253M/253M [00:20<00:00, 12.3MiB/s]


In [None]:
import json
import shutil
import random
# Load COCO annotations
with open('/content/coco_dataset/annotations/instances_train2017.json') as f:
    annotations = json.load(f)

# Define what objects from COCO we are looking to detect
categories_of_interest = ['toothbrush', 'scissors', 'mouse']
category_ids = [category['id'] for category in annotations['categories'] if
                category['name'] in categories_of_interest]
# Create directories for the YOLOv8 images and labels datasets
os.makedirs('/content/coco_dataset/yolo_data/images/train', exist_ok=True)
os.makedirs('/content/coco_dataset/yolo_data/images/val', exist_ok=True)
os.makedirs('/content/coco_dataset/yolo_data/labels/train', exist_ok=True)
os.makedirs('/content/coco_dataset/yolo_data/labels/val', exist_ok=True)

subset_size = 40000 # Increase depending on how much of COCO dataset we want to train on
subset_images = annotations['images'][:subset_size]

# Dataset 80/20 split
random.shuffle(subset_images)
split_index = int(0.8 * len(subset_images))
train_images = subset_images[:split_index]
val_images = subset_images[split_index:]

# Training image annotation conversions
for image in tqdm(train_images):
    image_id = image['id']
    file_name = image['file_name']
    height = image['height']
    width = image['width']
    # Get all relevant annotations in our categories of interest
    image_annotations = [ann for ann in annotations['annotations'] if ann['image_id']
                         == image_id and ann['category_id'] in category_ids]
    if image_annotations:
        # Copy image and labels to appropriate corresponding YOLO directory
        src_img_path = os.path.join('train2017', file_name)
        dest_img_path = os.path.join('/content/coco_dataset/yolo_data/images/train',
                                     file_name)
        shutil.copyfile(src_img_path, dest_img_path)
        label_file_path = os.path.join('/content/coco_dataset/yolo_data/labels/train',
                                       file_name.replace('.jpg', '.txt'))
        # Create YOLO formatted label file from our COCO annotation information
        # (X_Center and Y_Center used instead of X_min and height vars for example)
        with open(label_file_path, 'w') as label_file:
            for annotation in image_annotations:
                bbox = annotation['bbox']
                category_id = annotation['category_id']
                x_center = (bbox[0] + bbox[2]/2) / width
                y_center = (bbox[1] + bbox[3]/2) / height
                w = bbox[2] / width
                h = bbox[3] / height
                label_file.write(f"{category_ids.index(category_id)} {x_center} "
                                f"{y_center} {w} {h}\n")
# Validation image annotation conversions
for image in tqdm(val_images):
    image_id = image['id']
    file_name = image['file_name']
    height = image['height']
    width = image['width']
    # Get all relevant annotations in our categories of interest
    image_annotations = [ann for ann in annotations['annotations'] if ann['image_id']
                         == image_id and ann['category_id'] in category_ids]
    if image_annotations:
      # Copy image and labels to appropriate corresponding YOLO directory
        src_img_path = os.path.join('train2017', file_name)
        dest_img_path = os.path.join('/content/coco_dataset/yolo_data/images/val',
                                     file_name)
        shutil.copyfile(src_img_path, dest_img_path)
        label_file_path = os.path.join('/content/coco_dataset/yolo_data/labels/val',
                                       file_name.replace('.jpg', '.txt'))
        # Create YOLO formatted label file from our COCO annotation information
        # (X_Center and Y_Center used instead of X_min and height vars for example)
        with open(label_file_path, 'w') as label_file:
            for annotation in image_annotations:
                bbox = annotation['bbox']
                category_id = annotation['category_id']
                x_center = (bbox[0] + bbox[2]/2) / width
                y_center = (bbox[1] + bbox[3]/2) / height
                w = bbox[2] / width
                h = bbox[3] / height
                label_file.write(f"{category_ids.index(category_id)} {x_center} "
                                f"{y_center} {w} {h}\n")

 96%|█████████▌| 30769/32000 [1:23:22<03:18,  6.21it/s]

Write to the COCO YAML configuration file with 3 classes (for our 3 objects) and the specified paths for our training and validation image datasets

In [None]:
# COCO YAML Configuration File
coco_yaml_content = """
train: ./yolo_data/images/train
val: ./yolo_data/images/val

nc: 3
names: ['mouse', 'scissors', 'toothbrush']
"""

with open("/content/coco_dataset/coco.yaml", "w") as file:
    file.write(coco_yaml_content)

Model Training (Epochs to be determined) Using YOLOv8n model

In [None]:
from ultralytics import YOLO

model = YOLO('yolov8n.pt')

# Train model (More epochs can be done at expense of computational power)
model.train(data='/content/coco_dataset/coco.yaml',
            epochs=150,
            imgsz=640,
            batch=16,
            name='yolov8_coco',
            workers=4)


Create zipped file containing model w/ weights

In [None]:
import shutil
from google.colab import files

shutil.make_archive('yolov8_coco', 'zip', '/content/runs/train/yolov8_coco')
files.download('yolov8_coco.zip')

In [None]:
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from ultralytics import YOLO

# Visualize
def plot_boxes(img, boxes, confidences, class_ids):
    for box, conf, cls in zip(boxes, confidences, class_ids): # Iterate using zip
        x1, y1, x2, y2 = box
        label = f'{model.names[int(cls)]} {conf:.2f}'
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(img, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (0, 255, 0), 2)
    return img

# Path to the image
image_path = '/content/scissorsopen.jpg'
image = Image.open(image_path)
image = np.array(image)

# Resulting bounding boxes predicted by model
results = model(image_path)

if results[0].boxes.xyxy is not None:
    boxes = results[0].boxes.xyxy.cpu().numpy()  #.cpu for CPU usage, remove otherwise
    confidences = results[0].boxes.conf.cpu().numpy()
    class_ids = results[0].boxes.cls.cpu().numpy()
else:
    boxes = []
    confidences = []
    class_ids = []

# Plot the image with bounding boxes
img_with_boxes = plot_boxes(image.copy(), boxes, confidences, class_ids) # Pass
# confidences and class IDs

# Display the image
plt.figure(figsize=(10, 10))
plt.imshow(img_with_boxes)
plt.axis('off')
plt.show()