## Model Prediction

In [None]:
#run this if ultralytics not already installed on your machine
pip install ultralytics

In [10]:
#run this for prediction, be sure to change img variable to path to your image
from ultralytics import YOLO
model = YOLO(r'C:\Users\Musa Sarsour\OneDrive - University Of Jordan\Study\1st Semester 2023\PR\runs\detect\train75\weights\best.pt')
img = "test5.jpeg"
model.predict(img,save=True)


image 1/1 C:\Users\Moham\OneDrive - University Of Jordan\Study\1st Semester 2023\PR\test5.jpeg: 736x576 1 person, 1 car, 40.1ms
Speed: 3.5ms preprocess, 40.1ms inference, 2.1ms postprocess per image at shape (1, 3, 736, 576)
Results saved to [1mruns\detect\predict10[0m


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted p

## Dataset Preparation 

In [6]:
from pycocotools.coco import COCO

def fetch_exclusive_class_images(annotation_file, desired_classes):
    """
    This function loads the COCO dataset and returns a list with only the classes defined in the desired_classes variable
    -------
    Parameters:
        annotation_file(str): Path to COCO dataset annotation file
        desired_classes(list): List of classes you want the function to search for
    Returns:
        unique_img_ids(list): List of unique images that match the desired classes
    """
    # Initialize COCO API
    coco = COCO(annotation_file)

    # Get category IDs for desired classes
    cat_ids = coco.getCatIds(catNms=desired_classes)

    # Unique Images to be returned by function
    unique_img_ids = set()

    for cat_id in cat_ids:
        # Get all image IDs containing the current class
        img_ids = coco.getImgIds(catIds=[cat_id])

        for img_id in img_ids:
            # Skip if we have already processed this image
            if img_id in unique_img_ids:
                continue

            # Get all annotations for the image
            ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None)
            annotations = coco.loadAnns(ann_ids)

            # Check if the image contains only objects from the desired classes
            if all(ann['category_id'] in cat_ids for ann in annotations):
                unique_img_ids.add(img_id)
    
    return list(unique_img_ids)

# Path to the COCO annotation file
annotation_file = r'C:\Users\Musa Sarsour\datasets\coco\annotations\instances_train2017.json'

# Define desired classes
desired_classes = ['bicycle','person', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 
                   'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 
                   'horse', 'sheep', 'cow', 'backpack', 'umbrella', 'handbag',  'suitcase', 'frisbee', 'sports ball',
                   'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
                   'chair', 'couch', 'potted plant', 'dining table', 'toilet' ,'tv' ,'laptop', 'mouse', 'keyboard',
                   'cell phone', 'sink', 'book', 'clock', 'vase' ]   

# Test function before using in later instances
exclusive_images = fetch_exclusive_class_images(annotation_file, desired_classes)

print(f"Number of exclusive images found: {len(exclusive_images)}")


loading annotations into memory...
Done (t=19.30s)
creating index...
index created!
Number of exclusive images found: 68091


In [7]:
from pycocotools.coco import COCO
import os
import shutil
import random

def split_dataset(exclusive_images, train_size, test_size, val_size, source_dir, dest_dir, annotation_file):
    """
    This function takes the list of exclusive images, splits them according to a set number of train:test:val and saves them to file
    Parameters:
        exclusive_images (list): List of images to be split
        train_size (int): number of train images
        test_size (int): number of test images
        val_size (int): number of validation images
        source_dir (str): Path to COCO images we want to split from 
        dest_dir (str): Path to directory we want the split images saved to
        annotation_file (str): Path to COCO annotation file
    Returns: 
        None
    """
    # Initialize COCO API 
    coco = COCO(annotation_file)

    # Shuffle the exclusive images list
    random.shuffle(exclusive_images)

    # Split the dataset
    train_images = exclusive_images[:train_size]
    test_images = exclusive_images[train_size:train_size + test_size]
    val_images = exclusive_images[train_size + test_size:train_size + test_size + val_size]

    # Function to copy images to the respective folders
    def copy_images(image_ids, split_type):
        split_dir = os.path.join(dest_dir, split_type, 'images')
        os.makedirs(split_dir, exist_ok=True)
        for img_id in image_ids:
            img_info = coco.loadImgs(img_id)[0]
            source_path = os.path.join(source_dir, img_info['file_name'])
            dest_path = os.path.join(split_dir, img_info['file_name'])
            shutil.copy2(source_path, dest_path)

    # Copy images to their respective directories
    copy_images(train_images, 'train')
    copy_images(test_images, 'test')
    copy_images(val_images, 'val')

# Source and destination directories
source_dir = r'datasets\coco\images\train2017'  
dest_dir = r'coco_custom'    

# Path to COCO annotation file
annotation_file = r'datasets\coco\annotations\instances_train2017.json' 

# Run this if previous function hasn't been run before
#exclusive_images = fetch_exclusive_class_images(annotation_file, desired_classes)

# Function call
split_dataset(exclusive_images, 8000, 1500, 500, source_dir, dest_dir, annotation_file)

print("Dataset split and copied successfully.")


loading annotations into memory...
Done (t=19.98s)
creating index...
index created!
loading annotations into memory...
Done (t=17.02s)
creating index...
index created!
Dataset split and copied successfully.


In [9]:
import os
import shutil

def copy_labels_to_splits(base_img_dir, source_label_dir, split_types):
    """
    This function copies the labels from the original COCO dataset for the respective split images
    Parameters: 
        base_img_dir (str): Path of copied images 
        source_label_dir (str): Path of labels from original COCO dataset
        split_types (list): List of splits, most probably ['train', 'test', 'val']
    Returns: 
        None
    """
    for split in split_types:
        img_dir = os.path.join(base_img_dir, split, 'images')
        label_dir = os.path.join(base_img_dir, split, 'labels')
        os.makedirs(label_dir, exist_ok=True)  # Create label directory if it doesn't exist

        for img_file in os.listdir(img_dir):
            label_file = os.path.splitext(img_file)[0] + '.txt'
            source_label_path = os.path.join(source_label_dir, label_file)
            dest_label_path = os.path.join(label_dir, label_file)

            if os.path.exists(source_label_path):
                shutil.copy2(source_label_path, dest_label_path)

# Base directory for split images and the source directory for original COCO labels
base_img_dir = r'coco_custom'  
source_label_dir = r'datasets\coco\labels\train2017'  

# Function call
copy_labels_to_splits(base_img_dir, source_label_dir, ['train', 'test', 'val'])

print("Labels copied successfully to train, test, and val folders.")

Labels copied successfully to train, test, and val folders.


## Model Validation

In [1]:
from ultralytics import YOLO
model = YOLO('yolov8n') #Base YOLOv8n model
metrics = model.val(data='coco_custom.yaml', plots=True)

Ultralytics YOLOv8.0.230 🚀 Python-3.10.9 torch-1.12.1+cu113 CUDA:0 (NVIDIA GeForce RTX 2060 SUPER, 8192MiB)
YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs


[34m[1mval: [0mScanning C:\Users\Moham\OneDrive - University Of Jordan\Study\1st Semester 2023\PR\coco_custom\val\labels.cache... 500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 500/500 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:05<00:00,  6.12it/s]


                   all        500       3364      0.718      0.585      0.649       0.48
                person        500       1216      0.787      0.653      0.746      0.511
               bicycle        500         65      0.798      0.369      0.508      0.315
                   car        500        231      0.699       0.55      0.648       0.41
            motorcycle        500         48      0.761      0.688      0.752      0.462
              airplane        500         33      0.689      0.727       0.73      0.601
                   bus        500         31      0.811      0.774       0.82      0.728
                 train        500         16      0.695      0.855      0.884      0.717
                 truck        500         72      0.616      0.445      0.549      0.416
                  boat        500         69      0.813      0.441      0.538      0.285
         traffic light        500        104      0.722      0.399      0.523      0.301
          fire hydran

In [1]:
from ultralytics import YOLO
model = YOLO('model.pt') #Our custom model
metrics = model.val(data='coco_custom.yaml', plots=True)

Ultralytics YOLOv8.0.230 🚀 Python-3.10.9 torch-1.12.1+cu113 CUDA:0 (NVIDIA GeForce RTX 2060 SUPER, 8192MiB)
Model summary (fused): 218 layers, 25886080 parameters, 0 gradients, 78.9 GFLOPs


[34m[1mval: [0mScanning C:\Users\Moham\OneDrive - University Of Jordan\Study\1st Semester 2023\PR\coco_custom\val\labels.cache... 500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 500/500 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:22<00:00,  1.43it/s]


                   all        500       3364      0.702      0.633      0.677        0.5
                person        500       1216      0.765      0.734      0.796      0.571
               bicycle        500         65      0.841      0.569      0.691      0.423
                   car        500        231      0.702      0.649      0.725        0.5
            motorcycle        500         48      0.697      0.771      0.792      0.542
              airplane        500         33      0.812      0.758      0.814       0.67
                   bus        500         31      0.682      0.806      0.865      0.741
                 train        500         16      0.716      0.787      0.817      0.673
                 truck        500         72      0.601      0.486      0.589      0.446
                  boat        500         69      0.817      0.522      0.623      0.363
         traffic light        500        104      0.707       0.51      0.626      0.377
          fire hydran

## Model Prediction on Webcam

##### Note: in some cases, after pressing q to end the feed and stop the program, memory leak might happen and you would have to restart the kernel

In [None]:
from ultralytics import YOLO
import cv2
import math 
# start webcam
cap = cv2.VideoCapture(0)
cap.set(3, 1360)
cap.set(4, 720)

# model
model = YOLO("model.pt")

# object classes
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ] 


while True:
    success, img = cap.read()
    results = model(img, stream=True)

    # coordinates
    for r in results:
        boxes = r.boxes

        for box in boxes:
            # bounding box
            x1, y1, x2, y2 = box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values

            # put box in cam
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

            # confidence
            confidence = math.ceil((box.conf[0]*100))/100
            print("Confidence --->",confidence)

            # class name
            cls = int(box.cls[0])
            print("Class name -->", classNames[cls])

            # object details
            org = [x1, y1]
            font = cv2.FONT_HERSHEY_SIMPLEX
            fontScale = 1
            color = (255, 0, 0)
            thickness = 2

            cv2.putText(img, classNames[cls], org, font, fontScale, color, thickness)

    cv2.imshow('Webcam', img)
    if cv2.waitKey(1) == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

## Model prediction on video

In [7]:
import supervision as sv
import numpy as np
from ultralytics import YOLO

VIDEO_PATH = "video4.mp4"

model = YOLO(r'C:\Users\Musa Sarsour\OneDrive - University Of Jordan\Study\1st Semester 2023\PR\runs\detect\train75\weights\best.pt')
video_info = sv.VideoInfo.from_video_path(VIDEO_PATH)

In [8]:
#Function to run prediction on every frame of video and save video 
def process_frame(frame: np.ndarray, _) -> np.ndarray:
    """
    Processes a single video frame for object detection.
    This function applies a pre-trained model to detect objects in an image frame and applies annotation with bounding boxes and class labels
    Parameters:
    frame (np.ndarray): A single frame of the video as a NumPy array.
    _ (ignored): A placeholder for an optional second argument that is not used.
    Returns:
    np.ndarray: The processed frame with object detections annotated.
    The function uses the 'model' variable which should be a pre-loaded model for object 
    detection.
    """
    results = model(frame, imgsz=1280)[0]
    
    detections = sv.Detections.from_ultralytics(results)

    box_annotator = sv.BoxAnnotator(thickness=4, text_thickness=4, text_scale=2)

    labels = [f"{model.names[class_id]} {confidence:0.2f}" for _, _, confidence, class_id, _ in detections]
    frame = box_annotator.annotate(scene=frame, detections=detections, labels=labels)

    return frame


sv.process_video(source_path=VIDEO_PATH, target_path=f"result5.mp4", callback=process_frame)


0: 1280x736 2 persons, 2 cars, 1 bird, 52.2ms
Speed: 5.0ms preprocess, 52.2ms inference, 1.0ms postprocess per image at shape (1, 3, 1280, 736)

0: 1280x736 2 persons, 2 cars, 1 bird, 51.0ms
Speed: 6.0ms preprocess, 51.0ms inference, 2.0ms postprocess per image at shape (1, 3, 1280, 736)

0: 1280x736 2 persons, 2 cars, 1 bird, 50.2ms
Speed: 6.5ms preprocess, 50.2ms inference, 2.7ms postprocess per image at shape (1, 3, 1280, 736)

0: 1280x736 2 persons, 2 cars, 2 birds, 43.2ms
Speed: 6.0ms preprocess, 43.2ms inference, 2.0ms postprocess per image at shape (1, 3, 1280, 736)

0: 1280x736 2 persons, 2 cars, 2 birds, 41.1ms
Speed: 7.0ms preprocess, 41.1ms inference, 2.0ms postprocess per image at shape (1, 3, 1280, 736)

0: 1280x736 2 persons, 2 cars, 2 birds, 42.6ms
Speed: 5.5ms preprocess, 42.6ms inference, 2.0ms postprocess per image at shape (1, 3, 1280, 736)

0: 1280x736 2 persons, 2 cars, 1 bird, 41.8ms
Speed: 6.6ms preprocess, 41.8ms inference, 2.0ms postprocess per image at shape 