# YOLOv8 Model Evaluation

The following code loads the trained YOLOv8 model and calculates evaluation metrics based on its performance against a testing set.

In [1]:
from ultralytics import YOLO
import cv2
import glob
from PIL import Image
import torch
import os
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt
import io



# Function to load the test dataset
def load_test_dataset(images_path, labels_path):
    """Load the test image paths together with their YOLO-format annotations.

    Args:
        images_path: Directory that is searched for ``*.jpg`` images.
        labels_path: Directory holding one ``<image name without extension>.txt``
            file per image. Each non-blank line is expected to contain
            ``class x_center y_center width height``.

    Returns:
        A list of ``(image_file, annotations)`` tuples in sorted path order.
        ``annotations`` is a list of ``(class_id, x, y, w, h)`` tuples
        (an ``int`` followed by four ``float`` values). An image that has no
        label file gets an empty annotation list.

    Raises:
        ValueError: If a non-blank label line does not have exactly five
            fields or a field cannot be parsed as a number.
    """
    dataset = []
    # glob returns files in arbitrary order; sort so runs are reproducible.
    image_files = sorted(glob.glob(os.path.join(images_path, "*.jpg")))

    for image_file in image_files:
        # splitext swaps only the real extension. str.replace(".jpg", ".txt")
        # would also rewrite a ".jpg" in the middle of the name and would miss
        # an upper-case ".JPG" matched by glob on case-insensitive systems.
        stem = os.path.splitext(os.path.basename(image_file))[0]
        label_file = os.path.join(labels_path, stem + ".txt")

        annotations = []
        # In the YOLO dataset layout an image without objects has no label
        # file, so a missing file is treated as "no ground-truth boxes".
        if os.path.isfile(label_file):
            with open(label_file, 'r') as file:
                for line in file:
                    fields = line.split()
                    if not fields:
                        # Blank or trailing empty lines carry no annotation.
                        continue
                    cls, x, y, w, h = fields
                    annotations.append((int(cls), float(x), float(y), float(w), float(h)))
        dataset.append((image_file, annotations))
    return dataset

# Function to convert YOLO predictions to a common format
def convert_yolo_predictions(predictions, image_shape):
    """Convert detections to normalized ``(class, xc, yc, w, h)`` tuples.

    Args:
        predictions: Iterable of result objects. Each result exposes
            ``boxes``; each box exposes ``xyxy`` and ``cls`` whose first
            element holds the corner coordinates ``(x1, y1, x2, y2)`` and the
            class id respectively.
        image_shape: Shape of the image the boxes refer to; only the first
            two entries ``(height, width)`` are used.

    Returns:
        A list of ``(class_id, x_center, y_center, box_width, box_height)``
        tuples with coordinates divided by the image width/height. All
        values are plain Python ``int``/``float`` objects.
    """
    converted_predictions = []
    height, width = image_shape[:2]

    for prediction in predictions:
        for box in prediction.boxes:
            # Convert to Python floats once, at the boundary. Otherwise the
            # tuples carry tensor scalars, which downstream code then mixes
            # with the plain-float ground-truth tuples.
            x1, y1, x2, y2 = (float(value) for value in box.xyxy[0])
            x_center = ((x2 + x1) / 2) / width
            y_center = ((y2 + y1) / 2) / height
            box_width = (x2 - x1) / width
            box_height = (y2 - y1) / height
            class_id = int(box.cls[0])
            converted_predictions.append((class_id, x_center, y_center, box_width, box_height))

    return converted_predictions

def iou(box1, box2):
    """Return the intersection-over-union of two center-format boxes.

    Args:
        box1: ``(class_id, x_center, y_center, width, height)``; the class id
            is ignored.
        box2: Same layout as ``box1``.

    Returns:
        Intersection area divided by union area. Returns ``0.0`` when the
        union area is not positive (both boxes have zero area), instead of
        dividing by zero.
    """
    _, x1_center, y1_center, width1, height1 = box1
    _, x2_center, y2_center, width2, height2 = box2

    # Edges of the overlap rectangle (may be inverted when boxes are disjoint)
    left = max(x1_center - width1 / 2, x2_center - width2 / 2)
    top = max(y1_center - height1 / 2, y2_center - height2 / 2)
    right = min(x1_center + width1 / 2, x2_center + width2 / 2)
    bottom = min(y1_center + height1 / 2, y2_center + height2 / 2)

    # Intersection area; clamping to 0 handles disjoint boxes
    intersection_area = max(0, right - left) * max(0, bottom - top)

    box1_area = width1 * height1
    box2_area = width2 * height2

    # Union area
    union_area = box1_area + box2_area - intersection_area

    # Two degenerate (zero-area) boxes have no meaningful overlap
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area

# Function to plot confusion matrix
def plot_confusion_matrix(tp, fp, fn, class_names):
    """Draw per-class TP/FP/FN counts as an annotated heat map.

    Args:
        tp: Per-class true-positive counts, one entry per class.
        fp: Per-class false-positive counts, one entry per class.
        fn: Per-class false-negative counts, one entry per class.
        class_names: Class labels used for the x-axis ticks.

    Returns:
        The matplotlib figure containing the plot.
    """
    # One row per metric (TP, FP, FN), one column per class
    counts = np.array([tp, fp, fn])
    row_labels = ['TP', 'FP', 'FN']

    fig, ax = plt.subplots(figsize=(12, 6))
    heatmap = ax.matshow(counts, cmap=plt.cm.Blues)
    fig.colorbar(heatmap)

    # Class names along x, metric names along y
    ax.set_xticks(np.arange(len(class_names)))
    ax.set_xticklabels(class_names, rotation=45, ha="left")
    ax.set_yticks(np.arange(len(row_labels)))
    ax.set_yticklabels(row_labels)

    # Write each count into its cell
    for (row, col), count in np.ndenumerate(counts):
        ax.text(col, row, f'{count}', ha='center', va='center', color='red')

    plt.xlabel('Classes')
    plt.ylabel('Metrics')
    plt.title('Confusion Matrix')
    plt.tight_layout()

    return fig


# --- TensorBoard writer and trained model ---
log_dir = r"C:\Users\kyled\yolov8\runs\YOLOv8_tensorboard"
writer = SummaryWriter(log_dir)
model = YOLO(r"C:\Users\kyled\ultralytics\runs\detect\train10\weights\best.pt")

# --- Test set: images plus their YOLO-format label files ---
test_images_path = r"C:\Users\kyled\Downloads\yolo_formatted_testing_set\images"
test_labels_path = r"C:\Users\kyled\Downloads\yolo_formatted_testing_set\Labels"
test_dataset = load_test_dataset(test_images_path, test_labels_path)

# Accumulators for boxes collected over the whole test set
all_ground_truths, all_predictions = [], []

# --- Classes; list position is the class id ---
class_names = [
    'pizza',
    'Mushroom',
    'Pepperoni',
    'Yellow Peppers',
    'Black Olives',
    'Onion',
    'Ham',
    'Tomato',
    'Broccoli',
    'Green Olives',
]
num_classes = len(class_names)

# Per-class TP, FP and FN counters, all starting at zero
TP_per_class = [0 for _ in range(num_classes)]
FP_per_class = [0 for _ in range(num_classes)]
FN_per_class = [0 for _ in range(num_classes)]

# Minimum IoU for a prediction to count as a match
iou_threshold = 0.5

# Processing each image in the test dataset exactly once.
# This single pass fills the per-class TP/FP/FN counters used for the confusion
# matrix; the overall precision and recall are then derived from those same
# counters. Boxes are only ever compared within one image and one class.
for image_path, ground_truths in test_dataset:
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None; fail with
        # the offending path instead of an opaque error inside cvtColor.
        raise FileNotFoundError(f"Could not read test image: {image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image_rgb)

    # Predicting using the model
    results = model.predict(source=image_pil, stream=True)

    predictions = convert_yolo_predictions(results, image.shape)

    # Keep every box around for inspection after the run
    all_predictions.extend(predictions)
    all_ground_truths.extend(ground_truths)

    matched_ground_truths = [False] * len(ground_truths)

    # Greedy matching, in the order the predictions were returned
    for prediction in predictions:
        predicted_class = prediction[0]
        best_iou = 0
        best_gt_index = -1

        for gt_index, ground_truth in enumerate(ground_truths):
            # A ground truth can satisfy only one prediction; a duplicate
            # detection of the same object must fall through to FP.
            if matched_ground_truths[gt_index] or ground_truth[0] != predicted_class:
                continue

            current_iou = iou(prediction, ground_truth)
            if current_iou > best_iou and current_iou >= iou_threshold:
                best_iou = current_iou
                best_gt_index = gt_index

        if best_gt_index >= 0:
            # True Positive
            TP_per_class[predicted_class] += 1
            matched_ground_truths[best_gt_index] = True
        else:
            # False Positive
            FP_per_class[predicted_class] += 1

    for matched, ground_truth in zip(matched_ground_truths, ground_truths):
        if not matched:
            # False Negative
            FN_per_class[ground_truth[0]] += 1

# Overall counts are the sums of the per-class counts, so the scalar metrics
# and the confusion matrix always agree and FP can never become negative.
TP = sum(TP_per_class)
FP = sum(FP_per_class)
FN = sum(FN_per_class)

# Calculating precision and recall
precision = TP / (TP + FP) if (TP + FP) > 0 else 0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0

# Logging overall precision and recall to TensorBoard
writer.add_scalar("YOLOv8 Precision", precision)
writer.add_scalar("YOLOv8 Recall", recall)

# Plotting the confusion matrix
fig = plot_confusion_matrix(TP_per_class, FP_per_class, FN_per_class, class_names)

# Defining a transform to convert PIL image to tensor
transform = transforms.Compose([
    transforms.ToTensor()
])

# Render the figure to an in-memory PNG and read it back as an image.
# Saving through `fig` (not plt.savefig) guarantees this exact figure is
# written even if another figure has become pyplot's current one.
with io.BytesIO() as buf:
    fig.savefig(buf, format='png')
    buf.seek(0)

    image = Image.open(buf)

    # Convert while the buffer is still open: PIL decodes the pixel data
    # lazily, and the transform is what triggers the decode.
    image_tensor = transform(image)

writer.add_image("YOLOv8 Confusion Matrix", image_tensor, 0)

writer.close()
plt.close(fig)




0: 640x640 (no detections), 197.6ms
Speed: 6.5ms preprocess, 197.6ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 pizza, 199.9ms
Speed: 3.0ms preprocess, 199.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 pizza, 189.3ms
Speed: 5.0ms preprocess, 189.3ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 pizza, 173.5ms
Speed: 6.6ms preprocess, 173.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 181.3ms
Speed: 4.0ms preprocess, 181.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 178.3ms
Speed: 5.1ms preprocess, 178.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 170.4ms
Speed: 4.0ms preprocess, 170.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 132.6ms
Speed: 3.5ms preprocess, 132.6ms inferenc

In [2]:
# Load a single image; the model receives the PIL image, OpenCV draws on a copy
im1 = Image.open(r"C:\Users\kyled\Downloads\pizza_test.jpg")
# Force 3-channel RGB before reversing the channel axis: a grayscale file has
# no channel axis to slice and an RGBA file would end up with alpha first.
im1_cv2 = np.array(im1.convert("RGB"))
im1_cv2 = im1_cv2[:, :, ::-1].copy()  # RGB -> BGR, contiguous copy for OpenCV drawing

# Performing detection
results = model.predict(source=im1, stream=True)

for result in results:
    boxes = result.boxes

    # Drawing bounding boxes and labels on the image
    for box in boxes:
        # Extracting bounding box coordinates and class ID
        x1, y1, x2, y2 = box.xyxy[0]
        class_id = int(box.cls[0])
        label = class_names[class_id]

        # Keep the label's baseline at least one text height below the top
        # edge so labels of boxes touching the top of the image stay visible.
        (_, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.9, 2)
        label_y = max(int(y1 - 10), text_height)

        # Draw rectangle and text
        cv2.rectangle(im1_cv2, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(im1_cv2, label, (int(x1), label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,255,0), 2)

# Show the annotated image until a key is pressed
cv2.imshow("Detections", im1_cv2)
cv2.waitKey(0)
cv2.destroyAllWindows()


0: 448x640 1 pizza, 11 Pepperonis, 78.8ms
Speed: 3.0ms preprocess, 78.8ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)
