In [1]:
from ultralytics import YOLO

# Checkpoint history:
#   base model:    "../../models/yolo11s.pt"
#   1st train run: "../../runs/detect/train/weights/last.pt"
#   2nd train run: "../../runs/detect/train2/weights/last.pt" -> worse results than the first run
# Load the best.pt checkpoint of the first training run and print a model summary.
model = YOLO("../../runs/detect/train/weights/best.pt")
model.info()

YOLO11s summary: 319 layers, 9,428,179 parameters, 0 gradients, 21.5 GFLOPs


(319, 9428179, 0, 21.548492800000002)

In [None]:
# Continue training the loaded checkpoint on the dataset described by data.yaml
results = model.train(
    data="data.yaml",
    epochs=10,
    batch=32,
    imgsz=640, # Rescales images to this size
    device="cuda",
    # freeze = 300, # freeze layers to fine-tune instead of retraining https://github.com/ultralytics/ultralytics/issues/6184 -> not used, I retrain all layers
    # resume=True, # to continue an interrupted training run
    # fraction = 0.3 # train on only this fraction of the training data (it does not subset the test data)
)

In [149]:
# Reference numbers from the library's own validator.
# NOTE(review): per the Ultralytics val docs, `conf` is the minimum detection confidence and
# `iou` the NMS IoU threshold - confirm `iou` is not expected to set the TP matching threshold.
metrics = model.val(conf=0.25, iou=0.5, half=False, batch=1, rect=False)

Ultralytics 8.3.70  Python-3.12.6 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce GTX 1080 Ti, 11264MiB)


[34m[1mval: [0mScanning D:\Zeug\Unizeug\Master_DataScience\3.Semester\Learning from Images\Project\data\merged\labels\val.cache... 1132 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1132/1132 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1132/1132 [00:20<00:00, 56.60it/s]


                   all       1132       1635      0.852      0.791      0.846      0.495
Speed: 0.4ms preprocess, 12.1ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to [1md:\Zeug\Unizeug\Master_DataScience\3.Semester\Learning from Images\Project\runs\detect\val8[0m


In [None]:
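# Pasted result of an earlier evaluate_model(...) run, kept for comparison (never executed as code):
# {'Box(P)': 0.8171428571428572, 'Box(R)': 0.7871559633027523, 'mAP50': tensor(0.9467), 'mAP50-95': np.float64(0.6645460531408871), 'Confusion Matrix': {'TP': 1287, 'FP': 288, 'FN': 348, 'TN': None}}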

In [None]:
# Smoke test: run inference on a single image and show the first result.
results = model("../../data/img.jpg")
results[0].show()




image 1/1 d:\Zeug\Unizeug\Master_DataScience\3.Semester\Learning from Images\Project\src\yolo\..\..\data\img.jpg: 480x640 1 license_plate, 44.0ms
Speed: 3.1ms preprocess, 44.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


In [4]:
# Export the loaded model in ONNX format; `path` holds whatever export() returns.
path = model.export(format="onnx")

Ultralytics 8.3.70  Python-3.12.6 torch-2.6.0+cu126 CPU (Intel Core(TM) i7-8700K 3.70GHz)

[34m[1mPyTorch:[0m starting from '..\..\runs\detect\train\weights\best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 5, 8400) (18.3 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.48...
[34m[1mONNX:[0m export success  3.6s, saved as '..\..\runs\detect\train\weights\best.onnx' (36.2 MB)

Export complete (4.2s)
Results saved to [1mD:\Zeug\Unizeug\Master_DataScience\3.Semester\Learning from Images\Project\runs\detect\train\weights[0m
Predict:         yolo predict task=detect model=..\..\runs\detect\train\weights\best.onnx imgsz=640  
Validate:        yolo val task=detect model=..\..\runs\detect\train\weights\best.onnx imgsz=640 data=data.yaml  
Visualize:       https://netron.app


In [146]:
import numpy as np
from torch import tensor
from torchmetrics.classification import AveragePrecision


def iou(box1, box2):
    """
    Intersection over union of two axis-aligned boxes.

    Args:
        box1: Indexable box in [x_min, y_min, x_max, y_max] order.
        box2: Indexable box in the same order.

    Returns:
        Overlap area divided by the area of the union, or 0 when the union has no area.
    """
    def _area(box):
        return (box[2] - box[0]) * (box[3] - box[1])

    # Corners of the overlap rectangle; right < left or bottom < top means no overlap
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    overlap = max(0, right - left) * max(0, bottom - top)
    combined = _area(box1) + _area(box2) - overlap

    if combined > 0:
        return overlap / combined
    return 0

def evaluate_model(model, images, bboxes, predict_function, conf_threshold = 0.01, iou_threshold=0.5):
    """
    Evaluate a single-class detector against ground-truth boxes.

    Per image, predictions are processed from highest to lowest confidence. Each one is
    compared with the ground-truth box it overlaps most: it is a true positive when that
    IoU reaches ``iou_threshold`` and the ground-truth box has not already been claimed
    by an earlier prediction, otherwise a false positive. Ground-truth boxes that are
    never claimed count as false negatives.

    Args:
        model: Detector object; it is only passed through to ``predict_function``.
        images (list): One entry per image, in whatever form ``predict_function`` accepts.
        bboxes (list): Per image, the list of ground-truth boxes as [x_min, y_min, x_max, y_max].
        predict_function (function): Called as ``predict_function(model, image)``; returns
            ``(confidences, predicted_boxes)`` with boxes in the same format as ``bboxes``.
        conf_threshold (float): Predictions with a confidence below this value are ignored.
        iou_threshold (float): Minimum IoU for a prediction to count as a true positive.

    Returns:
        dict: "Box(P)" and "Box(R)" (precision / recall at the given thresholds), "mAP50",
        "mAP50-95" and "Confusion Matrix" (TP / FP / FN counts; TN is None because
        background boxes are not enumerable).

        NOTE: "mAP50" is a binary average precision over the kept predictions only (missed
        ground truths are not part of it) and "mAP50-95" is the mean best-IoU of the kept
        predictions. Both are placeholders, not the standard detection metrics.
    """
    tp, fp, fn = 0, 0, 0
    predicted_confidences = []
    predicted_true_labels = []
    iou_scores = []

    for img, ground_truths in zip(images, bboxes):
        confidences, predicted_boxes = predict_function(model, img)
        # Highest confidence first, so that (a) confident predictions claim ground truths
        # before weaker duplicates and (b) the early `break` below is valid no matter in
        # which order predict_function returns its detections. Sorting on the confidence
        # only keeps the sort stable and avoids comparing boxes.
        predictions = sorted(
            zip(confidences, predicted_boxes), key=lambda pred: pred[0], reverse=True
        )
        matched = set()

        for conf, pred_box in predictions:
            if conf < conf_threshold:
                # everything after this point has an even lower confidence
                break

            iou_max = 0
            matched_gt = None

            for i, gt_box in enumerate(ground_truths):
                iou_score = iou(pred_box, gt_box)
                if iou_score > iou_max:
                    iou_max = iou_score
                    matched_gt = i

            # matched_gt stays None when nothing overlaps; never treat that as a match,
            # even for iou_threshold=0 (it would push FN below zero)
            is_true_positive = (
                matched_gt is not None
                and iou_max >= iou_threshold
                and matched_gt not in matched
            )

            predicted_confidences.append(conf)
            predicted_true_labels.append(1 if is_true_positive else 0)
            iou_scores.append(iou_max)

            if is_true_positive:
                # predicted: license plate, true: license plate
                tp += 1
                matched.add(matched_gt)
            else:
                # predicted: license plate, true: background
                fp += 1

        # predicted: background, true: license plate (license plates that were not predicted)
        fn += len(ground_truths) - len(matched)
        # tn: predicted: background, true: background (the background is not predicted,
        # so the number of background boxes is unknown)

    confusion_matrix = {"TP": tp, "FP": fp, "FN": fn, "TN": None}
    # the epsilon avoids a division by zero when there are no predictions / no ground truths
    precision = tp / (tp + fp + 1e-7)
    recall = tp / (tp + fn + 1e-7)

    # TODO mAP does not belong in a function that takes a fixed iou_threshold
    # https://www.v7labs.com/blog/mean-average-precision
    # (that blog describes AP incorrectly, see
    # https://www.reddit.com/r/computervision/comments/162ss9x/trouble_understanding_map50_metric/)
    # TODO replace both values below with a proper AP: sweep the confidence, build the
    # precision/recall curve, and repeat for IoU thresholds 0.5, 0.55, ..., 0.95
    if predicted_confidences:
        ap = AveragePrecision(task="binary")
        mAP50 = ap(tensor(predicted_confidences), tensor(predicted_true_labels))
        mAP50_95 = np.mean(iou_scores)
    else:
        # No prediction survived the confidence threshold: skip the metric calls, which
        # cannot handle empty input, and report zeros of the usual types instead
        mAP50 = tensor(0.0)
        mAP50_95 = np.float64(0.0)

    return {
        "Box(P)": precision,
        "Box(R)": recall,
        "mAP50": mAP50,
        "mAP50-95": mAP50_95,
        "Confusion Matrix": confusion_matrix
    }

In [79]:
def yolo_predict(model, img):
    """
    Run ``model`` on one image and flatten its detections into two parallel lists.

    Args:
        model: Callable detector, invoked as ``model(img, verbose=False)``.
        img: Image input forwarded unchanged to the model.

    Returns:
        tuple: ``(confidences, boxes)`` - one confidence value and one
        ``xyxy[0].tolist()`` box per detection, in the order the model returned them.
    """
    # Only `verbose` is passed, so the library's default inference arguments apply and
    # take precedence over the thresholds of the evaluation function:
    # https://docs.ultralytics.com/modes/predict/#inference-arguments
    detections = [box for result in model(img, verbose=False) for box in result.boxes]

    confidences = [detection.conf.item() for detection in detections]
    boxes = [detection.xyxy[0].tolist() for detection in detections]
    return confidences, boxes

def yolo_to_bbox(annotation_path, img_width, img_height):
    """
    Read a YOLO label file and convert its boxes to pixel corner coordinates.

    Each non-blank line must hold five numbers: ``class x_center y_center width height``,
    with the box values given as fractions of the image size. The class id is ignored
    because the dataset has a single class.

    Args:
        annotation_path: Path of the label file.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        list: One ``[x_min, y_min, x_max, y_max]`` list of ints per labelled box
        (coordinates are truncated by ``int``).

    Raises:
        ValueError: If a non-blank line does not consist of exactly five numbers.
    """
    bboxes = []
    with open(annotation_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            # Blank lines (e.g. an extra newline at the end of the file) carry no box;
            # without this guard the 5-way unpack below fails on them
            if not parts:
                continue

            # I have only one class and ignore the class id
            _, x_center, y_center, width, height = map(float, parts)
            x_min = int((x_center - width / 2) * img_width)
            y_min = int((y_center - height / 2) * img_height)
            x_max = int((x_center + width / 2) * img_width)
            y_max = int((y_center + height / 2) * img_height)

            bboxes.append([x_min, y_min, x_max, y_max])

    return bboxes

In [120]:
# Run the model on one validation image to inspect its raw predictions.
results = model("../../data/merged/images/val/img_dataset_l25471.jpg")

In [126]:
# Print the label file that has the same stem as the validation image inspected above.
with open("../../data/merged/labels/val/img_dataset_l25471.txt", "r") as s:
    print(s.read())

0 0.13289062499999998 0.8077083333333333 0.02062499999999999 0.01250000000000003
0 0.7575390625 0.7143753333333334 0.01937499999999992 0.011665999999999954
0 0.852421875 0.8362496666666667 0.028750000000000053 0.020833999999999946



In [132]:
# No visible cell imports bbox_iou, so import it here to keep this cell runnable on a
# fresh kernel (Restart & Run All).
from ultralytics.utils.metrics import bbox_iou

# Sanity check: IoU between the first ground-truth label of the image and the prediction
# closest to it. Both boxes are normalized (x_center, y_center, width, height), hence xywh=True.
bbox_iou(tensor([0.1328, 0.8077, 0.0206, 0.0125]), tensor([0.1336, 0.8072, 0.0240, 0.0171]), xywh=True)

tensor([0.6273])

In [121]:
# Display the detected boxes of the first result.
results[0].boxes

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0., 0., 0., 0.], device='cuda:0')
conf: tensor([0.6007, 0.5381, 0.5187, 0.2753], device='cuda:0')
data: tensor([[8.6098e+02, 6.3628e+02, 8.8565e+02, 6.4966e+02, 6.0071e-01, 0.0000e+00],
        [1.2452e+02, 6.1338e+02, 1.4905e+02, 6.2650e+02, 5.3813e-01, 0.0000e+00],
        [2.4927e+00, 7.2909e+02, 3.4923e+01, 7.4470e+02, 5.1873e-01, 0.0000e+00],
        [1.6709e+02, 5.9117e+02, 1.9439e+02, 6.0522e+02, 2.7529e-01, 0.0000e+00]], device='cuda:0')
id: None
is_track: False
orig_shape: (768, 1024)
shape: torch.Size([4, 6])
xywh: tensor([[873.3170, 642.9738,  24.6746,  13.3813],
        [136.7847, 619.9441,  24.5333,  13.1199],
        [ 18.7079, 736.8942,  32.4305,  15.6099],
        [180.7380, 598.1912,  27.3021,  14.0504]], device='cuda:0')
xywhn: tensor([[0.8528, 0.8372, 0.0241, 0.0174],
        [0.1336, 0.8072, 0.0240, 0.0171],
        [0.0183, 0.9595, 0.0317, 0.0203],
        [0.1765, 0.7789, 0.0267, 0.0183]], devi

In [140]:
import os
import cv2

image_dir = "../../data/merged/images/val"
annotation_dir = "../../data/merged/labels/val"

# os.listdir() makes no ordering guarantee, so two independent listings cannot be zipped
# safely. List the images once (sorted for reproducibility) and derive every label path
# from the file stem of its image.
image_paths = [os.path.join(image_dir, f) for f in sorted(os.listdir(image_dir))]
annotation_paths = [
    os.path.join(annotation_dir, os.path.splitext(os.path.basename(p))[0] + ".txt")
    for p in image_paths
]
assert all(os.path.isfile(p) for p in annotation_paths), "every image needs a label file with the same stem"

#image_paths = ["../../data/merged/images/test/img_dataset_s431.jpg", "../../data/merged/images/test/img_dataset_s432.jpg"]
#annotation_paths = ["../../data/merged/labels/test/img_dataset_s431.txt", "../../data/merged/labels/test/img_dataset_s432.txt"]


# Ground-truth boxes per image, in the same order as image_paths
all_bboxes = []
for annotation_path, img_path in zip(annotation_paths, image_paths):
    img = cv2.imread(img_path)
    # cv2.imread reports an unreadable file by returning None instead of raising
    if img is None:
        raise ValueError(f"could not read image: {img_path}")
    # The image is only loaded for its size, which is needed to de-normalize the labels
    img_height, img_width = img.shape[0], img.shape[1]
    # Convert YOLO annotations
    bboxes = yolo_to_bbox(annotation_path, img_width, img_height)
    all_bboxes.append(bboxes)

In [148]:
# Evaluate the model on the validation images with the hand-written metrics.
# Default thresholds apply (conf_threshold=0.01, iou_threshold=0.5).
metrics = evaluate_model(model, image_paths, all_bboxes, yolo_predict)
print(metrics)

{'Box(P)': 0.817142857090975, 'Box(R)': 0.7871559632546081, 'mAP50': tensor(0.9463), 'mAP50-95': np.float64(0.6645460531408871), 'Confusion Matrix': {'TP': 1287, 'FP': 288, 'FN': 348, 'TN': None}}
