In [1]:
# to use virtual env
# python3 -m venv venv
# source venv/bin/activate

!pip3 install --upgrade pip
!pip3 install ultralytics opencv-python
!pip3 install pandas
!pip3 install albumentations
!pip3 install pytesseract






In [None]:
# download the dataset: https://www.kaggle.com/datasets/andrewmvd/car-plate-detection?resource=download
# rename the folder: "dataset"

In [2]:
import os
import shutil
import random
import time
import pandas as pd
from ultralytics import YOLO
import xml.etree.ElementTree as ET
import torch
import cv2



In [2]:
# process the annotations in YOLO text format
# split the data into train 70%, val 20% and test 10%

random.seed(42)

image_dir = 'dataset/images'
label_dir = 'dataset/annotations'

out_base = 'dataset_split'

def convert_xml_to_yolo(xml_path, txt_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()

    size = root.find('size')
    width = float(size.find('width').text)
    height = float(size.find('height').text)

    lines = []
    for obj in root.findall('object'):
        class_id = 0 

        bndbox = obj.find('bndbox')
        xmin = float(bndbox.find('xmin').text)
        ymin = float(bndbox.find('ymin').text)
        xmax = float(bndbox.find('xmax').text)
        ymax = float(bndbox.find('ymax').text)

        x_center = ((xmin + xmax) / 2) / width
        y_center = ((ymin + ymax) / 2) / height
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height

        lines.append(f"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")

    with open(txt_path, 'w') as f:
        f.write("\n".join(lines))

image_files = [f for f in os.listdir(image_dir) if f.endswith('.png')]
base_names = [os.path.splitext(f)[0] for f in image_files]

random.shuffle(base_names)
total = len(base_names)
train_end = int(0.7 * total)
val_end = int(0.9 * total)

splits = {
    'train': base_names[:train_end],
    'val': base_names[train_end:val_end],
    'test': base_names[val_end:]
}

for split in ['train', 'val', 'test']:
    os.makedirs(f'{out_base}/images/{split}', exist_ok=True)
    os.makedirs(f'{out_base}/labels/{split}', exist_ok=True)

for split, names in splits.items():
    for name in names:
        img_src = os.path.join(image_dir, f'{name}.png')
        lbl_src = os.path.join(label_dir, f'{name}.xml')

        img_dst = os.path.join(out_base, 'images', split, f'{name}.png')
        txt_dst = os.path.join(out_base, 'labels', split, f'{name}.txt')

        if os.path.exists(img_src):
            shutil.copy(img_src, img_dst)

        if os.path.exists(lbl_src):
            convert_xml_to_yolo(lbl_src, txt_dst)

print("Files split, grouped, and labels converted to YOLO TXT format")


Files split, grouped, and labels converted to YOLO TXT format


In [4]:
if torch.backends.mps.is_available():
    device = "mps"

elif torch.cuda.is_available():        
    device = "cuda"

else:
    device = "cpu"

print(f"Using device: {device}")


Using device: cuda


In [35]:
models = ["yolov8n.pt", "yolov8s.pt", "yolov8m.pt"]
data_yaml = "dataset_split/data.yaml"
epochs = 5

results = []

for model_name in models:
    print(f"\nTraining {model_name}...")
    model = YOLO(model_name)
    
    start_time = time.time()
    metrics = model.train(data=data_yaml, epochs=epochs)
    end_time = time.time()
    train_time = round(end_time - start_time, 2)
    
    map50 = metrics.results_dict.get("metrics/mAP50(B)", None)
    recall = metrics.results_dict.get("metrics/recall(B)", None)
    precision = metrics.results_dict.get("metrics/precision(B)", None)
    f1 = metrics.results_dict.get("metrics/f1(B)", None)
    
    results.append({
        "Model": model_name,
        "Precision": round(precision, 4) if precision else None,
        "Recall": round(recall, 4) if recall else None,
        "F1 Score": round(f1, 4) if f1 else None,
        "mAP50": round(map50, 4) if map50 else None,
        "Training Time (s)": train_time
    })

df = pd.DataFrame(results)
df.to_csv("yolo_model_comparison.csv", index=False)
display(df)


KeyboardInterrupt: 

In [5]:
# train and validate model
# from the table above I could see yolo v8 small performs better

model = YOLO("yolov8m.pt")
model.train(data="dataset_split/data.yaml", name="model_blur",
        epochs=10,
        batch=8,
        device = device
        )
metrics = model.val(data="dataset_split/data.yaml")
print(metrics)


Ultralytics 8.3.174  Python-3.9.2 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=dataset_split/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=model_blur, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, pl

[34m[1mtrain: [0mScanning D:\Term 2\Deep_Learning_Project\dl_project\dataset_split\labels\train.cache... 1829 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1829/1829 [00:00<?, ?it/s]

[34m[1mtrain: [0mD:\Term 2\Deep_Learning_Project\dl_project\dataset_split\images\train\car-wbs-MH03AR5549_00000.jpg: corrupt JPEG restored and saved





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 271.4112.0 MB/s, size: 393.5 KB)


[34m[1mval: [0mScanning D:\Term 2\Deep_Learning_Project\dl_project\dataset_split\labels\val.cache... 255 images, 0 backgrounds, 0 corrupt: 100%|██████████| 255/255 [00:00<?, ?it/s]


Plotting labels to runs\detect\model_blur\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns\detect\model_blur[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      3.25G      1.122      1.176      1.137          5        640: 100%|██████████| 229/229 [01:44<00:00,  2.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.52it/s]

                   all        255        262      0.455      0.519      0.396      0.222






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      3.33G       1.21      0.951      1.182          5        640: 100%|██████████| 229/229 [01:40<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.58it/s]

                   all        255        262      0.919      0.779      0.871      0.576






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      3.39G      1.156     0.8825      1.174          5        640: 100%|██████████| 229/229 [01:40<00:00,  2.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.59it/s]

                   all        255        262      0.877      0.791      0.886      0.582






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      3.42G      1.112     0.8062      1.159          5        640: 100%|██████████| 229/229 [01:39<00:00,  2.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.60it/s]

                   all        255        262      0.938      0.808      0.916      0.629






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10       3.4G      1.053     0.7213      1.117          5        640: 100%|██████████| 229/229 [01:40<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.61it/s]

                   all        255        262      0.899      0.866      0.926      0.669






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      3.42G     0.9833     0.6408      1.073          5        640: 100%|██████████| 229/229 [01:40<00:00,  2.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.63it/s]

                   all        255        262      0.944       0.92      0.972      0.721






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10      3.41G     0.9207     0.5847      1.042          5        640: 100%|██████████| 229/229 [01:39<00:00,  2.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.57it/s]

                   all        255        262      0.952      0.916      0.971       0.72






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      3.41G     0.8558     0.5275      1.015          5        640: 100%|██████████| 229/229 [01:39<00:00,  2.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.61it/s]

                   all        255        262      0.961      0.933      0.981      0.728






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10      3.41G     0.8209     0.4857      0.997          5        640: 100%|██████████| 229/229 [01:40<00:00,  2.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.60it/s]

                   all        255        262      0.931      0.943      0.977      0.747






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      3.38G     0.7969      0.464     0.9865          5        640: 100%|██████████| 229/229 [01:40<00:00,  2.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.56it/s]

                   all        255        262      0.969      0.935      0.977      0.765






10 epochs completed in 0.302 hours.
Optimizer stripped from runs\detect\model_blur\weights\last.pt, 52.0MB
Optimizer stripped from runs\detect\model_blur\weights\best.pt, 52.0MB

Validating runs\detect\model_blur\weights\best.pt...
Ultralytics 8.3.174  Python-3.9.2 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
Model summary (fused): 92 layers, 25,840,339 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.52it/s]


                   all        255        262      0.969      0.935      0.977      0.766
Speed: 0.4ms preprocess, 13.1ms inference, 0.0ms loss, 1.4ms postprocess per image
Results saved to [1mruns\detect\model_blur[0m
Ultralytics 8.3.174  Python-3.9.2 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
Model summary (fused): 92 layers, 25,840,339 parameters, 0 gradients, 78.7 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 1723.6721.0 MB/s, size: 234.4 KB)


[34m[1mval: [0mScanning D:\Term 2\Deep_Learning_Project\dl_project\dataset_split\labels\val.cache... 255 images, 0 backgrounds, 0 corrupt: 100%|██████████| 255/255 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:08<00:00,  3.79it/s]


                   all        255        262      0.969      0.939      0.978      0.766
Speed: 0.3ms preprocess, 28.4ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to [1mruns\detect\model_blur2[0m
ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x0000018BFB515C70>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    

In [10]:
model = YOLO("yolov8m.pt")
results = model.predict(source="test2.mp4", conf=0.4, save=True)



inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/663) d:\Term 2\Deep_Learning_Project\dl_project\test2.mp4: 640x384 3 cars, 73.9ms
video 1/1 (frame 2/663) d:\Term 2\Deep_Learning_Project\dl_project\test2.mp4: 640x384 3 cars, 66.5ms
video 1/1 (frame 3/663) d:\Term 2\Deep_Learning_Project\dl_project\test2.mp4: 640x384 3 cars, 66.1ms
video 1/1 (frame 4/663) d:\Term 2\Deep_Learning_Project\dl_project\test2.mp4: 640x384 4 cars, 66.1ms
video 1/1 (frame 5/663) d:\Term 2\Deep_Learning_Project\dl_proje

KeyboardInterrupt: 

In [8]:
import cv2
import pytesseract
import os
from ultralytics import YOLO

# Configure Tesseract path (Windows) - optional if not using OCR
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load YOLO model
model = YOLO("runs/detect/model_blur/weights/best.pt")

# Video input/output
input_path = "test2.mp4"
cap = cv2.VideoCapture(input_path)
fps = cap.get(cv2.CAP_PROP_FPS)
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter("output.mp4", fourcc, fps, (width, height))

# Folder to store unique plate images
output_folder = "license_plates"
os.makedirs(output_folder, exist_ok=True)

def compute_iou(box1, box2):
    # box format: [x1, y1, x2, y2]
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection
    return intersection / union if union > 0 else 0

frame_index = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model.predict(frame, conf=0.4, stream=True, verbose=False)

    for result in results:
        for box in result.boxes.xyxy.cpu().numpy():
            x1, y1, x2, y2 = map(int, box)
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(frame.shape[1], x2), min(frame.shape[0], y2)

            roi = frame[y1:y2, x1:x2]
            if roi.size == 0:
                continue

            # IoU check for unique plate
            # is_new_plate = True
            # for saved_box in saved_boxes:
            #     if compute_iou([x1, y1, x2, y2], saved_box) > 0.7:
            #         is_new_plate = False
            #         break

            # Compute timestamp
            timestamp_seconds = frame_index / fps
            hours = int(timestamp_seconds // 3600)
            minutes = int((timestamp_seconds % 3600) // 60)
            seconds = int(timestamp_seconds % 60)
            timestamp_str = f"{hours:02}_{minutes:02}_{seconds:02}"  # for filename
            timestamp_label = f"{hours:02}:{minutes:02}:{seconds:02}" # for video

            # if is_new_plate:
                # Save cropped image as timestamp.jpg
            filename = os.path.join(output_folder, f"{timestamp_str}.jpg")
            cv2.imwrite(filename, roi)
            print(f"Saved plate: {filename}")

                # Add to saved boxes
                # saved_boxes.append([x1, y1, x2, y2])

            # Blur plate in the output video
            frame[y1:y2, x1:x2] = cv2.GaussianBlur(roi, (35, 35), 0)

            # Draw bright red border
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 3)

            # Draw timestamp label above the blur
            text_position = (x1, max(0, y1-10))
            cv2.putText(frame, timestamp_label, text_position,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA)

    out.write(frame)
    frame_index += 1

cap.release()
out.release()

print("Blurred video saved as output.mp4")
print(f"Unique plate images saved in folder: {output_folder}")


Blurred video saved as output.mp4
Unique plate images saved in folder: license_plates
