In [2]:
!pip install ultralytics realesrgan basicsr torch easyocr

Collecting ultralytics
  Using cached ultralytics-8.3.130-py3-none-any.whl.metadata (37 kB)
Collecting realesrgan
  Using cached realesrgan-0.3.0-py3-none-any.whl.metadata (17 kB)
Collecting basicsr
  Using cached basicsr-1.4.2.tar.gz (172 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting facexlib>=0.2.5 (from realesrgan)
  Downloading facexlib-0.3.0-py3-none-any.whl.metadata (4.6 kB)
Collecting gfpgan>=1.3.5 (from realesrgan)
  Downloading gfpgan-1.3.8-py3-none-any.whl.metadata (12 kB)
Collecting addict (from basicsr)
  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)
Collecting lmdb (from basicsr)
  Downloading lmdb-1.6.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting tb-nightly (from basicsr)
  Downloading tb_

In [1]:
import cv2
from paddleocr import PaddleOCR
from ultralytics import YOLO
import numpy as np
from utils import Utils
from sort.sort import Sort




In [7]:
# init models
coco_model = YOLO("models/yolo11s.pt")
license_plate_model = YOLO("logs/retrain/weights/best.pt")
ocr = PaddleOCR(use_angle_cls=True, lang="en", use_gpu=False, show_log=False)

utils = Utils(license_plate_model, coco_model, ocr, None)

file_name = "video1.mp4"
video = cv2.VideoCapture(f"source/{file_name}")
fps = float(video.get(cv2.CAP_PROP_FPS))
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
detecting_area = (width//4, height//4, width*3//4, height*3//4)

overlay = np.zeros((height, width, 3), dtype=np.uint8)
cv2.rectangle(overlay, (detecting_area[0], detecting_area[1]), (detecting_area[2], detecting_area[3]), (0, 255, 0), -1)

array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=uint8)

In [8]:
# video writer setup
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(f"output/{file_name}", fourcc, fps, (width, height))

In [9]:
tracker = Sort()

tracked_objs = []
tracked_ids = []
license_plate_number = ""

each_nth_frame = 3
frame_number = -1
while True:
    frame_number += 1
    ret, frame = video.read()
    
    # each 5th frame
    if (frame_number % each_nth_frame != 0):
        continue
    
    if not ret:
        break
    
    img = frame[detecting_area[1]:detecting_area[3], detecting_area[0]:detecting_area[2]]
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    coco_results = coco_model.predict(img, conf=0.5)
    if not coco_results:
        continue
    
    detections = [
        [x1, y1, x2, y2, score]
        for x1, y1, x2, y2, score, cls in coco_results[0].boxes.data.tolist()
        if utils.is_vehicle(cls)
    ]
    
    detections = np.array(detections) if detections else np.empty((0, 5))
    tracked_objs = tracker.update(detections)
    
    new_ids = tracked_objs[:, -1]
    if set(new_ids) != set(tracked_ids) or len(tracked_ids) > 0 and license_plate_number is None:
        tracked_ids = new_ids
        print("Detecting IDs: ", tracked_ids)
        license_plate_number = ""
        
        for vehicle in tracked_objs:
            print("Vehicle detected: ", vehicle)
            vehicle_img = utils.crop_vehicle(img, vehicle)
            
            license_plate_obj = utils.fetch_license_plate(vehicle_img)
            if license_plate_obj is not None:
                license_plate_img, x, y = license_plate_obj
                license_plate_number = utils.extract_license_plate_number(license_plate_img)
                
    utils.draw_license_plate_number(license_plate_number, frame, (0, 0))
    cv2.rectangle(frame, (detecting_area[0], detecting_area[1]), (detecting_area[2], detecting_area[3]), (0, 255, 0), 2)
    frame = cv2.addWeighted(overlay, 0.1, frame, 1, 0)
    out.write(frame)
    
out.release()
video.release()


0: 384x640 (no detections), 275.0ms
Speed: 7.5ms preprocess, 275.0ms inference, 11.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 197.8ms
Speed: 1.9ms preprocess, 197.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 205.8ms
Speed: 1.3ms preprocess, 205.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 204.2ms
Speed: 1.9ms preprocess, 204.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 184.4ms
Speed: 2.0ms preprocess, 184.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 261.6ms
Speed: 1.5ms preprocess, 261.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 188.9ms
Speed: 2.1ms preprocess, 188.9ms inference, 4.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 182.5ms
Speed: 2.1ms preprocess, 182.5ms infe

In [16]:
while True:
    ret, frame = video.read()

    if not ret:
        break

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    coco_results = coco_model.predict(frame, conf=0.5)
    print(coco_results[0].boxes.data.tolist())
    if not coco_results:
        continue

    for vehicle in (object for object in coco_results[0].boxes.data.tolist() if utils.is_vehicle(object[-1])):
        vehicle_img = utils.crop_vehicle(frame, vehicle)
        result = utils.fetch_license_plate(vehicle_img)
        if result is not None:
            license_plate_img, x, y = result
            # license_plate_img = normalize_license_plate(license_plate_img)
            license_plate_number = utils.extract_license_plate_number(license_plate_img)

            x = x + int(vehicle[0])
            y = y + int(vehicle[1])

            plate_h, plate_w, _ = license_plate_img.shape
            cv2.rectangle(frame, (x, y), (x + plate_w, y + plate_h), (0, 255, 0), 2)

            text_size, _ = cv2.getTextSize(license_plate_number, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
            text_w, text_h = text_size
            cv2.rectangle(frame, (x, y), (x + text_w, y - text_h), (0, 255, 0), -1)
            cv2.putText(frame, license_plate_number, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    # cv2.imshow("Vehicle", img)
    # cv2.waitKey()
    out.write(frame)

out.release()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

0: 576x640 (no detections), 15.9ms
Speed: 4.1ms preprocess, 15.9ms inference, 0.9ms postprocess per image at shape (1, 3, 576, 640)
No license plate detected.
Cropping vehicle...
Detecting license plate...

0: 512x640 (no detections), 14.5ms
Speed: 2.6ms preprocess, 14.5ms inference, 0.8ms postprocess per image at shape (1, 3, 512, 640)
No license plate detected.
Cropping vehicle...
Detecting license plate...

0: 576x640 (no detections), 12.6ms
Speed: 3.8ms preprocess, 12.6ms inference, 3.9ms postprocess per image at shape (1, 3, 576, 640)
No license plate detected.

0: 384x640 2 cars, 1 bus, 17.8ms
Speed: 6.0ms preprocess, 17.8ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)
[[480.402587890625, 0.9954833984375, 1276.2950439453125, 710.9000244140625, 0.8284034729003906, 2.0], [480.5516357421875, 0.0, 1276.8175048828125, 710.0116577148438, 0.7842857837677002, 5.0], [250.01443481445312, 250.0563049316406

In [10]:
# cv2.destroyAllWindows()
video.release()
out.release()