In [1]:
import logging
import re
from collections import defaultdict, deque

import cv2
import easyocr
import numpy as np
import torch
from ultralytics import YOLO


In [2]:
# Report whether CUDA is usable. The device name is only queried when a CUDA
# device exists, because torch.cuda.get_device_name raises on CPU-only hosts.
# (torch is already imported in the imports cell at the top of the notebook.)
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("CUDA not available - running on CPU")

True
NVIDIA GeForce GTX 1650


In [3]:
# Decide once whether the GPU can be used so that the detector and the OCR
# reader agree; without this, model.to("cuda") fails on CPU-only machines
# even though the EasyOCR reader below already falls back gracefully.
use_gpu = torch.cuda.is_available()

# Plate detector loaded from the weights file under saved_models/.
model = YOLO("saved_models/license_plate_best.pt")
model.to("cuda" if use_gpu else "cpu")

# English OCR reader, placed on the GPU only when one is available.
reader = easyocr.Reader(['en'], gpu=use_gpu)

# Expected plate layout: 2 letters + 2 digits + 3 letters (AA11AAA).
plate_pattern = re.compile(r"^[A-Z]{2}[0-9]{2}[A-Z]{3}$")


In [4]:
# ---------------------------------------------------
# 4️⃣ Function to correct OCR mistakes
# ---------------------------------------------------
def correct_plate_format(ocr_text):
    """
    Normalise a raw OCR string into the AA11AAA plate layout.

    The text is upper-cased and stripped of all whitespace. Positions 0, 1,
    4, 5 and 6 must end up as ASCII letters and positions 2 and 3 as ASCII
    digits; characters that are a known look-alike of the required class
    (e.g. "0" for "O", "B" for "8") are swapped for their counterpart.

    Parameters:
        ocr_text: raw text returned by the OCR engine.

    Returns:
        The corrected 7-character plate, or "" when the input is not a
        string, does not contain exactly 7 characters after cleaning, or
        holds a character that cannot be mapped to the required class.
    """

    # Digits that OCR commonly produces in place of a look-alike letter
    mapping_num_to_alpha = {
        "0": "O",
        "1": "I",
        "2": "Z",  # mirror of the Z -> 2 entry below
        "5": "S",
        "8": "B"
    }

    # Letters that OCR commonly produces in place of a look-alike digit
    mapping_alpha_to_num = {
        "O": "0",
        "I": "1",
        "Z": "2",
        "S": "5",
        "B": "8"
    }

    # Anything that is not text cannot be a plate
    if not isinstance(ocr_text, str):
        return ""

    # Upper-case and drop every whitespace character (spaces, tabs, newlines)
    ocr_text = "".join(ocr_text.upper().split())

    # If length is not 7 → discard
    if len(ocr_text) != 7:
        return ""

    corrected = []

    for i, ch in enumerate(ocr_text):

        # ------------------------------------------
        # Alphabet positions (0,1 and 4,5,6)
        # ------------------------------------------
        if i < 2 or i >= 4:

            # Explicit ASCII range check: str.isalpha() would also accept
            # accented / non-Latin letters, which are never valid here.
            if "A" <= ch <= "Z":
                corrected.append(ch)

            # Digit that looks like a letter → convert
            elif ch in mapping_num_to_alpha:
                corrected.append(mapping_num_to_alpha[ch])

            # Invalid character
            else:
                return ""

        # ------------------------------------------
        # Numeric positions (2,3)
        # ------------------------------------------
        else:

            # Explicit ASCII range check: str.isdigit() would also accept
            # characters such as superscript digits.
            if "0" <= ch <= "9":
                corrected.append(ch)

            # Letter that looks like a digit → convert
            elif ch in mapping_alpha_to_num:
                corrected.append(mapping_alpha_to_num[ch])

            # Invalid character
            else:
                return ""

    return "".join(corrected)

In [5]:
# import cv2

def recognize_plate(plate_crop):
    """
    Read the plate text from a cropped plate image.

    The crop is converted to grayscale, binarised with Otsu thresholding,
    upscaled 2x and passed to the module-level EasyOCR ``reader``. Each text
    fragment returned by OCR is run through ``correct_plate_format`` and
    checked against ``plate_pattern``; when several fragments are returned,
    their concatenation is tried last so that a plate split into pieces
    (e.g. "AB12" + "CDE") can still be recognised.

    Parameters:
        plate_crop: image array of the plate region (BGR, or already
            single-channel), or None.

    Returns:
        The first candidate that matches the plate pattern, or "" when the
        crop is empty, OCR fails, or no candidate is valid.
    """

    # -----------------------------------------
    # 1️⃣ Safety check (empty crop)
    # -----------------------------------------
    if plate_crop is None or plate_crop.size == 0:
        return ""

    # -----------------------------------------
    # 2️⃣ Preprocessing for better OCR accuracy
    # -----------------------------------------

    # Convert to grayscale; a 2-D array is already single-channel and would
    # make cvtColor(BGR2GRAY) raise, so it is used as-is.
    if plate_crop.ndim == 3:
        gray = cv2.cvtColor(plate_crop, cv2.COLOR_BGR2GRAY)
    else:
        gray = plate_crop

    # Apply OTSU thresholding (automatic binarization)
    _, thresh = cv2.threshold(
        gray,
        0,
        255,
        cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Upscale 2x before OCR
    plate_resized = cv2.resize(
        thresh,
        None,
        fx=2,
        fy=2,
        interpolation=cv2.INTER_CUBIC
    )

    # -----------------------------------------
    # 3️⃣ Run EasyOCR
    # -----------------------------------------
    try:
        ocr_result = reader.readtext(
            plate_resized,
            detail=0,  # only return text
            allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
        )
    except Exception as exc:
        # Best effort: one failed crop must not stop the video loop, but the
        # failure is reported instead of being silently discarded.
        logging.getLogger(__name__).warning("OCR failed on plate crop: %s", exc)
        return ""

    # -----------------------------------------
    # 4️⃣ Validate and correct result
    # -----------------------------------------
    fragments = [text for text in ocr_result if text]

    # Individual fragments first (the first fragment keeps priority), then
    # the joined text as a fallback for plates split across detections.
    candidates = list(fragments)
    if len(fragments) > 1:
        candidates.append("".join(fragments))

    for raw_text in candidates:
        candidate = correct_plate_format(raw_text)

        # Check against regex pattern
        if candidate and plate_pattern.match(candidate):
            return candidate

    return ""


In [6]:
# --------------------------------------------------
# Shared OCR state, keyed by the pseudo box ID
# --------------------------------------------------

# Voted (stable) plate text for each box
plate_final = dict()

# Rolling window holding the 10 most recent OCR readings for each box;
# a new bounded deque is created the first time a box ID is looked up
plate_history = defaultdict(lambda: deque(maxlen=10))

# --------------------------------------------------
# Generate a pseudo ID for each bounding box
# --------------------------------------------------
def get_box_id(x1, y1, x2, y2):
    """
    Build a coarse string key from a bounding box.

    Every coordinate is divided by 10 and truncated towards zero, and the
    four results are joined with underscores in the order x1, y1, x2, y2.
    Boxes whose corners fall into the same 10-pixel cells therefore share
    a key, which is how a plate is matched up between frames.
    """
    cells = (int(coord / 10) for coord in (x1, y1, x2, y2))
    return "_".join(map(str, cells))


# --------------------------------------------------
# Stabilize OCR result using majority voting
# --------------------------------------------------
def get_stable_plate(box_id, new_text):
    """
    Record an OCR reading for a box and return its majority-vote plate.

    Parameters:
        box_id: key identifying the box (see get_box_id).
        new_text: plate text read on this frame; an empty value means
            "nothing read" and leaves the stored history untouched.

    Returns:
        The most frequent reading in the box's history window, or "" when
        no reading has been recorded for this box yet. When several
        readings are tied, the one that entered the window first wins, so
        the result is deterministic and a newcomer must strictly out-vote
        the current text before the label changes.
    """

    # Add new OCR prediction to history
    if new_text:
        history = plate_history[box_id]
        history.append(new_text)

        # Single-pass tally. Keys are inserted in the order the readings
        # appear in the window (oldest first).
        counts = {}
        for text in history:
            counts[text] = counts.get(text, 0) + 1

        # max() returns the first maximal key it encounters, i.e. the oldest
        # of any tied readings. Voting over a set() instead would break ties
        # by hash order, which varies between interpreter runs for strings.
        plate_final[box_id] = max(counts, key=counts.get)

    # Return stable plate (or empty if none)
    return plate_final.get(box_id, "")


In [7]:
# --------------------------------------------------
# Input / Output video paths
# --------------------------------------------------
input_video = "test_images_and_videos/video4.mp4"
output_video = "output_with_license_v3.mp4"

# Working resolution (width, height): every frame is resized to this size
# before masking, detection, drawing and writing.
frame_size_1=1280
frame_size_2=720

# --------------------------------------------------
# Detection confidence threshold
# --------------------------------------------------
CONF_THRESH = 0.3

# --------------------------------------------------
# Load pre-created mask image (white = keep, black = ignore)
# Loaded before the video is opened so that a missing mask cannot leave an
# open capture / writer behind.
# --------------------------------------------------
road_mask = cv2.imread("test_images_and_videos/mask4.png", cv2.IMREAD_GRAYSCALE)

if road_mask is None:
    # Raise instead of exit(): exit() would shut down the notebook kernel.
    raise OSError("❌ Mask image not found")

road_mask = cv2.resize(road_mask, (frame_size_1, frame_size_2))

# --------------------------------------------------
# Open input video
# --------------------------------------------------
cap = cv2.VideoCapture(input_video)

if not cap.isOpened():
    raise OSError("❌ Error: Cannot open video file")

# --------------------------------------------------
# Get video properties
# --------------------------------------------------
fps = cap.get(cv2.CAP_PROP_FPS)
if fps <= 0:
    # The reported FPS can be 0 when the container does not expose it; a
    # writer opened with 0 FPS is not usable. 30 is an assumed fallback —
    # adjust if the real source rate is known.
    fps = 30.0

# Source resolution, kept for reference only: frames are resized to the
# working resolution before they are written.
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# --------------------------------------------------
# Define codec and create VideoWriter
# mp4v works well for .mp4 files
# --------------------------------------------------
fourcc = cv2.VideoWriter_fourcc(*"mp4v")

# The writer must be opened with the size of the frames that are actually
# written (the working resolution), not the source size; otherwise a source
# that is not already 1280x720 yields an empty or unreadable output file.
out = cv2.VideoWriter(
    output_video,
    fourcc,
    fps,
    (frame_size_1, frame_size_2)
)

if not out.isOpened():
    cap.release()
    raise OSError("❌ Error: Cannot open output video for writing")


In [None]:
# ==========================================================
# 5️⃣ MAIN LOOP
# ==========================================================
# Run detection on GPU 0 when CUDA is present, otherwise on the CPU.
yolo_device = 0 if torch.cuda.is_available() else "cpu"

# Size (pixels) of the zoomed plate preview pasted above each detection.
overlay_h, overlay_w = 150, 400

# try/finally guarantees that the capture and the writer are released (and
# the output file finalised) even when the loop is interrupted, e.g. by a
# KeyboardInterrupt from the notebook's stop button.
try:
    while cap.isOpened():

        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (frame_size_1, frame_size_2))
        frame_h, frame_w = frame.shape[:2]

        # Apply mask: only the white region of road_mask is visible to YOLO
        frame_masked = cv2.bitwise_and(frame, frame, mask=road_mask)

        # Untouched copy used for every crop. `frame` is drawn on below, and
        # cropping from it would leak rectangles / overlays of earlier
        # detections into later OCR input and into the zoomed preview.
        clean_frame = frame.copy()

        # YOLO detection
        results = model(frame_masked, device=yolo_device, verbose=False)

        for r in results:
            for box in r.boxes:

                conf = box.conf.item()
                if conf < CONF_THRESH:
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy.cpu().numpy()[0])

                # Clamp to the frame so that a negative coordinate cannot be
                # interpreted as an index from the far edge when slicing.
                x1, x2 = max(0, x1), min(frame_w, x2)
                y1, y2 = max(0, y1), min(frame_h, y2)

                plate_crop = clean_frame[y1:y2, x1:x2]

                # OCR
                text = recognize_plate(plate_crop)

                # Stabilization
                box_id = get_box_id(x1, y1, x2, y2)
                stable_text = get_stable_plate(box_id, text)

                # Draw rectangle
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 3)

                # Overlay zoomed plate
                if plate_crop.size > 0:

                    plate_resized = cv2.resize(
                        plate_crop,
                        (overlay_w, overlay_h)
                    )

                    # Place the preview above the box; shift it left / down
                    # as needed so that it stays inside the frame instead of
                    # being skipped for plates near the right or top edge.
                    oy1 = max(0, y1 - overlay_h - 40)
                    ox1 = max(0, min(x1, frame_w - overlay_w))
                    oy2 = oy1 + overlay_h
                    ox2 = ox1 + overlay_w

                    if oy2 <= frame_h and ox2 <= frame_w:
                        frame[oy1:oy2, ox1:ox2] = plate_resized

                        # Keep the text baseline on-screen when the preview
                        # is clamped to the top edge. 50 px is an approximate
                        # text height for font scale 2 — tune if the font
                        # settings change.
                        text_origin = (ox1, max(oy1 - 20, 50))

                        # Text with black outline
                        cv2.putText(
                            frame, stable_text,
                            text_origin,
                            cv2.FONT_HERSHEY_SIMPLEX,
                            2, (0,0,0), 6
                        )

                        # White text
                        cv2.putText(
                            frame, stable_text,
                            text_origin,
                            cv2.FONT_HERSHEY_SIMPLEX,
                            2, (255,255,255), 3
                        )

        out.write(frame)
        cv2.imshow("Annotated Video", frame)

        # Press 'q' in the preview window to stop early
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

finally:
    # ==========================================================
    # CLEANUP
    # ==========================================================
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print("✅ Processing complete. Video saved:", output_video)

KeyboardInterrupt: 

: 