Paquetes necesarios

In [2]:
import cv2  
import math 

from ultralytics import YOLO
from matplotlib import pyplot as plt
import easyocr

Extraemos las clases del modelo YOLO 11

In [2]:
# Run the stock YOLO11 nano model over the video, show the annotated frames
# and save the model's class-id -> class-name mapping to classes.txt.
model = YOLO('yolo11n.pt')

vid = cv2.VideoCapture("C0142.MP4")

names = None

# try/finally guarantees the capture and the window are released even when the
# loop is interrupted (e.g. "Interrupt kernel") or inference raises.
try:
    while vid.isOpened():
        ret, frame = vid.read()
        if not ret:
            # The video has ended (or the next frame could not be read)
            break

        results = model(frame, show=False)
        if names is None:
            # The mapping is the same for every frame, so it is stored once
            names = results[0].names
        annotated_frame = results[0].plot()
        cv2.imshow("Deteccion de YOLO", annotated_frame)

        # Leave the loop when ESC is pressed or the window has been closed
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    vid.release()
    cv2.destroyAllWindows()

# If no frame was processed (e.g. the video could not be opened) take the
# mapping from the model itself instead of writing the literal "None".
if names is None:
    names = model.names

# Write the available classes to disk
with open("classes.txt", "w", encoding="utf-8") as f:
    f.write(str(names))


0: 384x640 4 cars, 1 bus, 186.7ms
Speed: 70.3ms preprocess, 186.7ms inference, 16.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 bus, 93.4ms
Speed: 4.4ms preprocess, 93.4ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 5 cars, 1 bus, 90.6ms
Speed: 1.8ms preprocess, 90.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 bus, 92.6ms
Speed: 2.5ms preprocess, 92.6ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 bus, 100.9ms
Speed: 2.6ms preprocess, 100.9ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 3 cars, 1 bus, 83.0ms
Speed: 2.4ms preprocess, 83.0ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 1 bus, 71.6ms
Speed: 1.6ms preprocess, 71.6ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 bus, 80.7ms
Speed: 1.6ms

### Mostramos el funcionamiento de nuestro modelo entrenado

In [None]:
# Show the detections of our own trained weights (best.pt) on the video.
model = YOLO('best.pt')

vid = cv2.VideoCapture("C0142.MP4")

# try/finally guarantees the capture and the window are released even when the
# loop is interrupted (e.g. "Interrupt kernel") or inference raises.
try:
    while vid.isOpened():
        ret, frame = vid.read()
        if not ret:
            # The video has ended (or the next frame could not be read)
            break

        results = model(frame, show=False)
        annotated_frame = results[0].plot()
        cv2.imshow("Deteccion de YOLO", annotated_frame)

        # Leave the loop when ESC is pressed or the window has been closed
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    vid.release()
    cv2.destroyAllWindows()

In [23]:
import cv2
from ultralytics import YOLO
import easyocr
import numpy as np

# Detect plates with our trained model and read each detection with EasyOCR.
model = YOLO('best.pt')
reader = easyocr.Reader(['es'])
vid = cv2.VideoCapture("C0142.MP4")

if not vid.isOpened():
    # exit() would shut down the whole notebook kernel; raising stops only this cell
    raise RuntimeError('Could not open video "C0142.MP4"')

frame_count = 0
frame_skip = 3      # OCR is attempted only on every 3rd frame
last_plate = None   # last accepted reading, used to avoid printing repeats
margin = 10         # extra pixels kept around the detected box for OCR

# try/finally guarantees the capture and the window are released even when the
# loop is interrupted or a frame fails to process.
try:
    while True:
        ret, frame = vid.read()
        if not ret:
            break

        frame_count += 1
        results = model(frame, verbose=False)
        detections = results[0].boxes

        run_ocr = frame_count % frame_skip == 0
        # OCR crops come from an untouched copy: the crop is larger than the
        # box by `margin`, so cropping the drawn-on frame would include the
        # green rectangle (and any label already drawn) in the OCR input.
        clean_frame = frame.copy() if run_ocr else None

        for box in detections:
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)

            if not run_ocr:
                continue

            # Expand the box by `margin`, clamped to the frame borders
            x1m = max(0, x1 - margin)
            y1m = max(0, y1 - margin)
            x2m = min(frame.shape[1], x2 + margin)
            y2m = min(frame.shape[0], y2 + margin)
            placa_crop = clean_frame[y1m:y2m, x1m:x2m]

            if placa_crop.size == 0:
                continue

            # Pre-processing: upscale, grayscale, equalise and boost contrast
            escala = 3
            placa_crop = cv2.resize(placa_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)

            gray = cv2.cvtColor(placa_crop, cv2.COLOR_BGR2GRAY)
            gray = cv2.equalizeHist(gray)
            gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)

            ocr_result = reader.readtext(
                gray,
                allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
                detail=1
            )

            if len(ocr_result) == 0:
                continue

            # Only the first region returned by EasyOCR is considered
            text = ocr_result[0][1].strip()
            prob = ocr_result[0][2]

            if len(text) < 4 or prob <= 0.5:
                continue

            # Print only when the reading changes ...
            if text != last_plate:
                last_plate = text
                timestamp = vid.get(cv2.CAP_PROP_POS_MSEC) / 1000
                print(f"[{timestamp:.2f}s] Matrícula: {text} (Conf: {prob:.2f})")

            # ... but label every accepted reading, otherwise the text is
            # shown for a single frame when the plate is first recognised.
            cv2.putText(frame, f'{text}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)

        cv2.imshow("Detección + OCR", frame)
        # Leave the loop when ESC is pressed
        if cv2.waitKey(30) & 0xFF == 27:
            break
finally:
    vid.release()
    cv2.destroyAllWindows()





Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[31.28s] Matrícula: 271LC (Conf: 0.55)


### Usamos el modelo pre-entrenado de YOLO y el nuestro en conjunto 
Utilizamos el modelo preentrenado para detectar personas y vehículos. Después, cada vez que se detecta un vehículo, pasamos su recorte a nuestro modelo entrenado con matrículas para que localice la matrícula dentro de él.

In [13]:
# Two-stage pipeline: the pretrained YOLO11 model finds people and vehicles,
# our plate model is run on every vehicle crop, and EasyOCR reads the plate.
base_model = YOLO('yolo11n.pt')
our_model = YOLO('best.pt')
vid = cv2.VideoCapture("C0142.MP4")

# Defined in this cell so that ocr() does not rely on a value left behind by
# another cell of the notebook.
last_plate = None


def ocr(placa_crop):
    """Read the text of a cropped licence plate with EasyOCR.

    The crop is upscaled x3, converted to grayscale, histogram-equalised and
    contrast-boosted before being passed to the module-level ``reader``.
    Only the first region returned by EasyOCR is considered.

    Args:
        placa_crop: BGR image (NumPy array) containing the plate.

    Returns:
        The recognised text when it has at least 4 characters and a
        confidence above 0.5, otherwise ``None``. An accepted reading that
        differs from the previous one is also printed together with the
        current video timestamp.
    """
    # Without this declaration the assignment below makes `last_plate` a local
    # name and the comparison raises UnboundLocalError.
    global last_plate

    # cv2.resize fails on empty images, which degenerate boxes can produce
    if placa_crop is None or placa_crop.size == 0:
        return None

    escala = 3
    placa_crop = cv2.resize(placa_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)

    gray = cv2.cvtColor(placa_crop, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)

    ocr_result = reader.readtext(
        gray,
        allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
        detail=1
    )

    if len(ocr_result) == 0:
        return None

    text = ocr_result[0][1].strip()
    prob = ocr_result[0][2]

    if len(text) < 4 or prob <= 0.5:
        return None

    if text != last_plate:
        last_plate = text
        timestamp = vid.get(cv2.CAP_PROP_POS_MSEC) / 1000
        print(f"[{timestamp:.2f}s] Matrícula: {text} (Conf: {prob:.2f})")

    # Drawing is left to the caller, which knows the plate position and the
    # image that is actually displayed.
    return text


reader = easyocr.Reader(['es'], gpu=False)

classes = [0, 2, 3, 5, 7]    # Person, car, motorcycle, bus, truck

# Not used by the loop below; kept defined because the batched snippet in the
# following cell appends to them.
car_boxes = []
car_boxes_left_coords = []

# try/finally guarantees the capture and the window are released even when the
# loop is interrupted or a frame fails to process.
try:
    while vid.isOpened():
        ret, frame = vid.read()
        if not ret:
            # The video has ended (or the next frame could not be read)
            break

        base_results = base_model(frame, show=False, classes=classes)
        plates_result = None
        annotated_frame = base_results[0].plot()

        for result in base_results:
            for box in result.boxes:
                # People are shown by plot() but carry no plate
                if result.names[int(box.cls)] == "person":
                    continue

                x1, y1, x2, y2 = box.xyxy[0].int().tolist()
                vehicle_box = frame[y1:y2, x1:x2]
                if vehicle_box.size == 0:
                    continue

                plates_result = our_model(vehicle_box, show=False)
                if len(plates_result[0].boxes) == 0:
                    continue

                # Only the first plate detected inside the vehicle is used
                px1, py1, px2, py2 = plates_result[0].boxes.xyxy[0].int().tolist()
                plate = vehicle_box[py1:py2, px1:px2]

                # Plate coordinates are relative to the vehicle crop; shift
                # them back into full-frame coordinates.
                real_x1 = px1 + x1
                real_y1 = py1 + y1
                real_x2 = px2 + x1
                real_y2 = py2 + y1

                text = ocr(plate)
                cv2.rectangle(annotated_frame, (real_x1, real_y1), (real_x2, real_y2), (0, 255, 0), 1)
                if text is not None:
                    # Draw on the image that is shown, just above the plate
                    cv2.putText(annotated_frame, f'{text}', (real_x1, real_y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)

        cv2.imshow("Deteccion de YOLO", annotated_frame)

        # Leave the loop when ESC is pressed or the window has been closed
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    vid.release()
    cv2.destroyAllWindows()

Using CPU. Note: This module is much faster with a GPU.



0: 384x640 4 cars, 1 bus, 102.2ms
Speed: 51.7ms preprocess, 102.2ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 544x640 (no detections), 147.4ms
Speed: 4.8ms preprocess, 147.4ms inference, 1.2ms postprocess per image at shape (1, 3, 544, 640)

0: 512x640 (no detections), 141.7ms
Speed: 3.5ms preprocess, 141.7ms inference, 1.1ms postprocess per image at shape (1, 3, 512, 640)

0: 416x640 (no detections), 177.5ms
Speed: 3.1ms preprocess, 177.5ms inference, 0.9ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 (no detections), 112.3ms
Speed: 4.9ms preprocess, 112.3ms inference, 0.8ms postprocess per image at shape (1, 3, 416, 640)

0: 480x640 (no detections), 132.4ms
Speed: 3.2ms preprocess, 132.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 384x640 4 cars, 1 bus, 146.6ms
Speed: 5.5ms preprocess, 146.6ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 512x640 (no detections), 171.7ms
Speed: 3.9ms preproce



0: 480x640 (no detections), 141.7ms
Speed: 2.4ms preprocess, 141.7ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 448x640 (no detections), 130.4ms
Speed: 3.2ms preprocess, 130.4ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)

0: 512x640 (no detections), 138.2ms
Speed: 3.8ms preprocess, 138.2ms inference, 0.9ms postprocess per image at shape (1, 3, 512, 640)

0: 384x640 3 cars, 1 bus, 122.1ms
Speed: 4.5ms preprocess, 122.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 448x640 1 matricula, 120.2ms
Speed: 3.6ms preprocess, 120.2ms inference, 1.3ms postprocess per image at shape (1, 3, 448, 640)

0: 480x640 (no detections), 135.7ms
Speed: 2.8ms preprocess, 135.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 448x640 (no detections), 192.9ms
Speed: 2.9ms preprocess, 192.9ms inference, 1.2ms postprocess per image at shape (1, 3, 448, 640)

0: 640x640 (no detections), 177.5ms
Speed: 6.2ms preprocess, 

In [None]:

# Batched variant of the plate search for one frame: every vehicle crop is
# collected first and the plate model is called once on the whole list.
# NOTE(review): relies on `frame`, `base_model`, `our_model` and `classes`
# already being defined by an earlier cell, and on `frame` holding a valid image.
base_results = base_model(frame, show=False, classes=classes)
plates_result = None
annotated_frame = base_results[0].plot()

# Rebuild the crop lists on every run; appending to lists filled by a previous
# run would send stale crops to the plate model together with the new ones.
car_boxes = []
car_boxes_left_coords = []

# Iterate the results just computed instead of a `result` variable left over
# from another cell.
for result in base_results:
    for box in result.boxes:
        if result.names[int(box.cls)] == "person":
            continue
        x1, y1, x2, y2 = box.xyxy[0].int().tolist()
        crop = frame[y1:y2, x1:x2]
        if crop.size == 0:
            # Degenerate box: nothing to search for a plate in
            continue
        car_boxes.append(crop)
        car_boxes_left_coords.append((x1, y1))

if car_boxes:
    plates_results = our_model(car_boxes, show=False)
    # Results come back in the same order as the crops, so each one is paired
    # with the top-left corner of its vehicle.
    for (car_x, car_y), plates_result in zip(car_boxes_left_coords, plates_results):
        if len(plates_result.boxes) == 0:
            continue
        # Only the first plate detected inside the vehicle is used
        px1, py1, px2, py2 = plates_result.boxes[0].xyxy[0].int().tolist()
        # Shift from crop-relative to full-frame coordinates
        real_x1 = px1 + car_x
        real_y1 = py1 + car_y
        real_x2 = px2 + car_x
        real_y2 = py2 + car_y
        cv2.rectangle(annotated_frame, (real_x1, real_y1), (real_x2, real_y2), (0, 255, 0), 3)


    