Paquetes necesarios

In [2]:
import cv2  
import math 

from ultralytics import YOLO
from matplotlib import pyplot as plt

Extraemos las clases del modelo YOLO 11

In [2]:
model = YOLO('yolo11n.pt')

vid = cv2.VideoCapture("C0142.MP4")

names = None

while vid.isOpened():
    ret, frame = vid.read()

    if ret:
        results = model(frame, show=False)
        if names is None:
            names = results[0].names
        annotated_frame = results[0].plot()
        cv2.imshow("Deteccion de YOLO", annotated_frame)

        # Salir del vídeo cuando presionamos ESC
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break
    else:
        # El vídeo ya se terminó
        break

vid.release()
cv2.destroyAllWindows()

# Leemos las posibles clases
with open("classes.txt", "w") as f:
    f.write(str(names))


0: 384x640 4 cars, 1 bus, 186.7ms
Speed: 70.3ms preprocess, 186.7ms inference, 16.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 bus, 93.4ms
Speed: 4.4ms preprocess, 93.4ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 5 cars, 1 bus, 90.6ms
Speed: 1.8ms preprocess, 90.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 bus, 92.6ms
Speed: 2.5ms preprocess, 92.6ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 bus, 100.9ms
Speed: 2.6ms preprocess, 100.9ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 3 cars, 1 bus, 83.0ms
Speed: 2.4ms preprocess, 83.0ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 1 bus, 71.6ms
Speed: 1.6ms preprocess, 71.6ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 bus, 80.7ms
Speed: 1.6ms

### Mostramos el funcionamiento de nuestro modelo entrenado

In [4]:
model = YOLO('best.pt')

vid = cv2.VideoCapture("C0142.MP4")

while vid.isOpened():
    ret, frame = vid.read()

    if ret:
        results = model(frame, show=False)
        annotated_frame = results[0].plot()
        cv2.imshow("Deteccion de YOLO", annotated_frame)

        # Salir del vídeo cuando presionamos ESC
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break
    else:
        # El vídeo ya se terminó
        break

vid.release()
cv2.destroyAllWindows()


0: 384x640 (no detections), 117.8ms
Speed: 14.4ms preprocess, 117.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 94.0ms
Speed: 3.1ms preprocess, 94.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 83.0ms
Speed: 2.3ms preprocess, 83.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 79.1ms
Speed: 2.1ms preprocess, 79.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 75.1ms
Speed: 1.6ms preprocess, 75.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 74.3ms
Speed: 1.8ms preprocess, 74.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 74.6ms
Speed: 1.7ms preprocess, 74.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 80.7ms
Speed: 1.9ms preprocess, 80.7m

In [23]:
import cv2
from ultralytics import YOLO
import easyocr
import numpy as np

model = YOLO('best.pt')
reader = easyocr.Reader(['es'])
vid = cv2.VideoCapture("C0142.MP4")

if not vid.isOpened():
    exit()

frame_count = 0
frame_skip = 3
last_plate = None
margin = 10  

while True:
    ret, frame = vid.read()
    if not ret:
        break

    frame_count += 1
    results = model(frame, verbose=False)
    detections = results[0].boxes

    for box in detections:
        x1, y1, x2, y2 = map(int, box.xyxy[0])

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)

        if frame_count % frame_skip == 0:
            x1m = max(0, x1 - margin)
            y1m = max(0, y1 - margin)
            x2m = min(frame.shape[1], x2 + margin)
            y2m = min(frame.shape[0], y2 + margin)
            placa_crop = frame[y1m:y2m, x1m:x2m]

            if placa_crop.size > 0:
                escala = 3
                placa_crop = cv2.resize(placa_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)

                gray = cv2.cvtColor(placa_crop, cv2.COLOR_BGR2GRAY)
                gray = cv2.equalizeHist(gray)
                gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)

                ocr_result = reader.readtext(
                    gray,
                    allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
                    detail=1
                )

                if len(ocr_result) > 0:
                    text = ocr_result[0][1].strip()
                    prob = ocr_result[0][2]

                    if len(text) >= 4 and prob > 0.5 and text != last_plate:
                        last_plate = text
                        timestamp = vid.get(cv2.CAP_PROP_POS_MSEC) / 1000
                        print(f"[{timestamp:.2f}s] Matrícula: {text} (Conf: {prob:.2f})")
                        cv2.putText(frame, f'{text}', (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)

    cv2.imshow("Detección + OCR", frame)
    if cv2.waitKey(30) & 0xFF == 27:
        break

vid.release()
cv2.destroyAllWindows()





Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[31.28s] Matrícula: 271LC (Conf: 0.55)


### Usamos el modelo pre-entrenado de YOLO y el nuestro en conjunto 
Utilizamos el modelo pre-entrenado para detectar personas y vehículos, posteriormente, cuando hayamos detectado un vehículo, se lo pasamos a nuestro modelo entrenado en matrículas para que le detecte la matrícula.

In [3]:
base_model = YOLO('yolo11n.pt')
our_model = YOLO('best.pt')
vid = cv2.VideoCapture("C0142.MP4")

def get_coordinates(bounding_boxes, index):
    x_center = bounding_boxes[index][0]
    y_center = bounding_boxes[index][1]
    width = bounding_boxes[index][2]
    height = bounding_boxes[index][3]
    x = x_center-(width//2)
    y = y_center-(height//2)
    return int(x), int(y), int(width), int(height)

while vid.isOpened():
    ret, frame = vid.read()

    if ret:
        base_results = base_model(frame, show=False)
        plates_result = None
        annotated_frame = base_results[0].plot()
        bounding_boxes = list()
        names = list()
        for result in base_results:
            bounding_boxes += (result.boxes.xywh)
            names += [result.names[cls.item()] for cls in result.boxes.cls.int()]
            for i, name in enumerate(names):
                x1, y1, w1, h1 = get_coordinates(bounding_boxes, i)
                if name != "person":
                    vehicle_box = frame[y1:y1+h1, x1:x1+w1]
                    plates_result = our_model(vehicle_box, show=False)
                    for j, plate_details in enumerate(plates_result):
                        x2, y2, w2, h2 = get_coordinates(vehicle_box, j)
                        


        cv2.imshow("Deteccion de YOLO", annotated_frame)

        # Salir del vídeo cuando presionamos ESC
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break
    else:
        # El vídeo ya se terminó
        break

vid.release()
cv2.destroyAllWindows()


0: 384x640 4 cars, 1 bus, 84.4ms
Speed: 10.4ms preprocess, 84.4ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 544x640 (no detections), 109.2ms
Speed: 4.4ms preprocess, 109.2ms inference, 0.4ms postprocess per image at shape (1, 3, 544, 640)


TypeError: only length-1 arrays can be converted to Python scalars