Paquetes necesarios

In [2]:
import cv2  
import math 

from ultralytics import YOLO
from matplotlib import pyplot as plt
import easyocr

Extraemos las clases del modelo YOLO 11

In [None]:
# Run the stock YOLO11 nano weights over the clip, previewing the detections,
# and dump the model's class mapping to classes.txt.
model = YOLO('yolo11n.pt')

vid = cv2.VideoCapture("C0142.MP4")

names = None

try:
    while vid.isOpened():
        ret, frame = vid.read()

        if not ret:
            # The video has ended (or the next frame could not be read)
            break

        results = model(frame, show=False)
        if names is None:
            names = results[0].names
        annotated_frame = results[0].plot()
        cv2.imshow("Deteccion de YOLO", annotated_frame)

        # Leave the preview when ESC is pressed or the window has been closed
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    # Free the capture and the preview window even if inference raises
    vid.release()
    cv2.destroyAllWindows()

# When no frame was processed (e.g. the video could not be opened) fall back to
# the mapping stored in the model, so the file never ends up containing "None"
if names is None:
    names = model.names

# Write the available classes to disk
with open("classes.txt", "w") as f:
    f.write(str(names))

### Mostramos el funcionamiento de nuestro modelo entrenado

In [None]:
# Preview, frame by frame, what our fine-tuned weights detect on the clip.
model = YOLO('best.pt')
vid = cv2.VideoCapture("C0142.MP4")

while vid.isOpened():
    ret, frame = vid.read()
    if not ret:
        # The video is over
        break

    results = model(frame, show=False)
    annotated_frame = results[0].plot()
    cv2.imshow("Deteccion de YOLO", annotated_frame)

    # Stop on ESC; the window check is only evaluated when ESC was not pressed
    esc_pressed = (cv2.waitKey(1) & 0xFF) == 27
    if esc_pressed or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
        break

# Release the capture and close the preview window
vid.release()
cv2.destroyAllWindows()

In [23]:
import cv2
from ultralytics import YOLO
import easyocr
import numpy as np

# Detect licence plates with our model and, every `frame_skip` frames, read
# their text with EasyOCR while previewing the annotated video.
model = YOLO('best.pt')
reader = easyocr.Reader(['es'])
vid = cv2.VideoCapture("C0142.MP4")

if not vid.isOpened():
    # exit() would shut down the notebook kernel; fail with a clear error instead
    raise RuntimeError("No se pudo abrir el vídeo: C0142.MP4")

frame_count = 0
frame_skip = 3      # OCR runs only on every third frame
last_plate = None   # last text reported, used to avoid printing repeats
margin = 10         # extra pixels kept around the detected plate

try:
    while True:
        ret, frame = vid.read()
        if not ret:
            break

        frame_count += 1
        results = model(frame, verbose=False)
        detections = results[0].boxes

        run_ocr = frame_count % frame_skip == 0
        # Crops are taken from an untouched copy: the rectangles and labels drawn
        # on `frame` below would otherwise end up inside the OCR input
        clean_frame = frame.copy() if run_ocr else frame

        for box in detections:
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)

            if not run_ocr:
                continue

            # Expand the box by `margin`, clamped to the image bounds
            x1m = max(0, x1 - margin)
            y1m = max(0, y1 - margin)
            x2m = min(frame.shape[1], x2 + margin)
            y2m = min(frame.shape[0], y2 + margin)
            placa_crop = clean_frame[y1m:y2m, x1m:x2m]

            if placa_crop.size == 0:
                continue

            # Upscale and boost contrast before handing the crop to the OCR
            escala = 3
            placa_crop = cv2.resize(placa_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)

            gray = cv2.cvtColor(placa_crop, cv2.COLOR_BGR2GRAY)
            gray = cv2.equalizeHist(gray)
            gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)

            ocr_result = reader.readtext(
                gray,
                allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
                detail=1
            )

            if len(ocr_result) == 0:
                continue

            # Only the first OCR region is considered
            text = ocr_result[0][1].strip()
            prob = ocr_result[0][2]

            if len(text) >= 4 and prob > 0.5 and text != last_plate:
                last_plate = text
                timestamp = vid.get(cv2.CAP_PROP_POS_MSEC) / 1000
                print(f"[{timestamp:.2f}s] Matrícula: {text} (Conf: {prob:.2f})")
                cv2.putText(frame, f'{text}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)

        cv2.imshow("Detección + OCR", frame)
        # ESC stops the preview
        if cv2.waitKey(30) & 0xFF == 27:
            break
finally:
    # Release the capture and close the window even if detection or OCR raises
    vid.release()
    cv2.destroyAllWindows()





Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[31.28s] Matrícula: 271LC (Conf: 0.55)


### Usamos el modelo pre-entrenado de YOLO y el nuestro en conjunto 
Utilizamos el modelo preentrenado para detectar personas y vehículos. Después, cada vez que se detecta un vehículo, pasamos su recorte a nuestro modelo entrenado en matrículas para que localice la matrícula dentro de él.

In [None]:
# Two-stage pipeline: the stock YOLO model tracks people and vehicles, and every
# vehicle crop is passed to our plate model; detected plates are read with EasyOCR.
# The annotated video is written to `output_path` and one row per detection is
# collected in `save_csv`.
base_model = YOLO('yolo11n.pt')
our_model = YOLO('best.pt')
reader = easyocr.Reader(['es'], gpu=False)

vid = cv2.VideoCapture("prueba_coches.mp4")
if not vid.isOpened():
    raise RuntimeError("No se pudo abrir el vídeo: prueba_coches.mp4")

frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep fractional rates (truncating e.g. 29.97 to 29 changes the output duration);
# fall back to 30 when the container reports 0
fps = vid.get(cv2.CAP_PROP_FPS) or 30.0

output_path = 'resultados.mp4'

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))


def ocr(placa_crop, last_plate=None, canvas=None, origin=None):
    """Read the text of a licence-plate crop with EasyOCR.

    Args:
        placa_crop: BGR image of the plate (it is converted with COLOR_BGR2GRAY).
        last_plate: text already reported for this plate; a reading equal to it
            is not printed again.
        canvas: optional image on which an accepted reading is drawn.
        origin: optional (x, y) position of the plate on `canvas`; the text is
            drawn 10 px above it. Nothing is drawn unless both `canvas` and
            `origin` are given.

    Returns:
        The stripped text of the first OCR region, or "" when the crop is empty
        or nothing was recognised. The text is returned even when it does not
        pass the length/confidence filter used for printing and drawing.
    """
    # cv2.resize raises on an empty image, e.g. a degenerate plate box
    if placa_crop is None or placa_crop.size == 0:
        return ""

    # Upscale and boost contrast before running the OCR
    escala = 3
    placa_crop = cv2.resize(placa_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)

    gray = cv2.cvtColor(placa_crop, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)

    ocr_result = reader.readtext(
        gray,
        allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
        detail=1
    )

    if len(ocr_result) == 0:
        return ""

    # Only the first OCR region is considered
    text = ocr_result[0][1].strip()
    prob = ocr_result[0][2]

    if len(text) >= 4 and prob > 0.5:
        if text != last_plate:
            timestamp = vid.get(cv2.CAP_PROP_POS_MSEC) / 1000
            print(f"[{timestamp:.2f}s] Matrícula: {text} (Conf: {prob:.2f})")
        # Draw on the image supplied by the caller; the previous version drew on
        # the global raw `frame`, which is not the image written to the video
        if canvas is not None and origin is not None:
            cv2.putText(canvas, f'{text}', (origin[0], origin[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)
    return text


classes = [0, 2, 3, 5, 7]    # Person, car, motorcycle, bus, truck

car_boxes = []
car_boxes_left_coords = []

# One row per detection:
# frame_index, object_type, confidence, tracking_id, x1, y1, x2, y2,
# plate_crop, plate_confidence, px1, py1, px2, py2, plate_text
save_csv = []
frame_index = 0

try:
    while vid.isOpened():
        ret, frame = vid.read()
        if not ret:
            # The video is over
            break
        frame_index += 1

        base_results = base_model.track(frame, persist=True, show=False, classes=classes)
        plates_result = None
        annotated_frame = base_results[0].plot()

        for result in base_results:
            for box in result.boxes:
                name = result.names[box.cls.int().item()]
                conf = float(box.conf[0])
                # box.id is None while the tracker has not assigned an identifier
                track_id = str(int(box.id[0].tolist())) if box.id is not None else ""
                x1, y1, x2, y2 = [int(item) for item in box.xyxy[0].tolist()]
                plate, plate_conf, px1, py1, px2, py2, plate_text = "", "", "", "", "", "", ""

                if name != "person":
                    vehicle_box = frame[y1:y2, x1:x2]
                    # Skip degenerate boxes that produce an empty crop
                    if vehicle_box.size > 0:
                        plates_result = our_model(vehicle_box, show=False)
                        plate_boxes = plates_result[0].boxes
                        if len(plate_boxes) > 0:
                            # Only the first plate detection of the vehicle is used
                            plate_conf = float(plate_boxes.conf[0])
                            px1, py1, px2, py2 = [int(item) for item in plate_boxes.xyxy[0].tolist()]
                            plate = vehicle_box[py1:py2, px1:px2]
                            # Plate coordinates are relative to the vehicle crop;
                            # shift them back to full-frame coordinates
                            real_x1 = px1 + x1
                            real_y1 = py1 + y1
                            real_x2 = px2 + x1
                            real_y2 = py2 + y1
                            plate_text = ocr(plate, canvas=annotated_frame, origin=(real_x1, real_y1))
                            cv2.rectangle(annotated_frame, (real_x1, real_y1), (real_x2, real_y2), (0, 255, 0), 1)

                # Store the frame number, not the image: keeping every frame in
                # the list makes memory grow with the length of the video
                save_csv.append([frame_index, name, conf, track_id, x1, y1, x2, y2,
                                 plate, plate_conf, px1, py1, px2, py2, plate_text])

        out.write(annotated_frame)
finally:
    # Close the input and finalise the output file even if a frame fails
    vid.release()
    out.release()
    cv2.destroyAllWindows()

# Row layout: frame, object_type, confidence, tracking_id, x1, y1, x2, y2,
# plate_if_any, confidence, mx1, my1, mx2, my2, plate_text
print(save_csv)

# Show how many rows were collected for each class
count_classes = dict.fromkeys(("person", "car", "motorcycle", "bus", "truck"), 0)
for row in save_csv:
    label = row[1]
    for known_class in count_classes:
        # Substring match against the label stored in the row
        if known_class in label:
            count_classes[known_class] += 1
print(count_classes)

Opción 2 (por si hace falta)

In [None]:

# Option 2: collect every vehicle crop of the frame and run the plate model once
# on the whole batch instead of once per vehicle.
# Relies on `frame`, `base_model`, `our_model` and `classes` from the cell above.
base_results = base_model(frame, show=False, classes=classes)
plates_result = None
annotated_frame = base_results[0].plot()

# Start from empty lists so crops collected for earlier frames are not re-processed
car_boxes = []
car_boxes_left_coords = []

# Use the detections just computed, not a `result` left over from another cell
result = base_results[0]
for box in result.boxes:
    if result.names[int(box.cls)] != "person":
        x1, y1, x2, y2 = box.xyxy[0].int().tolist()
        vehicle_crop = frame[y1:y2, x1:x2]
        # Skip degenerate boxes that produce an empty crop
        if vehicle_crop.size == 0:
            continue
        car_boxes.append(vehicle_crop)
        car_boxes_left_coords.append((x1, y1))
if car_boxes:
    plates_results = our_model(car_boxes, show=False)
    for i, plates_result in enumerate(plates_results):
        car_x, car_y = car_boxes_left_coords[i]
        if len(plates_result.boxes) > 0:
            # Plate coordinates are relative to the vehicle crop; shift them
            # back to full-frame coordinates before drawing
            px1, py1, px2, py2 = plates_result.boxes[0].xyxy[0].int().tolist()
            real_x1 = px1 + car_x
            real_y1 = py1 + car_y
            real_x2 = px2 + car_x
            real_y2 = py2 + car_y
            cv2.rectangle(annotated_frame, (real_x1, real_y1), (real_x2, real_y2), (0, 255, 0), 3)


    

Pruebas OCR

In [None]:
import cv2
import numpy as np
import time
from ultralytics import YOLO
import easyocr
from IPython.display import Video, display
import os


# Paths used by the OCR experiment; OCR crops are saved under `crop_dir`
model_path = "best.pt"
video_path = "C0142.MP4"
crop_dir   = "crops/"
os.makedirs(crop_dir, exist_ok=True)

# Plate detector and OCR engine
model = YOLO(model_path)
reader = easyocr.Reader(['en'], gpu=True)

# Input video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise Exception(f"No se pudo abrir el vídeo: {video_path}")

# The output video reuses the size of the input
width, height = (
    int(cap.get(prop))
    for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
)
fps_in = cap.get(cv2.CAP_PROP_FPS)
if not fps_in:
    # The container reported no frame rate; use 20 fps
    fps_in = 20.0

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('resultado.mp4', fourcc, fps_in, (width, height))

# Loop state: fixed padding around plates, frame counter, texts already reported
margin, frame_count = 10, 0
last_texts = set()

def preprocess_for_ocr(img_crop, escala=4):
    """Prepare a plate crop for OCR.

    The crop is enlarged by `escala` with cubic interpolation, converted from
    BGR to grayscale, smoothed with a 3x3 Gaussian blur and binarised with an
    inverted Gaussian adaptive threshold.

    Args:
        img_crop: BGR image to preprocess.
        escala: scale factor applied to both axes.

    Returns:
        The thresholded single-channel image.
    """
    upscaled = cv2.resize(img_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)
    smoothed = cv2.GaussianBlur(cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY), (3, 3), 0)

    block_size = 13   # neighbourhood size used by the adaptive threshold
    offset = 2        # constant subtracted from the weighted local mean
    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        block_size,
        offset,
    )

# Detect plates on every frame, OCR the confident ones, save the preprocessed
# crop of each new reading and write the annotated video.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1

        results = model(frame, verbose=False)
        detections = results[0].boxes

        # Crops come from an untouched copy so that labels and rectangles drawn
        # for earlier detections of this frame never reach the OCR input
        clean_frame = frame.copy()

        for box in detections:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])
            # Ignore low-confidence plate detections
            if conf < 0.6:
                continue

            # Pad the box by a fixed margin plus 15% of its longer side,
            # clamped to the image bounds
            w, h = x2 - x1, y2 - y1
            extra = int(max(w, h) * 0.15)
            x1m = max(0, x1 - margin - extra)
            y1m = max(0, y1 - margin - extra)
            x2m = min(frame.shape[1], x2 + margin + extra)
            y2m = min(frame.shape[0], y2 + margin + extra)

            placa_crop = clean_frame[y1m:y2m, x1m:x2m]

            if placa_crop.size > 0:
                gray = preprocess_for_ocr(placa_crop)

                ocr_result = reader.readtext(
                    gray,
                    allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
                    detail=1,
                    text_threshold=0.4
                )

                for (bbox, text, prob) in ocr_result:
                    text_clean = text.strip().replace(" ", "")
                    # Report only confident readings of at least 7 characters
                    # that have not been seen before
                    if prob > 0.7 and len(text_clean) >= 7 and text_clean not in last_texts:
                        last_texts.add(text_clean)
                        timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

                        print(f"[{timestamp:.2f}s] Matrícula detectada: {text_clean} (Conf: {prob:.2f})")

                        cv2.putText(frame, text_clean, (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2, cv2.LINE_AA)

                        # Keep the preprocessed image that produced the reading
                        crop_filename = os.path.join(crop_dir, f"{text_clean}_{frame_count}.jpg")
                        cv2.imwrite(crop_filename, gray)
                        print(f"Guardada imagen: {crop_filename}")

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        out.write(frame)
finally:
    # Close the input and finalise the output file even if a frame fails
    cap.release()
    out.release()

print(f"Procesamiento completado. Total frames: {frame_count}")
display(Video('resultado.mp4', embed=True))