Paquetes necesarios

In [2]:
import csv
import math
import os

import cv2
import easyocr
import numpy as np
import torch
from IPython.display import Video, display
from matplotlib import pyplot as plt
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText
from ultralytics import YOLO

  from .autonotebook import tqdm as notebook_tqdm


Extraemos las clases del modelo YOLO 11

In [None]:
model = YOLO('yolo11n.pt')

vid = cv2.VideoCapture("C0142.MP4")

names = None

while vid.isOpened():
    ret, frame = vid.read()

    if ret:
        results = model(frame, show=False)
        if names is None:
            names = results[0].names
        annotated_frame = results[0].plot()
        cv2.imshow("Deteccion de YOLO", annotated_frame)

        # Salir del vídeo cuando presionamos ESC
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break
    else:
        # El vídeo ya se terminó
        break

vid.release()
cv2.destroyAllWindows()

# Leemos las posibles clases
with open("classes.txt", "w") as f:
    f.write(str(names))

### Mostramos el funcionamiento de nuestro modelo entrenado

In [None]:
model = YOLO('best.pt')

vid = cv2.VideoCapture("C0142.MP4")

while vid.isOpened():
    ret, frame = vid.read()

    if ret:
        results = model(frame, show=False)
        annotated_frame = results[0].plot()
        cv2.imshow("Deteccion de YOLO", annotated_frame)

        # Salir del vídeo cuando presionamos ESC
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break
    else:
        # El vídeo ya se terminó
        break

vid.release()
cv2.destroyAllWindows()

### Usamos el modelo pre-entrenado de YOLO y el nuestro en conjunto 
Utilizamos el modelo pre-entrenado para detectar personas y vehículos, posteriormente, cuando hayamos detectado un vehículo, se lo pasamos a nuestro modelo entrenado en matrículas para que le detecte la matrícula.

Código para las detecciones de OCR

In [25]:
# Código necesario para el VLM
from transformers import AutoProcessor, AutoModelForImageTextToText
import torch
device = "cpu"  # or "cpu"

processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM-Instruct",
                                                dtype=torch.bfloat16,
                                                _attn_implementation="flash_attention_2" if device == "cuda" else "eager").to(device)

In [26]:
import re

# Solo se está usando en el Tesseract (Se elimina?)
def preprocess_for_ocr(img_crop, escala=4):

    img = cv2.resize(img_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    gray_blur = cv2.GaussianBlur(gray, (3, 3), 0)

    thresh = cv2.adaptiveThreshold(
        gray_blur,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        13,
        2
    )

    return thresh

reader = easyocr.Reader(['es'], gpu=False) 

def ocr_easy(placa_crop, frame, x1, y1, last_plate=None):
    escala = 3
    placa_crop = cv2.resize(placa_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)

    gray = cv2.cvtColor(placa_crop, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)

    ocr_result = reader.readtext(
        gray,
        allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
        detail=1
    )

    text = ""

    if len(ocr_result) > 0:
        text = ocr_result[0][1].strip()
        prob = ocr_result[0][2]

        if len(text) >= 4 and prob > 0.5 and text != last_plate:
            last_plate = text
            timestamp = vid.get(cv2.CAP_PROP_POS_MSEC) / 1000
            plate_pattern = re.compile("^[0-9]{4}[BCDFGHJKLMNPRSTVWXYZ]{3}$")
            if plate_pattern.match(text.strip()):
                print(f"[{timestamp:.2f}s] Matrícula: {text} (Conf: {prob:.2f})")
                cv2.putText(frame, f'{text}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)
            else:
                return
   
        return text

def ocr_vlm(crop, frame, x1, y1, x2, y2):
    plate_img = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "Read the text on this license plate."}
            ]
        },
    ]

    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[plate_img], return_tensors="pt").to(device)

    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=10)
        generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
        plate_text = generated_texts[0].strip()

        if "Assistant: " in plate_text:
            raw_text = plate_text.split("Assistant: ")[1]

    plate_pattern = re.compile("^[0-9]{4}[BCDFGHJKLMNPRSTVWXYZ]{3}$")
    if plate_pattern.match(plate_text.strip()):
        cv2.putText(frame, raw_text, (x1, max(30, y1 - 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    else:
        return
    
    return raw_text

# NO SE USA (Lo eliminamos cuando esté confirmado que usamos los otros dos)
def ocr_tesseract(placa_crop, frame_count, cap, crop_dir="crops/", last_texts=set()):
    if placa_crop.size > 0:
        gray = preprocess_for_ocr(placa_crop)

        # Usando Tesseract
        ocr_result = pytesseract.image_to_data(
            gray,
            output_type=Output.DICT,
            config='--psm 7 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
        )

        n_boxes = len(ocr_result['text'])
        for i in range(n_boxes):
            text = ocr_result['text'][i].strip().replace(" ", "")
            conf = float(ocr_result['conf'][i])
            if len(text) >= 7 and conf > 60 and text not in last_texts:
                last_texts.add(text)
                timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
                print(f"[{timestamp:.2f}s] Matrícula detectada: {text} (Conf: {conf:.2f})")

                cv2.putText(frame, text, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2, cv2.LINE_AA)

                crop_filename = os.path.join(crop_dir, f"{text}_{frame_count}.jpg")
                cv2.imwrite(crop_filename, gray)
                print(f"Guardada imagen: {crop_filename}")
    return text

Using CPU. Note: This module is much faster with a GPU.


Código principal

In [None]:

BASE_MODEL_PATH = 'yolo11n.pt'
OUR_MODEL_PATH = 'best.pt'

VIDEO_PATH = "Copy of C0142_1.mp4"
base_model = YOLO(BASE_MODEL_PATH)
our_model = YOLO(OUR_MODEL_PATH)
vid = cv2.VideoCapture(VIDEO_PATH)

frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(vid.get(cv2.CAP_PROP_FPS))

output_path = 'resultados.mp4'

fourcc = cv2.VideoWriter_fourcc(*'mp4v') 
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

def data_to_csv(registros):
    columnas = [
        "fotograma", "tipo_objeto", "confianza", "id_tracking",
        "x1", "y1", "x2", "y2",
        "matricula_detectada", "conf_ocr",
        "mx1", "my1", "mx2", "my2",
        "texto_matricula"
    ]

    with open("resultados.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=';')
        writer.writerow(columnas)    
        writer.writerows(registros)  

    print("Archivo 'resultados.csv' creado correctamente.")


classes = [0, 2, 3, 5, 7]    # Person, car, motorcycle, bus, truck

car_boxes = []
car_boxes_left_coords = []

track_ids = set()
count_classes = {"person": 0, "car": 0, "motorcycle": 0, "bus": 0, "truck": 0}

save_csv = []
frame_count = 0

while vid.isOpened():
    ret, frame = vid.read()
    frame_count += 1

    if ret:
        base_results = base_model.track(frame, persist=True, show=False, classes=classes)
        plates_result = None
        annotated_frame = base_results[0].plot()
        boxes = list()

        # Mostramos un recuadro arriba a la izquierda que muestre las matrículas que se vayan detectando
        text_box_w = int(frame.shape[1]*0.25)
        text_box_h = int(frame.shape[0]*0.09)
        
        cv2.rectangle(annotated_frame, (0, 0), (text_box_w, text_box_h), (0, 0, 0), -1)

        last_plate = ""
        show_plate_text = ""
        
        for result in base_results:
            boxes += result.boxes
        for box in boxes:
            bounding_box = box.xyxy.tolist()
            name = result[0].names[box.cls.int().item()]
            conf = box.conf
            track_id = str(int(box.id[0].tolist()))
            if track_id not in track_ids:
                track_ids.add(track_id)
                count_classes[name] += 1
            x1, y1, x2, y2 = [int(item) for item in bounding_box[0]]
            plate, plate_conf, px1, py1, px2, py2, plate_text = "", "", "", "", "", "", ""
            if name != "person":
                vehicle_box = frame[y1:y2, x1:x2]
                plates_result = our_model(vehicle_box, show=False)
                if len(plates_result[0].boxes) > 0:
                    plate_conf = plates_result[0].boxes.conf
                    plate_detection = (plates_result[0].boxes.xyxy).tolist()
                    px1, py1, px2, py2 = [int(item) for item in plate_detection[0]]
                    plate = vehicle_box[py1:py2, px1:px2]
                    real_x1 = px1+x1
                    real_y1 = py1+y1
                    real_x2 = px2+x1
                    real_y2 = py2+y1
                    cv2.rectangle(annotated_frame, (real_x1, real_y1), (real_x2, real_y2), (0, 255, 0), 2)
                    plate_text = ocr_easy(plate, frame, real_x1, real_y1)
                    plate_text = plate_text.strip()
                    if plate_text is not None:
                        show_plate_text = plate_text
            save_csv.append(["frame", name, conf, track_id, x1, y1, x2, y2, "plate", plate_conf, px1, py1, px2, py2, plate_text])
            if show_plate_text != last_plate:
                font = cv2.FONT_HERSHEY_SIMPLEX
                (text_width, text_height), baseline = cv2.getTextSize(f"Matrícula detectada: {show_plate_text}", font, 0.8, 2)
                text_x = (text_box_w - text_width) // 2
                text_y = (text_box_h + text_height) // 2 - baseline
                cv2.putText(annotated_frame, show_plate_text, (text_x, text_y), font, 0.8, (255, 255, 255), 2)
                last_plate = show_plate_text
            
        out.write(annotated_frame)
        """cv2.imshow("Deteccion de YOLO", annotated_frame)
        
        if cv2.waitKey(1) & 0xFF == 27 or cv2.getWindowProperty("Deteccion de YOLO", cv2.WND_PROP_VISIBLE) < 1:
            break"""
    else:
        # El vídeo ya se terminó
        break

vid.release()
out.release()
cv2.destroyAllWindows()

print(count_classes)


0: 384x640 2 persons, 10 cars, 74.7ms
Speed: 45.1ms preprocess, 74.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 416x640 1 matricula, 134.9ms
Speed: 4.1ms preprocess, 134.9ms inference, 1.2ms postprocess per image at shape (1, 3, 416, 640)


KeyboardInterrupt: 

### Pruebas OCR

#### Easy

In [None]:
import cv2
import numpy as np
import time
from ultralytics import YOLO
import easyocr
from IPython.display import Video, display
import os


model_path = "best.pt"
video_path = "C0142.MP4"
crop_dir   = "crops/"  
os.makedirs(crop_dir, exist_ok=True)

model = YOLO(model_path)
reader = easyocr.Reader(['en'], gpu=True)

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise Exception(f"No se pudo abrir el vídeo: {video_path}")

width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps_in = cap.get(cv2.CAP_PROP_FPS) or 20.0

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('resultado.mp4', fourcc, fps_in, (width, height))

margin = 10
frame_count = 0
last_texts = set()

def preprocess_for_ocr(img_crop, escala=4):

    img = cv2.resize(img_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    gray_blur = cv2.GaussianBlur(gray, (3, 3), 0)

    thresh = cv2.adaptiveThreshold(
        gray_blur,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        13,
        2
    )

    return thresh

while True:
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1

    results = model(frame, verbose=False)
    detections = results[0].boxes

    for box in detections:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        conf = float(box.conf[0])
        if conf < 0.2:
            continue

        w, h = x2 - x1, y2 - y1
        extra = int(max(w, h) * 0.15)
        x1m = max(0, x1 - margin - extra)
        y1m = max(0, y1 - margin - extra)
        x2m = min(frame.shape[1], x2 + margin + extra)
        y2m = min(frame.shape[0], y2 + margin + extra)

        placa_crop = frame[y1m:y2m, x1m:x2m]

        if placa_crop.size > 0:

            gray = preprocess_for_ocr(placa_crop)

            ocr_result = reader.readtext(
                gray,
                allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
                detail=1,
                text_threshold=0.2
            )

            if len(ocr_result) > 0:
                for (bbox, text, prob) in ocr_result:
                    text_clean = text.strip().replace(" ", "")
                    if prob > 0.7 and len(text_clean) >= 7 and text_clean not in last_texts:
                        last_texts.add(text_clean)
                        timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

                        print(f"[{timestamp:.2f}s] Matrícula detectada: {text_clean} (Conf: {prob:.2f})")

                        cv2.putText(frame, text_clean, (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2, cv2.LINE_AA)

                        crop_filename = os.path.join(crop_dir, f"{text_clean}_{frame_count}.jpg")
                        cv2.imwrite(crop_filename, gray)
                        print(f"Guardada imagen: {crop_filename}")
            else:
                pass


        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

    out.write(frame)

cap.release()
out.release()
print(f"Procesamiento completado. Total frames: {frame_count}")
display(Video('resultado.mp4', embed=True))

#### Tesseract

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import pytesseract
from pytesseract import Output
from IPython.display import Video, display
import os

# Configura la ruta si Tesseract no está en PATH
pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract'

model_path = "best.pt"
video_path = "C0142.MP4"
crop_dir   = "crops/"  
os.makedirs(crop_dir, exist_ok=True)

model = YOLO(model_path)

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise Exception(f"No se pudo abrir el vídeo: {video_path}")

width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps_in = cap.get(cv2.CAP_PROP_FPS) or 20.0

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('resultado_tesseract.mp4', fourcc, fps_in, (width, height))

margin = 10
frame_count = 0
last_texts = set()

def preprocess_for_ocr(img_crop, escala=4):
    # Escala y convierte a gris
    img = cv2.resize(img_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray_blur = cv2.GaussianBlur(gray, (3, 3), 0)
    # Umbral adaptativo
    thresh = cv2.adaptiveThreshold(
        gray_blur,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        13,
        2
    )
    return thresh

while True:
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1

    results = model(frame, verbose=False)
    detections = results[0].boxes

    for box in detections:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        conf = float(box.conf[0])
        if conf < 0.2:
            continue

        w, h = x2 - x1, y2 - y1
        extra = int(max(w, h) * 0.15)
        x1m = max(0, x1 - margin - extra)
        y1m = max(0, y1 - margin - extra)
        x2m = min(frame.shape[1], x2 + margin + extra)
        y2m = min(frame.shape[0], y2 + margin + extra)

        placa_crop = frame[y1m:y2m, x1m:x2m]

        if placa_crop.size > 0:
            gray = preprocess_for_ocr(placa_crop)

            # Usando Tesseract
            ocr_result = pytesseract.image_to_data(
                gray,
                output_type=Output.DICT,
                config='--psm 7 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
            )

            n_boxes = len(ocr_result['text'])
            for i in range(n_boxes):
                text = ocr_result['text'][i].strip().replace(" ", "")
                conf = float(ocr_result['conf'][i])
                if len(text) >= 7 and conf > 60 and text not in last_texts:
                    last_texts.add(text)
                    timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
                    print(f"[{timestamp:.2f}s] Matrícula detectada: {text} (Conf: {conf:.2f})")

                    cv2.putText(frame, text, (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2, cv2.LINE_AA)

                    crop_filename = os.path.join(crop_dir, f"{text}_{frame_count}.jpg")
                    cv2.imwrite(crop_filename, gray)
                    print(f"Guardada imagen: {crop_filename}")

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

    out.write(frame)

cap.release()
out.release()
print(f"Procesamiento completado. Total frames: {frame_count}")
display(Video('resultado_tesseract.mp4', embed=True))

In [11]:
import csv

registros = [
    [1, "auto", 0.92, 3, 120, 200, 360, 480, True, 0.88, 135, 215, 345, 465, "ABC1234"],
    [1, "moto", 0.85, 5, 400, 220, 500, 380, False, 0.00, 0, 0, 0, 0, ""],
    [2, "auto", 0.95, 3, 125, 205, 365, 485, True, 0.90, 140, 220, 350, 470, "ABC1234"],
    [2, "camioneta", 0.88, 7, 600, 250, 900, 550, True, 0.75, 620, 270, 880, 530, "XYZ9876"],
    [3, "auto", 0.93, 3, 130, 210, 370, 490, True, 0.85, 145, 225, 355, 475, "ABC1234"],
    [3, "moto", 0.80, 5, 405, 225, 505, 385, False, 0.00, 0, 0, 0, 0, ""]
]
def data_to_csv(registros):
    columnas = [
        "fotograma", "tipo_objeto", "confianza", "id_tracking",
        "x1", "y1", "x2", "y2",
        "matricula_detectada", "conf_ocr",
        "mx1", "my1", "mx2", "my2",
        "texto_matricula"
    ]

    with open("resultados.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=';')
        writer.writerow(columnas)    
        writer.writerows(registros)  

    print("Archivo 'resultados.csv' creado correctamente.")

data_to_csv(registros)

Archivo 'resultados.csv' creado correctamente.


### Comparativa entre modelos

In [29]:
import os
import time
import cv2
from tqdm import tqdm

PATH = "matriculas_comparativa"
OUR_MODEL_PATH = 'best.pt'

our_model = YOLO(OUR_MODEL_PATH)

correct_vlm = 0
correct_easy = 0
time_vlm = 0.0
time_easy = 0.0

image_files = [
    f for f in os.listdir(PATH)
    if f.lower().endswith((".jpg", ".jpeg"))
]


for img_name in tqdm(image_files, desc="Procesamiento de matrículas"):
    img_path = os.path.join(PATH, img_name)
    label = os.path.splitext(img_name)[0].upper()

    frame = cv2.imread(img_path)
    if frame is None:
        continue

    result = our_model(frame, show=False)
    plate = result[0].boxes.xyxy.tolist()
    x1, y1, x2, y2 = [int(item) for item in plate[0]]

    start = time.time()
    try:
        text_vlm = ocr_vlm(plate, frame, x1, y1, x2, y2)
    except Exception as e:
        text_vlm = ""
    time_vlm += time.time() - start
    if text_vlm and text_vlm.strip().replace(" ", "").upper() == label:
        correct_vlm += 1

    start = time.time()
    try:
        text_easy = ocr_easy(frame, frame, x1, y1)
    except Exception as e:
        print(f"[EasyOCR error on {img_name}]: {e}")
        text_easy = ""
    time_easy += time.time() - start
    if text_easy and text_easy.strip().replace(" ", "").upper() == label:
        correct_easy += 1

total = len(image_files)
avg_time_vlm = time_vlm / total if total > 0 else 0
avg_time_easy = time_easy / total if total > 0 else 0

print("\n=== COMPARATIVA DE MODELOS ===")
print(f"Número de imágenes probadas: {total}")
print(f"SmolVLM (aciertos): {correct_vlm} / {total} ({correct_vlm / total:.2%})")
print(f"EasyOCR (aciertos): {correct_easy} / {total} ({correct_easy / total:.2%})")

print("\n=== TIEMPO MEDIO DE INFERENCIA ===")
print(f"SmolVLM: {avg_time_vlm:.3f} s/img")
print(f"EasyOCR: {avg_time_easy:.3f} s/img")


Procesamiento de matrículas:   0%|          | 0/50 [00:00<?, ?it/s]


0: 640x384 1 matricula, 300.0ms
Speed: 10.5ms preprocess, 300.0ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:   2%|▏         | 1/50 [00:22<18:09, 22.23s/it]


0: 640x384 1 matricula, 162.2ms
Speed: 6.0ms preprocess, 162.2ms inference, 14.9ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:   4%|▍         | 2/50 [00:40<16:03, 20.07s/it]


0: 640x448 1 matricula, 340.0ms
Speed: 7.1ms preprocess, 340.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Procesamiento de matrículas:   6%|▌         | 3/50 [00:58<14:56, 19.07s/it]


0: 640x320 1 matricula, 113.5ms
Speed: 6.8ms preprocess, 113.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)


Procesamiento de matrículas:   8%|▊         | 4/50 [01:14<13:34, 17.70s/it]


0: 640x480 1 matricula, 111.0ms
Speed: 48.4ms preprocess, 111.0ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  10%|█         | 5/50 [01:43<16:25, 21.89s/it]


0: 480x640 1 matricula, 518.2ms
Speed: 14.9ms preprocess, 518.2ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Procesamiento de matrículas:  12%|█▏        | 6/50 [02:07<16:39, 22.71s/it]

[0.00s] Matrícula: 0416MLX (Conf: 0.72)

0: 640x480 1 matricula, 91.0ms
Speed: 71.0ms preprocess, 91.0ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  14%|█▍        | 7/50 [02:27<15:26, 21.54s/it]


0: 640x480 1 matricula, 71.1ms
Speed: 3.7ms preprocess, 71.1ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  16%|█▌        | 8/50 [02:44<14:12, 20.29s/it]

[0.00s] Matrícula: 0476HNN (Conf: 0.89)

0: 640x480 1 matricula, 132.4ms
Speed: 69.0ms preprocess, 132.4ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  18%|█▊        | 9/50 [03:02<13:20, 19.52s/it]


0: 640x384 1 matricula, 87.7ms
Speed: 2.9ms preprocess, 87.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  20%|██        | 10/50 [03:19<12:30, 18.77s/it]


0: 640x480 1 matricula, 80.5ms
Speed: 46.0ms preprocess, 80.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  22%|██▏       | 11/50 [03:42<13:02, 20.07s/it]


0: 640x480 2 matriculas, 131.0ms
Speed: 6.7ms preprocess, 131.0ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  24%|██▍       | 12/50 [04:09<14:04, 22.23s/it]


0: 480x640 1 matricula, 487.1ms
Speed: 6.4ms preprocess, 487.1ms inference, 15.5ms postprocess per image at shape (1, 3, 480, 640)


Procesamiento de matrículas:  26%|██▌       | 13/50 [04:34<14:06, 22.87s/it]


0: 480x640 1 matricula, 170.0ms
Speed: 10.2ms preprocess, 170.0ms inference, 15.3ms postprocess per image at shape (1, 3, 480, 640)


Procesamiento de matrículas:  28%|██▊       | 14/50 [04:55<13:26, 22.41s/it]


0: 640x480 1 matricula, 188.8ms
Speed: 2.8ms preprocess, 188.8ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  30%|███       | 15/50 [05:15<12:38, 21.68s/it]


0: 640x384 2 matriculas, 148.2ms
Speed: 9.1ms preprocess, 148.2ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  32%|███▏      | 16/50 [05:34<11:55, 21.04s/it]


0: 640x480 1 matricula, 221.4ms
Speed: 3.1ms preprocess, 221.4ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  34%|███▍      | 17/50 [05:56<11:39, 21.19s/it]


0: 480x640 1 matricula, 128.9ms
Speed: 6.0ms preprocess, 128.9ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)


Procesamiento de matrículas:  36%|███▌      | 18/50 [06:17<11:20, 21.25s/it]


0: 640x384 1 matricula, 114.6ms
Speed: 70.0ms preprocess, 114.6ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  38%|███▊      | 19/50 [06:38<10:50, 20.97s/it]


0: 640x480 1 matricula, 157.1ms
Speed: 4.6ms preprocess, 157.1ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  40%|████      | 20/50 [07:12<12:27, 24.92s/it]


0: 640x480 1 matricula, 309.6ms
Speed: 29.9ms preprocess, 309.6ms inference, 14.3ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  42%|████▏     | 21/50 [07:38<12:17, 25.43s/it]


0: 640x384 1 matricula, 264.3ms
Speed: 7.5ms preprocess, 264.3ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  44%|████▍     | 22/50 [07:59<11:14, 24.09s/it]


0: 640x480 1 matricula, 502.9ms
Speed: 12.4ms preprocess, 502.9ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  46%|████▌     | 23/50 [08:30<11:45, 26.14s/it]


0: 640x480 1 matricula, 119.6ms
Speed: 53.7ms preprocess, 119.6ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  48%|████▊     | 24/50 [08:54<10:58, 25.33s/it]


0: 640x480 1 matricula, 168.0ms
Speed: 5.8ms preprocess, 168.0ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  50%|█████     | 25/50 [09:17<10:20, 24.81s/it]


0: 640x384 1 matricula, 219.9ms
Speed: 5.0ms preprocess, 219.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  52%|█████▏    | 26/50 [09:40<09:38, 24.09s/it]


0: 640x384 1 matricula, 90.5ms
Speed: 3.2ms preprocess, 90.5ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  54%|█████▍    | 27/50 [09:55<08:12, 21.40s/it]


0: 640x384 1 matricula, 73.0ms
Speed: 50.5ms preprocess, 73.0ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  56%|█████▌    | 28/50 [10:15<07:44, 21.12s/it]


0: 480x640 1 matricula, 289.1ms
Speed: 7.9ms preprocess, 289.1ms inference, 16.5ms postprocess per image at shape (1, 3, 480, 640)


Procesamiento de matrículas:  58%|█████▊    | 29/50 [10:43<08:05, 23.11s/it]


0: 640x480 1 matricula, 887.1ms
Speed: 30.8ms preprocess, 887.1ms inference, 72.4ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  60%|██████    | 30/50 [11:09<07:57, 23.87s/it]


0: 640x384 1 matricula, 116.4ms
Speed: 8.5ms preprocess, 116.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  62%|██████▏   | 31/50 [11:33<07:36, 24.02s/it]


0: 640x480 1 matricula, 226.1ms
Speed: 4.1ms preprocess, 226.1ms inference, 11.5ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  64%|██████▍   | 32/50 [11:57<07:12, 24.04s/it]


0: 640x480 1 matricula, 104.1ms
Speed: 5.5ms preprocess, 104.1ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  66%|██████▌   | 33/50 [12:17<06:28, 22.84s/it]


0: 480x640 1 matricula, 128.2ms
Speed: 67.5ms preprocess, 128.2ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Procesamiento de matrículas:  68%|██████▊   | 34/50 [12:37<05:49, 21.84s/it]


0: 640x384 1 matricula, 138.5ms
Speed: 7.5ms preprocess, 138.5ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  70%|███████   | 35/50 [12:58<05:25, 21.69s/it]

[0.00s] Matrícula: 2753LBJ (Conf: 0.78)

0: 640x480 1 matricula, 209.1ms
Speed: 60.0ms preprocess, 209.1ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  72%|███████▏  | 36/50 [13:22<05:13, 22.39s/it]


0: 640x480 1 matricula, 114.1ms
Speed: 6.9ms preprocess, 114.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  74%|███████▍  | 37/50 [13:43<04:43, 21.78s/it]


0: 640x384 1 matricula, 185.0ms
Speed: 4.2ms preprocess, 185.0ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  76%|███████▌  | 38/50 [14:05<04:25, 22.10s/it]


0: 640x480 2 matriculas, 111.7ms
Speed: 5.0ms preprocess, 111.7ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  78%|███████▊  | 39/50 [14:56<05:38, 30.76s/it]


0: 640x384 2 matriculas, 1076.5ms
Speed: 73.6ms preprocess, 1076.5ms inference, 25.4ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  80%|████████  | 40/50 [15:17<04:38, 27.85s/it]


0: 640x384 1 matricula, 68.3ms
Speed: 2.6ms preprocess, 68.3ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  82%|████████▏ | 41/50 [15:33<03:36, 24.11s/it]


0: 640x384 1 matricula, 80.2ms
Speed: 62.2ms preprocess, 80.2ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  84%|████████▍ | 42/50 [15:50<02:55, 21.93s/it]


0: 640x480 1 matricula, 262.3ms
Speed: 4.9ms preprocess, 262.3ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  86%|████████▌ | 43/50 [16:15<02:40, 22.86s/it]


0: 480x640 1 matricula, 232.0ms
Speed: 5.1ms preprocess, 232.0ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Procesamiento de matrículas:  88%|████████▊ | 44/50 [16:34<02:11, 21.92s/it]


0: 640x384 1 matricula, 178.6ms
Speed: 3.9ms preprocess, 178.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  90%|█████████ | 45/50 [16:58<01:51, 22.34s/it]

[0.00s] Matrícula: 3654MYH (Conf: 0.65)

0: 480x640 1 matricula, 224.6ms
Speed: 4.5ms preprocess, 224.6ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Procesamiento de matrículas:  92%|█████████▏| 46/50 [17:29<01:40, 25.18s/it]


0: 640x480 1 matricula, 724.4ms
Speed: 13.0ms preprocess, 724.4ms inference, 30.8ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  94%|█████████▍| 47/50 [17:53<01:13, 24.66s/it]


0: 640x384 1 matricula, 160.4ms
Speed: 11.2ms preprocess, 160.4ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)


Procesamiento de matrículas:  96%|█████████▌| 48/50 [18:13<00:46, 23.35s/it]


0: 640x480 1 matricula, 387.7ms
Speed: 7.1ms preprocess, 387.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)


Procesamiento de matrículas:  98%|█████████▊| 49/50 [19:16<00:35, 35.24s/it]

[0.00s] Matrícula: 3943FGF (Conf: 1.00)

0: 480x640 1 matricula, 935.8ms
Speed: 46.3ms preprocess, 935.8ms inference, 17.4ms postprocess per image at shape (1, 3, 480, 640)


Procesamiento de matrículas: 100%|██████████| 50/50 [19:41<00:00, 23.64s/it]


=== COMPARATIVA DE MODELOS ===
Número de imágenes probadas: 50
SmolVLM (aciertos): 0 / 50 (0.00%)
EasyOCR (aciertos): 4 / 50 (8.00%)

=== TIEMPO MEDIO DE INFERENCIA ===
SmolVLM: 0.002 s/img
EasyOCR: 23.150 s/img



