In [None]:
import os
import time
import cv2
from tqdm.notebook import tqdm
import easyocr
import numpy as np
import torch
from IPython.display import Video, display
from matplotlib import pyplot as plt
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText
from ultralytics import YOLO

# Benchmark: compare SmolVLM and EasyOCR on license plates located by our YOLO detector.
# The expected plate text of every image is its file name (without extension).
#
# NOTE(review): this cell uses `ocr_vlm_com`, `processor`, `model` and `ocr_easy_com`,
# none of which are defined in this cell; the cells that define them must be run
# first, otherwise every OCR call below fails and is counted as a miss.
PATH = "/kaggle/input/matriculas-comparativa"
OUR_MODEL_PATH = '/kaggle/input/best-pt/pytorch/yolo11n/1/best.pt'

our_model = YOLO(OUR_MODEL_PATH)
our_model.to('cuda')

correct_vlm = 0
correct_easy = 0
time_vlm = 0.0
time_easy = 0.0
evaluated = 0  # images that were readable AND had at least one detected plate
skipped = 0    # images that could not be read or had no detection

# Sorted so the processing order (and therefore the log output) is reproducible.
image_files = sorted(
    f for f in os.listdir(PATH)
    if f.lower().endswith((".jpg", ".jpeg"))
)


def _plate_matches(text, label):
    """Return True when `text` equals `label` after stripping, removing spaces and upper-casing."""
    return bool(text) and text.strip().replace(" ", "").upper() == label


for img_name in tqdm(image_files, desc="Procesamiento de matrículas"):
    img_path = os.path.join(PATH, img_name)
    # Ground truth comes from the file name, upper-cased to match the normalised OCR text.
    label = os.path.splitext(img_name)[0].upper()

    frame = cv2.imread(img_path)
    if frame is None:
        # Unreadable file: it says nothing about either OCR engine, so leave it out.
        skipped += 1
        continue

    result = our_model(frame, show=False)
    plate = result[0].boxes.xyxy.tolist()
    if not plate:
        # Without this guard `plate[0]` raised IndexError and aborted the whole benchmark.
        print(f"[No plate detected on {img_name}]")
        skipped += 1
        continue
    # Only the first detected box is evaluated.
    x1, y1, x2, y2 = [int(item) for item in plate[0]]
    evaluated += 1

    # perf_counter is monotonic, so the measured interval cannot be distorted by
    # wall-clock adjustments the way time.time() can.
    start = time.perf_counter()
    try:
        text_vlm = ocr_vlm_com(frame, x1, y1, x2, y2, processor, model, device="cuda", max_new_tokens=15)
    except Exception as e:
        # Best effort: a failing engine counts as a miss instead of stopping the run.
        print(f"[VLM error on {img_name}]: {e}")
        text_vlm = ""
    time_vlm += time.perf_counter() - start
    if _plate_matches(text_vlm, label):
        correct_vlm += 1

    start = time.perf_counter()
    try:
        text_easy = ocr_easy_com(frame, x1, y1, x2, y2)
    except Exception as e:
        print(f"[EasyOCR error on {img_name}]: {e}")
        text_easy = ""
    time_easy += time.perf_counter() - start
    if _plate_matches(text_easy, label):
        correct_easy += 1

# Only images that actually went through both OCR engines count towards the
# statistics; skipped images would otherwise dilute accuracy and mean time.
total = evaluated
avg_time_vlm = time_vlm / total if total > 0 else 0
avg_time_easy = time_easy / total if total > 0 else 0
# Same zero guard as the averages: an empty folder must not raise ZeroDivisionError.
acc_vlm = correct_vlm / total if total > 0 else 0.0
acc_easy = correct_easy / total if total > 0 else 0.0

print("\n=== COMPARATIVA DE MODELOS ===")
print(f"Número de imágenes probadas: {total}")
print(f"Imágenes omitidas (ilegibles o sin detección): {skipped}")
print(f"SmolVLM (aciertos): {correct_vlm} / {total} ({acc_vlm:.2%})")
print(f"EasyOCR (aciertos): {correct_easy} / {total} ({acc_easy:.2%})")

print("\n=== TIEMPO MEDIO DE INFERENCIA ===")
print(f"SmolVLM: {avg_time_vlm:.3f} s/img")
print(f"EasyOCR: {avg_time_easy:.3f} s/img")


In [None]:
import os
# Environment flags set before the `transformers` import further down in this cell.
# NOTE(review): presumably meant to keep TensorFlow from being loaded and to
# silence its native logging — confirm against the installed library versions.
os.environ.update({
    "TRANSFORMERS_NO_TF": "1",
    "DISABLE_TF_IMPORT": "1",
    "TF_CPP_MIN_LOG_LEVEL": "3",
})



from transformers import AutoProcessor, AutoModelForImageTextToText
from ultralytics import YOLO
import torch, cv2, numpy as np, os, time
from tqdm.notebook import tqdm
import easyocr


In [None]:
import cv2
import easyocr
import numpy as np

# Module-level EasyOCR reader for Spanish ('es'), built once and reused by
# every ocr_easy_com call instead of being re-created per image.
# NOTE(review): gpu=False is passed here while the benchmark cell calls the VLM
# with device="cuda", so the reported inference times may compare different
# hardware — confirm this is intentional.
reader = easyocr.Reader(['es'], gpu=False)

def ocr_easy_com(frame, x1, y1, x2, y2, escala=3):
    """Read the text inside a bounding box of `frame` with the shared EasyOCR `reader`.

    The box is cropped, upscaled by `escala`, converted to grayscale,
    histogram-equalised and contrast-boosted before being passed to
    `reader.readtext`. The recognised fragments are ordered left to right
    and concatenated.

    Args:
        frame: Image array indexed as frame[y, x]; it is converted with
            cv2.COLOR_BGR2GRAY, so a BGR colour image is expected.
        x1, y1: Top-left corner of the box, in pixels.
        x2, y2: Bottom-right corner of the box (exclusive), in pixels.
        escala: Upscaling factor applied to the crop on both axes.

    Returns:
        The concatenated, upper-cased text, or "" when `frame` is None, the
        box does not overlap the image, or nothing is recognised.
    """
    if frame is None:
        return ""

    # Clamp the box to the image. Negative coordinates would otherwise be
    # interpreted by NumPy as indices counted from the end and silently crop
    # the wrong region.
    height, width = frame.shape[:2]
    x1, x2 = max(0, min(x1, width)), max(0, min(x2, width))
    y1, y2 = max(0, min(y1, height)), max(0, min(y2, height))
    if x2 <= x1 or y2 <= y1:
        # Empty or inverted box: cv2.resize would raise on an empty crop.
        return ""

    placa_crop = frame[y1:y2, x1:x2]

    # Upscale the crop before recognition.
    placa_crop = cv2.resize(placa_crop, None, fx=escala, fy=escala, interpolation=cv2.INTER_CUBIC)

    gray = cv2.cvtColor(placa_crop, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)

    # Restrict recognition to upper-case letters and digits.
    results_ocr = reader.readtext(
        gray,
        allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
        detail=1
    )

    if not results_ocr:
        return ""

    # Each result is unpacked as (box, text, score); order the fragments by the
    # x coordinate of the box's top-left corner so they read left to right.
    results_sorted = sorted(results_ocr, key=lambda r: r[0][0][0])
    plate_parts = [text.strip().upper() for (_, text, _) in results_sorted]
    return "".join(plate_parts)