In [None]:
# Before executing this script download the test video from:
# https://drive.google.com/file/d/1KD8wfVymKUBqaDVKAdiEM0jh1bQHARn5/view?usp=sharing 

# Library import
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
from ultralytics import YOLO
import models
from functions import *
import re
from datetime import datetime




# Define and open externally pretrained models
#yv8n_model = YOLO(models.yolov8n)
yv8n_model = YOLO("yolo11n.pt")
lp_model= YOLO("models/plate_model.pt")
char_model = YOLO(models.chars3)


# Define the path of the video to be analized, load it and print video properties (width, height, frames per second). Raise error if opening is not possible.
video_path = 'videos/calle3.mp4'
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
print("Video opened. Video properties: Frames per second =", fps, "| Video size (height x width):", height, "x", width)
if not cap.isOpened():
    print("Error while opening video.")
    exit()
#Defining we are working with a video currently (no stream)
stream = False

# Determine the vehicle classes that are to be recognized
labels = {2:'car',3:'motorcycle',5:'bus',7:'truck'}
labels_lp= {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: 'A', 11: 'B', 12: 'C', 13: 'D', 14: 'E', 15: 'F', 16: 'G', 17: 'H', 18: 'I', 19: 'J', 20: 'K', 21: 'L', 22: 'M', 23: 'N', 24: 'O', 25: 'P', 26: 'R', 27: 'S', 28: 'T', 29: 'U', 30: 'V', 31: 'W', 32: 'X', 33: 'Y', 34: 'Z'}

# Create an empty list where all the recognized character identifications are going to be stored
identified_characters = []


# Create image analization loop until the end of the video
while True:

    # Read the loaded video until the end of the video
    ret, frame = cap.read()
    if not ret:
        print("End of video.")
        break

    # Scale frame down to a lower resolution to achieve less necessary computational power and therefore faster vehicle recognition     
    new_width = width
    new_height = int(height * 9 / 16)
    framehd = cv2.resize(frame, (0, 0), fx=1/3, fy=1/3)
    frame640 = cv2.resize(framehd, (0, 0), fx=1/2, fy=1/2)
    frame320 = cv2.resize(frame640, (0, 0), fx=1/2, fy=1/2)
        
    # Vehicle detection with loaded YOLOv8n model using the frame with reduced resolution for faster processing | parameter vid_stride defines framerate: every n frames 1 frame is being processed by YOLOv8n
    vehicles_results = yv8n_model(frame320, imgsz=320, stream=stream, verbose=False, conf=0.5, classes=[2,3,5,7], vid_stride=2) 
    
    # If no vehicle detection just continue with next iteration
    if not vehicles_results: continue
    
    # Extracting the bounding box coordinates from the results of the vehicle detection model and converting them to a NumPy array of integers
    if stream:
        vehicles_detected = [result.boxes.cpu().numpy().data.astype(int) for result in vehicles_results][0]
    else:
        vehicles_results[0].plot()
        vehicles_detected = vehicles_results[0].boxes.cpu().data.numpy().astype(int)

    # Now pass the coordinate of the bounding boxes of the recognized vehicle to the license plate detecion model.    
    for vehicle in vehicles_detected:

        # Recovering the 640 pixels image quality for the license plate detection 
        conf, cls = vehicle[-2:]
        r = vehicle[:4] * 2
        vehicle_frame = frame640[r[1]:r[3], r[0]:r[2]]
    
        # Detection of the license plates using the externally pretrained license plate recognition model
        lp_results = lp_model(vehicle_frame, imgsz=640, stream=stream, verbose=False, conf=0.5, iou=0.4)
        
        # If no license plated detected just continue with next iteration
        if not lp_results: continue
        
        # Extracting the bounding box coordinates from the results of the vehicle detection model and converting them to a NumPy array of integers
        if stream:
            lps_detected = [result.boxes.cpu().numpy().data.astype(int) for result in lp_results][0]
        else:
            lps_detected = lp_results[0].boxes.cpu().data.numpy().astype(int)
            
        # Now pass the coordinate of the bounding boxes of the recognized license plate to the character identification model and iterate through them
        for lp in lps_detected:
            
            # Extract from the original frame the license plate detection area and re-establish the original video quality
            lp_conf = lp[-2]
            rp = lp[:4] * 6
            # Pre-selection: sort out all the images where the width-height ratio doesn´t fit the expected license plate ratio
            if (rp[2] - rp[0])/(rp[3] - rp[1]) < 1.2 : continue
            ro = r*6
            lp_frame = frame[ro[1]:ro[3], ro[0]:ro[2]][rp[1]:rp[3], rp[0]:rp[2]]
            

            # Image pre-processing for better character recognition
            # Define where to save the license plate images after preprocessing
            subfolder_path = "fotos"
            current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename_lp = f"{subfolder_path}/license_plate_{current_time}.png"
            cv2.imwrite(filename_lp, lp_frame)
            #Preprocessing way no.1 for yellow license plates
            lp_frame_preprocessed_1_step_1 = preprocessing_1_segmentation(lp_frame)
            lp_frame_preprocessed_2_step_1 = preprocessing_2_segmentation(lp_frame)
            if lp_frame_preprocessed_2_step_1[:,:,2].mean() < 35:
                lp_frame_preprocessed_1_step_2 = preprocessing_1_color_correction(lp_frame_preprocessed_1_step_1)
                preprocessed_img_3channels = cv2.cvtColor(lp_frame_preprocessed_1_step_2, cv2.COLOR_GRAY2RGB)
                #Save preprocessed license plate image 1
                filename_ppi_1 = f"{subfolder_path}/lp_ppi_1_{current_time}.png"
                cv2.imwrite(filename_ppi_1, lp_frame_preprocessed_1_step_2)
            else:
                #Preprocessing way no.2 for white license plates and converting it into a 3 channel img
                lp_frame_preprocessed_2_step_2 = preprocessing_2_color_correction(lp_frame_preprocessed_2_step_1)
                preprocessed_img_3channels = cv2.cvtColor(lp_frame_preprocessed_2_step_2, cv2.COLOR_GRAY2RGB)
                #Save preprocessed license plate image 2
                filename_ppi_2 = f"{subfolder_path}/lp_ppi_2_{current_time}.png"
                cv2.imwrite(filename_ppi_2, lp_frame_preprocessed_2_step_2)
                # print("Damaged license plate found.")
                
            # Show the preprocessed image
            cv2.imshow('Preprocessed license plate',preprocessed_img_3channels)

            # Identify the characters on the license plate by the character identification model
            char_results = char_model(preprocessed_img_3channels, imgsz=224, stream=stream, verbose=False, iou=0.8, max_det=6, conf=0.2)
    
            # If no characters detected and identified just continue with next iteration
            if not char_results: continue

            # Extracting the bounding box coordinates from the results of the character identification model and converting them to a NumPy array of integers
            if stream:
                chars_detected = [result.boxes.cpu().numpy().data.astype(int) for result in char_results][0]
            else:
                char_results[0].plot()
                chars_detected = char_results[0].boxes.cpu().data.numpy().astype(int)
            
            # Extract the detected character prediction results if there are 6 characters identified
            lp_text = ''
            chars_detected_ordenados = sorted(chars_detected, key=lambda char: char[0])
            for char in chars_detected_ordenados:
                # print(char)
                char_conf, char_cls = char[-2:]
                rc = char[:4]
                # Visualize the identified characters in a green box
                cv2.rectangle(lp_frame, rc[:2], rc[2:], (0, 255, 0), 1)
                lp_text+=labels_lp[char_cls]
            if len(lp_text)==6:
                print(lp_text)
                # Append the recognized characters to the list identified_characters to save them later on
                identified_characters.append(lp_text)
            if x:=re.match(re.compile(r'^[A-Z]{3}\d{2}[A-Z0-9]{1}$'), lp_text):
                text = x[0]
                print(text)
                                
            
            ###------------Visualization of the detected vehicles, license plates and characters-------------###
            # Visualize all the recognized characters
            for char in chars_detected:
                char_conf = char[-2]
                rc = char[:4]
                cv2.rectangle(lp_frame, rc[:2], rc[2:], (0, 255, 0), 1)
            # Visualize all the recognized license plates 
            cv2.rectangle(vehicle_frame, lp[:4][:2], lp[:4][2:], (0, 255, 255), 1)
            # Use for those recognitions an additional window showing the license plate frame
            cv2.imshow('License Plate',lp_frame)

        # Visualize the vehicle detection in the 640 pixels frame (adetection of motorcycles in blue, any other vehicle detection in white)
        if cls ==3 :
            cv2.rectangle(frame640, r[:2], r[2:], (255, 0, 0), 2)
        else:
            cv2.rectangle(frame640, r[:2], r[2:], (255, 255, 255), 2)

    # Show the 640 pixels frame while executing the detection              
    cv2.imshow("result", frame640)

    # Define options to close the windows and object recognition with the key "q" and to pause it with the key "p"
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break
    elif key == ord('p'):
        cv2.waitKey(0)

# Clean finish: Release the captured frame and close windows
cap.release()
cv2.destroyAllWindows()

# Save the recognized license plates list to a txt-file
# Open the file in write mode
with open("recognitions.txt", 'w') as file:
    # Write each string from the list to the file
    for item in identified_characters:
        file.write(item + '\n')





Video opened. Video properties: Frames per second = 23.976023976023978 | Video size (height x width): 1920 x 1080


# OCR

In [37]:
# Before executing this script download the test video from:
# https://drive.google.com/file/d/1KD8wfVymKUBqaDVKAdiEM0jh1bQHARn5/view?usp=sharing 

# Library import
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
from ultralytics import YOLO
import models
from functions import *
import re
from datetime import datetime
import os

# Create folder to save detected license plate images if it doesn't exist
output_folder = "fotos"
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Define and open externally pretrained models
# yv8n_model = YOLO(models.yolov8n)
yv8n_model = YOLO("yolo11n.pt")
lp_model = YOLO("models/plate_model.pt")
char_model = YOLO(models.chars3)

# Define the path of the video to be analyzed, load it and print video properties
video_path = 'videos/calle2.mp4'
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
print("Video opened. Video properties: Frames per second =", fps, "| Video size (height x width):", height, "x", width)
if not cap.isOpened():
    print("Error while opening video.")
    exit()

# We are working with a video (not a stream)
stream = False

# Define the vehicle classes to be recognized
labels = {2: 'car', 3: 'motorcycle', 5: 'bus', 7: 'truck'}
labels_lp = {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 
             10: 'A', 11: 'B', 12: 'C', 13: 'D', 14: 'E', 15: 'F', 16: 'G', 17: 'H', 18: 'I', 
             19: 'J', 20: 'K', 21: 'L', 22: 'M', 23: 'N', 24: 'O', 25: 'P', 26: 'R', 27: 'S', 
             28: 'T', 29: 'U', 30: 'V', 31: 'W', 32: 'X', 33: 'Y', 34: 'Z'}

# Create an empty list where all the recognized character identifications are stored
identified_characters = []

# Main processing loop: iterate until the end of the video
while True:
    ret, frame = cap.read()
    if not ret:
        print("End of video.")
        break

    # Scale frame down to lower resolution for faster vehicle recognition
    new_width = width
    new_height = int(height * 9 / 16)
    framehd = cv2.resize(frame, (0, 0), fx=1/3, fy=1/3)
    frame640 = cv2.resize(framehd, (0, 0), fx=1/2, fy=1/2)
    frame320 = cv2.resize(frame640, (0, 0), fx=1/2, fy=1/2)
        
    # Vehicle detection with YOLO model (processing every 2nd frame for speed)
    vehicles_results = yv8n_model(frame320, imgsz=320, stream=stream, verbose=False, conf=0.5, classes=[2,3,5,7], vid_stride=2) 
    if not vehicles_results: 
        continue

    # Extract bounding boxes from vehicle detection results
    if stream:
        vehicles_detected = [result.boxes.cpu().numpy().data.astype(int) for result in vehicles_results][0]
    else:
        vehicles_results[0].plot()
        vehicles_detected = vehicles_results[0].boxes.cpu().data.numpy().astype(int)

    # Process each detected vehicle for license plate detection
    for vehicle in vehicles_detected:
        # Recover higher quality image for LP detection
        conf, cls = vehicle[-2:]
        r = vehicle[:4] * 2
        vehicle_frame = frame640[r[1]:r[3], r[0]:r[2]]
    
        # License plate detection using pretrained model
        lp_results = lp_model(vehicle_frame, imgsz=1280, stream=stream, verbose=False, conf=0.5, iou=0.4)
        if not lp_results: 
            continue
        
        if stream:
            lps_detected = [result.boxes.cpu().numpy().data.astype(int) for result in lp_results][0]
        else:
            lps_detected = lp_results[0].boxes.cpu().data.numpy().astype(int)
            
        # For each detected license plate, process for character recognition
        for lp in lps_detected:
            lp_conf = lp[-2]
            rp = lp[:4] * 6  # Scale bounding box to original quality
            # Pre-selection: discard detections with an unexpected aspect ratio
            if (rp[2] - rp[0]) / (rp[3] - rp[1]) < 1.2:
                continue
            ro = r * 6
            lp_frame = frame[ro[1]:ro[3], ro[0]:ro[2]][rp[1]:rp[3], rp[0]:rp[2]]
            
            # Save the raw detected license plate image in the folder "fotos"
            current_time = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            filename_lp = f"{output_folder}/license_plate_{current_time}.png"
            cv2.imwrite(filename_lp, lp_frame)
            print("Guardado imagen de placa:", filename_lp)

            # Preprocess image for better character recognition (choose one of the methods)
            lp_frame_preprocessed_1_step_1 = preprocessing_1_segmentation(lp_frame)
            lp_frame_preprocessed_2_step_1 = preprocessing_2_segmentation(lp_frame)
            if lp_frame_preprocessed_2_step_1[:,:,2].mean() < 35:
                lp_frame_preprocessed_1_step_2 = preprocessing_1_color_correction(lp_frame_preprocessed_1_step_1)
                preprocessed_img_3channels = cv2.cvtColor(lp_frame_preprocessed_1_step_2, cv2.COLOR_GRAY2RGB)
                filename_ppi_1 = f"{output_folder}/lp_ppi_1_{current_time}.png"
                cv2.imwrite(filename_ppi_1, lp_frame_preprocessed_1_step_2)
                print("Guardada imagen preprocesada (1):", filename_ppi_1)
            else:
                lp_frame_preprocessed_2_step_2 = preprocessing_2_color_correction(lp_frame_preprocessed_2_step_1)
                preprocessed_img_3channels = cv2.cvtColor(lp_frame_preprocessed_2_step_2, cv2.COLOR_GRAY2RGB)
                filename_ppi_2 = f"{output_folder}/lp_ppi_2_{current_time}.png"
                cv2.imwrite(filename_ppi_2, lp_frame_preprocessed_2_step_2)
                print("Guardada imagen preprocesada (2):", filename_ppi_2)
            
            # (Opcional) Mostrar la imagen preprocesada
            cv2.imshow('Preprocessed license plate', preprocessed_img_3channels)

            # Identify characters on the license plate (no se guarda el resultado del OCR aquí, solo visualización)
            char_results = char_model(preprocessed_img_3channels, imgsz=224, stream=stream, verbose=False, iou=0.8, max_det=6, conf=0.2)
            if not char_results: 
                continue

            if stream:
                chars_detected = [result.boxes.cpu().numpy().data.astype(int) for result in char_results][0]
            else:
                char_results[0].plot()
                chars_detected = char_results[0].boxes.cpu().data.numpy().astype(int)
            
            lp_text = ''
            chars_detected_ordenados = sorted(chars_detected, key=lambda char: char[0])
            for char in chars_detected_ordenados:
                char_conf, char_cls = char[-2:]
                rc = char[:4]
                cv2.rectangle(lp_frame, rc[:2], rc[2:], (0, 255, 0), 1)
                lp_text += labels_lp[char_cls]
            if len(lp_text) == 6:
                print("Placa reconocida:", lp_text)
                identified_characters.append(lp_text)
            
            # Visualización adicional (dibujar sobre el frame)
            cv2.rectangle(vehicle_frame, lp[:4][:2], lp[:4][2:], (0, 255, 255), 1)
            cv2.imshow('License Plate', lp_frame)

        # Visualize vehicle detection on the 640 pixels frame (blue for motorcycles, white for others)
        if cls == 3:
            cv2.rectangle(frame640, r[:2], r[2:], (255, 0, 0), 2)
        else:
            cv2.rectangle(frame640, r[:2], r[2:], (255, 255, 255), 2)

    cv2.imshow("result", frame640)

    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break
    elif key == ord('p'):
        cv2.waitKey(0)

cap.release()
cv2.destroyAllWindows()

# Save the recognized license plates list to a txt-file
with open("recognitions.txt", 'w') as file:
    for item in identified_characters:
        file.write(item + '\n')



Video opened. Video properties: Frames per second = 30.0 | Video size (height x width): 1920 x 1080
Guardado imagen de placa: fotos/license_plate_20250315_203442_412401.png
Guardada imagen preprocesada (2): fotos/lp_ppi_2_20250315_203442_412401.png
Guardado imagen de placa: fotos/license_plate_20250315_203444_468320.png
Guardada imagen preprocesada (2): fotos/lp_ppi_2_20250315_203444_468320.png
Guardado imagen de placa: fotos/license_plate_20250315_203445_995784.png
Guardada imagen preprocesada (2): fotos/lp_ppi_2_20250315_203445_995784.png
Guardado imagen de placa: fotos/license_plate_20250315_203448_630273.png
Guardada imagen preprocesada (2): fotos/lp_ppi_2_20250315_203448_630273.png
Guardado imagen de placa: fotos/license_plate_20250315_203449_862508.png
Guardada imagen preprocesada (2): fotos/lp_ppi_2_20250315_203449_862508.png
End of video.


In [20]:
import cv2
import easyocr

# Crear el objeto lector; puedes especificar los idiomas (en este caso 'en' para inglés)
reader = easyocr.Reader(['en'], gpu=False)

# Ruta de la imagen (puede ser una imagen de la placa recortada previamente)
image_path = "fotos\lp_ppi_2_20250315_122540_544457.png"
img = cv2.imread(image_path)

# Opcional: Preprocesamiento (por ejemplo, convertir a escala de grises, ajustar contraste, etc.)
# img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Leer el texto en la imagen
results = reader.readtext(img)

# Iterar sobre los resultados para mostrarlos
for (bbox, text, confidence) in results:
    print(f"Texto: {text} - Confianza: {confidence:.2f}")
    # Dibujar el rectángulo en la imagen
    top_left = tuple(map(int, bbox[0]))
    bottom_right = tuple(map(int, bbox[2]))
    cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)
    # Escribir el texto detectado sobre la imagen
    cv2.putText(img, text, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

# Mostrar la imagen con las detecciones
cv2.imshow("OCR EasyOCR", img)
cv2.waitKey(0)
cv2.destroyAllWindows()


Using CPU. Note: This module is much faster with a GPU.


Texto: ISEQ - Confianza: 0.47


In [21]:
import cv2
import easyocr
import os

# Crear el objeto lector para OCR; se especifica el idioma (en este caso, inglés)
reader = easyocr.Reader(['en'], gpu=False)

# Ruta de la carpeta que contiene las imágenes
folder_path = "fotos"

# Iterar sobre todos los archivos de la carpeta
for filename in os.listdir(folder_path):
    # Procesar solo archivos de imagen (png, jpg, jpeg)
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_path = os.path.join(folder_path, filename)
        img = cv2.imread(image_path)
        if img is None:
            print(f"No se pudo leer la imagen {image_path}")
            continue

        # Aplicar OCR a la imagen
        results = reader.readtext(img)

        # Dibujar los resultados en la imagen y mostrar información en consola
        for (bbox, text, confidence) in results:
            print(f"Imagen: {filename} - Texto: {text} - Confianza: {confidence:.2f}")
            # Convertir las coordenadas a enteros
            top_left = tuple(map(int, bbox[0]))
            bottom_right = tuple(map(int, bbox[2]))
            # Dibujar el rectángulo
            cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)
            # Escribir el texto sobre la imagen
            cv2.putText(img, text, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        # Mostrar la imagen con las detecciones
        cv2.imshow("OCR EasyOCR", img)
        key = cv2.waitKey(0)  # Espera hasta que se presione una tecla
        if key == 27:  # Si se presiona la tecla ESC, salir del bucle
            break

cv2.destroyAllWindows()


Using CPU. Note: This module is much faster with a GPU.


Imagen: license_plate_20250315_122535_040308.png - Texto: iSE 08) - Confianza: 0.27
Imagen: license_plate_20250315_122536_151589.png - Texto: FSE 081 - Confianza: 0.39
Imagen: license_plate_20250315_122536_807285.png - Texto: fSE 0&1 - Confianza: 0.13
Imagen: license_plate_20250315_122537_665457.png - Texto: FTSE 0g1 - Confianza: 0.08
Imagen: license_plate_20250315_122538_385675.png - Texto: fTSE 0g1 - Confianza: 0.28
Imagen: license_plate_20250315_122538_736652.png - Texto: FtsE 0g1 - Confianza: 0.04
Imagen: license_plate_20250315_122539_273356.png - Texto: TTSE 061 - Confianza: 0.60
Imagen: license_plate_20250315_122539_820857.png - Texto: FTSE 081 - Confianza: 0.39
Imagen: license_plate_20250315_122540_544457.png - Texto: TTSE 081 - Confianza: 0.34


In [39]:
import os
import cv2
import numpy as np
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.core.credentials import AzureKeyCredential
import keys

# Configuración: Se obtienen las credenciales (endpoint y clave) de tu recurso de Azure
endpoint = keys.ocr_endpoint           # Ejemplo: "https://<tu-recurso>.cognitiveservices.azure.com"
subscription_key = keys.ocr_subscription_key

# Crear el cliente de análisis de imágenes de Azure
client = ImageAnalysisClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(subscription_key)
)

# Carpeta que contiene las imágenes locales
folder_path = "detected_vehicles/"

# Recorrer cada archivo de imagen en la carpeta
for filename in os.listdir(folder_path):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_path = os.path.join(folder_path, filename)
        # Leer la imagen con OpenCV (para mostrarla posteriormente con anotaciones)
        img = cv2.imread(image_path)
        if img is None:
            print(f"No se pudo leer la imagen {image_path}")
            continue

        # Abrir la imagen en modo binario y leer sus bytes
        with open(image_path, "rb") as f:
            image_bytes = f.read()

        # Llamar al servicio OCR de Azure usando el método analyze (en lugar de analyze_from_url)
        # Se solicitan las características CAPTION y READ (OCR)
        result = client.analyze(
            image_data=image_bytes,
            visual_features=[ VisualFeatures.READ, VisualFeatures.OBJECTS],
            gender_neutral_caption=True  # Opcional
        )

        print(f"Resultados para la imagen: {filename}")
        # Mostrar el caption si existe
        if result.caption is not None:
            print(f" Caption: '{result.caption.text}', Confidence {result.caption.confidence:.4f}")
        else:
            print(" No se detectó caption.")

        # Procesar el resultado de OCR (Read)
        if result.read is not None and len(result.read.blocks) > 0:
            for block in result.read.blocks:
                for line in block.lines:
                    line_text = " ".join([word.text for word in line.words])
                    print(f" OCR - Línea: '{line_text}'")
                    # Dibujar los bounding boxes de cada palabra sobre la imagen
                    for word in line.words:
                        # Se asume que word.bounding_polygon es una lista de puntos con atributos x e y
                        pts = [(int(point.x), int(point.y)) for point in word.bounding_polygon]
                        pts_np = np.array(pts, np.int32).reshape((-1, 1, 2))
                        cv2.polylines(img, [pts_np], isClosed=True, color=(0, 255, 0), thickness=2)
                        cv2.putText(img, word.text, (pts[0][0], pts[0][1] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        else:
            print(" No se detectó texto (OCR).")

        # Mostrar la imagen con las anotaciones
        cv2.imshow("Azure OCR", img)
        key = cv2.waitKey(0)
        if key == 27:  # Si se presiona ESC, salir
            break

cv2.destroyAllWindows()


Resultados para la imagen: vehicle_11.jpg
 No se detectó caption.
 OCR - Línea: '10577'
Resultados para la imagen: vehicle_12.jpg
 No se detectó caption.
 No se detectó texto (OCR).
Resultados para la imagen: vehicle_13.jpg
 No se detectó caption.
 OCR - Línea: 'INO.577'
Resultados para la imagen: vehicle_15.jpg
 No se detectó caption.
 No se detectó texto (OCR).
Resultados para la imagen: vehicle_17.jpg
 No se detectó caption.
 No se detectó texto (OCR).
Resultados para la imagen: vehicle_18.jpg
 No se detectó caption.
 No se detectó texto (OCR).
Resultados para la imagen: vehicle_2.jpg
 No se detectó caption.
 No se detectó texto (OCR).
Resultados para la imagen: vehicle_21.jpg
 No se detectó caption.
 No se detectó texto (OCR).
Resultados para la imagen: vehicle_24.jpg
 No se detectó caption.
 OCR - Línea: 'LIX 570'
Resultados para la imagen: vehicle_25.jpg
 No se detectó caption.
 OCR - Línea: 'A'
 OCR - Línea: 'LXX 570'
Resultados para la imagen: vehicle_26.jpg
 No se detectó capt