#Paquetes necesarios

In [2]:
import cv2  
import math 

from ultralytics import YOLO



Desde cámara, detección con yolov8 y modelo nano

In [4]:
# Carga del modelo
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]


# Captura desde lawebcam
vid = cv2.VideoCapture(0)
  
while(True):      
    # fotograma a fotograma
    ret, img = vid.read()
  
    # si hay imagen válida
    if ret:  
        # Perform inference on an image
        results = model(img, stream=True)
        
        # Para cada detección
        for r in results:
            boxes = r.boxes

            for box in boxes:
                # Contenedor
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values
                
                # Confianza
                confidence = math.ceil((box.conf[0]*100))/100
                print("Confidence --->",confidence)

                # Clase
                cls = int(box.cls[0])
                print("Class name -->", classNames[cls])

                # Convierte identificador numérico de clase a un color RGB
                escala = int((cls / len(classNames)) * 255 * 3)
                if escala >= 255*2:
                    R = 255
                    G = 255
                    B = escala - 255*2
                else:
                    if escala >= 255:
                        R = 255
                        G = escala - 255
                        B = 0
                    else:
                        R = escala
                        G = 0
                        B = 0

                # Dibuja el contenedor y clase
                cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
                cv2.putText(img, classNames[cls] , [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

        # Muestra fotograma
        cv2.imshow('Vid', img)
    
    # Detenemos pulsado ESC
    if cv2.waitKey(20) == 27:
        break
  
# Libera el objeto de captura
vid.release()
# Destruye ventanas
cv2.destroyAllWindows()


0: 480x640 1 person, 1 dog, 205.1ms
Speed: 21.8ms preprocess, 205.1ms inference, 23.7ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.88
Class name --> person
Confidence ---> 0.35
Class name --> dog


0: 480x640 2 persons, 158.6ms
Speed: 2.0ms preprocess, 158.6ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 141.2ms


Confidence ---> 0.93
Class name --> person
Confidence ---> 0.89
Class name --> person
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.87
Class name --> person


Speed: 2.3ms preprocess, 141.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 145.0ms
Speed: 2.0ms preprocess, 145.0ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 125.5ms
Speed: 1.5ms preprocess, 125.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.94
Class name --> person
Confidence ---> 0.88
Class name --> person
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.88
Class name --> person



0: 480x640 2 persons, 124.4ms
Speed: 0.9ms preprocess, 124.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 125.2ms
Speed: 1.0ms preprocess, 125.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.93
Class name --> person
Confidence ---> 0.87
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.89
Class name --> person



0: 480x640 2 persons, 132.4ms
Speed: 1.0ms preprocess, 132.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 132.8ms
Speed: 3.1ms preprocess, 132.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.93
Class name --> person
Confidence ---> 0.89
Class name --> person
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.91
Class name --> person


0: 480x640 2 persons, 202.7ms
Speed: 2.5ms preprocess, 202.7ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 bench, 118.0ms
Speed: 2.0ms preprocess, 118.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.93
Class name --> person
Confidence ---> 0.89
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.33
Class name --> bench


0: 480x640 2 persons, 1 bench, 120.1ms
Speed: 1.5ms preprocess, 120.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 121.5ms
Speed: 1.0ms preprocess, 121.5ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.94
Class name --> person
Confidence ---> 0.9
Class name --> person
Confidence ---> 0.26
Class name --> bench
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.94
Class name --> person



0: 480x640 3 persons, 1 bench, 145.9ms
Speed: 1.5ms preprocess, 145.9ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 bench, 145.7ms
Speed: 2.5ms preprocess, 145.7ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.91
Class name --> person
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.27
Class name --> bench
Confidence ---> 0.26
Class name --> person
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.92
Class name --> person
Confidence ---> 0.31
Class name --> bench



0: 480x640 2 persons, 1 bench, 134.4ms
Speed: 1.6ms preprocess, 134.4ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 119.9ms
Speed: 1.9ms preprocess, 119.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.95
Class name --> person
Confidence ---> 0.9
Class name --> person
Confidence ---> 0.28
Class name --> bench
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.86
Class name --> person


0: 480x640 2 persons, 1 laptop, 122.4ms
Speed: 2.6ms preprocess, 122.4ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 126.3ms
Speed: 1.0ms preprocess, 126.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.87
Class name --> person
Confidence ---> 0.85
Class name --> person
Confidence ---> 0.43
Class name --> laptop
Confidence ---> 0.87
Class name --> person
Confidence ---> 0.78
Class name --> person



0: 480x640 3 persons, 133.0ms
Speed: 2.0ms preprocess, 133.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 persons, 132.3ms
Speed: 2.4ms preprocess, 132.3ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.84
Class name --> person
Confidence ---> 0.83
Class name --> person
Confidence ---> 0.28
Class name --> person
Confidence ---> 0.81
Class name --> person
Confidence ---> 0.73
Class name --> person
Confidence ---> 0.35
Class name --> person


0: 480x640 2 persons, 136.7ms
Speed: 2.0ms preprocess, 136.7ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 118.1ms
Speed: 2.6ms preprocess, 118.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.95
Class name --> person
Confidence ---> 0.85
Class name --> person
Confidence ---> 0.96
Class name --> person
Confidence ---> 0.88
Class name --> person


0: 480x640 2 persons, 121.7ms
Speed: 2.5ms preprocess, 121.7ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 110.6ms
Speed: 2.7ms preprocess, 110.6ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.96
Class name --> person
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.95
Class name --> person
Confidence ---> 0.87
Class name --> person


0: 480x640 2 persons, 135.3ms
Speed: 4.0ms preprocess, 135.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 139.5ms
Speed: 3.0ms preprocess, 139.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.93
Class name --> person
Confidence ---> 0.9
Class name --> person
Confidence ---> 0.92
Class name --> person
Confidence ---> 0.87
Class name --> person



0: 480x640 2 persons, 139.5ms
Speed: 1.0ms preprocess, 139.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 121.1ms
Speed: 1.5ms preprocess, 121.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.93
Class name --> person
Confidence ---> 0.92
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.92
Class name --> person


0: 480x640 2 persons, 176.3ms
Speed: 2.6ms preprocess, 176.3ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 132.2ms
Speed: 1.0ms preprocess, 132.2ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.94
Class name --> person
Confidence ---> 0.92
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.92
Class name --> person


0: 480x640 2 persons, 134.7ms
Speed: 2.3ms preprocess, 134.7ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 133.9ms
Speed: 1.2ms preprocess, 133.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.91
Class name --> person
Confidence ---> 0.9
Class name --> person
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.89
Class name --> person


0: 480x640 2 persons, 137.4ms
Speed: 2.1ms preprocess, 137.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 116.3ms
Speed: 1.0ms preprocess, 116.3ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.89
Class name --> person
Confidence ---> 0.85
Class name --> person
Confidence ---> 0.89
Class name --> person
Confidence ---> 0.86
Class name --> person


0: 480x640 2 persons, 121.2ms
Speed: 1.8ms preprocess, 121.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 130.2ms


Confidence ---> 0.89
Class name --> person
Confidence ---> 0.87
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.91
Class name --> person


Speed: 1.0ms preprocess, 130.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 140.8ms
Speed: 2.5ms preprocess, 140.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 134.9ms
Speed: 2.0ms preprocess, 134.9ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.86
Class name --> person
Confidence ---> 0.6
Class name --> person
Confidence ---> 0.88
Class name --> person
Confidence ---> 0.85
Class name --> person



0: 480x640 2 persons, 126.1ms
Speed: 1.0ms preprocess, 126.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.89
Class name --> person
Confidence ---> 0.79
Class name --> person


Reconocimiento decaracteres tras instalar easyocr

In [276]:
import easyocr

#Carga del modelo de lengua
reader = easyocr.Reader(['es'], gpu=False) 

#Reconocimiento de una imagen
result = reader.readtext('toy.tif')
print(result)

#Con restricción de caracteres reconocibles
#result = reader.readtext('toy.tif', allowlist ='0123456789')

Using CPU. Note: This module is much faster with a GPU.


[([[49, 85], [617, 85], [617, 147], [49, 147]], 'Hasta el infinito y más allá', 0.6744628105513019)]


In [13]:
import sys
for path in sys.path:
    print(path)



c:\Users\Eric\Desktop\vc-5\P5
c:\Users\Eric\anaconda3\envs\VC_P1\python311.zip
c:\Users\Eric\anaconda3\envs\VC_P1\DLLs
c:\Users\Eric\anaconda3\envs\VC_P1\Lib
c:\Users\Eric\anaconda3\envs\VC_P1

C:\Users\Eric\AppData\Roaming\Python\Python311\site-packages
c:\Users\Eric\anaconda3\envs\VC_P1\Lib\site-packages
c:\Users\Eric\anaconda3\envs\VC_P1\Lib\site-packages\win32
c:\Users\Eric\anaconda3\envs\VC_P1\Lib\site-packages\win32\lib
c:\Users\Eric\anaconda3\envs\VC_P1\Lib\site-packages\Pythonwin


Prueba yolo con imagen

In [5]:
# Carga del modelo
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Carga la imagen que deseas procesar
image_path = 'images.png'  # Reemplaza 'tu_imagen.jpg' por la ruta de tu imagen
img = cv2.imread(image_path)

# Realiza inferencia en la imagen
results = model(img)

# Para cada detección
for r in results:
    boxes = r.boxes

    for box in boxes:
        # Contenedor
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # Convierte a valores enteros

        # Confianza
        confidence = math.ceil((box.conf[0] * 100)) / 100
        print("Confidence --->", confidence)

        # Clase
        cls = int(box.cls[0])
        print("Class name -->", classNames[cls])

        # Convierte el identificador numérico de la clase en un color RGB
        escala = int((cls / len(classNames)) * 255 * 3)
        if escala >= 255 * 2:
            R = 255
            G = 255
            B = escala - 255 * 2
        else:
            if escala >= 255:
                R = 255
                G = escala - 255
                B = 0
            else:
                R = escala
                G = 0
                B = 0

        # Dibuja el contenedor y la clase
        cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
        cv2.putText(img, classNames[cls], [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

# Muestra la imagen con las detecciones
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()




0: 416x640 4 persons, 426.5ms
Speed: 46.7ms preprocess, 426.5ms inference, 39.6ms postprocess per image at shape (1, 3, 416, 640)


Confidence ---> 0.89
Class name --> person
Confidence ---> 0.79
Class name --> person
Confidence ---> 0.76
Class name --> person
Confidence ---> 0.61
Class name --> person


Método 1: Solo falta el ocr, no mezcles el codigo de abajo con lo q vayas a hacer,
Crea un bloque nuevo copia y pega y empieza desde ahí

In [378]:
import cv2
import math
from ultralytics import YOLO  

# Función para verificar si un contorno tiene forma rectangular aproximada
def is_approximately_rectangular(contour, epsilon=0.009):
    perimeter = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon * perimeter, True)
    return len(approx) == 4

# Carga del modelo YOLO
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Carga la imagen que deseas procesar
image_path = 'prueba.jpg'  
img = cv2.imread(image_path)

# Realiza inferencia en la imagen
results = model(img)

# Para cada detección
for r in results:
    boxes = r.boxes

    for box in boxes:
        # Contenedor
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # Convierte a valores enteros

        # Confianza
        confidence = math.ceil((box.conf[0] * 100)) / 100
        print("Confidence --->", confidence)

        # Clase
        cls = int(box.cls[0])
        print("Class name -->", classNames[cls])

        # Convierte el identificador numérico de la clase en un color RGB
        escala = int((cls / len(classNames)) * 255 * 3)
        if escala >= 255 * 2:
            R = 255
            G = 255
            B = escala - 255 * 2
        else:
            if escala >= 255:
                R = 255
                G = escala - 255
                B = 0
            else:
                R = escala
                G = 0
                B = 0

        # Dibuja el contenedor y la clase
        cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
        cv2.putText(img, classNames[cls], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

        # Obtén la ROI
        roi = img[y1:y2, x1:x2]

        # Convierte ROI a escala de grises
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Aplica umbral para encontrar contornos
        _, thresh = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY)

        # Encuentra contornos en la ROI
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Umbral de área para considerar como contorno grande (ajusta este valor según tus necesidades)
        area_threshold = 1000

        # Filtra los contornos que tienen forma rectangular aproximada y un área grande
        filtered_contours = [contour for contour in contours if is_approximately_rectangular(contour) and cv2.contourArea(contour) > area_threshold]

        # Dibuja los contornos filtrados en la ROI
        cv2.drawContours(roi, filtered_contours, -1, (0, 255, 0), 2)


# Muestra la imagen con las detecciones y los contornos filtrados
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()



0: 256x640 2 cars, 181.8ms
Speed: 1.0ms preprocess, 181.8ms inference, 7.1ms postprocess per image at shape (1, 3, 256, 640)


Confidence ---> 0.94
Class name --> car
Confidence ---> 0.34
Class name --> car


Prueba contornos 1 imagen Buena

In [8]:
import cv2
import math
from ultralytics import YOLO  
import string
# Initialize the OCR reader
reader = easyocr.Reader(['en'], gpu=False)

# Mapping dictionaries for character conversion
dict_char_to_int = {'O': '0',
                    'I': '1',
                    'J': '3',
                    'A': '4',
                    'G': '6',
                    'S': '5'}

dict_int_to_char = {'0': 'O',
                    '1': 'I',
                    '3': 'J',
                    '4': 'A',
                    '6': 'G',
                    '5': 'S'}

def license_complies_format(text):
    """
    Check if the license plate text complies with the required format.

    Args:
        text (str): License plate text.

    Returns:
        bool: True if the license plate complies with the format, False otherwise.
    """
    if len(text) != 7:
        return False

    if (text[0] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[0] in dict_char_to_int.keys()) and \
       (text[1] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[1] in dict_char_to_int.keys()) and \
       (text[2] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[2] in dict_char_to_int.keys()) and \
       (text[3] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[3] in dict_char_to_int.keys()) and \
       (text[4] in string.ascii_uppercase or text[4] in dict_int_to_char.keys()) and \
       (text[5] in string.ascii_uppercase or text[5] in dict_int_to_char.keys()) and \
       (text[6] in string.ascii_uppercase or text[6] in dict_int_to_char.keys()):
        return True
    else:
        return False


def format_license(text):
    """
    Format the license plate text by converting characters using the mapping dictionaries.

    Args:
        text (str): License plate text.

    Returns:
        str: Formatted license plate text.
    """
    license_plate_ = ''
    mapping = {0: dict_char_to_int, 1: dict_char_to_int, 2: dict_char_to_int, 3: dict_char_to_int,
               4: dict_int_to_char, 5: dict_int_to_char, 6: dict_int_to_char}
    for j in [0, 1, 2, 3, 4, 5, 6]:
        if text[j] in mapping[j].keys():
            license_plate_ += mapping[j][text[j]]
        else:
            license_plate_ += text[j]

    return license_plate_



def read_license_plate(license_plate_crop):
    """
    Read the license plate text from the given cropped image.

    Args:
        license_plate_crop (PIL.Image.Image): Cropped image containing the license plate.

    Returns:
        tuple: Tuple containing the formatted license plate text and its confidence score.
    """

    detections = reader.readtext(license_plate_crop)
    # Inicializa una variable para almacenar el texto combinado de todas las detecciones
    combined_text = ""

    # Itera a través de las detecciones
    for detection in detections:
        bbox, text, score = detection

        # Convierte a mayúsculas y elimina espacios en blanco
        text = text.upper().replace(' ', '')
        print("soy " + text)
        # Agrega el texto de la detección actual a la variable combinada
        combined_text += text
        print(combined_text)
        if license_complies_format(combined_text):
            return format_license(combined_text), score

    return None, None




# Función para verificar si un contorno tiene forma rectangular aproximada
def is_approximately_rectangular(contour, epsilon=0.009):
    perimeter = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon * perimeter, True)
    return len(approx) == 4

# Carga del modelo YOLO
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Carga la imagen que deseas procesar
image_path = 'prueba.jpg'  
img = cv2.imread(image_path)

# Realiza inferencia en la imagen
results = model(img)

# Para cada detección
for r in results:
    boxes = r.boxes

    for box in boxes:
        # Contenedor
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # Convierte a valores enteros

        # Confianza
        confidence = math.ceil((box.conf[0] * 100)) / 100
        print("Confidence --->", confidence)

        # Clase
        cls = int(box.cls[0])
        print("Class name -->", classNames[cls])

        # Convierte el identificador numérico de la clase en un color RGB
        escala = int((cls / len(classNames)) * 255 * 3)
        if escala >= 255 * 2:
            R = 255
            G = 255
            B = escala - 255 * 2
        else:
            if escala >= 255:
                R = 255
                G = escala - 255
                B = 0
            else:
                R = escala
                G = 0
                B = 0

        # Dibuja el contenedor y la clase
        cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
        cv2.putText(img, classNames[cls], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

        # Obtén la ROI
        roi = img[y1:y2, x1:x2]

        # Convierte ROI a escala de grises
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Aplica umbral para encontrar contornos
        _, thresh = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY)

        # Encuentra contornos en la ROI
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Umbral de área para considerar como contorno grande (ajusta este valor según tus necesidades)
        area_threshold = 1000

        # Filtra los contornos que tienen forma rectangular aproximada y un área grande
        filtered_contours = [contour for contour in contours if is_approximately_rectangular(contour) and cv2.contourArea(contour) > area_threshold]

        # Dibuja los contornos filtrados en la ROI
        cv2.drawContours(roi, filtered_contours, -1, (0, 255, 0), 2)


# Muestra la imagen con las detecciones y los contornos filtrados
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()


Using CPU. Note: This module is much faster with a GPU.



0: 256x640 2 cars, 76.4ms
Speed: 2.0ms preprocess, 76.4ms inference, 1.0ms postprocess per image at shape (1, 3, 256, 640)


Confidence ---> 0.94
Class name --> car
Confidence ---> 0.34
Class name --> car


In [7]:
import os
# Directorio base donde se encuentra la carpeta "dataset"
base_dir = ''

# Ruta completa de la carpeta "train" dentro de la estructura
train_dir = os.path.join(base_dir, 'dataset', 'train', 'images')

# Crear una lista para almacenar los nombres de los archivos que cumplan el patrón
file_list = []

# Iterar a través de los archivos en la carpeta "train"
for root, dirs, files in os.walk(train_dir):
    for filename in files:
        if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp')):
            # Comprobar si el archivo es una imagen (puedes agregar más extensiones si es necesario)
            file_list.append(os.path.join(root, filename))
print(file_list)
# Imprimir la lista de archivos que cumplen el patrón
for file_path in file_list:
    print(file_path)


['dataset\\train\\images\\0802HFP_jpg.rf.30755e346cf1455361344eeeceb55cb1.jpg', 'dataset\\train\\images\\0802HFP_jpg.rf.30cd9ca231b6667068ad26b808dd99da.jpg', 'dataset\\train\\images\\0802HFP_jpg.rf.ba381edbab2dcddf24f9739d99ae26d4.jpg', 'dataset\\train\\images\\1159FPG_jpg.rf.165f7f6df48b376a9863eab1befe71bf.jpg', 'dataset\\train\\images\\1159FPG_jpg.rf.3ff8b432d3fb4ab2537c7f6ec80ce560.jpg', 'dataset\\train\\images\\1159FPG_jpg.rf.8ac0d77c1a7d6f3b8f208fe9ec6f887f.jpg', 'dataset\\train\\images\\1319FSX_jpg.rf.42cac49225e519a2c8e5d1ef1bc1a9df.jpg', 'dataset\\train\\images\\1319FSX_jpg.rf.8aa60298e9d1b1a0ec29e0189c61bf40.jpg', 'dataset\\train\\images\\1319FSX_jpg.rf.f37800d74affae5e5dc10a94693dbfbf.jpg', 'dataset\\train\\images\\15989862207427_jpg.rf.8f731f82cf1639282dc1a5c81fd7d483.jpg', 'dataset\\train\\images\\15989862207427_jpg.rf.8ff0c28508f614a0427c9a7bf48136db.jpg', 'dataset\\train\\images\\15989862207427_jpg.rf.d4400cbe860cbdc49c9f7a7ac785a91c.jpg', 'dataset\\train\\images\\16370

Prueba con dataset contornos buena


In [19]:
import cv2
import math
from ultralytics import YOLO  # Asegúrate de importar el módulo YOLO correcto
import string
import easyocr
import numpy as np
import os

# Directorio base donde se encuentra la carpeta "dataset"
base_dir = ''

# Ruta completa de la carpeta "train" dentro de la estructura
train_dir = os.path.join(base_dir, 'dataset_2', 'valid', 'images')

# Crear una lista para almacenar los nombres de los archivos de imágenes
image_files = []

# Iterar a través de los archivos en la carpeta "train"
for root, dirs, files in os.walk(train_dir):
    for filename in files:
        if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp')):
            # Comprobar si el archivo es una imagen (puedes agregar más extensiones si es necesario)
            image_files.append(os.path.join(root, filename))

# Carga del modelo YOLO
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]
# Función para verificar si un contorno tiene forma rectangular aproximada
def is_approximately_rectangular(contour, epsilon=0.009):
    perimeter = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon * perimeter, True)
    return len(approx) == 4
# Initialize the OCR reader
reader = easyocr.Reader(['en'], gpu=False)

# Mapping dictionaries for character conversion
dict_char_to_int = {'O': '0',
                    'I': '1',
                    'J': '3',
                    'A': '4',
                    'G': '6',
                    'S': '5',
                    'B': '3'}

dict_int_to_char = {'0': 'O',
                    '1': 'I',
                    '3': 'J',
                    '4': 'A',
                    '6': 'G',
                    '5': 'S'}

def license_complies_format(text):
    """
    Check if the license plate text complies with the required format.

    Args:
        text (str): License plate text.

    Returns:
        bool: True if the license plate complies with the format, False otherwise.
    """
    if len(text) != 7:
        return False

    if (text[0] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[0] in dict_char_to_int.keys()) and \
       (text[1] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[1] in dict_char_to_int.keys()) and \
       (text[2] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[2] in dict_char_to_int.keys()) and \
       (text[3] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[3] in dict_char_to_int.keys()) and \
       (text[4] in string.ascii_uppercase or text[4] in dict_int_to_char.keys()) and \
       (text[5] in string.ascii_uppercase or text[5] in dict_int_to_char.keys()) and \
       (text[6] in string.ascii_uppercase or text[6] in dict_int_to_char.keys()):
        return True
    else:
        return False


def format_license(text):
    """
    Format the license plate text by converting characters using the mapping dictionaries.

    Args:
        text (str): License plate text.

    Returns:
        str: Formatted license plate text.
    """
    license_plate_ = ''
    mapping = {0: dict_char_to_int, 1: dict_char_to_int, 2: dict_char_to_int, 3: dict_char_to_int,
               4: dict_int_to_char, 5: dict_int_to_char, 6: dict_int_to_char}
    for j in [0, 1, 2, 3, 4, 5, 6]:
        if text[j] in mapping[j].keys():
            license_plate_ += mapping[j][text[j]]
        else:
            license_plate_ += text[j]

    return license_plate_


def read_license_plate(license_plate_crop):
    """
    Read the license plate text from the given cropped image.

    Args:
        license_plate_crop (PIL.Image.Image): Cropped image containing the license plate.

    Returns:
        tuple: Tuple containing the formatted license plate text and its confidence score.
    """

    detections = reader.readtext(license_plate_crop)
    # Inicializa una variable para almacenar el texto combinado de todas las detecciones
    combined_text = ""

    # Itera a través de las detecciones
    for detection in detections:
        bbox, text, score = detection

        # Convierte a mayúsculas y elimina espacios en blanco
        text = text.upper().replace(' ', '')
        print("soy " + text)
        # Agrega el texto de la detección actual a la variable combinada
        combined_text += text
        print(combined_text)
        if license_complies_format(combined_text):
            return format_license(combined_text), score

    return None, None


# Iterar a través de la lista de archivos de imágenes
for image_path in image_files:
    # Carga la imagen que deseas procesar
    img = cv2.imread(image_path)

    # Realiza inferencia en la imagen
    results = model(img)

    # Para cada detección
    for r in results:
        boxes = r.boxes

        for box in boxes:
            # Contenedor
            x1, y1, x2, y2 = box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # Convierte a valores enteros

            # Confianza
            confidence = math.ceil((box.conf[0] * 100)) / 100
            print("Confidence --->", confidence)

            # Clase
            cls = int(box.cls[0])
            print("Class name -->", classNames[cls])

            # Convierte el identificador numérico de la clase en un color RGB
            escala = int((cls / len(classNames)) * 255 * 3)
            if escala >= 255 * 2:
                R = 255
                G = 255
                B = escala - 255 * 2
            else:
                if escala >= 255:
                    R = 255
                    G = escala - 255
                    B = 0
                else:
                    R = escala
                    G = 0
                    B = 0

            # Dibuja el contenedor y la clase
            cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
            cv2.putText(img, classNames[cls], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

            # Obtén la ROI
            roi = img[y1:y2, x1:x2]

            # Convierte ROI a escala de grises
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

            # Aplica umbral para encontrar contornos
            _, thresh = cv2.threshold(gray, 123, 255, cv2.THRESH_BINARY)

            # Encuentra contornos en la ROI
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            # Crear una imagen en blanco del mismo tamaño que la ROI
            contour_image = np.zeros_like(roi)

            # Dibujar los contornos en la imagen en blanco
            cv2.drawContours(contour_image, contours, -1, (255, 255, 255), 2)

            # Mostrar la imagen con los contornos
            cv2.imshow('License Plate Contours', contour_image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
            # Umbral de área para considerar como contorno grande (ajusta este valor según tus necesidades)
            area_threshold = 1000

            # Filtra los contornos que tienen forma rectangular aproximada y un área grande
            filtered_contours = [contour for contour in contours if is_approximately_rectangular(contour) and cv2.contourArea(contour) > area_threshold]


            if len(filtered_contours )!= 0:
                # Encuentra las coordenadas del rectángulo que encapsula el primer contorno filtrado
                x, y, w, h = cv2.boundingRect(filtered_contours[0])
                # Utiliza las coordenadas para extraer la región de interés (ROI)
                license_plate_crop = roi[y:y + h, x:x + w]
                # read license plate number
                license_plate_text, license_plate_text_score = read_license_plate(license_plate_crop)
                            # Muestra la última matrícula detectada incluso si no hay una nueva detección
                # Después de procesar la imagen y obtener la ROI
                print(license_plate_text)
                cv2.imshow('License Plate Crop', license_plate_crop)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

                if license_plate_text is not None:
                    cv2.putText(roi, license_plate_text, (x, y - 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            # Dibuja los contornos filtrados en la ROI
            cv2.drawContours(roi, filtered_contours, -1, (0, 255, 0), 2)

            # Verifica si las coordenadas están dentro de los límites de la imagen
            if x1 >= 0 and y1 >= 0 and x2 <= roi.shape[1] and y2 <= roi.shape[0]:
                # Extrae la ROI
                license_plate_crop = roi[y1:y2, x1:x2]
                # Muestra la ROI antes de la conversión a escala de grises
                cv2.imshow('ROI', license_plate_crop)
                cv2.waitKey(0)
                # Verifica las dimensiones de la ROI
                print("Dimensiones de la ROI:", license_plate_crop.shape)

                # Verifica si la ROI no está vacía
                if not license_plate_crop.empty():
                    # Convierte la ROI a escala de grises
                    gray_plate = cv2.cvtColor(license_plate_crop, cv2.COLOR_BGR2GRAY)

                    # Verifica las dimensiones después de la conversión a escala de grises
                    print("Dimensiones después de la conversión a escala de grises:", gray_plate.shape)
                else:
                    print("La ROI está vacía.")
            else:
                print("Coordenadas fuera de los límites de la imagen.")
          

    # Muestra la imagen con las detecciones y los contornos filtrados
    cv2.imshow('Image', img)
    cv2.waitKey(0)

# Cierra todas las ventanas de visualización
cv2.destroyAllWindows()


Using CPU. Note: This module is much faster with a GPU.

0: 640x640 2 cars, 197.2ms
Speed: 3.0ms preprocess, 197.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.93
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.88
Class name --> car
Coordenadas fuera de los límites de la imagen.



0: 640x640 1 bus, 173.9ms
Speed: 6.5ms preprocess, 173.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.34
Class name --> bus
Coordenadas fuera de los límites de la imagen.



0: 640x640 1 car, 2 trucks, 205.4ms
Speed: 6.1ms preprocess, 205.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.67
Class name --> truck
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.44
Class name --> truck
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.29
Class name --> car
Coordenadas fuera de los límites de la imagen.



0: 640x640 1 car, 145.7ms
Speed: 4.0ms preprocess, 145.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.85
Class name --> car
Coordenadas fuera de los límites de la imagen.



0: 640x640 2 cars, 149.0ms
Speed: 4.9ms preprocess, 149.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.83
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.59
Class name --> car
soy 0063
0063
soy ASJ
0063ASJ
0063ASJ
Coordenadas fuera de los límites de la imagen.



0: 640x640 2 cars, 209.3ms
Speed: 6.0ms preprocess, 209.3ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.7
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.49
Class name --> car
Coordenadas fuera de los límites de la imagen.



0: 640x640 1 car, 161.7ms
Speed: 5.6ms preprocess, 161.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.71
Class name --> car
Coordenadas fuera de los límites de la imagen.



0: 640x640 1 car, 144.5ms
Speed: 5.0ms preprocess, 144.5ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.76
Class name --> car
Coordenadas fuera de los límites de la imagen.



0: 640x640 2 cars, 1 truck, 133.4ms
Speed: 6.0ms preprocess, 133.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.7
Class name --> car
soy 3587
3587
soy KSG
3587KSG
3587KSG
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.38
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.34
Class name --> truck
Coordenadas fuera de los límites de la imagen.



0: 640x640 1 car, 150.9ms
Speed: 5.9ms preprocess, 150.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.83
Class name --> car
soy 1805
1805
soy GTP
1805GTP
1805GTP
Coordenadas fuera de los límites de la imagen.



0: 640x640 2 persons, 2 cars, 140.4ms
Speed: 6.1ms preprocess, 140.4ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.92
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.83
Class name --> person
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.83
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.44
Class name --> person
Coordenadas fuera de los límites de la imagen.



0: 640x640 3 cars, 199.3ms
Speed: 4.6ms preprocess, 199.3ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.9
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.8
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.65
Class name --> car
Coordenadas fuera de los límites de la imagen.



0: 640x640 3 cars, 159.7ms
Speed: 5.0ms preprocess, 159.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.88
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.74
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.71
Class name --> car
Coordenadas fuera de los límites de la imagen.



0: 640x640 2 cars, 1 bus, 1 stop sign, 177.3ms
Speed: 4.9ms preprocess, 177.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.84
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.78
Class name --> bus
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.45
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.32
Class name --> stop sign
Coordenadas fuera de los límites de la imagen.



0: 640x640 1 person, 6 cars, 141.5ms
Speed: 5.0ms preprocess, 141.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.92
Class name --> car
soy 0182GKT
0182GKT
0182GKT
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.83
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.82
Class name --> person
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.64
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.32
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.32
Class name --> car
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.28
Class name --> car
Coordenadas fuera de los límites de la imagen.



0: 256x640 2 cars, 76.1ms
Speed: 3.1ms preprocess, 76.1ms inference, 1.0ms postprocess per image at shape (1, 3, 256, 640)


Confidence ---> 0.94
Class name --> car
soy B483FPK
B483FPK
3483FPK
Coordenadas fuera de los límites de la imagen.
Confidence ---> 0.34
Class name --> car
Coordenadas fuera de los límites de la imagen.


In [15]:
from ultralytics import YOLO

# Load a model
model = YOLO("yolov8n.yaml")  # build a new model from scratch

# Use the model
results = model.train(data="config.yaml", epochs=100, patience=15)  # train the model



                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128

Modelo YOLO sin OCR -> Bueno

In [18]:
import cv2
import math
import easyocr
from ultralytics import YOLO

# Carga de los modelos
# Carga del modelo YOLO
car_model = YOLO('yolov8n.pt')

license_plate_model = YOLO('best.pt')

# Captura de video desde un archivo
cap = cv2.VideoCapture('prueba.mp4')  # Reemplaza 'tu_video.mp4' con el nombre de tu archivo de video

# Inicializa el lector de OCR de EasyOCR
reader = easyocr.Reader(lang_list=['en'])  # Ajusta los idiomas según tus necesidades

while True:
    ret, frame = cap.read()

    if not ret:
        break

    # Realiza detección de vehículos en el frame
    car_results = license_plate_model(frame, stream=True)

    for r in car_results:
        car_boxes = r.boxes

        for car_box in car_boxes:
            x1, y1, x2, y2 = car_box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

            # Dibuja el bounding box del vehículo
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)

            # Realiza detección de matrículas en el área del vehículo
            license_plate_crop = frame[y1:y2, x1:x2]
            license_plate_results = license_plate_model(license_plate_crop, stream=True)

            for lp_result in license_plate_results:
                lp_boxes = lp_result.boxes

                for lp_box in lp_boxes:
                    x1_lp, y1_lp, x2_lp, y2_lp = lp_box.xyxy[0]
                    x1_lp, y1_lp, x2_lp, y2_lp = int(x1_lp), int(y1_lp), int(x2_lp), int(y2_lp)

                    # Dibuja el bounding box de la matrícula
                    cv2.rectangle(frame, (x1 + x1_lp, y1 + y1_lp), (x1 + x2_lp, y1 + y2_lp), (0, 0, 255), 1)

                    # Realiza OCR en la matrícula con EasyOCR
                    license_plate_crop = frame[y1 + y1_lp:y1 + y2_lp, x1 + x1_lp:x1 + x2_lp]
                    gray_plate = cv2.cvtColor(license_plate_crop, cv2.COLOR_BGR2GRAY)
                    results = reader.readtext(gray_plate)

                    if results:
                        license_plate_text = results[0][1]

                        # Muestra el texto de la matrícula en la ventana
                        cv2.putText(frame, license_plate_text, (x1 + x1_lp, y1 + y1_lp - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    # Muestra el frame con las detecciones en una ventana
    cv2.imshow('Video con Detecciones', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


0: 128x640 (no detections), 107.1ms
Speed: 0.9ms preprocess, 107.1ms inference, 0.0ms postprocess per image at shape (1, 3, 128, 640)
0: 384x640 1 matricula, 107.1ms
Speed: 5.2ms preprocess, 107.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 92.9ms
Speed: 4.0ms preprocess, 92.9ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 89.3ms
Speed: 1.3ms preprocess, 89.3ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)


0: 128x640 (no detections), 42.1ms
Speed: 1.0ms preprocess, 42.1ms inference, 0.4ms postprocess per image at shape (1, 3, 128, 640)
0: 384x640 1 matricula, 78.1ms
Speed: 3.0ms preprocess, 78.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 80.8ms
Speed: 2.0ms preprocess, 80.8ms inference, 1.0ms postprocess per imag

In [2]:
import cv2
import math
import easyocr
from ultralytics import YOLO

In [17]:
# Carga del modelo
model = YOLO('best.pt')

# Nombre de las distintas clases
classNames = ["matricula"]

# Lee la imagen desde un archivo
img = cv2.imread('mssulove.png')

# Perform inference on the image
results = model(img) 
print(model(img))
# Para cada detección
for r in results:
    boxes = r.boxes

    for box in boxes:
        # Contenedor
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values

        # Confianza
        confidence = math.ceil((box.conf[0]*100))/100
        print("Confidence --->",confidence)

        # Clase
        cls = int(box.cls[0])
        print("Class name -->", classNames[cls])

        # Convierte identificador numérico de clase a un color RGB
        escala = int((cls / len(classNames)) * 255 * 3)
        if escala >= 255*2:
            R = 255
            G = 255
            B = escala - 255*2
        else:
            if escala >= 255:
                R = 255
                G = escala - 255
                B = 0
            else:
                R = escala
                G = 0
                B = 0

        # Dibuja el contenedor y clase
        cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 1)
        cv2.putText(img, classNames[cls] , [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

# Muestra la imagen con las detecciones
cv2.imshow('Imagen con Detecciones', img)
cv2.waitKey(0)

# Destruye la ventana
cv2.destroyAllWindows()



0: 256x640 1 matricula, 62.5ms
Speed: 4.0ms preprocess, 62.5ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 640)

0: 256x640 1 matricula, 58.9ms
Speed: 2.1ms preprocess, 58.9ms inference, 1.0ms postprocess per image at shape (1, 3, 256, 640)


[ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'matricula'}
orig_img: array([[[153, 147, 147],
        [177, 170, 170],
        [192, 185, 185],
        ...,
        [ 58,  56,  54],
        [255, 255, 255],
        [255, 255, 255]],

       [[150, 145, 144],
        [160, 153, 153],
        [211, 204, 204],
        ...,
        [ 72,  69,  66],
        [255, 255, 255],
        [255, 255, 255]],

       [[154, 149, 148],
        [137, 130, 130],
        [187, 180, 180],
        ...,
        [133, 130, 127],
        [255, 255, 255],
        [255, 255, 255]],

       ...,

       [[ 29,  28,  25],
        [ 27,  26,  23],
        [ 26,  25,  22],
        ...,
        [194, 192, 191],
        [255, 255, 255],
        [255, 255, 255]],

       [[ 25,  24,  21],
        [ 25,  24,  21],
        [ 25,  24,  21],
        ...,
        [195, 193, 192],
        [255, 255, 255],
        [255, 255, 

Dta prueba
tiempor real bueno de verdad


In [16]:
import cv2
import math
import easyocr
from ultralytics import YOLO
import string
# Mapping dictionaries for character conversion
dict_char_to_int = {'O': '0',
                    'I': '1',
                    'J': '3',
                    'A': '4',
                    'G': '6',
                    'S': '5'}

dict_int_to_char = {'0': 'O',
                    '1': 'I',
                    '3': 'J',
                    '4': 'A',
                    '6': 'G',
                    '5': 'S'}


def license_complies_format(text):
    """
    Check if the license plate text complies with the required format.

    Args:
        text (str): License plate text.

    Returns:
        bool: True if the license plate complies with the format, False otherwise.
    """
    if len(text) != 7:
        return False

    if (text[0] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[0] in dict_char_to_int.keys()) and \
       (text[1] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[1] in dict_char_to_int.keys()) and \
       (text[2] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[2] in dict_char_to_int.keys()) and \
       (text[3] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[3] in dict_char_to_int.keys()) and \
       (text[4] in string.ascii_uppercase or text[4] in dict_int_to_char.keys()) and \
       (text[5] in string.ascii_uppercase or text[5] in dict_int_to_char.keys()) and \
       (text[6] in string.ascii_uppercase or text[6] in dict_int_to_char.keys()):
        return True
    else:
        return False


def format_license(text):
    """
    Format the license plate text by converting characters using the mapping dictionaries.

    Args:
        text (str): License plate text.

    Returns:
        str: Formatted license plate text.
    """
    license_plate_ = ''
    mapping = {0: dict_int_to_char, 1: dict_int_to_char, 4: dict_int_to_char, 5: dict_int_to_char, 6: dict_int_to_char,
               2: dict_char_to_int, 3: dict_char_to_int}
    for j in [0, 1, 2, 3, 4, 5, 6]:
        if text[j] in mapping[j].keys():
            license_plate_ += mapping[j][text[j]]
        else:
            license_plate_ += text[j]

    return license_plate_


def read_license_plate(license_plate_crop):
    """
    Read the license plate text from the given cropped image.

    Args:
        license_plate_crop (PIL.Image.Image): Cropped image containing the license plate.

    Returns:
        tuple: Tuple containing the formatted license plate text and its confidence score.
    """

    detections = reader.readtext(license_plate_crop)

    for detection in detections:
        bbox, text, score = detection

        text = text.upper().replace(' ', '')

        if license_complies_format(text):
            return format_license(text), score

    return None, None


# Carga de los modelos
# Carga del modelo YOLO
car_model = YOLO('yolov8n.pt')

license_plate_model = YOLO('best.pt')

# Captura de video desde un archivo
cap = cv2.VideoCapture('prueba.mp4')  # Reemplaza 'tu_video.mp4' con el nombre de tu archivo de video
# Variable para almacenar la última matrícula detectada y su puntaje
last_license_plate = None
# Inicializa el lector de OCR de EasyOCR
reader = easyocr.Reader(lang_list=['en'])  # Ajusta los idiomas según tus necesidades

while True:
    ret, frame = cap.read()

    if not ret:
        break

    # Realiza detección de vehículos en el frame
    car_results = license_plate_model(frame, stream=True)

    for r in car_results:
        car_boxes = r.boxes

        for car_box in car_boxes:
            x1, y1, x2, y2 = car_box.xyxy[0]
            x1, y1, x2, y2 = int(x1-10), int(y1-10), int(x2+10), int(y2+10)
            # Asegurarse de que las coordenadas estén dentro de los límites válidos
            if x1 < 0:
                x1 = 0
            if y1 < 0:
                y1 = 0
            if x2 > frame.shape[1]:
                x2 = frame.shape[1]
            if y2 > frame.shape[0]:
                y2 = frame.shape[0]
            # Dibuja el bounding box del vehículo
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Realiza OCR en la matrícula con EasyOCR
            license_plate_crop = frame[y1:y2, x1:x2]
            gray_plate = cv2.cvtColor(license_plate_crop, cv2.COLOR_BGR2GRAY)

            # read license plate number
            license_plate_text, license_plate_text_score = read_license_plate(gray_plate)
            # Verifica si la nueva detección es lo suficientemente confiable
            if license_plate_text_score is not None and license_plate_text_score > 0.5:
                last_license_plate = license_plate_text
                last_license_plate_score = license_plate_text_score

            # Muestra la última matrícula detectada incluso si no hay una nueva detección
            if last_license_plate is not None:
                cv2.putText(frame, last_license_plate, (x1-10, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    # Muestra el frame con las detecciones en una ventana
    cv2.imshow('Video con Detecciones', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.

0: 384x640 1 matricula, 207.0ms
Speed: 10.0ms preprocess, 207.0ms inference, 9.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 170.8ms
Speed: 5.5ms preprocess, 170.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 95.6ms
Speed: 3.0ms preprocess, 95.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 matricula, 74.3ms
Speed: 2.5ms preprocess, 74.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 81.5ms
Speed: 1.0ms preprocess, 81.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 72.8ms
Speed: 1.0ms preprocess, 72.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 matricula, 72.1ms
Speed: 1.0ms preprocess, 72.1ms inference, 1.1ms postprocess per image a

Prueba Eric OCR 1

In [15]:
# Carga del modelo
model = YOLO('best.pt')

# Nombre de las distintas clases
classNames = ["matricula"]

# Lee la imagen desde un archivo
img = cv2.imread('mssulove.png')


reader = easyocr.Reader(['en'])
# Perform inference on the image
results = model(img) 
print(model(img))
# Para cada detección
for r in results:
    boxes = r.boxes

    for box in boxes:
        # Contenedor
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values

        # Confianza
        confidence = math.ceil((box.conf[0]*100))/100
        print("Confidence --->",confidence)

        # Clase
        cls = int(box.cls[0])
        print("Class name -->", classNames[cls])

        # Convierte identificador numérico de clase a un color RGB
        escala = int((cls / len(classNames)) * 255 * 3)
        if escala >= 255*2:
            R = 255
            G = 255
            B = escala - 255*2
        else:
            if escala >= 255:
                R = 255
                G = escala - 255
                B = 0
            else:
                R = escala
                G = 0
                B = 0
        # Dibuja el contenedor y clase
        cv2.rectangle(img, (x1+7, y1), (x2+7, y2), (R, G, B), 2)

        # Realiza OCR en la matrícula con EasyOCR
        license_plate_crop = img[y1-5:y2+5, x1+10:x2+10]
        # Redimensiona la región de la matrícula a un tamaño deseado
        new_width = 250  # Define el ancho deseado
        new_height = 150  # Define la altura deseada
        license_plate_crop_resized = cv2.resize(license_plate_crop, (new_width, new_height))

        # Procesa la región de la matrícula redimensionada
        license_plate_crop_gray = cv2.cvtColor(license_plate_crop_resized, cv2.COLOR_BGR2GRAY)

        # Ajusta los parámetros de umbralización adaptativa para hacer las letras negras más claras
        block_size = 21  # Tamaño del bloque para la umbralización adaptativa (debe ser impar)
        C = 3  # Constante que se resta del valor promedio (puede ajustarse)

        license_plate_crop_thresh = cv2.adaptiveThreshold(license_plate_crop_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, block_size, C)

        # Realiza OCR en la región de la matrícula redimensionada
        results = reader.readtext(license_plate_crop_thresh)

         # Verifica license_plate_crop
        
        cv2.imshow('License Plate Crop', license_plate_crop_thresh)
        print("resultados:")
        print(results)
        print("----------------------")

        # Para cada resultado de OCR
        for (bbox, text, prob) in results:
            # Dibuja el texto reconocido
            (top_left, top_right, bottom_right, bottom_left) = bbox
            cv2.rectangle(license_plate_crop, (x1 + int(top_left[0]), y1 + int(top_left[1])),
                        (x1 + int(bottom_right[0]), y1 + int(bottom_right[1])), (0, 0, 255), 2)
            cv2.putText(img, text, (x1 + int(top_left[0]), y1 + int(top_left[1]) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)


# Muestra la imagen con las detecciones
cv2.imshow('Imagen con Detecciones', img)
cv2.waitKey(0)

# Destruye la ventana
cv2.destroyAllWindows()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.

0: 256x640 1 matricula, 989.4ms
Speed: 124.0ms preprocess, 989.4ms inference, 18.0ms postprocess per image at shape (1, 3, 256, 640)

0: 256x640 1 matricula, 95.3ms
Speed: 1.0ms preprocess, 95.3ms inference, 1.0ms postprocess per image at shape (1, 3, 256, 640)


[ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'matricula'}
orig_img: array([[[153, 147, 147],
        [177, 170, 170],
        [192, 185, 185],
        ...,
        [ 58,  56,  54],
        [255, 255, 255],
        [255, 255, 255]],

       [[150, 145, 144],
        [160, 153, 153],
        [211, 204, 204],
        ...,
        [ 72,  69,  66],
        [255, 255, 255],
        [255, 255, 255]],

       [[154, 149, 148],
        [137, 130, 130],
        [187, 180, 180],
        ...,
        [133, 130, 127],
        [255, 255, 255],
        [255, 255, 255]],

       ...,

       [[ 29,  28,  25],
        [ 27,  26,  23],
        [ 26,  25,  22],
        ...,
        [194, 192, 191],
        [255, 255, 255],
        [255, 255, 255]],

       [[ 25,  24,  21],
        [ 25,  24,  21],
        [ 25,  24,  21],
        ...,
        [195, 193, 192],
        [255, 255, 255],
        [255, 255, 

PRUEBA OCR 3

Prueba Eric csv

Hay varias partes.

In [53]:
from ultralytics import YOLO
import cv2

from sort.sort import *
import string
import easyocr

# Initialize the OCR reader
reader = easyocr.Reader(['en'], gpu=False)

# Mapping dictionaries for character conversion
dict_char_to_int = {'O': '0',
                    'I': '1',
                    'J': '3',
                    'A': '4',
                    'G': '6',
                    'S': '5'}

dict_int_to_char = {'0': 'O',
                    '1': 'I',
                    '3': 'J',
                    '4': 'A',
                    '6': 'G',
                    '5': 'S'}


def write_csv(results, output_path):
    """
    Write the results to a CSV file.

    Args:
        results (dict): Dictionary containing the results.
        output_path (str): Path to the output CSV file.
    """
    with open(output_path, 'w') as f:
        f.write('{},{},{},{},{},{},{}\n'.format('frame_nmr', 'car_id', 'car_bbox',
                                                'license_plate_bbox', 'license_plate_bbox_score', 'license_number',
                                                'license_number_score'))

        for frame_nmr in results.keys():
            for car_id in results[frame_nmr].keys():
                print(results[frame_nmr][car_id])
                if 'car' in results[frame_nmr][car_id].keys() and \
                   'license_plate' in results[frame_nmr][car_id].keys() and \
                   'text' in results[frame_nmr][car_id]['license_plate'].keys():
                    f.write('{},{},{},{},{},{},{}\n'.format(frame_nmr,
                                                            car_id,
                                                            '[{} {} {} {}]'.format(
                                                                results[frame_nmr][car_id]['car']['bbox'][0],
                                                                results[frame_nmr][car_id]['car']['bbox'][1],
                                                                results[frame_nmr][car_id]['car']['bbox'][2],
                                                                results[frame_nmr][car_id]['car']['bbox'][3]),
                                                            '[{} {} {} {}]'.format(
                                                                results[frame_nmr][car_id]['license_plate']['bbox'][0],
                                                                results[frame_nmr][car_id]['license_plate']['bbox'][1],
                                                                results[frame_nmr][car_id]['license_plate']['bbox'][2],
                                                                results[frame_nmr][car_id]['license_plate']['bbox'][3]),
                                                            results[frame_nmr][car_id]['license_plate']['bbox_score'],
                                                            results[frame_nmr][car_id]['license_plate']['text'],
                                                            results[frame_nmr][car_id]['license_plate']['text_score'])
                            )
        f.close()


def license_complies_format(text):
    """
    Check if the license plate text complies with the required format.

    Args:
        text (str): License plate text.

    Returns:
        bool: True if the license plate complies with the format, False otherwise.
    """
    if len(text) != 7:
        return False

    if (text[0] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[0] in dict_char_to_int.keys()) and \
       (text[1] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[1] in dict_char_to_int.keys()) and \
       (text[2] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[2] in dict_char_to_int.keys()) and \
       (text[3] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[3] in dict_char_to_int.keys()) and \
       (text[4] in string.ascii_uppercase or text[4] in dict_int_to_char.keys()) and \
       (text[5] in string.ascii_uppercase or text[5] in dict_int_to_char.keys()) and \
       (text[6] in string.ascii_uppercase or text[6] in dict_int_to_char.keys()):
        return True
    else:
        return False


def format_license(text):
    """
    Format the license plate text by converting characters using the mapping dictionaries.

    Args:
        text (str): License plate text.

    Returns:
        str: Formatted license plate text.
    """
    license_plate_ = ''
    mapping = {0: dict_char_to_int, 1: dict_char_to_int, 2: dict_char_to_int, 3: dict_char_to_int,
               4: dict_int_to_char, 5: dict_int_to_char, 6: dict_int_to_char}
    for j in [0, 1, 2, 3, 4, 5, 6]:
        if text[j] in mapping[j].keys():
            license_plate_ += mapping[j][text[j]]
        else:
            license_plate_ += text[j]

    return license_plate_


def read_license_plate(license_plate_crop):
    """
    Read the license plate text from the given cropped image.

    Args:
        license_plate_crop (PIL.Image.Image): Cropped image containing the license plate.

    Returns:
        tuple: Tuple containing the formatted license plate text and its confidence score.
    """

    detections = reader.readtext(license_plate_crop)

    for detection in detections:
        bbox, text, score = detection

        text = text.upper().replace(' ', '')

        if license_complies_format(text):
            return format_license(text), score

    return None, None


def get_car(license_plate, vehicle_track_ids):
    """
    Retrieve the vehicle coordinates and ID based on the license plate coordinates.

    Args:
        license_plate (tuple): Tuple containing the coordinates of the license plate (x1, y1, x2, y2, score, class_id).
        vehicle_track_ids (list): List of vehicle track IDs and their corresponding coordinates.

    Returns:
        tuple: Tuple containing the vehicle coordinates (x1, y1, x2, y2) and ID.
    """
    x1, y1, x2, y2, score, class_id = license_plate

    foundIt = False
    for j in range(len(vehicle_track_ids)):
        xcar1, ycar1, xcar2, ycar2, car_id = vehicle_track_ids[j]

        if x1 > xcar1 and y1 > ycar1 and x2 < xcar2 and y2 < ycar2:
            car_indx = j
            foundIt = True
            break

    if foundIt:
        return vehicle_track_ids[car_indx]

    return -1, -1, -1, -1, -1

results = {}

mot_tracker = Sort()

# load models
coco_model = YOLO('yolov8n.pt')
license_plate_detector = YOLO('best.pt')

# load video
cap = cv2.VideoCapture('prueba.mp4')

vehicles = [2, 3, 5, 7]

# read frames
frame_nmr = -1
ret = True
while ret:
    frame_nmr += 1
    ret, frame = cap.read()
    if ret:
        results[frame_nmr] = {}
        # detect vehicles
        detections = coco_model(frame)[0]
        detections_ = []
        for detection in detections.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = detection
            if int(class_id) in vehicles:
                detections_.append([x1, y1, x2, y2, score])

        # track vehicles
        track_ids = mot_tracker.update(np.asarray(detections_))

        # detect license plates
        license_plates = license_plate_detector(frame)[0]
        for license_plate in license_plates.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = license_plate

            # assign license plate to car
            xcar1, ycar1, xcar2, ycar2, car_id = get_car(license_plate, track_ids)

            if car_id != -1:

                # Redimensiona la región de la matrícula a un tamaño deseado
                new_width = 25  # Define el ancho deseado
                new_height = 10  # Define la altura deseada
                # Realiza detección de matrículas en el área del vehículo
                # Factor de zoom para hacer el recorte más grande
                zoom_factor = 1.5  # Ajusta según sea necesario

                
            # Realiza OCR en la matrícula con EasyOCR

                # crop license plate
                license_plate_crop = frame[int(y1):int(y2), int(x1): int(x2), :]
                new_width = int(zoom_factor * license_plate_crop.shape[1])  # Define el ancho deseado
                new_height = int(zoom_factor * license_plate_crop.shape[0])  # Define la altura deseada
                        # Asegurarse de que las coordenadas estén dentro de los límites válidos
                        
                if x1 < 0:
                    x1 = 0
                if y1 < 0:
                    y1 = 0
                if x2 > frame.shape[1]:
                    x2 = frame.shape[1]
                if y2 > frame.shape[0]:
                    y2 = frame.shape[0]
                
                if x1 > frame.shape[1]:
                    x1 = frame.shape[1]
                if y1 > frame.shape[0]:
                    y1 = frame.shape[0]
                    
                license_plate_crop_resized = cv2.resize(license_plate_crop, (new_width, new_height))


        
                license_plate_results = license_plate_detector(license_plate_crop, stream=True)

            
                # read license plate number
                license_plate_text, license_plate_text_score = read_license_plate(license_plate_crop_resized)

                if license_plate_text is not None:
                    results[frame_nmr][car_id] = {'car': {'bbox': [xcar1, ycar1, xcar2, ycar2]},
                                                  'license_plate': {'bbox': [x1, y1, x2, y2],
                                                                    'text': license_plate_text,
                                                                    'bbox_score': score,
                                                                    'text_score': license_plate_text_score}}

# write results
write_csv(results, r"C:\Users\Eric\Desktop\vc-5\VC_P5\P5\test.csv")

Using CPU. Note: This module is much faster with a GPU.

0: 384x640 4 persons, 11 cars, 2 motorcycles, 2 buss, 3 trucks, 2 traffic lights, 127.2ms
Speed: 14.8ms preprocess, 127.2ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 matricula, 106.0ms
Speed: 2.3ms preprocess, 106.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 11 cars, 2 motorcycles, 2 buss, 3 trucks, 3 traffic lights, 109.4ms
Speed: 10.6ms preprocess, 109.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 105.8ms
Speed: 2.9ms preprocess, 105.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 11 cars, 2 motorcycles, 2 buss, 2 trucks, 3 traffic lights, 109.8ms
Speed: 3.0ms preprocess, 109.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 94.4ms
Speed: 3.0ms preprocess, 94.4ms inference, 0.0ms postprocess per image at

{'car': {'bbox': [292.6574908866212, 176.10984305780556, 515.9377871289352, 339.7955971211814]}, 'license_plate': {'bbox': [328.5865783691406, 297.283447265625, 378.177001953125, 317.21502685546875], 'text': '7836ACL', 'bbox_score': 0.25335997343063354, 'text_score': 0.25564753817054475}}
{'car': {'bbox': [288.7370119617587, 176.63476638882779, 514.4964506535441, 340.1356059270637]}, 'license_plate': {'bbox': [329.9085388183594, 296.1483459472656, 377.2054138183594, 318.0135192871094], 'text': '1836LCL', 'bbox_score': 0.2765236794948578, 'text_score': 0.28744293191121795}}
{'car': {'bbox': [285.48370621559616, 177.2890019566268, 513.6027277770764, 340.5531604941703]}, 'license_plate': {'bbox': [316.50347900390625, 301.2298278808594, 376.3665771484375, 317.96337890625], 'text': '1483GCL', 'bbox_score': 0.4343547821044922, 'text_score': 0.7082960016552772}}
{'car': {'bbox': [219.44926479624203, 181.4429424241684, 495.765331256408, 340.6152644344458]}, 'license_plate': {'bbox': [280.29302

Como no me gusta el resultado, al igual que le comenté en su despacho, he querido mejorar el output.
De este modo, la manera de que se vea más fluido el video generado posteriormente. Utilize la técnica de interpolar datos.
Para aladir fluidez en el resultado final.

In [54]:
import csv
import numpy as np
from scipy.interpolate import interp1d


def interpolate_bounding_boxes(data):
    # Extract necessary data columns from input data
    frame_numbers = np.array([int(row['frame_nmr']) for row in data])
    car_ids = np.array([int(float(row['car_id'])) for row in data])
    car_bboxes = np.array([list(map(float, row['car_bbox'][1:-1].split())) for row in data])
    license_plate_bboxes = np.array([list(map(float, row['license_plate_bbox'][1:-1].split())) for row in data])

    interpolated_data = []
    unique_car_ids = np.unique(car_ids)
    for car_id in unique_car_ids:

        frame_numbers_ = [p['frame_nmr'] for p in data if int(float(p['car_id'])) == int(float(car_id))]
        print(frame_numbers_, car_id)

        # Filter data for a specific car ID
        car_mask = car_ids == car_id
        car_frame_numbers = frame_numbers[car_mask]
        car_bboxes_interpolated = []
        license_plate_bboxes_interpolated = []

        first_frame_number = car_frame_numbers[0]
        last_frame_number = car_frame_numbers[-1]

        for i in range(len(car_bboxes[car_mask])):
            frame_number = car_frame_numbers[i]
            car_bbox = car_bboxes[car_mask][i]
            license_plate_bbox = license_plate_bboxes[car_mask][i]

            if i > 0:
                prev_frame_number = car_frame_numbers[i-1]
                prev_car_bbox = car_bboxes_interpolated[-1]
                prev_license_plate_bbox = license_plate_bboxes_interpolated[-1]

                if frame_number - prev_frame_number > 1:
                    # Interpolate missing frames' bounding boxes
                    frames_gap = frame_number - prev_frame_number
                    x = np.array([prev_frame_number, frame_number])
                    x_new = np.linspace(prev_frame_number, frame_number, num=frames_gap, endpoint=False)
                    interp_func = interp1d(x, np.vstack((prev_car_bbox, car_bbox)), axis=0, kind='linear')
                    interpolated_car_bboxes = interp_func(x_new)
                    interp_func = interp1d(x, np.vstack((prev_license_plate_bbox, license_plate_bbox)), axis=0, kind='linear')
                    interpolated_license_plate_bboxes = interp_func(x_new)

                    car_bboxes_interpolated.extend(interpolated_car_bboxes[1:])
                    license_plate_bboxes_interpolated.extend(interpolated_license_plate_bboxes[1:])

            car_bboxes_interpolated.append(car_bbox)
            license_plate_bboxes_interpolated.append(license_plate_bbox)

        for i in range(len(car_bboxes_interpolated)):
            frame_number = first_frame_number + i
            row = {}
            row['frame_nmr'] = str(frame_number)
            row['car_id'] = str(car_id)
            row['car_bbox'] = ' '.join(map(str, car_bboxes_interpolated[i]))
            row['license_plate_bbox'] = ' '.join(map(str, license_plate_bboxes_interpolated[i]))

            if str(frame_number) not in frame_numbers_:
                # Imputed row, set the following fields to '0'
                row['license_plate_bbox_score'] = '0'
                row['license_number'] = '0'
                row['license_number_score'] = '0'
            else:
                # Original row, retrieve values from the input data if available
                original_row = [p for p in data if int(p['frame_nmr']) == frame_number and int(float(p['car_id'])) == int(float(car_id))][0]
                row['license_plate_bbox_score'] = original_row['license_plate_bbox_score'] if 'license_plate_bbox_score' in original_row else '0'
                row['license_number'] = original_row['license_number'] if 'license_number' in original_row else '0'
                row['license_number_score'] = original_row['license_number_score'] if 'license_number_score' in original_row else '0'

            interpolated_data.append(row)

    return interpolated_data


# Load the CSV file
with open('test.csv', 'r') as file:
    reader = csv.DictReader(file)
    data = list(reader)

# Interpolate missing data
interpolated_data = interpolate_bounding_boxes(data)

# Write updated data to a new CSV file
header = ['frame_nmr', 'car_id', 'car_bbox', 'license_plate_bbox', 'license_plate_bbox_score', 'license_number', 'license_number_score']
with open('test_interpolated.csv', 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=header)
    writer.writeheader()
    writer.writerows(interpolated_data)

['175', '177', '178', '179', '180', '181', '182'] 5437
['122', '123', '124', '139'] 5464
['209', '215', '216', '217', '219', '220', '221', '222', '233', '236', '238', '239', '242', '244'] 5615
['362', '404'] 5695
['349', '354', '355', '359', '360', '362', '364', '365', '368'] 5841
['426', '438', '439', '442'] 5881
['455', '456', '457', '461', '463', '465', '467'] 5940
['504', '505', '508', '512', '513', '514', '519', '524'] 5966


Generar video

In [71]:
import ast

import cv2
import numpy as np
import pandas as pd


def draw_border(img, top_left, bottom_right, color=(0, 255, 0), thickness=10, line_length_x=200, line_length_y=200):
    x1, y1 = top_left
    x2, y2 = bottom_right

    cv2.line(img, (x1, y1), (x1, y1 + line_length_y), color, thickness)  #-- top-left
    cv2.line(img, (x1, y1), (x1 + line_length_x, y1), color, thickness)

    cv2.line(img, (x1, y2), (x1, y2 - line_length_y), color, thickness)  #-- bottom-left
    cv2.line(img, (x1, y2), (x1 + line_length_x, y2), color, thickness)

    cv2.line(img, (x2, y1), (x2 - line_length_x, y1), color, thickness)  #-- top-right
    cv2.line(img, (x2, y1), (x2, y1 + line_length_y), color, thickness)

    cv2.line(img, (x2, y2), (x2, y2 - line_length_y), color, thickness)  #-- bottom-right
    cv2.line(img, (x2, y2), (x2 - line_length_x, y2), color, thickness)

    return img


results = pd.read_csv('./test_interpolated.csv')

# load video
video_path = 'prueba.mp4'
cap = cv2.VideoCapture(video_path)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Specify the codec
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('./out.mp4', fourcc, fps, (width, height))

license_plate = {}
for car_id in np.unique(results['car_id']):
    max_ = np.amax(results[results['car_id'] == car_id]['license_number_score'])
    license_plate[car_id] = {'license_crop': None,
                             'license_plate_number': results[(results['car_id'] == car_id) &
                                                             (results['license_number_score'] == max_)]['license_number'].iloc[0]}
    cap.set(cv2.CAP_PROP_POS_FRAMES, results[(results['car_id'] == car_id) &
                                             (results['license_number_score'] == max_)]['frame_nmr'].iloc[0])
    ret, frame = cap.read()

    x1, y1, x2, y2 = ast.literal_eval(results[(results['car_id'] == car_id) &
                                              (results['license_number_score'] == max_)]['license_plate_bbox'].iloc[0].replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))

    license_crop = frame[int(y1):int(y2), int(x1):int(x2), :]
    license_crop = cv2.resize(license_crop, (int((x2 - x1) * 400 / (y2 - y1)), 400))

    license_plate[car_id]['license_crop'] = license_crop


frame_nmr = -1

cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# read frames
ret = True
while ret:
    ret, frame = cap.read()
    frame_nmr += 1
    if ret:
        df_ = results[results['frame_nmr'] == frame_nmr]
        for row_indx in range(len(df_)):
            # draw car
            car_x1, car_y1, car_x2, car_y2 = ast.literal_eval(df_.iloc[row_indx]['car_bbox'].replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))
            draw_border(frame, (int(car_x1), int(car_y1)), (int(car_x2), int(car_y2)), (0, 255, 0), 5,
                        line_length_x=100, line_length_y=100)

            # draw license plate
            x1, y1, x2, y2 = ast.literal_eval(df_.iloc[row_indx]['license_plate_bbox'].replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)

            # Crop license plate
            license_crop = license_plate[df_.iloc[row_indx]['car_id']]['license_crop']

            H, W, _ = license_crop.shape


                    
            # Resize license_crop to match the height of the region in the frame
            license_crop_resized = cv2.resize(license_crop, (int((car_x2 - car_x1) * 100 / (y2 - y1)), 100))

            # Calculate the width and height of the resized license crop
            H_resized, W_resized, _ = license_crop_resized.shape

            # Calculate the width and height of the specified region
            region_height = int(car_y1) - int(car_y1) + H_resized
            region_width = int((car_x2 + car_x1 - W_resized) / 2) - int((car_x2 + car_x1 + W_resized) / 2)

            # Make sure the resized license crop fits within the specified region
            license_crop_resized = license_crop_resized[:region_height, :region_width, :]

            # Assign the resized license_crop to the frame
            frame[int(car_y1):int(car_y1) + H_resized,
                int((car_x2 + car_x1 - W_resized) / 2):int((car_x2 + car_x1 - W_resized) / 2) + region_width, :] = license_crop_resized
            # Calculate the font size based on the resized license_crop height
            font_size = int(H_resized / 100)  # You can adjust the division factor as needed

            # Calculate the text width for the resized text
            (text_width, text_height), _ = cv2.getTextSize(
                license_plate[df_.iloc[row_indx]['car_id']]['license_plate_number'],
                cv2.FONT_HERSHEY_SIMPLEX,
                font_size,
                5  # Thickness is set to 1
            )

            # Draw text on the frame
            cv2.putText(frame,
                        license_plate[df_.iloc[row_indx]['car_id']]['license_plate_number'],
                        (int((x2 + x1 - text_width) / 2), int(y1 - 20 + (text_height / 2))),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        font_size,
                        (0, 0, 0),
                        4)  # Adjust the thickness of the text as needed




        out.write(frame)
        frame = cv2.resize(frame, (1280, 900))

        # cv2.imshow('frame', frame)
        # cv2.waitKey(0)

out.release()
cap.release()