#Paquetes necesarios

In [2]:
import cv2  
import math 

from ultralytics import YOLO



Desde cámara, detección con yolov8 y modelo nano

In [4]:
# Carga del modelo
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]


# Captura desde lawebcam
vid = cv2.VideoCapture(0)
  
while(True):      
    # fotograma a fotograma
    ret, img = vid.read()
  
    # si hay imagen válida
    if ret:  
        # Perform inference on an image
        results = model(img, stream=True)
        
        # Para cada detección
        for r in results:
            boxes = r.boxes

            for box in boxes:
                # Contenedor
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values
                
                # Confianza
                confidence = math.ceil((box.conf[0]*100))/100
                print("Confidence --->",confidence)

                # Clase
                cls = int(box.cls[0])
                print("Class name -->", classNames[cls])

                # Convierte identificador numérico de clase a un color RGB
                escala = int((cls / len(classNames)) * 255 * 3)
                if escala >= 255*2:
                    R = 255
                    G = 255
                    B = escala - 255*2
                else:
                    if escala >= 255:
                        R = 255
                        G = escala - 255
                        B = 0
                    else:
                        R = escala
                        G = 0
                        B = 0

                # Dibuja el contenedor y clase
                cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
                cv2.putText(img, classNames[cls] , [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

        # Muestra fotograma
        cv2.imshow('Vid', img)
    
    # Detenemos pulsado ESC
    if cv2.waitKey(20) == 27:
        break
  
# Libera el objeto de captura
vid.release()
# Destruye ventanas
cv2.destroyAllWindows()


0: 480x640 1 person, 1 dog, 205.1ms
Speed: 21.8ms preprocess, 205.1ms inference, 23.7ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.88
Class name --> person
Confidence ---> 0.35
Class name --> dog


0: 480x640 2 persons, 158.6ms
Speed: 2.0ms preprocess, 158.6ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 141.2ms


Confidence ---> 0.93
Class name --> person
Confidence ---> 0.89
Class name --> person
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.87
Class name --> person


Speed: 2.3ms preprocess, 141.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 145.0ms
Speed: 2.0ms preprocess, 145.0ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 125.5ms
Speed: 1.5ms preprocess, 125.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.94
Class name --> person
Confidence ---> 0.88
Class name --> person
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.88
Class name --> person



0: 480x640 2 persons, 124.4ms
Speed: 0.9ms preprocess, 124.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 125.2ms
Speed: 1.0ms preprocess, 125.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.93
Class name --> person
Confidence ---> 0.87
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.89
Class name --> person



0: 480x640 2 persons, 132.4ms
Speed: 1.0ms preprocess, 132.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 132.8ms
Speed: 3.1ms preprocess, 132.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.93
Class name --> person
Confidence ---> 0.89
Class name --> person
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.91
Class name --> person


0: 480x640 2 persons, 202.7ms
Speed: 2.5ms preprocess, 202.7ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 bench, 118.0ms
Speed: 2.0ms preprocess, 118.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.93
Class name --> person
Confidence ---> 0.89
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.33
Class name --> bench


0: 480x640 2 persons, 1 bench, 120.1ms
Speed: 1.5ms preprocess, 120.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 121.5ms
Speed: 1.0ms preprocess, 121.5ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.94
Class name --> person
Confidence ---> 0.9
Class name --> person
Confidence ---> 0.26
Class name --> bench
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.94
Class name --> person



0: 480x640 3 persons, 1 bench, 145.9ms
Speed: 1.5ms preprocess, 145.9ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 bench, 145.7ms
Speed: 2.5ms preprocess, 145.7ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.91
Class name --> person
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.27
Class name --> bench
Confidence ---> 0.26
Class name --> person
Confidence ---> 0.94
Class name --> person
Confidence ---> 0.92
Class name --> person
Confidence ---> 0.31
Class name --> bench



0: 480x640 2 persons, 1 bench, 134.4ms
Speed: 1.6ms preprocess, 134.4ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 119.9ms
Speed: 1.9ms preprocess, 119.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.95
Class name --> person
Confidence ---> 0.9
Class name --> person
Confidence ---> 0.28
Class name --> bench
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.86
Class name --> person


0: 480x640 2 persons, 1 laptop, 122.4ms
Speed: 2.6ms preprocess, 122.4ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 126.3ms
Speed: 1.0ms preprocess, 126.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.87
Class name --> person
Confidence ---> 0.85
Class name --> person
Confidence ---> 0.43
Class name --> laptop
Confidence ---> 0.87
Class name --> person
Confidence ---> 0.78
Class name --> person



0: 480x640 3 persons, 133.0ms
Speed: 2.0ms preprocess, 133.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 persons, 132.3ms
Speed: 2.4ms preprocess, 132.3ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.84
Class name --> person
Confidence ---> 0.83
Class name --> person
Confidence ---> 0.28
Class name --> person
Confidence ---> 0.81
Class name --> person
Confidence ---> 0.73
Class name --> person
Confidence ---> 0.35
Class name --> person


0: 480x640 2 persons, 136.7ms
Speed: 2.0ms preprocess, 136.7ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 118.1ms
Speed: 2.6ms preprocess, 118.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.95
Class name --> person
Confidence ---> 0.85
Class name --> person
Confidence ---> 0.96
Class name --> person
Confidence ---> 0.88
Class name --> person


0: 480x640 2 persons, 121.7ms
Speed: 2.5ms preprocess, 121.7ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 110.6ms
Speed: 2.7ms preprocess, 110.6ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.96
Class name --> person
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.95
Class name --> person
Confidence ---> 0.87
Class name --> person


0: 480x640 2 persons, 135.3ms
Speed: 4.0ms preprocess, 135.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 139.5ms
Speed: 3.0ms preprocess, 139.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.93
Class name --> person
Confidence ---> 0.9
Class name --> person
Confidence ---> 0.92
Class name --> person
Confidence ---> 0.87
Class name --> person



0: 480x640 2 persons, 139.5ms
Speed: 1.0ms preprocess, 139.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 121.1ms
Speed: 1.5ms preprocess, 121.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.93
Class name --> person
Confidence ---> 0.92
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.92
Class name --> person


0: 480x640 2 persons, 176.3ms
Speed: 2.6ms preprocess, 176.3ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 132.2ms
Speed: 1.0ms preprocess, 132.2ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.94
Class name --> person
Confidence ---> 0.92
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.92
Class name --> person


0: 480x640 2 persons, 134.7ms
Speed: 2.3ms preprocess, 134.7ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 133.9ms
Speed: 1.2ms preprocess, 133.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.91
Class name --> person
Confidence ---> 0.9
Class name --> person
Confidence ---> 0.91
Class name --> person
Confidence ---> 0.89
Class name --> person


0: 480x640 2 persons, 137.4ms
Speed: 2.1ms preprocess, 137.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 116.3ms
Speed: 1.0ms preprocess, 116.3ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.89
Class name --> person
Confidence ---> 0.85
Class name --> person
Confidence ---> 0.89
Class name --> person
Confidence ---> 0.86
Class name --> person


0: 480x640 2 persons, 121.2ms
Speed: 1.8ms preprocess, 121.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 130.2ms


Confidence ---> 0.89
Class name --> person
Confidence ---> 0.87
Class name --> person
Confidence ---> 0.93
Class name --> person
Confidence ---> 0.91
Class name --> person


Speed: 1.0ms preprocess, 130.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 140.8ms
Speed: 2.5ms preprocess, 140.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 134.9ms
Speed: 2.0ms preprocess, 134.9ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.86
Class name --> person
Confidence ---> 0.6
Class name --> person
Confidence ---> 0.88
Class name --> person
Confidence ---> 0.85
Class name --> person



0: 480x640 2 persons, 126.1ms
Speed: 1.0ms preprocess, 126.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.89
Class name --> person
Confidence ---> 0.79
Class name --> person


Reconocimiento de caracteres tras instalar pytesseract y tesseract

In [13]:
# Tesseract
import cv2
import pytesseract

# Previamente debes descargar los ejecutables
# Si la ruta de Tesseract no está en el PATH, ruta al ejecutable
pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract'

# Lenguajes disponibles
print(pytesseract.get_languages(config=''))

#Cargo imagen y ocnvierto a RGB
img = cv2.imread('toy.tif') 
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

#Aplica reconocedor a imagen cargada
print(pytesseract.image_to_string(img_rgb))

ModuleNotFoundError: No module named 'pytesseract'

Reconocimiento decaracteres tras instalar easyocr

In [1]:
import easyocr

#Carga del modelo de lengua
reader = easyocr.Reader(['es'], gpu=False) 

#Reconocimiento de una imagen
result = reader.readtext('toy.tif')
print(result)

#Con restricción de caracteres reconocibles
#result = reader.readtext('toy.tif', allowlist ='0123456789')

Using CPU. Note: This module is much faster with a GPU.


[([[49, 85], [617, 85], [617, 147], [49, 147]], 'Hasta el infinito y más allá', 0.6744628105513019)]


In [13]:
import sys
for path in sys.path:
    print(path)



c:\Users\Eric\Desktop\vc-5\P5
c:\Users\Eric\anaconda3\envs\VC_P1\python311.zip
c:\Users\Eric\anaconda3\envs\VC_P1\DLLs
c:\Users\Eric\anaconda3\envs\VC_P1\Lib
c:\Users\Eric\anaconda3\envs\VC_P1

C:\Users\Eric\AppData\Roaming\Python\Python311\site-packages
c:\Users\Eric\anaconda3\envs\VC_P1\Lib\site-packages
c:\Users\Eric\anaconda3\envs\VC_P1\Lib\site-packages\win32
c:\Users\Eric\anaconda3\envs\VC_P1\Lib\site-packages\win32\lib
c:\Users\Eric\anaconda3\envs\VC_P1\Lib\site-packages\Pythonwin


Prueba yolo con imagen

In [5]:
# Carga del modelo
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Carga la imagen que deseas procesar
image_path = 'images.png'  # Reemplaza 'tu_imagen.jpg' por la ruta de tu imagen
img = cv2.imread(image_path)

# Realiza inferencia en la imagen
results = model(img)

# Para cada detección
for r in results:
    boxes = r.boxes

    for box in boxes:
        # Contenedor
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # Convierte a valores enteros

        # Confianza
        confidence = math.ceil((box.conf[0] * 100)) / 100
        print("Confidence --->", confidence)

        # Clase
        cls = int(box.cls[0])
        print("Class name -->", classNames[cls])

        # Convierte el identificador numérico de la clase en un color RGB
        escala = int((cls / len(classNames)) * 255 * 3)
        if escala >= 255 * 2:
            R = 255
            G = 255
            B = escala - 255 * 2
        else:
            if escala >= 255:
                R = 255
                G = escala - 255
                B = 0
            else:
                R = escala
                G = 0
                B = 0

        # Dibuja el contenedor y la clase
        cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
        cv2.putText(img, classNames[cls], [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

# Muestra la imagen con las detecciones
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()




0: 416x640 4 persons, 426.5ms
Speed: 46.7ms preprocess, 426.5ms inference, 39.6ms postprocess per image at shape (1, 3, 416, 640)


Confidence ---> 0.89
Class name --> person
Confidence ---> 0.79
Class name --> person
Confidence ---> 0.76
Class name --> person
Confidence ---> 0.61
Class name --> person


Método S

In [23]:
import cv2
import math
from ultralytics import YOLO  # Asegúrate de importar el módulo YOLO correcto

# Carga del modelo
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Carga la imagen que deseas procesar
image_path = 'prueba.jpg'  # Reemplaza 'tu_imagen.jpg' por la ruta de tu imagen
img = cv2.imread(image_path)

# Realiza inferencia en la imagen
results = model(img)

# Para cada detección
for r in results:
    boxes = r.boxes

    for box in boxes:
        # Contenedor
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # Convierte a valores enteros

        # Confianza
        confidence = math.ceil((box.conf[0] * 100)) / 100
        print("Confidence --->", confidence)

        # Clase
        cls = int(box.cls[0])
        print("Class name -->", classNames[cls])

        # Convierte el identificador numérico de la clase en un color RGB
        escala = int((cls / len(classNames)) * 255 * 3)
        if escala >= 255 * 2:
            R = 255
            G = 255
            B = escala - 255 * 2
        else:
            if escala >= 255:
                R = 255
                G = escala - 255
                B = 0
            else:
                R = escala
                G = 0
                B = 0

        # Dibuja el contenedor y la clase
        cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
        cv2.putText(img, classNames[cls], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

        # Obtén la ROI
        roi = img[y1:y2, x1:x2]

        # Convierte ROI a escala de grises
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Aplica umbral para encontrar contornos
        _, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)

        # Encuentra contornos en la ROI
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Dibuja los contornos en la ROI
        cv2.drawContours(roi, contours, -1, (0, 255, 0), 2)

# Muestra la imagen con las detecciones y los contornos
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()



0: 256x640 2 cars, 65.0ms
Speed: 0.0ms preprocess, 65.0ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 640)


Confidence ---> 0.94
Class name --> car
Confidence ---> 0.34
Class name --> car


In [5]:
import cv2
import math
from ultralytics import YOLO  # Asegúrate de importar el módulo YOLO correcto




# Función para verificar si un contorno tiene forma rectangular aproximada
def is_approximately_rectangular(contour, epsilon=0.009):
    perimeter = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon * perimeter, True)
    return len(approx) == 4

# Carga del modelo YOLO
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Carga la imagen que deseas procesar
image_path = 'prueba.jpg'  # Reemplaza 'tu_imagen.jpg' por la ruta de tu imagen
img = cv2.imread(image_path)

# Realiza inferencia en la imagen
results = model(img)

# Para cada detección
for r in results:
    boxes = r.boxes

    for box in boxes:
        # Contenedor
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # Convierte a valores enteros

        # Confianza
        confidence = math.ceil((box.conf[0] * 100)) / 100
        print("Confidence --->", confidence)

        # Clase
        cls = int(box.cls[0])
        print("Class name -->", classNames[cls])

        # Convierte el identificador numérico de la clase en un color RGB
        escala = int((cls / len(classNames)) * 255 * 3)
        if escala >= 255 * 2:
            R = 255
            G = 255
            B = escala - 255 * 2
        else:
            if escala >= 255:
                R = 255
                G = escala - 255
                B = 0
            else:
                R = escala
                G = 0
                B = 0

        # Dibuja el contenedor y la clase
        cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
        cv2.putText(img, classNames[cls], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

        # Obtén la ROI
        roi = img[y1:y2, x1:x2]

        # Convierte ROI a escala de grises
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Aplica umbral para encontrar contornos
        _, thresh = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY)

        # Encuentra contornos en la ROI
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Umbral de área para considerar como contorno grande (ajusta este valor según tus necesidades)
        area_threshold = 1000

        # Filtra los contornos que tienen forma rectangular aproximada y un área grande
        filtered_contours = [contour for contour in contours if is_approximately_rectangular(contour) and cv2.contourArea(contour) > area_threshold]

        # Dibuja los contornos filtrados en la ROI
        cv2.drawContours(roi, filtered_contours, -1, (0, 255, 0), 2)

        # Dibuja los contornos filtrados en la ROI
        cv2.drawContours(roi, filtered_contours, -1, (0, 255, 0), 2)

# Muestra la imagen con las detecciones y los contornos filtrados
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()



0: 256x640 2 cars, 86.3ms
Speed: 4.5ms preprocess, 86.3ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 640)


Confidence ---> 0.94
Class name --> car
Confidence ---> 0.34
Class name --> car


In [7]:
import os
# Directorio base donde se encuentra la carpeta "dataset"
base_dir = ''

# Ruta completa de la carpeta "train" dentro de la estructura
train_dir = os.path.join(base_dir, 'dataset', 'train', 'images')

# Crear una lista para almacenar los nombres de los archivos que cumplan el patrón
file_list = []

# Iterar a través de los archivos en la carpeta "train"
for root, dirs, files in os.walk(train_dir):
    for filename in files:
        if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp')):
            # Comprobar si el archivo es una imagen (puedes agregar más extensiones si es necesario)
            file_list.append(os.path.join(root, filename))
print(file_list)
# Imprimir la lista de archivos que cumplen el patrón
for file_path in file_list:
    print(file_path)


['dataset\\train\\images\\0802HFP_jpg.rf.30755e346cf1455361344eeeceb55cb1.jpg', 'dataset\\train\\images\\0802HFP_jpg.rf.30cd9ca231b6667068ad26b808dd99da.jpg', 'dataset\\train\\images\\0802HFP_jpg.rf.ba381edbab2dcddf24f9739d99ae26d4.jpg', 'dataset\\train\\images\\1159FPG_jpg.rf.165f7f6df48b376a9863eab1befe71bf.jpg', 'dataset\\train\\images\\1159FPG_jpg.rf.3ff8b432d3fb4ab2537c7f6ec80ce560.jpg', 'dataset\\train\\images\\1159FPG_jpg.rf.8ac0d77c1a7d6f3b8f208fe9ec6f887f.jpg', 'dataset\\train\\images\\1319FSX_jpg.rf.42cac49225e519a2c8e5d1ef1bc1a9df.jpg', 'dataset\\train\\images\\1319FSX_jpg.rf.8aa60298e9d1b1a0ec29e0189c61bf40.jpg', 'dataset\\train\\images\\1319FSX_jpg.rf.f37800d74affae5e5dc10a94693dbfbf.jpg', 'dataset\\train\\images\\15989862207427_jpg.rf.8f731f82cf1639282dc1a5c81fd7d483.jpg', 'dataset\\train\\images\\15989862207427_jpg.rf.8ff0c28508f614a0427c9a7bf48136db.jpg', 'dataset\\train\\images\\15989862207427_jpg.rf.d4400cbe860cbdc49c9f7a7ac785a91c.jpg', 'dataset\\train\\images\\16370

Prueba con dataset

In [8]:
import cv2
import math
from ultralytics import YOLO  # Asegúrate de importar el módulo YOLO correcto

import os

# Directorio base donde se encuentra la carpeta "dataset"
base_dir = ''

# Ruta completa de la carpeta "train" dentro de la estructura
train_dir = os.path.join(base_dir, 'dataset', 'train', 'images')

# Crear una lista para almacenar los nombres de los archivos de imágenes
image_files = []

# Iterar a través de los archivos en la carpeta "train"
for root, dirs, files in os.walk(train_dir):
    for filename in files:
        if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp')):
            # Comprobar si el archivo es una imagen (puedes agregar más extensiones si es necesario)
            image_files.append(os.path.join(root, filename))

# Carga del modelo YOLO
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Iterar a través de la lista de archivos de imágenes
for image_path in image_files:
    # Carga la imagen que deseas procesar
    img = cv2.imread(image_path)

    # Realiza inferencia en la imagen
    results = model(img)

    # Para cada detección
    for r in results:
        boxes = r.boxes

        for box in boxes:
            # Contenedor
            x1, y1, x2, y2 = box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # Convierte a valores enteros

            # Confianza
            confidence = math.ceil((box.conf[0] * 100)) / 100
            print("Confidence --->", confidence)

            # Clase
            cls = int(box.cls[0])
            print("Class name -->", classNames[cls])

            # Convierte el identificador numérico de la clase en un color RGB
            escala = int((cls / len(classNames)) * 255 * 3)
            if escala >= 255 * 2:
                R = 255
                G = 255
                B = escala - 255 * 2
            else:
                if escala >= 255:
                    R = 255
                    G = escala - 255
                    B = 0
                else:
                    R = escala
                    G = 0
                    B = 0

            # Dibuja el contenedor y la clase
            cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
            cv2.putText(img, classNames[cls], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

            # Obtén la ROI
            roi = img[y1:y2, x1:x2]

            # Convierte ROI a escala de grises
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

            # Aplica umbral para encontrar contornos
            _, thresh = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY)

            # Encuentra contornos en la ROI
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            # Umbral de área para considerar como contorno grande (ajusta este valor según tus necesidades)
            area_threshold = 1000

            # Filtra los contornos que tienen forma rectangular aproximada y un área grande
            filtered_contours = [contour for contour in contours if is_approximately_rectangular(contour) and cv2.contourArea(contour) > area_threshold]

            # Dibuja los contornos filtrados en la ROI
            cv2.drawContours(roi, filtered_contours, -1, (0, 255, 0), 2)

    # Muestra la imagen con las detecciones y los contornos filtrados
    cv2.imshow('Image', img)
    cv2.waitKey(0)

# Cierra todas las ventanas de visualización
cv2.destroyAllWindows()



0: 640x640 1 train, 162.6ms
Speed: 8.1ms preprocess, 162.6ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.3
Class name --> train



0: 640x640 1 oven, 155.1ms
Speed: 8.8ms preprocess, 155.1ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.46
Class name --> oven



0: 640x640 1 car, 1 oven, 169.9ms
Speed: 9.6ms preprocess, 169.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.38
Class name --> oven
Confidence ---> 0.29
Class name --> car



0: 640x640 1 truck, 207.6ms
Speed: 11.1ms preprocess, 207.6ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.64
Class name --> truck



0: 640x640 1 truck, 179.9ms
Speed: 2.6ms preprocess, 179.9ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.71
Class name --> truck



0: 640x640 1 truck, 200.2ms
Speed: 0.0ms preprocess, 200.2ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.74
Class name --> truck



0: 640x640 1 car, 1 bus, 214.4ms
Speed: 8.7ms preprocess, 214.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.34
Class name --> bus
Confidence ---> 0.32
Class name --> car



0: 640x640 1 bus, 1 truck, 289.4ms
Speed: 10.0ms preprocess, 289.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.37
Class name --> truck
Confidence ---> 0.31
Class name --> bus



0: 640x640 1 bus, 1 truck, 208.1ms
Speed: 8.0ms preprocess, 208.1ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.54
Class name --> bus
Confidence ---> 0.43
Class name --> truck



0: 640x640 1 truck, 180.1ms
Speed: 7.8ms preprocess, 180.1ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.83
Class name --> truck



0: 640x640 1 truck, 170.0ms
Speed: 0.0ms preprocess, 170.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.74
Class name --> truck



0: 640x640 1 truck, 169.2ms
Speed: 10.4ms preprocess, 169.2ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.79
Class name --> truck



0: 640x640 (no detections), 188.0ms
Speed: 10.1ms preprocess, 188.0ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 remote, 195.7ms
Speed: 9.7ms preprocess, 195.7ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.7
Class name --> remote



0: 640x640 1 car, 189.8ms
Speed: 0.0ms preprocess, 189.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.4
Class name --> car



0: 640x640 2 cars, 1 truck, 194.3ms
Speed: 5.4ms preprocess, 194.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.76
Class name --> car
Confidence ---> 0.63
Class name --> car
Confidence ---> 0.38
Class name --> truck



0: 640x640 2 cars, 1 truck, 239.8ms
Speed: 0.0ms preprocess, 239.8ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.7
Class name --> car
Confidence ---> 0.65
Class name --> car
Confidence ---> 0.52
Class name --> truck



0: 640x640 2 cars, 1 truck, 199.7ms
Speed: 12.0ms preprocess, 199.7ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.8
Class name --> car
Confidence ---> 0.67
Class name --> car
Confidence ---> 0.39
Class name --> truck



0: 640x640 1 car, 190.4ms
Speed: 9.6ms preprocess, 190.4ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.73
Class name --> car



0: 640x640 1 car, 1 truck, 162.9ms
Speed: 9.4ms preprocess, 162.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.58
Class name --> truck
Confidence ---> 0.47
Class name --> car



0: 640x640 1 car, 1 truck, 124.2ms
Speed: 0.0ms preprocess, 124.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.44
Class name --> truck
Confidence ---> 0.31
Class name --> car



0: 640x640 1 car, 1 bus, 190.9ms
Speed: 1.3ms preprocess, 190.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 car, 118.4ms
Speed: 0.0ms preprocess, 118.4ms inference, 8.1ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.57
Class name --> bus
Confidence ---> 0.38
Class name --> car
Confidence ---> 0.37
Class name --> car



0: 640x640 2 cars, 1 truck, 124.1ms
Speed: 5.6ms preprocess, 124.1ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 car, 1 motorcycle, 1 parking meter, 128.3ms
Speed: 8.7ms preprocess, 128.3ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.48
Class name --> car
Confidence ---> 0.47
Class name --> car
Confidence ---> 0.33
Class name --> truck
Confidence ---> 0.55
Class name --> car
Confidence ---> 0.35
Class name --> motorbike
Confidence ---> 0.32
Class name --> parking meter



0: 640x640 1 car, 1 backpack, 122.2ms
Speed: 8.3ms preprocess, 122.2ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 cars, 1 motorcycle, 130.2ms


Confidence ---> 0.59
Class name --> car
Confidence ---> 0.28
Class name --> backpack


Speed: 4.5ms preprocess, 130.2ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.54
Class name --> car
Confidence ---> 0.27
Class name --> motorbike
Confidence ---> 0.26
Class name --> car



0: 640x640 1 bus, 246.0ms
Speed: 3.7ms preprocess, 246.0ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 bus, 115.7ms
Speed: 4.2ms preprocess, 115.7ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.87
Class name --> bus
Confidence ---> 0.87
Class name --> bus



0: 640x640 1 bus, 176.6ms
Speed: 4.6ms preprocess, 176.6ms inference, 8.3ms postprocess per image at shape (1, 3, 640, 640)



Confidence ---> 0.85
Class name --> bus


0: 640x640 3 cars, 149.2ms
Speed: 8.2ms preprocess, 149.2ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.9
Class name --> car
Confidence ---> 0.77
Class name --> car
Confidence ---> 0.69
Class name --> car



0: 640x640 3 cars, 195.5ms
Speed: 4.5ms preprocess, 195.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.9
Class name --> car
Confidence ---> 0.87
Class name --> car
Confidence ---> 0.48
Class name --> car



0: 640x640 3 cars, 188.3ms
Speed: 0.0ms preprocess, 188.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.91
Class name --> car
Confidence ---> 0.86
Class name --> car
Confidence ---> 0.39
Class name --> car



0: 640x640 4 cars, 1 truck, 199.8ms
Speed: 9.5ms preprocess, 199.8ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.86
Class name --> car
Confidence ---> 0.7
Class name --> car
Confidence ---> 0.57
Class name --> car
Confidence ---> 0.45
Class name --> truck
Confidence ---> 0.3
Class name --> car



0: 640x640 4 cars, 1 truck, 208.6ms
Speed: 2.0ms preprocess, 208.6ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.71
Class name --> car
Confidence ---> 0.67
Class name --> car
Confidence ---> 0.41
Class name --> car
Confidence ---> 0.39
Class name --> car
Confidence ---> 0.29
Class name --> truck



0: 640x640 4 cars, 1 truck, 191.0ms
Speed: 8.6ms preprocess, 191.0ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.79
Class name --> car
Confidence ---> 0.76
Class name --> car
Confidence ---> 0.64
Class name --> car
Confidence ---> 0.35
Class name --> car
Confidence ---> 0.29
Class name --> truck



0: 640x640 2 cars, 1 bus, 192.0ms
Speed: 9.6ms preprocess, 192.0ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.9
Class name --> car
Confidence ---> 0.79
Class name --> car
Confidence ---> 0.34
Class name --> bus



0: 640x640 2 cars, 1 bus, 190.1ms
Speed: 10.5ms preprocess, 190.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.94
Class name --> car
Confidence ---> 0.63
Class name --> car
Confidence ---> 0.44
Class name --> bus



0: 640x640 2 cars, 204.2ms
Speed: 4.1ms preprocess, 204.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.92
Class name --> car
Confidence ---> 0.79
Class name --> car



0: 640x640 5 cars, 1 bus, 191.7ms
Speed: 8.0ms preprocess, 191.7ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.87
Class name --> car
Confidence ---> 0.81
Class name --> car
Confidence ---> 0.77
Class name --> car
Confidence ---> 0.58
Class name --> car
Confidence ---> 0.46
Class name --> bus
Confidence ---> 0.29
Class name --> car



0: 640x640 5 cars, 1 truck, 160.5ms
Speed: 9.5ms preprocess, 160.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.86
Class name --> car
Confidence ---> 0.85
Class name --> car
Confidence ---> 0.77
Class name --> car
Confidence ---> 0.57
Class name --> car
Confidence ---> 0.42
Class name --> truck
Confidence ---> 0.35
Class name --> car



0: 640x640 5 cars, 1 truck, 195.0ms
Speed: 5.2ms preprocess, 195.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)



Confidence ---> 0.84
Class name --> car
Confidence ---> 0.81
Class name --> car
Confidence ---> 0.79
Class name --> car
Confidence ---> 0.6
Class name --> car
Confidence ---> 0.49
Class name --> car
Confidence ---> 0.41
Class name --> truck


0: 640x640 1 car, 172.3ms
Speed: 7.4ms preprocess, 172.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)



Confidence ---> 0.54
Class name --> car


0: 640x640 2 cars, 128.2ms
Speed: 5.0ms preprocess, 128.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.52
Class name --> car
Confidence ---> 0.36
Class name --> car



0: 640x640 3 cars, 190.4ms
Speed: 14.6ms preprocess, 190.4ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.46
Class name --> car
Confidence ---> 0.36
Class name --> car
Confidence ---> 0.29
Class name --> car



0: 640x640 1 car, 142.8ms
Speed: 3.8ms preprocess, 142.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.6
Class name --> car



0: 640x640 1 car, 156.3ms
Speed: 11.8ms preprocess, 156.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.68
Class name --> car



0: 640x640 1 car, 1 truck, 175.0ms
Speed: 5.0ms preprocess, 175.0ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.64
Class name --> car
Confidence ---> 0.64
Class name --> truck



0: 640x640 2 persons, 4 cars, 159.8ms
Speed: 5.0ms preprocess, 159.8ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.92
Class name --> car
Confidence ---> 0.86
Class name --> person
Confidence ---> 0.85
Class name --> car
Confidence ---> 0.72
Class name --> car
Confidence ---> 0.66
Class name --> car
Confidence ---> 0.42
Class name --> person



0: 640x640 2 persons, 4 cars, 147.0ms
Speed: 4.6ms preprocess, 147.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)



Confidence ---> 0.9
Class name --> car
Confidence ---> 0.85
Class name --> person
Confidence ---> 0.82
Class name --> car
Confidence ---> 0.77
Class name --> car
Confidence ---> 0.7
Class name --> car
Confidence ---> 0.42
Class name --> person


0: 640x640 2 persons, 4 cars, 199.9ms
Speed: 5.1ms preprocess, 199.9ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 5 persons, 2 cars, 160.3ms
Speed: 5.0ms preprocess, 160.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.95
Class name --> car
Confidence ---> 0.9
Class name --> car
Confidence ---> 0.83
Class name --> person
Confidence ---> 0.81
Class name --> car
Confidence ---> 0.58
Class name --> person
Confidence ---> 0.48
Class name --> car
Confidence ---> 0.83
Class name --> car
Confidence ---> 0.81
Class name --> person
Confidence ---> 0.6
Class name --> person
Confidence ---> 0.58
Class name --> person
Confidence ---> 0.34
Class name --> car
Confidence ---> 0.33
Class name --> person
Confidence ---> 0.28
Class name --> person



0: 640x640 5 persons, 2 cars, 152.1ms
Speed: 6.6ms preprocess, 152.1ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.86
Class name --> car
Confidence ---> 0.81
Class name --> person
Confidence ---> 0.8
Class name --> person
Confidence ---> 0.51
Class name --> person
Confidence ---> 0.47
Class name --> person
Confidence ---> 0.31
Class name --> person
Confidence ---> 0.27
Class name --> car



0: 640x640 7 persons, 4 cars, 182.9ms
Speed: 8.9ms preprocess, 182.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)



Confidence ---> 0.83
Class name --> person
Confidence ---> 0.8
Class name --> car
Confidence ---> 0.69
Class name --> person
Confidence ---> 0.51
Class name --> person
Confidence ---> 0.41
Class name --> car
Confidence ---> 0.36
Class name --> person
Confidence ---> 0.35
Class name --> car
Confidence ---> 0.35
Class name --> car
Confidence ---> 0.33
Class name --> person
Confidence ---> 0.31
Class name --> person
Confidence ---> 0.26
Class name --> person


0: 640x640 1 car, 1 truck, 493.7ms
Speed: 5.5ms preprocess, 493.7ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 640)



Confidence ---> 0.41
Class name --> car
Confidence ---> 0.38
Class name --> truck


0: 640x640 1 truck, 496.9ms
Speed: 7.2ms preprocess, 496.9ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 640)



Confidence ---> 0.52
Class name --> truck


0: 640x640 1 bus, 1 truck, 258.2ms
Speed: 17.6ms preprocess, 258.2ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.41
Class name --> truck
Confidence ---> 0.4
Class name --> bus



0: 640x640 1 car, 1 truck, 448.8ms
Speed: 9.8ms preprocess, 448.8ms inference, 7.8ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.37
Class name --> car
Confidence ---> 0.3
Class name --> truck



0: 640x640 1 car, 265.6ms
Speed: 4.9ms preprocess, 265.6ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.39
Class name --> car



0: 640x640 1 train, 206.7ms
Speed: 6.7ms preprocess, 206.7ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 640)


Confidence ---> 0.35
Class name --> train


In [15]:
from ultralytics import YOLO

# Load a model
model = YOLO("yolov8n.yaml")  # build a new model from scratch

# Use the model
results = model.train(data="config.yaml", epochs=100, patience=15)  # train the model



                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128

In [21]:
import cv2
import math
import easyocr
from ultralytics import YOLO

# Carga de los modelos
# Carga del modelo YOLO
car_model = YOLO('yolov8n.pt')

license_plate_model = YOLO('best.pt')

# Captura de video desde un archivo
cap = cv2.VideoCapture('prueba.mp4')  # Reemplaza 'tu_video.mp4' con el nombre de tu archivo de video

# Inicializa el lector de OCR de EasyOCR
reader = easyocr.Reader(lang_list=['en'])  # Ajusta los idiomas según tus necesidades

while True:
    ret, frame = cap.read()

    if not ret:
        break

    # Realiza detección de vehículos en el frame
    car_results = license_plate_model(frame, stream=True)

    for r in car_results:
        car_boxes = r.boxes

        for car_box in car_boxes:
            x1, y1, x2, y2 = car_box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

            # Dibuja el bounding box del vehículo
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)

            # Realiza detección de matrículas en el área del vehículo
            license_plate_crop = frame[y1:y2, x1:x2]
            license_plate_results = license_plate_model(license_plate_crop, stream=True)

            for lp_result in license_plate_results:
                lp_boxes = lp_result.boxes

                for lp_box in lp_boxes:
                    x1_lp, y1_lp, x2_lp, y2_lp = lp_box.xyxy[0]
                    x1_lp, y1_lp, x2_lp, y2_lp = int(x1_lp), int(y1_lp), int(x2_lp), int(y2_lp)

                    # Dibuja el bounding box de la matrícula
                    cv2.rectangle(frame, (x1 + x1_lp, y1 + y1_lp), (x1 + x2_lp, y1 + y2_lp), (0, 0, 255), 2)

                    # Realiza OCR en la matrícula con EasyOCR
                    license_plate_crop = frame[y1 + y1_lp:y1 + y2_lp, x1 + x1_lp:x1 + x2_lp]
                    gray_plate = cv2.cvtColor(license_plate_crop, cv2.COLOR_BGR2GRAY)
                    results = reader.readtext(gray_plate)

                    if results:
                        license_plate_text = results[0][1]

                        # Muestra el texto de la matrícula en la ventana
                        cv2.putText(frame, license_plate_text, (x1 + x1_lp, y1 + y1_lp - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    # Muestra el frame con las detecciones en una ventana
    cv2.imshow('Video con Detecciones', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


0: 128x640 (no detections), 160.2ms
Speed: 4.5ms preprocess, 160.2ms inference, 0.5ms postprocess per image at shape (1, 3, 128, 640)
0: 384x640 1 matricula, 180.4ms
Speed: 10.1ms preprocess, 180.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 134.5ms
Speed: 7.0ms preprocess, 134.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 110.4ms
Speed: 3.5ms preprocess, 110.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)


0: 128x640 (no detections), 47.9ms
Speed: 0.5ms preprocess, 47.9ms inference, 0.0ms postprocess per image at shape (1, 3, 128, 640)
0: 384x640 1 matricula, 95.2ms
Speed: 0.0ms preprocess, 95.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 91.9ms
Speed: 3.3ms preprocess, 91.9ms inference, 0.0ms postprocess per

In [2]:
import cv2
import math
import easyocr
from ultralytics import YOLO

In [20]:
# Carga del modelo
model = YOLO('best.pt')

# Nombre de las distintas clases
classNames = ["matricula"]

# Lee la imagen desde un archivo
img = cv2.imread('mssulove.png')

# Perform inference on the image
results = model(img) 
print(model(img))
# Para cada detección
for r in results:
    boxes = r.boxes

    for box in boxes:
        # Contenedor
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values

        # Confianza
        confidence = math.ceil((box.conf[0]*100))/100
        print("Confidence --->",confidence)

        # Clase
        cls = int(box.cls[0])
        print("Class name -->", classNames[cls])

        # Convierte identificador numérico de clase a un color RGB
        escala = int((cls / len(classNames)) * 255 * 3)
        if escala >= 255*2:
            R = 255
            G = 255
            B = escala - 255*2
        else:
            if escala >= 255:
                R = 255
                G = escala - 255
                B = 0
            else:
                R = escala
                G = 0
                B = 0

        # Dibuja el contenedor y clase
        cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
        cv2.putText(img, classNames[cls] , [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

# Muestra la imagen con las detecciones
cv2.imshow('Imagen con Detecciones', img)
cv2.waitKey(0)

# Destruye la ventana
cv2.destroyAllWindows()



0: 256x640 1 matricula, 72.5ms
Speed: 4.0ms preprocess, 72.5ms inference, 5.6ms postprocess per image at shape (1, 3, 256, 640)

0: 256x640 1 matricula, 65.7ms
Speed: 1.3ms preprocess, 65.7ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 640)


[ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'matricula'}
orig_img: array([[[153, 147, 147],
        [177, 170, 170],
        [192, 185, 185],
        ...,
        [ 58,  56,  54],
        [255, 255, 255],
        [255, 255, 255]],

       [[150, 145, 144],
        [160, 153, 153],
        [211, 204, 204],
        ...,
        [ 72,  69,  66],
        [255, 255, 255],
        [255, 255, 255]],

       [[154, 149, 148],
        [137, 130, 130],
        [187, 180, 180],
        ...,
        [133, 130, 127],
        [255, 255, 255],
        [255, 255, 255]],

       ...,

       [[ 29,  28,  25],
        [ 27,  26,  23],
        [ 26,  25,  22],
        ...,
        [194, 192, 191],
        [255, 255, 255],
        [255, 255, 255]],

       [[ 25,  24,  21],
        [ 25,  24,  21],
        [ 25,  24,  21],
        ...,
        [195, 193, 192],
        [255, 255, 255],
        [255, 255, 