# Imports

In [1]:
import cv2
import math
import numpy as np
from collections import (defaultdict, namedtuple)

In [2]:
from ultralytics import YOLO

In [3]:
import easyocr

# Constants

In [4]:
# Class names indexed by the detector's integer class id (used as ALL_TAGS[cls]).
ALL_TAGS = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa",
    "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
]

# Subset of classes to detect/track, and their positions inside ALL_TAGS.
TAGS = [
    "person", "bicycle", "car", "motorbike", "aeroplane",
    "bus", "train", "truck", "boat",
]
TAGS_IDXS = list(map(ALL_TAGS.index, TAGS))

# Input files: detector weights, licence-plate detector weights, source video.
MODEL_FN = "yolo11n.pt"
MODEL_LP_FN = "yolo11n-license_plate.pt"
VIDEO_FN = "C0142.MP4"

# Output files.
OUTPUT_VIDEO_FN = "output.mp4"
OUTPUT_DATA_FN = "output.csv"

# CSV header line; the same comma-separated string provides the Entry field names.
CSV_HEAD = "fotograma,tipo_objeto,confianza,identificador_tracking,x1,y1,x2,y2,matrícula_en_su_caso,confianza_matricula,mx1,my1,mx2,my2,texto_matricula"

# One CSV row. str(entry) joins the field values, in field order, with commas
# (no quoting or escaping; None is rendered as the text "None").
Entry = namedtuple("Entry", CSV_HEAD)
Entry.__str__ = lambda self: ",".join(map(str, self))

In [5]:
# General object detector built from the weights file named by MODEL_FN;
# the processing cell below calls model.track() on every video frame.
model = YOLO(MODEL_FN)

In [6]:
# Second YOLO model built from the weights file named by MODEL_LP_FN;
# the processing cell below runs it on the cropped image of each car/motorbike/bus.
model_lp = YOLO(MODEL_LP_FN)

In [7]:
# EasyOCR reader created with the language list ['es'] (presumably Spanish);
# the processing cell below calls ocr.recognize() on licence-plate crops.
ocr = easyocr.Reader(['es'])

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


# Video processing: detection, tracking and licence-plate OCR

In [8]:


# Main processing cell.
#
# For every frame of VIDEO_FN:
#   1. detect and track the classes listed in TAGS_IDXS with `model`;
#   2. for cars, motorbikes and buses, run `model_lp` on the cropped vehicle,
#      then OCR the first detected plate with `ocr`;
#   3. record one Entry per detection and draw the boxes on an annotated copy
#      of the frame, which is shown on screen and written to OUTPUT_VIDEO_FN.
# Processing stops at the end of the video or when ESC is pressed. The collected
# entries are then written to OUTPUT_DATA_FN as CSV.

# Class ids (positions in ALL_TAGS) that are searched for a licence plate:
# 2 = car, 3 = motorbike, 5 = bus.
LP_VEHICLE_CLASSES = (2, 3, 5)

vid = cv2.VideoCapture(VIDEO_FN)
if not vid.isOpened():
    # Fail loudly instead of silently producing an empty video/CSV.
    raise IOError(f"Could not open input video: {VIDEO_FN}")

# The output video keeps the FPS and frame size of the input.
out = cv2.VideoWriter(OUTPUT_VIDEO_FN, cv2.VideoWriter_fourcc(*'mp4v'), vid.get(cv2.CAP_PROP_FPS),
                      (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))))

entries = []
frame_id = 0  # incremented before use, so the first frame is recorded as 1

try:
    while True:
        ret, img = vid.read()
        if not ret:
            # End of stream (or read error): stop instead of spinning until ESC.
            break

        frame_id += 1
        results = model.track(img, persist=True, classes=TAGS_IDXS, verbose=False)

        # Draw on a copy: vehicle crops are numpy views of `img`, so drawing on
        # `img` itself would leak earlier annotations into later crops that are
        # fed to the plate detector and the OCR.
        annotated = img.copy()

        for r in results:
            for box in r.boxes:

                # container
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                # tracking id (empty string when the tracker assigned none)
                track_id = str(int(box.id[0])) if box.id is not None else ''

                # confidence, rounded up to two decimals
                confidence = math.ceil(box.conf[0]*100)/100

                # class
                cls = int(box.cls[0])
                classname = ALL_TAGS[cls]

                # class id to a colour on a 3-segment ramp
                scale = int((cls/len(TAGS)) * 255 * 3)
                if scale >= 255 * 2:
                    R, G, B = 255, 255, scale - 255*2
                elif scale >= 255:
                    R, G, B = 255, scale - 255, 0
                else:
                    R, G, B = scale, 0, 0

                # Licence-plate state is local to this detection, so nothing can
                # be carried over from a previous box or frame.
                lptext, lpprob = None, None
                lpx1, lpy1, lpx2, lpy2 = None, None, None, None

                # vehicle with lp (skip degenerate boxes: an empty crop cannot be inferred on)
                if cls in LP_VEHICLE_CLASSES and x1 < x2 and y1 < y2:

                    subimg = img[y1:y2, x1:x2]
                    lpresult = model_lp(subimg, verbose=False)

                    if len(lpresult) > 0 and len(lpresult[0].boxes) > 0:

                        # Only the first plate box is used.
                        lpbox = lpresult[0].boxes[0]

                        # container (coordinates are relative to the vehicle crop)
                        lpx1, lpy1, lpx2, lpy2 = (int(v) for v in lpbox.xyxy[0])

                        if lpy1 < lpy2 and lpx1 < lpx2:
                            lpimg = subimg[lpy1:lpy2, lpx1:lpx2]
                            cv2.imshow("lp", lpimg)

                            ocr_res = ocr.recognize(lpimg, allowlist="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
                            if len(ocr_res) > 0:
                                # Each result unpacks as (box, text, confidence); the box is unused.
                                _ocr_box, lptext, lpprob = ocr_res[0]

                            # Plate rectangle in full-frame coordinates.
                            cv2.rectangle(annotated, (x1+lpx1, y1+lpy1), (x1+lpx2, y1+lpy2), (255, 0, 0), 3)

                # add entry: plate columns are filled only when the OCR produced text.
                # NOTE(review): the 9th column is always the literal "MATRICULA" — confirm
                # that this placeholder is what the consumer of the CSV expects.
                has_plate = bool(lptext)
                entries.append(Entry(frame_id, classname, confidence, track_id, x1, y1, x2, y2,
                                     "MATRICULA",
                                     lpprob if has_plate else None,
                                     x1+lpx1 if has_plate else None,
                                     y1+lpy1 if has_plate else None,
                                     x1+lpx2 if has_plate else None,
                                     y1+lpy2 if has_plate else None,
                                     lptext if has_plate else None))

                # show on img
                cv2.rectangle(annotated, (x1, y1), (x2, y2), (R, G, B), 3)
                cv2.putText(annotated, f'{track_id} {classname}', (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (100, 255, B), 3)

        cv2.imshow("Video", annotated)
        out.write(annotated)

        # ESC stops processing; mask to the low byte so the comparison does not
        # depend on platform-specific high bits in the key code.
        if cv2.waitKey(10) & 0xFF == 27:
            break
finally:
    # Always finalise the output video and close the windows, even on error.
    out.release()
    vid.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

# Explicit UTF-8: the header contains a non-ASCII character.
with open(OUTPUT_DATA_FN, 'w', encoding='utf-8') as file:
    file.write(CSV_HEAD + '\n')
    for entry in entries:
        file.write(str(entry) + '\n')

2024-10-29 23:58:58.173 python[91357:2371664] +[IMKClient subclass]: chose IMKClient_Legacy
2024-10-29 23:58:58.174 python[91357:2371664] +[IMKInputSession subclass]: chose IMKInputSession_Legacy


-1