# Task

Para la entrega de esta práctica, la tarea consiste en desarrollar un prototipo que procese uno (vídeo ejemplo proporcionado) o varios vídeos (incluyendo vídeos de cosecha propia):

- Detecte y siga las personas y vehículos presentes
- Detecte y lea las matrículas de los vehículos presentes
- Cuente el total de cada clase
- Vuelque a disco un vídeo que visualice los resultados
- Genere un archivo csv con el resultado de la detección y seguimiento. Se sugiere un formato con al menos los siguientes campos:

`fotograma, tipo_objeto, confianza, identificador_tracking, x1, y1, x2, y2, matrícula_en_su_caso, confianza, mx1, my1, mx2, my2, texto_matricula`

In [None]:
# Import necessary libraries
from collections import defaultdict
from pytesseract import Output
from ultralytics import YOLO
import pytesseract
import numpy as np
import math
import cv2

In [None]:
# Global configuration: Tesseract location, output mode, detection models and video source.

# You need to download the Tesseract executables beforehand
# If the Tesseract path is not in the system PATH, specify the executable path
pytesseract.pytesseract.tesseract_cmd = r'/opt/homebrew/bin/tesseract'

# Output mode: True shows the annotated frames in a window on screen,
# False writes them to a video file on disk instead
display = True

# Load the YOLO models used by the processing loop
model = YOLO('YOLO/yolo11n.pt')  # Model to detect and track people and vehicles
# NOTE(review): 'best.pt' looks like custom-trained weights; presumably it must already
# exist at this path -- confirm
license_plate_model = YOLO('YOLO/best.pt')  # Model to detect license plates

# Labels indexed by the class id reported by `model`. The empty strings at indices 4 and 6
# are placeholders: those class ids are not requested in the tracking call.
classNames = ["person", "bicycle", "car", "motorbike", "", "bus", "", "truck"]

# Configuration for video capture from a file
vid = cv2.VideoCapture("Videos/Original Video.mp4")

# If saving to disk, set the width, height, frame rate and output file configuration
if not display:
    frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Reuse the frame rate of the source so the saved video plays at the original speed.
    # Fall back to 20 fps when the capture does not report a usable (positive) value.
    source_fps = vid.get(cv2.CAP_PROP_FPS)
    output_fps = source_fps if source_fps > 0 else 20

    output_video = cv2.VideoWriter(
        'YOLO/resultados.avi',
        cv2.VideoWriter_fourcc(*'XVID'),
        output_fps,
        (frame_width, frame_height),
    )

# Main processing loop: for every frame, track people/vehicles, look for license plates
# inside each vehicle, OCR the plate region, and draw the results. The annotated frame is
# either shown on screen or appended to the output video, depending on `display`.
#
# NOTE(review): per-class counting and the CSV export listed in the task description are
# not produced by this loop.
#
# The try/finally guarantees that the capture, the writer and the window are released even
# if detection, OCR or drawing raises in the middle of the video.
try:
    while True:
        # Read the next frame from the video
        ret, img = vid.read()

        # Stop when the video ends or a frame cannot be read
        if not ret:
            break

        # Draw on a copy: `img` stays untouched so boxes and labels painted for earlier
        # detections never leak into the crops given to the plate detector and the OCR
        annotated = img.copy()

        # Perform object tracking with persistence across frames, focusing on specific classes
        results = model.track(img, persist=True, classes=[0, 1, 2, 3, 5, 7])

        # Process each detection result
        for r in results:
            # Process each bounding box of detected objects
            for box in r.boxes:
                # Bounding box limits as integer pixel coordinates
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                # Tracking ID if the tracker assigned one, empty string otherwise
                track_id = str(int(box.id[0].tolist())) if box.id is not None else ''

                # Detection confidence rounded up to two decimals
                confidence = math.ceil(float(box.conf[0]) * 100) / 100
                print("Confidence --->", confidence)

                # Determine the object class
                cls = int(box.cls[0])
                print("Class -->", classNames[cls])

                # Class ids >= 2 are the vehicle classes in classNames (car, motorbike, bus, truck)
                if cls >= 2:
                    # Crop the vehicle from the clean frame
                    car_crop = img[y1:y2, x1:x2]

                    # A degenerate box gives an empty crop; skip plate detection in that case
                    if car_crop.size > 0:
                        license_plate_results = license_plate_model.predict(car_crop)
                    else:
                        license_plate_results = []

                    # Candidate plate texts for this vehicle, scored by plate-detection
                    # confidence; "Not Found" wins only when no text was read
                    possible_licenses = {"Not Found": 0}

                    # Process each license plate detection within the vehicle
                    for lp_result in license_plate_results:
                        for lp_box in lp_result.boxes:
                            # Plate coordinates, still relative to the vehicle crop
                            lp_x1, lp_y1, lp_x2, lp_y2 = (int(v) for v in lp_box.xyxy[0])

                            # OCR only the plate region, not the whole frame
                            plate_crop = car_crop[lp_y1:lp_y2, lp_x1:lp_x2]
                            if plate_crop.size > 0:
                                raw_text = pytesseract.image_to_string(plate_crop)
                                # Collapse newlines/extra spaces so the label is a single line
                                plate_text = " ".join(raw_text.split())
                                if plate_text:
                                    plate_score = float(lp_box.conf[0])
                                    possible_licenses[plate_text] = max(
                                        possible_licenses.get(plate_text, 0), plate_score
                                    )

                            # Adjust license plate coordinates relative to the full frame
                            lp_x1 += x1
                            lp_x2 += x1
                            lp_y1 += y1
                            lp_y2 += y1

                            # Draw the plate rectangle and the best candidate text so far
                            best_plate = max(possible_licenses, key=possible_licenses.get)
                            cv2.rectangle(annotated, (lp_x1, lp_y1), (lp_x2, lp_y2), (0, 255, 0), 2)
                            cv2.putText(annotated,
                                        f"License: {best_plate}",
                                        (lp_x1, lp_y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                # Derive a per-class color by spreading the class index over three channels.
                # OpenCV reads color tuples as (blue, green, red), so c0 is the blue channel.
                scale = int((cls / len(classNames)) * 255 * 3)
                if scale >= 255 * 2:
                    c0, c1, c2 = 255, 255, scale - 255 * 2
                elif scale >= 255:
                    c0, c1, c2 = 255, scale - 255, 0
                else:
                    c0, c1, c2 = scale, 0, 0

                # Draw the bounding box and the "<track id> <class name>" label
                cv2.rectangle(annotated, (x1, y1), (x2, y2), (c0, c1, c2), 3)
                cv2.putText(annotated, track_id + ' ' + classNames[cls], (x1, y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, c2), 2)

        # Display the frame on the screen or save it to the video file
        if display:
            cv2.imshow('Video', annotated)
            # waitKey lets the window refresh; stop processing if the ESC key is pressed
            if cv2.waitKey(20) & 0xFF == 27:
                break
        else:
            output_video.write(annotated)
finally:
    # Release the video capture resource
    vid.release()
    if display:
        cv2.destroyAllWindows()
    else:
        output_video.release()  # Finalize the video file