In [3]:
# Importar bibliotecas necesarias
import cv2
import re
import numpy as np
import pytesseract
from pytesseract import Output
from ultralytics import YOLO
import csv

# Tesseract OCR configuration: path of the Tesseract executable that
# pytesseract invokes (Windows install location, hard-coded).
pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract'

# Load the YOLO models (weights are resolved relative to the working directory)
model1 = YOLO("../models/best_vc.pt")  # Licence-plate model
model2 = YOLO("../models/yolo11n.pt")  # Pretrained model for generic objects (people, cars, etc.)

# Class-name tables for each model; they are indexed with the integer class id
# reported by each detection box.
# NOTE(review): some labels below ("motorbike", "aeroplane", "sofa",
# "pottedplant", "diningtable", "tvmonitor") look like VOC-style spellings
# rather than the names the pretrained weights ship with -- presumably only
# cosmetic; confirm against model2.names.
classNames_model1 = ["matricula"]
classNames_model2 = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"]

# Pattern for Spanish licence plates (format NNNN AAA): four digits, one
# whitespace character and three letters from the consonant set below.
plate_pattern = re.compile(r'^\d{4}\s[BCDFGHJKLMNPRSTVWXYZ]{3}$')

# Same plate shape, but unanchored and with the two halves captured, so a
# plate can be located anywhere inside noisy (possibly multi-line) OCR text.
_plate_search_pattern = re.compile(r'(\d{4})\s([BCDFGHJKLMNPRSTVWXYZ]{3})')


def validate(plate):
    """Extract a Spanish licence plate from OCR text.

    The first occurrence of four digits, one whitespace character and three
    allowed consonants is located anywhere in ``plate`` (including on a later
    line of multi-line text) and returned in canonical ``NNNN AAA`` form,
    i.e. with exactly one plain space between the two halves.

    Args:
        plate: Raw text produced by OCR. Non-string values are rejected.

    Returns:
        The normalised plate string, or ``None`` when no plate is found.
    """
    if not isinstance(plate, str):
        return None

    found = _plate_search_pattern.search(plate)
    if found is None:
        return None

    # Rebuild the plate instead of returning the raw slice so that a tab or a
    # newline between the halves never reaches the CSV or the duplicate set.
    digits, letters = found.groups()
    return f'{digits} {letters}'

# CSV file where detections are logged
csv_file_path = 'detections.csv'
csv_headers = ['frame', 'object_class', 'confidence', 'x1', 'y1', 'x2', 'y2', 'plate_text']

# Initialise state
detected_plates = set()  # Plates already recorded, used to avoid duplicate rows
frame_idx = 0

# Video capture configuration
input_video_path = "C:/Users/danif/Downloads/C0142.MP4"
output_video_path = 'output_with_detections.mp4'
vid = cv2.VideoCapture(input_video_path)
if not vid.isOpened():
    # Fail loudly: otherwise the script would "succeed" with an empty CSV and
    # an empty output video.
    vid.release()
    raise OSError(f"Could not open input video: {input_video_path}")

# Output video configuration
frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the output
# play at the wrong speed. Fall back to a default when the container reports
# no usable rate (0, negative or NaN).
DEFAULT_FPS = 30.0
fps = vid.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = DEFAULT_FPS

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_video = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
if not output_video.isOpened():
    # A writer that failed to open silently drops every frame written to it.
    vid.release()
    output_video.release()
    raise OSError(f"Could not open output video for writing: {output_video_path}")

# OCR preprocessing objects do not depend on the frame, so they are built once
# here instead of once per detected plate.
plate_clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
plate_sharpen_kernel = np.array([[0, -2, 0], [-2, 9, -2], [0, -2, 0]])


def _read_plate_text(plate_image):
    """Return the stripped text Tesseract reads from a cropped plate image.

    The crop is converted to grayscale, contrast-enhanced with CLAHE, upscaled
    by 2x and sharpened before being passed to Tesseract with ``--psm 8``.

    Args:
        plate_image: Non-empty crop of the frame (converted with
            ``cv2.COLOR_BGR2GRAY``, so a 3-channel BGR image is expected).

    Returns:
        The OCR output with surrounding whitespace removed.
    """
    gray = cv2.cvtColor(plate_image, cv2.COLOR_BGR2GRAY)
    contrast_img = plate_clahe.apply(gray)
    resized_img = cv2.resize(contrast_img, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    sharpened_img = cv2.filter2D(resized_img, -1, plate_sharpen_kernel)
    return pytesseract.image_to_string(sharpened_img, config='--psm 8', output_type=Output.STRING).strip()


# The try/finally guarantees that the capture, the writer and the preview
# window are released even if inference or OCR raises mid-video.
try:
    # Open the CSV in write mode
    with open(csv_file_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(csv_headers)

        while True:
            ret, img = vid.read()
            if not ret:
                break

            # Inference and OCR read from this untouched copy; boxes and labels
            # are drawn on img only, so annotations never feed back into the
            # models or into later plate crops.
            clean_frame = img.copy()
            frame_h, frame_w = clean_frame.shape[:2]

            # Reset the per-frame person and car counters
            frame_person_count = 0
            frame_car_count = 0

            # ------------------------ Licence-plate detection ------------------------
            results1 = model1(clean_frame, stream=True)
            for r in results1:
                for box in r.boxes:
                    cls = int(box.cls[0])
                    if cls >= len(classNames_model1):
                        continue
                    class_name = classNames_model1[cls]
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    confidence = round(float(box.conf[0]) * 100, 2)

                    # Clamp the crop to the frame: a negative index would wrap
                    # around in NumPy slicing and an empty crop would make
                    # cv2.cvtColor raise.
                    crop_x1, crop_y1 = max(x1, 0), max(y1, 0)
                    crop_x2, crop_y2 = min(x2, frame_w), min(y2, frame_h)
                    plate_image = clean_frame[crop_y1:crop_y2, crop_x1:crop_x2]

                    if plate_image.size > 0:
                        text = _read_plate_text(plate_image)

                        # Validate and normalise the detected plate
                        valid_plate = validate(text)
                        if valid_plate and valid_plate not in detected_plates:
                            detected_plates.add(valid_plate)  # Avoid duplicates
                            writer.writerow([frame_idx, class_name, confidence, x1, y1, x2, y2, valid_plate])
                            print("\nMATRÍCULA DETECTADA Y GUARDADA:", valid_plate)

                    # Draw the plate detection on the output frame
                    cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
                    cv2.putText(img, f'{class_name} {confidence}%', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

            # ------------------------ Person and car detection ------------------------
            results2 = model2(clean_frame, stream=True)
            frame_objects = {"person": set(), "car": set()}  # Boxes already counted in this frame

            for r in results2:
                for box in r.boxes:
                    cls = int(box.cls[0])
                    if cls >= len(classNames_model2):
                        continue
                    class_name = classNames_model2[cls]
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    confidence = round(float(box.conf[0]) * 100, 2)

                    # Identify the object by its box position within the frame
                    obj_id = (x1, y1, x2, y2)

                    # Count people and cars that have not been counted in this frame yet
                    if class_name == "person" and obj_id not in frame_objects["person"]:
                        frame_person_count += 1
                        frame_objects["person"].add(obj_id)
                    elif class_name == "car" and obj_id not in frame_objects["car"]:
                        frame_car_count += 1
                        frame_objects["car"].add(obj_id)

                    # Draw the detection on the output frame
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(img, f'{class_name} {confidence}%', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

            # Overlay the per-frame person and car counts
            cv2.putText(img, f'Personas en frame: {frame_person_count}', (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            cv2.putText(img, f'Coches en frame: {frame_car_count}', (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            # Write the annotated frame to the output video
            output_video.write(img)

            # Show the annotated frame in real time
            cv2.imshow("Detección", img)

            frame_idx += 1
            # Allow leaving the loop by pressing 'Esc'; the mask keeps only the
            # low byte because some backends set higher bits in the key code.
            if cv2.waitKey(1) & 0xFF == 27:
                break
finally:
    # Release resources
    vid.release()
    output_video.release()
    cv2.destroyAllWindows()



0: 384x640 (no detections), 111.4ms
Speed: 17.4ms preprocess, 111.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 bus, 118.1ms
Speed: 0.0ms preprocess, 118.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 96.8ms
Speed: 1.9ms preprocess, 96.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 bus, 106.1ms
Speed: 15.6ms preprocess, 106.1ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 89.6ms
Speed: 0.0ms preprocess, 89.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 5 cars, 1 bus, 100.6ms
Speed: 0.0ms preprocess, 100.6ms inference, 3.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 63.2ms
Speed: 4.6ms preprocess, 63.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 bus, 86.9ms
Speed: 0.0ms preproce