# Desarrollar un prototipo que procese varios vídeos propios

## Detectar y seguir a las personas y vehículos presentes

In [1]:
import torch
from ultralytics import YOLO
import cv2
import csv
from collections import defaultdict

# ---------------------------
# CONFIGURACIÓN
# ---------------------------
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
model = YOLO('yolo11n.pt')  # YOLOv11 nano
classes_to_detect = [0, 2]  # 0=person, 2=car
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

video_path = "VID_20251016_160058367.mp4"
output_video_path = "salida_yolo_tracking_sinOCR.mp4"
output_csv_path = "detecciones_yolo_tracking_sinOCR.csv"

# Contador de objetos por clase
total_count = defaultdict(int)

# ---------------------------
# ABRIR VIDEO Y GUARDAR SALIDA
# ---------------------------
cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# ---------------------------
# CSV
# ---------------------------
csv_file = open(output_csv_path, mode='w', newline='')
csv_writer = csv.writer(csv_file)
csv_writer.writerow([
    "fotograma", "tipo_objeto", "confianza", "id_tracking",
    "x1","y1","x2","y2",
    "matricula", "conf_matricula", "mx1","my1","mx2","my2","texto_matricula"
])

frame_id = 0

# ---------------------------
# PROCESAR VIDEO
# ---------------------------
while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_id += 1

    # Detecta y trackea personas y coches
    results = model.track(
        source=frame,
        persist=True,
        classes=classes_to_detect,
        device=device,
        verbose=False
    )

    annotated_frame = frame.copy()

    if results and results[0] is not None:
        r = results[0]
        boxes = r.boxes

        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cls = int(box.cls[0])
            conf = float(box.conf[0])
            track_id = int(box.id[0]) if box.id is not None else -1
            label = classNames[cls] if cls < len(classNames) else f"class_{cls}"

            # Incrementa contador
            total_count[label] += 1

            # Dibuja bounding box e ID
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(annotated_frame, f"{label} ID:{track_id} {conf:.2f}", 
                        (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 2)

            # Escribir en CSV (sin matrícula)
            csv_writer.writerow([
                frame_id, label, f"{conf:.2f}", track_id,
                x1, y1, x2, y2,
                "", "", 0,0,0,0,""
            ])

    out.write(annotated_frame)
    cv2.imshow("YOLO Tracking Personas y Coches", annotated_frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break

# ---------------------------
# FIN
# ---------------------------
cap.release()
out.release()
csv_file.close()
cv2.destroyAllWindows()

# Muestra totales
print("Total objetos detectados:")
for k,v in total_count.items():
    print(f"{k}: {v}")


Total objetos detectados:
car: 4480
person: 1060
