# Desarrollar un prototipo que procese varios v√≠deos propios

## Dataset

In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("sujaymann/car-number-plate-dataset-yolo-format")

print("Path to dataset files:", path)

## Detectar y seguir a las personas y veh√≠culos presentes

In [2]:
import torch
from ultralytics import YOLO
import cv2
import csv
from collections import defaultdict

# ---------------------------
# CONFIGURACI√ìN
# ---------------------------
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
model = YOLO('yolo11n.pt')  # YOLOv11 nano
classes_to_detect = [0, 2]  # 0=person, 2=car
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

video_path = "vc_coches.mp4"
output_video_path = "salida_yolo_tracking_sinOCR.mp4"
output_csv_path = "detecciones_yolo_tracking_sinOCR.csv"

# Contador de objetos por clase
total_count = defaultdict(int)

# ---------------------------
# ABRIR VIDEO Y GUARDAR SALIDA
# ---------------------------
cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# ---------------------------
# CSV
# ---------------------------
csv_file = open(output_csv_path, mode='w', newline='')
csv_writer = csv.writer(csv_file)
csv_writer.writerow([
    "fotograma", "tipo_objeto", "confianza", "id_tracking",
    "x1","y1","x2","y2",
    "matricula", "conf_matricula", "mx1","my1","mx2","my2","texto_matricula"
])

frame_id = 0

# ---------------------------
# PROCESAR VIDEO
# ---------------------------
while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_id += 1

    # Detecta y trackea personas y coches
    results = model.track(
        source=frame,
        persist=True,
        classes=classes_to_detect,
        device=device,
        verbose=False
    )

    annotated_frame = frame.copy()

    if results and results[0] is not None:
        r = results[0]
        boxes = r.boxes

        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cls = int(box.cls[0])
            conf = float(box.conf[0])
            track_id = int(box.id[0]) if box.id is not None else -1
            label = classNames[cls] if cls < len(classNames) else f"class_{cls}"

            # Incrementa contador
            total_count[label] += 1

            # Dibuja bounding box e ID
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(annotated_frame, f"{label} ID:{track_id} {conf:.2f}", 
                        (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 2)

            # Escribir en CSV (sin matr√≠cula)
            csv_writer.writerow([
                frame_id, label, f"{conf:.2f}", track_id,
                x1, y1, x2, y2,
                "", "", 0,0,0,0,""
            ])

    out.write(annotated_frame)
    cv2.imshow("YOLO Tracking Personas y Coches", annotated_frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break

# ---------------------------
# FIN
# ---------------------------
cap.release()
out.release()
csv_file.close()
cv2.destroyAllWindows()

# Muestra totales
print("Total objetos detectados:")
for k,v in total_count.items():
    print(f"{k}: {v}")


Total objetos detectados:
car: 1441
person: 239


## Entrenamiento de un modelo YOLOv8 personalizado para la detecci√≥n de matr√≠culas.
Se utiliz√≥ un conjunto de datos de matr√≠culas disponible en [Kaggle](https://www.kaggle.com/) con anotaciones en formato YOLO, descrito en el archivo `data.yaml`.


In [1]:
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

model.train(
    data="data.yaml",
    imgsz=416,
    epochs=10,
    batch=4,
    device="mps",
    name="matriculas_detector2"
)


New https://pypi.org/project/ultralytics/8.3.225 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.215 üöÄ Python-3.9.24 torch-2.8.0 MPS (Apple M2)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=mps, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=416, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=matriculas_detector24, nbs=64, nms=False, opset=None, opti

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x3d36f42e0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.048048,    