# Práctica 4

## Bibliotecas

In [13]:
import os
import torch
from ultralytics import YOLO
from PIL import Image
import logging
import warnings
import signal
import sys
import csv
import cv2
import pytesseract
import easyocr
from collections import defaultdict, deque


# Only show errors from the 'ultralytics' logger and silence Python warnings.
logging.getLogger('ultralytics').setLevel(logging.ERROR)
warnings.filterwarnings('ignore')

# Avoid the OpenMP crash on Windows
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Select the GPU when it is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

### Verificación de uso de gpu:

In [8]:
# Report the PyTorch build and, only when CUDA is usable, the active GPU.
# torch.cuda.current_device() raises on CPU-only machines, so the device
# queries are guarded instead of being called unconditionally.
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Current device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print("Current device: cpu")

PyTorch version: 2.5.1+cu121
CUDA available: True
Current device: 0
Device name: NVIDIA GeForce RTX 4050 Laptop GPU


## Entrenamiento

In [None]:
# Load the pretrained model
model = YOLO('yolo11n.pt')

# Fine-tune on the dataset described by data.yaml; the run is saved under the
# name "license_plate_augmented".
model.train(
    data=r"C:\Users\asmae\Documents\INGENIERIA INFORMATICA\4 CUARTO\VC\Practicas\VC_P4&P5\dataset\data.yaml",
    epochs=100,
    imgsz=640,
    batch=16,
    name="license_plate_augmented",
    workers=2,       # for Windows/Jupyter
    plots=False,
    patience=10,     # early stopping
    augment=True,    # enable augmentations
    mosaic=1,        # combine several images
    mixup=0.5,       # mix images (0.5)
    copy_paste=0.3,  # copy/paste objects into other images
    auto_augment="randaugment"  # advanced auto-augment policy
)

# Evaluate the trained model on the validation split and show the metrics.
metrics = model.val()
print(metrics)

## Detección en el vídeo:
### Conteo con Tesseract y EasyOCR

In [None]:
# Class ids requested from the base detector, mapped to the Spanish labels
# written to the CSV files and drawn on the output videos.
VEHICLE_CLASSES = {
    0: "persona",
    2: "coche",
    3: "motocicleta",
    5: "autobus",
    7: "camion"
}

# General-purpose detector/tracker and the custom license-plate detector weights.
base_mnodel = YOLO("yolo11n.pt")
lp_model = YOLO(r"runs/detect/license_plate_augmented/weights/best.pt")

# EasyOCR reader (English character set), requested to run on the GPU.
reader = easyocr.Reader(['en'], gpu=True)

# Input video and the per-OCR-engine outputs (annotated video + detections CSV).
video_path = "C0142.MP4"
output_path_tesseract = "salida_detecciones_pytesseract.mp4"
output_path_easyocr = "salida_detecciones_easyocr.mp4"
csv_path_tesseract = "detecciones_pytesseract.csv"
csv_path_easyocr = "detecciones_easyocr.csv"

video = cv2.VideoCapture(video_path)
if not video.isOpened():
    print("Error al abrir el video")
    # exit() is the interactive-shell helper from `site`; raise SystemExit
    # explicitly so execution stops here with a failure status.
    raise SystemExit(1)

# Keep the frame rate as a float: truncating with int() turns e.g. 29.97 into
# 29 and makes the written videos play at a slightly wrong speed.
fps = video.get(cv2.CAP_PROP_FPS)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

# One writer per OCR engine, both with the input's frame size and rate.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_tesseract = cv2.VideoWriter(output_path_tesseract, fourcc, fps, (width, height))
out_easyocr = cv2.VideoWriter(output_path_easyocr, fourcc, fps, (width, height))

def cerrar_recursos(signal_received=None, frame=None):
    """Release the capture and both writers, then exit with status 0.

    Registered as a signal handler, so it accepts the two handler arguments;
    neither is used.
    """
    print("\nGuardando y cerrando correctamente...")
    # Input capture first, then the two output writers.
    for recurso in (video, out_tesseract, out_easyocr):
        recurso.release()
    cv2.destroyAllWindows()
    raise SystemExit(0)

# Release the video resources cleanly when the process receives SIGINT or SIGTERM.
signal.signal(signal.SIGINT, cerrar_recursos)
signal.signal(signal.SIGTERM, cerrar_recursos)

# Location of the Tesseract executable used by pytesseract.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

def draw_text_with_background(img, text, pos, font_scale=0.6, thickness=2, text_color=(255, 255, 255), bg_color=(0, 0, 0)):
    """Draw `text` on `img` over a filled rectangle of colour `bg_color`.

    `pos` is the requested (x, y) of the text baseline origin. The y value is
    capped so it does not exceed `image height - text height - 10`, and x is
    kept within `[0, image width - text width - 10]`.

    Returns the vertical space used (text height + baseline + 10), which
    callers add to y to place a following line.
    """
    typeface = cv2.FONT_HERSHEY_SIMPLEX
    size, baseline = cv2.getTextSize(text, typeface, font_scale, thickness)
    text_w, text_h = size
    frame_h, frame_w = img.shape[0], img.shape[1]

    req_x, req_y = pos
    anchor_y = min(req_y, frame_h - text_h - 10)
    anchor_x = max(0, min(req_x, frame_w - text_w - 10))

    # Background box: 5 px of padding around the text extent.
    top_left = (anchor_x - 5, anchor_y - text_h - 5)
    bottom_right = (anchor_x + text_w + 5, anchor_y + baseline + 5)
    cv2.rectangle(img, top_left, bottom_right, bg_color, -1)
    cv2.putText(img, text, (anchor_x, anchor_y), typeface, font_scale, text_color, thickness)
    return text_h + baseline + 10

def leer_matricula_easyocr(imagen):
    """Read text from `imagen` with the EasyOCR `reader` and return it cleaned.

    Fragments with more than two raw characters are kept and reduced to their
    alphanumeric characters. Fragments that end up empty after that (pure
    punctuation) are dropped, so they no longer add stray spaces to the
    result. The remaining fragments are joined with single spaces.

    Returns "" when nothing usable is read.
    """
    resultado = reader.readtext(imagen, detail=0, paragraph=False)
    limpios = (''.join(c for c in t if c.isalnum()) for t in resultado if len(t) > 2)
    return ' '.join(t for t in limpios if t)

# State shared across frames:
#   object_tracks: track id -> {"centros": centre history, "tipo": label,
#                               "salido": whether its exit was already counted}
#   exit_counts:   label -> per-edge count of tracks whose centre reached that edge
#   unique_ids:    label -> set of track ids seen with that label
object_tracks = {}
exit_counts = defaultdict(lambda: {"izquierda": 0, "derecha": 0, "arriba": 0, "abajo": 0})
unique_ids = defaultdict(set)

# Both CSV files share this header; only the text in the last column differs.
CSV_HEADER = ["fotograma", "tipo_objeto", "confianza", "identificador_tracking", "x1", "y1", "x2", "y2", "matricula_en_su_caso", "confianza_matricula", "mx1", "my1", "mx2", "my2", "texto_matricula"]


def _exit_direction(cx, cy):
    """Return the frame edge within 5 px of the centre (cx, cy), or None."""
    if cx <= 5:
        return "izquierda"
    if cx >= width - 5:
        return "derecha"
    if cy <= 5:
        return "arriba"
    if cy >= height - 5:
        return "abajo"
    return None


def _draw_plate_info(img, x, y, plate_detected, conf_plate, plate_text):
    """Draw the plate status (and the OCR text when a plate was found) at (x, y)."""
    if not plate_detected:
        draw_text_with_background(img, "Matricula: NO", (x, y), 0.5, 2, (0, 0, 255), (0, 0, 0))
        return
    status_text = f"Matricula: SI (Conf: {conf_plate:.2f})"
    offset = draw_text_with_background(img, status_text, (x, y), 0.5, 2, (0, 255, 0), (0, 0, 0))
    if plate_text:
        draw_text_with_background(img, f"Texto: {plate_text}", (x, y + offset), 0.6, 2, (255, 255, 0), (0, 0, 0))
    else:
        draw_text_with_background(img, "Texto: No legible", (x, y + offset), 0.5, 2, (0, 165, 255), (0, 0, 0))


def _draw_stats_panel(img, frame_num):
    """Blend the frame number, unique-object counts and exit counts onto `img` in place."""
    overlay = img.copy()
    panel_x, panel_y = 10, 30
    draw_text_with_background(overlay, f"Frame: {frame_num}", (panel_x, panel_y), 0.6, 2, (255, 255, 255), (0, 0, 0))
    panel_y += 30
    for tipo in VEHICLE_CLASSES.values():
        draw_text_with_background(overlay, f"{tipo.capitalize()}: {len(unique_ids[tipo])}", (panel_x, panel_y), 0.6, 2, (0, 255, 255), (0, 0, 0))
        panel_y += 25
    panel_y += 10
    for tipo, direcciones in exit_counts.items():
        draw_text_with_background(overlay, f"{tipo.upper()}:", (panel_x, panel_y), 0.6, 2, (255, 255, 255), (50, 50, 50))
        panel_y += 22
        for direccion, count in direcciones.items():
            draw_text_with_background(overlay, f"  {direccion}: {count}", (panel_x + 20, panel_y), 0.55, 2, (0, 200, 0), (0, 0, 0))
            panel_y += 20
        panel_y += 10
    cv2.addWeighted(overlay, 0.9, img, 0.1, 0, img)


try:
    # UTF-8 so that any character returned by the OCR engines can be written;
    # the platform default encoding can raise UnicodeEncodeError mid-run.
    with open(csv_path_tesseract, mode='w', newline='', encoding='utf-8') as f_tesseract, \
            open(csv_path_easyocr, mode='w', newline='', encoding='utf-8') as f_easyocr:
        writer_tesseract = csv.writer(f_tesseract)
        writer_tesseract.writerow(CSV_HEADER)

        writer_easyocr = csv.writer(f_easyocr)
        writer_easyocr.writerow(CSV_HEADER)

        frame_num = 0

        while True:
            ret, frame = video.read()
            if not ret:
                break

            frame_num += 1
            print(f"Frame num: {frame_num}")

            results_coco = base_mnodel.track(frame, classes=[0, 2, 3, 5, 7], persist=True, tracker="bytetrack.yaml", verbose=False)

            # Both output frames start from the same annotated detection plot.
            display_frame_tesseract = results_coco[0].plot()
            display_frame_easyocr = display_frame_tesseract.copy()

            for box in results_coco[0].boxes:
                cls_id = int(box.cls[0].item())
                conf = box.conf[0].item()
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                class_model = VEHICLE_CLASSES.get(cls_id, "desconocido")
                track_id = int(box.id[0]) if box.id is not None else None

                if track_id is not None:
                    unique_ids[class_model].add(track_id)
                    cx = int((x1 + x2) / 2)
                    cy = int((y1 + y2) / 2)
                    track = object_tracks.setdefault(track_id, {"centros": [], "tipo": class_model, "salido": False})
                    track["centros"].append((cx, cy))
                    # Count each track at most once, the first time its centre
                    # reaches a frame edge.
                    if not track["salido"]:
                        direccion = _exit_direction(cx, cy)
                        if direccion is not None:
                            exit_counts[class_model][direccion] += 1
                            track["salido"] = True

                plate_detected = False
                conf_plate = None
                mx1 = my1 = mx2 = my2 = None
                plate_text_tesseract = ""
                plate_text_easyocr = ""

                if class_model != "persona":
                    vehicle_roi = frame[y1:y2, x1:x2]

                    # A degenerate box gives an empty crop; skip plate detection for it.
                    if vehicle_roi.size > 0:
                        results_plate = lp_model.predict(vehicle_roi, save=False, show=False, verbose=False)

                        if len(results_plate[0].boxes) > 0:
                            plate_detected = True
                            plate_plot = results_plate[0].plot()
                            display_frame_tesseract[y1:y2, x1:x2] = plate_plot
                            display_frame_easyocr[y1:y2, x1:x2] = plate_plot

                            plate_box = results_plate[0].boxes[0]
                            px1, py1, px2, py2 = map(int, plate_box.xyxy[0])
                            conf_plate = plate_box.conf[0].item()
                            # Plate coordinates are relative to the vehicle crop;
                            # shift them into full-frame coordinates.
                            mx1, my1, mx2, my2 = x1 + px1, y1 + py1, x1 + px2, y1 + py2

                            plate_roi = vehicle_roi[py1:py2, px1:px2]

                            if plate_roi.size > 0:
                                plate_roi_rgb = cv2.cvtColor(plate_roi, cv2.COLOR_BGR2RGB)
                                plate_text_tesseract = pytesseract.image_to_string(plate_roi_rgb).replace("\n", " ").strip()
                                try:
                                    plate_text_easyocr = leer_matricula_easyocr(plate_roi_rgb)
                                except Exception as e:
                                    print(f"Error EasyOCR frame {frame_num}: {e}")
                                    plate_text_easyocr = ""

                    info_y = y2 + 10
                    _draw_plate_info(display_frame_tesseract, x1, info_y, plate_detected, conf_plate, plate_text_tesseract)
                    _draw_plate_info(display_frame_easyocr, x1, info_y, plate_detected, conf_plate, plate_text_easyocr)

                writer_tesseract.writerow([frame_num, class_model, conf, track_id, x1, y1, x2, y2, plate_detected, conf_plate, mx1, my1, mx2, my2, plate_text_tesseract])
                writer_easyocr.writerow([frame_num, class_model, conf, track_id, x1, y1, x2, y2, plate_detected, conf_plate, mx1, my1, mx2, my2, plate_text_easyocr])

            _draw_stats_panel(display_frame_tesseract, frame_num)
            _draw_stats_panel(display_frame_easyocr, frame_num)

            out_tesseract.write(display_frame_tesseract)
            out_easyocr.write(display_frame_easyocr)
finally:
    # Always release the capture and writers, even when the loop fails.
    video.release()
    out_tesseract.release()
    out_easyocr.release()
    print(f"Video Tesseract: {output_path_tesseract}")
    print(f"Video EasyOCR: {output_path_easyocr}")
    print(f"CSV Tesseract: {csv_path_tesseract}")
    print(f"CSV EasyOCR: {csv_path_easyocr}")

### Conteo con SmolVLM

### Carga del modelo SmolVLM

In [None]:
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image
import torch
import numpy as np
import cv2

# Hugging Face checkpoint used to read the plate text.
model_name = "HuggingFaceTB/SmolVLM-Instruct"

print("Cargando SmolVLM...")
processor = AutoProcessor.from_pretrained(model_name)
# float16 when CUDA is available, float32 otherwise.
# NOTE(review): the name `model` is also assigned to YOLO objects in other
# cells of this notebook; leer_matricula reads this global, so cell order matters.
model = AutoModelForVision2Seq.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)
print("SmolVLM cargado!")

def leer_matricula(imagen):
    """Ask SmolVLM for the license-plate text shown in `imagen`.

    `imagen` may be a file path (str), a NumPy array, or a PIL image.
    Three-channel arrays are treated as BGR (OpenCV frames) and converted to
    RGB; other arrays are wrapped as they are.

    Returns the model's answer with every "/s", "-" and "_" removed.
    """
    if isinstance(imagen, str):
        img = Image.open(imagen).convert("RGB")
    elif not isinstance(imagen, np.ndarray):
        img = imagen.convert("RGB")
    elif imagen.ndim == 3 and imagen.shape[2] == 3:
        img = Image.fromarray(cv2.cvtColor(imagen, cv2.COLOR_BGR2RGB))
    else:
        img = Image.fromarray(imagen)

    pregunta = "What is the license plate number in this image? Only respond with the alphanumeric characters, without spaces or special characters."
    messages = [
        {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": pregunta}]}
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)

    # Tokenise/preprocess, then move every tensor to the model's device.
    encoded = processor(text=prompt, images=[img], return_tensors="pt")
    inputs = {}
    for clave, tensor in encoded.items():
        inputs[clave] = tensor.to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50, do_sample=False)

    texto = processor.decode(outputs[0], skip_special_tokens=True)

    # Keep only the answer: the text after the last "Assistant:" marker, or,
    # failing that, the last line of the decoded output.
    if "Assistant:" in texto:
        texto = texto.rsplit("Assistant:", 1)[1].strip()
    elif "\n" in texto:
        texto = texto.rsplit("\n", 1)[1].strip()

    for sobrante in ("/s", "-", "_"):
        texto = texto.replace(sobrante, "")
    return texto

### Generación del vídeo con SmolVLM


In [None]:
# Class ids requested from the base detector, mapped to the Spanish labels
# written to the CSV file and drawn on the output video.
VEHICLE_CLASSES = {
    0: "persona",
    2: "coche",
    3: "motocicleta",
    5: "autobus",
    7: "camion"
}

# General-purpose detector/tracker and the custom license-plate detector weights.
base_mnodel = YOLO("yolo11n.pt")
lp_model = YOLO(r"runs/detect/license_plate_augmented/weights/best.pt")

video_path = "C0142.MP4"
output_path = "salida_detecciones_smolvlm.mp4"
csv_path = "detecciones_smolvlm.csv"

video = cv2.VideoCapture(video_path)
if not video.isOpened():
    print("Error al abrir el video")
    # exit() is the interactive-shell helper from `site`; raise SystemExit
    # explicitly so execution stops here with a failure status.
    raise SystemExit(1)

# Keep the frame rate as a float: truncating with int() turns e.g. 29.97 into
# 29 and makes the written video play at a slightly wrong speed.
fps = video.get(cv2.CAP_PROP_FPS)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

def cerrar_recursos(signal_received=None, frame=None):
    """Release the capture and the writer, then exit with status 0.

    Registered as a signal handler, so it accepts the two handler arguments;
    neither is used.
    """
    print("\nGuardando y cerrando correctamente...")
    for recurso in (video, out):
        recurso.release()
    cv2.destroyAllWindows()
    raise SystemExit(0)

# Release the video resources cleanly when the process receives SIGINT or SIGTERM.
signal.signal(signal.SIGINT, cerrar_recursos)
signal.signal(signal.SIGTERM, cerrar_recursos)

def draw_text_with_background(img, text, pos, font_scale=0.6, thickness=2, text_color=(255, 255, 255), bg_color=(0, 0, 0)):
    """Draw `text` on `img` over a filled rectangle of colour `bg_color`.

    `pos` is the requested (x, y) of the text baseline origin. The y value is
    capped so it does not exceed `image height - text height - 10`, and x is
    kept within `[0, image width - text width - 10]`.

    Returns the vertical space used (text height + baseline + 10), which
    callers add to y to place a following line.
    """
    typeface = cv2.FONT_HERSHEY_SIMPLEX
    size, baseline = cv2.getTextSize(text, typeface, font_scale, thickness)
    text_w, text_h = size
    frame_h, frame_w = img.shape[0], img.shape[1]

    req_x, req_y = pos
    anchor_y = min(req_y, frame_h - text_h - 10)
    anchor_x = max(0, min(req_x, frame_w - text_w - 10))

    # Background box: 5 px of padding around the text extent.
    top_left = (anchor_x - 5, anchor_y - text_h - 5)
    bottom_right = (anchor_x + text_w + 5, anchor_y + baseline + 5)
    cv2.rectangle(img, top_left, bottom_right, bg_color, -1)
    cv2.putText(img, text, (anchor_x, anchor_y), typeface, font_scale, text_color, thickness)
    return text_h + baseline + 10

# Detect and track objects frame by frame, look for a license plate inside
# every non-person detection, read it with SmolVLM, and write one CSV row per
# detection plus the annotated frame to the output video.
try:
    # UTF-8 so that any character returned by the model can be written; the
    # platform default encoding can raise UnicodeEncodeError mid-run.
    with open(csv_path, mode='w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["fotograma", "tipo_objeto", "confianza", "identificador_tracking", "x1", "y1", "x2", "y2", "matricula_en_su_caso", "confianza_matricula", "mx1", "my1", "mx2", "my2", "texto_matricula"])

        frame_num = 0

        while True:
            ret, frame = video.read()
            if not ret:
                break

            frame_num += 1
            print(f"Frame num: {frame_num}")

            results_coco = base_mnodel.track(frame, classes=[0, 2, 3, 5, 7], persist=True, tracker="bytetrack.yaml", verbose=False)
            display_frame = results_coco[0].plot()

            for box in results_coco[0].boxes:
                cls_id = int(box.cls[0].item())
                conf = box.conf[0].item()
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                class_model = VEHICLE_CLASSES.get(cls_id, "desconocido")
                track_id = int(box.id[0]) if box.id is not None else None

                plate_detected = False
                conf_plate = None
                mx1 = my1 = mx2 = my2 = None
                plate_text = ""

                if class_model != "persona":
                    vehicle_roi = frame[y1:y2, x1:x2]

                    # A degenerate box gives an empty crop; skip plate detection for it.
                    if vehicle_roi.size > 0:
                        results_plate = lp_model.predict(vehicle_roi, save=False, show=False, verbose=False)

                        if len(results_plate[0].boxes) > 0:
                            plate_detected = True
                            display_frame[y1:y2, x1:x2] = results_plate[0].plot()

                            plate_box = results_plate[0].boxes[0]
                            px1, py1, px2, py2 = map(int, plate_box.xyxy[0])
                            conf_plate = plate_box.conf[0].item()
                            # Plate coordinates are relative to the vehicle crop;
                            # shift them into full-frame coordinates.
                            mx1, my1, mx2, my2 = x1 + px1, y1 + py1, x1 + px2, y1 + py2

                            plate_roi = vehicle_roi[py1:py2, px1:px2]

                            if plate_roi.size > 0:
                                try:
                                    # leer_matricula converts 3-channel arrays from
                                    # BGR to RGB itself, so it must receive the BGR
                                    # crop; converting here as well swapped the
                                    # colour channels back before inference.
                                    plate_text = leer_matricula(plate_roi)
                                except Exception as e:
                                    print(f"Error SmolVLM frame {frame_num}: {e}")
                                    plate_text = ""

                    info_y = y2 + 10

                    if plate_detected:
                        status_text = f"Matricula: SI (Conf: {conf_plate:.2f})"
                        offset = draw_text_with_background(display_frame, status_text, (x1, info_y), 0.5, 2, (0, 255, 0), (0, 0, 0))

                        if plate_text:
                            draw_text_with_background(display_frame, f"Texto: {plate_text}", (x1, info_y + offset), 0.6, 2, (255, 255, 0), (0, 0, 0))
                        else:
                            draw_text_with_background(display_frame, "Texto: No legible", (x1, info_y + offset), 0.5, 2, (0, 165, 255), (0, 0, 0))
                    else:
                        draw_text_with_background(display_frame, "Matricula: NO", (x1, info_y), 0.5, 2, (0, 0, 255), (0, 0, 0))

                writer.writerow([frame_num, class_model, conf, track_id, x1, y1, x2, y2, plate_detected, conf_plate, mx1, my1, mx2, my2, plate_text])

            out.write(display_frame)
finally:
    # Always release the capture and writer, even when the loop fails.
    print("\nFinalizando ejecución, guardando video y CSV...")
    video.release()
    out.release()
    cv2.destroyAllWindows()
    print(f"Video guardado en: {output_path}")
    print(f"CSV guardado en: {csv_path}")

## Conteo de clases a partir de los csv

In [12]:
def count_classes(csv_file):
    """Print, per object type, how many distinct tracking ids appear in `csv_file`.

    The file is read as latin-1 with a header row. Rows whose
    `identificador_tracking` is empty or "none" (case-insensitive) are ignored.
    """
    ids_por_tipo = {}

    with open(csv_file, newline='', encoding='latin-1') as handle:
        for row in csv.DictReader(handle):
            etiqueta = row['tipo_objeto']
            identificador = row['identificador_tracking']
            if not identificador or identificador.lower() == "none":
                continue
            ids_por_tipo.setdefault(etiqueta, set()).add(identificador)

    print(f"\nResultados para {csv_file}:")
    for etiqueta, vistos in ids_por_tipo.items():
        print(f"  {etiqueta}: {len(vistos)} únicos")

# Report the unique tracked objects per class for each pipeline's CSV output.
count_classes("detecciones_pytesseract.csv")
count_classes("detecciones_easyocr.csv")
count_classes("detecciones_smolvlm.csv")


Resultados para detecciones_pytesseract.csv:
  autobus: 9 únicos
  coche: 261 únicos
  camion: 11 únicos
  persona: 48 únicos
  motocicleta: 7 únicos

Resultados para detecciones_easyocr.csv:
  autobus: 9 únicos
  coche: 261 únicos
  camion: 11 únicos
  persona: 48 únicos
  motocicleta: 7 únicos

Resultados para detecciones_smolvlm.csv:
  autobus: 7 únicos
  coche: 98 únicos
  camion: 8 únicos
  persona: 21 únicos
  motocicleta: 2 únicos


## Determinación del flujo

In [None]:
# Class ids requested from the detector, mapped to the labels used in the counters.
VEHICLE_CLASSES = {0:"persona",2:"coche",3:"motocicleta",5:"autobus",7:"camion"}
model = YOLO("yolo11n.pt")

video_path = "C0142.MP4"
output_path = "salida_flujo_direccional.mp4"

video = cv2.VideoCapture(video_path)
if not video.isOpened():
    print("Error al abrir el video")
    # exit() is the interactive-shell helper from `site`; raise SystemExit
    # explicitly so execution stops here with a failure status.
    raise SystemExit(1)

# Keep the frame rate as a float: truncating with int() turns e.g. 29.97 into
# 29 and makes the written video play at a slightly wrong speed.
fps = video.get(cv2.CAP_PROP_FPS)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

def cerrar(signal_received=None, frame=None):
    """Release the capture and the writer, then exit with status 0.

    Registered as a signal handler, so it accepts the two handler arguments;
    neither is used.
    """
    print("\nCerrando...")
    video.release()
    out.release()
    sys.exit(0)

# Release the video resources cleanly when the process receives SIGINT or SIGTERM.
signal.signal(signal.SIGINT, cerrar)
signal.signal(signal.SIGTERM, cerrar)

def draw_text(img, text, pos, color=(255,255,255), bg=(0,0,0)):
    """Draw `text` at `pos` on `img` over a filled `bg` rectangle.

    Uses a fixed font scale of 0.6 and thickness 2, with 5 px of padding
    around the text extent. `pos` is the (x, y) of the text baseline origin.
    """
    typeface = cv2.FONT_HERSHEY_SIMPLEX
    scale, stroke, pad = 0.6, 2, 5
    (text_w, text_h), _baseline = cv2.getTextSize(text, typeface, scale, stroke)
    left, base = pos
    top_left = (left - pad, base - text_h - pad)
    bottom_right = (left + text_w + pad, base + pad)
    cv2.rectangle(img, top_left, bottom_right, bg, -1)
    cv2.putText(img, text, (left, base), typeface, scale, color, stroke)

# Active tracks: id -> {"tipo": label, "positions": deque of recent centres,
#                       "entered": whether its entry was already evaluated}
tracks = {}
entry_counts = defaultdict(lambda:{"izquierda":0,"derecha":0,"arriba":0,"abajo":0})
exit_counts  = defaultdict(lambda:{"izquierda":0,"derecha":0,"arriba":0,"abajo":0})
margin = 40              # width in pixels of the border bands
min_frames_inside = 5    # positions required before an entry is evaluated


def _border_side(x, y):
    """Return the border band (of width `margin`) containing (x, y), or None."""
    if x < margin:
        return "izquierda"
    if x > width - margin:
        return "derecha"
    if y < margin:
        return "arriba"
    if y > height - margin:
        return "abajo"
    return None


frame_num = 0

# try/finally so the capture and the writer are released even when a frame fails.
try:
    while True:
        ret, frame = video.read()
        if not ret:
            break
        frame_num += 1

        results = model.track(frame, classes=list(VEHICLE_CLASSES.keys()), persist=True, tracker="bytetrack.yaml", verbose=False)
        display = results[0].plot()
        current_ids = set()

        for box in results[0].boxes:
            if box.id is None:
                continue
            track_id = int(box.id[0])
            tipo = VEHICLE_CLASSES.get(int(box.cls[0].item()), "desconocido")
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cx, cy = int((x1 + x2) / 2), int((y1 + y2) / 2)
            current_ids.add(track_id)

            track = tracks.setdefault(track_id, {"tipo": tipo, "positions": deque(maxlen=10), "entered": False})
            track["positions"].append((cx, cy))

            # Entry: once enough positions are stored, classify the oldest one.
            if not track["entered"] and len(track["positions"]) >= min_frames_inside:
                side = _border_side(*track["positions"][0])
                if side is not None:
                    entry_counts[tipo][side] += 1
                track["entered"] = True

        # Exit: a track missing from this frame is classified by its last
        # position and then forgotten.
        # NOTE(review): a track is dropped the first frame it is missing; if the
        # tracker later reports the same id again it is treated as a new object
        # and may be counted twice — confirm whether a grace period is needed.
        for tid, data in list(tracks.items()):
            if tid in current_ids or not data["positions"]:
                continue
            side = _border_side(*data["positions"][-1])
            if side is not None:
                exit_counts[data["tipo"]][side] += 1
            del tracks[tid]

        # Draw the border bands and the statistics.
        cv2.rectangle(display, (0, 0), (margin, height), (0, 0, 255), 2)
        cv2.rectangle(display, (width - margin, 0), (width, height), (0, 0, 255), 2)
        cv2.rectangle(display, (0, 0), (width, margin), (0, 0, 255), 2)
        cv2.rectangle(display, (0, height - margin), (width, height), (0, 0, 255), 2)

        y = 30
        draw_text(display, f"Frame {frame_num}", (10, y))
        y += 30
        draw_text(display, "== ENTRADAS ==", (10, y), (0, 255, 0))
        y += 25
        for t, dirs in entry_counts.items():
            draw_text(display, f"{t}: izq {dirs['izquierda']} der {dirs['derecha']} arr {dirs['arriba']} aba {dirs['abajo']}", (10, y), (0, 255, 0))
            y += 25
        y += 10
        draw_text(display, "== SALIDAS ==", (10, y), (0, 200, 255))
        y += 25
        for t, dirs in exit_counts.items():
            draw_text(display, f"{t}: izq {dirs['izquierda']} der {dirs['derecha']} arr {dirs['arriba']} aba {dirs['abajo']}", (10, y), (0, 200, 255))
            y += 25

        out.write(display)
finally:
    video.release()
    out.release()

print("Vídeo con flujo direccional guardado:", output_path)

## Anonimización de objetos detectados en el vídeo

In [1]:
import cv2

def anonimize(frame, *box):
    """Return a Gaussian-blurred copy of the `box` region of `frame`.

    `box` is given as four positional values (x1, y1, x2, y2). `frame` itself
    is not modified; the caller writes the result back.
    """
    left, top, right, bottom = box
    region = frame[top:bottom, left:right].copy()
    blurred = cv2.GaussianBlur(region, (51, 51), 15)
    return blurred

In [None]:

base_mnodel = YOLO("yolo11n.pt")
lp_model = YOLO(r"runs/detect/license_plate_augmented/weights/best.pt")

video_path = "C0142.MP4"
output_path = "salida_anonimación_de_personas_y_matriculas.mp4"

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error al abrir el video")
    # exit() is the interactive-shell helper from `site`; raise SystemExit
    # explicitly so execution stops here with a failure status.
    raise SystemExit(1)

# Read the properties of the input video so the output replicates them.
# The frame rate stays a float: int() would turn e.g. 29.97 into 29.
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

frame_num = 0

# try/finally so the capture and the writer are released even when a frame fails.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_num += 1

        results_coco = base_mnodel.track(
            frame,
            classes=[0, 2, 3, 5, 7],
            persist=True,
            tracker="bytetrack.yaml"
        )

        # Pass 1: blur people and license plates in place. Plate boxes are
        # remembered so pass 2 can outline them without running the plate
        # detector again on the already-blurred frame.
        plate_boxes = []
        for box in results_coco[0].boxes:
            cls_id = int(box.cls[0].item())
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # A degenerate box has nothing to blur and would give the plate
            # detector an empty crop.
            if y2 <= y1 or x2 <= x1:
                continue

            if cls_id == 0:
                frame[y1:y2, x1:x2] = anonimize(frame, x1, y1, x2, y2)
                continue

            car_roi = frame[y1:y2, x1:x2]
            results_plate = lp_model.predict(car_roi, save=False, show=False)
            if len(results_plate[0].boxes) == 0:
                continue

            # Only the first detection is used.
            # NOTE(review): presumably the highest-confidence one; any further
            # plates detected in the same crop are left unblurred — confirm.
            plate_box = results_plate[0].boxes[0]
            px1, py1, px2, py2 = map(int, plate_box.xyxy[0])

            # Plate coordinates are relative to car_roi; shift them into
            # full-frame coordinates.
            mx1, my1, mx2, my2 = x1 + px1, y1 + py1, x1 + px2, y1 + py2
            if my2 > my1 and mx2 > mx1:
                frame[my1:my2, mx1:mx2] = anonimize(frame, mx1, my1, mx2, my2)
                plate_boxes.append((mx1, my1, mx2, my2))

        display_frame = frame.copy()

        # Pass 2: draw the detection boxes and labels over the anonymised frame.
        for box in results_coco[0].boxes:
            cls_id = int(box.cls[0].item())
            conf = box.conf[0].item()

            x1, y1, x2, y2 = map(int, box.xyxy[0])
            tipo = "persona" if cls_id == 0 else "vehiculo"

            # The tracking id can be None when the tracker has not assigned one.
            track_id = int(box.id[0]) if box.id is not None else None

            color = (0, 255, 0) if tipo == "vehiculo" else (0, 0, 255)
            cv2.rectangle(display_frame, (x1, y1), (x2, y2), color, 2)

            label = f"{tipo} ID:{track_id} {conf:.2f}"
            cv2.putText(display_frame, label, (x1, y1-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Outline every blurred plate in blue.
        for mx1, my1, mx2, my2 in plate_boxes:
            cv2.rectangle(display_frame, (mx1, my1), (mx2, my2), (255, 0, 0), 2)

        out.write(display_frame)

        # Progress feedback every 30 frames.
        if frame_num % 30 == 0:
            print(f"Procesando frame {frame_num}...")
finally:
    cap.release()
    out.release()

print(f"Video procesado guardado en: {output_path}")
print(f"Total de frames procesados: {frame_num}")